Last updated: 2018-04-15
Code version: 3b5dcc4
source("../code/gdash_lik.R")
source("../code/gdfit.R")
source("../code/count_to_summary.R")
library(limma)
library(edgeR)
library(ashr)
library(plyr)
library(ggplot2)
library(reshape2)
d <- 10
n <- 1e4
B <- matrix(rnorm(n * d), n, d)
Sigma <- B %*% t(B) + diag(n)
sigma <- diag(Sigma)
Rho <- cov2cor(Sigma)
par(mar = c(5.1, 4.1, 1, 2.1))
hist(Rho[lower.tri(Rho)], xlab = expression(rho[ij]), main = "")
rhobar <- c()
for (l in 1 : 10) {
rhobar[l] <- (sum(Rho^l) - n) / (n * (n - 1))
}
set.seed(20)
z <- rnorm(d)
Z <- B %*% z + rnorm(n)
Z <- Z / sqrt(sigma)
sd(Z)
hist(Z, breaks = 20, prob = TRUE)
lines(seq(-5, 5, by = 0.1), dnorm(seq(-5, 5, by = 0.1)), col = "blue")
p <- pnorm(-abs(Z)) * 2
par(mfcol = c(2, 2))
par(mar = c(5.1, 4.1, 1, 2.1))
hist(p, breaks = 100, main = "", xlab = "p-value")
plot(-log(p), ylim = range(-log(p), -log(pnorm(-sqrt(2 * log(n))) * 2), -log(0.05 / n)))
abline(h = -log(pnorm(-sqrt(2 * log(n))) * 2), col = "maroon")
abline(h = -log(0.05 / n), col = "red")
abline(h = -log(0.001), col = "green")
abline(h = -log(0.05), col = "blue")
Z <- rnorm(n)
p <- pnorm(-abs(Z)) * 2
hist(p, breaks = 100, main = "", xlab = "p-value")
plot(-log(p), ylim = range(-log(p), -log(pnorm(-sqrt(2 * log(n))) * 2), -log(0.05 / n)))
abline(h = -log(pnorm(-sqrt(2 * log(n))) * 2), col = "maroon")
abline(h = -log(0.05 / n), col = "red")
abline(h = -log(0.001), col = "green")
abline(h = -log(0.05), col = "blue")
nsim <- 1e4
Z.list <- W <- list()
for (i in 1 : nsim) {
z <- rnorm(d)
Z <- B %*% z + rnorm(n)
Z <- Z / sqrt(sigma)
Z.list[[i]] <- Z
Z.GD <- gdfit.mom(Z, 100)
W[[i]] <- Z.GD$w
}
Z.sim <- Z.list
W.sim <- W
r <- readRDS("../data/liver.rds")
top_genes_index = function (g, X) {
return(order(rowSums(X), decreasing = TRUE)[1 : g])
}
lcpm = function (r) {
R = colSums(r)
t(log2(((t(r) + 0.5) / (R + 1)) * 10^6))
}
nsamp <- 5
ngene <- 1e4
Y = lcpm(r)
subset = top_genes_index(ngene, Y)
r = r[subset,]
nsim <- 1e4
Z.list <- W <- list()
for (i in 1 : nsim) {
## generate data
counts <- r[, sample(ncol(r), 2 * nsamp)]
design <- model.matrix(~c(rep(0, nsamp), rep(1, nsamp)))
summary <- count_to_summary(counts, design)
Z <- summary$z
Z.list[[i]] <- Z
Z.GD <- gdfit.mom(Z, 100)
W[[i]] <- Z.GD$w
}
Z.gtex <- Z.list
W.gtex <- W
quantile.vec <- seq(5e-5, 1 - 5e-5, by = 5e-5)
emp.cdf.Z <- sapply(quantile.vec, function(x) {sapply(Z.gtex, function(y) mean(y <= qnorm(x)))})
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.4
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] reshape2_1.4.3 ggplot2_2.2.1 plyr_1.8.4
[4] edgeR_3.20.2 limma_3.34.4 ashr_2.2-2
[7] Rmosek_8.0.69 PolynomF_1.0-1 CVXR_0.94-4
[10] REBayes_1.2 Matrix_1.2-12 SQUAREM_2017.10-1
[13] EQL_1.0-0 ttutils_1.0-1
loaded via a namespace (and not attached):
[1] gmp_0.5-13.1 Rcpp_0.12.16 pillar_1.0.1
[4] compiler_3.4.3 git2r_0.21.0 R.methodsS3_1.7.1
[7] R.utils_2.6.0 iterators_1.0.9 tools_3.4.3
[10] digest_0.6.15 bit_1.1-12 tibble_1.4.1
[13] gtable_0.2.0 evaluate_0.10.1 lattice_0.20-35
[16] rlang_0.1.6 foreach_1.4.4 yaml_2.1.18
[19] parallel_3.4.3 Rmpfr_0.6-1 ECOSolveR_0.3-2
[22] stringr_1.3.0 knitr_1.20 locfit_1.5-9.1
[25] rprojroot_1.3-2 bit64_0.9-7 grid_3.4.3
[28] R6_2.2.2 rmarkdown_1.9 magrittr_1.5
[31] scales_0.5.0 MASS_7.3-47 backports_1.1.2
[34] codetools_0.2-15 htmltools_0.3.6 scs_1.1-1
[37] colorspace_1.3-2 stringi_1.1.6 lazyeval_0.2.1
[40] munsell_0.4.3 doParallel_1.0.11 pscl_1.5.2
[43] truncnorm_1.0-7 R.oo_1.21.0
This R Markdown site was created with workflowr