BH robust to correlation?
Last updated: 2018-05-06
Code version: 0b0a394
source("../code/gdash_lik.R")
source("../code/gdfit.R")
source("../code/count_to_summary.R")
library(limma)
library(edgeR)
library(ashr)
library(plyr)
library(ggplot2)
library(reshape2)
# Build a rank-d-plus-identity covariance for n correlated N(0, .) variables.
# The seed and the single rnorm() call fix the factor loadings B.
set.seed(777)
d <- 10
n <- 1e4
B <- matrix(rnorm(n * d), nrow = n, ncol = d)
Sigma <- B %*% t(B) + diag(n)
sigma <- diag(Sigma)   # marginal variances, used later to standardise draws
Rho <- cov2cor(Sigma)  # implied pairwise correlation matrix

# Histogram of the off-diagonal (lower-triangle) pairwise correlations.
par(mar = c(5.1, 4.1, 1, 2.1))
hist(Rho[lower.tri(Rho)], xlab = expression(rho[ij]), main = "")

# Average of rho_ij^l over all off-diagonal pairs, for l = 1, ..., 10.
# Subtracting n removes the unit diagonal from the total.
rhobar <- vapply(
  1 : 10,
  function(l) (sum(Rho^l) - n) / (n * (n - 1)),
  numeric(1)
)
# Draw nsim vectors of correlated null z-scores with covariance Rho and fit
# gdfit.mom() (order 100) to each one, keeping the fitted `w` component.
nsim <- 1e4
Z.list <- vector("list", nsim)
W <- vector("list", nsim)
for (i in seq_len(nsim)) {
  # Shared factor draw first, then independent noise: RNG order matters.
  z <- rnorm(d)
  Z <- (B %*% z + rnorm(n)) / sqrt(sigma)  # standardise to unit variance
  Z.list[[i]] <- Z
  Z.GD <- gdfit.mom(Z, 100)
  W[[i]] <- Z.GD$w
}
Z.sim <- Z.list
W.sim <- W
# Load the liver data set. NOTE(review): presumably a genes-x-samples count
# matrix, since it is passed to lcpm() and column-sampled below -- confirm.
r <- readRDS(file = "../data/liver.rds")
#' Indices of the rows of `X` with the largest row sums.
#'
#' @param g Number of rows to select.
#' @param X Numeric matrix (or data frame) accepted by `rowSums()`.
#' @return Integer vector of row indices, ordered by decreasing row sum. It has
#'   `min(g, nrow(X))` elements, so `g = 0` yields an empty vector and
#'   `g > nrow(X)` yields every row instead of NA-padded indices.
top_genes_index <- function(g, X) {
  ranked <- order(rowSums(X), decreasing = TRUE)
  # seq_len() avoids the `1 : 0` trap; min() avoids indexing past the end.
  ranked[seq_len(min(g, length(ranked)))]
}
#' Log2 counts-per-million of a count table.
#'
#' Each column total is used as that column's library size. Counts are offset
#' by 0.5 and library sizes by 1 before scaling to one million and taking log2.
#'
#' @param r Count table with one column per sample.
#' @return Matrix of log2-CPM values with the same layout as `r`.
lcpm <- function(r) {
  lib_size <- colSums(r)
  # After transposing, rows are samples, so the length-ncol(r) divisor
  # recycles down each column and lines up with the right sample.
  scaled <- (t(r) + 0.5) / (lib_size + 1)
  t(log2(scaled * 10^6))
}
# Null z-scores from real counts: repeatedly draw 2 * nsamp columns of `r`,
# label the first nsamp as group 0 and the rest as group 1, and summarise
# with count_to_summary(). The labels are assigned to randomly drawn columns.
nsamp <- 5
ngene <- n

# Keep only the ngene rows with the largest total log2-CPM.
Y <- lcpm(r)
top_idx <- top_genes_index(ngene, Y)
r <- r[top_idx, ]

nsim <- 1e4
Z.list <- vector("list", nsim)
W <- vector("list", nsim)

# The two-group design does not depend on the draw, so build it once.
design <- model.matrix(~c(rep(0, nsamp), rep(1, nsamp)))

for (i in seq_len(nsim)) {
  ## generate data
  counts <- r[, sample(ncol(r), 2 * nsamp)]
  fit <- count_to_summary(counts, design)
  Z <- fit$z
  Z.list[[i]] <- Z
  Z.GD <- gdfit.mom(Z, 100)
  W[[i]] <- Z.GD$w
}
Z.gtex <- Z.list
# Stored under its own name: the original assigned this to W.sim, which
# silently overwrote the weights fitted to the simulated correlated nulls.
W.gtex <- W
# Apply BH to every simulated null data set and, for each nominal level,
# record the fraction of data sets with at least one rejection. All effects
# are null here, so any rejection is a false discovery.
p <- lapply(Z.sim, function(z) 2 * pnorm(-abs(z)))  # two-sided p-values
q <- lapply(p, function(pv) p.adjust(pv, method = "BH"))
q.cutoff <- seq(0.01, 0.99, by = 0.01)

# fd[[i]][[k]]: number of rejections in data set k at level q.cutoff[i].
fd <- lapply(q.cutoff, function(level) {
  lapply(q, function(qv) sum(qv <= level))
})
fdp <- lapply(fd, function(n_rej) mean(n_rej != 0))

axis_range <- range(q.cutoff, fdp)
plot(q.cutoff, fdp,
     xlab = "Nominal FDR", ylab = "FDP",
     xlim = axis_range, ylim = axis_range,
     type = "l")
abline(0, 1, col = "red", lty = 3)  # y = x reference line
# Same check on the z-scores derived from real counts: apply BH to each
# replicate and track how often it rejects anything at each nominal level.
p <- lapply(Z.gtex, function(z) 2 * pnorm(-abs(z)))  # two-sided p-values
q <- lapply(p, function(pv) p.adjust(pv, method = "BH"))
q.cutoff <- seq(0.001, 0.200, by = 0.001)

# fd[[i]][[k]]: number of rejections in replicate k at level q.cutoff[i].
fd <- lapply(q.cutoff, function(level) {
  lapply(q, function(qv) sum(qv <= level))
})
fdp <- lapply(fd, function(n_rej) mean(n_rej != 0))

axis_range <- range(q.cutoff, fdp)
plot(q.cutoff, fdp,
     xlab = "Nominal FDR", ylab = "FDP",
     xlim = axis_range, ylim = axis_range,
     type = "l")
abline(0, 1, col = "red", lty = 3)  # y = x reference line
# One effect vector per replicate: 9500 zeros and 500 effects of size 3,
# in a fresh random order for every replicate.
theta <- lapply(seq_len(1e4), function(j) {
  sample(c(rep(0, 9.5e3), rep(3, 0.5e3)))
})

# Observations = true effects + correlated null noise.
X.gtex <- lapply(seq_len(1e4), function(j) theta[[j]] + Z.gtex[[j]])
# BH on data with real signal: for each nominal level and each replicate,
# compute the false discovery proportion (FDP) and true discovery proportion
# (TDP), then average both over replicates.
p <- lapply(X.gtex, function(x) 2 * pnorm(-abs(x)))  # two-sided p-values
q <- lapply(p, p.adjust, method = "BH")
q.cutoff <- seq(0.001, 0.200, by = 0.001)

n.rep <- length(q)
fdp <- tdp <- vector("list", length(q.cutoff))
for (i in seq_along(q.cutoff)) {
  fdp.vec <- tdp.vec <- numeric(n.rep)
  for (j in seq_len(n.rep)) {
    rejected <- q[[j]] <= q.cutoff[i]
    is.signal <- theta[[j]] != 0
    # FDP = false rejections / number of rejections (0 when nothing is
    # rejected). The original divided by length(mask), i.e. the total
    # number of tests, which understates the FDP.
    fdp.vec[j] <- sum(rejected & !is.signal) / max(1, sum(rejected))
    # TDP = true rejections / number of non-null effects. The original
    # divided by a hard-coded 1e3 although theta has 0.5e3 signals.
    tdp.vec[j] <- sum(rejected & is.signal) / max(1, sum(is.signal))
  }
  fdp[[i]] <- fdp.vec
  tdp[[i]] <- tdp.vec
}
fdp.avg <- vapply(fdp, mean, numeric(1))
tdp.avg <- vapply(tdp, mean, numeric(1))

axis_range <- range(q.cutoff, fdp.avg)
plot(q.cutoff, fdp.avg, type = "l",
     xlim = axis_range, ylim = axis_range,
     xlab = "Nominal FDR", ylab = "Average FDP")
abline(0, 1, col = "red")  # y = x reference line
plot(q.cutoff, tdp.avg, type = "l", xlab = "Nominal FDR", ylab = "TDP")
# Print the R version, platform and loaded packages for reproducibility.
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.4
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] compiler_3.4.3 backports_1.1.2 magrittr_1.5 rprojroot_1.3-2
[5] tools_3.4.3 htmltools_0.3.6 yaml_2.1.18 Rcpp_0.12.16
[9] stringi_1.1.6 rmarkdown_1.9 knitr_1.20 git2r_0.21.0
[13] stringr_1.3.0 digest_0.6.15 evaluate_0.10.1
This R Markdown site was created with workflowr