Is BH robust to correlation?

Last updated: 2018-05-06

Code version: 0b0a394

source("../code/gdash_lik.R")
source("../code/gdfit.R")
source("../code/count_to_summary.R")
library(limma)
library(edgeR)
library(ashr)
library(plyr)
library(ggplot2)
library(reshape2)
set.seed(777)

Simulated Data

## Factor-model covariance for the simulation: Sigma = B B' + I, where B is an
## n x d matrix of independent standard normal draws.
d <- 10
n <- 1e4
B <- matrix(rnorm(n * d), n, d)
Sigma <- B %*% t(B) + diag(n)
## Marginal variances (the diagonal of Sigma); the simulation loop further
## down divides each draw by sqrt(sigma).
sigma <- diag(Sigma)
## Pairwise correlation matrix implied by Sigma.
Rho <- cov2cor(Sigma)
## mar is c(bottom, left, top, right); the top margin is reduced to 1 because
## the histogram is drawn without a title.
par(mar = c(5.1, 4.1, 1, 2.1))
## Histogram of the off-diagonal correlations (lower triangle of Rho only).
hist(Rho[lower.tri(Rho)], xlab = expression(rho[ij]), main = "")

## rhobar[l] is the average of rho_ij^l over all pairs i != j, for l = 1..10:
## the n diagonal entries of Rho^l (subtracted as n) are removed from the
## total, leaving n * (n - 1) off-diagonal terms.
rhobar <- vapply(
  1 : 10,
  function(l) {
    offdiag.total <- sum(Rho^l) - n
    offdiag.total / (n * (n - 1))
  },
  numeric(1)
)

## Draw nsim vectors of n z-scores from the factor model and keep, for each
## one, the `w` component returned by gdfit.mom() (sourced from ../code).
nsim <- 1e4
Z.list <- W <- list()
## The marginal standard deviations do not change between replicates.
sd.marginal <- sqrt(sigma)
for (i in seq_len(nsim)) {
  ## d latent factors shared through B, plus independent unit-variance noise
  z <- rnorm(d)
  ## dividing by the marginal sd standardizes each coordinate
  Z <- (B %*% z + rnorm(n)) / sd.marginal
  Z.list[[i]] <- Z
  W[[i]] <- gdfit.mom(Z, 100)$w
}
Z.sim <- Z.list
W.sim <- W

GTEx data

## Load the liver data set. The code below treats `r` as a count table with
## genes in rows (ranked via rowSums) and samples in columns (drawn via ncol).
r <- readRDS("../data/liver.rds")

## Row indices of the g rows of X with the largest row sums, largest first.
##
## g: number of rows wanted. Capped at nrow(X); g = 0 returns integer(0).
## X: numeric matrix (or anything else rowSums() accepts).
##
## Returns an integer vector of row indices.
top_genes_index <- function (g, X) {
  ranked <- order(rowSums(X), decreasing = TRUE)
  ## `[1 : g]` is avoided on purpose: 1 : 0 is c(1, 0), which would return one
  ## row for g = 0, and g > nrow(X) would pad the result with NA.
  ranked[seq_len(min(g, length(ranked)))]
}
## log2 counts-per-million of a count table whose columns are summed to get
## the per-column totals.
##
## r: numeric matrix of counts.
##
## Returns a matrix shaped like r holding
## log2((count + 0.5) / (column total + 1) * 10^6).
lcpm <- function (r) {
  col.total <- colSums(r)
  ## Work on the transpose so the length-ncol(r) vector of totals recycles
  ## down the columns of t(r), i.e. one total per original column.
  per.million <- ((t(r) + 0.5) / (col.total + 1)) * 10^6
  t(log2(per.million))
}

## Samples per group, and number of genes kept (same as the simulation's n).
nsamp <- 5
ngene <- n

## Keep only the ngene rows of r with the largest total log2-CPM.
Y <- lcpm(r)
subset <- top_genes_index(ngene, Y)
r <- r[subset, ]

## Build nsim z-score vectors from the GTEx counts: each replicate draws
## 2 * nsamp columns of r, labels the first nsamp as group 0 and the rest as
## group 1, and keeps the `z` component of count_to_summary() together with
## the `w` component of gdfit.mom() (both sourced from ../code).
nsim <- 1e4
Z.list <- W <- list()
## The two-group design does not depend on the replicate, so build it once.
design <- model.matrix(~c(rep(0, nsamp), rep(1, nsamp)))
for (i in seq_len(nsim)) {
  ## generate data
  counts <- r[, sample(ncol(r), 2 * nsamp)]
  summary <- count_to_summary(counts, design)
  Z <- summary$z
  Z.list[[i]] <- Z
  Z.GD <- gdfit.mom(Z, 100)
  W[[i]] <- Z.GD$w
}
Z.gtex <- Z.list
## Stored as W.gtex: assigning to W.sim here would overwrite the weights
## saved from the simulated data.
W.gtex <- W

BH

## BH on the simulated z-scores. No signal is added to Z.sim in this file, so
## any rejection is a false discovery: the FDP of a data set is 1 if it has
## at least one rejection and 0 otherwise, and the plotted curve is the
## fraction of data sets with at least one rejection at each nominal level.
p <- lapply(Z.sim, function(x) {pnorm(-abs(x)) * 2})
q <- lapply(p, p.adjust, method = "BH")
q.cutoff <- seq(0.01, 0.99, by = 0.01)
## A data set has a rejection at a cutoff exactly when its smallest q-value is
## at or below that cutoff, so one min() per data set replaces a full scan of
## every q-value for every cutoff.
q.min <- vapply(q, min, numeric(1))
fdp <- vapply(q.cutoff, function(cutoff) {mean(q.min <= cutoff)}, numeric(1))
plot(q.cutoff, fdp, xlab = "Nominal FDR", ylab = "FDP",
     xlim = range(q.cutoff, fdp), ylim = range(q.cutoff, fdp),
     type = "l")
## Reference line y = x: realized rate equal to the nominal level.
abline(0, 1, col = "red", lty = 3)

## BH on the GTEx-derived z-scores, treated here as null (group labels are
## given to randomly drawn samples -- NOTE(review): confirm this is the
## intended null). The curve is the fraction of data sets with at least one
## rejection at each nominal level.
p <- lapply(Z.gtex, function(x) {pnorm(-abs(x)) * 2})
q <- lapply(p, p.adjust, method = "BH")
q.cutoff <- seq(0.001, 0.200, by = 0.001)
## A data set has a rejection at a cutoff exactly when its smallest q-value is
## at or below that cutoff, so one min() per data set replaces a full scan of
## every q-value for every cutoff.
q.min <- vapply(q, min, numeric(1))
fdp <- vapply(q.cutoff, function(cutoff) {mean(q.min <= cutoff)}, numeric(1))
plot(q.cutoff, fdp, xlab = "Nominal FDR", ylab = "FDP",
     xlim = range(q.cutoff, fdp), ylim = range(q.cutoff, fdp),
     type = "l")
## Reference line y = x: realized rate equal to the nominal level.
abline(0, 1, col = "red", lty = 3)

## BH on GTEx z-scores with sparse signal added: in every data set a random
## n.signal of the 1e4 coordinates get effect 3 and the other n.null stay 0.
n.null <- 9.5e3
n.signal <- 0.5e3
theta <- lapply(seq_along(Z.gtex), function(j) {
  sample(c(rep(0, n.null), rep(3, n.signal)))
})
X.gtex <- lapply(seq_along(Z.gtex), function(j) {theta[[j]] + Z.gtex[[j]]})
p <- lapply(X.gtex, function(x) {pnorm(-abs(x)) * 2})
q <- lapply(p, p.adjust, method = "BH")
q.cutoff <- seq(0.001, 0.200, by = 0.001)
## fdp[[i]] / tdp[[i]] hold one value per data set for cutoff q.cutoff[i].
fdp <- tdp <- vector("list", length(q.cutoff))
for (i in seq_along(q.cutoff)) {
  fdp.vec <- tdp.vec <- numeric(length(q))
  for (j in seq_along(q)) {
    rejected <- q[[j]] <= q.cutoff[i]
    theta.rejected <- theta[[j]][rejected]
    ## FDP = false rejections / number of rejections. The denominator must be
    ## sum(rejected); length(rejected) is always the total number of tests.
    ## max(1, .) makes the FDP 0 when nothing is rejected.
    fdp.vec[j] <- sum(theta.rejected == 0) / max(1, sum(rejected))
    ## TDP = true rejections / number of coordinates that carry signal.
    tdp.vec[j] <- sum(theta.rejected != 0) / n.signal
  }
  fdp[[i]] <- fdp.vec
  tdp[[i]] <- tdp.vec
}
fdp.avg <- vapply(fdp, mean, numeric(1))
tdp.avg <- vapply(tdp, mean, numeric(1))
plot(q.cutoff, fdp.avg, type = "l", xlim = range(q.cutoff, fdp.avg), ylim = range(q.cutoff, fdp.avg), xlab = "Nominal FDR", ylab = "Average FDP")
## Reference line y = x: average FDP equal to the nominal level.
abline(0, 1, col = "red")

plot(q.cutoff, tdp.avg, type = "l", xlab = "Nominal FDR", ylab = "TDP")

Session information

## Print the R version, platform and attached/loaded packages for this run.
sessionInfo()

R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.4

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] compiler_3.4.3  backports_1.1.2 magrittr_1.5    rprojroot_1.3-2
 [5] tools_3.4.3     htmltools_0.3.6 yaml_2.1.18     Rcpp_0.12.16   
 [9] stringi_1.1.6   rmarkdown_1.9   knitr_1.20      git2r_0.21.0   
[13] stringr_1.3.0   digest_0.6.15   evaluate_0.10.1

This R Markdown site was created with workflowr

Is `BH` robust to correlation?

Lei Sun

2018-04-14

Simulated Data

GTEx data

BH

Session information