Last updated: 2017-06-03
Code version: 82f2a7d
library(edgeR)
library(limma)
library(sva)
library(cate)
library(vicar)
library(ashr)
library(pROC)
## Load helper code; one_sim() below calls gdash(), which is expected to be
## defined by this file.
source("../code/gdash.R")
## Input matrix handed to n_sim()/one_sim(); one_sim() transposes it before
## passing it to seqgendiff::poisthin().
mat <- readRDS("../data/liver.sim.rds")
## Turn a count matrix into per-gene summary statistics for the second column
## of the design matrix, using edgeR normalisation followed by limma's
## voom -> lmFit -> eBayes pipeline.
##
## Arguments:
##   counts  count matrix handed to edgeR::DGEList (one_sim() passes genes in
##           rows and samples in columns).
##   design  design matrix; column 2 is used both as the DGEList group and as
##           the coefficient that is summarised.
##
## Returns a list with
##   betahat    the lmFit coefficient for design column 2,
##   sebetahat  an implied standard error, defined as betahat / z,
##   z          a z-score carrying the sign of the moderated t statistic and
##              the two-sided tail area of the moderated p-value.
counts_to_summary <- function (counts, design) {
  dgecounts <- edgeR::calcNormFactors(
    edgeR::DGEList(counts = counts, group = design[, 2])
  )
  v <- limma::voom(dgecounts, design, plot = FALSE)
  lim <- limma::lmFit(v)
  r.ebayes <- limma::eBayes(lim)
  p <- r.ebayes$p.value[, 2]
  t <- r.ebayes$t[, 2]
  ## Convert the p-value to a z-score through the upper tail directly.
  ## The previous form, qnorm(1 - p / 2), rounds 1 - p / 2 to exactly 1 for
  ## p below about 2.2e-16, which gives z = Inf and hence sebetahat = 0.
  z <- sign(t) * qnorm(p / 2, lower.tail = FALSE)
  betahat <- lim$coefficients[, 2]
  ## Standard error chosen so that betahat / sebetahat reproduces z.
  sebetahat <- betahat / z
  list(betahat = betahat, sebetahat = sebetahat, z = z)
}
## Run one simulated data set through every method under comparison and
## collect the evaluation metrics.
##
## Arguments:
##   mat    input matrix; it is transposed before being given to
##          seqgendiff::poisthin().
##   ngene  number of genes requested from poisthin (also sets the number of
##          locfdr histogram breaks, round(ngene / 20)).
##   nsamp  number of samples requested from poisthin.
##   pi0    passed to poisthin as prop_null.
##   sd     passed to poisthin as the sd of signal_params (mean is fixed at 0).
##
## Returns a list with
##   pi     named vector of pi0 estimates (qvalue, locfdr, then the ash-type fits),
##   mse    named vector of mean squared errors (ols, then the ash-type fits),
##   auc    named vector of ROC AUCs,
##   alpha  the grid of thresholds used for the calibration curves,
##   pFDP   matrix, one row per alpha and one column per method,
##   pFSP   matrix, one row per alpha and one column per ash-type fit.
one_sim <- function (mat, ngene, nsamp, pi0, sd) {
  ## add simulated signals
  mat.sim <- seqgendiff::poisthin(
    t(mat),
    nsamp = nsamp,
    ngene = ngene,
    gselect = "random",
    signal_params = list(mean = 0, sd = sd),
    prop_null = pi0
  )
  counts <- t(mat.sim$Y) ## ngene * nsamples matrix
  design <- mat.sim$X
  beta <- mat.sim$beta
  which_signal <- (beta != 0)

  ## methods using summary statistics only
  sumstat <- counts_to_summary(counts, design)
  ## Two-sided p-value taken from the lower tail directly. The previous form,
  ## (1 - pnorm(abs(z))) * 2, rounds to exactly 0 once |z| exceeds roughly 8.3,
  ## which ties all of the strongest signals together.
  fit.pvalue <- 2 * pnorm(-abs(sumstat$z))
  fit.BH <- p.adjust(fit.pvalue, method = "BH")
  fit.qvalue <- qvalue::qvalue(fit.pvalue)
  fit.locfdr <- locfdr::locfdr(sumstat$z, bre = round(ngene / 20), plot = 0)
  fit.ash <- ashr::ash(
    sumstat$betahat, sumstat$sebetahat,
    mixcompdist = "normal", method = "fdr"
  )
  fit.gdash <- gdash(sumstat$betahat, sumstat$sebetahat)
  ## Re-run ash with the prior held fixed at the one fitted by gdash.
  fit.gdash.ash <- ashr::ash(
    sumstat$betahat, sumstat$sebetahat,
    fixg = TRUE, g = fit.gdash$fitted_g
  )

  ## methods using data matrix
  Y <- t(log(counts + 0.5))
  X <- design
  num_sv <- sva::num.sv(dat = t(Y), mod = X, method = "be")
  mout <- vicar::mouthwash(
    Y = Y, X = X, k = num_sv,
    cov_of_interest = 2, include_intercept = FALSE
  )
  cate_cate <- cate::cate.fit(
    X.primary = X[, 2, drop = FALSE],
    X.nuis = X[, -2, drop = FALSE],
    Y = Y, r = num_sv, adj.method = "rr"
  )
  sva_sva <- sva::sva(
    dat = t(Y), mod = X, mod0 = X[, -2, drop = FALSE], n.sv = num_sv
  )
  ## limma fit with the estimated surrogate variables appended to the design.
  X.sva <- cbind(X, sva_sva$sv)
  lmout <- limma::lmFit(object = t(Y), design = X.sva)
  eout <- limma::ebayes(lmout)
  svaout <- list()
  svaout$betahat <- lmout$coefficients[, 2]
  svaout$sebetahat <- lmout$stdev.unscaled[, 2] * sqrt(eout$s2.post)
  svaout$pvalues <- eout$p.value[, 2]

  ## result: roc auc
  ## AUC of one per-gene score against the true signal indicator.
  auc_of <- function (score) {
    pROC::roc(response = which_signal, predictor = score)$auc
  }
  roc_res <- c(
    pvalue = auc_of(fit.pvalue),
    BH = auc_of(fit.BH),
    qvalue = auc_of(fit.qvalue$lfdr),
    locfdr = auc_of(fit.locfdr$fdr),
    ash = auc_of(ashr::get_lfdr(fit.ash)),
    cash = auc_of(ashr::get_lfdr(fit.gdash.ash)),
    mouthwash = auc_of(c(mout$result$lfdr)),
    cate = auc_of(c(cate_cate$beta.p.value)),
    sva = auc_of(c(svaout$pvalues))
  )

  ## ash with summary statistics
  method_list <- list()
  method_list$cate <- list()
  method_list$cate$betahat <- c(cate_cate$beta)
  method_list$cate$sebetahat <- c(
    sqrt(cate_cate$beta.cov.row * cate_cate$beta.cov.col) / sqrt(nrow(X))
  )
  method_list$sva <- list()
  method_list$sva$betahat <- c(svaout$betahat)
  method_list$sva$sebetahat <- c(svaout$sebetahat)
  ashfit <- lapply(method_list, FUN = function (x) {
    ashr::ash(x$betahat, x$sebetahat, mixcompdist = "normal", method = "fdr")
  })
  ashfit$ash <- fit.ash
  ashfit$cash <- fit.gdash.ash
  ashfit$mouthwash <- mout
  ## Fix the reporting order of the ash-type fits.
  ashfit <- ashfit[c("ash", "cash", "mouthwash", "cate", "sva")]

  ## pi0
  ## NOTE(review): sapply is kept here because the return shape of
  ## ashr::get_pi0 on a mouthwash fit is not visible from this file.
  pi0_res <- sapply(ashfit, FUN = ashr::get_pi0)
  pi0_res <- c(
    qvalue = fit.qvalue$pi0,
    locfdr = min(1, fit.locfdr$fp0["mlest", "p0"]),
    pi0_res
  )

  ## mse
  mse_res <- vapply(
    ashfit,
    FUN = function (x) mean((ashr::get_pm(x) - beta)^2),
    FUN.VALUE = numeric(1)
  )
  mse_res <- c(ols = mean((sumstat$betahat - beta)^2), mse_res)

  ## pFDP calibration
  ## Share of non-signals among the genes with tail_stat <= alpha
  ## (NaN when nothing is selected). `obs` is unused but kept so that both
  ## calibration functions can be called the same way.
  pFDP_alpha <- function (alpha, tail_stat, true, obs) {
    1 - mean(true[tail_stat <= alpha])
  }
  ## Share of selected genes whose observed sign differs from the true sign.
  pFSP_alpha <- function (alpha, tail_stat, true, obs) {
    selected <- tail_stat <= alpha
    mean(sign(obs[selected]) != sign(true[selected]))
  }
  ## Evaluate one calibration function at every threshold in alpha_list.
  tail_cali_list <- function (alpha_list, tail_cali_alpha, tail_stat, true, obs) {
    vapply(
      alpha_list, tail_cali_alpha, numeric(1),
      tail_stat = tail_stat, true = true, obs = obs
    )
  }
  alpha_list <- seq(0, 0.2, by = 0.001)
  pFDP <- vapply(
    ashfit,
    FUN = function (x) {
      tail_cali_list(alpha_list, pFDP_alpha, ashr::get_qvalue(x), which_signal, x$data$x)
    },
    FUN.VALUE = numeric(length(alpha_list))
  )
  pFDP_BH <- tail_cali_list(alpha_list, pFDP_alpha, fit.BH, which_signal, sumstat$betahat)
  pFDP_qvalue <- tail_cali_list(alpha_list, pFDP_alpha, fit.qvalue$qvalues, which_signal, sumstat$betahat)
  pFDP_res <- cbind(BH = pFDP_BH, qvalue = pFDP_qvalue, pFDP)

  ## pFSR calibration
  pFSP_res <- vapply(
    ashfit,
    FUN = function (x) {
      tail_cali_list(alpha_list, pFSP_alpha, ashr::get_svalue(x), beta, x$data$x)
    },
    FUN.VALUE = numeric(length(alpha_list))
  )

  list(
    pi = pi0_res, mse = mse_res, auc = roc_res,
    alpha = alpha_list, pFDP = pFDP_res, pFSP = pFSP_res
  )
}
## Repeat one_sim() n times and reshape the per-run results so that each
## metric can be summarised across runs.
##
## Arguments:
##   n      number of simulation runs; must be a single number >= 1.
##   mat, ngene, nsamp, pi0, sd
##          forwarded unchanged to one_sim() on every run.
##
## Returns a list with
##   pi0, mse, auc  matrices with one row per run and one named column per
##                  method (column names taken from the first run),
##   alpha          the threshold grid reported by the last run,
##   pFDP, pFSP     lists with one element per method; each element is a
##                  matrix with one row per run and one column per alpha.
n_sim <- function (n, mat, ngene, nsamp, pi0, sd) {
  ## The reshaping below reads the first and last run, so at least one run is
  ## required; fail early instead of iterating over 1:0.
  stopifnot(is.numeric(n), length(n) == 1L, is.finite(n), n >= 1)

  ## Runs are executed sequentially so that the RNG stream is consumed in the
  ## same order as with a plain for loop.
  sims <- lapply(seq_len(n), function (i) {
    one_sim(mat, ngene, nsamp, pi0, sd)
  })
  n_runs <- length(sims)

  ## Stack a named per-run vector into a runs-by-methods matrix.
  stack_runs <- function (field) {
    per_run <- lapply(sims, function (s) s[[field]])
    stacked <- matrix(unlist(per_run), nrow = n_runs, byrow = TRUE)
    colnames(stacked) <- names(per_run[[1]])
    stacked
  }

  ## Split per-run (alpha x method) matrices into one runs-by-alpha matrix
  ## per method.
  split_by_method <- function (field) {
    per_run <- lapply(sims, function (s) s[[field]])
    first <- per_run[[1]]
    curves <- lapply(seq_len(ncol(first)), function (j) {
      t(vapply(per_run, function (m) m[, j], numeric(nrow(first))))
    })
    names(curves) <- colnames(first)
    curves
  }

  list(
    pi0 = stack_runs("pi"),
    mse = stack_runs("mse"),
    auc = stack_runs("auc"),
    alpha = sims[[n_runs]]$alpha,
    pFDP = split_by_method("pFDP"),
    pFSP = split_by_method("pFSP")
  )
}
## Simulation settings, all forwarded to n_sim() below.
sd <- 0.6     ## sd of the simulated signal (signal_params in one_sim)
pi0 <- 0.9    ## prop_null handed to poisthin in one_sim
ngene <- 1e3  ## genes per simulated data set
nsamp <- 10   ## samples per simulated data set
nsim <- 100   ## number of simulation runs
## Seed the RNG before the runs, then time the whole batch while storing
## the combined results in `res`.
set.seed(777)
system.time(res <- n_sim(nsim, mat, ngene, nsamp, pi0, sd))
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Warning in log(rowSums(sweep(x = exp(ldmix - ldmax), MARGIN = 2, STATS =
pi_vals, : NaNs produced
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
Warning in locfdr::locfdr(summary$z, bre = round(ngene/20), plot = 0): CM
estimation failed, middle of histogram non-normal
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5 Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5
user system elapsed
1703.469 381.549 2135.044
## Print the R version, platform and package versions used for this run.
sessionInfo()
R version 3.3.3 (2017-03-06)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.5
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Rmosek_7.1.3 PolynomF_0.94 cvxr_0.0.0.9009
[4] REBayes_0.62 Matrix_1.2-8 SQUAREM_2016.10-1
[7] EQL_1.0-0 ttutils_1.0-1 pROC_1.9.1
[10] ashr_2.1-19 vicar_0.1.6 cate_1.0.4
[13] sva_3.20.0 genefilter_1.54.2 mgcv_1.8-17
[16] nlme_3.1-131 edgeR_3.14.0 limma_3.28.5
loaded via a namespace (and not attached):
[1] Rcpp_0.12.11 lattice_0.20-34 corpcor_1.6.8
[4] esaBcv_1.2.1 assertthat_0.2.0 rprojroot_1.2
[7] digest_0.6.12 foreach_1.4.3 truncnorm_1.0-7
[10] plyr_1.8.4 backports_1.0.5 stats4_3.3.3
[13] RSQLite_1.0.0 evaluate_0.10 ggplot2_2.2.1
[16] rlang_0.1 lazyeval_0.2.0 pscl_1.4.9
[19] svd_0.4 annotate_1.50.0 S4Vectors_0.10.1
[22] qvalue_2.4.2 rmarkdown_1.5 splines_3.3.3
[25] stringr_1.2.0 munsell_0.4.3 BiocGenerics_0.18.0
[28] ruv_0.9.6 htmltools_0.3.6 tibble_1.3.1
[31] IRanges_2.6.0 codetools_0.2-15 leapp_1.2
[34] XML_3.98-1.4 MASS_7.3-45 grid_3.3.3
[37] xtable_1.8-2 gtable_0.2.0 DBI_0.6-1
[40] git2r_0.18.0 magrittr_1.5 scales_0.4.1
[43] stringi_1.1.2 reshape2_1.4.2 doParallel_1.0.10
[46] seqgendiff_0.1.0 locfdr_1.1-8 iterators_1.0.8
[49] tools_3.3.3 Biobase_2.32.0 parallel_3.3.3
[52] survival_2.40-1 yaml_2.1.14 AnnotationDbi_1.34.3
[55] colorspace_1.2-6 knitr_1.16
This R Markdown site was created with workflowr