Last updated: 2018-11-17

workflowr checks: (Click a bullet for more information)
Expand here to see past versions:


Overview

We applied two-sample tests for mean difference to simulated datasets.

The methods applied include:

The simulated data were made from experimental datasets as described below.

  1. Draw (sample without replacement) p genes and N samples from the experimental dataset.

  2. For the Correlated case, assign samples to Group 1 or Group 2.

  3. For the Uncorrelated case, permute sample labels for each gene, then in the permuted sample, assign samples to Group 1 or Group 2.

In addition, the parameter setting is:

Regarding the experimental data:


Results

library(dscrutils)
#setwd("~/Dropbox/GitHub/dsc-log-fold-change/dsc")
dir_dsc <- "/Users/joycehsiao/Dropbox/GitHub/dsc-log-fold-change/dsc/benchmark"
out <- dscquery(dir_dsc, 
                c("get_data", "get_data.n1", "get_data.n2", "method", "method.p"))
Loading dsc-query output from CSV file.
Reading DSC outputs:
 - method.p: vectors not extracted (set max.extract.vector = 946 to extract)
plotRest <- function(dir_dsc, dscoutput, 
                     sim_case=c("sample_correlated", "sample_uncorrelated"), 
                     sample_size=c(50,300),
                     title_label, plot=T, return_res=T) {
  n_methods <- nlevels(dscoutput$method)
#dscoutput <- out; sim_case="random_sample"; sample_size=50  
  out.sub <- dscoutput[dscoutput$get_data==sim_case & dscoutput$get_data.n1==sample_size,]
  
  res <- vector("list",n_methods)
  for (i in 1:nrow(out.sub)) {
#    print(i)
    fl <- readRDS(file.path(dir_dsc,
                         paste0(as.character(out.sub$method.p[i]), ".rds")))
    res[[i]] <- data.frame(method = as.character(out.sub$method)[i],
               n1_n2 = paste0(out.sub$get_data.n1[i],".",
                              out.sub$get_data.n2[i]),
               pval = fl$p,
               stringsAsFactors = F)
  }
  names(res) <- as.character(out.sub$method)
  
  if (plot) {
    cols <- RColorBrewer::brewer.pal(n_methods,name="Accent")
    par(mfrow=c(3,2))
    for (i in 1:length(res)) {
      hist(res[[i]]$pval, main="",
           xlab = "p-values", ylab = "Frequency",
           nclass = 20, col=cols[i])
      title(main=names(res)[i])
    }
    title(title_label, outer=T, line=-1)
    
    qq <- lapply(1:length(res), function(i) {
      qqplot(x=runif(sample_size*2,0,1), y=res[[i]]$pval, plot.it=F)
    })
    plot(qq[[1]]$x, qq[[1]]$y, col = cols[1], cex=.7, pch = 16,
         xlab = "Uniform(0,1)", ylab = "Empirical distribution",
         main = "QQ-plot")
    for (i in 2:n_methods) {
      points(qq[[i]]$x, qq[[i]]$y, col = cols[i], cex=.7, pch = 16)
    }
    abline(0,1, col = "black")
    title(title_label, outer=TRUE, line=-1)
  }
  
  if (return_res) {
  return(res)
  }
}
out.corr.50 <- plotRest(dir_dsc=dir_dsc, dscoutput=out, 
                         sim_case="sample_correlated", 
                         sample_size=50,
                         title_label="Correlated case: 50 vs 50", plot=T, return_res=T)

out.corr.300 <- plotRest(dir_dsc=dir_dsc, dscoutput=out, 
                         sim_case="sample_correlated", 
                         sample_size=300,
                         title_label="Correlated case: 300 vs 300", plot=T, return_res=T)

out.uncorr.50 <- plotRest(dir_dsc=dir_dsc, dscoutput=out, 
                         sim_case="sample_uncorrelated", 
                         sample_size=50,
                         title_label="Uncorrelated case: 50 vs 50", plot=T, return_res=T)

out.uncorr.300 <- plotRest(dir_dsc=dir_dsc, dscoutput=out, 
                         sim_case="sample_uncorrelated", 
                         sample_size=300,
                         title_label="Uncorrelated case: 300 vs 300", plot=T, return_res=T)

Session information

sessionInfo()
R version 3.4.1 (2017-06-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dscrutils_0.2.7.11

loaded via a namespace (and not attached):
 [1] workflowr_1.1.1    Rcpp_1.0.0         digest_0.6.18     
 [4] rprojroot_1.3-2    R.methodsS3_1.7.1  backports_1.1.2   
 [7] magrittr_1.5       git2r_0.23.0       evaluate_0.12     
[10] stringi_1.2.4      whisker_0.3-2      R.oo_1.22.0       
[13] R.utils_2.7.0      rmarkdown_1.10     RColorBrewer_1.1-2
[16] tools_3.4.1        stringr_1.3.1      yaml_2.2.0        
[19] compiler_3.4.1     htmltools_0.3.6    knitr_1.20        

This reproducible R Markdown analysis was created with workflowr 1.1.1