Last updated: 2018-06-23
Code version: 635b68f
Extract data from the top 101 genes identified
library(Biobase)
df <- readRDS(file="../data/eset-final.rds")
pdata <- pData(df)
fdata <- fData(df)
# select endogeneous genes
counts <- exprs(df)[grep("ENSG", rownames(df)), ]
log2cpm.all <- t(log2(1+(10^6)*(t(counts)/pdata$molecules)))
#macosko <- readRDS("data/cellcycle-genes-previous-studies/rds/macosko-2015.rds")
counts <- counts[,order(pdata$theta)]
log2cpm.all <- log2cpm.all[,order(pdata$theta)]
pdata <- pdata[order(pdata$theta),]
log2cpm.quant <- readRDS("../output/npreg-trendfilter-quantile.Rmd/log2cpm.quant.rds")
# select external validation samples
set.seed(99)
nvalid <- round(ncol(log2cpm.quant)*.15)
ii.valid <- sample(1:ncol(log2cpm.quant), nvalid, replace = F)
ii.nonvalid <- setdiff(1:ncol(log2cpm.quant), ii.valid)
log2cpm.quant.nonvalid <- log2cpm.quant[,ii.nonvalid]
log2cpm.quant.valid <- log2cpm.quant[,ii.valid]
theta <- pdata$theta
names(theta) <- rownames(pdata)
# theta.nonvalid <- theta_moved[ii.nonvalid]
theta.nonvalid <- theta[ii.nonvalid]
theta.valid <- theta[ii.valid]
sig.genes <- readRDS("../output/npreg-trendfilter-quantile.Rmd/out.stats.ordered.sig.101.rds")
expr.sig <- log2cpm.quant.nonvalid[rownames(log2cpm.quant.nonvalid) %in% rownames(sig.genes), ]
# get predicted times
# set training samples
source("../peco/R/primes.R")
source("../peco/R/partitionSamples.R")
parts <- partitionSamples(1:ncol(log2cpm.quant.nonvalid), runs=5,
nsize.each = rep(151,5))
part_indices <- parts$partitions
Fitting
source("../peco/R/fit.trendfilter.generic.R")
source("../peco/R/cycle.npreg.R")
source("../code/utility.R")
expr.sig <- expr.sig[seq(1,nrow(expr.sig), by=2),]
fits.nw <- vector("list", 5)
for (run in 1:5) {
print(run)
# fitting training data
Y_train <- expr.sig[,part_indices[[run]]$train]
theta_train <- theta.nonvalid[part_indices[[run]]$train]
fit.train <- cycle.npreg.insample(Y = Y_train,
theta = theta_train,
ncores=10,
method.trend="npcirc.nw")
# fitting test data
Y_test <- expr.sig[,part_indices[[run]]$test]
theta_test <- theta.nonvalid[part_indices[[run]]$test]
fit.test <- cycle.npreg.outsample(Y_test=Y_test,
sigma_est=fit.train$sigma_est,
funs_est=fit.train$funs_est,
method.grid = "uniform",
method.trend="npcirc.nw",
ncores=12)
fits.nw[[run]] <- list(fit.train=fit.train,
fit.test=fit.test)
}
saveRDS(fits.nw, file = "../output/method-npreg-prelim-results.Rmd/fits.nw.rds")
fits.ll <- vector("list", 5)
for (run in 1:5) {
print(run)
# fitting training data
Y_train <- expr.sig[,part_indices[[run]]$train]
theta_train <- theta.nonvalid[part_indices[[run]]$train]
fit.train <- cycle.npreg.insample(Y = Y_train,
theta = theta_train,
ncores=10,
method.trend="npcirc.ll")
# fitting test data
Y_test <- expr.sig[,part_indices[[run]]$test]
theta_test <- theta.nonvalid[part_indices[[run]]$test]
fit.test <- cycle.npreg.outsample(Y_test=Y_test,
sigma_est=fit.train$sigma_est,
funs_est=fit.train$funs_est,
method.grid = "uniform",
method.trend="npcirc.ll",
ncores=12)
fits.ll[[run]] <- list(fit.train=fit.train,
fit.test=fit.test)
}
saveRDS(fits.ll, file = "../output/method-npreg-prelim-results.Rmd/fits.ll.rds")
fits.trend2 <- vector("list", 5)
for (run in 1:5) {
print(run)
# fitting training data
Y_train <- expr.sig[,part_indices[[run]]$train]
theta_train <- theta.nonvalid[part_indices[[run]]$train]
fit.train <- cycle.npreg.insample(Y = Y_train,
theta = theta_train,
polyorder=2,
ncores=10,
method.trend="trendfilter")
# fitting test data
Y_test <- expr.sig[,part_indices[[run]]$test]
theta_test <- theta.nonvalid[part_indices[[run]]$test]
fit.test <- cycle.npreg.outsample(Y_test=Y_test,
sigma_est=fit.train$sigma_est,
funs_est=fit.train$funs_est,
method.grid = "uniform",
method.trend="trendfilter",
polyorder=2,
ncores=12)
fits.trend2[[run]] <- list(fit.train=fit.train,
fit.test=fit.test)
}
saveRDS(fits.trend2, file = "../output/method-npreg-prelim-results.Rmd/fits.trend2.rds")
fits.trend3 <- vector("list", 5)
for (run in 1:5) {
print(run)
# fitting training data
Y_train <- expr.sig[,part_indices[[run]]$train]
theta_train <- theta.nonvalid[part_indices[[run]]$train]
fit.train <- cycle.npreg.insample(Y = Y_train,
theta = theta_train,
polyorder=3,
ncores=10,
method.trend="trendfilter")
# fitting test data
Y_test <- expr.sig[,part_indices[[run]]$test]
theta_test <- theta.nonvalid[part_indices[[run]]$test]
fit.test <- cycle.npreg.outsample(Y_test=Y_test,
sigma_est=fit.train$sigma_est,
funs_est=fit.train$funs_est,
method.grid = "uniform",
method.trend="trendfilter",
ncores=12)
fits.trend3[[run]] <- list(fit.train=fit.train,
fit.test=fit.test)
}
saveRDS(fits.trend3, file = "../output/method-npreg-prelim-results.Rmd/fits.trend3.rds")
load results
fits.nw <- readRDS(file = "../output/method-npreg-prelim-results.Rmd/fits.nw.rds")
fits.ll <- readRDS(file = "../output/method-npreg-prelim-results.Rmd/fits.ll.rds")
fits.trend2 <- readRDS(file = "../output/method-npreg-prelim-results.Rmd/fits.trend2.rds")
fits.trend3 <- readRDS(file = "../output/method-npreg-prelim-results.Rmd/fits.trend3.rds")
run=1
Y_test <- expr.sig[,part_indices[[run]]$test]
theta_test <- theta.nonvalid[part_indices[[run]]$test]
time_nw <- fits.nw[[1]]$fit.test$cell_times_est[match(names(theta_test),
names(fits.nw[[1]]$fit.test$cell_times_est))]
time_ll <- fits.ll[[1]]$fit.test$cell_times_est[match(names(theta_test),
names(fits.ll[[1]]$fit.test$cell_times_est))]
time_trend2 <- fits.trend2[[1]]$fit.test$cell_times_est[match(names(theta_test),
names(fits.trend2[[1]]$fit.test$cell_times_est))]
time_trend3 <- fits.trend3[[1]]$fit.test$cell_times_est[match(names(theta_test),
names(fits.trend3[[1]]$fit.test$cell_times_est))]
par(mfrow=c(2,2))
plot(theta_test, time_nw)
plot(theta_test, time_ll)
plot(theta_test, time_trend2)
plot(theta_test, time_trend3)
Misclassification rate
xy_time <- lapply(1:5, function(run) {
xy <- data.frame(
ref_time=theta.nonvalid[part_indices[[run]]$test],
pred_time_nw=fits.nw[[run]]$fit.test$cell_times_est[
match(names(theta.nonvalid[part_indices[[run]]$test]),
names(fits.nw[[run]]$fit.test$cell_times_est))],
pred_time_ll=fits.ll[[run]]$fit.test$cell_times_est[
match(names(theta.nonvalid[part_indices[[run]]$test]),
names(fits.ll[[run]]$fit.test$cell_times_est))],
pred_time_trend2=fits.trend2[[run]]$fit.test$cell_times_est[
match(names(theta.nonvalid[part_indices[[run]]$test]),
names(fits.trend2[[run]]$fit.test$cell_times_est))],
pred_time_trend3=fits.trend3[[run]]$fit.test$cell_times_est[
match(names(theta.nonvalid[part_indices[[run]]$test]),
names(fits.trend3[[run]]$fit.test$cell_times_est))],
dapi=pdata$gfp.median.log10sum.adjust[match(names(theta.nonvalid[part_indices[[run]]$test]),
rownames(pdata))])
return(xy)
})
for (i in 1:5) {
xy_time[[i]]$diff_time_nw <- pmin(
abs(xy_time[[i]]$pred_time_nw-xy_time[[i]]$ref_time),
abs(xy_time[[i]]$pred_time_nw-(2*pi-xy_time[[i]]$ref_time)))
xy_time[[i]]$diff_time_ll <- pmin(
abs(xy_time[[i]]$pred_time_ll-xy_time[[i]]$ref_time),
abs(xy_time[[i]]$pred_time_ll-(2*pi-xy_time[[i]]$ref_time)))
xy_time[[i]]$diff_time_trend2 <- pmin(
abs(xy_time[[i]]$pred_time_trend2-xy_time[[i]]$ref_time),
abs(xy_time[[i]]$pred_time_trend2-(2*pi-xy_time[[i]]$ref_time)))
xy_time[[i]]$diff_time_trend3 <- pmin(
abs(xy_time[[i]]$pred_time_trend3-xy_time[[i]]$ref_time),
abs(xy_time[[i]]$pred_time_trend3-(2*pi-xy_time[[i]]$ref_time)))
}
mean(sapply(xy_time, function(x) mean(x$diff_time_nw))/2/pi)
[1] 0.1041805
mean(sapply(xy_time, function(x) mean(x$diff_time_ll))/2/pi)
[1] 0.1028109
mean(sapply(xy_time, function(x) mean(x$diff_time_trend2))/2/pi)
[1] 0.1077922
mean(sapply(xy_time, function(x) mean(x$diff_time_trend3))/2/pi)
[1] 0.1082553
sd(sapply(xy_time, function(x) mean(x$diff_time_nw))/2/pi)
[1] 0.00582703
sd(sapply(xy_time, function(x) mean(x$diff_time_ll))/2/pi)
[1] 0.007288811
sd(sapply(xy_time, function(x) mean(x$diff_time_trend2))/2/pi)
[1] 0.007913356
sd(sapply(xy_time, function(x) mean(x$diff_time_trend3))/2/pi)
[1] 0.006872838
Circular rank correlation
source("../peco/R/cycle.corr.R")
corrs.rank <- lapply(1:5, function(i) {
data.frame(cbind(nw=rFLRank.IndTestRand(xy_time[[i]]$ref_time, xy_time[[i]]$pred_time_nw),
ll=rFLRank.IndTestRand(xy_time[[i]]$ref_time, xy_time[[i]]$pred_time_ll),
trend2=rFLRank.IndTestRand(xy_time[[i]]$ref_time, xy_time[[i]]$pred_time_trend2),
trend3=rFLRank.IndTestRand(xy_time[[i]]$ref_time, xy_time[[i]]$pred_time_trend3)) )
})
mean(sapply(1:5, function(i) corrs.rank[[i]]$nw[1]))
[1] 0.1837925
mean(sapply(1:5, function(i) corrs.rank[[i]]$ll[1]))
[1] 0.1682122
mean(sapply(1:5, function(i) corrs.rank[[i]]$trend2[1]))
[1] 0.1900646
mean(sapply(1:5, function(i) corrs.rank[[i]]$trend3[1]))
[1] 0.1832577
sd(sapply(1:5, function(i) corrs.rank[[i]]$nw[1]))
[1] 0.02875289
sd(sapply(1:5, function(i) corrs.rank[[i]]$ll[1]))
[1] 0.02452356
sd(sapply(1:5, function(i) corrs.rank[[i]]$trend2[1]))
[1] 0.02569556
sd(sapply(1:5, function(i) corrs.rank[[i]]$trend3[1]))
[1] 0.04631589
PVE
source("../peco/R/utility.R")
nw <- sapply(1:5, function(i) get.pve(with(xy_time[[i]],dapi[order(pred_time_nw)])))
ll <- sapply(1:5, function(i) get.pve(with(xy_time[[i]],dapi[order(pred_time_ll)])))
trend2 <- sapply(1:5, function(i) get.pve(with(xy_time[[i]],dapi[order(pred_time_trend2)])))
trend3 <- sapply(1:5, function(i) get.pve(with(xy_time[[i]],dapi[order(pred_time_trend3)])))
save(nw, ll, trend2, trend3,
file="../output/method-npreg-prelim-results.Rmd/pve.methods.rda")
load(file="../output/method-npreg-prelim-results.Rmd/pve.methods.rda")
mean(nw)
[1] 0.1209519
mean(ll)
[1] 0.1537196
mean(trend2)
[1] 0.2085997
mean(trend3)
[1] 0.1370636
sd(nw)
[1] 0.1234897
sd(ll)
[1] 0.1032134
sd(trend2)
[1] 0.09199753
sd(trend3)
[1] 0.1128983
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Scientific Linux 7.4 (Nitrogen)
Matrix products: default
BLAS/LAPACK: /software/openblas-0.2.19-el7-x86_64/lib/libopenblas_haswellp-r0.2.19.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] Biobase_2.38.0 BiocGenerics_0.24.0
loaded via a namespace (and not attached):
[1] Rcpp_0.12.17 digest_0.6.15 rprojroot_1.3-2 backports_1.1.2
[5] git2r_0.21.0 magrittr_1.5 evaluate_0.10.1 stringi_1.1.6
[9] rmarkdown_1.8 tools_3.4.3 stringr_1.2.0 yaml_2.1.16
[13] compiler_3.4.3 htmltools_0.3.6 knitr_1.18
This R Markdown site was created with workflowr