Last updated: 2017-11-29
Code version: ee0de70
Previously, we saw a wide range of variability across plates in DAPI intensities, and to a lesser extent, in Green and Red intensities. Here we look at variation between individuals and see that there’s significantly smaller variation between individuals in all three measurements. In addition, in the across-plate results, the DAPI distributions have similar shape across plates and a possible mean-shift between distributions. While, the shape of the green/red distributions are not consistent across plates, possibly reflecting differences in proprotion of samples expressing red/green fluorescent proteins.
For normalization approach, we can use mean correction approaches for DAPI. While, for Green/Red, it’s less obvious what would be a good approach. Let me try qsmooth
and see….
library(data.table)
library(dplyr)
library(ggplot2)
library(cowplot)
library(wesanderson)
library(RColorBrewer)
library(Biobase)
Name all plates.
plates <- c("18511_18855","18855_19101","18855_19160","18870_18511",
"18870_18855","18870_19101","18870_19160","19098_18511",
"19098_18870","19098_19160","19101_18511","19101_19098",
"19160_18870","19101_19160","19160_18511", "18855_19098")
Combine intensity stats from different plates.
# make the negative ones be the samllest one within its own plate
ints <- do.call(rbind, lapply(1:length(plates), function(index) {
tmp <- readRDS(paste0("/project2/gilad/fucci-seq/intensities_stats/",plates[index],".stats.rds"))
tmp <- data.frame(plate=plates[index],
well=as.character(droplevels(tmp$wellID)),
rfp.sum.zoom.mean.log10=log10(tmp$rfp.sum.zoom.mean),
gfp.sum.zoom.mean.log10=log10(tmp$gfp.sum.zoom.mean),
dapi.sum.zoom.mean.log10=log10(tmp$dapi.sum.zoom.mean))
tmp$rfp.sum.zoom.mean.log10[which(tmp$rfp.sum.zoom.mean.log10 == "NaN")] <- min(tmp$rfp.sum.zoom.mean.log10, na.rm=TRUE)
tmp$gfp.sum.zoom.mean.log10[which(tmp$gfp.sum.zoom.mean.log10 == "NaN")] <- min(tmp$gfp.sum.zoom.mean.log10, na.rm=TRUE)
tmp$dapi.sum.zoom.mean.log10[which(tmp$dapi.sum.zoom.mean.log10 == "NaN")] <- min(tmp$dapi.sum.zoom.mean.log10, na.rm=TRUE)
return(tmp)
}) )
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
Warning in data.frame(plate = plates[index], well =
as.character(droplevels(tmp$wellID)), : NaNs produced
ints <- ints %>% mutate(dapi_4quant=ntile(dapi.sum.zoom.mean.log10,4),
dapi_3quant=ntile(dapi.sum.zoom.mean.log10,3))
saveRDS(ints, file = "/project2/gilad/joycehsiao/fucci-seq/output/ints.long.rds")
Load the expression set info.
eset_fls <- list.files("../data/eset", full.names=TRUE)
anno <- do.call(rbind, lapply(1:length(eset_fls), function(index) {
eset_index <- readRDS(eset_fls[[index]])
pdata_index <- pData(eset_index)
return(pdata_index)
}))
# make unique id in both
ints$unique <- paste0(ints$plate,"_",as.numeric(ints$well))
anno$unique <- paste0(anno$image_individual,"_",anno$image_label)
subset_index1 <- which(anno$unique %in% ints$unique)
anno_subset <- anno[subset_index1,]
subset_index2 <- match(anno_subset$unique, ints$unique)
ints_tmp <- ints[subset_index2,]
all.equal(ints_tmp$unique, anno_subset$unique)
[1] TRUE
ints_tmp$chip_id <- anno_subset$chip_id
# compute plate specific DAPI quantiles
ints_tmp2 <- ints_tmp %>% group_by(plate) %>% mutate(dapi_4quant_plate=ntile(dapi.sum.zoom.mean.log10,4),
dapi_3quant_plate=ntile(dapi.sum.zoom.mean.log10,3))
ggplot(ints_tmp2, aes(x=gfp.sum.zoom.mean.log10,col = as.factor(plate))) +
geom_density(alpha = .5, cex = .7) +
labs(title = "Green (log10 pixel sum) by plate",
x="Green channel log10 pixel sum", y = "Density") + theme_gray()
ggplot(ints_tmp2, aes(x=rfp.sum.zoom.mean.log10,col = as.factor(plate))) +
geom_density(alpha = .5, cex = .7) +
labs(title = "Red (log10 pixel sum) by plate",
x="Red channel log10 pixel sum", y = "Density") + theme_gray()
ggplot(ints_tmp2, aes(x=dapi.sum.zoom.mean.log10,col = as.factor(plate))) +
geom_density(alpha = .5, cex = .7) +
labs(title = "DAPI (log10 pixel sum) by plate",
x="DAPI channel log10 pixel sum", y = "Density") + theme_gray()
ggplot(ints_tmp2, aes(x=gfp.sum.zoom.mean.log10,col = as.factor(chip_id))) +
geom_density(alpha = .5, cex = .7) +
labs(title = "Green (log10 pixel sum) by individual",
x="Green channel log10 pixel sum", y = "Density") + theme_gray()
ggplot(ints_tmp2, aes(x=rfp.sum.zoom.mean.log10,col = as.factor(chip_id))) +
geom_density(alpha = .5, cex = .7) +
labs(title = "Red (log10 pixel sum) by individual",
x="Red channel log10 pixel sum", y = "Density") + theme_gray()
ggplot(ints_tmp2, aes(x=dapi.sum.zoom.mean.log10,col = as.factor(chip_id))) +
geom_density(alpha = .5, cex = .7) +
labs(title = "DAPI (log10 pixel sum) by individual",
x="DAPI channel log10 pixel sum", y = "Density") + theme_gray()
R version 3.4.1 (2017-06-30)
Platform: x86_64-redhat-linux-gnu (64-bit)
Running under: Scientific Linux 7.2 (Nitrogen)
Matrix products: default
BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] Biobase_2.38.0 BiocGenerics_0.24.0 RColorBrewer_1.1-2
[4] wesanderson_0.3.4 cowplot_0.9.1 ggplot2_2.2.1
[7] dplyr_0.7.4 data.table_1.10.4-3
loaded via a namespace (and not attached):
[1] Rcpp_0.12.14 knitr_1.17 bindr_0.1 magrittr_1.5
[5] munsell_0.4.3 colorspace_1.3-2 R6_2.2.2 rlang_0.1.4
[9] plyr_1.8.4 stringr_1.2.0 tools_3.4.1 grid_3.4.1
[13] gtable_0.2.0 git2r_0.19.0 htmltools_0.3.6 lazyeval_0.2.1
[17] yaml_2.1.14 rprojroot_1.2 digest_0.6.12 assertthat_0.2.0
[21] tibble_1.3.4 bindrcpp_0.2 glue_1.2.0 evaluate_0.10.1
[25] rmarkdown_1.8 labeling_0.3 stringi_1.1.6 compiler_3.4.1
[29] scales_0.5.0 backports_1.1.1 pkgconfig_2.0.1
This R Markdown site was created with workflowr