######################################
# #
# pQTL & Mediation Module #
# October 2, 2015 #
# Short Course on Systems Genetics #
# #
######################################
### First, we need to load in some R packages and our data
options(stringsAsFactors = F)
library(DOQTL)
## Loading required package: DBI
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
## intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unlist, unsplit
##
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Loading required package: GenomeInfoDb
## Loading required package: S4Vectors
## Loading required package: IRanges
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:GenomeInfoDb':
##
## species
library(devtools)
## Warning: package 'devtools' was built under R version 3.1.3
install_github("simecek/intermediate")
## Downloading GitHub repo simecek/intermediate@master
## Installing intermediate
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file \
## --no-environ --no-save --no-restore CMD INSTALL \
## '/private/var/folders/gw/rbl8hmgx6zx5w22hy6v56rd5zl1fd2/T/RtmpAWwudX/devtools928125f7741b/simecek-intermediate-f94eca4' \
## --library='/Users/scm/Library/R/3.1/library' --install-tests
install_github("kbroman/qtlcharts")
## Downloading GitHub repo kbroman/qtlcharts@master
## Installing qtlcharts
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file \
## --no-environ --no-save --no-restore CMD INSTALL \
## '/private/var/folders/gw/rbl8hmgx6zx5w22hy6v56rd5zl1fd2/T/RtmpAWwudX/devtools92812cc0a104/kbroman-qtlcharts-d0f05ab' \
## --library='/Users/scm/Library/R/3.1/library' --install-tests
library(intermediate)
library(qtlcharts)
load("~/DO192_DataforSysGenCourse.Rdata") ###Load in the dataset
### Setting covariates
X <- model.matrix(~Sex*Diet, covariates.protein.192)
colnames(X)[2] <- "sex" # DOQTL requirement
### SCAN for Tmem68 eQTL and pQTL
## 1) Scan for eQTL
my.gene = "Tmem68" ### Input your gene of interest
target.rna.index <- which(annotations.rna.192$Gene == my.gene)
annotations.rna.192[target.rna.index,] ### Show information for the gene of interest
## EnsemblID Gene Chr Start.Mbp End.Mbp
## ENSMUSG00000028232 ENSMUSG00000028232 Tmem68 4 3.549041 3.574853
## Gene.Biotype Strand Mean.per.Expr Nonzeros
## ENSMUSG00000028232 protein_coding -1 1.382734 192
scanone.rna <- scanone(expr.rna.192, pheno.col=target.rna.index, probs=probs.192, snps=snps.64K, addcovar=X[,-1])
## Mapping with 192 samples.
## [1] "ENSMUSG00000028232"
plot(scanone.rna)
# A little function to find the SNP maximizing LOD score (autosomes only)
argmax.lod <- function(scanone.fit)
scanone.fit$lod$A$SNP_ID[which.max(scanone.fit$lod$A$lod)[1]]
# Let's plot the founder coefficients for the autosome with max. LOD
argmax.snp.rna <- argmax.lod(scanone.rna)
coefplot(scanone.rna, chr=snps.64K[argmax.snp.rna,"Chr"])
## 2) Scan for pQTL
target.protein.index <- which(annotations.protein.192$Associated.Gene.Name == my.gene)
scanone.protein <- scanone(expr.protein.192, pheno.col=target.protein.index, probs=probs.192, snps=snps.64K, addcovar=X[,-1])
## Mapping with 192 samples.
## [1] "ENSMUSP00000029891"
plot(scanone.protein)
# effect plot for autosome with max. LOD
argmax.snp.protein <- argmax.lod(scanone.protein)
coefplot(scanone.protein, chr=snps.64K[argmax.snp.protein,"Chr"])
# Mediation Scan
#
##### target - numeric vector with gene/protein expression
##### mediator - matrix, each column is one gene/protein's expression
##### annotation - data.frame with mediator annotation, must include columns "chr" and "pos"
##### qtl.geno - matrix, haplotype probabilities at QTL we try to mediate
##### covar - additive covariates
##### method = c("ignore", "lod-diff", "double-lod-diff", "lod-ratio")
## 3) Mediation Scan - Condition distant pQTL on protein intermediates
y <- expr.protein.192[,target.protein.index]
geno.argmax.protein <- probs.192[,-1,argmax.snp.protein]
# trim annotation, calculate middle point
annot.protein <- annotations.protein.192[,c("Ensembl.Protein.ID", "Ensembl.Gene.ID", "Associated.Gene.Name")]
annot.protein$Chr <- annotations.protein.192$Chromosome.Name
annot.protein$Pos <- (annotations.protein.192$Gene.Start..bp. + annotations.protein.192$Gene.End..bp.)/2
med <- mediation.scan(target=y, mediator=expr.protein.192, annotation=annot.protein,
covar=X[,-1], qtl.geno=geno.argmax.protein,method="double-lod-diff")
## [1] 1000
## [1] 2000
## [1] 3000
## [1] 4000
## [1] 5000
## [1] 6000
## [1] 7000
## [1] 8000
kplot(med) #Interactive Plot
## Set screen size to height=700 x width=1000
plot(med)
identify(med)
## [1] Ensembl.Protein.ID Ensembl.Gene.ID Associated.Gene.Name
## [4] Chr Pos LOD
## <0 rows> (or 0-length row.names)
## 4) Mediation Scan - Condition distant pQTL on transcript intermediates
# trim annotation, calculate middle point
annot.rna <- annotations.rna.192[,c("EnsemblID", "Gene", "Chr")]
colnames(annot.rna) = c("Ensemble.Gene.ID","Associated.Gene.Name","Chr")
annot.rna$Pos <- (annotations.rna.192$Start.Mbp + annotations.rna.192$End.Mbp)/2
med <- mediation.scan(target=y, mediator=expr.rna.192, annotation=annot.rna,
covar=X[,-1], qtl.geno=geno.argmax.protein)
## [1] 1000
## [1] 2000
## [1] 3000
## [1] 4000
## [1] 5000
## [1] 6000
## [1] 7000
## [1] 8000
## [1] 9000
## [1] 10000
## [1] 11000
## [1] 12000
## [1] 13000
## [1] 14000
## [1] 15000
## [1] 16000
## [1] 17000
## [1] 18000
## [1] 19000
## [1] 20000
## [1] 21000
## [1] 22000
## [1] 23000
## [1] 24000
## [1] 25000
kplot(med) #Interactive Plot
plot(med)
identify(med)
## [1] Ensemble.Gene.ID Associated.Gene.Name Chr
## [4] Pos LOD
## <0 rows> (or 0-length row.names)
#### Other example proteins to scan
## Ndufaf1
## Mtr
## Cct7
## Glul
## Xrcc6
## Elp3
## Aven
## Klc4