######################################
#                                    #
#     pQTL & Mediation Module        #
#        October 2, 2015             #
#  Short Course on Systems Genetics  #
#                                    #
######################################



### First, we need to load in some R packages and our data
options(stringsAsFactors = F)
library(DOQTL)
## Loading required package: DBI
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist, unsplit
## 
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: GenomeInfoDb
## Loading required package: S4Vectors
## Loading required package: IRanges
## 
## Attaching package: 'AnnotationDbi'
## 
## The following object is masked from 'package:GenomeInfoDb':
## 
##     species
library(devtools)
## Warning: package 'devtools' was built under R version 3.1.3
install_github("simecek/intermediate")
## Downloading GitHub repo simecek/intermediate@master
## Installing intermediate
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore CMD INSTALL  \
##   '/private/var/folders/gw/rbl8hmgx6zx5w22hy6v56rd5zl1fd2/T/RtmpAWwudX/devtools928125f7741b/simecek-intermediate-f94eca4'  \
##   --library='/Users/scm/Library/R/3.1/library' --install-tests
install_github("kbroman/qtlcharts")
## Downloading GitHub repo kbroman/qtlcharts@master
## Installing qtlcharts
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore CMD INSTALL  \
##   '/private/var/folders/gw/rbl8hmgx6zx5w22hy6v56rd5zl1fd2/T/RtmpAWwudX/devtools92812cc0a104/kbroman-qtlcharts-d0f05ab'  \
##   --library='/Users/scm/Library/R/3.1/library' --install-tests
library(intermediate)
library(qtlcharts)


load("~/DO192_DataforSysGenCourse.Rdata")  ###Load in the dataset


### Setting covariates
X <- model.matrix(~Sex*Diet, covariates.protein.192)
colnames(X)[2] <- "sex" # DOQTL requirement


### SCAN for Tmem68 eQTL and pQTL

## 1) Scan for eQTL
my.gene = "Tmem68"  ### Input your gene of interest
target.rna.index <- which(annotations.rna.192$Gene == my.gene)
annotations.rna.192[target.rna.index,]  ### Show information for the gene of interest
##                             EnsemblID   Gene Chr Start.Mbp  End.Mbp
## ENSMUSG00000028232 ENSMUSG00000028232 Tmem68   4  3.549041 3.574853
##                      Gene.Biotype Strand Mean.per.Expr Nonzeros
## ENSMUSG00000028232 protein_coding     -1      1.382734      192
scanone.rna <- scanone(expr.rna.192, pheno.col=target.rna.index, probs=probs.192, snps=snps.64K, addcovar=X[,-1])
## Mapping with 192 samples.
## [1] "ENSMUSG00000028232"
plot(scanone.rna)

# A little function to find the SNP maximizing LOD score (autosomes only) 
argmax.lod <- function(scanone.fit)
  scanone.fit$lod$A$SNP_ID[which.max(scanone.fit$lod$A$lod)[1]]


# Let's plot the founder coefficients for the autosome with max. LOD
argmax.snp.rna <- argmax.lod(scanone.rna)
coefplot(scanone.rna, chr=snps.64K[argmax.snp.rna,"Chr"])

## 2) Scan for pQTL
target.protein.index <- which(annotations.protein.192$Associated.Gene.Name == my.gene)
scanone.protein <- scanone(expr.protein.192, pheno.col=target.protein.index, probs=probs.192, snps=snps.64K, addcovar=X[,-1])
## Mapping with 192 samples.
## [1] "ENSMUSP00000029891"
plot(scanone.protein)

# effect plot for autosome with max. LOD
argmax.snp.protein <- argmax.lod(scanone.protein)
coefplot(scanone.protein, chr=snps.64K[argmax.snp.protein,"Chr"])

# Mediation Scan
# 
##### target - numeric vector with gene/protein expression
##### mediator - matrix, each column is one gene/protein's expression
##### annotation - data.frame with mediator annotation, must include columns "chr" and "pos"
##### qtl.geno - matrix, haplotype probabilities at QTL we try to mediate
##### covar - additive covariates
##### method = c("ignore", "lod-diff", "double-lod-diff", "lod-ratio")


## 3) Mediation Scan - Condition distant pQTL on protein intermediates
y <- expr.protein.192[,target.protein.index]
geno.argmax.protein <- probs.192[,-1,argmax.snp.protein]

# trim annotation, calculate middle point
annot.protein <- annotations.protein.192[,c("Ensembl.Protein.ID", "Ensembl.Gene.ID", "Associated.Gene.Name")]
annot.protein$Chr <- annotations.protein.192$Chromosome.Name
annot.protein$Pos <- (annotations.protein.192$Gene.Start..bp. + annotations.protein.192$Gene.End..bp.)/2

med <- mediation.scan(target=y, mediator=expr.protein.192, annotation=annot.protein, 
                            covar=X[,-1], qtl.geno=geno.argmax.protein,method="double-lod-diff")
## [1] 1000
## [1] 2000
## [1] 3000
## [1] 4000
## [1] 5000
## [1] 6000
## [1] 7000
## [1] 8000
kplot(med) #Interactive Plot
## Set screen size to height=700 x width=1000

plot(med)
identify(med)

## [1] Ensembl.Protein.ID   Ensembl.Gene.ID      Associated.Gene.Name
## [4] Chr                  Pos                  LOD                 
## <0 rows> (or 0-length row.names)
## 4) Mediation Scan - Condition distant pQTL on transcript intermediates

# trim annotation, calculate middle point
annot.rna <- annotations.rna.192[,c("EnsemblID", "Gene", "Chr")]
colnames(annot.rna) = c("Ensemble.Gene.ID","Associated.Gene.Name","Chr")
annot.rna$Pos <- (annotations.rna.192$Start.Mbp + annotations.rna.192$End.Mbp)/2

med <- mediation.scan(target=y, mediator=expr.rna.192, annotation=annot.rna, 
                            covar=X[,-1], qtl.geno=geno.argmax.protein)
## [1] 1000
## [1] 2000
## [1] 3000
## [1] 4000
## [1] 5000
## [1] 6000
## [1] 7000
## [1] 8000
## [1] 9000
## [1] 10000
## [1] 11000
## [1] 12000
## [1] 13000
## [1] 14000
## [1] 15000
## [1] 16000
## [1] 17000
## [1] 18000
## [1] 19000
## [1] 20000
## [1] 21000
## [1] 22000
## [1] 23000
## [1] 24000
## [1] 25000
kplot(med)  #Interactive Plot

plot(med)
identify(med)

## [1] Ensemble.Gene.ID     Associated.Gene.Name Chr                 
## [4] Pos                  LOD                 
## <0 rows> (or 0-length row.names)
#### Other example proteins to scan

##  Ndufaf1
##  Mtr
##  Cct7
##  Glul
##  Xrcc6
##  Elp3
##  Aven
##  Klc4