Last updated: 2018-07-16

Code version: b451a0b


Getting annotations

# Attach biomaRt, which supplies useMart() and getBM() used below.
library(biomaRt)

# Load the saved top-gene results (the "double" and "triple" variants).
topgenes_double <- readRDS(file = "../data/results/ind_results_topgenes.rds")
topgenes_triple <- readRDS(
  file = "../data/results/ind_results_topgenes_triple.rds"
)

# Use the last element of the double results as the set of IDs to annotate.
# NOTE(review): presumably the last element is the largest gene set and
# contains every ID in the other elements -- confirm against the upstream
# analysis.
all_genes <- topgenes_double[[length(topgenes_double)]]

# Query the human Ensembl gene dataset for the HGNC symbol of each
# Ensembl gene ID in all_genes.
ensembl <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
symbols <- getBM(
  attributes = c("hgnc_symbol", "ensembl_gene_id"),
  filters = "ensembl_gene_id",
  values = all_genes,
  mart = ensembl
)


# For every gene set in topgenes_double, build a two-column data frame
# (hgnc_symbol, ensembl_gene_id) with one row per gene ID, in the same order
# as the gene set. IDs that are absent from `symbols` get hgnc_symbol = NA.
#
# lapply() iterates over the list directly, so an empty topgenes_double yields
# an empty result, and the element names are carried over to the output.
genes_symbols_double <- lapply(topgenes_double, function(gene_ids) {
  # Rows of the biomaRt lookup table that belong to this gene set.
  annotated <- symbols[symbols$ensembl_gene_id %in% gene_ids, ]

  # IDs that the lookup table does not contain at all.
  missing_ids <- as.character(
    gene_ids[!(gene_ids %in% symbols$ensembl_gene_id)]
  )

  # The NA column is sized to match missing_ids so that this also works when
  # nothing is missing; data.frame(hgnc_symbol = NA, ensembl_gene_id =
  # character(0)) would stop with "arguments imply differing number of rows".
  placeholders <- data.frame(
    hgnc_symbol = rep(NA_character_, length(missing_ids)),
    ensembl_gene_id = missing_ids,
    stringsAsFactors = FALSE
  )

  combined <- rbind(annotated, placeholders)

  # match() returns the first hit per ID, which restores the input order and
  # keeps a single row for IDs that appear more than once in the lookup table.
  combined[match(gene_ids, combined$ensembl_gene_id), ]
})
saveRDS(
  genes_symbols_double,
  "../output/method-train-ind-genes.Rmd/genes_symbols_double.rds"
)


# For every gene set in topgenes_triple, build a two-column data frame
# (hgnc_symbol, ensembl_gene_id) with one row per gene ID, in the same order
# as the gene set. IDs that are absent from `symbols` get hgnc_symbol = NA.
#
# lapply() iterates over the list directly (instead of 1:length(), which
# misbehaves on an empty list), and the element names are carried over to the
# output.
genes_symbols_triple <- lapply(topgenes_triple, function(gene_ids) {
  # Rows of the biomaRt lookup table that belong to this gene set.
  annotated <- symbols[symbols$ensembl_gene_id %in% gene_ids, ]

  # IDs that the lookup table does not contain at all.
  missing_ids <- as.character(
    gene_ids[!(gene_ids %in% symbols$ensembl_gene_id)]
  )

  # The NA column is sized to match missing_ids, so a single code path covers
  # both "some IDs missing" and "no IDs missing" (a zero-row placeholder frame
  # is a no-op in rbind()).
  placeholders <- data.frame(
    hgnc_symbol = rep(NA_character_, length(missing_ids)),
    ensembl_gene_id = missing_ids,
    stringsAsFactors = FALSE
  )

  combined <- rbind(annotated, placeholders)

  # match() returns the first hit per ID, which restores the input order and
  # keeps a single row for IDs that appear more than once in the lookup table.
  combined[match(gene_ids, combined$ensembl_gene_id), ]
})
saveRDS(
  genes_symbols_triple,
  "../output/method-train-ind-genes.Rmd/genes_symbols_triple.rds"
)

Save the gene symbols to text files.

# Reload the saved gene lists and the symbol annotations written earlier.
topgenes_double <- readRDS(file = "../data/results/ind_results_topgenes.rds")
topgenes_triple <- readRDS(
  file = "../data/results/ind_results_topgenes_triple.rds"
)
symbols_double <- readRDS(
  "../output/method-train-ind-genes.Rmd/genes_symbols_double.rds"
)
symbols_triple <- readRDS(
  "../output/method-train-ind-genes.Rmd/genes_symbols_triple.rds"
)

# Write the HGNC symbols of the 11th gene set, one per line, with no header,
# row names, or quoting.
# NOTE(review): element 11 is presumably the top-100 gene set, judging by the
# output file names -- confirm.
write.table(
  symbols_double[[11]]$hgnc_symbol,
  file = "../output/method-train-ind-genes.Rmd/topgenes100_double.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

write.table(
  symbols_triple[[11]]$hgnc_symbol,
  file = "../output/method-train-ind-genes.Rmd/topgenes100_triple.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

Session information

# Print the R version, platform, locale, and attached/loaded packages.
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Scientific Linux 7.4 (Nitrogen)

Matrix products: default
BLAS/LAPACK: /software/openblas-0.2.19-el7-x86_64/lib/libopenblas_haswellp-r0.2.19.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] compiler_3.4.3  backports_1.1.2 magrittr_1.5    rprojroot_1.3-2
 [5] tools_3.4.3     htmltools_0.3.6 yaml_2.1.16     Rcpp_0.12.17   
 [9] stringi_1.1.6   rmarkdown_1.10  knitr_1.20      git2r_0.21.0   
[13] stringr_1.2.0   digest_0.6.15   evaluate_0.10.1

This R Markdown site was created with workflowr