library(dplyr)
library(DT)
options(stringsAsFactors = FALSE)
Compound mappings
# Build the RxNorm -> DrugBank lookup by joining the local RxCUI/UNII table
# with the DrugBank FDA-SRS (UNII) mapping. No key is given, so the join
# uses whatever columns the two tables have in common.
rxnorm.df <- read.delim(file.path('data', 'rxcui-unii-map.tsv')) %>%
  dplyr::inner_join(
    read.delim('http://git.dhimmel.com/drugbank/data/mapping/fdasrs.tsv')
  )
## Joining by: "fdasrs_id"
Disease mappings
# Load the Disease Ontology slim cross-reference mappings
domap.df <- read.delim(
  'http://git.dhimmel.com/disease-ontology/data/xrefs-prop-slim.tsv'
)
# DO -> UMLS: keep only the UMLS rows, drop the now-constant resource column
# and expose the identifier under the name disease_cui
umls.df <- dplyr::filter(domap.df, resource == 'UMLS') %>%
  dplyr::select(-resource) %>%
  dplyr::rename(disease_cui = resource_id)
# DO -> ICD9: keep only the ICD9 rows, drop the now-constant resource column
# and expose the identifier under the name disease_icd9
icd9.df <- dplyr::filter(domap.df, resource == 'ICD9') %>%
  dplyr::select(-resource) %>%
  dplyr::rename(disease_icd9 = resource_id)
# DO -> OMIM: keep only the OMIM rows, drop the now-constant resource column
# and expose the identifier under the name disease_omim, converted to integer
omim.df <- dplyr::filter(domap.df, resource == 'OMIM') %>%
  dplyr::select(-resource) %>%
  dplyr::rename(disease_omim = resource_id) %>%
  dplyr::mutate(disease_omim = as.integer(disease_omim))
Read and map LabeledIn
# LabeledIn indications, mapped to DO diseases and DrugBank compounds
labin.df <- read.delim(file.path('labeledin', 'data', 'indications.tsv')) %>%
  # remove combo drugs: coercing rxnorm_id to integer turns every id that is
  # not a plain integer into NA (with a coercion warning), and those rows are
  # then dropped
  dplyr::mutate(rxnorm_id = as.integer(rxnorm_id)) %>%
  dplyr::filter(!is.na(rxnorm_id)) %>%
  # attach DO terms through the UMLS mapping
  dplyr::inner_join(umls.df) %>%
  # attach DrugBank compounds through the RxNorm mapping
  dplyr::inner_join(rxnorm.df)
## Warning in mutate_impl(.data, dots): NAs introduced by coercion
## Joining by: "disease_cui"
## Joining by: "rxnorm_id"
Read and map MEDI
# Read the MEDI indications and attach DrugBank compounds through the
# RxNorm mapping
medi.df <- read.delim(file.path('medi', 'data', 'medi-umls.tsv')) %>%
  dplyr::inner_join(rxnorm.df)
## Joining by: "rxnorm_id"
# Map MEDI to DO twice, once through the UMLS table and once through the
# ICD9 table, then stack both sets of matches
medi.df <- dplyr::bind_rows(
  dplyr::inner_join(umls.df, medi.df),
  dplyr::inner_join(icd9.df, medi.df)
)
## Joining by: "disease_cui"
## Joining by: "disease_icd9"
Read and map PREDICT
# Read the PREDICT indications and rename the disease identifier columns so
# they line up with the DO mapping tables
predict.df <- read.delim(
  file.path('msb-predict', 'data', 'indications-umls.tsv')
) %>%
  dplyr::rename(disease_cui = umls_cui, disease_omim = omim_id)
# Map PREDICT to DO twice, once through the UMLS table and once through the
# OMIM table, then stack both sets of matches
predict.df <- dplyr::bind_rows(
  dplyr::inner_join(umls.df, predict.df),
  dplyr::inner_join(omim.df, predict.df)
)
## Joining by: "disease_cui"
## Joining by: "disease_omim"
Join resources
# Stack the three mapped resources into one table with the columns
# doid_code, drugbank_id and resource.
indication.df <- dplyr::bind_rows(
  # LabeledIn: one row per distinct disease-compound pair
  labin.df %>%
    dplyr::select(doid_code, drugbank_id) %>%
    dplyr::distinct() %>%
    dplyr::mutate(resource = 'labeledin'),
  # MEDI: one row per disease-compound pair, labelled 'medi_hps' when any of
  # the pair's rows has a non-zero hps value and 'medi_lps' otherwise
  medi.df %>%
    dplyr::group_by(doid_code, drugbank_id) %>%
    dplyr::summarize(
      # The per-group condition is a single value, so use plain if/else with
      # an explicit comparison rather than vectorised ifelse() on a bare
      # number. NOTE: assumes hps has no missing values; a missing value now
      # raises an error instead of silently yielding an NA label.
      resource = if (max(hps) != 0) 'medi_hps' else 'medi_lps'
    ) %>%
    dplyr::ungroup(),
  # PREDICT: one row per distinct disease-compound pair
  predict.df %>%
    dplyr::select(doid_code, drugbank_id) %>%
    dplyr::distinct() %>%
    dplyr::mutate(resource = 'predict')
)
# Number of distinct disease-compound pairs in the gold standard, i.e.
# pairs backed by at least one resource other than 'medi_lps'
dplyr::filter(indication.df, resource != 'medi_lps') %>%
  dplyr::select(doid_code, drugbank_id) %>%
  dplyr::distinct() %>%
  nrow()
## [1] 1187
Add names and save
# DrugBank id -> compound name lookup (only these two columns are kept)
drugbank.df <- read.delim(
  'http://git.dhimmel.com/drugbank/data/drugbank.tsv'
) %>%
  dplyr::select(drugbank_id, drugbank_name = name)
# DO code -> disease name lookup (only these two columns are kept)
do.df <- read.delim(
  'http://git.dhimmel.com/disease-ontology/data/slim-terms.tsv'
) %>%
  dplyr::select(doid_code = doid, doid_name = name)
# Add compound and disease names; left joins keep every indication row even
# when a name lookup has no match
indication.df <- dplyr::left_join(indication.df, drugbank.df) %>%
  dplyr::left_join(do.df)
## Joining by: "drugbank_id"
## Joining by: "doid_code"
# Save the complete indication table (all resources) as a tab-separated file
path <- file.path('data', 'indications.tsv')
write.table(
  indication.df,
  file = path,
  sep = '\t',
  quote = FALSE,
  row.names = FALSE
)
# Show an interactive table of every row except the 'medi_lps' ones
indication.df %>%
  dplyr::filter(resource != 'medi_lps') %>%
  DT::datatable()