library(dplyr)
library(DT)

options(stringsAsFactors = FALSE)

Compound mappings

# Construct an RxNorm-to-DrugBank mapping (through FDA-SRS UNII) by joining
# the local RxCUI-to-UNII table with the DrugBank FDA-SRS table.
# The join key is stated explicitly: one input is fetched from a URL, and a
# natural join would silently pick up any extra column the two tables came
# to share.
rxnorm.df <- dplyr::inner_join(
  file.path('data', 'rxcui-unii-map.tsv') %>%
    read.delim(),
  'http://git.dhimmel.com/drugbank/data/mapping/fdasrs.tsv' %>%
    read.delim(),
  by = 'fdasrs_id'
)
## Joining by: "fdasrs_id"

Disease mappings

# Load the Disease Ontology slim cross-reference table from its URL.
domap.df <- read.delim(
  'http://git.dhimmel.com/disease-ontology/data/xrefs-prop-slim.tsv'
)

# DO-to-UMLS mapping: keep only the UMLS rows, drop the resource column,
# and expose the identifier column as disease_cui.
umls.df <- dplyr::rename(
  dplyr::select(
    dplyr::filter(domap.df, resource == 'UMLS'),
    -resource
  ),
  disease_cui = resource_id
)

# DO-to-ICD9 mapping: ICD9 rows only, with the identifier column exposed as
# disease_icd9 and the resource column removed.
icd9.df <- domap.df %>%
  dplyr::filter(resource == 'ICD9') %>%
  dplyr::rename(disease_icd9 = resource_id) %>%
  dplyr::select(-resource)

# DO-to-OMIM mapping: OMIM rows only, with the identifier coerced to integer
# and exposed as disease_omim; the resource column is removed.
omim.df <- domap.df %>%
  dplyr::filter(resource == 'OMIM') %>%
  dplyr::mutate(resource_id = as.integer(resource_id)) %>%
  dplyr::rename(disease_omim = resource_id) %>%
  dplyr::select(-resource)

Read and map LabeledIn

# Read the LabeledIn indications and map them onto DO diseases and DrugBank
# compounds. Both join keys are stated explicitly so that an extra shared
# column in any input cannot silently change which rows match.
labin.df <-
  # read labeledin data
  file.path('labeledin', 'data', 'indications.tsv') %>%
  read.delim() %>%
  # remove combo drugs: rows whose rxnorm_id does not parse as an integer
  # become NA in the coercion (which warns) and are then filtered out
  dplyr::mutate(rxnorm_id = as.integer(rxnorm_id)) %>%
  dplyr::filter(! is.na(rxnorm_id)) %>%
  # map umls diseases to DO
  dplyr::inner_join(umls.df, by = 'disease_cui') %>%
  # map rxnorm compounds to drugbank
  dplyr::inner_join(rxnorm.df, by = 'rxnorm_id')
## Warning in mutate_impl(.data, dots): NAs introduced by coercion
## Joining by: "disease_cui"
## Joining by: "rxnorm_id"

Read and map MEDI

# Read the MEDI indications and attach DrugBank identifiers through RxNorm.
# The join key is given explicitly rather than inferred from whichever
# column names the two tables happen to share.
medi.df <-
  file.path('medi', 'data', 'medi-umls.tsv') %>%
  read.delim() %>%
  dplyr::inner_join(rxnorm.df, by = 'rxnorm_id')
## Joining by: "rxnorm_id"
# Map MEDI diseases to DO along two routes -- UMLS CUIs and ICD9 codes -- and
# stack the two result sets. Each join names its key explicitly so the UMLS
# route can only match on disease_cui and the ICD9 route only on
# disease_icd9. Rows reachable through both routes appear in both halves.
medi.df <- dplyr::bind_rows(
  umls.df %>%
    dplyr::inner_join(medi.df, by = 'disease_cui'),
  icd9.df %>%
    dplyr::inner_join(medi.df, by = 'disease_icd9')
)
## Joining by: "disease_cui"
## Joining by: "disease_icd9"

Read and map PREDICT

# Load the PREDICT indications and rename the disease identifier columns to
# the names used by the DO mapping tables (disease_cui, disease_omim).
predict.df <- dplyr::rename(
  read.delim(file.path('msb-predict', 'data', 'indications-umls.tsv')),
  disease_cui = umls_cui,
  disease_omim = omim_id
)

# Map PREDICT diseases to DO along two routes -- UMLS CUIs and OMIM ids -- and
# stack the two result sets. Each join names its key explicitly so the UMLS
# route can only match on disease_cui and the OMIM route only on
# disease_omim. Rows reachable through both routes appear in both halves.
predict.df <- dplyr::bind_rows(
  umls.df %>%
    dplyr::inner_join(predict.df, by = 'disease_cui'),
  omim.df %>%
    dplyr::inner_join(predict.df, by = 'disease_omim')
)
## Joining by: "disease_cui"
## Joining by: "disease_omim"

Join resources

# Stack the (doid_code, drugbank_id) pairs from the three sources, tagging
# every row with the resource it came from.
indication.df <- dplyr::bind_rows(
  # LabeledIn: unique pairs
  dplyr::mutate(
    dplyr::distinct(dplyr::select(labin.df, doid_code, drugbank_id)),
    resource = 'labeledin'
  ),
  # MEDI: one row per pair, labelled 'medi_hps' when the largest hps value
  # in the pair's group is truthy and 'medi_lps' otherwise
  dplyr::ungroup(
    dplyr::summarize(
      dplyr::group_by(medi.df, doid_code, drugbank_id),
      resource = ifelse(max(hps), 'medi_hps', 'medi_lps')
    )
  ),
  # PREDICT: unique pairs
  dplyr::mutate(
    dplyr::distinct(dplyr::select(predict.df, doid_code, drugbank_id)),
    resource = 'predict'
  )
)

# Number of distinct disease-compound pairs in the gold standard, i.e. pairs
# reported by any resource other than 'medi_lps'.
nrow(
  dplyr::distinct(
    dplyr::select(
      dplyr::filter(indication.df, resource != 'medi_lps'),
      doid_code, drugbank_id
    )
  )
)
## [1] 1187

Add names and save

# DrugBank lookup table: identifier plus name (exposed as drugbank_name).
drugbank.df <- dplyr::transmute(
  read.delim('http://git.dhimmel.com/drugbank/data/drugbank.tsv'),
  drugbank_id,
  drugbank_name = name
)

# Disease Ontology slim lookup table: doid exposed as doid_code and name as
# doid_name.
do.df <- dplyr::transmute(
  read.delim('http://git.dhimmel.com/disease-ontology/data/slim-terms.tsv'),
  doid_code = doid,
  doid_name = name
)

# Annotate every indication with the compound and disease names. Left joins
# keep indications that have no entry in a lookup table (their name is NA).
# Keys are explicit so that only the identifier columns are ever matched,
# whatever other columns the lookup tables may carry.
indication.df <- indication.df %>%
  dplyr::left_join(drugbank.df, by = 'drugbank_id') %>%
  dplyr::left_join(do.df, by = 'doid_code')
## Joining by: "drugbank_id"
## Joining by: "doid_code"
# Write the annotated indications as a tab-separated file, without row names
# and without quoting.
path <- file.path('data', 'indications.tsv')
write.table(
  x = indication.df,
  file = path,
  quote = FALSE,
  sep = '\t',
  row.names = FALSE
)

# Interactive table of the indications from every resource except 'medi_lps'.
indication.df %>%
  dplyr::filter(resource != 'medi_lps') %>%
  DT::datatable()