Skip to content

Commit

Permalink
train drugbank data
Browse files: browse the repository at this point in the history
  • Loading branch information
Default user committed Dec 7, 2023
1 parent b137d08 commit 2c014b8
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
16 changes: 14 additions & 2 deletions src/train_opentargets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from src.embeddings import compute_drug_embedding, compute_target_embedding
from src.train import train, compute_and_train
from src.utils import COLLECTIONS, log
from src.utils import COLLECTIONS, log, get_pref_ids
from src.vectordb import init_vectordb

# NOTE: script to run the WHOLE pipeline on opentargets data
Expand Down Expand Up @@ -81,5 +81,17 @@ def train_opentargets(input_dir, out_dir):
scores = compute_and_train(df_known_dt, out_dir)


def train_drugbank():
    """Train on the DrugBank drug-target interaction CSV.

    Reads ``data/drugbank/DB_DTI_4vectordb.csv``, rewrites its ``drug`` column
    with the identifiers returned by ``get_pref_ids`` for the
    ``PUBCHEM.COMPOUND`` namespace, and passes the resulting dataframe to
    ``compute_and_train`` with ``data/drugbank`` as the output directory.

    Returns:
        Whatever ``compute_and_train`` returns (the original discarded it).
    """
    known_dt_path = "data/drugbank/DB_DTI_4vectordb.csv"
    out_dir = "data/drugbank"

    df = pd.read_csv(known_dt_path)
    # Resolve each distinct drug ID only once, and hand over a plain list
    # (get_pref_ids is annotated as taking a list, not a numpy array).
    convert_dict = get_pref_ids(df["drug"].unique().tolist(), ["PUBCHEM.COMPOUND"])
    print(convert_dict)
    # Keep the original CURIE when the lookup has no entry for it, instead of
    # aborting the whole run with a KeyError.
    df["drug"] = df["drug"].map(lambda curie: convert_dict.get(curie, curie))
    print(df)
    return compute_and_train(df, out_dir)

# Script entry point: the calls below run only when this module is executed
# directly, not when it is imported.
# NOTE(review): this text comes from a diff view with markers stripped — the
# first train_opentargets(...) call looks like the pre-commit line, replaced by
# train_drugbank() with the opentargets call kept as a comment. Confirm against
# the real file before relying on which calls are live.
if __name__ == "__main__":
train_opentargets("data/download/opentargets/knownDrugsAggregated", "data/opentargets")
train_drugbank()
# train_opentargets("data/download/opentargets/knownDrugsAggregated", "data/opentargets")
6 changes: 3 additions & 3 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,22 +97,22 @@ def get_pref_ids(ids_list: list, accepted_namespaces: list[str] = None):
resp = resolve_curies.json()
# print(resp)
for original_id, available_ids in resp.items():
pref_id = None
pref_id = original_id
try:
if not accepted_namespaces:
pref_id = available_ids["id"]["identifier"]
else:
for ns in accepted_namespaces:
if available_ids["id"]["identifier"].lower().startswith(ns.lower()):
pref_id = available_ids["id"]["identifier"]
if not pref_id:
if pref_id == original_id:
for alt_id in available_ids["equivalent_identifiers"]:
for ns in accepted_namespaces:
if alt_id["identifier"].lower().startswith(ns.lower()):
pref_id = alt_id["identifier"]
# log.debug(f"{original_id} > {pref_id}")
except Exception:
log.debug(f"Could not find pref ID for {original_id} in {available_ids}")
pref_id = original_id
# pref_id = original_id
pref_ids[original_id] = pref_id
return pref_ids

0 comments on commit 2c014b8

Please sign in to comment.