Skip to content

Commit

Permalink
add smiles to uploader keylookup
Browse files (browse the repository at this point in the history)
  • Loading branch information
DylanWelzel committed Nov 11, 2024
1 parent 22bb8d5 commit e512d47
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 5 deletions.
4 changes: 3 additions & 1 deletion src/hub/dataload/sources/chebi/chebi_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ class ChebiUploader(BaseDrugUploader):
__metadata__ = {"src_meta": SRC_META}
keylookup = MyChemKeyLookup([('inchikey', 'chebi.inchikey'),
('drugbank', 'chebi.xrefs.drugbank'),
('chebi', 'chebi.id')], copy_from_doc=True)
('chebi', 'chebi.id'),
('smiles', 'chebi.smiles')],
copy_from_doc=True)

"""
A document with an ID from `exclusion_ids` would have a long list for one or more of the following fields:
Expand Down
3 changes: 2 additions & 1 deletion src/hub/dataload/sources/chembl/chembl_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class ChemblUploader(BaseDrugUploader, ParallelizedSourceUploader):
("chembl", "chembl.molecule_chembl_id"),
("chebi", "chembl.chebi_par_id"),
("drugcentral", "chembl.xrefs.drugcentral.id"),
("drugname", "chembl.pref_name")],
("drugname", "chembl.pref_name"),
('smiles', 'chembl.smiles')],
# TODO: handle duplicate keys from pubchem
# we use RootKeyMergerStorage, but the num. duplicates is too high (>10000)
# ("pubchem", "chembl.xrefs.pubchem.sid"),
Expand Down
5 changes: 3 additions & 2 deletions src/hub/dataload/sources/drugcentral/drugcentral_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ class DrugCentralUploader(BaseDrugUploader):
('drugbank', 'drugcentral.xrefs.drugbank_id'),
('chebi', 'drugcentral.xrefs.chebi'),
('chembl', 'drugcentral.xrefs.chembl_id'),
('pubchem', 'drugcentral.xrefs.pubchem_cid')],
('pubchem', 'drugcentral.xrefs.pubchem_cid'),
('smiles', 'drugcentral.structures.smiles')],
# ('drugname', 'drugcentral.synonyms')], # unhashable type - list
copy_from_doc=True,
)
Expand All @@ -41,7 +42,7 @@ def load_data(self, data_folder):
drugcentral_docs = load_data(data_folder)
return drugcentral_docs

@classmethod
@ classmethod
def get_mapping(klass):
mapping = {
"drugcentral": {
Expand Down
3 changes: 2 additions & 1 deletion src/hub/dataload/sources/unii/unii_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ class UniiUploader(BaseDrugUploader):

keylookup = MyChemKeyLookup([('inchikey', 'unii.inchikey'),
('pubchem', 'unii.pubchem'),
('unii', 'unii.unii')],
('unii', 'unii.unii'),
('smiles', 'unii.smiles')],
copy_from_doc=True,
)

Expand Down

0 comments on commit e512d47

Please sign in to comment.