From aae97c596f5d335fc28d7551c3e83bbc722be33d Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Fri, 13 Aug 2021 13:07:50 -0700 Subject: [PATCH] fix: change id priorities to match biolink's this should fix BTE's querying of Automat APIs to use their preferred ID tests fixed that depended on primary ID --- README.md | 122 +++++++++--------- .../integration/default_resolver.test.ts | 2 +- __tests__/unittest/bioentity.test.ts | 2 +- src/config.ts | 21 +-- 4 files changed, 74 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index d17dc5c..f89d816 100644 --- a/README.md +++ b/README.md @@ -110,11 +110,11 @@ let input = { 1. NCBIGene 2. ENSEMBL 3. HGNC - 4. SYMBOL + 4. MGI 5. OMIM - 6. UniProtKB - 7. UMLS - 8. MGI + 6. UMLS + 7. SYMBOL + 8. UniProtKB 9. name - Transcript @@ -125,52 +125,52 @@ let input = { - Protein 1. UniProtKB 2. ENSEMBL - 3. SYMBOL - 4. UMLS + 3. UMLS + 4. SYMBOL 5. name > Variant ID resolution is done through MyVariant.info API - SequenceVariant - 1. HGVS + 1. CLINVAR 2. DBSNP - 3. MYVARIANT_HG19 - 4. ClinVar + 3. HGVS + 4. MYVARIANT_HG19 > SmallMolecule, Drug ID resolution is done through MyChem.info API - SmallMolecule - 1. CHEBI + 1. PUBCHEM.COMPOUND 2. CHEMBL.COMPOUND - 3. DRUGBANK - 4. PUBCHEM.COMPOUND - 5. MESH - 6. INCHI - 7. INCHIKEY - 8. UNII + 3. UNII + 4. CHEBI + 5. DRUGBANK + 6. MESH + 7. CAS + 8. HMDB 9. KEGG.COMPOUND - 10. UMLS - 11. LINCS - 12. CAS - 13. HMDB + 10. INCHI + 11. INCHIKEY + 12. UMLS + 13. LINCS 14. name - Drug - 1. CHEBI - 2. CHEMBL.COMPOUND + 1. RXCUI + 2. NDC 3. DRUGBANK 4. PUBCHEM.COMPOUND - 5. MESH - 6. INCHI - 7. INCHIKEY - 8. UNII - 9. KEGG.COMPOUND - 10. UMLS - 11. LINCS - 12. CAS - 13. HMDB - 14. RXCUI - 15. NDC + 5. CHEMBL.COMPOUND + 6. UNII + 7. CHEBI + 8. MESH + 9. CAS + 10. HMDB + 11. KEGG.COMPOUND + 12. INCHI + 13. INCHIKEY + 14. UMLS + 15. LINCS 16. name > Disease, ClinicalFinding ID Resolution is done through MyDisease.info API @@ -181,14 +181,14 @@ let input = { 2. DOID 3. OMIM 4. ORPHANET - 5. SNOMEDCT - 6. NCIT - 7. EFO - 8. UMLS - 9. MESH - 10. HP - 11. GARD - 12. MEDDRA + 5. EFO + 6. UMLS + 7. MESH + 8. MEDDRA + 9. NCIT + 10. SNOMEDCT + 11. HP + 12. GARD 13. name - ClinicalFinding @@ -200,23 +200,23 @@ let input = { > Pathway ID Resolution is done through biothings.ncats.io/geneset API - Pathway - 1. REACT - 2. KEGG - 3. PHARMGKB.PATHWAYS - 4. WIKIPATHWAYS - 5. BIOCARTA - 6. GO - 7. SMPDB + 1. GO + 2. REACT + 3. KEGG + 4. SMPDB + 5. PHARMGKB.PATHWAYS + 6. WIKIPATHWAYS + 7. BIOCARTA 8. name > MolecularActivity ID Resolution is done through BioThings Gene Ontology Molecular Activity API - MolecularActivity 1. GO - 2. MetaCyc + 2. REACT 3. RHEA - 4. KEGG.REACTION - 5. REACT + 4. MetaCyc + 5. KEGG.REACTION 6. name > CellularComponent ID Resolution is done through BioThings Gene Ontology Cellular Component API @@ -231,8 +231,8 @@ let input = { - BiologicalProcess 1. GO - 2. MetaCyc - 3. REACT + 2. REACT + 3. MetaCyc 4. KEGG 5. name @@ -248,14 +248,14 @@ let input = { > PhenotypicFeature ID Resolution is done through BioThings HPO API - PhenotypicFeature - 1. UMLS - 2. SNOMEDCT - 3. HP - 4. MEDDRA - 5. EFO - 6. NCIT - 7. MESH - 8. MP + 1. HP + 2. EFO + 3. NCIT + 4. UMLS + 5. MEDDRA + 6. MP + 7. SNOMEDCT + 8. MESH 9. name > Cell ID Resolution is done through Biothings Cell Ontology API diff --git a/__tests__/integration/default_resolver.test.ts b/__tests__/integration/default_resolver.test.ts index bf55890..e4ac5f5 100644 --- a/__tests__/integration/default_resolver.test.ts +++ b/__tests__/integration/default_resolver.test.ts @@ -30,7 +30,7 @@ describe("Test ID Resolver", () => { expect(res).toHaveProperty("LINCS:LSM-2471"); expect(res['LINCS:LSM-2471']).toHaveLength(1); expect(res['LINCS:LSM-2471'][0]).toBeInstanceOf(ResolvableBioEntity); - expect(res['LINCS:LSM-2471'][0].primaryID).toEqual("CHEBI:8863"); + expect(res['LINCS:LSM-2471'][0].primaryID).toEqual("PUBCHEM.COMPOUND:5070"); expect(res['LINCS:LSM-2471'][0].dbIDs.LINCS).toEqual(["LSM-2471"]); }) diff --git a/__tests__/unittest/bioentity.test.ts b/__tests__/unittest/bioentity.test.ts index 0b55f10..06d9399 100644 --- a/__tests__/unittest/bioentity.test.ts +++ b/__tests__/unittest/bioentity.test.ts @@ -69,7 +69,7 @@ describe("Test ResolvableBioEntity Class", () => { test("if both SYMBOL and name are not provided in db ids, should return primary id", () => { const entity = new ResolvableBioEntity("SmallMolecule", CHEMBL7512_DB_IDS, {}); const label = entity.label; - expect(label).toBe("CHEMBL.COMPOUND:CHEMBL7512"); + expect(label).toBe("PUBCHEM.COMPOUND:53428"); }) }); diff --git a/src/config.ts b/src/config.ts index 57fad07..daedf8a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -12,7 +12,7 @@ export const MAX_CONCURRENT_QUERIES = 3; export const APIMETA: MetaDataItemsObject = { Gene: { - id_ranks: ['NCBIGene', 'ENSEMBL', 'HGNC', 'SYMBOL', 'OMIM', 'UniProtKB', 'UMLS', 'MGI', 'name'], + id_ranks: ['NCBIGene', 'ENSEMBL', 'HGNC', 'MGI', 'OMIM', 'UMLS', 'SYMBOL', 'UniProtKB', 'name'], semantic: 'Gene', api_name: 'mygene.info', url: 'https://mygene.info/v3/query', @@ -47,7 +47,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, Protein: { - id_ranks: ['UniProtKB', 'ENSEMBL', 'SYMBOL', 'UMLS', 'name'], + id_ranks: ['UniProtKB', 'ENSEMBL', 'UMLS', 'SYMBOL', 'name'], semantic: 'Protein', api_name: 'mygene.info', url: 'https://mygene.info/v3/query', @@ -63,7 +63,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, SequenceVariant: { - id_ranks: ['HGVS', 'DBSNP', 'MYVARIANT_HG19', 'CLINVAR'], + id_ranks: ['CLINVAR', 'DBSNP', 'HGVS', 'MYVARIANT_HG19'], api_name: 'myvariant.info', semantic: 'SequenceVariant', url: 'https://myvariant.info/v1/query', @@ -82,7 +82,8 @@ export const APIMETA: MetaDataItemsObject = { }, }, SmallMolecule: { - id_ranks: ['CHEBI', 'CHEMBL.COMPOUND', 'DRUGBANK', 'PUBCHEM.COMPOUND', 'MESH', 'INCHI', 'INCHIKEY', 'UNII', 'KEGG.COMPOUND', 'UMLS', 'LINCS', 'CAS', 'HMDB', 'name'], + id_ranks: ['PUBCHEM.COMPOUND', 'CHEMBL.COMPOUND', 'UNII', 'CHEBI', 'DRUGBANK', 'MESH', 'CAS', 'HMDB', 'KEGG.COMPOUND', + 'INCHI', 'INCHIKEY', 'UMLS', 'LINCS', 'name'], semantic: 'SmallMolecule', api_name: 'mychem.info', url: 'https://mychem.info/v1/query', @@ -117,7 +118,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, Drug: { - id_ranks: ['CHEBI', 'CHEMBL.COMPOUND', 'DRUGBANK', 'PUBCHEM.COMPOUND', 'MESH', 'INCHI', 'INCHIKEY', 'UNII', 'KEGG.COMPOUND', 'UMLS', 'LINCS', 'CAS', 'HMDB', 'RXCUI', 'NDC', 'name'], + id_ranks: ['RXCUI', 'NDC', 'DRUGBANK', 'PUBCHEM.COMPOUND', 'CHEMBL.COMPOUND', 'UNII', 'CHEBI', 'MESH', 'CAS', 'HMDB', 'KEGG.COMPOUND', 'INCHI', 'INCHIKEY', 'UMLS', 'LINCS', 'name'], semantic: 'Drug', api_name: 'mychem.info', url: 'https://mychem.info/v1/query', @@ -154,7 +155,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, PhenotypicFeature: { - id_ranks: ['UMLS', 'SNOMEDCT', 'HP', 'MEDDRA', 'EFO', 'NCIT', 'MESH', 'MP', 'name'], + id_ranks: ['HP', 'EFO', 'NCIT', 'UMLS', 'MEDDRA', 'MP', 'SNOMEDCT', 'MESH', 'name'], semantic: 'PhenotypicFeature', api_name: 'HPO API', url: 'https://biothings.ncats.io/hpo/query', @@ -171,7 +172,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, Disease: { - id_ranks: ['MONDO', 'DOID', 'OMIM', 'ORPHANET', 'SNOMEDCT', 'NCIT', 'EFO', 'UMLS', 'MESH', 'HP', 'GARD', 'MEDDRA', 'name'], + id_ranks: ['MONDO', 'DOID', 'OMIM', 'ORPHANET', 'EFO', 'UMLS', 'MESH', 'MEDDRA', 'NCIT', 'SNOMEDCT', 'HP', 'GARD', 'name'], semantic: 'Disease', api_name: 'mydisease.info', url: 'https://mydisease.info/v1/query', @@ -204,7 +205,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, MolecularActivity: { - id_ranks: ['GO', 'MetaCyc', 'RHEA', 'KEGG.REACTION', 'REACT', 'name'], + id_ranks: ['GO', 'REACT', 'RHEA', 'MetaCyc', 'KEGG.REACTION', 'name'], semantic: 'MolecularActivity', api_name: 'Gene Ontology Molecular Function API', url: 'https://biothings.ncats.io/go_mf/query', @@ -218,7 +219,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, BiologicalProcess: { - id_ranks: ['GO', 'MetaCyc', 'REACT', 'KEGG', 'name'], + id_ranks: ['GO', 'REACT', 'MetaCyc', 'KEGG', 'name'], semantic: 'BiologicalProcess', api_name: 'Gene Ontology Biological Process API', url: 'https://biothings.ncats.io/go_bp/query', @@ -242,7 +243,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, Pathway: { - id_ranks: ['REACT', 'KEGG', 'PHARMGKB.PATHWAYS', 'WIKIPATHWAYS', 'BIOCARTA', 'GO', 'SMPDB', 'name'], + id_ranks: ['GO', 'REACT', 'KEGG', 'SMPDB', 'PHARMGKB.PATHWAYS', 'WIKIPATHWAYS', 'BIOCARTA', 'name'], semantic: 'Pathway', api_name: 'Geneset API', url: 'https://biothings.ncats.io/geneset/query',