diff --git a/README.md b/README.md index e11f8b5..e4420bc 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -![Test Coveralls](https://github.com/kevinxin90/biomedical_id_resolver.js/workflows/Test%20Coveralls/badge.svg) -[![Coverage Status](https://coveralls.io/repos/github/kevinxin90/biomedical_id_resolver.js/badge.svg?branch=master)](https://coveralls.io/github/kevinxin90/biomedical_id_resolver.js?branch=master) +![Test Coveralls](https://github.com/biothings/biomedical_id_resolver.js/workflows/Test%20Coveralls/badge.svg) +[![Coverage Status](https://coveralls.io/repos/github/biothings/biomedical_id_resolver.js/badge.svg?branch=main)](https://coveralls.io/github/biothings/biomedical_id_resolver.js?branch=main) ![npm](https://img.shields.io/npm/dw/biomedical_id_resolver) -![GitHub issues](https://img.shields.io/github/issues/kevinxin90/biomedical_id_resolver.js) +![GitHub issues](https://img.shields.io/github/issues/biothings/biomedical_id_resolver.js) ![NPM](https://img.shields.io/npm/l/biomedical_id_resolver) ![npm](https://img.shields.io/npm/v/biomedical_id_resolver?style=plastic) -![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/kevinxin90/biomedical_id_resolver.js) +![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/biothings/biomedical_id_resolver.js) @@ -104,7 +104,7 @@ let input = { ## Available Semantic Types & prefixes -> Gene ID resolution is done through MyGene.info API +> Gene, Transcript, Protein ID resolution is done through MyGene.info API - Gene 1. NCBIGene @@ -114,7 +114,20 @@ let input = { 5. OMIM 6. UniProtKB 7. UMLS - 8. name + 8. MGI + 9. name + +- Transcript + 1. ENSEMBL + 2. SYMBOL + 3. name + +- Protein + 1. UniProtKB + 2. ENSEMBL + 3. SYMBOL + 4. UMLS + 5. name > Variant ID resolution is done through MyVariant.info API @@ -124,21 +137,35 @@ let input = { 3. MYVARIANT_HG19 4. ClinVar -> ChemicalSubstance ID resolution is done through MyChem.info API +> ChemicalSubstance, Drug ID resolution is done through MyChem.info API - ChemicalSubstance 1. CHEBI 2. CHEMBL.COMPOUND 3. DRUGBANK - 4. PUBCHEM + 4. PUBCHEM.COMPOUND + 5. MESH + 6. INCHI + 7. INCHIKEY + 8. UNII + 9. KEGG.COMPOUND + 10. UMLS + 11. LINCS + 12. name + +- Drug + 1. CHEBI + 2. CHEMBL.COMPOUND + 3. DRUGBANK + 4. PUBCHEM.COMPOUND 5. MESH 6. INCHI 7. INCHIKEY 8. UNII - 9. KEGG + 9. KEGG.COMPOUND 10. UMLS - 11. name - 12. id + 11. LINCS + 12. name > Disease ID Resolution is done through MyDisease.info API @@ -148,20 +175,24 @@ let input = { 2. DOID 3. OMIM 4. ORPHANET - 5. EFO - 6. UMLS - 7. MESH - 8. GARD - 9. name + 5. SNOMEDCT + 6. NCIT + 7. EFO + 8. UMLS + 9. MESH + 10. HP + 11. GARD + 12. name > Pathway ID Resolution is done through biothings.ncats.io/geneset API - Pathway - 1. Reactome + 1. REACT 2. KEGG - 3. PHARMGKB + 3. PHARMGKB.PATHWAYS 4. WIKIPATHWAYS - 5. name + 5. BIOCARTA + 6. name > MolecularActivity ID Resolution is done through BioThings Gene Ontology Molecular Activity API @@ -170,19 +201,14 @@ let input = { 2. MetaCyc 3. RHEA 4. KEGG.REACTION - 5. Reactome + 5. REACT + 6. name > CellularComponent ID Resolution is done through BioThings Gene Ontology Cellular Component API - CellularComponent 1. GO - 2. MESH - 3. UMLS - 4. NCIT - 5. SNOMEDCT - 6. UBERON - 7. CL - 8. name + 2. name > BiologicalProcess ID Resolution is done through BioThings Gene Ontology Biological Process API @@ -190,17 +216,17 @@ let input = { 1. GO 2. MetaCyc - 3. Reactome - 4. name + 3. KEGG + 4. REACT + 5. name > AnatomicalEntity ID Resolution is done through BioThings UBERON API - AnatomicalEntity 1. UBERON 2. UMLS - 3. NCIT - 4. MESH - 5. name + 3. MESH + 4. name > PhenotypicFeature ID Resolution is done through BioThings HPO API @@ -215,16 +241,13 @@ let input = { 8. MP 9. name -> Cell ID Resolution is done through nodenormalization API +> Cell ID Resolution is done through Biothings Cell Ontology API - Cell 1. CL - 2. UMLS - 3. NCIT - 4. MESH - 5. UBERON - 6. SNOMEDCT - 7. name + 2. NCIT + 3. MESH + 4. name ## Development @@ -234,9 +257,9 @@ let input = { 3. Run `npm ci` to install the dependencies. 4. scripts are stored in `/src` folder 5. Add test to `/__tests__` folder -6. run `npm run release` to bump version and generte change log +6. run `npm run release` to bump version and generate change log 7. run `npx depcheck` to check for unused packages in package.json ## CHANGELOG -See [CHANGELOG.md](https://github.com/kevinxin90/biomedical_id_resolver.js/blob/master/CHANGELOG.md) +See [CHANGELOG.md](https://github.com/biothings/biomedical_id_resolver.js/blob/main/CHANGELOG.md) diff --git a/__tests__/unittest/bioentity.test.ts b/__tests__/unittest/bioentity.test.ts index 9819457..ecb86dd 100644 --- a/__tests__/unittest/bioentity.test.ts +++ b/__tests__/unittest/bioentity.test.ts @@ -11,7 +11,7 @@ const CDK2_DB_IDs = { const RILUZOLE_DB_IDS = { "CHEMBL.COMPOUND": ["CHEMBL744"], "name": ["Riluzole", "RILUZOLE"], - "PUBCHEM": ["5070"], + "PUBCHEM.COMPOUND": ["5070"], } const DB_ID_WITH_NO_PRIMARY = { @@ -22,7 +22,11 @@ const DISEASE_DB_IDS = { "MONDO": ["MONDO:12345"] } -const CHEMBL7512_DB_IDS = { "CHEMBL.COMPOUND": ["CHEMBL7512"], "PUBCHEM": ["53428"] } +const CHEMBL7512_DB_IDS = { + "CHEMBL.COMPOUND": ["CHEMBL7512"], + "PUBCHEM.COMPOUND": ["53428"] +} + describe("Test ResolvableBioEntity Class", () => { test("return semanticType when called semanticType property", () => { const entity = new ResolvableBioEntity("Gene", CDK2_DB_IDs, {}); diff --git a/src/common/types.ts b/src/common/types.ts index 892469d..96621d2 100644 --- a/src/common/types.ts +++ b/src/common/types.ts @@ -13,8 +13,11 @@ export interface BioThingsAPIFailedQueryResponse extends BioThingsAPIQueryRespon export type ResolvableSemanticTypes = | 'Gene' + | 'Transcript' + | 'Protein' | 'SequenceVariant' | 'ChemicalSubstance' + | 'Drug' | 'Disease' | 'DiseaseOrPhenotypicFeature' | 'PhenotypicFeature' @@ -27,8 +30,11 @@ export type ResolvableSemanticTypes = export enum ResolvableTypes { Gene = 'Gene', + Transcript = 'Transcript', + Protein = 'Protein', SequenceVariant = 'SequenceVariant', ChemicalSubstance = 'ChemicalSubstance', + Drug = 'Drug', Disease = 'Disease', DiseaseOrPhenotypicFeature = 'DiseaseOrPhenotypicFeature', PhenotypicFeature = 'PhenotypicFeature', diff --git a/src/config.ts b/src/config.ts index d8b64e9..88fa7d3 100644 --- a/src/config.ts +++ b/src/config.ts @@ -32,6 +32,20 @@ export const APIMETA: MetaDataItemsObject = { type_of_gene: ['type_of_gene'], }, }, + Transcript: { + id_ranks: ['ENSEMBL', 'SYMBOL', 'name'], + semantic: 'Transcript', + api_name: 'mygene.info', + url: 'https://mygene.info/v3/query', + mapping: { + ENSEMBL: ['ensembl.transcript'], + SYMBOL: ['symbol'], + name: ['name'], + }, + additional_attributes_mapping: { + interpro: ['interpro.desc'], + }, + }, Protein: { id_ranks: ['UniProtKB', 'ENSEMBL', 'SYMBOL', 'UMLS', 'name'], semantic: 'Protein', @@ -77,8 +91,9 @@ export const APIMETA: MetaDataItemsObject = { 'INCHI', 'INCHIKEY', 'UNII', - 'KEGG', + 'KEGG.COMPOUND', 'UMLS', + 'LINCS', 'name', ], semantic: 'ChemicalSubstance', @@ -99,8 +114,8 @@ export const APIMETA: MetaDataItemsObject = { UNII: ['drugcentral.xrefs.unii', 'unii.unii', 'aeolus.unii', 'ginas.unii'], INCHIKEY: ['drugbank.inchi_key', 'ginas.inchikey', 'unii.inchikey', 'chebi.inchikey'], INCHI: ['drugbank.inchi', 'chebi.inchi', 'chembl.inchi'], - KEGG: ['drugbank.xrefs.kegg.cid'], - LINCS: ['unichem.lincs'], + 'KEGG.COMPOUND': ['drugbank.xrefs.kegg.cid'], + LINCS: ['unichem.lincs', 'chebi.xrefs.lincs'], name: ['chembl.pref_name', 'drugbank.name', 'umls.name', 'ginas.preferred_name', 'pharmgkb.name', 'chebi.name'], }, additional_attributes_mapping: { @@ -127,12 +142,12 @@ export const APIMETA: MetaDataItemsObject = { 'INCHI', 'INCHIKEY', 'UNII', - 'KEGG', + 'KEGG.COMPOUND', 'UMLS', 'LINCS', 'name', ], - semantic: 'ChemicalSubstance', + semantic: 'Drug', api_name: 'mychem.info', url: 'https://mychem.info/v1/query', mapping: { @@ -150,7 +165,8 @@ export const APIMETA: MetaDataItemsObject = { UNII: ['drugcentral.xrefs.unii', 'unii.unii', 'aeolus.unii', 'ginas.unii'], INCHIKEY: ['drugbank.inchi_key', 'ginas.inchikey', 'unii.inchikey', 'chebi.inchikey'], INCHI: ['drugbank.inchi', 'chebi.inchi', 'chembl.inchi'], - KEGG: ['drugbank.xrefs.kegg.cid'], + 'KEGG.COMPOUND': ['drugbank.xrefs.kegg.cid'], + LINCS: ['unichem.lincs', 'chebi.xrefs.lincs'], name: ['chembl.pref_name', 'drugbank.name', 'umls.name', 'ginas.preferred_name', 'pharmgkb.name', 'chebi.name'], }, additional_attributes_mapping: { @@ -174,7 +190,7 @@ export const APIMETA: MetaDataItemsObject = { url: 'https://biothings.ncats.io/hpo/query', mapping: { UMLS: ['xrefs.umls'], - SNOMEDCT: ['xrefs.snomed_ct'], + SNOMEDCT: ['xrefs.snomed_ct', 'xrefs.snomedct_us'], HP: ['_id'], MEDDRA: ['xrefs.meddra'], EFO: ['xrefs.efo'], @@ -211,7 +227,7 @@ export const APIMETA: MetaDataItemsObject = { }, }, MolecularActivity: { - id_ranks: ['GO', 'MetaCyc', 'RHEA', 'KEGG', 'REACT', 'name'], + id_ranks: ['GO', 'MetaCyc', 'RHEA', 'KEGG.REACTION', 'REACT', 'name'], semantic: 'MolecularActivity', api_name: 'Gene Ontology Molecular Function API', url: 'https://biothings.ncats.io/go_mf/query', @@ -219,7 +235,7 @@ export const APIMETA: MetaDataItemsObject = { GO: ['_id'], MetaCyc: ['xrefs.metacyc'], RHEA: ['xrefs.rhea'], - KEGG: ['xrefs.kegg_reaction'], + 'KEGG.REACTION': ['xrefs.kegg_reaction'], REACT: ['xrefs.reactome'], name: ['name'], }, @@ -244,21 +260,21 @@ export const APIMETA: MetaDataItemsObject = { url: 'https://biothings.ncats.io/go_cc/query', mapping: { GO: ['_id'], - MetaCyc: ['xrefs.metacyc'], + MetaCyc: ['xrefs.metacyc'], // field missing in the API? name: ['name'], }, }, Pathway: { - id_ranks: ['REACT', 'KEGG', 'PHARMGKB', 'WIKIPATHWAYS', 'BIOCARTA', 'name'], + id_ranks: ['REACT', 'KEGG', 'PHARMGKB.PATHWAYS', 'WIKIPATHWAYS', 'BIOCARTA', 'name'], semantic: 'Pathway', - api_name: 'geneset API', + api_name: 'Geneset API', url: 'https://biothings.ncats.io/geneset/query', mapping: { REACT: ['reactome'], WIKIPATHWAYS: ['wikipathways'], KEGG: ['kegg'], BIOCARTA: ['biocarta'], - PHARMGKB: ['pharmgkb'], + 'PHARMGKB.PATHWAYS': ['pharmgkb'], name: ['name'], }, additional_attributes_mapping: { @@ -274,20 +290,20 @@ export const APIMETA: MetaDataItemsObject = { UBERON: ['_id'], UMLS: ['xrefs.umls'], MESH: ['xrefs.mesh'], - NCIT: ['xrefs.ncit'], + NCIT: ['xrefs.ncit'], // field missing in the API? name: ['name'], }, }, Cell: { id_ranks: ['CL', 'NCIT', 'MESH', 'EFO', 'name'], semantic: 'Cell', - api_name: 'Cell Onotlogy API', + api_name: 'Cell Ontology API', url: 'https://biothings.ncats.io/cell_ontology/query', mapping: { CL: ['_id'], NCIT: ['xrefs.ncit'], MESH: ['xrefs.mesh'], - EFO: ['xrefs.efo'], + EFO: ['xrefs.efo'], // field missing in the API? name: ['name'], }, },