From 71c52dbede6a248fa5f84cfca0fbc4d21c102d86 Mon Sep 17 00:00:00 2001 From: Dylan Welzel Date: Wed, 12 Jun 2024 14:17:17 -0700 Subject: [PATCH] add name group to mapping for sources --- .../dataload/sources/aeolus/aeolus_upload.py | 3 +- .../dataload/sources/chebi/chebi_upload.py | 20 +- .../dataload/sources/chembl/chembl_upload.py | 37 +- .../drugbank_open/drugbank_open_mapping.py | 3 +- .../sources/drugcentral/drugcentral_upload.py | 884 +++++++++--------- .../dataload/sources/ginas/ginas_upload.py | 171 ++-- src/hub/dataload/sources/ndc/ndc_upload.py | 112 +-- .../sources/pharmgkb/pharmgkb_upload.py | 97 +- src/hub/dataload/sources/umls/umls_upload.py | 11 +- src/hub/dataload/sources/unii/unii_upload.py | 188 ++-- src/plugins/fda_orphan_drug/mapping.py | 87 +- 11 files changed, 820 insertions(+), 793 deletions(-) diff --git a/src/hub/dataload/sources/aeolus/aeolus_upload.py b/src/hub/dataload/sources/aeolus/aeolus_upload.py index 08c2af78..0f6d12f3 100644 --- a/src/hub/dataload/sources/aeolus/aeolus_upload.py +++ b/src/hub/dataload/sources/aeolus/aeolus_upload.py @@ -110,7 +110,8 @@ def get_mapping(klass): "drug_name": { "type": "text", "copy_to": [ - "all" + "all", + "name" ] }, "pt": { diff --git a/src/hub/dataload/sources/chebi/chebi_upload.py b/src/hub/dataload/sources/chebi/chebi_upload.py index d55f9f49..c0b676db 100644 --- a/src/hub/dataload/sources/chebi/chebi_upload.py +++ b/src/hub/dataload/sources/chebi/chebi_upload.py @@ -1,15 +1,15 @@ import os -import pymongo -from .chebi_parser import ChebiParser, CompoundReader, OntologyReader -from .exclusion_ids import exclusion_ids -from hub.dataload.uploader import BaseDrugUploader -from biothings.utils.mongo import get_src_db import biothings.hub.dataload.storage as storage +import pymongo from biothings.utils.exclude_ids import ExcludeFieldsById +from biothings.utils.mongo import get_src_db +from hub.dataload.uploader import BaseDrugUploader from hub.datatransform.keylookup import MyChemKeyLookup +from .chebi_parser import ChebiParser, CompoundReader, OntologyReader +from .exclusion_ids import exclusion_ids SRC_META = { "url": 'https://www.ebi.ac.uk/chebi/', @@ -38,7 +38,7 @@ class ChebiUploader(BaseDrugUploader): - `chebi.xrefs.patent` `ExcludeFieldsById` acts like a filter to truncate the length of such long lists to 1,000. - + See the comment on the ExcludeFieldsById for use of this class. """ exclude_fields = ExcludeFieldsById(exclusion_ids, [ @@ -53,10 +53,12 @@ def load_data(self, data_folder): self.logger.info("Load data from '%s'" % data_folder) sdf_input_file = os.path.join(data_folder, "ChEBI_complete.sdf") - assert os.path.exists(sdf_input_file), "Can't find input file '%s'" % sdf_input_file + assert os.path.exists( + sdf_input_file), "Can't find input file '%s'" % sdf_input_file obo_input_file = os.path.join(data_folder, "chebi_lite.obo") - assert os.path.exists(obo_input_file), "Can't find input file '%s'" % obo_input_file + assert os.path.exists( + obo_input_file), "Can't find input file '%s'" % obo_input_file # get others source collection for inchi key conversion drugbank_col = get_src_db()["drugbank"] @@ -276,7 +278,7 @@ def get_mapping(klass): }, "name": { "type": "text", - 'copy_to': ['all'], + 'copy_to': ['all', 'name'], }, "charge": { "type": "integer" diff --git a/src/hub/dataload/sources/chembl/chembl_upload.py b/src/hub/dataload/sources/chembl/chembl_upload.py index d5023ed3..dea9b0b2 100644 --- a/src/hub/dataload/sources/chembl/chembl_upload.py +++ b/src/hub/dataload/sources/chembl/chembl_upload.py @@ -1,15 +1,18 @@ """ Chembl uploader """ +import glob + # pylint: disable=E0401, E0611 import os -import glob + import biothings.hub.dataload.storage as storage from biothings.hub.dataload.uploader import ParallelizedSourceUploader + from hub.dataload.uploader import BaseDrugUploader from hub.datatransform.keylookup import MyChemKeyLookup -from .chembl_parser import AuxiliaryDataLoader, MoleculeDataLoader, load_chembl_data +from .chembl_parser import AuxiliaryDataLoader, MoleculeDataLoader, load_chembl_data SRC_META = { "url": 'https://www.ebi.ac.uk/chembl/', @@ -50,13 +53,19 @@ def jobs(self): this method will be called by self.update_data() and then generate arguments for self.load.data() method, allowing parallelization """ - molecule_filepaths = glob.glob(os.path.join(self.data_folder, self.MOLECULE_FILENAME_PATTERN)) - mol_data_loaders = [MoleculeDataLoader(molecule_filepath=filepath) for filepath in molecule_filepaths] + molecule_filepaths = glob.glob(os.path.join( + self.data_folder, self.MOLECULE_FILENAME_PATTERN)) + mol_data_loaders = [MoleculeDataLoader( + molecule_filepath=filepath) for filepath in molecule_filepaths] - drug_indication_filepaths = glob.iglob(os.path.join(self.data_folder, self.DRUG_INDICATION_FILENAME_PATTERN)) - mechanism_filepaths = glob.iglob(os.path.join(self.data_folder, self.MECHANISM_FILENAME_PATTERN)) - target_filepaths = glob.iglob(os.path.join(self.data_folder, self.TARGET_FILENAME_PATTERN)) - binding_site_filepaths = glob.iglob(os.path.join(self.data_folder, self.BINDING_SITE_FILENAME_PATTERN)) + drug_indication_filepaths = glob.iglob(os.path.join( + self.data_folder, self.DRUG_INDICATION_FILENAME_PATTERN)) + mechanism_filepaths = glob.iglob(os.path.join( + self.data_folder, self.MECHANISM_FILENAME_PATTERN)) + target_filepaths = glob.iglob(os.path.join( + self.data_folder, self.TARGET_FILENAME_PATTERN)) + binding_site_filepaths = glob.iglob(os.path.join( + self.data_folder, self.BINDING_SITE_FILENAME_PATTERN)) aux_data_loader = AuxiliaryDataLoader(drug_indication_filepaths=drug_indication_filepaths, mechanism_filepaths=mechanism_filepaths, target_filepaths=target_filepaths, @@ -67,7 +76,8 @@ def jobs(self): def load_data(self, mol_data_loader: MoleculeDataLoader, aux_data_loader: AuxiliaryDataLoader): """load data from an input file""" - self.logger.info("Load data from file '%s'" % mol_data_loader.molecule_filepath) + self.logger.info("Load data from file '%s'" % + mol_data_loader.molecule_filepath) return self.keylookup(load_chembl_data, debug=True)(mol_data_loader, aux_data_loader) @@ -103,9 +113,9 @@ def get_mapping(cls): } } }, - "first_approval": { - "type": "integer" - }, + "first_approval": { + "type": "integer" + }, "indication_refs": { "properties": { "id": { @@ -535,7 +545,8 @@ def get_mapping(cls): "pref_name": { "type": "text", "copy_to": [ - "all" + "all", + "name" ] }, "first_approval": { diff --git a/src/hub/dataload/sources/drugbank_open/drugbank_open_mapping.py b/src/hub/dataload/sources/drugbank_open/drugbank_open_mapping.py index a42bf4d7..645d0f24 100644 --- a/src/hub/dataload/sources/drugbank_open/drugbank_open_mapping.py +++ b/src/hub/dataload/sources/drugbank_open/drugbank_open_mapping.py @@ -11,7 +11,8 @@ "type": "keyword" }, "name": { - "type": "text" + "type": "text", + "copy_to": ["name"] }, "cas_number": { "normalizer": "keyword_lowercase_normalizer", diff --git a/src/hub/dataload/sources/drugcentral/drugcentral_upload.py b/src/hub/dataload/sources/drugcentral/drugcentral_upload.py index c1586789..b2b98339 100644 --- a/src/hub/dataload/sources/drugcentral/drugcentral_upload.py +++ b/src/hub/dataload/sources/drugcentral/drugcentral_upload.py @@ -1,4 +1,5 @@ import biothings.hub.dataload.storage as storage + from hub.dataload.uploader import BaseDrugUploader from hub.datatransform.keylookup import MyChemKeyLookup @@ -12,31 +13,31 @@ class DrugCentralUploader(BaseDrugUploader): storage_class = storage.RootKeyMergerStorage __metadata__ = { - "src_meta" : { - "url" : "http://drugcentral.org/", - "license_url" : "http://drugcentral.org/privacy", - "license_url_short" : "http://bit.ly/2SeEhUy", - "license" : "CC BY-SA 4.0", - } - } + "src_meta": { + "url": "http://drugcentral.org/", + "license_url": "http://drugcentral.org/privacy", + "license_url_short": "http://bit.ly/2SeEhUy", + "license": "CC BY-SA 4.0", + } + } # Keylookup is a callable object keylookup = MyChemKeyLookup( - [('inchikey', 'drugcentral.structures.inchikey'), - ('unii', 'drugcentral.xref.unii'), - # other keys are present but not currently used by keylookup - ('inchi', 'drugcentral.structures.inchi'), - ('drugbank', 'drugcentral.xrefs.drugbank_id'), - ('chebi', 'drugcentral.xrefs.chebi'), - ('chembl', 'drugcentral.xrefs.chembl_id'), - ('pubchem', 'drugcentral.xrefs.pubchem_cid')], - # ('drugname', 'drugcentral.synonyms')], # unhashable type - list - copy_from_doc=True, - ) + [('inchikey', 'drugcentral.structures.inchikey'), + ('unii', 'drugcentral.xref.unii'), + # other keys are present but not currently used by keylookup + ('inchi', 'drugcentral.structures.inchi'), + ('drugbank', 'drugcentral.xrefs.drugbank_id'), + ('chebi', 'drugcentral.xrefs.chebi'), + ('chembl', 'drugcentral.xrefs.chembl_id'), + ('pubchem', 'drugcentral.xrefs.pubchem_cid')], + # ('drugname', 'drugcentral.synonyms')], # unhashable type - list + copy_from_doc=True, + ) def load_data(self, data_folder): # Commented out keylookup call, as overlapping work is performed by the parser. - #drugcentral_docs = self.keylookup(load_data, debug=True)(data_folder) + # drugcentral_docs = self.keylookup(load_data, debug=True)(data_folder) drugcentral_docs = load_data(data_folder) return drugcentral_docs @@ -45,457 +46,456 @@ def get_mapping(klass): mapping = { "drugcentral": { "properties": { - "pharmacology_class": { - "properties": { - "mesh_pa": { - "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } - } - }, - "fda_moa": { + "pharmacology_class": { "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } + "mesh_pa": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "fda_moa": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "fda_epc": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "chebi": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "fda_cs": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "fda_pe": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "fda_ext": { + "properties": { + "description": { + "type": "text" + }, + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + } + } + }, + "fda_chemical/ingredient": { + "properties": { + "code": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "description": { + "type": "text" + } + } + } } }, - "fda_epc": { + "fda_adverse_event": { "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } - } - }, - "chebi": { - "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } - } - }, - "fda_cs": { - "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } - } - }, - "fda_pe": { - "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } - } - }, - "fda_ext": { - "properties": { - "description": { - "type": "text" - }, - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - } - } - }, - "fda_chemical/ingredient": { - "properties": { - "code": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "description": { - "type": "text" - } + "meddra_code": { + "type": "integer" + }, + "level": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "llr": { + "type": "float" + }, + "llr_threshold": { + "type": "float" + }, + "drug_ae": { + "type": "integer" + }, + "drug_no_ae": { + "type": "integer" + }, + "no_drug_ae": { + "type": "integer" + }, + "no_drug_no_ar": { + "type": "integer" + }, + "meddra_term": { + "type": "text" + } } - } - } - }, - "fda_adverse_event": { - "properties": { - "meddra_code": { - "type": "integer" - }, - "level": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "llr": { - "type": "float" - }, - "llr_threshold": { - "type": "float" - }, - "drug_ae": { - "type": "integer" - }, - "drug_no_ae": { - "type": "integer" }, - "no_drug_ae": { - "type": "integer" - }, - "no_drug_no_ar": { - "type": "integer" - }, - "meddra_term": { - "type": "text" - } - } - }, - "drug_use": { - "properties": { - "indication": { + "drug_use": { "properties": { - "umls_cui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "cui_semantic_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomed_concept_id": { - "type": "long" - }, - "concept_name": { - "type": "text" - }, - "snomed_full_name": { - "type": "text" - } + "indication": { + "properties": { + "umls_cui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "cui_semantic_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomed_concept_id": { + "type": "long" + }, + "concept_name": { + "type": "text" + }, + "snomed_full_name": { + "type": "text" + } + } + }, + "contraindication": { + "properties": { + "umls_cui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "cui_semantic_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomed_concept_id": { + "type": "long" + }, + "concept_name": { + "type": "text" + }, + "snomed_full_name": { + "type": "text" + } + } + }, + "off_label_use": { + "properties": { + "umls_cui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "cui_semantic_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomed_concept_id": { + "type": "long" + }, + "concept_name": { + "type": "text" + }, + "snomed_full_name": { + "type": "text" + } + } + }, + "symptomatic_treatment": { + "properties": { + "umls_cui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "concept_name": { + "type": "text" + }, + "snomed_full_name": { + "type": "text" + }, + "cui_semantic_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomed_concept_id": { + "type": "integer" + } + } + }, + "reduce_risk": { + "properties": { + "umls_cui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "concept_name": { + "type": "text" + }, + "snomed_full_name": { + "type": "text" + }, + "cui_semantic_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomed_concept_id": { + "type": "integer" + } + } + }, + "diagnosis": { + "properties": { + "umls_cui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "concept_name": { + "type": "text" + }, + "snomed_full_name": { + "type": "text" + }, + "cui_semantic_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomed_concept_id": { + "type": "long" + } + } + } } }, - "contraindication": { + "drug_dosage": { "properties": { - "umls_cui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "cui_semantic_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomed_concept_id": { - "type": "long" - }, - "concept_name": { + "dosage": { + "type": "float" + }, + "unit": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "route": { "type": "text" - }, - "snomed_full_name": { - "type": "text" - } + } } }, - "off_label_use": { + "structures": { "properties": { - "umls_cui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "cui_semantic_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomed_concept_id": { - "type": "long" - }, - "concept_name": { - "type": "text" - }, - "snomed_full_name": { - "type": "text" - } + "cas_rn": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "inchi": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "inchikey": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "smiles": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "inn": { + "type": "text", + "copy_to": [ + "all" + ] + } } }, - "symptomatic_treatment": { + "xrefs": { "properties": { - "umls_cui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "concept_name": { - "type": "text" - }, - "snomed_full_name": { - "type": "text" - }, - "cui_semantic_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomed_concept_id": { - "type": "integer" - } + "kegg_drug": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "secondary_cas_rn": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "umlscui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "chebi": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "chembl_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "mesh_supplemental_record_ui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "pubchem_cid": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "inn_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "unii": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "drugbank_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "rxnorm": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "nddf": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "snomedct_us": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "vandf": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "mmsl": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "nui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "mesh_descriptor_ui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "pdb_chem_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "vuid": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "iuphar_ligand_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + } } }, - "reduce_risk": { + "approval": { "properties": { - "umls_cui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "concept_name": { - "type": "text" - }, - "snomed_full_name": { - "type": "text" - }, - "cui_semantic_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomed_concept_id": { - "type": "integer" - } + "date": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "agency": { + "type": "text" + }, + "orphan": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "company": { + "type": "text" + } } }, - "diagnosis": { + "bioactivity": { "properties": { - "umls_cui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "concept_name": { - "type": "text" - }, - "snomed_full_name": { - "type": "text" - }, - "cui_semantic_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomed_concept_id": { - "type": "long" - } - } - } - } - }, - "drug_dosage": { - "properties": { - "dosage": { - "type": "float" - }, - "unit": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "route": { - "type": "text" - } - } - }, - "structures": { - "properties": { - "cas_rn": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "inchi": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "inchikey": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "smiles": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "inn": { - "type": "text", - "copy_to": [ - "all" - ] - } - } - }, - "xrefs": { - "properties": { - "kegg_drug": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "secondary_cas_rn": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "umlscui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "chebi": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "chembl_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "mesh_supplemental_record_ui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "pubchem_cid": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "inn_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "unii": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "drugbank_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "rxnorm": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "nddf": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "snomedct_us": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "vandf": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "mmsl": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "nui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "mesh_descriptor_ui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "pdb_chem_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "vuid": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "iuphar_ligand_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - } - } - }, - "approval": { - "properties": { - "date": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "agency": { - "type": "text" - }, - "orphan": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "company": { - "type": "text" - } - } - }, - "bioactivity": { - "properties": { - "uniprot": { - "properties": { - "uniprot_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "gene_symbol": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "swissprot_entry": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - } + "uniprot": { + "properties": { + "uniprot_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "gene_symbol": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "swissprot_entry": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + } + } + }, + "moa": { + "type": "float" + }, + "act_value": { + "type": "float" + }, + "act_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "moa_source": { + "type": "text" + }, + "action_type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" + }, + "act_source": { + "type": "text" + }, + "target_class": { + "type": "text" + }, + "target_name": { + "type": "text" + }, + "organism": { + "type": "text" + } } }, - "moa": { - "type": "float" - }, - "act_value": { - "type": "float" - }, - "act_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "moa_source": { - "type": "text" - }, - "action_type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" - }, - "act_source": { - "type": "text" - }, - "target_class": { - "type": "text" - }, - "target_name": { - "type": "text" - }, - "organism": { + "synonyms": { "type": "text" } - } - }, - "synonyms": { - "type": "text" } - } - } } + } return mapping - diff --git a/src/hub/dataload/sources/ginas/ginas_upload.py b/src/hub/dataload/sources/ginas/ginas_upload.py index 62861b76..15ceea63 100644 --- a/src/hub/dataload/sources/ginas/ginas_upload.py +++ b/src/hub/dataload/sources/ginas/ginas_upload.py @@ -1,107 +1,108 @@ import biothings.hub.dataload.uploader as uploader + class GinasUploader(uploader.DummySourceUploader): name = "ginas" __metadata__ = { - "src_meta" : { - "url" : "https://ginas.ncats.nih.gov", - "license_url" : "?", - } - } + "src_meta": { + "url": "https://ginas.ncats.nih.gov", + "license_url": "?", + } + } @classmethod def get_mapping(klass): mapping = { - "ginas": { + "ginas": { + "properties": { + "cas_primary": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "relationships": { + "properties": { + "type": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + } + } + }, + "unii": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "approvalID": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "definitionLevel": { + "type": "text", + }, + "inchikey": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "mixture_unii": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "mixture_inchikey": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "names_list": { + "type": "text" + }, + "preferred_name": { + "type": "text", + "copy_to": ["all", "name"] + }, "properties": { - "cas_primary": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "relationships": { - "properties": { - "type": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - } - } - }, - "unii": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "approvalID": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "definitionLevel": { - "type": "text", - }, - "inchikey": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "mixture_unii": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "mixture_inchikey": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "names_list": { - "type": "text" - }, - "preferred_name": { - "type": "text", - "copy_to": ["all"] - }, + "type": "text", + }, + "status": { + "type": "text" + }, + "substanceClass": { + "type": "text" + }, + "tags": { + "type": "text", + }, + "uuid": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "xrefs": { "properties": { - "type": "text", + "CAS": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "DRUG BANK": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", }, - "status": { - "type": "text" + "MESH": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", }, - "substanceClass": { - "type": "text" + "NCI_THESAURUS": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", }, - "tags": { - "type": "text", - }, - "uuid": { + "RXCUI": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "xrefs": { - "properties": { - "CAS": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "DRUG BANK": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "MESH": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "NCI_THESAURUS": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "RXCUI": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "WIKIPEDIA": { - "type": "text", - } - } - }, + }, + "WIKIPEDIA": { + "type": "text", + } } + }, } } + } return mapping diff --git a/src/hub/dataload/sources/ndc/ndc_upload.py b/src/hub/dataload/sources/ndc/ndc_upload.py index c06cac49..0caa8c92 100644 --- a/src/hub/dataload/sources/ndc/ndc_upload.py +++ b/src/hub/dataload/sources/ndc/ndc_upload.py @@ -4,27 +4,29 @@ # pylint: disable=E0401, E0611 import biothings.hub.dataload.storage as storage from biothings.utils.exclude_ids import ExcludeFieldsById + from hub.dataload.uploader import BaseDrugUploader from hub.datatransform.keylookup import MyChemKeyLookup -from .ndc_parser import load_data -from .exclusion_ids import exclusion_ids +from .exclusion_ids import exclusion_ids +from .ndc_parser import load_data SRC_META = { - "url" : "http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm", - "license_url" : + "url": "http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm", + "license_url": "https://www.fda.gov/AboutFDA/AboutThisWebsite/WebsitePolicies/default.htm#linking", "lincese_url_short": "http://bit.ly/2KAojBn", "license": "public domain" } + class NDCUploader(BaseDrugUploader): """ NDCUploader - Biothings Uploader class for NDC """ name = "ndc" storage_class = (storage.RootKeyMergerStorage, storage.CheckSizeStorage) - __metadata__ = {"src_meta" : SRC_META} + __metadata__ = {"src_meta": SRC_META} keylookup = MyChemKeyLookup( [("ndc", "ndc.productndc"), ("drugname", "ndc.nonproprietaryname")]) @@ -39,90 +41,92 @@ def load_data(self, data_folder): def get_mapping(cls): """return mapping data for the class""" mapping = { - "ndc" : { - "properties" : { - "product_id" : { + "ndc": { + "properties": { + "product_id": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "productndc" : { - "type" : "text" - }, - "producttypename" : { + }, + "productndc": { + "type": "text" + }, + "producttypename": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "proprietaryname" : { + }, + "proprietaryname": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "proprietarynamesuffix" : { + "copy_to": ["name"] + }, + "proprietarynamesuffix": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "nonproprietaryname" : { + }, + "nonproprietaryname": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "dosageformname" : { + "copy_to": ["name"] + }, + "dosageformname": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "routename" : { + }, + "routename": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "startmarketingdate" : { - "type" : "text" - }, - "endmarketingdate" : { - "type" : "text" - }, - "marketingcategoryname" : { + }, + "startmarketingdate": { + "type": "text" + }, + "endmarketingdate": { + "type": "text" + }, + "marketingcategoryname": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "applicationnumber" : { + }, + "applicationnumber": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "labelername" : { + }, + "labelername": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "substancename" : { + }, + "substancename": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", "copy_to": ["all"] - }, - "active_numerator_strength" : { - "type" : "text" - }, - "active_ingred_unit" : { + }, + "active_numerator_strength": { + "type": "text" + }, + "active_ingred_unit": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "pharm_classes" : { + }, + "pharm_classes": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "deaschedule" : { + }, + "deaschedule": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "package" : { - "properties" : { - "packagedescription" : { + }, + "package": { + "properties": { + "packagedescription": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, - "ndcpackagecode" : { - "type" : "text" - } + }, + "ndcpackagecode": { + "type": "text" } } } } } + } return mapping diff --git a/src/hub/dataload/sources/pharmgkb/pharmgkb_upload.py b/src/hub/dataload/sources/pharmgkb/pharmgkb_upload.py index 504e14b3..18c3d43f 100644 --- a/src/hub/dataload/sources/pharmgkb/pharmgkb_upload.py +++ b/src/hub/dataload/sources/pharmgkb/pharmgkb_upload.py @@ -3,18 +3,20 @@ """ # pylint: disable=E0401, E0611 import os + import biothings.hub.dataload.storage as storage + from hub.dataload.uploader import BaseDrugUploader from hub.datatransform.keylookup import MyChemKeyLookup -from .pharmgkb_parser import load_data +from .pharmgkb_parser import load_data SRC_META = { "url": 'https://www.pharmgkb.org/', "license_url": "https://www.pharmgkb.org/page/dataUsagePolicy", "license_url_short": "http://bit.ly/2zqM8aJ", "license": "CC BY-SA 4.0" - } +} class PharmGkbUploader(BaseDrugUploader): @@ -24,7 +26,7 @@ class PharmGkbUploader(BaseDrugUploader): name = "pharmgkb" storage_class = storage.RootKeyMergerStorage - __metadata__ = {"src_meta" : SRC_META} + __metadata__ = {"src_meta": SRC_META} keylookup = MyChemKeyLookup( [('inchi', 'pharmgkb.inchi'), ('pubchem', 'pharmgkb.xrefs.pubchem.cid'), @@ -35,13 +37,14 @@ def load_data(self, data_folder): """load_data method""" self.logger.info("Load data from '%s'" % data_folder) input_file = os.path.join(data_folder, "drugs.tsv") - assert os.path.exists(input_file), "Can't find input file '%s'" % input_file + assert os.path.exists( + input_file), "Can't find input file '%s'" % input_file return self.keylookup(load_data)(input_file) - def post_update_data(self,*args,**kwargs): + def post_update_data(self, *args, **kwargs): field = "pharmgkb.id" self.logger.info("Indexing '%s'" % field) - self.collection.create_index(field,background=True) + self.collection.create_index(field, background=True) @classmethod def get_mapping(cls): @@ -53,27 +56,27 @@ def get_mapping(cls): "normalizer": "keyword_lowercase_normalizer", "type": "keyword", 'copy_to': ['all'], - }, + }, "dosing_guideline": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "inchi": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "name": { "type": "text", - 'copy_to': ['all'], - }, + 'copy_to': ['all', 'name'], + }, "smiles": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "generic_names": { "type": "text", 'copy_to': ['all'], - }, + }, 'brand_mixtures': { 'properties': { 'brand_name': { @@ -86,143 +89,143 @@ def get_mapping(cls): }, "trade_names": { "type": "text" - }, + }, "type": { "type": "text" - }, + }, "xrefs": { "properties": { "web_resource": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "uniprotkb": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "pubchem": { "properties": { "sid": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "cid": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - } } - }, + } + }, "het": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "wikipedia": { "properties": { "url_stub": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - } } - }, + } + }, "iuphar_ligand": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "meddra": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "atc": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "kegg_compound": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "umls": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "clinicaltrials": { "properties": { "gov": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - } } - }, + } + }, "genbank": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "rxnorm": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "chebi": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "cas": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "ttd": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "kegg_drug": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "mesh": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "ndc": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "chemspider": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "hmdb": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "dailymed": { "properties": { "setid": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - } } - }, + } + }, "ndfrt": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "bindingdb": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "drugbank": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "pdb": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - }, + }, "dpd": { "normalizer": "keyword_lowercase_normalizer", "type": "keyword", - } } } } } } + } return mapping diff --git a/src/hub/dataload/sources/umls/umls_upload.py b/src/hub/dataload/sources/umls/umls_upload.py index 38f988b2..30ed67c2 100644 --- a/src/hub/dataload/sources/umls/umls_upload.py +++ b/src/hub/dataload/sources/umls/umls_upload.py @@ -1,11 +1,12 @@ -from .umls_parser import load_data import biothings.hub.dataload.uploader as uploader +from .umls_parser import load_data SRC_META = { - "url": 'https://www.nlm.nih.gov/research/umls/index.html', - "license_url": "https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/license_agreement.html" - } + "url": 'https://www.nlm.nih.gov/research/umls/index.html', + "license_url": "https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/license_agreement.html" +} + class UMLSUploader(uploader.BaseSourceUploader): @@ -33,7 +34,7 @@ def get_mapping(klass): "name": { "type": "keyword", "normalizer": "keyword_lowercase_normalizer", - "copy_to": ["all"] + "copy_to": ["all", "name"] } } } diff --git a/src/hub/dataload/sources/unii/unii_upload.py b/src/hub/dataload/sources/unii/unii_upload.py index 7e650f8f..a88a905b 100644 --- a/src/hub/dataload/sources/unii/unii_upload.py +++ b/src/hub/dataload/sources/unii/unii_upload.py @@ -1,126 +1,128 @@ -import os import glob +import os -from .unii_parser import load_data -from hub.dataload.uploader import BaseDrugUploader import biothings.hub.dataload.storage as storage - from biothings.hub.datatransform import DataTransformMDB + +from hub.dataload.uploader import BaseDrugUploader from hub.datatransform.keylookup import MyChemKeyLookup +from .unii_parser import load_data SRC_META = { - "url": 'https://precision.fda.gov/uniisearch', - "license": "public domain", - "license_url" : "https://www.nlm.nih.gov/web_policies.html", - "license_url_short": "http://bit.ly/2Pg8Oo9" - } + "url": 'https://precision.fda.gov/uniisearch', + "license": "public domain", + "license_url": "https://www.nlm.nih.gov/web_policies.html", + "license_url_short": "http://bit.ly/2Pg8Oo9" +} class UniiUploader(BaseDrugUploader): name = "unii" storage_class = storage.IgnoreDuplicatedStorage - __metadata__ = {"src_meta" : SRC_META} + __metadata__ = {"src_meta": SRC_META} keylookup = MyChemKeyLookup([('inchikey', 'unii.inchikey'), - ('pubchem', 'unii.pubchem'), - ('unii', 'unii.unii')], - copy_from_doc=True, - ) + ('pubchem', 'unii.pubchem'), + ('unii', 'unii.unii')], + copy_from_doc=True, + ) - def load_data(self,data_folder): + def load_data(self, data_folder): self.logger.info("Load data from '%s'" % data_folder) - record_files = glob.glob(os.path.join(data_folder,"*Records*.txt")) - assert len(record_files) == 1, "Expecting one record.txt file, got %s" % repr(record_files) + record_files = glob.glob(os.path.join(data_folder, "*Records*.txt")) + assert len(record_files) == 1, "Expecting one record.txt file, got %s" % repr( + record_files) input_file = record_files.pop() - assert os.path.exists(input_file), "Can't find input file '%s'" % input_file + assert os.path.exists( + input_file), "Can't find input file '%s'" % input_file # disable keylookup - unii is a base collection used for drugname lookup # and should be loaded first, (keylookup commented out) return self.keylookup(load_data)(input_file) # return load_data(input_file) - def post_update_data(self,*args,**kwargs): - for field in ("unii.unii","unii.preferred_term"): + def post_update_data(self, *args, **kwargs): + for field in ("unii.unii", "unii.preferred_term"): self.logger.info("Indexing '%s'" % field) - self.collection.create_index(field,background=True) + self.collection.create_index(field, background=True) @classmethod def get_mapping(klass): mapping = { - "unii": { - "properties": { - "unii": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - 'copy_to': ['all'], - }, - "preferred_term": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "registry_number": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "ec": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "ncit": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "rxcui": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "itis": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "ncbi": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "plants": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "grin": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "inn_id": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "molecular_formula": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "inchikey": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "smiles": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "ingredient_type": { - "type": "text" - }, - "pubchem": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - }, - "mpns": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword", - } - } + "unii": { + "properties": { + "unii": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + 'copy_to': ['all'], + }, + "display_name": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + "copy_to": ["name"] + }, + "registry_number": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "ec": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "ncit": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "rxcui": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "itis": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "ncbi": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "plants": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "grin": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "inn_id": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "molecular_formula": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "inchikey": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "smiles": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "ingredient_type": { + "type": "text" + }, + "pubchem": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + }, + "mpns": { + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword", + } + } } } return mapping - diff --git a/src/plugins/fda_orphan_drug/mapping.py b/src/plugins/fda_orphan_drug/mapping.py index dd7bc88f..1a973080 100644 --- a/src/plugins/fda_orphan_drug/mapping.py +++ b/src/plugins/fda_orphan_drug/mapping.py @@ -1,8 +1,8 @@ def get_customized_mapping(cls): mapping = { - "fda_orphan_drug": { - "properties": { - "designated_date": { + "fda_orphan_drug": { + "properties": { + "designated_date": { "type": "date" }, "designation_status": { @@ -10,70 +10,71 @@ def get_customized_mapping(cls): "type": "keyword" }, "orphan_designation": { - "properties": { - "original_text": { - "type": "text", - "copy_to": [ - "all" - ] - }, - "umls": { - "type": "text", - "copy_to": [ - "all" - ] - }, - "parsed_text": { - "type": "text", - "copy_to": [ - "all" - ] + "properties": { + "original_text": { + "type": "text", + "copy_to": [ + "all" + ] + }, + "umls": { + "type": "text", + "copy_to": [ + "all" + ] + }, + "parsed_text": { + "type": "text", + "copy_to": [ + "all" + ] + } } - } }, "marketing_approval_date": { - "type": "date" + "type": "date" }, "exclusivity_end_date": { - "type": "date" + "type": "date" }, "pubchem_cid": { - "type": "integer", - "copy_to": [ - "all" - ] + "type": "integer", + "copy_to": [ + "all" + ] }, "inchikey": { - "normalizer": "keyword_lowercase_normalizer", - "type": "keyword" + "normalizer": "keyword_lowercase_normalizer", + "type": "keyword" }, "trade_name": { - "type": "text" + "type": "text" }, "approved_labeled_indication": { - "type": "text" + "type": "text" }, "exclusivity_protected_indication": { - "type": "text" + "type": "text" }, "pubchem_sid": { - "type": "text" + "type": "text" }, "generic_name": { - "type": "text", - "copy_to": [ - "all" - ] + "type": "text", + "copy_to": [ + "all", + "name" + ] }, "approval_status": { - "type": "text" + "type": "text" }, "sponsor": { - "type": "text", - "index": "false" + "type": "text", + "index": "false" } } } - - } + + } return mapping