diff --git a/chord_metadata_service/chord/ingest.py b/chord_metadata_service/chord/ingest.py index cd678a929..b9ed56f15 100644 --- a/chord_metadata_service/chord/ingest.py +++ b/chord_metadata_service/chord/ingest.py @@ -11,7 +11,7 @@ import jsonschema from dateutil.parser import isoparse -from typing import Callable +from typing import Callable, Optional from urllib.parse import urlparse from django.conf import settings @@ -485,17 +485,27 @@ def ingest_experiment(experiment_data, table_id): extraction_protocol = experiment_data.get("extraction_protocol") reference_registry_id = experiment_data.get("reference_registry_id") qc_flags = experiment_data.get("qc_flags", []) - biosample = experiment_data.get("biosample") + biosample_id = experiment_data.get("biosample") experiment_results = experiment_data.get("experiment_results", []) instrument = experiment_data.get("instrument", {}) extra_properties = experiment_data.get("extra_properties", {}) + + biosample: Optional[pm.Biosample] = None + # get existing biosample id - if biosample is not None: - biosample = pm.Biosample.objects.get(id=biosample) # TODO: Handle error nicer + if biosample_id is not None: + try: + biosample = pm.Biosample.objects.get(id=biosample_id) # TODO: Handle error nicer + except pm.Biosample.DoesNotExist as e: + logger.error(f"Could not find biosample with ID: {biosample_id}") + raise e + # create related experiment results experiment_results_db = [create_experiment_result(er) for er in experiment_results] + # create related instrument instrument_db = create_instrument(instrument) + # create new experiment new_experiment = em.Experiment.objects.create( id=new_experiment_id, diff --git a/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json b/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json new file mode 100644 index 000000000..b1f051378 --- /dev/null +++ b/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json @@ -0,0 +1,78 @@ +{ + "experiments": [ + { + "id": "experiment:1", + "biosample": "sample1ohno!", + "study_type": "Epigenomics", + "experiment_type": "Other", + "experiment_ontology": [ + { + "id": "http://www.ebi.ac.uk/efo/EFO_0002692", + "label": "ChIP-seq" + } + ], + "library_strategy": "ChIP-Seq", + "library_source": "Genomic", + "library_selection": "Random", + "library_layout": "Single", + "extraction_protocol": "NGS", + "molecule": "genomic DNA", + "molecule_ontology": [ + { + "id": "SO:0000991", + "label": "genomic DNA" + } + ], + "experiment_results": [ + { + "identifier": "sample1_01", + "description": "test", + "filename": "sample1_01.vcf.gz", + "file_format": "VCF", + "data_output_type": "Derived data", + "usage": "Visualized", + "creation_date": "01-09-2021", + "created_by": "Admin", + "extra_properties": { + "test": "test" + } + }, + { + "identifier": "sample1_02", + "description": "test2", + "filename": "sample1_02.vcf.gz", + "file_format": "CRAM", + "data_output_type": "Raw data", + "usage": "Visualized", + "creation_date": "01-09-2021", + "created_by": "Admin", + "extra_properties": { + "test": "test" + } + } + ], + "instrument": { + "identifier": "instrument:01", + "platform": "Illumina", + "description": "Test description", + "model": "Illumina HiSeq 4000", + "extra_properties": { + "date": "2021-06-21" + } + }, + "extra_properties": { + "date_uploaded": "2021-03-16" + } + } + ], + "resources": [ + { + "name": "Sequence types and features ontology", + "version": "2021-02-16", + "namespace_prefix": "SO", + "id": "SO:2021-02-16", + "iri_prefix": "http://purl.obolibrary.org/obo/so.owl#", + "url": "http://purl.obolibrary.org/obo/so.owl" + } + ] +} \ No newline at end of file diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py index e061539f8..9aff50ec7 100644 --- a/chord_metadata_service/chord/tests/example_ingest.py +++ b/chord_metadata_service/chord/tests/example_ingest.py @@ -2,10 +2,17 @@ import os -__all__ = ["EXAMPLE_INGEST_PHENOPACKET", "EXAMPLE_INGEST_OUTPUTS", - "EXAMPLE_INGEST_EXPERIMENT", "EXAMPLE_INGEST_OUTPUTS_EXPERIMENT", - "EXAMPLE_INGEST_INVALID_EXPERIMENT", "EXAMPLE_INGEST_INVALID_PHENOPACKET", - "EXAMPLE_INGEST_MULTIPLE_PHENOPACKETS", "EXAMPLE_INGEST_MULTIPLE_OUTPUTS"] +__all__ = [ + "EXAMPLE_INGEST_PHENOPACKET", + "EXAMPLE_INGEST_OUTPUTS", + "EXAMPLE_INGEST_EXPERIMENT", + "EXAMPLE_INGEST_OUTPUTS_EXPERIMENT", + "EXAMPLE_INGEST_OUTPUTS_EXPERIMENT_BAD_BIOSAMPLE", + "EXAMPLE_INGEST_INVALID_EXPERIMENT", + "EXAMPLE_INGEST_INVALID_PHENOPACKET", + "EXAMPLE_INGEST_MULTIPLE_PHENOPACKETS", + "EXAMPLE_INGEST_MULTIPLE_OUTPUTS", +] with open(os.path.join(os.path.dirname(__file__), "example_phenopacket.json"), "r") as pf: EXAMPLE_INGEST_PHENOPACKET = json.load(pf) @@ -22,6 +29,10 @@ "json_document": os.path.join(os.path.dirname(__file__), "example_experiment.json"), } +EXAMPLE_INGEST_OUTPUTS_EXPERIMENT_BAD_BIOSAMPLE = { + "json_document": os.path.join(os.path.dirname(__file__), "example_experiment_bad_biosample.json"), +} + with open(os.path.join(os.path.dirname(__file__), "example_invalid_experiment.json"), "r") as pf: EXAMPLE_INGEST_INVALID_EXPERIMENT = json.load(pf) diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index 058e03397..15aa2fc6c 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -15,7 +15,7 @@ WORKFLOW_EXPERIMENTS_JSON, schema_validation ) -from chord_metadata_service.phenopackets.models import PhenotypicFeature, Phenopacket +from chord_metadata_service.phenopackets.models import Biosample, PhenotypicFeature, Phenopacket from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA from chord_metadata_service.resources.models import Resource from chord_metadata_service.experiments.models import Experiment, ExperimentResult, Instrument @@ -30,6 +30,7 @@ EXAMPLE_INGEST_OUTPUTS, EXAMPLE_INGEST_EXPERIMENT, EXAMPLE_INGEST_OUTPUTS_EXPERIMENT, + EXAMPLE_INGEST_OUTPUTS_EXPERIMENT_BAD_BIOSAMPLE, EXAMPLE_INGEST_EXPERIMENT_RESULT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_MULTIPLE_OUTPUTS, @@ -118,28 +119,40 @@ def test_ingesting_experiments_json(self): # ingest phenopackets data in order to match to biosample ids p = WORKFLOW_INGEST_FUNCTION_MAP[WORKFLOW_PHENOPACKETS_JSON](EXAMPLE_INGEST_OUTPUTS, self.t.identifier) self.assertEqual(p.id, Phenopacket.objects.get(id=p.id).id) + # ingest list of experiments experiments = WORKFLOW_INGEST_FUNCTION_MAP[WORKFLOW_EXPERIMENTS_JSON]( EXAMPLE_INGEST_OUTPUTS_EXPERIMENT, self.t_exp.identifier ) + # experiments self.assertEqual(len(experiments), Experiment.objects.all().count()) self.assertEqual(experiments[0].id, EXAMPLE_INGEST_EXPERIMENT["experiments"][0]["id"]) self.assertEqual(experiments[0].biosample.id, EXAMPLE_INGEST_EXPERIMENT["experiments"][0]["biosample"]) self.assertEqual(experiments[0].experiment_type, EXAMPLE_INGEST_EXPERIMENT["experiments"][0]["experiment_type"]) + # experiment results self.assertEqual(experiments[0].experiment_results.count(), ExperimentResult.objects.all().count()) + # instrument self.assertEqual(Instrument.objects.all().count(), 1) + # resources for experiments - # check that experiments resource is in database + # - check that experiments resource is in database self.assertIn(EXAMPLE_INGEST_EXPERIMENT["resources"][0]["id"], [v["id"] for v in Resource.objects.values("id")]) + # try ingesting the file with an invalid biosample ID + with self.assertRaises(Biosample.DoesNotExist): + WORKFLOW_INGEST_FUNCTION_MAP[WORKFLOW_EXPERIMENTS_JSON]( + EXAMPLE_INGEST_OUTPUTS_EXPERIMENT_BAD_BIOSAMPLE, self.t_exp.identifier + ) + def test_ingesting_invalid_experiment_json(self): # check invalid experiment, must fail validation for exp in EXAMPLE_INGEST_INVALID_EXPERIMENT["experiments"]: validation = schema_validation(exp, EXPERIMENT_SCHEMA) self.assertEqual(validation, False) + # check valid experiment, must pass validation for exp in EXAMPLE_INGEST_EXPERIMENT["experiments"]: validation_2 = schema_validation(exp, EXPERIMENT_SCHEMA) diff --git a/chord_metadata_service/package.cfg b/chord_metadata_service/package.cfg index 9170268b0..776531ed5 100644 --- a/chord_metadata_service/package.cfg +++ b/chord_metadata_service/package.cfg @@ -1,4 +1,4 @@ [package] name = katsu -version = 2.15.0 +version = 2.15.1 authors = Ksenia Zaytseva, David Lougheed, Simon Chénard, Romain Grégoire, Paul Pillot, Son Chau