Skip to content

Commit

Permalink
Merge pull request #260 from bento-platform/develop
Browse files Browse the repository at this point in the history
Version 2.2.6
  • Loading branch information
zxenia authored Jul 30, 2021
2 parents 80b90bc + 63ff725 commit e151bee
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 21 deletions.
41 changes: 24 additions & 17 deletions chord_metadata_service/chord/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from chord_metadata_service.mcode.parse_fhir_mcode import parse_bundle
from chord_metadata_service.mcode.mcode_ingest import ingest_mcodepacket
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA
from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA

requests_unixsocket.monkeypatch()

Expand Down Expand Up @@ -224,6 +225,20 @@ class IngestError(Exception):
pass


def schema_validation(obj, schema):
    """Validate ``obj`` against a JSON schema and log the outcome.

    Args:
        obj: The decoded JSON document to check.
        schema: The JSON schema (as a dict) to validate against.

    Returns:
        True when ``jsonschema.validate`` accepts the object, False when it
        raises a ``ValidationError``. Every violation reported by a Draft 7
        validator is logged individually before False is returned.
    """
    try:
        jsonschema.validate(obj, schema, format_checker=jsonschema.FormatChecker())
    except jsonschema.exceptions.ValidationError:
        # validate() surfaces a single error only; walk a validator so that
        # every violation ends up in the log. The validator is built here
        # because it is never needed on the success path.
        validator = jsonschema.Draft7Validator(schema, format_checker=jsonschema.FormatChecker())
        errors = list(validator.iter_errors(obj))
        logger.info("JSON schema validation failed.")
        for i, error in enumerate(errors, 1):
            # error.path holds the keys/indices leading to the offending value
            location = ".".join(str(part) for part in error.path)
            logger.error(f"{i} Validation error in {location}: {error.message}")
        return False

    logger.info("JSON schema validation passed.")
    return True


def create_phenotypic_feature(pf):
pf_obj = pm.PhenotypicFeature(
description=pf.get("description", ""),
Expand Down Expand Up @@ -293,9 +308,14 @@ def ingest_resource(resource: dict) -> rm.Resource:
return rs_obj


def ingest_experiment(experiment_data, table_id) -> em.Experiment:
def ingest_experiment(experiment_data, table_id):
"""Ingests a single experiment."""

# validate experiment data against experiments schema
validation = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
if not validation:
return

new_experiment_id = experiment_data.get("id", str(uuid.uuid4()))
study_type = experiment_data.get("study_type")
experiment_type = experiment_data["experiment_type"]
Expand Down Expand Up @@ -349,22 +369,9 @@ def ingest_experiment(experiment_data, table_id) -> em.Experiment:
def ingest_phenopacket(phenopacket_data, table_id):
"""Ingests a single phenopacket."""

v = jsonschema.Draft7Validator(PHENOPACKET_SCHEMA, format_checker=jsonschema.FormatChecker())
try:
jsonschema.validate(
phenopacket_data,
PHENOPACKET_SCHEMA,
format_checker=jsonschema.FormatChecker(),
)
logger.info("JSON schema validation passed.")

except jsonschema.exceptions.ValidationError:
errors = [e for e in v.iter_errors(phenopacket_data)]
logger.info(f"JSON schema validation failed. Errors: {errors}")
for i, error in enumerate(errors, 1):
logger.error(
f"{i} Validation error in {'.'.join(str(v) for v in error.path)}: {error.message}",
)
# validate phenopackets data against phenopacket schema
validation = schema_validation(phenopacket_data, PHENOPACKET_SCHEMA)
if not validation:
return

new_phenopacket_id = phenopacket_data.get("id", str(uuid.uuid4()))
Expand Down
7 changes: 6 additions & 1 deletion chord_metadata_service/chord/tests/example_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@


__all__ = ["EXAMPLE_INGEST_PHENOPACKET", "EXAMPLE_INGEST_OUTPUTS",
"EXAMPLE_INGEST_EXPERIMENT", "EXAMPLE_INGEST_OUTPUTS_EXPERIMENT"]
"EXAMPLE_INGEST_EXPERIMENT", "EXAMPLE_INGEST_OUTPUTS_EXPERIMENT",
"EXAMPLE_INGEST_INVALID_PHENOPACKET"]

with open(os.path.join(os.path.dirname(__file__), "example_phenopacket.json"), "r") as pf:
EXAMPLE_INGEST_PHENOPACKET = json.load(pf)
Expand All @@ -19,3 +20,7 @@
EXAMPLE_INGEST_OUTPUTS_EXPERIMENT = {
"json_document": os.path.join(os.path.dirname(__file__), "example_experiment.json"),
}


with open(os.path.join(os.path.dirname(__file__), "example_invalid_phenopacket.json"), "r") as pf:
EXAMPLE_INGEST_INVALID_PHENOPACKET = json.load(pf)
232 changes: 232 additions & 0 deletions chord_metadata_service/chord/tests/example_invalid_phenopacket.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
{
"subject": {
"id": "patient1",
"date_of_birth": "1964-03-15T00:00:00Z",
"sex": "MALE",
"age": "P50Y",
"karyotypic_sex": "UNKNOWN_KARYOTYPE"
},
"phenotypic_features": [
{
"description": "",
"type": {
"id": "HP:0000790",
"label": "Hematuria"
},
"negated": false,
"modifier": []
},
{
"description": "",
"type": {
"id": "HP:0100518",
"label": "Dysuria"
},
"negated": false,
"severity": {
"id": "HP:0012828",
"label": "Severe"
},
"modifier": []
}
],
"diseases": [
{
"term": {
"id": "NCIT:C39853",
"label": "Infiltrating Urothelial Carcinoma"
},
"disease_stage": [
{
"id": "NCIT:C48766",
"label": "pT2b Stage Finding"
},
{
"id": "NCIT:C48750",
"label": "pN2 Stage Finding"
}
]
}
],
"meta_data": {
"created": "2019-04-03T15:31:40.765Z",
"created_by": "Peter R",
"submitted_by": "Peter R",
"resources": [
{
"id": "HP:2019-04-08",
"name": "human phenotype ontology",
"namespace_prefix": "HP",
"url": "http://purl.obolibrary.org/obo/hp.owl",
"version": "2019-04-08",
"iri_prefix": "http://purl.obolibrary.org/obo/HP_"
},
{
"id": "UBERON:2019-03-08",
"name": "uber anatomy ontology",
"namespace_prefix": "UBERON",
"url": "http://purl.obolibrary.org/obo/uberon.owl",
"version": "2019-03-08",
"iri_prefix": "http://purl.obolibrary.org/obo/UBERON_"
},
{
"id": "NCIT:18.05d",
"name": "NCI Thesaurus OBO Edition",
"namespace_prefix": "NCIT",
"url": "http://purl.obolibrary.org/obo/ncit.owl",
"version": "18.05d",
"iri_prefix": "http://purl.obolibrary.org/obo/NCIT_"
}
],
"updated": [],
"external_references": [
{
"id": "PMID:29221636",
"description": "Urothelial neoplasms in pediatric and young adult patients: A large single-center series"
}
]
},
"biosamples": [
{
"id": "sample1",
"individual_id": "patient1",
"description": "",
"sampled_tissue": {
"id": "UBERON_0001256",
"label": "wall of urinary bladder"
},
"phenotypic_features": [],
"individual_age_at_collection": {
"age": "P52Y2M"
},
"histological_diagnosis": {
"id": "NCIT:C39853",
"label": "Infiltrating Urothelial Carcinoma"
},
"tumor_progression": {
"id": "NCIT:C84509",
"label": "Primary Malignant Neoplasm"
},
"diagnostic_markers": [],
"procedure": {
"code": {
"id": "NCIT:C5189",
"label": "Radical Cystoprostatectomy"
}
},
"is_control_sample": false
},
{
"id": "sample2",
"individual_id": "patient1",
"description": "",
"sampled_tissue": {
"id": "UBERON:0002367",
"label": "prostate gland"
},
"phenotypic_features": [],
"individual_age_at_collection": {
"age": "P52Y2M"
},
"histological_diagnosis": {
"id": "NCIT:C5596",
"label": "Prostate Acinar Adenocarcinoma"
},
"tumor_progression": {
"id": "NCIT:C95606",
"label": "Second Primary Malignant Neoplasm"
},
"tumor_grade": {
"id": "NCIT:C28091",
"label": "Gleason Score 7"
},
"disease_stage": [],
"diagnostic_markers": [],
"procedure": {
"code": {
"id": "NCIT:C15189",
"label": "Biopsy"
}
},
"is_control_sample": false
},
{
"id": "sample3",
"individual_id": "patient1",
"description": "",
"sampled_tissue": {
"id": "UBERON:0001223",
"label": "left ureter"
},
"phenotypic_features": [],
"individual_age_at_collection": {
"age": "P52Y2M"
},
"histological_diagnosis": {
"id": "NCIT:C38757",
"label": "Negative Finding"
},
"disease_stage": [],
"diagnostic_markers": [],
"procedure": {
"code": {
"id": "NCIT:C15189",
"label": "Biopsy"
}
},
"is_control_sample": false
},
{
"id": "sample4",
"individual_id": "patient1",
"description": "",
"sampled_tissue": {
"id": "UBERON:0001222",
"label": "right ureter"
},
"phenotypic_features": [],
"individual_age_at_collection": {
"age": "P52Y2M"
},
"histological_diagnosis": {
"id": "NCIT:C38757",
"label": "Negative Finding"
},
"disease_stage": [],
"diagnostic_markers": [],
"procedure": {
"code": {
"id": "NCIT:C15189",
"label": "Biopsy"
}
},
"is_control_sample": false
},
{
"id": "sample5",
"individual_id": "patient1",
"description": "",
"sampled_tissue": {
"id": "UBERON:0015876",
"label": "pelvic lymph node"
},
"phenotypic_features": [],
"individual_age_at_collection": {
"age": "P52Y2M"
},
"tumor_progression": {
"id": "NCIT:C3261",
"label": "Metastatic Neoplasm"
},
"disease_stage": [],
"diagnostic_markers": [],
"procedure": {
"code": {
"id": "NCIT:C15189",
"label": "Biopsy"
}
},
"is_control_sample": false
}
]
}
21 changes: 19 additions & 2 deletions chord_metadata_service/chord/tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,23 @@
WORKFLOW_PHENOPACKETS_JSON,
create_phenotypic_feature,
WORKFLOW_INGEST_FUNCTION_MAP,
WORKFLOW_EXPERIMENTS_JSON
WORKFLOW_EXPERIMENTS_JSON,
schema_validation
)
from chord_metadata_service.phenopackets.models import PhenotypicFeature, Phenopacket
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA
from chord_metadata_service.resources.models import Resource
from chord_metadata_service.experiments.models import Experiment, ExperimentResult, Instrument
from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA


from .constants import VALID_DATA_USE_1
from .example_ingest import (
EXAMPLE_INGEST_PHENOPACKET,
EXAMPLE_INGEST_OUTPUTS,
EXAMPLE_INGEST_EXPERIMENT,
EXAMPLE_INGEST_OUTPUTS_EXPERIMENT
EXAMPLE_INGEST_OUTPUTS_EXPERIMENT,
EXAMPLE_INGEST_INVALID_PHENOPACKET,
)


Expand Down Expand Up @@ -90,6 +95,18 @@ def test_ingesting_phenopackets_json(self):
self.assertNotEqual(p.id, p2.id)
# TODO: More

def test_ingesting_invalid_phenopackets_json(self):
    """Exercise schema_validation on invalid and valid example documents."""
    # the deliberately invalid phenopacket has to be rejected
    self.assertEqual(
        schema_validation(EXAMPLE_INGEST_INVALID_PHENOPACKET, PHENOPACKET_SCHEMA),
        False,
    )

    # the regular example phenopacket has to be accepted
    self.assertEqual(
        schema_validation(EXAMPLE_INGEST_PHENOPACKET, PHENOPACKET_SCHEMA),
        True,
    )

    # each example experiment has to be accepted by the experiment schema
    for experiment in EXAMPLE_INGEST_EXPERIMENT["experiments"]:
        self.assertEqual(schema_validation(experiment, EXPERIMENT_SCHEMA), True)

def test_ingesting_experiments_json(self):
# ingest phenopackets data in order to match to biosample ids
p = WORKFLOW_INGEST_FUNCTION_MAP[WORKFLOW_PHENOPACKETS_JSON](EXAMPLE_INGEST_OUTPUTS, self.t.identifier)
Expand Down
2 changes: 1 addition & 1 deletion chord_metadata_service/package.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[package]
name = katsu
version = 2.2.5
version = 2.2.6
authors = Ksenia Zaytseva, David Lougheed, Simon Chénard, Romain Grégoire

0 comments on commit e151bee

Please sign in to comment.