From 11012974b68c91e5f7c46b435a3665271b67a599 Mon Sep 17 00:00:00 2001 From: zxenia Date: Wed, 28 Jul 2021 15:02:54 -0400 Subject: [PATCH 1/2] add schema validation in phenopacket ingestion --- chord_metadata_service/chord/ingest.py | 22 ++++++++++++++++++- .../chord/tests/example_phenopacket.json | 6 ++--- .../phenopackets/schemas.py | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/chord_metadata_service/chord/ingest.py b/chord_metadata_service/chord/ingest.py index 3f82af9cd..a43ed677a 100644 --- a/chord_metadata_service/chord/ingest.py +++ b/chord_metadata_service/chord/ingest.py @@ -8,6 +8,7 @@ import shutil import tempfile import uuid +import jsonschema from dateutil.parser import isoparse from typing import Callable @@ -30,6 +31,7 @@ ) from chord_metadata_service.mcode.parse_fhir_mcode import parse_bundle from chord_metadata_service.mcode.mcode_ingest import ingest_mcodepacket +from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA requests_unixsocket.monkeypatch() @@ -344,9 +346,27 @@ def ingest_experiment(experiment_data, table_id) -> em.Experiment: return new_experiment -def ingest_phenopacket(phenopacket_data, table_id) -> pm.Phenopacket: +def ingest_phenopacket(phenopacket_data, table_id): """Ingests a single phenopacket.""" + v = jsonschema.Draft7Validator(PHENOPACKET_SCHEMA, format_checker=jsonschema.FormatChecker()) + try: + jsonschema.validate( + phenopacket_data, + PHENOPACKET_SCHEMA, + format_checker=jsonschema.FormatChecker(), + ) + logger.info("JSON schema validation passed.") + + except jsonschema.exceptions.ValidationError: + errors = [e for e in v.iter_errors(phenopacket_data)] + logger.info(f"JSON schema validation failed. Errors: {errors}") + for i, error in enumerate(errors, 1): + logger.error( + f"{i} Validation error in {'.'.join(str(v) for v in error.path)}: {error.message}", + ) + return + new_phenopacket_id = phenopacket_data.get("id", str(uuid.uuid4())) subject = phenopacket_data.get("subject") diff --git a/chord_metadata_service/chord/tests/example_phenopacket.json b/chord_metadata_service/chord/tests/example_phenopacket.json index 10d1d8f87..2da1a3dcc 100644 --- a/chord_metadata_service/chord/tests/example_phenopacket.json +++ b/chord_metadata_service/chord/tests/example_phenopacket.json @@ -13,8 +13,7 @@ "label": "Hematuria" }, "negated": false, - "modifier": [], - "evidence": [] + "modifier": [] }, { "description": "", @@ -27,8 +26,7 @@ "id": "HP:0012828", "label": "Severe" }, - "modifier": [], - "evidence": [] + "modifier": [] } ], "diseases": [ diff --git a/chord_metadata_service/phenopackets/schemas.py b/chord_metadata_service/phenopackets/schemas.py index 33817646b..7667891d2 100644 --- a/chord_metadata_service/phenopackets/schemas.py +++ b/chord_metadata_service/phenopackets/schemas.py @@ -360,5 +360,5 @@ "meta_data": PHENOPACKET_META_DATA_SCHEMA, "extra_properties": EXTRA_PROPERTIES_SCHEMA }, - "required": ["id", "meta_data"], + "required": ["meta_data"], }, descriptions.PHENOPACKET) From 9d93e48e8391548f3a29abda04f024a481720414 Mon Sep 17 00:00:00 2001 From: zxenia Date: Wed, 28 Jul 2021 15:16:55 -0400 Subject: [PATCH 2/2] update version in package.cfg --- chord_metadata_service/package.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/package.cfg b/chord_metadata_service/package.cfg index 49b50469d..b4b56f60e 100644 --- a/chord_metadata_service/package.cfg +++ b/chord_metadata_service/package.cfg @@ -1,4 +1,4 @@ [package] name = katsu -version = 2.2.0 +version = 2.2.1 authors = Ksenia Zaytseva, David Lougheed, Simon Chénard, Romain Grégoire