From 289492bdba4010f93733a9c6ae58bf29f37fd7d5 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 10:55:42 -0500 Subject: [PATCH 01/15] Try to fix ingest null handling in get_or_create contexts --- chord_metadata_service/chord/views_ingest.py | 60 ++++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index 13be51e15..98c5f8cf0 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -15,6 +15,8 @@ from chord_lib.responses.errors import * from chord_lib.workflows import get_workflow, get_workflow_resource, workflow_exists +from typing import Callable + from chord_metadata_service.chord.models import * from chord_metadata_service.phenopackets.models import * @@ -149,6 +151,11 @@ def ingest(request): return Response(status=204) +def _query_and_check_nulls(obj: dict, key: str, transform: Callable = lambda x: x): + value = obj.get(key, None) + return {f"{key}__isnull": True} if value is None else {key: transform(value)} + + def ingest_phenopacket(phenopacket_data, table_id): """ Ingests one phenopacket. """ @@ -162,15 +169,10 @@ def ingest_phenopacket(phenopacket_data, table_id): meta_data = phenopacket_data["meta_data"] if subject: - subject, _ = Individual.objects.get_or_create( - id=subject["id"], - alternate_ids=subject.get("alternate_ids", None), - date_of_birth=isoparse(subject["date_of_birth"]) if "date_of_birth" in subject else None, - age=subject.get("age", ""), # TODO: Shouldn't this be nullable, since it's recommended in the spec? - sex=subject.get("sex", None), - karyotypic_sex=subject.get("karyotypic_sex", None), - taxonomy=subject.get("taxonomy", None) - ) + subject_query = _query_and_check_nulls(subject, "date_of_birth", transform=isoparse) + for k in ("alternate_ids", "age", "sex", "karyotypic_sex", "taxonomy"): + subject_query.update(_query_and_check_nulls(subject, k)) + subject, _ = Individual.objects.get_or_create(id=subject["id"], **subject_query) phenotypic_features_db = [create_phenotypic_feature(pf) for pf in phenotypic_features] @@ -178,26 +180,26 @@ def ingest_phenopacket(phenopacket_data, table_id): for bs in biosamples: # TODO: This should probably be a JSON field, or compound key with code/body_site procedure, _ = Procedure.objects.get_or_create(**bs["procedure"]) - bs_pfs = [create_phenotypic_feature(pf) for pf in bs.get("phenotypic_features", [])] - bs_obj, _ = Biosample.objects.get_or_create( + bs_query = _query_and_check_nulls(bs, "individual_id", lambda i: Individual.objects.get(id=i)) + for k in ("sampled_issue", "taxonomy", "individual_age_at_collection", "histological_diagnosis", + "tumor_progression", "tumor_grade"): + bs_query.update(_query_and_check_nulls(bs, k)) + + bs_obj, bs_created = Biosample.objects.get_or_create( id=bs["id"], - individual=(Individual.objects.get(id=bs["individual_id"]) - if "individual_id" in bs else None), description=bs.get("description", ""), - sampled_tissue=bs.get("sampled_tissue", None), - taxonomy=bs.get("taxonomy", None), - individual_age_at_collection=bs.get("individual_age_at_collection", None), - histological_diagnosis=bs.get("histological_diagnosis", None), - tumor_progression=bs.get("tumor_progression", None), - tumor_grade=bs.get("tumor_grade", None), procedure=procedure, is_control_sample=bs.get("is_control_sample", False), - - diagnostic_markers=bs.get("diagnostic_markers", []) + diagnostic_markers=bs.get("diagnostic_markers", 
[]), + **bs_query ) - bs_obj.phenotypic_features.set(bs_pfs) + if bs_created: + bs_pfs = [create_phenotypic_feature(pf) for pf in bs.get("phenotypic_features", [])] + bs_obj.phenotypic_features.set(bs_pfs) + + # TODO: Update phenotypic features otherwise? biosamples_db.append(bs_obj) @@ -206,24 +208,21 @@ def ingest_phenopacket(phenopacket_data, table_id): for g in genes: # TODO: Validate CURIE # TODO: Rename alternate_id - g_obj, _ = Gene.objects.get_or_create( id=g["id"], alternate_ids=g.get("alternate_ids", []), symbol=g["symbol"] ) - genes_db.append(g_obj) diseases_db = [] - for d in diseases: + for disease in diseases: # TODO: Primary key, should this be a model? - d_obj, _ = Disease.objects.get_or_create( - term=d["term"], - onset=d.get("onset", None), - disease_stage=d.get("disease_stage", []), - tnm_finding=d.get("tnm_finding", []) + term=disease["term"], + disease_stage=disease.get("disease_stage", []), + tnm_finding=disease.get("tnm_finding", []), + **_query_and_check_nulls(disease, "onset") ) diseases_db.append(d_obj.id) @@ -237,7 +236,6 @@ def ingest_phenopacket(phenopacket_data, table_id): version=rs["version"], iri_prefix=rs["iri_prefix"] ) - # rs_obj.save() resources_db.append(rs_obj) meta_data_obj = MetaData( From 9a0cea15017710d544d330c901f9730344b77472 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 11:14:09 -0500 Subject: [PATCH 02/15] Fix typo (sampled_issue -> sampled_tissue) --- chord_metadata_service/chord/views_ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index 98c5f8cf0..df4d60d3a 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -182,7 +182,7 @@ def ingest_phenopacket(phenopacket_data, table_id): procedure, _ = Procedure.objects.get_or_create(**bs["procedure"]) bs_query = _query_and_check_nulls(bs, "individual_id", lambda i: Individual.objects.get(id=i)) - for k in ("sampled_issue", "taxonomy", "individual_age_at_collection", "histological_diagnosis", + for k in ("sampled_tissue", "taxonomy", "individual_age_at_collection", "histological_diagnosis", "tumor_progression", "tumor_grade"): bs_query.update(_query_and_check_nulls(bs, k)) From 0ba4ff9c0ccbe59248c17d2fe00561ef9ecf9f1b Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 11:14:19 -0500 Subject: [PATCH 03/15] Update single_req example --- examples/single_req.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/single_req.json b/examples/single_req.json index 526c445ae..2d4bae995 100644 --- a/examples/single_req.json +++ b/examples/single_req.json @@ -1,5 +1,5 @@ { - "dataset_id": "3df5e8b0-3949-4d3c-a37f-1c6a81940d50", + "table_id": "3df5e8b0-3949-4d3c-a37f-1c6a81940d50", "workflow_id": "phenopackets_json", "workflow_metadata": { "inputs": [ From 5494fefff6b1c7bceec4a7f057b376fed479cb71 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:06:03 -0500 Subject: [PATCH 04/15] Travis diagnostics --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 81041e10f..10ceeeed1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: + - ls -a /var/run/postgres - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt From 
d165b7ccd62a999028297b8d411954ac5534cce0 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:09:51 -0500 Subject: [PATCH 05/15] Travis diags cont'd --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 10ceeeed1..f1130cfc2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: - - ls -a /var/run/postgres + - ls -a /var/run - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt From 54538de89f63a2be87b750dd74ab8b0e5d1101a1 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:13:09 -0500 Subject: [PATCH 06/15] pt3 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f1130cfc2..c549aa73d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: - - ls -a /var/run + - ls -a /var/run/postgresql - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt From c22883d9bc46560bc67ac6b945814f0f7672b52a Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:16:49 -0500 Subject: [PATCH 07/15] Change travis postgres port --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c549aa73d..0c27e4f49 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,13 +9,12 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: - - ls -a /var/run/postgresql - - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" + - sudo -u postgres psql -U postgres -p 5432 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt - pip install . 
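
A brief illustration of the null-handling pattern from PATCH 01/15, shown as a standalone sketch so the lookup dictionary it builds for get_or_create() is visible outside of Django. The helper is copied verbatim from views_ingest.py; the sample subject dict and the printed result are illustrative only and are not part of the patch series.

# Sketch: what _query_and_check_nulls() produces for a partially populated subject.
# The helper is the one added in PATCH 01/15; the sample data below is made up.
from typing import Callable

from dateutil.parser import isoparse


def _query_and_check_nulls(obj: dict, key: str, transform: Callable = lambda x: x):
    value = obj.get(key, None)
    return {f"{key}__isnull": True} if value is None else {key: transform(value)}


subject = {"id": "patient1", "sex": "MALE"}  # no date_of_birth, taxonomy, etc.

subject_query = _query_and_check_nulls(subject, "date_of_birth", transform=isoparse)
for k in ("alternate_ids", "age", "sex", "karyotypic_sex", "taxonomy"):
    subject_query.update(_query_and_check_nulls(subject, k))

print(subject_query)
# {'date_of_birth__isnull': True, 'alternate_ids__isnull': True, 'age__isnull': True,
#  'sex': 'MALE', 'karyotypic_sex__isnull': True, 'taxonomy__isnull': True}
#
# Missing keys become <field>__isnull=True lookups, while present values are passed
# through `transform`, so isoparse() is never called on an absent date_of_birth.
# These keyword arguments are what the patched ingest code unpacks into
# Individual.objects.get_or_create(id=subject["id"], **subject_query).
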
script: - - export POSTGRES_USER="postgres" && export POSTGRES_PASSWORD="hj38f3Ntr" && export POSTGRES_PORT=5433 + - export POSTGRES_USER="postgres" && export POSTGRES_PASSWORD="hj38f3Ntr" && export POSTGRES_PORT=5432 - python3 -m coverage run ./manage.py test - codecov - rm -rf chord_metadata_service From 9bca906597bb472727e65e17189bfea92405bd02 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:32:13 -0500 Subject: [PATCH 08/15] Add ingest test --- .../chord/tests/example_ingest.py | 234 ++++++++++++++++++ .../chord/tests/test_api_ingest.py | 89 +++++++ .../chord/tests/test_ingest.py | 133 ++++------ 3 files changed, 375 insertions(+), 81 deletions(-) create mode 100644 chord_metadata_service/chord/tests/example_ingest.py create mode 100644 chord_metadata_service/chord/tests/test_api_ingest.py diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py new file mode 100644 index 000000000..16d56a4b2 --- /dev/null +++ b/chord_metadata_service/chord/tests/example_ingest.py @@ -0,0 +1,234 @@ +EXAMPLE_INGEST = { + "subject": { + "id": "patient1", + "date_of_birth": "1964-03-15T00:00:00Z", + "sex": "MALE", + "karyotypic_sex": "UNKNOWN_KARYOTYPE" + }, + "phenotypic_features": [ + { + "description": "", + "type": { + "id": "HP:0000790", + "label": "Hematuria" + }, + "negated": False, + "modifier": [], + "evidence": [] + }, + { + "description": "", + "type": { + "id": "HP:0100518", + "label": "Dysuria" + }, + "negated": False, + "severity": { + "id": "HP:0012828", + "label": "Severe" + }, + "modifier": [], + "evidence": [] + } + ], + "diseases": [ + { + "term": { + "id": "NCIT:C39853", + "label": "Infiltrating Urothelial Carcinoma" + }, + "disease_stage": [ + { + "id": "NCIT:C48766", + "label": "pT2b Stage Finding" + }, + { + "id": "NCIT:C48750", + "label": "pN2 Stage Finding" + } + ] + } + ], + "meta_data": { + "created": "2019-04-03T15:31:40.765Z", + "created_by": "Peter R", + "submitted_by": "Peter R", + "resources": [ + { + "id": "hp", + "name": "human phenotype ontology", + "namespace_prefix": "HP", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2019-04-08", + "iri_prefix": "http://purl.obolibrary.org/obo/HP_" + }, + { + "id": "uberon", + "name": "uber anatomy ontology", + "namespace_prefix": "UBERON", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2019-03-08", + "iri_prefix": "http://purl.obolibrary.org/obo/UBERON_" + }, + { + "id": "ncit", + "name": "NCI Thesaurus OBO Edition", + "namespace_prefix": "NCIT", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "18.05d", + "iri_prefix": "http://purl.obolibrary.org/obo/NCIT_" + } + ], + "updated": [], + "external_references": [ + { + "id": "PMID:29221636", + "description": "Urothelial neoplasms in pediatric and young adult patients: A large single-center " + "series" + } + ] + }, + "biosamples": [ + { + "id": "sample1", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON_0001256", + "label": "wall of urinary bladder" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C39853", + "label": "Infiltrating Urothelial Carcinoma" + }, + "tumor_progression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C5189", + "label": "Radical Cystoprostatectomy" + } + }, + "is_control_sample": False + }, + { + "id": 
"sample2", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0002367", + "label": "prostate gland" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C5596", + "label": "Prostate Acinar Adenocarcinoma" + }, + "tumor_progression": { + "id": "NCIT:C95606", + "label": "Second Primary Malignant Neoplasm" + }, + "tumor_grade": { + "id": "NCIT:C28091", + "label": "Gleason Score 7" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + }, + { + "id": "sample3", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0001223", + "label": "left ureter" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + }, + { + "id": "sample4", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0001222", + "label": "right ureter" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + }, + { + "id": "sample5", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0015876", + "label": "pelvic lymph node" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "tumor_progression": { + "id": "NCIT:C3261", + "label": "Metastatic Neoplasm" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + } + ] +} diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py new file mode 100644 index 000000000..5e028e410 --- /dev/null +++ b/chord_metadata_service/chord/tests/test_api_ingest.py @@ -0,0 +1,89 @@ +import json + +from django.test import override_settings +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APITestCase +from uuid import uuid4 + +from .constants import * +from ..views_ingest import METADATA_WORKFLOWS + + +def generate_ingest(table_id): + return { + "table_id": table_id, + "workflow_id": "phenopackets_json", + "workflow_metadata": METADATA_WORKFLOWS["ingestion"]["phenopackets_json"], + "workflow_outputs": { + "json_document": "" # TODO + }, + "workflow_params": { + "json_document": "" # TODO + } + } + + +class WorkflowTest(APITestCase): + def test_workflows(self): + r = self.client.get(reverse("workflows"), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_200_OK) + self.assertDictEqual(r.json(), METADATA_WORKFLOWS) + + # Non-existent workflow + r = self.client.get(reverse("workflow-detail", args=("invalid_workflow",)), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) + + # Valid workflow + r = self.client.get(reverse("workflow-detail", 
args=("phenopackets_json",)), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_200_OK) + self.assertDictEqual(r.json(), METADATA_WORKFLOWS["ingestion"]["phenopackets_json"]) + + # Non-existent workflow file + r = self.client.get(reverse("workflow-file", args=("invalid_workflow",)), content_type="text/plain") + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) + + # Valid workflow file + r = self.client.get(reverse("workflow-file", args=("phenopackets_json",)), content_type="text/plain") + self.assertEqual(r.status_code, status.HTTP_200_OK) + # TODO: Check file contents + + +class IngestTest(APITestCase): + @override_settings(AUTH_OVERRIDE=True) # For permissions + def setUp(self) -> None: + r = self.client.post(reverse("project-list"), data=json.dumps(VALID_PROJECT_1), content_type="application/json") + self.project = r.json() + + r = self.client.post(reverse("dataset-list"), data=json.dumps(valid_dataset_1(self.project["identifier"])), + content_type="application/json") + self.dataset = r.json() + + @override_settings(AUTH_OVERRIDE=True) # For permissions + def test_ingest(self): + # No ingestion body + r = self.client.post(reverse("ingest"), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Invalid ingestion request + r = self.client.post(reverse("ingest"), data=json.dumps({}), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Non-existent dataset ID + r = self.client.post(reverse("ingest"), data=json.dumps(generate_ingest(str(uuid4()))), + content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Non-existent workflow ID + bad_wf = generate_ingest(self.dataset["identifier"]) + bad_wf["workflow_id"] += "_invalid" + r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # json_document not in output + bad_wf = generate_ingest(self.dataset["identifier"]) + bad_wf["workflow_outputs"] = {} + r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # TODO: More diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index 5e028e410..974049b1e 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -1,89 +1,60 @@ -import json +from django.test import TestCase +from dateutil.parser import isoparse -from django.test import override_settings -from django.urls import reverse -from rest_framework import status -from rest_framework.test import APITestCase -from uuid import uuid4 +from chord_metadata_service.chord.models import Project, Dataset +from chord_metadata_service.chord.views_ingest import create_phenotypic_feature, ingest_phenopacket +from chord_metadata_service.phenopackets.models import PhenotypicFeature, Phenopacket -from .constants import * -from ..views_ingest import METADATA_WORKFLOWS +from .constants import VALID_DATA_USE_1 +from .example_ingest import EXAMPLE_INGEST -def generate_ingest(table_id): - return { - "table_id": table_id, - "workflow_id": "phenopackets_json", - "workflow_metadata": METADATA_WORKFLOWS["ingestion"]["phenopackets_json"], - "workflow_outputs": { - "json_document": "" # TODO - }, - "workflow_params": { - "json_document": "" # TODO - } - } - 
- -class WorkflowTest(APITestCase): - def test_workflows(self): - r = self.client.get(reverse("workflows"), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_200_OK) - self.assertDictEqual(r.json(), METADATA_WORKFLOWS) - - # Non-existent workflow - r = self.client.get(reverse("workflow-detail", args=("invalid_workflow",)), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) - - # Valid workflow - r = self.client.get(reverse("workflow-detail", args=("phenopackets_json",)), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_200_OK) - self.assertDictEqual(r.json(), METADATA_WORKFLOWS["ingestion"]["phenopackets_json"]) - - # Non-existent workflow file - r = self.client.get(reverse("workflow-file", args=("invalid_workflow",)), content_type="text/plain") - self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) - - # Valid workflow file - r = self.client.get(reverse("workflow-file", args=("phenopackets_json",)), content_type="text/plain") - self.assertEqual(r.status_code, status.HTTP_200_OK) - # TODO: Check file contents - - -class IngestTest(APITestCase): - @override_settings(AUTH_OVERRIDE=True) # For permissions +class IngestTest(TestCase): def setUp(self) -> None: - r = self.client.post(reverse("project-list"), data=json.dumps(VALID_PROJECT_1), content_type="application/json") - self.project = r.json() - - r = self.client.post(reverse("dataset-list"), data=json.dumps(valid_dataset_1(self.project["identifier"])), - content_type="application/json") - self.dataset = r.json() - - @override_settings(AUTH_OVERRIDE=True) # For permissions - def test_ingest(self): - # No ingestion body - r = self.client.post(reverse("ingest"), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - - # Invalid ingestion request - r = self.client.post(reverse("ingest"), data=json.dumps({}), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - - # Non-existent dataset ID - r = self.client.post(reverse("ingest"), data=json.dumps(generate_ingest(str(uuid4()))), - content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - - # Non-existent workflow ID - bad_wf = generate_ingest(self.dataset["identifier"]) - bad_wf["workflow_id"] += "_invalid" - r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + p = Project.objects.create(title="Project 1", description="") + self.d = Dataset.objects.create(title="Dataset 1", description="Some dataset", data_use=VALID_DATA_USE_1, + project=p) + + def test_create_pf(self): + p1 = create_phenotypic_feature({ + "description": "test", + "type": { + "id": "HP:0000790", + "label": "Hematuria" + }, + "negated": False, + "modifier": [], + "evidence": [] + }) + + p2 = PhenotypicFeature.objects.get(description="test") + + self.assertEqual(p1.pk, p2.pk) + + def test_ingesting_json(self): + p = ingest_phenopacket(EXAMPLE_INGEST, self.d.identifier) + self.assertEqual(p.id, Phenopacket.objects.get(id=p.id).id) + + self.assertEqual(p.subject.id, EXAMPLE_INGEST["subject"]["id"]) + self.assertEqual(p.subject.date_of_birth, isoparse(EXAMPLE_INGEST["subject"]["date_of_birth"])) + self.assertEqual(p.subject.sex, EXAMPLE_INGEST["subject"]["sex"]) + self.assertEqual(p.subject.karyotypic_sex, EXAMPLE_INGEST["subject"]["karyotypic_sex"]) + + pfs = 
list(p.phenotypic_features.all().order_by("pftype__id")) + + self.assertEqual(len(pfs), 2) + self.assertEqual(pfs[0].description, EXAMPLE_INGEST["phenotypic_features"][0]["description"]) + self.assertEqual(pfs[0].pftype["id"], EXAMPLE_INGEST["phenotypic_features"][0]["type"]["id"]) + self.assertEqual(pfs[0].pftype["label"], EXAMPLE_INGEST["phenotypic_features"][0]["type"]["label"]) + self.assertEqual(pfs[0].negated, EXAMPLE_INGEST["phenotypic_features"][0]["negated"]) + # TODO: Test more properties + + diseases = list(p.diseases.all().order_by("term__id")) + self.assertEqual(len(diseases), 1) + # TODO: More - # json_document not in output - bad_wf = generate_ingest(self.dataset["identifier"]) - bad_wf["workflow_outputs"] = {} - r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + # TODO: Test Metadata + biosamples = list(p.biosamples.all().order_by("id")) + self.assertEqual(len(biosamples), 5) # TODO: More From d7efc9b7024f0400ce015fbabb11c41d7fad638e Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:32:21 -0500 Subject: [PATCH 09/15] Reformat chord.tests.constants --- chord_metadata_service/chord/tests/constants.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/chord/tests/constants.py b/chord_metadata_service/chord/tests/constants.py index 429b6af74..957e2af8c 100644 --- a/chord_metadata_service/chord/tests/constants.py +++ b/chord_metadata_service/chord/tests/constants.py @@ -92,7 +92,11 @@ def dats_dataset(project_id, creators): } ], "title": "1000 Genomes Project", - "description": "The 1000 Genomes Project provides a comprehensive description of common human variation by applying a combination of whole-genome sequencing, deep exome sequencing and dense microarray genotyping to a diverse set of 2504 individuals from 26 populations. Over 88 million variants are characterised, including >99% of SNP variants with a frequency of >1% for a variety of ancestries.", + "description": "The 1000 Genomes Project provides a comprehensive description of common human variation by " + "applying a combination of whole-genome sequencing, deep exome sequencing and dense microarray " + "genotyping to a diverse set of 2504 individuals from 26 populations. 
Over 88 million variants " + "are characterised, including >99% of SNP variants with a frequency of >1% for a variety of " + "ancestries.", "storedIn": { "name": "European Bioinformatics Institute" }, @@ -140,13 +144,14 @@ def dats_dataset(project_id, creators): "category": "contact", "values": [ { - "value": "Jennifer Tremblay-Mercier, Research Co-ordinator, jennifer.tremblay-mercier@douglas.mcgill.ca, 514-761-6131 #3329" + "value": "Jennifer Tremblay-Mercier, Research Co-ordinator, " + "jennifer.tremblay-mercier@douglas.mcgill.ca, 514-761-6131 #3329" } ] } ], "data_use": VALID_DATA_USE_1 -} + } TEST_SEARCH_QUERY_1 = ["#eq", ["#resolve", "subject", "sex"], "FEMALE"] From 7a2a392af68860d6e12aaaf8fc7b7e977d338702 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:32:35 -0500 Subject: [PATCH 10/15] Fix issue with ingesting phenopacket phenotypic features --- chord_metadata_service/chord/views_ingest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index df4d60d3a..1f46b9359 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -162,7 +162,7 @@ def ingest_phenopacket(phenopacket_data, table_id): new_phenopacket_id = str(uuid.uuid4()) # TODO: Is this provided? subject = phenopacket_data.get("subject", None) - phenotypic_features = phenopacket_data.get("phenotypicFeatures", []) + phenotypic_features = phenopacket_data.get("phenotypic_features", []) biosamples = phenopacket_data.get("biosamples", []) genes = phenopacket_data.get("genes", []) diseases = phenopacket_data.get("diseases", []) @@ -254,10 +254,12 @@ def ingest_phenopacket(phenopacket_data, table_id): meta_data=meta_data_obj, dataset=Dataset.objects.get(identifier=table_id) ) - new_phenopacket.save() new_phenopacket.phenotypic_features.set(phenotypic_features_db) new_phenopacket.biosamples.set(biosamples_db) new_phenopacket.genes.set(genes_db) new_phenopacket.diseases.set(diseases_db) + + new_phenopacket.save() + return new_phenopacket From c84870dce85fe11ed484c270dba5654b0608ed32 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:36:00 -0500 Subject: [PATCH 11/15] Test double-ingest --- chord_metadata_service/chord/tests/test_ingest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index 974049b1e..9924cd56b 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -58,3 +58,8 @@ def test_ingesting_json(self): biosamples = list(p.biosamples.all().order_by("id")) self.assertEqual(len(biosamples), 5) # TODO: More + + # Test ingesting again + p2 = ingest_phenopacket(EXAMPLE_INGEST, self.d.identifier) + self.assertNotEqual(p.id, p2.id) + # TODO: More From 95d9bc6a33ccc83a49e1f7bbcf47572caa7cc156 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 15:24:50 -0500 Subject: [PATCH 12/15] Fix save order for phenopackets --- chord_metadata_service/chord/views_ingest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index 1f46b9359..041741309 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -255,11 +255,11 @@ def ingest_phenopacket(phenopacket_data, table_id): 
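
PATCH 10/15 moved the final save() after the many-to-many .set() calls, and PATCH 12/15 (the hunk below) moves it back in front of them. The standalone sketch that follows illustrates the underlying Django rule with throwaway stand-in models (Tag and Post are not part of this code base, and the in-memory SQLite setup exists only for demonstration): an instance must be saved before its many-to-many relations can be written. In the ingest code the phenopacket's primary key is assigned up front, so the symptom there is a database-level error for the missing parent row rather than the ValueError shown here, but the required ordering is the same.

# Sketch of why save() must come before many-to-many .set() in Django.
# Stand-in models and in-memory SQLite only; nothing here is project code.
import django
from django.conf import settings

settings.configure(
    INSTALLED_APPS=[],
    DATABASES={"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": ":memory:"}},
)
django.setup()

from django.db import connection, models


class Tag(models.Model):
    name = models.CharField(max_length=32)

    class Meta:
        app_label = "demo"  # throwaway label for this demonstration


class Post(models.Model):
    tags = models.ManyToManyField(Tag)

    class Meta:
        app_label = "demo"


# Create the tables (including the auto-generated M2M through table) in memory.
with connection.schema_editor() as editor:
    editor.create_model(Tag)
    editor.create_model(Post)

tag = Tag.objects.create(name="example")

post = Post()  # not saved yet, so it has no primary key
try:
    post.tags.set([tag])
except ValueError as exc:
    print("before save():", exc)  # the M2M rows need the parent row's primary key

post.save()           # assign a primary key first -- the ordering PATCH 12 restores
post.tags.set([tag])  # now the relation can be written
print("after save():", list(post.tags.values_list("name", flat=True)))
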
dataset=Dataset.objects.get(identifier=table_id) ) + new_phenopacket.save() + new_phenopacket.phenotypic_features.set(phenotypic_features_db) new_phenopacket.biosamples.set(biosamples_db) new_phenopacket.genes.set(genes_db) new_phenopacket.diseases.set(diseases_db) - new_phenopacket.save() - return new_phenopacket From 423670f6d0cc357302257107d5bc2f9813402dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Ch=C3=A9nard?= Date: Tue, 25 Feb 2020 16:53:53 -0500 Subject: [PATCH 13/15] fixed a few typos, some wording / stylistic changes --- chord_metadata_service/phenopackets/models.py | 2 +- docs/conf.py | 6 +++--- docs/modules/installation.rst | 6 +++--- docs/modules/introduction.rst | 16 ++++++++-------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/chord_metadata_service/phenopackets/models.py b/chord_metadata_service/phenopackets/models.py index 7767dcdcf..938f2668d 100644 --- a/chord_metadata_service/phenopackets/models.py +++ b/chord_metadata_service/phenopackets/models.py @@ -112,7 +112,7 @@ def __str__(self): class Procedure(models.Model): """ Class to represent a clinical procedure performed on an individual - (subject) in oder to extract a biosample + (subject) in order to extract a biosample FHIR: Procedure """ diff --git a/docs/conf.py b/docs/conf.py index de505a516..6ef8f6b1c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,8 +21,8 @@ # -- Project information ----------------------------------------------------- project = 'Metadata service' -copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chenard' -author = 'Ksenia Zaytseva, David Lougheed, Simon Chenard' +copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chénard' +author = 'Ksenia Zaytseva, David Lougheed, Simon Chénard' # The full version, including alpha/beta/rc tags release = '0.5.0' @@ -53,4 +53,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/docs/modules/installation.rst b/docs/modules/installation.rst index 561e8a04e..75d592b21 100644 --- a/docs/modules/installation.rst +++ b/docs/modules/installation.rst @@ -14,9 +14,9 @@ Installation git submodule update --init -3. Create and activate virtual environment +3. Create and activate a virtual environment -4. Cd to the main directory and install required packages: +4. Move to the main directory and install required packages: .. code-block:: @@ -49,4 +49,4 @@ e.g. settings if running database on localhost, default port for PostgreSQL is 5 python manage.py migrate python manage.py runserver -8. Development server runs at :code:`localhost:8000` \ No newline at end of file +8. Development server runs at :code:`localhost:8000` diff --git a/docs/modules/introduction.rst b/docs/modules/introduction.rst index d90e2990a..1431b3ecf 100644 --- a/docs/modules/introduction.rst +++ b/docs/modules/introduction.rst @@ -19,8 +19,8 @@ Besides PostgreSQL the data can be indexed and queried in Elasticsearch. Architecture ------------ -Metadata Service contains several services that share one API. -Services depend on each other and separated based on their scope. +The Metadata Service contains several services that share one API. +Services depend on each other and are separated based on their scope. **1. Patients service** handles anonymized individual’s data (e.g. 
individual id, sex, age or date of birth) @@ -68,16 +68,16 @@ REST API highlights - Other available renderers: - - Currently the following classes can be retirved in FHIR format by appending :code:`?format=fhir`: Phenopackets, Individual, Biosample, PhenotypicFeature, HtsFile, Gene, Variants, Disease, Procedure. + - Currently the following classes can be retrieved in FHIR format by appending :code:`?format=fhir`: Phenopacket, Individual, Biosample, PhenotypicFeature, HtsFile, Gene, Variant, Disease, Procedure. - - JSON-LD context to schema.org provided for Dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. + - JSON-LD context to schema.org provided for dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. - - Dataset description can also be retrived in RDF format: append :code:`?format=rdf` when querying dataset endpoint. + - Dataset description can also be retrieved in RDF format: append :code:`?format=rdf` when querying dataset endpoint. **Data ingest** Currently only the data that follow Phenopackets schema can be ingested. -Ingest endpoint is :code:`/private/ingest` . +Ingest endpoint is :code:`/private/ingest`. Example of POST request body: .. code-block:: @@ -118,7 +118,7 @@ Elasticsearch index (optional) ------------------------------ Data in FHIR format can be indexed in Elasticsearch - this is optional. -If an Elasticsearch instance is running on the server (so on :code:`localhost:9000`) these models will automatically be indexed on creation/update. +If an Elasticsearch instance is running on the server (so on :code:`localhost:9000`) these models will be automatically indexed on creation/update. There are also two scripts provided to update these indexes all at once: .. code-block:: @@ -126,7 +126,7 @@ There are also two scripts provided to update these indexes all at once: python manage.py patients_build_index python manage.py phenopackets_build_index -To query this information, here is an example request : +To query this information, here is an example request: .. code-block:: From c7febd83d984d085f3397c3b3c8866a45b91ecb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Ch=C3=A9nard?= Date: Tue, 25 Feb 2020 17:18:37 -0500 Subject: [PATCH 14/15] few more corrections as per David's suggestions --- docs/modules/introduction.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/modules/introduction.rst b/docs/modules/introduction.rst index 1431b3ecf..68b18250c 100644 --- a/docs/modules/introduction.rst +++ b/docs/modules/introduction.rst @@ -70,9 +70,9 @@ REST API highlights - Currently the following classes can be retrieved in FHIR format by appending :code:`?format=fhir`: Phenopacket, Individual, Biosample, PhenotypicFeature, HtsFile, Gene, Variant, Disease, Procedure. - - JSON-LD context to schema.org provided for dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. + - JSON-LD context to schema.org provided for the Dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. - - Dataset description can also be retrieved in RDF format: append :code:`?format=rdf` when querying dataset endpoint. 
+ - Dataset description can also be retrieved in RDF format: append :code:`?format=rdf` when querying the dataset endpoint. **Data ingest** @@ -126,7 +126,7 @@ There are also two scripts provided to update these indexes all at once: python manage.py patients_build_index python manage.py phenopackets_build_index -To query this information, here is an example request: +Here is an example request for querying this information: .. code-block:: From e7e5363d18a0d8f933160aa3c07390fa14a08f08 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 26 Feb 2020 11:46:07 -0500 Subject: [PATCH 15/15] Bump version to 0.5.1 --- docs/conf.py | 8 ++++---- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index de505a516..b009b01e2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,11 +21,11 @@ # -- Project information ----------------------------------------------------- project = 'Metadata service' -copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chenard' -author = 'Ksenia Zaytseva, David Lougheed, Simon Chenard' +copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chénard' +author = 'Ksenia Zaytseva, David Lougheed, Simon Chénard' # The full version, including alpha/beta/rc tags -release = '0.5.0' +release = '0.5.1' # -- General configuration --------------------------------------------------- @@ -53,4 +53,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/requirements.txt b/requirements.txt index 5bdf14221..4a80cafaf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ attrs==19.3.0 certifi==2019.11.28 chardet==3.0.4 chord-lib==0.5.0 -codecov==2.0.15 +codecov==2.0.16 coreapi==2.3.3 coreschema==0.0.4 coverage==5.0.3 diff --git a/setup.py b/setup.py index 91ad47d8f..4463e5687 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setuptools.setup( name="chord_metadata_service", - version="0.5.0", + version="0.5.1", python_requires=">=3.6", install_requires=[