From de21d53d08f60a26357e78c7879b5ab1bf295eb3 Mon Sep 17 00:00:00 2001 From: zxenia Date: Wed, 6 Jan 2021 17:51:06 -0500 Subject: [PATCH 1/7] add overview endpoint (based on tables summary solution) --- .../chord/views_overview.py | 84 +++++++++++++++++++ chord_metadata_service/restapi/urls.py | 6 +- 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 chord_metadata_service/chord/views_overview.py diff --git a/chord_metadata_service/chord/views_overview.py b/chord_metadata_service/chord/views_overview.py new file mode 100644 index 000000000..d5cb1d3aa --- /dev/null +++ b/chord_metadata_service/chord/views_overview.py @@ -0,0 +1,84 @@ +from collections import Counter +from rest_framework.decorators import api_view, permission_classes +from rest_framework.response import Response + +from chord_metadata_service.patients.models import Individual +from chord_metadata_service.phenopackets.models import Phenopacket +from .permissions import OverrideOrSuperUserOnly + + +@api_view(["GET"]) +@permission_classes([OverrideOrSuperUserOnly]) +def phenopackets_overview(_request): + phenopackets = Phenopacket.objects.all() + + diseases_counter = Counter() + phenotypic_features_counter = Counter() + + biosamples_set = set() + individuals_set = set() + + biosamples_taxonomy = Counter() + biosamples_sampled_tissue = Counter() + + individuals_sex = Counter() + individuals_k_sex = Counter() + individuals_taxonomy = Counter() + + def count_individual(ind): + individuals_set.add(ind.id) + individuals_sex.update((ind.sex,)) + individuals_k_sex.update((ind.karyotypic_sex,)) + if ind.taxonomy is not None: + individuals_taxonomy.update((ind.taxonomy["label"],)) + + for p in phenopackets.prefetch_related("biosamples"): + for b in p.biosamples.all(): + biosamples_set.add(b.id) + biosamples_sampled_tissue.update((b.sampled_tissue["label"],)) + + if b.taxonomy is not None: + biosamples_taxonomy.update((b.taxonomy["label"],)) + + if b.individual is not None: + count_individual(b.individual) + + for pf in b.phenotypic_features.all(): + phenotypic_features_counter.update((pf.pftype["label"],)) + + for d in p.diseases.all(): + diseases_counter.update((d.term["label"],)) + + for pf in p.phenotypic_features.all(): + phenotypic_features_counter.update((pf.pftype["label"],)) + + # Currently, phenopacket subject is required so we can assume it's not None + count_individual(p.subject) + + return Response({ + "count": phenopackets.count(), + "data_type_specific": { + "biosamples": { + "count": len(biosamples_set), + "taxonomy": dict(biosamples_taxonomy), + "sampled_tissue": dict(biosamples_sampled_tissue), + }, + "diseases": { + # count is a number of unique disease terms (not all diseases in the database) + "count": len(diseases_counter.keys()), + "term": dict(diseases_counter) + }, + "individuals": { + "count": len(individuals_set), + "sex": {k: individuals_sex[k] for k in (s[0] for s in Individual.SEX)}, + "karyotypic_sex": {k: individuals_k_sex[k] for k in (s[0] for s in Individual.KARYOTYPIC_SEX)}, + "taxonomy": dict(individuals_taxonomy), + # TODO: how to count age: it can be represented by three different schemas + }, + "phenotypic_features": { + # count is a number of unique phenotypic feature types (not all pfs in the database) + "count": len(phenotypic_features_counter.keys()), + "type": dict(phenotypic_features_counter) + }, + } + }) diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index 20e8aaf61..ecb73b7ee 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -2,16 +2,15 @@ from rest_framework import routers from chord_metadata_service.chord import api_views as chord_views +from chord_metadata_service.chord import views_overview from chord_metadata_service.experiments import api_views as experiment_views from chord_metadata_service.mcode import api_views as mcode_views from chord_metadata_service.patients import api_views as individual_views from chord_metadata_service.phenopackets import api_views as phenopacket_views from chord_metadata_service.resources import api_views as resources_views - __all__ = ["router", "urlpatterns"] - router = routers.DefaultRouter(trailing_slash=False) # CHORD app urls @@ -64,4 +63,7 @@ name="experiment-schema"), path('mcode_schema', mcode_views.get_mcode_schema, name="mcode-schema"), + # overview + path('overview', views_overview.phenopackets_overview, + name="overview"), ] From 454db79d200413edbfb582ad045cb045707a1ae9 Mon Sep 17 00:00:00 2001 From: zxenia Date: Thu, 7 Jan 2021 15:57:22 -0500 Subject: [PATCH 2/7] move overview view to phenopackets app --- .../chord/views_overview.py | 84 ------------------- .../phenopackets/api_views.py | 83 ++++++++++++++++++ chord_metadata_service/restapi/urls.py | 3 +- 3 files changed, 84 insertions(+), 86 deletions(-) delete mode 100644 chord_metadata_service/chord/views_overview.py diff --git a/chord_metadata_service/chord/views_overview.py b/chord_metadata_service/chord/views_overview.py deleted file mode 100644 index d5cb1d3aa..000000000 --- a/chord_metadata_service/chord/views_overview.py +++ /dev/null @@ -1,84 +0,0 @@ -from collections import Counter -from rest_framework.decorators import api_view, permission_classes -from rest_framework.response import Response - -from chord_metadata_service.patients.models import Individual -from chord_metadata_service.phenopackets.models import Phenopacket -from .permissions import OverrideOrSuperUserOnly - - -@api_view(["GET"]) -@permission_classes([OverrideOrSuperUserOnly]) -def phenopackets_overview(_request): - phenopackets = Phenopacket.objects.all() - - diseases_counter = Counter() - phenotypic_features_counter = Counter() - - biosamples_set = set() - individuals_set = set() - - biosamples_taxonomy = Counter() - biosamples_sampled_tissue = Counter() - - individuals_sex = Counter() - individuals_k_sex = Counter() - individuals_taxonomy = Counter() - - def count_individual(ind): - individuals_set.add(ind.id) - individuals_sex.update((ind.sex,)) - individuals_k_sex.update((ind.karyotypic_sex,)) - if ind.taxonomy is not None: - individuals_taxonomy.update((ind.taxonomy["label"],)) - - for p in phenopackets.prefetch_related("biosamples"): - for b in p.biosamples.all(): - biosamples_set.add(b.id) - biosamples_sampled_tissue.update((b.sampled_tissue["label"],)) - - if b.taxonomy is not None: - biosamples_taxonomy.update((b.taxonomy["label"],)) - - if b.individual is not None: - count_individual(b.individual) - - for pf in b.phenotypic_features.all(): - phenotypic_features_counter.update((pf.pftype["label"],)) - - for d in p.diseases.all(): - diseases_counter.update((d.term["label"],)) - - for pf in p.phenotypic_features.all(): - phenotypic_features_counter.update((pf.pftype["label"],)) - - # Currently, phenopacket subject is required so we can assume it's not None - count_individual(p.subject) - - return Response({ - "count": phenopackets.count(), - "data_type_specific": { - "biosamples": { - "count": len(biosamples_set), - "taxonomy": dict(biosamples_taxonomy), - "sampled_tissue": dict(biosamples_sampled_tissue), - }, - "diseases": { - # count is a number of unique disease terms (not all diseases in the database) - "count": len(diseases_counter.keys()), - "term": dict(diseases_counter) - }, - "individuals": { - "count": len(individuals_set), - "sex": {k: individuals_sex[k] for k in (s[0] for s in Individual.SEX)}, - "karyotypic_sex": {k: individuals_k_sex[k] for k in (s[0] for s in Individual.KARYOTYPIC_SEX)}, - "taxonomy": dict(individuals_taxonomy), - # TODO: how to count age: it can be represented by three different schemas - }, - "phenotypic_features": { - # count is a number of unique phenotypic feature types (not all pfs in the database) - "count": len(phenotypic_features_counter.keys()), - "type": dict(phenotypic_features_counter) - }, - } - }) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index c7044bd79..ea4c330df 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -1,3 +1,4 @@ +from collections import Counter from rest_framework import viewsets from rest_framework.settings import api_settings from rest_framework.decorators import api_view, permission_classes @@ -7,6 +8,7 @@ from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, FHIRRenderer from chord_metadata_service.restapi.pagination import LargeResultsSetPagination +from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA from . import models as m, serializers as s, filters as f @@ -232,3 +234,84 @@ def get_chord_phenopacket_schema(_request): Chord phenopacket schema that can be shared with data providers. """ return Response(PHENOPACKET_SCHEMA) + + +@api_view(["GET"]) +@permission_classes([OverrideOrSuperUserOnly]) +def phenopackets_overview(_request): + """ + get: + Overview of all Phenopackets in the database + """ + phenopackets = m.Phenopacket.objects.all() + + diseases_counter = Counter() + phenotypic_features_counter = Counter() + + biosamples_set = set() + individuals_set = set() + + biosamples_taxonomy = Counter() + biosamples_sampled_tissue = Counter() + + individuals_sex = Counter() + individuals_k_sex = Counter() + individuals_taxonomy = Counter() + + def count_individual(ind): + individuals_set.add(ind.id) + individuals_sex.update((ind.sex,)) + individuals_k_sex.update((ind.karyotypic_sex,)) + if ind.taxonomy is not None: + individuals_taxonomy.update((ind.taxonomy["label"],)) + + for p in phenopackets.prefetch_related("biosamples"): + for b in p.biosamples.all(): + biosamples_set.add(b.id) + biosamples_sampled_tissue.update((b.sampled_tissue["label"],)) + + if b.taxonomy is not None: + biosamples_taxonomy.update((b.taxonomy["label"],)) + + if b.individual is not None: + count_individual(b.individual) + + for pf in b.phenotypic_features.all(): + phenotypic_features_counter.update((pf.pftype["label"],)) + + for d in p.diseases.all(): + diseases_counter.update((d.term["label"],)) + + for pf in p.phenotypic_features.all(): + phenotypic_features_counter.update((pf.pftype["label"],)) + + # Currently, phenopacket subject is required so we can assume it's not None + count_individual(p.subject) + + return Response({ + "count": phenopackets.count(), + "data_type_specific": { + "biosamples": { + "count": len(biosamples_set), + "taxonomy": dict(biosamples_taxonomy), + "sampled_tissue": dict(biosamples_sampled_tissue), + }, + "diseases": { + # count is a number of unique disease terms (not all diseases in the database) + "count": len(diseases_counter.keys()), + "term": dict(diseases_counter) + }, + "individuals": { + "count": len(individuals_set), + "sex": {k: individuals_sex[k] for k in (s[0] for s in m.Individual.SEX)}, + "karyotypic_sex": {k: individuals_k_sex[k] for k in (s[0] for s in m.Individual.KARYOTYPIC_SEX)}, + "taxonomy": dict(individuals_taxonomy), + # TODO: how to count age: it can be represented by three different schemas + }, + "phenotypic_features": { + # count is a number of unique phenotypic feature types (not all pfs in the database) + "count": len(phenotypic_features_counter.keys()), + "type": dict(phenotypic_features_counter) + }, + } + }) diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index ecb73b7ee..bfc4f53ae 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -2,7 +2,6 @@ from rest_framework import routers from chord_metadata_service.chord import api_views as chord_views -from chord_metadata_service.chord import views_overview from chord_metadata_service.experiments import api_views as experiment_views from chord_metadata_service.mcode import api_views as mcode_views from chord_metadata_service.patients import api_views as individual_views @@ -64,6 +63,6 @@ path('mcode_schema', mcode_views.get_mcode_schema, name="mcode-schema"), # overview - path('overview', views_overview.phenopackets_overview, + path('overview', phenopacket_views.phenopackets_overview, name="overview"), ] From bef543c63aa3d7a065999421fd3fb048e1785d7b Mon Sep 17 00:00:00 2001 From: zxenia Date: Thu, 7 Jan 2021 18:00:07 -0500 Subject: [PATCH 3/7] add test for overview --- .../phenopackets/api_views.py | 14 ++++--- .../phenopackets/tests/test_api.py | 38 +++++++++++++++++++ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index ea4c330df..7c5241561 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -273,12 +273,14 @@ def count_individual(ind): if b.taxonomy is not None: biosamples_taxonomy.update((b.taxonomy["label"],)) - if b.individual is not None: - count_individual(b.individual) - - for pf in b.phenotypic_features.all(): - phenotypic_features_counter.update((pf.pftype["label"],)) - + # if b.individual is not None: + # count_individual(b.individual) + + # for pf in b.phenotypic_features.all(): + # phenotypic_features_counter.update((pf.pftype["label"],)) + # according to Phenopackets standard + # phenotypic features also can be linked to a Biosample + # but we count them here because all our use cases current have them linked to Phenopacket not biosample for d in p.diseases.all(): diseases_counter.update((d.term["label"],)) diff --git a/chord_metadata_service/phenopackets/tests/test_api.py b/chord_metadata_service/phenopackets/tests/test_api.py index 5c7b06cac..5f269a01d 100644 --- a/chord_metadata_service/phenopackets/tests/test_api.py +++ b/chord_metadata_service/phenopackets/tests/test_api.py @@ -282,3 +282,41 @@ def test_interpretation(self): response = get_response('interpretation-list', self.interpretation) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + +class OverviewTest(APITestCase): + + def setUp(self) -> None: + # create 2 phenopackets for 2 individuals; each individual has 1 biosample; + # one of phenopackets has 1 phenotypic feature and 1 disease + self.individual_1 = m.Individual.objects.create(**c.VALID_INDIVIDUAL_1) + self.individual_2 = m.Individual.objects.create(**c.VALID_INDIVIDUAL_2) + self.metadata_1 = m.MetaData.objects.create(**c.VALID_META_DATA_1) + self.metadata_2 = m.MetaData.objects.create(**c.VALID_META_DATA_2) + self.phenopacket_1 = m.Phenopacket.objects.create( + **c.valid_phenopacket(subject=self.individual_1, meta_data=self.metadata_1) + ) + self.phenopacket_2 = m.Phenopacket.objects.create( + id='phenopacket:2', subject=self.individual_2, meta_data=self.metadata_2 + ) + self.disease = m.Disease.objects.create(**c.VALID_DISEASE_1) + self.procedure = m.Procedure.objects.create(**c.VALID_PROCEDURE_1) + self.biosample_1 = m.Biosample.objects.create(**c.valid_biosample_1(self.individual_1, self.procedure)) + self.biosample_2 = m.Biosample.objects.create(**c.valid_biosample_2(self.individual_2, self.procedure)) + self.phenotypic_feature = m.PhenotypicFeature.objects.create( + **c.valid_phenotypic_feature(self.biosample_1, self.phenopacket_1) + ) + self.phenopacket_1.biosamples.set([self.biosample_1]) + self.phenopacket_2.biosamples.set([self.biosample_2]) + self.phenopacket_1.diseases.set([self.disease]) + + def test_overview(self): + response = self.client.get('/api/overview') + response_obj = response.json() + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertIsInstance(response_obj, dict) + self.assertEqual(response_obj['count'], 2) + self.assertEqual(response_obj['data_type_specific']['individuals']['count'], 2) + self.assertEqual(response_obj['data_type_specific']['biosamples']['count'], 2) + self.assertEqual(response_obj['data_type_specific']['phenotypic_features']['count'], 1) + self.assertEqual(response_obj['data_type_specific']['diseases']['count'], 1) From 9d6d186addd55fd273d084ffcbbc8624eba293cb Mon Sep 17 00:00:00 2001 From: zxenia Date: Mon, 1 Feb 2021 22:23:59 -0500 Subject: [PATCH 4/7] add parsed age to overview --- .../phenopackets/api_views.py | 11 +++++++--- chord_metadata_service/restapi/utils.py | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index 7c5241561..0092356be 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -8,6 +8,7 @@ from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, FHIRRenderer from chord_metadata_service.restapi.pagination import LargeResultsSetPagination +from chord_metadata_service.restapi.utils import parse_individual_age from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA from . import models as m, serializers as s, filters as f @@ -257,11 +258,14 @@ def phenopackets_overview(_request): individuals_sex = Counter() individuals_k_sex = Counter() individuals_taxonomy = Counter() + individuals_age = Counter() def count_individual(ind): individuals_set.add(ind.id) individuals_sex.update((ind.sex,)) individuals_k_sex.update((ind.karyotypic_sex,)) + if ind.age is not None: + individuals_age.update((parse_individual_age(ind.age),)) if ind.taxonomy is not None: individuals_taxonomy.update((ind.taxonomy["label"],)) @@ -273,11 +277,11 @@ def count_individual(ind): if b.taxonomy is not None: biosamples_taxonomy.update((b.taxonomy["label"],)) - # if b.individual is not None: - # count_individual(b.individual) - + # TODO decide what to do with nested Phenotypic features and Subject in Biosample + # This might serve future use cases that Biosample as a have main focus of study # for pf in b.phenotypic_features.all(): # phenotypic_features_counter.update((pf.pftype["label"],)) + # according to Phenopackets standard # phenotypic features also can be linked to a Biosample # but we count them here because all our use cases current have them linked to Phenopacket not biosample @@ -308,6 +312,7 @@ def count_individual(ind): "sex": {k: individuals_sex[k] for k in (s[0] for s in m.Individual.SEX)}, "karyotypic_sex": {k: individuals_k_sex[k] for k in (s[0] for s in m.Individual.KARYOTYPIC_SEX)}, "taxonomy": dict(individuals_taxonomy), + "age": dict(individuals_age), # TODO: how to count age: it can be represented by three different schemas }, "phenotypic_features": { diff --git a/chord_metadata_service/restapi/utils.py b/chord_metadata_service/restapi/utils.py index 0f5d9224a..b5b84d274 100644 --- a/chord_metadata_service/restapi/utils.py +++ b/chord_metadata_service/restapi/utils.py @@ -40,3 +40,23 @@ def parse_onset(onset): return f"{onset['start']['age']} - {onset['end']['age']}" else: return None + + +def parse_duration(string): + """ Returns years integer. """ + string = string.split('P')[-1] + return int(float(string.split('Y')[0])) + + +def parse_individual_age(age_obj): + """ Parses two possible age representations and returns average age or age as integer. """ + if 'start' in age_obj: + start_age = parse_duration(age_obj['start']['age']) + end_age = parse_duration(age_obj['end']['age']) + # for the duration calculate the average age + age = (start_age + end_age) // 2 + elif isinstance(age_obj, str): + age = parse_duration(age_obj) + else: + raise ValueError(f"Error: {age_obj} format not supported") + return age From 4d1c90c1dd487d7bf62b7f221fbeaac466b474fc Mon Sep 17 00:00:00 2001 From: zxenia Date: Mon, 1 Feb 2021 22:41:31 -0500 Subject: [PATCH 5/7] rename count to phenopackets --- chord_metadata_service/phenopackets/api_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index 0092356be..ea8757486 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -295,7 +295,7 @@ def count_individual(ind): count_individual(p.subject) return Response({ - "count": phenopackets.count(), + "phenopackets": phenopackets.count(), "data_type_specific": { "biosamples": { "count": len(biosamples_set), From c4c381e168194e040f21f3bfa4721c7ca33ddfb0 Mon Sep 17 00:00:00 2001 From: zxenia Date: Tue, 2 Feb 2021 17:20:07 -0500 Subject: [PATCH 6/7] update katsu version to 1.3.1 --- chord_metadata_service/package.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/package.cfg b/chord_metadata_service/package.cfg index 207a64f97..7c639c7a2 100644 --- a/chord_metadata_service/package.cfg +++ b/chord_metadata_service/package.cfg @@ -1,4 +1,4 @@ [package] name = katsu -version = 1.3.0 +version = 1.3.1 authors = Ksenia Zaytseva, David Lougheed, Simon Chénard, Romain Grégoire From b04a95f54e42115c0f8c77dbb072df9a9b79cbe0 Mon Sep 17 00:00:00 2001 From: zxenia Date: Tue, 2 Feb 2021 17:39:05 -0500 Subject: [PATCH 7/7] small fix in test --- chord_metadata_service/phenopackets/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/phenopackets/tests/test_api.py b/chord_metadata_service/phenopackets/tests/test_api.py index 5f269a01d..8551ce44b 100644 --- a/chord_metadata_service/phenopackets/tests/test_api.py +++ b/chord_metadata_service/phenopackets/tests/test_api.py @@ -315,7 +315,7 @@ def test_overview(self): response_obj = response.json() self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIsInstance(response_obj, dict) - self.assertEqual(response_obj['count'], 2) + self.assertEqual(response_obj['phenopackets'], 2) self.assertEqual(response_obj['data_type_specific']['individuals']['count'], 2) self.assertEqual(response_obj['data_type_specific']['biosamples']['count'], 2) self.assertEqual(response_obj['data_type_specific']['phenotypic_features']['count'], 1)