
Commit

Merge pull request #67 from c3g/develop
Release v0.3.0
zxenia authored Feb 19, 2020
2 parents 6fcb2ce + 724f1f9 commit e63e202
Showing 43 changed files with 798 additions and 78 deletions.
2 changes: 1 addition & 1 deletion chord_metadata_service/chord/api_views.py
@@ -53,7 +53,7 @@ class DatasetViewSet(CHORDPublicModelViewSet):
renderer_classes = tuple(CHORDModelViewSet.renderer_classes) + (JSONLDDatasetRenderer, RDFDatasetRenderer,)


-class TableOwnershipViewSet(CHORDModelViewSet):  # TODO: Public?
+class TableOwnershipViewSet(CHORDPublicModelViewSet):
"""
get:
Return a list of table-(dataset|dataset,biosample) relationships
18 changes: 18 additions & 0 deletions chord_metadata_service/chord/migrations/0008_dataset_version.py
@@ -0,0 +1,18 @@
# Generated by Django 2.2.9 on 2020-02-17 21:48

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('chord', '0007_auto_20200129_1537'),
]

operations = [
migrations.AddField(
model_name='dataset',
name='version',
field=models.CharField(blank=True, default='version_2020-02-17 16:47:59.425036', help_text='A release point for the dataset when applicable.', max_length=200),
),
]
19 changes: 19 additions & 0 deletions chord_metadata_service/chord/migrations/0009_auto_20200218_1615.py
@@ -0,0 +1,19 @@
# Generated by Django 2.2.9 on 2020-02-18 21:15

import chord_metadata_service.chord.models
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('chord', '0008_dataset_version'),
]

operations = [
migrations.AlterField(
model_name='dataset',
name='version',
field=models.CharField(blank=True, default=chord_metadata_service.chord.models.version_default, help_text='A release point for the dataset when applicable.', max_length=200),
),
]
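A note on why migration 0009 lands immediately after 0008: 0008 was generated with the timestamp function already called, baking the literal default 'version_2020-02-17 16:47:59.425036' into the field, so every new dataset would have received that same stale stamp. 0009 repoints the default at the version_default callable itself, which Django invokes for each new row. A minimal sketch of the distinction, using a hypothetical Example model:

from django.db import models
from django.utils import timezone


def version_default():
    # Called by Django for each new row, so every dataset gets a fresh stamp.
    return f"version_{timezone.now()}"


class Example(models.Model):
    # Buggy: version_default() is evaluated once at definition time, freezing
    # a constant into the default (this is what migration 0008 captured).
    # version = models.CharField(max_length=200, default=version_default())

    # Correct: pass the callable itself; Django calls it per instance.
    version = models.CharField(max_length=200, blank=True, default=version_default)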
6 changes: 6 additions & 0 deletions chord_metadata_service/chord/models.py
@@ -2,11 +2,15 @@

from django.contrib.postgres.fields import JSONField, ArrayField
from django.db import models
from django.utils import timezone


__all__ = ["Project", "Dataset", "TableOwnership"]


def version_default():
return f"version_{timezone.now()}"

#############################################################
#                                                           #
#                     Project Management                    #
@@ -116,6 +120,8 @@ def n_of_tables(self):
help_text="The grant(s) which funded and supported the work reported by the dataset.")
keywords = ArrayField(JSONField(null=True, blank=True), blank=True, null=True,
help_text="Tags associated with the dataset, which will help in its discovery.")
version = models.CharField(max_length=200, blank=True, default=version_default,
help_text="A release point for the dataset when applicable.")
extra_properties = JSONField(blank=True, null=True,
help_text="Extra properties that do not fit in the previous specified attributes.")

2 changes: 2 additions & 0 deletions chord_metadata_service/chord/tests/constants.py
@@ -7,6 +7,7 @@
"dats_dataset",
"TEST_SEARCH_QUERY_1",
"TEST_SEARCH_QUERY_2",
"TEST_FHIR_SEARCH_QUERY",
]


@@ -150,3 +151,4 @@ def dats_dataset(project_id, creators):

TEST_SEARCH_QUERY_1 = ["#eq", ["#resolve", "subject", "sex"], "FEMALE"]
TEST_SEARCH_QUERY_2 = ["#eq", ["#resolve", "subject", "sex"], "MALE"]
TEST_FHIR_SEARCH_QUERY = {"query": {"match": {"gender": "FEMALE"}}}
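Unlike the two CHORD query ASTs above it, TEST_FHIR_SEARCH_QUERY is a raw Elasticsearch query-DSL body. A minimal sketch of how such a body is submitted with elasticsearch-py, assuming a default local node and borrowing the fhir_metadata index name from the mock below:

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumption: a node reachable at localhost:9200
body = {"query": {"match": {"gender": "FEMALE"}}}
res = es.search(index="fhir_metadata", body=body)

for hit in res["hits"]["hits"]:
    # Each hit carries the indexed FHIR resource in _source.
    print(hit["_id"], hit["_source"]["resourceType"])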
56 changes: 56 additions & 0 deletions chord_metadata_service/chord/tests/es_mocks.py
@@ -0,0 +1,56 @@
SEARCH_SUCCESS = {
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "Individual|patient:1",
"_index": "fhir_metadata",
"_score": 6.334576,
"_source": {
"birthDate": "1994-09-11",
"extension": [
{
"url": "http://ga4gh.org/fhir/phenopackets/StructureDefinition/individual-karyotypic-sex",
"valueCodeableConcept": {
"coding": [
{
"code": "XX",
"display": "XX",
"system": "http://ga4gh.org/fhir/phenopackets/CodeSystem/karyotypic-sex"
}
]
}
},
{
"url": "http://ga4gh.org/fhir/phenopackets/StructureDefinition/individual-taxonomy",
"valueCodeableConcept": {
"coding": [
{
"code": "FAKE_CODE",
"display": "Homo sapiens"
}
]
}
}
],
"gender": "FEMALE",
"id": "patient:1",
"resourceType": "Patient"
},
"_type": "_doc"
}
],
"max_score": 6.334576,
"total": {
"relation": "eq",
"value": 1
}
},
"timed_out": False,
"took": 5
}
38 changes: 37 additions & 1 deletion chord_metadata_service/chord/tests/test_api_search.py
@@ -1,4 +1,5 @@
import json
from unittest.mock import Mock, patch

from django.test import override_settings
from django.urls import reverse
@@ -8,6 +9,7 @@
from chord_metadata_service.phenopackets.tests.constants import *
from chord_metadata_service.phenopackets.models import *

from chord_metadata_service.chord.tests.es_mocks import SEARCH_SUCCESS
from .constants import *
from ..models import *
from ..views_search import PHENOPACKET_DATA_TYPE_ID, PHENOPACKET_SCHEMA, PHENOPACKET_METADATA_SCHEMA
@@ -90,7 +92,7 @@ def setUp(self) -> None:
# Set up a dummy phenopacket

self.individual, _ = Individual.objects.get_or_create(
-            id='patient:1', sex='FEMALE', age='P25Y3M2D')
+            id='patient:1', sex='FEMALE', age={"age": "P25Y3M2D"})

self.procedure = Procedure.objects.create(**VALID_PROCEDURE_1)

@@ -205,3 +207,37 @@ def test_private_table_search_4(self):
c = r.json()
self.assertEqual(len(c["results"]), 1)
self.assertEqual(self.phenopacket.id, c["results"][0]["id"])

@patch('chord_metadata_service.chord.views_search.es')
def test_fhir_search(self, mocked_es):
mocked_es.search.return_value = SEARCH_SUCCESS
# Valid search with result
r = self.client.post(reverse("fhir-search"), data=json.dumps({
"data_type": PHENOPACKET_DATA_TYPE_ID,
"query": TEST_FHIR_SEARCH_QUERY
}), content_type="application/json")

self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()

self.assertEqual(len(c["results"]), 1)
self.assertDictEqual(c["results"][0], {
"id": str(self.dataset.identifier),
"data_type": PHENOPACKET_DATA_TYPE_ID
})

@patch('chord_metadata_service.chord.views_search.es')
def test_private_fhir_search(self, mocked_es):
mocked_es.search.return_value = SEARCH_SUCCESS
# Valid search with result
r = self.client.post(reverse("fhir-private-search"), data=json.dumps({
"data_type": PHENOPACKET_DATA_TYPE_ID,
"query": TEST_FHIR_SEARCH_QUERY
}), content_type="application/json")

self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()

self.assertIn(str(self.dataset.identifier), c["results"])
self.assertEqual(c["results"][str(self.dataset.identifier)]["data_type"], PHENOPACKET_DATA_TYPE_ID)
self.assertEqual(self.phenopacket.id, c["results"][str(self.dataset.identifier)]["matches"][0]["id"])
13 changes: 9 additions & 4 deletions chord_metadata_service/chord/views_ingest.py
@@ -87,7 +87,7 @@ def workflow_file(_request, workflow_id):
def create_phenotypic_feature(pf):
pf_obj = PhenotypicFeature(
description=pf.get("description", ""),
-        pftype=pf["type"]["id"],
+        pftype=pf["type"],
negated=pf.get("negated", False),
severity=pf.get("severity", None),
modifier=pf.get("modifier", []), # TODO: Validate ontology term in schema...
@@ -206,7 +206,7 @@ def ingest_phenopacket(phenopacket_data, table_id):

g_obj, _ = Gene.objects.get_or_create(
id=g["id"],
-        alternate_id=g.get("alternate_ids", []),
+        alternate_ids=g.get("alternate_ids", []),
symbol=g["symbol"]
)

@@ -216,8 +216,13 @@
for d in diseases:
# TODO: Primary key, should this be a model?

-        d_obj = Disease(term=d["term"], onset=d.get("onset", None), disease_stage=d.get("disease_stage", []))
-        d_obj.save()
+        d_obj, _ = Disease.objects.get_or_create(
+            term=d["term"],
+            onset=d.get("onset", None),
+            disease_stage=d.get("disease_stage", []),
+            tnm_finding=d.get("tnm_finding", [])
+        )
+        diseases_db.append(d_obj.id)

resources_db = []
for rs in meta_data.get("resources", []):
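The switch from save() to get_or_create() in the disease loop above makes re-ingestion idempotent: get_or_create returns an (object, created) tuple, reusing the row that matches all supplied keyword arguments and inserting one otherwise. A small sketch of the pattern, with purely illustrative field values:

# Illustrative only: a second identical call would fetch, not duplicate.
d_obj, created = Disease.objects.get_or_create(
    term={"id": "FAKE:0000001", "label": "example disease"},
    onset=None,
    disease_stage=[],
    tnm_finding=[],
)
if created:
    print(f"inserted disease {d_obj.id}")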
122 changes: 115 additions & 7 deletions chord_metadata_service/chord/views_search.py
@@ -1,21 +1,24 @@
import itertools

from chord_lib.responses.errors import *
from chord_lib.search import build_search_response, postgres
from datetime import datetime
import json

from django.db import connection
from django.conf import settings
from psycopg2 import sql
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import AllowAny
from rest_framework.response import Response

from .models import Dataset
from .permissions import OverrideOrSuperUserOnly
from chord_lib.responses.errors import *
from chord_lib.search import build_search_response, postgres
from chord_metadata_service.metadata.settings import DEBUG
from chord_metadata_service.phenopackets.api_views import PHENOPACKET_PREFETCH
from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA
from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer
from chord_metadata_service.metadata.elastic import es
from .models import Dataset
from .permissions import OverrideOrSuperUserOnly

PHENOPACKET_DATA_TYPE_ID = "phenopacket"

@@ -105,8 +108,19 @@ def phenopacket_results(query, params, key="id"):

def phenopacket_query_results(query, params):
# TODO: possibly a quite inefficient way of doing things...
-    return Phenopacket.objects.filter(id__in=phenopacket_results(query, params, "id")).prefetch_related(
-        *PHENOPACKET_PREFETCH)
+    # To expand further on this query: the select_related call joins on
+    # tables we would hit anyway, saving two extra round trips to the DB,
+    # while prefetch_related works on M2M relationships and ensures that,
+    # for instance, querying diseases does not repeat requests for the
+    # same set of data.
+    return Phenopacket.objects.filter(
+        id__in=phenopacket_results(query, params, "id")
+    ).select_related(
+        'subject',
+        'meta_data'
+    ).prefetch_related(
+        *PHENOPACKET_PREFETCH
+    )


def search(request, internal_data=False):
@@ -160,6 +174,100 @@ def chord_private_search(request):
return search(request, internal_data=True)


def phenopacket_filter_results(subject_ids, htsfile_ids, disease_ids, biosample_ids,
phenotypicfeature_ids, phenopacket_ids, prefetch=False):

query = Phenopacket.objects.get_queryset()

if subject_ids:
query = query.filter(subject__id__in=subject_ids)

if htsfile_ids:
query = query.filter(htsfiles__id__in=htsfile_ids)

if disease_ids:
query = query.filter(diseases__id__in=disease_ids)

if biosample_ids:
query = query.filter(biosamples__id__in=biosample_ids)

if phenotypicfeature_ids:
query = query.filter(phenotypic_features__id__in=phenotypicfeature_ids)

if phenopacket_ids:
query = query.filter(id__in=phenopacket_ids)

res = query.prefetch_related(*PHENOPACKET_PREFETCH)

return res


# TODO: unsure why we chose POST for this endpoint; should it be GET?
def fhir_search(request, internal_data=False):
# TODO: not all that sure about the query format we'll want
# keep it simple for now
if "query" not in request.data:
return Response(bad_request_error("Missing query in request body"), status=400)

query = request.data["query"]
start = datetime.now()

if not es:
return Response(build_search_response([], start))

res = es.search(index=settings.FHIR_INDEX_NAME, body=query)

subject_ids = [hit['_id'].split('|')[1] for hit in res['hits']['hits'] if hit['_source']['resourceType'] == 'Patient']
htsfile_ids = [hit['_id'].split('|')[1] for hit in res['hits']['hits'] if hit['_source']['resourceType'] == 'DocumentReference']
disease_ids = [hit['_id'].split('|')[1] for hit in res['hits']['hits'] if hit['_source']['resourceType'] == 'Condition']
biosample_ids = [hit['_id'].split('|')[1] for hit in res['hits']['hits'] if hit['_source']['resourceType'] == 'Specimen']
phenotypicfeature_ids = [hit['_id'].split('|')[1] for hit in res['hits']['hits'] if hit['_source']['resourceType'] == 'Observation']
phenopacket_ids = [hit['_id'].split('|')[1] for hit in res['hits']['hits'] if hit['_source']['resourceType'] == 'Composition']

if (not subject_ids and not htsfile_ids and not disease_ids
and not biosample_ids and not phenotypicfeature_ids and not phenopacket_ids):
return Response(build_search_response([], start))
else:
phenopackets = phenopacket_filter_results(
subject_ids,
htsfile_ids,
disease_ids,
biosample_ids,
phenotypicfeature_ids,
phenopacket_ids
)

if not internal_data:
datasets = Dataset.objects.filter(
            identifier__in=[
p.dataset_id for p in phenopackets
]
) # TODO: Maybe can avoid hitting DB here
return Response(build_search_response([{"id": d.identifier, "data_type": PHENOPACKET_DATA_TYPE_ID}
for d in datasets], start))
return Response(build_search_response({
dataset_id: {
"data_type": PHENOPACKET_DATA_TYPE_ID,
"matches": list(PhenopacketSerializer(p).data for p in dataset_phenopackets)
} for dataset_id, dataset_phenopackets in itertools.groupby(
phenopackets,
key=lambda p: str(p.dataset_id)
)
}, start))


@api_view(["POST"])
@permission_classes([AllowAny])
def fhir_public_search(request):
return fhir_search(request)


@api_view(["POST"])
@permission_classes([AllowAny])
def fhir_private_search(request):
return fhir_search(request, internal_data=True)


@api_view(["POST"])
def chord_private_table_search(request, table_id):
# Search phenopacket data types in specific tables
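Worth spelling out is the convention fhir_search relies on, visible in the es_mocks payload: each Elasticsearch document _id has the form <ResourceType>|<local_id> (e.g. Individual|patient:1), so splitting _id on '|' and bucketing hits by _source.resourceType recovers the model-level IDs used to filter phenopackets. A compact sketch of that bucketing step, under the same assumed convention (bucket_hits is hypothetical, not part of the module):

# Maps FHIR resource types to the ID lists fhir_search filters on.
RESOURCE_BUCKETS = {
    "Patient": "subject_ids",
    "DocumentReference": "htsfile_ids",
    "Condition": "disease_ids",
    "Specimen": "biosample_ids",
    "Observation": "phenotypicfeature_ids",
    "Composition": "phenopacket_ids",
}


def bucket_hits(res):
    buckets = {name: [] for name in RESOURCE_BUCKETS.values()}
    for hit in res["hits"]["hits"]:
        bucket = RESOURCE_BUCKETS.get(hit["_source"]["resourceType"])
        if bucket:
            # _id looks like "Individual|patient:1"; keep the local part.
            buckets[bucket].append(hit["_id"].split("|", 1)[1])
    return buckets

One caveat: the private response is assembled with itertools.groupby keyed on dataset_id, and groupby only merges consecutive elements, so the grouping implicitly assumes the queryset returns phenopackets clustered by dataset; sorting by that key first would be the defensive choice.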
