From 289492bdba4010f93733a9c6ae58bf29f37fd7d5 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 10:55:42 -0500 Subject: [PATCH 01/15] Try to fix ingest null handling in get_or_create contexts --- chord_metadata_service/chord/views_ingest.py | 60 ++++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index 13be51e15..98c5f8cf0 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -15,6 +15,8 @@ from chord_lib.responses.errors import * from chord_lib.workflows import get_workflow, get_workflow_resource, workflow_exists +from typing import Callable + from chord_metadata_service.chord.models import * from chord_metadata_service.phenopackets.models import * @@ -149,6 +151,11 @@ def ingest(request): return Response(status=204) +def _query_and_check_nulls(obj: dict, key: str, transform: Callable = lambda x: x): + value = obj.get(key, None) + return {f"{key}__isnull": True} if value is None else {key: transform(value)} + + def ingest_phenopacket(phenopacket_data, table_id): """ Ingests one phenopacket. """ @@ -162,15 +169,10 @@ def ingest_phenopacket(phenopacket_data, table_id): meta_data = phenopacket_data["meta_data"] if subject: - subject, _ = Individual.objects.get_or_create( - id=subject["id"], - alternate_ids=subject.get("alternate_ids", None), - date_of_birth=isoparse(subject["date_of_birth"]) if "date_of_birth" in subject else None, - age=subject.get("age", ""), # TODO: Shouldn't this be nullable, since it's recommended in the spec? - sex=subject.get("sex", None), - karyotypic_sex=subject.get("karyotypic_sex", None), - taxonomy=subject.get("taxonomy", None) - ) + subject_query = _query_and_check_nulls(subject, "date_of_birth", transform=isoparse) + for k in ("alternate_ids", "age", "sex", "karyotypic_sex", "taxonomy"): + subject_query.update(_query_and_check_nulls(subject, k)) + subject, _ = Individual.objects.get_or_create(id=subject["id"], **subject_query) phenotypic_features_db = [create_phenotypic_feature(pf) for pf in phenotypic_features] @@ -178,26 +180,26 @@ def ingest_phenopacket(phenopacket_data, table_id): for bs in biosamples: # TODO: This should probably be a JSON field, or compound key with code/body_site procedure, _ = Procedure.objects.get_or_create(**bs["procedure"]) - bs_pfs = [create_phenotypic_feature(pf) for pf in bs.get("phenotypic_features", [])] - bs_obj, _ = Biosample.objects.get_or_create( + bs_query = _query_and_check_nulls(bs, "individual_id", lambda i: Individual.objects.get(id=i)) + for k in ("sampled_issue", "taxonomy", "individual_age_at_collection", "histological_diagnosis", + "tumor_progression", "tumor_grade"): + bs_query.update(_query_and_check_nulls(bs, k)) + + bs_obj, bs_created = Biosample.objects.get_or_create( id=bs["id"], - individual=(Individual.objects.get(id=bs["individual_id"]) - if "individual_id" in bs else None), description=bs.get("description", ""), - sampled_tissue=bs.get("sampled_tissue", None), - taxonomy=bs.get("taxonomy", None), - individual_age_at_collection=bs.get("individual_age_at_collection", None), - histological_diagnosis=bs.get("histological_diagnosis", None), - tumor_progression=bs.get("tumor_progression", None), - tumor_grade=bs.get("tumor_grade", None), procedure=procedure, is_control_sample=bs.get("is_control_sample", False), - - diagnostic_markers=bs.get("diagnostic_markers", []) + diagnostic_markers=bs.get("diagnostic_markers", 
[]), + **bs_query ) - bs_obj.phenotypic_features.set(bs_pfs) + if bs_created: + bs_pfs = [create_phenotypic_feature(pf) for pf in bs.get("phenotypic_features", [])] + bs_obj.phenotypic_features.set(bs_pfs) + + # TODO: Update phenotypic features otherwise? biosamples_db.append(bs_obj) @@ -206,24 +208,21 @@ def ingest_phenopacket(phenopacket_data, table_id): for g in genes: # TODO: Validate CURIE # TODO: Rename alternate_id - g_obj, _ = Gene.objects.get_or_create( id=g["id"], alternate_ids=g.get("alternate_ids", []), symbol=g["symbol"] ) - genes_db.append(g_obj) diseases_db = [] - for d in diseases: + for disease in diseases: # TODO: Primary key, should this be a model? - d_obj, _ = Disease.objects.get_or_create( - term=d["term"], - onset=d.get("onset", None), - disease_stage=d.get("disease_stage", []), - tnm_finding=d.get("tnm_finding", []) + term=disease["term"], + disease_stage=disease.get("disease_stage", []), + tnm_finding=disease.get("tnm_finding", []), + **_query_and_check_nulls(disease, "onset") ) diseases_db.append(d_obj.id) @@ -237,7 +236,6 @@ def ingest_phenopacket(phenopacket_data, table_id): version=rs["version"], iri_prefix=rs["iri_prefix"] ) - # rs_obj.save() resources_db.append(rs_obj) meta_data_obj = MetaData( From 9a0cea15017710d544d330c901f9730344b77472 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 11:14:09 -0500 Subject: [PATCH 02/15] Fix typo (sampled_issue -> sampled_tissue) --- chord_metadata_service/chord/views_ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index 98c5f8cf0..df4d60d3a 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -182,7 +182,7 @@ def ingest_phenopacket(phenopacket_data, table_id): procedure, _ = Procedure.objects.get_or_create(**bs["procedure"]) bs_query = _query_and_check_nulls(bs, "individual_id", lambda i: Individual.objects.get(id=i)) - for k in ("sampled_issue", "taxonomy", "individual_age_at_collection", "histological_diagnosis", + for k in ("sampled_tissue", "taxonomy", "individual_age_at_collection", "histological_diagnosis", "tumor_progression", "tumor_grade"): bs_query.update(_query_and_check_nulls(bs, k)) From 0ba4ff9c0ccbe59248c17d2fe00561ef9ecf9f1b Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 11:14:19 -0500 Subject: [PATCH 03/15] Update single_req example --- examples/single_req.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/single_req.json b/examples/single_req.json index 526c445ae..2d4bae995 100644 --- a/examples/single_req.json +++ b/examples/single_req.json @@ -1,5 +1,5 @@ { - "dataset_id": "3df5e8b0-3949-4d3c-a37f-1c6a81940d50", + "table_id": "3df5e8b0-3949-4d3c-a37f-1c6a81940d50", "workflow_id": "phenopackets_json", "workflow_metadata": { "inputs": [ From 5494fefff6b1c7bceec4a7f057b376fed479cb71 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:06:03 -0500 Subject: [PATCH 04/15] Travis diagnostics --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 81041e10f..10ceeeed1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: + - ls -a /var/run/postgres - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt From 
d165b7ccd62a999028297b8d411954ac5534cce0 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:09:51 -0500 Subject: [PATCH 05/15] Travis diags cont'd --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 10ceeeed1..f1130cfc2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: - - ls -a /var/run/postgres + - ls -a /var/run - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt From 54538de89f63a2be87b750dd74ab8b0e5d1101a1 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:13:09 -0500 Subject: [PATCH 06/15] pt3 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f1130cfc2..c549aa73d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: - - ls -a /var/run + - ls -a /var/run/postgresql - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt From c22883d9bc46560bc67ac6b945814f0f7672b52a Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 12:16:49 -0500 Subject: [PATCH 07/15] Change travis postgres port --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c549aa73d..0c27e4f49 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,13 +9,12 @@ addons: - postgresql-11 - postgresql-contrib-11 before_install: - - ls -a /var/run/postgresql - - sudo -u postgres psql -U postgres -p 5433 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" + - sudo -u postgres psql -U postgres -p 5432 -d postgres -c "alter user postgres with password 'hj38f3Ntr';" install: - pip install -r requirements.txt - pip install . 
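
A brief illustration of the null-handling pattern from PATCH 01/15, shown as a standalone sketch so the lookup dictionary it builds for get_or_create() is visible outside of Django. The helper is copied verbatim from views_ingest.py; the sample subject dict and the printed result are illustrative only and are not part of the patch series.

# Sketch: what _query_and_check_nulls() produces for a partially populated subject.
# The helper is the one added in PATCH 01/15; the sample data below is made up.
from typing import Callable

from dateutil.parser import isoparse


def _query_and_check_nulls(obj: dict, key: str, transform: Callable = lambda x: x):
    value = obj.get(key, None)
    return {f"{key}__isnull": True} if value is None else {key: transform(value)}


subject = {"id": "patient1", "sex": "MALE"}  # no date_of_birth, taxonomy, etc.

subject_query = _query_and_check_nulls(subject, "date_of_birth", transform=isoparse)
for k in ("alternate_ids", "age", "sex", "karyotypic_sex", "taxonomy"):
    subject_query.update(_query_and_check_nulls(subject, k))

print(subject_query)
# {'date_of_birth__isnull': True, 'alternate_ids__isnull': True, 'age__isnull': True,
#  'sex': 'MALE', 'karyotypic_sex__isnull': True, 'taxonomy__isnull': True}
#
# Missing keys become <field>__isnull=True lookups, while present values are passed
# through `transform`, so isoparse() is never called on an absent date_of_birth.
# These keyword arguments are what the patched ingest code unpacks into
# Individual.objects.get_or_create(id=subject["id"], **subject_query).
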
script: - - export POSTGRES_USER="postgres" && export POSTGRES_PASSWORD="hj38f3Ntr" && export POSTGRES_PORT=5433 + - export POSTGRES_USER="postgres" && export POSTGRES_PASSWORD="hj38f3Ntr" && export POSTGRES_PORT=5432 - python3 -m coverage run ./manage.py test - codecov - rm -rf chord_metadata_service From 9bca906597bb472727e65e17189bfea92405bd02 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:32:13 -0500 Subject: [PATCH 08/15] Add ingest test --- .../chord/tests/example_ingest.py | 234 ++++++++++++++++++ .../chord/tests/test_api_ingest.py | 89 +++++++ .../chord/tests/test_ingest.py | 133 ++++------ 3 files changed, 375 insertions(+), 81 deletions(-) create mode 100644 chord_metadata_service/chord/tests/example_ingest.py create mode 100644 chord_metadata_service/chord/tests/test_api_ingest.py diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py new file mode 100644 index 000000000..16d56a4b2 --- /dev/null +++ b/chord_metadata_service/chord/tests/example_ingest.py @@ -0,0 +1,234 @@ +EXAMPLE_INGEST = { + "subject": { + "id": "patient1", + "date_of_birth": "1964-03-15T00:00:00Z", + "sex": "MALE", + "karyotypic_sex": "UNKNOWN_KARYOTYPE" + }, + "phenotypic_features": [ + { + "description": "", + "type": { + "id": "HP:0000790", + "label": "Hematuria" + }, + "negated": False, + "modifier": [], + "evidence": [] + }, + { + "description": "", + "type": { + "id": "HP:0100518", + "label": "Dysuria" + }, + "negated": False, + "severity": { + "id": "HP:0012828", + "label": "Severe" + }, + "modifier": [], + "evidence": [] + } + ], + "diseases": [ + { + "term": { + "id": "NCIT:C39853", + "label": "Infiltrating Urothelial Carcinoma" + }, + "disease_stage": [ + { + "id": "NCIT:C48766", + "label": "pT2b Stage Finding" + }, + { + "id": "NCIT:C48750", + "label": "pN2 Stage Finding" + } + ] + } + ], + "meta_data": { + "created": "2019-04-03T15:31:40.765Z", + "created_by": "Peter R", + "submitted_by": "Peter R", + "resources": [ + { + "id": "hp", + "name": "human phenotype ontology", + "namespace_prefix": "HP", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2019-04-08", + "iri_prefix": "http://purl.obolibrary.org/obo/HP_" + }, + { + "id": "uberon", + "name": "uber anatomy ontology", + "namespace_prefix": "UBERON", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2019-03-08", + "iri_prefix": "http://purl.obolibrary.org/obo/UBERON_" + }, + { + "id": "ncit", + "name": "NCI Thesaurus OBO Edition", + "namespace_prefix": "NCIT", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "18.05d", + "iri_prefix": "http://purl.obolibrary.org/obo/NCIT_" + } + ], + "updated": [], + "external_references": [ + { + "id": "PMID:29221636", + "description": "Urothelial neoplasms in pediatric and young adult patients: A large single-center " + "series" + } + ] + }, + "biosamples": [ + { + "id": "sample1", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON_0001256", + "label": "wall of urinary bladder" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C39853", + "label": "Infiltrating Urothelial Carcinoma" + }, + "tumor_progression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C5189", + "label": "Radical Cystoprostatectomy" + } + }, + "is_control_sample": False + }, + { + "id": 
"sample2", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0002367", + "label": "prostate gland" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C5596", + "label": "Prostate Acinar Adenocarcinoma" + }, + "tumor_progression": { + "id": "NCIT:C95606", + "label": "Second Primary Malignant Neoplasm" + }, + "tumor_grade": { + "id": "NCIT:C28091", + "label": "Gleason Score 7" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + }, + { + "id": "sample3", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0001223", + "label": "left ureter" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + }, + { + "id": "sample4", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0001222", + "label": "right ureter" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "histological_diagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + }, + { + "id": "sample5", + "individual_id": "patient1", + "description": "", + "sampled_tissue": { + "id": "UBERON:0015876", + "label": "pelvic lymph node" + }, + "phenotypic_features": [], + "individual_age_at_collection": { + "age": "P52Y2M" + }, + "tumor_progression": { + "id": "NCIT:C3261", + "label": "Metastatic Neoplasm" + }, + "disease_stage": [], + "diagnostic_markers": [], + "procedure": { + "code": { + "id": "NCIT:C15189", + "label": "Biopsy" + } + }, + "is_control_sample": False + } + ] +} diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py new file mode 100644 index 000000000..5e028e410 --- /dev/null +++ b/chord_metadata_service/chord/tests/test_api_ingest.py @@ -0,0 +1,89 @@ +import json + +from django.test import override_settings +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APITestCase +from uuid import uuid4 + +from .constants import * +from ..views_ingest import METADATA_WORKFLOWS + + +def generate_ingest(table_id): + return { + "table_id": table_id, + "workflow_id": "phenopackets_json", + "workflow_metadata": METADATA_WORKFLOWS["ingestion"]["phenopackets_json"], + "workflow_outputs": { + "json_document": "" # TODO + }, + "workflow_params": { + "json_document": "" # TODO + } + } + + +class WorkflowTest(APITestCase): + def test_workflows(self): + r = self.client.get(reverse("workflows"), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_200_OK) + self.assertDictEqual(r.json(), METADATA_WORKFLOWS) + + # Non-existent workflow + r = self.client.get(reverse("workflow-detail", args=("invalid_workflow",)), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) + + # Valid workflow + r = self.client.get(reverse("workflow-detail", 
args=("phenopackets_json",)), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_200_OK) + self.assertDictEqual(r.json(), METADATA_WORKFLOWS["ingestion"]["phenopackets_json"]) + + # Non-existent workflow file + r = self.client.get(reverse("workflow-file", args=("invalid_workflow",)), content_type="text/plain") + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) + + # Valid workflow file + r = self.client.get(reverse("workflow-file", args=("phenopackets_json",)), content_type="text/plain") + self.assertEqual(r.status_code, status.HTTP_200_OK) + # TODO: Check file contents + + +class IngestTest(APITestCase): + @override_settings(AUTH_OVERRIDE=True) # For permissions + def setUp(self) -> None: + r = self.client.post(reverse("project-list"), data=json.dumps(VALID_PROJECT_1), content_type="application/json") + self.project = r.json() + + r = self.client.post(reverse("dataset-list"), data=json.dumps(valid_dataset_1(self.project["identifier"])), + content_type="application/json") + self.dataset = r.json() + + @override_settings(AUTH_OVERRIDE=True) # For permissions + def test_ingest(self): + # No ingestion body + r = self.client.post(reverse("ingest"), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Invalid ingestion request + r = self.client.post(reverse("ingest"), data=json.dumps({}), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Non-existent dataset ID + r = self.client.post(reverse("ingest"), data=json.dumps(generate_ingest(str(uuid4()))), + content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Non-existent workflow ID + bad_wf = generate_ingest(self.dataset["identifier"]) + bad_wf["workflow_id"] += "_invalid" + r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # json_document not in output + bad_wf = generate_ingest(self.dataset["identifier"]) + bad_wf["workflow_outputs"] = {} + r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # TODO: More diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index 5e028e410..974049b1e 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -1,89 +1,60 @@ -import json +from django.test import TestCase +from dateutil.parser import isoparse -from django.test import override_settings -from django.urls import reverse -from rest_framework import status -from rest_framework.test import APITestCase -from uuid import uuid4 +from chord_metadata_service.chord.models import Project, Dataset +from chord_metadata_service.chord.views_ingest import create_phenotypic_feature, ingest_phenopacket +from chord_metadata_service.phenopackets.models import PhenotypicFeature, Phenopacket -from .constants import * -from ..views_ingest import METADATA_WORKFLOWS +from .constants import VALID_DATA_USE_1 +from .example_ingest import EXAMPLE_INGEST -def generate_ingest(table_id): - return { - "table_id": table_id, - "workflow_id": "phenopackets_json", - "workflow_metadata": METADATA_WORKFLOWS["ingestion"]["phenopackets_json"], - "workflow_outputs": { - "json_document": "" # TODO - }, - "workflow_params": { - "json_document": "" # TODO - } - } - 
- -class WorkflowTest(APITestCase): - def test_workflows(self): - r = self.client.get(reverse("workflows"), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_200_OK) - self.assertDictEqual(r.json(), METADATA_WORKFLOWS) - - # Non-existent workflow - r = self.client.get(reverse("workflow-detail", args=("invalid_workflow",)), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) - - # Valid workflow - r = self.client.get(reverse("workflow-detail", args=("phenopackets_json",)), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_200_OK) - self.assertDictEqual(r.json(), METADATA_WORKFLOWS["ingestion"]["phenopackets_json"]) - - # Non-existent workflow file - r = self.client.get(reverse("workflow-file", args=("invalid_workflow",)), content_type="text/plain") - self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) - - # Valid workflow file - r = self.client.get(reverse("workflow-file", args=("phenopackets_json",)), content_type="text/plain") - self.assertEqual(r.status_code, status.HTTP_200_OK) - # TODO: Check file contents - - -class IngestTest(APITestCase): - @override_settings(AUTH_OVERRIDE=True) # For permissions +class IngestTest(TestCase): def setUp(self) -> None: - r = self.client.post(reverse("project-list"), data=json.dumps(VALID_PROJECT_1), content_type="application/json") - self.project = r.json() - - r = self.client.post(reverse("dataset-list"), data=json.dumps(valid_dataset_1(self.project["identifier"])), - content_type="application/json") - self.dataset = r.json() - - @override_settings(AUTH_OVERRIDE=True) # For permissions - def test_ingest(self): - # No ingestion body - r = self.client.post(reverse("ingest"), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - - # Invalid ingestion request - r = self.client.post(reverse("ingest"), data=json.dumps({}), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - - # Non-existent dataset ID - r = self.client.post(reverse("ingest"), data=json.dumps(generate_ingest(str(uuid4()))), - content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - - # Non-existent workflow ID - bad_wf = generate_ingest(self.dataset["identifier"]) - bad_wf["workflow_id"] += "_invalid" - r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + p = Project.objects.create(title="Project 1", description="") + self.d = Dataset.objects.create(title="Dataset 1", description="Some dataset", data_use=VALID_DATA_USE_1, + project=p) + + def test_create_pf(self): + p1 = create_phenotypic_feature({ + "description": "test", + "type": { + "id": "HP:0000790", + "label": "Hematuria" + }, + "negated": False, + "modifier": [], + "evidence": [] + }) + + p2 = PhenotypicFeature.objects.get(description="test") + + self.assertEqual(p1.pk, p2.pk) + + def test_ingesting_json(self): + p = ingest_phenopacket(EXAMPLE_INGEST, self.d.identifier) + self.assertEqual(p.id, Phenopacket.objects.get(id=p.id).id) + + self.assertEqual(p.subject.id, EXAMPLE_INGEST["subject"]["id"]) + self.assertEqual(p.subject.date_of_birth, isoparse(EXAMPLE_INGEST["subject"]["date_of_birth"])) + self.assertEqual(p.subject.sex, EXAMPLE_INGEST["subject"]["sex"]) + self.assertEqual(p.subject.karyotypic_sex, EXAMPLE_INGEST["subject"]["karyotypic_sex"]) + + pfs = 
list(p.phenotypic_features.all().order_by("pftype__id")) + + self.assertEqual(len(pfs), 2) + self.assertEqual(pfs[0].description, EXAMPLE_INGEST["phenotypic_features"][0]["description"]) + self.assertEqual(pfs[0].pftype["id"], EXAMPLE_INGEST["phenotypic_features"][0]["type"]["id"]) + self.assertEqual(pfs[0].pftype["label"], EXAMPLE_INGEST["phenotypic_features"][0]["type"]["label"]) + self.assertEqual(pfs[0].negated, EXAMPLE_INGEST["phenotypic_features"][0]["negated"]) + # TODO: Test more properties + + diseases = list(p.diseases.all().order_by("term__id")) + self.assertEqual(len(diseases), 1) + # TODO: More - # json_document not in output - bad_wf = generate_ingest(self.dataset["identifier"]) - bad_wf["workflow_outputs"] = {} - r = self.client.post(reverse("ingest"), data=json.dumps(bad_wf), content_type="application/json") - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + # TODO: Test Metadata + biosamples = list(p.biosamples.all().order_by("id")) + self.assertEqual(len(biosamples), 5) # TODO: More From d7efc9b7024f0400ce015fbabb11c41d7fad638e Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:32:21 -0500 Subject: [PATCH 09/15] Reformat chord.tests.constants --- chord_metadata_service/chord/tests/constants.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/chord/tests/constants.py b/chord_metadata_service/chord/tests/constants.py index 429b6af74..957e2af8c 100644 --- a/chord_metadata_service/chord/tests/constants.py +++ b/chord_metadata_service/chord/tests/constants.py @@ -92,7 +92,11 @@ def dats_dataset(project_id, creators): } ], "title": "1000 Genomes Project", - "description": "The 1000 Genomes Project provides a comprehensive description of common human variation by applying a combination of whole-genome sequencing, deep exome sequencing and dense microarray genotyping to a diverse set of 2504 individuals from 26 populations. Over 88 million variants are characterised, including >99% of SNP variants with a frequency of >1% for a variety of ancestries.", + "description": "The 1000 Genomes Project provides a comprehensive description of common human variation by " + "applying a combination of whole-genome sequencing, deep exome sequencing and dense microarray " + "genotyping to a diverse set of 2504 individuals from 26 populations. 
Over 88 million variants " + "are characterised, including >99% of SNP variants with a frequency of >1% for a variety of " + "ancestries.", "storedIn": { "name": "European Bioinformatics Institute" }, @@ -140,13 +144,14 @@ def dats_dataset(project_id, creators): "category": "contact", "values": [ { - "value": "Jennifer Tremblay-Mercier, Research Co-ordinator, jennifer.tremblay-mercier@douglas.mcgill.ca, 514-761-6131 #3329" + "value": "Jennifer Tremblay-Mercier, Research Co-ordinator, " + "jennifer.tremblay-mercier@douglas.mcgill.ca, 514-761-6131 #3329" } ] } ], "data_use": VALID_DATA_USE_1 -} + } TEST_SEARCH_QUERY_1 = ["#eq", ["#resolve", "subject", "sex"], "FEMALE"] From 7a2a392af68860d6e12aaaf8fc7b7e977d338702 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:32:35 -0500 Subject: [PATCH 10/15] Fix issue with ingesting phenopacket phenotypic features --- chord_metadata_service/chord/views_ingest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index df4d60d3a..1f46b9359 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -162,7 +162,7 @@ def ingest_phenopacket(phenopacket_data, table_id): new_phenopacket_id = str(uuid.uuid4()) # TODO: Is this provided? subject = phenopacket_data.get("subject", None) - phenotypic_features = phenopacket_data.get("phenotypicFeatures", []) + phenotypic_features = phenopacket_data.get("phenotypic_features", []) biosamples = phenopacket_data.get("biosamples", []) genes = phenopacket_data.get("genes", []) diseases = phenopacket_data.get("diseases", []) @@ -254,10 +254,12 @@ def ingest_phenopacket(phenopacket_data, table_id): meta_data=meta_data_obj, dataset=Dataset.objects.get(identifier=table_id) ) - new_phenopacket.save() new_phenopacket.phenotypic_features.set(phenotypic_features_db) new_phenopacket.biosamples.set(biosamples_db) new_phenopacket.genes.set(genes_db) new_phenopacket.diseases.set(diseases_db) + + new_phenopacket.save() + return new_phenopacket From c84870dce85fe11ed484c270dba5654b0608ed32 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 14:36:00 -0500 Subject: [PATCH 11/15] Test double-ingest --- chord_metadata_service/chord/tests/test_ingest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index 974049b1e..9924cd56b 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -58,3 +58,8 @@ def test_ingesting_json(self): biosamples = list(p.biosamples.all().order_by("id")) self.assertEqual(len(biosamples), 5) # TODO: More + + # Test ingesting again + p2 = ingest_phenopacket(EXAMPLE_INGEST, self.d.identifier) + self.assertNotEqual(p.id, p2.id) + # TODO: More From 95d9bc6a33ccc83a49e1f7bbcf47572caa7cc156 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 25 Feb 2020 15:24:50 -0500 Subject: [PATCH 12/15] Fix save order for phenopackets --- chord_metadata_service/chord/views_ingest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/chord/views_ingest.py b/chord_metadata_service/chord/views_ingest.py index 1f46b9359..041741309 100644 --- a/chord_metadata_service/chord/views_ingest.py +++ b/chord_metadata_service/chord/views_ingest.py @@ -255,11 +255,11 @@ def ingest_phenopacket(phenopacket_data, table_id): 
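
PATCH 10/15 moved the final save() after the many-to-many .set() calls, and PATCH 12/15 (the hunk below) moves it back in front of them. The standalone sketch that follows illustrates the underlying Django rule with throwaway stand-in models (Tag and Post are not part of this code base, and the in-memory SQLite setup exists only for demonstration): an instance must be saved before its many-to-many relations can be written. In the ingest code the phenopacket's primary key is assigned up front, so the symptom there is a database-level error for the missing parent row rather than the ValueError shown here, but the required ordering is the same.

# Sketch of why save() must come before many-to-many .set() in Django.
# Stand-in models and in-memory SQLite only; nothing here is project code.
import django
from django.conf import settings

settings.configure(
    INSTALLED_APPS=[],
    DATABASES={"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": ":memory:"}},
)
django.setup()

from django.db import connection, models


class Tag(models.Model):
    name = models.CharField(max_length=32)

    class Meta:
        app_label = "demo"  # throwaway label for this demonstration


class Post(models.Model):
    tags = models.ManyToManyField(Tag)

    class Meta:
        app_label = "demo"


# Create the tables (including the auto-generated M2M through table) in memory.
with connection.schema_editor() as editor:
    editor.create_model(Tag)
    editor.create_model(Post)

tag = Tag.objects.create(name="example")

post = Post()  # not saved yet, so it has no primary key
try:
    post.tags.set([tag])
except ValueError as exc:
    print("before save():", exc)  # the M2M rows need the parent row's primary key

post.save()           # assign a primary key first -- the ordering PATCH 12 restores
post.tags.set([tag])  # now the relation can be written
print("after save():", list(post.tags.values_list("name", flat=True)))
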
dataset=Dataset.objects.get(identifier=table_id) ) + new_phenopacket.save() + new_phenopacket.phenotypic_features.set(phenotypic_features_db) new_phenopacket.biosamples.set(biosamples_db) new_phenopacket.genes.set(genes_db) new_phenopacket.diseases.set(diseases_db) - new_phenopacket.save() - return new_phenopacket From 423670f6d0cc357302257107d5bc2f9813402dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Ch=C3=A9nard?= Date: Tue, 25 Feb 2020 16:53:53 -0500 Subject: [PATCH 13/15] fixed a few typos, some wording / stylistic changes --- chord_metadata_service/phenopackets/models.py | 2 +- docs/conf.py | 6 +++--- docs/modules/installation.rst | 6 +++--- docs/modules/introduction.rst | 16 ++++++++-------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/chord_metadata_service/phenopackets/models.py b/chord_metadata_service/phenopackets/models.py index 7767dcdcf..938f2668d 100644 --- a/chord_metadata_service/phenopackets/models.py +++ b/chord_metadata_service/phenopackets/models.py @@ -112,7 +112,7 @@ def __str__(self): class Procedure(models.Model): """ Class to represent a clinical procedure performed on an individual - (subject) in oder to extract a biosample + (subject) in order to extract a biosample FHIR: Procedure """ diff --git a/docs/conf.py b/docs/conf.py index de505a516..6ef8f6b1c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,8 +21,8 @@ # -- Project information ----------------------------------------------------- project = 'Metadata service' -copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chenard' -author = 'Ksenia Zaytseva, David Lougheed, Simon Chenard' +copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chénard' +author = 'Ksenia Zaytseva, David Lougheed, Simon Chénard' # The full version, including alpha/beta/rc tags release = '0.5.0' @@ -53,4 +53,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/docs/modules/installation.rst b/docs/modules/installation.rst index 561e8a04e..75d592b21 100644 --- a/docs/modules/installation.rst +++ b/docs/modules/installation.rst @@ -14,9 +14,9 @@ Installation git submodule update --init -3. Create and activate virtual environment +3. Create and activate a virtual environment -4. Cd to the main directory and install required packages: +4. Move to the main directory and install required packages: .. code-block:: @@ -49,4 +49,4 @@ e.g. settings if running database on localhost, default port for PostgreSQL is 5 python manage.py migrate python manage.py runserver -8. Development server runs at :code:`localhost:8000` \ No newline at end of file +8. Development server runs at :code:`localhost:8000` diff --git a/docs/modules/introduction.rst b/docs/modules/introduction.rst index d90e2990a..1431b3ecf 100644 --- a/docs/modules/introduction.rst +++ b/docs/modules/introduction.rst @@ -19,8 +19,8 @@ Besides PostgreSQL the data can be indexed and queried in Elasticsearch. Architecture ------------ -Metadata Service contains several services that share one API. -Services depend on each other and separated based on their scope. +The Metadata Service contains several services that share one API. +Services depend on each other and are separated based on their scope. **1. Patients service** handles anonymized individual’s data (e.g. 
individual id, sex, age or date of birth) @@ -68,16 +68,16 @@ REST API highlights - Other available renderers: - - Currently the following classes can be retirved in FHIR format by appending :code:`?format=fhir`: Phenopackets, Individual, Biosample, PhenotypicFeature, HtsFile, Gene, Variants, Disease, Procedure. + - Currently the following classes can be retrieved in FHIR format by appending :code:`?format=fhir`: Phenopacket, Individual, Biosample, PhenotypicFeature, HtsFile, Gene, Variant, Disease, Procedure. - - JSON-LD context to schema.org provided for Dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. + - JSON-LD context to schema.org provided for dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. - - Dataset description can also be retrived in RDF format: append :code:`?format=rdf` when querying dataset endpoint. + - Dataset description can also be retrieved in RDF format: append :code:`?format=rdf` when querying dataset endpoint. **Data ingest** Currently only the data that follow Phenopackets schema can be ingested. -Ingest endpoint is :code:`/private/ingest` . +Ingest endpoint is :code:`/private/ingest`. Example of POST request body: .. code-block:: @@ -118,7 +118,7 @@ Elasticsearch index (optional) ------------------------------ Data in FHIR format can be indexed in Elasticsearch - this is optional. -If an Elasticsearch instance is running on the server (so on :code:`localhost:9000`) these models will automatically be indexed on creation/update. +If an Elasticsearch instance is running on the server (so on :code:`localhost:9000`) these models will be automatically indexed on creation/update. There are also two scripts provided to update these indexes all at once: .. code-block:: @@ -126,7 +126,7 @@ There are also two scripts provided to update these indexes all at once: python manage.py patients_build_index python manage.py phenopackets_build_index -To query this information, here is an example request : +To query this information, here is an example request: .. code-block:: From c7febd83d984d085f3397c3b3c8866a45b91ecb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Ch=C3=A9nard?= Date: Tue, 25 Feb 2020 17:18:37 -0500 Subject: [PATCH 14/15] few more corrections as per David's suggestions --- docs/modules/introduction.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/modules/introduction.rst b/docs/modules/introduction.rst index 1431b3ecf..68b18250c 100644 --- a/docs/modules/introduction.rst +++ b/docs/modules/introduction.rst @@ -70,9 +70,9 @@ REST API highlights - Currently the following classes can be retrieved in FHIR format by appending :code:`?format=fhir`: Phenopacket, Individual, Biosample, PhenotypicFeature, HtsFile, Gene, Variant, Disease, Procedure. - - JSON-LD context to schema.org provided for dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. + - JSON-LD context to schema.org provided for the Dataset class in order to allow for a Google dataset search for Open Access Data: append :code:`?format=json-ld` when querying dataset endpoint. - - Dataset description can also be retrieved in RDF format: append :code:`?format=rdf` when querying dataset endpoint. 
+ - Dataset description can also be retrieved in RDF format: append :code:`?format=rdf` when querying the dataset endpoint. **Data ingest** @@ -126,7 +126,7 @@ There are also two scripts provided to update these indexes all at once: python manage.py patients_build_index python manage.py phenopackets_build_index -To query this information, here is an example request: +Here is an example request for querying this information: .. code-block:: From e7e5363d18a0d8f933160aa3c07390fa14a08f08 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 26 Feb 2020 11:46:07 -0500 Subject: [PATCH 15/15] Bump version to 0.5.1 --- docs/conf.py | 8 ++++---- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index de505a516..b009b01e2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,11 +21,11 @@ # -- Project information ----------------------------------------------------- project = 'Metadata service' -copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chenard' -author = 'Ksenia Zaytseva, David Lougheed, Simon Chenard' +copyright = '2020, Ksenia Zaytseva, David Lougheed, Simon Chénard' +author = 'Ksenia Zaytseva, David Lougheed, Simon Chénard' # The full version, including alpha/beta/rc tags -release = '0.5.0' +release = '0.5.1' # -- General configuration --------------------------------------------------- @@ -53,4 +53,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/requirements.txt b/requirements.txt index 5bdf14221..4a80cafaf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ attrs==19.3.0 certifi==2019.11.28 chardet==3.0.4 chord-lib==0.5.0 -codecov==2.0.15 +codecov==2.0.16 coreapi==2.3.3 coreschema==0.0.4 coverage==5.0.3 diff --git a/setup.py b/setup.py index 91ad47d8f..4463e5687 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setuptools.setup( name="chord_metadata_service", - version="0.5.0", + version="0.5.1", python_requires=">=3.6", install_requires=[