From 798f8137f3bb1897f56657507813e8a4521e63ca Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Fri, 22 Dec 2023 20:41:18 +0000 Subject: [PATCH 01/11] fix genomic interpretation ingestion --- .../chord/ingest/phenopackets.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 4c70f86b0..37ea0f1dd 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -236,14 +236,14 @@ def get_or_create_genomic_interpretation(gen_interp: dict) -> pm.GenomicInterpre gene_descriptor = _get_or_create_opt("gene_descriptor", gen_interp, get_or_create_gene_descriptor) variant_interpretation = _get_or_create_opt("variant_interpretation", gen_interp, get_or_create_variant_interp) - gen_obj, _ = pm.GenomicInterpretation.objects.get_or_create( + gen_obj, created = pm.GenomicInterpretation.objects.get_or_create( interpretation_status=gen_interp["interpretation_status"], gene_descriptor=gene_descriptor, variant_interpretation=variant_interpretation, extra_properties=gen_interp.get("extra_properties", {}), ) - if related_obj: + if related_obj and created: # Set the link with Biosample/Individual related_obj.genomic_interpretations.add(gen_obj) related_obj.save() @@ -263,20 +263,20 @@ def get_or_create_disease(disease) -> pm.Disease: def get_or_create_diagnosis(diagnosis: dict) -> pm.Diagnosis: - # Create GenomicInterpretation - genomic_interpretations_data = diagnosis.get("genomic_interpretations", []) - genomic_interpretations = [ - get_or_create_genomic_interpretation(gen_interp) - for gen_interp - in genomic_interpretations_data - ] - # disease = pm.Disease.objects.get_or_create(diagnosis["disease"]) - diag_obj, _ = pm.Diagnosis.objects.get_or_create( + diag_obj, created = pm.Diagnosis.objects.get_or_create( disease=diagnosis.get("disease", {}), - extra_properties=diagnosis.get("extra_properties", {}) + extra_properties=diagnosis.get("extra_properties", {}), ) - # diag_obj.disease.set(disease) - diag_obj.genomic_interpretations.set(genomic_interpretations) + + if created: + # Create GenomicInterpretation + genomic_interpretations_data = diagnosis.get("genomic_interpretations", []) + genomic_interpretations = [ + get_or_create_genomic_interpretation(gen_interp) + for gen_interp + in genomic_interpretations_data + ] + diag_obj.genomic_interpretations.set(genomic_interpretations) return diag_obj From 014452c0cb34f43172d719b800e6bf343cc8e48b Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 3 Jan 2024 14:50:13 -0500 Subject: [PATCH 02/11] one-to-one interpretation and diagnosis --- .../chord/ingest/phenopackets.py | 12 ++++++++-- ...nosis_id_alter_interpretation_diagnosis.py | 24 +++++++++++++++++++ chord_metadata_service/phenopackets/models.py | 7 ++++-- .../phenopackets/tests/constants.py | 3 ++- .../phenopackets/tests/test_api.py | 4 +++- 5 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 37ea0f1dd..53dff2535 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -262,8 +262,16 @@ def get_or_create_disease(disease) -> pm.Disease: return d_obj -def get_or_create_diagnosis(diagnosis: dict) -> pm.Diagnosis: +def get_or_create_interpretation_diagnosis(interpretation: dict) -> pm.Diagnosis: + diagnosis = interpretation.get("diagnosis") + + if not diagnosis: + return + + # If an interpretation has a diagnosis, the diagnosis shares the interpretation's ID + id = interpretation.get("id") diag_obj, created = pm.Diagnosis.objects.get_or_create( + id=id, disease=diagnosis.get("disease", {}), extra_properties=diagnosis.get("extra_properties", {}), ) @@ -281,7 +289,7 @@ def get_or_create_diagnosis(diagnosis: dict) -> pm.Diagnosis: def get_or_create_interpretation(interpretation: dict) -> pm.Interpretation: - diagnosis = get_or_create_diagnosis(interpretation["diagnosis"]) + diagnosis = get_or_create_interpretation_diagnosis(interpretation) interp_obj, _ = pm.Interpretation.objects.get_or_create( id=interpretation["id"], diagnosis=diagnosis, diff --git a/chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py b/chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py new file mode 100644 index 000000000..43172b1e4 --- /dev/null +++ b/chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.7 on 2024-01-03 19:19 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('phenopackets', '0008_v6_0_0'), + ] + + operations = [ + migrations.AlterField( + model_name='diagnosis', + name='id', + field=models.CharField(max_length=200, primary_key=True, serialize=False), + ), + migrations.AlterField( + model_name='interpretation', + name='diagnosis', + field=models.OneToOneField(blank=True, help_text='One or more diagnoses, if made.', null=True, on_delete=django.db.models.deletion.CASCADE, to='phenopackets.diagnosis'), + ), + ] diff --git a/chord_metadata_service/phenopackets/models.py b/chord_metadata_service/phenopackets/models.py index a50640655..301d1bccd 100644 --- a/chord_metadata_service/phenopackets/models.py +++ b/chord_metadata_service/phenopackets/models.py @@ -356,6 +356,7 @@ class Diagnosis(BaseTimeStamp): FHIR: Condition """ + id = models.CharField(primary_key=True, max_length=200) disease = models.JSONField(null=True, blank=True, validators=[ontology_validator]) genomic_interpretations = models.ManyToManyField( GenomicInterpretation, blank=True, @@ -389,8 +390,10 @@ class Interpretation(BaseTimeStamp): id = models.CharField(primary_key=True, max_length=200, help_text='An arbitrary identifier for the interpretation.') progress_status = models.CharField(choices=PROGRESS_STATUS, max_length=200, blank=True, help_text='The current status of work on the case.') - diagnosis = models.ForeignKey(Diagnosis, blank=True, null=True, on_delete=models.CASCADE, - help_text='One or more diagnoses, if made.') + # diagnosis = models.ForeignKey(Diagnosis, blank=True, null=True, on_delete=models.CASCADE, + # help_text='One or more diagnoses, if made.') + diagnosis = models.OneToOneField(Diagnosis, blank=True, null=True, on_delete=models.CASCADE, + help_text='One or more diagnoses, if made.') summary = models.CharField(max_length=200, blank=True, help_text='Free text summary of the interpretation.') extra_properties = JSONField(blank=True, null=True, help_text='Extra properties that are not supported by current schema') diff --git a/chord_metadata_service/phenopackets/tests/constants.py b/chord_metadata_service/phenopackets/tests/constants.py index fd7e25726..085637b2a 100644 --- a/chord_metadata_service/phenopackets/tests/constants.py +++ b/chord_metadata_service/phenopackets/tests/constants.py @@ -603,8 +603,9 @@ def valid_genomic_interpretation(gene_descriptor=None, variant_interpretation=No return base -def valid_diagnosis(disease): +def valid_diagnosis(disease, id="interpretation:1"): return dict( + id=id, disease=disease, extra_properties={ "comment": "test data" diff --git a/chord_metadata_service/phenopackets/tests/test_api.py b/chord_metadata_service/phenopackets/tests/test_api.py index bdd3ef8b3..592bb8853 100644 --- a/chord_metadata_service/phenopackets/tests/test_api.py +++ b/chord_metadata_service/phenopackets/tests/test_api.py @@ -260,11 +260,13 @@ class CreateDiagnosisTest(APITestCase): def setUp(self): self.disease_ontology = c.VALID_DISEASE_ONTOLOGY - self.diagnosis = c.valid_diagnosis(self.disease_ontology) + self.diagnosis = c.valid_diagnosis(self.disease_ontology, "interpretation:unique_id") def test_diagnosis(self): response = get_post_response('diagnoses-list', self.diagnosis) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + def test_serializer(self): serializer = s.DiagnosisSerializer(data=self.diagnosis) self.assertEqual(serializer.is_valid(), True) From d781d14698068a2a05d41aeb1fec4e4bf7bd96db Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 3 Jan 2024 20:33:47 +0000 Subject: [PATCH 03/11] merge migrations --- .../phenopackets/migrations/0008_v6_0_0.py | 7 +++++- ...nosis_id_alter_interpretation_diagnosis.py | 24 ------------------- 2 files changed, 6 insertions(+), 25 deletions(-) delete mode 100644 chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py diff --git a/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py b/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py index 4d8f30cef..6aad44ab9 100644 --- a/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py +++ b/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py @@ -476,7 +476,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='interpretation', name='diagnosis', - field=models.ForeignKey(blank=True, help_text='One or more diagnoses, if made.', null=True, on_delete=django.db.models.deletion.CASCADE, to='phenopackets.diagnosis'), + field=models.OneToOneField(blank=True, help_text='One or more diagnoses, if made.', null=True, on_delete=django.db.models.deletion.CASCADE, to='phenopackets.diagnosis'), ), migrations.DeleteModel( name='Gene', @@ -491,4 +491,9 @@ class Migration(migrations.Migration): name='subject', field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='genomic_interpretations', to='patients.individual'), ), + migrations.AlterField( + model_name='diagnosis', + name='id', + field=models.CharField(max_length=200, primary_key=True, serialize=False), + ), ] diff --git a/chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py b/chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py deleted file mode 100644 index 43172b1e4..000000000 --- a/chord_metadata_service/phenopackets/migrations/0009_alter_diagnosis_id_alter_interpretation_diagnosis.py +++ /dev/null @@ -1,24 +0,0 @@ -# Generated by Django 4.2.7 on 2024-01-03 19:19 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('phenopackets', '0008_v6_0_0'), - ] - - operations = [ - migrations.AlterField( - model_name='diagnosis', - name='id', - field=models.CharField(max_length=200, primary_key=True, serialize=False), - ), - migrations.AlterField( - model_name='interpretation', - name='diagnosis', - field=models.OneToOneField(blank=True, help_text='One or more diagnoses, if made.', null=True, on_delete=django.db.models.deletion.CASCADE, to='phenopackets.diagnosis'), - ), - ] From 2531e3940d3c1a6473129306ea9df6c4e9b49113 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 3 Jan 2024 16:50:58 -0500 Subject: [PATCH 04/11] help text, lint --- .../phenopackets/migrations/0008_v6_0_0.py | 4 ++-- chord_metadata_service/phenopackets/models.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py b/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py index 6aad44ab9..f9d76ce7d 100644 --- a/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py +++ b/chord_metadata_service/phenopackets/migrations/0008_v6_0_0.py @@ -476,7 +476,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='interpretation', name='diagnosis', - field=models.OneToOneField(blank=True, help_text='One or more diagnoses, if made.', null=True, on_delete=django.db.models.deletion.CASCADE, to='phenopackets.diagnosis'), + field=models.OneToOneField(blank=True, help_text='The diagnosis, if made.', null=True, on_delete=django.db.models.deletion.CASCADE, to='phenopackets.diagnosis'), ), migrations.DeleteModel( name='Gene', @@ -494,6 +494,6 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='diagnosis', name='id', - field=models.CharField(max_length=200, primary_key=True, serialize=False), + field=models.CharField(help_text="Unique ID, uses the parent Interpretation's id when ingesting.", max_length=200, primary_key=True, serialize=False), ), ] diff --git a/chord_metadata_service/phenopackets/models.py b/chord_metadata_service/phenopackets/models.py index 301d1bccd..17f65f77b 100644 --- a/chord_metadata_service/phenopackets/models.py +++ b/chord_metadata_service/phenopackets/models.py @@ -356,7 +356,8 @@ class Diagnosis(BaseTimeStamp): FHIR: Condition """ - id = models.CharField(primary_key=True, max_length=200) + id = models.CharField(primary_key=True, max_length=200, + help_text="Unique ID, uses the parent Interpretation's id when ingesting.") disease = models.JSONField(null=True, blank=True, validators=[ontology_validator]) genomic_interpretations = models.ManyToManyField( GenomicInterpretation, blank=True, @@ -390,10 +391,8 @@ class Interpretation(BaseTimeStamp): id = models.CharField(primary_key=True, max_length=200, help_text='An arbitrary identifier for the interpretation.') progress_status = models.CharField(choices=PROGRESS_STATUS, max_length=200, blank=True, help_text='The current status of work on the case.') - # diagnosis = models.ForeignKey(Diagnosis, blank=True, null=True, on_delete=models.CASCADE, - # help_text='One or more diagnoses, if made.') diagnosis = models.OneToOneField(Diagnosis, blank=True, null=True, on_delete=models.CASCADE, - help_text='One or more diagnoses, if made.') + help_text='The diagnosis, if made.') summary = models.CharField(max_length=200, blank=True, help_text='Free text summary of the interpretation.') extra_properties = JSONField(blank=True, null=True, help_text='Extra properties that are not supported by current schema') From c36bd46789d072d9e6322cfe42b9778d8c2487b8 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 4 Jan 2024 10:37:03 -0500 Subject: [PATCH 05/11] ingestion clarification comment --- chord_metadata_service/chord/ingest/phenopackets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 53dff2535..ed5a2cec9 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -268,7 +268,9 @@ def get_or_create_interpretation_diagnosis(interpretation: dict) -> pm.Diagnosis if not diagnosis: return - # If an interpretation has a diagnosis, the diagnosis shares the interpretation's ID + # One-to-one relation between Interpretation and Diagnosis. + # If an Interpretation has a Diagnosis, the created Diagnosis row uses the interpretation's ID as its PK + # This ensures unique diagnoses if more than one share the same disease/extra_properties id = interpretation.get("id") diag_obj, created = pm.Diagnosis.objects.get_or_create( id=id, From 38b093599afa28a4b35cb256662ce1f95031e4a1 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 4 Jan 2024 18:22:50 +0000 Subject: [PATCH 06/11] adress review comments --- chord_metadata_service/chord/ingest/phenopackets.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index ed5a2cec9..9da15cbc1 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -236,14 +236,14 @@ def get_or_create_genomic_interpretation(gen_interp: dict) -> pm.GenomicInterpre gene_descriptor = _get_or_create_opt("gene_descriptor", gen_interp, get_or_create_gene_descriptor) variant_interpretation = _get_or_create_opt("variant_interpretation", gen_interp, get_or_create_variant_interp) - gen_obj, created = pm.GenomicInterpretation.objects.get_or_create( + gen_obj, _ = pm.GenomicInterpretation.objects.get_or_create( interpretation_status=gen_interp["interpretation_status"], gene_descriptor=gene_descriptor, variant_interpretation=variant_interpretation, extra_properties=gen_interp.get("extra_properties", {}), ) - if related_obj and created: + if related_obj: # Set the link with Biosample/Individual related_obj.genomic_interpretations.add(gen_obj) related_obj.save() @@ -262,10 +262,11 @@ def get_or_create_disease(disease) -> pm.Disease: return d_obj -def get_or_create_interpretation_diagnosis(interpretation: dict) -> pm.Diagnosis: +def get_or_create_interpretation_diagnosis(interpretation: dict) -> pm.Diagnosis | None: diagnosis = interpretation.get("diagnosis") if not diagnosis: + # No diagnosis to create return # One-to-one relation between Interpretation and Diagnosis. From 43fb398de654ea83d8816e71da5a44fbb301e1de Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 4 Jan 2024 18:52:48 +0000 Subject: [PATCH 07/11] interpretation summary, genomic interp related type extra property --- chord_metadata_service/chord/ingest/phenopackets.py | 2 +- chord_metadata_service/phenopackets/serializers.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 9da15cbc1..d14554369 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -297,7 +297,7 @@ def get_or_create_interpretation(interpretation: dict) -> pm.Interpretation: id=interpretation["id"], diagnosis=diagnosis, progress_status=interpretation["progress_status"], - summary=interpretation.get("summary", {}), + summary=interpretation.get("summary", ""), extra_properties=interpretation.get("extra_properties", {}) ) diff --git a/chord_metadata_service/phenopackets/serializers.py b/chord_metadata_service/phenopackets/serializers.py index ed6f27314..acccd1e01 100644 --- a/chord_metadata_service/phenopackets/serializers.py +++ b/chord_metadata_service/phenopackets/serializers.py @@ -160,10 +160,18 @@ def to_representation(self, instance): instance.variant_interpretation, many=False, required=False).data # subject_or_biosample_id is obtained from the referenced subject/biosample + # the referenced object type is added to extra_properties to disambiguate on the client side + extra_properties = response.get("extra_properties", {}) if instance.subject: - response["subject_or_biosample_id"] = instance.subject.id + subject_or_biosample_id = instance.subject.id + related_type = "subject" elif instance.biosample: - response["subject_or_biosample_id"] = instance.biosample.id + subject_or_biosample_id = instance.biosample.id + related_type = "biosample" + + extra_properties["related_type"] = related_type + response["subject_or_biosample_id"] = subject_or_biosample_id + response["extra_properties"] = extra_properties return response From 26f4988c183a57e3c15a04d0735c297a05c86257 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 4 Jan 2024 19:19:44 +0000 Subject: [PATCH 08/11] fix test --- chord_metadata_service/phenopackets/serializers.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/chord_metadata_service/phenopackets/serializers.py b/chord_metadata_service/phenopackets/serializers.py index acccd1e01..d51e47461 100644 --- a/chord_metadata_service/phenopackets/serializers.py +++ b/chord_metadata_service/phenopackets/serializers.py @@ -163,16 +163,13 @@ def to_representation(self, instance): # the referenced object type is added to extra_properties to disambiguate on the client side extra_properties = response.get("extra_properties", {}) if instance.subject: - subject_or_biosample_id = instance.subject.id - related_type = "subject" + response["subject_or_biosample_id"] = instance.subject.id + extra_properties["related_type"] = "subject" elif instance.biosample: - subject_or_biosample_id = instance.biosample.id - related_type = "biosample" + response["subject_or_biosample_id"] = instance.biosample.id + extra_properties["related_type"] = "biosample" - extra_properties["related_type"] = related_type - response["subject_or_biosample_id"] = subject_or_biosample_id response["extra_properties"] = extra_properties - return response From 8652d5d1a6831bf0be63cbdc7454945b9cc305c8 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 4 Jan 2024 21:20:15 +0000 Subject: [PATCH 09/11] serializer computed extra_properties, clean computed props at ingestion --- .../chord/ingest/phenopackets.py | 26 +++++++++++++------ .../chord/tests/example_phenopacket_v2.json | 3 ++- .../chord/tests/test_ingest.py | 3 +++ .../phenopackets/serializers.py | 4 +-- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index d14554369..b2ecf0848 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -34,6 +34,16 @@ def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> O return obj +def _clean_extra_properties(extra_properties: dict) -> Dict: + """ + Removes computed properties from an extra_properties dictionary. + Computed extra_properties start with "__" and should never be ingested. + """ + if extra_properties: + return {k: v for k, v in extra_properties.items() if not k.startswith("__")} + return extra_properties + + def get_or_create_phenotypic_feature(pf: dict) -> pm.PhenotypicFeature: # Below is code for if we want to re-use phenotypic features in the future # For now, the lack of a many-to-many relationship doesn't let us do that. @@ -64,7 +74,7 @@ def get_or_create_phenotypic_feature(pf: dict) -> pm.PhenotypicFeature: severity=pf.get("severity"), onset=pf.get("onset"), evidence=pf.get("evidence"), # TODO: Separate class for evidence? - extra_properties=pf.get("extra_properties", {}), + extra_properties=_clean_extra_properties(pf.get("extra_properties", {})), ) pf_obj.save() return pf_obj @@ -84,7 +94,7 @@ def validate_phenopacket(phenopacket_data: dict[str, Any], def update_or_create_subject(subject: dict) -> pm.Individual: - extra_properties: dict[str, Any] = subject.get("extra_properties", {}) + extra_properties: dict[str, Any] = _clean_extra_properties(subject.get("extra_properties", {})) # Pre-process subject data: --------------------------------------------------------------------------------- @@ -145,7 +155,7 @@ def get_or_create_biosample(bs: dict) -> pm.Biosample: pathological_stage=bs.get("pathological_stage", {}), is_control_sample=bs.get("is_control_sample", False), diagnostic_markers=bs.get("diagnostic_markers", []), - extra_properties=bs.get("extra_properties", {}), + extra_properties=_clean_extra_properties(bs.get("extra_properties", {})), **bs_query ) @@ -240,7 +250,7 @@ def get_or_create_genomic_interpretation(gen_interp: dict) -> pm.GenomicInterpre interpretation_status=gen_interp["interpretation_status"], gene_descriptor=gene_descriptor, variant_interpretation=variant_interpretation, - extra_properties=gen_interp.get("extra_properties", {}), + extra_properties=_clean_extra_properties(gen_interp.get("extra_properties", {})), ) if related_obj: @@ -256,7 +266,7 @@ def get_or_create_disease(disease) -> pm.Disease: term=disease["term"], disease_stage=disease.get("disease_stage", []), clinical_tnm_finding=disease.get("clinical_tnm_finding", []), - extra_properties=disease.get("extra_properties", {}), + extra_properties=_clean_extra_properties(disease.get("extra_properties", {})), **query_and_check_nulls(disease, "onset") ) return d_obj @@ -276,7 +286,7 @@ def get_or_create_interpretation_diagnosis(interpretation: dict) -> pm.Diagnosis diag_obj, created = pm.Diagnosis.objects.get_or_create( id=id, disease=diagnosis.get("disease", {}), - extra_properties=diagnosis.get("extra_properties", {}), + extra_properties=_clean_extra_properties(diagnosis.get("extra_properties", {})), ) if created: @@ -298,7 +308,7 @@ def get_or_create_interpretation(interpretation: dict) -> pm.Interpretation: diagnosis=diagnosis, progress_status=interpretation["progress_status"], summary=interpretation.get("summary", ""), - extra_properties=interpretation.get("extra_properties", {}) + extra_properties=_clean_extra_properties(interpretation.get("extra_properties", {})) ) return interp_obj @@ -384,7 +394,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any], submitted_by=meta_data.get("submitted_by"), phenopacket_schema_version=meta_data.get("phenopacket_schema_version"), external_references=meta_data.get("external_references", []), - extra_properties=meta_data.get("extra_properties", {}), + extra_properties=_clean_extra_properties(meta_data.get("extra_properties", {})), ) meta_data_obj.save() diff --git a/chord_metadata_service/chord/tests/example_phenopacket_v2.json b/chord_metadata_service/chord/tests/example_phenopacket_v2.json index 290d17498..facdf5492 100644 --- a/chord_metadata_service/chord/tests/example_phenopacket_v2.json +++ b/chord_metadata_service/chord/tests/example_phenopacket_v2.json @@ -7,7 +7,8 @@ "karyotypic_sex": "UNKNOWN_KARYOTYPE", "extra_properties": { "cool_guy": true, - "smoker": false + "smoker": false, + "__computed": "This extra_property was computed and should not be ingested." } }, "phenotypic_features": [ diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index 3977049da..468505475 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -119,6 +119,9 @@ def test_ingesting_phenopackets_json(self): self.assertEqual(p.subject.sex, EXAMPLE_INGEST_PHENOPACKET["subject"]["sex"]) self.assertEqual(p.subject.karyotypic_sex, EXAMPLE_INGEST_PHENOPACKET["subject"]["karyotypic_sex"]) + self.assertIn("__computed", EXAMPLE_INGEST_PHENOPACKET["subject"]["extra_properties"]) + self.assertNotIn("__computed", p.subject.extra_properties) + pfs = list(p.phenotypic_features.all().order_by("pftype__id")) self.assertEqual(len(pfs), 2) diff --git a/chord_metadata_service/phenopackets/serializers.py b/chord_metadata_service/phenopackets/serializers.py index d51e47461..b1099ab4d 100644 --- a/chord_metadata_service/phenopackets/serializers.py +++ b/chord_metadata_service/phenopackets/serializers.py @@ -164,10 +164,10 @@ def to_representation(self, instance): extra_properties = response.get("extra_properties", {}) if instance.subject: response["subject_or_biosample_id"] = instance.subject.id - extra_properties["related_type"] = "subject" + extra_properties["__related_type"] = "subject" elif instance.biosample: response["subject_or_biosample_id"] = instance.biosample.id - extra_properties["related_type"] = "biosample" + extra_properties["__related_type"] = "biosample" response["extra_properties"] = extra_properties return response From 0d161afc8fcfca0c1f612357787ff223e2194f70 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 4 Jan 2024 16:30:36 -0500 Subject: [PATCH 10/11] improve comment --- chord_metadata_service/phenopackets/serializers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/phenopackets/serializers.py b/chord_metadata_service/phenopackets/serializers.py index b1099ab4d..868f44d89 100644 --- a/chord_metadata_service/phenopackets/serializers.py +++ b/chord_metadata_service/phenopackets/serializers.py @@ -159,8 +159,9 @@ def to_representation(self, instance): response["variant_interpretation"] = VariantInterpretationSerializer( instance.variant_interpretation, many=False, required=False).data - # subject_or_biosample_id is obtained from the referenced subject/biosample - # the referenced object type is added to extra_properties to disambiguate on the client side + # The 'subject_or_biosample_id' value is obtained from the referenced subject/biosample + # The '__related_type' property is added to extra_properties as a computed value ("__" prefix) + # This allows us to disambiguate on the client side for links extra_properties = response.get("extra_properties", {}) if instance.subject: response["subject_or_biosample_id"] = instance.subject.id From 3b5a27f3e8481fd9a11f7264f7ba3d5efa2476d4 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 4 Jan 2024 21:56:12 +0000 Subject: [PATCH 11/11] factor out computed extra_property code --- chord_metadata_service/chord/ingest/phenopackets.py | 4 ++-- chord_metadata_service/phenopackets/serializers.py | 7 +++++-- chord_metadata_service/restapi/utils.py | 9 +++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index b2ecf0848..03353dcd8 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -10,7 +10,7 @@ from chord_metadata_service.patients.values import KaryotypicSex from chord_metadata_service.restapi.schema_utils import patch_project_schemas from chord_metadata_service.restapi.types import ExtensionSchemaDict -from chord_metadata_service.restapi.utils import time_element_to_years +from chord_metadata_service.restapi.utils import COMPUTED_PROPERTY_PREFIX, time_element_to_years from .exceptions import IngestError from .resources import ingest_resource @@ -40,7 +40,7 @@ def _clean_extra_properties(extra_properties: dict) -> Dict: Computed extra_properties start with "__" and should never be ingested. """ if extra_properties: - return {k: v for k, v in extra_properties.items() if not k.startswith("__")} + return {k: v for k, v in extra_properties.items() if not k.startswith(COMPUTED_PROPERTY_PREFIX)} return extra_properties diff --git a/chord_metadata_service/phenopackets/serializers.py b/chord_metadata_service/phenopackets/serializers.py index 868f44d89..04ad56408 100644 --- a/chord_metadata_service/phenopackets/serializers.py +++ b/chord_metadata_service/phenopackets/serializers.py @@ -1,4 +1,6 @@ from rest_framework import serializers + +from chord_metadata_service.restapi.utils import computed_property from .models import ( MetaData, PhenotypicFeature, @@ -163,12 +165,13 @@ def to_representation(self, instance): # The '__related_type' property is added to extra_properties as a computed value ("__" prefix) # This allows us to disambiguate on the client side for links extra_properties = response.get("extra_properties", {}) + computed_related_type = computed_property("related_type") if instance.subject: response["subject_or_biosample_id"] = instance.subject.id - extra_properties["__related_type"] = "subject" + extra_properties[computed_related_type] = "subject" elif instance.biosample: response["subject_or_biosample_id"] = instance.biosample.id - extra_properties["__related_type"] = "biosample" + extra_properties[computed_related_type] = "biosample" response["extra_properties"] = extra_properties return response diff --git a/chord_metadata_service/restapi/utils.py b/chord_metadata_service/restapi/utils.py index 28be837a9..010b8fa0b 100644 --- a/chord_metadata_service/restapi/utils.py +++ b/chord_metadata_service/restapi/utils.py @@ -25,6 +25,8 @@ "biosample": pheno_models.Biosample, } +COMPUTED_PROPERTY_PREFIX = "__" + class BinWithValue(TypedDict): label: str @@ -691,3 +693,10 @@ def bento_public_format_count_and_stats_list(annotated_queryset) -> tuple[int, l stats_list.append({"label": label, "value": value}) return total, stats_list + + +def computed_property(name: str): + """ + Takes a name and returns it prefixed with "__" + """ + return COMPUTED_PROPERTY_PREFIX + name