From a30a41a33425d0a9f8873e745b925b219441aaa7 Mon Sep 17 00:00:00 2001 From: Paul Pillot Date: Tue, 19 Apr 2022 10:49:12 -0400 Subject: [PATCH] skip samples with no individual --- chord_metadata_service/chord/export_cbio.py | 4 ++++ .../chord/tests/test_export_cbio.py | 23 +++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/chord/export_cbio.py b/chord_metadata_service/chord/export_cbio.py index 95920b055..20788298f 100644 --- a/chord_metadata_service/chord/export_cbio.py +++ b/chord_metadata_service/chord/export_cbio.py @@ -175,6 +175,10 @@ def sample_export(results, file_handle: TextIO): samples = [] for sample in results: + # sample.individual can be null. Skip the sample in that case. + if sample.individual is None: + continue + sample_obj = { 'individual_id': sample.individual.id, 'id': sample.id diff --git a/chord_metadata_service/chord/tests/test_export_cbio.py b/chord_metadata_service/chord/tests/test_export_cbio.py index b3c74ca6d..4e9fddf78 100644 --- a/chord_metadata_service/chord/tests/test_export_cbio.py +++ b/chord_metadata_service/chord/tests/test_export_cbio.py @@ -58,6 +58,11 @@ def setUp(self) -> None: self.p = WORKFLOW_INGEST_FUNCTION_MAP[WORKFLOW_PHENOPACKETS_JSON](EXAMPLE_INGEST_OUTPUTS, self.t.identifier) + # Update the last sample to remove reference to any individual. + PhModel.Biosample.objects.filter( + id=EXAMPLE_INGEST_PHENOPACKET["biosamples"][-1]["id"] + ).update(individual=None) + def stream_to_dict(self, output: TextIO) -> Dict[str, str]: """ Utility function. Parses cBioPortal meta data text files (lines of @@ -158,6 +163,7 @@ def test_export_cbio_sample_data(self): output.seek(0) field_count = None field_names = [] + sample_count = 0 for i, line in enumerate(output): # 4 first header lines begin with `#` if i < 4: @@ -178,10 +184,19 @@ def test_export_cbio_sample_data(self): self.assertIn('SAMPLE_ID', pieces) continue - # TSV body. Inspect first line and break + # TSV body. 
self.assertEqual(field_count, len(pieces)) record = dict(zip(field_names, pieces)) - self.assertEqual(record["PATIENT_ID"], EXAMPLE_INGEST_PHENOPACKET["subject"]["id"]) - self.assertEqual(record["SAMPLE_ID"], EXAMPLE_INGEST_PHENOPACKET["biosamples"][0]["id"]) - break + self.assertEqual( + record["PATIENT_ID"], + EXAMPLE_INGEST_PHENOPACKET["biosamples"][sample_count]["individual_id"] + ) + self.assertEqual( + record["SAMPLE_ID"], + EXAMPLE_INGEST_PHENOPACKET["biosamples"][sample_count]["id"] + ) + sample_count += 1 + + # samples not attached to an individual are not exported + self.assertEqual(sample_count, samples.filter(individual_id__isnull=False).count())