From cf50c5eac1bfb0ac2d4698273c751e69f86b5d1f Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Fri, 10 Nov 2023 10:37:22 -0600 Subject: [PATCH] Updated some notebooks and logged errors --- ...oskin_PMID_34722527_RunGenoPhenoCorr.ipynb | 645 ++++---- notebooks/STXBP1/STXBP1.ipynb | 1365 ++++++++++++++++- src/genophenocorr/analysis/_analyzers.py | 8 +- .../preprocessing/_phenopacket.py | 7 +- src/genophenocorr/preprocessing/_vep.py | 5 +- 5 files changed, 1646 insertions(+), 384 deletions(-) diff --git a/notebooks/RPGRIP1/RPGRIP1_Beryoskin_PMID_34722527_RunGenoPhenoCorr.ipynb b/notebooks/RPGRIP1/RPGRIP1_Beryoskin_PMID_34722527_RunGenoPhenoCorr.ipynb index c225791e..f5c9ffdc 100644 --- a/notebooks/RPGRIP1/RPGRIP1_Beryoskin_PMID_34722527_RunGenoPhenoCorr.ipynb +++ b/notebooks/RPGRIP1/RPGRIP1_Beryoskin_PMID_34722527_RunGenoPhenoCorr.ipynb @@ -72,17 +72,7 @@ "execution_count": 6, "id": "b0533219", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Patient 237–523 has unknown alternative variant GNNNN and will not be included.\n", - "Patient 79–194 has unknown alternative variant GNNNN and will not be included.\n", - "Expected at least one variant per patient, but received none for patient 79–194\n" - ] - } - ], + "outputs": [], "source": [ "patientCohort = load_phenopacket_folder(fpath_phenopackets, phenotype_creator)" ] @@ -96,125 +86,124 @@ { "data": { "text/plain": [ - "[('14_21312458_A/-', 25),\n", - " ('14_21312434_deletion', 9),\n", - " ('14_21302531_G/-', 8),\n", - " ('14_21345145_C/T', 8),\n", - " ('14_21325252_G/A', 7),\n", - " ('14_21345140_AAGGCCG/-', 7),\n", - " ('14_21327672_-/T', 6),\n", - " ('14_21303542_C/T', 5),\n", - " ('14_21325265_A/G', 5),\n", - " ('14_21348211_AAAG/-', 4),\n", - " ('14_21326131_C/T', 4),\n", - " ('14_21326545_TTTTTAGTAGAGATGGGATTTCTCCATGTTGGTCAGGCTGGTCTTCAACTCCCGACCTCAGGTGAACCTCCCACCTGAGCCTCCCAAAGTGCTGGGATTACAGACGTGAGCCACCGCGCCTGGCTGAACAAACTTTTTCAAGCTCTGTAATGCTGTCTAGTATCTGTCTTTACTAAAGGCCTGTTGTTTCTTAGTGCATGACTACATAGATATCTGATTATAAACTGAGACCTTAACACTCCCCCATCATTCTCTCACTTCTTTTAAACACTGGACACAAGTTAGAGAGATTTCCACACCAGATCATGACAAACACAAATTTCTTGGATTTTTTTTTTCCTCCCAATGTGGAGCTGAGCTCCATACTGTCTTTCCTAACTTTTATACCTAGGATTGTGGGGGTGTACCAAGAGGGGTCAACTCTTTGACTACAGTCCTGGGAGGGTGAGGTGGGGGTATCCATGTTTTCCTTAGGAAGTGGGGATAGCTGCAGTCAGAAACAACCATATTTAACAAGATTCTGGATGCTCCAGGACATGTATGCAGCTCTCTCCTCAATACAACTGCTTAAAAAAAGGCTGACACTTCTGGACACAACTCCTTTGCCAAACAGGGGAGGCAGTATAAGCCACCTGTTAATCAGTGTTACAAATCAGACATCTGGCATTTCGAAAGAGCCATTTTGCTTAAGTTTTCTTGGGACCACTTGAGGGTAGAGGTAACAGTTTTCTTGGTACAACTAAGGCACAGTAAGCATTTGATAATAGTAATAATAATGCAGCCTTGTTCATTGGCTGGATGACTGATGACAAGCAAGCTGTACTCCTTTTCATACACCCTTCACTATCTCTTCCTGAATCCTAGAGATAACCCATCTTCCCTGATTAATATTCTAACTGCACTGCTGTTTGATTTTACTTCTGAGTGTATCATCATCGTAATTATTTAATGGATGTTAATTAATTGCTGATAAAATATGTTGAAATTAAAAATGGGAAGGAAGTAGATAAGGTGCTGACAAATGCTCACTTGCTTATTTCATGTGATCAGGTCTTATTAATATCTGTTTGTTTCTCAGGTGATTTTAACCTCACTGACCCTGCAGAGAAACCCAACGGATCTATTCAAGTGCAACTGGATTGGAAGTTTCCCTACATACCCCCTGAGAGCTTCCTGAAACCAGAAGCTCAGACTAAGGGGAAGGATACCAAGGACAGTTCAAAGATCTCATCTGAAGAGGAAAAGGCTTCATTTCCTTCCCAGGTAACTCTCCAGGACTCCACAGGTAGCAGATCTCTGCCAATCCTATGGAGCAGATTTGAAGGAGACAGTATTATAG/-',\n", + "[('14_21312457_21312458_GA_G', 25),\n", + " ('14_21312434_21327807_DEL', 9),\n", + " ('14_21345145_21345145_C_T', 8),\n", + " ('14_21302530_21302531_AG_A', 8),\n", + " ('14_21325252_21325252_G_A', 7),\n", + " ('14_21345139_21345146_CAAGGCCG_C', 7),\n", + " ('14_21327671_21327672_A_AT', 6),\n", + " ('14_21303542_21303542_C_T', 5),\n", + " ('14_21325265_21325265_A_G', 5),\n", + " ('14_21326131_21326131_C_T', 4),\n", + " ('14_21327800_21327801_CT_C', 4),\n", + " ('14_21348210_21348214_AAAAG_A', 4),\n", + " ('14_21326544_21327883_ATTTTTAGTAGAGATGGGATTTCTCCATGTTGGTCAGGCTGGTCTTCAACTCCCGACCTCAGGTGAACCTCCCACCTGAGCCTCCCAAAGTGCTGGGATTACAGACGTGAGCCACCGCGCCTGGCTGAACAAACTTTTTCAAGCTCTGTAATGCTGTCTAGTATCTGTCTTTACTAAAGGCCTGTTGTTTCTTAGTGCATGACTACATAGATATCTGATTATAAACTGAGACCTTAACACTCCCCCATCATTCTCTCACTTCTTTTAAACACTGGACACAAGTTAGAGAGATTTCCACACCAGATCATGACAAACACAAATTTCTTGGATTTTTTTTTTCCTCCCAATGTGGAGCTGAGCTCCATACTGTCTTTCCTAACTTTTATACCTAGGATTGTGGGGGTGTACCAAGAGGGGTCAACTCTTTGACTACAGTCCTGGGAGGGTGAGGTGGGGGTATCCATGTTTTCCTTAGGAAGTGGGGATAGCTGCAGTCAGAAACAACCATATTTAACAAGATTCTGGATGCTCCAGGACATGTATGCAGCTCTCTCCTCAATACAACTGCTTAAAAAAAGGCTGACACTTCTGGACACAACTCCTTTGCCAAACAGGGGAGGCAGTATAAGCCACCTGTTAATCAGTGTTACAAATCAGACATCTGGCATTTCGAAAGAGCCATTTTGCTTAAGTTTTCTTGGGACCACTTGAGGGTAGAGGTAACAGTTTTCTTGGTACAACTAAGGCACAGTAAGCATTTGATAATAGTAATAATAATGCAGCCTTGTTCATTGGCTGGATGACTGATGACAAGCAAGCTGTACTCCTTTTCATACACCCTTCACTATCTCTTCCTGAATCCTAGAGATAACCCATCTTCCCTGATTAATATTCTAACTGCACTGCTGTTTGATTTTACTTCTGAGTGTATCATCATCGTAATTATTTAATGGATGTTAATTAATTGCTGATAAAATATGTTGAAATTAAAAATGGGAAGGAAGTAGATAAGGTGCTGACAAATGCTCACTTGCTTATTTCATGTGATCAGGTCTTATTAATATCTGTTTGTTTCTCAGGTGATTTTAACCTCACTGACCCTGCAGAGAAACCCAACGGATCTATTCAAGTGCAACTGGATTGGAAGTTTCCCTACATACCCCCTGAGAGCTTCCTGAAACCAGAAGCTCAGACTAAGGGGAAGGATACCAAGGACAGTTCAAAGATCTCATCTGAAGAGGAAAAGGCTTCATTTCCTTCCCAGGTAACTCTCCAGGACTCCACAGGTAGCAGATCTCTGCCAATCCTATGGAGCAGATTTGAAGGAGACAGTATTATAG_A',\n", " 4),\n", - " ('14_21327801_T/-', 4),\n", - " ('14_21326055_T/G', 3),\n", - " ('14_21328463_C/T', 3),\n", - " ('14_21326072_-/A', 3),\n", - " ('14_21317850_G/T', 3),\n", - " ('14_21294785_G/A', 3),\n", - " ('14_21326125_C/T', 3),\n", - " ('14_21321856_GGAACTGGAG/-', 3),\n", - " ('14_21328469_C/T', 3),\n", - " ('14_21301105_C/T', 3),\n", - " ('14_21325318_C/T', 3),\n", - " ('14_21294745_C/T', 3),\n", - " ('14_21325311_C/A', 2),\n", - " ('14_21317760_C/-', 2),\n", - " ('14_21301114_C/T', 2),\n", - " ('14_21324867_G/A', 2),\n", - " ('14_21325880_C/T', 2),\n", - " ('14_21307824_AG/-', 2),\n", - " ('14_21325405_G/-', 2),\n", - " ('14_21312439_GAGA/-', 2),\n", - " ('14_21303510_C/G', 2),\n", - " ('14_21327708_-/T', 2),\n", - " ('14_21326119_C/T', 2),\n", - " ('14_21325241_GA/-', 2),\n", - " ('14_21301238_G/T', 2),\n", - " ('14_21317769_C/T', 2),\n", - " ('14_21317724_C/T', 2),\n", - " ('14_21324876_C/A', 2),\n", - " ('14_21312466_C/T', 2),\n", - " ('14_21302508_T/-', 2),\n", - " ('14_21320155_T/A', 2),\n", - " ('14_21325330_C/T', 2),\n", - " ('14_21320178_G/T', 2),\n", - " ('14_21324747_A/T', 2),\n", - " ('14_21321257_A/G', 2),\n", - " ('14_21325943_G/T', 2),\n", - " ('14_21326017_C/T', 2),\n", - " ('14_21334696_T/A', 2),\n", - " ('14_21312440_-/A', 2),\n", - " ('14_21351103_G/T', 2),\n", - " ('14_21303544_G/A', 2),\n", - " ('14_21348186_T/A', 2),\n", - " ('14_21317778_C/T', 2),\n", - " ('14_21321929_C/T', 1),\n", - " ('14_21326031_-/T', 1),\n", - " ('14_21348184_-/G', 1),\n", - " ('14_21303543_G/A', 1),\n", - " ('14_21324747_A/C', 1),\n", - " ('14_21325931_A/G', 1),\n", - " ('14_21343084_G/C', 1),\n", - " ('14_21325832_deletion', 1),\n", - " ('14_21321316_C/T', 1),\n", - " ('14_21303373_A/-', 1),\n", - " ('14_21321865_G/-', 1),\n", - " ('14_21312468_A/-', 1),\n", - " ('14_21348167_TTTAG/-', 1),\n", - " ('14_21324616_A/G', 1),\n", - " ('14_21325903_C/T', 1),\n", - " ('14_21280084_deletion', 1),\n", - " ('14_21317847_A/T', 1),\n", - " ('14_21307704_AGAATAATTTAGCGCCTTTCTCTGCAGAGCTTCCATTAAAGAGAAGGTAGAGCTGATTCGACTTAAGA/-',\n", + " ('14_21326055_21326055_T_G', 3),\n", + " ('14_21326071_21326072_T_TA', 3),\n", + " ('14_21325318_21325318_C_T', 3),\n", + " ('14_21294745_21294745_C_T', 3),\n", + " ('14_21328469_21328469_C_T', 3),\n", + " ('14_21321855_21321865_AGGAACTGGAG_A', 3),\n", + " ('14_21301105_21301105_C_T', 3),\n", + " ('14_21326125_21326125_C_T', 3),\n", + " ('14_21328463_21328463_C_T', 3),\n", + " ('14_21317850_21317850_G_T', 3),\n", + " ('14_21294785_21294785_G_A', 3),\n", + " ('14_21312438_21312440_G_GGA', 2),\n", + " ('14_21351103_21351103_G_T', 2),\n", + " ('14_21325880_21325880_C_T', 2),\n", + " ('14_21324876_21324876_C_A', 2),\n", + " ('14_21317769_21317769_C_T', 2),\n", + " ('14_21321257_21321257_A_G', 2),\n", + " ('14_21348186_21348186_T_A', 2),\n", + " ('14_21325240_21325242_GGA_G', 2),\n", + " ('14_21324747_21324747_A_T', 2),\n", + " ('14_21317778_21317778_C_T', 2),\n", + " ('14_21307823_21307825_CAG_C', 2),\n", + " ('14_21325404_21325405_AG_A', 2),\n", + " ('14_21320178_21320178_G_T', 2),\n", + " ('14_21303544_21303544_G_A', 2),\n", + " ('14_21301114_21301114_C_T', 2),\n", + " ('14_21334696_21334696_T_A', 2),\n", + " ('14_21327707_21327708_C_CT', 2),\n", + " ('14_21325311_21325311_C_A', 2),\n", + " ('14_21312466_21312466_C_T', 2),\n", + " ('14_21317759_21317760_GC_G', 2),\n", + " ('14_21325943_21325943_G_T', 2),\n", + " ('14_21317724_21317724_C_T', 2),\n", + " ('14_21312438_21312442_GGAGA_G', 2),\n", + " ('14_21303510_21303510_C_G', 2),\n", + " ('14_21326119_21326119_C_T', 2),\n", + " ('14_21324867_21324867_G_A', 2),\n", + " ('14_21301238_21301238_G_T', 2),\n", + " ('14_21326017_21326017_C_T', 2),\n", + " ('14_21320155_21320155_T_A', 2),\n", + " ('14_21325330_21325330_C_T', 2),\n", + " ('14_21302507_21302508_CT_C', 2),\n", + " ('14_21310608_21310608_G_A', 1),\n", + " ('14_21312500_21312500_T_A', 1),\n", + " ('14_21343054_21343054_A_G', 1),\n", + " ('14_21280084_21343228_DEL', 1),\n", + " ('14_21324875_21324875_C_T', 1),\n", + " ('14_21294674_21294674_T_G', 1),\n", + " ('14_21327670_21327670_C_T', 1),\n", + " ('14_21324747_21324747_A_C', 1),\n", + " ('14_21324934_21324934_C_G', 1),\n", + " ('14_21325063_21325088_ACTGATTGGTAAGTGCCGTTGGCTTC_A', 1),\n", + " ('14_21287940_21288061_DUP', 1),\n", + " ('14_21326029_21326031_C_CTT', 1),\n", + " ('14_21348183_21348184_C_CG', 1),\n", + " ('14_21328499_21328500_GA_G', 1),\n", + " ('14_21326018_21326018_G_A', 1),\n", + " ('14_21348174_21348174_T_G', 1),\n", + " ('14_21301166_21301167_AG_A', 1),\n", + " ('14_21321291_21321295_A_ACTGT', 1),\n", + " ('14_21325898_21325898_G_A', 1),\n", + " ('14_21311823_21311824_GA_G', 1),\n", + " ('14_21325903_21325903_C_T', 1),\n", + " ('14_21325931_21325931_A_G', 1),\n", + " ('14_21300989_21300989_T_A', 1),\n", + " ('14_21312487_21312490_TATG_T', 1),\n", + " ('14_21312467_21312468_GA_G', 1),\n", + " ('14_21320157_21320157_C_T', 1),\n", + " ('14_21326107_21326107_C_T', 1),\n", + " ('14_21325276_21325277_CT_C', 1),\n", + " ('14_21348166_21348171_TTTTAG_T', 1),\n", + " ('14_21327626_21327627_A_AT', 1),\n", + " ('14_21321864_21321865_AG_A', 1),\n", + " ('14_21326039_21326041_C_CTT', 1),\n", + " ('14_21302585_21302585_G_C', 1),\n", + " ('14_21325832_21328627_DEL', 1),\n", + " ('14_21312500_21312504_TAGAA_T', 1),\n", + " ('14_21325927_21325931_C_CCATA', 1),\n", + " ('14_21343084_21343084_G_C', 1),\n", + " ('14_21280084_21288061_DUP', 1),\n", + " ('14_21326174_21326174_G_A', 1),\n", + " ('14_21321402_21321402_G_A', 1),\n", + " ('14_21303372_21303373_TA_T', 1),\n", + " ('14_21325372_21325372_C_T', 1),\n", + " ('14_21303453_21303454_CA_C', 1),\n", + " ('14_21303543_21303543_G_A', 1),\n", + " ('14_21325253_21325253_G_A', 1),\n", + " ('14_21317754_21317754_G_T', 1),\n", + " ('14_21311854_21311858_G_GCCCT', 1),\n", + " ('14_21321316_21321316_C_T', 1),\n", + " ('14_21345188_21345189_AT_A', 1),\n", + " ('14_21325861_21325861_G_A', 1),\n", + " ('14_21324912_21324913_C_CA', 1),\n", + " ('14_21324616_21324616_A_G', 1),\n", + " ('14_21348303_21348303_G_A', 1),\n", + " ('14_21351187_21351190_AAGG_A', 1),\n", + " ('14_21321929_21321929_C_T', 1),\n", + " ('14_21330247_21330386_AGCAGGTGAATTACACTGAGTGGAAGTTCTCAGAGACTAACAGCTTCATAGGTGATGGCTTTAAAAATCAGCACGAGGAAGAGGAAATGACATTATCCCATTCAGCACTGAAACAGAAGGAACCTCTACATCCTGTAAAT_A',\n", " 1),\n", - " ('14_21343054_A/G', 1),\n", - " ('14_21326107_C/T', 1),\n", - " ('14_21310608_G/A', 1),\n", - " ('14_129365_deletion', 1),\n", - " ('14_21348174_T/G', 1),\n", - " ('14_21326018_G/A', 1),\n", - " ('14_21280084_duplication', 1),\n", - " ('14_21325277_T/-', 1),\n", - " ('14_21325861_G/A', 1),\n", - " ('14_21324913_-/A', 1),\n", - " ('14_21330248_GCAGGTGAATTACACTGAGTGGAAGTTCTCAGAGACTAACAGCTTCATAGGTGATGGCTTTAAAAATCAGCACGAGGAAGAGGAAATGACATTATCCCATTCAGCACTGAAACAGAAGGAACCTCTACATCCTGTAAAT/-',\n", - " 1),\n", - " ('14_21327670_C/T', 1),\n", - " ('14_21294674_T/G', 1),\n", - " ('14_21345189_T/-', 1),\n", - " ('14_21320157_C/T', 1),\n", - " ('14_21325372_C/T', 1),\n", - " ('14_21326174_G/A', 1),\n", - " ('14_21300989_T/A', 1),\n", - " ('14_21301167_G/-', 1),\n", - " ('14_21324875_C/T', 1),\n", - " ('14_21312488_ATG/-', 1),\n", - " ('14_21312501_AGAA/-', 1),\n", - " ('14_21325931_-/A', 1),\n", - " ('14_21326041_-/T', 1),\n", - " ('14_21345198_G/A', 1),\n", - " ('14_21311824_A/-', 1),\n", - " ('14_21327627_-/T', 1),\n", - " ('14_21287940_duplication', 1),\n", - " ('14_21303454_A/-', 1),\n", - " ('14_21324865_-/G', 1),\n", - " ('14_21325253_G/A', 1),\n", - " ('14_21324934_C/G', 1),\n", - " ('14_21325064_CTGATTGGTAAGTGCCGTTGGCTTC/-', 1),\n", - " ('14_21312500_T/A', 1),\n", - " ('14_21317754_G/T', 1),\n", - " ('14_21311858_-/T', 1),\n", - " ('14_21348303_G/A', 1),\n", - " ('14_21351188_AGG/-', 1),\n", - " ('14_21321295_-/T', 1),\n", - " ('14_21321402_G/A', 1),\n", - " ('14_21302585_G/C', 1),\n", - " ('14_21325898_G/A', 1),\n", - " ('14_21328500_A/-', 1)]" + " ('14_21324864_21324865_T_TG', 1),\n", + " ('14_21345198_21345198_G_A', 1),\n", + " ('14_21317847_21317847_A_T', 1),\n", + " ('14_21307703_21307771_CAGAATAATTTAGCGCCTTTCTCTGCAGAGCTTCCATTAAAGAGAAGGTAGAGCTGATTCGACTTAAGA_C',\n", + " 1)]" ] }, "execution_count": 7, @@ -235,94 +224,94 @@ { "data": { "text/plain": [ - "{'NM_001377949.1': Counter({'intron_variant': 47,\n", - " 'frameshift_variant': 16,\n", - " 'splice_acceptor_variant': 7,\n", - " 'splice_region_variant': 5,\n", - " 'stop_gained': 18,\n", - " 'missense_variant': 4,\n", - " 'splice_donor_variant': 5,\n", - " 'feature_truncation': 4,\n", - " 'coding_sequence_variant': 7,\n", - " '5_prime_UTR_variant': 3,\n", - " 'inframe_deletion': 1,\n", - " 'splice_donor_5th_base_variant': 2,\n", - " 'synonymous_variant': 1,\n", - " 'splice_polypyrimidine_tract_variant': 1}),\n", - " 'NM_001377948.1': Counter({'frameshift_variant': 24,\n", - " 'splice_acceptor_variant': 7,\n", - " 'missense_variant': 22,\n", - " 'splice_region_variant': 5,\n", - " 'stop_gained': 30,\n", - " 'intron_variant': 9,\n", - " 'splice_donor_variant': 7,\n", - " 'feature_truncation': 4,\n", - " 'coding_sequence_variant': 8,\n", - " '5_prime_UTR_variant': 3,\n", - " 'inframe_deletion': 1,\n", - " 'splice_donor_5th_base_variant': 3,\n", - " 'synonymous_variant': 1,\n", - " 'splice_polypyrimidine_tract_variant': 1}),\n", - " 'NM_001377950.1': Counter({'intron_variant': 49,\n", - " 'frameshift_variant': 16,\n", - " 'splice_acceptor_variant': 6,\n", - " 'splice_region_variant': 5,\n", - " 'stop_gained': 18,\n", - " 'missense_variant': 4,\n", - " 'splice_donor_variant': 5,\n", - " 'feature_truncation': 4,\n", - " 'coding_sequence_variant': 7,\n", - " '5_prime_UTR_variant': 3,\n", - " 'inframe_deletion': 1,\n", - " 'splice_donor_5th_base_variant': 2,\n", - " 'synonymous_variant': 1,\n", - " 'splice_polypyrimidine_tract_variant': 1}),\n", - " 'NM_001377523.1': Counter({'intron_variant': 48,\n", - " 'frameshift_variant': 16,\n", - " 'splice_acceptor_variant': 6,\n", - " 'splice_region_variant': 5,\n", - " 'stop_gained': 18,\n", - " 'missense_variant': 4,\n", - " 'splice_donor_variant': 5,\n", - " 'feature_truncation': 4,\n", - " 'coding_sequence_variant': 7,\n", - " '5_prime_UTR_variant': 3,\n", - " 'inframe_deletion': 1,\n", - " 'splice_donor_5th_base_variant': 2,\n", - " 'synonymous_variant': 1,\n", - " 'splice_polypyrimidine_tract_variant': 1}),\n", - " 'NM_001377951.1': Counter({'intron_variant': 48,\n", - " 'frameshift_variant': 14,\n", - " 'splice_acceptor_variant': 6,\n", - " 'splice_region_variant': 4,\n", - " '5_prime_UTR_variant': 7,\n", - " 'upstream_gene_variant': 9,\n", - " 'stop_gained': 8,\n", - " 'missense_variant': 3,\n", - " 'splice_donor_variant': 5,\n", - " 'feature_truncation': 4,\n", - " 'coding_sequence_variant': 7,\n", - " 'inframe_deletion': 1,\n", - " 'splice_donor_5th_base_variant': 2,\n", - " 'synonymous_variant': 1,\n", - " 'splice_polypyrimidine_tract_variant': 1}),\n", - " 'NM_020366.4': Counter({'frameshift_variant': 38,\n", - " 'splice_acceptor_variant': 8,\n", - " 'missense_variant': 23,\n", - " 'splice_region_variant': 11,\n", - " 'stop_gained': 43,\n", - " 'intron_variant': 12,\n", - " 'inframe_deletion': 2,\n", - " 'splice_donor_variant': 12,\n", - " 'splice_polypyrimidine_tract_variant': 2,\n", - " 'coding_sequence_variant': 11,\n", - " 'start_lost': 3,\n", - " 'feature_truncation': 4,\n", - " 'start_retained_variant': 3,\n", - " '5_prime_UTR_variant': 3,\n", - " 'splice_donor_5th_base_variant': 3,\n", - " 'feature_elongation': 2,\n", - " 'synonymous_variant': 1})}" + "{'NM_001377949.1': Counter({'INTRON_VARIANT': 85,\n", + " 'FEATURE_TRUNCATION': 11,\n", + " 'CODING_SEQUENCE_VARIANT': 16,\n", + " 'FIVE_PRIME_UTR_VARIANT': 10,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 11,\n", + " 'SPLICE_DONOR_VARIANT': 8,\n", + " 'SPLICE_DONOR_5TH_BASE_VARIANT': 4,\n", + " 'FRAMESHIFT_VARIANT': 34,\n", + " 'SPLICE_REGION_VARIANT': 8,\n", + " 'STOP_GAINED': 31,\n", + " 'MISSENSE_VARIANT': 7,\n", + " 'SYNONYMOUS_VARIANT': 1,\n", + " 'INFRAME_DELETION': 1,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 1}),\n", + " 'NM_020366.4': Counter({'MISSENSE_VARIANT': 40,\n", + " 'FEATURE_TRUNCATION': 11,\n", + " 'CODING_SEQUENCE_VARIANT': 20,\n", + " 'INTRON_VARIANT': 22,\n", + " 'STOP_GAINED': 76,\n", + " 'FRAMESHIFT_VARIANT': 93,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 12,\n", + " 'SPLICE_DONOR_VARIANT': 16,\n", + " 'SPLICE_DONOR_5TH_BASE_VARIANT': 5,\n", + " 'SPLICE_REGION_VARIANT': 17,\n", + " 'START_LOST': 3,\n", + " 'FEATURE_ELONGATION': 2,\n", + " 'START_RETAINED_VARIANT': 3,\n", + " 'FIVE_PRIME_UTR_VARIANT': 3,\n", + " 'SYNONYMOUS_VARIANT': 1,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 2,\n", + " 'INFRAME_DELETION': 2}),\n", + " 'NM_001377950.1': Counter({'INTRON_VARIANT': 87,\n", + " 'FEATURE_TRUNCATION': 11,\n", + " 'CODING_SEQUENCE_VARIANT': 16,\n", + " 'FIVE_PRIME_UTR_VARIANT': 10,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 10,\n", + " 'SPLICE_DONOR_VARIANT': 8,\n", + " 'SPLICE_DONOR_5TH_BASE_VARIANT': 4,\n", + " 'FRAMESHIFT_VARIANT': 34,\n", + " 'SPLICE_REGION_VARIANT': 8,\n", + " 'STOP_GAINED': 31,\n", + " 'MISSENSE_VARIANT': 7,\n", + " 'SYNONYMOUS_VARIANT': 1,\n", + " 'INFRAME_DELETION': 1,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 1}),\n", + " 'NM_001377948.1': Counter({'MISSENSE_VARIANT': 39,\n", + " 'FEATURE_TRUNCATION': 11,\n", + " 'CODING_SEQUENCE_VARIANT': 17,\n", + " 'FIVE_PRIME_UTR_VARIANT': 10,\n", + " 'INTRON_VARIANT': 19,\n", + " 'STOP_GAINED': 54,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 11,\n", + " 'SPLICE_DONOR_VARIANT': 10,\n", + " 'SPLICE_DONOR_5TH_BASE_VARIANT': 5,\n", + " 'FRAMESHIFT_VARIANT': 45,\n", + " 'SPLICE_REGION_VARIANT': 8,\n", + " 'SYNONYMOUS_VARIANT': 1,\n", + " 'INFRAME_DELETION': 1,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 1}),\n", + " 'NM_001377523.1': Counter({'INTRON_VARIANT': 86,\n", + " 'FEATURE_TRUNCATION': 11,\n", + " 'CODING_SEQUENCE_VARIANT': 16,\n", + " 'FIVE_PRIME_UTR_VARIANT': 10,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 10,\n", + " 'SPLICE_DONOR_VARIANT': 8,\n", + " 'SPLICE_DONOR_5TH_BASE_VARIANT': 4,\n", + " 'FRAMESHIFT_VARIANT': 34,\n", + " 'SPLICE_REGION_VARIANT': 8,\n", + " 'STOP_GAINED': 31,\n", + " 'MISSENSE_VARIANT': 7,\n", + " 'SYNONYMOUS_VARIANT': 1,\n", + " 'INFRAME_DELETION': 1,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 1}),\n", + " 'NM_001377951.1': Counter({'INTRON_VARIANT': 86,\n", + " 'FEATURE_TRUNCATION': 11,\n", + " 'CODING_SEQUENCE_VARIANT': 16,\n", + " 'FIVE_PRIME_UTR_VARIANT': 15,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 10,\n", + " 'SPLICE_DONOR_VARIANT': 8,\n", + " 'SPLICE_DONOR_5TH_BASE_VARIANT': 4,\n", + " 'FRAMESHIFT_VARIANT': 31,\n", + " 'SPLICE_REGION_VARIANT': 5,\n", + " 'STOP_GAINED': 19,\n", + " 'UPSTREAM_GENE_VARIANT': 13,\n", + " 'SYNONYMOUS_VARIANT': 1,\n", + " 'MISSENSE_VARIANT': 4,\n", + " 'INFRAME_DELETION': 1,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 1})}" ] }, "execution_count": 8, @@ -346,28 +335,28 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "ef5b47de", + "execution_count": 12, + "id": "3a3ddcdc", "metadata": {}, "outputs": [], "source": [ - "rec_analysis = CohortAnalysis(patientCohort, 'NM_020366.4', hpo, recessive=True, include_unmeasured=False)" + "from genophenocorr.model import VariantEffect\n", + "from genophenocorr.model import FeatureType" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "3a3ddcdc", + "execution_count": 10, + "id": "ef5b47de", "metadata": {}, "outputs": [], "source": [ - "from genophenocorr.constants import VariantEffect\n", - "from genophenocorr.model import FeatureType" + "rec_analysis = CohortAnalysis(patientCohort, 'NM_020366.4', hpo, recessive=True, include_unmeasured=False)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "id": "19112877", "metadata": {}, "outputs": [ @@ -399,9 +388,9 @@ " \n", " \n", " \n", - " Homozygous frameshift_variant\n", - " Heterozygous frameshift_variant\n", - " No frameshift_variant\n", + " Homozygous missense_variant\n", + " Heterozygous missense_variant\n", + " No missense_variant\n", " \n", " \n", " \n", @@ -418,102 +407,95 @@ " \n", " \n", " \n", - " HP:0001483 (Eye poking)\n", - " 21\n", - " 84.00%\n", - " 2\n", - " 100.00%\n", - " 5\n", - " 41.67%\n", - " 0.024229\n", - " 0.096914\n", - " \n", - " \n", " HP:0012758 (Neurodevelopmental delay)\n", - " 8\n", - " 11.94%\n", " 0\n", " 0.00%\n", - " 4\n", - " 3.31%\n", - " 0.038327\n", - " 0.153306\n", + " 0\n", + " 0.00%\n", + " 12\n", + " 7.06%\n", + " 0.299821\n", + " 1.0\n", " \n", " \n", - " HP:0000613 (Photophobia)\n", - " 12\n", + " HP:0001483 (Eye poking)\n", + " 2\n", + " 50.00%\n", + " 1\n", + " 100.00%\n", + " 25\n", + " 73.53%\n", + " 0.687055\n", + " 1.0\n", + " \n", + " \n", + " HP:0000496 (Abnormality of eye movement)\n", + " 10\n", " 100.00%\n", " 2\n", " 100.00%\n", - " 16\n", + " 79\n", " 100.00%\n", " 1.000000\n", - " 1.000000\n", + " 1.0\n", " \n", " \n", - " HP:0000496 (Abnormality of eye movement)\n", - " 32\n", + " HP:0000613 (Photophobia)\n", + " 7\n", " 100.00%\n", - " 9\n", + " 1\n", " 100.00%\n", - " 50\n", + " 22\n", " 100.00%\n", " 1.000000\n", - " 1.000000\n", + " 1.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Homozygous frameshift_variant \\\n", - " Count \n", - "HP:0001483 (Eye poking) 21 \n", - "HP:0012758 (Neurodevelopmental delay) 8 \n", - "HP:0000613 (Photophobia) 12 \n", - "HP:0000496 (Abnormality of eye movement) 32 \n", + " Homozygous missense_variant \\\n", + " Count Percent \n", + "HP:0012758 (Neurodevelopmental delay) 0 0.00% \n", + "HP:0001483 (Eye poking) 2 50.00% \n", + "HP:0000496 (Abnormality of eye movement) 10 100.00% \n", + "HP:0000613 (Photophobia) 7 100.00% \n", "\n", - " \\\n", - " Percent \n", - "HP:0001483 (Eye poking) 84.00% \n", - "HP:0012758 (Neurodevelopmental delay) 11.94% \n", - "HP:0000613 (Photophobia) 100.00% \n", - "HP:0000496 (Abnormality of eye movement) 100.00% \n", - "\n", - " Heterozygous frameshift_variant \\\n", - " Count \n", - "HP:0001483 (Eye poking) 2 \n", - "HP:0012758 (Neurodevelopmental delay) 0 \n", - "HP:0000613 (Photophobia) 2 \n", - "HP:0000496 (Abnormality of eye movement) 9 \n", + " Heterozygous missense_variant \\\n", + " Count \n", + "HP:0012758 (Neurodevelopmental delay) 0 \n", + "HP:0001483 (Eye poking) 1 \n", + "HP:0000496 (Abnormality of eye movement) 2 \n", + "HP:0000613 (Photophobia) 1 \n", "\n", - " No frameshift_variant \\\n", - " Percent Count \n", - "HP:0001483 (Eye poking) 100.00% 5 \n", - "HP:0012758 (Neurodevelopmental delay) 0.00% 4 \n", - "HP:0000613 (Photophobia) 100.00% 16 \n", - "HP:0000496 (Abnormality of eye movement) 100.00% 50 \n", + " No missense_variant \\\n", + " Percent Count \n", + "HP:0012758 (Neurodevelopmental delay) 0.00% 12 \n", + "HP:0001483 (Eye poking) 100.00% 25 \n", + "HP:0000496 (Abnormality of eye movement) 100.00% 79 \n", + "HP:0000613 (Photophobia) 100.00% 22 \n", "\n", " \n", " Percent p-value Corrected p-values \n", - "HP:0001483 (Eye poking) 41.67% 0.024229 0.096914 \n", - "HP:0012758 (Neurodevelopmental delay) 3.31% 0.038327 0.153306 \n", - "HP:0000613 (Photophobia) 100.00% 1.000000 1.000000 \n", - "HP:0000496 (Abnormality of eye movement) 100.00% 1.000000 1.000000 " + "HP:0012758 (Neurodevelopmental delay) 7.06% 0.299821 1.0 \n", + "HP:0001483 (Eye poking) 73.53% 0.687055 1.0 \n", + "HP:0000496 (Abnormality of eye movement) 100.00% 1.000000 1.0 \n", + "HP:0000613 (Photophobia) 100.00% 1.000000 1.0 " ] }, - "execution_count": 20, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "rec_analysis.compare_by_variant_type(VariantEffect.FRAMESHIFT_VARIANT)" + "rec_analysis.compare_by_variant_type(VariantEffect.MISSENSE_VARIANT)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "b9bf9ccf", "metadata": {}, "outputs": [], @@ -523,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "d835f075", "metadata": {}, "outputs": [ @@ -582,10 +564,10 @@ " 0.648221\n", " \n", " \n", - " HP:0000613 (Photophobia)\n", - " 14\n", + " HP:0000496 (Abnormality of eye movement)\n", + " 41\n", " 100.00%\n", - " 16\n", + " 50\n", " 100.00%\n", " 1.000000\n", " 1.000000\n", @@ -600,10 +582,10 @@ " 1.000000\n", " \n", " \n", - " HP:0000496 (Abnormality of eye movement)\n", - " 41\n", + " HP:0000613 (Photophobia)\n", + " 14\n", " 100.00%\n", - " 50\n", + " 16\n", " 100.00%\n", " 1.000000\n", " 1.000000\n", @@ -617,28 +599,28 @@ " Count Percent \n", "HP:0001483 (Eye poking) 23 85.19% \n", "HP:0012758 (Neurodevelopmental delay) 8 8.99% \n", - "HP:0000613 (Photophobia) 14 100.00% \n", - "HP:0000662 (Nyctalopia) 3 100.00% \n", "HP:0000496 (Abnormality of eye movement) 41 100.00% \n", + "HP:0000662 (Nyctalopia) 3 100.00% \n", + "HP:0000613 (Photophobia) 14 100.00% \n", "\n", " Without frameshift_variant \\\n", " Count Percent \n", "HP:0001483 (Eye poking) 5 41.67% \n", "HP:0012758 (Neurodevelopmental delay) 4 3.31% \n", - "HP:0000613 (Photophobia) 16 100.00% \n", - "HP:0000662 (Nyctalopia) 3 100.00% \n", "HP:0000496 (Abnormality of eye movement) 50 100.00% \n", + "HP:0000662 (Nyctalopia) 3 100.00% \n", + "HP:0000613 (Photophobia) 16 100.00% \n", "\n", " \n", " p-value Corrected p-values \n", "HP:0001483 (Eye poking) 0.016983 0.084915 \n", "HP:0012758 (Neurodevelopmental delay) 0.129644 0.648221 \n", - "HP:0000613 (Photophobia) 1.000000 1.000000 \n", + "HP:0000496 (Abnormality of eye movement) 1.000000 1.000000 \n", "HP:0000662 (Nyctalopia) 1.000000 1.000000 \n", - "HP:0000496 (Abnormality of eye movement) 1.000000 1.000000 " + "HP:0000613 (Photophobia) 1.000000 1.000000 " ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -649,7 +631,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "44006715", "metadata": {}, "outputs": [ @@ -681,8 +663,8 @@ " \n", " \n", " \n", - " With 14_21312458_A/-\n", - " Without 14_21312458_A/-\n", + " With 14_21312457_21312458_GA_G\n", + " Without 14_21312457_21312458_GA_G\n", " \n", " \n", " \n", @@ -715,19 +697,19 @@ " 0.161052\n", " \n", " \n", - " HP:0000613 (Photophobia)\n", - " 5\n", + " HP:0000496 (Abnormality of eye movement)\n", + " 16\n", " 100.00%\n", - " 25\n", + " 75\n", " 100.00%\n", " 1.000000\n", " 1.000000\n", " \n", " \n", - " HP:0000496 (Abnormality of eye movement)\n", - " 16\n", + " HP:0000613 (Photophobia)\n", + " 5\n", " 100.00%\n", - " 75\n", + " 25\n", " 100.00%\n", " 1.000000\n", " 1.000000\n", @@ -737,35 +719,42 @@ "" ], "text/plain": [ - " With 14_21312458_A/- \\\n", - " Count Percent \n", - "HP:0001483 (Eye poking) 16 100.00% \n", - "HP:0012758 (Neurodevelopmental delay) 4 16.00% \n", - "HP:0000613 (Photophobia) 5 100.00% \n", - "HP:0000496 (Abnormality of eye movement) 16 100.00% \n", + " With 14_21312457_21312458_GA_G \\\n", + " Count \n", + "HP:0001483 (Eye poking) 16 \n", + "HP:0012758 (Neurodevelopmental delay) 4 \n", + "HP:0000496 (Abnormality of eye movement) 16 \n", + "HP:0000613 (Photophobia) 5 \n", "\n", - " Without 14_21312458_A/- \\\n", - " Count Percent \n", - "HP:0001483 (Eye poking) 12 52.17% \n", - "HP:0012758 (Neurodevelopmental delay) 8 4.32% \n", - "HP:0000613 (Photophobia) 25 100.00% \n", - "HP:0000496 (Abnormality of eye movement) 75 100.00% \n", + " \\\n", + " Percent \n", + "HP:0001483 (Eye poking) 100.00% \n", + "HP:0012758 (Neurodevelopmental delay) 16.00% \n", + "HP:0000496 (Abnormality of eye movement) 100.00% \n", + "HP:0000613 (Photophobia) 100.00% \n", "\n", - " \n", - " p-value Corrected p-values \n", - "HP:0001483 (Eye poking) 0.000919 0.003677 \n", - "HP:0012758 (Neurodevelopmental delay) 0.040263 0.161052 \n", - "HP:0000613 (Photophobia) 1.000000 1.000000 \n", - "HP:0000496 (Abnormality of eye movement) 1.000000 1.000000 " + " Without 14_21312457_21312458_GA_G \\\n", + " Count \n", + "HP:0001483 (Eye poking) 12 \n", + "HP:0012758 (Neurodevelopmental delay) 8 \n", + "HP:0000496 (Abnormality of eye movement) 75 \n", + "HP:0000613 (Photophobia) 25 \n", + "\n", + " \n", + " Percent p-value Corrected p-values \n", + "HP:0001483 (Eye poking) 52.17% 0.000919 0.003677 \n", + "HP:0012758 (Neurodevelopmental delay) 4.32% 0.040263 0.161052 \n", + "HP:0000496 (Abnormality of eye movement) 100.00% 1.000000 1.000000 \n", + "HP:0000613 (Photophobia) 100.00% 1.000000 1.000000 " ] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dom_analysis.compare_by_variant('14_21312458_A/-')" + "dom_analysis.compare_by_variant('14_21312457_21312458_GA_G')" ] }, { diff --git a/notebooks/STXBP1/STXBP1.ipynb b/notebooks/STXBP1/STXBP1.ipynb index 59c27b18..b9a14875 100644 --- a/notebooks/STXBP1/STXBP1.ipynb +++ b/notebooks/STXBP1/STXBP1.ipynb @@ -38,13 +38,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Loaded HPO v2022-10-05\n" + "Loaded HPO v2023-01-27\n" ] } ], "source": [ "import hpotk\n", - "hpo_purl = \"/Users/robinp/data/hpo/hp.json\"\n", + "hpo_purl = \"../hpo_data/hp.json\"\n", "hpo = hpotk.load_minimal_ontology(hpo_purl)\n", "print(f'Loaded HPO v{hpo.version}')" ] @@ -57,7 +57,7 @@ "outputs": [], "source": [ "from genophenocorr.preprocessing import configure_caching_patient_creator\n", - "patient_creator = configure_caching_patient_creator(hpo)" + "patient_creator = configure_caching_patient_creator(hpo, cache_dir = \"annotations\")" ] }, { @@ -75,78 +75,1351 @@ "execution_count": 5, "id": "3c07a0a5-560a-4970-9f6e-deed8f06fe35", "metadata": {}, + "outputs": [], + "source": [ + "phenopacket_input_folder = \"input\"\n", + "patientCohort = load_phenopacket_folder(pp_directory=phenopacket_input_folder, patient_creator=patient_creator)" + ] + }, + { + "cell_type": "markdown", + "id": "175d2b14-b75a-499a-9251-5154aa31c683", + "metadata": {}, + "source": [ + "

CohortViewer

\n", + "

The CohortViewer class offers various methods to create tables and graphics to view the data in the cohort.\n", + "It is still very much in progress

" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a41be80d-0678-4dc3-97b6-2876d69648e0", + "metadata": {}, + "outputs": [], + "source": [ + "from genophenocorr.view import CohortViewer\n", + "from IPython.display import display, HTML" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1c7c5c1e-8e5e-488a-98ec-85689adff61c", + "metadata": {}, + "outputs": [], + "source": [ + "viewer = CohortViewer(hpo=hpo)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ef78af2f", + "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "Expected a result but got an Error for variant: 9_127684372_127684373_A_A_heterozygous\n" - ] - }, + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ItemDescription
Description of the cohort.
Total Individuals462
Total Unique HPO Terms540
Total Unique Variants462
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(viewer.cohort_summary_table(patientCohort)))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "dadb3b22", + "metadata": {}, + "outputs": [ { - "ename": "HTTPError", - "evalue": "400 Client Error: Bad Request for url: https://rest.ensembl.org/vep/human/region/9:127684373-127684373/A?LoF=1&canonical=1&domains=1&hgvs=1&mutfunc=1&numbers=1&protein=1&refseq=1&mane=1&transcript_version=1&variant_class=1", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m phenopacket_input_folder \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m patient_list \u001b[38;5;241m=\u001b[39m \u001b[43mload_phenopacket_folder\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpp_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mphenopacket_input_folder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpatient_creator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpatient_creator\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_phenopacket.py:196\u001b[0m, in \u001b[0;36mload_phenopacket_folder\u001b[0;34m(pp_directory, patient_creator)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo JSON Phenopackets were found in \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpp_directory\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# turn phenopackets into patients using patient creator\u001b[39;00m\n\u001b[0;32m--> 196\u001b[0m patients \u001b[38;5;241m=\u001b[39m [patient_creator\u001b[38;5;241m.\u001b[39mcreate_patient(pp) \u001b[38;5;28;01mfor\u001b[39;00m pp \u001b[38;5;129;01min\u001b[39;00m pps]\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# create cohort from patients\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Cohort\u001b[38;5;241m.\u001b[39mfrom_patients(patients)\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_phenopacket.py:196\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo JSON Phenopackets were found in \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpp_directory\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# turn phenopackets into patients using patient creator\u001b[39;00m\n\u001b[0;32m--> 196\u001b[0m patients \u001b[38;5;241m=\u001b[39m [\u001b[43mpatient_creator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_patient\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpp\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m pp \u001b[38;5;129;01min\u001b[39;00m pps]\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# create cohort from patients\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Cohort\u001b[38;5;241m.\u001b[39mfrom_patients(patients)\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_phenopacket.py:116\u001b[0m, in \u001b[0;36mPhenopacketPatientCreator.create_patient\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Creates a Patient from the data in a given Phenopacket\u001b[39;00m\n\u001b[1;32m 109\u001b[0m \n\u001b[1;32m 110\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124;03m Patient: A Patient object\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 115\u001b[0m phenotypes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_add_phenotypes(item)\n\u001b[0;32m--> 116\u001b[0m variants \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_add_variants\u001b[49m\u001b[43m(\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 117\u001b[0m protein_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_add_protein_data(variants)\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Patient(item\u001b[38;5;241m.\u001b[39mid, phenotypes, variants, protein_data)\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_phenopacket.py:136\u001b[0m, in \u001b[0;36mPhenopacketPatientCreator._add_variants\u001b[0;34m(self, pp)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_logger\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPatient \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpp\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m has unknown alternative variant \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvc\u001b[38;5;241m.\u001b[39malt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and will not be included.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m--> 136\u001b[0m variant \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_func_ann\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mannotate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 137\u001b[0m variants_list\u001b[38;5;241m.\u001b[39mappend(variant)\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_variant.py:227\u001b[0m, in \u001b[0;36mVarCachingFunctionalAnnotator.annotate\u001b[0;34m(self, variant_coordinates)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m annotations\n\u001b[1;32m 226\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 227\u001b[0m ann \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fallback\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mannotate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariant_coordinates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cache\u001b[38;5;241m.\u001b[39mstore_annotations(variant_coordinates, ann)\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ann\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_variant.py:79\u001b[0m, in \u001b[0;36mVepFunctionalAnnotator.annotate\u001b[0;34m(self, variant_coordinates)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mannotate\u001b[39m(\u001b[38;5;28mself\u001b[39m, variant_coordinates: VariantCoordinates) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Variant:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Creates a Variant object by searching variant coordinates with Variant Effect Predictor (VEP) REST API. \u001b[39;00m\n\u001b[1;32m 73\u001b[0m \n\u001b[1;32m 74\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;124;03m Variant: A Variant object\u001b[39;00m\n\u001b[1;32m 78\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 79\u001b[0m variant \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_vep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariant_coordinates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m variant_id \u001b[38;5;241m=\u001b[39m variant\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 81\u001b[0m variant_class \u001b[38;5;241m=\u001b[39m variant\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvariant_class\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m~/GIT/genophenocorr/src/genophenocorr/preprocessing/_variant.py:130\u001b[0m, in \u001b[0;36mVepFunctionalAnnotator._query_vep\u001b[0;34m(self, variant_coordinates)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m r\u001b[38;5;241m.\u001b[39mok:\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_logging\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected a result but got an Error for variant: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariant_coordinates\u001b[38;5;241m.\u001b[39mas_string()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 130\u001b[0m \u001b[43mr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 131\u001b[0m results \u001b[38;5;241m=\u001b[39m r\u001b[38;5;241m.\u001b[39mjson()\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(results, \u001b[38;5;28mlist\u001b[39m):\n", - "File \u001b[0;32m~/GIT/genophenocorr/gpc2venv/lib/python3.8/site-packages/requests/models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1016\u001b[0m http_error_msg \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Server Error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mreason\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1018\u001b[0m )\n\u001b[1;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n", - "\u001b[0;31mHTTPError\u001b[0m: 400 Client Error: Bad Request for url: https://rest.ensembl.org/vep/human/region/9:127684373-127684373/A?LoF=1&canonical=1&domains=1&hgvs=1&mutfunc=1&numbers=1&protein=1&refseq=1&mane=1&transcript_version=1&variant_class=1" - ] + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
VariantEffectCountKey
c.1216C>TMISSENSE_VARIANT199_127675909_127675909_C_T
c.1217G>AMISSENSE_VARIANT199_127675910_127675910_G_A
c.1651C>TMISSENSE_VARIANT189_127682509_127682509_C_T
c.875G>AMISSENSE_VARIANT189_127668160_127668160_G_A
c.416C>TMISSENSE_VARIANT129_127661192_127661192_C_T
c.568C>TMISSENSE_VARIANT119_127663343_127663343_C_T
c.1162C>TSTOP_GAINED109_127675855_127675855_C_T
c.874C>TMISSENSE_VARIANT109_127668159_127668159_C_T
c.1439C>TMISSENSE_VARIANT109_127678510_127678510_C_T
c.364C>TSTOP_GAINED99_127661140_127661140_C_T
c.703C>TSTOP_GAINED89_127666205_127666205_C_T
c.1099C>TSTOP_GAINED69_127673250_127673250_C_T
c.1060T>CMISSENSE_VARIANT59_127673211_127673211_T_C
c.847G>AMISSENSE_VARIANT59_127668132_127668132_G_A
c.388_389delFRAMESHIFT_VARIANT59_127661161_127661163_ACT_A
c.569G>AMISSENSE_VARIANT59_127663344_127663344_G_A
c.795-1G>ASPLICE_ACCEPTOR_VARIANT49_127668079_127668079_G_A
c.734A>GMISSENSE_VARIANT49_127666236_127666236_A_G
c.704G>AMISSENSE_VARIANT49_127666206_127666206_G_A
c.1652G>AMISSENSE_VARIANT49_127682510_127682510_G_A
c.1702+1G>ASPLICE_DONOR_VARIANT49_127682561_127682561_G_A
c.1061G>AMISSENSE_VARIANT39_127673212_127673212_G_A
c.902+1G>ASPLICE_DONOR_VARIANT39_127668188_127668188_G_A
c.57_59delINFRAME_DELETION39_127651621_127651624_TAAA_T
c.1095_1096delFRAMESHIFT_VARIANT39_127673243_127673245_ACT_A
c.1315A>TMISSENSE_VARIANT39_127676709_127676709_A_T
c.758G>AMISSENSE_VARIANT39_127666260_127666260_G_A
c.1249+2T>CSPLICE_DONOR_VARIANT29_127675944_127675944_T_C
c.755T>CMISSENSE_VARIANT29_127666257_127666257_T_C
c.904G>TSTOP_GAINED29_127669899_127669899_G_T
c.83G>ASTOP_GAINED29_127651648_127651648_G_A
c.1206T>ASTOP_GAINED29_127675899_127675899_T_A
c.1630G>CMISSENSE_VARIANT29_127682488_127682488_G_C
c.1631G>AMISSENSE_VARIANT29_127682489_127682489_G_A
c.1029+1G>TSPLICE_DONOR_VARIANT29_127672117_127672117_G_T
c.1038_1039delFRAMESHIFT_VARIANT29_127673187_127673189_ACC_A
c.1631G>TMISSENSE_VARIANT29_127682489_127682489_G_T
c.122T>CMISSENSE_VARIANT29_127653749_127653749_T_C
c.1359+1G>ASPLICE_DONOR_VARIANT29_127676754_127676754_G_A
c.1261G>TSTOP_GAINED29_127676655_127676655_G_T
c.922A>TSTOP_GAINED29_127669917_127669917_A_T
c.795-2A>GSPLICE_ACCEPTOR_VARIANT29_127668078_127668078_A_G
c.703C>GMISSENSE_VARIANT29_127666205_127666205_C_G
c.748C>TSTOP_GAINED29_127666250_127666250_C_T
c.84G>ASTOP_GAINED29_127651649_127651649_G_A
c.998_1000delINFRAME_DELETION29_127672079_127672082_TGAA_T
c.578+1G>ASPLICE_DONOR_VARIANT29_127663354_127663354_G_A
c.733C>GMISSENSE_VARIANT29_127666235_127666235_C_G
c.1565G>ASTOP_GAINED29_127682423_127682423_G_A
\n", + "

Additionally, the following variants were observed 1 or fewer times: \n", + "c.125C>T; c.88-1G>A; c.747dup; c.1645G>A; c.1360-1_1360insC; c.548T>G; c.1482dup; c.1197C>A; c.578+1dup; c.1702+1G>C; c.164T>C; c.1420C>T; c.236C>T; c.360dup; c.227T>C; c.170-12_173del; c.987del; c.1672del; c.1652G>T; c.860T>C; c.87+1G>T; c.1021_1029+1del; c.247-1del; c.374_375del; c.*20C>T; c.16del; c.296A>G; c.1431_1447delinsAT; c.1578_1579del; c.989dup; c.444del; c.437_438del; c.1249G>C; c.771dup; c.420T>A; c.1656C>A; c.1110+1G>A; c.389del; c.679C>A; c.62_64del; c.157G>T; c.268G>C; c.1058_1061del; c.901del; c.794+1G>T; c.1094T>A; c.536T>G; c.308A>C; c.795-1G>C; c.170-2A>G; c.175G>A; c.126C>T; c.961A>T; c.385A>C; c.1193A>G; c.79del; c.1218C>A; c.1328T>G; c.607C>T; c.224A>G; c.88-2A>G; c.325+1G>T; c.685C>T; c.736G>A; c.1130dup; c.167C>G; c.164T>A; c.1606del; c.1347del; c.1303G>T; c.107T>A; c.620A>G; c.430-1G>C; c.754_755del; c.37+1G>A; c.893_894del; c.725del; c.326-1G>T; c.169+1G>A; c.560del; c.268G>T; c.931dup; c.230T>A; c.124T>C; c.41T>G; c.897_898del; c.1461G>A; c.1461G>C; c.1614_1616del; c.1006C>T; c.1194T>G; c.1029+1delinsAA; c.1495_1497del; c.1696_1697delinsA; c.585C>G; c.17T>A; c.1569_1570del; c.1265del; c.9_13del; c.*23C>T; c.217G>C; c.1105G>A; c.1547+1G>A; c.548T>C; c.1249+1G>T; c.1434G>A; c.295T>G; c.1497C>A; c.1708A>G; c.1651C>G; c.714C>A; c.1548-2A>G; c.794+2dup; c.1438C>T; c.1019_1020del; c.1643A>T; c.1030T>G; c.1277T>C; c.663+1G>A; c.1359+1G>T; c.827dup; c.1157del; c.875G>T; c.187A>T; c.539G>A; c.142del; c.551del; c.353T>G; c.88-1G>C; c.167C>A; c.1382dup; c.1250-2A>C; c.1548_1558delinsAT; c.238T>C; c.767T>C; c.1627G>A; c.1387G>T; c.430G>C; c.1607G>A; c.1334A>C; c.1630G>T; c.538T>C; c.661G>T; c.1659del; c.778G>T; c.579-2A>G; c.1324A>G; c.1654T>C; c.721del; c.251T>A; c.1408G>T; c.429+1G>A; c.875G>C; c.663+1G>T; c.1022T>C; c.795-2A>T; c.429+1G>C; c.1249+1G>A; c.1030-1G>A; c.956del; c.533C>T; c.1004C>T; c.1268T>C; c.1157dup; c.232_235del; c.296A>C; c.1462-2A>T; c.1461+1G>A; c.416C>A; c.99delinsTC; c.717del; c.901C>T; c.1769C>T; c.734A>C; c.1082C>T; c.842T>C; c.74del; c.430delinsAGA; c.148dup; c.17T>C; c.749A>G; c.867dup; c.1720A>C; c.54del; c.301G>C; c.60del; c.1105G>C; c.464del; c.1275del; c.1501_1519del; c.586A>T; c.1249G>A; c.751G>A; c.701A>G; c.1105G>T; c.1282del; c.731T>G; c.172del; c.1075C>T; c.37+1_37+2del; c.169+2T>C; c.1661A>G; c.1642A>G; c.1723C>T; c.663+1G>C; c.430-1G>T; c.466T>A; c.518C>A; c.1151dup.

\n", + "

Use the entry in the \"Key\" column to investigate whether specific variants display genotype-phenotype correlations

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "phenopacket_input_folder = \"input\"\n", - "patient_list = load_phenopacket_folder(pp_directory=phenopacket_input_folder, patient_creator=patient_creator)" + "display(HTML(viewer.variants_table(patientCohort, 'NM_003165.6')))" ] }, { "cell_type": "code", - "execution_count": null, - "id": "a41be80d-0678-4dc3-97b6-2876d69648e0", + "execution_count": 13, + "id": "c57441a4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
HPO TermCount
Counts of annotations to HPO terms for the 462 in the cohort
Global developmental delay (HP:0001263)255
Absent speech (HP:0001344)152
Infantile spasms (HP:0012469)143
Seizure (HP:0001250)129
Infantile onset (HP:0003593)121
Hypsarrhythmia (HP:0002521)120
Neonatal onset (HP:0003623)117
Intellectual disability, severe (HP:0010864)106
Inability to walk (HP:0002540)93
Generalized tonic seizure (HP:0010818)92
Multifocal epileptiform discharges (HP:0010841)86
Bilateral tonic-clonic seizure (HP:0002069)84
Ataxia (HP:0001251)82
EEG with burst suppression (HP:0010851)75
Tremor (HP:0001337)73
Focal impaired awareness seizure (HP:0002384)71
EEG with focal epileptiform discharges (HP:0011185)71
Focal-onset seizure (HP:0007359)71
Epileptic spasm (HP:0011097)70
Motor stereotypy (HP:0000733)68
Hypotonia (HP:0001252)67
Delayed speech and language development (HP:0000750)66
Refractory (HP:0031375)63
Severe global developmental delay (HP:0011344)62
Intellectual disability (HP:0001249)54
Generalized myoclonic seizure (HP:0002123)51
Developmental regression (HP:0002376)50
Focal clonic seizure (HP:0002266)50
EEG with generalized slow activity (HP:0010845)47
Infantile muscular hypotonia (HP:0008947)43
EEG with abnormally slow frequencies (HP:0011203)43
Focal tonic seizure (HP:0011167)42
Autistic behavior (HP:0000729)41
Generalized hypotonia (HP:0001290)40
Focal motor seizure (HP:0011153)37
Epileptic encephalopathy (HP:0200134)37
Autism (HP:0000717)33
Sleep disturbance (HP:0002360)33
Axial hypotonia (HP:0008936)29
Dystonia (HP:0001332)27
Intellectual disability, moderate (HP:0002342)25
Gait ataxia (HP:0002066)23
Spastic tetraplegia (HP:0002510)21
Intellectual disability, profound (HP:0002187)21
Profound global developmental delay (HP:0012736)20
Bruxism (HP:0003763)20
Hypomimic face (HP:0000338)19
Gastroesophageal reflux (HP:0002020)19
Strabismus (HP:0000486)18
Poor eye contact (HP:0000817)18
Dyskinesia (HP:0100660)18
Febrile seizure (within the age range of 3 months to 6 years) (HP:0002373)17
Generalized non-motor (absence) seizure (HP:0002121)17
Childhood onset (HP:0011463)17
Scoliosis (HP:0002650)17
Hyperactivity (HP:0000752)17
Delayed CNS myelination (HP:0002188)16
EEG with generalized epileptiform discharges (HP:0011198)16
Intention tremor (HP:0002080)16
Poor head control (HP:0002421)16
Hypoplasia of the corpus callosum (HP:0002079)16
EEG with spike-wave complexes (HP:0010850)16
Neurodevelopmental delay (HP:0012758)16
Spasticity (HP:0001257)15
Moderate global developmental delay (HP:0011343)15
Atonic seizure (HP:0010819)15
Multifocal seizures (HP:0031165)15
Constipation (HP:0002019)14
Microcephaly (HP:0000252)14
Gait disturbance (HP:0001288)14
Interictal epileptiform activity (HP:0011182)14
Bilateral tonic-clonic seizure with focal onset (HP:0007334)14
EEG with focal slow activity (HP:0010843)13
Motor delay (HP:0001270)13
Feeding difficulties (HP:0011968)13
Action tremor (HP:0002345)13
Truncal ataxia (HP:0002078)12
Bilateral multifocal epileptiform discharges (HP:0011189)12
Severe muscular hypotonia (HP:0006829)12
Dysmetria (HP:0001310)12
Limb hypertonia (HP:0002509)12
Self-injurious behavior (HP:0100716)12
Aggressive behavior (HP:0000718)12
Gastrostomy tube feeding in infancy (HP:0011471)12
Intellectual disability, mild (HP:0001256)11
Generalized-onset seizure (HP:0002197)11
Cerebral atrophy (HP:0002059)11
Hypertonia (HP:0001276)11
Broad-based gait (HP:0002136)11
EEG abnormality (HP:0002353)11
Neonatal hypotonia (HP:0001319)11
Status epilepticus (HP:0002133)11
Abnormality of movement (HP:0100022)11
Brain atrophy (HP:0012444)11
EEG with temporal focal spikes (HP:0012018)10
Behavioral abnormality (HP:0000708)10
Stereotypical hand wringing (HP:0012171)10
Spastic tetraparesis (HP:0001285)10
Frontal cortical atrophy (HP:0006913)9
Cerebral visual impairment (HP:0100704)9
Focal myoclonic seizure (HP:0011166)9
Normal interictal EEG (HP:0002372)9
Resting tremor (HP:0002322)9
Language impairment (HP:0002463)9
Cerebral cortical atrophy (HP:0002120)8
Nystagmus (HP:0000639)8
Babinski sign (HP:0003487)8
Myoclonus (HP:0001336)8
EEG with focal sharp waves (HP:0011196)8
Hyperreflexia (HP:0001347)8
Delayed gross motor development (HP:0002194)7
Abnormality of vision (HP:0000504)7
Generalized clonic seizure (HP:0011169)7
Intermittent hyperventilation (HP:0004879)7
Unsteady gait (HP:0002317)7
Bilateral tonic-clonic seizure with generalized onset (HP:0025190)7
Premature birth (HP:0001622)7
EEG with focal spikes (HP:0011193)7
Prominent fingertip pads (HP:0001212)7
Lower limb spasticity (HP:0002061)7
Impulsivity (HP:0100710)7
Focal hemiclonic seizure (HP:0006813)7
Chorea (HP:0002072)6
Cogwheel rigidity (HP:0002396)6
Bradykinesia (HP:0002067)6
Short attention span (HP:0000736)6
Postural instability (HP:0002172)6
Encephalopathy (HP:0001298)6
Impaired social interactions (HP:0000735)6
EEG with occipital focal spikes (HP:0012016)6
Infantile axial hypotonia (HP:0009062)6
Brachycephaly (HP:0000248)5
Macrocephaly (HP:0000256)5
Nocturnal seizures (HP:0031951)5
Impaired pain sensation (HP:0007328)5
Focal autonomic seizure (HP:0011154)5
EEG with spike-wave complexes (<2.5 Hz) (HP:0010847)5
Cognitive impairment (HP:0100543)5
Eczema (HP:0000964)5
Joint hypermobility (HP:0001382)5
Chronic constipation (HP:0012450)5
EEG with frontal focal spike waves (HP:0012010)5
Stereotypical body rocking (HP:0012172)5
Emotional lability (HP:0000712)5
EEG with polyspike wave complexes (HP:0002392)5
Delayed fine motor development (HP:0010862)4
Narrow mouth (HP:0000160)4
Pes valgus (HP:0008081)4
Cerebral hypomyelination (HP:0006808)4
Abnormal eating behavior (HP:0100738)4
Attention deficit hyperactivity disorder (HP:0007018)4
Hand tremor (HP:0002378)4
Spastic diplegia (HP:0001264)4
Limb dystonia (HP:0002451)4
Anxiety (HP:0000739)4
EEG with occipital slowing (HP:0011210)4
Continuous spike and waves during slow sleep (HP:0031491)4
Gait imbalance (HP:0002141)4
Sleep-wake cycle disturbance (HP:0006979)4
Failure to thrive (HP:0001508)4
Progressive microcephaly (HP:0000253)4
Head tremor (HP:0002346)4
Involuntary movements (HP:0004305)4
Focal impaired awareness autonomic seizure (HP:0032755)4
Abnormal facial shape (HP:0001999)4
EEG with temporal sharp waves (HP:0011296)4
EEG with generalized polyspikes (HP:0012001)4
Difficulty walking (HP:0002355)4
Increased body weight (HP:0004324)4
Atypical absence seizure (HP:0007270)4
Joint laxity (HP:0001388)4
Global brain atrophy (HP:0002283)4
Dementia (HP:0000726)3
Upslanted palpebral fissure (HP:0000582)3
Laryngomalacia (HP:0001601)3
Cerebral palsy (HP:0100021)3
Postural tremor (HP:0002174)3
Hypokinesia (HP:0002375)3
Umbilical hernia (HP:0001537)3
Choreoathetosis (HP:0001266)3
Abnormality of the nervous system (HP:0000707)3
EEG with centrotemporal focal spike waves (HP:0012557)3
Abnormal muscle tone (HP:0003808)3
Torticollis (HP:0000473)3
Visual impairment (HP:0000505)3
Skeletal muscle atrophy (HP:0003202)3
Loss of speech (HP:0002371)3
Flat occiput (HP:0005469)3
Head titubation (HP:0002599)3
Loss of ability to walk in first decade (HP:0006794)3
Arachnoid cyst (HP:0100702)3
Mild global developmental delay (HP:0011342)3
Focal motor seizure with version (HP:0011175)3
EEG with frontal focal spikes (HP:0012015)3
Tetraparesis (HP:0002273)3
Inappropriate laughter (HP:0000748)3
Malar flattening (HP:0000272)3
Tapered finger (HP:0001182)3
Delayed myelination (HP:0012448)3
EEG with multifocal slow activity (HP:0010844)3
Slender finger (HP:0001238)3
Irritability (HP:0000737)3
Exaggerated startle response (HP:0002267)3
Ventriculomegaly (HP:0002119)3
Hypermetropia (HP:0000540)3
Astigmatism (HP:0000483)3
Osteopenia (HP:0000938)3
Round face (HP:0000311)2
Hyperintensity of cerebral white matter on MRI (HP:0030890)2
Arm dystonia (HP:0031960)2
Growth delay (HP:0001510)2
EEG with temporal sharp slow waves (HP:0011289)2
Opisthotonus (HP:0002179)2
Focal behavior arrest seizure (HP:0011173)2
Widened subarachnoid space (HP:0012704)2
Focal cortical dysplasia (HP:0032046)2
Focal EEG discharges with secondary generalization (HP:0011188)2
Recurrent hand flapping (HP:0100023)2
Hyperventilation (HP:0002883)2
Secondary microcephaly (HP:0005484)2
Expressive language delay (HP:0002474)2
Focal T2 hyperintense basal ganglia lesion (HP:0007183)2
Periventricular leukomalacia (HP:0006970)2
Abnormal pattern of respiration (HP:0002793)2
Abnormal subarachnoid space morphology (HP:0012703)2
Limb ataxia (HP:0002070)2
Dysphagia (HP:0002015)2
Hearing impairment (HP:0000365)2
Short foot (HP:0001773)2
Ankle clonus (HP:0011448)2
Hemihypsarrhythmia (HP:0011215)2
Juvenile onset (HP:0003621)2
Talipes equinovarus (HP:0001762)2
EEG with changes in voltage (HP:0011201)2
Delayed eruption of teeth (HP:0000684)2
Developmental stagnation (HP:0007281)2
Dysplastic corpus callosum (HP:0006989)2
Limb joint contracture (HP:0003121)2
Short stature (HP:0004322)2
Relative macrocephaly (HP:0004482)2
Typical absence seizure (HP:0011147)2
EEG with parietal focal spikes (HP:0012017)2
Pes planus (HP:0001763)2
Gliosis (HP:0002171)2
Upper limb postural tremor (HP:0007351)2
Upper limb spasticity (HP:0006986)2
Increased theta frequency activity in EEG (HP:0031535)2
Generalized muscle weakness (HP:0003324)2
Epicanthus (HP:0000286)2
Caesarian section (HP:0011410)2
Lower limb hypertonia (HP:0006895)2
Precocious puberty (HP:0000826)2
Status epilepticus without prominent motor symptoms (HP:0031475)2
Inverted nipples (HP:0003186)2
Clinodactyly (HP:0030084)2
Apnea (HP:0002104)2
Poor fine motor coordination (HP:0007010)2
Poor gross motor coordination (HP:0007015)2
Paroxysmal dyskinesia (HP:0007166)2
Rigidity (HP:0002063)2
Enlarged cisterna magna (HP:0002280)2
Obesity (HP:0001513)2
Insomnia (HP:0100785)2
Tip-toe gait (HP:0030051)2
Focal aware seizure (HP:0002349)2
Drooling (HP:0002307)2
Focal hyperkinetic seizure (HP:0011174)2
Hyporeflexia (HP:0001265)2
Abnormal temper tantrums (HP:0025160)2
Death in infancy (HP:0001522)2
Plagiocephaly (HP:0001357)2
Abnormal upper lip morphology (HP:0000177)2
High palate (HP:0000218)2
Protruding ear (HP:0000411)2
Frontal bossing (HP:0002007)2
Depressed nasal bridge (HP:0005280)2
Pain insensitivity (HP:0007021)2
Clumsiness (HP:0002312)2
Lower limb hyperreflexia (HP:0002395)2
Interictal EEG abnormality (HP:0025373)2
Breathing dysregulation (HP:0005957)2
Failure to thrive in infancy (HP:0001531)2
Polyminimyoclonus (HP:0031986)2
Periventricular white matter hyperintensities (HP:0030891)2
Esotropia (HP:0000565)2
Meconium stained amniotic fluid (HP:0012420)2
Poor coordination (HP:0002370)2
Truncal titubation (HP:0030147)2
EEG with focal sharp slow waves (HP:0011195)2
EEG with generalized spikes (HP:0012000)2
Short stepped shuffling gait (HP:0007311)2
Pregnancy exposure (HP:0031437)2
Feeding difficulties in infancy (HP:0008872)2
Limb dysmetria (HP:0002406)2
Low levels of vitamin D (HP:0100512)1
High forehead (HP:0000348)1
Adducted thumb (HP:0001181)1
Hypothyroidism (HP:0000821)1
Decreased CSF homovanillic acid concentration (HP:0003785)1
Decreased CSF 5-hydroxyindolacetic acid concentration (HP:0025455)1
Generalized myoclonic-atonic seizure (HP:0011170)1
Myoclonic spasms (HP:0003739)1
Abnormality of coordination (HP:0011443)1
Psychosis (HP:0000709)1
Polyhydramnios (HP:0001561)1
High pitched voice (HP:0001620)1
Rotary nystagmus (HP:0001583)1
Poor motor coordination (HP:0002275)1
Focal seizure with eyelid myoclonia (HP:0011168)1
Recurrent cystitis (HP:0012786)1
Soft skin (HP:0000977)1
Episodic ataxia (HP:0002131)1
Increased serum lactate (HP:0002151)1
Increased serum pyruvate (HP:0003542)1
Medial flaring of the eyebrow (HP:0010747)1
Prominent eyelashes (HP:0011231)1
Focal hemifacial clonic seizure (HP:0007332)1
Athetosis (HP:0002305)1
Parasomnia (HP:0025234)1
Blue nevus (HP:0100814)1
Hemiplegia/hemiparesis (HP:0004374)1
Talipes (HP:0001883)1
Inguinal hernia (HP:0000023)1
Enamel hypoplasia (HP:0006297)1
Abdominal obesity (HP:0012743)1
Recurrent otitis media (HP:0000403)1
Tented upper lip vermilion (HP:0010804)1
Abnormal CNS myelination (HP:0011400)1
Abnormal choroid plexus morphology (HP:0007376)1
Central nervous system cyst (HP:0030724)1
Peripheral neuropathy (HP:0009830)1
Asthma (HP:0002099)1
Bronchitis (HP:0012387)1
Abnormality of the kidney (HP:0000077)1
Ventricular septal defect (HP:0001629)1
Abnormality of pulmonary circulation (HP:0030875)1
Upper motor neuron dysfunction (HP:0002493)1
Parkinsonism (HP:0001300)1
Generalized hyperreflexia (HP:0007034)1
Decreased activity of mitochondrial complex I (HP:0011923)1
Downslanted palpebral fissures (HP:0000494)1
Anteverted nares (HP:0000463)1
Deeply set eye (HP:0000490)1
Hirsutism (HP:0001007)1
Horizontal eyebrow (HP:0011228)1
Reduced bone mineral density (HP:0004349)1
Abnormality of eye movement (HP:0000496)1
Hyperlordosis (HP:0003307)1
Brisk reflexes (HP:0001348)1
Dysdiadochokinesis (HP:0002075)1
Short philtrum (HP:0000322)1
Thoracic kyphosis (HP:0002942)1
Enlarged sylvian cistern (HP:0100952)1
Focal cortical dysplasia type Ib (HP:0032049)1
Focal cortical dysplasia type III (HP:0032054)1
EEG with frontal sharp waves (HP:0011294)1
Monocular strabismus (HP:0010877)1
Wide nasal bridge (HP:0000431)1
Sleep-wake inversion (HP:0031849)1
Abnormal respiratory system physiology (HP:0002795)1
Everted upper lip vermilion (HP:0010803)1
Death in adolescence (HP:0011421)1
Accessory spleen (HP:0001747)1
Unilateral renal hypoplasia (HP:0012583)1
Pigmentary retinopathy (HP:0000580)1
Proportionate tall stature (HP:0011407)1
Occipital cortical atrophy (HP:0012105)1
Gray matter heterotopia (HP:0002282)1
Oculomotor apraxia (HP:0000657)1
Foot dorsiflexor weakness (HP:0009027)1
Bipolar affective disorder (HP:0007302)1
Ulcerative colitis (HP:0100279)1
Neurogenic bladder (HP:0000011)1
EEG with central focal spikes (HP:0012014)1
Bilateral ptosis (HP:0001488)1
Axial muscle weakness (HP:0003327)1
Enuresis (HP:0000805)1
Speech apraxia (HP:0011098)1
Long toe (HP:0010511)1
Inflexible adherence to routines or rituals (HP:0000732)1
Punctate periventricular T2 hyperintense foci (HP:0030081)1
Apneic episodes in infancy (HP:0005949)1
Non-rapid eye movement parasomnia (HP:0025235)1
Abnormal skull morphology (HP:0000929)1
Triggered by physical trauma (HP:0031135)1
Carious teeth (HP:0000670)1
Dandy-Walker malformation (HP:0001305)1
CNS hypomyelination (HP:0003429)1
Oromotor apraxia (HP:0007301)1
Sleep terror (HP:0030765)1
EEG with parietal focal spike waves (HP:0012012)1
Hyperkinetic movements (HP:0002487)1
Submucous cleft of soft and hard palate (HP:0410031)1
Bifid uvula (HP:0000193)1
Cerebellar atrophy (HP:0001272)1
Agenesis of corpus callosum (HP:0001274)1
Overgrowth (HP:0001548)1
EEG with central sharp slow waves (HP:0011291)1
EEG with parietal sharp slow waves (HP:0011288)1
EEG with intermittent slow activity (HP:0011205)1
Abnormality of brain morphology (HP:0012443)1
Supraventricular tachycardia (HP:0004755)1
Shuffling gait (HP:0002362)1
Hydrocele testis (HP:0000034)1
Unilateral multifocal epileptiform discharges (HP:0011191)1
Somatic sensory dysfunction (HP:0003474)1
Poor hand-eye coordination (HP:0007057)1
Upper limb dysmetria (HP:0020036)1
Restless legs (HP:0012452)1
Elevated brain lactate level by MRS (HP:0012707)1
Progressive gait ataxia (HP:0007240)1
Abnormal corpus callosum morphology (HP:0001273)1
Lumbar hyperlordosis (HP:0002938)1
Polydipsia (HP:0001959)1
Aphasia (HP:0002381)1
Abnormal aggressive, impulsive or violent behavior (HP:0006919)1
Esodeviation (HP:0020045)1
Heart murmur (HP:0030148)1
Abnormal dental enamel morphology (HP:0000682)1
Abnormality of the nose (HP:0000366)1
Intracranial cystic lesion (HP:0010576)1
Hydronephrosis (HP:0000126)1
Laryngeal cleft (HP:0008751)1
2-3 toe syndactyly (HP:0004691)1
Broad forehead (HP:0000337)1
Cupped ear (HP:0000378)1
Abnormal hair pattern (HP:0010720)1
Cerebral white matter atrophy (HP:0012762)1
Decreased activity of mitochondrial respiratory chain (HP:0008972)1
Respiratory distress (HP:0002098)1
Abnormal dentate nucleus morphology (HP:0100321)1
EEG with central sharp waves (HP:0011293)1
Profound static encephalopathy (HP:0007069)1
Pill-rolling tremor (HP:0025387)1
Primary microcephaly (HP:0011451)1
Broad face (HP:0000283)1
Short nose (HP:0003196)1
Broad palm (HP:0001169)1
Broad hallux (HP:0010055)1
Prenatal movement abnormality (HP:0001557)1
Hypoglycemia (HP:0001943)1
Lateral ventricle dilatation (HP:0006956)1
Wide mouth (HP:0000154)1
Simple febrile seizure (HP:0011171)1
Visual hallucinations (HP:0002367)1
Nasogastric tube feeding in infancy (HP:0011470)1
Ventouse delivery (HP:0011412)1
Developmental stagnation at onset of seizures (HP:0006834)1
Recurrent pneumonia (HP:0006532)1
Abnormal brain FDG positron emission tomography (HP:0012658)1
Hyperacusis (HP:0010780)1
EEG with generalized sharp slow waves (HP:0011199)1
Facial hypotonia (HP:0000297)1
Exotropia (HP:0000577)1
Tetraplegia (HP:0002445)1
Prominent forehead (HP:0011220)1
Abnormal eyelid morphology (HP:0000492)1
Abnormality of the outer ear (HP:0000356)1
Precocious puberty in males (HP:0008185)1
Auditory hallucinations (HP:0008765)1
Focal emotional seizure with laughing (HP:0010821)1
Focal dystonia (HP:0004373)1
Brachydactyly (HP:0001156)1
EEG with continuous slow activity (HP:0011204)1
Abnormal cerebral white matter morphology (HP:0002500)1
Moderate sensorineural hearing impairment (HP:0008504)1
Precocious puberty in females (HP:0010465)1
Breech presentation (HP:0001623)1
Eyelid myoclonus (HP:0025097)1
Allergy (HP:0012393)1
Small hand (HP:0200055)1
Spastic hemiparesis (HP:0011099)1
Abnormal cerebral subcortex morphology (HP:0010993)1
Agitation (HP:0000713)1
Maintenance insomnia (HP:0031355)1
Neonatal respiratory distress (HP:0002643)1
Hypertelorism (HP:0000316)1
Obsessive-compulsive behavior (HP:0000722)1
Infectious encephalitis (HP:0002383)1
Oculogyric crisis (HP:0010553)1
Widened cerebral subarachnoid space (HP:0012766)1
Nasogastric tube feeding (HP:0040288)1
Central sleep apnea (HP:0010536)1
Kinetic tremor (HP:0030186)1
Cyanotic episode (HP:0200048)1
Motor deterioration (HP:0002333)1
Abnormality of the face (HP:0000271)1
Stooped posture (HP:0025403)1
Maternal hypertension (HP:0008071)1
Prominent superficial veins (HP:0001015)1
Dermal translucency (HP:0010648)1
Aplasia/Hypoplasia of the nails (HP:0008386)1
Recurrent urinary tract infections (HP:0000010)1
Recurrent upper respiratory tract infections (HP:0002788)1
EEG with frontal sharp slow waves (HP:0011290)1
Deep philtrum (HP:0002002)1
Exaggerated cupid's bow (HP:0002263)1
Echolalia (HP:0010529)1
Exodeviation (HP:0020049)1
Gastrointestinal dysmotility (HP:0002579)1
Gastroparesis (HP:0002578)1
Fatigue (HP:0012378)1
Subependymal nodules (HP:0009716)1
Basal ganglia gliosis (HP:0006999)1
Abnormality of the sphenoid sinus (HP:0430022)1
Titubation (HP:0030187)1
Beta-EEG (HP:0011179)1
Multifocal hyperintensity of cerebral white matter on MRI (HP:0040329)1
Extrapyramidal muscular rigidity (HP:0007076)1
Death in childhood (HP:0003819)1
Poor speech (HP:0002465)1
Expressive aphasia (HP:0002427)1
Hearing abnormality (HP:0000364)1
Ophthalmoplegia (HP:0000602)1
Hip dysplasia (HP:0001385)1
Hip osteoarthritis (HP:0008843)1
EEG with spike-wave complexes (2.5-3.5 Hz) (HP:0010848)1
Focal cortical dysplasia type Ia (HP:0032048)1
Abnormal brainstem MRI signal intensity (HP:0012747)1
Muscle weakness (HP:0001324)1
EEG with focal spike waves (HP:0011197)1
Delayed social development (HP:0012434)1
Upper limb hypertonia (HP:0200049)1
Axial dystonia (HP:0002530)1
EEG with generalized slow activity grade 1 (HP:0011206)1
Phimosis (HP:0001741)1
Hip subluxation (HP:0030043)1
Unilateral cryptorchidism (HP:0012741)1
Bulbous nose (HP:0000414)1
Long eyelashes (HP:0000527)1
Kyphosis (HP:0002808)1
Short neck (HP:0000470)1
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "from genophenocorr.model import Cohort\n", - "from genophenocorr.view import CohortViewer\n", - "from IPython.display import display, HTML\n", - "cohort = Cohort.from_patients(patient_list)" + "display(HTML(viewer.hpo_term_counts_table(patientCohort)))" ] }, { - "cell_type": "markdown", - "id": "175d2b14-b75a-499a-9251-5154aa31c683", + "cell_type": "code", + "execution_count": 14, + "id": "dc826732", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'NM_003165.6': Counter({'FRAMESHIFT_VARIANT': 68,\n", + " 'MISSENSE_VARIANT': 249,\n", + " 'SPLICE_DONOR_VARIANT': 36,\n", + " 'STOP_GAINED': 72,\n", + " 'SPLICE_REGION_VARIANT': 18,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 22,\n", + " 'THREE_PRIME_UTR_VARIANT': 2,\n", + " 'INFRAME_DELETION': 8,\n", + " 'SYNONYMOUS_VARIANT': 4,\n", + " 'PROTEIN_ALTERING_VARIANT': 2,\n", + " 'SPLICE_POLYPYRIMIDINE_TRACT_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1})}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "

CohortViewer

\n", - "

The CohortViewer class offers various methods to create tables and graphics to view the data in the cohort.\n", - "It is still very much in progress

" + "patientCohort.list_data_by_tx('NM_003165.6')" ] }, { "cell_type": "code", - "execution_count": null, - "id": "1c7c5c1e-8e5e-488a-98ec-85689adff61c", + "execution_count": 18, + "id": "cc3cc692", "metadata": {}, "outputs": [], "source": [ - "viewer = CohortViewer(hpo=hpo)" + "from genophenocorr.analysis import CohortAnalysis\n", + "from genophenocorr.model import VariantEffect" ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3a54a6a4", + "metadata": {}, + "outputs": [], + "source": [ + "analysis = CohortAnalysis(patientCohort,'NM_003165.6',hpo,p_val_correction='fdr_bh')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "c7ab8b6b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
With missense_variantWithout missense_variant
CountPercentCountPercentp-valueCorrected p-values
HP:0001251 (Ataxia)4718.88%6630.99%0.0032820.095184
HP:0002376 (Developmental regression)3514.06%157.04%0.0164020.237831
HP:0011097 (Epileptic spasm)9237.40%9947.37%0.0360730.262600
HP:0001252 (Hypotonia)8835.34%9645.07%0.0362210.262600
HP:0003623 (Neonatal onset)5522.09%6229.11%0.0871380.427422
HP:0012469 (Infantile spasms)6928.05%7435.41%0.1050550.427422
HP:0002521 (Hypsarrhythmia)5723.08%6329.72%0.1112640.427422
HP:0002266 (Focal clonic seizure)2510.16%3215.31%0.1179090.427422
HP:0002540 (Inability to walk)5823.29%3817.84%0.1679980.513213
HP:0031375 (Refractory)3915.66%2411.27%0.1769700.513213
HP:0001337 (Tremor)5522.09%5827.23%0.2323870.594057
HP:0002384 (Focal impaired awareness seizure)4317.48%2813.40%0.2458170.594057
HP:0010845 (EEG with generalized slow activity)3012.15%198.96%0.2918500.651050
HP:0001249 (Intellectual disability)10843.37%10147.42%0.3998340.775947
HP:0010818 (Generalized tonic seizure)4618.70%4622.01%0.4131710.775947
HP:0001344 (Absent speech)8634.54%6630.99%0.4286170.775947
HP:0001263 (Global developmental delay)17469.88%15673.24%0.4699260.775947
HP:0011344 (Severe global developmental delay)3614.46%2612.21%0.4969880.775947
HP:0011185 (EEG with focal epileptiform discharges)10843.72%8640.57%0.5083790.775947
HP:0010841 (Multifocal epileptiform discharges)5622.67%4320.28%0.5703020.826938
HP:0010851 (EEG with burst suppression)3815.38%3717.45%0.6129060.846394
HP:0003593 (Infantile onset)6325.30%5827.23%0.6717620.885505
HP:0002069 (Bilateral tonic-clonic seizure)5522.36%4320.57%0.7315280.914891
HP:0001250 (Seizure)22089.43%18990.43%0.7571520.914891
HP:0010864 (Intellectual disability, severe)5622.49%5023.47%0.8249330.949882
HP:0000750 (Delayed speech and language development)11445.78%10046.95%0.8516180.949882
HP:0002123 (Generalized myoclonic seizure)2811.38%2311.00%1.0000001.000000
HP:0000733 (Motor stereotypy)4317.27%3717.37%1.0000001.000000
HP:0007359 (Focal-onset seizure)12350.00%10449.76%1.0000001.000000
\n", + "
" + ], + "text/plain": [ + " With missense_variant \\\n", + " Count \n", + "HP:0001251 (Ataxia) 47 \n", + "HP:0002376 (Developmental regression) 35 \n", + "HP:0011097 (Epileptic spasm) 92 \n", + "HP:0001252 (Hypotonia) 88 \n", + "HP:0003623 (Neonatal onset) 55 \n", + "HP:0012469 (Infantile spasms) 69 \n", + "HP:0002521 (Hypsarrhythmia) 57 \n", + "HP:0002266 (Focal clonic seizure) 25 \n", + "HP:0002540 (Inability to walk) 58 \n", + "HP:0031375 (Refractory) 39 \n", + "HP:0001337 (Tremor) 55 \n", + "HP:0002384 (Focal impaired awareness seizure) 43 \n", + "HP:0010845 (EEG with generalized slow activity) 30 \n", + "HP:0001249 (Intellectual disability) 108 \n", + "HP:0010818 (Generalized tonic seizure) 46 \n", + "HP:0001344 (Absent speech) 86 \n", + "HP:0001263 (Global developmental delay) 174 \n", + "HP:0011344 (Severe global developmental delay) 36 \n", + "HP:0011185 (EEG with focal epileptiform dischar... 108 \n", + "HP:0010841 (Multifocal epileptiform discharges) 56 \n", + "HP:0010851 (EEG with burst suppression) 38 \n", + "HP:0003593 (Infantile onset) 63 \n", + "HP:0002069 (Bilateral tonic-clonic seizure) 55 \n", + "HP:0001250 (Seizure) 220 \n", + "HP:0010864 (Intellectual disability, severe) 56 \n", + "HP:0000750 (Delayed speech and language develop... 114 \n", + "HP:0002123 (Generalized myoclonic seizure) 28 \n", + "HP:0000733 (Motor stereotypy) 43 \n", + "HP:0007359 (Focal-onset seizure) 123 \n", + "\n", + " \\\n", + " Percent \n", + "HP:0001251 (Ataxia) 18.88% \n", + "HP:0002376 (Developmental regression) 14.06% \n", + "HP:0011097 (Epileptic spasm) 37.40% \n", + "HP:0001252 (Hypotonia) 35.34% \n", + "HP:0003623 (Neonatal onset) 22.09% \n", + "HP:0012469 (Infantile spasms) 28.05% \n", + "HP:0002521 (Hypsarrhythmia) 23.08% \n", + "HP:0002266 (Focal clonic seizure) 10.16% \n", + "HP:0002540 (Inability to walk) 23.29% \n", + "HP:0031375 (Refractory) 15.66% \n", + "HP:0001337 (Tremor) 22.09% \n", + "HP:0002384 (Focal impaired awareness seizure) 17.48% \n", + "HP:0010845 (EEG with generalized slow activity) 12.15% \n", + "HP:0001249 (Intellectual disability) 43.37% \n", + "HP:0010818 (Generalized tonic seizure) 18.70% \n", + "HP:0001344 (Absent speech) 34.54% \n", + "HP:0001263 (Global developmental delay) 69.88% \n", + "HP:0011344 (Severe global developmental delay) 14.46% \n", + "HP:0011185 (EEG with focal epileptiform dischar... 43.72% \n", + "HP:0010841 (Multifocal epileptiform discharges) 22.67% \n", + "HP:0010851 (EEG with burst suppression) 15.38% \n", + "HP:0003593 (Infantile onset) 25.30% \n", + "HP:0002069 (Bilateral tonic-clonic seizure) 22.36% \n", + "HP:0001250 (Seizure) 89.43% \n", + "HP:0010864 (Intellectual disability, severe) 22.49% \n", + "HP:0000750 (Delayed speech and language develop... 45.78% \n", + "HP:0002123 (Generalized myoclonic seizure) 11.38% \n", + "HP:0000733 (Motor stereotypy) 17.27% \n", + "HP:0007359 (Focal-onset seizure) 50.00% \n", + "\n", + " Without missense_variant \\\n", + " Count \n", + "HP:0001251 (Ataxia) 66 \n", + "HP:0002376 (Developmental regression) 15 \n", + "HP:0011097 (Epileptic spasm) 99 \n", + "HP:0001252 (Hypotonia) 96 \n", + "HP:0003623 (Neonatal onset) 62 \n", + "HP:0012469 (Infantile spasms) 74 \n", + "HP:0002521 (Hypsarrhythmia) 63 \n", + "HP:0002266 (Focal clonic seizure) 32 \n", + "HP:0002540 (Inability to walk) 38 \n", + "HP:0031375 (Refractory) 24 \n", + "HP:0001337 (Tremor) 58 \n", + "HP:0002384 (Focal impaired awareness seizure) 28 \n", + "HP:0010845 (EEG with generalized slow activity) 19 \n", + "HP:0001249 (Intellectual disability) 101 \n", + "HP:0010818 (Generalized tonic seizure) 46 \n", + "HP:0001344 (Absent speech) 66 \n", + "HP:0001263 (Global developmental delay) 156 \n", + "HP:0011344 (Severe global developmental delay) 26 \n", + "HP:0011185 (EEG with focal epileptiform dischar... 86 \n", + "HP:0010841 (Multifocal epileptiform discharges) 43 \n", + "HP:0010851 (EEG with burst suppression) 37 \n", + "HP:0003593 (Infantile onset) 58 \n", + "HP:0002069 (Bilateral tonic-clonic seizure) 43 \n", + "HP:0001250 (Seizure) 189 \n", + "HP:0010864 (Intellectual disability, severe) 50 \n", + "HP:0000750 (Delayed speech and language develop... 100 \n", + "HP:0002123 (Generalized myoclonic seizure) 23 \n", + "HP:0000733 (Motor stereotypy) 37 \n", + "HP:0007359 (Focal-onset seizure) 104 \n", + "\n", + " \\\n", + " Percent p-value \n", + "HP:0001251 (Ataxia) 30.99% 0.003282 \n", + "HP:0002376 (Developmental regression) 7.04% 0.016402 \n", + "HP:0011097 (Epileptic spasm) 47.37% 0.036073 \n", + "HP:0001252 (Hypotonia) 45.07% 0.036221 \n", + "HP:0003623 (Neonatal onset) 29.11% 0.087138 \n", + "HP:0012469 (Infantile spasms) 35.41% 0.105055 \n", + "HP:0002521 (Hypsarrhythmia) 29.72% 0.111264 \n", + "HP:0002266 (Focal clonic seizure) 15.31% 0.117909 \n", + "HP:0002540 (Inability to walk) 17.84% 0.167998 \n", + "HP:0031375 (Refractory) 11.27% 0.176970 \n", + "HP:0001337 (Tremor) 27.23% 0.232387 \n", + "HP:0002384 (Focal impaired awareness seizure) 13.40% 0.245817 \n", + "HP:0010845 (EEG with generalized slow activity) 8.96% 0.291850 \n", + "HP:0001249 (Intellectual disability) 47.42% 0.399834 \n", + "HP:0010818 (Generalized tonic seizure) 22.01% 0.413171 \n", + "HP:0001344 (Absent speech) 30.99% 0.428617 \n", + "HP:0001263 (Global developmental delay) 73.24% 0.469926 \n", + "HP:0011344 (Severe global developmental delay) 12.21% 0.496988 \n", + "HP:0011185 (EEG with focal epileptiform dischar... 40.57% 0.508379 \n", + "HP:0010841 (Multifocal epileptiform discharges) 20.28% 0.570302 \n", + "HP:0010851 (EEG with burst suppression) 17.45% 0.612906 \n", + "HP:0003593 (Infantile onset) 27.23% 0.671762 \n", + "HP:0002069 (Bilateral tonic-clonic seizure) 20.57% 0.731528 \n", + "HP:0001250 (Seizure) 90.43% 0.757152 \n", + "HP:0010864 (Intellectual disability, severe) 23.47% 0.824933 \n", + "HP:0000750 (Delayed speech and language develop... 46.95% 0.851618 \n", + "HP:0002123 (Generalized myoclonic seizure) 11.00% 1.000000 \n", + "HP:0000733 (Motor stereotypy) 17.37% 1.000000 \n", + "HP:0007359 (Focal-onset seizure) 49.76% 1.000000 \n", + "\n", + " \n", + " Corrected p-values \n", + "HP:0001251 (Ataxia) 0.095184 \n", + "HP:0002376 (Developmental regression) 0.237831 \n", + "HP:0011097 (Epileptic spasm) 0.262600 \n", + "HP:0001252 (Hypotonia) 0.262600 \n", + "HP:0003623 (Neonatal onset) 0.427422 \n", + "HP:0012469 (Infantile spasms) 0.427422 \n", + "HP:0002521 (Hypsarrhythmia) 0.427422 \n", + "HP:0002266 (Focal clonic seizure) 0.427422 \n", + "HP:0002540 (Inability to walk) 0.513213 \n", + "HP:0031375 (Refractory) 0.513213 \n", + "HP:0001337 (Tremor) 0.594057 \n", + "HP:0002384 (Focal impaired awareness seizure) 0.594057 \n", + "HP:0010845 (EEG with generalized slow activity) 0.651050 \n", + "HP:0001249 (Intellectual disability) 0.775947 \n", + "HP:0010818 (Generalized tonic seizure) 0.775947 \n", + "HP:0001344 (Absent speech) 0.775947 \n", + "HP:0001263 (Global developmental delay) 0.775947 \n", + "HP:0011344 (Severe global developmental delay) 0.775947 \n", + "HP:0011185 (EEG with focal epileptiform dischar... 0.775947 \n", + "HP:0010841 (Multifocal epileptiform discharges) 0.826938 \n", + "HP:0010851 (EEG with burst suppression) 0.846394 \n", + "HP:0003593 (Infantile onset) 0.885505 \n", + "HP:0002069 (Bilateral tonic-clonic seizure) 0.914891 \n", + "HP:0001250 (Seizure) 0.914891 \n", + "HP:0010864 (Intellectual disability, severe) 0.949882 \n", + "HP:0000750 (Delayed speech and language develop... 0.949882 \n", + "HP:0002123 (Generalized myoclonic seizure) 1.000000 \n", + "HP:0000733 (Motor stereotypy) 1.000000 \n", + "HP:0007359 (Focal-onset seizure) 1.000000 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analysis.compare_by_variant_type(VariantEffect.MISSENSE_VARIANT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0981332", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "gpc2venv", + "display_name": "enviro", "language": "python", - "name": "gpc2venv" + "name": "enviro" }, "language_info": { "codemirror_mode": { @@ -158,7 +1431,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/src/genophenocorr/analysis/_analyzers.py b/src/genophenocorr/analysis/_analyzers.py index 82e6ebcd..f34b5e9f 100644 --- a/src/genophenocorr/analysis/_analyzers.py +++ b/src/genophenocorr/analysis/_analyzers.py @@ -242,7 +242,7 @@ def compare_by_variant_effect(self, effect: VariantEffect, tx_id: str): self._transcript = previous return result - def compare_by_variant_type(self, var_type1:VariantEffect, var_type2:VariantEffect = None): + def compare_by_variant_type(self, var_type1:VariantEffect, var_type2:typing.Optional[VariantEffect] = None): """Runs Fisher Exact analysis, finds any correlation between given variant effects across phenotypes. Args: @@ -309,7 +309,7 @@ def compare_by_variant(self, variant1:str, variant2:str = None): final_df.insert(5, ('', "Corrected p-values"), corrected_pvals, True) return final_df.sort_values([('', 'Corrected p-values'), ('', 'p-value')]) - def compare_by_exon(self, exon1:int, exon2:int = None): + def compare_by_exon(self, exon1:int, exon2:typing.Optional[int] = None): """Runs Fisher Exact analysis, finds any correlation between given exons across phenotypes. Args: @@ -341,7 +341,7 @@ def compare_by_exon(self, exon1:int, exon2:int = None): final_df.insert(5, ('', "Corrected p-values"), corrected_pvals, True) return final_df.sort_values([('', 'Corrected p-values'), ('', 'p-value')]) - def compare_by_protein_feature_type(self, feature1:FeatureType, feature2:FeatureType = None): + def compare_by_protein_feature_type(self, feature1:FeatureType, feature2:typing.Optional[FeatureType] = None): """Runs Fisher Exact analysis, finds any correlation between given feature type across phenotypes. Args: @@ -374,7 +374,7 @@ def compare_by_protein_feature_type(self, feature1:FeatureType, feature2:Feature return final_df.sort_values([('', 'Corrected p-values'), ('', 'p-value')]) - def compare_by_protein_feature(self, feature1:str, feature2:str = None): + def compare_by_protein_feature(self, feature1:str, feature2:typing.Optional[str] = None): """Runs Fisher Exact analysis, finds any correlation between given feature and phenotypes. Args: diff --git a/src/genophenocorr/preprocessing/_phenopacket.py b/src/genophenocorr/preprocessing/_phenopacket.py index 145ac693..ce904e1d 100644 --- a/src/genophenocorr/preprocessing/_phenopacket.py +++ b/src/genophenocorr/preprocessing/_phenopacket.py @@ -185,17 +185,18 @@ def _add_variants(self, sample_id: str, pp: Phenopacket) -> typing.Sequence[Vari for genomic_interp in interp.diagnosis.genomic_interpretations: vc, gt = self._coord_finder.find_coordinates(genomic_interp) if "N" in vc.alt: - self._logger.warning(f'Patient {pp.id} has unknown alternative variant {vc.alt}, this variant will not be included.') + self._logger.warning('Patient %s has unknown alternative variant %s, this variant will not be included.', pp.id, vc.variant_key) continue tx_annotations = self._func_ann.annotate(vc) if tx_annotations is None: - raise ValueError(f"Patient {pp.id} has an error with variant {vc.variant_key}, examine logs for more details.") + self._logger.error("Patient %s has an error with variant %s, this variant will not be included.", pp.id, vc.variant_key) + continue genotype = Genotypes.single(sample_id, gt) variant = Variant(vc, tx_annotations, genotype) variants_list.append(variant) if len(variants_list) == 0: - self._logger.warning(f'Expected at least one variant per patient, but received none for patient {pp.id}') + self._logger.warning('Expected at least one variant per patient, but received none for patient %s', pp.id) return variants_list def _add_phenotypes(self, pp: Phenopacket) -> typing.Sequence[Phenotype]: diff --git a/src/genophenocorr/preprocessing/_vep.py b/src/genophenocorr/preprocessing/_vep.py index a36466a6..cabefc55 100644 --- a/src/genophenocorr/preprocessing/_vep.py +++ b/src/genophenocorr/preprocessing/_vep.py @@ -85,9 +85,8 @@ def annotate(self, variant_coordinates: VariantCoordinates) -> typing.Sequence[T self._logging.error('VEP did not finish successfully.') return None if 'transcript_consequences' not in response: - raise ValueError( - f'The VEP response lacked the required `transcript_consequences` field') - + self._logging.error('The VEP response lacked the required `transcript_consequences` field. %s', response) + return None for trans in response['transcript_consequences']: annotation = self._process_item(trans) if annotation is not None: