From 095c77fa9334faff35979c5add95cd5e1d3cae65 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Tue, 10 Oct 2023 17:14:46 -0400 Subject: [PATCH 1/9] removed "No HPO Terms" Error and created exluded --- ...tinez_PMID_36446582_RunGenoPhenoCorr.ipynb | 2780 +++-------------- src/genophenocorr/model/_cohort.py | 23 +- .../preprocessing/_phenopacket.py | 7 +- src/genophenocorr/view/_cohort.py | 54 +- 4 files changed, 462 insertions(+), 2402 deletions(-) diff --git a/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb b/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb index cf9e5520..c558d963 100644 --- a/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb +++ b/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb @@ -85,19 +85,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P2\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P12\n", - "Expected at least one HPO term per patient, but received none for patient Reuter2020\n", - "Expected at least one HPO term per patient, but received none for patient Novara, 2017_P10\n", "Expected at least one variant per patient, but received none for patient Parenti2016_P1\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P13\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P8\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P4\n", "Expected at least one variant per patient, but received none for patient Low, 2016_P7 (8)\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P5\n", - "Expected at least one HPO term per patient, but received none for patient KBG31B\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P9\n", - "Expected at least one HPO term per patient, but received none for patient VanDongen2019_P7\n", "Expected at least one variant per patient, but received none for patient KBG42\n" ] } @@ -109,1444 +98,462 @@ { "cell_type": "code", "execution_count": 8, - "id": "c39889b9", + "id": "e6c58342", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import HTML, display\n", + "from genophenocorr.view import CohortViewer\n", + "\n", + "viewer = CohortViewer(hpo)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8c40d2fb", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ItemDescription
Description of the cohort. 11 individuals were removed from the cohort because they had no HPO terms.
Total Individuals340
Excluded Individuals11: VanDongen2019_P5;Reuter2020;KBG31B;VanDongen2019_P9;VanDongen2019_P2;VanDongen2019_P4;VanDongen2019_P8;VanDongen2019_P12;VanDongen2019_P7;VanDongen2019_P13;Novara, 2017_P10
Total Unique HPO Terms28
Total Unique Variants326
" + ], "text/plain": [ - "[('HP:0006482', 224),\n", - " ('HP:0011446', 220),\n", - " ('HP:0001249', 194),\n", - " ('HP:0001155', 189),\n", - " ('HP:0012758', 176),\n", - " ('HP:0004322', 150),\n", - " ('HP:0010938', 134),\n", - " ('HP:0000534', 126),\n", - " ('HP:0000343', 121),\n", - " ('HP:0000365', 97),\n", - " ('HP:0000325', 83),\n", - " ('HP:0000356', 77),\n", - " ('HP:0007018', 61),\n", - " ('HP:0000729', 56)]" + "" ] }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "patientCohort.list_all_phenotypes() ## Add Labels to output" + "display(HTML(viewer.cohort_summary_table(patientCohort)))" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "f294ca99", + "execution_count": 10, + "id": "c39889b9", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
HPO TermCount
Counts of annotations to HPO terms for the 340 in the cohort
Abnormality of dental morphology (HP:0006482)224
Abnormality of higher mental function (HP:0011446)220
Intellectual disability (HP:0001249)194
Abnormality of the hand (HP:0001155)189
Neurodevelopmental delay (HP:0012758)176
Short stature (HP:0004322)150
Abnormal external nose morphology (HP:0010938)134
Abnormal eyebrow morphology (HP:0000534)126
Long philtrum (HP:0000343)121
Hearing impairment (HP:0000365)97
Triangular face (HP:0000325)83
Abnormality of the outer ear (HP:0000356)77
Attention deficit hyperactivity disorder (HP:0007018)61
Autistic behavior (HP:0000729)56
" + ], "text/plain": [ - "[('16_89284634_89284639_GTGTTT_G', 33),\n", - " ('16_89284129_89284134_CTTTTT_C', 10),\n", - " ('16_89284140_89284144_TTTTC_T', 9),\n", - " ('16_89285157_89285161_GTTTC_G', 8),\n", - " ('16_89279749_89279750_C_CG', 5),\n", - " ('16_89275180_89275181_A_AG', 5),\n", - " ('16_89182742_89309778_DEL', 4),\n", - " ('16_89284565_89284565_G_C', 3),\n", - " ('16_89274958_89274958_C_G', 3),\n", - " ('16_89284345_89284345_G_A', 3),\n", - " ('16_89283314_89283318_CCTTT_C', 3),\n", - " ('16_89282136_89282136_C_T', 3),\n", - " ('16_89284363_89284367_CTTTG_C', 3),\n", - " ('16_89282710_89282710_T_A', 3),\n", - " ('16_89284524_89373231_DUP', 3),\n", - " ('16_89284358_89284360_GAT_G', 3),\n", - " ('16_89285171_89285175_CTTCT_C', 2),\n", - " ('16_89284209_89284213_TTCTC_T', 2),\n", - " ('16_89283496_89283497_CG_C', 2),\n", - " ('16_89280752_89280752_G_T', 2),\n", - " ('16_89283232_89283233_C_CT', 2),\n", - " ('16_89285153_89285157_TTTTG_T', 2),\n", - " ('16_89282157_89282158_C_CT', 2),\n", - " ('16_89282455_89282455_G_A', 2),\n", - " ('16_89268636_89268636_C_A', 2),\n", - " ('16_89275128_89275128_G_A', 2),\n", - " ('16_89279326_89279326_G_A', 2),\n", - " ('16_89262070_89410643_DEL', 2),\n", - " ('16_89280028_89280029_C_CG', 2),\n", - " ('16_89282834_89282838_CTGTT_C', 2),\n", - " ('16_89321706_89475518_DEL', 2),\n", - " ('16_89095277_89438698_DEL', 2),\n", - " ('16_89281054_89281054_C_A', 2),\n", - " ('16_88197356_89297194_DEL', 2),\n", - " ('16_89285224_89285224_G_A', 2),\n", - " ('16_89282947_89282952_CTTTTT_C', 2),\n", - " ('16_89283319_89283320_C_CT', 1),\n", - " ('16_87766621_89492922_DEL', 1),\n", - " ('16_89277486_89517986_DEL', 1),\n", - " ('16_89285422_89285422_C_A', 1),\n", - " ('16_87618421_89440922_DEL', 1),\n", - " ('16_87306530_89269020_DEL', 1),\n", - " ('16_89284169_89284170_CT_C', 1),\n", - " ('16_89194992_89352723_DEL', 1),\n", - " ('16_89284174_89284175_CT_C', 1),\n", - " ('16_89282012_89282014_CGG_C', 1),\n", - " ('16_88489784_89491503_DEL', 1),\n", - " ('16_89280583_89280585_ATC_A', 1),\n", - " ('16_89283349_89283349_T_A', 1),\n", - " ('16_89285626_89285627_GT_G', 1),\n", - " ('16_89282255_89282259_ATCTT_A', 1),\n", - " ('16_89283231_89283232_T_TC', 1),\n", - " ('16_89481148_89489612_DEL', 1),\n", - " ('16_88564200_89541334_DEL', 1),\n", - " ('16_89190071_89439815_DEL', 1),\n", - " ('16_89281983_89281984_TC_T', 1),\n", - " ('16_87464659_89530534_DEL', 1),\n", - " ('16_89282012_89282013_C_CG', 1),\n", - " ('16_89228900_89593971_DEL', 1),\n", - " ('16_89283207_89283208_CT_C', 1),\n", - " ('16_89282433_89282435_TTC_T', 1),\n", - " ('16_89277486_89499248_DEL', 1),\n", - " ('16_89270870_89270870_G_A', 1),\n", - " ('16_89279353_89279353_G_A', 1),\n", - " ('16_89056332_89434622_DEL', 1),\n", - " ('16_89277486_89489140_DEL', 1),\n", - " ('16_88575401_89265641_DEL', 1),\n", - " ('16_89280355_89280355_C_A', 1),\n", - " ('16_89280829_89280830_C_CA', 1),\n", - " ('16_88788350_89511297_DEL', 1),\n", - " ('16_89279566_89279574_CCTTCGGGG_C', 1),\n", - " ('16_89368851_89487299_DEL', 1),\n", - " ('16_89283158_89283160_GTC_G', 1),\n", - " ('16_89217282_89536982_DEL', 1),\n", - " ('16_89280457_89280471_CGGGCAGAGCGTACG_C', 1),\n", - " ('16_89269900_89287677_DEL', 1),\n", - " ('16_89284411_89284412_GC_G', 1),\n", - " ('16_88197155_89297334_DEL', 1),\n", - " ('16_89305431_89541006_DEL', 1),\n", - " ('16_89283895_89283895_C_A', 1),\n", - " ('16_89283342_89283344_TTA_T', 1),\n", - " ('16_89283948_89283949_T_TA', 1),\n", - " ('16_89283850_89283850_G_A', 1),\n", - " ('16_89283203_89283203_C_T', 1),\n", - " ('16_89282770_89282772_CTT_C', 1),\n", - " ('16_89286064_89286064_G_C', 1),\n", - " ('16_89441369_89499248_DEL', 1),\n", - " ('16_89269021_89492781_DEL', 1),\n", - " ('16_89283775_89283777_GCT_G', 1),\n", - " ('16_89284648_89284649_G_GT', 1),\n", - " ('16_89409760_89418313_DEL', 1),\n", - " ('16_89395176_89492781_DEL', 1),\n", - " ('16_89305204_89305221_ACCTGTGTCCGAGTCCTT_A', 1),\n", - " ('16_89283523_89283523_G_A', 1),\n", - " ('16_89283361_89283362_C_CT', 1),\n", - " ('16_89283080_89283105_TTCCTCCTTCTCCTGGAGGCCGTCCG_T', 1),\n", - " ('16_89249291_89481685_DEL', 1),\n", - " ('16_88197485_89321695_DEL', 1),\n", - " ('16_87886395_88066394_DEL', 1),\n", - " ('16_88197356_89317078_DEL', 1),\n", - " ('16_89281336_89281337_CG_C', 1),\n", - " ('16_89284601_89284602_GG_A', 1),\n", - " ('16_89286064_89286065_G_GT', 1),\n", - " ('16_89280652_89280653_CG_C', 1),\n", - " ('16_89290674_89290678_GATGC_G', 1),\n", - " ('16_89284741_89284741_G_A', 1),\n", - " ('16_89363269_89492781_DEL', 1),\n", - " ('16_89286099_89286100_TG_T', 1),\n", - " ('16_89282336_89282336_G_C', 1),\n", - " ('16_89285152_89285153_C_CT', 1),\n", - " ('16_89283712_89283714_ACT_A', 1),\n", - " ('16_89282653_89282654_T_TG', 1),\n", - " ('16_88697053_89277641_DEL', 1),\n", - " ('16_89279542_89279542_G_A', 1),\n", - " ('16_89282611_89282611_G_A', 1),\n", - " ('16_89279119_89279131_AGCCCGTGTAGGT_A', 1),\n", - " ('16_89284696_89284696_C_A', 1),\n", - " ('16_86647052_89511661_DEL', 1),\n", - " ('16_89283790_89283791_C_CA', 1),\n", - " ('16_89281314_89281343_TGCGAGTCGGCGCAGTCGAACACGAGGTCC_T', 1),\n", - " ('16_89280174_89280178_GCGTC_G', 1),\n", - " ('16_89284030_89284030_G_A', 1),\n", - " ('16_89274954_89274957_TCTC_T', 1),\n", - " ('16_89281397_89281397_G_C', 1),\n", - " ('16_89283094_89283094_G_A', 1),\n", - " ('16_89283332_89283334_ATG_A', 1),\n", - " ('16_89275110_89275110_G_A', 1),\n", - " ('16_89290706_89290706_G_A', 1),\n", - " ('16_89274920_89274920_C_T', 1),\n", - " ('16_89284129_89284130_CT_C', 1),\n", - " ('16_89282947_89282951_CTTTT_C', 1),\n", - " ('16_89282324_89282324_G_T', 1),\n", - " ('16_89279178_89279179_AG_A', 1),\n", - " ('16_89282959_89282960_TC_T', 1),\n", - " ('16_89279914_89279914_C_A', 1),\n", - " ('16_89275127_89275127_C_T', 1),\n", - " ('16_88743576_89406219_DEL', 1),\n", - " ('16_89217282_89512722_DEL', 1),\n", - " ('16_89274920_89274920_C_G', 1),\n", - " ('16_89279070_89279070_A_G', 1),\n", - " ('16_89282635_89282637_CCT_C', 1),\n", - " ('16_89281302_89281304_GGC_G', 1),\n", - " ('16_89279850_89279851_G_GC', 1),\n", - " ('16_89206685_89510638_DEL', 1),\n", - " ('16_89458996_89487166_DEL', 1),\n", - " ('16_89269019_89305395_DEL', 1),\n", - " ('16_89284810_89284811_C_CA', 1),\n", - " ('16_89284241_89284245_CTTCT_C', 1),\n", - " ('16_89281756_89281756_C_A', 1),\n", - " ('16_89283415_89283419_GGATT_G', 1),\n", - " ('16_89283891_89283892_TC_T', 1),\n", - " ('16_89281419_89281419_G_T', 1),\n", - " ('16_89284779_89284779_G_T', 1),\n", - " ('16_89280131_89280133_GGA_G', 1),\n", - " ('16_89285369_89285369_G_C', 1),\n", - " ('16_89284147_89284147_T_A', 1),\n", - " ('16_89280070_89280070_C_A', 1),\n", - " ('16_89293294_89312898_DEL', 1),\n", - " ('16_89279185_89279186_T_TG', 1),\n", - " ('16_89280526_89280527_C_CT', 1),\n", - " ('16_89279362_89279362_G_A', 1),\n", - " ('16_88599770_89406219_DEL', 1),\n", - " ('16_89228900_89492781_DEL', 1),\n", - " ('16_89279126_89279126_G_C', 1),\n", - " ('16_89285078_89285082_ACTCT_A', 1),\n", - " ('16_89281576_89281578_CTT_C', 1),\n", - " ('16_89268821_89406360_DEL', 1),\n", - " ('16_89217282_89363327_DEL', 1),\n", - " ('16_89279859_89279860_TC_T', 1),\n", - " ('16_89266046_89305443_DEL', 1),\n", - " ('16_88788350_89454555_DEL', 1),\n", - " ('16_89279840_89279841_AT_A', 1),\n", - " ('16_89283304_89541333_DEL', 1),\n", - " ('16_89282437_89282439_CTT_C', 1),\n", - " ('16_89285255_89285257_CGA_C', 1),\n", - " ('16_89285085_89285085_G_C', 1),\n", - " ('16_89283388_89283389_AT_A', 1),\n", - " ('16_89279350_89279350_G_A', 1),\n", - " ('16_89280484_89280489_CGGGAG_C', 1),\n", - " ('16_89171713_89274753_DEL', 1),\n", - " ('16_89280587_89280589_CTG_C', 1),\n", - " ('16_89282766_89282768_TTC_T', 1),\n", - " ('16_89279776_89279776_G_A', 1),\n", - " ('16_88568593_89562542_DEL', 1),\n", - " ('16_89282166_89282168_TTC_T', 1),\n", - " ('16_89258980_89342739_DEL', 1),\n", - " ('16_89275192_89275192_C_G', 1),\n", - " ('16_87921246_89417758_DEL', 1),\n", - " ('16_89279708_89279725_AGTGTTCGGGGCGGGGCC_A', 1),\n", - " ('16_89284236_89284237_GA_G', 1),\n", - " ('16_87892207_89455452_DEL', 1),\n", - " ('16_89281111_89281116_TGAAGA_T', 1),\n", - " ('16_89282151_89282153_TTC_T', 1),\n", - " ('16_89282352_89282365_AGGAAGTCCTTTTC_A', 1),\n", - " ('16_89195407_89489612_DUP', 1),\n", - " ('16_88688905_89518004_DEL', 1),\n", - " ('16_89282149_89282151_GTT_G', 1),\n", - " ('16_89280202_89280202_G_A', 1),\n", - " ('16_89223569_89414599_DEL', 1),\n", - " ('16_89283676_89283676_C_A', 1),\n", - " ('16_89282371_89282371_G_A', 1),\n", - " ('16_89284818_89284831_CTTGTCAGTCTCGT_C', 1),\n", - " ('16_87468556_89622209_DUP', 1),\n", - " ('16_89476288_89589843_DUP', 1),\n", - " ('16_89284756_89284756_GC_AA', 1),\n", - " ('16_89281380_89281380_G_A', 1),\n", - " ('16_89217282_89506042_DEL', 1),\n", - " ('16_89290721_89290721_C_A', 1),\n", - " ('16_89279135_89279135_G_C', 1),\n", - " ('16_89409044_89585740_DEL', 1),\n", - " ('16_89283559_89527445_DEL', 1),\n", - " ('16_89281396_89281396_C_A', 1),\n", - " ('16_89282770_89282771_C_CT', 1),\n", - " ('16_89284445_89284450_TTTCTC_T', 1),\n", - " ('16_89283495_89283496_TC_T', 1),\n", - " ('16_89279458_89279459_TG_T', 1),\n", - " ('16_89283082_89283082_C_A', 1),\n", - " ('16_89283319_89283321_CTT_C', 1),\n", - " ('16_89275191_89275191_T_G', 1),\n", - " ('16_89282044_89282044_G_A', 1),\n", - " ('16_89282567_89282568_GA_G', 1),\n", - " ('16_87150056_89454395_DEL', 1),\n", - " ('16_88555247_89317078_DEL', 1),\n", - " ('16_89414599_89516032_DEL', 1),\n", - " ('16_89283245_89283247_GAA_G', 1),\n", - " ('16_89206685_89472351_DEL', 1),\n", - " ('16_89280357_89280358_AG_A', 1),\n", - " ('16_89490211_89490596_DEL', 1),\n", - " ('16_89282134_89282134_T_A', 1),\n", - " ('16_89133200_89406219_DEL', 1),\n", - " ('16_89281267_89281268_A_AG', 1),\n", - " ('16_89277486_89431539_DEL', 1)]" + "" ] }, - "execution_count": 9, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "patientCohort.list_all_variants()" + "display(HTML(viewer.hpo_term_counts_table(patientCohort))) ## Add Labels to output" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "8136e3b2", + "execution_count": 11, + "id": "f294ca99", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WARN] could not identify a single variant for target transcript (got 0), variant 16_87886395_88066394_DEL\n", + "c.1903_1907del - 32\n", + "c.2408_2412del - 10\n", + "c.1381_1384del - 8\n", + "c.2398_2401del - 8\n", + "c.7481_7482insC - 5\n", + "c.6792_6793insC - 5\n", + "c.2197C>T - 3\n", + "c.4406G>A - 3\n", + "c.1977C>G - 3\n", + "c.3224_3227del - 3\n", + "c.2175_2178del - 3\n", + "c.3832A>T - 3\n", + "c.2182_2183del - 3\n", + "c.7570-1G>C - 3\n", + "c.1367_1370del - 2\n", + "c.3045del - 2\n", + "c.3590_3594del - 2\n", + "c.2329_2332del - 2\n", + "c.4384_4385insA - 2\n", + "c.3309_3310insA - 2\n", + "c.5790C>A - 2\n", + "c.6513_6514insC - 2\n", + "c.1385_1388del - 2\n", + "c.7216C>T - 2\n", + "c.1318C>T - 2\n", + "c.5488G>T - 2\n", + "c.7534C>T - 2\n", + "c.3704_3707del - 2\n", + "c.4087C>T - 2\n", + "c.2512C>T - 1\n", + "c.4786G>T - 1\n", + "c.3931C>T - 1\n", + "c.548_551del - 1\n", + "NA - 1\n", + "c.3208_3209del - 1\n", + "c.2647G>T - 1\n", + "c.3974del - 1\n", + "c.7411_7422del - 1\n", + "c.1460_1463del - 1\n", + "c.2395A>T - 1\n", + "c.7552C>T - 1\n", + "c.4389_4390del - 1\n", + "c.6472G>T - 1\n", + "c.7470+2T>C - 1\n", + "c.915del - 1\n", + "c.4529_4530insC - 1\n", + "c.4103_4104del - 1\n", + "c.5889del - 1\n", + "c.5146G>T - 1\n", + "c.3888_3889insC - 1\n", + "c.7180C>T - 1\n", + "c.6184del - 1\n", + "c.3382_3383del - 1\n", + "c.4408A>T - 1\n", + "c.3771_3772insA - 1\n", + "c.4218C>A - 1\n", + "c.7000C>T - 1\n", + "c.1893_1894insA - 1\n", + "c.7471A>C - 1\n", + "c.6766C>T - 1\n", + "c.3339G>A - 1\n", + "c.3437_3461del - 1\n", + "c.5205del - 1\n", + "c.4206C>G - 1\n", + "c.2828_2829del - 1\n", + "c.3905_3906del - 1\n", + "c.1389_1390insA - 1\n", + "c.4964_4965del - 1\n", + "c.7192C>T - 1\n", + "c.3180_3181insA - 1\n", + "c.3046del - 1\n", + "c.6053_6057del - 1\n", + "c.6340C>T - 1\n", + "c.1763C>A - 1\n", + "c.3582del - 1\n", + "c.7834G>T - 1\n", + "c.5274_5275insC - 1\n", + "c.2765_2766del - 1\n", + "c.3774_3775del - 1\n", + "c.3221_3222del - 1\n", + "c.6409_6410del - 1\n", + "c.5123C>A - 1\n", + "c.6364_6367del - 1\n", + "c.831del - 1\n", + "c.3334del - 1\n", + "c.6691_6692insG - 1\n", + "c.3193A>T - 1\n", + "c.3591_3594del - 1\n", + "c.1786delinsTT - 1\n", + "c.7083del - 1\n", + "c.5957_5958del - 1\n", + "c.7535G>A - 1\n", + "c.211_226+1del - 1\n", + "c.6682del - 1\n", + "c.1285_1286del - 1\n", + "c.1940_1941delinsT - 1\n", + "c.2367del - 1\n", + "c.4374_4375del - 1\n", + "c.1731_1732insT - 1\n", + "c.7471-1G>C - 1\n", + "c.3153del - 1\n", + "c.7356_7357insC - 1\n", + "c.3460G>T - 1\n", + "c.2297_2300del - 1\n", + "c.7363del - 1\n", + "c.4498C>T - 1\n", + "c.4107_4108del - 1\n", + "c.2593_2594insT - 1\n", + "c.5712_5713insT - 1\n", + "c.1711_1723del - 1\n", + "c.2866G>T - 1\n", + "c.4558del - 1\n", + "c.2412del - 1\n", + "c.2692C>T - 1\n", + "c.6817_6833del - 1\n", + "c.5426_5430del - 1\n", + "c.867C>G - 1\n", + "c.866_867insA - 1\n", + "c.5199_5227del - 1\n", + "c.4171C>T - 1\n", + "c.505G>T - 1\n", + "c.4391_4392del - 1\n", + "c.7753C>T - 1\n", + "c.1846G>T - 1\n", + "c.5145C>G - 1\n", + "c.7189C>T - 1\n", + "c.7407C>G - 1\n", + "c.3310_3311insG - 1\n", + "c.2130del - 1\n", + "c.3295_3296del - 1\n", + "c.7570_7572del - 1\n", + "c.6071_6084del - 1\n", + "c.2650del - 1\n", + "c.3448C>T - 1\n", + "c.1173C>G - 1\n", + "c.5953_5954del - 1\n", + "c.3198_3199del - 1\n", + "c.6187G>T - 1\n", + "c.3123_3126del - 1\n", + "16_87886395_88066394_DEL - 1\n", + "c.1801C>T - 1\n", + "c.1457C>G - 1\n", + "c.6015_6016insA - 1\n", + "c.3019C>T - 1\n", + "c.520C>T - 1\n", + "c.7607G>A - 1\n", + "c.2305del - 1\n", + "c.6628G>T - 1\n", + "c.6701del - 1\n", + "c.2751_2752insT - 1\n", + "c.4528_4529del - 1\n", + "c.6968_6975del - 1\n", + "c.3770_3771del - 1\n", + "c.7416C>G - 1\n", + "c.4177_4189del - 1\n", + "c.4283_4286del - 1\n", + "c.1120G>T - 1\n", + "c.3222_3223insA - 1\n" + ] + }, { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
VariantEffectCountKey
c.1903_1907delFRAMESHIFT_VARIANT3216_89284634_89284639_GTGTTT_G
c.2408_2412delFRAMESHIFT_VARIANT1016_89284129_89284134_CTTTTT_C
c.1381_1384delFRAMESHIFT_VARIANT816_89285157_89285161_GTTTC_G
c.2398_2401delFRAMESHIFT_VARIANT816_89284140_89284144_TTTTC_T
c.7481_7482insCFRAMESHIFT_VARIANT516_89275180_89275181_A_AG
c.6792_6793insCFRAMESHIFT_VARIANT516_89279749_89279750_C_CG
c.2197C>TSTOP_GAINED316_89284345_89284345_G_A
c.4406G>ASTOP_GAINED316_89282136_89282136_C_T
c.1977C>GSTOP_GAINED316_89284565_89284565_G_C
c.3224_3227delFRAMESHIFT_VARIANT316_89283314_89283318_CCTTT_C
c.2175_2178delFRAMESHIFT_VARIANT316_89284363_89284367_CTTTG_C
c.3832A>TSTOP_GAINED316_89282710_89282710_T_A
c.2182_2183delFRAMESHIFT_VARIANT316_89284358_89284360_GAT_G
c.7570-1G>CSPLICE_ACCEPTOR_VARIANT316_89274958_89274958_C_G
c.1367_1370delFRAMESHIFT_VARIANT216_89285171_89285175_CTTCT_C
c.3045delFRAMESHIFT_VARIANT216_89283496_89283497_CG_C
c.3590_3594delFRAMESHIFT_VARIANT216_89282947_89282952_CTTTTT_C
c.2329_2332delFRAMESHIFT_VARIANT216_89284209_89284213_TTCTC_T
c.4384_4385insAFRAMESHIFT_VARIANT216_89282157_89282158_C_CT
c.3309_3310insAFRAMESHIFT_VARIANT216_89283232_89283233_C_CT
c.5790C>ASTOP_GAINED216_89280752_89280752_G_T
c.6513_6514insCFRAMESHIFT_VARIANT216_89280028_89280029_C_CG
c.1385_1388delFRAMESHIFT_VARIANT216_89285153_89285157_TTTTG_T
c.7216C>TSTOP_GAINED216_89279326_89279326_G_A
c.1318C>TSTOP_GAINED216_89285224_89285224_G_A
c.5488G>TSTOP_GAINED216_89281054_89281054_C_A
c.7534C>TMISSENSE_VARIANT216_89275128_89275128_G_A
c.3704_3707delFRAMESHIFT_VARIANT216_89282834_89282838_CTGTT_C
c.4087C>TSTOP_GAINED216_89282455_89282455_G_A
\n", + "

Additionally, the following variants were observed 1 or fewer times: \n", + "c.3019C>T; c.7083del; c.2367del; c.4206C>G; c.3123_3126del; c.2650del; c.7192C>T; c.3974del; c.1711_1723del; c.1940_1941delinsT; c.3208_3209del; c.2866G>T; c.7471-1G>C; c.5274_5275insC; c.7356_7357insC; c.2593_2594insT; c.3460G>T; c.6184del; c.3339G>A; c.3193A>T; c.2130del; c.2297_2300del; c.4558del; c.4103_4104del; c.2647G>T; c.6682del; c.3046del; c.2765_2766del; c.866_867insA; c.6053_6057del; c.7000C>T; c.6409_6410del; c.1460_1463del; c.4498C>T; c.7416C>G; c.6187G>T; c.1846G>T; c.6472G>T; c.3198_3199del; c.7470+2T>C; c.4218C>A; c.7753C>T; c.3437_3461del; c.3382_3383del; c.3310_3311insG; c.7407C>G; c.4964_4965del; c.6766C>T; c.4283_4286del; NA; c.211_226+1del; c.5145C>G; 16_87886395_88066394_DEL; c.3770_3771del; c.1173C>G; c.3180_3181insA; c.6968_6975del; c.7363del; c.2828_2829del; c.3888_3889insC; c.4389_4390del; c.1285_1286del; c.2305del; c.4374_4375del; c.867C>G; c.6340C>T; c.6015_6016insA; c.1763C>A; c.5953_5954del; c.2692C>T; c.505G>T; c.2412del; c.7471A>C; c.6071_6084del; c.5205del; c.4107_4108del; c.6691_6692insG; c.4177_4189del; c.520C>T; c.4171C>T; c.1801C>T; c.4408A>T; c.5199_5227del; c.1731_1732insT; c.3334del; c.7834G>T; c.3222_3223insA; c.7570_7572del; c.3591_3594del; c.5146G>T; c.3295_3296del; c.3221_3222del; c.6701del; c.4391_4392del; c.7607G>A; c.6817_6833del; c.6628G>T; c.1786delinsTT; c.5889del; c.4529_4530insC; c.3931C>T; c.3905_3906del; c.1389_1390insA; c.2751_2752insT; c.3774_3775del; c.7535G>A; c.5712_5713insT; c.7180C>T; c.548_551del; c.2395A>T; c.6364_6367del; c.1457C>G; c.2512C>T; c.1120G>T; c.7189C>T; c.3582del; c.3153del; c.7552C>T; c.4528_4529del; c.5123C>A; c.915del; c.5426_5430del; c.3771_3772insA; c.831del; c.4786G>T; c.5957_5958del; c.3448C>T; c.7411_7422del; c.1893_1894insA.

\n", + "

Use the entry in the \"Key\" column to investigate whether specific variants display genotype-phenotype correlations

" + ], "text/plain": [ - "['Gnazzo, 2020_P8',\n", - " 'Low, 2016_P24 (21)',\n", - " 'Scarano, 2013_P5',\n", - " 'Goldenberg2016_P32',\n", - " 'Goldenberg2016_P17',\n", - " 'Kutkowska-Kazmierczak2021_P16',\n", - " 'KBG17',\n", - " 'Walz2015_Pf',\n", - " 'Sirmaci2011_P1/F1? (previously published Tekin, 2004)',\n", - " 'Gnazzo, 2020_P10',\n", - " 'Goldenberg2016_P13',\n", - " 'Goldenberg2016_P14',\n", - " 'Willemsen2010_P4',\n", - " 'KBG58',\n", - " 'Gnazzo, 2020_P31',\n", - " 'VanDongen2019_P2',\n", - " 'Parenti2021_P12',\n", - " 'KBG65',\n", - " 'Reuter2020',\n", - " 'Novara, 2017_P2',\n", - " 'KBG6',\n", - " 'Ockeloen2015_P18',\n", - " 'VanDongen2019_P8',\n", - " 'Low, 2016_P2 (26)',\n", - " 'KBG26',\n", - " 'Gnazzo, 2020_P30',\n", - " 'Kutkowska-Kazmierczak2021_P20',\n", - " 'KBG56',\n", - " 'DeBernardi2018',\n", - " 'Goldenberg2016_P16',\n", - " 'KBG8B',\n", - " 'KBG43',\n", - " 'Parenti2021_P14',\n", - " 'Parenti2021_P23',\n", - " 'Novara, 2017_P5',\n", - " 'Scarano, 2013_P3',\n", - " 'KBG39',\n", - " 'KBG59',\n", - " 'KBG16',\n", - " 'Parenti2021_P1',\n", - " 'KBG35',\n", - " 'Walz2015_PD',\n", - " 'Jin Kim, 2020_P2',\n", - " 'Libianto2019',\n", - " 'Low, 2016_P6 (7)',\n", - " 'Kutkowska-Kazmierczak2021_P21',\n", - " 'Novara, 2017_P8',\n", - " 'KBG2',\n", - " 'Parenti2016_P2',\n", - " 'Kutkowska-Kazmierczak2021_P4',\n", - " 'Low, 2016_P13 (27)',\n", - " 'Willemsen2010_P3',\n", - " 'KBG28',\n", - " 'KBG51',\n", - " 'Sacharow, 2012_P2',\n", - " 'Goldenberg2016_P35',\n", - " 'KBG40',\n", - " 'Ockeloen2015_P8',\n", - " 'KBG11',\n", - " 'Gnazzo, 2020_P17',\n", - " 'Low, 2016_P11 (20)',\n", - " 'Isrie, 2012_P1',\n", - " 'Sayed, 2020_P2',\n", - " 'Scarano, 2013_P8',\n", - " 'Parenti2021_P21',\n", - " 'KBG18',\n", - " 'VanDongen2019_P1',\n", - " 'Murray, 2017_P11 (8.1.)',\n", - " 'Murray, 2017_P9 (5.1.)',\n", - " 'Parenti2021_P3',\n", - " 'Gnazzo, 2020_P2',\n", - " 'Kutkowska-Kazmierczak2021_P5',\n", - " 'Murray, 2017_P16 (13.1)',\n", - " 'Sirmaci2011_P2/F1? (previously published Tekin, 2004)',\n", - " 'KBG22',\n", - " 'Murray, 2017_P1 (1.1)',\n", - " 'Scarano, 2013_P1',\n", - " 'Sirmaci2011_P3',\n", - " 'Willemsen2010_P1',\n", - " 'Gnazzo, 2020_P7',\n", - " 'Parenti2021_P7',\n", - " 'Low, 2016_P3 (4)',\n", - " 'Murray, 2017_P3 (1.3)',\n", - " 'Parenti2021_P20',\n", - " 'Kleyner, 2016',\n", - " 'Miyatake, 2017_P2',\n", - " 'KBG49',\n", - " 'Novara, 2017_P12',\n", - " 'Low, 2016_33 (31)',\n", - " 'Goldenberg2016_P31',\n", - " 'Goldenberg2016_P36',\n", - " 'Gnazzo, 2020_P28',\n", - " 'Ockeloen2015_P9',\n", - " 'Kutkowska-Kazmierczak2021_P23',\n", - " 'VanDongen2019_P3',\n", - " 'Parenti2021_P9',\n", - " 'VanDongen2019_P9',\n", - " 'Isrie, 2012_P2',\n", - " 'Gnazzo, 2020_P9',\n", - " 'Scarano, 2013_P10',\n", - " 'Goldenberg2016_P11',\n", - " 'Parenti2021_P22',\n", - " 'KBG63',\n", - " 'Miyatake, 2013',\n", - " 'Miyatake, 2017_P3',\n", - " 'Goldenberg2016_P27',\n", - " 'KBG20',\n", - " 'Kutkowska-Kazmierczak2021_P18',\n", - " 'KBG66',\n", - " 'Gnazzo, 2020_P23',\n", - " 'Low, 2016_P9 (1)',\n", - " 'KBG5',\n", - " 'Parenti2021_P11',\n", - " 'KBG10B',\n", - " 'KBG9',\n", - " 'KBG55',\n", - " 'KBG38',\n", - " 'Low, 2016_P20 (14)',\n", - " 'Gnazzo, 2020_P1',\n", - " 'Low, 2016_P14 (2)',\n", - " 'Murray, 2017_P6 (3.2)',\n", - " 'Behnert, 2018',\n", - " 'KBG15',\n", - " 'Low, 2016_31 (29)',\n", - " 'Goldenberg2016_P24',\n", - " 'VanDongen2019_P6',\n", - " 'Ockeloen2015_P15',\n", - " 'Gnazzo, 2020_P5',\n", - " 'Sacharow, 2012_P1',\n", - " 'Sirmaci2011_P5',\n", - " 'Parenti2021_P5',\n", - " 'KBG1',\n", - " 'Gnazzo, 2020_P3',\n", - " 'Goldenberg2016_P6',\n", - " 'Walz2015_PC',\n", - " 'Parenti2021_P4',\n", - " 'Mattei2021',\n", - " 'KBG31A',\n", - " 'Gnazzo, 2020_P26',\n", - " 'Low, 2016_P12 (13)',\n", - " 'Novara, 2017_P7',\n", - " 'Spengler, 2013',\n", - " 'Parenti2021_P17',\n", - " 'Ockeloen2015_P4',\n", - " 'KBG12',\n", - " 'Kutkowska-Kazmierczak2021_P13',\n", - " 'Goldenberg2016_P9',\n", - " 'Low, 2016_P18 (12)',\n", - " 'Goldenberg2016_P25',\n", - " 'Cucco, 2020 (Patient B)',\n", - " 'Goldenberg2016_P15',\n", - " 'KBG52',\n", - " 'Parenti2021_P6',\n", - " 'KBG3',\n", - " 'Scarano, 2013_P9',\n", - " 'Kutkowska-Kazmierczak2021_P15',\n", - " 'Sirmaci2011_P4 (previously published Brancati, 2004)',\n", - " 'Parenti2016_P1',\n", - " 'KBG44',\n", - " 'Goldenberg2016_P23',\n", - " 'Gnazzo, 2020_P15',\n", - " 'Gnazzo, 2020_P16',\n", - " 'Low, 2016_P10 (18)',\n", - " 'Goldenberg2016_P30',\n", - " 'Goldenberg2016_P38',\n", - " 'KBG57',\n", - " 'Parenti2021_P19',\n", - " 'Gnazzo, 2020_P11',\n", - " 'Goldenberg2016_P33',\n", - " 'Scarano, 2013_P11',\n", - " 'Ockeloen2015_P19',\n", - " 'KBG45',\n", - " 'KBG32',\n", - " 'Ockeloen2015_P2',\n", - " 'Goldenberg2016_P39',\n", - " 'Low, 2016_P7 (8)',\n", - " 'Novara, 2017_P1',\n", - " 'Ockeloen2015_P1',\n", - " 'Goldenberg2016_P20',\n", - " 'Kutkowska-Kazmierczak2021_P22',\n", - " 'KBG31B',\n", - " 'Gnazzo, 2020_P18',\n", - " 'KBG23',\n", - " 'Low, 2016_P4 (5)',\n", - " 'Scarano, 2013_P7',\n", - " 'Gnazzo, 2020_P27',\n", - " 'Goldenberg2016_P29',\n", - " 'Goldenberg2016_P19',\n", - " 'Rentas2021_P1',\n", - " 'Gnazzo, 2020_P24',\n", - " 'Parenti2021_P18',\n", - " 'Gnazzo, 2020_P13',\n", - " 'Gnazzo, 2020_P14',\n", - " 'KBG8A',\n", - " 'Low2017',\n", - " 'Kutkowska-Kazmierczak2021_P14',\n", - " 'Walz2015_PE',\n", - " 'Ockeloen2015_P13',\n", - " 'Murray, 2017_P2 (1.2)',\n", - " 'Low, 2016_P17 (10)',\n", - " 'KBG42',\n", - " 'Sirmaci2011_P2',\n", - " 'Goldenberg2016_P12',\n", - " 'Goldenberg2016_P34',\n", - " 'Ockeloen2015_P20',\n", - " 'Goldenberg2016_P26',\n", - " 'KBG29',\n", - " 'Low, 2016_P15 (3)',\n", - " 'Ockeloen2015_P17',\n", - " 'Khalifa, 2013_P1B',\n", - " 'KBG36',\n", - " 'Goldenberg2016_P2',\n", - " 'Youngs2011',\n", - " 'Gnazzo, 2020_P19',\n", - " 'Murray, 2017_P12 (9.1)',\n", - " 'Goldenberg2016_P22',\n", - " 'Gnazzo, 2020_P22',\n", - " 'KBG7',\n", - " 'KBG30',\n", - " 'Goldenberg2016_P3',\n", - " 'Kim, 2015_P2',\n", - " 'Low, 2016_P23 (17)',\n", - " 'Murray, 2017_P5 (3.1)',\n", - " 'Bianchi, 2018',\n", - " 'Jin Kim, 2020_P1',\n", - " 'Kutkowska-Kazmierczak2021_P17',\n", - " 'KBG33',\n", - " 'Novara, 2017_P4',\n", - " 'Low, 2016_P29 (27)',\n", - " 'Scarano, 2013_P12',\n", - " 'KBG34',\n", - " 'Ockeloen2015_P6',\n", - " 'Kutkowska-Kazmierczak2021_P9',\n", - " 'VanDongen2019_P5',\n", - " 'Goldenberg2016_P4',\n", - " 'Low, 2016_P26 (23)',\n", - " 'Kutkowska-Kazmierczak2021_P6',\n", - " 'Kutkowska-Kazmierczak2021_P8',\n", - " 'Sirmaci2011_P3/F1? (previously published Tekin, 2004)',\n", - " 'Scarano, 2013_P6',\n", - " 'Murray, 2017_P13 (11.1)',\n", - " 'Kutkowska-Kazmierczak2021_P2',\n", - " 'KBG27',\n", - " 'Murray, 2017_P7 (3.3)',\n", - " 'Murray, 2017_P8 (4.1)',\n", - " 'KBG46',\n", - " 'KBG19',\n", - " 'Crippa2015_P2',\n", - " 'Parenti2021_P15',\n", - " 'Crippa2015_P3',\n", - " 'Goldenberg2016_P28',\n", - " 'Novara, 2017_P3',\n", - " 'Gnazzo, 2020_P25',\n", - " 'Parenti2021_P2',\n", - " 'Palumbo 2016',\n", - " 'VanDongen2019_P4',\n", - " 'Bucerzan2020',\n", - " 'KBG21',\n", - " 'Walz2015_PA',\n", - " 'Kim, 2015_P1',\n", - " 'VanDongen2019_P13',\n", - " 'Parenti2021_P8',\n", - " 'KBG62',\n", - " 'Kutkowska-Kazmierczak2021_P12',\n", - " 'Low, 2016_P25 (22)',\n", - " 'Gnazzo, 2020_P29',\n", - " 'Crippa2015_P1',\n", - " 'Murray, 2017_P4 (2.1)',\n", - " 'Goldenberg2016_P1',\n", - " 'KBG53',\n", - " 'Gnazzo, 2020_P4',\n", - " 'KBG64',\n", - " 'Srivastava, 2017_P1',\n", - " 'Miyatake, 2017_P1',\n", - " 'Ockeloen2015_P14',\n", - " 'Ockeloen2015_P11',\n", - " 'Novara, 2017_P10',\n", - " 'Parenti2021_P13',\n", - " 'Lim2014',\n", - " 'Low, 2016_P27 (24)',\n", - " 'Ockeloen2015_P10',\n", - " 'Khalifa, 2013_P1A',\n", - " 'Kutkowska-Kazmierczak2021_P1',\n", - " 'Low, 2016_P28 (25)',\n", - " 'Kim, 2015_P3',\n", - " 'KBG13',\n", - " 'Low, 2016_34 (32)',\n", - " 'Gnazzo, 2020_P6',\n", - " 'Gnazzo, 2020_P21',\n", - " 'Low, 2016_P8 (33)',\n", - " 'KBG50',\n", - " 'Low, 2016_32 (30)',\n", - " 'Novara, 2017_P11',\n", - " 'Alves, 2019',\n", - " 'Low, 2016_P5 (6)',\n", - " 'KBG47',\n", - " 'Kutkowska-Kazmierczak2021_P11',\n", - " 'VanDongen2019_P12',\n", - " 'KBG37',\n", - " 'KBG4',\n", - " 'VanDongen2019_P10',\n", - " 'Low, 2016_P22 (16)',\n", - " 'Scarano, 2013_P2',\n", - " 'KBG48',\n", - " 'KBG41',\n", - " 'Kutkowska-Kazmierczak2021_P7',\n", - " 'KBG14',\n", - " 'Goldenberg2016_P8',\n", - " 'Murray, 2017_P10 (7.1.)',\n", - " 'Gnazzo, 2020_P20',\n", - " 'Scarano, 2013_P4',\n", - " 'Goldenberg2016_P21',\n", - " 'KBG54',\n", - " 'KBG24',\n", - " 'Novara, 2017_P9',\n", - " 'Walz2015_PB',\n", - " 'KBG10A',\n", - " 'Ockeloen2015_P16',\n", - " 'Kutkowska-Kazmierczak2021_P3',\n", - " 'Gnazzo, 2020_P12',\n", - " 'Goldenberg2016_P18',\n", - " 'VanDongen2019_P7',\n", - " 'Kutkowska-Kazmierczak2021_P10',\n", - " 'Willemsen2010_P2',\n", - " 'Sayed, 2020_P1',\n", - " 'Ockeloen2015_P7',\n", - " 'Parenti2021_P16',\n", - " 'Ockeloen2015_P5',\n", - " 'Goldenberg2016_P7',\n", - " 'Ockeloen2015_P3',\n", - " 'Ockeloen2015_P12',\n", - " 'KBG25',\n", - " 'Parenti2021_P10',\n", - " 'Low, 2016_P21 (15)',\n", - " 'Low, 2016_P1 (19)',\n", - " 'Low, 2016_30 (28)',\n", - " 'Goldenberg2016_P5',\n", - " 'Goldenberg2016_P10',\n", - " 'Low, 2016_P16 (9)',\n", - " 'Kutkowska-Kazmierczak2021_P19']" + "" ] }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], + "source": [ + "display(HTML(viewer.variants_table(patientCohort, tx_id))) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8136e3b2", + "metadata": {}, + "outputs": [], "source": [ "patientCohort.list_all_patients()" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "d3481476", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/plain": [ - "[('NP_001243111.1', 336),\n", - " ('NP_037407.4', 336),\n", - " ('NP_001243112.1', 336),\n", - " ('NP_872337.2', 46),\n", - " ('NP_004924.1', 37),\n", - " ('NP_001120686.1', 29),\n", - " ('NP_001230208.1', 29),\n", - " ('NP_777577.2', 29),\n", - " ('NP_001073956.2', 25),\n", - " ('NP_001305459.1', 25),\n", - " ('NP_112190.2', 25),\n", - " ('NP_001025189.1', 25),\n", - " ('NP_001305458.1', 25),\n", - " ('NP_001136336.2', 25),\n", - " ('NP_057293.1', 25),\n", - " ('NP_001305455.1', 25),\n", - " ('NP_001305453.1', 25),\n", - " ('NP_001305454.1', 25),\n", - " ('NP_000503.1', 25),\n", - " ('NP_001305461.1', 25),\n", - " ('NP_001305456.1', 25),\n", - " ('NP_001305457.1', 25),\n", - " ('NP_787127.1', 25),\n", - " ('NP_005178.4', 25),\n", - " ('NP_000476.1', 25),\n", - " ('NP_001281257.1', 25),\n", - " ('NP_001305436.1', 22),\n", - " ('NP_001012777.1', 22),\n", - " ('NP_001012780.1', 22),\n", - " ('NP_001165287.1', 22),\n", - " ('NP_001165286.1', 22),\n", - " ('NP_849163.1', 22),\n", - " ('NP_001305442.1', 22),\n", - " ('NP_840101.1', 21),\n", - " ('NP_037410.1', 20),\n", - " ('NP_653205.3', 20),\n", - " ('NP_000092.2', 20),\n", - " ('NP_001281269.1', 20),\n", - " ('NP_955399.1', 19),\n", - " ('NP_003110.1', 19),\n", - " ('NP_722520.2', 15),\n", - " ('NP_001167014.1', 10),\n", - " ('NP_001167012.1', 10),\n", - " ('NP_001167010.1', 10),\n", - " ('NP_001167011.1', 10),\n", - " ('NP_524576.2', 10),\n", - " ('NP_060339.2', 10),\n", - " ('NP_001730.1', 10),\n", - " ('NP_001167013.1', 10),\n", - " ('NP_001171783.1', 7),\n", - " ('NP_060036.2', 7),\n", - " ('NP_001171785.1', 7),\n", - " ('NP_003477.4', 7),\n", - " ('NP_065706.2', 6),\n", - " ('NP_150254.1', 5),\n", - " ('NP_001230060.1', 5),\n", - " ('NP_000968.2', 5),\n", - " ('NP_705900.1', 4),\n", - " ('NP_055242.1', 4),\n", - " ('NP_001182054.1', 3),\n", - " ('NP_079011.3', 3),\n", - " ('NP_073729.1', 3),\n", - " ('NP_001269612.1', 3),\n", - " ('NP_004404.1', 1),\n", - " ('NP_001121613.1', 1)]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "patientCohort.list_all_proteins()" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "74ba64ea", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'NM_013275.6': Counter({: 4,\n", - " : 14,\n", - " : 176,\n", - " : 66,\n", - " : 3,\n", - " : 51,\n", - " : 47,\n", - " : 63,\n", - " : 34,\n", - " : 58,\n", - " : 35,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 8,\n", - " : 1,\n", - " : 1})}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "patientCohort.list_data_by_tx('NM_013275.6')" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "9268c27b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'NM_005187.6': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001030018.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384937.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001256917.2': Counter({: 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 2}),\n", - " 'NM_199367.3': Counter({: 10,\n", - " : 10,\n", - " : 10,\n", - " : 10,\n", - " : 6,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_153813.3': Counter({: 13,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384766.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001384775.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_014427.5': Counter({: 2,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 1}),\n", - " 'NM_020655.4': Counter({: 4,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001318527.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384920.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001318524.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384926.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_000512.5': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001318526.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384921.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001318528.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001271605.3': Counter({: 4,\n", - " : 1}),\n", - " 'NM_001384931.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001389467.1': Counter({: 1,\n", - " : 1}),\n", - " 'NM_001294328.4': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001128141.3': Counter({: 1,\n", - " : 1}),\n", - " 'NM_001243279.3': Counter({: 25,\n", - " : 1,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 3,\n", - " : 3}),\n", - " 'NM_001384942.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384771.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001323543.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384768.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001195124.3': Counter({: 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 2}),\n", - " 'NM_001367624.2': Counter({: 13,\n", - " : 1}),\n", - " 'NM_002461.3': Counter({: 19,\n", - " : 1}),\n", - " 'NM_001384770.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001384919.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384918.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001201407.2': Counter({: 41,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_182531.5': Counter({: 41,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_000485.3': Counter({: 24,\n", - " : 1}),\n", - " 'NM_000977.4': Counter({: 2,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001184856.2': Counter({: 1,\n", - " : 5,\n", - " : 1}),\n", - " 'NM_004933.3': Counter({: 29,\n", - " : 6,\n", - " : 6,\n", - " : 6,\n", - " : 7,\n", - " : 5,\n", - " : 1}),\n", - " 'NM_001389470.1': Counter({: 1,\n", - " : 1}),\n", - " 'NM_001385709.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384941.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384772.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001012762.3': Counter({: 21,\n", - " : 1}),\n", - " 'NM_175931.3': Counter({: 24,\n", - " : 1}),\n", - " 'NM_013275.6': Counter({: 4,\n", - " : 14,\n", - " : 176,\n", - " : 66,\n", - " : 3,\n", - " : 51,\n", - " : 47,\n", - " : 63,\n", - " : 34,\n", - " : 58,\n", - " : 35,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 8,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001318532.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384927.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001739.2': Counter({: 7,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 1}),\n", - " 'NM_001184854.2': Counter({: 1,\n", - " : 5,\n", - " : 1}),\n", - " 'NM_001384944.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001386991.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_017566.4': Counter({: 1,\n", - " : 5,\n", - " : 1}),\n", - " 'NM_001318525.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001323544.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001384925.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384939.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384923.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_004413.4': Counter({: 1,\n", - " : 1}),\n", - " 'NM_013278.4': Counter({: 19,\n", - " : 1}),\n", - " 'NM_001384769.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_178310.4': Counter({: 19,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384922.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001284316.2': Counter({: 25,\n", - " : 1,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 3,\n", - " : 3}),\n", - " 'NM_001173543.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384929.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384763.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_033251.2': Counter({: 2,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001351938.2': Counter({: 1,\n", - " : 5,\n", - " : 1}),\n", - " 'NM_001384764.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001384928.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001351937.2': Counter({: 1,\n", - " : 5,\n", - " : 1}),\n", - " 'NM_001384916.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001080487.4': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001318530.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001294340.2': Counter({: 17,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001173542.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001256182.2': Counter({: 4,\n", - " : 14,\n", - " : 176,\n", - " : 66,\n", - " : 3,\n", - " : 51,\n", - " : 47,\n", - " : 63,\n", - " : 34,\n", - " : 58,\n", - " : 35,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 8,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001127214.4': Counter({: 25,\n", - " : 1,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 3,\n", - " : 3}),\n", - " 'NM_079837.3': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001389466.1': Counter({: 1,\n", - " : 1}),\n", - " 'NM_015144.3': Counter({: 1,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 3}),\n", - " 'NM_153636.3': Counter({: 2,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 1}),\n", - " 'NM_001256183.2': Counter({: 4,\n", - " : 14,\n", - " : 176,\n", - " : 66,\n", - " : 3,\n", - " : 51,\n", - " : 47,\n", - " : 63,\n", - " : 34,\n", - " : 58,\n", - " : 35,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 8,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_000101.4': Counter({: 19,\n", - " : 1}),\n", - " 'NM_016209.5': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001318507.2': Counter({: 21,\n", - " : 1}),\n", - " 'NM_001378881.1': Counter({: 41,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384935.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384936.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384767.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001171816.2': Counter({: 20,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001195125.3': Counter({: 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 2}),\n", - " 'NM_001384773.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_003486.7': Counter({: 6,\n", - " : 1}),\n", - " 'NM_001366322.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001318513.2': Counter({: 21,\n", - " : 1}),\n", - " 'NM_001243131.1': Counter({: 2,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001173540.2': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001142864.4': Counter({: 21,\n", - " : 2,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_030928.4': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001173541.2': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_178841.4': Counter({: 20,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_003119.4': Counter({: 13,\n", - " : 13,\n", - " : 13,\n", - " : 13,\n", - " : 3,\n", - " : 2,\n", - " : 1}),\n", - " 'NM_001173539.2': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_174917.5': Counter({: 25,\n", - " : 1,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 2,\n", - " : 3,\n", - " : 3}),\n", - " 'NM_001271604.4': Counter({: 4,\n", - " : 1}),\n", - " 'NM_001384943.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384938.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001367225.1': Counter({: 6,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 3,\n", - " : 1}),\n", - " 'NM_001318529.2': Counter({: 24,\n", - " : 1}),\n", - " 'NM_001012759.3': Counter({: 21,\n", - " : 1}),\n", - " 'NM_001363850.1': Counter({: 13,\n", - " : 13,\n", - " : 13,\n", - " : 13,\n", - " : 3,\n", - " : 2,\n", - " : 1}),\n", - " 'NM_017869.4': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001171815.2': Counter({: 20,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_144604.4': Counter({: 17,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 2,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384765.1': Counter({: 35,\n", - " : 2}),\n", - " 'NM_001386992.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1}),\n", - " 'NM_001384940.1': Counter({: 8,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1,\n", - " : 1})}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "patientCohort.list_data_by_tx()" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "63705c17", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{ProteinMetadata(id=NP_000092.2, label=Cytochrome b-245 light chain, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=134, end=195)),)),\n", - " ProteinMetadata(id=NP_000476.1, label=Adenine phosphoribosyltransferase, features=()),\n", - " ProteinMetadata(id=NP_000503.1, label=N-acetylgalactosamine-6-sulfatase, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Catalytic domain, start=27, end=379)),)),\n", - " ProteinMetadata(id=NP_000968.2, label=Large ribosomal subunit protein eL13, features=()),\n", - " ProteinMetadata(id=NP_001012777.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", - " ProteinMetadata(id=NP_001012780.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", - " ProteinMetadata(id=NP_001025189.1, label=Adenine phosphoribosyltransferase, features=()),\n", - " ProteinMetadata(id=NP_001073956.2, label=Embryonic polyadenylate-binding protein 2, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=RRM, start=147, end=224)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=21, end=66)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=101, end=128)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=227, end=278)))),\n", - " ProteinMetadata(id=NP_001120686.1, label=Malonate--CoA ligase ACSF3, mitochondrial, features=()),\n", - " ProteinMetadata(id=NP_001121613.1, label=Dipeptidase 1, features=()),\n", - " ProteinMetadata(id=NP_001136336.2, label=Piezo-type mechanosensitive ion channel component 1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=738, end=769)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1356, end=1402)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1462, end=1498)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1576, end=1630)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1811, end=1921)))),\n", - " ProteinMetadata(id=NP_001165286.1, label=E3 ubiquitin-protein ligase RNF166, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=UIM, start=221, end=237)),)),\n", - " ProteinMetadata(id=NP_001165287.1, label=E3 ubiquitin-protein ligase RNF166, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=UIM, start=221, end=237)),)),\n", - " ProteinMetadata(id=NP_001167010.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_001167011.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_001167012.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_001167013.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_001167014.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_001171783.1, label=Kelch domain-containing protein 4, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 1, start=77, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 2, start=133, end=187)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 3, start=188, end=241)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 4, start=243, end=289)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 5, start=308, end=361)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 6, start=443, end=494)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=33)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=346, end=379)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=402, end=432)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=481, end=520)))),\n", - " ProteinMetadata(id=NP_001171785.1, label=Kelch domain-containing protein 4, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 1, start=77, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 2, start=133, end=187)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 3, start=188, end=241)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 4, start=243, end=289)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 5, start=308, end=361)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 6, start=443, end=494)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=33)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=346, end=379)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=402, end=432)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=481, end=520)))),\n", - " ProteinMetadata(id=NP_001182054.1, label=Uncharacterized protein C16orf95, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=26)),)),\n", - " ProteinMetadata(id=NP_001230060.1, label=Large ribosomal subunit protein eL13, features=()),\n", - " ProteinMetadata(id=NP_001230208.1, label=Malonate--CoA ligase ACSF3, mitochondrial, features=()),\n", - " ProteinMetadata(id=NP_001243111.1, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),\n", - " ProteinMetadata(id=NP_001243112.1, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),\n", - " ProteinMetadata(id=NP_001269612.1, label=F-box only protein 31, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=F-box, start=64, end=110)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=11, end=53)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=377, end=446)))),\n", - " ProteinMetadata(id=NP_001281257.1, label=Embryonic polyadenylate-binding protein 2, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=RRM, start=147, end=224)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=21, end=66)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=101, end=128)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=227, end=278)))),\n", - " ProteinMetadata(id=NP_001281269.1, label=Zinc finger CCCH domain-containing protein 18, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=222)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=391, end=928)))),\n", - " ProteinMetadata(id=NP_001305436.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", - " ProteinMetadata(id=NP_001305442.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", - " ProteinMetadata(id=NP_001305453.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305454.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305455.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305456.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305457.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305458.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305459.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001305461.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_001730.1, label=Carbonic anhydrase 5A, mitochondrial, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Alpha-carbonic anhydrase, start=39, end=296)),)),\n", - " ProteinMetadata(id=NP_003110.1, label=Paraplegin, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=108, end=133)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with PPIF, start=701, end=795)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=751, end=795)))),\n", - " ProteinMetadata(id=NP_003477.4, label=Large neutral amino acids transporter small subunit 1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=40)),)),\n", - " ProteinMetadata(id=NP_004404.1, label=Dipeptidase 1, features=()),\n", - " ProteinMetadata(id=NP_004924.1, label=Cadherin-15, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 1, start=61, end=152)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 2, start=153, end=260)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 3, start=261, end=375)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 4, start=376, end=481)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 5, start=482, end=590)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=636, end=663)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=676, end=703)))),\n", - " ProteinMetadata(id=NP_005178.4, label=Protein CBFA2T3, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=TAFH, start=171, end=266)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates localization to the nucleus, start=1, end=435)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PDE7A (in isoform 2), start=1, end=430)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Required for nucleolar targeting (in isoform 1), start=1, end=127)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=109)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with ZBTB33, start=145, end=242)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with HIF1A, start=176, end=268)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=284, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 2 (NHR2); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=394, end=412)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=434, end=472)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 3 (NHR3); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=485, end=533)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PRKAR2A, start=485, end=506)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=603, end=653)))),\n", - " ProteinMetadata(id=NP_037407.4, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),\n", - " ProteinMetadata(id=NP_037410.1, label=Interleukin-17C, features=()),\n", - " ProteinMetadata(id=NP_055242.1, label=Copine-7, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 1, start=2, end=133)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 2, start=212, end=339)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=VWFA, start=382, end=581)))),\n", - " ProteinMetadata(id=NP_057293.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", - " ProteinMetadata(id=NP_060036.2, label=Kelch domain-containing protein 4, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 1, start=77, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 2, start=133, end=187)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 3, start=188, end=241)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 4, start=243, end=289)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 5, start=308, end=361)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 6, start=443, end=494)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=33)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=346, end=379)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=402, end=432)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=481, end=520)))),\n", - " ProteinMetadata(id=NP_060339.2, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_065706.2, label=Junctophilin-3, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 1, start=15, end=37)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 2, start=39, end=60)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 3, start=61, end=82)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 4, start=83, end=105)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 5, start=107, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 6, start=130, end=152)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 7, start=288, end=310)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 8, start=311, end=333)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=230, end=259)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=416, end=496)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=526, end=597)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=624, end=649)))),\n", - " ProteinMetadata(id=NP_073729.1, label=Microtubule-associated proteins 1A/1B light chain 3B, features=()),\n", - " ProteinMetadata(id=NP_079011.3, label=F-box only protein 31, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=F-box, start=64, end=110)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=11, end=53)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=377, end=446)))),\n", - " ProteinMetadata(id=NP_112190.2, label=DNA replication factor Cdt1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=118)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=143, end=165)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with GMNN, start=150, end=190)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=383, end=415)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with LRWD1, start=451, end=546)), SimpleProteinFeature(type=FeatureType.MOTIF, info=FeatureInfo(name=PIP-box K+4 motif, start=1, end=23)), SimpleProteinFeature(type=FeatureType.MOTIF, info=FeatureInfo(name=Cyclin-binding motif, start=68, end=70)))),\n", - " ProteinMetadata(id=NP_150254.1, label=Large ribosomal subunit protein eL13, features=()),\n", - " ProteinMetadata(id=NP_524576.2, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", - " ProteinMetadata(id=NP_653205.3, label=Zinc finger CCCH domain-containing protein 18, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=222)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=391, end=928)))),\n", - " ProteinMetadata(id=NP_705900.1, label=Copine-7, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 1, start=2, end=133)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 2, start=212, end=339)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=VWFA, start=382, end=581)))),\n", - " ProteinMetadata(id=NP_722520.2, label=Zinc finger protein ZFPM1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=93)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=114, end=133)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with TACC3, start=330, end=341)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=384, end=409)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=438, end=460)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=473, end=515)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=605, end=681)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=708, end=810)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CTBP2, start=794, end=800)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=889, end=971)))),\n", - " ProteinMetadata(id=NP_777577.2, label=Malonate--CoA ligase ACSF3, mitochondrial, features=()),\n", - " ProteinMetadata(id=NP_787127.1, label=Protein CBFA2T3, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=TAFH, start=171, end=266)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates localization to the nucleus, start=1, end=435)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PDE7A (in isoform 2), start=1, end=430)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Required for nucleolar targeting (in isoform 1), start=1, end=127)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=109)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with ZBTB33, start=145, end=242)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with HIF1A, start=176, end=268)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=284, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 2 (NHR2); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=394, end=412)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=434, end=472)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 3 (NHR3); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=485, end=533)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PRKAR2A, start=485, end=506)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=603, end=653)))),\n", - " ProteinMetadata(id=NP_840101.1, label=Zinc finger protein SNAI3, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=SNAG domain, start=1, end=20)),)),\n", - " ProteinMetadata(id=NP_849163.1, label=E3 ubiquitin-protein ligase RNF166, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=UIM, start=221, end=237)),)),\n", - " ProteinMetadata(id=NP_872337.2, label=Zinc finger protein 778, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=KRAB, start=42, end=110)),)),\n", - " ProteinMetadata(id=NP_955399.1, label=Paraplegin, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=108, end=133)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with PPIF, start=701, end=795)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=751, end=795))))}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "patientCohort.all_proteins" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "ec6c204f", "metadata": {}, "outputs": [], @@ -1556,7 +563,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "df922d31", "metadata": {}, "outputs": [], @@ -1566,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "91809a38", "metadata": {}, "outputs": [], @@ -1576,810 +583,42 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "7a83a5ca", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'SO:0001589'" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "VariantEffect.FRAMESHIFT_VARIANT.value" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "57dd0e23", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
With frameshift_variantWithout frameshift_variant
CountPercentCountPercentp-valueCorrected p-values
HP:0011446 (Abnormality of higher mental function)11482.61%10694.64%0.0033470.046856
HP:0001249 (Intellectual disability)10071.43%9486.24%0.0055750.078048
HP:0007018 (Attention deficit hyperactivity disorder)3581.40%2666.67%0.1390301.000000
HP:0000325 (Triangular face)4571.43%3858.46%0.1415181.000000
HP:0001155 (Abnormality of the hand)10067.11%8972.36%0.3583601.000000
HP:0012758 (Neurodevelopmental delay)8694.51%9096.77%0.4944871.000000
HP:0006482 (Abnormality of dental morphology)12485.52%10081.97%0.5045341.000000
HP:0000365 (Hearing impairment)5280.00%4576.27%0.6668751.000000
HP:0010938 (Abnormal external nose morphology)7189.87%6392.65%0.7720391.000000
HP:0000729 (Autistic behavior)2756.25%2960.42%0.8361561.000000
HP:0000534 (Abnormal eyebrow morphology)7082.35%5680.00%0.8364371.000000
HP:0000343 (Long philtrum)6678.57%5580.88%0.8401971.000000
HP:0004322 (Short stature)7658.46%7457.36%0.9001401.000000
HP:0000356 (Abnormality of the outer ear)3778.72%4080.00%1.0000001.000000
\n", - "
" - ], - "text/plain": [ - " With frameshift_variant \\\n", - " Count \n", - "HP:0011446 (Abnormality of higher mental function) 114 \n", - "HP:0001249 (Intellectual disability) 100 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 35 \n", - "HP:0000325 (Triangular face) 45 \n", - "HP:0001155 (Abnormality of the hand) 100 \n", - "HP:0012758 (Neurodevelopmental delay) 86 \n", - "HP:0006482 (Abnormality of dental morphology) 124 \n", - "HP:0000365 (Hearing impairment) 52 \n", - "HP:0010938 (Abnormal external nose morphology) 71 \n", - "HP:0000729 (Autistic behavior) 27 \n", - "HP:0000534 (Abnormal eyebrow morphology) 70 \n", - "HP:0000343 (Long philtrum) 66 \n", - "HP:0004322 (Short stature) 76 \n", - "HP:0000356 (Abnormality of the outer ear) 37 \n", - "\n", - " \\\n", - " Percent \n", - "HP:0011446 (Abnormality of higher mental function) 82.61% \n", - "HP:0001249 (Intellectual disability) 71.43% \n", - "HP:0007018 (Attention deficit hyperactivity dis... 81.40% \n", - "HP:0000325 (Triangular face) 71.43% \n", - "HP:0001155 (Abnormality of the hand) 67.11% \n", - "HP:0012758 (Neurodevelopmental delay) 94.51% \n", - "HP:0006482 (Abnormality of dental morphology) 85.52% \n", - "HP:0000365 (Hearing impairment) 80.00% \n", - "HP:0010938 (Abnormal external nose morphology) 89.87% \n", - "HP:0000729 (Autistic behavior) 56.25% \n", - "HP:0000534 (Abnormal eyebrow morphology) 82.35% \n", - "HP:0000343 (Long philtrum) 78.57% \n", - "HP:0004322 (Short stature) 58.46% \n", - "HP:0000356 (Abnormality of the outer ear) 78.72% \n", - "\n", - " Without frameshift_variant \\\n", - " Count \n", - "HP:0011446 (Abnormality of higher mental function) 106 \n", - "HP:0001249 (Intellectual disability) 94 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 26 \n", - "HP:0000325 (Triangular face) 38 \n", - "HP:0001155 (Abnormality of the hand) 89 \n", - "HP:0012758 (Neurodevelopmental delay) 90 \n", - "HP:0006482 (Abnormality of dental morphology) 100 \n", - "HP:0000365 (Hearing impairment) 45 \n", - "HP:0010938 (Abnormal external nose morphology) 63 \n", - "HP:0000729 (Autistic behavior) 29 \n", - "HP:0000534 (Abnormal eyebrow morphology) 56 \n", - "HP:0000343 (Long philtrum) 55 \n", - "HP:0004322 (Short stature) 74 \n", - "HP:0000356 (Abnormality of the outer ear) 40 \n", - "\n", - " \\\n", - " Percent p-value \n", - "HP:0011446 (Abnormality of higher mental function) 94.64% 0.003347 \n", - "HP:0001249 (Intellectual disability) 86.24% 0.005575 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 66.67% 0.139030 \n", - "HP:0000325 (Triangular face) 58.46% 0.141518 \n", - "HP:0001155 (Abnormality of the hand) 72.36% 0.358360 \n", - "HP:0012758 (Neurodevelopmental delay) 96.77% 0.494487 \n", - "HP:0006482 (Abnormality of dental morphology) 81.97% 0.504534 \n", - "HP:0000365 (Hearing impairment) 76.27% 0.666875 \n", - "HP:0010938 (Abnormal external nose morphology) 92.65% 0.772039 \n", - "HP:0000729 (Autistic behavior) 60.42% 0.836156 \n", - "HP:0000534 (Abnormal eyebrow morphology) 80.00% 0.836437 \n", - "HP:0000343 (Long philtrum) 80.88% 0.840197 \n", - "HP:0004322 (Short stature) 57.36% 0.900140 \n", - "HP:0000356 (Abnormality of the outer ear) 80.00% 1.000000 \n", - "\n", - " \n", - " Corrected p-values \n", - "HP:0011446 (Abnormality of higher mental function) 0.046856 \n", - "HP:0001249 (Intellectual disability) 0.078048 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", - "HP:0000325 (Triangular face) 1.000000 \n", - "HP:0001155 (Abnormality of the hand) 1.000000 \n", - "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", - "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", - "HP:0000365 (Hearing impairment) 1.000000 \n", - "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", - "HP:0000729 (Autistic behavior) 1.000000 \n", - "HP:0000534 (Abnormal eyebrow morphology) 1.000000 \n", - "HP:0000343 (Long philtrum) 1.000000 \n", - "HP:0004322 (Short stature) 1.000000 \n", - "HP:0000356 (Abnormality of the outer ear) 1.000000 " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "analysis.compare_by_variant_type(VariantEffect.FRAMESHIFT_VARIANT)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "1c2c01fd", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
With 16_89284634_89284639_GTGTTT_GWithout 16_89284634_89284639_GTGTTT_G
CountPercentCountPercentp-valueCorrected p-values
HP:0001249 (Intellectual disability)1557.69%17980.27%0.0129390.181150
HP:0011446 (Abnormality of higher mental function)1973.08%20189.73%0.0225750.316053
HP:0010938 (Abnormal external nose morphology)1381.25%12192.37%0.1530851.000000
HP:0007018 (Attention deficit hyperactivity disorder)7100.00%5472.00%0.1821091.000000
HP:0000534 (Abnormal eyebrow morphology)1071.43%11682.27%0.2992781.000000
HP:0001155 (Abnormality of the hand)2177.78%16868.57%0.3846091.000000
HP:0000729 (Autistic behavior)342.86%5359.55%0.4455761.000000
HP:0000365 (Hearing impairment)770.00%9078.95%0.4526611.000000
HP:0012758 (Neurodevelopmental delay)1493.33%16295.86%0.5005261.000000
HP:0000343 (Long philtrum)1173.33%11080.29%0.5089201.000000
HP:0000356 (Abnormality of the outer ear)675.00%7179.78%0.6672011.000000
HP:0004322 (Short stature)1453.85%13658.37%0.6798031.000000
HP:0000325 (Triangular face)675.00%7764.17%0.7118301.000000
HP:0006482 (Abnormality of dental morphology)2284.62%20283.82%1.0000001.000000
\n", - "
" - ], - "text/plain": [ - " With 16_89284634_89284639_GTGTTT_G \\\n", - " Count \n", - "HP:0001249 (Intellectual disability) 15 \n", - "HP:0011446 (Abnormality of higher mental function) 19 \n", - "HP:0010938 (Abnormal external nose morphology) 13 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 7 \n", - "HP:0000534 (Abnormal eyebrow morphology) 10 \n", - "HP:0001155 (Abnormality of the hand) 21 \n", - "HP:0000729 (Autistic behavior) 3 \n", - "HP:0000365 (Hearing impairment) 7 \n", - "HP:0012758 (Neurodevelopmental delay) 14 \n", - "HP:0000343 (Long philtrum) 11 \n", - "HP:0000356 (Abnormality of the outer ear) 6 \n", - "HP:0004322 (Short stature) 14 \n", - "HP:0000325 (Triangular face) 6 \n", - "HP:0006482 (Abnormality of dental morphology) 22 \n", - "\n", - " \\\n", - " Percent \n", - "HP:0001249 (Intellectual disability) 57.69% \n", - "HP:0011446 (Abnormality of higher mental function) 73.08% \n", - "HP:0010938 (Abnormal external nose morphology) 81.25% \n", - "HP:0007018 (Attention deficit hyperactivity dis... 100.00% \n", - "HP:0000534 (Abnormal eyebrow morphology) 71.43% \n", - "HP:0001155 (Abnormality of the hand) 77.78% \n", - "HP:0000729 (Autistic behavior) 42.86% \n", - "HP:0000365 (Hearing impairment) 70.00% \n", - "HP:0012758 (Neurodevelopmental delay) 93.33% \n", - "HP:0000343 (Long philtrum) 73.33% \n", - "HP:0000356 (Abnormality of the outer ear) 75.00% \n", - "HP:0004322 (Short stature) 53.85% \n", - "HP:0000325 (Triangular face) 75.00% \n", - "HP:0006482 (Abnormality of dental morphology) 84.62% \n", - "\n", - " Without 16_89284634_89284639_GTGTTT_G \\\n", - " Count \n", - "HP:0001249 (Intellectual disability) 179 \n", - "HP:0011446 (Abnormality of higher mental function) 201 \n", - "HP:0010938 (Abnormal external nose morphology) 121 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 54 \n", - "HP:0000534 (Abnormal eyebrow morphology) 116 \n", - "HP:0001155 (Abnormality of the hand) 168 \n", - "HP:0000729 (Autistic behavior) 53 \n", - "HP:0000365 (Hearing impairment) 90 \n", - "HP:0012758 (Neurodevelopmental delay) 162 \n", - "HP:0000343 (Long philtrum) 110 \n", - "HP:0000356 (Abnormality of the outer ear) 71 \n", - "HP:0004322 (Short stature) 136 \n", - "HP:0000325 (Triangular face) 77 \n", - "HP:0006482 (Abnormality of dental morphology) 202 \n", - "\n", - " \\\n", - " Percent p-value \n", - "HP:0001249 (Intellectual disability) 80.27% 0.012939 \n", - "HP:0011446 (Abnormality of higher mental function) 89.73% 0.022575 \n", - "HP:0010938 (Abnormal external nose morphology) 92.37% 0.153085 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 72.00% 0.182109 \n", - "HP:0000534 (Abnormal eyebrow morphology) 82.27% 0.299278 \n", - "HP:0001155 (Abnormality of the hand) 68.57% 0.384609 \n", - "HP:0000729 (Autistic behavior) 59.55% 0.445576 \n", - "HP:0000365 (Hearing impairment) 78.95% 0.452661 \n", - "HP:0012758 (Neurodevelopmental delay) 95.86% 0.500526 \n", - "HP:0000343 (Long philtrum) 80.29% 0.508920 \n", - "HP:0000356 (Abnormality of the outer ear) 79.78% 0.667201 \n", - "HP:0004322 (Short stature) 58.37% 0.679803 \n", - "HP:0000325 (Triangular face) 64.17% 0.711830 \n", - "HP:0006482 (Abnormality of dental morphology) 83.82% 1.000000 \n", - "\n", - " \n", - " Corrected p-values \n", - "HP:0001249 (Intellectual disability) 0.181150 \n", - "HP:0011446 (Abnormality of higher mental function) 0.316053 \n", - "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", - "HP:0000534 (Abnormal eyebrow morphology) 1.000000 \n", - "HP:0001155 (Abnormality of the hand) 1.000000 \n", - "HP:0000729 (Autistic behavior) 1.000000 \n", - "HP:0000365 (Hearing impairment) 1.000000 \n", - "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", - "HP:0000343 (Long philtrum) 1.000000 \n", - "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", - "HP:0004322 (Short stature) 1.000000 \n", - "HP:0000325 (Triangular face) 1.000000 \n", - "HP:0006482 (Abnormality of dental morphology) 1.000000 " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "analysis.compare_by_variant('16_89284634_89284639_GTGTTT_G')" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "f306c5ef", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Inside Exon 9Outside Exon 9
CountPercentCountPercentp-valueCorrected p-values
HP:0004322 (Short stature)10762.94%836.36%0.0210110.294157
HP:0011446 (Abnormality of higher mental function)15585.64%23100.00%0.0501960.702743
HP:0001155 (Abnormality of the hand)13569.95%1990.48%0.0701280.981790
HP:0001249 (Intellectual disability)13775.27%2191.30%0.1138381.000000
HP:0007018 (Attention deficit hyperactivity disorder)4381.13%861.54%0.1516091.000000
HP:0000365 (Hearing impairment)6578.31%969.23%0.4866901.000000
HP:0000534 (Abnormal eyebrow morphology)9482.46%1076.92%0.7033221.000000
HP:0000729 (Autistic behavior)3356.90%866.67%0.7490491.000000
HP:0010938 (Abnormal external nose morphology)9890.74%1392.86%1.0000001.000000
HP:0000343 (Long philtrum)9279.31%1083.33%1.0000001.000000
HP:0000356 (Abnormality of the outer ear)4981.67%990.00%1.0000001.000000
HP:0012758 (Neurodevelopmental delay)12096.00%12100.00%1.0000001.000000
HP:0006482 (Abnormality of dental morphology)16084.66%2187.50%1.0000001.000000
HP:0000325 (Triangular face)5771.25%1173.33%1.0000001.000000
\n", - "
" - ], - "text/plain": [ - " Inside Exon 9 \\\n", - " Count Percent \n", - "HP:0004322 (Short stature) 107 62.94% \n", - "HP:0011446 (Abnormality of higher mental function) 155 85.64% \n", - "HP:0001155 (Abnormality of the hand) 135 69.95% \n", - "HP:0001249 (Intellectual disability) 137 75.27% \n", - "HP:0007018 (Attention deficit hyperactivity dis... 43 81.13% \n", - "HP:0000365 (Hearing impairment) 65 78.31% \n", - "HP:0000534 (Abnormal eyebrow morphology) 94 82.46% \n", - "HP:0000729 (Autistic behavior) 33 56.90% \n", - "HP:0010938 (Abnormal external nose morphology) 98 90.74% \n", - "HP:0000343 (Long philtrum) 92 79.31% \n", - "HP:0000356 (Abnormality of the outer ear) 49 81.67% \n", - "HP:0012758 (Neurodevelopmental delay) 120 96.00% \n", - "HP:0006482 (Abnormality of dental morphology) 160 84.66% \n", - "HP:0000325 (Triangular face) 57 71.25% \n", - "\n", - " Outside Exon 9 \\\n", - " Count Percent \n", - "HP:0004322 (Short stature) 8 36.36% \n", - "HP:0011446 (Abnormality of higher mental function) 23 100.00% \n", - "HP:0001155 (Abnormality of the hand) 19 90.48% \n", - "HP:0001249 (Intellectual disability) 21 91.30% \n", - "HP:0007018 (Attention deficit hyperactivity dis... 8 61.54% \n", - "HP:0000365 (Hearing impairment) 9 69.23% \n", - "HP:0000534 (Abnormal eyebrow morphology) 10 76.92% \n", - "HP:0000729 (Autistic behavior) 8 66.67% \n", - "HP:0010938 (Abnormal external nose morphology) 13 92.86% \n", - "HP:0000343 (Long philtrum) 10 83.33% \n", - "HP:0000356 (Abnormality of the outer ear) 9 90.00% \n", - "HP:0012758 (Neurodevelopmental delay) 12 100.00% \n", - "HP:0006482 (Abnormality of dental morphology) 21 87.50% \n", - "HP:0000325 (Triangular face) 11 73.33% \n", - "\n", - " \\\n", - " p-value \n", - "HP:0004322 (Short stature) 0.021011 \n", - "HP:0011446 (Abnormality of higher mental function) 0.050196 \n", - "HP:0001155 (Abnormality of the hand) 0.070128 \n", - "HP:0001249 (Intellectual disability) 0.113838 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 0.151609 \n", - "HP:0000365 (Hearing impairment) 0.486690 \n", - "HP:0000534 (Abnormal eyebrow morphology) 0.703322 \n", - "HP:0000729 (Autistic behavior) 0.749049 \n", - "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", - "HP:0000343 (Long philtrum) 1.000000 \n", - "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", - "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", - "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", - "HP:0000325 (Triangular face) 1.000000 \n", - "\n", - " \n", - " Corrected p-values \n", - "HP:0004322 (Short stature) 0.294157 \n", - "HP:0011446 (Abnormality of higher mental function) 0.702743 \n", - "HP:0001155 (Abnormality of the hand) 0.981790 \n", - "HP:0001249 (Intellectual disability) 1.000000 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", - "HP:0000365 (Hearing impairment) 1.000000 \n", - "HP:0000534 (Abnormal eyebrow morphology) 1.000000 \n", - "HP:0000729 (Autistic behavior) 1.000000 \n", - "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", - "HP:0000343 (Long philtrum) 1.000000 \n", - "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", - "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", - "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", - "HP:0000325 (Triangular face) 1.000000 " - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "analysis2 = CohortAnalysis(patientCohort, 'NM_013275.6', hpo, include_unmeasured=False, include_large_SV=False)\n", "\n", @@ -2388,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "f6861157", "metadata": {}, "outputs": [], @@ -2398,251 +637,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "d3c6fd9d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Inside REGIONOutside REGION
CountPercentCountPercentp-valueCorrected p-values
HP:0000325 (Triangular face)5471.05%2955.77%0.0910361.0
HP:0000365 (Hearing impairment)5775.00%4083.33%0.3721951.0
HP:0000356 (Abnormality of the outer ear)4882.76%2974.36%0.4431331.0
HP:0001249 (Intellectual disability)12276.73%7280.00%0.6342381.0
HP:0000534 (Abnormal eyebrow morphology)7979.80%4783.93%0.6688601.0
HP:0000343 (Long philtrum)7980.61%4277.78%0.6793941.0
HP:0012758 (Neurodevelopmental delay)10896.43%6894.44%0.7135011.0
HP:0001155 (Abnormality of the hand)11870.24%7168.27%0.7868031.0
HP:0000729 (Autistic behavior)3356.90%2360.53%0.8330531.0
HP:0011446 (Abnormality of higher mental function)13988.54%8187.10%0.8407681.0
HP:0004322 (Short stature)9358.49%5757.00%0.8972111.0
HP:0010938 (Abnormal external nose morphology)8790.62%4792.16%1.0000001.0
HP:0007018 (Attention deficit hyperactivity disorder)4275.00%1973.08%1.0000001.0
HP:0006482 (Abnormality of dental morphology)13883.64%8684.31%1.0000001.0
\n", - "
" - ], - "text/plain": [ - " Inside REGION \\\n", - " Count Percent \n", - "HP:0000325 (Triangular face) 54 71.05% \n", - "HP:0000365 (Hearing impairment) 57 75.00% \n", - "HP:0000356 (Abnormality of the outer ear) 48 82.76% \n", - "HP:0001249 (Intellectual disability) 122 76.73% \n", - "HP:0000534 (Abnormal eyebrow morphology) 79 79.80% \n", - "HP:0000343 (Long philtrum) 79 80.61% \n", - "HP:0012758 (Neurodevelopmental delay) 108 96.43% \n", - "HP:0001155 (Abnormality of the hand) 118 70.24% \n", - "HP:0000729 (Autistic behavior) 33 56.90% \n", - "HP:0011446 (Abnormality of higher mental function) 139 88.54% \n", - "HP:0004322 (Short stature) 93 58.49% \n", - "HP:0010938 (Abnormal external nose morphology) 87 90.62% \n", - "HP:0007018 (Attention deficit hyperactivity dis... 42 75.00% \n", - "HP:0006482 (Abnormality of dental morphology) 138 83.64% \n", - "\n", - " Outside REGION \\\n", - " Count Percent \n", - "HP:0000325 (Triangular face) 29 55.77% \n", - "HP:0000365 (Hearing impairment) 40 83.33% \n", - "HP:0000356 (Abnormality of the outer ear) 29 74.36% \n", - "HP:0001249 (Intellectual disability) 72 80.00% \n", - "HP:0000534 (Abnormal eyebrow morphology) 47 83.93% \n", - "HP:0000343 (Long philtrum) 42 77.78% \n", - "HP:0012758 (Neurodevelopmental delay) 68 94.44% \n", - "HP:0001155 (Abnormality of the hand) 71 68.27% \n", - "HP:0000729 (Autistic behavior) 23 60.53% \n", - "HP:0011446 (Abnormality of higher mental function) 81 87.10% \n", - "HP:0004322 (Short stature) 57 57.00% \n", - "HP:0010938 (Abnormal external nose morphology) 47 92.16% \n", - "HP:0007018 (Attention deficit hyperactivity dis... 19 73.08% \n", - "HP:0006482 (Abnormality of dental morphology) 86 84.31% \n", - "\n", - " \\\n", - " p-value \n", - "HP:0000325 (Triangular face) 0.091036 \n", - "HP:0000365 (Hearing impairment) 0.372195 \n", - "HP:0000356 (Abnormality of the outer ear) 0.443133 \n", - "HP:0001249 (Intellectual disability) 0.634238 \n", - "HP:0000534 (Abnormal eyebrow morphology) 0.668860 \n", - "HP:0000343 (Long philtrum) 0.679394 \n", - "HP:0012758 (Neurodevelopmental delay) 0.713501 \n", - "HP:0001155 (Abnormality of the hand) 0.786803 \n", - "HP:0000729 (Autistic behavior) 0.833053 \n", - "HP:0011446 (Abnormality of higher mental function) 0.840768 \n", - "HP:0004322 (Short stature) 0.897211 \n", - "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", - "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", - "\n", - " \n", - " Corrected p-values \n", - "HP:0000325 (Triangular face) 1.0 \n", - "HP:0000365 (Hearing impairment) 1.0 \n", - "HP:0000356 (Abnormality of the outer ear) 1.0 \n", - "HP:0001249 (Intellectual disability) 1.0 \n", - "HP:0000534 (Abnormal eyebrow morphology) 1.0 \n", - "HP:0000343 (Long philtrum) 1.0 \n", - "HP:0012758 (Neurodevelopmental delay) 1.0 \n", - "HP:0001155 (Abnormality of the hand) 1.0 \n", - "HP:0000729 (Autistic behavior) 1.0 \n", - "HP:0011446 (Abnormality of higher mental function) 1.0 \n", - "HP:0004322 (Short stature) 1.0 \n", - "HP:0010938 (Abnormal external nose morphology) 1.0 \n", - "HP:0007018 (Attention deficit hyperactivity dis... 1.0 \n", - "HP:0006482 (Abnormality of dental morphology) 1.0 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "analysis.compare_by_protein_feature_type(FeatureType.REGION)" ] diff --git a/src/genophenocorr/model/_cohort.py b/src/genophenocorr/model/_cohort.py index 63fe5ffa..983fb505 100644 --- a/src/genophenocorr/model/_cohort.py +++ b/src/genophenocorr/model/_cohort.py @@ -89,7 +89,7 @@ def __hash__(self) -> int: class Cohort(typing.Sized): @staticmethod - def from_patients(members: typing.Sequence[Patient]): + def from_patients(members: typing.Sequence[Patient], include_patients_with_no_HPO: bool = False): """ Create a cohort from a sequence of patients. @@ -99,17 +99,21 @@ def from_patients(members: typing.Sequence[Patient]): cohort_variants, cohort_phenotypes, cohort_proteins = set(), set(), set() # , cohort_proteins var_counts, pheno_count, prot_counts = Counter(), Counter(), Counter() # , prot_counts members = set(members) + excluded_members = [] for patient in members: + if len(patient.phenotypes) == 0 and not include_patients_with_no_HPO: + excluded_members.append(patient) + continue + cohort_phenotypes.update(patient.phenotypes) cohort_variants.update(patient.variants) var_counts.update([var.variant_coordinates.variant_key for var in patient.variants]) - cohort_phenotypes.update(patient.phenotypes) pheno_count.update([pheno.identifier.value for pheno in patient.phenotypes if pheno.observed == True]) cohort_proteins.update(patient.proteins) prot_counts.update([prot.protein_id for prot in patient.proteins]) all_counts = {'patients': len(members), 'variants': var_counts, 'phenotypes': pheno_count, 'proteins': prot_counts} # 'proteins':prot_counts return Cohort(members, cohort_phenotypes, cohort_variants, cohort_proteins, - all_counts) # cohort_proteins, all_counts + all_counts, excluded_members) # cohort_proteins, all_counts """This class creates a collection of patients and makes it easier to determine overlapping diseases, phenotypes, variants, and proteins among the patients. If a list of JSON files is given, it will @@ -130,7 +134,7 @@ def from_patients(members: typing.Sequence[Patient]): list_data_by_tx(transcript:Optional[string]): A list and count of all the variants effects found for all transcripts or a given transcript if transcript is not None. """ - def __init__(self, patient_set: typing.Set[Patient], phenotype_set, variant_set, protein_set, counts_dict, + def __init__(self, patient_set: typing.Set[Patient], phenotype_set, variant_set, protein_set, counts_dict, excluded_members, recessive=False): """Constructs all necessary attributes for a Cohort object @@ -151,6 +155,7 @@ def __init__(self, patient_set: typing.Set[Patient], phenotype_set, variant_set, self._protein_set = protein_set self._variant_set = variant_set self._all_counts_dict = counts_dict + self._excluded_members = excluded_members self._recessive = recessive @property @@ -196,6 +201,10 @@ def all_transcripts(self): all_trans.update([trans.transcript_id for trans in var.tx_annotations]) return all_trans + @property + def all_excluded_patients(self): + return self._excluded_members + @property def total_patient_count(self): """ @@ -261,5 +270,11 @@ def list_data_by_tx(self, transcript=None): del var_type_dict[tx_id] return var_type_dict + def get_excluded_ids(self): + return [ex.patient_id for ex in self.all_excluded_patients] + + def get_excluded_count(self): + return len(self.all_excluded_patients) + def __len__(self) -> int: return len(self._patient_set) diff --git a/src/genophenocorr/preprocessing/_phenopacket.py b/src/genophenocorr/preprocessing/_phenopacket.py index 15e39e69..67354bd2 100644 --- a/src/genophenocorr/preprocessing/_phenopacket.py +++ b/src/genophenocorr/preprocessing/_phenopacket.py @@ -171,7 +171,7 @@ def _add_phenotypes(self, pp: Phenopacket) -> typing.Sequence[Phenotype]: for hpo_id in pp.phenotypic_features: hpo_id_list.append((hpo_id.type.id, not hpo_id.excluded)) if len(hpo_id_list) == 0: - self._logger.warning(f'Expected at least one HPO term per patient, but received none for patient {pp.id}') + #self._logger.warning(f'Expected at least one HPO term per patient, but received none for patient {pp.id}') return [] return self._phenotype_creator.create_phenotype(hpo_id_list) @@ -192,7 +192,8 @@ def _add_protein_data(self, variants: typing.Sequence[Variant]) -> typing.Collec def load_phenopacket_folder(pp_directory: str, - patient_creator: PhenopacketPatientCreator) -> Cohort: + patient_creator: PhenopacketPatientCreator, + include_patients_with_no_HPO: bool = False) -> Cohort: """ Creates a Patient object for each phenopacket formatted JSON file in the given directory `pp_directory`. @@ -214,7 +215,7 @@ def load_phenopacket_folder(pp_directory: str, patients = [patient_creator.create_patient(pp) for pp in pps] # create cohort from patients - return Cohort.from_patients(patients) + return Cohort.from_patients(patients, include_patients_with_no_HPO) def _load_phenopacket_dir(pp_dir: str) -> typing.Sequence[Phenopacket]: diff --git a/src/genophenocorr/view/_cohort.py b/src/genophenocorr/view/_cohort.py index f0db0c3e..81135ace 100644 --- a/src/genophenocorr/view/_cohort.py +++ b/src/genophenocorr/view/_cohort.py @@ -130,17 +130,20 @@ def variants_table(self, cohort, preferred_transcript, min_count=2) -> str: all_variant_counter = {x[0]:x[1] for x in all_variant_tuple_list} for variant in cohort.all_variants: var_count = all_variant_counter[variant.variant_string] - targets = [txa for txa in variant.tx_annotations if txa.transcript_id == "NM_001318852.2"] + targets = [txa for txa in variant.tx_annotations if txa.transcript_id == preferred_transcript] if len(targets) == 1: target_txa = targets[0] - hgvsc_id = target_txa.hgvsc_id + if target_txa.hgvsc_id is not None: + hgvsc_id = target_txa.hgvsc_id + else: + hgvsc_id = "NA" # split out the variant fields = hgvsc_id.split(":") if len(fields) == 2: hgvs = fields[1] else: hgvs = hgvsc_id - effect_tuple = target_txa.variant_effects + effect_tuple = [var_eff.name for var_eff in target_txa.variant_effects] variant_count_d[hgvs] = var_count variant_to_effect_d[hgvs] = effect_tuple[0] # for simplicity, just display first effect variant_to_key[hgvs] = variant.variant_string @@ -160,7 +163,7 @@ def variants_table(self, cohort, preferred_transcript, min_count=2) -> str: for var in sorted_vars: items = [] var_count = variant_count_d.get(var) - print(f"{var} - {var_count}") + #print(f"{var} - {var_count}") if var_count >= min_count: variant_key = variant_to_key.get(var) items.append(var) @@ -177,3 +180,46 @@ def variants_table(self, cohort, preferred_transcript, min_count=2) -> str: rows.append(f"{var_str}.

") rows.append("

Use the entry in the \"Key\" column to investigate whether specific variants display genotype-phenotype correlations

") return "\n".join(rows) + + + def cohort_summary_table(self, cohort, min_count=1) -> str: + """ + Generate HTML code designed to be displayed on a Jupyter notebook using ipython/display/HTML + Show the number of annotations per HPO terms. Provide an explanation. + + :param cohort: A cohort of patients to be analyzed + :type cohort: Cohort + :param min_count: Minimum number of annotations to be displayed in the table + :type min_count: int + :returns: HTML code for display + """ + if not isinstance(cohort, Cohort): + raise ValueError(f"cohort argument must be a Cohort object but we got a {type(cohort)} object") + rows = list() + rows.append(f"\n") + rows.append("") + header_items = ["Item", "Description"] + rows.append(CohortViewer.html_row(header_items)) + n_individuals = cohort.total_patient_count + n_unique_hpo = len(cohort.all_phenotypes) + n_unique_variants = len(cohort.all_variants) + n_excluded_patients = cohort.get_excluded_count() + excluded_patients = cohort.get_excluded_ids() + + cap = "Description of the cohort. " + if n_excluded_patients > 0: + cap = cap + f"{n_excluded_patients} individuals were removed from the cohort because they had no HPO terms." + + capt = f"" + rows.append(capt) + + rows.append(CohortViewer.html_row(["Total Individuals", str(n_individuals)])) + #TODO: Add Diseases + if n_excluded_patients > 0: + rows.append(CohortViewer.html_row(["Excluded Individuals", f"{str(n_excluded_patients)}: {';'.join(excluded_patients)}"])) + rows.append(CohortViewer.html_row(["Total Unique HPO Terms", str(n_unique_hpo)])) + rows.append(CohortViewer.html_row(["Total Unique Variants", str(n_unique_variants)])) + + rows.append("
{cap}
") + + return "\n".join(rows) \ No newline at end of file From b58bb3e3dec55d43b9e7b85d5d0fc1e2cd1a3ac1 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Fri, 13 Oct 2023 10:40:36 -0500 Subject: [PATCH 2/9] created function get_protein_features_affected --- docs/tutorial.rst | 4 +- ...tinez_PMID_36446582_RunGenoPhenoCorr.ipynb | 2458 +++++++++++++++-- src/genophenocorr/model/_cohort.py | 22 +- src/genophenocorr/model/_protein.py | 16 + src/genophenocorr/preprocessing/_vep.py | 2 +- src/genophenocorr/view/_cohort.py | 77 + 6 files changed, 2374 insertions(+), 205 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index eebe9707..2c32821d 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -51,7 +51,7 @@ We can then view the data using the list commands. [('NP_09876.5', 26)] >>> tx_dict = cohort.list_data_by_tx('NM_1234.5') >>> sorted(tx_dict['NM_1234.5'].items()) - [(VariantEffect.FRAMESHIFT_VARIANT, 1), (VariantEffect.MISSENSE_VARIANT, 1)] + [('FRAMESHIFT_VARIANT', 1), ('MISSENSE_VARIANT', 1)] Using the counts, we can choose and run what analyses we want. For instance, we can partition the patients into two groups based on presence/absence of a *frameshift* variant: @@ -59,7 +59,7 @@ For instance, we can partition the patients into two groups based on presence/ab .. doctest:: tutorial >>> from genophenocorr.analysis import CohortAnalysis - >>> from genophenocorr.constants import VariantEffect + >>> from genophenocorr.model import VariantEffect >>> cohort_analysis = CohortAnalysis(cohort, 'NM_1234.5', hpo, include_unmeasured=False) >>> frameshift = cohort_analysis.compare_by_variant_type(VariantEffect.FRAMESHIFT_VARIANT) >>> frameshift # doctest: +NORMALIZE_WHITESPACE diff --git a/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb b/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb index c558d963..48a4a4aa 100644 --- a/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb +++ b/notebooks/KBG/KBG_Martinez_PMID_36446582_RunGenoPhenoCorr.ipynb @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "e6c58342", + "id": "3b7a08b2", "metadata": {}, "outputs": [], "source": [ @@ -111,7 +111,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "8c40d2fb", + "id": "c4960518", "metadata": {}, "outputs": [ { @@ -154,7 +154,7 @@ "ItemDescription\n", "Description of the cohort. 11 individuals were removed from the cohort because they had no HPO terms.\n", "Total Individuals340\n", - "Excluded Individuals11: VanDongen2019_P5;Reuter2020;KBG31B;VanDongen2019_P9;VanDongen2019_P2;VanDongen2019_P4;VanDongen2019_P8;VanDongen2019_P12;VanDongen2019_P7;VanDongen2019_P13;Novara, 2017_P10\n", + "Excluded Individuals11: Reuter2020;VanDongen2019_P2;VanDongen2019_P7;VanDongen2019_P9;VanDongen2019_P12;VanDongen2019_P5;VanDongen2019_P4;Novara, 2017_P10;KBG31B;VanDongen2019_P8;VanDongen2019_P13\n", "Total Unique HPO Terms28\n", "Total Unique Variants326\n", "" @@ -254,165 +254,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[WARN] could not identify a single variant for target transcript (got 0), variant 16_87886395_88066394_DEL\n", - "c.1903_1907del - 32\n", - "c.2408_2412del - 10\n", - "c.1381_1384del - 8\n", - "c.2398_2401del - 8\n", - "c.7481_7482insC - 5\n", - "c.6792_6793insC - 5\n", - "c.2197C>T - 3\n", - "c.4406G>A - 3\n", - "c.1977C>G - 3\n", - "c.3224_3227del - 3\n", - "c.2175_2178del - 3\n", - "c.3832A>T - 3\n", - "c.2182_2183del - 3\n", - "c.7570-1G>C - 3\n", - "c.1367_1370del - 2\n", - "c.3045del - 2\n", - "c.3590_3594del - 2\n", - "c.2329_2332del - 2\n", - "c.4384_4385insA - 2\n", - "c.3309_3310insA - 2\n", - "c.5790C>A - 2\n", - "c.6513_6514insC - 2\n", - "c.1385_1388del - 2\n", - "c.7216C>T - 2\n", - "c.1318C>T - 2\n", - "c.5488G>T - 2\n", - "c.7534C>T - 2\n", - "c.3704_3707del - 2\n", - "c.4087C>T - 2\n", - "c.2512C>T - 1\n", - "c.4786G>T - 1\n", - "c.3931C>T - 1\n", - "c.548_551del - 1\n", - "NA - 1\n", - "c.3208_3209del - 1\n", - "c.2647G>T - 1\n", - "c.3974del - 1\n", - "c.7411_7422del - 1\n", - "c.1460_1463del - 1\n", - "c.2395A>T - 1\n", - "c.7552C>T - 1\n", - "c.4389_4390del - 1\n", - "c.6472G>T - 1\n", - "c.7470+2T>C - 1\n", - "c.915del - 1\n", - "c.4529_4530insC - 1\n", - "c.4103_4104del - 1\n", - "c.5889del - 1\n", - "c.5146G>T - 1\n", - "c.3888_3889insC - 1\n", - "c.7180C>T - 1\n", - "c.6184del - 1\n", - "c.3382_3383del - 1\n", - "c.4408A>T - 1\n", - "c.3771_3772insA - 1\n", - "c.4218C>A - 1\n", - "c.7000C>T - 1\n", - "c.1893_1894insA - 1\n", - "c.7471A>C - 1\n", - "c.6766C>T - 1\n", - "c.3339G>A - 1\n", - "c.3437_3461del - 1\n", - "c.5205del - 1\n", - "c.4206C>G - 1\n", - "c.2828_2829del - 1\n", - "c.3905_3906del - 1\n", - "c.1389_1390insA - 1\n", - "c.4964_4965del - 1\n", - "c.7192C>T - 1\n", - "c.3180_3181insA - 1\n", - "c.3046del - 1\n", - "c.6053_6057del - 1\n", - "c.6340C>T - 1\n", - "c.1763C>A - 1\n", - "c.3582del - 1\n", - "c.7834G>T - 1\n", - "c.5274_5275insC - 1\n", - "c.2765_2766del - 1\n", - "c.3774_3775del - 1\n", - "c.3221_3222del - 1\n", - "c.6409_6410del - 1\n", - "c.5123C>A - 1\n", - "c.6364_6367del - 1\n", - "c.831del - 1\n", - "c.3334del - 1\n", - "c.6691_6692insG - 1\n", - "c.3193A>T - 1\n", - "c.3591_3594del - 1\n", - "c.1786delinsTT - 1\n", - "c.7083del - 1\n", - "c.5957_5958del - 1\n", - "c.7535G>A - 1\n", - "c.211_226+1del - 1\n", - "c.6682del - 1\n", - "c.1285_1286del - 1\n", - "c.1940_1941delinsT - 1\n", - "c.2367del - 1\n", - "c.4374_4375del - 1\n", - "c.1731_1732insT - 1\n", - "c.7471-1G>C - 1\n", - "c.3153del - 1\n", - "c.7356_7357insC - 1\n", - "c.3460G>T - 1\n", - "c.2297_2300del - 1\n", - "c.7363del - 1\n", - "c.4498C>T - 1\n", - "c.4107_4108del - 1\n", - "c.2593_2594insT - 1\n", - "c.5712_5713insT - 1\n", - "c.1711_1723del - 1\n", - "c.2866G>T - 1\n", - "c.4558del - 1\n", - "c.2412del - 1\n", - "c.2692C>T - 1\n", - "c.6817_6833del - 1\n", - "c.5426_5430del - 1\n", - "c.867C>G - 1\n", - "c.866_867insA - 1\n", - "c.5199_5227del - 1\n", - "c.4171C>T - 1\n", - "c.505G>T - 1\n", - "c.4391_4392del - 1\n", - "c.7753C>T - 1\n", - "c.1846G>T - 1\n", - "c.5145C>G - 1\n", - "c.7189C>T - 1\n", - "c.7407C>G - 1\n", - "c.3310_3311insG - 1\n", - "c.2130del - 1\n", - "c.3295_3296del - 1\n", - "c.7570_7572del - 1\n", - "c.6071_6084del - 1\n", - "c.2650del - 1\n", - "c.3448C>T - 1\n", - "c.1173C>G - 1\n", - "c.5953_5954del - 1\n", - "c.3198_3199del - 1\n", - "c.6187G>T - 1\n", - "c.3123_3126del - 1\n", - "16_87886395_88066394_DEL - 1\n", - "c.1801C>T - 1\n", - "c.1457C>G - 1\n", - "c.6015_6016insA - 1\n", - "c.3019C>T - 1\n", - "c.520C>T - 1\n", - "c.7607G>A - 1\n", - "c.2305del - 1\n", - "c.6628G>T - 1\n", - "c.6701del - 1\n", - "c.2751_2752insT - 1\n", - "c.4528_4529del - 1\n", - "c.6968_6975del - 1\n", - "c.3770_3771del - 1\n", - "c.7416C>G - 1\n", - "c.4177_4189del - 1\n", - "c.4283_4286del - 1\n", - "c.1120G>T - 1\n", - "c.3222_3223insA - 1\n" + "[WARN] could not identify a single variant for target transcript (got 0), variant 16_87886395_88066394_DEL\n" ] }, { @@ -459,32 +301,32 @@ "c.2398_2401delFRAMESHIFT_VARIANT816_89284140_89284144_TTTTC_T\n", "c.7481_7482insCFRAMESHIFT_VARIANT516_89275180_89275181_A_AG\n", "c.6792_6793insCFRAMESHIFT_VARIANT516_89279749_89279750_C_CG\n", - "c.2197C>TSTOP_GAINED316_89284345_89284345_G_A\n", + "c.2175_2178delFRAMESHIFT_VARIANT316_89284363_89284367_CTTTG_C\n", "c.4406G>ASTOP_GAINED316_89282136_89282136_C_T\n", + "c.2197C>TSTOP_GAINED316_89284345_89284345_G_A\n", + "c.2182_2183delFRAMESHIFT_VARIANT316_89284358_89284360_GAT_G\n", + "c.7570-1G>CSPLICE_ACCEPTOR_VARIANT316_89274958_89274958_C_G\n", "c.1977C>GSTOP_GAINED316_89284565_89284565_G_C\n", "c.3224_3227delFRAMESHIFT_VARIANT316_89283314_89283318_CCTTT_C\n", - "c.2175_2178delFRAMESHIFT_VARIANT316_89284363_89284367_CTTTG_C\n", "c.3832A>TSTOP_GAINED316_89282710_89282710_T_A\n", - "c.2182_2183delFRAMESHIFT_VARIANT316_89284358_89284360_GAT_G\n", - "c.7570-1G>CSPLICE_ACCEPTOR_VARIANT316_89274958_89274958_C_G\n", "c.1367_1370delFRAMESHIFT_VARIANT216_89285171_89285175_CTTCT_C\n", - "c.3045delFRAMESHIFT_VARIANT216_89283496_89283497_CG_C\n", - "c.3590_3594delFRAMESHIFT_VARIANT216_89282947_89282952_CTTTTT_C\n", + "c.7534C>TMISSENSE_VARIANT216_89275128_89275128_G_A\n", "c.2329_2332delFRAMESHIFT_VARIANT216_89284209_89284213_TTCTC_T\n", + "c.4087C>TSTOP_GAINED216_89282455_89282455_G_A\n", + "c.3045delFRAMESHIFT_VARIANT216_89283496_89283497_CG_C\n", "c.4384_4385insAFRAMESHIFT_VARIANT216_89282157_89282158_C_CT\n", + "c.7216C>TSTOP_GAINED216_89279326_89279326_G_A\n", + "c.3704_3707delFRAMESHIFT_VARIANT216_89282834_89282838_CTGTT_C\n", + "c.3590_3594delFRAMESHIFT_VARIANT216_89282947_89282952_CTTTTT_C\n", "c.3309_3310insAFRAMESHIFT_VARIANT216_89283232_89283233_C_CT\n", - "c.5790C>ASTOP_GAINED216_89280752_89280752_G_T\n", - "c.6513_6514insCFRAMESHIFT_VARIANT216_89280028_89280029_C_CG\n", "c.1385_1388delFRAMESHIFT_VARIANT216_89285153_89285157_TTTTG_T\n", - "c.7216C>TSTOP_GAINED216_89279326_89279326_G_A\n", - "c.1318C>TSTOP_GAINED216_89285224_89285224_G_A\n", "c.5488G>TSTOP_GAINED216_89281054_89281054_C_A\n", - "c.7534C>TMISSENSE_VARIANT216_89275128_89275128_G_A\n", - "c.3704_3707delFRAMESHIFT_VARIANT216_89282834_89282838_CTGTT_C\n", - "c.4087C>TSTOP_GAINED216_89282455_89282455_G_A\n", + "c.6513_6514insCFRAMESHIFT_VARIANT216_89280028_89280029_C_CG\n", + "c.1318C>TSTOP_GAINED216_89285224_89285224_G_A\n", + "c.5790C>ASTOP_GAINED216_89280752_89280752_G_T\n", "\n", "

Additionally, the following variants were observed 1 or fewer times: \n", - "c.3019C>T; c.7083del; c.2367del; c.4206C>G; c.3123_3126del; c.2650del; c.7192C>T; c.3974del; c.1711_1723del; c.1940_1941delinsT; c.3208_3209del; c.2866G>T; c.7471-1G>C; c.5274_5275insC; c.7356_7357insC; c.2593_2594insT; c.3460G>T; c.6184del; c.3339G>A; c.3193A>T; c.2130del; c.2297_2300del; c.4558del; c.4103_4104del; c.2647G>T; c.6682del; c.3046del; c.2765_2766del; c.866_867insA; c.6053_6057del; c.7000C>T; c.6409_6410del; c.1460_1463del; c.4498C>T; c.7416C>G; c.6187G>T; c.1846G>T; c.6472G>T; c.3198_3199del; c.7470+2T>C; c.4218C>A; c.7753C>T; c.3437_3461del; c.3382_3383del; c.3310_3311insG; c.7407C>G; c.4964_4965del; c.6766C>T; c.4283_4286del; NA; c.211_226+1del; c.5145C>G; 16_87886395_88066394_DEL; c.3770_3771del; c.1173C>G; c.3180_3181insA; c.6968_6975del; c.7363del; c.2828_2829del; c.3888_3889insC; c.4389_4390del; c.1285_1286del; c.2305del; c.4374_4375del; c.867C>G; c.6340C>T; c.6015_6016insA; c.1763C>A; c.5953_5954del; c.2692C>T; c.505G>T; c.2412del; c.7471A>C; c.6071_6084del; c.5205del; c.4107_4108del; c.6691_6692insG; c.4177_4189del; c.520C>T; c.4171C>T; c.1801C>T; c.4408A>T; c.5199_5227del; c.1731_1732insT; c.3334del; c.7834G>T; c.3222_3223insA; c.7570_7572del; c.3591_3594del; c.5146G>T; c.3295_3296del; c.3221_3222del; c.6701del; c.4391_4392del; c.7607G>A; c.6817_6833del; c.6628G>T; c.1786delinsTT; c.5889del; c.4529_4530insC; c.3931C>T; c.3905_3906del; c.1389_1390insA; c.2751_2752insT; c.3774_3775del; c.7535G>A; c.5712_5713insT; c.7180C>T; c.548_551del; c.2395A>T; c.6364_6367del; c.1457C>G; c.2512C>T; c.1120G>T; c.7189C>T; c.3582del; c.3153del; c.7552C>T; c.4528_4529del; c.5123C>A; c.915del; c.5426_5430del; c.3771_3772insA; c.831del; c.4786G>T; c.5957_5958del; c.3448C>T; c.7411_7422del; c.1893_1894insA.

\n", + "c.7407C>G; c.3123_3126del; c.5426_5430del; c.5712_5713insT; c.6409_6410del; c.7570_7572del; c.867C>G; c.6184del; c.7753C>T; c.4964_4965del; c.1893_1894insA; c.3221_3222del; c.3974del; c.4206C>G; c.4408A>T; c.1457C>G; c.7000C>T; c.1173C>G; 16_87886395_88066394_DEL; c.6015_6016insA; c.6628G>T; c.3582del; c.5889del; c.4389_4390del; c.3310_3311insG; c.505G>T; c.7411_7422del; c.7192C>T; c.3334del; c.3382_3383del; c.5953_5954del; c.2412del; c.6472G>T; c.2305del; c.1801C>T; c.3905_3906del; c.2866G>T; c.7356_7357insC; c.866_867insA; c.6340C>T; c.7471A>C; c.7470+2T>C; c.5205del; c.5274_5275insC; c.6691_6692insG; c.6968_6975del; c.4529_4530insC; c.6766C>T; c.5199_5227del; c.3295_3296del; c.3222_3223insA; c.5957_5958del; c.6364_6367del; c.1786delinsTT; c.548_551del; c.3193A>T; c.7535G>A; c.1711_1723del; c.3198_3199del; c.4171C>T; c.3153del; c.1460_1463del; c.2593_2594insT; c.7552C>T; c.7363del; c.6053_6057del; c.2765_2766del; c.7189C>T; c.4103_4104del; c.7834G>T; c.2692C>T; c.3770_3771del; c.2512C>T; c.4786G>T; c.1846G>T; c.3180_3181insA; c.3774_3775del; c.3208_3209del; c.4374_4375del; c.5123C>A; c.4391_4392del; NA; c.4528_4529del; c.6187G>T; c.7416C>G; c.3591_3594del; c.4558del; c.6071_6084del; c.3019C>T; c.1389_1390insA; c.3046del; c.3339G>A; c.4107_4108del; c.4177_4189del; c.3931C>T; c.915del; c.1120G>T; c.7083del; c.4218C>A; c.2297_2300del; c.3888_3889insC; c.3460G>T; c.2647G>T; c.1285_1286del; c.3771_3772insA; c.3448C>T; c.1731_1732insT; c.1940_1941delinsT; c.831del; c.4283_4286del; c.7471-1G>C; c.3437_3461del; c.7607G>A; c.2751_2752insT; c.6682del; c.2650del; c.6817_6833del; c.2130del; c.7180C>T; c.5145C>G; c.2828_2829del; c.520C>T; c.5146G>T; c.4498C>T; c.6701del; c.211_226+1del; c.2367del; c.1763C>A; c.2395A>T.

\n", "

Use the entry in the \"Key\" column to investigate whether specific variants display genotype-phenotype correlations

" ], "text/plain": [ @@ -501,59 +343,1145 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "8136e3b2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Willemsen2010_P2',\n", + " 'Gnazzo, 2020_P2',\n", + " 'Bucerzan2020',\n", + " 'Gnazzo, 2020_P20',\n", + " 'KBG38',\n", + " 'Willemsen2010_P3',\n", + " 'Ockeloen2015_P20',\n", + " 'Parenti2021_P12',\n", + " 'Goldenberg2016_P24',\n", + " 'Parenti2021_P9',\n", + " 'Kutkowska-Kazmierczak2021_P2',\n", + " 'KBG11',\n", + " 'KBG9',\n", + " 'KBG23',\n", + " 'Low, 2016_P15 (3)',\n", + " 'Murray, 2017_P1 (1.1)',\n", + " 'Novara, 2017_P5',\n", + " 'Crippa2015_P3',\n", + " 'Gnazzo, 2020_P8',\n", + " 'Parenti2021_P8',\n", + " 'Goldenberg2016_P9',\n", + " 'Goldenberg2016_P26',\n", + " 'Kutkowska-Kazmierczak2021_P19',\n", + " 'Parenti2021_P3',\n", + " 'Parenti2021_P19',\n", + " 'KBG40',\n", + " 'Kutkowska-Kazmierczak2021_P7',\n", + " 'Jin Kim, 2020_P1',\n", + " 'Reuter2020',\n", + " 'Parenti2021_P1',\n", + " 'KBG8A',\n", + " 'Goldenberg2016_P14',\n", + " 'Murray, 2017_P6 (3.2)',\n", + " 'Murray, 2017_P4 (2.1)',\n", + " 'VanDongen2019_P3',\n", + " 'KBG62',\n", + " 'Goldenberg2016_P8',\n", + " 'Gnazzo, 2020_P25',\n", + " 'Scarano, 2013_P8',\n", + " 'KBG51',\n", + " 'Parenti2021_P23',\n", + " 'KBG10A',\n", + " 'Gnazzo, 2020_P1',\n", + " 'Ockeloen2015_P5',\n", + " 'Kutkowska-Kazmierczak2021_P18',\n", + " 'Khalifa, 2013_P1B',\n", + " 'Novara, 2017_P10',\n", + " 'Low, 2016_P22 (16)',\n", + " 'Parenti2016_P1',\n", + " 'Ockeloen2015_P8',\n", + " 'Scarano, 2013_P10',\n", + " 'KBG14',\n", + " 'Low, 2016_P12 (13)',\n", + " 'Rentas2021_P1',\n", + " 'KBG29',\n", + " 'Miyatake, 2017_P2',\n", + " 'Goldenberg2016_P19',\n", + " 'Gnazzo, 2020_P30',\n", + " 'KBG30',\n", + " 'Low, 2016_P28 (25)',\n", + " 'Novara, 2017_P8',\n", + " 'Gnazzo, 2020_P3',\n", + " 'Kleyner, 2016',\n", + " 'Parenti2021_P22',\n", + " 'VanDongen2019_P1',\n", + " 'Novara, 2017_P12',\n", + " 'Gnazzo, 2020_P21',\n", + " 'Murray, 2017_P10 (7.1.)',\n", + " 'Murray, 2017_P2 (1.2)',\n", + " 'KBG12',\n", + " 'Kutkowska-Kazmierczak2021_P1',\n", + " 'KBG28',\n", + " 'Gnazzo, 2020_P22',\n", + " 'KBG34',\n", + " 'KBG3',\n", + " 'Alves, 2019',\n", + " 'Low, 2016_P11 (20)',\n", + " 'Ockeloen2015_P16',\n", + " 'Low, 2016_P18 (12)',\n", + " 'Goldenberg2016_P36',\n", + " 'KBG31B',\n", + " 'KBG10B',\n", + " 'Ockeloen2015_P4',\n", + " 'Low, 2016_P21 (15)',\n", + " 'Ockeloen2015_P9',\n", + " 'KBG19',\n", + " 'KBG27',\n", + " 'KBG66',\n", + " 'Parenti2021_P13',\n", + " 'Kutkowska-Kazmierczak2021_P20',\n", + " 'Sayed, 2020_P2',\n", + " 'Novara, 2017_P3',\n", + " 'Murray, 2017_P13 (11.1)',\n", + " 'Gnazzo, 2020_P9',\n", + " 'Kutkowska-Kazmierczak2021_P16',\n", + " 'VanDongen2019_P2',\n", + " 'Ockeloen2015_P3',\n", + " 'Low, 2016_30 (28)',\n", + " 'Kutkowska-Kazmierczak2021_P4',\n", + " 'Gnazzo, 2020_P4',\n", + " 'Ockeloen2015_P11',\n", + " 'KBG16',\n", + " 'Sirmaci2011_P4 (previously published Brancati, 2004)',\n", + " 'Ockeloen2015_P2',\n", + " 'Crippa2015_P2',\n", + " 'KBG46',\n", + " 'VanDongen2019_P7',\n", + " 'Ockeloen2015_P15',\n", + " 'KBG21',\n", + " 'Ockeloen2015_P14',\n", + " 'Sirmaci2011_P2',\n", + " 'Kutkowska-Kazmierczak2021_P22',\n", + " 'Goldenberg2016_P29',\n", + " 'KBG59',\n", + " 'VanDongen2019_P9',\n", + " 'Novara, 2017_P11',\n", + " 'Low, 2016_P17 (10)',\n", + " 'Low, 2016_P8 (33)',\n", + " 'KBG32',\n", + " 'Low, 2016_P3 (4)',\n", + " 'KBG45',\n", + " 'Walz2015_PB',\n", + " 'KBG52',\n", + " 'Murray, 2017_P16 (13.1)',\n", + " 'Goldenberg2016_P35',\n", + " 'Miyatake, 2017_P3',\n", + " 'Murray, 2017_P12 (9.1)',\n", + " 'Low, 2016_34 (32)',\n", + " 'Low, 2016_P14 (2)',\n", + " 'Srivastava, 2017_P1',\n", + " 'Goldenberg2016_P12',\n", + " 'Kutkowska-Kazmierczak2021_P9',\n", + " 'Gnazzo, 2020_P5',\n", + " 'Goldenberg2016_P27',\n", + " 'Gnazzo, 2020_P10',\n", + " 'Low, 2016_P26 (23)',\n", + " 'Murray, 2017_P11 (8.1.)',\n", + " 'Goldenberg2016_P33',\n", + " 'Low, 2016_P10 (18)',\n", + " 'Parenti2021_P14',\n", + " 'KBG2',\n", + " 'Isrie, 2012_P2',\n", + " 'KBG58',\n", + " 'Parenti2021_P20',\n", + " 'VanDongen2019_P8',\n", + " 'Goldenberg2016_P31',\n", + " 'Gnazzo, 2020_P18',\n", + " 'Kutkowska-Kazmierczak2021_P10',\n", + " 'Scarano, 2013_P9',\n", + " 'KBG57',\n", + " 'Gnazzo, 2020_P6',\n", + " 'KBG24',\n", + " 'Gnazzo, 2020_P19',\n", + " 'Ockeloen2015_P6',\n", + " 'Kim, 2015_P2',\n", + " 'Gnazzo, 2020_P31',\n", + " 'Goldenberg2016_P38',\n", + " 'Gnazzo, 2020_P27',\n", + " 'Low, 2016_P2 (26)',\n", + " 'Low, 2016_33 (31)',\n", + " 'KBG50',\n", + " 'Sirmaci2011_P5',\n", + " 'Goldenberg2016_P34',\n", + " 'KBG42',\n", + " 'Scarano, 2013_P1',\n", + " 'Gnazzo, 2020_P12',\n", + " 'Low, 2016_P9 (1)',\n", + " 'KBG64',\n", + " 'Ockeloen2015_P13',\n", + " 'Sayed, 2020_P1',\n", + " 'Goldenberg2016_P15',\n", + " 'Goldenberg2016_P20',\n", + " 'Gnazzo, 2020_P23',\n", + " 'Gnazzo, 2020_P13',\n", + " 'Parenti2021_P16',\n", + " 'Isrie, 2012_P1',\n", + " 'Ockeloen2015_P10',\n", + " 'Novara, 2017_P7',\n", + " 'Kutkowska-Kazmierczak2021_P14',\n", + " 'KBG47',\n", + " 'KBG20',\n", + " 'Goldenberg2016_P4',\n", + " 'KBG13',\n", + " 'Goldenberg2016_P18',\n", + " 'KBG4',\n", + " 'KBG18',\n", + " 'Parenti2016_P2',\n", + " 'Kutkowska-Kazmierczak2021_P12',\n", + " 'Gnazzo, 2020_P17',\n", + " 'Miyatake, 2017_P1',\n", + " 'Scarano, 2013_P11',\n", + " 'Parenti2021_P4',\n", + " 'Parenti2021_P15',\n", + " 'Jin Kim, 2020_P2',\n", + " 'Sirmaci2011_P1/F1? (previously published Tekin, 2004)',\n", + " 'KBG17',\n", + " 'Gnazzo, 2020_P26',\n", + " 'KBG5',\n", + " 'Murray, 2017_P9 (5.1.)',\n", + " 'Parenti2021_P5',\n", + " 'KBG15',\n", + " 'Kutkowska-Kazmierczak2021_P11',\n", + " 'VanDongen2019_P12',\n", + " 'Goldenberg2016_P2',\n", + " 'Palumbo 2016',\n", + " 'Sacharow, 2012_P2',\n", + " 'Low, 2016_P5 (6)',\n", + " 'Gnazzo, 2020_P28',\n", + " 'KBG63',\n", + " 'Scarano, 2013_P3',\n", + " 'VanDongen2019_P6',\n", + " 'KBG26',\n", + " 'Scarano, 2013_P6',\n", + " 'Gnazzo, 2020_P11',\n", + " 'KBG39',\n", + " 'Goldenberg2016_P28',\n", + " 'Walz2015_PD',\n", + " 'VanDongen2019_P5',\n", + " 'VanDongen2019_P13',\n", + " 'Behnert, 2018',\n", + " 'Scarano, 2013_P4',\n", + " 'Low, 2016_P24 (21)',\n", + " 'Goldenberg2016_P11',\n", + " 'KBG31A',\n", + " 'Novara, 2017_P4',\n", + " 'Kutkowska-Kazmierczak2021_P3',\n", + " 'Ockeloen2015_P7',\n", + " 'Lim2014',\n", + " 'KBG48',\n", + " 'Low, 2016_P27 (24)',\n", + " 'Ockeloen2015_P1',\n", + " 'Scarano, 2013_P7',\n", + " 'Low, 2016_P25 (22)',\n", + " 'KBG33',\n", + " 'Novara, 2017_P2',\n", + " 'Kutkowska-Kazmierczak2021_P15',\n", + " 'Murray, 2017_P7 (3.3)',\n", + " 'Scarano, 2013_P5',\n", + " 'Libianto2019',\n", + " 'KBG1',\n", + " 'KBG25',\n", + " 'Kutkowska-Kazmierczak2021_P13',\n", + " 'KBG55',\n", + " 'Low, 2016_31 (29)',\n", + " 'Walz2015_PE',\n", + " 'Youngs2011',\n", + " 'KBG36',\n", + " 'Low, 2016_P16 (9)',\n", + " 'VanDongen2019_P10',\n", + " 'Kutkowska-Kazmierczak2021_P8',\n", + " 'Crippa2015_P1',\n", + " 'Scarano, 2013_P2',\n", + " 'KBG56',\n", + " 'Low, 2016_P6 (7)',\n", + " 'Goldenberg2016_P23',\n", + " 'Murray, 2017_P3 (1.3)',\n", + " 'Parenti2021_P2',\n", + " 'Goldenberg2016_P6',\n", + " 'KBG7',\n", + " 'Goldenberg2016_P39',\n", + " 'Miyatake, 2013',\n", + " 'Murray, 2017_P8 (4.1)',\n", + " 'KBG53',\n", + " 'Kutkowska-Kazmierczak2021_P23',\n", + " 'Mattei2021',\n", + " 'Goldenberg2016_P21',\n", + " 'Parenti2021_P17',\n", + " 'Kim, 2015_P1',\n", + " 'Bianchi, 2018',\n", + " 'Low, 2016_P1 (19)',\n", + " 'Sirmaci2011_P3',\n", + " 'Low, 2016_P20 (14)',\n", + " 'Cucco, 2020 (Patient B)',\n", + " 'Goldenberg2016_P16',\n", + " 'Gnazzo, 2020_P14',\n", + " 'Sirmaci2011_P2/F1? (previously published Tekin, 2004)',\n", + " 'Spengler, 2013',\n", + " 'KBG6',\n", + " 'Goldenberg2016_P32',\n", + " 'Low, 2016_P4 (5)',\n", + " 'Parenti2021_P21',\n", + " 'KBG22',\n", + " 'Khalifa, 2013_P1A',\n", + " 'KBG44',\n", + " 'Gnazzo, 2020_P24',\n", + " 'Gnazzo, 2020_P29',\n", + " 'Novara, 2017_P9',\n", + " 'Gnazzo, 2020_P7',\n", + " 'Gnazzo, 2020_P16',\n", + " 'Parenti2021_P11',\n", + " 'Goldenberg2016_P22',\n", + " 'KBG37',\n", + " 'Parenti2021_P10',\n", + " 'Parenti2021_P6',\n", + " 'Kim, 2015_P3',\n", + " 'Sacharow, 2012_P1',\n", + " 'Parenti2021_P7',\n", + " 'Ockeloen2015_P12',\n", + " 'Goldenberg2016_P30',\n", + " 'VanDongen2019_P4',\n", + " 'Low, 2016_P29 (27)',\n", + " 'Parenti2021_P18',\n", + " 'KBG35',\n", + " 'Ockeloen2015_P17',\n", + " 'Low, 2016_P13 (27)',\n", + " 'KBG43',\n", + " 'Goldenberg2016_P17',\n", + " 'Kutkowska-Kazmierczak2021_P17',\n", + " 'Willemsen2010_P1',\n", + " 'Gnazzo, 2020_P15',\n", + " 'Novara, 2017_P1',\n", + " 'Walz2015_PA',\n", + " 'KBG8B',\n", + " 'Goldenberg2016_P1',\n", + " 'Scarano, 2013_P12',\n", + " 'Kutkowska-Kazmierczak2021_P5',\n", + " 'DeBernardi2018',\n", + " 'Ockeloen2015_P19',\n", + " 'Ockeloen2015_P18',\n", + " 'Walz2015_Pf',\n", + " 'Goldenberg2016_P7',\n", + " 'Low, 2016_32 (30)',\n", + " 'Goldenberg2016_P25',\n", + " 'KBG49',\n", + " 'Murray, 2017_P5 (3.1)',\n", + " 'Low2017',\n", + " 'KBG54',\n", + " 'Low, 2016_P23 (17)',\n", + " 'Sirmaci2011_P3/F1? (previously published Tekin, 2004)',\n", + " 'KBG41',\n", + " 'Walz2015_PC',\n", + " 'Kutkowska-Kazmierczak2021_P6',\n", + " 'KBG65',\n", + " 'Goldenberg2016_P3',\n", + " 'Goldenberg2016_P5',\n", + " 'Low, 2016_P7 (8)',\n", + " 'Willemsen2010_P4',\n", + " 'Goldenberg2016_P10',\n", + " 'Kutkowska-Kazmierczak2021_P21',\n", + " 'Goldenberg2016_P13']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "patientCohort.list_all_patients()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "d3481476", "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('NP_037407.4', 325),\n", + " ('NP_001243111.1', 325),\n", + " ('NP_001243112.1', 325),\n", + " ('NP_872337.2', 45),\n", + " ('NP_004924.1', 37),\n", + " ('NP_777577.2', 29),\n", + " ('NP_001120686.1', 29),\n", + " ('NP_001230208.1', 29),\n", + " ('NP_112190.2', 25),\n", + " ('NP_005178.4', 25),\n", + " ('NP_000476.1', 25),\n", + " ('NP_001305458.1', 25),\n", + " ('NP_000503.1', 25),\n", + " ('NP_001305453.1', 25),\n", + " ('NP_001305461.1', 25),\n", + " ('NP_001136336.2', 25),\n", + " ('NP_001305455.1', 25),\n", + " ('NP_001073956.2', 25),\n", + " ('NP_001305456.1', 25),\n", + " ('NP_001281257.1', 25),\n", + " ('NP_001305459.1', 25),\n", + " ('NP_787127.1', 25),\n", + " ('NP_057293.1', 25),\n", + " ('NP_001305457.1', 25),\n", + " ('NP_001305454.1', 25),\n", + " ('NP_001025189.1', 25),\n", + " ('NP_001305436.1', 22),\n", + " ('NP_001165286.1', 22),\n", + " ('NP_001012780.1', 22),\n", + " ('NP_001305442.1', 22),\n", + " ('NP_001012777.1', 22),\n", + " ('NP_849163.1', 22),\n", + " ('NP_001165287.1', 22),\n", + " ('NP_840101.1', 21),\n", + " ('NP_000092.2', 20),\n", + " ('NP_001281269.1', 20),\n", + " ('NP_653205.3', 20),\n", + " ('NP_037410.1', 20),\n", + " ('NP_955399.1', 17),\n", + " ('NP_003110.1', 17),\n", + " ('NP_722520.2', 15),\n", + " ('NP_001167011.1', 10),\n", + " ('NP_524576.2', 10),\n", + " ('NP_001167012.1', 10),\n", + " ('NP_001167014.1', 10),\n", + " ('NP_060339.2', 10),\n", + " ('NP_001167010.1', 10),\n", + " ('NP_001730.1', 10),\n", + " ('NP_001167013.1', 10),\n", + " ('NP_001171785.1', 7),\n", + " ('NP_060036.2', 7),\n", + " ('NP_003477.4', 7),\n", + " ('NP_001171783.1', 7),\n", + " ('NP_065706.2', 6),\n", + " ('NP_000968.2', 4),\n", + " ('NP_001230060.1', 4),\n", + " ('NP_150254.1', 4),\n", + " ('NP_705900.1', 3),\n", + " ('NP_055242.1', 3),\n", + " ('NP_001182054.1', 3),\n", + " ('NP_079011.3', 3),\n", + " ('NP_073729.1', 3),\n", + " ('NP_001269612.1', 3),\n", + " ('NP_001121613.1', 1),\n", + " ('NP_004404.1', 1)]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "patientCohort.list_all_proteins()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "74ba64ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'NM_013275.6': Counter({'TRANSCRIPT_ABLATION': 14,\n", + " 'FRAMESHIFT_VARIANT': 171,\n", + " 'STOP_GAINED': 65,\n", + " 'FEATURE_ELONGATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 50,\n", + " 'FIVE_PRIME_UTR_VARIANT': 44,\n", + " 'INTRON_VARIANT': 60,\n", + " 'STOP_LOST': 33,\n", + " 'FEATURE_TRUNCATION': 55,\n", + " 'THREE_PRIME_UTR_VARIANT': 34,\n", + " 'MISSENSE_VARIANT': 6,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 4,\n", + " 'SPLICE_DONOR_VARIANT': 2,\n", + " 'DOWNSTREAM_GENE_VARIANT': 1,\n", + " 'SPLICE_REGION_VARIANT': 2,\n", + " 'INFRAME_DELETION': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1})}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "patientCohort.list_data_by_tx('NM_013275.6')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "9268c27b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'NM_178841.4': Counter({'TRANSCRIPT_ABLATION': 20,\n", + " 'STOP_LOST': 1,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384764.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001351937.2': Counter({'TRANSCRIPT_ABLATION': 5,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384939.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318529.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001351938.2': Counter({'TRANSCRIPT_ABLATION': 5,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_000101.4': Counter({'TRANSCRIPT_ABLATION': 19,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_016209.5': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001386992.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001080487.4': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384772.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001384765.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001271604.4': Counter({'TRANSCRIPT_ABLATION': 4,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384938.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001271605.3': Counter({'TRANSCRIPT_ABLATION': 4,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001173542.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384926.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_004413.4': Counter({'FIVE_PRIME_UTR_VARIANT': 1, 'INTRON_VARIANT': 1}),\n", + " 'NM_013275.6': Counter({'TRANSCRIPT_ABLATION': 14,\n", + " 'FRAMESHIFT_VARIANT': 171,\n", + " 'STOP_GAINED': 65,\n", + " 'FEATURE_ELONGATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 50,\n", + " 'FIVE_PRIME_UTR_VARIANT': 44,\n", + " 'INTRON_VARIANT': 60,\n", + " 'STOP_LOST': 33,\n", + " 'FEATURE_TRUNCATION': 55,\n", + " 'THREE_PRIME_UTR_VARIANT': 34,\n", + " 'MISSENSE_VARIANT': 6,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 4,\n", + " 'SPLICE_DONOR_VARIANT': 2,\n", + " 'DOWNSTREAM_GENE_VARIANT': 1,\n", + " 'SPLICE_REGION_VARIANT': 2,\n", + " 'INFRAME_DELETION': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_003486.7': Counter({'TRANSCRIPT_ABLATION': 6,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001256182.2': Counter({'TRANSCRIPT_ABLATION': 14,\n", + " 'FRAMESHIFT_VARIANT': 171,\n", + " 'STOP_GAINED': 65,\n", + " 'FEATURE_ELONGATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 50,\n", + " 'FIVE_PRIME_UTR_VARIANT': 44,\n", + " 'INTRON_VARIANT': 60,\n", + " 'STOP_LOST': 33,\n", + " 'FEATURE_TRUNCATION': 55,\n", + " 'THREE_PRIME_UTR_VARIANT': 34,\n", + " 'MISSENSE_VARIANT': 6,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 4,\n", + " 'SPLICE_DONOR_VARIANT': 2,\n", + " 'DOWNSTREAM_GENE_VARIANT': 1,\n", + " 'SPLICE_REGION_VARIANT': 2,\n", + " 'INFRAME_DELETION': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_174917.5': Counter({'TRANSCRIPT_ABLATION': 25,\n", + " 'STOP_LOST': 3,\n", + " 'FEATURE_TRUNCATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 3,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 3,\n", + " 'INTRON_VARIANT': 3,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_000977.4': Counter({'TRANSCRIPT_ABLATION': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1}),\n", + " 'NM_001173543.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384920.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001012759.3': Counter({'TRANSCRIPT_ABLATION': 21,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001128141.3': Counter({'FIVE_PRIME_UTR_VARIANT': 1, 'INTRON_VARIANT': 1}),\n", + " 'NM_001384775.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001318524.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384766.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001384928.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001284316.2': Counter({'TRANSCRIPT_ABLATION': 25,\n", + " 'STOP_LOST': 3,\n", + " 'FEATURE_TRUNCATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 3,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 3,\n", + " 'INTRON_VARIANT': 3,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384921.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318528.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_014427.5': Counter({'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 2,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'INTRON_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384763.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_017869.4': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384767.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001201407.2': Counter({'TRANSCRIPT_ABLATION': 41,\n", + " 'FEATURE_TRUNCATION': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001386991.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001012762.3': Counter({'TRANSCRIPT_ABLATION': 21,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001367624.2': Counter({'TRANSCRIPT_ABLATION': 13,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384936.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001366322.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_000512.5': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_004933.3': Counter({'TRANSCRIPT_ABLATION': 29,\n", + " 'STOP_LOST': 6,\n", + " 'FEATURE_TRUNCATION': 6,\n", + " 'CODING_SEQUENCE_VARIANT': 6,\n", + " 'THREE_PRIME_UTR_VARIANT': 7,\n", + " 'INTRON_VARIANT': 5,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001294328.4': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_030928.4': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384918.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_199367.3': Counter({'FEATURE_TRUNCATION': 9,\n", + " 'CODING_SEQUENCE_VARIANT': 9,\n", + " 'FIVE_PRIME_UTR_VARIANT': 9,\n", + " 'INTRON_VARIANT': 9,\n", + " 'TRANSCRIPT_ABLATION': 5,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'STOP_LOST': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_153636.3': Counter({'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 2,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'INTRON_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001294340.2': Counter({'TRANSCRIPT_ABLATION': 17,\n", + " 'STOP_LOST': 2,\n", + " 'FEATURE_TRUNCATION': 2,\n", + " 'CODING_SEQUENCE_VARIANT': 2,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 2,\n", + " 'INTRON_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001243131.1': Counter({'TRANSCRIPT_ABLATION': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1}),\n", + " 'NM_001318507.2': Counter({'TRANSCRIPT_ABLATION': 21,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384940.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384941.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001173541.2': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318532.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001195125.3': Counter({'TRANSCRIPT_ABLATION': 2,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1}),\n", + " 'NM_001384768.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001384771.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_144604.4': Counter({'TRANSCRIPT_ABLATION': 17,\n", + " 'STOP_LOST': 2,\n", + " 'FEATURE_TRUNCATION': 2,\n", + " 'CODING_SEQUENCE_VARIANT': 2,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 2,\n", + " 'INTRON_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_178310.4': Counter({'TRANSCRIPT_ABLATION': 19,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1,\n", + " 'UPSTREAM_GENE_VARIANT': 1}),\n", + " 'NM_033251.2': Counter({'TRANSCRIPT_ABLATION': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1}),\n", + " 'NM_001323544.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001195124.3': Counter({'TRANSCRIPT_ABLATION': 2,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1}),\n", + " 'NM_001127214.4': Counter({'TRANSCRIPT_ABLATION': 25,\n", + " 'STOP_LOST': 3,\n", + " 'FEATURE_TRUNCATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 3,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 3,\n", + " 'INTRON_VARIANT': 3,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_175931.3': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384927.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001171815.2': Counter({'TRANSCRIPT_ABLATION': 20,\n", + " 'STOP_LOST': 1,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_079837.3': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001256183.2': Counter({'TRANSCRIPT_ABLATION': 14,\n", + " 'FRAMESHIFT_VARIANT': 171,\n", + " 'STOP_GAINED': 65,\n", + " 'FEATURE_ELONGATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 50,\n", + " 'FIVE_PRIME_UTR_VARIANT': 44,\n", + " 'INTRON_VARIANT': 60,\n", + " 'STOP_LOST': 33,\n", + " 'FEATURE_TRUNCATION': 55,\n", + " 'THREE_PRIME_UTR_VARIANT': 34,\n", + " 'MISSENSE_VARIANT': 6,\n", + " 'SPLICE_ACCEPTOR_VARIANT': 4,\n", + " 'SPLICE_DONOR_VARIANT': 2,\n", + " 'DOWNSTREAM_GENE_VARIANT': 1,\n", + " 'SPLICE_REGION_VARIANT': 2,\n", + " 'INFRAME_DELETION': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001243279.3': Counter({'TRANSCRIPT_ABLATION': 25,\n", + " 'STOP_LOST': 3,\n", + " 'FEATURE_TRUNCATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 3,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 3,\n", + " 'INTRON_VARIANT': 3,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001389470.1': Counter({'FIVE_PRIME_UTR_VARIANT': 1, 'INTRON_VARIANT': 1}),\n", + " 'NM_001384931.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384935.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_003119.4': Counter({'FEATURE_TRUNCATION': 12,\n", + " 'CODING_SEQUENCE_VARIANT': 12,\n", + " 'FIVE_PRIME_UTR_VARIANT': 12,\n", + " 'INTRON_VARIANT': 12,\n", + " 'TRANSCRIPT_ABLATION': 2,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_017566.4': Counter({'TRANSCRIPT_ABLATION': 5,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384916.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_020655.4': Counter({'TRANSCRIPT_ABLATION': 4,\n", + " 'STOP_LOST': 1,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_182531.5': Counter({'TRANSCRIPT_ABLATION': 41,\n", + " 'FEATURE_TRUNCATION': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001173539.2': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384942.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384770.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001363850.1': Counter({'FEATURE_TRUNCATION': 12,\n", + " 'CODING_SEQUENCE_VARIANT': 12,\n", + " 'FIVE_PRIME_UTR_VARIANT': 12,\n", + " 'INTRON_VARIANT': 12,\n", + " 'TRANSCRIPT_ABLATION': 2,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001389466.1': Counter({'FIVE_PRIME_UTR_VARIANT': 1, 'INTRON_VARIANT': 1}),\n", + " 'NM_001384929.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384937.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001385709.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_153813.3': Counter({'STOP_LOST': 1,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_ABLATION': 13,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_005187.6': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318513.2': Counter({'TRANSCRIPT_ABLATION': 21,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384769.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001367225.1': Counter({'FEATURE_TRUNCATION': 3,\n", + " 'CODING_SEQUENCE_VARIANT': 3,\n", + " 'FIVE_PRIME_UTR_VARIANT': 3,\n", + " 'INTRON_VARIANT': 3,\n", + " 'TRANSCRIPT_ABLATION': 6,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_015144.3': Counter({'TRANSCRIPT_ABLATION': 3,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 2,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'INTRON_VARIANT': 2}),\n", + " 'NM_001384919.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001389467.1': Counter({'FIVE_PRIME_UTR_VARIANT': 1, 'INTRON_VARIANT': 1}),\n", + " 'NM_001384944.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001378881.1': Counter({'TRANSCRIPT_ABLATION': 41,\n", + " 'FEATURE_TRUNCATION': 2,\n", + " 'THREE_PRIME_UTR_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001171816.2': Counter({'TRANSCRIPT_ABLATION': 20,\n", + " 'STOP_LOST': 1,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'THREE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001323543.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318530.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_002461.3': Counter({'TRANSCRIPT_ABLATION': 19,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318527.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318525.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001142864.4': Counter({'TRANSCRIPT_ABLATION': 21,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'UPSTREAM_GENE_VARIANT': 2,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384922.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384925.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001184856.2': Counter({'TRANSCRIPT_ABLATION': 5,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001256917.2': Counter({'TRANSCRIPT_ABLATION': 2,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1}),\n", + " 'NM_001384943.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_013278.4': Counter({'TRANSCRIPT_ABLATION': 19,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001739.2': Counter({'FEATURE_TRUNCATION': 2,\n", + " 'CODING_SEQUENCE_VARIANT': 2,\n", + " 'FIVE_PRIME_UTR_VARIANT': 2,\n", + " 'INTRON_VARIANT': 2,\n", + " 'TRANSCRIPT_ABLATION': 7,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384923.1': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001384773.1': Counter({'TRANSCRIPT_ABLATION': 35,\n", + " 'TRANSCRIPT_AMPLIFICATION': 2}),\n", + " 'NM_001173540.2': Counter({'TRANSCRIPT_ABLATION': 8,\n", + " 'FEATURE_TRUNCATION': 1,\n", + " 'CODING_SEQUENCE_VARIANT': 1,\n", + " 'FIVE_PRIME_UTR_VARIANT': 1,\n", + " 'INTRON_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001030018.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001184854.2': Counter({'TRANSCRIPT_ABLATION': 5,\n", + " 'UPSTREAM_GENE_VARIANT': 1,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_001318526.2': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1}),\n", + " 'NM_000485.3': Counter({'TRANSCRIPT_ABLATION': 24,\n", + " 'TRANSCRIPT_AMPLIFICATION': 1})}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "patientCohort.list_data_by_tx()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "63705c17", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{ProteinMetadata(id=NP_000092.2, label=Cytochrome b-245 light chain, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=134, end=195)),)),\n", + " ProteinMetadata(id=NP_000476.1, label=Adenine phosphoribosyltransferase, features=()),\n", + " ProteinMetadata(id=NP_000503.1, label=N-acetylgalactosamine-6-sulfatase, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Catalytic domain, start=27, end=379)),)),\n", + " ProteinMetadata(id=NP_000968.2, label=Large ribosomal subunit protein eL13, features=()),\n", + " ProteinMetadata(id=NP_001012777.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", + " ProteinMetadata(id=NP_001012780.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", + " ProteinMetadata(id=NP_001025189.1, label=Adenine phosphoribosyltransferase, features=()),\n", + " ProteinMetadata(id=NP_001073956.2, label=Embryonic polyadenylate-binding protein 2, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=RRM, start=147, end=224)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=21, end=66)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=101, end=128)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=227, end=278)))),\n", + " ProteinMetadata(id=NP_001120686.1, label=Malonate--CoA ligase ACSF3, mitochondrial, features=()),\n", + " ProteinMetadata(id=NP_001121613.1, label=Dipeptidase 1, features=()),\n", + " ProteinMetadata(id=NP_001136336.2, label=Piezo-type mechanosensitive ion channel component 1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=738, end=769)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1356, end=1402)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1462, end=1498)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1576, end=1630)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1811, end=1921)))),\n", + " ProteinMetadata(id=NP_001165286.1, label=E3 ubiquitin-protein ligase RNF166, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=UIM, start=221, end=237)),)),\n", + " ProteinMetadata(id=NP_001165287.1, label=E3 ubiquitin-protein ligase RNF166, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=UIM, start=221, end=237)),)),\n", + " ProteinMetadata(id=NP_001167010.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_001167011.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_001167012.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_001167013.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_001167014.1, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_001171783.1, label=Kelch domain-containing protein 4, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 1, start=77, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 2, start=133, end=187)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 3, start=188, end=241)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 4, start=243, end=289)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 5, start=308, end=361)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 6, start=443, end=494)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=33)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=346, end=379)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=402, end=432)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=481, end=520)))),\n", + " ProteinMetadata(id=NP_001171785.1, label=Kelch domain-containing protein 4, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 1, start=77, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 2, start=133, end=187)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 3, start=188, end=241)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 4, start=243, end=289)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 5, start=308, end=361)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 6, start=443, end=494)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=33)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=346, end=379)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=402, end=432)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=481, end=520)))),\n", + " ProteinMetadata(id=NP_001182054.1, label=Uncharacterized protein C16orf95, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=26)),)),\n", + " ProteinMetadata(id=NP_001230060.1, label=Large ribosomal subunit protein eL13, features=()),\n", + " ProteinMetadata(id=NP_001230208.1, label=Malonate--CoA ligase ACSF3, mitochondrial, features=()),\n", + " ProteinMetadata(id=NP_001243111.1, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),\n", + " ProteinMetadata(id=NP_001243112.1, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),\n", + " ProteinMetadata(id=NP_001269612.1, label=F-box only protein 31, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=F-box, start=64, end=110)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=11, end=53)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=377, end=446)))),\n", + " ProteinMetadata(id=NP_001281257.1, label=Embryonic polyadenylate-binding protein 2, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=RRM, start=147, end=224)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=21, end=66)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=101, end=128)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=227, end=278)))),\n", + " ProteinMetadata(id=NP_001281269.1, label=Zinc finger CCCH domain-containing protein 18, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=222)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=391, end=928)))),\n", + " ProteinMetadata(id=NP_001305436.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", + " ProteinMetadata(id=NP_001305442.1, label=Cytoplasmic tRNA 2-thiolation protein 2, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=24)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=188, end=217)))),\n", + " ProteinMetadata(id=NP_001305453.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305454.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305455.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305456.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305457.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305458.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305459.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001305461.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_001730.1, label=Carbonic anhydrase 5A, mitochondrial, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Alpha-carbonic anhydrase, start=39, end=296)),)),\n", + " ProteinMetadata(id=NP_003110.1, label=Paraplegin, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=108, end=133)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with PPIF, start=701, end=795)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=751, end=795)))),\n", + " ProteinMetadata(id=NP_003477.4, label=Large neutral amino acids transporter small subunit 1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=40)),)),\n", + " ProteinMetadata(id=NP_004404.1, label=Dipeptidase 1, features=()),\n", + " ProteinMetadata(id=NP_004924.1, label=Cadherin-15, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 1, start=61, end=152)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 2, start=153, end=260)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 3, start=261, end=375)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 4, start=376, end=481)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=Cadherin 5, start=482, end=590)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=636, end=663)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=676, end=703)))),\n", + " ProteinMetadata(id=NP_005178.4, label=Protein CBFA2T3, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=TAFH, start=171, end=266)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates localization to the nucleus, start=1, end=435)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PDE7A (in isoform 2), start=1, end=430)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Required for nucleolar targeting (in isoform 1), start=1, end=127)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=109)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with ZBTB33, start=145, end=242)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with HIF1A, start=176, end=268)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=284, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 2 (NHR2); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=394, end=412)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=434, end=472)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 3 (NHR3); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=485, end=533)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PRKAR2A, start=485, end=506)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=603, end=653)))),\n", + " ProteinMetadata(id=NP_037407.4, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),\n", + " ProteinMetadata(id=NP_037410.1, label=Interleukin-17C, features=()),\n", + " ProteinMetadata(id=NP_055242.1, label=Copine-7, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 1, start=2, end=133)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 2, start=212, end=339)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=VWFA, start=382, end=581)))),\n", + " ProteinMetadata(id=NP_057293.1, label=Trafficking protein particle complex subunit 2-like protein, features=()),\n", + " ProteinMetadata(id=NP_060036.2, label=Kelch domain-containing protein 4, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 1, start=77, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 2, start=133, end=187)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 3, start=188, end=241)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 4, start=243, end=289)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 5, start=308, end=361)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=Kelch 6, start=443, end=494)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=33)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=346, end=379)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=402, end=432)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=481, end=520)))),\n", + " ProteinMetadata(id=NP_060339.2, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_065706.2, label=Junctophilin-3, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 1, start=15, end=37)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 2, start=39, end=60)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 3, start=61, end=82)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 4, start=83, end=105)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 5, start=107, end=129)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 6, start=130, end=152)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 7, start=288, end=310)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=MORN 8, start=311, end=333)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=230, end=259)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=416, end=496)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=526, end=597)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=624, end=649)))),\n", + " ProteinMetadata(id=NP_073729.1, label=Microtubule-associated proteins 1A/1B light chain 3B, features=()),\n", + " ProteinMetadata(id=NP_079011.3, label=F-box only protein 31, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=F-box, start=64, end=110)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=11, end=53)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=377, end=446)))),\n", + " ProteinMetadata(id=NP_112190.2, label=DNA replication factor Cdt1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=118)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=143, end=165)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with GMNN, start=150, end=190)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=383, end=415)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with LRWD1, start=451, end=546)), SimpleProteinFeature(type=FeatureType.MOTIF, info=FeatureInfo(name=PIP-box K+4 motif, start=1, end=23)), SimpleProteinFeature(type=FeatureType.MOTIF, info=FeatureInfo(name=Cyclin-binding motif, start=68, end=70)))),\n", + " ProteinMetadata(id=NP_150254.1, label=Large ribosomal subunit protein eL13, features=()),\n", + " ProteinMetadata(id=NP_524576.2, label=Protein BANP, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=BEN, start=226, end=322)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CUX1 and HDAC1, start=152, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=168, end=196)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=327, end=364)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=DNA-binding, start=342, end=393)))),\n", + " ProteinMetadata(id=NP_653205.3, label=Zinc finger CCCH domain-containing protein 18, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=222)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=391, end=928)))),\n", + " ProteinMetadata(id=NP_705900.1, label=Copine-7, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 1, start=2, end=133)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=C2 2, start=212, end=339)), SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=VWFA, start=382, end=581)))),\n", + " ProteinMetadata(id=NP_722520.2, label=Zinc finger protein ZFPM1, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=93)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=114, end=133)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with TACC3, start=330, end=341)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=384, end=409)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=438, end=460)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=473, end=515)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=605, end=681)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=708, end=810)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with CTBP2, start=794, end=800)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=889, end=971)))),\n", + " ProteinMetadata(id=NP_777577.2, label=Malonate--CoA ligase ACSF3, mitochondrial, features=()),\n", + " ProteinMetadata(id=NP_787127.1, label=Protein CBFA2T3, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=TAFH, start=171, end=266)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates localization to the nucleus, start=1, end=435)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PDE7A (in isoform 2), start=1, end=430)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Required for nucleolar targeting (in isoform 1), start=1, end=127)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=109)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with ZBTB33, start=145, end=242)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with HIF1A, start=176, end=268)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=284, end=342)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 2 (NHR2); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=394, end=412)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=434, end=472)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Nervy homology region 3 (NHR3); essential for down-regulation of PFKFB3, PFKFB4 and PDK1 expression, start=485, end=533)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Mediates interaction with PRKAR2A, start=485, end=506)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=603, end=653)))),\n", + " ProteinMetadata(id=NP_840101.1, label=Zinc finger protein SNAI3, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=SNAG domain, start=1, end=20)),)),\n", + " ProteinMetadata(id=NP_849163.1, label=E3 ubiquitin-protein ligase RNF166, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=UIM, start=221, end=237)),)),\n", + " ProteinMetadata(id=NP_872337.2, label=Zinc finger protein 778, features=(SimpleProteinFeature(type=FeatureType.DOMAIN, info=FeatureInfo(name=KRAB, start=42, end=110)),)),\n", + " ProteinMetadata(id=NP_955399.1, label=Paraplegin, features=(SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=108, end=133)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Interaction with PPIF, start=701, end=795)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=751, end=795))))}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "patientCohort.all_proteins" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "ec6c204f", "metadata": {}, "outputs": [], @@ -563,7 +1491,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "df922d31", "metadata": {}, "outputs": [], @@ -573,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "91809a38", "metadata": {}, "outputs": [], @@ -583,42 +1511,810 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "7a83a5ca", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'SO:0001589'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "VariantEffect.FRAMESHIFT_VARIANT.value" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "57dd0e23", "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
With frameshift_variantWithout frameshift_variant
CountPercentCountPercentp-valueCorrected p-values
HP:0011446 (Abnormality of higher mental function)11482.61%10694.64%0.0033470.046856
HP:0001249 (Intellectual disability)10071.43%9486.24%0.0055750.078048
HP:0007018 (Attention deficit hyperactivity disorder)3581.40%2666.67%0.1390301.000000
HP:0000325 (Triangular face)4571.43%3858.46%0.1415181.000000
HP:0001155 (Abnormality of the hand)10067.11%8972.36%0.3583601.000000
HP:0012758 (Neurodevelopmental delay)8694.51%9096.77%0.4944871.000000
HP:0006482 (Abnormality of dental morphology)12485.52%10081.97%0.5045341.000000
HP:0000365 (Hearing impairment)5280.00%4576.27%0.6668751.000000
HP:0010938 (Abnormal external nose morphology)7189.87%6392.65%0.7720391.000000
HP:0000729 (Autistic behavior)2756.25%2960.42%0.8361561.000000
HP:0000534 (Abnormal eyebrow morphology)7082.35%5680.00%0.8364371.000000
HP:0000343 (Long philtrum)6678.57%5580.88%0.8401971.000000
HP:0004322 (Short stature)7658.46%7457.36%0.9001401.000000
HP:0000356 (Abnormality of the outer ear)3778.72%4080.00%1.0000001.000000
\n", + "
" + ], + "text/plain": [ + " With frameshift_variant \\\n", + " Count \n", + "HP:0011446 (Abnormality of higher mental function) 114 \n", + "HP:0001249 (Intellectual disability) 100 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 35 \n", + "HP:0000325 (Triangular face) 45 \n", + "HP:0001155 (Abnormality of the hand) 100 \n", + "HP:0012758 (Neurodevelopmental delay) 86 \n", + "HP:0006482 (Abnormality of dental morphology) 124 \n", + "HP:0000365 (Hearing impairment) 52 \n", + "HP:0010938 (Abnormal external nose morphology) 71 \n", + "HP:0000729 (Autistic behavior) 27 \n", + "HP:0000534 (Abnormal eyebrow morphology) 70 \n", + "HP:0000343 (Long philtrum) 66 \n", + "HP:0004322 (Short stature) 76 \n", + "HP:0000356 (Abnormality of the outer ear) 37 \n", + "\n", + " \\\n", + " Percent \n", + "HP:0011446 (Abnormality of higher mental function) 82.61% \n", + "HP:0001249 (Intellectual disability) 71.43% \n", + "HP:0007018 (Attention deficit hyperactivity dis... 81.40% \n", + "HP:0000325 (Triangular face) 71.43% \n", + "HP:0001155 (Abnormality of the hand) 67.11% \n", + "HP:0012758 (Neurodevelopmental delay) 94.51% \n", + "HP:0006482 (Abnormality of dental morphology) 85.52% \n", + "HP:0000365 (Hearing impairment) 80.00% \n", + "HP:0010938 (Abnormal external nose morphology) 89.87% \n", + "HP:0000729 (Autistic behavior) 56.25% \n", + "HP:0000534 (Abnormal eyebrow morphology) 82.35% \n", + "HP:0000343 (Long philtrum) 78.57% \n", + "HP:0004322 (Short stature) 58.46% \n", + "HP:0000356 (Abnormality of the outer ear) 78.72% \n", + "\n", + " Without frameshift_variant \\\n", + " Count \n", + "HP:0011446 (Abnormality of higher mental function) 106 \n", + "HP:0001249 (Intellectual disability) 94 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 26 \n", + "HP:0000325 (Triangular face) 38 \n", + "HP:0001155 (Abnormality of the hand) 89 \n", + "HP:0012758 (Neurodevelopmental delay) 90 \n", + "HP:0006482 (Abnormality of dental morphology) 100 \n", + "HP:0000365 (Hearing impairment) 45 \n", + "HP:0010938 (Abnormal external nose morphology) 63 \n", + "HP:0000729 (Autistic behavior) 29 \n", + "HP:0000534 (Abnormal eyebrow morphology) 56 \n", + "HP:0000343 (Long philtrum) 55 \n", + "HP:0004322 (Short stature) 74 \n", + "HP:0000356 (Abnormality of the outer ear) 40 \n", + "\n", + " \\\n", + " Percent p-value \n", + "HP:0011446 (Abnormality of higher mental function) 94.64% 0.003347 \n", + "HP:0001249 (Intellectual disability) 86.24% 0.005575 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 66.67% 0.139030 \n", + "HP:0000325 (Triangular face) 58.46% 0.141518 \n", + "HP:0001155 (Abnormality of the hand) 72.36% 0.358360 \n", + "HP:0012758 (Neurodevelopmental delay) 96.77% 0.494487 \n", + "HP:0006482 (Abnormality of dental morphology) 81.97% 0.504534 \n", + "HP:0000365 (Hearing impairment) 76.27% 0.666875 \n", + "HP:0010938 (Abnormal external nose morphology) 92.65% 0.772039 \n", + "HP:0000729 (Autistic behavior) 60.42% 0.836156 \n", + "HP:0000534 (Abnormal eyebrow morphology) 80.00% 0.836437 \n", + "HP:0000343 (Long philtrum) 80.88% 0.840197 \n", + "HP:0004322 (Short stature) 57.36% 0.900140 \n", + "HP:0000356 (Abnormality of the outer ear) 80.00% 1.000000 \n", + "\n", + " \n", + " Corrected p-values \n", + "HP:0011446 (Abnormality of higher mental function) 0.046856 \n", + "HP:0001249 (Intellectual disability) 0.078048 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", + "HP:0000325 (Triangular face) 1.000000 \n", + "HP:0001155 (Abnormality of the hand) 1.000000 \n", + "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", + "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", + "HP:0000365 (Hearing impairment) 1.000000 \n", + "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", + "HP:0000729 (Autistic behavior) 1.000000 \n", + "HP:0000534 (Abnormal eyebrow morphology) 1.000000 \n", + "HP:0000343 (Long philtrum) 1.000000 \n", + "HP:0004322 (Short stature) 1.000000 \n", + "HP:0000356 (Abnormality of the outer ear) 1.000000 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analysis.compare_by_variant_type(VariantEffect.FRAMESHIFT_VARIANT)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "1c2c01fd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
With 16_89284634_89284639_GTGTTT_GWithout 16_89284634_89284639_GTGTTT_G
CountPercentCountPercentp-valueCorrected p-values
HP:0001249 (Intellectual disability)1557.69%17980.27%0.0129390.181150
HP:0011446 (Abnormality of higher mental function)1973.08%20189.73%0.0225750.316053
HP:0010938 (Abnormal external nose morphology)1381.25%12192.37%0.1530851.000000
HP:0007018 (Attention deficit hyperactivity disorder)7100.00%5472.00%0.1821091.000000
HP:0000534 (Abnormal eyebrow morphology)1071.43%11682.27%0.2992781.000000
HP:0001155 (Abnormality of the hand)2177.78%16868.57%0.3846091.000000
HP:0000729 (Autistic behavior)342.86%5359.55%0.4455761.000000
HP:0000365 (Hearing impairment)770.00%9078.95%0.4526611.000000
HP:0012758 (Neurodevelopmental delay)1493.33%16295.86%0.5005261.000000
HP:0000343 (Long philtrum)1173.33%11080.29%0.5089201.000000
HP:0000356 (Abnormality of the outer ear)675.00%7179.78%0.6672011.000000
HP:0004322 (Short stature)1453.85%13658.37%0.6798031.000000
HP:0000325 (Triangular face)675.00%7764.17%0.7118301.000000
HP:0006482 (Abnormality of dental morphology)2284.62%20283.82%1.0000001.000000
\n", + "
" + ], + "text/plain": [ + " With 16_89284634_89284639_GTGTTT_G \\\n", + " Count \n", + "HP:0001249 (Intellectual disability) 15 \n", + "HP:0011446 (Abnormality of higher mental function) 19 \n", + "HP:0010938 (Abnormal external nose morphology) 13 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 7 \n", + "HP:0000534 (Abnormal eyebrow morphology) 10 \n", + "HP:0001155 (Abnormality of the hand) 21 \n", + "HP:0000729 (Autistic behavior) 3 \n", + "HP:0000365 (Hearing impairment) 7 \n", + "HP:0012758 (Neurodevelopmental delay) 14 \n", + "HP:0000343 (Long philtrum) 11 \n", + "HP:0000356 (Abnormality of the outer ear) 6 \n", + "HP:0004322 (Short stature) 14 \n", + "HP:0000325 (Triangular face) 6 \n", + "HP:0006482 (Abnormality of dental morphology) 22 \n", + "\n", + " \\\n", + " Percent \n", + "HP:0001249 (Intellectual disability) 57.69% \n", + "HP:0011446 (Abnormality of higher mental function) 73.08% \n", + "HP:0010938 (Abnormal external nose morphology) 81.25% \n", + "HP:0007018 (Attention deficit hyperactivity dis... 100.00% \n", + "HP:0000534 (Abnormal eyebrow morphology) 71.43% \n", + "HP:0001155 (Abnormality of the hand) 77.78% \n", + "HP:0000729 (Autistic behavior) 42.86% \n", + "HP:0000365 (Hearing impairment) 70.00% \n", + "HP:0012758 (Neurodevelopmental delay) 93.33% \n", + "HP:0000343 (Long philtrum) 73.33% \n", + "HP:0000356 (Abnormality of the outer ear) 75.00% \n", + "HP:0004322 (Short stature) 53.85% \n", + "HP:0000325 (Triangular face) 75.00% \n", + "HP:0006482 (Abnormality of dental morphology) 84.62% \n", + "\n", + " Without 16_89284634_89284639_GTGTTT_G \\\n", + " Count \n", + "HP:0001249 (Intellectual disability) 179 \n", + "HP:0011446 (Abnormality of higher mental function) 201 \n", + "HP:0010938 (Abnormal external nose morphology) 121 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 54 \n", + "HP:0000534 (Abnormal eyebrow morphology) 116 \n", + "HP:0001155 (Abnormality of the hand) 168 \n", + "HP:0000729 (Autistic behavior) 53 \n", + "HP:0000365 (Hearing impairment) 90 \n", + "HP:0012758 (Neurodevelopmental delay) 162 \n", + "HP:0000343 (Long philtrum) 110 \n", + "HP:0000356 (Abnormality of the outer ear) 71 \n", + "HP:0004322 (Short stature) 136 \n", + "HP:0000325 (Triangular face) 77 \n", + "HP:0006482 (Abnormality of dental morphology) 202 \n", + "\n", + " \\\n", + " Percent p-value \n", + "HP:0001249 (Intellectual disability) 80.27% 0.012939 \n", + "HP:0011446 (Abnormality of higher mental function) 89.73% 0.022575 \n", + "HP:0010938 (Abnormal external nose morphology) 92.37% 0.153085 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 72.00% 0.182109 \n", + "HP:0000534 (Abnormal eyebrow morphology) 82.27% 0.299278 \n", + "HP:0001155 (Abnormality of the hand) 68.57% 0.384609 \n", + "HP:0000729 (Autistic behavior) 59.55% 0.445576 \n", + "HP:0000365 (Hearing impairment) 78.95% 0.452661 \n", + "HP:0012758 (Neurodevelopmental delay) 95.86% 0.500526 \n", + "HP:0000343 (Long philtrum) 80.29% 0.508920 \n", + "HP:0000356 (Abnormality of the outer ear) 79.78% 0.667201 \n", + "HP:0004322 (Short stature) 58.37% 0.679803 \n", + "HP:0000325 (Triangular face) 64.17% 0.711830 \n", + "HP:0006482 (Abnormality of dental morphology) 83.82% 1.000000 \n", + "\n", + " \n", + " Corrected p-values \n", + "HP:0001249 (Intellectual disability) 0.181150 \n", + "HP:0011446 (Abnormality of higher mental function) 0.316053 \n", + "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", + "HP:0000534 (Abnormal eyebrow morphology) 1.000000 \n", + "HP:0001155 (Abnormality of the hand) 1.000000 \n", + "HP:0000729 (Autistic behavior) 1.000000 \n", + "HP:0000365 (Hearing impairment) 1.000000 \n", + "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", + "HP:0000343 (Long philtrum) 1.000000 \n", + "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", + "HP:0004322 (Short stature) 1.000000 \n", + "HP:0000325 (Triangular face) 1.000000 \n", + "HP:0006482 (Abnormality of dental morphology) 1.000000 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analysis.compare_by_variant('16_89284634_89284639_GTGTTT_G')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "f306c5ef", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Inside Exon 9Outside Exon 9
CountPercentCountPercentp-valueCorrected p-values
HP:0004322 (Short stature)10762.94%836.36%0.0210110.294157
HP:0011446 (Abnormality of higher mental function)15585.64%23100.00%0.0501960.702743
HP:0001155 (Abnormality of the hand)13569.95%1990.48%0.0701280.981790
HP:0001249 (Intellectual disability)13775.27%2191.30%0.1138381.000000
HP:0007018 (Attention deficit hyperactivity disorder)4381.13%861.54%0.1516091.000000
HP:0000365 (Hearing impairment)6578.31%969.23%0.4866901.000000
HP:0000534 (Abnormal eyebrow morphology)9482.46%1076.92%0.7033221.000000
HP:0000729 (Autistic behavior)3356.90%866.67%0.7490491.000000
HP:0010938 (Abnormal external nose morphology)9890.74%1392.86%1.0000001.000000
HP:0000325 (Triangular face)5771.25%1173.33%1.0000001.000000
HP:0000356 (Abnormality of the outer ear)4981.67%990.00%1.0000001.000000
HP:0000343 (Long philtrum)9279.31%1083.33%1.0000001.000000
HP:0006482 (Abnormality of dental morphology)16084.66%2187.50%1.0000001.000000
HP:0012758 (Neurodevelopmental delay)12096.00%12100.00%1.0000001.000000
\n", + "
" + ], + "text/plain": [ + " Inside Exon 9 \\\n", + " Count Percent \n", + "HP:0004322 (Short stature) 107 62.94% \n", + "HP:0011446 (Abnormality of higher mental function) 155 85.64% \n", + "HP:0001155 (Abnormality of the hand) 135 69.95% \n", + "HP:0001249 (Intellectual disability) 137 75.27% \n", + "HP:0007018 (Attention deficit hyperactivity dis... 43 81.13% \n", + "HP:0000365 (Hearing impairment) 65 78.31% \n", + "HP:0000534 (Abnormal eyebrow morphology) 94 82.46% \n", + "HP:0000729 (Autistic behavior) 33 56.90% \n", + "HP:0010938 (Abnormal external nose morphology) 98 90.74% \n", + "HP:0000325 (Triangular face) 57 71.25% \n", + "HP:0000356 (Abnormality of the outer ear) 49 81.67% \n", + "HP:0000343 (Long philtrum) 92 79.31% \n", + "HP:0006482 (Abnormality of dental morphology) 160 84.66% \n", + "HP:0012758 (Neurodevelopmental delay) 120 96.00% \n", + "\n", + " Outside Exon 9 \\\n", + " Count Percent \n", + "HP:0004322 (Short stature) 8 36.36% \n", + "HP:0011446 (Abnormality of higher mental function) 23 100.00% \n", + "HP:0001155 (Abnormality of the hand) 19 90.48% \n", + "HP:0001249 (Intellectual disability) 21 91.30% \n", + "HP:0007018 (Attention deficit hyperactivity dis... 8 61.54% \n", + "HP:0000365 (Hearing impairment) 9 69.23% \n", + "HP:0000534 (Abnormal eyebrow morphology) 10 76.92% \n", + "HP:0000729 (Autistic behavior) 8 66.67% \n", + "HP:0010938 (Abnormal external nose morphology) 13 92.86% \n", + "HP:0000325 (Triangular face) 11 73.33% \n", + "HP:0000356 (Abnormality of the outer ear) 9 90.00% \n", + "HP:0000343 (Long philtrum) 10 83.33% \n", + "HP:0006482 (Abnormality of dental morphology) 21 87.50% \n", + "HP:0012758 (Neurodevelopmental delay) 12 100.00% \n", + "\n", + " \\\n", + " p-value \n", + "HP:0004322 (Short stature) 0.021011 \n", + "HP:0011446 (Abnormality of higher mental function) 0.050196 \n", + "HP:0001155 (Abnormality of the hand) 0.070128 \n", + "HP:0001249 (Intellectual disability) 0.113838 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 0.151609 \n", + "HP:0000365 (Hearing impairment) 0.486690 \n", + "HP:0000534 (Abnormal eyebrow morphology) 0.703322 \n", + "HP:0000729 (Autistic behavior) 0.749049 \n", + "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", + "HP:0000325 (Triangular face) 1.000000 \n", + "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", + "HP:0000343 (Long philtrum) 1.000000 \n", + "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", + "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", + "\n", + " \n", + " Corrected p-values \n", + "HP:0004322 (Short stature) 0.294157 \n", + "HP:0011446 (Abnormality of higher mental function) 0.702743 \n", + "HP:0001155 (Abnormality of the hand) 0.981790 \n", + "HP:0001249 (Intellectual disability) 1.000000 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 1.000000 \n", + "HP:0000365 (Hearing impairment) 1.000000 \n", + "HP:0000534 (Abnormal eyebrow morphology) 1.000000 \n", + "HP:0000729 (Autistic behavior) 1.000000 \n", + "HP:0010938 (Abnormal external nose morphology) 1.000000 \n", + "HP:0000325 (Triangular face) 1.000000 \n", + "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", + "HP:0000343 (Long philtrum) 1.000000 \n", + "HP:0006482 (Abnormality of dental morphology) 1.000000 \n", + "HP:0012758 (Neurodevelopmental delay) 1.000000 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analysis2 = CohortAnalysis(patientCohort, 'NM_013275.6', hpo, include_unmeasured=False, include_large_SV=False)\n", "\n", @@ -627,7 +2323,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "f6861157", "metadata": {}, "outputs": [], @@ -637,18 +2333,378 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "d3c6fd9d", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Inside REGIONOutside REGION
CountPercentCountPercentp-valueCorrected p-values
HP:0000534 (Abnormal eyebrow morphology)7578.95%2990.62%0.1869271.0
HP:0006482 (Abnormality of dental morphology)13483.23%4790.38%0.2671541.0
HP:0004322 (Short stature)9058.06%2567.57%0.3521091.0
HP:0001155 (Abnormality of the hand)11570.12%3978.00%0.3685741.0
HP:0000365 (Hearing impairment)5674.67%1885.71%0.3851031.0
HP:0000325 (Triangular face)4969.01%1979.17%0.4368471.0
HP:0011446 (Abnormality of higher mental function)13588.24%4384.31%0.4725161.0
HP:0000729 (Autistic behavior)3256.14%969.23%0.5359571.0
HP:0001249 (Intellectual disability)11976.28%3979.59%0.7004601.0
HP:0007018 (Attention deficit hyperactivity disorder)4175.93%1083.33%0.7188131.0
HP:0010938 (Abnormal external nose morphology)8290.11%2993.55%0.7277681.0
HP:0000356 (Abnormality of the outer ear)4483.02%1482.35%1.0000001.0
HP:0000343 (Long philtrum)7579.79%2779.41%1.0000001.0
HP:0012758 (Neurodevelopmental delay)10696.36%2696.30%1.0000001.0
\n", + "
" + ], + "text/plain": [ + " Inside REGION \\\n", + " Count Percent \n", + "HP:0000534 (Abnormal eyebrow morphology) 75 78.95% \n", + "HP:0006482 (Abnormality of dental morphology) 134 83.23% \n", + "HP:0004322 (Short stature) 90 58.06% \n", + "HP:0001155 (Abnormality of the hand) 115 70.12% \n", + "HP:0000365 (Hearing impairment) 56 74.67% \n", + "HP:0000325 (Triangular face) 49 69.01% \n", + "HP:0011446 (Abnormality of higher mental function) 135 88.24% \n", + "HP:0000729 (Autistic behavior) 32 56.14% \n", + "HP:0001249 (Intellectual disability) 119 76.28% \n", + "HP:0007018 (Attention deficit hyperactivity dis... 41 75.93% \n", + "HP:0010938 (Abnormal external nose morphology) 82 90.11% \n", + "HP:0000356 (Abnormality of the outer ear) 44 83.02% \n", + "HP:0000343 (Long philtrum) 75 79.79% \n", + "HP:0012758 (Neurodevelopmental delay) 106 96.36% \n", + "\n", + " Outside REGION \\\n", + " Count Percent \n", + "HP:0000534 (Abnormal eyebrow morphology) 29 90.62% \n", + "HP:0006482 (Abnormality of dental morphology) 47 90.38% \n", + "HP:0004322 (Short stature) 25 67.57% \n", + "HP:0001155 (Abnormality of the hand) 39 78.00% \n", + "HP:0000365 (Hearing impairment) 18 85.71% \n", + "HP:0000325 (Triangular face) 19 79.17% \n", + "HP:0011446 (Abnormality of higher mental function) 43 84.31% \n", + "HP:0000729 (Autistic behavior) 9 69.23% \n", + "HP:0001249 (Intellectual disability) 39 79.59% \n", + "HP:0007018 (Attention deficit hyperactivity dis... 10 83.33% \n", + "HP:0010938 (Abnormal external nose morphology) 29 93.55% \n", + "HP:0000356 (Abnormality of the outer ear) 14 82.35% \n", + "HP:0000343 (Long philtrum) 27 79.41% \n", + "HP:0012758 (Neurodevelopmental delay) 26 96.30% \n", + "\n", + " \\\n", + " p-value \n", + "HP:0000534 (Abnormal eyebrow morphology) 0.186927 \n", + "HP:0006482 (Abnormality of dental morphology) 0.267154 \n", + "HP:0004322 (Short stature) 0.352109 \n", + "HP:0001155 (Abnormality of the hand) 0.368574 \n", + "HP:0000365 (Hearing impairment) 0.385103 \n", + "HP:0000325 (Triangular face) 0.436847 \n", + "HP:0011446 (Abnormality of higher mental function) 0.472516 \n", + "HP:0000729 (Autistic behavior) 0.535957 \n", + "HP:0001249 (Intellectual disability) 0.700460 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 0.718813 \n", + "HP:0010938 (Abnormal external nose morphology) 0.727768 \n", + "HP:0000356 (Abnormality of the outer ear) 1.000000 \n", + "HP:0000343 (Long philtrum) 1.000000 \n", + "HP:0012758 (Neurodevelopmental delay) 1.000000 \n", + "\n", + " \n", + " Corrected p-values \n", + "HP:0000534 (Abnormal eyebrow morphology) 1.0 \n", + "HP:0006482 (Abnormality of dental morphology) 1.0 \n", + "HP:0004322 (Short stature) 1.0 \n", + "HP:0001155 (Abnormality of the hand) 1.0 \n", + "HP:0000365 (Hearing impairment) 1.0 \n", + "HP:0000325 (Triangular face) 1.0 \n", + "HP:0011446 (Abnormality of higher mental function) 1.0 \n", + "HP:0000729 (Autistic behavior) 1.0 \n", + "HP:0001249 (Intellectual disability) 1.0 \n", + "HP:0007018 (Attention deficit hyperactivity dis... 1.0 \n", + "HP:0010938 (Abnormal external nose morphology) 1.0 \n", + "HP:0000356 (Abnormality of the outer ear) 1.0 \n", + "HP:0000343 (Long philtrum) 1.0 \n", + "HP:0012758 (Neurodevelopmental delay) 1.0 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analysis2.compare_by_protein_feature_type(FeatureType.REGION)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a97a6a09", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16_88489784_89491503_DEL does not have a Protein Effect Location\n", + "16_87892207_89455452_DEL does not have a Protein Effect Location\n", + "16_88743576_89406219_DEL does not have a Protein Effect Location\n", + "16_86647052_89511661_DEL does not have a Protein Effect Location\n", + "16_89228900_89593971_DEL does not have a Protein Effect Location\n", + "16_89481148_89489612_DEL does not have a Protein Effect Location\n", + "16_87306530_89269020_DEL does not have a Protein Effect Location\n", + "16_89277486_89499248_DEL does not have a Protein Effect Location\n", + "16_89182742_89309778_DEL does not have a Protein Effect Location\n", + "16_89182742_89309778_DEL does not have a Protein Effect Location\n", + "16_89321706_89475518_DEL does not have a Protein Effect Location\n", + "16_89182742_89309778_DEL does not have a Protein Effect Location\n", + "16_88697053_89277641_DEL does not have a Protein Effect Location\n", + "16_88788350_89454555_DEL does not have a Protein Effect Location\n", + "16_89056332_89434622_DEL does not have a Protein Effect Location\n", + "16_89217282_89506042_DEL does not have a Protein Effect Location\n", + "16_89274958_89274958_C_G does not have a Protein Effect Location\n", + "16_89275192_89275192_C_G does not have a Protein Effect Location\n", + "16_89190071_89439815_DEL does not have a Protein Effect Location\n", + "16_89095277_89438698_DEL does not have a Protein Effect Location\n", + "16_89249291_89481685_DEL does not have a Protein Effect Location\n", + "16_89269021_89492781_DEL does not have a Protein Effect Location\n", + "16_87618421_89440922_DEL does not have a Protein Effect Location\n", + "16_89458996_89487166_DEL does not have a Protein Effect Location\n", + "16_89269900_89287677_DEL does not have a Protein Effect Location\n", + "16_89268821_89406360_DEL does not have a Protein Effect Location\n", + "16_89277486_89489140_DEL does not have a Protein Effect Location\n", + "16_89266046_89305443_DEL does not have a Protein Effect Location\n", + "16_88575401_89265641_DEL does not have a Protein Effect Location\n", + "16_89206685_89472351_DEL does not have a Protein Effect Location\n", + "16_89277486_89431539_DEL does not have a Protein Effect Location\n", + "16_88788350_89511297_DEL does not have a Protein Effect Location\n", + "16_89279070_89279070_A_G does not have a Protein Effect Location\n", + "16_89305431_89541006_DEL does not have a Protein Effect Location\n", + "16_89395176_89492781_DEL does not have a Protein Effect Location\n", + "16_89217282_89512722_DEL does not have a Protein Effect Location\n", + "16_89409760_89418313_DEL does not have a Protein Effect Location\n", + "16_89206685_89510638_DEL does not have a Protein Effect Location\n", + "16_88197485_89321695_DEL does not have a Protein Effect Location\n", + "16_89182742_89309778_DEL does not have a Protein Effect Location\n", + "16_89262070_89410643_DEL does not have a Protein Effect Location\n", + "16_87766621_89492922_DEL does not have a Protein Effect Location\n", + "16_88599770_89406219_DEL does not have a Protein Effect Location\n", + "16_89441369_89499248_DEL does not have a Protein Effect Location\n", + "16_88197356_89297194_DEL does not have a Protein Effect Location\n", + "16_89133200_89406219_DEL does not have a Protein Effect Location\n", + "16_89490211_89490596_DEL does not have a Protein Effect Location\n", + "16_87464659_89530534_DEL does not have a Protein Effect Location\n", + "16_89363269_89492781_DEL does not have a Protein Effect Location\n", + "16_89269019_89305395_DEL does not have a Protein Effect Location\n", + "16_87150056_89454395_DEL does not have a Protein Effect Location\n", + "16_89274958_89274958_C_G does not have a Protein Effect Location\n", + "16_89171713_89274753_DEL does not have a Protein Effect Location\n", + "16_89368851_89487299_DEL does not have a Protein Effect Location\n", + "16_89274958_89274958_C_G does not have a Protein Effect Location\n", + "16_89277486_89517986_DEL does not have a Protein Effect Location\n", + "16_89293294_89312898_DEL does not have a Protein Effect Location\n", + "16_89095277_89438698_DEL does not have a Protein Effect Location\n", + "16_88197356_89297194_DEL does not have a Protein Effect Location\n", + "16_89217282_89536982_DEL does not have a Protein Effect Location\n", + "16_87468556_89622209_DUP does not have a Protein Effect Location\n", + "16_88555247_89317078_DEL does not have a Protein Effect Location\n", + "16_89228900_89492781_DEL does not have a Protein Effect Location\n", + "16_89217282_89363327_DEL does not have a Protein Effect Location\n", + "16_88197356_89317078_DEL does not have a Protein Effect Location\n", + "16_89258980_89342739_DEL does not have a Protein Effect Location\n", + "16_89195407_89489612_DUP does not have a Protein Effect Location\n", + "16_89262070_89410643_DEL does not have a Protein Effect Location\n", + "16_88564200_89541334_DEL does not have a Protein Effect Location\n", + "16_89476288_89589843_DUP does not have a Protein Effect Location\n", + "16_89194992_89352723_DEL does not have a Protein Effect Location\n", + "16_89321706_89475518_DEL does not have a Protein Effect Location\n", + "16_88688905_89518004_DEL does not have a Protein Effect Location\n", + "16_87921246_89417758_DEL does not have a Protein Effect Location\n", + "16_88568593_89562542_DEL does not have a Protein Effect Location\n", + "16_88197155_89297334_DEL does not have a Protein Effect Location\n", + "(ProteinMetadata(id=NP_037407.4, label=Ankyrin repeat domain-containing protein 11, features=(SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 1, start=167, end=196)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 2, start=200, end=229)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 3, start=233, end=262)), SimpleProteinFeature(type=FeatureType.REPEAT, info=FeatureInfo(name=ANK 4, start=266, end=292)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1, end=90)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=128, end=169)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=289, end=380)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=398, end=647)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=723, end=783)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=881, end=1043)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1059, end=1393)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1424, end=1710)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1814, end=1836)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=1988, end=2019)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Disordered, start=2131, end=2406)), SimpleProteinFeature(type=FeatureType.REGION, info=FeatureInfo(name=Important for protein degradation, start=2369, end=2663)))),)\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'tuple' object has no attribute 'get_features_variant_overlaps'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpatientCohort\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_protein_features_affected\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNM_013275.6\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/model/_cohort.py:298\u001b[0m, in \u001b[0;36mCohort.get_protein_features_affected\u001b[0;34m(self, transcript)\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28mprint\u001b[39m(protein)\n\u001b[1;32m 297\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m pair \u001b[38;5;129;01min\u001b[39;00m var_coords:\n\u001b[0;32m--> 298\u001b[0m all_features\u001b[38;5;241m.\u001b[39mupdate([\u001b[43mprotein\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_features_variant_overlaps\u001b[49m(pair[\u001b[38;5;241m0\u001b[39m], pair[\u001b[38;5;241m1\u001b[39m])])\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m all_features\n", + "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'get_features_variant_overlaps'" + ] + } + ], + "source": [ + "patientCohort.get_protein_features_affected('NM_013275.6')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7e51324", + "metadata": {}, "outputs": [], "source": [ - "analysis.compare_by_protein_feature_type(FeatureType.REGION)" + "for var in patientCohort.all_variants:\n", + " print(var.variant_string)\n", + " for tx in var.tx_annotations:\n", + " print(tx.is_preferred)" ] }, { "cell_type": "code", "execution_count": null, - "id": "a97a6a09", + "id": "97a07a2c", "metadata": {}, "outputs": [], "source": [] diff --git a/src/genophenocorr/model/_cohort.py b/src/genophenocorr/model/_cohort.py index 983fb505..500a32b4 100644 --- a/src/genophenocorr/model/_cohort.py +++ b/src/genophenocorr/model/_cohort.py @@ -261,7 +261,7 @@ def list_data_by_tx(self, transcript=None): for var in self.all_variants: for trans in var.tx_annotations: if trans.transcript_id in var_type_dict: - var_type_dict.get(trans.transcript_id).update(trans.variant_effects) + var_type_dict.get(trans.transcript_id).update([var_eff.name for var_eff in trans.variant_effects]) too_small = [] for tx_id, var_effect_counter in var_type_dict.items(): if len(var_effect_counter) <= 1: @@ -276,5 +276,25 @@ def get_excluded_ids(self): def get_excluded_count(self): return len(self.all_excluded_patients) + def get_protein_features_affected(self, transcript): + all_features = Counter() + protein_set = set() + var_coords = [] + for var in self.all_variants: + for tx in var.tx_annotations: + if tx.transcript_id == transcript: + protein_set.add(tx.protein_affected) + if tx.protein_effect_location is None or tx.protein_effect_location[0] is None or tx.protein_effect_location[1] is None: + continue + else: + var_coords.append(tx.protein_effect_location) + if len(protein_set) != 1: + raise ValueError(f"Found more than 1 protein: {protein_set}") + else: + protein = list(protein_set)[0][0] + for pair in var_coords: + all_features.update(list(protein.get_features_variant_overlaps(pair[0], pair[1]))) + return all_features + def __len__(self) -> int: return len(self._patient_set) diff --git a/src/genophenocorr/model/_protein.py b/src/genophenocorr/model/_protein.py index 39c126f1..1b168700 100644 --- a/src/genophenocorr/model/_protein.py +++ b/src/genophenocorr/model/_protein.py @@ -7,6 +7,7 @@ from .genome import Region + class FeatureInfo: """A class that represents a protein feature (e.g. a repeated sequence given the name "ANK 1" in protein "Ankyrin repeat domain-containing protein 11") @@ -141,6 +142,9 @@ def feature_type(self) -> FeatureType: """ return self._type + def to_string(self) -> str: + return f"{self.feature_type.name}-{self.info.name}-{self.info.region}" + def __str__(self) -> str: return f"SimpleProteinFeature(type={self.feature_type}, " \ f"info={str(self.info)})" @@ -242,6 +246,18 @@ def motifs(self) -> typing.Iterable[ProteinFeature]: """ return filter(lambda f: f.feature_type == FeatureType.MOTIF, self.protein_features) + def get_features_variant_overlaps(self, var_start: int, var_end: int) -> typing.Sequence[ProteinFeature]: + affected_features = set() + for feat in self.protein_features: + if feat.info.start is None or feat.info.end is None: + print(f"{feat.info.name} has no start and end info") + continue + if feat.info.start <= var_start <= feat.info.end: + affected_features.add(feat.to_string()) + elif feat.info.start <= var_end <= feat.info.end: + affected_features.add(feat.to_string()) + return affected_features + def __str__(self) -> str: return f"ProteinMetadata(id={self.protein_id}, " \ f"label={self.label}, " \ diff --git a/src/genophenocorr/preprocessing/_vep.py b/src/genophenocorr/preprocessing/_vep.py index 5a7ba30e..107507c3 100644 --- a/src/genophenocorr/preprocessing/_vep.py +++ b/src/genophenocorr/preprocessing/_vep.py @@ -88,7 +88,7 @@ def _process_item(self, item) -> typing.Optional[TranscriptAnnotation]: if not self._include_computational_txs and not trans_id.startswith('NM_'): # Skipping a computational transcript return None - is_preferred = True if 'canonical' in item and item['canonical'] else False + is_preferred = True if ('canonical' in item and item['canonical'] == 1) else False hgvsc_id = item.get('hgvsc') var_effects = [] consequences = item.get('consequence_terms') diff --git a/src/genophenocorr/view/_cohort.py b/src/genophenocorr/view/_cohort.py index 81135ace..68e9c2c2 100644 --- a/src/genophenocorr/view/_cohort.py +++ b/src/genophenocorr/view/_cohort.py @@ -222,4 +222,81 @@ def cohort_summary_table(self, cohort, min_count=1) -> str: rows.append("") + return "\n".join(rows) + + + def protein_features_table(self, cohort, preferred_transcript, min_count=2) -> str: + """ + Generate HTML code designed to be displayed on a Jupyter notebook using ipython/display/HTML + Show genotype-phenotype correlation tests that could be run. + + :param cohort: A cohort of patients to be analyzed + :type cohort: Cohort + :param min_count: Minimum number of annotations to be displayed in the table + :type min_count: int + :returns: HTML code for display + """ + if not isinstance(cohort, Cohort): + raise ValueError(f"cohort argument must be a Cohort object but we got a {type(cohort)} object") + rows = list() + # Get a list of variants for the preferred transcript + variant_count_d = defaultdict(int) + variant_to_effect_d = defaultdict() + variant_to_key = defaultdict() + # key, variant string, value int + all_variant_tuple_list = cohort.list_all_variants() + if not isinstance(all_variant_tuple_list, list): + raise ValueError(f"all_variant_tuple_list is not a list but is a {type(all_variant_tuple_list)}") + all_variant_counter = {x[0]:x[1] for x in all_variant_tuple_list} + for variant in cohort.all_variants: + var_count = all_variant_counter[variant.variant_string] + targets = [txa for txa in variant.tx_annotations if txa.transcript_id == preferred_transcript] + if len(targets) == 1: + target_txa = targets[0] + if target_txa.hgvsc_id is not None: + hgvsc_id = target_txa.hgvsc_id + else: + hgvsc_id = "NA" + # split out the variant + fields = hgvsc_id.split(":") + if len(fields) == 2: + hgvs = fields[1] + else: + hgvs = hgvsc_id + effect_tuple = [var_eff.name for var_eff in target_txa.variant_effects] + variant_count_d[hgvs] = var_count + variant_to_effect_d[hgvs] = effect_tuple[0] # for simplicity, just display first effect + variant_to_key[hgvs] = variant.variant_string + else: + print(f"[WARN] could not identify a single variant for target transcript (got {len(targets)}), variant {variant.variant_string}") + # could not find an entry for our transcript, so just show the genomic coordinates + variant_count_d[variant.variant_string] = var_count + variant_to_key[variant.variant_string] = variant.variant_string + # sort the variants by count and put variants below mininum count for display into a separate set + sorted_vars = sorted(variant_count_d.items(), key=lambda x:x[1], reverse=True) # sort descending by values + sorted_vars = [x[0] for x in sorted_vars] # take the first item from each sorted tuple + below_threshold_vars = set() + rows.append(f"\n") + rows.append("") + header_items = ["Variant", "Effect", "Count", "Key"] + rows.append(CohortViewer.html_row(header_items)) + for var in sorted_vars: + items = [] + var_count = variant_count_d.get(var) + #print(f"{var} - {var_count}") + if var_count >= min_count: + variant_key = variant_to_key.get(var) + items.append(var) + items.append(variant_to_effect_d.get(var, "n/a")) + items.append(str(var_count)) + items.append(variant_key) + rows.append(CohortViewer.html_row(items)) + else: + below_threshold_vars.add(var) + rows.append("
") + if len(below_threshold_vars) > 0: + var_str = "; ".join(below_threshold_vars) + rows.append(f"

Additionally, the following variants were observed {min_count-1} or fewer times: ") + rows.append(f"{var_str}.

") + rows.append("

Use the entry in the \"Key\" column to investigate whether specific variants display genotype-phenotype correlations

") return "\n".join(rows) \ No newline at end of file From 58983586f26ed5a62dd020ee0ca8996a6702b36f Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Wed, 18 Oct 2023 17:50:59 -0500 Subject: [PATCH 3/9] Fixed some logging things --- src/genophenocorr/preprocessing/_uniprot.py | 18 +++++++++++------- src/genophenocorr/preprocessing/_vep.py | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/genophenocorr/preprocessing/_uniprot.py b/src/genophenocorr/preprocessing/_uniprot.py index 2e950086..6e6c995b 100644 --- a/src/genophenocorr/preprocessing/_uniprot.py +++ b/src/genophenocorr/preprocessing/_uniprot.py @@ -20,6 +20,11 @@ def __init__(self): """Constructs all necessary attributes for a UniprotProteinMetadataService object """ self._logger = logging.getLogger(__name__) + self._logger.setLevel(logging.INFO) + handler = logging.FileHandler(f"{__name__}.log", mode='w') + formatter = logging.Formatter("%(name)s %(asctime)s %(levelname)s %(message)s") + handler.setFormatter(formatter) + self._logger.addHandler(handler) self._url = 'https://rest.uniprot.org/uniprotkb/search?query=(%s)AND(reviewed:true)&fields=accession,id,' \ 'gene_names,gene_primary,protein_name,ft_domain,ft_motif,ft_region,ft_repeat,xref_refseq' @@ -42,11 +47,10 @@ def annotate(self, protein_id: str) -> typing.Sequence[ProteinMetadata]: return [] protein_list = [] for protein in results: - verify = False + unis = [] for uni in protein['uniProtKBCrossReferences']: - if uni['id'] == protein_id: - verify = True - if verify: + unis.append(uni['id']) + if protein_id in unis: try: protein_name = protein['proteinDescription']['recommendedName']['fullName']['value'] except KeyError: @@ -64,8 +68,8 @@ def annotate(self, protein_id: str) -> typing.Sequence[ProteinMetadata]: self._logger.warning(f"No features for {protein_id}") protein_list.append(ProteinMetadata(protein_id, protein_name, all_features_list)) else: - self._logger.warning(f"ID {protein_id} did not match") - self._logger.warning(f'Protein ID {protein_id} got {len(protein_list)} results') - + self._logger.warning(f"UniProt did not return a protein ID that matches the ID we searched for: {protein_id} not in {unis}") + if len(protein_list) > 1: + self._logger.info(f'UniProt found {len(protein_list)} results for ID {protein_id}') # TODO - DD would like to discuss an example when there are >1 items in this list. return protein_list diff --git a/src/genophenocorr/preprocessing/_vep.py b/src/genophenocorr/preprocessing/_vep.py index 107507c3..940bdec8 100644 --- a/src/genophenocorr/preprocessing/_vep.py +++ b/src/genophenocorr/preprocessing/_vep.py @@ -129,7 +129,7 @@ def _query_vep(self, variant_coordinates) -> dict: api_url = self._url % (verify_start_end_coordinates(variant_coordinates)) r = requests.get(api_url, headers={'Content-Type': 'application/json'}) if not r.ok: - self._logging.error(f"Expected a result but got an Error for variant: {variant_coordinates.as_string()}") + self._logging.error(f"Expected a result but got an Error for variant: {variant_coordinates.variant_key}") r.raise_for_status() results = r.json() if not isinstance(results, list): From eccc962d74a347a756762ba556c5f6cb8ab2ac93 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Wed, 25 Oct 2023 12:49:26 -0500 Subject: [PATCH 4/9] Changes for PR --- src/genophenocorr/model/_protein.py | 6 +++--- src/genophenocorr/model/_variant_effects.py | 8 -------- src/genophenocorr/preprocessing/_phenopacket.py | 2 +- src/genophenocorr/preprocessing/_uniprot.py | 9 ++++----- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/genophenocorr/model/_protein.py b/src/genophenocorr/model/_protein.py index 1b168700..56b66f84 100644 --- a/src/genophenocorr/model/_protein.py +++ b/src/genophenocorr/model/_protein.py @@ -246,16 +246,16 @@ def motifs(self) -> typing.Iterable[ProteinFeature]: """ return filter(lambda f: f.feature_type == FeatureType.MOTIF, self.protein_features) - def get_features_variant_overlaps(self, var_start: int, var_end: int) -> typing.Sequence[ProteinFeature]: + def get_features_variant_overlaps(self, var_start: int, var_end: int) -> typing.Set[ProteinFeature]: affected_features = set() for feat in self.protein_features: if feat.info.start is None or feat.info.end is None: print(f"{feat.info.name} has no start and end info") continue if feat.info.start <= var_start <= feat.info.end: - affected_features.add(feat.to_string()) + affected_features.add(feat) elif feat.info.start <= var_end <= feat.info.end: - affected_features.add(feat.to_string()) + affected_features.add(feat) return affected_features def __str__(self) -> str: diff --git a/src/genophenocorr/model/_variant_effects.py b/src/genophenocorr/model/_variant_effects.py index 973f1ed1..2d29e44d 100644 --- a/src/genophenocorr/model/_variant_effects.py +++ b/src/genophenocorr/model/_variant_effects.py @@ -74,11 +74,3 @@ def curie(self) -> str: def __str__(self) -> str: return self.name.lower() - - def __eq__(self, other) -> bool: - return isinstance(other, VariantEffect) \ - and self.value == other.value \ - and self.name == other.name - - def __hash__(self) -> int: - return hash((self.value, self.name)) \ No newline at end of file diff --git a/src/genophenocorr/preprocessing/_phenopacket.py b/src/genophenocorr/preprocessing/_phenopacket.py index 9277c10f..d926d3d7 100644 --- a/src/genophenocorr/preprocessing/_phenopacket.py +++ b/src/genophenocorr/preprocessing/_phenopacket.py @@ -172,7 +172,7 @@ def _add_phenotypes(self, pp: Phenopacket) -> typing.Sequence[Phenotype]: for hpo_id in pp.phenotypic_features: hpo_id_list.append((hpo_id.type.id, not hpo_id.excluded)) if len(hpo_id_list) == 0: - #self._logger.warning(f'Expected at least one HPO term per patient, but received none for patient {pp.id}') + self._logger.warning(f'Expected at least one HPO term per patient, but received none for patient {pp.id}') return [] return self._phenotype_creator.create_phenotype(hpo_id_list) diff --git a/src/genophenocorr/preprocessing/_uniprot.py b/src/genophenocorr/preprocessing/_uniprot.py index 6e6c995b..b99dd3ce 100644 --- a/src/genophenocorr/preprocessing/_uniprot.py +++ b/src/genophenocorr/preprocessing/_uniprot.py @@ -20,7 +20,6 @@ def __init__(self): """Constructs all necessary attributes for a UniprotProteinMetadataService object """ self._logger = logging.getLogger(__name__) - self._logger.setLevel(logging.INFO) handler = logging.FileHandler(f"{__name__}.log", mode='w') formatter = logging.Formatter("%(name)s %(asctime)s %(levelname)s %(message)s") handler.setFormatter(formatter) @@ -43,7 +42,7 @@ def annotate(self, protein_id: str) -> typing.Sequence[ProteinMetadata]: r = requests.get(api_url).json() results = r['results'] if len(results) == 0: - self._logger.warning(f"No proteins found for ID {protein_id}. Please verify refseq ID.") + self._logger.warning("No proteins found for ID %s. Please verify refseq ID.", protein_id) return [] protein_list = [] for protein in results: @@ -65,11 +64,11 @@ def annotate(self, protein_id: str) -> typing.Sequence[ProteinMetadata]: feat = ProteinFeature.create(FeatureInfo(feat_name, Region(feat_start, feat_end)), FeatureType[feat_type.upper()]) all_features_list.append(feat) except KeyError: - self._logger.warning(f"No features for {protein_id}") + self._logger.warning("No features for %s", protein_id) protein_list.append(ProteinMetadata(protein_id, protein_name, all_features_list)) else: - self._logger.warning(f"UniProt did not return a protein ID that matches the ID we searched for: {protein_id} not in {unis}") + self._logger.warning("UniProt did not return a protein ID that matches the ID we searched for: %s not in %s", protein_id, unis) if len(protein_list) > 1: - self._logger.info(f'UniProt found {len(protein_list)} results for ID {protein_id}') + self._logger.info('UniProt found %d results for ID %s', len(protein_list), protein_id) # TODO - DD would like to discuss an example when there are >1 items in this list. return protein_list From a9f5cf406db04474ab31b936531b3170bf62ed44 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 25 Oct 2023 22:14:58 -0400 Subject: [PATCH 5/9] Fix overlap bug. --- src/genophenocorr/model/_protein.py | 8 +-- src/genophenocorr/model/genome/_genome.py | 63 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/genophenocorr/model/_protein.py b/src/genophenocorr/model/_protein.py index 56b66f84..3688b218 100644 --- a/src/genophenocorr/model/_protein.py +++ b/src/genophenocorr/model/_protein.py @@ -252,11 +252,11 @@ def get_features_variant_overlaps(self, var_start: int, var_end: int) -> typing. if feat.info.start is None or feat.info.end is None: print(f"{feat.info.name} has no start and end info") continue - if feat.info.start <= var_start <= feat.info.end: - affected_features.add(feat) - elif feat.info.start <= var_end <= feat.info.end: + + if feat.info.region.overlaps_with(var_start, var_end): affected_features.add(feat) - return affected_features + + return affected_features def __str__(self) -> str: return f"ProteinMetadata(id={self.protein_id}, " \ diff --git a/src/genophenocorr/model/genome/_genome.py b/src/genophenocorr/model/genome/_genome.py index 29b4ffe5..3329536b 100644 --- a/src/genophenocorr/model/genome/_genome.py +++ b/src/genophenocorr/model/genome/_genome.py @@ -180,6 +180,10 @@ def __repr__(self): return f"GenomeBuild(identifier={self._id.identifier}, contigs={self.contigs})" +def _a_contains_b(a_start: int, a_end: int, b_start: int, b_end: int) -> bool: + return a_start <= b_start and b_end <= a_end + + class Region(typing.Sized): """ `Region` represents a contiguous region/slice of a biological sequence, such as DNA, RNA, or protein. @@ -215,6 +219,65 @@ def end(self) -> int: """ return self._end + def overlaps_with_region(self, other) -> bool: + """ + Test if this `Region` overlaps with the `other`. + + :param other: another :class:`Region` + """ + if isinstance(other, Region): + raise ValueError(f'`other` is not instance of `Region`: {type(other)}') + + return self.overlaps_with(other.start, other.end) + + def overlaps_with(self, start: int, end: int) -> bool: + """ + Test if this `Region` overlaps with `start` and `end` coordinates of another region. + + .. warning:: + + `start` must be at or before `end`. Otherwise, the results are UNDEFINED. + + :param start: 0-based start coordinate of the other region + :param end: 0-based end coordinate of the other region + """ + if self.is_empty(): + return _a_contains_b(start, end, self._start, self._end) + if end - start == 0: + return _a_contains_b(self._start, self._end, start, end) + + return self.start < end and start < self.end + + def contains_region(self, other) -> bool: + """ + Test if this `Region` contains the `other` region. + + :param other: another :class:`Region` + """ + if isinstance(other, Region): + raise ValueError(f'`other` is not instance of `Region`: {type(other)}') + + return self.contains(other.start, other.end) + + def contains(self, start: int, end: int) -> bool: + """ + Test if this `Region` contains the another region denoted by `start` and `end` coordinates. + + .. warning:: + + `start` must be at or before `end`. Otherwise, the results are UNDEFINED. + + :param start: 0-based start coordinate of the other region + :param end: 0-based end coordinate of the other region + """ + return _a_contains_b(self._start, self._end, start, end) + + def is_empty(self) -> bool: + """ + Return `True` if the region is empty, i.e. it spans 0 units/bases/aminoacids... + """ + return self._end - self._start == 0 + def __len__(self) -> int: return self._end - self._start From 0c98f0c3173932138a6ac77ba70041d17f9ea6fc Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 25 Oct 2023 22:15:59 -0400 Subject: [PATCH 6/9] Remove `to_string` method. --- src/genophenocorr/model/_protein.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/genophenocorr/model/_protein.py b/src/genophenocorr/model/_protein.py index 3688b218..87519462 100644 --- a/src/genophenocorr/model/_protein.py +++ b/src/genophenocorr/model/_protein.py @@ -142,9 +142,6 @@ def feature_type(self) -> FeatureType: """ return self._type - def to_string(self) -> str: - return f"{self.feature_type.name}-{self.info.name}-{self.info.region}" - def __str__(self) -> str: return f"SimpleProteinFeature(type={self.feature_type}, " \ f"info={str(self.info)})" From 6a9cf203f21c43c71a0dd1e6dafd5a531fec37b8 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 25 Oct 2023 22:18:33 -0400 Subject: [PATCH 7/9] Remove logger config from `UniprotProteinMetadataService` init --- src/genophenocorr/preprocessing/_uniprot.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/genophenocorr/preprocessing/_uniprot.py b/src/genophenocorr/preprocessing/_uniprot.py index b99dd3ce..dbf4b3a5 100644 --- a/src/genophenocorr/preprocessing/_uniprot.py +++ b/src/genophenocorr/preprocessing/_uniprot.py @@ -20,10 +20,6 @@ def __init__(self): """Constructs all necessary attributes for a UniprotProteinMetadataService object """ self._logger = logging.getLogger(__name__) - handler = logging.FileHandler(f"{__name__}.log", mode='w') - formatter = logging.Formatter("%(name)s %(asctime)s %(levelname)s %(message)s") - handler.setFormatter(formatter) - self._logger.addHandler(handler) self._url = 'https://rest.uniprot.org/uniprotkb/search?query=(%s)AND(reviewed:true)&fields=accession,id,' \ 'gene_names,gene_primary,protein_name,ft_domain,ft_motif,ft_region,ft_repeat,xref_refseq' From 151f7489f2309c10ced69f926ba5de2db8a11846 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Fri, 27 Oct 2023 11:16:51 -0500 Subject: [PATCH 8/9] fixed "is None" statement in protein model --- src/genophenocorr/model/_protein.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/genophenocorr/model/_protein.py b/src/genophenocorr/model/_protein.py index 87519462..372e11d2 100644 --- a/src/genophenocorr/model/_protein.py +++ b/src/genophenocorr/model/_protein.py @@ -246,10 +246,6 @@ def motifs(self) -> typing.Iterable[ProteinFeature]: def get_features_variant_overlaps(self, var_start: int, var_end: int) -> typing.Set[ProteinFeature]: affected_features = set() for feat in self.protein_features: - if feat.info.start is None or feat.info.end is None: - print(f"{feat.info.name} has no start and end info") - continue - if feat.info.region.overlaps_with(var_start, var_end): affected_features.add(feat) From 016629ee022fddff50ea17423cb895fa849cfa7f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 30 Oct 2023 10:46:10 -0400 Subject: [PATCH 9/9] Use `Region` to get overlapping protein features. Use `Region` in `TranscriptAnnotation`, and validate inputs in `TranscriptAnnotation`. --- .../analysis/predicate/_all_predicates.py | 32 +++++----- src/genophenocorr/model/_protein.py | 12 +++- src/genophenocorr/model/_variant.py | 59 ++++++++----------- src/genophenocorr/preprocessing/_vep.py | 40 ++++++++----- tests/fixtures.py | 16 ++--- tests/genome_data.py | 8 +-- 6 files changed, 88 insertions(+), 79 deletions(-) diff --git a/src/genophenocorr/analysis/predicate/_all_predicates.py b/src/genophenocorr/analysis/predicate/_all_predicates.py index 4e10c8b9..3aa4b5c3 100644 --- a/src/genophenocorr/analysis/predicate/_all_predicates.py +++ b/src/genophenocorr/analysis/predicate/_all_predicates.py @@ -12,10 +12,10 @@ class HPOPresentPredicate(PolyPredicate[hpotk.TermId]): name='Observed', description=""" The sample *is* annotated with the tested phenotype feature `q`. - + This is either because the sample is annotated with `q` (exact match), or because one of sample's annotations is a descendant `q` (annotation propagation). - For instance, we tested for a Seizure and the sample *had* a Clonic seizure + For instance, we tested for a Seizure and the sample *had* a Clonic seizure (a descendant of Seizure). """) @@ -23,11 +23,11 @@ class HPOPresentPredicate(PolyPredicate[hpotk.TermId]): name='Not observed', description=""" We are particular about the sample *not* having the tested feature `q`. - + In other words, `q` was *excluded* in the sample or the sample is annotated with an excluded ancestor of `q`. - - For instance, we tested for a Clonic seizure and the sample did *not* have any Seizure, which implies - *not* Clonic seizure. + + For instance, we tested for a Clonic seizure and the sample did *not* have any Seizure, which implies + *not* Clonic seizure. """) NOT_MEASURED = PatientCategory(cat_id=2, @@ -36,7 +36,7 @@ class HPOPresentPredicate(PolyPredicate[hpotk.TermId]): We do not know if the sample has or has not the tested feature. """) - def __init__(self, + def __init__(self, hpo: hpotk.MinimalOntology) -> None: self._hpo = hpotk.util.validate_instance(hpo, hpotk.MinimalOntology, 'hpo') @@ -69,13 +69,13 @@ def test(self, patient: Patient, query: hpotk.TermId) -> typing.Optional[Patient HETEROZYGOUS = PatientCategory(cat_id=0, name='Heterozygous', description=""" - This sample has the tested attribute on one allele. + This sample has the tested attribute on one allele. """) HOMOZYGOUS = PatientCategory(cat_id=1, name='Homozygous', description=""" - This sample has the tested attribute on both alleles. + This sample has the tested attribute on both alleles. """) NO_VARIANT = PatientCategory(cat_id=2, @@ -87,7 +87,7 @@ def test(self, patient: Patient, query: hpotk.TermId) -> typing.Optional[Patient class VariantEffectPredicate(PolyPredicate[VariantEffect]): - + def __init__(self, transcript:str) -> None: self._transcript = transcript @@ -205,11 +205,12 @@ def test(self, patient: Patient, query:FeatureType) -> typing.Optional[PatientCa for var in patient.variants: for trans in var.tx_annotations: if trans.transcript_id == self._transcript: - if trans.protein_effect_location is not None and trans.protein_effect_location[0] is not None and trans.protein_effect_location[1] is not None: + protein_location = trans.protein_effect_location + if protein_location is not None: for prot in trans.protein_affected: for feat in prot.protein_features: if feat.feature_type == query: - if len(list(range(max(trans.protein_effect_location[0], feat.info.start), min(trans.protein_effect_location[1], feat.info.end) + 1))) > 0: + if len(list(range(max(protein_location.start, feat.info.start), min(protein_location.end, feat.info.end) + 1))) > 0: vars.add(var) if len(vars) == 1: for v in vars: @@ -249,11 +250,12 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory] for var in patient.variants: for trans in var.tx_annotations: if trans.transcript_id == self._transcript: - if trans.protein_effect_location is not None and trans.protein_effect_location[0] is not None and trans.protein_effect_location[1] is not None: + protein_location = trans.protein_effect_location + if protein_location is not None: for prot in trans.protein_affected: for feat in prot.protein_features: if feat.info.name == query: - if len(list(range(max(trans.protein_effect_location[0], feat.info.start), min(trans.protein_effect_location[1], feat.info.end) + 1))) > 0: + if len(list(range(max(protein_location.start, feat.info.start), min(protein_location.end, feat.info.end) + 1))) > 0: vars.add(var) if len(vars) == 1: for v in vars: @@ -268,4 +270,4 @@ def test(self, patient: Patient, query: str) -> typing.Optional[PatientCategory] elif len(vars) > 1: return HOMOZYGOUS else: - return NO_VARIANT \ No newline at end of file + return NO_VARIANT diff --git a/src/genophenocorr/model/_protein.py b/src/genophenocorr/model/_protein.py index 372e11d2..f928d525 100644 --- a/src/genophenocorr/model/_protein.py +++ b/src/genophenocorr/model/_protein.py @@ -243,10 +243,18 @@ def motifs(self) -> typing.Iterable[ProteinFeature]: """ return filter(lambda f: f.feature_type == FeatureType.MOTIF, self.protein_features) - def get_features_variant_overlaps(self, var_start: int, var_end: int) -> typing.Set[ProteinFeature]: + def get_features_variant_overlaps(self, region: Region) -> typing.Collection[ProteinFeature]: + """ + Get a collection of protein features that overlap with the `region`. + Args: + region: the query region. + + Returns: + Collection[ProteinFeature]: a collection of overlapping protein features. + """ affected_features = set() for feat in self.protein_features: - if feat.info.region.overlaps_with(var_start, var_end): + if feat.info.region.overlaps_with_region(region): affected_features.add(feat) return affected_features diff --git a/src/genophenocorr/model/_variant.py b/src/genophenocorr/model/_variant.py index 9ba97911..d9a8b95e 100644 --- a/src/genophenocorr/model/_variant.py +++ b/src/genophenocorr/model/_variant.py @@ -4,7 +4,7 @@ import hpotk -from .genome import GenomicRegion +from .genome import Region, GenomicRegion from ._gt import Genotyped, Genotypes from ._protein import ProteinMetadata from ._variant_effects import VariantEffect @@ -38,15 +38,16 @@ def transcript_id(self) -> str: class TranscriptAnnotation(TranscriptInfoAware): """Class that represents results of the functional annotation of a variant with respect to single transcript of a gene. - Attributes: + Args: gene_id (string): The gene symbol associated with the transcript - transcript_id (string): The transcript ID - hgvsc_id (string): The HGVS "coding-DNA" ID if available, else None + tx_id (string): The transcript ID + hgvsc (string): The HGVS "coding-DNA" ID if available, else None is_preferred (bool): The transcript is a MANE transcript, canonical Ensembl transcript, etc. - variant_effects (Sequence[string]): A sequence of predicted effects given by VEP - overlapping_exons (Sequence[integer]): A sequence of exons affected by the variant. Returns None if none are affected. - protein_affected (ProteinMetadata): A ProteinMetadata object representing the protein affected by this transcript - protein_effect_location (Tuple(integer, integer)): The start and end coordinates of the effect on the protein sequence. + variant_effects (Iterable[string]): An iterable of predicted effects given by VEP + affected_exons (Iterable[integer]): An iterable of exons affected by the variant. Returns None if none are affected. + affected_protein (ProteinMetadata): A ProteinMetadata object representing the protein affected by this transcript + protein_effect_coordinates (Region, optional): An optional :class:`Region` with start and end coordinates + of the effect on the protein sequence. """ def __init__(self, gene_id: str, @@ -54,33 +55,21 @@ def __init__(self, gene_id: str, hgvsc: typing.Optional[str], is_preferred: bool, variant_effects: typing.Iterable[VariantEffect], - affected_exons: typing.Optional[typing.Sequence[int]], - affected_protein: typing.Sequence[ProteinMetadata], - protein_effect_start: typing.Optional[int], - protein_effect_end: typing.Optional[int]): - """Constructs all necessary attributes for a TranscriptAnnotation object - - Args: - gene_id (string): The gene symbol associated with the transcript - tx_id (string): The transcript ID - hgvsc (string, Optional): The HGVS "coding-DNA" ID if available, else None - variant_effects (Iterable[string]): An iterable of predicted effects given by functional annotator - affected_exons (Sequence[integer], Optional): A sequence of exons affected by the variant. Returns None if none are affected. - affected_protein (Sequence[ProteinMetadata]): A ProteinMetadata object representing the protein affected by this transcript - protein_effect_start (integer, Optional): The start coordinate of the effect on the protein sequence. - protein_effect_end (integer, Optional): The end coordinate of the effect on the protein sequence. - """ - self._gene_id = gene_id - self._tx_id = tx_id - self._hgvsc_id = hgvsc - self._is_preferred = is_preferred + affected_exons: typing.Optional[typing.Iterable[int]], + affected_protein: typing.Iterable[ProteinMetadata], + protein_effect_coordinates: typing.Optional[Region]): + self._gene_id = hpotk.util.validate_instance(gene_id, str, 'gene_id') + self._tx_id = hpotk.util.validate_instance(tx_id, str, 'tx_id') + self._hgvsc_id = hpotk.util.validate_optional_instance(hgvsc, str, 'hgvsc') + self._is_preferred = hpotk.util.validate_instance(is_preferred, bool, 'is_preferred') self._variant_effects = tuple(variant_effects) if affected_exons is not None: self._affected_exons = tuple(affected_exons) else: self._affected_exons = None self._affected_protein = tuple(affected_protein) - self._protein_effect_location = (protein_effect_start, protein_effect_end) + self._protein_effect_location = hpotk.util.validate_optional_instance(protein_effect_coordinates, Region, + 'protein_effect_coordinates') @property def gene_id(self) -> str: @@ -142,10 +131,11 @@ def protein_affected(self) -> typing.Sequence[ProteinMetadata]: return self._affected_protein @property - def protein_effect_location(self) -> typing.Tuple[int, int]: + def protein_effect_location(self) -> typing.Optional[Region]: """ Returns: - Tuple(integer, integer): The start and end position on the protein sequence that the variant effects. (e.g. (1234, 1235)) + Region: a :class:`genophenocorr.model.genome.Region` with start and end position on the protein sequence + that the variant affects. """ return self._protein_effect_location @@ -381,9 +371,10 @@ def create_variant_from_scratch(variant_coordinates: VariantCoordinates, protein_effect_start: int, protein_effect_end: int, genotypes: Genotypes): - transcript = TranscriptAnnotation(gene_name, trans_id, hgvsc_id, is_preferred, consequences, exons_effected, protein, - protein_effect_start, protein_effect_end) - return Variant(variant_coordinates, [transcript], genotypes) + protein_effect = Region(protein_effect_start, protein_effect_end) + transcript = TranscriptAnnotation(gene_name, trans_id, hgvsc_id, is_preferred, consequences, exons_effected, + protein, protein_effect) + return Variant(variant_coordinates, (transcript,), genotypes) def __init__(self, var_coordinates: VariantCoordinates, tx_annotations: typing.Iterable[TranscriptAnnotation], diff --git a/src/genophenocorr/preprocessing/_vep.py b/src/genophenocorr/preprocessing/_vep.py index f5bdc2db..2de04f41 100644 --- a/src/genophenocorr/preprocessing/_vep.py +++ b/src/genophenocorr/preprocessing/_vep.py @@ -1,12 +1,11 @@ # A module with classes that interact with Ensembl's REST API to fetch required data. import logging -import re import typing -import hpotk import requests from genophenocorr.model import VariantCoordinates, TranscriptAnnotation, VariantEffect +from genophenocorr.model.genome import Region from ._api import FunctionalAnnotator, ProteinMetadataService @@ -104,7 +103,7 @@ def _parse_variant_effect(self, effect: str) -> typing.Optional[VariantEffect]: return None return var_effect - def _process_item(self, item) -> typing.Optional[TranscriptAnnotation]: + def _process_item(self, item: typing.Dict) -> typing.Optional[TranscriptAnnotation]: """ Parse one transcript annotation from the JSON response. """ @@ -121,23 +120,33 @@ def _process_item(self, item) -> typing.Optional[TranscriptAnnotation]: if var_effect is not None: var_effects.append(var_effect) gene_name = item.get('gene_symbol') - protein_id = item.get('protein_id') - protein = self._protein_annotator.annotate(protein_id) - protein_effect_start = item.get('protein_start') - protein_effect_end = item.get('protein_end') - if protein_effect_start is None and protein_effect_end is not None: - protein_effect_start = 1 - if protein_effect_end is not None: - protein_effect_end = int(protein_effect_end) - if protein_effect_start is not None: - protein_effect_start = int(protein_effect_start) + exons_effected = item.get('exon') if exons_effected is not None: exons_effected = exons_effected.split('/')[0].split('-') if len(exons_effected) == 2: exons_effected = range(int(exons_effected[0]), int(exons_effected[1]) + 1) - exons_effected = [int(x) for x in exons_effected] + exons_effected = (int(x) for x in exons_effected) + + protein_id = item.get('protein_id') + protein = self._protein_annotator.annotate(protein_id) + protein_effect_start = item.get('protein_start') + protein_effect_end = item.get('protein_end') + if protein_effect_start is None or protein_effect_end is None: + # Does this ever happen? Let's log a warning for now and address the absence of a coordinate later, + # if we see a lot of these warnings popping out. + # Note that Lauren's version of the code had a special branch for missing start, where she set the variable + # to `1` (1-based coordinate). + self._logging.warning('Missing start/end coordinate for %s on protein %s', hgvsc_id, protein_id) + protein_effect = None + else: + # The coordinates are in 1-based system and we need 0-based. + protein_effect_start = int(protein_effect_start) - 1 + protein_effect_end = int(protein_effect_end) + protein_effect = Region(protein_effect_start, protein_effect_end) + + return TranscriptAnnotation(gene_name, trans_id, hgvsc_id, @@ -145,8 +154,7 @@ def _process_item(self, item) -> typing.Optional[TranscriptAnnotation]: var_effects, exons_effected, protein, - protein_effect_start, - protein_effect_end) + protein_effect) def _query_vep(self, variant_coordinates: VariantCoordinates) -> dict: api_url = self._url % (verify_start_end_coordinates(variant_coordinates)) diff --git a/tests/fixtures.py b/tests/fixtures.py index 0f31d6bf..87108c92 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -14,7 +14,7 @@ def make_region(contig: str, start: int, end: int) -> GenomicRegion: @pytest.fixture def toy_cohort() -> Cohort: - prot = ProteinMetadata(protein_id='NP_037407.4', label='Ankyrin repeat domain-containing protein 11', + prot = ProteinMetadata(protein_id='NP_037407.4', label='Ankyrin repeat domain-containing protein 11', protein_features=(ProteinFeature.create(feature_type=FeatureType.REPEAT, info=FeatureInfo('ANK 1', Region(start=167, end=196))), ProteinFeature.create(feature_type=FeatureType.REPEAT, info=FeatureInfo('ANK 2', Region(start=200, end=229))), ProteinFeature.create(feature_type=FeatureType.REPEAT, info=FeatureInfo('ANK 3', Region(start=233, end=262))), @@ -37,36 +37,36 @@ def toy_cohort() -> Cohort: dup = Variant(VariantCoordinates(make_region("16", 89279849, 89279850), ref='G', alt='GC', change_length=1), [ TranscriptAnnotation('ANKRD11', 'NM_013275.6', 'NM_013275.6:c.6691dup', False, [VariantEffect.FRAMESHIFT_VARIANT], [9], - [prot], 2231, 2231) + [prot], Region(2230, 2231)) ], Genotypes.from_mapping({'HetSingleVar': Genotype.HETEROZYGOUS})) indel = Variant(VariantCoordinates(make_region("16", 89284600, 89284602), ref='GG', alt='A', change_length=-1), [ TranscriptAnnotation('ANKRD11', 'NM_013275.6', 'NM_013275.6:c.1940_1941delinsT', False, [VariantEffect.FRAMESHIFT_VARIANT], - [9], [prot], 647, 647) + [9], [prot], Region(646, 647)) ], Genotypes.from_mapping({'HetDoubleVar1': Genotype.HETEROZYGOUS})) snv_stop_gain = Variant(VariantCoordinates(make_region("16", 89280751, 89280752), ref='G', alt='T', change_length=0), [ TranscriptAnnotation('ANKRD11', 'NM_013275.6', 'NM_013275.6:c.5790C>A', False, [VariantEffect.STOP_GAINED], [9], [prot], - 1930, 1930)], + Region(1929, 1930))], Genotypes.from_mapping({'HetDoubleVar1': Genotype.HETEROZYGOUS})) snv_missense = Variant(VariantCoordinates(make_region("16", 89275127, 89275128), ref='G', alt='A', change_length=0), [ TranscriptAnnotation('ANKRD11', 'NM_013275.6', 'NM_013275.6:c.7534C>T', False, [VariantEffect.MISSENSE_VARIANT], [10], - [prot], 2512, 2512) + [prot], Region(2511, 2512)) ], Genotypes.from_mapping({'HetDoubleVar2': Genotype.HETEROZYGOUS})) del_frameshift = Variant(VariantCoordinates(make_region("16", 89279707, 89279725), ref='AGTGTTCGGGGCGGGGCC', alt='A', change_length=-17), [ TranscriptAnnotation('ANKRD11', 'NM_013275.6', 'NM_013275.6:c.6817_6833del', False, [VariantEffect.FRAMESHIFT_VARIANT], - [9], [prot], 2273, 2278) + [9], [prot], Region(2272, 2278)) ], Genotypes.from_mapping({'HetDoubleVar2': Genotype.HETEROZYGOUS})) del_small = Variant(VariantCoordinates(make_region("16", 89279457, 89279459), ref='TG', alt='T', change_length=-1), [ TranscriptAnnotation('ANKRD11', 'NM_013275.6', 'NM_013275.6:c.7083del', False, [VariantEffect.FRAMESHIFT_VARIANT], [9], - [prot], 2361, 2362) + [prot], Region(2360, 2362)) ], Genotypes.from_mapping({'HomoVar': Genotype.HOMOZYGOUS_ALTERNATE})) del_large = Variant(VariantCoordinates(make_region("16", 89_190_070, 89_439_815), ref='N', alt='', change_length=-249_745), @@ -74,7 +74,7 @@ def toy_cohort() -> Cohort: TranscriptAnnotation('ANKRD11', 'NM_013275.6', None, False, [VariantEffect.STOP_LOST, VariantEffect.FEATURE_TRUNCATION, VariantEffect.CODING_SEQUENCE_VARIANT, VariantEffect.FIVE_PRIME_UTR_VARIANT, VariantEffect.THREE_PRIME_UTR_VARIANT, VariantEffect.INTRON_VARIANT], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - [prot], None, None) + [prot], None) ], Genotypes.from_mapping({'LargeCNV': Genotype.HETEROZYGOUS})) diff --git a/tests/genome_data.py b/tests/genome_data.py index ded9ec28..0955c2f4 100644 --- a/tests/genome_data.py +++ b/tests/genome_data.py @@ -56,7 +56,7 @@ def toy_variants(toy_contig: Contig) -> typing.Sequence[Variant]: TranscriptAnnotation('some-gene', 'fake-tx-pos', 'fake-tx-pos-hgvsc:v1', True, (VariantEffect.MISSENSE_VARIANT,), (2,), - (), None, None), + (), None), ), Genotypes.empty() ), Variant(VariantCoordinates(GenomicRegion(toy_contig, 530, 531, Strand.POSITIVE), @@ -65,7 +65,7 @@ def toy_variants(toy_contig: Contig) -> typing.Sequence[Variant]: TranscriptAnnotation('some-gene', 'fake-tx-pos', 'fake-tx-pos-hgvsc:v2', True, (VariantEffect.FRAMESHIFT_VARIANT,), (3,), - (), None, None), + (), None), ), Genotypes.empty() ), Variant(VariantCoordinates(GenomicRegion(toy_contig, 160, 161, Strand.NEGATIVE).with_strand(Strand.POSITIVE), @@ -74,7 +74,7 @@ def toy_variants(toy_contig: Contig) -> typing.Sequence[Variant]: TranscriptAnnotation('other-gene', 'fake-tx-neg', 'fake-tx-neg-hgvsc:v3', True, (VariantEffect.SYNONYMOUS_VARIANT,), (1,), - (), None, None), + (), None), ), Genotypes.empty() ), Variant(VariantCoordinates(GenomicRegion(toy_contig, 570, 574, Strand.NEGATIVE).with_strand(Strand.POSITIVE), @@ -83,7 +83,7 @@ def toy_variants(toy_contig: Contig) -> typing.Sequence[Variant]: TranscriptAnnotation('other-gene', 'fake-tx-neg', 'fake-tx-neg-hgvsc:v4', True, (VariantEffect.THREE_PRIME_UTR_VARIANT,), (4,), - (), None, None), + (), None), ), Genotypes.empty() )