From 7c6bff109cd0435edde961533798114e5e52f6b7 Mon Sep 17 00:00:00 2001 From: Yan Wong Date: Wed, 4 Dec 2024 21:41:16 +0000 Subject: [PATCH] Create DeletionPositionRemapping.ipynb See https://github.com/jeromekelleher/sc2ts-paper/issues/256 --- notebooks/DeletionPositionRemapping.ipynb | 2605 +++++++++++++++++++++ 1 file changed, 2605 insertions(+) create mode 100644 notebooks/DeletionPositionRemapping.ipynb diff --git a/notebooks/DeletionPositionRemapping.ipynb b/notebooks/DeletionPositionRemapping.ipynb new file mode 100644 index 0000000..609ffe4 --- /dev/null +++ b/notebooks/DeletionPositionRemapping.ipynb @@ -0,0 +1,2605 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bd7f5f3b-234a-4537-bd16-b9ed0e3cd69c", + "metadata": {}, + "source": [ + "# Assess the likelihood of a recurrent deletions in Alpha, BA.1 and BA.2\n", + "It is plausible that the SARS-CoV-2 deletions at positions 11288-11296 (BA.2 and Alpha) and 11283-11291 (BA.1) do not represent 3 separate origins. Here we re-run the sc2ts HMM to see if there are alternative likely explanations" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8d2dfea9-094c-4b19-9212-b26a3228bf74", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded Dataset at ../data/viridian_2024-04-29.alpha1.zarr.zip with 3960704 samples and 31 metadata fields\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " Tree Sequence \n", + "
Trees221
Sequence Length29,904.0
Time Unitsdays
Sample Nodes516,443
Total Size557.4 MiB
Metadata\n", + "
\n", + " \n", + "
\n", + " dict\n", + " \n", + "
\n", + " sc2ts:\n", + "
\n", + " dict\n", + " date: 2023-01-01
\n", + "
\n", + " exact_matches:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " date:\n", + "
\n", + " dict\n", + " 2020-01-25: 1
2020-01-29: 2
2020-01-30: 2
2020-02-01: 1
2020-02-02: 3
2020-02-05: 2
2020-02-06: 1
2020-02-07: 2
2020-02-08: 1
2020-02-09: 2
2020-02-11: 1
2020-02-14: 4
2020-02-15: 1
2020-02-16: 1
2020-02-18: 1
2020-02-19: 1
2020-02-20: 1
2020-02-21: 1
2020-02-22: 1
2020-02-23: 1
2020-02-24: 2
2020-02-25: 3
2020-02-26: 6
2020-02-27: 10
2020-02-28: 12
2020-02-29: 18
2020-03-01: 8
2020-03-02: 12
2020-03-03: 16
2020-03-04: 14
... and 1032 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + " node:\n", + "
\n", + " dict\n", + " 1: 36
10: 2
10001: 3
100016: 2
100017: 1
100024: 2
100025: 8
100032: 2
100036: 5
100047: 1
100052: 2
100061: 1
100062: 1
100064: 1
100068: 1
100069: 3
100075: 2
100082: 3
100085: 1
100087: 1
100089: 1
100090: 2
100092: 1
100099: 1
1001: 1
100102: 1
100112: 1
10013: 1
100141: 1
100143: 1
... and 79196 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + " pango:\n", + "
\n", + " dict\n", + " A: 50
A.1: 106
A.2: 14
A.2.2: 47
A.2.3: 44
A.23.1: 14
A.25: 1
A.26: 6
A.29: 8
A.3: 9
A.4: 14
A.5: 17
AA.1: 3
AA.2: 50
AA.3: 12
AA.4: 40
AA.5: 16
AA.6: 21
AA.7: 16
AA.8: 12
AD.2: 1433
AE.8: 12
AH.3: 6
AK.1: 21
AP.1: 274
AQ.1: 1
AS.1: 12
AV.1: 44
AY.1: 24
AY.10: 213
... and 1277 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " num_samples_processed:\n", + "
\n", + " dict\n", + " 2020-01-01: 1
2020-01-04: 0
2020-01-05: 0
2020-01-07: 0
2020-01-10: 1
2020-01-19: 1
2020-01-23: 0
2020-01-24: 1
2020-01-25: 3
2020-01-26: 0
2020-01-27: 0
2020-01-28: 5
2020-01-29: 5
2020-01-30: 11
2020-01-31: 1
2020-02-01: 14
2020-02-02: 12
2020-02-03: 6
2020-02-04: 8
2020-02-05: 4
2020-02-06: 5
2020-02-07: 3
2020-02-08: 4
2020-02-09: 3
2020-02-10: 6
2020-02-11: 2
2020-02-12: 0
2020-02-13: 2
2020-02-14: 5
2020-02-15: 4
... and 1050 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + " retro_groups:\n", + "
\n", + " list\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-06-20
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-06-14
2020-06-18
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
2020-06-19
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 718c16c69c8b6d8d9e0df800b8dddc
9a
num_mutations: 14
num_nodes: 21
num_recurrent_mutations: 0
num_root_mutations: 7
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
B.1.305
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR14069280
SRR15154105
SRR13937745
SRR13937746
SRR13937747
SRR13937748
SRR13937749
SRR13937752
SRR14036013
SRR14036024
SRR14036035
SRR14036053
SRR14036055
SRR14036056
SRR14036058
SRR14036062
SRR14036063
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-07-27
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-07-22
2020-07-22
2020-07-22
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-24
2020-07-26
\n", + "
\n", + "
\n", + "
depth: 1
group_id: c5a7b6a96de56db0c2a501002195ea
46
num_mutations: 12
num_nodes: 15
num_recurrent_mutations: 0
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
B.1.1.70
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR9188248
ERR9188262
ERR9188298
ERR9188252
ERR9188254
ERR9188259
ERR9188265
ERR9188282
ERR9188285
ERR9188288
ERR9188294
ERR9188301
ERR9188304
ERR9188291
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-06
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-07-31
2020-08-01
2020-08-01
2020-08-01
2020-08-02
2020-08-04
2020-08-04
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
2020-08-05
\n", + "
\n", + "
\n", + "
depth: 2
group_id: 5cbc3bf4558163608254ab413e2d3d
96
num_mutations: 19
num_nodes: 30
num_recurrent_mutations: 0
num_root_mutations: 10
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
AD.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4584991
ERR4584975
ERR4584992
ERR4584994
ERR4664437
ERR4585253
ERR4585268
ERR4585092
ERR4585111
ERR4585112
ERR4585175
ERR4585193
ERR4585195
ERR4585196
ERR4585202
ERR4585203
ERR4585206
ERR4585208
ERR4585209
ERR4585211
ERR4585212
ERR4585260
ERR4585270
ERR4585271
ERR4585272
ERR4585273
ERR4597084
ERR4597600
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-11
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-08-06
2020-08-06
2020-08-07
2020-08-07
2020-08-08
2020-08-09
2020-08-10
2020-08-10
2020-08-10
2020-08-10
\n", + "
\n", + "
\n", + "
depth: 3
group_id: c5a24610314df347cd16c81bc8e184
23
num_mutations: 15
num_nodes: 14
num_recurrent_mutations: 0
num_root_mutations: 5
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.258.3
B.1.258.3
B.1
B.1.258
B.1.258.3
B.1.258.3
B.1.258.3
B.1.258.3
B.1.258.3
B.1.258.3
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4555292
ERR4555334
ERR4597156
ERR4599760
ERR4569542
ERR4555303
ERR4600022
ERR4644119
ERR4644123
ERR4665443
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-14
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-08-09
2020-08-09
2020-08-09
2020-08-10
2020-08-10
2020-08-10
2020-08-11
2020-08-11
2020-08-12
2020-08-13
2020-08-13
2020-08-13
\n", + "
\n", + "
\n", + "
depth: 3
group_id: c5e68caf8f4f44399106a38ed90425
ef
num_mutations: 22
num_nodes: 15
num_recurrent_mutations: 0
num_root_mutations: 8
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1.389
B.1
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4597334
ERR4642945
ERR4644174
ERR4597704
ERR4597885
ERR4642987
ERR4597719
ERR4597993
ERR4598426
ERR4598014
ERR4598101
ERR4598187
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-15
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-08-09
2020-08-10
2020-08-10
2020-08-10
2020-08-12
2020-08-12
2020-08-12
2020-08-13
2020-08-13
2020-08-14
2020-08-14
\n", + "
\n", + "
\n", + "
depth: 2
group_id: 18a54759751f752f4aafd732101adc
80
num_mutations: 22
num_nodes: 13
num_recurrent_mutations: 0
num_root_mutations: 12
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
B.1.160
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4597098
ERR4597746
ERR4597965
ERR4597971
ERR4598093
ERR4598855
ERR4600007
ERR4598582
ERR4598821
ERR4598237
ERR4598827
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-18
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-08-12
2020-08-12
2020-08-13
2020-08-13
2020-08-16
2020-08-17
2020-08-17
2020-08-17
2020-08-17
2020-08-17
2020-08-17
2020-08-17
\n", + "
\n", + "
\n", + "
depth: 2
group_id: e50007dbcf84957cafe545477e556f
ec
num_mutations: 31
num_nodes: 15
num_recurrent_mutations: 0
num_root_mutations: 19
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
B.1.367
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4598595
ERR4598862
ERR4598021
ERR4599975
ERR4598363
ERR4598917
ERR4599128
ERR4599151
ERR4599159
ERR4599183
ERR4615507
ERR4643044
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-22
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-08-19
2020-08-20
2020-08-20
2020-08-20
2020-08-20
2020-08-20
2020-08-20
2020-08-20
2020-08-21
2020-08-21
2020-08-21
\n", + "
\n", + "
\n", + "
depth: 2
group_id: b9265e97e9d510bb646d71703e6720
75
num_mutations: 12
num_nodes: 13
num_recurrent_mutations: 0
num_root_mutations: 10
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " D.4
D.4
D.4
D.4
D.4
D.4
D.4
D.4
D.4
D.4
D.4
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4599024
ERR4599191
ERR4599291
ERR4599372
ERR4599378
ERR4599382
ERR4599436
ERR4599517
ERR4599251
ERR4599275
ERR4599387
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-08-25
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-08-20
2020-08-22
2020-08-22
2020-08-24
2020-08-24
2020-08-24
2020-08-24
2020-08-24
2020-08-24
2020-08-24
2020-08-24
2020-08-24
\n", + "
\n", + "
\n", + "
depth: 2
group_id: fe719835fb46d9b47e2c7cd3856a98
89
num_mutations: 11
num_nodes: 14
num_recurrent_mutations: 0
num_root_mutations: 8
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
B.1.282
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4668045
ERR4665400
ERR4671220
ERR4615747
ERR4615775
ERR4615779
ERR4615798
ERR4615838
ERR4615840
ERR4615848
ERR4615994
ERR4616004
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-09-11
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-09-06
2020-09-07
2020-09-07
2020-09-07
2020-09-07
2020-09-07
2020-09-08
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-09
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
2020-09-10
\n", + "
\n", + "
\n", + "
depth: 3
group_id: b18d4866220cd83e2f1fada3e064af
1b
num_mutations: 29
num_nodes: 35
num_recurrent_mutations: 0
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221
B.1.221
B.1.221
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221
B.1.221
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221.1
B.1.221
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4681656
ERR4651059
ERR4651121
ERR4651262
ERR4651264
ERR4652101
ERR4664316
ERR4651098
ERR4651515
ERR4652343
ERR4652386
ERR4652392
ERR4652435
ERR4652467
ERR4652470
ERR4652528
ERR4652561
ERR4652084
ERR4652093
ERR4652111
ERR4652115
ERR4652206
ERR4652234
ERR4652326
ERR4652482
ERR4652574
ERR4652590
ERR4653003
ERR4653098
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-09-22
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-09-18
2020-09-18
2020-09-20
2020-09-20
2020-09-20
2020-09-21
2020-09-21
2020-09-21
2020-09-21
2020-09-21
2020-09-21
2020-09-21
2020-09-21
\n", + "
\n", + "
\n", + "
depth: 2
group_id: 4e5bb8b55022dca40b94f2b1167c39
b5
num_mutations: 16
num_nodes: 16
num_recurrent_mutations: 0
num_root_mutations: 8
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
B.1.1.196
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4659489
ERR4659553
ERR4681831
ERR4681962
ERR4681963
ERR4659648
ERR4668507
ERR4681773
ERR4681904
ERR4681975
ERR4682011
ERR4698709
ERR4698734
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-09-25
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-09-19
2020-09-21
2020-09-21
2020-09-21
2020-09-23
2020-09-23
2020-09-24
2020-09-24
2020-09-24
2020-09-24
\n", + "
\n", + "
\n", + "
depth: 3
group_id: c305b5db79f87e85624781fbb3a6ff
45
num_mutations: 20
num_nodes: 13
num_recurrent_mutations: 0
num_root_mutations: 5
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.362.1
B.1.362.2
B.1.362.2
B.1.362.2
B.1.362.2
B.1.362.2
B.1.362.2
B.1.362.2
B.1.362.2
B.1.362.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4659502
ERR4698487
ERR4698500
ERR4698591
ERR4668736
ERR4693933
ERR4669622
ERR4669632
ERR4688523
ERR4707037
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-09-26
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-09-20
2020-09-20
2020-09-21
2020-09-22
2020-09-23
2020-09-24
2020-09-25
2020-09-25
2020-09-25
2020-09-25
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 8f20b17dde2d48025dea858611c06e
2d
num_mutations: 27
num_nodes: 13
num_recurrent_mutations: 0
num_root_mutations: 2
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " AP.1
B.1.1.70
B.1.1.70
AP.1
B.1.1.70
AP.1
B.1.1.70
AP.1
B.1.1.70
B.1.1.70
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4659781
ERR4681793
ERR4671594
ERR5073173
ERR4671222
ERR4693656
ERR4671014
ERR4671089
ERR4671356
ERR4671681
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-10-03
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-09-27
2020-09-27
2020-09-28
2020-09-30
2020-09-30
2020-10-01
2020-10-01
2020-10-02
2020-10-02
2020-10-02
\n", + "
\n", + "
\n", + "
depth: 3
group_id: ff42e4407538ae8f07954f8e22e651
56
num_mutations: 14
num_nodes: 14
num_recurrent_mutations: 1
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.523
B.1.523
B.1.523
B.1.523
B.1.523
B.1.523
B.1.523
B.1.523
B.1.523
B.1.523
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4758100
ERR4759681
ERR4759654
ERR4758323
ERR4758830
ERR4758826
ERR4758908
ERR4758821
ERR4758915
ERR4759676
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-10-13
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-10-08
2020-10-08
2020-10-08
2020-10-08
2020-10-09
2020-10-10
2020-10-10
2020-10-11
2020-10-12
2020-10-12
2020-10-12
\n", + "
\n", + "
\n", + "
depth: 2
group_id: 8eee0abe59c3365b1fddb40ad201a3
56
num_mutations: 14
num_nodes: 13
num_recurrent_mutations: 0
num_root_mutations: 11
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.177.26
B.1.177.26
B.1.177.26
B.1.177
B.1.177
B.1.177.26
B.1.177.26
B.1.177
B.1.177
B.1.177.26
B.1.177
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4758150
ERR4758395
ERR4758788
ERR4780355
ERR4759858
ERR4758774
ERR4759829
ERR4837949
ERR4806155
ERR4806690
ERR4808048
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-10-17
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-10-11
2020-10-12
2020-10-13
2020-10-13
2020-10-15
2020-10-15
2020-10-16
2020-10-16
2020-10-16
2020-10-16
2020-10-16
2020-10-16
2020-10-16
\n", + "
\n", + "
\n", + "
depth: 3
group_id: d27cc1d7184dee5fda90e054229181
78
num_mutations: 19
num_nodes: 16
num_recurrent_mutations: 0
num_root_mutations: 8
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
B.1.1.25
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR15426038
SRR15425992
SRR15425927
SRR15425946
SRR15425843
SRR15425844
SRR15426538
SRR15426539
SRR15426540
SRR15426541
SRR15426543
SRR15426545
SRR15426546
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-10-24
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-10-19
2020-10-19
2020-10-20
2020-10-22
2020-10-22
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-23
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 1ec598da14b6f97f744d1eaac54fac
25
num_mutations: 27
num_nodes: 14
num_recurrent_mutations: 0
num_root_mutations: 11
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.351
B.1.351
B.1.351
B.1.351
B.1.351
B.1.351
B.1.351
B.1.351
B.1.351
B.1.351
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR13620167
SRR13620177
SRR13620175
SRR13620165
SRR13620172
SRR13620164
SRR13620168
SRR13620169
SRR13620170
SRR13620332
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-10-25
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-10-21
2020-10-21
2020-10-21
2020-10-21
2020-10-21
2020-10-21
2020-10-21
2020-10-21
2020-10-21
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-24
2020-10-24
2020-10-24
2020-10-24
\n", + "
\n", + "
\n", + "
depth: 3
group_id: b15a9c9d236bf883289bafadb77206
96
num_mutations: 36
num_nodes: 24
num_recurrent_mutations: 0
num_root_mutations: 18
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
B.1.1.7
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4825199
ERR4826986
ERR4827009
ERR4827181
ERR4827359
ERR4833787
ERR4833883
ERR4833995
ERR5067896
ERR4848983
ERR4848985
ERR4848989
ERR4868220
ERR4868810
ERR4869470
ERR4849068
ERR4867808
ERR4868473
ERR4868556
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-10-27
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-10-21
2020-10-21
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-23
2020-10-26
2020-10-26
2020-10-26
2020-10-26
2020-10-26
2020-10-26
2020-10-26
2020-10-26
\n", + "
\n", + "
\n", + "
depth: 4
group_id: 1fdc22a3d71184bf5a7f9e98dd1ae3
b4
num_mutations: 45
num_nodes: 21
num_recurrent_mutations: 0
num_root_mutations: 2
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.258
B.1
B.1
B.1
B.1
B.1
B.1
B.1
B.1
B.1
B.1
B.1
B.1.258
B.1.258
B.1
B.1
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR4826370
ERR4826912
ERR4825212
ERR4825213
ERR4825214
ERR4825216
ERR4825217
ERR4825237
ERR4806553
ERR4835008
ERR4835010
ERR4835027
ERR4835029
ERR4835033
ERR4835035
ERR4835039
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-11-03
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-10-28
2020-10-28
2020-10-28
2020-10-29
2020-10-30
2020-10-30
2020-10-30
2020-11-02
2020-11-02
2020-11-02
2020-11-02
\n", + "
\n", + "
\n", + "
depth: 5
group_id: 5c36f21aa405c88bd34ff96033ff89
20
num_mutations: 34
num_nodes: 18
num_recurrent_mutations: 1
num_root_mutations: 7
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.429
B.1.429
B.1.429
B.1.427
B.1.429
B.1.429
B.1.429
B.1.429
B.1.429
B.1.429
B.1.429
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR16791871
SRR16791875
SRR16791878
SRR14200299
SRR20694822
SRR20694960
SRR20694971
SRR13194997
SRR13194998
SRR13195002
SRR13195396
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-11-11
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-11-06
2020-11-07
2020-11-08
2020-11-08
2020-11-09
2020-11-09
2020-11-09
2020-11-09
2020-11-09
2020-11-10
2020-11-10
2020-11-10
\n", + "
\n", + "
\n", + "
depth: 2
group_id: 5b70951c30887fc8afe0f040c1fb96
03
num_mutations: 20
num_nodes: 15
num_recurrent_mutations: 0
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
B.1.36.21
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR9229274
ERR9229286
ERR9229289
ERR9229351
ERR9229292
ERR9229299
ERR9229302
ERR9229435
ERR9230677
ERR9229314
ERR9229356
ERR9229359
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-11-29
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-11-26
2020-11-26
2020-11-26
2020-11-26
2020-11-27
2020-11-27
2020-11-27
2020-11-27
2020-11-27
2020-11-27
2020-11-28
2020-11-28
2020-11-28
2020-11-28
2020-11-28
2020-11-28
2020-11-28
2020-11-28
2020-11-28
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 3ce5b7fe23eb1b3c7f8e76866d490f
10
num_mutations: 24
num_nodes: 26
num_recurrent_mutations: 0
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
AH.3
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR7432886
ERR7433639
ERR7434550
ERR7435267
ERR7428255
ERR7428431
ERR7430754
ERR7434921
ERR7434981
ERR7435189
ERR7428284
ERR7430412
ERR7431721
ERR7432697
ERR7433029
ERR7435269
ERR7435382
ERR7435391
ERR7435556
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-02
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-11-29
2020-11-29
2020-11-30
2020-11-30
2020-11-30
2020-11-30
2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-01
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 89e4e2aea36a90af5cc8f1c4dbf50a
a6
num_mutations: 28
num_nodes: 16
num_recurrent_mutations: 0
num_root_mutations: 10
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
B.1.348
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR13635822
SRR13635849
SRR13635760
SRR13635766
SRR13635777
SRR13635796
SRR13635738
SRR13635743
SRR13635744
SRR13635747
SRR13635753
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-05
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-02
2020-12-03
2020-12-04
2020-12-04
2020-12-04
2020-12-04
2020-12-04
2020-12-04
2020-12-04
2020-12-04
2020-12-04
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 5d526e8ec48319979dbc59b931a226
74
num_mutations: 20
num_nodes: 15
num_recurrent_mutations: 0
num_root_mutations: 8
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
B.1.375
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR13574059
SRR23599950
SRR13574077
SRR13727401
SRR23599472
SRR23599518
SRR23599553
SRR23599675
SRR23599676
SRR23599823
SRR23599945
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-07
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-01
2020-12-02
2020-12-02
2020-12-02
2020-12-03
\n", + "
\n", + "
\n", + "
depth: 4
group_id: ea08cca6ed7d8d2706236866493e2e
21
num_mutations: 61
num_nodes: 23
num_recurrent_mutations: 1
num_root_mutations: 8
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
B.1.561
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR16649272
SRR16649409
SRR16649453
SRR16649579
SRR17209939
SRR17210028
SRR18382089
SRR18382100
SRR18382326
SRR17209233
SRR17209942
SRR17209946
SRR13379691
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-07
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-01
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-05
2020-12-06
\n", + "
\n", + "
\n", + "
depth: 3
group_id: 10bbf6826c797a4ed0ef14c9929888
84
num_mutations: 13
num_nodes: 17
num_recurrent_mutations: 0
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
B.1.1.228
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR16649408
SRR13527693
SRR13527694
SRR13527695
SRR13527696
SRR13527697
SRR13527698
SRR13527703
SRR13527704
SRR13527705
SRR13527706
SRR13527707
SRR13527708
SRR13527699
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-09
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-03
2020-12-03
2020-12-07
2020-12-07
2020-12-07
2020-12-07
2020-12-07
2020-12-07
2020-12-07
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
2020-12-08
\n", + "
\n", + "
\n", + "
depth: 5
group_id: 3e9e182d0ee1fd98dc4117d2d1ac6e
68
num_mutations: 33
num_nodes: 33
num_recurrent_mutations: 1
num_root_mutations: 11
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
B.1.110.3
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR13971825
SRR23594654
SRR23594161
SRR23594172
SRR23594440
SRR23594683
SRR23594697
SRR23599573
SRR23599669
SRR23594164
SRR23594168
SRR23594170
SRR23594182
SRR23594184
SRR23594268
SRR23594413
SRR23594420
SRR23594426
SRR23594431
SRR23594433
SRR23594438
SRR23594447
SRR23594681
SRR23594684
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-16
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-11
2020-12-12
2020-12-12
2020-12-14
2020-12-14
2020-12-15
2020-12-15
2020-12-15
2020-12-15
2020-12-15
\n", + "
\n", + "
\n", + "
depth: 4
group_id: e59e3840b603cc89ed3b02a23919b2
23
num_mutations: 47
num_nodes: 17
num_recurrent_mutations: 2
num_root_mutations: 2
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1
B.1
B.1.404
B.1.404
B.1
B.1.404
B.1
B.1
B.1.404
B.1.404
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR17362555
SRR18738007
SRR18738101
SRR17363616
SRR17363918
SRR16174429
SRR17209344
SRR17209393
SRR17209571
SRR17365400
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-17
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-12
2020-12-12
2020-12-14
2020-12-14
2020-12-15
2020-12-15
2020-12-15
2020-12-15
2020-12-16
2020-12-16
\n", + "
\n", + "
\n", + "
depth: 3
group_id: dfdecbcb0325dd3c4a7186f74a3ab9
6a
num_mutations: 42
num_nodes: 14
num_recurrent_mutations: 0
num_root_mutations: 13
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " A.23.1
A.23.1
A.23.1
A.23.1
A.23.1
A.23.1
A.23.1
A.23.1
A.23.1
A.23.1
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " ERR5055235
ERR6407668
ERR5039038
ERR5055053
ERR5054999
ERR6407664
ERR6407764
ERR6407766
ERR5040233
ERR5040447
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " dict\n", + " date_added: 2020-12-24
\n", + "
\n", + " dates:\n", + "
\n", + " list\n", + " 2020-12-18
2020-12-20
2020-12-21
2020-12-21
2020-12-21
2020-12-23
2020-12-23
2020-12-23
2020-12-23
2020-12-23
\n", + "
\n", + "
\n", + "
depth: 1
group_id: 40bd501aeec70fb9fd9f49c18103da
da
num_mutations: 13
num_nodes: 11
num_recurrent_mutations: 0
num_root_mutations: 9
\n", + "
\n", + " pango_lineages:\n", + "
\n", + " list\n", + " B.1.2
B.1.2
B.1.2
B.1.2
B.1.2
B.1.2
B.1.2
B.1.2
B.1.2
B.1.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " strains:\n", + "
\n", + " list\n", + " SRR18468649
SRR18468617
SRR18455063
SRR18455064
SRR18468572
SRR18284141
SRR18284142
SRR18284143
SRR18284145
SRR18284320
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
... and 37 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + " samples_strain:\n", + "
\n", + " list\n", + " Wuhan/Hu-1/2019
SRR11772659
SRR11397730
SRR11597198
SRR11597221
SRR11597143
SRR11597154
SRR11597205
SRR11597176
SRR11597187
SRR11597121
SRR11597220
SRR11597159
SRR11597160
SRR11597199
SRR11597200
SRR12162232
SRR12162233
SRR12162234
SRR12162235
SRR11597180
SRR11494548
SRR11597151
SRR11597140
SRR11597170
SRR11597189
SRR11597192
SRR11597203
SRR11597158
SRR11597171
... and 516413 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TableRowsSizeHas Metadata
Edges612,65518.7 MiB\n", + " \n", + "
Individuals024 Bytes\n", + " \n", + "
Migrations08 Bytes\n", + " \n", + "
Mutations988,303143.9 MiB\n", + " ✅\n", + "
Nodes612,209378.8 MiB\n", + " ✅\n", + "
Populations08 Bytes\n", + " \n", + "
Provenances1,0821.4 MiB\n", + " \n", + "
Sites29,9032.2 MiB\n", + " ✅\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Provenance TimestampSoftware NameVersionCommandFull record
03 December, 2024 at 08:39:37 PMtsdate0.1.dev998+ga142decvariational_gamma\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: tsdate
version: 0.1.dev998+ga142dec
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " mutation_rate: 1.6466513086793251e-06
recombination_rate: None
time_units: days
progress: True
population_size: None
eps: 1e-06
max_iterations: 10
max_shape: 1000
rescaling_intervals: 0
rescaling_iterations: 5
match_segregating_sites: False
regularise_roots: True
singletons_phased: True
command: variational_gamma
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Darwin
node: Yans-M2
release: 23.4.0
version: Darwin Kernel Version 23.4.0:
Fri Mar 15 00:19:22 PDT 2024;
root:xnu-
10063.101.17~1/RELEASE_ARM64_T
8...
machine: arm64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.10.14
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 03:39:16 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
31.ts
2023-01-01
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2023-01-
01.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 611.2654550075531
user_time: 18203.42
sys_time: 86.25999999999999
max_memory: 53750706176
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 03:29:01 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
30.ts
2022-12-31
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
31.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 887.5071303844452
user_time: 27757.920000000002
sys_time: 124.72
max_memory: 63542251520
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 03:14:11 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
29.ts
2022-12-30
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
30.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 1887.1828444004059
user_time: 66069.43000000001
sys_time: 267.71999999999997
max_memory: 56169525248
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 02:42:40 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
28.ts
2022-12-29
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
29.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 2398.4501554965973
user_time: 86017.21
sys_time: 342.45000000000005
max_memory: 61248253952
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 02:02:39 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
27.ts
2022-12-28
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
28.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 2477.8750681877136
user_time: 88849.24
sys_time: 349.44
max_memory: 60546060288
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 01:21:18 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
26.ts
2022-12-27
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
27.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 1397.1890790462494
user_time: 48200.36
sys_time: 202.61999999999998
max_memory: 61108924416
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 12:57:57 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
25.ts
2022-12-26
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
26.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 782.910284280777
user_time: 24836.27
sys_time: 114.72
max_memory: 69384589312
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 12:44:51 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
24.ts
2022-12-25
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
25.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 379.86762833595276
user_time: 8615.1
sys_time: 54.269999999999996
max_memory: 40564613120
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
28 November, 2024 at 12:38:28 PMsc2ts0.0.4.dev354+g2a83d80/scratch/jk/work/github/sc2ts/sc2ts/__main__.py\n", + "
\n", + " Details\n", + " \n", + "
\n", + " \n", + "
\n", + " dict\n", + " schema_version: 1.0.0
\n", + "
\n", + " software:\n", + "
\n", + " dict\n", + " name: sc2ts
version: 0.0.4.dev354+g2a83d80
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " parameters:\n", + "
\n", + " dict\n", + " command: /scratch/jk/work/github/sc2ts/
sc2ts/__main__.py
\n", + "
\n", + " args:\n", + "
\n", + " list\n", + " extend
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
23.ts
2022-12-24
data/Viridian_tree_cons_seqs_i
mported/aln.db
data/Viridian_tree_cons_seqs_i
mported/metadata_deduplicated.
db
/scratch/jk/tmp/matches-
find_problematic_v2.db
results/find_problematic_v2/fi
nd_problematic_v2-2022-12-
24.ts
--num-threads
40
-vv
-l
logs/find_problematic_v2.log
--include-samples
voc-seeds.txt
--hmm-cost-threshold
7
--min-group-size
10
--min-root-mutations
2
--max-mutations-per-sample
5
--max-recurrent-mutations
2
--deletions-as-missing
--num-mismatches
4
--retrospective-window
7
--max-daily-samples
... and 3 more\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " environment:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " os:\n", + "
\n", + " dict\n", + " system: Linux
node: holly
release: 5.10.0-24-amd64
version: #1 SMP Debian 5.10.179-5
(2023-08-08)
machine: x86_64
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " python:\n", + "
\n", + " dict\n", + " implementation: CPython
version: 3.9.2
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " libraries:\n", + "
\n", + " dict\n", + " \n", + "
\n", + " tsinfer:\n", + "
\n", + " dict\n", + " version: 0.1.dev1455+g7522bcc
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " tskit:\n", + "
\n", + " dict\n", + " version: 0.6.0
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " resources:\n", + "
\n", + " dict\n", + " elapsed_time: 575.978342294693
user_time: 15840.23
sys_time: 78.9
max_memory: 57233268736
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
... 1072 more
\n", + "
\n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "import numpy as np\n", + "import tskit\n", + "import tszip\n", + "import sc2ts\n", + "\n", + "ts_dir = \"../data\"\n", + "ts_file = \"find_problematic_v2-2023-01-01.ts.il.dated.tsz\"\n", + "ds_file = \"viridian_2024-04-29.alpha1.zarr.zip\"\n", + "ds = sc2ts.Dataset(os.path.join(ts_dir, ds_file))\n", + "print(\"Loaded\", ds)\n", + "ts = tszip.decompress(os.path.join(ts_dir, ts_file))\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "27808869-1d27-4ef8-86d3-faacbc4e1a1a", + "metadata": {}, + "outputs": [], + "source": [ + "BA_1 = \"ERR7602255\"\n", + "BA_2 = \"SRR17712694\"\n", + "\n", + "Alpha_node = 78090\n", + "BA_1_node = 316856\n", + "BA_2_node = 346163 # origin is recombination node 346185\n", + "assert np.sum(ts.mutations_node == Alpha_node) == 17\n", + "assert np.sum(ts.mutations_node == BA_1_node) >= 30\n", + "assert ts.node(BA_1_node).metadata[\"strain\"] == BA_1\n", + "assert ts.node(BA_2_node).metadata[\"strain\"] == BA_2\n", + "assert np.sum(ts.mutations_node == 346185) >= 30" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c6bdbf15-5ad6-485c-b9db-f2b78ad9e730", + "metadata": {}, + "outputs": [], + "source": [ + "ts_pre_BA_1 = tszip.decompress(os.path.join(ts_dir, \"find_problematic_v2-2021-11-25.ts.tsz\"))\n", + "ts_pre_BA_2 = tszip.decompress(os.path.join(ts_dir, \"find_problematic_v2-2021-12-21.ts.tsz\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "43ee71ac-d207-4768-a1de-1623114938c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BA.1: ERR7602255\n", + "Was TA---------GTTTTAAGC\n", + "Now TAGTTTT---------AAGC\n", + "Missing TAGTTTT---......AAGC\n", + "\n", + "BA.2: SRR17712694\n", + "Was TAGTTTG---------AAGC\n", + "Now TAGTTTG---------AAGC\n", + 
"Missing TAGTTTG---......AAGC\n" + ] + } + ], + "source": [ + "nmut = 3 # how many SNP-like mutations do we place to represent a deletion\n", + "BA_1_haplotype = ds.haplotypes[BA_1]\n", + "BA_2_haplotype = ds.haplotypes[BA_2] # Just pick an early one at random\n", + "\n", + "print(f\"BA.1: {BA_1}\")\n", + "# Move the deletion in BA.1 from position 11283 to 11288, and mark the rest as missing\n", + "print(\n", + " \"Was \", \n", + " \"\".join(ds[\"variant_allele\"][:][np.arange(len(BA_1_haplotype)), BA_1_haplotype][11280: 11300])\n", + ")\n", + "# Check all alleles are the same across all sites\n", + "assert np.all(ds[\"variant_allele\"][:] == ds[\"variant_allele\"][0, :])\n", + "\n", + "# Shift the first 5 letters\n", + "BA_1_haplotype[11282: 11287] = BA_1_haplotype[11291: 11296]\n", + "BA_1_haplotype[11291: 11296] = BA_1_haplotype[11287]\n", + "print(\n", + " \"Now \",\n", + " \"\".join(ds[\"variant_allele\"][:][np.arange(len(BA_1_haplotype)), BA_1_haplotype][11280: 11300])\n", + ")\n", + "\n", + "# Set all deletions as missing, apart from the first at this site\n", + "deletion_idx = np.where(ds[\"variant_allele\"][0, :] == \"-\")[0][0]\n", + "BA_1_haplotype[BA_1_haplotype == deletion_idx] = tskit.MISSING_DATA\n", + "BA_1_haplotype[11287:(11287+nmut)] = deletion_idx\n", + "print(\n", + " \"Missing \",\n", + " \"\".join(ds[\"variant_allele\"][:][np.arange(len(BA_1_haplotype)), BA_1_haplotype][11280: 11300])\n", + ")\n", + "\n", + "\n", + "print(f\"\\nBA.2: {BA_2}\")\n", + "# Keep the deletion in BA.2 in the Alpha position\n", + "print(\n", + " \"Was \", \n", + " \"\".join(ds[\"variant_allele\"][:][np.arange(len(BA_2_haplotype)), BA_2_haplotype][11280: 11300])\n", + ")\n", + "print(\n", + " \"Now \",\n", + " \"\".join(ds[\"variant_allele\"][:][np.arange(len(BA_2_haplotype)), BA_2_haplotype][11280: 11300])\n", + ")\n", + "\n", + "# Set all deletions as missing, apart from the first at this site\n", + "deletion_idx = np.where(ds[\"variant_allele\"][0, :] == 
\"-\")[0][0]\n", + "BA_2_haplotype[BA_2_haplotype == deletion_idx] = tskit.MISSING_DATA\n", + "BA_2_haplotype[11287:(11287+nmut)] = deletion_idx\n", + "print(\n", + " \"Missing \",\n", + " \"\".join(ds[\"variant_allele\"][:][np.arange(len(BA_2_haplotype)), BA_2_haplotype][11280: 11300])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f80ac9f3-1547-4663-bd02-3681ff212b19", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded Dataset at synthetic_align.zarr with 1 samples and 1 metadata fields\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "path = \"synthetic_align.zarr\"\n", + "sc2ts.Dataset.new(path)\n", + "strain = ts.node(BA_2_node).metadata[\"strain\"]\n", + "sc2ts.Dataset.append_alignments(path, {strain: BA_2_haplotype})\n", + "sc2ts.Dataset.add_metadata(path, pd.DataFrame({'date': \"2022-01-01\"}, index=[strain]))\n", + "synth_ds = sc2ts.Dataset(path)\n", + "print(\"Loaded\", synth_ds)" + ] + }, + { + "cell_type": "markdown", + "id": "090b19ea-b668-455d-9ce9-f91588fbbea7", + "metadata": {}, + "source": [ + "Add the deletion into the tree sequence at the Alpha position 11288" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7f244369-fce0-4bab-8296-2b0c40c3d878", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Node(id=78090, flags=4194304, time=815.169405810066, population=-1, individual=-1, metadata={'Imputed_Viridian_pangolin': 'B.1.1.7', 'mn': 744.8553533217025, 'sc2ts': {'date_added': '2020-12-29', 'mutations': ['C3267T']}, 'vr': 554.8094973719983})\n", + "Node(id=78090, flags=4194304, time=427.5390669921875, population=-1, individual=-1, metadata={'sc2ts': {'date_added': '2020-12-29', 'mutations': ['C3267T']}})\n", + "\n", + "Check that there is a GTTTG before the newly created deletion\n", + "CTAGTTTGTCTGGTTTTAAG\n", + "CTAGTTTG---GGTTTTAAG\n" + ] + } + ], + "source": [ + "# Check the nodes 
look the same in the pre_BA_1 ts\n", + "print(ts.node(Alpha_node))\n", + "print(ts_pre_BA_1.node(Alpha_node))\n", + "print()\n", + "assert ts.node(Alpha_node).metadata[\"sc2ts\"] == ts_pre_BA_1.node(Alpha_node).metadata[\"sc2ts\"]\n", + "tables = ts_pre_BA_1.dump_tables()\n", + "for pos in range(11288, 11288+nmut):\n", + " tables.mutations.add_row(node=Alpha_node, time=ts_pre_BA_1.node(Alpha_node).time, site=ts.site(position=pos).id, derived_state=\"-\")\n", + "tables.sort()\n", + "ts_pre_BA_1_del = tables.tree_sequence()\n", + "\n", + "print(\"Check that there is a GTTTG before the newly created deletion\")\n", + "for align in ts_pre_BA_1_del.simplify([1, Alpha_node]).alignments(left=11280, right=11300):\n", + " print(align)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c6b7fd9c-d2fb-4496-9aad-e39b33fe0ad8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Node(id=78090, flags=4194304, time=815.169405810066, population=-1, individual=-1, metadata={'Imputed_Viridian_pangolin': 'B.1.1.7', 'mn': 744.8553533217025, 'sc2ts': {'date_added': '2020-12-29', 'mutations': ['C3267T']}, 'vr': 554.8094973719983})\n", + "Node(id=78090, flags=4194304, time=453.5390669921875, population=-1, individual=-1, metadata={'sc2ts': {'date_added': '2020-12-29', 'mutations': ['C3267T']}})\n", + "\n", + "Check that there is a GTTTG before the newly created deletion\n", + "CTAGTTTGTCTGGTTTTAAG\n", + "CTAGTTTG---GGTTTTAAG\n" + ] + } + ], + "source": [ + "# Check the nodes look the same in the pre_BA_2 ts\n", + "print(ts.node(Alpha_node))\n", + "print(ts_pre_BA_2.node(Alpha_node))\n", + "print()\n", + "assert ts.node(BA_1_node).metadata[\"sc2ts\"] == ts_pre_BA_2.node(BA_1_node).metadata[\"sc2ts\"]\n", + "tables = ts_pre_BA_2.dump_tables()\n", + "for pos in range(11288, 11288+nmut):\n", + " tables.mutations.add_row(node=BA_1_node, time=ts_pre_BA_2.node(BA_1_node).time, site=ts.site(position=pos).id, 
derived_state=\"-\")\n", + "tables.sort()\n", + "ts_pre_BA_2_del = tables.tree_sequence()\n", + "\n", + "print(\"Check that there is a GTTTG before the newly created deletion\")\n", + "for align in ts_pre_BA_2_del.simplify([1, BA_1_node]).alignments(left=11280, right=11300):\n", + " print(align)" + ] + }, + { + "cell_type": "markdown", + "id": "4e2e2197-6225-4715-a04a-c9c3b18a3d10", + "metadata": {}, + "source": [ + "# Run one round of sc2ts\n", + "\n", + "Match the synth sample (with the deletion) into the pre_BA.2 ARG\n", + "\n", + "Code is taken from https://github.com/jeromekelleher/sc2ts/blob/89421fb4cc6bd8d9cf89a6cc8bdf70ea8cc95c1a/sc2ts/cli.py#L835" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ce418dd7-d40b-4082-b58b-a325f4edac71", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matched against [PathSegment(left=0, right=22674, parent=67513), PathSegment(left=22674, right=29904, parent=338914)] (likelihood=6.502475810729579e-73, cost=38)\n", + "Matched against [PathSegment(left=0, right=9345, parent=67513), PathSegment(left=9345, right=29904, parent=338914)] (likelihood=6.502475810729579e-73, cost=38)\n" + ] + } + ], + "source": [ + "import json\n", + "import dataclasses\n", + "\n", + "args = json.loads(ts_pre_BA_2_del.provenance(-1).record)['parameters']['args']\n", + "mismatch_threshold= 100 # Not sure what this should be\n", + "\n", + "@dataclasses.dataclass(frozen=True)\n", + "class HmmRun:\n", + " strain: str\n", + " num_mismatches: int\n", + " direction: str\n", + " match: sc2ts.HmmMatch\n", + "\n", + " def asdict(self):\n", + " d = dataclasses.asdict(self)\n", + " d[\"match\"] = dataclasses.asdict(self.match)\n", + " d[\"match\"][\"cost\"] = d[\"match\"][\"cost\"].item()\n", + " d[\"match\"][\"likelihood\"] = d[\"match\"][\"likelihood\"].item()\n", + " return d\n", + "\n", + " def asjson(self):\n", + " return json.dumps(self.asdict())\n", + "\n", + "samples = 
sc2ts.preprocess(\n", + " [strain],\n", + " dataset=synth_ds,\n", + " keep_sites=ts_pre_BA_2_del.sites_position.astype(int),\n", + ")\n", + "for sample in samples:\n", + " if sample.haplotype is None:\n", + " raise ValueError(f\"No alignment stored for {sample.strain}\")\n", + "\n", + "run = {}\n", + "for direction in (\"forward\", \"reverse\"):\n", + " sc2ts.match_tsinfer(\n", + " samples=samples,\n", + " ts=ts_pre_BA_2_del,\n", + " num_mismatches=int(args[args.index(\"--num-mismatches\") + 1]),\n", + " deletions_as_missing=False,\n", + " mismatch_threshold=mismatch_threshold,\n", + " mirror_coordinates=direction == \"reverse\",\n", + " )\n", + " for sample in samples:\n", + " run[direction] = HmmRun(\n", + " strain=sample.strain,\n", + " num_mismatches=int(args[args.index(\"--num-mismatches\") + 1]),\n", + " direction=direction,\n", + " match=sample.hmm_match,\n", + " )\n", + " print(\n", + " f\"Matched against {run[direction].match.path}\",\n", + " f\"(likelihood={run[direction].match.likelihood}, cost={run[direction].match.cost})\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "05ab4bcb-e88d-45dd-afe2-30e53d4b031c", + "metadata": {}, + "source": [ + "Look specifically at deletions" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "122fa1a0-ba0f-4bd1-b29e-83fea2cba552", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HMM direction: forward\n", + " Mutations to a deletion: T11288-, C11289-, T11290-\n", + "HMM direction: reverse\n", + " Mutations to a deletion: \n" + ] + } + ], + "source": [ + "for direction in (\"forward\", \"reverse\"):\n", + " print(\"HMM direction:\", direction)\n", + " print(\n", + " \" Mutations to a deletion:\", \n", + " \", \".join([str(m) for m in run[direction].match.mutations if m.derived_state==\"-\"])\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8ed07d89-9836-4a44-bd66-2aebcd1ee1dc", + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/plain": [ + "Node(id=338914, flags=67108864, time=394.64384060506484, population=-1, individual=-1, metadata={'Imputed_Viridian_pangolin': 'BA.1.1', 'mn': 400.3543071136764, 'sc2ts': {'date_added': '2021-12-21', 'group_id': '1249faedcdfb43b7c4f37e999399d238'}, 'vr': 160.28357122447193})" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts.node(338914)" + ] + }, + { + "cell_type": "markdown", + "id": "e28144dd-8dc3-4df0-a402-bdce81cc3563", + "metadata": {}, + "source": [ + "It appears as if the forward run picks the deletion region from the LH parent (not BA.1.1), and hence requires 3 deletions, whereas the reverse run takes this region from the RH parent, which is BA.1.1 and hence already has the deletion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e65fdde-782d-443e-9c21-9d1a8fd37906", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}