-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_noninteger_CN_from_ASCAT.py
74 lines (59 loc) · 2.14 KB
/
get_noninteger_CN_from_ASCAT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 12 10:50:09 2017
@author: lpsmith
"""
from __future__ import division
from os import walk
from os import path
from os import mkdir
from os.path import isfile
import lucianSNPLibrary as lsl
# Path prefix of the per-gamma pASCAT result directories. The main loop
# appends "<gamma>/<constraint>/" to it to locate each raw-segments file.
pASCAT_root = "gamma_test_output/pASCAT_input_g"
# Directory tree that is walked to discover patients: every file name in it
# containing "_copynumber_segments" contributes its leading "<patient>_" id.
patient_dir = pASCAT_root + "500/"
# Directory that receives the per-sample "*_nonint_CNs.txt" tables and
# "missing_samples.tsv"; it is created on start-up if absent.
outdir = "nonintegerCNs/"
# True:  calls come from lsl.getSingleGammaCallsFor(patient, "500").
# False: calls come from lsl.getCanonicalAscatCallsFor(patient).
use500 = True
# When True, only the patients listed in `somepatients` are processed.
onlysomepatients = False
somepatients = ["772"]
# Not used by any live code visible here; the only reference is a disabled
# (commented-out) skip in the main loop.
firstpatients = ["17", "42", "55", "59", "74", "43", "184", "163", "396", "1047"]
# ---------------------------------------------------------------------------
# Script body: for every patient discovered under `patient_dir`, look up the
# (sample, gamma, constraint) calls and ask lsl.collatepASCATOutput to write
# one table per sample into `outdir`.  Any sample for which that call returns
# a false value is recorded in "missing_samples.tsv".
# ---------------------------------------------------------------------------

# The output directory must exist before any file is opened inside it.
if not path.isdir(outdir):
    mkdir(outdir)

(labels, rev_labels) = lsl.getSNPLabelsAll(False)

# Collect every file name found anywhere beneath patient_dir (directory
# components are discarded; only the bare names are needed).
files = []
for (__, __, filenames) in walk(patient_dir):
    files += filenames

# `with` guarantees both the missing-samples log and each per-sample output
# file are flushed and closed even if one of the lsl calls raises.
with open(outdir + "missing_samples.tsv", "w") as missing:
    for f in files:
        if "_copynumber_segments" not in f:
            continue
        # File names start with "<patient>_".
        patient = f.split("_")[0]
        # NOTE(review): if several matching files share a patient prefix,
        # that patient is processed once per file (same outputs rewritten,
        # duplicate rows in missing_samples.tsv) -- confirm whether intended.
        if onlysomepatients and patient not in somepatients:
            continue
#        if patient in firstpatients:
#            #These need to be re-run, since the SNPs were off.
#            continue
        if use500:
            canonical_ascats = lsl.getSingleGammaCallsFor(patient, "500")
        else:
            canonical_ascats = lsl.getCanonicalAscatCallsFor(patient)
        for canon in canonical_ascats:
            (sample, gamma, constraint) = canon
            # The string "None" (not the None object) marks a sample with no
            # usable gamma; there is nothing to collate for it.
            if gamma == "None":
                continue
            if constraint == "eight":
                # Built as a single string so the message reads the same
                # under the python2 interpreter named in the shebang (where
                # print with several parenthesised arguments shows a tuple)
                # and under Python 3.
                print("Found different 'eight' solution for {} {} {}".format(
                    patient, sample, gamma))
            rawsegs_dir = pASCAT_root + gamma + "/" + constraint + "/"
            rawsegs_file = patient + "_" + sample + "_raw_segments.txt"
            outname = (outdir + patient + "_" + sample + "_g" + gamma + "_"
                       + constraint + "_nonint_CNs.txt")
            #print("Analyzing", patient, sample, constraint)
            with open(outname, "w") as outfile:
                outfile.write("patient\tbiopsy\tchrom\tsegstart\tsegend\trawA\trawB\tintA\tintB\n")
                collated = lsl.collatepASCATOutput(rawsegs_dir, rawsegs_file,
                                                   outfile, labels)
            if not collated:
                # One tab-separated row per sample that could not be collated.
                missing.write("\t".join((patient, sample, gamma, constraint)) + "\n")