-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcheck_unequal_cns.py
77 lines (66 loc) · 2.37 KB
/
check_unequal_cns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 12 13:43:10 2018
@author: lpsmith
"""
from __future__ import division
from os import walk
from os import path
from os import mkdir
import lucianSNPLibrary as lsl
CNdir = "nonintegerCNs/"
balanced_dir = "balanced_calls/"
onlysomepatients = False
somepatients = ["997"]
firstpatients = ["17", "42", "55", "59", "74", "43", "184", "163", "396", "1047"]
def readAmbiguousCallsFromASCAT(f):
cnfile = open(CNdir + f, "r")
unbal_calls = {}
for line in cnfile:
if "patient" in line:
continue
(patient, sample, chr, start, end, rawA, rawB, intA, intB) = line.split()
if intA == "NA" or intB == "NA":
continue
rawA = float(rawA)
rawB = float(rawB)
if abs(rawA-rawB) < 0.1 and intA != intB:
if chr not in unbal_calls:
unbal_calls[chr] = []
unbal_calls[chr].append((int(start), int(end), rawA))
return unbal_calls
def compareAndReport(unbal_calls, bal_calls, patient, sample, ploidy, allbal, allunbal):
for chr in unbal_calls:
for ub_seg in unbal_calls[chr]:
for b_seg in bal_calls[chr]:
if bal_calls[chr][b_seg] == "Unknown":
continue
(bstart, bend) = b_seg
(ubstart, ubend, rawA) = ub_seg
if bstart >= ubstart and bend <= ubend:
if bal_calls[chr][b_seg] == "Balanced":
allbal.append(rawA)
# print("Balanced:", chr, ub_seg)
else:
allunbal.append(rawA)
# print("Unbalanced:", chr, ub_seg)
allbal = []
allunbal = []
files = []
for (__, __, f) in walk(CNdir):
files += f
for f in files:
if "nonint" not in f:
continue
(patient, sample, ploidy) = f.split("_")[0:3]
if onlysomepatients and patient not in somepatients:
continue
unbal_calls = readAmbiguousCallsFromASCAT(f)
bal_calls = lsl.readBalancedCalls(patient, sample)
print ("Comparing", patient, sample, ploidy)
compareAndReport(unbal_calls, bal_calls, patient, sample, ploidy, allbal, allunbal)
print("All balanced values:")
lsl.createPrintAndSaveHistogram(allbal, "", 0.001)
print("All unbalanced values:")
x = lsl.createPrintAndSaveHistogram(allunbal, "", 0.001, axis=[0, 5, 0])