forked from Kevinlega/DBGDE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSecond_program.py
120 lines (94 loc) · 3.81 KB
/
Second_program.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
##########################################################################################
# #
# Copyright 2018 Megaprobe-Lab #
# #
# This is software created by the megaprobe lab under the GPL3 license. #
# #
# This program parses any ole GFA file you may have with read information for two #
# organisms. To run the program, utilize the command: "Python2.7 Second_Program.py GFA". #
# The GFA file utilized must be located on the same directory as the parser or full path #
# must be specified. #
# #
##########################################################################################
# Load Modules
import os
import sys
import fileinput
import argparse
import gfapy
# Make output directory
if not os.path.exists("Parser_Output"):
os.mkdir("Parser_Output")
############################################
# #
# Readin GFA format input #
# #
############################################
# def coverageSegmentF(C,kmerA,kmerB,Db,k):
# global g
# kA,kB = kmerA,kmerB
# kA = kA.split("A:")
# kA = kA[1]
# kA = kA.split(',B:')
# kA[1] = kA[1].split(")")[0]
# kB = kB.split("A:")
# kB = kB[1]
# kB = kB.split(',B:')
# kB[1] = kB[1].split(")")[0]
# coverageA = abs(int(kA[0]) - int(kA[1]))
# coverageB = abs(int(kB[0]) - int(kB[1]))
# DifEx = (coverageA + coverageB)/2
# if DifEx < C:
# return False
# else:
# a = ("L\t%s\t+\t%s\t%s\t%dM\tKC:i:%d"%(kmerA,kmerB,dB,k-1,int(DifEx)))
# return a
def positive(C):
C = int(C)
if C <= 0:
sys.exit("Coverage must be positive integer")
return C
def main():
global C,g,args
#maybe clean all the unnatached segments
output = gfapy.Gfa()
output.add_line(g.header)
all_segments = [output.add_line(str(line)) for line in g.segments]
links = [str(line) for line in g.edges]
for x in range(len(links)-1):
link = links[x]
kmerA = link.split("\t")
k = kmerA[5]
dB = kmerA[4]
kmerB = kmerA[3]
kmerA = kmerA[1]
kA,kB = kmerA,kmerB
kA = kA.split("A:")
kA = kA[1]
kA = kA.split(',B:')
kA[1] = kA[1].split(")")[0]
kB = kB.split("A:")
kB = kB[1]
kB = kB.split(',B:')
kB[1] = kB[1].split(")")[0]
coverageA = abs(int(kA[0]) - int(kA[1]))
coverageB = abs(int(kB[0]) - int(kB[1]))
DifEx = (coverageA + coverageB)/2
if DifEx < C:
pass
else:
output.add_line("L\t%s\t+\t%s\t%s\t%s\tKC:i:%d"%(kmerA,kmerB,dB,k,int(DifEx)))
filename = os.path.join("Parser_Output",args.output)
output.to_file(filename)
parser = argparse.ArgumentParser(description="Creates a GFA file with one or two organisms given a kmer size. \
If coverage is give eliminates below that coverage ")
parser.add_argument("-f", required=True, help="the output file of dbg.py")
parser.add_argument("-output",required=False,default='output.gfa',help="Output GFA file name")
parser.add_argument("-C", required=True,type=positive,help="Eliminates links with Diferential Expresion below coverage")
args = parser.parse_args()
g = gfapy.Gfa.from_file(args.f)
C = args.C
# To add more organisms add this parser.add_argument("-B", nargs='+', required=True, help="Organism_B_files")
# change the name and do another call to build and do multiple merge_dicts calls
if __name__ == "__main__":
main()