-
Notifications
You must be signed in to change notification settings - Fork 1
/
tssplit.pl
94 lines (81 loc) · 2.94 KB
/
tssplit.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/perl
# this utility ignores chrM and other incomplete chromosomes with "-" or "_" in name
use strict;
use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END);
use File::Copy;
my $ARGC=scalar @ARGV;
if ($ARGC!=1) { die "Usage: tssplit tssfile.sgr\n"; }
my $infile=$ARGV[0];
open (INP,"$infile") || die "Can't read \"$infile\": $!";
my $basename=substr($infile,0,rindex($infile,"."));
my $outbidi=$basename.".bidi.sgr";
my $outuni=$basename.".uni.sgr";
my $outpairs=$basename.".pairs.bed";
open (OUTB,">$outbidi") || die "Can't create \"$outbidi\": $!";
open (OUTU,">$outuni") || die "Can't create \"$outuni\": $!";
open (OUTP,">$outpairs") || die "Can't create \"$outpairs\": $!";
my $i=0;
my (@chr, @coord, @chain, @name, @uni);
while (<INP>) {
chomp;
chomp;
if (length($_) < 5) { next; }
my @arr=split('\s+');
my $currchr=$arr[0];
my $currcoord=$arr[1];
my $currchain=$arr[2];
my $currname=$arr[3];
if ($currchr =~ m /_|-/) { print "chr $currchr skipped\n"; next; }
if (uc($currchr) =~ m /CHRM/) { print "chr $currchr skipped\n"; next; }
my $sname = $currname =~ s/_\d+//r;
$chr[$i]=$currchr;
$coord[$i]=$currcoord;
$chain[$i]=$currchain;
$name[$i]=$sname;
$uni[$i]=1;
$i++;
}
close (INP);
my $totaltss=$i;
print ("$totaltss TSS read\n");
undef $i;
my $prevperc=0;
for (my $i=1;$i<$totaltss;$i++) {
if ($chr[$i] ne $chr[$i-1]) { next; }
if ($chain[$i] eq $chain[$i-1]) { next; }
if ($chain[$i] ne "+") { next; } # Divergent/Convergent BIDI switch
if ($coord[$i]-$coord[$i-1]>1000) { next; } # Distance between TSS to be bidirectional
print OUTP "$chr[$i]\t$coord[$i-1]\t$coord[$i]\n"; # \t$chain[$i-1]\t$chain[$i]\n";
$uni[$i]=0;
$uni[$i-1]=0; }
for (my $i=0;$i<$totaltss;$i++) {
if ($uni[$i]==1) { print OUTU "$chr[$i]\t$coord[$i]\t$chain[$i]\t$name[$i]\n"; }
if ($uni[$i]==0) { print OUTB "$chr[$i]\t$coord[$i]\t$chain[$i]\t$name[$i]\n"; }
}
close (OUTU);
close (OUTB);
close (OUTP);
my $cmd="sort -k1,1 -k2,2n ". $outuni . " -o ". $outuni;
`$cmd`;
$cmd="uniq $outbidi > uni.tmp";
`$cmd`;
move("uni.tmp",$outbidi);
$cmd="uniq $outuni > uni.tmp";
`$cmd`;
move("uni.tmp",$outuni);
$cmd="wc -l $outuni";
my @arr1=`$cmd`;
$_=$arr1[0];
@arr1=split(/\s+/,$_);
my $uni_strings=$arr1[0];
undef @arr1;
$cmd="wc -l $outbidi";
@arr1=`$cmd`;
$_=$arr1[0];
@arr1=split(/\s+/,$_);
my $bidi_strings=$arr1[0];
undef @arr1;
my $cmd="sort -k1,1 -k2,2n ". $outpairs . " -o ". $outpairs;
`$cmd`;
my $sumstr=$bidi_strings+$uni_strings;
print "Bidirectional TSS: $bidi_strings, unidirectional TSS: $uni_strings, TSS sum: $sumstr, totalTSS: $totaltss\n";