-
Notifications
You must be signed in to change notification settings - Fork 9
/
non_ref_contigs.sh
executable file
·80 lines (66 loc) · 2.85 KB
/
non_ref_contigs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/sh
ratio=0.5
out_dir=$1
seq_name=$2
SCRIPTS=$3
. $SCRIPTS/configs.cf
fasta=$4 # contigs of unmapped reads
assem_fasta=$5 # whole genome sequence assembly
rm -rf $out_dir/$seq_name.inserted.assembly.intervals
for scf_name in `less "$assem_fasta" | grep ">" | awk '{print $1}' | cut -d '>' -f2`
do
$BIN/pull_fasta_scaf $assem_fasta $scf_name > $out_dir/temp.fasta
len=`$BIN/seq_len $out_dir/temp.fasta`
lastz T=2 Y=3400 $out_dir/temp.fasta $REF_FASTA --ambiguous=iupac --format=maf > $out_dir/temp.maf
$BIN/find_inserted_intervals $out_dir/temp.maf $scf_name $len >> $out_dir/$seq_name.inserted.assembly.intervals
rm -rf $out_dir/temp.fasta
rm -rf $out_dir/temp.maf
done
rm -rf $out_dir/$seq_name.inserted.intervals
for scf_name in `less "$fasta" | grep ">" | awk '{print $1}' | cut -d '>' -f2`
do
$BIN/pull_fasta_scaf $fasta $scf_name > $out_dir/temp.fasta
len=`$BIN/seq_len $out_dir/temp.fasta`
$BIN/lastz T=2 Y=3400 $out_dir/temp.fasta $REF_FASTA --ambiguous=iupac --format=maf > $out_dir/temp.maf
$BIN/find_inserted_intervals $out_dir/temp.maf $scf_name $len >> $out_dir/$seq_name.inserted.intervals
rm -rf $out_dir/temp.maf
done
rm -rf $out_dir/temp.*
rm -rf $out_dir/$seq_name.inserted.original.intervals
rm -rf $out_dir/$seq_name.inserted.original.temp.intervals
while read line
do
scf_name=`echo $line | awk '{print $1}'`
b=`echo $line | awk '{print $2}'`
e=`echo $line | awk '{print $3}'`
$BIN/pull_fasta_scaf $fasta $scf_name > $out_dir/temp.seq.fasta
$BIN/dna $b,$e $out_dir/temp.seq.fasta > $out_dir/temp.cur.seq.fasta
$BIN/lastz T=2 Y=3400 $assem_fasta[multi] $out_dir/temp.cur.seq.fasta --ambiguous=iupac --format=maf > $out_dir/temp.seq.maf
$BIN/scf_lift_over $out_dir/temp.seq.maf >> $out_dir/$seq_name.inserted.original.intervals
rm -rf $out_dir/temp.seq.maf
rm -rf $out_dir/temp.seq.fasta
rm -rf $out_dir/temp.cur.seq.fasta
done < $out_dir/$seq_name.inserted.intervals
if [ -f $out_dir/$seq_name.inserted.original.intervals ]
then
sort $out_dir/$seq_name.inserted.original.intervals > $out_dir/$seq_name.inserted.original.temp.intervals
$BIN/merge_scf_intervals $out_dir/$seq_name.inserted.original.temp.intervals > $out_dir/$seq_name.inserted.original.intervals
else
echo -n "" > $out_dir/$seq_name.inserted.original.intervals
fi
rm -rf $out_dir/$seq_name.final.inserted.original.intervals
if [ -f $out_dir/$seq_name.inserted.original.intervals ]
then
num_len=`less $out_dir/$seq_name.inserted.original.intervals | wc -l`
if [ $num_len -gt 0 ]
then
if [ -f $out_dir/$seq_name.inserted.assembly.intervals ]
then
num_len=`less $out_dir/$seq_name.inserted.assembly.intervals | wc -l`
if [ $num_len -gt 0 ]
then
$BIN/common_scf_intervals $out_dir/$seq_name.inserted.original.intervals $out_dir/$seq_name.inserted.assembly.intervals > $out_dir/$seq_name.final.inserted.original.intervals
fi
fi
fi
fi