Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ISUgenomics/common_analyses
Browse files Browse the repository at this point in the history
  • Loading branch information
aseetharam committed Feb 2, 2017
2 parents 6096678 + c05aea2 commit f6f8fea
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 9 deletions.
8 changes: 4 additions & 4 deletions GATK_00_PrepareRef.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Prepares the Reference Genome for mapping as well as for using it with GATK pipeline
# You need to supply the referece genome as REF below or as:
# ./GATK_00_PrepareRef.sh your_genome.fasta
module load picard_tools
module load picard
module load samtools
module load bwa
module load bedtools
Expand All @@ -11,16 +11,16 @@ module load python
REF="$1"
#index genome for (a) picard, (b) samtools and (c) bwa
parallel <<FIL
java -Xmx100G -jar $PICARD/picard.jar CreateSequenceDictionary \
java -Xmx100G -jar $PICARD_HOME/picard.jar CreateSequenceDictionary \
REFERENCE=${REF} \
OUTPUT=${REF%.*}.dict
samtools faidx ${REF}
bwa index -a bwtsw ${REF}
FIL
# Create interval list (here 100 kb intervals)
python fasta_length.py ${REF} > ${REF%.*}_length.txt
fasta_length.py ${REF} > ${REF%.*}_length.txt
bedtools makewindows -w 100000 -g ${REF%.*}_length.txt > ${REF%.*}_100kb_coords.bed
java -Xmx100G -jar $PICARD/picard.jar BedToIntervalList \
java -Xmx100G -jar $PICARD_HOME/picard.jar BedToIntervalList \
INPUT=${REF%.*}_100kb_coords.bed \
SEQUENCE_DICTIONARY=${REF%.*}.dict \
OUTPUT=${REF%.*}_100kb_gatk_intervals.list
10 changes: 5 additions & 5 deletions GATK_01_RunBWA.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@

#command genomeModule READ1 READ2

module load picard
module load picard/2.4.1
module load java
module load bwa/0.7.13
module load bwa/0.7.12
module load samtools
module load $1

REF="$GENOMEDIR/$GNAME"
# this option might be the frequetly changed, hence not it's a variable
THREADS="8"
THREADS="16"
# if the reads are paired then use -p option
if [ "$#" -eq 3 ]; then
READ1="$2"
READ2="$3"
OUTNAME=$(basename ${READ1%.*} | cut -f 1-2 -d "_")
bwa mem -M -t ${THREADS} ${REF} ${READ1} ${READ2} | samtools view -buS - > ${OUTNAME}.bam
bwa mem -M -x ont2d -t ${THREADS} ${REF} ${READ1} ${READ2} | samtools view -buS - > ${OUTNAME}.bam
# if not just use the reads as single reads
elif [ "$#" -eq 1 ]; then
READ1="$2"
OUTNAME=$(basename ${READ1%.*} | cut -f 1-2 -d "_")
bwa mem -M -t ${THREADS} ${REF} ${READ1} | samtools view -buS - > ${OUTNAME}.bam
bwa mem -M -x ont2d -t ${THREADS} ${REF} ${READ1} | samtools view -buS - > ${OUTNAME}.bam
# if number of arguments do not match, raise error
else
echo "ERROR: INVALID NUMBER OF ARGUMENTS"
Expand Down
49 changes: 49 additions & 0 deletions fastaSortByName.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/perl -w

my $usage="\nUsage: $0 [-hrg] [fastaFileName1 ...]\n".
" -h: help\n".
" -r: reverse\n" .
" -g: remove gaps '-' from the sequence\n".
"Sort FASTA sequences alphabetically by names. If multiple files are \n".
"given, sequences in all files are marged before sorting. If no \n".
"argument is given, it will take STDIN as the input\n";

our($opt_h, $opt_g, $opt_r);

use Bio::SeqIO;

use Getopt::Std;
getopts('hgr') || die "$usage\n";
die "$usage\n" if (defined($opt_h));

my $format = "fasta";
my @seqArr = ();

@ARGV = ('-') unless @ARGV;
while (my $file = shift) {
my $seqio_obj = Bio::SeqIO->new(-file => $file, -format => $format);
while (my $seq = $seqio_obj->next_seq()) {
push(@seqArr, $seq);
}
}

if (defined($opt_r)) {
@seqArr = sort { - ($a->id() cmp $b->id()) } @seqArr;
} else {
@seqArr = sort { $a->id() cmp $b->id() } @seqArr;
}


my $seqOut = Bio::SeqIO->new(-fs => \*STDOUT, -format => $format);
foreach my $s (@seqArr) {
if(defined($opt_g)) {
my $tmp = $s->seq();
$tmp =~ s/-//g;
$s->seq($tmp);
}
$seqOut->write_seq($s);
}

exit;


0 comments on commit f6f8fea

Please sign in to comment.