Skip to content

Commit

Permalink
Merge branch 'release/v3.1.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Feb 8, 2019
2 parents 5bb5ef4 + a601732 commit 9455fb8
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 52 deletions.
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ env:

addons:
apt:
update: true
packages:
- build-essential
- autoconf
Expand All @@ -25,6 +26,13 @@ addons:
- lsof
- libbz2-dev
- liblzma-dev
- libgnutls-dev
- libtasn1-6-dev
- p11-kit
- libxml2-dev
- libgd-dev
- psmisc
- libdb-dev

install: true

Expand Down
9 changes: 9 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
# CHANGES

## 3.1.0

* Incorporates updated pindel which improves sensitivity
* Internally interpret QCFAIL to determine if whole pair fails

## 3.0.6

* Fixed version tag

## 3.0.5

* Handles species names that contain spaces
* Modified checks for species, assembly and checksum

## 3.0.4

* Corrected an output bug for pindel BAM/CRAM that occurred when more than 1 chr in the output files had no reads.
Expand Down
69 changes: 33 additions & 36 deletions c++/pindel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ unsigned int BalanceCutoff = 50; //#
const bool Analyze_TD_INV_LI_Others = false; //#
const unsigned int NumRead2ReportCutOff = 3; //#
short MaxRangeIndex = 9;// 5 or 6 or 7 or maximum 8//#
const float MaximumAllowedMismatchRate = 0.1; //#
const float MaximumAllowedMismatchRate = 0.02; //# The value of the original is too big; revise in 2018.11.26 by Liang Hao
const short Min_Num_Matched_Bases = 30; //#
// #########################################################
//const float Double_Seq_Error_Rate_Per_Side = Seq_Error_Rate_Per_Side * 2;
Expand Down Expand Up @@ -820,7 +820,7 @@ int main (int argc, char *argv[]) {
}

cerr << "Searching breakpoints of deletion events" << endl;
for (short RangeIndex = 1; RangeIndex < MaxRangeIndex; RangeIndex++) {
for (short RangeIndex = 0; RangeIndex < MaxRangeIndex; RangeIndex++) { //RangeIndex begin from 0 revise in 2018.11.26 by Liang Hao

CountFarEnd = 0;
CountFarEndMinus = 0;
Expand Down Expand Up @@ -866,7 +866,7 @@ int main (int argc, char *argv[]) {
}

cerr << "Searching breakpoints of SI events" << endl;
for (short RangeIndex = 1; RangeIndex < 2; RangeIndex++) {
for (short RangeIndex = 0; RangeIndex < 2; RangeIndex++) { //RangeIndex begin from 0 revise in 2018.11.26 by Liang Hao
CountFarEnd = 0;
CountFarEndMinus = 0;
CountFarEndPlus = 0;
Expand Down Expand Up @@ -906,7 +906,7 @@ if (Analyze_TD_INV_LI_Others) {

cerr << "Searching breakpoints of tandem duplication events" << endl;
//int CountFarEnd, CountFarEndPlus, CountFarEndMinus;
for (short RangeIndex = 1; RangeIndex < MaxRangeIndex; RangeIndex++) {
for (short RangeIndex = 0; RangeIndex < MaxRangeIndex; RangeIndex++) { //RangeIndex begin from 0 revise in 2018.11.26 by Liang Hao

CountFarEnd = 0;
CountFarEndMinus = 0;
Expand Down Expand Up @@ -964,7 +964,7 @@ if (Analyze_TD_INV_LI_Others) {
cerr << "Searching breakpoints of inversions" << endl;
unsigned ReadsUsedForD = 0;
unsigned ReadsUsedForDI = 0;
for (short RangeIndex = 1; RangeIndex < MaxRangeIndex; RangeIndex++) {
for (short RangeIndex = 0; RangeIndex < MaxRangeIndex; RangeIndex++) { //RangeIndex begin from 0 revise in 2018.11.26 by Liang Hao

CountFarEnd = 0;
CountFarEndMinus = 0;
Expand Down Expand Up @@ -2266,14 +2266,14 @@ void CheckLeft_Close(const SPLIT_READ & OneRead,
for (short i = 0; i <= OneRead.MAX_SNP_ERROR; i++) {
if (Left_PD[i].size() == 1 && CurrentLength >= BP_Left_Start + i) {
Sum = 0;
if (ADDITIONAL_MISMATCH)
for (short j = 1; j <= ADDITIONAL_MISMATCH; j++)
Sum += Left_PD[i + j].size();

if (Sum == 0 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
if (ADDITIONAL_MISMATCH)
for (short j = 0; j <= i+ADDITIONAL_MISMATCH; j++)
Sum += Left_PD[j].size(); //Maybe the previous SNPS still exist; revise in 2018.11.26 by Liang Hao
if (Sum == 1 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
UniquePoint TempOne;
TempOne.LengthStr = CurrentLength;
TempOne.AbsLoc = Left_PD[i][0];

TempOne.Direction = FORWARD;
TempOne.Strand = ANTISENSE;
TempOne.Mismatches = i;
Expand Down Expand Up @@ -2356,11 +2356,10 @@ void CheckRight_Close(const SPLIT_READ & OneRead,
for (short i = 0; i <= OneRead.MAX_SNP_ERROR; i++) {
if (Right_PD[i].size() == 1 && CurrentLength >= BP_Right_Start + i) {
Sum = 0;
if (ADDITIONAL_MISMATCH)
for (short j = 1; j <= ADDITIONAL_MISMATCH; j++)
Sum += Right_PD[i + j].size();

if (Sum == 0 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
if (ADDITIONAL_MISMATCH)
for (short j = 0; j <= i+ADDITIONAL_MISMATCH; j++)
Sum += Right_PD[j].size(); //Maybe the previous SNPS still exist; revise in 2018.11.26 by Liang Hao
if (Sum == 1 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
UniquePoint TempOne;
TempOne.LengthStr = CurrentLength;
TempOne.AbsLoc = Right_PD[i][0];
Expand Down Expand Up @@ -2447,11 +2446,10 @@ void CheckLeft_Far(const SPLIT_READ & OneRead,
for (short i = 0; i <= OneRead.MAX_SNP_ERROR; i++) {
if (Left_PD[i].size() == 1 && CurrentLength >= BP_Left_Start + i) {
Sum = 0;
if (ADDITIONAL_MISMATCH)
for (short j = 1; j <= ADDITIONAL_MISMATCH; j++)
Sum += Left_PD[i + j].size();

if (Sum == 0 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
if (ADDITIONAL_MISMATCH)
for (short j = 0; j <= i+ADDITIONAL_MISMATCH; j++)
Sum += Left_PD[j].size(); //Maybe the previous SNPS still exist; revise in 2018.11.26 by Liang Hao
if (Sum == 1 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
UniquePoint TempOne;
TempOne.LengthStr = CurrentLength;
TempOne.AbsLoc = Left_PD[i][0];
Expand Down Expand Up @@ -2567,11 +2565,10 @@ void CheckRight_Far(const SPLIT_READ & OneRead,
for (short i = 0; i <= OneRead.MAX_SNP_ERROR; i++) {
if (Right_PD[i].size() == 1 && CurrentLength >= BP_Right_Start + i) {
Sum = 0;
if (ADDITIONAL_MISMATCH)
for (short j = 1; j <= ADDITIONAL_MISMATCH; j++)
Sum += Right_PD[i + j].size();

if (Sum == 0 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
if (ADDITIONAL_MISMATCH)
for (short j = 0; j <= i+ADDITIONAL_MISMATCH; j++)
Sum += Right_PD[j].size(); //Maybe the previous SNPS still exist; revise in 2018.11.26 by Liang Hao
if (Sum == 1 && i <= (short)(CurrentLength * Seq_Error_Rate + 1)) {
UniquePoint TempOne;
TempOne.LengthStr = CurrentLength;
TempOne.AbsLoc = Right_PD[i][0];
Expand Down Expand Up @@ -4497,7 +4494,7 @@ void GetCloseEnd(const string & CurrentChr, SPLIT_READ & Temp_One_Read) {
if (CurrentChr[pos] == LeftChar) {
PD[0].push_back(pos);
}
else PD[1].push_back(pos);
//else PD[1].push_back(pos); find close_end: first Char of read is not mismatched; revise in 2018.11.26 by Liang Hao
}
}
else { //Match2N[(short)'A'] = 'N';
Expand Down Expand Up @@ -4528,7 +4525,7 @@ void GetCloseEnd(const string & CurrentChr, SPLIT_READ & Temp_One_Read) {
if (CurrentChr[pos] == RightChar) {
PD[0].push_back(pos);
}
else PD[1].push_back(pos);
//else PD[1].push_back(pos); find close_end: first Char of read is not mismatched; revise in 2018.11.26 by Liang Hao
}
}
else { //Match2N[(short)'A'] = 'N';
Expand Down Expand Up @@ -4651,7 +4648,7 @@ void GetFarEnd_SingleStrandDownStreamInsertions(const string & CurrentChr, SPLIT

End = Temp_One_Read.UP_Close[0].AbsLoc + Temp_One_Read.UP_Close[0].LengthStr;
if (End > SpacerBeforeAfter + Temp_One_Read.InsertSize * 2 + DSizeArray[RangeIndex])
Start = End - DSizeArray[RangeIndex] - Temp_One_Read.InsertSize * 2;
Start = End - DSizeArray[RangeIndex] - Temp_One_Read.InsertSize * 2;
else Start = SpacerBeforeAfter;


Expand Down Expand Up @@ -4792,7 +4789,7 @@ void GetFarEnd_SingleStrandDownStream(const string & CurrentChr, SPLIT_READ & Te

End = Temp_One_Read.UP_Close[0].AbsLoc + Temp_One_Read.UP_Close[0].LengthStr - Temp_One_Read.ReadLength;
if (End > SpacerBeforeAfter + Temp_One_Read.InsertSize * 2 + DSizeArray[RangeIndex])
Start = End - DSizeArray[RangeIndex] - Temp_One_Read.InsertSize * 2;
Start = End - DSizeArray[RangeIndex];// - Temp_One_Read.InsertSize * 2; revise in 2018.11.26 by Liang Hao
else Start = SpacerBeforeAfter;


Expand Down Expand Up @@ -4849,7 +4846,7 @@ void GetFarEnd_SingleStrandDownStream(const string & CurrentChr, SPLIT_READ & Te

Start = Temp_One_Read.UP_Close[0].AbsLoc - Temp_One_Read.UP_Close[0].LengthStr + Temp_One_Read.ReadLength;
//Start = Temp_One_Read.MatchedRelPos + SpacerBeforeAfter;
End = Start + DSizeArray[RangeIndex] + Temp_One_Read.InsertSize * 2;
End = Start + DSizeArray[RangeIndex];// + Temp_One_Read.InsertSize * 2; revise in 2018.11.26 by Liang Hao
if (End > CurrentChr.size() - SpacerBeforeAfter) End = CurrentChr.size() - SpacerBeforeAfter;

RightChar = CurrentReadSeq[Temp_One_Read.ReadLengthMinus];
Expand Down Expand Up @@ -4932,7 +4929,7 @@ void GetFarEnd_SingleStrandUpStream(const string & CurrentChr, SPLIT_READ & Temp
CurrentReadSeq = Temp_One_Read.UnmatchedSeq;

Start = Temp_One_Read.UP_Close[0].AbsLoc + Temp_One_Read.UP_Close[0].LengthStr;
End = Start + DSizeArray[RangeIndex] + Temp_One_Read.InsertSize * 2;
End = Start + DSizeArray[RangeIndex];// + Temp_One_Read.InsertSize * 2; revise in 2018.11.26 by Liang Hao
if (End > CurrentChr.size() - SpacerBeforeAfter) End = CurrentChr.size() - SpacerBeforeAfter;

LeftChar = CurrentReadSeq[0];
Expand Down Expand Up @@ -4989,7 +4986,7 @@ void GetFarEnd_SingleStrandUpStream(const string & CurrentChr, SPLIT_READ & Temp
//Start = Temp_One_Read.MatchedRelPos + SpacerBeforeAfter;

if (End > DSizeArray[RangeIndex] + Temp_One_Read.InsertSize * 2 + SpacerBeforeAfter)
Start = End - DSizeArray[RangeIndex] - Temp_One_Read.InsertSize * 2;
Start = End - DSizeArray[RangeIndex];// - Temp_One_Read.InsertSize * 2; The original search region is a bit large; revise in 2018.11.26 by Liang Hao
else Start = SpacerBeforeAfter;

RightChar = CurrentReadSeq[Temp_One_Read.ReadLengthMinus];
Expand Down Expand Up @@ -5408,10 +5405,10 @@ void CheckBoth(const SPLIT_READ & OneRead,
if (PD_Plus[i].size() + PD_Minus[i].size() == 1 && CurrentLength >= BP_Start + i) {
Sum = 0;
if (ADDITIONAL_MISMATCH)
for (short j = 1; j <= ADDITIONAL_MISMATCH; j++)
Sum += PD_Plus[i + j].size() + PD_Minus[i + j].size();

if (Sum == 0 && i <= (short)(Seq_Error_Rate * CurrentLength + 1)) {
for (short j = 0; j <= i+ADDITIONAL_MISMATCH; j++)
Sum += PD_Plus[j].size() + PD_Minus[j].size();
//revise in 2018.11.26 by Liang Hao
if (Sum == 1 && i <= (short)(Seq_Error_Rate * CurrentLength + 1)) {
UniquePoint TempOne;
TempOne.LengthStr = CurrentLength;
if (PD_Plus[i].size() == 1) {
Expand Down
Binary file modified perl/docs.tar.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions perl/lib/Sanger/CGP/Pindel.pm
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package Sanger::CGP::Pindel;

########## LICENCE ##########
# Copyright (c) 2014-2018 Genome Research Ltd.
# Copyright (c) 2014-2019 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
Expand All @@ -26,7 +26,7 @@ use strict;
use Const::Fast qw(const);

use base 'Exporter';
our $VERSION = '3.0.6';
our $VERSION = '3.1.0';
our @EXPORT = qw($VERSION);

1;
7 changes: 3 additions & 4 deletions perl/lib/Sanger/CGP/Pindel/InputGen.pm
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package Sanger::CGP::Pindel::InputGen;

########## LICENCE ##########
# Copyright (c) 2014-2018 Genome Research Ltd.
# Copyright (c) 2014-2019 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
Expand Down Expand Up @@ -51,7 +51,7 @@ use Sanger::CGP::Pindel::InputGen::Pair;

const my $PAIRS_PER_THREAD => 500_000;

const my $BAMCOLLATE => q{%s outputformat=sam colsbs=268435456 collate=1 classes=F,F2 exclude=DUP,SECONDARY,QCFAIL,SUPPLEMENTARY T=%s filename=%s reference=%s inputformat=%s};
const my $BAMCOLLATE => q{%s outputformat=sam colsbs=268435456 collate=1 classes=F,F2 exclude=DUP,SECONDARY,SUPPLEMENTARY T=%s filename=%s reference=%s inputformat=%s};

sub new {
my ($class, $bam, $exclude, $ref) = @_;
Expand Down Expand Up @@ -238,8 +238,7 @@ sub reads_to_pindel {
for(1..$total_pairs) {
my $pair = Sanger::CGP::Pindel::InputGen::Pair->new(\shift @reads, \shift @reads, $tabix);
next unless($pair->keep_pair);
$to_pindel->set_pair($pair);
push @records, @{$to_pindel->pair_to_pindel};
push @records, @{$to_pindel->pair_to_pindel($pair)};

}
my $retained = scalar @records;
Expand Down
9 changes: 8 additions & 1 deletion perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package Sanger::CGP::Pindel::InputGen::Pair;

########## LICENCE ##########
# Copyright (c) 2014-2018 Genome Research Ltd.
# Copyright (c) 2014-2019 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
Expand Down Expand Up @@ -57,6 +57,12 @@ sub unmapped_pair {
return 0;
}

# Reports whether the pair as a whole is QC-failed.
# Returns 1 only when BOTH 'r1' and 'r2' answer true to qc_failed, otherwise 0.
sub qcfailed_pair {
  my ($self) = @_;
  my ($first, $second) = @{$self}{qw(r1 r2)};
  # '&&' short-circuits, so the second read is only consulted when the first has failed.
  return ($first->qc_failed && $second->qc_failed) ? 1 : 0;
}

sub has_good_anchor {
my $self = shift;
my $r2_state = $self->{'r2'}->good_anchor; # to ensure both fully populates
Expand All @@ -73,6 +79,7 @@ sub has_good_query {

sub keep_pair {
my $self = shift;
return 0 if($self->qcfailed_pair);
return 0 if($self->unmapped_pair);
return 0 if($self->exact);
return 0 unless($self->has_good_anchor);
Expand Down
11 changes: 3 additions & 8 deletions perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package Sanger::CGP::Pindel::InputGen::PairToPindel;

########## LICENCE ##########
# Copyright (c) 2014-2018 Genome Research Ltd.
# Copyright (c) 2014-2019 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
Expand Down Expand Up @@ -55,14 +55,9 @@ sub setup_rg_inserts {
return 1;
}

# Stores the supplied pair on the object under the 'pair' key.
# Croaks with "No pair provided" when the pair argument is undefined.
# Returns the result of the assignment (the stored pair).
sub set_pair {
  my $self = shift;
  my $pair = shift;
  if(!defined $pair) {
    croak "No pair provided";
  }
  return $self->{'pair'} = $pair;
}

sub pair_to_pindel {
my $self = shift;
my ($self, $pair_in) = @_;
$self->{'pair'} = $pair_in;
my $r1 = $self->{'pair'}->{'r1'};
my $r2 = $self->{'pair'}->{'r2'};
my @pindel_records;
Expand Down
9 changes: 8 additions & 1 deletion perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package Sanger::CGP::Pindel::InputGen::Read;

########## LICENCE ##########
# Copyright (c) 2014-2018 Genome Research Ltd.
# Copyright (c) 2014-2019 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
Expand Down Expand Up @@ -41,6 +41,7 @@ const my $UNMAPPED => 4;
const my $READ_REVERSED => 16; #0x0010 (set when reverse)
const my $FIRST_IN_PAIR => 64;
const my $SECOND_IN_PAIR => 128;
const my $QC_FAIL => 512;

const my $MIN_MAPQ => 0;
const my $MIN_ANCHOR_MAPQ => 0;
Expand Down Expand Up @@ -102,6 +103,11 @@ sub reversed {
(($self->{'flag'} | $READ_REVERSED) == $self->{'flag'}) ? 1 : 0;
}

# Tests the read's 'flag' value against the $QC_FAIL bit mask.
# Returns 1 when every bit of $QC_FAIL is set in the flag, otherwise 0.
sub qc_failed {
  my ($self) = @_;
  # Masking with '&' and comparing to the mask is equivalent to
  # checking that OR-ing the mask in leaves the flag unchanged.
  my $bit_is_set = ($self->{'flag'} & $QC_FAIL) == $QC_FAIL;
  return $bit_is_set ? 1 : 0;
}

sub strand {
my $self = shift;
$self->reversed ? '-' : '+';
Expand Down Expand Up @@ -191,6 +197,7 @@ sub good_query {

sub _good_query {
my $self = shift;
return 0 if($self->qc_failed);
return 0 if(index($self->{'seq'},'NN') >= 0);
return 0 if($self->frac_pbq_poor > $MAX_POOR_PBQ_FRAC);
unless($self->unmapped) {
Expand Down

0 comments on commit 9455fb8

Please sign in to comment.