Skip to content

Commit

Permalink
modified: src/data_structures/Translocation.cpp
Browse files Browse the repository at this point in the history
	modified:   src/data_structures/Translocation.h
	modified:   src/data_structures/findTranslocationsOnTheFly.cpp
  • Loading branch information
J35P312 committed Jul 14, 2015
1 parent 45474ce commit 103c5f1
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 19 deletions.
143 changes: 124 additions & 19 deletions src/data_structures/Translocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,55 @@ bool sortMate(long i, long j) {
return (i < j);
}

string VCFHeader(){
string headerString ="";
//Define fileformat and source
headerString+="##fileformat=VCFv4.1\n";
headerString+="##source=FindTranslocations\n";
//define the alowed events
headerString+="##ALT=<ID=DEL,Description=\"Deletion\">\n";
headerString+="##ALT=<ID=DUP,Description=\"Duplication\">\n";
headerString+="##ALT=<ID=BND,Description=\"Duplication\">\n";
//Define the info fields
headerString+="##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n";
headerString+="##INFO=<ID=LFW,Number=1,Type=Integer,Description=\"Links from window\">\n";
headerString+="##INFO=<ID=LCB,Number=1,Type=Integer,Description=\"Links to chromosome B\">\n";
headerString+="##INFO=<ID=LTE,Number=1,Type=Integer,Description=\"Links to event\"\b>\n";
headerString+="##INFO=<ID=COVA,Number=1,Type=Integer,Description=\"Coverage on window A\">\n";
headerString+="##INFO=<ID=COVB,Number=1,Type=Integer,Description=\"Coverage on window B\">\n";
headerString+="##INFO=<ID=OA,Number=1,Type=Integer,Description=\"Orientation of the reads in window A\">\n";
headerString+="##INFO=<ID=OB,Number=1,Type=Integer,Description=\"Orientation of the mates in window B\">\n";
headerString+="##INFO=<ID=CHRA,Number=1,Type=String,Description=\"The chromosome of window A\">\n";
headerString+="##INFO=<ID=CHRB,Number=1,Type=String,Description=\"The chromosome of window B\">\n";
headerString+="##INFO=<ID=WINA,Number=2,Type=Integer,Description=\"start and stop positon of window A\">\n";
headerString+="##INFO=<ID=WINB,Number=2,Type=Integer,Description=\"start and stop position of window B\">\n";
headerString+="##INFO=<ID=EL,Number=1,Type=Integer,Description=\"Expected links to window B\">\n";
headerString+="##INFO=<ID=RATIO,Number=1,Type=Integer,Description=\"The number of links divided by the expected number of links\">\n";
headerString+="##INFO=<ID=ED,Number=1,Type=Integer,Description=\"The average estimated distance between paired ends within the window\">\n";
//set filters
headerString+="##FILTER=<ID=BelowExpectedLinks,Description=\"The number of links between A and B is less than 40\% of the expected value\">\n";
headerString+="=##FILTER=<ID=FewLinks,Description=\"Fewer than 40% of the links in window A link to window B\">\n";
headerString+="=##FILTER=<ID=UnexpectedDistance,Description=\"The average paired reads distance is deviating\">\n";
//Header
headerString+="#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

return(headerString);
}

string filterFunction(double RATIO, int linksToB, int LinksFromWindow,float mean_insert, float std_insert,int estimatedDistance){
string filter = "PASS";
double linkratio= (double)linksToB/(double)LinksFromWindow;
if(RATIO < 0.4){
filter="BelowExpectedLinks";
}else if(linkratio < 0.4){
filter="FewLinks";
}else if(estimatedDistance > 2*std_insert + mean_insert or estimatedDistance < -2*std_insert+mean_insert){
filter="UnexpectedDistance";
}


return(filter);
}

Window::Window(int max_insert, uint16_t minimum_mapping_quality,
bool outtie, float mean_insert, float std_insert, int minimumPairs,
Expand All @@ -30,13 +78,21 @@ Window::Window(int max_insert, uint16_t minimum_mapping_quality,
this->outputFileHeader = outputFileHeader;
string inter_chr_events = outputFileHeader + "_inter_chr_events.tab";
this->interChrVariations.open(inter_chr_events.c_str());
this->interChrVariations << "chrA\tstartOnA\tendOnA\tchrB\tstartOnB\tendOnB\tLinksFromWindow\tLinksToChrB\tLinksToEvent\tCoverageOnChrA\tCoverageOnChrB\tOrientationA\tOrientationB\n";
this->interChrVariations <<"chrA\tstartOnA\tendOnA\tchrB\tstartOnB\tendOnB\tLinksFromWindow\tLinksToChrB\tLinksToEvent\tCoverageOnChrA\tCoverageOnChrB\tOrientationA\tOrientationB\tExpectedLinksToEvent\tLinksToEvent/ExpectedLinksToEvent\tEstimatedDistance\n";
string intra_chr_events = outputFileHeader + "_intra_chr_events.tab";
this->intraChrVariations.open(intra_chr_events.c_str());
this->intraChrVariations << "chrA\tstartOnA\tendOnA\tchrB\tstartOnB\tendOnB\tLinksFromWindow\tLinksToChrB\tLinksToEvent\tCoverageOnChrA\tCoverageOnChrB\tOrientationA\tOrientationB\n";
this->intraChrVariations << "chrA\tstartOnA\tendOnA\tchrB\tstartOnB\tendOnB\tLinksFromWindow\tLinksToChrB\tLinksToEvent\tCoverageOnChrA\tCoverageOnChrB\tOrientationA\tOrientationB\tExpectedLinksToEvent\tLinksToEvent/ExpectedLinksToEvent\tEstimatedDistance\n";

string inter_chr_eventsVCF = outputFileHeader + "_inter_chr_events.vcf";
this->interChrVariationsVCF.open(inter_chr_eventsVCF.c_str());
this->interChrVariationsVCF << VCFHeader();

string intra_chr_eventsVCF = outputFileHeader + "_intra_chr_events.vcf";
this->intraChrVariationsVCF.open(intra_chr_eventsVCF.c_str());
this->intraChrVariationsVCF << VCFHeader();

this->coverage = 0;
this->chr =-1;
this->chr =-1;

}

Expand Down Expand Up @@ -146,27 +202,42 @@ float Window::computeCoverageB(int chrB, int start, int end, int32_t secondWindo
}

//This function accepts a queue with alignments that links to chrB from chrA, the function returns the number of reads that are positioned inside the region on A and have mates that are linking anywhere on B
int findLinksToChr2(queue<BamAlignment> ReadQueue,long startChrA,long stopChrA){
vector<int> findLinksToChr2(queue<BamAlignment> ReadQueue,long startChrA,long stopChrA,long startChrB,long endChrB, int pairsFormingLink){
int linkNumber=0;
int QueueSize=ReadQueue.size();
int estimatedDistance=0;
int lengthWindowA=stopChrA-startChrA+1;

for(int i=0;i<QueueSize;i++){
if(startChrA <= ReadQueue.front().Position and stopChrA >= ReadQueue.front().Position ){
linkNumber+=1;
//If the reads are bridging the two windows, calculate the mate and read distance
if(startChrB <= ReadQueue.front().MatePosition and endChrB >= ReadQueue.front().MatePosition){
estimatedDistance+=lengthWindowA-(ReadQueue.front().Position-startChrA)+(ReadQueue.front().MatePosition-startChrB);
}
}
ReadQueue.pop();
}

return(linkNumber);
vector<int> output;
output.push_back(linkNumber);
if(pairsFormingLink > 0){
output.push_back(estimatedDistance/pairsFormingLink);
}else{
//if no links exists, the region will not count as an event, thus any number may be returned as the distance(the statistics wont be printed anyway)
output.push_back(-1);
}
return(output);
}


vector<double> Window::computeStatisticsA(string bamFileName, int chrB, int start, int end, int32_t WindowLength, string indexFile){
vector<double> statisticsVector;
int bases=0;
double coverage;
double AverageReadLength=0;
BamReader bamFile;
BamAlignment currentRead;
int linksFromWindow=0;
int nreads=0;

if(!bamFile.Open(bamFileName)){
statisticsVector.push_back(-1);statisticsVector.push_back(-1);
Expand All @@ -185,6 +256,8 @@ vector<double> Window::computeStatisticsA(string bamFileName, int chrB, int star
if(alignmentStatus != unmapped and alignmentStatus != lowQualty ) {
//calculates the length of a read
bases+=currentRead.Length;
AverageReadLength+=currentRead.Length;
nreads++;

//if the distance between a pair is too long, the amount of links from the winddoe is increased
if(alignmentStatus == pair_wrongChrs or alignmentStatus == pair_wrongDistance) {
Expand All @@ -195,10 +268,11 @@ vector<double> Window::computeStatisticsA(string bamFileName, int chrB, int star
}
}
}
AverageReadLength=AverageReadLength/nreads;
bamFile.Close();
//calculates the coverage and returns the coverage within the window
coverage=bases/float(WindowLength+1);
statisticsVector.push_back(coverage);statisticsVector.push_back(linksFromWindow);
statisticsVector.push_back(coverage);statisticsVector.push_back(linksFromWindow);statisticsVector.push_back(AverageReadLength);
return(statisticsVector);
}

Expand All @@ -219,15 +293,19 @@ bool Window::computeVariations(int chr2) {
long stopSecondWindow=chr2regions[i*3+1];
long pairsFormingLink=chr2regions[i*3+2];



//resize region so that it is just large enough to cover the reads that have mates in the present cluster
vector<long> chrALimit=newChrALimit(this-> eventReads[chr2],startSecondWindow,stopSecondWindow);


long startchrA=chrALimit[0];
long stopchrA=chrALimit[1];

int numLinksToChr2=findLinksToChr2(eventReads[chr2],startchrA, stopchrA);

vector<int> statsOnB=findLinksToChr2(eventReads[chr2],startchrA, stopchrA,startSecondWindow,stopSecondWindow,pairsFormingLink);
int numLinksToChr2=statsOnB[0];
int estimatedDistance=statsOnB[1];

vector<string> orientation=computeOrientation(eventReads[chr2],startchrA ,stopchrA,startSecondWindow,stopSecondWindow);
string read1_orientation= orientation[0];
string read2_orientation= orientation[1];
Expand All @@ -236,27 +314,53 @@ bool Window::computeVariations(int chr2) {
vector<double> statisticsFirstWindow =computeStatisticsA(bamFileName, eventReads[chr2].front().RefID, startchrA, stopchrA, (stopchrA-startchrA), indexFile);
double coverageRealFirstWindow = statisticsFirstWindow[0];
int linksFromWindow=int(statisticsFirstWindow[1]);
double coverageRealSecondWindow=computeCoverageB(chr2, startSecondWindow, stopSecondWindow, (stopSecondWindow-startSecondWindow) );

//I need to find the window that maximise this
int averageReadLength=int(statisticsFirstWindow[2]);
double coverageRealSecondWindow=computeCoverageB(chr2, startSecondWindow, stopSecondWindow, (stopSecondWindow-startSecondWindow+1) );

if(pairsFormingLink >= minimumPairs ) { //ration between coverage // and coverageRealFirstWindow < 5*this->meanCoverage


//I need to find the window that maximise this
int secondWindowLength=(stopSecondWindow-startSecondWindow+1);
int firstWindowLength=stopchrA-startchrA+1;
float expectedLinksInWindow = ExpectedLinks(firstWindowLength, secondWindowLength, estimatedDistance, mean_insert, std_insert, coverageRealFirstWindow, averageReadLength);
if(this->chr == chr2) {
intraChrVariations << position2contig[this->chr] << "\t" << startchrA << "\t" << stopchrA << "\t" ;
intraChrVariations << position2contig[chr2] << "\t" << startSecondWindow << "\t" << stopSecondWindow << "\t" ;
intraChrVariations << linksFromWindow << "\t" << numLinksToChr2 << "\t" << pairsFormingLink << "\t";
intraChrVariations << coverageRealFirstWindow << "\t" << coverageRealSecondWindow << "\t";
intraChrVariations << read1_orientation << "\t" << read2_orientation << "\n" ;
//expectedLinksInWindow << "\t" << pairsFormingLink/expectedLinksInWindow << "\t" << estimatedDistance << "\n";
intraChrVariations << read1_orientation << "\t" << read2_orientation << "\t"
<< expectedLinksInWindow << "\t" << pairsFormingLink/expectedLinksInWindow << "\t" << estimatedDistance << "\n";

string filter=filterFunction(pairsFormingLink/expectedLinksInWindow, pairsFormingLink, linksFromWindow,mean_insert, std_insert,estimatedDistance);
string svType="BND";

intraChrVariationsVCF << position2contig[this -> chr] << "\t" << stopchrA << "\t.\t" ;
intraChrVariationsVCF << "N" << "\t" << "N[" << position2contig[chr2] << ":" << startSecondWindow << "[";
intraChrVariationsVCF << "\t.\t" << filter << "\tSVTYPE="+svType <<";CHRA="<<position2contig[this->chr]<<";WINA=" << startchrA << "," << stopchrA;
intraChrVariationsVCF <<";CHRB="<< position2contig[chr2] <<";WINB=" << startSecondWindow << "," << stopSecondWindow << ";LFW=" << linksFromWindow;
intraChrVariationsVCF << ";LCB=" << numLinksToChr2 << ";LTE=" << pairsFormingLink << ";COVA=" << coverageRealFirstWindow;
intraChrVariationsVCF << ";COVB=" << coverageRealSecondWindow << ";OA=" << read1_orientation << ";OB=" << read2_orientation;
intraChrVariationsVCF << ";EL=" << expectedLinksInWindow << ";RATIO="<< pairsFormingLink/expectedLinksInWindow <<";ED=" << estimatedDistance << "\n";


} else {
interChrVariations << position2contig[this -> chr] << "\t" << startchrA << "\t" << stopchrA << "\t" ;
interChrVariations << position2contig[chr2] << "\t" << startSecondWindow << "\t" << stopSecondWindow << "\t" ;
interChrVariations << linksFromWindow << "\t" << numLinksToChr2 << "\t" << pairsFormingLink << "\t";
interChrVariations << coverageRealFirstWindow << "\t" << coverageRealSecondWindow << "\t";
interChrVariations << read1_orientation << "\t" << read2_orientation << "\n" ;
//expectedLinksInWindow << "\t" << pairsFormingLink/expectedLinksInWindow << "\t" << estimatedDistance << "\n";
interChrVariations << read1_orientation << "\t" << read2_orientation << "\t"
<< expectedLinksInWindow << "\t" << pairsFormingLink/expectedLinksInWindow << "\t" << estimatedDistance << "\n";

string filter=filterFunction(pairsFormingLink/expectedLinksInWindow, pairsFormingLink, linksFromWindow,mean_insert, std_insert,estimatedDistance);
string svType="BND";

interChrVariationsVCF << position2contig[this -> chr] << "\t" << stopchrA << "\t.\t" ;
interChrVariationsVCF << "N" << "\t" << "N[" << position2contig[chr2] << ":" << startSecondWindow << "[";
interChrVariationsVCF << "\t.\t" << filter << "\tSVTYPE="+svType <<";CHRA="<<position2contig[this->chr]<<";WINA=" << startchrA << "," << stopchrA;
interChrVariationsVCF <<";CHRB="<< position2contig[chr2] <<";WINB=" << startSecondWindow << "," << stopSecondWindow << ";LFW=" << linksFromWindow;
interChrVariationsVCF << ";LCB=" << numLinksToChr2 << ";LTE=" << pairsFormingLink << ";COVA=" << coverageRealFirstWindow;
interChrVariationsVCF << ";COVB=" << coverageRealSecondWindow << ";OA=" << read1_orientation << ";OB=" << read2_orientation;
interChrVariationsVCF << ";EL=" << expectedLinksInWindow << ";RATIO="<< pairsFormingLink/expectedLinksInWindow <<";ED=" << estimatedDistance << "\n";

}
found = true;

Expand Down Expand Up @@ -432,3 +536,4 @@ vector<string> Window::computeOrientation(queue<BamAlignment> alignmentQueue,lon




2 changes: 2 additions & 0 deletions src/data_structures/Translocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class Window {
string outputFileHeader;
ofstream interChrVariations;
ofstream intraChrVariations;
ofstream interChrVariationsVCF;
ofstream intraChrVariationsVCF;


Window(int max_insert, uint16_t minimum_mapping_quality,
Expand Down
2 changes: 2 additions & 0 deletions src/data_structures/findTranslocationsOnTheFly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ void StructuralVariations::findTranslocationsOnTheFly(string bamFileName, int32_
}
window->interChrVariations.close();
window->intraChrVariations.close();
window->interChrVariationsVCF.close();
window->intraChrVariationsVCF.close();

}

0 comments on commit 103c5f1

Please sign in to comment.