Skip to content

Commit

Permalink
mercy k-mer simple test through
Browse files Browse the repository at this point in the history
  • Loading branch information
rhinempi committed Oct 28, 2024
1 parent 96e697b commit 33519c2
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 22 deletions.
22 changes: 16 additions & 6 deletions src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/Pipelines.java
Original file line number Diff line number Diff line change
Expand Up @@ -1050,8 +1050,10 @@ public void reflexivDSDynamicReductionPipe() throws IOException {
} else {
reflexivDS64CounterPipe();

param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize1 + "/part*.csv.gz";
reflexivDS64MercyKmerPipe();
if (param.kmerSize <=55) {
param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize1 + "/part*.csv.gz";
reflexivDS64MercyKmerPipe();
}
}
} else {
info.readMessage("Checking existing k-mer counts: Count_" + param.kmerSize1 + " succeeded");
Expand Down Expand Up @@ -1122,8 +1124,11 @@ public void reflexivDSDynamicReductionPipe() throws IOException {
} else {
reflexivDS64CounterPipe();

param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize2 + "*/part*.csv.gz";
reflexivDS64MercyKmerPipe();

if (param.kmerSize <=55) {
param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize2 + "*/part*.csv.gz";
reflexivDS64MercyKmerPipe();
}
}
} else {
info.readMessage("Checking existing k-mer counts: Count_" + param.kmerSize2 + " succeeded");
Expand Down Expand Up @@ -1246,11 +1251,16 @@ public void reflexivDSDynamicReductionPipe() throws IOException {
info.screenDump();
if (param.kmerSize <= 31) {
reflexivDSCounterPipe();
} else {
reflexivDS64CounterPipe();

param.inputKmerPath = param.outputPath + "/Count_" + param.kmerListInt[param.kmerListInt.length - 1] + "/part*.csv.gz";
reflexivDS64MercyKmerPipe();
} else {
reflexivDS64CounterPipe();

if (param.kmerSize <=55) {
param.inputKmerPath = param.outputPath + "/Count_" + param.kmerListInt[param.kmerListInt.length - 1] + "/part*.csv.gz";
reflexivDS64MercyKmerPipe();
}
}
} else {
info.readMessage("Checking existing k-mer counts: Count_" + param.kmerListInt[param.kmerListInt.length-1] + " succeeded");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1329,8 +1329,17 @@ private long[] findRange(List<Integer> i, long index){
/**
* a+1 means that the gap starts from the next k-mer
*/
range = buildingAlongFromThreeInt(1, a+1, b); // a+1 means that the gap starts from the next k-mer
gaps.add(range);
if (param.kmerSize +1 >=b-a-1 && b-a-1>=param.kmerSize-1){
/**
* ATCG--------*---------
* ATCG--------*ATCG-----
* --------- Gap size = b-a-1
*/
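// the gap size (b-a-1) is within one base of the k-mer size, i.e. the size attributed here to a single error: skip it instead of recording a gap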
}else {
range = buildingAlongFromThreeInt(1, a + 1, b); // a+1 means that the gap starts from the next k-mer
gaps.add(range);
}
}

lastIndex=i.get(j);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,20 +453,28 @@ public Iterator<Row> call(Iterator<Row> s) {
if (subKmerSlotComparator(subKmer.getSeq(0), HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getSeq(0)) == true) {
if (leftMarker > highestLeftMarker) {
// highestLeftMarker > 1 is for mercy k-mer with only 1 coverage
if (highestLeftMarker <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * highestLeftMarker && highestLeftMarker > 1) { // should use rightMarker here . However, since in the beginning, left and right are the same as coverage, it does not matter
if (highestLeftMarker <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * highestLeftMarker) { // should use rightMarker here . However, since in the beginning, left and right are the same as coverage, it does not matter
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1);
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
} else {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize+3);
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
if (highestLeftMarker == 1){
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1);
}else {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3);
}
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
}
} else if (leftMarker == highestLeftMarker) {
if (subKmer.getLong(2) > HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getLong(2)) {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize+3);
if (highestLeftMarker==1) {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1);
}else {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3);
}
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
Expand All @@ -476,13 +484,17 @@ public Iterator<Row> call(Iterator<Row> s) {
leftMarker=getLeftMarker(subKmer.getLong(1));
// rightMarker=getRightMarker(subKmer.getLong(1));
currentSubKmerSize=currentKmerSizeFromBinaryBlockArray(subKmerArray);
attribute = buildingAlongFromThreeInt(reflexivMarker,leftMarker,maxKmerSize+3);
if (highestLeftMarker==1) {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1);
}else{
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3);
}
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
}
} else {
if (leftMarker <= param.minErrorCoverage && highestLeftMarker >= param.minRepeatFold * leftMarker && leftMarker > 1) {
if (leftMarker <= param.minErrorCoverage && highestLeftMarker >= param.minRepeatFold * leftMarker) {
subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1);
reflexivMarker=getReflexivMarker(subKmer.getLong(1));
leftMarker=getLeftMarker(subKmer.getLong(1));
Expand All @@ -493,12 +505,18 @@ public Iterator<Row> call(Iterator<Row> s) {
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
} else {
if (leftMarker == 1){
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1);
}else {
attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3);
}

subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1);
reflexivMarker=getReflexivMarker(subKmer.getLong(1));
leftMarker=getLeftMarker(subKmer.getLong(1));
// rightMarker=getRightMarker(subKmer.getLong(1));
currentSubKmerSize=currentKmerSizeFromBinaryBlockArray(subKmerArray);
attribute = buildingAlongFromThreeInt(reflexivMarker,leftMarker,maxKmerSize+3);

HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
Expand Down Expand Up @@ -704,15 +722,19 @@ public Iterator<Row> call(Iterator<Row> s) {
int highestLeftMarker = getLeftMarker(HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getLong(1));
if (subKmerSlotComparator(subKmer.getSeq(0), HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getSeq(0)) == true) {
if (leftMarker > HighCoverLastCoverage) {
if (HighCoverLastCoverage <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * HighCoverLastCoverage && HighCoverLastCoverage >1) {
if (HighCoverLastCoverage <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * HighCoverLastCoverage ) {
HighCoverLastCoverage = leftMarker;
attribute = buildingAlongFromThreeInt(reflexivMarker, -1, rightMarker);
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
} else {
if (HighCoverLastCoverage == 1){
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker);
}else{
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker);
}
HighCoverLastCoverage = leftMarker;
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker);
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))
);
Expand All @@ -724,7 +746,11 @@ public Iterator<Row> call(Iterator<Row> s) {
Long HighCoverageSubKmerFirstSuffix = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getLong(2) >>> 2 * (32-HighCoverageSubKmerFirstSuffixLength);

if (subKmerFirstSuffix.compareTo(HighCoverageSubKmerFirstSuffix) > 0) {
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker);
if (HighCoverLastCoverage==1){ // both kmer coverage are 1, consider one with sequencing error
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, -1);
}else {
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, rightMarker);
}
HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0),
attribute, subKmer.getLong(2))
Expand All @@ -743,7 +769,7 @@ public Iterator<Row> call(Iterator<Row> s) {
);
}
} else {
if (leftMarker <= param.minErrorCoverage && HighCoverLastCoverage >= param.minRepeatFold * leftMarker && leftMarker > 1) {
if (leftMarker <= param.minErrorCoverage && HighCoverLastCoverage >= param.minRepeatFold * leftMarker) {
subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1);

reflexivMarker=getReflexivMarker(subKmer.getLong(1));
Expand All @@ -756,12 +782,18 @@ public Iterator<Row> call(Iterator<Row> s) {
} else {
subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1);

if (leftMarker ==1 ){
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, -1);
}else {
attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, rightMarker);
}

reflexivMarker=getReflexivMarker(subKmer.getLong(1));
//leftMarker=getLeftMarker(subKmer.getLong(1));
rightMarker=getRightMarker(subKmer.getLong(1));
currentSubKmerSize=currentKmerSizeFromBinaryBlockArray(subKmerArray);

attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker);

HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1,
RowFactory.create(subKmer.getSeq(0),
attribute, subKmer.getLong(2))
Expand Down

0 comments on commit 33519c2

Please sign in to comment.