From 33519c25b8e4e04ac60a2b59d21c372efba6421f Mon Sep 17 00:00:00 2001 From: rhinempi Date: Tue, 29 Oct 2024 00:00:35 +0100 Subject: [PATCH] mercy k-mer simple test through --- .../cmg/reflexiv/pipeline/Pipelines.java | 22 +++++-- .../pipeline/ReflexivDSDynamicMercyKmer.java | 13 +++- .../ReflexivDSKmerLeftAndRightSorting.java | 60 ++++++++++++++----- 3 files changed, 73 insertions(+), 22 deletions(-) diff --git a/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/Pipelines.java b/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/Pipelines.java index 855c22b..bfdf224 100644 --- a/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/Pipelines.java +++ b/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/Pipelines.java @@ -1050,8 +1050,10 @@ public void reflexivDSDynamicReductionPipe() throws IOException { } else { reflexivDS64CounterPipe(); - param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize1 + "/part*.csv.gz"; - reflexivDS64MercyKmerPipe(); + if (param.kmerSize <=55) { + param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize1 + "/part*.csv.gz"; + reflexivDS64MercyKmerPipe(); + } } } else { info.readMessage("Checking existing k-mer counts: Count_" + param.kmerSize1 + " succeeded"); @@ -1122,8 +1124,11 @@ public void reflexivDSDynamicReductionPipe() throws IOException { } else { reflexivDS64CounterPipe(); - param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize2 + "*/part*.csv.gz"; - reflexivDS64MercyKmerPipe(); + + if (param.kmerSize <=55) { + param.inputKmerPath = param.outputPath + "/Count_" + param.kmerSize2 + "*/part*.csv.gz"; + reflexivDS64MercyKmerPipe(); + } } } else { info.readMessage("Checking existing k-mer counts: Count_" + param.kmerSize2 + " succeeded"); @@ -1246,11 +1251,16 @@ public void reflexivDSDynamicReductionPipe() throws IOException { info.screenDump(); if (param.kmerSize <= 31) { reflexivDSCounterPipe(); - } else { - reflexivDS64CounterPipe(); param.inputKmerPath = param.outputPath + "/Count_" + param.kmerListInt[param.kmerListInt.length - 1] + "/part*.csv.gz"; reflexivDS64MercyKmerPipe(); + } else { + reflexivDS64CounterPipe(); + + if (param.kmerSize <=55) { + param.inputKmerPath = param.outputPath + "/Count_" + param.kmerListInt[param.kmerListInt.length - 1] + "/part*.csv.gz"; + reflexivDS64MercyKmerPipe(); + } } } else { info.readMessage("Checking existing k-mer counts: Count_" + param.kmerListInt[param.kmerListInt.length-1] + " succeeded"); diff --git a/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSDynamicMercyKmer.java b/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSDynamicMercyKmer.java index c97695b..9c966bc 100644 --- a/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSDynamicMercyKmer.java +++ b/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSDynamicMercyKmer.java @@ -1329,8 +1329,17 @@ private long[] findRange(List i, long index){ /** * a-1 means that the gap starts from the next k-mer */ - range = buildingAlongFromThreeInt(1, a+1, b); // a+1 means that the gap starts from the next k-mer - gaps.add(range); + if (param.kmerSize +1 >=b-a-1 && b-a-1>=param.kmerSize-1){ + /** + * ATCG--------*--------- + * ATCG--------*ATCG----- + * --------- Gap size = b-a-1 + */ + // if the gap size is equal to an error + }else { + range = buildingAlongFromThreeInt(1, a + 1, b); // a+1 means that the gap starts from the next k-mer + gaps.add(range); + } } lastIndex=i.get(j); diff --git a/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSKmerLeftAndRightSorting.java b/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSKmerLeftAndRightSorting.java index 98cf7cb..7a6c4a3 100644 --- a/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSKmerLeftAndRightSorting.java +++ b/src/main/java/uni/bielefeld/cmg/reflexiv/pipeline/ReflexivDSKmerLeftAndRightSorting.java @@ -453,20 +453,28 @@ public Iterator call(Iterator s) { if (subKmerSlotComparator(subKmer.getSeq(0), HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getSeq(0)) == true) { if (leftMarker > highestLeftMarker) { // highestLeftMarker > 1 is for mercy k-mer with only 1 coverage - if (highestLeftMarker <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * highestLeftMarker && highestLeftMarker > 1) { // should use rightMarker here . However, since in the beginning, left and right are the same as coverage, it does not matter + if (highestLeftMarker <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * highestLeftMarker) { // should use rightMarker here . However, since in the beginning, left and right are the same as coverage, it does not matter attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1); HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); } else { - attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize+3); - HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, - RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) - ); + if (highestLeftMarker == 1){ + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1); + }else { + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3); + } + HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, + RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) + ); } } else if (leftMarker == highestLeftMarker) { if (subKmer.getLong(2) > HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getLong(2)) { - attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize+3); + if (highestLeftMarker==1) { + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1); + }else { + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3); + } HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); @@ -476,13 +484,17 @@ public Iterator call(Iterator s) { leftMarker=getLeftMarker(subKmer.getLong(1)); // rightMarker=getRightMarker(subKmer.getLong(1)); currentSubKmerSize=currentKmerSizeFromBinaryBlockArray(subKmerArray); - attribute = buildingAlongFromThreeInt(reflexivMarker,leftMarker,maxKmerSize+3); + if (highestLeftMarker==1) { + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1); + }else{ + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3); + } HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); } } else { - if (leftMarker <= param.minErrorCoverage && highestLeftMarker >= param.minRepeatFold * leftMarker && leftMarker > 1) { + if (leftMarker <= param.minErrorCoverage && highestLeftMarker >= param.minRepeatFold * leftMarker) { subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1); reflexivMarker=getReflexivMarker(subKmer.getLong(1)); leftMarker=getLeftMarker(subKmer.getLong(1)); @@ -493,12 +505,18 @@ public Iterator call(Iterator s) { RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); } else { + if (leftMarker == 1){ + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, -1); + }else { + attribute = buildingAlongFromThreeInt(reflexivMarker, leftMarker, maxKmerSize + 3); + } + subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1); reflexivMarker=getReflexivMarker(subKmer.getLong(1)); leftMarker=getLeftMarker(subKmer.getLong(1)); // rightMarker=getRightMarker(subKmer.getLong(1)); currentSubKmerSize=currentKmerSizeFromBinaryBlockArray(subKmerArray); - attribute = buildingAlongFromThreeInt(reflexivMarker,leftMarker,maxKmerSize+3); + HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); @@ -704,15 +722,19 @@ public Iterator call(Iterator s) { int highestLeftMarker = getLeftMarker(HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getLong(1)); if (subKmerSlotComparator(subKmer.getSeq(0), HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getSeq(0)) == true) { if (leftMarker > HighCoverLastCoverage) { - if (HighCoverLastCoverage <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * HighCoverLastCoverage && HighCoverLastCoverage >1) { + if (HighCoverLastCoverage <= param.minErrorCoverage && leftMarker >= param.minRepeatFold * HighCoverLastCoverage ) { HighCoverLastCoverage = leftMarker; attribute = buildingAlongFromThreeInt(reflexivMarker, -1, rightMarker); HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); } else { + if (HighCoverLastCoverage == 1){ + attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker); + }else{ + attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker); + } HighCoverLastCoverage = leftMarker; - attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker); HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) ); @@ -724,7 +746,11 @@ public Iterator call(Iterator s) { Long HighCoverageSubKmerFirstSuffix = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1).getLong(2) >>> 2 * (32-HighCoverageSubKmerFirstSuffixLength); if (subKmerFirstSuffix.compareTo(HighCoverageSubKmerFirstSuffix) > 0) { - attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker); + if (HighCoverLastCoverage==1){ // both kmer coverage are 1, consider one with sequencing error + attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, -1); + }else { + attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, rightMarker); + } HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2)) @@ -743,7 +769,7 @@ public Iterator call(Iterator s) { ); } } else { - if (leftMarker <= param.minErrorCoverage && HighCoverLastCoverage >= param.minRepeatFold * leftMarker && leftMarker > 1) { + if (leftMarker <= param.minErrorCoverage && HighCoverLastCoverage >= param.minRepeatFold * leftMarker) { subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1); reflexivMarker=getReflexivMarker(subKmer.getLong(1)); @@ -756,12 +782,18 @@ public Iterator call(Iterator s) { } else { subKmer = HighCoverageSubKmer.get(HighCoverageSubKmer.size() - 1); + if (leftMarker ==1 ){ + attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, -1); + }else { + attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize + 3, rightMarker); + } + reflexivMarker=getReflexivMarker(subKmer.getLong(1)); //leftMarker=getLeftMarker(subKmer.getLong(1)); rightMarker=getRightMarker(subKmer.getLong(1)); currentSubKmerSize=currentKmerSizeFromBinaryBlockArray(subKmerArray); - attribute = buildingAlongFromThreeInt(reflexivMarker, maxKmerSize+3, rightMarker); + HighCoverageSubKmer.set(HighCoverageSubKmer.size() - 1, RowFactory.create(subKmer.getSeq(0), attribute, subKmer.getLong(2))