From 7f5c0003a9135add5a2b6f14ab822e527b3df547 Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Mon, 9 Dec 2024 01:45:10 +0700 Subject: [PATCH] Fix memory issue --- src/linclust/kmermatcher.cpp | 12 ++++++++---- src/prefiltering/IndexBuilder.cpp | 18 ++++++++++-------- src/util/makepaddedseqdb.cpp | 3 +-- src/util/masksequence.cpp | 2 +- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/linclust/kmermatcher.cpp b/src/linclust/kmermatcher.cpp index 5d898826..8bdf737e 100644 --- a/src/linclust/kmermatcher.cpp +++ b/src/linclust/kmermatcher.cpp @@ -63,10 +63,7 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz size_t offset = 0; int querySeqType = seqDbr.getDbtype(); size_t longestKmer = par.kmerSize; - Masker *masker = NULL; - if (par.maskMode == 1) { - masker = new Masker(*subMat); - } + ScoreMatrix two; ScoreMatrix three; @@ -85,6 +82,10 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz unsigned short * scoreDist= new unsigned short[65536]; unsigned int * hierarchicalScoreDist= new unsigned int[128]; + Masker *masker = NULL; + if (par.maskMode == 1) { + masker = new Masker(*subMat); + } const int adjustedKmerSize = (par.adjustKmerLength) ? std::min( par.kmerSize+5, 23) : par.kmerSize; Sequence seq(par.maxSeqLen, querySeqType, subMat, adjustedKmerSize, par.spacedKmer, false, true, par.spacedKmerPattern); KmerGenerator* generator; @@ -336,6 +337,9 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz if (thread_idx == 0) { seqDbr.remapData(); } + if (masker != NULL) { + delete masker; + } #pragma omp barrier } diff --git a/src/prefiltering/IndexBuilder.cpp b/src/prefiltering/IndexBuilder.cpp index fa2ff7df..81965a0b 100644 --- a/src/prefiltering/IndexBuilder.cpp +++ b/src/prefiltering/IndexBuilder.cpp @@ -81,11 +81,6 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL EXIT(EXIT_FAILURE); } - // need to prune low scoring k-mers through masking - Masker *masker = NULL; - if (maskedLookup != NULL) { - masker = new Masker(subMat); - } // identical scores for memory reduction code char *idScoreLookup = getScoreLookup(subMat); @@ -99,6 +94,12 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL #ifdef OPENMP thread_idx = static_cast(omp_get_thread_num()); #endif + // need to prune low scoring k-mers through masking + Masker *masker = NULL; + if (maskedLookup != NULL) { + masker = new Masker(subMat); + } + Indexer idxer(static_cast(indexTable->getAlphabetSize()), seq->getKmerSize()); Sequence s(seq->getMaxLen(), seq->getSeqType(), &subMat, seq->getKmerSize(), seq->isSpaced(), false, true, seq->getUserSpacedKmerPattern()); @@ -159,11 +160,12 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL if (generator != NULL) { delete generator; } + if(masker != NULL) { + delete masker; + } } - if(masker != NULL) { - delete masker; - } + Debug(Debug::INFO) << "Index table: Masked residues: " << maskedResidues << "\n"; if(totalKmerCount == 0) { diff --git a/src/util/makepaddedseqdb.cpp b/src/util/makepaddedseqdb.cpp index 16b2f8fb..2fd4641d 100644 --- a/src/util/makepaddedseqdb.cpp +++ b/src/util/makepaddedseqdb.cpp @@ -31,7 +31,6 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { dbhw.open(); // need to prune low scoring k-mers through masking - Masker masker(subMat); Debug::Progress progress(dbr.getSize()); #pragma omp parallel @@ -40,7 +39,7 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { #ifdef OPENMP thread_idx = static_cast(omp_get_thread_num()); #endif - + Masker masker(subMat); std::string result; result.reserve(par.maxSeqLen); diff --git a/src/util/masksequence.cpp b/src/util/masksequence.cpp index 012568b2..b1c8a836 100644 --- a/src/util/masksequence.cpp +++ b/src/util/masksequence.cpp @@ -29,7 +29,6 @@ int masksequence(int argc, const char **argv, const Command& command) { } // need to prune low scoring k-mers through masking - Masker masker(*subMat); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, reader.getDbtype()); writer.open(); @@ -40,6 +39,7 @@ int masksequence(int argc, const char **argv, const Command& command) { #ifdef OPENMP thread_idx = (unsigned int) omp_get_thread_num(); #endif + Masker masker(*subMat); unsigned char *charSequence = new unsigned char[reader.getMaxSeqLen() + 1]; Sequence seq(reader.getMaxSeqLen(), reader.getDbtype(), subMat, 0, false, false); #pragma omp for schedule(dynamic, 1)