From d85b9ada48897ab424d7498f9afe41686a0fd8c8 Mon Sep 17 00:00:00 2001 From: russell <09689835g@connect.polyu.hk> Date: Mon, 18 Oct 2021 15:09:54 +0800 Subject: [PATCH 1/3] incrementally remove explicit calls to free memory (transformSequences/createReverseCopy/and part of AlignmentData) --- edlib/src/edlib.cpp | 70 +++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/edlib/src/edlib.cpp b/edlib/src/edlib.cpp index e19f119..05a4786 100644 --- a/edlib/src/edlib.cpp +++ b/edlib/src/edlib.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -20,8 +21,7 @@ struct AlignmentData { Word* Ps; Word* Ms; int* scores; - int* firstBlocks; - int* lastBlocks; + std::vector firstBlocks, lastBlocks; AlignmentData(int maxNumBlocks, int targetLength) { // We build a complete table and mark first and last block for each column @@ -30,16 +30,14 @@ struct AlignmentData { Ps = new Word[maxNumBlocks * targetLength]; Ms = new Word[maxNumBlocks * targetLength]; scores = new int[maxNumBlocks * targetLength]; - firstBlocks = new int[targetLength]; - lastBlocks = new int[targetLength]; + firstBlocks.resize(targetLength); + lastBlocks.resize(targetLength); } ~AlignmentData() { delete[] Ps; delete[] Ms; delete[] scores; - delete[] firstBlocks; - delete[] lastBlocks; } }; @@ -122,12 +120,12 @@ static int obtainAlignmentTraceback(int queryLength, int targetLength, static string transformSequences(const char* queryOriginal, int queryLength, const char* targetOriginal, int targetLength, - unsigned char** queryTransformed, - unsigned char** targetTransformed); + std::vector & queryTransformed, + std::vector & targetTransformed); static inline int ceilDiv(int x, int y); -static inline unsigned char* createReverseCopy(const unsigned char* seq, int length); +static inline std::unique_ptr createReverseCopy(const unsigned char* seq, int length); static inline Word* buildPeq(const int alphabetLength, const unsigned char* query, @@ -151,9 +149,9 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in result.alphabetLength = 0; /*------------ TRANSFORM SEQUENCES AND RECOGNIZE ALPHABET -----------*/ - unsigned char* query, * target; + std::vector query, target; string alphabet = transformSequences(queryOriginal, queryLength, targetOriginal, targetLength, - &query, &target); + query, target); result.alphabetLength = static_cast(alphabet.size()); /*-------------------------------------------------------*/ @@ -173,8 +171,6 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in result.status = EDLIB_STATUS_ERROR; } - free(query); - free(target); return result; } @@ -182,7 +178,7 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); // bmax in Myers int W = maxNumBlocks * WORD_SIZE - queryLength; // number of redundant cells in last level blocks EqualityDefinition equalityDefinition(alphabet, config.additionalEqualities, config.additionalEqualitiesLength); - Word* Peq = buildPeq(static_cast(alphabet.size()), query, queryLength, equalityDefinition); + Word* Peq = buildPeq(static_cast(alphabet.size()), query.data(), queryLength, equalityDefinition); /*-------------------------------------------------------*/ /*------------------ MAIN CALCULATION -------------------*/ @@ -199,12 +195,12 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in do { if (config.mode == EDLIB_MODE_HW || config.mode == EDLIB_MODE_SHW) { myersCalcEditDistanceSemiGlobal(Peq, W, maxNumBlocks, - queryLength, target, targetLength, + queryLength, target.data(), targetLength, k, config.mode, &(result.editDistance), &(result.endLocations), &(result.numLocations)); } else { // mode == EDLIB_MODE_NW myersCalcEditDistanceNW(Peq, W, maxNumBlocks, - queryLength, target, targetLength, + queryLength, target.data(), targetLength, k, &(result.editDistance), &positionNW, false, &alignData, -1); } @@ -223,10 +219,10 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in if (config.task == EDLIB_TASK_LOC || config.task == EDLIB_TASK_PATH) { result.startLocations = static_cast(malloc(result.numLocations * sizeof(int))); if (config.mode == EDLIB_MODE_HW) { // If HW, I need to calculate start locations. - const unsigned char* rTarget = createReverseCopy(target, targetLength); - const unsigned char* rQuery = createReverseCopy(query, queryLength); + auto rTarget = createReverseCopy(target.data(), targetLength); + auto rQuery = createReverseCopy(query.data(), queryLength); // Peq for reversed query. - Word* rPeq = buildPeq(static_cast(alphabet.size()), rQuery, queryLength, equalityDefinition); + Word* rPeq = buildPeq(static_cast(alphabet.size()), rQuery.get(), queryLength, equalityDefinition); for (int i = 0; i < result.numLocations; i++) { int endLocation = result.endLocations[i]; if (endLocation == -1) { @@ -247,7 +243,7 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in int* positionsSHW; myersCalcEditDistanceSemiGlobal( rPeq, W, maxNumBlocks, - queryLength, rTarget + targetLength - endLocation - 1, endLocation + 1, + queryLength, rTarget.get() + targetLength - endLocation - 1, endLocation + 1, result.editDistance, EDLIB_MODE_SHW, &bestScoreSHW, &positionsSHW, &numPositionsSHW); // Taking last location as start ensures that alignment will not start with insertions @@ -256,8 +252,6 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in free(positionsSHW); } } - delete[] rTarget; - delete[] rQuery; delete[] rPeq; } else { // If mode is SHW or NW for (int i = 0; i < result.numLocations; i++) { @@ -271,24 +265,20 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in if (config.task == EDLIB_TASK_PATH) { int alnStartLocation = result.startLocations[0]; int alnEndLocation = result.endLocations[0]; - const unsigned char* alnTarget = target + alnStartLocation; + const unsigned char* alnTarget = target.data() + alnStartLocation; const int alnTargetLength = alnEndLocation - alnStartLocation + 1; - const unsigned char* rAlnTarget = createReverseCopy(alnTarget, alnTargetLength); - const unsigned char* rQuery = createReverseCopy(query, queryLength); - obtainAlignment(query, rQuery, queryLength, - alnTarget, rAlnTarget, alnTargetLength, + auto rAlnTarget = createReverseCopy(alnTarget, alnTargetLength); + auto rQuery = createReverseCopy(query.data(), queryLength); + obtainAlignment(query.data(), rQuery.get(), queryLength, + alnTarget, rAlnTarget.get(), alnTargetLength, equalityDefinition, static_cast(alphabet.size()), result.editDistance, &(result.alignment), &(result.alignmentLength)); - delete[] rAlnTarget; - delete[] rQuery; } } /*-------------------------------------------------------*/ //--- Free memory ---// delete[] Peq; - free(query); - free(target); if (alignData) delete alignData; //-------------------// @@ -383,10 +373,9 @@ static inline Word* buildPeq(const int alphabetLength, /** * Returns new sequence that is reverse of given sequence. - * Free returned array with delete[]. */ -static inline unsigned char* createReverseCopy(const unsigned char* const seq, const int length) { - unsigned char* rSeq = new unsigned char[length]; +static inline std::unique_ptr createReverseCopy(const unsigned char* const seq, const int length) { + std::unique_ptr rSeq(new unsigned char[length]); for (int i = 0; i < length; i++) { rSeq[i] = seq[length - i - 1]; } @@ -1403,7 +1392,6 @@ static int obtainAlignmentHirschberg( * Takes char query and char target, recognizes alphabet and transforms them into unsigned char sequences * where elements in sequences are not any more letters of alphabet, but their index in alphabet. * Most of internal edlib functions expect such transformed sequences. - * This function will allocate queryTransformed and targetTransformed, so make sure to free them when done. * Example: * Original sequences: "ACT" and "CGT". * Alphabet would be recognized as "ACTG". Alphabet length = 4. @@ -1419,14 +1407,14 @@ static int obtainAlignmentHirschberg( */ static string transformSequences(const char* const queryOriginal, const int queryLength, const char* const targetOriginal, const int targetLength, - unsigned char** const queryTransformed, - unsigned char** const targetTransformed) { + std::vector & queryTransformed, + std::vector & targetTransformed) { // Alphabet is constructed from letters that are present in sequences. // Each letter is assigned an ordinal number, starting from 0 up to alphabetLength - 1, // and new query and target are created in which letters are replaced with their ordinal numbers. // This query and target are used in all the calculations later. - *queryTransformed = static_cast(malloc(sizeof(unsigned char) * queryLength)); - *targetTransformed = static_cast(malloc(sizeof(unsigned char) * targetLength)); + queryTransformed.resize(queryLength); + targetTransformed.resize(targetLength); string alphabet = ""; @@ -1443,7 +1431,7 @@ static string transformSequences(const char* const queryOriginal, const int quer letterIdx[c] = static_cast(alphabet.size()); alphabet += queryOriginal[i]; } - (*queryTransformed)[i] = letterIdx[c]; + queryTransformed[i] = letterIdx[c]; } for (int i = 0; i < targetLength; i++) { unsigned char c = static_cast(targetOriginal[i]); @@ -1452,7 +1440,7 @@ static string transformSequences(const char* const queryOriginal, const int quer letterIdx[c] = static_cast(alphabet.size()); alphabet += targetOriginal[i]; } - (*targetTransformed)[i] = letterIdx[c]; + targetTransformed[i] = letterIdx[c]; } return alphabet; From 9a2876f6cf7e1976e3209df0c4992afc31e244e6 Mon Sep 17 00:00:00 2001 From: GDCCP Date: Thu, 21 Oct 2021 15:28:30 +0800 Subject: [PATCH 2/3] use std::vector for obtainAlignment result(output) and in AlignmentData struct --- edlib/src/edlib.cpp | 106 ++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 58 deletions(-) diff --git a/edlib/src/edlib.cpp b/edlib/src/edlib.cpp index 05a4786..e1fda47 100644 --- a/edlib/src/edlib.cpp +++ b/edlib/src/edlib.cpp @@ -18,26 +18,23 @@ static const int MAX_UCHAR = 255; // Data needed to find alignment. struct AlignmentData { - Word* Ps; + std::vector Ps; Word* Ms; - int* scores; - std::vector firstBlocks, lastBlocks; - - AlignmentData(int maxNumBlocks, int targetLength) { + std::vector scores; + int * firstBlocks, * lastBlocks; + + AlignmentData(int maxNumBlocks, int targetLength) : + Ps(maxNumBlocks * targetLength * 2), + Ms(Ps.data() + maxNumBlocks * targetLength), + scores((maxNumBlocks + 2)* targetLength), + firstBlocks(scores.data() + maxNumBlocks * targetLength), + lastBlocks(firstBlocks + targetLength) { // We build a complete table and mark first and last block for each column // (because algorithm is banded so only part of each columns is used). // TODO: do not build a whole table, but just enough blocks for each column. - Ps = new Word[maxNumBlocks * targetLength]; - Ms = new Word[maxNumBlocks * targetLength]; - scores = new int[maxNumBlocks * targetLength]; - firstBlocks.resize(targetLength); - lastBlocks.resize(targetLength); } ~AlignmentData() { - delete[] Ps; - delete[] Ms; - delete[] scores; } }; @@ -106,17 +103,17 @@ static int obtainAlignment( const unsigned char* query, const unsigned char* rQuery, int queryLength, const unsigned char* target, const unsigned char* rTarget, int targetLength, const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore, - unsigned char** alignment, int* alignmentLength); + std::vector & alignment); static int obtainAlignmentHirschberg( const unsigned char* query, const unsigned char* rQuery, int queryLength, const unsigned char* target, const unsigned char* rTarget, int targetLength, const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore, - unsigned char** alignment, int* alignmentLength); + std::vector & alignment); static int obtainAlignmentTraceback(int queryLength, int targetLength, int bestScore, const AlignmentData* alignData, - unsigned char** alignment, int* alignmentLength); + std::vector & alignment); static string transformSequences(const char* queryOriginal, int queryLength, const char* targetOriginal, int targetLength, @@ -269,10 +266,16 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in const int alnTargetLength = alnEndLocation - alnStartLocation + 1; auto rAlnTarget = createReverseCopy(alnTarget, alnTargetLength); auto rQuery = createReverseCopy(query.data(), queryLength); + std::vector alignment; obtainAlignment(query.data(), rQuery.get(), queryLength, alnTarget, rAlnTarget.get(), alnTargetLength, equalityDefinition, static_cast(alphabet.size()), result.editDistance, - &(result.alignment), &(result.alignmentLength)); + alignment); + result.alignmentLength = int(alignment.size()); + if (result.alignmentLength) { + result.alignment = new unsigned char[result.alignmentLength]; + ::memcpy(result.alignment,alignment.data(),result.alignmentLength * sizeof(unsigned char)); + } } } /*-------------------------------------------------------*/ @@ -928,17 +931,16 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int * @param [in] bestScore Best score. * @param [in] alignData Data obtained during finding best score that is useful for finding alignment. * @param [out] alignment Alignment. - * @param [out] alignmentLength Length of alignment. * @return Status code. */ static int obtainAlignmentTraceback(const int queryLength, const int targetLength, const int bestScore, const AlignmentData* const alignData, - unsigned char** const alignment, int* const alignmentLength) { + std::vector & alignment) { const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); const int W = maxNumBlocks * WORD_SIZE - queryLength; - *alignment = static_cast(malloc((queryLength + targetLength - 1) * sizeof(unsigned char))); - *alignmentLength = 0; + alignment.reserve(queryLength + targetLength - 1); + //*alignmentLength = 0; int c = targetLength - 1; // index of column int b = maxNumBlocks - 1; // index of block in column int currScore = bestScore; // Score of current cell @@ -1015,9 +1017,9 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt uScore = ulScore = -1; if (blockPos == 0) { // If entering new (upper) block if (b == 0) { // If there are no cells above (only boundary cells) - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT; // Move up + alignment.push_back(EDLIB_EDOP_INSERT); // Move up for (int i = 0; i < c + 1; i++) // Move left until end - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE; + alignment.push_back(EDLIB_EDOP_DELETE); break; } else { blockPos = WORD_SIZE - 1; @@ -1040,7 +1042,7 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt lM <<= 1; } // Mark move - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT; + alignment.push_back(EDLIB_EDOP_INSERT); } // Move left - deletion from target - insertion to query else if (lScore != -1 && lScore + 1 == currScore) { @@ -1049,10 +1051,10 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt lScore = ulScore = -1; c--; if (c == -1) { // If there are no cells to the left (only boundary cells) - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE; // Move left + alignment.push_back(EDLIB_EDOP_DELETE); // Move left int numUp = b * WORD_SIZE + blockPos + 1; for (int i = 0; i < numUp; i++) // Move up until end - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT; + alignment.push_back(EDLIB_EDOP_INSERT); break; } currP = lP; @@ -1071,7 +1073,7 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt } } // Mark move - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE; + alignment.push_back(EDLIB_EDOP_DELETE); } // Move up left - (mis)match else if (ulScore != -1) { @@ -1080,17 +1082,17 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt uScore = lScore = ulScore = -1; c--; if (c == -1) { // If there are no cells to the left (only boundary cells) - (*alignment)[(*alignmentLength)++] = moveCode; // Move left + alignment.push_back(moveCode); // Move left int numUp = b * WORD_SIZE + blockPos; for (int i = 0; i < numUp; i++) // Move up until end - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT; + alignment.push_back(EDLIB_EDOP_INSERT); break; } if (blockPos == 0) { // If entering upper left block if (b == 0) { // If there are no more cells above (only boundary cells) - (*alignment)[(*alignmentLength)++] = moveCode; // Move up left + alignment.push_back(moveCode); // Move up left for (int i = 0; i < c + 1; i++) // Move left until end - (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE; + alignment.push_back(EDLIB_EDOP_DELETE); break; } blockPos = WORD_SIZE - 1; @@ -1119,7 +1121,7 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt } } // Mark move - (*alignment)[(*alignmentLength)++] = moveCode; + alignment.push_back(moveCode); } else { // Reached end - finished! break; @@ -1127,8 +1129,8 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt //----------------------------------// } - *alignment = static_cast(realloc(*alignment, (*alignmentLength) * sizeof(unsigned char))); - reverse(*alignment, *alignment + (*alignmentLength)); + //*alignment = static_cast(realloc(*alignment, (*alignmentLength) * sizeof(unsigned char))); + std::reverse(alignment.begin(), alignment.end()); return EDLIB_STATUS_OK; } @@ -1147,22 +1149,17 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt * @param [in] alphabetLength * @param [in] bestScore Best(optimal) score. * @param [out] alignment Sequence of edit operations that make target equal to query. - * @param [out] alignmentLength Length of alignment. * @return Status code. */ static int obtainAlignment( const unsigned char* const query, const unsigned char* const rQuery, const int queryLength, const unsigned char* const target, const unsigned char* const rTarget, const int targetLength, const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore, - unsigned char** const alignment, int* const alignmentLength) { + std::vector & alignment) { // Handle special case when one of sequences has length of 0. if (queryLength == 0 || targetLength == 0) { - *alignmentLength = targetLength + queryLength; - *alignment = static_cast(malloc((*alignmentLength) * sizeof(unsigned char))); - for (int i = 0; i < *alignmentLength; i++) { - (*alignment)[i] = queryLength == 0 ? EDLIB_EDOP_DELETE : EDLIB_EDOP_INSERT; - } + alignment.resize(targetLength + queryLength,queryLength == 0 ? EDLIB_EDOP_DELETE : EDLIB_EDOP_INSERT); return EDLIB_STATUS_OK; } @@ -1192,14 +1189,14 @@ static int obtainAlignment( //assert(endLocation_ == targetLength - 1); statusCode = obtainAlignmentTraceback(queryLength, targetLength, - bestScore, alignData, alignment, alignmentLength); + bestScore, alignData, alignment); delete alignData; delete[] Peq; } else { statusCode = obtainAlignmentHirschberg(query, rQuery, queryLength, target, rTarget, targetLength, equalityDefinition, alphabetLength, bestScore, - alignment, alignmentLength); + alignment); } return statusCode; } @@ -1217,14 +1214,13 @@ static int obtainAlignment( * @param [in] alphabetLength * @param [in] bestScore Best(optimal) score. * @param [out] alignment Sequence of edit operations that make target equal to query. - * @param [out] alignmentLength Length of alignment. * @return Status code. */ static int obtainAlignmentHirschberg( const unsigned char* const query, const unsigned char* const rQuery, const int queryLength, const unsigned char* const target, const unsigned char* const rTarget, const int targetLength, const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore, - unsigned char** const alignment, int* const alignmentLength) { + std::vector& alignment) { const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); const int W = maxNumBlocks * WORD_SIZE - queryLength; @@ -1360,30 +1356,24 @@ static int obtainAlignmentHirschberg( const int lrHeight = queryLength - ulHeight; const int ulWidth = leftHalfWidth; const int lrWidth = rightHalfWidth; - unsigned char* ulAlignment = NULL; int ulAlignmentLength; + std::vector ulAlignment; int ulStatusCode = obtainAlignment(query, rQuery + lrHeight, ulHeight, target, rTarget + lrWidth, ulWidth, equalityDefinition, alphabetLength, leftScore, - &ulAlignment, &ulAlignmentLength); - unsigned char* lrAlignment = NULL; int lrAlignmentLength; + ulAlignment); + std::vector lrAlignment; int lrStatusCode = obtainAlignment(query + ulHeight, rQuery, lrHeight, target + ulWidth, rTarget, lrWidth, equalityDefinition, alphabetLength, rightScore, - &lrAlignment, &lrAlignmentLength); + lrAlignment); if (ulStatusCode == EDLIB_STATUS_ERROR || lrStatusCode == EDLIB_STATUS_ERROR) { - if (ulAlignment) free(ulAlignment); - if (lrAlignment) free(lrAlignment); return EDLIB_STATUS_ERROR; } // Build alignment by concatenating upper left alignment with lower right alignment. - *alignmentLength = ulAlignmentLength + lrAlignmentLength; - *alignment = static_cast(malloc((*alignmentLength) * sizeof(unsigned char))); - memcpy(*alignment, ulAlignment, ulAlignmentLength); - memcpy(*alignment + ulAlignmentLength, lrAlignment, lrAlignmentLength); + alignment.resize(ulAlignment.size()+lrAlignment.size()); + std::copy(lrAlignment.begin(),lrAlignment.end(),std::copy(ulAlignment.begin(), ulAlignment.end(),alignment.begin())); - free(ulAlignment); - free(lrAlignment); return EDLIB_STATUS_OK; } @@ -1466,5 +1456,5 @@ extern "C" EdlibAlignConfig edlibDefaultAlignConfig(void) { extern "C" void edlibFreeAlignResult(EdlibAlignResult result) { if (result.endLocations) free(result.endLocations); if (result.startLocations) free(result.startLocations); - if (result.alignment) free(result.alignment); + if (result.alignment) delete [] result.alignment; } From 3021edee56cf8442d953509019dc68868daa27c1 Mon Sep 17 00:00:00 2001 From: GDCCP Date: Sat, 23 Oct 2021 04:42:25 +0800 Subject: [PATCH 3/3] using vector to make memory management implicit in buildPeq and parts in calcEditDistance --- edlib/src/edlib.cpp | 87 ++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 52 deletions(-) diff --git a/edlib/src/edlib.cpp b/edlib/src/edlib.cpp index e1fda47..0cb900c 100644 --- a/edlib/src/edlib.cpp +++ b/edlib/src/edlib.cpp @@ -124,7 +124,7 @@ static inline int ceilDiv(int x, int y); static inline std::unique_ptr createReverseCopy(const unsigned char* seq, int length); -static inline Word* buildPeq(const int alphabetLength, +static inline std::vector buildPeq(const int alphabetLength, const unsigned char* query, const int queryLength, const EqualityDefinition& equalityDefinition); @@ -175,7 +175,7 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); // bmax in Myers int W = maxNumBlocks * WORD_SIZE - queryLength; // number of redundant cells in last level blocks EqualityDefinition equalityDefinition(alphabet, config.additionalEqualities, config.additionalEqualitiesLength); - Word* Peq = buildPeq(static_cast(alphabet.size()), query.data(), queryLength, equalityDefinition); + const auto & Peq = buildPeq(static_cast(alphabet.size()), query.data(), queryLength, equalityDefinition); /*-------------------------------------------------------*/ /*------------------ MAIN CALCULATION -------------------*/ @@ -191,12 +191,12 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in do { if (config.mode == EDLIB_MODE_HW || config.mode == EDLIB_MODE_SHW) { - myersCalcEditDistanceSemiGlobal(Peq, W, maxNumBlocks, + myersCalcEditDistanceSemiGlobal(Peq.data(), W, maxNumBlocks, queryLength, target.data(), targetLength, k, config.mode, &(result.editDistance), &(result.endLocations), &(result.numLocations)); } else { // mode == EDLIB_MODE_NW - myersCalcEditDistanceNW(Peq, W, maxNumBlocks, + myersCalcEditDistanceNW(Peq.data(), W, maxNumBlocks, queryLength, target.data(), targetLength, k, &(result.editDistance), &positionNW, false, &alignData, -1); @@ -219,7 +219,7 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in auto rTarget = createReverseCopy(target.data(), targetLength); auto rQuery = createReverseCopy(query.data(), queryLength); // Peq for reversed query. - Word* rPeq = buildPeq(static_cast(alphabet.size()), rQuery.get(), queryLength, equalityDefinition); + const auto & rPeq = buildPeq(static_cast(alphabet.size()), rQuery.get(), queryLength, equalityDefinition); for (int i = 0; i < result.numLocations; i++) { int endLocation = result.endLocations[i]; if (endLocation == -1) { @@ -239,7 +239,7 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in int bestScoreSHW, numPositionsSHW; int* positionsSHW; myersCalcEditDistanceSemiGlobal( - rPeq, W, maxNumBlocks, + rPeq.data(), W, maxNumBlocks, queryLength, rTarget.get() + targetLength - endLocation - 1, endLocation + 1, result.editDistance, EDLIB_MODE_SHW, &bestScoreSHW, &positionsSHW, &numPositionsSHW); @@ -249,7 +249,6 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in free(positionsSHW); } } - delete[] rPeq; } else { // If mode is SHW or NW for (int i = 0; i < result.numLocations; i++) { result.startLocations[i] = 0; @@ -281,7 +280,6 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in /*-------------------------------------------------------*/ //--- Free memory ---// - delete[] Peq; if (alignData) delete alignData; //-------------------// @@ -301,7 +299,7 @@ extern "C" char* edlibAlignmentToCigar(const unsigned char* const alignment, con moveCodeToChar[0] = moveCodeToChar[3] = 'M'; } - vector* cigar = new vector(); + vector cigar; char lastMove = 0; // Char of last move. 0 if there was no previous move. int numOfSameMoves = 0; for (int i = 0; i <= alignmentLength; i++) { @@ -310,17 +308,16 @@ extern "C" char* edlibAlignmentToCigar(const unsigned char* const alignment, con // Write number of moves to cigar string. int numDigits = 0; for (; numOfSameMoves; numOfSameMoves /= 10) { - cigar->push_back('0' + numOfSameMoves % 10); + cigar.push_back('0' + numOfSameMoves % 10); numDigits++; } - reverse(cigar->end() - numDigits, cigar->end()); + reverse(cigar.end() - numDigits, cigar.end()); // Write code of move to cigar string. - cigar->push_back(lastMove); + cigar.push_back(lastMove); // If not at the end, start new sequence of moves. if (i < alignmentLength) { // Check if alignment has valid values. if (alignment[i] > 3) { - delete cigar; return 0; } numOfSameMoves = 0; @@ -331,10 +328,9 @@ extern "C" char* edlibAlignmentToCigar(const unsigned char* const alignment, con numOfSameMoves++; } } - cigar->push_back(0); // Null character termination. - char* cigar_ = static_cast(malloc(cigar->size() * sizeof(char))); - memcpy(cigar_, &(*cigar)[0], cigar->size() * sizeof(char)); - delete cigar; + cigar.push_back(0); // Null character termination. + char* cigar_ = static_cast(malloc(cigar.size() * sizeof(char))); + memcpy(cigar_, cigar.data(), cigar.size() * sizeof(char)); return cigar_; } @@ -343,15 +339,14 @@ extern "C" char* edlibAlignmentToCigar(const unsigned char* const alignment, con * Build Peq table for given query and alphabet. * Peq is table of dimensions alphabetLength+1 x maxNumBlocks. * Bit i of Peq[s * maxNumBlocks + b] is 1 if i-th symbol from block b of query equals symbol s, otherwise it is 0. - * NOTICE: free returned array with delete[]! */ -static inline Word* buildPeq(const int alphabetLength, +static inline std::vector buildPeq(const int alphabetLength, const unsigned char* const query, const int queryLength, const EqualityDefinition& equalityDefinition) { int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); // table of dimensions alphabetLength+1 x maxNumBlocks. Last symbol is wildcard. - Word* Peq = new Word[(alphabetLength + 1) * maxNumBlocks]; + std::vector Peq((alphabetLength + 1) * maxNumBlocks); // Build Peq (1 is match, 0 is mismatch). NOTE: last column is wildcard(symbol that matches anything) with just 1s for (int symbol = 0; symbol <= alphabetLength; symbol++) { @@ -549,9 +544,7 @@ static int myersCalcEditDistanceSemiGlobal( // lastBlock is 0-based index of last block in Ukkonen band. int firstBlock = 0; int lastBlock = min(ceilDiv(k + 1, WORD_SIZE), maxNumBlocks) - 1; // y in Myers - Block *bl; // Current block - - Block* blocks = new Block[maxNumBlocks]; + std::vector blocks(maxNumBlocks); // For HW, solution will never be larger then queryLength. if (mode == EDLIB_MODE_HW) { @@ -560,10 +553,10 @@ static int myersCalcEditDistanceSemiGlobal( // Each STRONG_REDUCE_NUM column is reduced in more expensive way. // This gives speed up of about 2 times for small k. - const int STRONG_REDUCE_NUM = 2048; + constexpr int STRONG_REDUCE_NUM = 2048; // Initialize P, M and score - bl = blocks; + Block *bl = blocks.data(); // Current block for (int b = 0; b <= lastBlock; b++) { bl->score = (b + 1) * WORD_SIZE; bl->P = static_cast(-1); // All 1s @@ -580,7 +573,7 @@ static int myersCalcEditDistanceSemiGlobal( //----------------------- Calculate column -------------------------// int hout = startHout; - bl = blocks + firstBlock; + bl = blocks.data() + firstBlock; Peq_c += firstBlock; for (int b = firstBlock; b <= lastBlock; b++) { hout = calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M); @@ -641,7 +634,6 @@ static int myersCalcEditDistanceSemiGlobal( *numPositions_ = static_cast(positions.size()); copy(positions.begin(), positions.end(), *positions_); } - delete[] blocks; return EDLIB_STATUS_OK; } //------------------------------------------------------------------// @@ -691,7 +683,6 @@ static int myersCalcEditDistanceSemiGlobal( copy(positions.begin(), positions.end(), *positions_); } - delete[] blocks; return EDLIB_STATUS_OK; } @@ -745,12 +736,10 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int int firstBlock = 0; // This is optimal now, by my formula. int lastBlock = min(maxNumBlocks, ceilDiv(min(k, (k + queryLength - targetLength) / 2) + 1, WORD_SIZE)) - 1; - Block* bl; // Current block - - Block* blocks = new Block[maxNumBlocks]; + std::vector blocks(maxNumBlocks); // Initialize P, M and score - bl = blocks; + Block* bl = blocks.data(); // Current block for (int b = 0; b <= lastBlock; b++) { bl->score = (b + 1) * WORD_SIZE; bl->P = static_cast(-1); // All 1s @@ -772,7 +761,7 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int //----------------------- Calculate column -------------------------// int hout = 1; - bl = blocks + firstBlock; + bl = blocks.data() + firstBlock; for (int b = firstBlock; b <= lastBlock; b++) { hout = calculateBlock(bl->P, bl->M, Peq_c[b], hout, bl->P, bl->M); bl->score += hout; @@ -868,7 +857,6 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int // If band stops to exist finish if (lastBlock < firstBlock) { *bestScore_ = *position_ = -1; - delete[] blocks; return EDLIB_STATUS_OK; } //------------------------------------------------------------------// @@ -876,7 +864,7 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int //---- Save column so it can be used for reconstruction ----// if (findAlignment && c < targetLength) { - bl = blocks + firstBlock; + bl = blocks.data() + firstBlock; for (int b = firstBlock; b <= lastBlock; b++) { (*alignData)->Ps[maxNumBlocks * c + b] = bl->P; (*alignData)->Ms[maxNumBlocks * c + b] = bl->M; @@ -890,15 +878,14 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int //---- If this is stop column, save it and finish ----// if (c == targetStopPosition) { for (int b = firstBlock; b <= lastBlock; b++) { - (*alignData)->Ps[b] = (blocks + b)->P; - (*alignData)->Ms[b] = (blocks + b)->M; - (*alignData)->scores[b] = (blocks + b)->score; + (*alignData)->Ps[b] = blocks[b].P; + (*alignData)->Ms[b] = blocks[b].M; + (*alignData)->scores[b] = blocks[b].score; (*alignData)->firstBlocks[0] = firstBlock; (*alignData)->lastBlocks[0] = lastBlock; } *bestScore_ = -1; *position_ = targetStopPosition; - delete[] blocks; return EDLIB_STATUS_OK; } //----------------------------------------------------// @@ -912,13 +899,11 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int if (bestScore <= k) { *bestScore_ = bestScore; *position_ = targetLength - 1; - delete[] blocks; return EDLIB_STATUS_OK; } } *bestScore_ = *position_ = -1; - delete[] blocks; return EDLIB_STATUS_OK; } @@ -1129,7 +1114,9 @@ static int obtainAlignmentTraceback(const int queryLength, const int targetLengt //----------------------------------// } - //*alignment = static_cast(realloc(*alignment, (*alignmentLength) * sizeof(unsigned char))); + // realloc was used to shrink alignment -- a C array. + // shrink_to_fit could be used to do what realloc does, but alignment will be destroyed very soon + // as storage in vector won't be passed outside to the extern "C" functions client; hence not doing it. std::reverse(alignment.begin(), alignment.end()); return EDLIB_STATUS_OK; } @@ -1179,8 +1166,8 @@ static int obtainAlignment( if (alignmentDataSize < 1024 * 1024) { int score_, endLocation_; // Used only to call function. AlignmentData* alignData = NULL; - Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition); - myersCalcEditDistanceNW(Peq, W, maxNumBlocks, + const auto & Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition); + myersCalcEditDistanceNW(Peq.data(), W, maxNumBlocks, queryLength, target, targetLength, bestScore, @@ -1191,7 +1178,6 @@ static int obtainAlignment( statusCode = obtainAlignmentTraceback(queryLength, targetLength, bestScore, alignData, alignment); delete alignData; - delete[] Peq; } else { statusCode = obtainAlignmentHirschberg(query, rQuery, queryLength, target, rTarget, targetLength, @@ -1225,8 +1211,8 @@ static int obtainAlignmentHirschberg( const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); const int W = maxNumBlocks * WORD_SIZE - queryLength; - Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition); - Word* rPeq = buildPeq(alphabetLength, rQuery, queryLength, equalityDefinition); + const auto & Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition); + const auto & rPeq = buildPeq(alphabetLength, rQuery, queryLength, equalityDefinition); // Used only to call functions. int score_, endLocation_; @@ -1238,18 +1224,15 @@ static int obtainAlignmentHirschberg( // Calculate left half. AlignmentData* alignDataLeftHalf = NULL; int leftHalfCalcStatus = myersCalcEditDistanceNW( - Peq, W, maxNumBlocks, queryLength, target, targetLength, bestScore, + Peq.data(), W, maxNumBlocks, queryLength, target, targetLength, bestScore, &score_, &endLocation_, false, &alignDataLeftHalf, leftHalfWidth - 1); // Calculate right half. AlignmentData* alignDataRightHalf = NULL; int rightHalfCalcStatus = myersCalcEditDistanceNW( - rPeq, W, maxNumBlocks, queryLength, rTarget, targetLength, bestScore, + rPeq.data(), W, maxNumBlocks, queryLength, rTarget, targetLength, bestScore, &score_, &endLocation_, false, &alignDataRightHalf, rightHalfWidth - 1); - delete[] Peq; - delete[] rPeq; - if (leftHalfCalcStatus == EDLIB_STATUS_ERROR || rightHalfCalcStatus == EDLIB_STATUS_ERROR) { if (alignDataLeftHalf) delete alignDataLeftHalf; if (alignDataRightHalf) delete alignDataRightHalf;