Skip to content

Commit

Permalink
Fixed issue #103.
Browse files Browse the repository at this point in the history
  • Loading branch information
Martinsos committed Feb 3, 2018
1 parent ee36875 commit 6967b80
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 14 deletions.
2 changes: 1 addition & 1 deletion bindings/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
name = "edlib",
description = "Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance.",
long_description = long_description,
version = "1.2.1",
version = "1.2.2",
url = "https://github.com/Martinsos/edlib",
author = "Martin Sosic",
author_email = "[email protected]",
Expand Down
37 changes: 26 additions & 11 deletions edlib/src/edlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,17 +208,32 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in
Word* rPeq = buildPeq((int) alphabet.size(), rQuery, queryLength, equalityDefinition);
for (int i = 0; i < result.numLocations; i++) {
int endLocation = result.endLocations[i];
int bestScoreSHW, numPositionsSHW;
int* positionsSHW;
myersCalcEditDistanceSemiGlobal(
rPeq, W, maxNumBlocks,
queryLength, rTarget + targetLength - endLocation - 1, endLocation + 1,
result.editDistance, EDLIB_MODE_SHW,
&bestScoreSHW, &positionsSHW, &numPositionsSHW);
// Taking last location as start ensures that alignment will not start with insertions
// if it can start with mismatches instead.
result.startLocations[i] = endLocation - positionsSHW[numPositionsSHW - 1];
free(positionsSHW);
if (endLocation == -1) {
// NOTE: Sometimes one of the optimal solutions is that the query starts before the target, like this:
//       AAGG <- target
//   CCTT     <- query
// It will never be the only optimal solution and it does not happen often; however, it is
// possible, and in that case the end location will be -1. What should we do with that?
// Should we just skip reporting such an end location, although it is a solution?
// If we do report it, what is the start location? -4? -1? Nothing?
// TODO: Figure this out. This has to do in general with how we think about start
// and end locations.
// Also, the alignment step later relies on these locations to limit the space of its
// search -> how can it do that right if these locations are negative or incorrect?
result.startLocations[i] = 0; // I put 0 for now, but it does not make much sense.
} else {
int bestScoreSHW, numPositionsSHW;
int* positionsSHW;
myersCalcEditDistanceSemiGlobal(
rPeq, W, maxNumBlocks,
queryLength, rTarget + targetLength - endLocation - 1, endLocation + 1,
result.editDistance, EDLIB_MODE_SHW,
&bestScoreSHW, &positionsSHW, &numPositionsSHW);
// Taking last location as start ensures that alignment will not start with insertions
// if it can start with mismatches instead.
result.startLocations[i] = endLocation - positionsSHW[numPositionsSHW - 1];
free(positionsSHW);
}
}
delete[] rTarget;
delete[] rQuery;
Expand Down
67 changes: 65 additions & 2 deletions test/runTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,68 @@ bool test12() {
return pass;
}

bool test13() {
    // Regression case (HW mode, path task): query "AA" against target "B".
    // One of the optimal solutions places the query entirely before the target:
    //     B
    //   AA
    // which means one of the end locations is -1.
    // The test only checks that the call succeeds and reports edit distance 2.
    const char* const target = "B";
    const char* const query = "AA";
    const int queryLength = (int) std::strlen(query);
    const int targetLength = (int) std::strlen(target);

    EdlibAlignResult result = edlibAlign(
        query, queryLength,
        target, targetLength,
        edlibNewAlignConfig(-1, EDLIB_MODE_HW, EDLIB_TASK_PATH, NULL, 0));

    bool pass = false;
    if (result.status == EDLIB_STATUS_OK) {
        pass = (result.editDistance == 2);
    }

    // Green "OK" on success, red "FAIL" otherwise.
    if (pass) {
        printf("\x1B[32m""OK""\x1B[0m\n");
    } else {
        printf("\x1B[31m""FAIL""\x1B[0m\n");
    }

    edlibFreeAlignResult(result);
    return pass;
}

bool test14() {
    // Same inputs as the HW variant of this case, but run in SHW mode (path task):
    // query "AA" against target "B". One of the optimal solutions is
    //     B
    //   AA
    // which means one of the end locations is -1.
    // The test only checks that the call succeeds and reports edit distance 2.
    const char* const target = "B";
    const char* const query = "AA";
    const int queryLength = (int) std::strlen(query);
    const int targetLength = (int) std::strlen(target);

    EdlibAlignResult result = edlibAlign(
        query, queryLength,
        target, targetLength,
        edlibNewAlignConfig(-1, EDLIB_MODE_SHW, EDLIB_TASK_PATH, NULL, 0));

    bool pass = false;
    if (result.status == EDLIB_STATUS_OK) {
        pass = (result.editDistance == 2);
    }

    // Green "OK" on success, red "FAIL" otherwise.
    if (pass) {
        printf("\x1B[32m""OK""\x1B[0m\n");
    } else {
        printf("\x1B[31m""FAIL""\x1B[0m\n");
    }

    edlibFreeAlignResult(result);
    return pass;
}

bool test15() {
    // HW mode, location task: the optimal alignment is an overlap in which the END of the
    // query ("...BBB") lines up with the START of the target ("BBB...").
    // Expected edit distance is 3.
    const char* const target = "BBBC";
    const char* const query = "AAABBB";
    const int queryLength = (int) std::strlen(query);
    const int targetLength = (int) std::strlen(target);

    EdlibAlignResult result = edlibAlign(
        query, queryLength,
        target, targetLength,
        edlibNewAlignConfig(-1, EDLIB_MODE_HW, EDLIB_TASK_LOC, NULL, 0));

    bool pass = false;
    if (result.status == EDLIB_STATUS_OK) {
        pass = (result.editDistance == 3);
    }

    // Green "OK" on success, red "FAIL" otherwise.
    if (pass) {
        printf("\x1B[32m""OK""\x1B[0m\n");
    } else {
        printf("\x1B[31m""FAIL""\x1B[0m\n");
    }

    edlibFreeAlignResult(result);
    return pass;
}

bool test16() {
    // HW mode, location task: the optimal alignment is an overlap in which the START of the
    // query ("BBB...") lines up with the END of the target ("...BBB").
    // Expected edit distance is 3.
    const char* const target = "CBBB";
    const char* const query = "BBBAAA";
    const int queryLength = (int) std::strlen(query);
    const int targetLength = (int) std::strlen(target);

    EdlibAlignResult result = edlibAlign(
        query, queryLength,
        target, targetLength,
        edlibNewAlignConfig(-1, EDLIB_MODE_HW, EDLIB_TASK_LOC, NULL, 0));

    bool pass = false;
    if (result.status == EDLIB_STATUS_OK) {
        pass = (result.editDistance == 3);
    }

    // Green "OK" on success, red "FAIL" otherwise.
    if (pass) {
        printf("\x1B[32m""OK""\x1B[0m\n");
    } else {
        printf("\x1B[31m""FAIL""\x1B[0m\n");
    }

    edlibFreeAlignResult(result);
    return pass;
}

bool testCigar() {
unsigned char alignment[] = {EDLIB_EDOP_MATCH, EDLIB_EDOP_MATCH, EDLIB_EDOP_INSERT, EDLIB_EDOP_INSERT,
EDLIB_EDOP_INSERT, EDLIB_EDOP_DELETE, EDLIB_EDOP_INSERT, EDLIB_EDOP_INSERT,
Expand Down Expand Up @@ -489,9 +551,10 @@ bool testCustomEqualityRelation() {

bool runTests() {
// TODO: make this global vector where tests have to add themselves.
int numTests = 14;
int numTests = 18;
bool (* tests [])() = {test1, test2, test3, test4, test5, test6,
test7, test8, test9, test10, test11, test12, testCigar, testCustomEqualityRelation};
test7, test8, test9, test10, test11, test12, test13, test14, test15, test16,
testCigar, testCustomEqualityRelation};

bool allTestsPassed = true;
for (int i = 0; i < numTests; i++) {
Expand Down

0 comments on commit 6967b80

Please sign in to comment.