From 0193f2f4a38343e32631264df1cd8a9c1788afd7 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Sat, 23 Sep 2023 16:45:04 +0200 Subject: [PATCH] manage path range boundaries --- src/subcommand/extract_main.cpp | 36 ++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/subcommand/extract_main.cpp b/src/subcommand/extract_main.cpp index e4cc7657..ce0ecc02 100644 --- a/src/subcommand/extract_main.cpp +++ b/src/subcommand/extract_main.cpp @@ -477,11 +477,37 @@ namespace odgi { if (show_progress) { progress->increment(1); } - algorithms::for_handle_in_path_range( - source, path_handle, path_range.begin.offset, path_range.end.offset, - [&](const handle_t& handle) { - keep_bv.set(source.get_id(handle) - shift); - }); + + // The extraction does not cut nodes, so the input path ranges have to be + // extended if their ranges (start, end) fall in the middle of the nodes. + bool first = true; + uint64_t new_start = 0; + uint64_t new_end = 0; + + const uint64_t start = path_range.begin.offset; + const uint64_t end = path_range.end.offset; + + uint64_t walked = 0; + const auto path_end = source.path_end(path_handle); + for (step_handle_t cur_step = source.path_begin(path_handle); + cur_step != path_end && walked < end; cur_step = source.get_next_step(cur_step)) { + const handle_t cur_handle = source.get_handle_of_step(cur_step); + walked += source.get_length(cur_handle); + if (walked > start) { + keep_bv.set(source.get_id(cur_handle) - shift); + + if (first) { + first = false; + new_start = walked - source.get_length(cur_handle); + } + } + } + new_end = walked; + + // Extend path range to entirely include the first and the last node of the range. + // This is important to produce path names with the correct path ranges. + path_range.begin.offset = new_start; + path_range.end.offset = new_end; } if (!pangenomic_ranges.empty()) { uint64_t pos = 0;