Skip to content

Commit

Permalink
Merge pull request #320 from pangenome/generalize_odgi_extract
Browse files Browse the repository at this point in the history
`odgi extract`: make it possible to extract from a subgraph and fix impolite memory accesses
  • Loading branch information
AndreaGuarracino authored Sep 15, 2021
2 parents 4a9486d + 15b738c commit bdd1be4
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 23 deletions.
13 changes: 7 additions & 6 deletions src/algorithms/subgraph/region.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ namespace odgi {
void parse_region(const std::string& target, std::string& name, int64_t& start, int64_t& end) {
start = -1;
end = -1;
size_t foundFirstColon = target.find(":");
const size_t foundLastColon = target.find_last_of(":");

// we only have a single string, use the whole sequence as the target
if (foundFirstColon == std::string::npos) {
if (foundLastColon == std::string::npos) {
name = target;
} else {
name = target.substr(0, foundFirstColon);
size_t foundRangeDash = target.find("-", foundFirstColon);
name = target.substr(0, foundLastColon);
size_t foundRangeDash = target.find("-", foundLastColon);
if (foundRangeDash == std::string::npos) {
start = atoi(target.substr(foundFirstColon + 1).c_str());
start = atoi(target.substr(foundLastColon + 1).c_str());
end = start;
} else {
start = atoi(target.substr(foundFirstColon + 1, foundRangeDash - foundRangeDash - 1).c_str());
start = atoi(target.substr(foundLastColon + 1, foundRangeDash - foundRangeDash - 1).c_str());
end = atoi(target.substr(foundRangeDash + 1).c_str());
}
}
Expand Down
21 changes: 8 additions & 13 deletions src/subcommand/extract_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,12 @@ namespace odgi {
}
}

if (_full_range) {
if (!graph.is_optimized()) {
std::cerr
<< "[odgi::extract] error: the graph is not optimized. "
"To extract the full ranges, please run 'odgi sort' using -O, --optimize."
<< std::endl;
exit(1);
}
const uint64_t shift = graph.min_node_id();
if (graph.max_node_id() - shift >= graph.get_node_count()){
std::cerr << "[odgi::extract] error: the node IDs are not compacted. Please run 'odgi sort' using -O, --optimize to optimize the graph." << std::endl;
exit(1);
}


// Collect all candidate paths up front so the next step can be parallelized (note: not every path is necessarily present in the subgraph)
std::vector<path_handle_t> paths;
if (args::get(_path_names_file).empty()) {
Expand Down Expand Up @@ -506,13 +501,13 @@ namespace odgi {
algorithms::for_handle_in_path_range(
graph, path_handle, path_range.begin.offset, path_range.end.offset,
[&](const handle_t& handle) {
keep_bv.set(graph.get_id(handle));
keep_bv.set(graph.get_id(handle) - shift);
});
}
for (auto id : keep_bv) {
const handle_t h = graph.get_handle(id);
for (auto id_shifted : keep_bv) {
const handle_t h = graph.get_handle(id_shifted + shift);
subgraph.create_handle(graph.get_sequence(h),
id);
id_shifted + shift);
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/subcommand/viz_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,10 @@ namespace odgi {
{
std::string nucleotide_range = args::get(_nucleotide_range);
if (!nucleotide_range.empty()) {
const size_t foundFirstColon = nucleotide_range.find_last_of(':');
const size_t foundLastColon = nucleotide_range.find_last_of(':');
std::string path_name;
if (foundFirstColon != string::npos) {
path_name = nucleotide_range.substr(0, foundFirstColon);
if (foundLastColon != string::npos) {
path_name = nucleotide_range.substr(0, foundLastColon);

if (!graph.has_path(path_name)) {
std::cerr
Expand All @@ -283,7 +283,7 @@ namespace odgi {
return 1;
}

nucleotide_range = nucleotide_range.substr(foundFirstColon + 1);
nucleotide_range = nucleotide_range.substr(foundLastColon + 1);
}

const std::regex regex("-");
Expand Down

0 comments on commit bdd1be4

Please sign in to comment.