Skip to content

Commit

Permalink
Merge pull request #204 from vgteam/why-unchop-why
Browse files Browse the repository at this point in the history
Why unchop why
  • Loading branch information
ekg authored Dec 17, 2020
2 parents 706c372 + 1ec3767 commit a33c73a
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 36 deletions.
2 changes: 1 addition & 1 deletion src/algorithms/topological_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ std::vector<handle_t> topological_order(const HandleGraph* g, bool use_heads, bo
max_handle_rank = std::max(max_handle_rank,
number_bool_packing::unpack_number(found));
});
for (uint64_t i = 0; i <= max_handle_rank; ++i) {
for (uint64_t i = 0; i <= max_handle_rank+1; ++i) {
s.push_back(0);
masked_edges_bv.push_back(1);
masked_edges_bv.push_back(1);
Expand Down
10 changes: 1 addition & 9 deletions src/algorithms/unchop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@

#include "unchop.hpp"

#include <unordered_set>
#include <list>
#include <set>
#include <iostream>
#include <sstream>
#include <atomic>
#include "ips4o.hpp"

namespace odgi {
namespace algorithms {

Expand Down Expand Up @@ -388,4 +380,4 @@ namespace odgi {
return ok.load();
}
}
}
}
9 changes: 8 additions & 1 deletion src/algorithms/unchop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@
#include <handlegraph/util.hpp>
#include <handlegraph/mutable_path_deletable_handle_graph.hpp>
#include <vector>

#include <unordered_set>
#include <list>
#include <set>
#include <iostream>
#include <sstream>
#include <atomic>

#include "ips4o.hpp"
#include "simple_components.hpp"

namespace odgi {
Expand Down
5 changes: 3 additions & 2 deletions src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,9 @@ void node_t::apply_ordering(
uint64_t j = 0;
for (uint64_t i = 0; i < decoding.size(); ++i) {
uint64_t old_id = decode(i);
if (old_id) {
dec_v.push_back(get_new_id(old_id));
uint64_t new_id = old_id ? get_new_id(old_id) : 0;
if (new_id) {
dec_v.push_back(new_id);
encoding_map.push_back(j++);
} else {
// this means that the node referred to by this entry has been deleted
Expand Down
49 changes: 26 additions & 23 deletions src/odgi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ step_handle_t graph_t::get_next_step(const step_handle_t& step_handle) const {
auto step_rank = as_integers(step_handle)[1];
if (node.step_is_end(step_rank)) {
node.clear_lock();
return path_front_end(get_path_handle_of_step(step_handle));
return path_end(get_path_handle_of_step(step_handle));
}
nid_t next_id = node.step_next_id(step_rank);
auto next_rank = node.step_next_rank(step_rank);
Expand Down Expand Up @@ -762,30 +762,19 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
}

// establish id mapping
uint64_t max_handle_rank = 0;
uint64_t min_handle_rank = std::numeric_limits<uint64_t>::max();
{
uint64_t tmp;
for_each_handle([&](const handle_t& handle) {
tmp = number_bool_packing::unpack_number(handle);
max_handle_rank = std::max(max_handle_rank, tmp);
min_handle_rank = std::min(min_handle_rank, tmp);
});
}

std::vector<std::pair<nid_t, bool>> ids;
// fill even for deleted nodes
// fill even for deleted nodes, which we map to 0
ids.resize(node_v.size(), std::make_pair(0, false));

if (compact_ids) {
for (uint64_t i = 0; i < order->size(); ++i) {
ids[number_bool_packing::unpack_number(order->at(i)) - min_handle_rank] =
ids[number_bool_packing::unpack_number(order->at(i))] =
std::make_pair(i+1,
get_is_reverse(order->at(i)));
}
} else {
for (auto handle : *order) {
ids[number_bool_packing::unpack_number(handle) - min_handle_rank] =
ids[number_bool_packing::unpack_number(handle)] =
std::make_pair(get_id(handle),
get_is_reverse(handle));
}
Expand All @@ -794,11 +783,11 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
// helpers to map from current to new id and orientation
auto get_new_id =
[&](uint64_t id) {
return ids[id - 1 - min_handle_rank].first;
return ids[id - 1].first;
};
auto to_flip =
[&](uint64_t id) {
return ids[id - 1 - min_handle_rank].second;
return ids[id - 1].second;
};

// nodes, edges, and path steps
Expand Down Expand Up @@ -845,13 +834,27 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
}

// now we actually apply the ordering to our node_v, while removing deleted slots
std::vector<node_t*> new_node_v(order->size());
uint64_t j = 0;
for (uint64_t i = 0; i < node_v.size(); ++i) {
if (node_v[i] != nullptr) {
auto h = (*order)[j];
new_node_v[j++] = &get_node_ref(h);
std::vector<node_t*> new_node_v; //(order->size());
if (compact_ids) {
uint64_t j = 0;
for (uint64_t i = 0; i < node_v.size(); ++i) {
if (node_v[i] != nullptr) {
auto h = (*order)[j++];
new_node_v.push_back(&get_node_ref(h));
}
}
_max_node_id = new_node_v.size();
} else {
uint64_t j = 0;
for (uint64_t i = 0; i < node_v.size(); ++i) {
if (node_v[i] != nullptr) {
auto h = (*order)[j++];
new_node_v.push_back(&get_node_ref(h));
} else {
new_node_v.push_back(nullptr);
}
}
_max_node_id = new_node_v.size();
}
node_v = new_node_v;
deleted_nodes.clear();
Expand Down
94 changes: 94 additions & 0 deletions src/unittest/simplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,99 @@ TEST_CASE("Graph simplification reduces a graph with a self inverting -/+ loop",
}
}

// Regression test: unchop a small graph that contains a self-inverting -/+ loop
// on n3 and is covered by two forward paths ("x", "y") and one reverse-complement
// path ("q"), then re-order the graph twice with compacted ids. After every
// mutation each path must still be walkable from path_begin() to path_end() and
// must yield the same number of steps as right after unchopping.
TEST_CASE("Graph simplification reduces a graph with a self inverting -/+ loop with paths", "[simplify]") {
    graph_t graph;
    handle_t n1 = graph.create_handle("CAAATAAG");
    handle_t n2 = graph.create_handle("A");
    handle_t n3 = graph.create_handle("G");
    handle_t n4 = graph.create_handle("T");
    handle_t n5 = graph.create_handle("C");
    handle_t n6 = graph.create_handle("TTG");
    graph.create_edge(n1, n2);
    graph.create_edge(n2, n3);
    // the self-inverting edge: from the reverse strand of n3 onto its forward strand
    graph.create_edge(graph.flip(n3), n3);
    graph.create_edge(n3, n4);
    graph.create_edge(n4, n5);
    graph.create_edge(n5, n6);

    // two identical forward paths over n1..n6
    path_handle_t p_x = graph.create_path_handle("x");
    path_handle_t p_y = graph.create_path_handle("y");
    for (auto& p : { p_x, p_y }) {
        for (auto& h : { n1, n2, n3, n4, n5, n6 }) {
            graph.append_step(p, h);
        }
    }
    // one path visiting the same nodes in reverse order on the opposite strand
    path_handle_t p_q = graph.create_path_handle("q");
    for (auto& h : { n6, n5, n4, n3, n2, n1 }) {
        graph.append_step(p_q, graph.flip(h));
    }

    // Walks all three paths from path_begin() to path_end() via get_next_step()
    // and returns the total number of steps visited. The handle of every step is
    // looked up as well, so that this accessor is exercised on each step even
    // though its result is not inspected.
    auto count_path_steps = [&]() {
        uint64_t n_steps = 0;
        for (auto& p : { p_x, p_y, p_q }) {
            step_handle_t begin = graph.path_begin(p);
            step_handle_t end = graph.path_end(p);
            for (step_handle_t step = begin;
                 step != end;
                 step = graph.get_next_step(step)) {
                (void) graph.get_handle_of_step(step);
                ++n_steps;
            }
        }
        return n_steps;
    };

    algorithms::unchop(graph);

    uint64_t seen_steps = count_path_steps();

    // sort the graph
    graph.apply_ordering(algorithms::topological_order(&graph), true);

    // check that iteration still works
    uint64_t seen_steps_after_sort = count_path_steps();

    SECTION("The graph is as expected") {
        // 6 steps in total across the three paths (presumably 2 merged nodes per path)
        REQUIRE(seen_steps == 6);
        REQUIRE(seen_steps == seen_steps_after_sort);
        REQUIRE(graph.get_sequence(graph.get_handle(1)) == "CAAATAAGA");
        REQUIRE(graph.get_sequence(graph.get_handle(2)) == "GTCTTG");
        REQUIRE(graph.has_edge(graph.get_handle(1), graph.get_handle(2)));
        REQUIRE(graph.has_edge(graph.flip(graph.get_handle(2)), graph.get_handle(2)));
    }

    // sort the graph again, this time in reversed topological order
    auto order = algorithms::topological_order(&graph);
    std::reverse(order.begin(), order.end());
    graph.apply_ordering(order, true);
    //graph.optimize(); // breaks!!

    // check that iteration still works
    uint64_t seen_steps_after_rev = count_path_steps();

    SECTION("The graph is as expected after reverse sorting") {
        REQUIRE(seen_steps == seen_steps_after_rev);
        // the reversed order swaps the two compacted node ids
        REQUIRE(graph.get_sequence(graph.get_handle(2)) == "CAAATAAGA");
        REQUIRE(graph.get_sequence(graph.get_handle(1)) == "GTCTTG");
    }

}

}
}

0 comments on commit a33c73a

Please sign in to comment.