diff --git a/.github/workflows/build_and_test_on_push.yml b/.github/workflows/build_and_test_on_push.yml index 582a1b65..7327a234 100644 --- a/.github/workflows/build_and_test_on_push.yml +++ b/.github/workflows/build_and_test_on_push.yml @@ -6,7 +6,7 @@ jobs: build_and_test: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install required packages run: sudo apt-get update && sudo apt-get install -y git diff --git a/.github/workflows/publish_docker_hub.yml b/.github/workflows/publish_docker_hub.yml index b06f46e2..66dde160 100644 --- a/.github/workflows/publish_docker_hub.yml +++ b/.github/workflows/publish_docker_hub.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Docker login env: diff --git a/.gitignore b/.gitignore index 5570830b..3f92b884 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,7 @@ docs/sphinx_build docs/sphinx_build_man docs/_build Testing/ -.idea/ \ No newline at end of file +.idea/ +.vscode/ +.cmake/ +cmake-build-debug/ \ No newline at end of file diff --git a/src/algorithms/atomic_image.cpp b/src/algorithms/atomic_image.cpp index 870e2759..9efaa31e 100644 --- a/src/algorithms/atomic_image.cpp +++ b/src/algorithms/atomic_image.cpp @@ -39,6 +39,10 @@ std::ostream& operator<<(std::ostream& out, const color_t& c) { return out; } +bool operator==(const color_t& a, const color_t& b) { + return a.c.r == b.c.r && a.c.g == b.c.g && a.c.b == b.c.b && a.c.a == b.c.a; +} + color_t lighten(const color_t& c, const double& f) { return layer(c, COLOR_WHITE, f); } @@ -99,10 +103,20 @@ std::string to_rgba(const color_t& c) { ss << (int)c.c.r << ","; ss << (int)c.c.g << ","; ss << (int)c.c.b << ","; - ss << (int)c.c.a << ")"; + ss << (int)c.c.a << ")"; return ss.str(); } +std::string to_hexrgb(const color_t& c) { + std::stringstream ss; + ss << "#"; + ss << std::hex << std::uppercase; // Use hexadecimal format + ss << std::setfill('0') 
<< std::setw(2) << (int)c.c.r; + ss << std::setfill('0') << std::setw(2) << (int)c.c.g; + ss << std::setfill('0') << std::setw(2) << (int)c.c.b; + return ss.str(); +} + // helpers double u_ipart(double x) { return std::floor(x); } diff --git a/src/algorithms/atomic_image.hpp b/src/algorithms/atomic_image.hpp index 05472902..ac218933 100644 --- a/src/algorithms/atomic_image.hpp +++ b/src/algorithms/atomic_image.hpp @@ -85,6 +85,7 @@ typedef union rgb_t { } color_t; std::ostream& operator<<(std::ostream& out, const color_t& c); +bool operator==(const color_t& a, const color_t& b); color_t hash_color(const std::string& s); color_t lighten(const color_t& c, const double& f); @@ -95,6 +96,7 @@ color_t mix(const color_t& a, const color_t& b, const double& f); std::string to_hex(const color_t& c); std::string to_rgba(const color_t& c); +std::string to_hexrgb(const color_t& c); const color_t COLOR_BLACK = { 0xff000000 }; const color_t COLOR_LIGHTGRAY = { 0xffD3D3D3 }; diff --git a/src/algorithms/draw.cpp b/src/algorithms/draw.cpp index 7d606c8f..e95c2d0b 100644 --- a/src/algorithms/draw.cpp +++ b/src/algorithms/draw.cpp @@ -1,4 +1,5 @@ #include "draw.hpp" +#include "split.hpp" namespace odgi { @@ -98,6 +99,104 @@ void get_layout(const std::vector &X, } +struct label_info_t { + double x, y; + std::string content; + // Simple constructor for convenience + label_info_t(double x, double y, std::string content) : x(x), y(y), content(std::move(content)) {} +}; +bool is_too_close(double x, double y, const std::string& content, double threshold, std::vector& placed_labels) { + for (const auto& label : placed_labels) { + if (label.content == content && std::abs(label.x - x) < threshold && std::abs(label.y - y) < threshold) { + return true; // Found a label too close with the same content + } + } + return false; +} + +uint64_t node_hash(const nid_t& node_id) { + uint64_t x = node_id; + x = (~x) + (x << 21); // x = (x << 21) - x - 1; + x = x ^ (x >> 24); + x = (x + (x << 3)) + (x << 
8); // x * 265 + x = x ^ (x >> 14); + x = (x + (x << 2)) + (x << 4); // x * 21 + x = x ^ (x >> 28); + x = x + (x << 31); + return x; +} +bool keep_node(const nid_t& node_id, const float f) { + // hash the node_id and check if it's accepted given our sparsification factor + return node_hash(node_id) < std::numeric_limits::max() * f; +} +// Define a struct to hold the coordinates for simplicity +struct Coordinates { + double x1, y1, x2, y2; +}; + +// Function to adjust node length +Coordinates adjustNodeLength(double x1, double y1, double x2, double y2, double scale, double x_off, double y_off, double sparsification_factor) { + // Apply scale and offsets to original coordinates + x1 = (x1 * scale) - x_off; + y1 = (y1 * scale) + y_off; + x2 = (x2 * scale) - x_off; + y2 = (y2 * scale) + y_off; + + // Calculate the original length + double length = sqrt(pow(x2 - x1, 2) + pow(y2 - y1, 2)); + + // Adjust length based on 1.0 / sparsification_factor + double new_length = sparsification_factor == 0 ? 
length : length * (1.0 / sparsification_factor); + + // Calculate the midpoint + double mid_x = (x1 + x2) / 2.0; + double mid_y = (y1 + y2) / 2.0; + + // Calculate the unit vector for the direction (zero vector for a zero-length node, to avoid 0/0 = NaN) + double unit_x = length > 0 ? (x2 - x1) / length : 0.0; + double unit_y = length > 0 ? (y2 - y1) / length : 0.0; + + // Calculate new endpoints using the new length + double half_new_length = new_length / 2.0; + double new_x1 = mid_x - half_new_length * unit_x; + double new_y1 = mid_y - half_new_length * unit_y; + double new_x2 = mid_x + half_new_length * unit_x; + double new_y2 = mid_y + half_new_length * unit_y; + + // Return the new coordinates + return Coordinates{new_x1, new_y1, new_x2, new_y2}; +} +Coordinates adjustNodeEndpoints(const handle_t& handle, const std::vector& X, const std::vector& Y, double scale, double x_off, double y_off, double sparsification_factor, bool lengthen_left_nodes) { + // Original coordinates + uint64_t a = 2 * number_bool_packing::unpack_number(handle); + double x1 = (X[a] * scale) - x_off; + double y1 = (Y[a] * scale) + y_off; + double x2 = (X[a + 1] * scale) - x_off; + double y2 = (Y[a + 1] * scale) + y_off; + + // Calculate the original length + double length = std::sqrt(std::pow(x2 - x1, 2) + std::pow(y2 - y1, 2)); + + // Adjust length based on 1.0 / sparsification_factor + double new_length = !lengthen_left_nodes || sparsification_factor == 0 ? 
length : length * (1.0 / sparsification_factor); + + // Calculate the midpoint + double mid_x = (x1 + x2) / 2.0; + double mid_y = (y1 + y2) / 2.0; + + // Calculate the unit vector for the direction (zero vector for a zero-length node, to avoid 0/0 = NaN) + double unit_x = length > 0 ? (x2 - x1) / length : 0.0; + double unit_y = length > 0 ? (y2 - y1) / length : 0.0; + + // Calculate new endpoints using the new length + double half_new_length = new_length / 2.0; + double new_x1 = mid_x - half_new_length * unit_x; + double new_y1 = mid_y - half_new_length * unit_y; + double new_x2 = mid_x + half_new_length * unit_x; + double new_y2 = mid_y + half_new_length * unit_y; + + return Coordinates{new_x1, new_y1, new_x2, new_y2}; +} void draw_svg(std::ostream &out, const std::vector &X, @@ -106,7 +205,10 @@ void draw_svg(std::ostream &out, const double& scale, const double& border, const double& line_width, - std::vector& node_id_to_color) { + std::vector& node_id_to_color, + ska::flat_hash_map>& node_id_to_label_map, + const float& sparsification_factor, + const bool& lengthen_left_nodes) { std::vector> weak_components; coord_range_2d_t rendered_range; @@ -121,6 +223,8 @@ void draw_svg(std::ostream &out, double width = rendered_range.width(); double height = rendered_range.height(); + std::vector placed_labels; + out << std::setprecision(std::numeric_limits::digits10 + 1); out << " highlights; + for (auto& handle : component) { - uint64_t a = 2 * number_bool_packing::unpack_number(handle); algorithms::color_t color = node_id_to_color.empty() ? 
COLOR_BLACK : node_id_to_color[graph.get_id(handle)]; + + if (!(sparsification_factor == 0 || keep_node(graph.get_id(handle), sparsification_factor) || node_id_to_label_map.count(graph.get_id(handle)))) { + continue; // Skip this node to output a lighter SVG (do not skip nodes with labels, if any) + } + + Coordinates newEndpoints = adjustNodeEndpoints(handle, X, Y, scale, x_off, y_off, sparsification_factor, lengthen_left_nodes); + + if (color == COLOR_BLACK || color == COLOR_LIGHTGRAY) { + out << "" + << std::endl; + } else { + highlights.push_back(handle); + } + + // Check if this is a node with a label + if (node_id_to_label_map.count(graph.get_id(handle))){ + // Collect the labels that can be put without overlapping identical ones + std::vector labels; + for (auto text : node_id_to_label_map[graph.get_id(handle)]){ + if (!is_too_close(newEndpoints.x2, newEndpoints.y2, text, 30.0, placed_labels)) { + labels.push_back(text); + } + } + // Check if there is something to label + if (!labels.empty()){ + out << ""; + for (auto text : labels){ + out << "" << text << ""; + placed_labels.emplace_back(newEndpoints.x2, newEndpoints.y2, text); // Record the label's placement + } + out << "" + << std::endl; + } + } + } + + // color highlights + for (auto& handle : highlights) { + Coordinates newEndpoints = adjustNodeEndpoints(handle, X, Y, scale, x_off, y_off, sparsification_factor, lengthen_left_nodes); + algorithms::color_t color = node_id_to_color.empty() ? 
COLOR_BLACK : node_id_to_color[graph.get_id(handle)]; out << "" << std::endl; - } } @@ -208,11 +363,18 @@ std::vector rasterize(const std::vector &X, source_min_x, source_min_y); auto range_itr = component_ranges.begin(); + struct draw_target_t { + xy_d_t xy0; + xy_d_t xy1; + algorithms::color_t color; + }; + for (auto& component : weak_components) { auto& range = *range_itr++; auto& x_off = range.x_offset; auto& y_off = range.y_offset; -#pragma omp parallel for + std::vector highlights; +//#pragma omp parallel for for (uint64_t i = 0; i < component.size(); ++i) { const handle_t& handle = component[i]; uint64_t a = 2 * number_bool_packing::unpack_number(handle); @@ -252,9 +414,18 @@ std::vector rasterize(const std::vector &X, */ const algorithms::color_t node_color = !node_id_to_color.empty() ? node_id_to_color[graph.get_id(handle)] : COLOR_BLACK; - wu_calc_wide_line(xy0, xy1, node_color, image, line_width); + // if gray or black color, otherwise save for later + if (node_color == COLOR_BLACK || node_color == COLOR_LIGHTGRAY) { + wu_calc_wide_line(xy0, xy1, node_color, image, line_width); + } else { + highlights.push_back({xy0, xy1, node_color}); + } } } + // color highlights + for (auto& highlight : highlights) { + wu_calc_wide_line(highlight.xy0, highlight.xy1, highlight.color, image, line_width); + } } // todo, edges, paths, coverage, bins diff --git a/src/algorithms/draw.hpp b/src/algorithms/draw.hpp index 6315e90f..7cff95ff 100644 --- a/src/algorithms/draw.hpp +++ b/src/algorithms/draw.hpp @@ -71,7 +71,10 @@ void draw_svg(std::ostream &out, const double& scale, const double& border, const double& line_width, - std::vector& node_id_to_color); + std::vector& node_id_to_color, + ska::flat_hash_map>& node_id_to_label_map, + const float& sparsification_factor, + const bool& lengthen_left_nodes); std::vector rasterize(const std::vector &X, const std::vector &Y, diff --git a/src/position.hpp b/src/position.hpp index 9aa1ac4d..69078f8c 100644 --- a/src/position.hpp 
+++ b/src/position.hpp @@ -88,6 +88,15 @@ struct path_range_t { std::string data; }; +struct path_range_comparator { + bool operator() (const path_range_t& lhs, const path_range_t& rhs) const { + if (lhs.begin.path != rhs.begin.path) return lhs.begin.path < rhs.begin.path; + if (lhs.end.path != rhs.end.path) return lhs.end.path < rhs.end.path; + if (lhs.begin.offset != rhs.begin.offset) return lhs.begin.offset < rhs.begin.offset; + return lhs.end.offset < rhs.end.offset; + } +}; + inline std::string& get_long_path_name(std::tuple path_long_start_end) { return std::get<0>(path_long_start_end); } diff --git a/src/subcommand/draw_main.cpp b/src/subcommand/draw_main.cpp index 620e69d2..86df9ccc 100644 --- a/src/subcommand/draw_main.cpp +++ b/src/subcommand/draw_main.cpp @@ -41,9 +41,9 @@ int main_draw(int argc, char **argv) { args::ValueFlag png_height(visualizations_opts, "FILE", "Height of PNG rendering (default: 1000).", {'H', "png-height"}); args::ValueFlag png_border(visualizations_opts, "FILE", "Size of PNG border in bp (default: 10).", {'E', "png-border"}); args::Flag color_paths(visualizations_opts, "color-paths", "Color paths (in PNG output).", {'C', "color-paths"}); - args::ValueFlag render_scale(visualizations_opts, "N", "Image scaling (default 0.001).", {'R', "scale"}); + args::ValueFlag svg_render_scale(visualizations_opts, "N", "SVG image scaling (default 0.01).", {'R', "scale"}); args::ValueFlag render_border(visualizations_opts, "N", "Image border (in approximate bp) (default 100.0).", {'B', "border"}); - args::ValueFlag png_line_width(visualizations_opts, "N", "Line width (in approximate bp) (default 0.0).", {'w', "line-width"}); + args::ValueFlag png_line_width(visualizations_opts, "N", "Line width (in approximate bp) (default 10.0).", {'w', "line-width"}); //args::ValueFlag png_line_overlay(parser, "N", "line width (in approximate bp) (default 10.0)", {'O', "line-overlay"}); args::ValueFlag png_path_line_spacing(visualizations_opts, "N", "Spacing 
between path lines in PNG layout (in approximate bp) (default 0.0).", {'S', "path-line-spacing"}); args::ValueFlag _path_bed_file(visualizations_opts, "FILE", @@ -51,6 +51,8 @@ int main_draw(int argc, char **argv) { "Colors are derived from the 4th column, if present, else from the path name." "If the 4th column value is in the format 'string#RRGGBB', the RRGGBB color (in hex notation) will be used.", {'b', "bed-file"}); + args::ValueFlag node_sparsification(visualizations_opts, "N", "Keep only this fraction of nodes in the SVG output (to output smaller files) (default: 0.0, keep all nodes).", {'f', "svg-sparse-factor"}); + args::Flag lengthen_left_nodes(visualizations_opts, "lengthen", "When node sparsification is active, lengthen the remaining nodes by the inverse of the sparsification factor", {'l', "svg-lengthen-nodes"}); args::Group threading(parser, "[ Threading ]"); args::ValueFlag nthreads(threading, "N", "Number of threads to use for parallel operations.", {'t', "threads"}); args::Group processing_info_opts(parser, "[ Processing Information ]"); @@ -94,6 +96,12 @@ int main_draw(int argc, char **argv) { return 1; } + const float sparse_nodes = node_sparsification ? args::get(node_sparsification) : 0.0; + if (sparse_nodes < 0.0 || sparse_nodes > 1.0) { + std::cerr << "[odgi::draw] error: -f/--svg-sparse-factor must be in the range [0.0, 1.0]." << std::endl; + return 1; + } + const uint64_t num_threads = args::get(nthreads) ? 
args::get(nthreads) : 1; graph_t graph; @@ -104,7 +112,7 @@ int main_draw(int argc, char **argv) { if (infile == "-") { graph.deserialize(std::cin); } else { - utils::handle_gfa_odgi_input(infile, "draw", args::get(progress), num_threads, graph); + utils::handle_gfa_odgi_input(infile, "draw", args::get(progress), num_threads, graph); } } } @@ -117,6 +125,8 @@ int main_draw(int argc, char **argv) { // handle targets from BED std::vector path_ranges; std::vector node_id_to_color; + ska::flat_hash_map> node_id_to_label_map; // To remember the unique node to label for each path range + if (_path_bed_file && !args::get(_path_bed_file).empty()) { std::ifstream bed_in(args::get(_path_bed_file)); std::string line; @@ -137,6 +147,8 @@ int main_draw(int argc, char **argv) { if (!path_range.name.empty()) { auto vals = split(path_range.name, '#'); if (vals.size() == 2 && vals[1].length() == 6) { + path_range.name = vals[0]; // Remove the color from the name + // Colors are given in RRGGBB in the BED file, but they are taken in BBGGRR, so we need to switch BB/RR char temp = vals[1][0]; @@ -157,24 +169,27 @@ int main_draw(int argc, char **argv) { } } - - - - + bool first_handle_taken = path_range.name.empty(); // To avoid checking if there is no name to take algorithms::for_handle_in_path_range( graph, path_handle, path_range.begin.offset, path_range.end.offset, [&](const handle_t& handle) { - node_id_to_color[graph.get_id(handle)] = path_color; + const auto node_id = graph.get_id(handle); + node_id_to_color[node_id] = path_color; + + if (!first_handle_taken) { + first_handle_taken = true; + // The set automatically handles uniqueness of labels within the set. + node_id_to_label_map[node_id].insert(path_range.name); + } }); } } } const uint64_t _png_height = png_height ? args::get(png_height) : 1000; - const double _png_line_width = png_line_width ? args::get(png_line_width) : 0; + const double _png_line_width = png_line_width ? 
args::get(png_line_width) : 10.0; const bool _color_paths = args::get(color_paths); const double _png_path_line_spacing = png_path_line_spacing ? args::get(png_path_line_spacing) : 0.0; - const double svg_scale = !render_scale ? 0.01 : args::get(render_scale); size_t max_node_depth = 0; graph.for_each_handle( [&](const handle_t& h) { @@ -211,12 +226,13 @@ int main_draw(int argc, char **argv) { } if (svg_out_file) { + const double svg_scale = !svg_render_scale ? 0.01 : args::get(svg_render_scale); auto& outfile = args::get(svg_out_file); ofstream f(outfile.c_str()); // todo could be done with callbacks std::vector X = layout.get_X(); std::vector Y = layout.get_Y(); - algorithms::draw_svg(f, X, Y, graph, svg_scale, border_bp, _png_line_width, node_id_to_color); + algorithms::draw_svg(f, X, Y, graph, svg_scale, border_bp, _png_line_width, node_id_to_color, node_id_to_label_map, sparse_nodes, args::get(lengthen_left_nodes)); f.close(); } diff --git a/src/subcommand/extract_main.cpp b/src/subcommand/extract_main.cpp index ce0ecc02..e900a28d 100644 --- a/src/subcommand/extract_main.cpp +++ b/src/subcommand/extract_main.cpp @@ -556,6 +556,17 @@ namespace odgi { return std::binary_search(source_paths_from_path_ranges.begin(), source_paths_from_path_ranges.end(), x); }), source_paths->end()); + // We don't cut nodes for the extraction, so close path intervals can generate identical subpaths. + // To avoid duplicated subpaths in the final subgraph, we remove duplicated path ranges. 
+ { + std::set unique_path_ranges; + + for (const auto& path_range : path_ranges) { + unique_path_ranges.insert(path_range); + } + + path_ranges.assign(unique_path_ranges.begin(), unique_path_ranges.end()); + } if (max_dist_subpaths > 0) { // Iterate multiple times to merge subpaths which became mergeable during the first iteration where new nodes were added diff --git a/src/subcommand/paths_main.cpp b/src/subcommand/paths_main.cpp index e9f43b6d..48c3415f 100644 --- a/src/subcommand/paths_main.cpp +++ b/src/subcommand/paths_main.cpp @@ -548,6 +548,22 @@ int main_paths(int argc, char** argv) { step_range.push_back(step); } }); + + // Emit last non reference range, if any + if (end > start && (end - start) >= min_size_in_bp) { + if (_show_step_ranges) { + std::string step_range_str = ""; + for (auto& step : step_range) { + const handle_t handle = graph.get_handle_of_step(step); + step_range_str += std::to_string(graph.get_id(handle)) + (graph.get_is_reverse(handle) ? "-" : "+") + ","; + } + #pragma omp critical (cout) + std::cout << graph.get_path_name(path) << "\t" << start << "\t" << end << "\t" << step_range_str.substr(0, step_range_str.size() - 1) << std::endl; // trim the trailing comma from step_range + } else { + #pragma omp critical (cout) + std::cout << graph.get_path_name(path) << "\t" << start << "\t" << end << std::endl; + } + } } } }