Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add macos to CI #513

Closed
wants to merge 41 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
f2e09c8
add macos to CI
kojix2 Jun 21, 2023
85a22bf
update workflow to trigger on push and pull_request
kojix2 Jun 21, 2023
fa9b1c0
heaps: plot down to 0 on the y-axis
subwaystation Jun 22, 2023
07c0822
progress in `odgi pav`
AndreaGuarracino Jul 3, 2023
d650d92
fix vector size
AndreaGuarracino Jul 14, 2023
f2cf727
enable ASAN in the github workflow
AndreaGuarracino Jul 14, 2023
9782943
no ASAN for now
AndreaGuarracino Jul 14, 2023
811e969
-O option to optimize the extracted graph
AndreaGuarracino Aug 6, 2023
4ca7e85
-d is not mandatory anymore, 100kbp by default
AndreaGuarracino Aug 6, 2023
8a4c05b
fix bug in reference guided sorting
subwaystation Aug 28, 2023
ca39478
add `-n/--non-reference-paths` in `odgi paths`
AndreaGuarracino Aug 9, 2023
f480059
add `--non-reference-nodes`
AndreaGuarracino Aug 10, 2023
943d019
add `--min-size`
AndreaGuarracino Aug 13, 2023
bc8e268
add `--show-step-ranges` to show steps of non-ref ranges
AndreaGuarracino Aug 13, 2023
ff4c5c9
check there are no duplicated path ranges
AndreaGuarracino Aug 22, 2023
b57665e
fix typo
AndreaGuarracino Sep 9, 2023
938b2b7
a bit more aggressive sub path merging by default
AndreaGuarracino Sep 9, 2023
5571856
manage path range boundaries
AndreaGuarracino Sep 23, 2023
2e662f8
fix odgi pav seg fault
AndreaGuarracino Oct 24, 2023
d5f5299
fix seg fault on extracted graphs
AndreaGuarracino Oct 26, 2023
142b4e7
remove debugging code
AndreaGuarracino Oct 26, 2023
e5cebff
fix odgi crush description
AndreaGuarracino Nov 7, 2023
4cc4367
add nix build scripts
ekg Oct 26, 2023
82e0dcb
document nix build
ekg Oct 26, 2023
0682d1d
depth: option to use unique window depths
kdm9 Nov 18, 2023
a9a0233
make svg drawing work with colors
ekg Nov 29, 2023
c448498
update images
AndreaGuarracino Dec 4, 2023
9c03426
emit also node length
AndreaGuarracino Dec 4, 2023
245b6c6
fix nix build pkgconfig issue
ekg Dec 18, 2023
02958e2
deactivate 1D layout metrics in MultiQC mode
subwaystation Jan 8, 2024
4cbc1b4
sort layout power up
subwaystation Jan 31, 2024
d32b34d
fix line indents
subwaystation Jan 31, 2024
970a1f7
update MultiQC tutorial
subwaystation Feb 1, 2024
842d629
update sort layout tutorial
subwaystation Feb 1, 2024
3eb4224
update sort layout tutorial
subwaystation Feb 1, 2024
eb96c6b
correct copyright year
subwaystation Feb 1, 2024
f4eeb5b
typo
subwaystation Feb 1, 2024
6bf8674
Playing around with the 1D PG-SGD parameters
subwaystation Feb 1, 2024
6e7efd0
I-fell-lucky commit
AndreaGuarracino Feb 2, 2024
7bbc2af
python requirements
AndreaGuarracino Feb 2, 2024
5a6e834
sphinx-rtd-theme
AndreaGuarracino Feb 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add --non-reference-nodes
AndreaGuarracino authored and kojix2 committed Feb 6, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit f4800594db272616a90235250968acdd86faf809
132 changes: 90 additions & 42 deletions src/subcommand/paths_main.cpp
Original file line number Diff line number Diff line change
@@ -46,7 +46,8 @@ int main_paths(int argc, char** argv) {
args::ValueFlag<std::uint16_t> path_delim_pos(path_investigation_opts, "N", "Consider the N-th occurrence of the delimiter specified with **-D, --delim**"
" to obtain the group identifier. Specify 1 for the 1st occurrence (default).",
{'p', "delim-pos"});
args::ValueFlag<std::string> non_reference_paths(path_investigation_opts, "FILE", "Print to stdout (in BED format) path ranges that are not in the paths listed (by line) in *FILE*.", {'n', "non-reference-paths"});
args::ValueFlag<std::string> non_reference_nodes(path_investigation_opts, "FILE", "Print to stdout IDs of nodes that are not in the paths listed (by line) in *FILE*.", {"non-reference-nodes"});
args::ValueFlag<std::string> non_reference_ranges(path_investigation_opts, "FILE", "Print to stdout (in BED format) path ranges that are not in the paths listed (by line) in *FILE*.", {"non-reference-ranges"});
args::Group path_modification_opts(parser, "[ Path Modification Options ]");
args::ValueFlag<std::string> keep_paths_file(path_modification_opts, "FILE", "Keep paths listed (by line) in *FILE*.", {'K', "keep-paths"});
args::ValueFlag<std::string> drop_paths_file(path_modification_opts, "FILE", "Drop paths listed (by line) in *FILE*.", {'X', "drop-paths"});
@@ -93,6 +94,11 @@ int main_paths(int argc, char** argv) {
return 1;
}

if (non_reference_nodes && non_reference_ranges) {
    // Both flags take a file of reference path names, and only one of the two
    // reports (node IDs or BED ranges) is produced per run, so asking for both
    // is rejected up front. The message spells both flags with their leading
    // dashes so it matches what the user has to type.
    std::cerr << "[odgi::paths] error: specify --non-reference-nodes or --non-reference-ranges, not both." << std::endl;
    return 1;
}

const uint64_t num_threads = args::get(threads) ? args::get(threads) : 1;
omp_set_num_threads(num_threads);

@@ -348,7 +354,10 @@ int main_paths(int argc, char** argv) {
}
}

if (non_reference_paths && !args::get(non_reference_paths).empty()) {
if (
(non_reference_nodes && !args::get(non_reference_nodes).empty()) ||
(non_reference_ranges && !args::get(non_reference_ranges).empty())
) {
// Check if the node IDs are compacted
const uint64_t shift = graph.min_node_id();
if (graph.max_node_id() - shift >= graph.get_node_count()){
@@ -359,7 +368,7 @@ int main_paths(int argc, char** argv) {
// Read paths to use as reference paths
std::vector<path_handle_t> reference_paths;
std::string line;
auto& x = args::get(non_reference_paths);
auto& x = non_reference_nodes && !args::get(non_reference_nodes).empty() ? args::get(non_reference_nodes) : args::get(non_reference_ranges);
std::ifstream infile(x);
while (std::getline(infile, line)) {
// This file should contain path names, one per line
@@ -373,50 +382,89 @@ int main_paths(int argc, char** argv) {
}
}

// Set the reference nodes
atomicbitvector::atomic_bv_t reference_nodes(graph.get_node_count()+1);
if (non_reference_nodes && !args::get(non_reference_nodes).empty()){
// Emit non-reference nodes

// Set non-reference nodes
atomicbitvector::atomic_bv_t non_reference_nodes(graph.get_node_count());
for(uint64_t i = 0; i < non_reference_nodes.size(); i++) {
non_reference_nodes.set(i);
}
#pragma omp parallel for schedule(dynamic,1)
for (auto &path : reference_paths) {
graph.for_each_step_in_path(path, [&](const step_handle_t& step) {
const handle_t handle = graph.get_handle_of_step(step);
reference_nodes.set(graph.get_id(handle) - shift);
});
}
for (auto &path : reference_paths) {
graph.for_each_step_in_path(path, [&](const step_handle_t& step) {
const handle_t handle = graph.get_handle_of_step(step);
non_reference_nodes.reset(graph.get_id(handle) - shift);
});
}

std::vector<path_handle_t> non_reference_paths;
graph.for_each_path_handle([&non_reference_paths](const path_handle_t& path) {
non_reference_paths.push_back(path);
});

// Prepare non reference path handles for parallel processing
std::sort(non_reference_paths.begin(), non_reference_paths.end());
std::sort(reference_paths.begin(), reference_paths.end());

non_reference_paths.erase(
std::remove_if(non_reference_paths.begin(), non_reference_paths.end(),
[&reference_paths](const auto &x) {
return std::binary_search(reference_paths.begin(), reference_paths.end(), x);
}), non_reference_paths.end());

// Traverse non reference paths to emit non reference ranges
#pragma omp parallel for schedule(dynamic, 1)
for (auto& path : non_reference_paths) {
uint64_t start = 0, end = 0;
graph.for_each_step_in_path(path, [&](const step_handle_t& step) {
const handle_t handle = graph.get_handle_of_step(step);
const uint64_t index = graph.get_id(handle) - shift;
if (reference_nodes.test(index)) {
// Emit the previous non reference range, if any
if (end > start) {
#pragma omp critical (cout)
std::cout << graph.get_path_name(path) << "\t" << start << "\t" << end << std::endl;
// Emit non-reference nodes
std::cout << "#node.id\tpaths" << std::endl;
for (auto x : non_reference_nodes) {
const handle_t handle = graph.get_handle(x + shift);

// Check paths that go through this node, if any
std::unordered_set<path_handle_t> unique_path_handles;
graph.for_each_step_on_handle(handle, [&](const step_handle_t& step) {
unique_path_handles.insert(graph.get_path_handle_of_step(step));
});
std::string result;
for (const auto& path : unique_path_handles) {
if (!result.empty()) {
result += ",";
}
end += graph.get_length(handle);
start = end;
} else {
end += graph.get_length(handle);
result += graph.get_path_name(path);
}

std::cout << graph.get_id(handle) << "\t" << result << std::endl;
}
} else {
// Emit non-reference ranges

// Set the reference nodes
atomicbitvector::atomic_bv_t reference_nodes(graph.get_node_count());
#pragma omp parallel for schedule(dynamic,1)
for (auto &path : reference_paths) {
graph.for_each_step_in_path(path, [&](const step_handle_t& step) {
const handle_t handle = graph.get_handle_of_step(step);
reference_nodes.set(graph.get_id(handle) - shift);
});
}

// Prepare non reference path handles for parallel processing
std::vector<path_handle_t> non_reference_paths;
graph.for_each_path_handle([&non_reference_paths](const path_handle_t& path) {
non_reference_paths.push_back(path);
});
std::sort(non_reference_paths.begin(), non_reference_paths.end());
std::sort(reference_paths.begin(), reference_paths.end());
non_reference_paths.erase(
std::remove_if(non_reference_paths.begin(), non_reference_paths.end(),
[&reference_paths](const auto &x) {
return std::binary_search(reference_paths.begin(), reference_paths.end(), x);
}), non_reference_paths.end());

// Traverse non reference paths to emit non-reference ranges
std::cout << "#path.name\tstart\tend" << std::endl;
#pragma omp parallel for schedule(dynamic, 1)
for (auto& path : non_reference_paths) {
uint64_t start = 0, end = 0;
graph.for_each_step_in_path(path, [&](const step_handle_t& step) {
const handle_t handle = graph.get_handle_of_step(step);
const uint64_t index = graph.get_id(handle) - shift;
if (reference_nodes.test(index)) {
// Emit the previous non reference range, if any
if (end > start) {
#pragma omp critical (cout)
std::cout << graph.get_path_name(path) << "\t" << start << "\t" << end << std::endl;
}
end += graph.get_length(handle);
start = end;
} else {
end += graph.get_length(handle);
}
});
}
}
}