Skip to content

Commit

Permalink
Simplify write_links
Browse files: browse the repository at this point in the history
  • Loading branch information
Chris7 committed Oct 29, 2024
1 parent 5727f3e commit 065e000
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 36 deletions.
45 changes: 10 additions & 35 deletions src/exports/gfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,8 @@ pub fn export_gfa(
let file = File::create(filename).unwrap();
let mut writer = BufWriter::new(file);

let node_sequence_starts_by_end_coordinate = blocks
.iter()
.filter(|block| !Node::is_terminal(block.node_id))
.map(|block| ((block.node_id, block.end), block.start))
.collect::<HashMap<(i64, i64), i64>>();
write_segments(&mut writer, &blocks);
write_links(
&mut writer,
&graph,
&edges_by_node_pair,
node_sequence_starts_by_end_coordinate,
);
write_links(&mut writer, &graph);
write_paths(&mut writer, conn, collection_name, &blocks);
}

Expand All @@ -102,35 +92,20 @@ fn segment_line(sequence: &str, node_id: i64, sequence_start: i64) -> String {
format!("S\t{}.{}\t{}\t*\n", node_id, sequence_start, sequence,)
}

fn write_links(
writer: &mut BufWriter<File>,
graph: &DiGraphMap<GraphNode, GraphEdge>,
edges_by_node_pair: &HashMap<(i64, i64), Edge>,
node_sequence_starts_by_end_coordinate: HashMap<(i64, i64), i64>,
) {
for (source, target, _edge_weight) in graph.all_edges() {
let edge = edges_by_node_pair
.get(&(source.block_id, target.block_id))
.unwrap();
if Node::is_terminal(edge.source_node_id) || Node::is_terminal(edge.target_node_id) {
fn write_links(writer: &mut BufWriter<File>, graph: &DiGraphMap<GraphNode, GraphEdge>) {
for (source, target, edge_info) in graph.all_edges() {
if Node::is_terminal(source.node_id) || Node::is_terminal(target.node_id) {
continue;
}
// Since we're encoding a segment ID as node ID + sequence start coordinate, we need to do
// one step of translation to get that for an edge's source. The edge's source is the node
// ID + sequence end coordinate, so the following line converts that to the sequence start
// coordinate.
let sequence_start = node_sequence_starts_by_end_coordinate
.get(&(edge.source_node_id, edge.source_coordinate))
.unwrap();
writer
.write_all(
&link_line(
edge.source_node_id,
*sequence_start,
edge.source_strand,
edge.target_node_id,
edge.target_coordinate,
edge.target_strand,
source.node_id,
source.sequence_start,
edge_info.source_strand,
target.node_id,
target.sequence_start,
edge_info.target_strand,
)
.into_bytes(),
)
Expand Down
3 changes: 3 additions & 0 deletions src/graph.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::hash::Hash;
use std::iter::from_fn;

use crate::models::strand::Strand;
use petgraph::visit::{IntoNeighborsDirected, NodeCount};
use petgraph::Direction;

Expand All @@ -23,6 +24,8 @@ pub struct GraphEdge {
pub edge_id: i64,
pub chromosome_index: i64,
pub phased: i64,
pub source_strand: Strand,
pub target_strand: Strand,
}

// hacked from https://docs.rs/petgraph/latest/src/petgraph/algo/simple_paths.rs.html#36-102 to support digraphmap
Expand Down
2 changes: 2 additions & 0 deletions src/models/edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ impl Edge {
chromosome_index: edge.chromosome_index,
phased: edge.phased,
edge_id: edge.id,
source_strand: edge.source_strand,
target_strand: edge.target_strand,
},
);
edges_by_node_pair.insert((*source_id_value, *target_id_value), edge.clone());
Expand Down
2 changes: 1 addition & 1 deletion src/models/strand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use rusqlite::ToSql;
use serde::{Deserialize, Serialize};
use std::fmt;

#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize, Ord, PartialOrd)]
pub enum Strand {
Forward,
Reverse,
Expand Down

0 comments on commit 065e000

Please sign in to comment.