diff --git a/Cargo.toml b/Cargo.toml index fc50628..7ec9267 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tcrdist_rs" -version = "0.1.5" +version = "0.1.6" edition = "2021" license = "MIT" description = "Rust tcrdist routines, including SMID-accelerated edit distances from triple_accel." diff --git a/src/distance.rs b/src/distance.rs index c850bef..7446953 100644 --- a/src/distance.rs +++ b/src/distance.rs @@ -278,6 +278,53 @@ pub fn tcrdist_many_to_many( } } +/// Compute the tcrdist between many strings and many other strings pairwise. +pub fn tcrdist_pairwise( + seqs1: &[&str], + seqs2: &[&str], + dist_weight: u16, + gap_penalty: u16, + ntrim: usize, + ctrim: usize, + fixed_gappos: bool, + parallel: bool, +) -> Vec { + if parallel == false { + let mut dists: Vec = vec![0; cmp::min(seqs1.len(), seqs2.len())]; + let mut counter: usize = 0; + for (&s1, &s2) in seqs1.iter().zip(seqs2.iter()) { + dists[counter] = tcrdist( + s1.as_bytes(), + s2.as_bytes(), + dist_weight, + gap_penalty, + ntrim, + ctrim, + fixed_gappos, + ); + counter += 1; + } + dists + } else { + POOL.install(|| { + seqs1 + .par_iter() + .zip(seqs2.par_iter()) + .map(|(&s1, &s2)| { + tcrdist( + s1.as_bytes(), + s2.as_bytes(), + dist_weight, + gap_penalty, + ntrim, + ctrim, + fixed_gappos, + ) + }) + .collect::>() + }) + } +} /// Compute the distance between V alleles which are written as byte strings. /// /// This function is memoized to speed up V alleles distance computations further. @@ -548,6 +595,64 @@ pub fn tcrdist_allele_many_to_many( }) } } +/// Compute the full tcrdist between many CDR3-V allele arrays and many others pairwise. +pub fn tcrdist_allele_pairwise( + seqs1: &[[&str; 2]], + seqs2: &[[&str; 2]], + phmc_weight: u16, + cdr1_weight: u16, + cdr2_weight: u16, + cdr3_weight: u16, + gap_penalty: u16, + ntrim: usize, + ctrim: usize, + fixed_gappos: bool, + parallel: bool, +) -> Vec { + if parallel == false { + let mut dists: Vec = vec![0; cmp::min(seqs1.len(), seqs2.len())]; + let mut counter: usize = 0; + + for (&s1, &s2) in seqs1.iter().zip(seqs2.iter()) { + dists[counter] = tcrdist_allele( + s1, + s2, + phmc_weight, + cdr1_weight, + cdr2_weight, + cdr3_weight, + gap_penalty, + ntrim, + ctrim, + fixed_gappos, + ); + counter += 1; + } + + dists + } else { + POOL.install(|| { + seqs1 + .par_iter() + .zip(seqs2.par_iter()) + .map(|(&s1, &s2)| { + tcrdist_allele( + s1, + s2, + phmc_weight, + cdr1_weight, + cdr2_weight, + cdr3_weight, + gap_penalty, + ntrim, + ctrim, + fixed_gappos, + ) + }) + .collect() + }) + } +} /// Compute the full tcrdist between two CDR3-V gene pairs. /// @@ -674,6 +779,38 @@ pub fn tcrdist_gene_many_to_many( } } +/// Compute the full tcrdist between many CDR3-V gene arrays and many others pairwise. +pub fn tcrdist_gene_pairwise( + seqs1: &[[&str; 2]], + seqs2: &[[&str; 2]], + ntrim: usize, + ctrim: usize, + parallel: bool, +) -> Vec { + if parallel == false { + let seqs1_len: usize = seqs1.len(); + let seqs2_len: usize = seqs2.len(); + let mut dists: Vec = vec![0; seqs1_len * seqs2_len]; + let mut counter: usize = 0; + + for &s1 in seqs1.iter() { + for &s2 in seqs2.iter() { + dists[counter] = tcrdist_gene(s1, s2, ntrim, ctrim); + counter += 1; + } + } + dists + } else { + POOL.install(|| { + seqs1 + .par_iter() + .zip(seqs2.par_iter()) + .map(|(&s1, &s2)| tcrdist_gene(s1, s2, ntrim, ctrim)) + .collect::>() + }) + } +} + /// Compute whether two CDR3-V gene arrays are tcrdist-gene neighbors. pub fn tcrdist_gene_neighbor( s1: [&str; 2], @@ -697,7 +834,7 @@ pub fn tcrdist_gene_neighbor( return false; } - // Stop computation if V gene distance and length difference are too different. + // Stop computation if V gene distance and length difference are too large. let v_gene_dist = match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); if v_gene_dist + len_diff > threshold { return false; @@ -712,7 +849,7 @@ pub fn tcrdist_gene_neighbor_matrix( ntrim: usize, ctrim: usize, parallel: bool, -) -> Vec<[usize; 2]> { +) -> Vec<[usize; 3]> { if parallel == false { seqs.iter() .enumerate() @@ -720,10 +857,32 @@ pub fn tcrdist_gene_neighbor_matrix( seqs[idx + 1..] .iter() .enumerate() - .filter(move |(_, &s2)| tcrdist_gene_neighbor(s1, s2, threshold, ntrim, ctrim)) - .map(move |(jdx, _)| [idx, idx + 1 + jdx]) + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let s1_bytes: &[u8] = s1[0].as_bytes(); + let s2_bytes: &[u8] = s2[0].as_bytes(); + let s1_len: usize = s1_bytes.len(); + let s2_len: usize = s2_bytes.len(); + let len_diff: u16 = if s1_len > s2_len { + (s1_len - s2_len) as u16 + } else { + (s2_len - s1_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = + match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist(s1_bytes, s2_bytes, 1, 4, ntrim, ctrim, false); + if dist <= threshold { + v.push([idx, jdx + 1 + idx, dist as usize]) + }; + } + } + v + }) }) - .collect() + .collect::>() } else { POOL.install(|| { seqs.par_iter() @@ -732,15 +891,117 @@ pub fn tcrdist_gene_neighbor_matrix( seqs[idx + 1..] .iter() .enumerate() - .filter(|(_, &s2)| tcrdist_gene_neighbor(s1, s2, threshold, ntrim, ctrim)) - .map(move |(jdx, _)| [idx, idx + 1 + jdx]) - .collect::>() + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let s1_bytes: &[u8] = s1[0].as_bytes(); + let s2_bytes: &[u8] = s2[0].as_bytes(); + let s1_len: usize = s1_bytes.len(); + let s2_len: usize = s2_bytes.len(); + let len_diff: u16 = if s1_len > s2_len { + (s1_len - s2_len) as u16 + } else { + (s2_len - s1_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = + match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist( + s1_bytes, s2_bytes, 1, 4, ntrim, ctrim, false, + ); + if dist <= threshold { + v.push([idx, jdx + 1 + idx, dist as usize]) + }; + } + } + v + }) }) - .collect() + .collect::>() }) } } +pub fn tcrdist_gene_neighbor_pairwise( + seqs1: &[[&str; 2]], + seqs2: &[[&str; 2]], + threshold: u16, + ntrim: usize, + ctrim: usize, + parallel: bool, +) -> Vec<[usize; 2]> { + if parallel == false { + seqs1 + .iter() + .enumerate() + .zip(seqs2.iter()) + .fold(Vec::new(), |mut v, ((idx, &s1), &s2)| { + let s1_bytes: &[u8] = s1[0].as_bytes(); + let s2_bytes: &[u8] = s2[0].as_bytes(); + let s1_len: usize = s1_bytes.len(); + let s2_len: usize = s2_bytes.len(); + let len_diff: u16 = if s1_len > s2_len { + (s1_len - s2_len) as u16 + } else { + (s2_len - s1_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = + match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist(s1_bytes, s2_bytes, 1, 4, ntrim, ctrim, false); + if dist <= threshold { + v.push([idx, dist as usize]) + }; + } + } + v + }) + } else { + seqs1 + .par_iter() + .enumerate() + .zip(seqs2.par_iter()) + .fold( + || Vec::new(), + |mut v, ((idx, &s1), &s2)| { + let s1_bytes: &[u8] = s1[0].as_bytes(); + let s2_bytes: &[u8] = s2[0].as_bytes(); + let s1_len: usize = s1_bytes.len(); + let s2_len: usize = s2_bytes.len(); + let len_diff: u16 = if s1_len > s2_len { + (s1_len - s2_len) as u16 + } else { + (s2_len - s1_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = + match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist(s1_bytes, s2_bytes, 1, 4, ntrim, ctrim, false); + if dist <= threshold { + v.push([idx, dist as usize]) + }; + } + } + v + }, + ) + .reduce( + || Vec::new(), + |mut combined, v| { + combined.extend(v); + combined + }, + ) + } +} + pub fn tcrdist_gene_neighbor_one_to_many( seq: [&str; 2], seqs: &[[&str; 2]], @@ -748,21 +1009,69 @@ pub fn tcrdist_gene_neighbor_one_to_many( ntrim: usize, ctrim: usize, parallel: bool, -) -> Vec { +) -> Vec<[usize; 2]> { + let seq_bytes: &[u8] = seq[0].as_bytes(); + let seq_len: usize = seq_bytes.len(); + let seq_v: &[u8] = seq[1].as_bytes(); + if parallel == false { seqs.iter() .enumerate() - .filter(|(_, &s)| tcrdist_gene_neighbor(seq, s, threshold, ntrim, ctrim)) - .map(|(idx, _)| idx) - .collect() + .fold(Vec::new(), |mut v, (idx, &s)| { + let s_bytes: &[u8] = s[0].as_bytes(); + let s_len: usize = s_bytes.len(); + let len_diff: u16 = if seq_len > s_len { + (seq_len - s_len) as u16 + } else { + (s_len - seq_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = match_table::gene_distance(seq_v, s[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist(seq_bytes, s_bytes, 1, 4, ntrim, ctrim, false); + if dist <= threshold { + v.push([idx, dist as usize]) + }; + } + } + v + }) } else { - POOL.install(|| { - seqs.par_iter() - .enumerate() - .filter(|(_, &s)| tcrdist_gene_neighbor(seq, s, threshold, ntrim, ctrim)) - .map(|(idx, _)| idx) - .collect::>() - }) + seqs.par_iter() + .enumerate() + .fold( + || Vec::new(), + |mut v, (idx, &s)| { + let s_bytes: &[u8] = s[0].as_bytes(); + let s_len: usize = s_bytes.len(); + let len_diff: u16 = if seq_len > s_len { + (seq_len - s_len) as u16 + } else { + (s_len - seq_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = match_table::gene_distance(seq_v, s[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist(seq_bytes, s_bytes, 1, 4, ntrim, ctrim, false); + if dist <= threshold { + v.push([idx, dist as usize]) + }; + } + } + v + }, + ) + .reduce( + || Vec::new(), + |mut combined, v| { + combined.extend(v); + combined + }, + ) } } @@ -773,7 +1082,7 @@ pub fn tcrdist_gene_neighbor_many_to_many( ntrim: usize, ctrim: usize, parallel: bool, -) -> Vec<[usize; 2]> { +) -> Vec<[usize; 3]> { if parallel == false { seqs1 .iter() @@ -782,11 +1091,32 @@ pub fn tcrdist_gene_neighbor_many_to_many( seqs2 .iter() .enumerate() - .filter(|(_, &s2)| tcrdist_gene_neighbor(s1, s2, threshold, ntrim, ctrim)) - .map(|(jdx, _)| [idx, jdx]) - .collect::>() + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let s1_bytes: &[u8] = s1[0].as_bytes(); + let s2_bytes: &[u8] = s2[0].as_bytes(); + let s1_len: usize = s1_bytes.len(); + let s2_len: usize = s2_bytes.len(); + let len_diff: u16 = if s1_len > s2_len { + (s1_len - s2_len) as u16 + } else { + (s2_len - s1_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = + match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist(s1_bytes, s2_bytes, 1, 4, ntrim, ctrim, false); + if dist <= threshold { + v.push([idx, jdx, dist as usize]) + }; + } + } + v + }) }) - .collect() + .collect::>() } else { POOL.install(|| { seqs1 @@ -796,11 +1126,34 @@ pub fn tcrdist_gene_neighbor_many_to_many( seqs2 .iter() .enumerate() - .filter(|(_, &s2)| tcrdist_gene_neighbor(s1, s2, threshold, ntrim, ctrim)) - .map(|(jdx, _)| [idx, jdx]) - .collect::>() + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let s1_bytes: &[u8] = s1[0].as_bytes(); + let s2_bytes: &[u8] = s2[0].as_bytes(); + let s1_len: usize = s1_bytes.len(); + let s2_len: usize = s2_bytes.len(); + let len_diff: u16 = if s1_len > s2_len { + (s1_len - s2_len) as u16 + } else { + (s2_len - s1_len) as u16 + }; + + if len_diff * 12 <= threshold { + let v_gene_dist = + match_table::gene_distance(s1[1].as_bytes(), s2[1].as_bytes()); + if v_gene_dist + len_diff <= threshold { + let dist: u16 = v_gene_dist + + 3 * tcrdist( + s1_bytes, s2_bytes, 1, 4, ntrim, ctrim, false, + ); + if dist <= threshold { + v.push([idx, jdx, dist as usize]) + }; + } + } + v + }) }) - .collect() + .collect::>() }) } } @@ -896,12 +1249,35 @@ pub fn str_cmp_many_to_many( } } +pub fn str_cmp_pairwise(seqs1: &[&str], seqs2: &[&str], parallel: bool, metric: &str) -> Vec { + let metric_fn = map_metric(metric); + if parallel == false { + let mut dists: Vec = vec![0; cmp::min(seqs1.len(), seqs2.len())]; + let mut counter: usize = 0; + + for (&s1, &s2) in seqs1.iter().zip(seqs2.iter()) { + dists[counter] = metric_fn(s1.as_bytes(), s2.as_bytes()); + counter += 1; + } + + dists + } else { + POOL.install(|| { + seqs1 + .par_iter() + .zip(seqs2.par_iter()) + .map(|(&s1, &s2)| metric_fn(s1.as_bytes(), s2.as_bytes())) + .collect() + }) + } +} + pub fn str_neighbor_matrix( seqs: &[&str], threshold: u32, parallel: bool, metric: &str, -) -> Vec<[usize; 2]> { +) -> Vec<[usize; 3]> { let metric_fn = map_metric(metric); if parallel == false { seqs.iter() @@ -910,8 +1286,13 @@ pub fn str_neighbor_matrix( seqs[idx + 1..] .iter() .enumerate() - .filter(move |(_, &s2)| metric_fn(s1.as_bytes(), s2.as_bytes()) <= threshold) - .map(move |(jdx, _)| [idx, idx + 1 + jdx]) + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let dist: u32 = metric_fn(s1.as_bytes(), s2.as_bytes()); + if dist <= threshold { + v.push([idx, idx + 1 + jdx, dist as usize]); + } + v + }) }) .collect() } else { @@ -922,11 +1303,13 @@ pub fn str_neighbor_matrix( seqs[idx + 1..] .iter() .enumerate() - .filter(move |(_, &s2)| { - metric_fn(s1.as_bytes(), s2.as_bytes()) <= threshold + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let dist: u32 = metric_fn(s1.as_bytes(), s2.as_bytes()); + if dist <= threshold { + v.push([idx, idx + 1 + jdx, dist as usize]); + } + v }) - .map(move |(jdx, _)| [idx, idx + 1 + jdx]) - .collect::>() }) .collect() }) @@ -939,22 +1322,40 @@ pub fn str_neighbor_one_to_many( threshold: u32, parallel: bool, metric: &str, -) -> Vec { +) -> Vec<[usize; 2]> { let metric_fn = map_metric(metric); let seq_bytes = seq.as_bytes(); if parallel == false { seqs.iter() .enumerate() - .filter(|(_, &s)| metric_fn(seq_bytes, s.as_bytes()) <= threshold) - .map(|(idx, _)| idx) - .collect() + .fold(Vec::new(), |mut v, (idx, &s)| { + let dist: u32 = metric_fn(seq_bytes, s.as_bytes()); + if dist <= threshold { + v.push([idx, dist as usize]); + } + v + }) } else { POOL.install(|| { seqs.par_iter() .enumerate() - .filter(|(_, &s)| metric_fn(seq_bytes, s.as_bytes()) <= threshold) - .map(|(idx, _)| idx) - .collect::>() + .fold( + || Vec::new(), + |mut v, (idx, &s)| { + let dist: u32 = metric_fn(seq_bytes, s.as_bytes()); + if dist <= threshold { + v.push([idx, dist as usize]); + } + v + }, + ) + .reduce( + || Vec::new(), + |mut combined, v| { + combined.extend(v); + combined + }, + ) }) } } @@ -965,7 +1366,7 @@ pub fn str_neighbor_many_to_many( threshold: u32, parallel: bool, metric: &str, -) -> Vec<[usize; 2]> { +) -> Vec<[usize; 3]> { let metric_fn = map_metric(metric); if parallel == false { seqs1 @@ -975,9 +1376,13 @@ pub fn str_neighbor_many_to_many( seqs2 .iter() .enumerate() - .filter(|(_, &s2)| metric_fn(s1.as_bytes(), s2.as_bytes()) <= threshold) - .map(|(jdx, _)| [idx, jdx]) - .collect::>() + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let dist: u32 = metric_fn(s1.as_bytes(), s2.as_bytes()); + if dist <= threshold { + v.push([idx, jdx, dist as usize]); + } + v + }) }) .collect() } else { @@ -989,15 +1394,65 @@ pub fn str_neighbor_many_to_many( seqs2 .iter() .enumerate() - .filter(|(_, &s2)| metric_fn(s1.as_bytes(), s2.as_bytes()) <= threshold) - .map(|(jdx, _)| [idx, jdx]) - .collect::>() + .fold(Vec::new(), |mut v, (jdx, &s2)| { + let dist: u32 = metric_fn(s1.as_bytes(), s2.as_bytes()); + if dist <= threshold { + v.push([idx, idx + 1 + jdx, dist as usize]); + } + v + }) }) .collect() }) } } +pub fn str_neighbor_pairwise( + seqs1: &[&str], + seqs2: &[&str], + threshold: u32, + parallel: bool, + metric: &str, +) -> Vec<[usize; 2]> { + let metric_fn = map_metric(metric); + if parallel == false { + seqs1 + .iter() + .enumerate() + .zip(seqs2) + .fold(Vec::new(), |mut v, ((idx, &s1), &s2)| { + let dist: u32 = metric_fn(s1.as_bytes(), s2.as_bytes()); + if dist <= threshold { + v.push([idx, dist as usize]); + } + v + }) + } else { + POOL.install(|| { + seqs1 + .par_iter() + .enumerate() + .zip(seqs2.par_iter()) + .fold( + || Vec::new(), + |mut v, ((idx, &s1), &s2)| { + let dist: u32 = metric_fn(s1.as_bytes(), s2.as_bytes()); + if dist <= threshold { + v.push([idx, dist as usize]); + } + v + }, + ) + .reduce( + || Vec::new(), + |mut combined, v| { + combined.extend(v); + combined + }, + ) + }) + } +} pub fn str_bin_many_to_many( seqs1: &[&str], seqs2: &[&str], diff --git a/src/lib.rs b/src/lib.rs index c187e8b..307a57d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,6 +135,15 @@ fn hamming_many_to_many(seqs1: Vec<&str>, seqs2: Vec<&str>, parallel: bool) -> P )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, parallel=false))] +fn hamming_pairwise(seqs1: Vec<&str>, seqs2: Vec<&str>, parallel: bool) -> PyResult> { + Ok(_distance::str_cmp_pairwise( + &seqs1, &seqs2, parallel, "hamming", + )) +} + #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs, threshold, parallel=false))] @@ -142,7 +151,7 @@ fn hamming_neighbor_matrix( seqs: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_matrix( &seqs, threshold, parallel, "hamming", )) @@ -155,11 +164,12 @@ fn hamming_neighbor_one_to_many( seqs: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_one_to_many( seq, &seqs, threshold, parallel, "hamming", )) } + #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs1, seqs2, threshold, parallel=false))] @@ -168,12 +178,26 @@ fn hamming_neighbor_many_to_many( seqs2: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_many_to_many( &seqs1, &seqs2, threshold, parallel, "hamming", )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, threshold, parallel=false))] +fn hamming_neighbor_pairwise( + seqs1: Vec<&str>, + seqs2: Vec<&str>, + threshold: u32, + parallel: bool, +) -> PyResult> { + Ok(_distance::str_neighbor_pairwise( + &seqs1, &seqs2, threshold, parallel, "hamming", + )) +} + #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs1, seqs2, parallel=false))] @@ -317,6 +341,17 @@ fn levenshtein_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, parallel=false))] +fn levenshtein_pairwise(seqs1: Vec<&str>, seqs2: Vec<&str>, parallel: bool) -> PyResult> { + Ok(_distance::str_cmp_pairwise( + &seqs1, + &seqs2, + parallel, + "levenshtein", + )) +} #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs, threshold, parallel=false))] @@ -324,7 +359,7 @@ fn levenshtein_neighbor_matrix( seqs: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_matrix( &seqs, threshold, @@ -340,7 +375,7 @@ fn levenshtein_neighbor_one_to_many( seqs: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_one_to_many( seq, &seqs, @@ -357,7 +392,7 @@ fn levenshtein_neighbor_many_to_many( seqs2: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_many_to_many( &seqs1, &seqs2, @@ -367,6 +402,23 @@ fn levenshtein_neighbor_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, threshold, parallel=false))] +fn levenshtein_neighbor_pairwise( + seqs1: Vec<&str>, + seqs2: Vec<&str>, + threshold: u32, + parallel: bool, +) -> PyResult> { + Ok(_distance::str_neighbor_pairwise( + &seqs1, + &seqs2, + threshold, + parallel, + "levenshtein", + )) +} #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs1, seqs2, parallel=false))] @@ -528,6 +580,21 @@ fn levenshtein_exp_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, parallel=false))] +fn levenshtein_exp_pairwise( + seqs1: Vec<&str>, + seqs2: Vec<&str>, + parallel: bool, +) -> PyResult> { + Ok(_distance::str_cmp_pairwise( + &seqs1, + &seqs2, + parallel, + "levenshtein_exp", + )) +} #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs, threshold, parallel=false))] @@ -535,7 +602,7 @@ fn levenshtein_exp_neighbor_matrix( seqs: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_matrix( &seqs, threshold, @@ -551,7 +618,7 @@ fn levenshtein_exp_neighbor_one_to_many( seqs: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_one_to_many( seq, &seqs, @@ -568,7 +635,7 @@ fn levenshtein_exp_neighbor_many_to_many( seqs2: Vec<&str>, threshold: u32, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::str_neighbor_many_to_many( &seqs1, &seqs2, @@ -578,6 +645,24 @@ fn levenshtein_exp_neighbor_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, threshold, parallel=false))] +fn levenshtein_exp_neighbor_pairwise( + seqs1: Vec<&str>, + seqs2: Vec<&str>, + threshold: u32, + parallel: bool, +) -> PyResult> { + Ok(_distance::str_neighbor_pairwise( + &seqs1, + &seqs2, + threshold, + parallel, + "levenshtein_exp", + )) +} + #[cfg(all(feature = "pyo3"))] #[pyfunction] #[pyo3(signature = (seqs1, seqs2, parallel=false))] @@ -1001,6 +1086,30 @@ fn tcrdist_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, dist_weight=1, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=false, parallel=false))] +fn tcrdist_pairwise( + seqs1: Vec<&str>, + seqs2: Vec<&str>, + dist_weight: u16, + gap_penalty: u16, + ntrim: usize, + ctrim: usize, + fixed_gappos: bool, + parallel: bool, +) -> PyResult> { + Ok(_distance::tcrdist_pairwise( + &seqs1, + &seqs2, + dist_weight, + gap_penalty, + ntrim, + ctrim, + fixed_gappos, + parallel, + )) +} /// Compute the tcrdist between two CDR3-V allele pairs. /// /// This incorporates differences between the pMHC, CDR1, CDR2, and CDR3. @@ -1325,6 +1434,37 @@ fn tcrdist_allele_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, phmc_weight=1, cdr1_weight=1, cdr2_weight=1, cdr3_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=false, parallel=false))] +fn tcrdist_allele_pairwise( + seqs1: Vec<[&str; 2]>, + seqs2: Vec<[&str; 2]>, + phmc_weight: u16, + cdr1_weight: u16, + cdr2_weight: u16, + cdr3_weight: u16, + gap_penalty: u16, + ntrim: usize, + ctrim: usize, + fixed_gappos: bool, + parallel: bool, +) -> PyResult> { + Ok(_distance::tcrdist_allele_pairwise( + &seqs1, + &seqs2, + phmc_weight, + cdr1_weight, + cdr2_weight, + cdr3_weight, + gap_penalty, + ntrim, + ctrim, + fixed_gappos, + parallel, + )) +} + /// Compute the tcrdist between two CDR3-V gene pairs. /// /// Parameters @@ -1493,6 +1633,20 @@ fn tcrdist_gene_many_to_many( )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, ntrim=3, ctrim=2, parallel=false))] +fn tcrdist_gene_pairwise( + seqs1: Vec<[&str; 2]>, + seqs2: Vec<[&str; 2]>, + ntrim: usize, + ctrim: usize, + parallel: bool, +) -> PyResult> { + Ok(_distance::tcrdist_gene_pairwise( + &seqs1, &seqs2, ntrim, ctrim, parallel, + )) +} /// Compute whether two CDR3-V gene pairs are neighbors with tcrdist_gene. /// /// This function is quicker than using the tcrdist_gene function since it computes @@ -1553,7 +1707,7 @@ fn tcrdist_gene_neighbor_matrix( ntrim: usize, ctrim: usize, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::tcrdist_gene_neighbor_matrix( &seqs, threshold, ntrim, ctrim, parallel, )) @@ -1569,7 +1723,7 @@ fn tcrdist_gene_neighbor_one_to_many( ntrim: usize, ctrim: usize, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::tcrdist_gene_neighbor_one_to_many( seq, &seqs, threshold, ntrim, ctrim, parallel, )) @@ -1585,12 +1739,28 @@ fn tcrdist_gene_neighbor_many_to_many( ntrim: usize, ctrim: usize, parallel: bool, -) -> PyResult> { +) -> PyResult> { Ok(_distance::tcrdist_gene_neighbor_many_to_many( &seqs1, &seqs2, threshold, ntrim, ctrim, parallel, )) } +#[cfg(all(feature = "pyo3"))] +#[pyfunction] +#[pyo3(signature = (seqs1, seqs2, threshold, ntrim=3, ctrim=2, parallel=false))] +fn tcrdist_gene_neighbor_pairwise( + seqs1: Vec<[&str; 2]>, + seqs2: Vec<[&str; 2]>, + threshold: u16, + ntrim: usize, + ctrim: usize, + parallel: bool, +) -> PyResult> { + Ok(_distance::tcrdist_gene_neighbor_pairwise( + &seqs1, &seqs2, threshold, ntrim, ctrim, parallel, + )) +} + #[pymodule] #[pyo3(name = "tcrdist_rs")] pub fn tcrdist_rs(_py: Python<'_>, m: &PyModule) -> PyResult<()> { @@ -1598,28 +1768,34 @@ pub fn tcrdist_rs(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(hamming_matrix, m)?)?; m.add_function(wrap_pyfunction!(hamming_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(hamming_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(hamming_pairwise, m)?)?; m.add_function(wrap_pyfunction!(hamming_neighbor_matrix, m)?)?; m.add_function(wrap_pyfunction!(hamming_neighbor_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(hamming_neighbor_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(hamming_neighbor_pairwise, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_matrix, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(levenshtein_exp_pairwise, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_neighbor_matrix, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_neighbor_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_neighbor_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(levenshtein_exp_neighbor_pairwise, m)?)?; m.add_function(wrap_pyfunction!(levenshtein, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_matrix, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(levenshtein_pairwise, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_neighbor_matrix, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_neighbor_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_neighbor_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(levenshtein_neighbor_pairwise, m)?)?; m.add_function(wrap_pyfunction!(hamming_bin_many_to_many, m)?)?; m.add_function(wrap_pyfunction!(levenshtein_exp_bin_many_to_many, m)?)?; @@ -1635,21 +1811,25 @@ pub fn tcrdist_rs(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(tcrdist_matrix, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(tcrdist_pairwise, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_allele, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_allele_matrix, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_allele_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_allele_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(tcrdist_allele_pairwise, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_matrix, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(tcrdist_gene_pairwise, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_neighbor, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_neighbor_matrix, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_neighbor_one_to_many, m)?)?; m.add_function(wrap_pyfunction!(tcrdist_gene_neighbor_many_to_many, m)?)?; + m.add_function(wrap_pyfunction!(tcrdist_gene_neighbor_pairwise, m)?)?; Ok(()) }