
Commit

minor changes
ebioman committed Sep 23, 2024
1 parent 10351c0 commit 32d04b3
Showing 3 changed files with 135 additions and 56 deletions.
31 changes: 31 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,31 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/rust
{
"name": "Rust",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/rust:0-1-bullseye",

// Use 'mounts' to make the cargo cache persistent in a Docker Volume.
// "mounts": [
// {
// "source": "devcontainer-cargo-cache-${devcontainerId}",
// "target": "/usr/local/cargo",
// "type": "volume"
// }
// ]

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "sudo apt-get update && sudo apt-get install -y cmake"

// Configure tool-specific properties.
// "customizations": {},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
17 changes: 8 additions & 9 deletions Cargo.toml
@@ -2,7 +2,6 @@
name = "abc"
version = "0.3.1"
authors = ["Emanuel Schmid-Siegert"]
edition = "2018"
description = """
The Agnostic Bam Counter (abc) determines at given positions the count of observed nucleotides.
Simply supply a bed file with positions and obtain a tsv file with counts for each ATCG and reference if provided.
@@ -18,14 +17,14 @@ Note:
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rust-htslib = { version = "*", default-features = false }
rust-htslib = "0.40.2"
#regex = "1"
clap = "*"
csv = "*"
chrono = "*"
bio = "*"
clap = "4.1.6"
csv = "1.2.0"
chrono = "0.4.23"
bio = "1.1.0"
itertools = "*"
bambam = {git = "https://github.com/ebioman/bambam"}
crossbeam = "*"
crossbeam-channel = "*"
rayon = "*"
crossbeam = "0.8.2"
crossbeam-channel = "0.5.6"
rayon = "1.6.1"
143 changes: 96 additions & 47 deletions src/main.rs
@@ -1,4 +1,3 @@
use clap::{app_from_crate,crate_name,crate_description,crate_authors,crate_version,Arg};
use std::collections::HashMap;
use std::path::Path;
use rust_htslib::{bam, bam::Read};
@@ -12,11 +11,15 @@ use itertools::Itertools;
extern crate bambam;
use std::fs;
use crossbeam::channel::{unbounded};
use rayon;



use clap::{arg, Command,Arg};

extern crate rust_htslib;
extern crate clap;
extern crate rayon;
extern crate itertools;
extern crate chrono;
extern crate crossbeam;
extern crate bio;

// Change the alias to `Box<error::Error>`.
type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
@@ -40,41 +43,54 @@ struct Pileup {
/// reference FASTA
reference: String ,
/// number of observed adenines
nuc_a: i32,
nuc_a: u32,
/// number of observed thymines
nuc_t: i32,
nuc_t: u32,
/// number of observed cytosines
nuc_c: i32,
nuc_c: u32,
/// number of observed guanines
nuc_g: i32,
nuc_g: u32,
/// number of observed ambiguous
/// elements, e.g. N or X
ambigious: i32,
ambigious: u32,
/// number of reads with insertion
ins: i32,
ins: u32,
/// number of reads with deletion
del: i32,
del: u32,
/// depth at that given position
depth: u32,
/// mutated or not
mutated: bool,
/// sum of variant frequency (sum/depth)
/// variant frequency (sum/depth)
vaf: f32,
/// reference allele frequency (ref/depth)
raf: f32,
}

#[derive(Debug,Default,Clone)]
// the information concerning the
// program which is organized and printed in the header
pub struct MetaInfo {
version: String,
author: String,
command: String,
}

// takes the generated pileups in our specific
// format and prints them according to our wishes;
// currently only tsv, might add more possibilities
// in the future
fn print_pileup(result:&[Pileup],out: &str, version: &str, author: &str , command: &str )-> Result<i32> {
fn print_pileup(
result:&[Pileup],
out: &str,
infos: MetaInfo,
)-> Result<i32> {
let now: DateTime<Local> = Local::now();
let mut writer = csv::WriterBuilder::new().delimiter(b'\t').from_path(&out)?;
writer.write_record(&["##","abc:",version,"","","","","","","","","","",""])?;
writer.write_record(&["##","author:",author,"","","","","","","","","","",""])?;
writer.write_record(&["##","abc:",infos.version,"","","","","","","","","","",""])?;
writer.write_record(&["##","author:",infos.author,"","","","","","","","","","",""])?;
writer.write_record(&["##","date:",&now.to_rfc2822(),"","","","","","","","","","",""])?;
writer.write_record(&["##","command:", command,"","","","","","","","","","",""])?;
writer.write_record(&["##","command:", infos.command,"","","","","","","","","","",""])?;
writer.write_record(&["# chromosome","start","end","reference","A","T","C","G","ambigious","ins","del","depth","VAF","RAF"])?;
for element in result.iter() {
writer.write_record(&[
@@ -102,7 +118,9 @@ fn print_pileup(result:&[Pileup],out: &str, version: &str, author: &str , comman
// this simply takes a BED file and organizes entries in a Hashmap
// where they are organized by chromosome which should speedup
// later the indexed access
fn parse_bed_file (input: &str) -> HashMap<String,Vec<u64>>{
fn parse_bed_file (
input: &str
) -> HashMap<String,Vec<u64>>{
assert!(
Path::new(input).exists(),
"ERROR: BED file {} does not exist!",
@@ -113,7 +131,14 @@ fn parse_bed_file (input: &str) -> HashMap<String,Vec<u64>>{
for entry in bed.records(){
let record = entry.expect("ERROR: wrong BED record");
if record.end()-record.start()>1 {
panic!("ERROR: entry {:?} contained not a position but a range!",&record);
eprintln!("WARNING: entry {:?} contained not a position but a range!",&record);
for i in record.end()..record.start {
bed_result
.entry(record.chrom().to_string())
.or_insert_with(Vec::new)
.push(i);

}
}
bed_result
.entry(record.chrom().to_string())
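As a sanity check on the new range branch, here is a minimal sketch of how a multi-base BED entry expands into single positions. The chromosome and coordinates are hypothetical, and it assumes the corrected start-to-end iteration order; it is not code from the repository.

use std::collections::HashMap;

fn expand_range_example() {
    let (chrom, start, end) = ("chr1".to_string(), 100u64, 103u64);
    let mut bed_result: HashMap<String, Vec<u64>> = HashMap::new();
    for i in start..end {
        // same entry / or_insert_with / push pattern as above
        bed_result.entry(chrom.clone()).or_insert_with(Vec::new).push(i);
    }
    assert_eq!(bed_result["chr1"], vec![100, 101, 102]);
}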
@@ -131,7 +156,12 @@ fn parse_bed_file (input: &str) -> HashMap<String,Vec<u64>>{
// This reads now the BAM file and uses the positions
// to check for each position the observed number of nucleotides
// It sorts as well again by chromosome and position to be safe
fn analyze_bam_positions (input: &str , positions: &HashMap<String,Vec<u64>>, ref_fasta: &str, threads: &usize) -> Vec<Pileup>{
fn analyze_bam_positions (
input: &str ,
positions: &HashMap<String,Vec<u64>>,
ref_fasta: &str,
threads: &usize
) -> Vec<Pileup>{
assert!(
Path::new(input).exists(),
"ERROR: BAM file {} does not exist!",
@@ -191,7 +221,12 @@ fn analyze_bam_positions (input: &str , positions: &HashMap<String,Vec<u64>>, re
// this sub-function gets the iterator result from the positions
// and returns the result of the pileup analysis at that given position;
// this allows potentially parallelizing over each analyzed position
fn fetch_position(bam: & mut bam::IndexedReader, chr: &str, pos:&u64, ref_file: Option<IndexedReader<fs::File>>) -> Pileup {
fn fetch_position(
bam: & mut bam::IndexedReader,
chr: &str,
pos:&u64,
ref_file: Option<IndexedReader<fs::File>>
) -> Pileup {
// NOTE:
// SAM is 1 based and not 0 based, need to correct for that in
let start = *pos ;
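The NOTE above about 1-based SAM versus 0-based positions is easy to trip over, so here is a tiny worked sketch of the bookkeeping with made-up values; the exact correction applied in the collapsed part of fetch_position is assumed, not shown in this diff.

// BED positions are 0-based, SAM/BAM display coordinates are 1-based, and
// rust-htslib fetch() works on 0-based half-open ranges (assumed for illustration).
fn coordinate_example() {
    let bed_pos: u64 = 999;                    // 0-based position from the BED file
    let sam_pos_1_based = bed_pos + 1;         // what samtools would print: 1000
    let fetch_window = (bed_pos, bed_pos + 1); // 0-based half-open window of length 1
    assert_eq!(sam_pos_1_based, 1000);
    assert_eq!(fetch_window, (999, 1000));
}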
@@ -253,7 +288,9 @@ fn fetch_position(bam: & mut bam::IndexedReader, chr: &str, pos:&u64, ref_file:
}
}

fn eval_mutation(mut obs:Pileup) -> Pileup{
fn eval_mutation(
mut obs:Pileup
) -> Pileup{
match obs.reference.to_uppercase().as_str(){
"A" => {
if ( obs.nuc_c !=0 ) || (obs.nuc_g != 0) || (obs.nuc_t != 0) || (obs.del !=0) || (obs.ins !=0) {
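The vaf and raf fields documented on Pileup are ratios over depth; the snippet below works through one made-up example for a reference "A" position. The actual computation sits in the collapsed remainder of eval_mutation, so treat this only as an assumed illustration.

fn vaf_raf_example() {
    // assumed counts for a depth-10 position with reference "A"
    let (nuc_a, nuc_t, nuc_c, nuc_g, ins, del, depth) =
        (8u32, 2u32, 0u32, 0u32, 0u32, 0u32, 10u32);
    let non_ref = nuc_t + nuc_c + nuc_g + ins + del;
    let vaf = non_ref as f32 / depth as f32; // variant frequency (sum/depth)            -> 0.2
    let raf = nuc_a as f32 / depth as f32;   // reference allele frequency (ref/depth)   -> 0.8
    assert!((vaf - 0.2).abs() < f32::EPSILON);
    assert!((raf - 0.8).abs() < f32::EPSILON);
}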
@@ -307,51 +344,51 @@ fn main() {
// was used to execute as I can't get this from clap
let args: Vec<String> = env::args().collect();
let args_string = args.join(" ");
let matches = app_from_crate!()
.arg(Arg::with_name("BAM")
.short("b")
let matches = Command::new("abc")
.arg(Arg::new("BAM")
.short('b')
.long("bam")
.value_name("BAM")
.help("aligned short or long reads")
.takes_value(true)
//.takes_value(true)
.required(true))
.arg(Arg::with_name("POSITIONS")
.short("p")
.arg(Arg::new("POSITIONS")
.short('p')
.long("positions")
.value_name("BED")
.help("A bed file with positions")
.takes_value(true)
//.takes_value(true)
.required(true))
.arg(Arg::with_name("OUT")
.short("o")
.arg(Arg::new("OUT")
.short('o')
.long("outfile")
.value_name("TSV")
.help("the output file which will be in tsv format")
.takes_value(true)
//.takes_value(true)
.required(true))
.arg(Arg::with_name("REF")
.short("r")
.arg(Arg::new("REF")
.short('r')
.long("reference")
.value_name("FASTA")
.help("if reference in fasta format provided, reference nucleotide \
\nis provided for each positions, otherwise NA")
.takes_value(true)
//.takes_value(true)
.required(false))
.arg(Arg::with_name("THREADS")
.short("t")
.arg(Arg::new("THREADS")
.short('t')
.long("threads")
.value_name("INT")
.help("number of threads used in thread-pool for querying positions ")
.default_value("1")
.takes_value(true)
//.takes_value(true)
.required(false))
.arg(Arg::with_name("BAMBAM")
.short("x")
.arg(Arg::new("BAMBAM")
.short('x')
.long("bambam")
.value_name("IDH")
.takes_value(false)
.action(clap::ArgAction::SetTrue)
.required(false)
.hidden(true))
.hide(true))
.get_matches();

// prepare input or quit
@@ -361,18 +398,30 @@
let ref_file = matches.value_of("REF").unwrap_or("NONE");
let bam_threads = matches.value_of("THREADS").unwrap().parse::<usize>().unwrap();

if matches.is_present("BAMBAM") {
if matches.args_present("BAMBAM") {
bambam::bam_bam_inda_house();
}
let positions : HashMap<String,Vec<u64>> = parse_bed_file(bed_file);
let analysis_result = analyze_bam_positions(bam_file,&positions, ref_file, &bam_threads);

let positions : HashMap<String,Vec<u64>> = parse_bed_file(
bed_file
);

let analysis_result = analyze_bam_positions(
bam_file,&positions,
ref_file,
&bam_threads
);
let infos = MetaInfo{
version: env!("CARGO_PKG_VERSION").to_string(),
author: env!("CARGO_PKG_AUTHORS").to_string(),
command: args_string,
};
// here we box the error, so in case the writing does
// not work the process returns the error
if let Err(err) = print_pileup(
&analysis_result,
out_file,
crate_version!(),
crate_authors!(),&args_string
infos,
){
eprintln!("{}", err);
process::exit(1);
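One caveat now that Cargo.toml pins clap to 4.1.6: the value_of accessors still visible a few lines above were removed in clap 4, so those call sites will eventually need the typed get_one lookups, paired with get_flag for boolean switches like BAMBAM. The sketch below is an assumed illustration, not code from the repository; the argument ids mirror the builder above.

use clap::{Arg, ArgAction, Command};

fn main() {
    let matches = Command::new("abc")
        .arg(Arg::new("BAM").short('b').long("bam").required(true))
        .arg(Arg::new("THREADS").short('t').long("threads").default_value("1"))
        .arg(Arg::new("BAMBAM").short('x').long("bambam").action(ArgAction::SetTrue).hide(true))
        .get_matches();

    // clap 4 replaces value_of() with typed lookups ...
    let bam_file: &String = matches.get_one::<String>("BAM").expect("required by clap");
    let threads: usize = matches
        .get_one::<String>("THREADS")
        .and_then(|v| v.parse().ok())
        .unwrap_or(1);
    // ... and is_present() with get_flag() for ArgAction::SetTrue arguments
    let easter_egg = matches.get_flag("BAMBAM");
    println!("bam={bam_file} threads={threads} bambam={easter_egg}");
}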
