Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use mzdata::mz_read #1

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
13 changes: 12 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install stable toolchain
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true

- name: Run Clippy
run: cargo clippy --all-targets --all-features
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets --all-features

pytest:
runs-on: ${{ matrix.os }}
Expand Down
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ms2rescore-rs"
version = "0.3.0"
version = "0.4.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -10,5 +10,6 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = "0.20.0"
mzdata = "0.20.0"
mzdata = "0.26.0"
timsrust = "0.3.0"
mzpeaks = "0.19.0"
37 changes: 23 additions & 14 deletions src/file_types.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
use mzdata::io::MassSpectrometryFormat;

/// Spectrum file formats that `match_file_type` can report for a given path.
pub enum SpectrumFileType {
/// Mascot Generic Format; reported when mzdata infers `MassSpectrometryFormat::MGF`.
MascotGenericFormat,
/// Reported when mzdata infers `MassSpectrometryFormat::MzML`.
MzML,
/// Reported when mzdata infers `MassSpectrometryFormat::MzMLb`.
MzMLb,
/// Bruker raw data; reported for paths ending in `.d` or `.ms2`, or when the
/// `bin`/`parquet` folder check in `match_file_type` succeeds.
BrukerRaw,
// ThermoRaw,
/// Reported when mzdata infers `MassSpectrometryFormat::ThermoRaw`.
ThermoRaw,
/// Fallback for any path that none of the checks above recognise.
Unknown,
}

pub fn match_file_type(spectrum_path: &str) -> SpectrumFileType {
let extension = spectrum_path.split('.').last().unwrap_or("").to_lowercase();
match extension.as_str() {
"mgf" => SpectrumFileType::MascotGenericFormat,
"mzml" => SpectrumFileType::MzML,
"d" | "ms2" => SpectrumFileType::BrukerRaw,
// "raw" => SpectrumFileType::ThermoRaw,
_ => match (
folder_contains_extension(spectrum_path, "bin"),
folder_contains_extension(spectrum_path, "parquet"),
) {
(true, true) => SpectrumFileType::BrukerRaw,
_ => SpectrumFileType::Unknown,
},
match mzdata::io::infer_from_path(spectrum_path).0 {
MassSpectrometryFormat::MGF => SpectrumFileType::MascotGenericFormat,
MassSpectrometryFormat::MzML => SpectrumFileType::MzML,
MassSpectrometryFormat::MzMLb => SpectrumFileType::MzMLb,
MassSpectrometryFormat::ThermoRaw => SpectrumFileType::ThermoRaw,
MassSpectrometryFormat::Unknown => {
let extension = spectrum_path.split('.').last().unwrap_or("").to_lowercase();
match extension.as_str() {
"d" | "ms2" => SpectrumFileType::BrukerRaw,
_ => match (
folder_contains_extension(spectrum_path, "bin"),
folder_contains_extension(spectrum_path, "parquet"),
) {
(true, true) => SpectrumFileType::BrukerRaw,
_ => SpectrumFileType::Unknown,
},
}
}
_ => SpectrumFileType::Unknown
}
}

Expand Down
29 changes: 18 additions & 11 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,37 @@ mod ms2_spectrum;

use std::collections::HashMap;

use pyo3::exceptions::PyOSError;
use pyo3::exceptions::{PyException, PyValueError};
use pyo3::prelude::*;

use file_types::{match_file_type, SpectrumFileType};
use precursor::Precursor;
use ms2_spectrum::MS2Spectrum;

/// Report whether `spectrum_path` resolves to a supported spectrum file type.
///
/// Returns `false` only when `match_file_type` classifies the path as
/// `SpectrumFileType::Unknown`; every other variant counts as supported.
#[pyfunction]
pub fn is_supported_file_type(spectrum_path: String) -> bool {
    !matches!(
        match_file_type(spectrum_path.as_str()),
        SpectrumFileType::Unknown
    )
}

/// Get mapping of spectrum identifiers to precursor information.
#[pyfunction]
pub fn get_precursor_info(spectrum_path: String) -> PyResult<HashMap<String, Precursor>> {
let file_type = match_file_type(&spectrum_path);

let precursors = match file_type {
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML => {
parse_mzdata::parse_precursor_info(&spectrum_path, file_type)
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML | SpectrumFileType::MzMLb | SpectrumFileType:: ThermoRaw => {
parse_mzdata::parse_precursor_info(&spectrum_path)
}
SpectrumFileType::BrukerRaw => parse_timsrust::parse_precursor_info(&spectrum_path),
// SpectrumFileType::ThermoRaw => parse_with_mzdata_thermo(&spectrum_path, file_type),
SpectrumFileType::Unknown => return Err(PyOSError::new_err("Unsupported file type")),
SpectrumFileType::Unknown => return Err(PyValueError::new_err("Unsupported file type")),
};

match precursors {
Ok(precursors) => Ok(precursors),
Err(e) => Err(PyOSError::new_err(e.to_string())),
Err(e) => Err(PyException::new_err(e.to_string())),
}
}

Expand All @@ -39,17 +46,16 @@ pub fn get_ms2_spectra(spectrum_path: String) -> PyResult<Vec<ms2_spectrum::MS2S
let file_type = match_file_type(&spectrum_path);

let spectra = match file_type {
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML => {
parse_mzdata::read_ms2_spectra(&spectrum_path, file_type)
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML | SpectrumFileType::MzMLb | SpectrumFileType:: ThermoRaw => {
parse_mzdata::read_ms2_spectra(&spectrum_path)
}
SpectrumFileType::BrukerRaw => parse_timsrust::read_ms2_spectra(&spectrum_path),
// SpectrumFileType::ThermoRaw => parse_with_mzdata_thermo(&spectrum_path, file_type),
SpectrumFileType::Unknown => return Err(PyOSError::new_err("Unsupported file type")),
SpectrumFileType::Unknown => return Err(PyValueError::new_err("Unsupported file type")),
};

match spectra {
Ok(spectra) => Ok(spectra),
Err(e) => Err(PyOSError::new_err(e.to_string())),
Err(e) => Err(PyException::new_err(e.to_string())),
}
}

Expand All @@ -59,6 +65,7 @@ pub fn get_ms2_spectra(spectrum_path: String) -> PyResult<Vec<ms2_spectrum::MS2S
fn ms2rescore_rs(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<Precursor>()?;
m.add_class::<MS2Spectrum>()?;
m.add_function(wrap_pyfunction!(is_supported_file_type, m)?)?;
m.add_function(wrap_pyfunction!(get_precursor_info, m)?)?;
m.add_function(wrap_pyfunction!(get_ms2_spectra, m)?)?;
Ok(())
Expand Down
49 changes: 9 additions & 40 deletions src/parse_mzdata.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
use std::collections::HashMap;
use std::fs::File;

use mzdata::io::{MGFReader, MzMLReader};
use mzdata::params::ParamValue;
use mzdata::mz_read;

use crate::file_types::SpectrumFileType;
use crate::ms2_spectrum::MS2Spectrum;
use crate::precursor::Precursor;

Expand Down Expand Up @@ -50,55 +48,26 @@ impl From<mzdata::spectrum::MultiLayerSpectrum> for MS2Spectrum {
/// Parse precursor info from spectrum files with mzdata
pub fn parse_precursor_info(
spectrum_path: &str,
file_type: SpectrumFileType,
) -> Result<HashMap<String, Precursor>, std::io::Error> {
let file = File::open(spectrum_path)?;
match file_type {
SpectrumFileType::MascotGenericFormat => Ok(MGFReader::new(file)
mz_read!(spectrum_path.as_ref(), reader => {
reader.filter(|spectrum| spectrum.description.ms_level == 2)
.filter_map(|spectrum| {
spectrum.description.precursor.as_ref()?;
Some((spectrum.description.id.clone(), Precursor::from(&spectrum)))
})
.collect::<HashMap<String, Precursor>>()),

SpectrumFileType::MzML => Ok(MzMLReader::new(file)
.filter_map(|spectrum| {
if spectrum.description.ms_level != 2 {
return None;
}
spectrum.description.precursor.as_ref()?;
Some((spectrum.description.id.clone(), Precursor::from(&spectrum)))
})
.collect::<HashMap<String, Precursor>>()),

_ => Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Unsupported file type for mzdata",
)),
}
.collect::<HashMap<String, Precursor>>()
})
}

/// Read MS2 spectra from spectrum files with mzdata
pub fn read_ms2_spectra(
spectrum_path: &str,
file_type: SpectrumFileType,
) -> Result<Vec<MS2Spectrum>, std::io::Error> {
let file = File::open(spectrum_path)?;
match file_type {
SpectrumFileType::MascotGenericFormat => Ok(MGFReader::new(file)
.map(MS2Spectrum::from)
.collect::<Vec<MS2Spectrum>>()),

SpectrumFileType::MzML => Ok(MzMLReader::new(file)
.filter(|spectrum| spectrum.description.ms_level == 2)
mz_read!(spectrum_path.as_ref(), reader => {
reader.filter(|spectrum| spectrum.description.ms_level == 2)
.map(MS2Spectrum::from)
.collect::<Vec<MS2Spectrum>>()),

_ => Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Unsupported file type for mzdata",
)),
}
.collect::<Vec<MS2Spectrum>>()
})
}

// pub fn parse_precursor_info_thermo(
RalfG marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
Loading