Skip to content

Commit

Permalink
Add MS2 spectrum reading
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Apr 8, 2024
1 parent 3a0b359 commit d29e245
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 9 deletions.
24 changes: 24 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod file_types;
mod parse_mzdata;
mod parse_timsrust;
mod precursor;
mod ms2_spectrum;

use std::collections::HashMap;

Expand All @@ -10,6 +11,7 @@ use pyo3::prelude::*;

use file_types::{match_file_type, SpectrumFileType};
use precursor::Precursor;
use ms2_spectrum::MS2Spectrum;

/// Get mapping of spectrum identifiers to precursor information.
#[pyfunction]
Expand All @@ -31,11 +33,33 @@ pub fn get_precursor_info(spectrum_path: String) -> PyResult<HashMap<String, Pre
}
}

/// Read the MS2 spectra stored in a spectrum file.
///
/// The reader is chosen from the file type that `match_file_type` reports for
/// `spectrum_path`: MGF and mzML files are handled by `parse_mzdata`, Bruker raw data by
/// `parse_timsrust`.
///
/// # Errors
///
/// Returns a `PyOSError` with the message "Unsupported file type" when the file type is
/// `Unknown`, or one carrying the reader's error message when reading fails.
#[pyfunction]
pub fn get_ms2_spectra(spectrum_path: String) -> PyResult<Vec<ms2_spectrum::MS2Spectrum>> {
    let file_type = match_file_type(&spectrum_path);

    let read_result = match file_type {
        // Bail out before touching any reader when the format is not recognised.
        SpectrumFileType::Unknown => return Err(PyOSError::new_err("Unsupported file type")),
        SpectrumFileType::BrukerRaw => parse_timsrust::read_ms2_spectra(&spectrum_path),
        // SpectrumFileType::ThermoRaw => parse_with_mzdata_thermo(&spectrum_path, file_type),
        SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML => {
            parse_mzdata::read_ms2_spectra(&spectrum_path, file_type)
        }
    };

    // Surface reader failures to Python with the reader's own message.
    read_result.map_err(|e| PyOSError::new_err(e.to_string()))
}


/// A Python module implemented in Rust.
#[pymodule]
fn ms2rescore_rs(_py: Python, module: &PyModule) -> PyResult<()> {
    // Classes exposed to Python.
    module.add_class::<Precursor>()?;
    module.add_class::<MS2Spectrum>()?;

    // Module-level functions exposed to Python.
    module.add_function(wrap_pyfunction!(get_precursor_info, module)?)?;
    module.add_function(wrap_pyfunction!(get_ms2_spectra, module)?)?;

    Ok(())
}
38 changes: 38 additions & 0 deletions src/ms2_spectrum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use pyo3::prelude::*;

use crate::precursor::Precursor;

/// MS2 spectrum: an identifier, peak m/z and intensity values, and optional precursor
/// information.
///
/// Exposed to Python as a class; `get_all` / `set_all` make every field readable and
/// writable from Python.
#[pyclass(get_all, set_all)]
#[derive(Debug, Clone)]
pub struct MS2Spectrum {
    /// Spectrum identifier.
    pub identifier: String,
    /// Peak m/z values.
    pub mz: Vec<f32>,
    /// Peak intensities (presumably index-aligned with `mz`; the struct itself does not
    /// enforce equal lengths).
    pub intensity: Vec<f32>,
    /// Precursor information, or `None` when the source provides none.
    pub precursor: Option<Precursor>,
}

impl MS2Spectrum {
pub fn new(
identifier: String,
mz: Vec<f32>,
intensity: Vec<f32>,
precursor: Option<Precursor>,
) -> Self {
MS2Spectrum {
identifier,
mz,
intensity,
precursor,
}
}
}

#[pymethods]
impl MS2Spectrum {
    /// Python `repr()`: shows the identifier and the precursor's debug form; the peak
    /// arrays are elided as `[..]` to keep the output short.
    fn __repr__(&self) -> String {
        let Self {
            identifier,
            precursor,
            ..
        } = self;
        format!(
            "MS2Spectrum(identifier='{}', mz=[..], intensity=[..], precursor={:?})",
            identifier, precursor
        )
    }
}
55 changes: 47 additions & 8 deletions src/parse_mzdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ use std::fs::File;
use mzdata::io::{MGFReader, MzMLReader};

use crate::file_types::SpectrumFileType;
use crate::ms2_spectrum::MS2Spectrum;
use crate::precursor::Precursor;

impl From<mzdata::spectrum::MultiLayerSpectrum> for Precursor {
fn from(spectrum: mzdata::spectrum::MultiLayerSpectrum) -> Self {
impl From<&mzdata::spectrum::MultiLayerSpectrum> for Precursor {
fn from(spectrum: &mzdata::spectrum::MultiLayerSpectrum) -> Self {
let precursor = &spectrum.description.precursor;
match precursor {
Some(precursor) => Precursor {
Expand All @@ -18,18 +19,33 @@ impl From<mzdata::spectrum::MultiLayerSpectrum> for Precursor {
.first_scan()
.map(|s| s.start_time)
.unwrap_or(0.0),
im: get_im_from_spectrum_description(&spectrum)
.or(get_im_from_selected_ion(&spectrum))
.or(get_im_from_first_scan(&spectrum))
im: get_im_from_spectrum_description(spectrum)
.or(get_im_from_selected_ion(spectrum))
.or(get_im_from_first_scan(spectrum))
.unwrap_or(0.0),
charge: get_charge_from_spectrum(&spectrum).unwrap_or(0),
charge: get_charge_from_spectrum(spectrum).unwrap_or(0),
intensity: precursor.ions[0].intensity as f64,
},
None => Precursor::default(),
}
}
}

impl From<mzdata::spectrum::MultiLayerSpectrum> for MS2Spectrum {
    /// Convert an mzdata spectrum into an [`MS2Spectrum`], narrowing peak m/z values to `f32`.
    ///
    /// The precursor is always `Some`, because it is produced by `Precursor::from`.
    ///
    /// # Panics
    ///
    /// Panics when `into_centroid` returns an error.
    fn from(spectrum: mzdata::spectrum::MultiLayerSpectrum) -> Self {
        // Everything that only borrows `spectrum` must be read before `into_centroid`
        // consumes it.
        let precursor = Some(Precursor::from(&spectrum));
        let identifier: String = spectrum.description.id.to_string();

        // NOTE(review): this unwrap panics if the spectrum cannot be viewed as centroided
        // (presumably profile-mode data without picked peaks) — confirm against mzdata and
        // consider a fallible conversion.
        let centroided = spectrum.into_centroid().unwrap();

        let peaks = &centroided.peaks;
        let mz: Vec<f32> = peaks.iter().map(|peak| peak.mz as f32).collect();
        let intensity: Vec<f32> = peaks.iter().map(|peak| peak.intensity).collect();

        MS2Spectrum::new(identifier, mz, intensity, precursor)
    }
}

/// Parse precursor info from spectrum files with mzdata
pub fn parse_precursor_info(
spectrum_path: &str,
Expand All @@ -40,7 +56,7 @@ pub fn parse_precursor_info(
SpectrumFileType::MascotGenericFormat => Ok(MGFReader::new(file)
.filter_map(|spectrum| {
spectrum.description.precursor.as_ref()?;
Some((spectrum.description.id.clone(), Precursor::from(spectrum)))
Some((spectrum.description.id.clone(), Precursor::from(&spectrum)))
})
.collect::<HashMap<String, Precursor>>()),

Expand All @@ -50,7 +66,7 @@ pub fn parse_precursor_info(
return None;
}
spectrum.description.precursor.as_ref()?;
Some((spectrum.description.id.clone(), Precursor::from(spectrum)))
Some((spectrum.description.id.clone(), Precursor::from(&spectrum)))
})
.collect::<HashMap<String, Precursor>>()),

Expand All @@ -61,6 +77,29 @@ pub fn parse_precursor_info(
}
}

/// Read MS2 spectra from an MGF or mzML file using mzdata.
///
/// Every spectrum of an MGF file is converted; for mzML only spectra whose `ms_level` is 2
/// are kept.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be opened, or an `InvalidInput`
/// error for any file type other than MGF or mzML. The file is opened before the file type
/// is checked, so an open failure takes precedence.
pub fn read_ms2_spectra(
    spectrum_path: &str,
    file_type: SpectrumFileType,
) -> Result<Vec<MS2Spectrum>, std::io::Error> {
    let file = File::open(spectrum_path)?;

    let spectra: Vec<MS2Spectrum> = match file_type {
        SpectrumFileType::MzML => MzMLReader::new(file)
            .filter(|spectrum| spectrum.description.ms_level == 2)
            .map(MS2Spectrum::from)
            .collect(),

        SpectrumFileType::MascotGenericFormat => {
            MGFReader::new(file).map(MS2Spectrum::from).collect()
        }

        _ => {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "Unsupported file type for mzdata",
            ))
        }
    };

    Ok(spectra)
}

// pub fn parse_precursor_info_thermo(
// spectrum_path: &str,
// file_type: SpectrumFileType,
Expand Down
31 changes: 31 additions & 0 deletions src/parse_timsrust.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::collections::HashMap;

use crate::ms2_spectrum::MS2Spectrum;
use crate::precursor::Precursor;

impl From<timsrust::Precursor> for Precursor {
Expand All @@ -14,6 +15,24 @@ impl From<timsrust::Precursor> for Precursor {
}
}

impl From<timsrust::Spectrum> for MS2Spectrum {
    /// Convert a timsrust spectrum into an [`MS2Spectrum`].
    ///
    /// The spectrum index becomes the identifier, and m/z values and intensities are cast
    /// to `f32`. The precursor is `Some` only when the quadrupole event is a
    /// `QuadrupoleEvent::Precursor`.
    fn from(spectrum: timsrust::Spectrum) -> Self {
        let identifier = spectrum.index.to_string();
        let mz: Vec<f32> = spectrum.mz_values.iter().map(|&value| value as f32).collect();
        let intensity: Vec<f32> = spectrum
            .intensities
            .iter()
            .map(|&value| value as f32)
            .collect();

        let precursor = if let timsrust::QuadrupoleEvent::Precursor(event) = spectrum.precursor {
            Some(Precursor::from(event))
        } else {
            None
        };

        MS2Spectrum::new(identifier, mz, intensity, precursor)
    }
}

/// Parse precursor info from spectrum files with timsrust
pub fn parse_precursor_info(
spectrum_path: &str,
Expand All @@ -38,3 +57,15 @@ pub fn parse_precursor_info(
})
.collect::<HashMap<String, Precursor>>())
}

/// Read spectra from Bruker data with timsrust and convert each one to an [`MS2Spectrum`].
///
/// # Errors
///
/// Returns an `std::io::Error` of kind `Other`, carrying the timsrust error message, when
/// the file reader cannot be created for `spectrum_path`.
pub fn read_ms2_spectra(spectrum_path: &str) -> Result<Vec<MS2Spectrum>, std::io::Error> {
    // timsrust has its own error type; fold it into an io::Error to match the signature.
    let reader = timsrust::FileReader::new(spectrum_path)
        .map_err(|source| std::io::Error::new(std::io::ErrorKind::Other, source.to_string()))?;

    let raw_spectra = reader.read_all_spectra();
    let spectra: Vec<MS2Spectrum> = raw_spectra.into_iter().map(MS2Spectrum::from).collect();

    Ok(spectra)
}
2 changes: 1 addition & 1 deletion src/precursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use pyo3::prelude::*;

/// Precursor information.
#[pyclass(get_all, set_all)]
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct Precursor {
pub mz: f64,
pub rt: f64,
Expand Down

0 comments on commit d29e245

Please sign in to comment.