Merge pull request #101 from Proteobench/parse_settings

Parse settings
Proteobench · Nov 22, 2023 · 902dd1b · 902dd1b
2 parents 5086469 + 27f8f1f
commit 902dd1b
Show file tree

Hide file tree

Showing 34 changed files with 5,947 additions and 64 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v3

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,25 @@
+{
+    "[python]": {
+        "editor.formatOnSave": true,
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": true
+        },
+        "editor.rulers": [
+            88
+        ],
+        "editor.defaultFormatter": "ms-python.black-formatter"
+    },
+    "python.formatting.provider": "none",
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./test",
+        "-p",
+        "test_*.py"
+    ],
+    "python.testing.pytestEnabled": false,
+    "python.testing.unittestEnabled": true,
+    "flake8.args": [
+        "--max-line-length=120",
+    ],
+}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,4 +1,117 @@
 # Contributing
 
-Check out the [developer guide](https://proteobench.github.io/developer-guide/)
-to learn how you can contribute to ProteoBench.
+This document briefly describes how to contribute to
+[ProteoBench](https://github.com/proteobench/proteobench).
+
+
+
+## Before you begin
+
+If you have an idea for a feature, a use case to add, or an approach for a bug fix,
+you are welcome to share it with the community by opening a
+thread in
+[GitHub Discussions](https://github.com/proteobench/proteobench/discussions)
+or in [GitHub Issues](https://github.com/proteobench/proteobench/issues).
+
+Not sure where to start? Great contributions to
+[ProteoBench](https://github.com/proteobench/proteobench) include:
+
+[TODO]
+
+Also check out the [open issues](https://github.com/proteobench/proteobench/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3A%22help+wanted%22)
+that carry the `good first issue` or `help wanted` labels.
+
+
+## Development setup
+
+### Local installation
+Install the package and dependencies with [Flit](https://flit.pypa.io/en/stable/):
+
+```
+pip install flit
+flit install -s
+```
+
+Using a virtual environment is recommended.
+
+
+### Unit tests
+
+To run the tests, use the following command:
+
+> We use pytest, which can also run unittest-style tests if you prefer those.
+
+```
+pytest
+```
+
+
+### Testing the web interface locally
+
+Start the web server locally with:
+
+```
+cd ./webinterface/
+streamlit run Home.py
+```
+
+This will launch the ProteoBench application in your web browser.
+
+
+Changes to the code in `./webinterface` will trigger a reload of the web server.
+However, changes in `./proteobench` only take effect after a full restart of
+the web server.
+
+
+### Documentation
+
+To work on the documentation and get a live preview, install the requirements
+and run `sphinx-autobuild`:
+
+```
+flit install -s
+sphinx-autobuild  --watch ./proteobench ./docs/ ./docs/_build/html/
+```
+
+Then browse to http://localhost:8000 to watch the live preview.
+
+
+## How to contribute
+
+- Fork [ProteoBench](https://github.com/proteobench/proteobench) on GitHub to
+  make your changes.
+- Commit and push your changes to your
+  [fork](https://help.github.com/articles/pushing-to-a-remote/).
+- Ensure that the tests and documentation (both Python docstrings and files in
+  `/docs/source/`) have been updated according to your changes. Python
+  docstrings are formatted in the
+  [numpydoc style](https://numpydoc.readthedocs.io/en/latest/format.html).
+- Open a
+  [pull request](https://help.github.com/articles/creating-a-pull-request/)
+  with these changes. Your pull request message should ideally include:
+
+    - A description of why the changes should be made.
+    - A description of the implementation of the changes.
+    - A description of how to test the changes.
+
+- The pull request should pass all the continuous integration tests which are
+  automatically run by
+  [GitHub Actions](https://github.com/proteobench/proteobench/actions).
+
+
+
+## Release workflow
+
+- When a new version is ready to be published:
+
+    1. Change the `__version__` in `proteobench/__init__.py` following
+       [semantic versioning](https://semver.org/).
+    2. Update the changelog (if not already done) in `CHANGELOG.md` according to
+       [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+    3. Merge all final changes with the `main` branch.
+    4. Create a new release on GitHub.
+
+- When a new GitHub release is made, the `Publish` GitHub Action is automatically
+  triggered to build the Python package and publish it to PyPI. Upon a new PyPI release,
+  the Bioconda automations will automatically update the Bioconda package. However,
+  if dependencies are changed, the conda recipe will have to be updated accordingly.
diff --git a/proteobench/io/params/__init__.py b/proteobench/io/params/__init__.py
@@ -0,0 +1,78 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+# Reference for parameter names
+# https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml
+@dataclass
+class ProteoBenchParameters:
+    """
+    Parameters for a proteomics search engine.
+
+    All attributes are optional and default to ``None``.
+
+    Attributes
+    ----------
+    software_name : Optional[str]
+        Name of the software tool / pipeline used for this benchmark run
+        (examples: "MaxQuant", "AlphaPept", "Proline", ...).
+    software_version : Optional[str]
+        Version of the software tool / pipeline used for this benchmark run.
+    search_engine : Optional[str]
+        Search engine used for this benchmark run
+        (examples: "Andromeda", "Mascot", ...).
+    search_engine_version : Optional[str]
+        Version of the search engine used for this benchmark run.
+    ident_fdr_psm : Optional[str]
+        False discovery rate (FDR) threshold for peptide-spectrum match
+        (PSM) validation ("0.01" = 1%).
+    ident_fdr_peptide : Optional[str]
+        False discovery rate (FDR) threshold for peptide validation ("0.01" = 1%).
+    ident_fdr_protein : Optional[str]
+        False discovery rate (FDR) threshold for protein validation ("0.01" = 1%).
+    enable_match_between_runs : Optional[bool]
+        Whether match between runs (also named cross assignment) is enabled.
+    precursor_mass_tolerance : Optional[str]
+        Precursor mass tolerance used for the search,
+        associated with the unit: "20 ppm" = +/- 20 ppm; if several, separate with "|".
+    fragment_mass_tolerance : Optional[str]
+        Fragment mass tolerance used for the search,
+        associated with the unit: "20 ppm" = +/- 20 ppm; if several, separate with "|".
+    enzyme : Optional[str]
+        Enzyme used as parameter for the search. If several, use "|".
+    allowed_miscleavages : Optional[int]
+        Maximal number of missed cleavages allowed.
+    min_peptide_length : Optional[str]
+        Minimum peptide length (number of residues) allowed for the search.
+    max_peptide_length : Optional[str]
+        Maximum peptide length (number of residues) allowed for the search.
+    fixed_mods : Optional[str]
+        Fixed modifications searched for in the search. If several, separate with "|".
+    variable_mods : Optional[str]
+        Variable modifications searched for in the search. If several, separate with "|".
+    max_mods : Optional[int]
+        Maximal number of modifications per peptide
+        (including fixed and variable modifications).
+    min_precursor_charge : Optional[int]
+        Minimum precursor charge allowed.
+    max_precursor_charge : Optional[int]
+        Maximum precursor charge allowed.
+    """
+
+    software_name: Optional[str] = None
+    software_version: Optional[str] = None
+    search_engine: Optional[str] = None
+    search_engine_version: Optional[str] = None
+    ident_fdr_psm: Optional[str] = None  # fdr_psm
+    ident_fdr_peptide: Optional[str] = None
+    ident_fdr_protein: Optional[str] = None  # fdr_protein
+    enable_match_between_runs: Optional[bool] = None  # MBR
+    precursor_mass_tolerance: Optional[str] = None  # precursor_tol, precursor_tol_unit
+    fragment_mass_tolerance: Optional[str] = None  # fragment_tol, fragment_tol_unit
+    enzyme: Optional[str] = None  # enzyme_name
+    allowed_miscleavages: Optional[int] = None  # missed_cleavages
+    # NOTE(review): the peptide lengths are annotated as str although the other
+    # count-like fields are int -- confirm that this is intended.
+    min_peptide_length: Optional[str] = None  # min_pep_length
+    max_peptide_length: Optional[str] = None  # max_pep_length
+    fixed_mods: Optional[str] = None  # fixed_modifications
+    variable_mods: Optional[str] = None  # variable_modifications
+    max_mods: Optional[int] = None  # max_num_modifications
+    min_precursor_charge: Optional[int] = None  # precursor_charge
+    max_precursor_charge: Optional[int] = None
diff --git a/proteobench/io/params/fragger.py b/proteobench/io/params/fragger.py
@@ -0,0 +1,60 @@
+"""Functionality to parse MSFragger fragger.params parameter files.
+
+MSFragger has a text-based parameter file format which
+separates parameters and their values using an equal sign. Optional comments are
+expressed with a hash sign.
+"""
+from __future__ import annotations
+
+import logging
+from collections import namedtuple
+
+logger = logging.getLogger(__name__)
+
+# One record per parameter line: the parameter name, its value and the trailing comment.
+Parameter = namedtuple("Parameter", ["name", "value", "comment"])
+
+
+def read_file(file: str) -> list[Parameter]:
+    """Read an MSFragger parameter file as a list of records.
+
+    Blank lines and lines starting with a hash sign are skipped. Every other
+    line yields one record.
+
+    Parameters
+    ----------
+    file : str
+        Path of the parameter file to read.
+
+    Returns
+    -------
+    list[Parameter]
+        Records in file order. ``value`` is ``None`` if the line has no equal
+        sign or nothing after it; ``comment`` is ``None`` if the line has no
+        hash sign.
+    """
+    data = []
+    with open(file) as f:
+        for line in f:
+            line = line.strip()
+            logger.debug(line)
+            # Skip blank lines and lines that consist of a comment only.
+            if not line or line.startswith("#"):
+                continue
+            # Split at the first hash sign only: a comment may itself contain "#".
+            setting, hash_sign, comment = line.partition("#")
+            comment = comment.strip() if hash_sign else None
+            # Split at the first equal sign only: a value may itself contain "=".
+            name, _, value = setting.partition("=")
+            data.append(Parameter(name.strip(), value.strip() or None, comment))
+    return data
+
+
+if __name__ == "__main__":
+    # Ad-hoc conversion script: parse the example parameter file under
+    # test/params and store the records as a CSV file next to it.
+    import pathlib
+
+    import pandas as pd
+
+    # Anchor the path at this module so the script works from any working directory.
+    file = pathlib.Path(__file__).parent / "../../../test/params/fragger.params"
+    data = read_file(file)
+    # One row per parameter, indexed by the parameter name.
+    df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index(
+        Parameter._fields[0]
+    )
+    df.to_csv(file.with_suffix(".csv"))