-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #101 from Proteobench/parse_settings
Parse settings
- Loading branch information
Showing
34 changed files
with
5,947 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"[python]": { | ||
"editor.formatOnSave": true, | ||
"editor.codeActionsOnSave": { | ||
"source.organizeImports": true | ||
}, | ||
"editor.rulers": [ | ||
88 | ||
], | ||
"editor.defaultFormatter": "ms-python.black-formatter" | ||
}, | ||
"python.formatting.provider": "none", | ||
"python.testing.unittestArgs": [ | ||
"-v", | ||
"-s", | ||
"./test", | ||
"-p", | ||
"test_*.py" | ||
], | ||
"python.testing.pytestEnabled": false, | ||
"python.testing.unittestEnabled": true, | ||
"flake8.args": [ | ||
"--max-line-length=120", | ||
], | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,117 @@ | ||
# Contributing | ||
|
||
Check out the [developer guide](https://proteobench.github.io/developer-guide/) | ||
to learn how you can contribute to ProteoBench. | ||
This document briefly describes how to contribute to | ||
[ProteoBench](https://github.com/proteobench/proteobench). | ||
|
||
|
||
|
||
## Before you begin | ||
|
||
If you have an idea for a feature, use case to add or an approach for a bugfix, | ||
you are welcome to communicate it with the community by opening a | ||
thread in | ||
[GitHub Discussions](https://github.com/proteobench/proteobench/discussions) | ||
or in [GitHub Issues](https://github.com/proteobench/proteobench/issues). | ||
|
||
Not sure where to start? Great contributions to | ||
[ProteoBench](https://github.com/proteobench/proteobench) include: | ||
|
||
[TODO] | ||
|
||
Also check out the [open issues](https://github.com/proteobench/proteobench/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3A%22help+wanted%22) | ||
that carry the `good first issue` or `help wanted` labels. | ||
|
||
|
||
## Development setup | ||
|
||
### Local installation | ||
Install the package and dependencies with [Flit](https://flit.pypa.io/en/stable/): | ||
|
||
``` | ||
pip install flit | ||
flit install -s | ||
``` | ||
|
||
Using a virtual environment is recommended. | ||
|
||
|
||
### Unit tests | ||
|
||
To run the tests run the command: | ||
|
||
> We use pytest which also supports unittest if you prefer that. | ||
``` | ||
pytest | ||
``` | ||
|
||
|
||
### Testing the web interface locally | ||
|
||
Start the web server locally with: | ||
|
||
``` | ||
cd ./webinterface/ | ||
streamlit run Home.py | ||
``` | ||
|
||
This will launch the ProteoBench application in your web browser.
|
||
|
||
Changes to the code in `./webinterface` will trigger a reload of the web server. | ||
However, changes in `./proteobench` require a full restart of the web server
to be included. | ||
|
||
|
||
### Documentation | ||
|
||
To work on the documentation and get a live preview, install the requirements | ||
and run `sphinx-autobuild`: | ||
|
||
``` | ||
flit install -s | ||
sphinx-autobuild --watch ./proteobench ./docs/ ./docs/_build/html/ | ||
``` | ||
|
||
Then browse to http://localhost:8000 to watch the live preview. | ||
|
||
|
||
## How to contribute | ||
|
||
- Fork [ProteoBench](https://github.com/proteobench/proteobench) on GitHub to | ||
make your changes. | ||
- Commit and push your changes to your | ||
[fork](https://help.github.com/articles/pushing-to-a-remote/). | ||
- Ensure that the tests and documentation (both Python docstrings and files in | ||
`/docs/source/`) have been updated according to your changes. Python | ||
docstrings are formatted in the | ||
[numpydoc style](https://numpydoc.readthedocs.io/en/latest/format.html). | ||
- Open a | ||
[pull request](https://help.github.com/articles/creating-a-pull-request/) | ||
with these changes. Your pull request message should ideally include:
|
||
- A description of why the changes should be made. | ||
- A description of the implementation of the changes. | ||
- A description of how to test the changes. | ||
|
||
- The pull request should pass all the continuous integration tests which are | ||
automatically run by | ||
[GitHub Actions](https://github.com/proteobench/proteobench/actions). | ||
|
||
|
||
|
||
## Release workflow | ||
|
||
- When a new version is ready to be published: | ||
|
||
1. Change the `__version__` in `proteobench/__init__.py` following | ||
[semantic versioning](https://semver.org/). | ||
2. Update the changelog (if not already done) in `CHANGELOG.md` according to | ||
[Keep a Changelog](https://keepachangelog.com/en/1.0.0/). | ||
3. Merge all final changes with the `main` branch. | ||
4. Create a new release on GitHub. | ||
|
||
- When a new GitHub release is made, the `Publish` GitHub Action is automatically | ||
triggered to build the Python package and publish it to PyPI. Upon a new PyPI release, | ||
the Bioconda automations will automatically update the Bioconda package. However, | ||
if dependencies are changed, the conda recipe will have to be updated accordingly. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from dataclasses import dataclass | ||
from typing import Optional | ||
|
||
|
||
# Reference for parameter names | ||
# https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml | ||
@dataclass
class ProteoBenchParameters:
    """
    Parameters for a proteomics search engine.

    Every attribute defaults to ``None``, so an instance can be created empty
    and filled in field by field.

    Attributes
    ----------
    software_name : Optional[str]
        Name of the software tool / pipeline used for this benchmark run
        (examples: "MaxQuant", "AlphaPept", "Proline", ...).
    software_version : Optional[str]
        Version of the software tool / pipeline used for this benchmark run.
    search_engine : Optional[str]
        Search engine used for this benchmark run
        (examples: "Andromeda", "Mascot", ...).
    search_engine_version : Optional[str]
        Version of the search engine used for this benchmark run.
    ident_fdr_psm : Optional[str]
        False discovery rate (FDR) threshold for peptide-spectrum match
        (PSM) validation ("0.01" = 1%).
    ident_fdr_peptide : Optional[str]
        False discovery rate (FDR) threshold for peptide validation ("0.01" = 1%).
    ident_fdr_protein : Optional[str]
        False discovery rate (FDR) threshold for protein validation ("0.01" = 1%).
    enable_match_between_runs : Optional[bool]
        Match between runs (also named cross assignment) is enabled.
    precursor_mass_tolerance : Optional[str]
        Precursor mass tolerance used for the search, associated with the
        unit: "20 ppm" = +/- 20 ppm; if several, separate with "|".
    fragment_mass_tolerance : Optional[str]
        Fragment mass tolerance used for the search, associated with the
        unit: "20 ppm" = +/- 20 ppm; if several, separate with "|".
    enzyme : Optional[str]
        Enzyme used as parameter for the search. If several, use "|".
    allowed_miscleavages : Optional[int]
        Maximal number of missed cleavages allowed.
    min_peptide_length : Optional[str]
        Minimum peptide length (number of residues) allowed for the search.
    max_peptide_length : Optional[str]
        Maximum peptide length (number of residues) allowed for the search.
    fixed_mods : Optional[str]
        Fixed modifications searched for in the search. If several, separate with "|".
    variable_mods : Optional[str]
        Variable modifications searched for in the search. If several, separate with "|".
    max_mods : Optional[int]
        Maximal number of modifications per peptide
        (including fixed and variable modifications).
    min_precursor_charge : Optional[int]
        Minimum precursor charge allowed.
    max_precursor_charge : Optional[int]
        Maximum precursor charge allowed.
    """

    # The trailing comments name the corresponding keys of the parameter-name
    # reference linked above the class.
    software_name: Optional[str] = None
    software_version: Optional[str] = None
    search_engine: Optional[str] = None
    search_engine_version: Optional[str] = None
    ident_fdr_psm: Optional[str] = None  # fdr_psm
    ident_fdr_peptide: Optional[str] = None
    ident_fdr_protein: Optional[str] = None  # fdr_protein
    enable_match_between_runs: Optional[bool] = None  # MBR
    precursor_mass_tolerance: Optional[str] = None  # precursor_tol, precursor_tol_unit
    fragment_mass_tolerance: Optional[str] = None  # fragment_tol, fragment_tol_unit
    enzyme: Optional[str] = None  # enzyme_name
    allowed_miscleavages: Optional[int] = None  # missed_cleavages
    min_peptide_length: Optional[str] = None  # min_pep_length
    max_peptide_length: Optional[str] = None  # max_pep_length
    fixed_mods: Optional[str] = None  # fixed_modifications
    variable_mods: Optional[str] = None  # variable_modifications
    max_mods: Optional[int] = None  # max_num_modifications
    min_precursor_charge: Optional[int] = None  # precursor_charge
    max_precursor_charge: Optional[int] = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
"""Functionality to parse MSFragger fragger.params parameter files. | ||
MSFragger has a text based parameter file format which
separates parameters and their value using an equal sign. Optional comments are
expressed with a hash sign. | ||
""" | ||
from __future__ import annotations | ||
|
||
import logging | ||
from collections import namedtuple | ||
|
||
logger = logging.getLogger(__name__)

# One record per parameter line: the parameter name, its value (or None) and
# the trailing comment (or None).
Parameter = namedtuple("Parameter", ["name", "value", "comment"])


def read_file(file: str) -> list[Parameter]:
    """Read MSFragger parameter file as list of records.

    Each non-empty line that is not a pure comment line is turned into one
    ``Parameter(name, value, comment)`` record:

    - everything after the first ``#`` is the comment (``None`` if the line
      has no ``#``); further ``#`` characters stay part of the comment,
    - the remaining text is split at the first ``=`` into name and value,
      with or without surrounding whitespace; further ``=`` characters stay
      part of the value,
    - the value is ``None`` when the line has no ``=`` or nothing follows it.

    Parameters
    ----------
    file : str
        Path of the parameter file to read.

    Returns
    -------
    list[Parameter]
        The parsed records in file order.
    """
    data = []
    with open(file) as f:
        for line in f:
            line = line.strip()
            logger.debug(line)
            # Blank lines and full-line comments carry no parameter.
            if not line or line.startswith("#"):
                continue
            # partition() splits at the first separator only, so a "#" inside
            # the comment or a "=" inside the value cannot break unpacking.
            body, hash_sign, comment = line.partition("#")
            comment = comment.strip() if hash_sign else None
            name, _, value = body.partition("=")
            # An absent or empty value is reported as None.
            value = value.strip() or None
            data.append(Parameter(name.strip(), value, comment))
    return data
|
||
|
||
if __name__ == "__main__":
    # Ad-hoc conversion script: parse the test parameter file and write the
    # records next to it as CSV, indexed by parameter name.
    import pathlib

    import pandas as pd

    # NOTE(review): the path is relative to the current working directory,
    # not to this module - confirm where the script is meant to be run from.
    file = pathlib.Path("../../../test/params/fragger.params")
    data = read_file(file)
    df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index(
        Parameter._fields[0]
    )
    df.to_csv(file.with_suffix(".csv"))
Oops, something went wrong.