From ecb2039d1160f0249aeb2ab5c4119d034bc621d0 Mon Sep 17 00:00:00 2001 From: mmdanziger Date: Mon, 17 Jun 2024 18:27:00 +0300 Subject: [PATCH] Add github actions for tests and pre-commit (#1) * Create python-package.yml Add first github action Signed-off-by: mmdanziger * Update python-package.yml Signed-off-by: mmdanziger * add default for input file * add click to installments * add creation of tasks to workflow needed for tests * update command input name for reactome * update total number of tasks in test * update total tasks number * add pre commit * update pre commit file path * change file location * run pre commit on all files * split pre commit into separate workflow * rename pre commit file * reformat files from pre commit * change version * change version * add whitespace * update pre-commit and run --------- Signed-off-by: mmdanziger Co-authored-by: edenjenzohar --- .github/workflows/pre-commit-check.yml | 28 ++++++++++++ .github/workflows/python-package.yml | 43 ++++++++++++++++++ .pre-commit-config.yaml | 48 +++++--------------- gene_benchmark/tests/test_encoder.py | 16 ++----- gene_benchmark/tests/test_tasks.py | 2 +- pyproject.toml | 1 + scripts/tasks_retrival/HPA_tasks_creation.py | 6 +-- 7 files changed, 90 insertions(+), 54 deletions(-) create mode 100644 .github/workflows/pre-commit-check.yml create mode 100644 .github/workflows/python-package.yml diff --git a/.github/workflows/pre-commit-check.yml b/.github/workflows/pre-commit-check.yml new file mode 100644 index 0000000..4b88c44 --- /dev/null +++ b/.github/workflows/pre-commit-check.yml @@ -0,0 +1,28 @@ + +name: Pre-commit + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ 
matrix.python-version }} + - name: Install pre-commit + run: pip install pre-commit + - name: Run pre-commit + run: pre-commit run --all-files diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..0e0bd24 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,43 @@ +# This workflow will install Python dependencies and run tests with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: | + python -m pip install --upgrade pip pytest pytest-cov + python -m pip install -e . 
+ + - name: Create tasks needed for testing + run: | + python scripts/tasks_retrival/gene2gene_task_creation.py --allow-downloads True + python scripts/tasks_retrival/Genecorpus_tasks_creation.py --allow-downloads True + python scripts/tasks_retrival/HLA_task_creation.py --allow-downloads True + python scripts/tasks_retrival/HPA_tasks_creation.py --allow-downloads True + python scripts/tasks_retrival/humantfs_task_creation.py --allow-downloads True + python scripts/tasks_retrival/Reactome_tasks_creation.py --use-local-files False + + - name: Test with pytest + run: | + python -m pytest -v --durations=40 --cov=gene_benchmark --cov-fail-under=70 gene_benchmark diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a73694..f632448 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,35 +1,3 @@ -################################### -# GETTING STARTED WITH PRE-COMMIT # -################################### -# To start using this pre-commit file instapp pre-commit: -# pip install pre-commit -# Install hooks: -# pre-commit install -# Note: Multiple changes to the same file by a hook will block git commits. -# Therefore hooks should be added one at a time. 
-# Apply to all files without commiting: -# pre-commit run --all-files -# For more info, see https://pre-commit.com/ - -########## -# UPDATE # -########## -# Update this file (will change version numbers of tools, which could lead to -# simultaneous changes that require manual resolution): -# pre-commit autoupdate - -############# -# CUSTOMIZE # -############# -# To customize per-hook behavior you can use: -# 1) the `args` option to pass cli args to the tool -# 2) per-tool config files such as mypy.ini, .pylintrc etc in repo root -# 3) the shared config file pyproject.toml in repo root - -############# -# UNINSTALL # -############# -# pre-commit uninstall repos: # default hooks provided by the pre-commit project - repo: https://github.com/pre-commit/pre-commit-hooks @@ -48,14 +16,20 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - id: mixed-line-ending - + # mypy: type checking - caution this can be hard to satisfy also slow + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v0.910-1 + # hooks: + # - id: mypy - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.4.4 + rev: v0.4.9 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] types_or: [ python, pyi, jupyter ] - # Run the formatter. - - id: ruff-format - types_or: [ python, pyi, jupyter ] + # black: automatic pep8 compliance code formatting + - repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black diff --git a/gene_benchmark/tests/test_encoder.py b/gene_benchmark/tests/test_encoder.py index 1679102..c6169bd 100644 --- a/gene_benchmark/tests/test_encoder.py +++ b/gene_benchmark/tests/test_encoder.py @@ -236,11 +236,9 @@ def test_sentenceTransformerEncoder_series(self): encoder = SentenceTransformerEncoder(mpnet_name) txt1 = "It was originally taken from a Latin text written by a Roman Scholar, Sceptic and Philosopher\ by the name of Marcus Tullius Cicero, who influenced the Latin language greatly." 
- txt2 = ( - "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ + txt2 = "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ Latin word. It is suggested that the reason that the text starts with Lorem is because there was a\ page break spanning the word" - ) txt3 = "Do-lorem. If you a re looking for a translation of the text, it's meaningless. The original text\ talks about the pain and love involved in the pursuit of pleasure or something like that." txt4 = "The reason we use Lorem Ipsum is simple. If we used real text, it would possibly distract from\ @@ -255,11 +253,9 @@ def test_sentenceTransformerEncoder_series_with_none(self): encoder = SentenceTransformerEncoder(mpnet_name) txt1 = "It was originally taken from a Latin text written by a Roman Scholar, Sceptic and Philosopher\ by the name of Marcus Tullius Cicero, who influenced the Latin language greatly." - txt2 = ( - "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ + txt2 = "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ Latin word. It is suggested that the reason that the text starts with Lorem is because there was a\ page break spanning the word" - ) txt3 = "Do-lorem. If you a re looking for a translation of the text, it's meaningless. The original text\ talks about the pain and love involved in the pursuit of pleasure or something like that." txt4 = "The reason we use Lorem Ipsum is simple. If we used real text, it would possibly distract from\ @@ -277,11 +273,9 @@ def test_entenceTransformerEncoder_df(self): encoder = SentenceTransformerEncoder(mpnet_name) txt1 = "It was originally taken from a Latin text written by a Roman Scholar, Sceptic and Philosopher\ by the name of Marcus Tullius Cicero, who influenced the Latin language greatly." 
- txt2 = ( - "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ + txt2 = "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ Latin word. It is suggested that the reason that the text starts with Lorem is because there was a\ page break spanning the word" - ) txt3 = "Do-lorem. If you a re looking for a translation of the text, it's meaningless. The original text\ talks about the pain and love involved in the pursuit of pleasure or something like that." txt4 = "The reason we use Lorem Ipsum is simple. If we used real text, it would possibly distract from\ @@ -300,11 +294,9 @@ def test_sentenceTransformerEncoder_pdseries(self): encoder = SentenceTransformerEncoder(mpnet_name) txt1 = "It was originally taken from a Latin text written by a Roman Scholar, Sceptic and Philosopher\ by the name of Marcus Tullius Cicero, who influenced the Latin language greatly." - txt2 = ( - "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ + txt2 = "The filler text we know today has been altered over the years (in fact Lorem isn't actually a\ Latin word. It is suggested that the reason that the text starts with Lorem is because there was a\ page break spanning the word" - ) txt3 = "Do-lorem. If you a re looking for a translation of the text, it's meaningless. The original text\ talks about the pain and love involved in the pursuit of pleasure or something like that." txt4 = "The reason we use Lorem Ipsum is simple. 
If we used real text, it would possibly distract from\ diff --git a/gene_benchmark/tests/test_tasks.py b/gene_benchmark/tests/test_tasks.py index e818384..73f5ad8 100644 --- a/gene_benchmark/tests/test_tasks.py +++ b/gene_benchmark/tests/test_tasks.py @@ -329,7 +329,7 @@ def test_list_subtests(self): def test_get_task_names(self): tasks_folder = _get_tasks_folder() names = list(get_tasks_definition_names(tasks_folder)) - assert len(names) >= 81 + assert len(names) >= 70 assert "RNA cancer distribution" in names assert "bivalent vs non-methylated" in names diff --git a/pyproject.toml b/pyproject.toml index 794d470..922d9dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "mygene", "sentence_transformers", "scikit-learn", +"click", ] [project.optional-dependencies] diff --git a/scripts/tasks_retrival/HPA_tasks_creation.py b/scripts/tasks_retrival/HPA_tasks_creation.py index 9d29ae3..8523e0e 100644 --- a/scripts/tasks_retrival/HPA_tasks_creation.py +++ b/scripts/tasks_retrival/HPA_tasks_creation.py @@ -93,7 +93,7 @@ def save_task_to_dir(main_task_directory, task_name, entities, outcomes): "--columns-to-use-yaml", type=click.STRING, help="A path to a yaml file containing the column names to be used as tasks", - default="scripts/hpa_column_names_for_tasks.yaml", + default="scripts/tasks_retrival/hpa_column_names_for_tasks.yaml", ) @click.option( "--main-task-directory", @@ -110,9 +110,7 @@ def save_task_to_dir(main_task_directory, task_name, entities, outcomes): default=False, ) @click.option( - "--input-file", - type=click.STRING, - help="The path to the data file", + "--input-file", type=click.STRING, help="The path to the data file", default=None ) def main(columns_to_use_yaml, main_task_directory, allow_downloads, input_file): if allow_downloads: