[MRG] pre-commit with ruff,codespell,yamllint (#681)

* add file and lint * add ignore words * update tests * codespell commit * test should pass * fix demo flow * update release file and documentation for contribution * try to fix doctests * fix tests * remove tests on 3.8 * try other mlegacy option * test correctly
PythonOT · Oct 31, 2024 · 38922c0 · 38922c0
1 parent ea841c7
commit 38922c0
Show file tree

Hide file tree

Showing 175 changed files with 20,385 additions and 9,252 deletions.
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
@@ -23,6 +23,15 @@ GitHub, clone, and develop on a branch. Steps:
    $ cd POT
    ```
 
+2. Install pre-commit hooks to ensure that your code is properly formatted:
+
+   ```bash
+   $ pip install pre-commit
+   $ pre-commit install
+   ```
+
+   This will install the pre-commit hooks that will run on every commit. If the hooks fail, the commit will be aborted.
+
 3. Create a ``feature`` branch to hold your development changes:
 
    ```bash
@@ -56,7 +65,7 @@ Pull Request Checklist
 We recommended that your contribution complies with the
 following rules before you submit a pull request:
 
--  Follow the PEP8 Guidelines.
+-  Follow the PEP8 Guidelines which should be handled automatically by pre-commit.
 
 -  If your pull request addresses an issue, please use the pull request title
    to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is
@@ -101,27 +110,19 @@ following rules before you submit a pull request:
 You can also check for common programming errors with the following
 tools:
 
-
--  No pyflakes warnings, check with:
+- All lint checks pass. You can run the following command to check:
 
   ```bash
-  $ pip install pyflakes
-  $ pyflakes path/to/module.py
+  $ pre-commit run --all-files
   ```
 
--  No PEP8 warnings, check with:
+  This will run the pre-commit checks on all files in the repository.
 
-  ```bash
-  $ pip install pep8
-  $ pep8 path/to/module.py
-  ```
-
--  AutoPEP8 can help you fix some of the easy redundant errors:
+- All tests pass. You can run the following command to check:
 
   ```bash
-  $ pip install autopep8
-  $ autopep8 path/to/pep8.py
-  ```
+  $ pytest --durations=20 -v test/ --doctest-modules
+  ```
 
 Bonus points for contributions that include a performance analysis with
 a benchmark script and profiling output (please report on the mailing

diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml
@@ -15,14 +15,38 @@ on:
       - '**'
 
 jobs:
+
+  Lint:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    defaults:
+      run:
+        shell: bash -l {0}
+    steps:
+
+
+    - name: Checking Out Repository
+      uses: actions/checkout@v4
+    # Install Python (pre-commit itself is installed in the lint step below)
+    - uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+    - run: which python
+    - name: Lint with pre-commit
+      run: |
+        pip install pre-commit
+        pre-commit install --install-hooks
+        pre-commit run --all-files
+
   linux:
 
     runs-on: ubuntu-latest
     if: "!contains(github.event.head_commit.message, 'no ci')"
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     steps:
     - uses: actions/checkout@v4
@@ -44,26 +68,6 @@ jobs:
     - name: Upload coverage reports to Codecov with GitHub Action
       uses: codecov/codecov-action@v3
 
-  pep8:
-    runs-on: ubuntu-latest
-    if: "!contains(github.event.head_commit.message, 'no pep8')"
-    steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: "3.x"
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip setuptools
-        pip install flake8
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 examples/ ot/ test/ --count --max-line-length=127 --statistics
-
   linux-minimal-deps:
 
     runs-on: ubuntu-latest

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,51 @@
+repos:
+  # Ruff
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.5.2
+    hooks:
+      - id: ruff
+        name: ruff lint
+        args: ["--fix"]
+        files: ^ot/
+      - id: ruff
+        name: ruff lint preview
+        args: ["--fix", "--preview", "--select=NPY201"]
+        files: ^ot/
+      - id: ruff
+        name: ruff lint doc, tutorials, tests and examples
+        # D103: missing docstring in public function
+        # D400: docstring first line must end with period
+        args: ["--ignore=D103,D400", "--fix"]
+        files: ^docs/|^examples/|^test/
+      - id: ruff-format
+        files: ^ot/|^docs/|^examples/|^test/|^benchmarks/
+
+  # Codespell
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
+        files: ^ot/|^docs/|^examples/
+        types_or: [python, bib, rst, inc]
+        args: [
+          "--ignore-words",
+          "ignore-words.txt",
+        ]
+
+  # yamllint
+  - repo: https://github.com/adrienverge/yamllint.git
+    rev: v1.35.1
+    hooks:
+      - id: yamllint
+      # args: [--strict]
+
+# # rstcheck
+  # - repo: https://github.com/rstcheck/rstcheck.git
+  #   rev: v6.2.0
+  #   hooks:
+  #     - id: rstcheck
+  #       additional_dependencies:
+  #         - tomli
+  #       files: ^docs/source/.*\.(rst|inc)$
diff --git a/.yamllint.yml b/.yamllint.yml
@@ -0,0 +1,10 @@
+extends: default
+
+ignore: |
+  .github/workflows/*.yml
+  .circleci/config.yml
+  codecov.yml
+
+rules:
+  line-length: disable
+  document-start: disable
diff --git a/RELEASES.md b/RELEASES.md
@@ -6,6 +6,7 @@
 - Custom functions provided as parameter `line_search` to `ot.optim.generic_conditional_gradient` must now have the signature `line_search(cost, G, deltaG, Mi, cost_G, df_G, **kwargs)`, adding as input `df_G` the gradient of the regularizer evaluated at the transport plan `G`. This change aims at improving speed of solvers having quadratic polynomial functions as regularizer such as the Gromov-Wassertein loss (PR #663).
 
 #### New features
+- New linter based on pre-commit using ruff, codespell and yamllint (PR #681)
 - Added feature `mass=True` for `nx.kl_div` (PR #654)
 - Implemented Gaussian Mixture Model OT `ot.gmm` (PR #649)
 - Added feature `semirelaxed_fgw_barycenters` and generic FGW-related barycenter updates `update_barycenter_structure` and `update_barycenter_feature` (PR #659)

diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
@@ -8,10 +8,12 @@
 def setup_backends():
     if jax:
         from jax.config import config
+
         config.update("jax_enable_x64", True)
 
     if tf:
         from tensorflow.python.ops.numpy_ops import np_config
+
         np_config.enable_numpy_behavior()
 
 
@@ -36,10 +38,7 @@ def exec_bench(setup, tested_function, param_list, n_runs, warmup_runs):
             print(nx, param_list[i])
             args = inputs[i]
             results_nx = nx._bench(
-                tested_function,
-                *args,
-                n_runs=n_runs,
-                warmup_runs=warmup_runs
+                tested_function, *args, n_runs=n_runs, warmup_runs=warmup_runs
             )
             gc.collect()
             results_nx_with_param_in_key = dict()
@@ -64,10 +63,11 @@ def convert_to_html_table(results, param_name, main_title=None, comments=None):
     assert cpus_cols + gpus_cols == len(devices_names)
 
     if main_title is not None:
-        string += f'<tr><th align="center" colspan="{length}">{str(main_title)}</th></tr>\n'
+        string += (
+            f'<tr><th align="center" colspan="{length}">{str(main_title)}</th></tr>\n'
+        )
 
     for i, bitsize in enumerate(bitsizes):
-
         if i != 0:
             string += f'<tr><td colspan="{length}">&nbsp;</td></tr>\n'
 

diff --git a/benchmarks/emd.py b/benchmarks/emd.py
@@ -3,11 +3,7 @@
 
 import numpy as np
 import ot
-from .benchmark import (
-    setup_backends,
-    exec_bench,
-    convert_to_html_table
-)
+from .benchmark import setup_backends, exec_bench, convert_to_html_table
 
 
 def setup(n_samples):
@@ -31,10 +27,12 @@ def setup(n_samples):
         tested_function=lambda a, M: ot.emd(a, a, M),
         param_list=param_list,
         n_runs=n_runs,
-        warmup_runs=warmup_runs
+        warmup_runs=warmup_runs,
+    )
+    print(
+        convert_to_html_table(
+            results,
+            param_name="Sample size",
+            main_title=f"EMD - Averaged on {n_runs} runs",
+        )
     )
-    print(convert_to_html_table(
-        results,
-        param_name="Sample size",
-        main_title=f"EMD - Averaged on {n_runs} runs"
-    ))
diff --git a/benchmarks/sinkhorn_knopp.py b/benchmarks/sinkhorn_knopp.py
@@ -3,11 +3,7 @@
 
 import numpy as np
 import ot
-from .benchmark import (
-    setup_backends,
-    exec_bench,
-    convert_to_html_table
-)
+from .benchmark import setup_backends, exec_bench, convert_to_html_table
 
 
 def setup(n_samples):
@@ -33,10 +29,12 @@ def setup(n_samples):
         tested_function=lambda *args: ot.bregman.sinkhorn(*args, reg=1, stopThr=1e-7),
         param_list=param_list,
         n_runs=n_runs,
-        warmup_runs=warmup_runs
+        warmup_runs=warmup_runs,
+    )
+    print(
+        convert_to_html_table(
+            results,
+            param_name="Sample size",
+            main_title=f"Sinkhorn Knopp - Averaged on {n_runs} runs",
+        )
     )
-    print(convert_to_html_table(
-        results,
-        param_name="Sample size",
-        main_title=f"Sinkhorn Knopp - Averaged on {n_runs} runs"
-    ))
diff --git a/docs/nb_run_conv b/docs/nb_run_conv
@@ -9,18 +9,17 @@ Created on Fri Sep  1 16:43:45 2017
 @author: rflamary
 """
 
-import sys
 import json
 import glob
 import hashlib
 import subprocess
 
 import os
 
-cache_file = 'cache_nbrun'
+cache_file = "cache_nbrun"
 
-path_doc = 'source/auto_examples/'
-path_nb = '../notebooks/'
+path_doc = "source/auto_examples/"
+path_nb = "../notebooks/"
 
 
 def load_json(fname):
@@ -34,7 +33,7 @@ def load_json(fname):
 
 
 def save_json(fname, nb):
-    f = open(fname, 'w')
+    f = open(fname, "w")
     f.write(json.dumps(nb))
     f.close()
 
@@ -60,22 +59,45 @@ def to_update(fname, cache):
 
 
 def update(fname, cache):
-
-    # jupyter nbconvert --to notebook --execute mynotebook.ipynb --output targte
-    subprocess.check_call(['cp', path_doc + fname, path_nb])
-    print(' '.join(['jupyter', 'nbconvert', '--to', 'notebook', '--ExecutePreprocessor.timeout=600', '--execute', path_nb + fname, '--inplace']))
-    subprocess.check_call(['jupyter', 'nbconvert', '--to', 'notebook', '--ExecutePreprocessor.timeout=600', '--execute', path_nb + fname, '--inplace'])
+    # jupyter nbconvert --to notebook --execute mynotebook.ipynb --output target
+    subprocess.check_call(["cp", path_doc + fname, path_nb])
+    print(
+        " ".join(
+            [
+                "jupyter",
+                "nbconvert",
+                "--to",
+                "notebook",
+                "--ExecutePreprocessor.timeout=600",
+                "--execute",
+                path_nb + fname,
+                "--inplace",
+            ]
+        )
+    )
+    subprocess.check_call(
+        [
+            "jupyter",
+            "nbconvert",
+            "--to",
+            "notebook",
+            "--ExecutePreprocessor.timeout=600",
+            "--execute",
+            path_nb + fname,
+            "--inplace",
+        ]
+    )
     cache[fname] = md5(path_doc + fname)
 
 
 cache = load_json(cache_file)
 
-lst_file = glob.glob(path_doc + '*.ipynb')
+lst_file = glob.glob(path_doc + "*.ipynb")
 
 lst_file = [os.path.basename(name) for name in lst_file]
 
 for fname in lst_file:
     if to_update(fname, cache):
-        print('Updating file: {}'.format(fname))
+        print("Updating file: {}".format(fname))
         update(fname, cache)
         save_json(cache_file, cache)
diff --git a/docs/rtd/conf.py b/docs/rtd/conf.py
@@ -1,6 +1,6 @@
 from recommonmark.parser import CommonMarkParser
 
-source_parsers = {'.md': CommonMarkParser}
+source_parsers = {".md": CommonMarkParser}
 
-source_suffix = ['.md']
-master_doc = 'index'
+source_suffix = [".md"]
+master_doc = "index"