Merge pull request #88 from flatironinstitute/dev
merge dev to main
DSilva27 authored Aug 15, 2024
2 parents f80dc6a + efb77bc commit cb4b812
Showing 51 changed files with 756 additions and 477 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main_merge_check.yml
@@ -11,4 +11,4 @@ jobs:
         if: github.base_ref == 'main' && github.head_ref != 'dev'
         run: |
           echo "ERROR: You can only merge to main from dev."
-          exit 1
+          exit 1
12 changes: 12 additions & 0 deletions .github/workflows/ruff.yml
@@ -0,0 +1,12 @@
+# Runs the Ruff linter and formatter.
+
+name: Lint
+
+on: [push]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: chartboost/ruff-action@v1
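For reference, `chartboost/ruff-action` runs `ruff check` over the repository on each push. A minimal local equivalent, sketched in Python (an illustration, not part of the commit; assumes the `ruff` executable is installed, e.g. with the dev dependencies, and on PATH):

```python
import subprocess

# Lint the repository the way the Lint workflow does on push.
# `ruff check .` is Ruff's standard lint command.
result = subprocess.run(["ruff", "check", "."])
raise SystemExit(result.returncode)
```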
22 changes: 2 additions & 20 deletions .github/workflows/testing.yml
@@ -26,30 +26,12 @@ jobs:
           python-version: ${{ matrix.python-version }}
           cache: 'pip' # caching pip dependencies
 
-      - name: Cache test data
-        id: cache_test_data
-        uses: actions/cache@v3
-        with:
-          path: |
-            tests/data
-            data
-          key: venv-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('**/tests/scripts/fetch_test_data.sh') }}
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install .
           pip install pytest omegaconf
-      - name: Get test data from OSF
-        if: ${{ steps.cache_test_data.outputs.cache-hit != 'true' }}
-        run: |
-          sh tests/scripts/fetch_test_data.sh
       - name: Test with pytest
         run: |
-          pytest tests/test_preprocessing.py
-          pytest tests/test_svd.py
-          pytest tests/test_map_to_map.py
-          pytest tests/test_distribution_to_distribution.py
+          pytest tests
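The per-file pytest calls collapse into a single `pytest tests` run, so new test modules are picked up without editing the workflow. A sketch of the equivalent programmatic invocation for local scripts (`pytest.main` is pytest's documented entry point; not part of the commit):

```python
import pytest

# Equivalent to the workflow's `pytest tests` step; exits with pytest's return code.
raise SystemExit(pytest.main(["tests"]))
```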
5 changes: 1 addition & 4 deletions .gitignore
@@ -3,10 +3,7 @@ data/dataset_2_submissions
 data/dataset_1_submissions
 data/dataset_2_ground_truth
 
-# data for testing and resulting outputs
-tests/data/Ground_truth
-tests/data/dataset_2_submissions/
-tests/data/unprocessed_dataset_2_submissions/submission_x/
+# testing results
 tests/results/

22 changes: 21 additions & 1 deletion CONTRIBUTING.md
@@ -12,8 +12,28 @@ The "-e" flag will install the package in editable mode, which means you can edi

 ## Things to do before pushing to GitHub
 
-In this project we use Ruff for linting, and pre-commit to make sure that the code being pushed is not broken and does not go against PEP8 guidelines. When you run `git commit` the pre-commit pipeline should run automatically. In the near future we will start using pytest and mypy to perform more checks.
+### Using pre-commit hooks for code formatting and linting
+
+When you install in developer mode with `".[dev]"` you will install the [pre-commit](https://pre-commit.com/) package. To set up this package simply run
+
+```bash
+pre-commit install
+```
+
+Then, every time before doing a commit (that is, before `git add` and `git commit`) run the following command:
+
+```bash
+pre-commit run --all-files
+```
+
+This will run `ruff` linting and formatting. If there is anything that cannot be automatically fixed, the command will point you to the file and line that needs to be fixed before you can commit. Once you have fixed everything, you will be able to run `git add` and `git commit` without issue.
+
+
+### Make sure the tests run
+
+```bash
+python -m pytest tests/
+```

## Best practices for contributing

2 changes: 1 addition & 1 deletion config_files/config_distribution_to_distribution.yaml
@@ -12,4 +12,4 @@ cvxpy_solver: ECOS
 optimal_q_kl:
   n_iter: 100000
   break_atol: 0.0001
-output_fname: results/distribution_to_distribution_submission_0.pkl
+output_fname: results/distribution_to_distribution_submission_0.pkl
@@ -1,15 +1,15 @@
 data:
   n_pix: 224
-  psize: 2.146
+  psize: 2.146
   submission:
     fname: data/dataset_2_ground_truth/submission_0.pt
     volume_key: volumes
     metadata_key: populations
     label_key: id
   ground_truth:
-    volumes: data/dataset_2_ground_truth/maps_gt_flat.pt
-    metadata: data/dataset_2_ground_truth/metadata.csv
-  mask:
+    volumes: data/dataset_2_ground_truth/maps_gt_flat.pt
+    metadata: data/dataset_2_ground_truth/metadata.csv
+  mask:
     do: true
     volume: data/dataset_2_ground_truth/mask_dilated_wide_224x224.mrc
 analysis:
@@ -18,9 +18,10 @@ analysis:
     - corr
     - bioem
     - fsc
+    - res
   chunk_size_submission: 80
   chunk_size_gt: 190
   normalize:
     do: true
     method: median_zscore
-output: results/map_to_map_distance_matrix_submission_0.pkl
+output: results/map_to_map_distance_matrix_submission_0.pkl
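This config drives the map-to-map pipeline: the distance list gains `res` alongside `corr`, `bioem`, and `fsc`, and normalization uses `median_zscore`. A sketch of loading and inspecting such a file with OmegaConf (which the test workflow installs); the path below is an assumption, since this file's header row did not survive extraction:

```python
from omegaconf import OmegaConf

# Assumed filename: the scraped diff omits this file's name.
config = OmegaConf.load("config_files/config_map_to_map.yaml")

# Print the analysis block to verify the distances and normalization settings.
print(OmegaConf.to_yaml(config.analysis))
```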
1 change: 0 additions & 1 deletion config_files/config_preproc.yaml
@@ -1,5 +1,4 @@
submission_config_file: submission_config.json
seed_flavor_assignment: 0
thresh_percentile: 93.0
BOT_box_size: 32
BOT_loss: wemd
4 changes: 3 additions & 1 deletion src/cryo_challenge/__init__.py
@@ -1 +1,3 @@
-from cryo_challenge.__about__ import __version__
+from cryo_challenge.__about__ import __version__
+
+__all__ = ["__version__"]
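With the explicit `__all__`, the version re-export is intentional rather than incidental; a trivial check, assuming the package is installed:

```python
import cryo_challenge

# __version__ is re-exported from cryo_challenge.__about__ at the package root.
print(cryo_challenge.__version__)
```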
4 changes: 2 additions & 2 deletions src/cryo_challenge/_commands/run_map2map_pipeline.py
@@ -6,7 +6,7 @@
 import os
 import yaml
 
-from .._map_to_map.map_to_map_distance_matrix import run
+from .._map_to_map.map_to_map_pipeline import run
 from ..data._validation.config_validators import validate_input_config_mtm
 
 

Expand Down Expand Up @@ -46,5 +46,5 @@ def main(args):

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description=__doc__)
-    args = parser.parse_args()
+    # args = parser.parse_args()
     main(add_args(parser).parse_args())
@@ -2,8 +2,6 @@
 import numpy as np
 import pickle
 from scipy.stats import rankdata
-import yaml
-import argparse
 import torch
 import ot
 
@@ -14,10 +12,12 @@


 def sort_by_transport(cost):
-    m,n = cost.shape
-    _, transport = compute_wasserstein_between_distributions_from_weights_and_cost(np.ones(m) / m, np.ones(n)/n, cost)
-    indices = np.argsort((transport * np.arange(m)[...,None]).sum(0))
-    return cost[:,indices], indices, transport
+    m, n = cost.shape
+    _, transport = compute_wasserstein_between_distributions_from_weights_and_cost(
+        np.ones(m) / m, np.ones(n) / n, cost
+    )
+    indices = np.argsort((transport * np.arange(m)[..., None]).sum(0))
+    return cost[:, indices], indices, transport
 
 
 def compute_wasserstein_between_distributions_from_weights_and_cost(
@@ -65,15 +65,14 @@ def make_assignment_matrix(cost_matrix):


 def run(config):
-
     metadata_df = pd.read_csv(config["gt_metadata_fname"])
     metadata_df.sort_values("pc1", inplace=True)
 
     with open(config["input_fname"], "rb") as f:
         data = pickle.load(f)
 
     # user_submitted_populations = np.ones(80)/80
-    user_submitted_populations = data["user_submitted_populations"]#.numpy()
+    user_submitted_populations = data["user_submitted_populations"]  # .numpy()
     id = torch.load(data["config"]["data"]["submission"]["fname"])["id"]
 
     results_dict = {}
@@ -213,5 +212,5 @@ def optimal_q_kl(n_iter, x_start, A, Window, prob_gt, break_atol):
     DistributionToDistributionResultsValidator.from_dict(results_dict)
     with open(config["output_fname"], "wb") as f:
         pickle.dump(results_dict, f)
-
+
     return results_dict
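On the algorithmic side, the reformatted `sort_by_transport` orders the columns of a cost matrix by the average source row each column receives mass from under a uniform-marginal optimal transport plan. A self-contained sketch of the same idea using the `ot` (POT) package directly, on a toy matrix (illustration only, not the repository's code):

```python
import numpy as np
import ot  # POT: Python Optimal Transport

rng = np.random.default_rng(0)
cost = rng.random((4, 6))  # hypothetical toy cost matrix
m, n = cost.shape

# Optimal transport plan between uniform weights on rows and columns.
transport = ot.emd(np.ones(m) / m, np.ones(n) / n, cost)

# Transport-weighted sum of source row indices per column; sorting by it
# reorders columns to roughly follow the row order, as sort_by_transport does.
indices = np.argsort((transport * np.arange(m)[..., None]).sum(0))
sorted_cost = cost[:, indices]
print(indices, sorted_cost.shape)
```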
