diff --git a/.github/workflows/macos_test_cases.yml b/.github/workflows/macos_test_cases.yml deleted file mode 100644 index bbeab6a..0000000 --- a/.github/workflows/macos_test_cases.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: spare_scores test cases on macos - -# workflow dispatch has been added for testing purposes -on: [push, pull_request, workflow_dispatch] - -jobs: - build: - runs-on: ["macos-13"] - - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.8' - - name: Set-up miniconda for macos and ubuntu - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - python-version: 3.8 - miniconda-version: "latest" - - name: Create conda env - run: conda create -n spare python=3.8 - - name: Install pip - run: conda run -n spare conda install pip - - name: Install spare scores - run: conda run -n spare pip install spare_scores - - name: Download dependencies - run: pip install setuptools && pip install . - - name: Run unit tests - run: | - cd tests/unit && python -m unittest discover -s . -p "*.py" - - diff --git a/.github/workflows/macos_test_cases_p3-12.yml b/.github/workflows/macos_test_cases_p3-12.yml new file mode 100644 index 0000000..b9c42d4 --- /dev/null +++ b/.github/workflows/macos_test_cases_p3-12.yml @@ -0,0 +1,31 @@ +name: spare_scores test cases on macos for python 3.12 + +# workflow dispatch has been added for testing purposes +on: [push, pull_request, workflow_dispatch] + +jobs: + build: + runs-on: ["macos-13"] + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Set-up miniconda for macos and ubuntu + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.12 + miniconda-version: "latest" + - name: Create conda env + run: conda create -n spare python=3.12 + - name: Install pip + run: conda run -n spare conda install pip + - name: Install spare scores + run: | + pip install setuptools twine wheel + python -m pip install . + - name: Run unit tests + run: | + cd tests/unit && python -m unittest discover -s . -p "*.py" diff --git a/.github/workflows/macos_test_cases_p3-8.yml b/.github/workflows/macos_test_cases_p3-8.yml new file mode 100644 index 0000000..7c91627 --- /dev/null +++ b/.github/workflows/macos_test_cases_p3-8.yml @@ -0,0 +1,42 @@ +name: spare_scores test cases on macos for python 3.8 + +# workflow dispatch has been added for testing purposes +on: [push, pull_request, workflow_dispatch] + +jobs: + build: + runs-on: ["macos-13"] + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: Set-up miniconda for macos and ubuntu + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.8 + miniconda-version: "latest" + - name: Create conda env + run: conda create -n spare python=3.8 + - name: Install pip + run: conda run -n spare conda install pip + - name: Install spare scores + run: | + pip install setuptools twine wheel + python setup.py bdist_wheel + cd dist + WHEEL_FILE=$(ls spare_scores*) + pip install "$WHEEL_FILE" + - name: Download dependencies + run: pip install setuptools && pip install . 
+ - name: Generate Coverage Report + run: | + pip install pytest-cov + cd tests/unit && pytest --cov=../../ --cov-report=xml + - name: Upload Coverage to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: CBICA/spare_score diff --git a/.github/workflows/sphinx-docs.yml b/.github/workflows/sphinx-docs.yml index 9fc8d6a..79d8139 100644 --- a/.github/workflows/sphinx-docs.yml +++ b/.github/workflows/sphinx-docs.yml @@ -2,7 +2,7 @@ name: Deploy static cntent to Pages on: push: branches: ["main"] - + jobs: build-docs: runs-on: ubuntu-latest @@ -10,10 +10,10 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: "3.12" - name: Activate conda run: | - conda create -n spare python=3.8 + conda create -n spare python=3.12 conda run -n spare conda install pip conda run -n spare pip install spare_scores - name: Install dependencies @@ -29,30 +29,29 @@ jobs: - name: Upload pages artifact uses: actions/upload-pages-artifact@v3 - + with: path: docs/_build/html retention-days: 90 - + deploy-docs: if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} needs: build-docs - + permissions: pages: write id-token: write - + environment: name: github-pages url: ${{ steps.deployment.output.page_url }} - - concurrency: + + concurrency: group: "pages" cancel-in-progress: true - + runs-on: ubuntu-latest steps: - - name: Deploy artifact to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 - + - name: Deploy artifact to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/ubuntu_test_cases.yml b/.github/workflows/ubuntu_test_cases.yml deleted file mode 100644 index 7ae22ea..0000000 --- a/.github/workflows/ubuntu_test_cases.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: spare_scores test cases on ubuntu - -# workflow dispatch has been added for testing purposes -on: [push, pull_request, workflow_dispatch] - -jobs: - build: - runs-on: ["ubuntu-latest"] - - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.8' - - name: Set-up miniconda for macos and ubuntu - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - python-version: 3.8 - miniconda-version: "latest" - - name: Create conda env - run: conda create -n spare python=3.8 - - name: Install pip - run: conda run -n spare conda install pip - - name: Install spare scores - run: conda run -n spare pip install spare_scores - - name: Download dependencies - run: pip install setuptools && pip install . - - name: Run unit tests - run: | - cd tests/unit && python -m unittest discover -s . 
-p "*.py" - - name: Generate Coverage Report - run: | - pip install pytest-cov - cd tests/unit && pytest --cov=../../ --cov-report=xml - - name: Upload Coverage to Codecov - uses: codecov/codecov-action@v4.0.1 - with: - token: ${{ secrets.CODECOV_TOKEN }} - slug: CBICA/spare_score - - diff --git a/.github/workflows/ubuntu_test_cases_p3-12.yml b/.github/workflows/ubuntu_test_cases_p3-12.yml new file mode 100644 index 0000000..8d71e56 --- /dev/null +++ b/.github/workflows/ubuntu_test_cases_p3-12.yml @@ -0,0 +1,33 @@ +name: spare_scores test cases on ubuntu for python 3.12 + +# workflow dispatch has been added for testing purposes +on: [push, pull_request, workflow_dispatch] + +jobs: + build: + runs-on: ["ubuntu-latest"] + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Set-up miniconda for macos and ubuntu + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.12 + miniconda-version: "latest" + - name: Create conda env + run: conda create -n spare python=3.12 + - name: Install pip + run: conda run -n spare conda install pip + - name: Install spare scores + run: | + pip install setuptools twine wheel + python -m pip install . + - name: Download dependencies + run: pip install setuptools && pip install . + - name: Run unit tests + run: | + cd tests/unit && python -m unittest discover -s . -p "*.py" diff --git a/.github/workflows/ubuntu_test_cases_p3-8.yml b/.github/workflows/ubuntu_test_cases_p3-8.yml new file mode 100644 index 0000000..552323c --- /dev/null +++ b/.github/workflows/ubuntu_test_cases_p3-8.yml @@ -0,0 +1,36 @@ +name: spare_scores test cases on ubuntu for python 3.8 + +# workflow dispatch has been added for testing purposes +on: [push, pull_request, workflow_dispatch] + +jobs: + build: + runs-on: ["ubuntu-latest"] + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: Set-up miniconda for macos and ubuntu + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.8 + miniconda-version: "latest" + - name: Create conda env + run: conda create -n spare python=3.8 + - name: Install pip + run: conda run -n spare conda install pip + - name: Install spare scores + run: | + pip install setuptools twine wheel + python setup.py bdist_wheel + cd dist + WHEEL_FILE=$(ls spare_scores*) + pip install "$WHEEL_FILE" + - name: Download dependencies + run: pip install setuptools && pip install . + - name: Run unit tests + run: | + cd tests/unit && python -m unittest discover -s . 
-p "*.py" diff --git a/README.md b/README.md index fb21cdf..f90c50a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +[![codecov](https://codecov.io/gh/CBICA/spare_score/graph/badge.svg?token=7yk7pkydHE)](https://codecov.io/gh/CBICA/spare_score) # Compute SPARE Scores for Your Case @@ -11,10 +12,13 @@ For detailed documentation, please see here: **[spare_scores](https://cbica.gith ## Installation +You can install the spare_score package for python 3.8 up to python 3.12 +Please open an issue if you find any bugs for the newer versions of spare_score + ### Conda environment using pip ```bash - conda create -n spare python=3.8 + conda create -n spare python=3.8 # (up to python=3.12) conda activate spare conda install pip pip install spare_scores @@ -28,61 +32,66 @@ For detailed documentation, please see here: **[spare_scores](https://cbica.gith pip install spare_scores ``` -### Conda environment from Github repository +### Manually build spare_score ```bash git clone https://github.com/CBICA/spare_score.git cd spare_score - pip install . + python -m pip install . # for python 3.12 + + # for python 3.8... + # python setup.py bdist_wheel + # cd dist && pip install "$The .wh file" + ``` ## Usage ```text -spare_scores v1.0.0. +spare_scores v1.2.1. SPARE model training & scores calculation required arguments: [ACTION] The action to be performed, either 'train' or 'test' [-a, --action] - [INPUT] The dataset to be used for training / testing. Can be + [INPUT] The dataset to be used for training / testing. Can be [-i, --input] a filepath string of a .csv file. - + optional arguments: - [OUTPUT] The filename for the model (as a .pkl.gz) to be saved - [-o, --output] at, if training. If testing, the filepath of the - resulting SPARE score dataframe (as a .csv file) to be + [OUTPUT] The filename for the model (as a .pkl.gz) to be saved + [-o, --output] at, if training. If testing, the filepath of the + resulting SPARE score dataframe (as a .csv file) to be saved. If not given, nothing will be saved. - [MODEL] The model to be used (only) for testing. Can be a + [MODEL] The model to be used (only) for testing. Can be a [-m, --model, filepath string of a .pkl.gz file. Required for testing --model_file] - [KEY_VAR] The key variable to be used for training. This could - [-kv, be a string of a column name that can uniquely - --key_var, identify a row of the dataset. - --identifier] For example (if a row_ID doesn't exist), it could be: + [KEY_VAR] The key variable to be used for training. This could + [-kv, be a string of a column name that can uniquely + --key_var, identify a row of the dataset. + --identifier] For example (if a row_ID doesn't exist), it could be: --key_var PTID - If not given, the first column of the dataset is + If not given, the first column of the dataset is considered the primary key of the dataset. Required for training. [DATA_VARS] The list of predictors to be used for training. List. [-dv, If not given, training will assume that all (apart from - --data_vars, the key variables) variables will be used as + --data_vars, the key variables) variables will be used as --predictors] predictors, with the ignore variables ignored. [IGNORE_VARS] The list of predictors to be ignored for training. Can - [-iv, be a list, or empty. + [-iv, be a list, or empty. --ignore_vars, - --ignore] + --ignore] [TARGET] The characteristic to be predicted in the course of the - [-t, training. String of the name of the column. Required + [-t, training. String of the name of the column. 
Required --target, for training. --to_predict] - [POS_GROUP] Group to assign a positive SPARE score (only for + [POS_GROUP] Group to assign a positive SPARE score (only for -pg, classification). String. Required for training. --pos_group] @@ -90,17 +99,17 @@ optional arguments: [-mt, 'SVM' or 'MLP'. Required for training. --model_type] - [KERNEL] The kernel for SVM training. 'linear' or 'rbf' (only + [KERNEL] The kernel for SVM training. 'linear' or 'rbf' (only -k, linear is supported currently in regression). --kernel] - [SPARE_VAR] The name of the column to be used for SPARE score. If + [SPARE_VAR] The name of the column to be used for SPARE score. If [-sv, not given, the column will be named 'SPARE_score'. --spare_var] [VERBOSE] Verbosity. Int. [-v, 0: Warnings - --verbose, 1: Info + --verbose, 1: Info --verbosity] 2: Debug 3: Errors 4: Critical @@ -109,8 +118,8 @@ optional arguments: [-l, printed out. --logs] - [VERSION] Display the version of the package. - [-V, --version] + [VERSION] Display the version of the package. + [-V, --version] [HELP] Show this help message and exit. [-h, --help] diff --git a/codecov.yml b/codecov.yml index 657d8f7..c82f0ee 100644 --- a/codecov.yml +++ b/codecov.yml @@ -18,3 +18,9 @@ comment: layout: "reach,diff,flags,tree" behavior: default require_changes: no + +ignore: + - "merge_ROI_demo_and_test.py" + - "setup.py" + - "spare_scores/cli.py" + - "tests/conftest.py" diff --git a/dev-dependencies.txt b/dev-dependencies.txt index 044635a..d12df9f 100644 --- a/dev-dependencies.txt +++ b/dev-dependencies.txt @@ -3,24 +3,24 @@ attrs==23.1.0 certifi==2023.5.7 charset-normalizer==3.1.0 click==8.1.3 -contourpy==1.1.0 +# contourpy==1.2.1 cycler==0.11.0 exceptiongroup==1.1.1 filelock==3.12.2 fonttools==4.40.0 -frozenlist==1.3.3 -grpcio==1.51.3 +frozenlist==1.4.1 +grpcio==1.65.4 idna==3.4 importlib-resources==5.12.0 iniconfig==2.0.0 joblib==1.3.1 jsonschema==4.17.3 kiwisolver==1.4.4 -matplotlib==3.7.1 +matplotlib==3.9.0 msgpack==1.0.5 -numpy==1.26.4 +numpy==1.22.0 packaging==23.1 -pandas==2.0.3 +pandas==2.2.0 Pillow==9.5.0 pkgutil_resolve_name==1.3.10 pluggy==1.5.0 @@ -30,17 +30,18 @@ pyrsistent==0.19.3 pytest==8.2.2 python-dateutil==2.8.2 pytz==2023.3 -PyYAML==6.0 -ray==2.5.1 +PyYAML==6.0.2 +ray==2.34.0 requests==2.31.0 -scikit-learn==0.24.2 -scipy==1.8.0 +scikit-learn==1.0.1 +scipy==1.14.0 six==1.16.0 threadpoolctl==3.1.0 tomli==2.0.1 -torch==2.3.1 -typing_extensions==4.7.0 +torch==2.2.0 +torchvision==0.17.1 +typing_extensions==4.8.0 tzdata==2023.3 urllib3==2.0.3 zipp==3.15.0 -setuptools==59.8.0 +setuptools==69.2.0 diff --git a/setup.py b/setup.py index 776e1d9..e0f7596 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ "pandas", "setuptools", "scikit-learn", - "torch<2.3.1", + "torch<=2.3.1", "matplotlib", "optuna", ], diff --git a/spare_scores/classes.py b/spare_scores/classes.py index 1a11fff..f072b76 100644 --- a/spare_scores/classes.py +++ b/spare_scores/classes.py @@ -2,10 +2,11 @@ from typing import Any import pandas as pd -from data_prep import logging_basic_config -from mlp import MLPModel -from mlp_torch import MLPTorchModel -from svm import SVMModel + +from .data_prep import logging_basic_config +from .mlp import MLPModel +from .mlp_torch import MLPTorchModel +from .svm import SVMModel class SpareModel: @@ -77,7 +78,7 @@ def __init__( predictors, target, key_var, verbose, **parameters, **kwargs ) else: - logger.err(f"Model type {self.model_type} not supported.") + logger.error(f"Model type {self.model_type} not supported.") raise 
NotImplementedError("Only SVM is supported currently.") def set_parameters(self, **parameters: Any) -> None: diff --git a/spare_scores/cli.py b/spare_scores/cli.py index 93848c4..598d6ab 100644 --- a/spare_scores/cli.py +++ b/spare_scores/cli.py @@ -1,7 +1,8 @@ import argparse import pkg_resources # type: ignore -from spare import spare_test, spare_train + +from .spare import spare_test, spare_train VERSION = pkg_resources.require("spare_scores")[0].version diff --git a/spare_scores/data_prep.py b/spare_scores/data_prep.py index a262c70..a7e3127 100644 --- a/spare_scores/data_prep.py +++ b/spare_scores/data_prep.py @@ -6,7 +6,8 @@ import numpy as np import pandas as pd from scipy import stats -from util import convert_to_number_if_possible + +from .util import convert_to_number_if_possible def check_train( diff --git a/spare_scores/mdl/mdl_SPARE_BA_hMUSE_single.pkl.gz b/spare_scores/mdl/mdl_SPARE_BA_hMUSE_single.pkl.gz index 525c16a..dd37825 100644 Binary files a/spare_scores/mdl/mdl_SPARE_BA_hMUSE_single.pkl.gz and b/spare_scores/mdl/mdl_SPARE_BA_hMUSE_single.pkl.gz differ diff --git a/spare_scores/mlp.py b/spare_scores/mlp.py index d101563..afb233a 100644 --- a/spare_scores/mlp.py +++ b/spare_scores/mlp.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd -from data_prep import logging_basic_config from sklearn import metrics from sklearn.exceptions import ConvergenceWarning from sklearn.model_selection import GridSearchCV, KFold @@ -13,6 +12,8 @@ from sklearn.preprocessing import StandardScaler from sklearn.utils._testing import ignore_warnings +from .data_prep import logging_basic_config + class MLPModel: """ diff --git a/spare_scores/mlp_torch.py b/spare_scores/mlp_torch.py index 3715432..b37a0ab 100644 --- a/spare_scores/mlp_torch.py +++ b/spare_scores/mlp_torch.py @@ -9,7 +9,6 @@ import torch import torch.nn as nn import torch.optim as optim -from data_prep import logging_basic_config from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import ( accuracy_score, @@ -29,6 +28,8 @@ from sklearn.utils._testing import ignore_warnings from torch.utils.data import DataLoader, Dataset +from .data_prep import logging_basic_config + os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" # for MPS backend device = ( "cuda" diff --git a/spare_scores/spare.py b/spare_scores/spare.py index 7caddc8..ae5dcaa 100644 --- a/spare_scores/spare.py +++ b/spare_scores/spare.py @@ -3,14 +3,21 @@ import numpy as np import pandas as pd -from classes import MetaData, SpareModel -from data_prep import ( + +from .classes import MetaData, SpareModel +from .data_prep import ( check_test, check_train, convert_cat_variables, logging_basic_config, ) -from util import check_file_exists, is_unique_identifier, load_df, load_model, save_file +from .util import ( + check_file_exists, + is_unique_identifier, + load_df, + load_model, + save_file, +) def spare_train( @@ -105,7 +112,7 @@ def spare_train( # Check if it contains any errors. 
try: - df, predictors, mdl_task = check_train( + df, predictors, mdl_task = check_train( # type: ignore df, predictors, to_predict, verbose, pos_group ) except Exception as e: @@ -200,9 +207,6 @@ def spare_train( if output != "" and output is not None: save_file(result, output, "train", logger) - print("###### PRINTING ########") - print(result) - print("####### END ###########") res["status"] = "OK" res["data"] = result res["status_code"] = 0 diff --git a/spare_scores/svm.py b/spare_scores/svm.py index 5734927..a3a7b30 100644 --- a/spare_scores/svm.py +++ b/spare_scores/svm.py @@ -4,12 +4,13 @@ import numpy as np import pandas as pd -from data_prep import logging_basic_config from sklearn import metrics from sklearn.model_selection import GridSearchCV, RepeatedKFold from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC, LinearSVC, LinearSVR -from util import expspace + +from .data_prep import logging_basic_config +from .util import expspace class SVMModel: diff --git a/tests/test_file b/tests/test_file deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/test_data_prep.py b/tests/unit/test_data_prep.py index 591f8fa..8f07ac0 100644 --- a/tests/unit/test_data_prep.py +++ b/tests/unit/test_data_prep.py @@ -1,21 +1,17 @@ import logging import os -import sys import unittest import pandas as pd -sys.path.append( - "../../spare_scores/" -) # check_test and check_train were imported from the build, but now they are updated -from data_prep import ( # If updates go through, it can be updated to spare_scores.data_prep +from spare_scores.data_prep import ( # If updates go through, it can be updated to spare_scores.data_prep age_sex_match, check_test, check_train, logging_basic_config, smart_unique, ) -from util import load_df +from spare_scores.util import load_df class CheckDataPrep(unittest.TestCase): diff --git a/tests/unit/test_spare_scores.py b/tests/unit/test_spare_scores.py index 46857c9..bcbf974 100644 --- a/tests/unit/test_spare_scores.py +++ b/tests/unit/test_spare_scores.py @@ -1,14 +1,28 @@ -import sys import unittest from pathlib import Path - +import numpy as np import pandas as pd +import os +from spare_scores.data_prep import check_test +from spare_scores.util import load_df, load_model +from spare_scores.mlp_torch import MLPDataset +from spare_scores.spare import spare_test, spare_train -sys.path.append("../../spare_scores") -from util import load_df, load_model - -from spare_scores import spare_test, spare_train +class CheckMLPDataset(unittest.TestCase): + def test_len(self): + # test case 1: testing length + self.X = np.array([1, 2, 3, 4, 5, 6, 7, 8]) + self.Y = np.array([1, 2, 3, 4, 5, 6, 7, 8]) + self.Dataset = MLPDataset(self.X, self.Y) + self.assertTrue(len(self.Dataset) == 8) + def test_idx(self): + # test case 2: testing getter + self.X = np.array([1, 2, 3, 4, 5, 6, 7, 8]) + self.Y = np.array([1, 2, 3, 4, 5, 6, 7, 8]) + self.Dataset = MLPDataset(self.X, self.Y) + self.assertTrue(self.Dataset[0] == (1, 1)) + self.assertTrue(self.Dataset[len(self.Dataset) - 1] == (8, 8)) class CheckSpareScores(unittest.TestCase): @@ -55,7 +69,7 @@ def test_spare_test_SVM(self): ) self.assertTrue(result == ["ROI1"]) - def test_spare_train_MLP(self): + def test_spare_train_MLP(self): self.df_fixture = load_df("../fixtures/sample_data.csv") self.model_fixture = load_model("../fixtures/sample_model.pkl.gz") # Test case 1: Testing spare_train with MLP model @@ -86,6 +100,30 @@ def test_spare_train_MLP(self): ) self.assertTrue(metadata["to_predict"] == 
self.model_fixture[1]["to_predict"]) + # test case 2: testing MLP regression model + result = spare_train( + self.df_fixture, + "ROI1", + model_type="MLP", + data_vars = [ + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10" + ] + ) + status, result_data = result["status"], result["data"] + metadata = result_data[1] + self.assertTrue(status == "OK") + self.assertTrue(metadata["mdl_type"] == "MLP") + self.assertTrue(metadata["kernel"] == "linear") + # self.assertTrue(metadata["to_predict"] == "to_predict") + def test_spare_train_MLPTorch(self): self.df_fixture = load_df("../fixtures/sample_data.csv") self.model_fixture = load_model("../fixtures/sample_model.pkl.gz") @@ -119,6 +157,30 @@ def test_spare_train_MLPTorch(self): ) self.assertTrue(metadata["to_predict"] == self.model_fixture[1]["to_predict"]) + # test case 2: testing MLPTorch regression model + result = spare_train( + self.df_fixture, + "ROI1", + model_type="MLPTorch", + data_vars = [ + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10", + ] + ) + status, result_data = result["status"], result["data"] + metadata = result_data[1] + self.assertTrue(status == "OK") + self.assertTrue(metadata["mdl_type"] == "MLPTorch") + self.assertTrue(metadata["kernel"] == "linear") + # self.assertTrue(metadata["to_predict"] == "to_predict") + def test_spare_train_SVM(self): self.df_fixture = load_df("../fixtures/sample_data.csv") self.model_fixture = load_model("../fixtures/sample_model.pkl.gz") @@ -155,3 +217,195 @@ def test_spare_train_SVM(self): metadata["categorical_var_map"] == self.model_fixture[1]["categorical_var_map"] ) + + # test case 2: testing SVM regression model + result = spare_train( + self.df_fixture, + "ROI1", + data_vars = [ + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10" + ] + ) + status, result_data = result["status"], result["data"] + metadata = result_data[1] + self.assertTrue(status == "OK") + self.assertTrue(metadata["mdl_type"] == "SVM") + self.assertTrue(metadata["kernel"] == "linear") + # self.assertTrue(metadata["to_predict"] == "to_predict") + + def test_spare_train_SVM_None(self): + self.df_fixture = load_df("../fixtures/sample_data.csv") + # Test case 1: Training with no data vars + result = spare_train( + self.df_fixture, + "Age" + ) + self.assertTrue(result is not None) + + + def test_spare_train_SVM2(self): + self.df_fixture = load_df("../fixtures/sample_data.csv") + # Test case 1: Test overwrites + result = spare_train( + self.df_fixture, + "Age", + output="test_util.py" + ) + self.assertTrue(result["status_code"] == 2) + + # Test case 2: Train with non existing output file + result = spare_train( + self.df_fixture, + "Age", + data_vars=[ + "ROI1", + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10", + ], + output="results" + ) + self.assertTrue(os.path.isfile("results.pkl.gz") == True) + os.remove("results.pkl.gz") + + def test_spare_train_non_existing_model(self): + self.df_fixture = load_df("../fixtures/sample_data.csv") + # Test case 1: training with non existing model type + result = spare_train( + self.df_fixture, + "Age", + model_type="CNN", + data_vars=[ + "ROI1", + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10", + ], + ) + self.assertTrue(result["status_code"] == 2) + + def test_spare_test_exceptions(self): + self.df_fixture = load_df("../fixtures/sample_data.csv") + self.model_fixture = 
load_model("../fixtures/sample_model.pkl.gz") + + # Test case 1: Test with existing output path + if(not os.path.isfile("output.csv")): + f = open("output.csv", "x") + result = spare_test(self.df_fixture, self.model_fixture, output="output") + self.assertTrue(result["status_code"] == 0) + os.remove("output.csv") + + # Test case 2: Test with predictors not existing in the original dataframe + data = { + "Var1": [x for x in range(100)], + "Var2": [x for x in range(100)], + "label": [x**2 for x in range(100)] + } + self.df_fixture = pd.DataFrame(data=data) + meta_data = { + "predictors": "Not_existing" + } + err, cols_not_found = check_test(self.df_fixture, meta_data) + self.assertTrue(len(err) != 0) + self.assertTrue(cols_not_found is not None) + + + def test_spare_train_regression_error(self): + self.df_fixture = load_df("../fixtures/sample_data.csv") + # Test case 1: testing with non-integer like as predictor + result = spare_train( + self.df_fixture, + "ScanID", + data_vars=[ + "ROI1", + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10", + ] + ) + + self.assertTrue(result["status_code"] == 2) + self.assertTrue(result["status"] == "Dataset check failed before training was initiated.") + + # Test case 2: testing with a too-small dataset + data = { + "Var1": [1,2,3,4,5], + "Var2": [2,4,6,8,10], + "label": [1.5,2.4,3.2,4.5,5.5] + } + self.df_fixture = pd.DataFrame(data=data) + result = spare_train( + self.df_fixture, + "label", + data_vars=[ + "Var1", + "Var2" + ] + ) + + self.assertTrue(result["status_code"] == 2) + self.assertTrue(result["status"] == "Dataset check failed before training was initiated.") + + # Test case 3: testing with a label that has to variance + data = { + "Var1": [1,2,3,4,5], + "Var2": [2,4,6,8,10], + "label": [1,1,1,1,1] + } + self.df_fixture = pd.DataFrame(data=data) + result = spare_train( + self.df_fixture, + "label", + data_vars=[ + "Var1", + "Var2" + ] + ) + self.assertTrue(result["status_code"] == 2) + self.assertTrue(result["status"] == "Dataset check failed before training was initiated.") + + # Test case 4: testing with a dataset that may be too small + data = { + "Var1": [x for x in range(80)], + "Var2": [x for x in range(80)], + "Var3": [x for x in range(80)], + "label": [x*2 for x in range(80)] + } + + self.df_fixture = pd.DataFrame(data=data) + result = spare_train( + self.df_fixture, + "label", + data_vars=[ + "Var1", + "Var2" + ] + ) + + self.assertTrue(result is not None) diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py index 2265f36..7512b3d 100644 --- a/tests/unit/test_util.py +++ b/tests/unit/test_util.py @@ -1,14 +1,12 @@ import logging import os -import sys import unittest from pathlib import Path import numpy as np import pandas as pd -sys.path.append("../../spare_scores") -from util import ( +from spare_scores.util import ( add_file_extension, check_file_exists, convert_to_number_if_possible, @@ -23,11 +21,9 @@ class CheckSpareScoresUtil(unittest.TestCase): def test_load_model(self): - self.model_fixture = "../fixture/sample_model.pkl.gz" - # Test case 1: No arguments given: - no_args = "load_model() missing 1 required positional " + "argument: 'mdl_path'" + self.model_fixture = load_model("../../tests/fixtures/sample_model.pkl.gz") - # Test case 2: Load a model + # Test case 1: Load a model filepath = ( Path(__file__).resolve().parent.parent / "fixtures" / "sample_model.pkl.gz" ) @@ -121,11 +117,6 @@ def test_is_unique_identifier(self): self.df_fixture = pd.DataFrame(data=df) 
        self.assertFalse(is_unique_identifier(self.df_fixture, ["Var1", "Var2"]))
 
-    def test_load_model(self):
-        # test case 1: testing opening existing model
-        model = load_model("../../spare_scores/mdl/mdl_SPARE_BA_hMUSE_single.pkl.gz")
-        self.assertFalse(model is None)
-
     def test_load_examples(self):
         # test case 1: testing loading example csv
         file_name = "example_data.csv"
@@ -133,10 +124,15 @@ def test_load_examples(self):
         self.assertTrue(isinstance(result, pd.DataFrame))
 
         # test case 2: testing loading model
-        file_name = "mdl_SPARE_BA_hMUSE_single.pkl.gz"
+        file_name = "../../spare_scores/mdl/mdl_SPARE_BA_hMUSE_single.pkl.gz"
         result = load_examples(file_name)
         self.assertFalse(result is None and isinstance(result, pd.DataFrame))
 
+        # test case 3: testing with a non-existent filename
+        file_name = "non_existant"
+        result = load_examples(file_name)
+        self.assertTrue(result is None)
+
     def test_convert_to_number_if_possible(self):
         # test case 1: valid convertion to integer
         num = "254"