From ca6e2cdab11238b02bfaf893346cc831de069211 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Thu, 21 Sep 2023 22:31:08 +0800 Subject: [PATCH] Refactor testing cases (#189) * refactor: clear up testing cases; * refactor: refactor code in Dataset classes for models; * refactor: adjust testing workflows according to refactored test cases; * fix: turn missing_mask into torch.float; * fix: error in BTTF testing case; * feat: using pip to manage dependencies in CI testing workflow, and using conda in Daily testing workflow; --- .github/workflows/testing_ci.yml | 59 +- .github/workflows/testing_daily.yml | 60 +- docs/pypots.forecasting.rst | 24 +- pypots/classification/grud/data.py | 2 +- .../template/{dataset.py => data.py} | 0 .../template/{dataset.py => data.py} | 0 pypots/clustering/vader/data.py | 12 +- pypots/data/base.py | 16 +- pypots/data/saving.py | 11 +- .../template/{dataset.py => data.py} | 0 pypots/imputation/brits/data.py | 26 +- pypots/imputation/gpvae/data.py | 9 +- pypots/imputation/saits/data.py | 20 +- .../template/{dataset.py => data.py} | 0 tests/classification/__init__.py | 6 + tests/classification/brits.py | 106 +++ tests/classification/config.py | 21 + tests/classification/grud.py | 105 +++ tests/classification/raindrop.py | 110 +++ tests/cli/__init__.py | 6 + tests/cli/config.py | 11 + tests/cli/dev.py | 92 ++ tests/cli/doc.py | 104 +++ tests/cli/env.py | 49 ++ tests/clustering/__init__.py | 6 + tests/clustering/config.py | 22 + tests/clustering/crli.py | 103 +++ .../vader.py} | 93 +-- tests/data/__init__.py | 6 + .../lazy_loading_strategy.py} | 77 +- tests/forecasting/__init__.py | 6 + .../bttf.py} | 14 +- tests/forecasting/config.py | 23 + tests/global_test_config.py | 13 + tests/imputation/__init__.py | 6 + tests/imputation/brits.py | 104 +++ tests/imputation/config.py | 25 + tests/imputation/gpvae.py | 104 +++ tests/imputation/locf.py | 46 + tests/imputation/mrnn.py | 104 +++ tests/imputation/saits.py | 110 +++ tests/imputation/transformer.py | 113 +++ tests/imputation/usgan.py | 111 +++ tests/optim/__init__.py | 6 + tests/optim/adadelta.py | 56 ++ tests/optim/adagrad.py | 56 ++ tests/optim/adam.py | 56 ++ tests/optim/adamw.py | 56 ++ tests/optim/config.py | 19 + tests/optim/rmsprop.py | 56 ++ tests/optim/sgd.py | 56 ++ tests/test_classification.py | 256 ------ tests/test_cli.py | 189 ----- tests/test_imputation.py | 503 ----------- tests/test_optim.py | 244 ------ tests/test_training_on_multi_gpus.py | 783 ------------------ tests/utils/__init__.py | 6 + tests/{test_utils.py => utils/logging.py} | 25 +- tests/utils/random.py | 36 + 59 files changed, 2111 insertions(+), 2227 deletions(-) rename pypots/classification/template/{dataset.py => data.py} (100%) rename pypots/clustering/template/{dataset.py => data.py} (100%) rename pypots/forecasting/template/{dataset.py => data.py} (100%) rename pypots/imputation/template/{dataset.py => data.py} (100%) create mode 100644 tests/classification/__init__.py create mode 100644 tests/classification/brits.py create mode 100644 tests/classification/config.py create mode 100644 tests/classification/grud.py create mode 100644 tests/classification/raindrop.py create mode 100644 tests/cli/__init__.py create mode 100644 tests/cli/config.py create mode 100644 tests/cli/dev.py create mode 100644 tests/cli/doc.py create mode 100644 tests/cli/env.py create mode 100644 tests/clustering/__init__.py create mode 100644 tests/clustering/config.py create mode 100644 tests/clustering/crli.py rename tests/{test_clustering.py => 
clustering/vader.py} (51%) create mode 100644 tests/data/__init__.py rename tests/{test_data.py => data/lazy_loading_strategy.py} (56%) create mode 100644 tests/forecasting/__init__.py rename tests/{test_forecasting.py => forecasting/bttf.py} (78%) create mode 100644 tests/forecasting/config.py create mode 100644 tests/imputation/__init__.py create mode 100644 tests/imputation/brits.py create mode 100644 tests/imputation/config.py create mode 100644 tests/imputation/gpvae.py create mode 100644 tests/imputation/locf.py create mode 100644 tests/imputation/mrnn.py create mode 100644 tests/imputation/saits.py create mode 100644 tests/imputation/transformer.py create mode 100644 tests/imputation/usgan.py create mode 100644 tests/optim/__init__.py create mode 100644 tests/optim/adadelta.py create mode 100644 tests/optim/adagrad.py create mode 100644 tests/optim/adam.py create mode 100644 tests/optim/adamw.py create mode 100644 tests/optim/config.py create mode 100644 tests/optim/rmsprop.py create mode 100644 tests/optim/sgd.py delete mode 100644 tests/test_classification.py delete mode 100644 tests/test_cli.py delete mode 100644 tests/test_imputation.py delete mode 100644 tests/test_optim.py delete mode 100644 tests/test_training_on_multi_gpus.py create mode 100644 tests/utils/__init__.py rename tests/{test_utils.py => utils/logging.py} (64%) create mode 100644 tests/utils/random.py diff --git a/.github/workflows/testing_ci.yml b/.github/workflows/testing_ci.yml index d339afe5..7e5b6780 100644 --- a/.github/workflows/testing_ci.yml +++ b/.github/workflows/testing_ci.yml @@ -15,43 +15,60 @@ jobs: runs-on: ${{ matrix.os }} defaults: run: - shell: bash -l {0} + shell: bash {0} strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macOS-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.7", "3.10"] + torch-version: ["1.13.1"] steps: - name: Check out the repo code uses: actions/checkout@v3 - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + - name: Determine the Python version + uses: haya14busa/action-cond@v1 + id: condval with: - activate-environment: pypots-test - python-version: ${{ matrix.python-version }} - environment-file: tests/environment_for_conda_test.yml - auto-activate-base: false + cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }} + # Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'" + if_true: "3.7.16" + if_false: ${{ matrix.python-version }} + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ steps.condval.outputs.value }} + check-latest: true + cache: pip + cache-dependency-path: | + setup.cfg + + - name: Install PyTorch ${{ matrix.torch-version }}+cpu + # we have to install torch in advance because torch_sparse needs it for compilation, + # refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details + run: | + which python + which pip + python -m pip install --upgrade pip + pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu + python -c "import torch; print('PyTorch:', torch.__version__)" + + - name: Install other dependencies + run: | + pip install pypots + pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html" + pip install -e ".[dev]" - name: Fetch the test environment details run: | which python - conda info - conda list + pip list - name: Test with pytest run: | - # 
run tests separately here due to Segmentation Fault in test_clustering when run all in - # one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF. - python -m pytest -rA tests/test_classification.py -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_imputation.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_clustering.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_forecasting.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_optim.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_data.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_utils.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - python -m pytest -rA tests/test_cli.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + coverage run --source=pypots -m pytest -rA tests/*/* - name: Generate the LCOV report run: | @@ -61,4 +78,4 @@ jobs: uses: coverallsapp/github-action@master with: github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: 'coverage.lcov' + path-to-lcov: "coverage.lcov" diff --git a/.github/workflows/testing_daily.yml b/.github/workflows/testing_daily.yml index f0b3ba61..5e41630f 100644 --- a/.github/workflows/testing_daily.yml +++ b/.github/workflows/testing_daily.yml @@ -10,61 +10,43 @@ jobs: runs-on: ${{ matrix.os }} defaults: run: - shell: bash {0} + shell: bash -l {0} strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macOS-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] - torch-version: ["1.13.1"] + python-version: ["3.7", "3.10"] steps: - name: Check out the repo code uses: actions/checkout@v3 - - name: Determine the Python version - uses: haya14busa/action-cond@v1 - id: condval + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 with: - cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }} - # Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'" - if_true: "3.7.16" - if_false: ${{ matrix.python-version }} - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: ${{ steps.condval.outputs.value }} - check-latest: true - cache: pip - cache-dependency-path: | - setup.cfg - - - name: Install PyTorch ${{ matrix.torch-version }}+cpu - # we have to install torch in advance because torch_sparse needs it for compilation, - # refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details - run: | - which python - which pip - python -m pip install --upgrade pip - pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu - python -c "import torch; print('PyTorch:', torch.__version__)" - - - name: Install other dependencies - run: | - pip install pypots - pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html" - pip install -e ".[dev]" + activate-environment: pypots-test + python-version: ${{ matrix.python-version }} + environment-file: tests/environment_for_conda_test.yml + auto-activate-base: false - name: Fetch the test environment details run: | which python - pip list + conda info + conda list - name: Test 
with pytest run: | - coverage run --source=pypots -m pytest --ignore tests/test_training_on_multi_gpus.py - # ignore the test_training_on_multi_gpus.py because it requires multiple GPUs which are not available on GitHub Actions + # run tests separately here due to Segmentation Fault in test_clustering when run all in + # one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF. + python -m pytest -rA tests/classification/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/imputation/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/clustering/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/forecasting/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/optim/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/data/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/utils/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc + python -m pytest -rA tests/cli/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc - name: Generate the LCOV report run: | @@ -74,4 +56,4 @@ jobs: uses: coverallsapp/github-action@master with: github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: "coverage.lcov" + path-to-lcov: 'coverage.lcov' diff --git a/docs/pypots.forecasting.rst b/docs/pypots.forecasting.rst index 2ae67b85..c4ac76b7 100644 --- a/docs/pypots.forecasting.rst +++ b/docs/pypots.forecasting.rst @@ -1,11 +1,31 @@ pypots.forecasting package ========================== +Subpackages +----------- -pypots.forecasting.bttf module +.. toctree:: + :maxdepth: 4 + + pypots.forecasting.bttf + pypots.forecasting.template + +Submodules +---------- + +pypots.forecasting.base module ------------------------------ -.. automodule:: pypots.forecasting.bttf +.. automodule:: pypots.forecasting.base + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + +Module contents +--------------- + +.. 
automodule:: pypots.forecasting :members: :undoc-members: :show-inheritance: diff --git a/pypots/classification/grud/data.py b/pypots/classification/grud/data.py index 52186017..edf1d4d0 100644 --- a/pypots/classification/grud/data.py +++ b/pypots/classification/grud/data.py @@ -123,7 +123,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: if self.file_handle is None: self.file_handle = self._open_file_handle() - X = torch.from_numpy(self.file_handle["X"][idx]) + X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) missing_mask = (~torch.isnan(X)).to(torch.float32) X_filledLOCF = self.locf._locf_torch(X.unsqueeze(dim=0)).squeeze() X = torch.nan_to_num(X) diff --git a/pypots/classification/template/dataset.py b/pypots/classification/template/data.py similarity index 100% rename from pypots/classification/template/dataset.py rename to pypots/classification/template/data.py diff --git a/pypots/clustering/template/dataset.py b/pypots/clustering/template/data.py similarity index 100% rename from pypots/clustering/template/dataset.py rename to pypots/clustering/template/data.py diff --git a/pypots/clustering/vader/data.py b/pypots/clustering/vader/data.py index a3b2f91d..a8910b44 100644 --- a/pypots/clustering/vader/data.py +++ b/pypots/clustering/vader/data.py @@ -6,12 +6,12 @@ # License: GLP-v3 -from typing import Union +from typing import Union, Iterable -from ..crli.data import DatasetForCRLI +from ...data.base import BaseDataset -class DatasetForVaDER(DatasetForCRLI): +class DatasetForVaDER(BaseDataset): """Dataset class for model VaDER. Parameters @@ -45,3 +45,9 @@ def __init__( file_type: str = "h5py", ): super().__init__(data, return_labels, file_type) + + def _fetch_data_from_array(self, idx: int) -> Iterable: + return super()._fetch_data_from_array(idx) + + def _fetch_data_from_file(self, idx: int) -> Iterable: + return super()._fetch_data_from_file(idx) diff --git a/pypots/data/base.py b/pypots/data/base.py index 86b15fc2..1bef9f9c 100644 --- a/pypots/data/base.py +++ b/pypots/data/base.py @@ -204,13 +204,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: The collated data sample, a list including all necessary sample info. """ - X = self.X[idx] - missing_mask = ~torch.isnan(X) + X = self.X[idx].to(torch.float32) + missing_mask = (~torch.isnan(X)).to(torch.float32) X = torch.nan_to_num(X) sample = [ torch.tensor(idx), - X.to(torch.float32), - missing_mask.to(torch.float32), + X, + missing_mask, ] if self.y is not None and self.return_labels: @@ -279,13 +279,13 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: if self.file_handle is None: self.file_handle = self._open_file_handle() - X = torch.from_numpy(self.file_handle["X"][idx]) - missing_mask = ~torch.isnan(X) + X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) + missing_mask = (~torch.isnan(X)).to(torch.float32) X = torch.nan_to_num(X) sample = [ torch.tensor(idx), - X.to(torch.float32), - missing_mask.to(torch.float32), + X, + missing_mask, ] # if the dataset has labels and is for training, then fetch it from the file diff --git a/pypots/data/saving.py b/pypots/data/saving.py index 8581ad50..61138df2 100644 --- a/pypots/data/saving.py +++ b/pypots/data/saving.py @@ -14,7 +14,11 @@ from pypots.utils.logging import logger -def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None: +def save_dict_into_h5( + data_dict: dict, + saving_dir: str, + saving_name: str = "datasets.h5", +) -> None: """Save the given data (in a dictionary) into the given h5 file. 
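
A minimal usage sketch of the refactored helper above, which now accepts a custom file name via the new saving_name parameter (the toy array and the "example_dir" path are illustrative, not taken from the test suite):

    import numpy as np
    from pypots.data.saving import save_dict_into_h5

    # writes example_dir/train_set.h5; saving_name defaults to "datasets.h5"
    save_dict_into_h5(
        {"X": np.random.randn(20, 24, 10)},
        saving_dir="example_dir",
        saving_name="train_set.h5",
    )
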
Parameters @@ -25,6 +29,9 @@ def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None: saving_dir : str, The h5 file to save the data. + saving_name : str, optional (default="datasets.h5") + The final name of the saved h5 file. + """ def save_set(handle, name, data): @@ -36,7 +43,7 @@ def save_set(handle, name, data): handle.create_dataset(name, data=data) create_dir_if_not_exist(saving_dir) - saving_path = os.path.join(saving_dir, "datasets.h5") + saving_path = os.path.join(saving_dir, saving_name) with h5py.File(saving_path, "w") as hf: for k, v in data_dict.items(): save_set(hf, k, v) diff --git a/pypots/forecasting/template/dataset.py b/pypots/forecasting/template/data.py similarity index 100% rename from pypots/forecasting/template/dataset.py rename to pypots/forecasting/template/data.py diff --git a/pypots/imputation/brits/data.py b/pypots/imputation/brits/data.py index f39e411c..342ede98 100644 --- a/pypots/imputation/brits/data.py +++ b/pypots/imputation/brits/data.py @@ -59,14 +59,14 @@ def __init__( self.processed_data = { "forward": { - "X": forward_X, - "missing_mask": forward_missing_mask, - "delta": forward_delta, + "X": forward_X.to(torch.float32), + "missing_mask": forward_missing_mask.to(torch.float32), + "delta": forward_delta.to(torch.float32), }, "backward": { - "X": backward_X, - "missing_mask": backward_missing_mask, - "delta": backward_delta, + "X": backward_X.to(torch.float32), + "missing_mask": backward_missing_mask.to(torch.float32), + "delta": backward_delta.to(torch.float32), }, } @@ -101,13 +101,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: sample = [ torch.tensor(idx), # for forward - self.processed_data["forward"]["X"][idx].to(torch.float32), - self.processed_data["forward"]["missing_mask"][idx].to(torch.float32), - self.processed_data["forward"]["delta"][idx].to(torch.float32), + self.processed_data["forward"]["X"][idx], + self.processed_data["forward"]["missing_mask"][idx], + self.processed_data["forward"]["delta"][idx], # for backward - self.processed_data["backward"]["X"][idx].to(torch.float32), - self.processed_data["backward"]["missing_mask"][idx].to(torch.float32), - self.processed_data["backward"]["delta"][idx].to(torch.float32), + self.processed_data["backward"]["X"][idx], + self.processed_data["backward"]["missing_mask"][idx], + self.processed_data["backward"]["delta"][idx], ] if self.y is not None and self.return_labels: @@ -133,7 +133,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: if self.file_handle is None: self.file_handle = self._open_file_handle() - X = torch.from_numpy(self.file_handle["X"][idx]) + X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) missing_mask = (~torch.isnan(X)).to(torch.float32) X = torch.nan_to_num(X) diff --git a/pypots/imputation/gpvae/data.py b/pypots/imputation/gpvae/data.py index 4f8b27c4..8bb9be8c 100644 --- a/pypots/imputation/gpvae/data.py +++ b/pypots/imputation/gpvae/data.py @@ -10,7 +10,6 @@ import torch from ...data.base import BaseDataset -from ...data.utils import torch_parse_delta class DatasetForGPVAE(BaseDataset): @@ -51,7 +50,7 @@ def __init__( if not isinstance(self.data, str): # calculate all delta here. 
missing_mask = (~torch.isnan(self.X)).type(torch.float32) - X = torch.nan_to_num(self.X) + X = torch.nan_to_num(self.X).to(torch.float32) self.processed_data = { "X": X, @@ -89,8 +88,8 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: sample = [ torch.tensor(idx), # for forward - self.processed_data["X"][idx].to(torch.float32), - self.processed_data["missing_mask"][idx].to(torch.float32), + self.processed_data["X"][idx], + self.processed_data["missing_mask"][idx], ] if self.y is not None and self.return_labels: @@ -116,7 +115,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: if self.file_handle is None: self.file_handle = self._open_file_handle() - X = torch.from_numpy(self.file_handle["X"][idx]) + X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) missing_mask = (~torch.isnan(X)).to(torch.float32) X = torch.nan_to_num(X) diff --git a/pypots/imputation/saits/data.py b/pypots/imputation/saits/data.py index 2fb80bc3..5ff679a5 100644 --- a/pypots/imputation/saits/data.py +++ b/pypots/imputation/saits/data.py @@ -88,15 +88,15 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: indicating_mask : tensor. The mask indicates artificially missing values in X. """ - X = self.X[idx] + X = self.X[idx].to(torch.float32) X_intact, X, missing_mask, indicating_mask = mcar(X, rate=self.rate) sample = [ torch.tensor(idx), - X_intact.to(torch.float32), - X.to(torch.float32), - missing_mask.to(torch.float32), - indicating_mask.to(torch.float32), + X_intact, + X, + missing_mask, + indicating_mask, ] if self.y is not None and self.return_labels: @@ -122,15 +122,15 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: if self.file_handle is None: self.file_handle = self._open_file_handle() - X = torch.from_numpy(self.file_handle["X"][idx]) + X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) X_intact, X, missing_mask, indicating_mask = mcar(X, rate=self.rate) sample = [ torch.tensor(idx), - X_intact.to(torch.float32), - X.to(torch.float32), - missing_mask.to(torch.float32), - indicating_mask.to(torch.float32), + X_intact, + X, + missing_mask, + indicating_mask, ] # if the dataset has labels and is for training, then fetch it from the file diff --git a/pypots/imputation/template/dataset.py b/pypots/imputation/template/data.py similarity index 100% rename from pypots/imputation/template/dataset.py rename to pypots/imputation/template/data.py diff --git a/tests/classification/__init__.py b/tests/classification/__init__.py new file mode 100644 index 00000000..f0b4685e --- /dev/null +++ b/tests/classification/__init__.py @@ -0,0 +1,6 @@ +""" + +""" + +# Created by Wenjie Du +# License: GLP-v3 diff --git a/tests/classification/brits.py b/tests/classification/brits.py new file mode 100644 index 00000000..b1905c39 --- /dev/null +++ b/tests/classification/brits.py @@ -0,0 +1,106 @@ +""" +Test cases for BRITS classification model. 
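
The Dataset hunks above all apply the same pattern when materializing a sample: cast X to float32, derive the mask before filling NaNs, and cast the mask itself to float32 so it can be multiplied with float tensors downstream (this is the "turn missing_mask into torch.float" fix from the commit message). A minimal sketch of the pattern on a toy tensor:

    import torch

    X = torch.tensor([[1.0, float("nan")], [3.0, 4.0]])
    missing_mask = (~torch.isnan(X)).to(torch.float32)  # 1.0 = observed, 0.0 = missing
    X = torch.nan_to_num(X)                             # fill NaNs only after taking the mask
    # float masks can weight reconstruction errors directly, e.g. a masked MAE:
    recon = torch.zeros_like(X)                         # stand-in for a model's output
    mae = torch.sum(torch.abs(recon - X) * missing_mask) / torch.sum(missing_mask)
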
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os +import unittest + +import pytest + +from pypots.classification import BRITS +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_binary_classification_metrics +from tests.classification.config import ( + EPOCHS, + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_CLASSIFICATION, +) +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) + + +class TestBRITS(unittest.TestCase): + logger.info("Running tests for a classification model BRITS...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS") + model_save_name = "saved_BRITS_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a BRITS model + brits = BRITS( + DATA["n_steps"], + DATA["n_features"], + n_classes=DATA["n_classes"], + rnn_hidden_size=256, + epochs=EPOCHS, + saving_path=saving_path, + model_saving_strategy="better", + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="classification-brits") + def test_0_fit(self): + self.brits.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="classification-brits") + def test_1_classify(self): + predictions = self.brits.classify(TEST_SET) + metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) + logger.info( + f'ROC_AUC: {metrics["roc_auc"]}, \n' + f'PR_AUC: {metrics["pr_auc"]},\n' + f'F1: {metrics["f1"]},\n' + f'Precision: {metrics["precision"]},\n' + f'Recall: {metrics["recall"]},\n' + ) + assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" + + @pytest.mark.xdist_group(name="classification-brits") + def test_2_parameters(self): + assert hasattr(self.brits, "model") and self.brits.model is not None + + assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None + + assert hasattr(self.brits, "best_loss") + self.assertNotEqual(self.brits.best_loss, float("inf")) + + assert ( + hasattr(self.brits, "best_model_dict") + and self.brits.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="classification-brits") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.brits) + + # save the trained model into file, and check if the path exists + self.brits.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.brits.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/classification/config.py b/tests/classification/config.py new file mode 100644 index 00000000..35b17029 --- /dev/null +++ b/tests/classification/config.py @@ -0,0 +1,21 @@ +""" +Test configs for classification models. 
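
Every test class in this patch marks its methods with a shared xdist group name; combined with the `-n auto --dist=loadgroup` flags used in the daily-testing workflow, pytest-xdist schedules all tests of one group onto the same worker, so a model's test_0_fit runs before its test_1_classify even under parallel execution. A minimal sketch of the mechanism (the test names here are hypothetical):

    import pytest

    @pytest.mark.xdist_group(name="demo-group")
    def test_0_train():
        pass  # lands on the same worker as the test below

    @pytest.mark.xdist_group(name="demo-group")
    def test_1_evaluate():
        pass
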
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os + +from tests.global_test_config import ( + DATA, + RESULT_SAVING_DIR, +) + +EPOCHS = 5 + +TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]} +VAL_SET = {"X": DATA["val_X"], "y": DATA["val_y"]} +TEST_SET = {"X": DATA["test_X"]} + +RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification") diff --git a/tests/classification/grud.py b/tests/classification/grud.py new file mode 100644 index 00000000..a662cb70 --- /dev/null +++ b/tests/classification/grud.py @@ -0,0 +1,105 @@ +""" +Test cases for GRUD classification model. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os +import unittest + +import pytest + +from pypots.classification import GRUD +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_binary_classification_metrics +from tests.classification.config import ( + EPOCHS, + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_CLASSIFICATION, +) +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) + + +class TestGRUD(unittest.TestCase): + logger.info("Running tests for a classification model GRUD...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD") + model_save_name = "saved_GRUD_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a GRUD model + grud = GRUD( + DATA["n_steps"], + DATA["n_features"], + n_classes=DATA["n_classes"], + rnn_hidden_size=256, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="classification-grud") + def test_0_fit(self): + self.grud.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="classification-grud") + def test_1_classify(self): + predictions = self.grud.classify(TEST_SET) + metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) + logger.info( + f'ROC_AUC: {metrics["roc_auc"]}, \n' + f'PR_AUC: {metrics["pr_auc"]},\n' + f'F1: {metrics["f1"]},\n' + f'Precision: {metrics["precision"]},\n' + f'Recall: {metrics["recall"]},\n' + ) + assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" + + @pytest.mark.xdist_group(name="classification-grud") + def test_2_parameters(self): + assert hasattr(self.grud, "model") and self.grud.model is not None + + assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None + + assert hasattr(self.grud, "best_loss") + self.assertNotEqual(self.grud.best_loss, float("inf")) + + assert ( + hasattr(self.grud, "best_model_dict") + and self.grud.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="classification-grud") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.grud) + + # save the trained model into file, and check if the path exists + self.grud.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.grud.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/classification/raindrop.py 
b/tests/classification/raindrop.py new file mode 100644 index 00000000..277164dc --- /dev/null +++ b/tests/classification/raindrop.py @@ -0,0 +1,110 @@ +""" +Test cases for Raindrop classification model. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os +import unittest + +import pytest + +from pypots.classification import Raindrop +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_binary_classification_metrics +from tests.classification.config import ( + EPOCHS, + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_CLASSIFICATION, +) +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) + + +class TestRaindrop(unittest.TestCase): + logger.info("Running tests for a classification model Raindrop...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop") + model_save_name = "saved_Raindrop_model.pypots" + + # initialize a Raindrop model + raindrop = Raindrop( + DATA["n_steps"], + DATA["n_features"], + DATA["n_classes"], + n_layers=2, + d_model=DATA["n_features"] * 4, + d_inner=256, + n_heads=2, + dropout=0.3, + d_static=0, + aggregation="mean", + sensor_wise_mask=False, + static=False, + epochs=EPOCHS, + saving_path=saving_path, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="classification-raindrop") + def test_0_fit(self): + self.raindrop.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="classification-raindrop") + def test_1_classify(self): + predictions = self.raindrop.classify(TEST_SET) + metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) + logger.info( + f'ROC_AUC: {metrics["roc_auc"]}, \n' + f'PR_AUC: {metrics["pr_auc"]},\n' + f'F1: {metrics["f1"]},\n' + f'Precision: {metrics["precision"]},\n' + f'Recall: {metrics["recall"]},\n' + ) + assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" + + @pytest.mark.xdist_group(name="classification-raindrop") + def test_2_parameters(self): + assert hasattr(self.raindrop, "model") and self.raindrop.model is not None + + assert ( + hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None + ) + + assert hasattr(self.raindrop, "best_loss") + self.assertNotEqual(self.raindrop.best_loss, float("inf")) + + assert ( + hasattr(self.raindrop, "best_model_dict") + and self.raindrop.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="classification-raindrop") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.raindrop) + + # save the trained model into file, and check if the path exists + self.raindrop.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.raindrop.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py new file mode 100644 index 00000000..f0b4685e --- /dev/null +++ b/tests/cli/__init__.py @@ -0,0 +1,6 @@ +""" + +""" + +# Created by Wenjie Du +# License: GLP-v3 diff --git a/tests/cli/config.py b/tests/cli/config.py new file mode 100644 index 00000000..defdb211 --- /dev/null +++ 
b/tests/cli/config.py @@ -0,0 +1,11 @@ +""" +Test configs for CLI tools. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os + + +PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), "../../..")) diff --git a/tests/cli/dev.py b/tests/cli/dev.py new file mode 100644 index 00000000..4387be29 --- /dev/null +++ b/tests/cli/dev.py @@ -0,0 +1,92 @@ +""" +Test cases for the functions and classes in package `pypots.cli.dev`. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os +import threading +import unittest +from argparse import Namespace +from copy import copy + +import pytest + +from pypots.cli.dev import dev_command_factory +from tests.cli.config import PROJECT_ROOT_DIR + + +def callback_func(): + raise TimeoutError("Time out.") + + +def time_out(interval, callback): + def decorator(func): + def wrapper(*args, **kwargs): + t = threading.Thread(target=func, args=args, kwargs=kwargs) + t.setDaemon(True) + t.start() + t.join(interval) # wait for interval seconds + if t.is_alive(): + return threading.Timer(0, callback).start() # invoke callback() + else: + return + + return wrapper + + return decorator + + +@pytest.mark.xfail(reason="Allow tests for CLI to fail") +class TestPyPOTSCLIDev(unittest.TestCase): + # set up the default arguments + default_arguments = { + "build": False, + "cleanup": False, + "run_tests": False, + "k": None, + "show_coverage": False, + "lint_code": False, + } + # `pypots-cli dev` must run under the project root dir + os.chdir(PROJECT_ROOT_DIR) + + @pytest.mark.xdist_group(name="cli-dev") + def test_0_build(self): + arguments = copy(self.default_arguments) + arguments["build"] = True + args = Namespace(**arguments) + dev_command_factory(args).run() + + @pytest.mark.xdist_group(name="cli-dev") + def test_1_run_tests(self): + arguments = copy(self.default_arguments) + arguments["run_tests"] = True + arguments["k"] = "try_to_find_a_non_existing_test_case" + args = Namespace(**arguments) + try: + dev_command_factory(args).run() + except RuntimeError: # try to find a non-existing test case, so RuntimeError will be raised + pass + except Exception as e: # other exceptions will cause an error and result in failed testing + raise e + + # Don't test --lint-code because Black will reformat the code and cause error when generating the coverage report + # @pytest.mark.xdist_group(name="cli-dev") + # def test_2_lint_code(self): + # arguments = copy(self.default_arguments) + # arguments["lint_code"] = True + # args = Namespace(**arguments) + # dev_command_factory(args).run() + + @pytest.mark.xdist_group(name="cli-dev") + def test_3_cleanup(self): + arguments = copy(self.default_arguments) + arguments["cleanup"] = True + args = Namespace(**arguments) + dev_command_factory(args).run() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cli/doc.py b/tests/cli/doc.py new file mode 100644 index 00000000..85e4e190 --- /dev/null +++ b/tests/cli/doc.py @@ -0,0 +1,104 @@ +""" +Test cases for the functions and classes in package `pypots.cli.doc`. 
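
The time_out decorator defined in these CLI tests runs the wrapped function in a daemon thread and fires a callback if the thread is still alive after the given interval; it is how test_2_view_doc below caps the blocking doc server at two seconds. One caveat: Thread.setDaemon() is deprecated since Python 3.10, so `t.daemon = True` is the forward-compatible spelling. Usage sketch (long_running is a hypothetical function):

    @time_out(2, callback_func)  # give the call two seconds, then invoke callback_func
    def long_running():
        import time
        time.sleep(10)
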
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os +import threading +import unittest +from argparse import Namespace +from copy import copy + +import pytest + +from pypots.cli.doc import doc_command_factory +from pypots.utils.logging import logger +from tests.cli.config import PROJECT_ROOT_DIR + + +def callback_func(): + raise TimeoutError("Time out.") + + +def time_out(interval, callback): + def decorator(func): + def wrapper(*args, **kwargs): + t = threading.Thread(target=func, args=args, kwargs=kwargs) + t.setDaemon(True) + t.start() + t.join(interval) # wait for interval seconds + if t.is_alive(): + return threading.Timer(0, callback).start() # invoke callback() + else: + return + + return wrapper + + return decorator + + +@pytest.mark.xfail(reason="Allow tests for CLI to fail") +class TestPyPOTSCLIDoc(unittest.TestCase): + # set up the default arguments + default_arguments = { + "gene_rst": False, + "branch": "main", + "gene_html": False, + "view_doc": False, + "port": 9075, + "cleanup": False, + } + # `pypots-cli doc` must run under the project root dir + os.chdir(PROJECT_ROOT_DIR) + + @pytest.mark.xdist_group(name="cli-doc") + def test_0_gene_rst(self): + arguments = copy(self.default_arguments) + arguments["gene_rst"] = True + args = Namespace(**arguments) + doc_command_factory(args).run() + + logger.info("run again under a non-root dir") + try: + os.chdir(os.path.abspath(os.path.join(PROJECT_ROOT_DIR, "pypots"))) + doc_command_factory(args).run() + except RuntimeError: # try to run under a non-root dir, so RuntimeError will be raised + pass + except Exception as e: # other exceptions will cause an error and result in failed testing + raise e + finally: + os.chdir(PROJECT_ROOT_DIR) + + @pytest.mark.xdist_group(name="cli-doc") + def test_1_gene_html(self): + arguments = copy(self.default_arguments) + arguments["gene_html"] = True + args = Namespace(**arguments) + try: + doc_command_factory(args).run() + except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below + logger.error(e) + + @pytest.mark.xdist_group(name="cli-doc") + @time_out(2, callback_func) # wait for two seconds + def test_2_view_doc(self): + arguments = copy(self.default_arguments) + arguments["view_doc"] = True + args = Namespace(**arguments) + try: + doc_command_factory(args).run() + except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below + logger.error(e) + + @pytest.mark.xdist_group(name="cli-doc") + def test_3_cleanup(self): + arguments = copy(self.default_arguments) + arguments["cleanup"] = True + args = Namespace(**arguments) + doc_command_factory(args).run() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cli/env.py b/tests/cli/env.py new file mode 100644 index 00000000..36b5b20e --- /dev/null +++ b/tests/cli/env.py @@ -0,0 +1,49 @@ +""" +Test cases for the functions and classes in package `pypots.cli.env`. 
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os +import unittest +from argparse import Namespace +from copy import copy + +import pytest + +from pypots.cli.env import env_command_factory +from pypots.utils.logging import logger +from tests.cli.config import PROJECT_ROOT_DIR + + +@pytest.mark.xfail(reason="Allow tests for CLI to fail") +class TestPyPOTSCLIEnv(unittest.TestCase): + # set up the default arguments + default_arguments = { + "install": "optional", + "tool": "conda", + } + + # `pypots-cli env` must run under the project root dir + os.chdir(PROJECT_ROOT_DIR) + + @pytest.mark.xdist_group(name="cli-env") + def test_0_install_with_conda(self): + arguments = copy(self.default_arguments) + arguments["tool"] = "conda" + args = Namespace(**arguments) + try: + env_command_factory(args).run() + except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below + logger.error(e) + + @pytest.mark.xdist_group(name="cli-env") + def test_1_install_with_pip(self): + arguments = copy(self.default_arguments) + arguments["tool"] = "pip" + args = Namespace(**arguments) + try: + env_command_factory(args).run() + except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below + logger.error(e) diff --git a/tests/clustering/__init__.py b/tests/clustering/__init__.py new file mode 100644 index 00000000..f0b4685e --- /dev/null +++ b/tests/clustering/__init__.py @@ -0,0 +1,6 @@ +""" + +""" + +# Created by Wenjie Du +# License: GLP-v3 diff --git a/tests/clustering/config.py b/tests/clustering/config.py new file mode 100644 index 00000000..aa43d7dd --- /dev/null +++ b/tests/clustering/config.py @@ -0,0 +1,22 @@ +""" +Test configs for clustering models. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os + +from tests.global_test_config import ( + DATA, + RESULT_SAVING_DIR, +) + + +EPOCHS = 5 + +TRAIN_SET = {"X": DATA["train_X"]} +VAL_SET = {"X": DATA["val_X"]} +TEST_SET = {"X": DATA["test_X"]} + +RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering") diff --git a/tests/clustering/crli.py b/tests/clustering/crli.py new file mode 100644 index 00000000..923911fd --- /dev/null +++ b/tests/clustering/crli.py @@ -0,0 +1,103 @@ +""" +Test cases for CRLI clustering model. 
+""" + +# Created by Wenjie Du +# License: GLP-v3 + + +import os +import unittest + +import pytest + +from pypots.clustering import CRLI +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_rand_index, cal_cluster_purity +from tests.clustering.config import ( + EPOCHS, + TRAIN_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_CLUSTERING, +) +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) + + +class TestCRLI(unittest.TestCase): + logger.info("Running tests for a clustering model CRLI...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI") + model_save_name = "saved_CRLI_model.pypots" + + # initialize an Adam optimizer + G_optimizer = Adam(lr=0.001, weight_decay=1e-5) + D_optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a CRLI model + crli = CRLI( + n_steps=DATA["n_steps"], + n_features=DATA["n_features"], + n_clusters=DATA["n_classes"], + n_generator_layers=2, + rnn_hidden_size=128, + epochs=EPOCHS, + saving_path=saving_path, + G_optimizer=G_optimizer, + D_optimizer=D_optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="clustering-crli") + def test_0_fit(self): + self.crli.fit(TRAIN_SET) + + @pytest.mark.xdist_group(name="clustering-crli") + def test_1_parameters(self): + assert hasattr(self.crli, "model") and self.crli.model is not None + + assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None + assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None + + assert hasattr(self.crli, "best_loss") + self.assertNotEqual(self.crli.best_loss, float("inf")) + + assert ( + hasattr(self.crli, "best_model_dict") + and self.crli.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="clustering-crli") + def test_2_cluster(self): + clustering = self.crli.cluster(TEST_SET) + RI = cal_rand_index(clustering, DATA["test_y"]) + CP = cal_cluster_purity(clustering, DATA["test_y"]) + logger.info(f"RI: {RI}\nCP: {CP}") + + @pytest.mark.xdist_group(name="clustering-crli") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.crli) + + # save the trained model into file, and check if the path exists + self.crli.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.crli.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_clustering.py b/tests/clustering/vader.py similarity index 51% rename from tests/test_clustering.py rename to tests/clustering/vader.py index bbd4d014..71a6a91d 100644 --- a/tests/test_clustering.py +++ b/tests/clustering/vader.py @@ -1,5 +1,5 @@ """ -Test cases for clustering models. +Test cases for VaDER clustering model. 
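
The clustering tests above score predictions with the rand index and cluster purity helpers from pypots.utils.metrics; both take predicted cluster assignments first and ground-truth labels second. A toy check with values worked out by hand:

    import numpy as np
    from pypots.utils.metrics import cal_rand_index, cal_cluster_purity

    clustering = np.array([0, 0, 1, 1])  # predicted cluster ids
    truth = np.array([1, 1, 0, 0])       # ground-truth labels
    print(cal_rand_index(clustering, truth))      # 1.0 -- same partition up to relabeling
    print(cal_cluster_purity(clustering, truth))  # 1.0 -- every cluster is pure
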
""" # Created by Wenjie Du @@ -12,94 +12,22 @@ import numpy as np import pytest -from pypots.clustering import VaDER, CRLI +from pypots.clustering import VaDER from pypots.optim import Adam from pypots.utils.logging import logger from pypots.utils.metrics import cal_rand_index, cal_cluster_purity +from tests.clustering.config import ( + EPOCHS, + TRAIN_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_CLUSTERING, +) from tests.global_test_config import ( DATA, - RESULT_SAVING_DIR, + DEVICE, check_tb_and_model_checkpoints_existence, ) -EPOCHS = 5 - -TRAIN_SET = {"X": DATA["train_X"]} -VAL_SET = {"X": DATA["val_X"]} -TEST_SET = {"X": DATA["test_X"]} - -RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering") - - -class TestCRLI(unittest.TestCase): - logger.info("Running tests for a clustering model CRLI...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI") - model_save_name = "saved_CRLI_model.pypots" - - # initialize an Adam optimizer - G_optimizer = Adam(lr=0.001, weight_decay=1e-5) - D_optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a CRLI model - crli = CRLI( - n_steps=DATA["n_steps"], - n_features=DATA["n_features"], - n_clusters=DATA["n_classes"], - n_generator_layers=2, - rnn_hidden_size=128, - epochs=EPOCHS, - saving_path=saving_path, - G_optimizer=G_optimizer, - D_optimizer=D_optimizer, - ) - - @pytest.mark.xdist_group(name="clustering-crli") - def test_0_fit(self): - self.crli.fit(TRAIN_SET) - - @pytest.mark.xdist_group(name="clustering-crli") - def test_1_parameters(self): - assert hasattr(self.crli, "model") and self.crli.model is not None - - assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None - assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None - - assert hasattr(self.crli, "best_loss") - self.assertNotEqual(self.crli.best_loss, float("inf")) - - assert ( - hasattr(self.crli, "best_model_dict") - and self.crli.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="clustering-crli") - def test_2_cluster(self): - clustering = self.crli.cluster(TEST_SET) - RI = cal_rand_index(clustering, DATA["test_y"]) - CP = cal_cluster_purity(clustering, DATA["test_y"]) - logger.info(f"RI: {RI}\nCP: {CP}") - - @pytest.mark.xdist_group(name="clustering-crli") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.crli) - - # save the trained model into file, and check if the path exists - self.crli.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.crli.load_model(saved_model_path) - class TestVaDER(unittest.TestCase): logger.info("Running tests for a clustering model Transformer...") @@ -120,8 +48,9 @@ class TestVaDER(unittest.TestCase): d_mu_stddev=5, pretrain_epochs=20, epochs=EPOCHS, - saving_path=saving_path, optimizer=optimizer, + saving_path=saving_path, + device=DEVICE, ) @pytest.mark.xdist_group(name="clustering-vader") diff --git a/tests/data/__init__.py b/tests/data/__init__.py new file mode 100644 index 00000000..f0b4685e --- /dev/null +++ b/tests/data/__init__.py @@ -0,0 +1,6 @@ +""" + +""" 
+ +# Created by Wenjie Du +# License: GLP-v3 diff --git a/tests/test_data.py b/tests/data/lazy_loading_strategy.py similarity index 56% rename from tests/test_data.py rename to tests/data/lazy_loading_strategy.py index 27531098..8db1080c 100644 --- a/tests/test_data.py +++ b/tests/data/lazy_loading_strategy.py @@ -8,31 +8,28 @@ import os import unittest -import h5py import pytest from pypots.classification import BRITS, GRUD +from pypots.data.saving import save_dict_into_h5 from pypots.imputation import SAITS -from tests.global_test_config import DATA, DATA_SAVING_DIR from pypots.utils.logging import logger +from tests.global_test_config import DATA, DATA_SAVING_DIR - -TRAIN_SET = f"{DATA_SAVING_DIR}/train_set.h5" -VAL_SET = f"{DATA_SAVING_DIR}/val_set.h5" -TEST_SET = f"{DATA_SAVING_DIR}/test_set.h5" -IMPUTATION_TRAIN_SET = f"{DATA_SAVING_DIR}/imputation_train_set.h5" -IMPUTATION_VAL_SET = f"{DATA_SAVING_DIR}/imputation_val_set.h5" +TRAIN_SET_NAME = "train_set.h5" +TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/{TRAIN_SET_NAME}" +VAL_SET_NAME = "val_set.h5" +VAL_SET_PATH = f"{DATA_SAVING_DIR}/{VAL_SET_NAME}" +TEST_SET_NAME = "test_set.h5" +TEST_SET_PATH = f"{DATA_SAVING_DIR}/{TEST_SET_NAME}" +IMPUTATION_TRAIN_SET_NAME = "imputation_train_set.h5" +IMPUTATION_TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/{IMPUTATION_TRAIN_SET_NAME}" +IMPUTATION_VAL_SET_NAME = "imputation_val_set.h5" +IMPUTATION_VAL_SET_PATH = f"{DATA_SAVING_DIR}/{IMPUTATION_VAL_SET_NAME}" EPOCHS = 1 -def save_data_set_into_h5(data, path): - with h5py.File(path, "w") as hf: - for i in data.keys(): - tp = int if i == "y" else "float32" - hf.create_dataset(i, data=data[i].astype(tp)) - - class TestLazyLoadingClasses(unittest.TestCase): logger.info("Running tests for Dataset classes with lazy-loading strategy...") @@ -73,53 +70,63 @@ def test_0_save_datasets_into_files(self): # create the dir for saving files os.makedirs(DATA_SAVING_DIR, exist_ok=True) - if not os.path.exists(TRAIN_SET): - save_data_set_into_h5( - {"X": DATA["train_X"], "y": DATA["train_y"].astype(int)}, TRAIN_SET + if not os.path.exists(TRAIN_SET_PATH): + save_dict_into_h5( + {"X": DATA["train_X"], "y": DATA["train_y"].astype(float)}, + DATA_SAVING_DIR, + TRAIN_SET_NAME, ) - if not os.path.exists(VAL_SET): - save_data_set_into_h5( - {"X": DATA["val_X"], "y": DATA["val_y"].astype(int)}, VAL_SET + if not os.path.exists(VAL_SET_PATH): + save_dict_into_h5( + {"X": DATA["val_X"], "y": DATA["val_y"].astype(float)}, + DATA_SAVING_DIR, + VAL_SET_NAME, ) - if not os.path.exists(IMPUTATION_TRAIN_SET): - save_data_set_into_h5({"X": DATA["train_X"]}, IMPUTATION_TRAIN_SET) + if not os.path.exists(IMPUTATION_TRAIN_SET_PATH): + save_dict_into_h5( + {"X": DATA["train_X"]}, DATA_SAVING_DIR, IMPUTATION_TRAIN_SET_NAME + ) - if not os.path.exists(IMPUTATION_VAL_SET): - save_data_set_into_h5( + if not os.path.exists(IMPUTATION_VAL_SET_PATH): + save_dict_into_h5( { "X": DATA["val_X"], "X_intact": DATA["val_X_intact"], "indicating_mask": DATA["val_X_indicating_mask"], }, - IMPUTATION_VAL_SET, + DATA_SAVING_DIR, + IMPUTATION_VAL_SET_NAME, ) - if not os.path.exists(TEST_SET): - save_data_set_into_h5( + if not os.path.exists(TEST_SET_PATH): + save_dict_into_h5( { "X": DATA["test_X"], "X_intact": DATA["test_X_intact"], "indicating_mask": DATA["test_X_indicating_mask"], }, - TEST_SET, + DATA_SAVING_DIR, + TEST_SET_NAME, ) @pytest.mark.xdist_group(name="data-lazy-loading") def test_1_DatasetForMIT_BaseDataset(self): - self.saits.fit(train_set=IMPUTATION_TRAIN_SET, val_set=IMPUTATION_VAL_SET) - _ = 
self.saits.impute(X=TEST_SET) + self.saits.fit( + train_set=IMPUTATION_TRAIN_SET_PATH, val_set=IMPUTATION_VAL_SET_PATH + ) + _ = self.saits.impute(X=TEST_SET_PATH) @pytest.mark.xdist_group(name="data-lazy-loading") def test_2_DatasetForBRITS(self): - self.brits.fit(train_set=TRAIN_SET, val_set=VAL_SET) - _ = self.brits.classify(X=TEST_SET) + self.brits.fit(train_set=TRAIN_SET_PATH, val_set=VAL_SET_PATH) + _ = self.brits.classify(X=TEST_SET_PATH) @pytest.mark.xdist_group(name="data-lazy-loading") def test_3_DatasetForGRUD(self): - self.grud.fit(train_set=TRAIN_SET, val_set=VAL_SET) - _ = self.grud.classify(X=TEST_SET) + self.grud.fit(train_set=TRAIN_SET_PATH, val_set=VAL_SET_PATH) + _ = self.grud.classify(X=TEST_SET_PATH) if __name__ == "__main__": diff --git a/tests/forecasting/__init__.py b/tests/forecasting/__init__.py new file mode 100644 index 00000000..f0b4685e --- /dev/null +++ b/tests/forecasting/__init__.py @@ -0,0 +1,6 @@ +""" + +""" + +# Created by Wenjie Du +# License: GLP-v3 diff --git a/tests/test_forecasting.py b/tests/forecasting/bttf.py similarity index 78% rename from tests/test_forecasting.py rename to tests/forecasting/bttf.py index d2e8e14b..8e6946e7 100644 --- a/tests/test_forecasting.py +++ b/tests/forecasting/bttf.py @@ -1,5 +1,5 @@ """ -Test cases for forecasting models. +Test cases for BTTF forecasting model. """ # Created by Wenjie Du @@ -12,12 +12,13 @@ from pypots.forecasting import BTTF from pypots.utils.logging import logger from pypots.utils.metrics import cal_mae +from tests.forecasting.config import ( + TEST_SET, + TEST_SET_INTACT, + N_PRED_STEP, +) from tests.global_test_config import DATA -EPOCHS = 5 -N_PRED_STEP = 4 -TEST_SET = {"X": DATA["test_X"][:, :-N_PRED_STEP]} - class TestBTTF(unittest.TestCase): logger.info("Running tests for a forecasting model BTTF...") @@ -37,8 +38,7 @@ class TestBTTF(unittest.TestCase): @pytest.mark.xdist_group(name="forecasting-bttf") def test_0_forecasting(self): predictions = self.bttf.forecast(TEST_SET) - logger.info(f"prediction shape: {predictions.shape}") - mae = cal_mae(predictions, DATA["test_X_intact"][:, -N_PRED_STEP:]) + mae = cal_mae(predictions, TEST_SET_INTACT["X"][:, -N_PRED_STEP:]) logger.info(f"prediction MAE: {mae}") diff --git a/tests/forecasting/config.py b/tests/forecasting/config.py new file mode 100644 index 00000000..0a2a9e78 --- /dev/null +++ b/tests/forecasting/config.py @@ -0,0 +1,23 @@ +""" +Test configs for forecasting models. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import os + +from tests.global_test_config import ( + DATA, + RESULT_SAVING_DIR, +) + +EPOCHS = 5 +N_PRED_STEP = 4 + +TRAIN_SET = {"X": DATA["train_X"]} +VAL_SET = {"X": DATA["val_X"]} +TEST_SET = {"X": DATA["test_X"][:, :-N_PRED_STEP]} +TEST_SET_INTACT = {"X": DATA["test_X_intact"]} + +RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "forecasting") diff --git a/tests/global_test_config.py b/tests/global_test_config.py index f3349483..5e152734 100644 --- a/tests/global_test_config.py +++ b/tests/global_test_config.py @@ -7,7 +7,10 @@ import os +import torch + from pypots.data.generating import gene_incomplete_random_walk_dataset +from pypots.utils.logging import logger # Generate the unified data for testing and cache it first, DATA here is a singleton # Otherwise, file lock will cause bug if running test parallely with pytest-xdist. 
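
The lazy-loading tests above exercise the path-based API: instead of in-memory dictionaries, fit() and the inference methods accept a path to an h5 file and read samples on demand. A minimal sketch, assuming h5 files with the keys the tests write ("X", plus "X_intact" and "indicating_mask" for the imputation val set); the paths and hyperparameters here are placeholders, not values from the test suite:

    from pypots.imputation import SAITS

    saits = SAITS(n_steps=24, n_features=10, n_layers=2, d_model=256,
                  d_inner=128, n_heads=4, d_k=64, d_v=64, dropout=0.1, epochs=1)
    saits.fit(train_set="datasets/imputation_train_set.h5",
              val_set="datasets/imputation_val_set.h5")
    imputed = saits.impute(X="datasets/test_set.h5")
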
@@ -20,6 +23,16 @@ RESULT_SAVING_DIR = "testing_results" +# set DEVICES to None if no cuda device is available, to avoid initialization failed while importing test classes +cuda_devices = [torch.device(i) for i in range(torch.cuda.device_count())] +if len(cuda_devices) > 2: + logger.info("❗️Detected multiple cuda devices, using all of them to run testing.") + DEVICE = cuda_devices +else: + # if having no multiple cuda devices, leave it as None to use the default device + DEVICE = None + + def check_tb_and_model_checkpoints_existence(model): # check the tensorboard file existence saved_files = os.listdir(model.saving_path) diff --git a/tests/imputation/__init__.py b/tests/imputation/__init__.py new file mode 100644 index 00000000..f0b4685e --- /dev/null +++ b/tests/imputation/__init__.py @@ -0,0 +1,6 @@ +""" + +""" + +# Created by Wenjie Du +# License: GLP-v3 diff --git a/tests/imputation/brits.py b/tests/imputation/brits.py new file mode 100644 index 00000000..bf0a70c3 --- /dev/null +++ b/tests/imputation/brits.py @@ -0,0 +1,104 @@ +""" +Test cases for BRITS imputation model. +""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.imputation import BRITS +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) +from tests.imputation.config import ( + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_IMPUTATION, + EPOCHS, +) + + +class TestBRITS(unittest.TestCase): + logger.info("Running tests for an imputation model BRITS...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS") + model_save_name = "saved_BRITS_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a BRITS model + brits = BRITS( + DATA["n_steps"], + DATA["n_features"], + 256, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="imputation-brits") + def test_0_fit(self): + self.brits.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="imputation-brits") + def test_1_impute(self): + imputed_X = self.brits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." 
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"BRITS test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-brits")
+    def test_2_parameters(self):
+        assert hasattr(self.brits, "model") and self.brits.model is not None
+
+        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
+
+        assert hasattr(self.brits, "best_loss")
+        self.assertNotEqual(self.brits.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.brits, "best_model_dict")
+            and self.brits.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-brits")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.brits)
+
+        # save the trained model into file, and check if the path exists
+        self.brits.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # test loading the saved model, not necessary, but need to test
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.brits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/config.py b/tests/imputation/config.py
new file mode 100644
index 00000000..c225598b
--- /dev/null
+++ b/tests/imputation/config.py
@@ -0,0 +1,25 @@
+"""
+Test configs for imputation models.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+import os
+
+from tests.global_test_config import (
+    DATA,
+    RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {
+    "X": DATA["val_X"],
+    "X_intact": DATA["val_X_intact"],
+    "indicating_mask": DATA["val_X_indicating_mask"],
+}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
diff --git a/tests/imputation/gpvae.py b/tests/imputation/gpvae.py
new file mode 100644
index 00000000..9c59c5b2
--- /dev/null
+++ b/tests/imputation/gpvae.py
@@ -0,0 +1,104 @@
+"""
+Test cases for GP-VAE imputation model.
+""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.imputation import GPVAE +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) +from tests.imputation.config import ( + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_IMPUTATION, + EPOCHS, +) + + +class TestGPVAE(unittest.TestCase): + logger.info("Running tests for an imputation model GP-VAE...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "GP-VAE") + model_save_name = "saved_GPVAE_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a GP-VAE model + gp_vae = GPVAE( + DATA["n_steps"], + DATA["n_features"], + 256, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="imputation-gpvae") + def test_0_fit(self): + self.gp_vae.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="imputation-gpvae") + def test_1_impute(self): + imputed_X = self.gp_vae.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"GP-VAE test_MAE: {test_MAE}") + + @pytest.mark.xdist_group(name="imputation-gpvae") + def test_2_parameters(self): + assert hasattr(self.gp_vae, "model") and self.gp_vae.model is not None + + assert hasattr(self.gp_vae, "optimizer") and self.gp_vae.optimizer is not None + + assert hasattr(self.gp_vae, "best_loss") + self.assertNotEqual(self.gp_vae.best_loss, float("inf")) + + assert ( + hasattr(self.gp_vae, "best_model_dict") + and self.gp_vae.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="imputation-gpvae") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.gp_vae) + + # save the trained model into file, and check if the path exists + self.gp_vae.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.gp_vae.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/imputation/locf.py b/tests/imputation/locf.py new file mode 100644 index 00000000..8e54fbe0 --- /dev/null +++ b/tests/imputation/locf.py @@ -0,0 +1,46 @@ +""" +Test cases for LOCF imputation method. 
+""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import unittest + +import numpy as np +import pytest + +from pypots.imputation import LOCF +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, +) +from tests.imputation.config import ( + TEST_SET, +) + + +class TestLOCF(unittest.TestCase): + logger.info("Running tests for an imputation model LOCF...") + locf = LOCF(nan=0) + + @pytest.mark.xdist_group(name="imputation-locf") + def test_0_impute(self): + test_X_imputed = self.locf.impute(TEST_SET) + assert not np.isnan( + test_X_imputed + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"LOCF test_MAE: {test_MAE}") + + @pytest.mark.xdist_group(name="imputation-locf") + def test_1_parameters(self): + assert hasattr(self.locf, "nan") and self.locf.nan is not None + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/imputation/mrnn.py b/tests/imputation/mrnn.py new file mode 100644 index 00000000..681a9121 --- /dev/null +++ b/tests/imputation/mrnn.py @@ -0,0 +1,104 @@ +""" +Test cases for MRNN imputation model. +""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.imputation import MRNN +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) +from tests.imputation.config import ( + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_IMPUTATION, + EPOCHS, +) + + +class TestMRNN(unittest.TestCase): + logger.info("Running tests for an imputation model MRNN...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN") + model_save_name = "saved_MRNN_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a MRNN model + mrnn = MRNN( + DATA["n_steps"], + DATA["n_features"], + 256, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="imputation-mrnn") + def test_0_fit(self): + self.mrnn.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="imputation-mrnn") + def test_1_impute(self): + imputed_X = self.mrnn.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." 
+ test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"MRNN test_MAE: {test_MAE}") + + @pytest.mark.xdist_group(name="imputation-mrnn") + def test_2_parameters(self): + assert hasattr(self.mrnn, "model") and self.mrnn.model is not None + + assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None + + assert hasattr(self.mrnn, "best_loss") + self.assertNotEqual(self.mrnn.best_loss, float("inf")) + + assert ( + hasattr(self.mrnn, "best_model_dict") + and self.mrnn.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="imputation-mrnn") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.mrnn) + + # save the trained model into file, and check if the path exists + self.mrnn.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.mrnn.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/imputation/saits.py b/tests/imputation/saits.py new file mode 100644 index 00000000..647e8657 --- /dev/null +++ b/tests/imputation/saits.py @@ -0,0 +1,110 @@ +""" +Test cases for SAITS imputation model. +""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.imputation import SAITS +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) +from tests.imputation.config import ( + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_IMPUTATION, + EPOCHS, +) + + +class TestSAITS(unittest.TestCase): + logger.info("Running tests for an imputation model SAITS...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS") + model_save_name = "saved_saits_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a SAITS model + saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=2, + d_model=256, + d_inner=128, + n_heads=4, + d_k=64, + d_v=64, + dropout=0.1, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="imputation-saits") + def test_0_fit(self): + self.saits.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="imputation-saits") + def test_1_impute(self): + imputed_X = self.saits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." 
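+        # imputation quality is measured below as masked MAE against the intact test data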
+ test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"SAITS test_MAE: {test_MAE}") + + @pytest.mark.xdist_group(name="imputation-saits") + def test_2_parameters(self): + assert hasattr(self.saits, "model") and self.saits.model is not None + + assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None + + assert hasattr(self.saits, "best_loss") + self.assertNotEqual(self.saits.best_loss, float("inf")) + + assert ( + hasattr(self.saits, "best_model_dict") + and self.saits.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="imputation-saits") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.saits) + + # save the trained model into file, and check if the path exists + self.saits.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.saits.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/imputation/transformer.py b/tests/imputation/transformer.py new file mode 100644 index 00000000..965b2cf7 --- /dev/null +++ b/tests/imputation/transformer.py @@ -0,0 +1,113 @@ +""" +Test cases for Transformer imputation model. +""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.imputation import Transformer +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) +from tests.imputation.config import ( + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_IMPUTATION, + EPOCHS, +) + + +class TestTransformer(unittest.TestCase): + logger.info("Running tests for an imputation model Transformer...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer") + model_save_name = "saved_transformer_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a Transformer model + transformer = Transformer( + DATA["n_steps"], + DATA["n_features"], + n_layers=2, + d_model=256, + d_inner=128, + n_heads=4, + d_k=64, + d_v=64, + dropout=0.1, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="imputation-transformer") + def test_0_fit(self): + self.transformer.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="imputation-transformer") + def test_1_impute(self): + imputed_X = self.transformer.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." 
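+        # the masked MAE below is directly comparable with the other imputation tests,
+        # since they all share TEST_SET and the same masks from the unified DATA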
+ test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"Transformer test_MAE: {test_MAE}") + + @pytest.mark.xdist_group(name="imputation-transformer") + def test_2_parameters(self): + assert hasattr(self.transformer, "model") and self.transformer.model is not None + + assert ( + hasattr(self.transformer, "optimizer") + and self.transformer.optimizer is not None + ) + + assert hasattr(self.transformer, "best_loss") + self.assertNotEqual(self.transformer.best_loss, float("inf")) + + assert ( + hasattr(self.transformer, "best_model_dict") + and self.transformer.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="imputation-transformer") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.transformer) + + # save the trained model into file, and check if the path exists + self.transformer.save_model( + saving_dir=self.saving_path, file_name=self.model_save_name + ) + + # test loading the saved model, not necessary, but need to test + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.transformer.load_model(saved_model_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/imputation/usgan.py b/tests/imputation/usgan.py new file mode 100644 index 00000000..c91a17a1 --- /dev/null +++ b/tests/imputation/usgan.py @@ -0,0 +1,111 @@ +""" +Test cases for US-GAN imputation model. +""" + +# Created by Wenjie Du +# License: GPL-v3 + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.imputation import USGAN +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import ( + DATA, + DEVICE, + check_tb_and_model_checkpoints_existence, +) +from tests.imputation.config import ( + TRAIN_SET, + VAL_SET, + TEST_SET, + RESULT_SAVING_DIR_FOR_IMPUTATION, + EPOCHS, +) + + +class TestUSGAN(unittest.TestCase): + logger.info("Running tests for an imputation model US-GAN...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "US-GAN") + model_save_name = "saved_USGAN_model.pypots" + + # initialize an Adam optimizer + G_optimizer = Adam(lr=0.001, weight_decay=1e-5) + D_optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a US-GAN model + us_gan = USGAN( + DATA["n_steps"], + DATA["n_features"], + 256, + epochs=EPOCHS, + saving_path=saving_path, + G_optimizer=G_optimizer, + D_optimizer=D_optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="imputation-usgan") + def test_0_fit(self): + self.us_gan.fit(TRAIN_SET, VAL_SET) + + @pytest.mark.xdist_group(name="imputation-usgan") + def test_1_impute(self): + imputed_X = self.us_gan.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." 
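+        # as in the other imputation tests, only deliberately-masked positions count
+        # toward the MAE computed below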
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"US-GAN test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-usgan")
+    def test_2_parameters(self):
+        assert hasattr(self.us_gan, "model") and self.us_gan.model is not None
+
+        assert (
+            hasattr(self.us_gan, "G_optimizer") and self.us_gan.G_optimizer is not None
+        )
+        assert (
+            hasattr(self.us_gan, "D_optimizer") and self.us_gan.D_optimizer is not None
+        )
+
+        assert hasattr(self.us_gan, "best_loss")
+        self.assertNotEqual(self.us_gan.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.us_gan, "best_model_dict")
+            and self.us_gan.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-usgan")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.us_gan)
+
+        # save the trained model into file, and check if the path exists
+        self.us_gan.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # test loading the saved model, not necessary, but need to test
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.us_gan.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/__init__.py b/tests/optim/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/optim/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
diff --git a/tests/optim/adadelta.py b/tests/optim/adadelta.py
new file mode 100644
index 00000000..b69e5ea4
--- /dev/null
+++ b/tests/optim/adadelta.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adadelta.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adadelta
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdadelta(unittest.TestCase):
+    logger.info("Running tests for Adadelta...")
+
+    # initialize an Adadelta optimizer
+    adadelta = Adadelta(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=adadelta,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-adadelta")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/adagrad.py b/tests/optim/adagrad.py
new file mode 100644
index 00000000..21b4696a
--- /dev/null
+++ b/tests/optim/adagrad.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adagrad.
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import unittest + +import numpy as np +import pytest + +from pypots.imputation import SAITS +from pypots.optim import Adagrad +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import DATA +from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET + + +class TestAdagrad(unittest.TestCase): + logger.info("Running tests for Adagrad...") + + # initialize an Adagrad optimizer + adagrad = Adagrad(lr=0.001, weight_decay=1e-5) + + # initialize a SAITS model for testing DatasetForMIT and BaseDataset + saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=1, + d_model=128, + d_inner=64, + n_heads=2, + d_k=64, + d_v=64, + dropout=0.1, + optimizer=adagrad, + epochs=EPOCHS, + ) + + @pytest.mark.xdist_group(name="optim-adagrad") + def test_0_fit(self): + self.saits.fit(TRAIN_SET, VAL_SET) + imputed_X = self.saits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"SAITS test_MAE: {test_MAE}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/optim/adam.py b/tests/optim/adam.py new file mode 100644 index 00000000..448f92b9 --- /dev/null +++ b/tests/optim/adam.py @@ -0,0 +1,56 @@ +""" +Test cases for the optimizer Adam. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import unittest + +import numpy as np +import pytest + +from pypots.imputation import SAITS +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import DATA +from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET + + +class TestAdam(unittest.TestCase): + logger.info("Running tests for Adam...") + + # initialize an Adam optimizer + adam = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a SAITS model for testing DatasetForMIT and BaseDataset + saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=1, + d_model=128, + d_inner=64, + n_heads=2, + d_k=64, + d_v=64, + dropout=0.1, + optimizer=adam, + epochs=EPOCHS, + ) + + @pytest.mark.xdist_group(name="optim-adam") + def test_0_fit(self): + self.saits.fit(TRAIN_SET, VAL_SET) + imputed_X = self.saits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"SAITS test_MAE: {test_MAE}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/optim/adamw.py b/tests/optim/adamw.py new file mode 100644 index 00000000..a7941f43 --- /dev/null +++ b/tests/optim/adamw.py @@ -0,0 +1,56 @@ +""" +Test cases for the optimizer AdamW. 
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import unittest + +import numpy as np +import pytest + +from pypots.imputation import SAITS +from pypots.optim import AdamW +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import DATA +from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET + + +class TestAdamW(unittest.TestCase): + logger.info("Running tests for AdamW...") + + # initialize an AdamW optimizer + adamw = AdamW(lr=0.001, weight_decay=1e-5) + + # initialize a SAITS model for testing DatasetForMIT and BaseDataset + saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=1, + d_model=128, + d_inner=64, + n_heads=2, + d_k=64, + d_v=64, + dropout=0.1, + optimizer=adamw, + epochs=EPOCHS, + ) + + @pytest.mark.xdist_group(name="optim-adamw") + def test_0_fit(self): + self.saits.fit(TRAIN_SET, VAL_SET) + imputed_X = self.saits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"SAITS test_MAE: {test_MAE}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/optim/config.py b/tests/optim/config.py new file mode 100644 index 00000000..a0391027 --- /dev/null +++ b/tests/optim/config.py @@ -0,0 +1,19 @@ +""" +Test configs for optimizers. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +from tests.global_test_config import DATA + +TRAIN_SET = {"X": DATA["train_X"]} +VAL_SET = { + "X": DATA["val_X"], + "X_intact": DATA["val_X_intact"], + "indicating_mask": DATA["val_X_indicating_mask"], +} +TEST_SET = {"X": DATA["test_X"]} + + +EPOCHS = 1 diff --git a/tests/optim/rmsprop.py b/tests/optim/rmsprop.py new file mode 100644 index 00000000..1fe61a0d --- /dev/null +++ b/tests/optim/rmsprop.py @@ -0,0 +1,56 @@ +""" +Test cases for the optimizer RMSprop. +""" + +# Created by Wenjie Du +# License: GLP-v3 + +import unittest + +import numpy as np +import pytest + +from pypots.imputation import SAITS +from pypots.optim import RMSprop +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import DATA +from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET + + +class TestRMSprop(unittest.TestCase): + logger.info("Running tests for RMSprop...") + + # initialize a RMSprop optimizer + rmsprop = RMSprop(lr=0.001, weight_decay=1e-5) + + # initialize a SAITS model for testing DatasetForMIT and BaseDataset + saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=1, + d_model=128, + d_inner=64, + n_heads=2, + d_k=64, + d_v=64, + dropout=0.1, + optimizer=rmsprop, + epochs=EPOCHS, + ) + + @pytest.mark.xdist_group(name="optim-rmsprop") + def test_0_fit(self): + self.saits.fit(TRAIN_SET, VAL_SET) + imputed_X = self.saits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"SAITS test_MAE: {test_MAE}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/optim/sgd.py b/tests/optim/sgd.py new file mode 100644 index 00000000..4b1c1998 --- /dev/null +++ b/tests/optim/sgd.py @@ -0,0 +1,56 @@ +""" +Test cases for the optimizer SGD. 
+""" + +# Created by Wenjie Du +# License: GLP-v3 + +import unittest + +import numpy as np +import pytest + +from pypots.imputation import SAITS +from pypots.optim import SGD +from pypots.utils.logging import logger +from pypots.utils.metrics import cal_mae +from tests.global_test_config import DATA +from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET + + +class TestSGD(unittest.TestCase): + logger.info("Running tests for SGD...") + + # initialize a SGD optimizer + sgd = SGD(lr=0.001, weight_decay=1e-5) + + # initialize a SAITS model for testing DatasetForMIT and BaseDataset + saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=1, + d_model=128, + d_inner=64, + n_heads=2, + d_k=64, + d_v=64, + dropout=0.1, + optimizer=sgd, + epochs=EPOCHS, + ) + + @pytest.mark.xdist_group(name="optim-sgd") + def test_0_fit(self): + self.saits.fit(TRAIN_SET, VAL_SET) + imputed_X = self.saits.impute(TEST_SET) + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." + test_MAE = cal_mae( + imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] + ) + logger.info(f"SAITS test_MAE: {test_MAE}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_classification.py b/tests/test_classification.py deleted file mode 100644 index 2ef9c6d1..00000000 --- a/tests/test_classification.py +++ /dev/null @@ -1,256 +0,0 @@ -""" -Test cases for classification models. -""" - -# Created by Wenjie Du -# License: GLP-v3 - -import os -import unittest - -import pytest - -from pypots.classification import BRITS, GRUD, Raindrop -from pypots.optim import Adam -from pypots.utils.logging import logger -from pypots.utils.metrics import cal_binary_classification_metrics -from tests.global_test_config import ( - DATA, - RESULT_SAVING_DIR, - check_tb_and_model_checkpoints_existence, -) - -EPOCHS = 5 - -TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]} -VAL_SET = {"X": DATA["val_X"], "y": DATA["val_y"]} -TEST_SET = {"X": DATA["test_X"]} - -RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification") - - -class TestBRITS(unittest.TestCase): - logger.info("Running tests for a classification model BRITS...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS") - model_save_name = "saved_BRITS_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a BRITS model - brits = BRITS( - DATA["n_steps"], - DATA["n_features"], - n_classes=DATA["n_classes"], - rnn_hidden_size=256, - epochs=EPOCHS, - saving_path=saving_path, - model_saving_strategy="better", - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="classification-brits") - def test_0_fit(self): - self.brits.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="classification-brits") - def test_1_classify(self): - predictions = self.brits.classify(TEST_SET) - metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) - logger.info( - f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n' - ) - assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" - - @pytest.mark.xdist_group(name="classification-brits") - def test_2_parameters(self): - assert hasattr(self.brits, "model") and self.brits.model is not None - - assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None - - 
assert hasattr(self.brits, "best_loss") - self.assertNotEqual(self.brits.best_loss, float("inf")) - - assert ( - hasattr(self.brits, "best_model_dict") - and self.brits.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="classification-brits") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.brits) - - # save the trained model into file, and check if the path exists - self.brits.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.brits.load_model(saved_model_path) - - -class TestGRUD(unittest.TestCase): - logger.info("Running tests for a classification model GRUD...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD") - model_save_name = "saved_GRUD_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a GRUD model - grud = GRUD( - DATA["n_steps"], - DATA["n_features"], - n_classes=DATA["n_classes"], - rnn_hidden_size=256, - epochs=EPOCHS, - saving_path=saving_path, - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="classification-grud") - def test_0_fit(self): - self.grud.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="classification-grud") - def test_1_classify(self): - predictions = self.grud.classify(TEST_SET) - metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) - logger.info( - f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n' - ) - assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" - - @pytest.mark.xdist_group(name="classification-grud") - def test_2_parameters(self): - assert hasattr(self.grud, "model") and self.grud.model is not None - - assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None - - assert hasattr(self.grud, "best_loss") - self.assertNotEqual(self.grud.best_loss, float("inf")) - - assert ( - hasattr(self.grud, "best_model_dict") - and self.grud.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="classification-grud") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.grud) - - # save the trained model into file, and check if the path exists - self.grud.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.grud.load_model(saved_model_path) - - -class TestRaindrop(unittest.TestCase): - logger.info("Running tests for a classification model Raindrop...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop") - model_save_name = "saved_Raindrop_model.pypots" - - # initialize a Raindrop model - 
raindrop = Raindrop( - DATA["n_steps"], - DATA["n_features"], - DATA["n_classes"], - n_layers=2, - d_model=DATA["n_features"] * 4, - d_inner=256, - n_heads=2, - dropout=0.3, - d_static=0, - aggregation="mean", - sensor_wise_mask=False, - static=False, - epochs=EPOCHS, - saving_path=saving_path, - ) - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_0_fit(self): - self.raindrop.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_1_classify(self): - predictions = self.raindrop.classify(TEST_SET) - metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) - logger.info( - f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n' - ) - assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_2_parameters(self): - assert hasattr(self.raindrop, "model") and self.raindrop.model is not None - - assert ( - hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None - ) - - assert hasattr(self.raindrop, "best_loss") - self.assertNotEqual(self.raindrop.best_loss, float("inf")) - - assert ( - hasattr(self.raindrop, "best_model_dict") - and self.raindrop.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.raindrop) - - # save the trained model into file, and check if the path exists - self.raindrop.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.raindrop.load_model(saved_model_path) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index 4e9e9927..00000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -Test cases for the functions and classes in package `pypots.cli`. 
-""" - -# Created by Wenjie Du -# License: GLP-v3 - -import os -import threading -import unittest -from argparse import Namespace -from copy import copy - -import pytest - -from pypots.cli.dev import dev_command_factory -from pypots.cli.doc import doc_command_factory -from pypots.cli.env import env_command_factory -from pypots.utils.logging import logger - -PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), "../..")) - - -def callback_func(): - raise TimeoutError("Time out.") - - -def time_out(interval, callback): - def decorator(func): - def wrapper(*args, **kwargs): - t = threading.Thread(target=func, args=args, kwargs=kwargs) - t.setDaemon(True) - t.start() - t.join(interval) # wait for interval seconds - if t.is_alive(): - return threading.Timer(0, callback).start() # invoke callback() - else: - return - - return wrapper - - return decorator - - -@pytest.mark.xfail(reason="Allow tests for CLI to fail") -class TestPyPOTSCLIDev(unittest.TestCase): - # set up the default arguments - default_arguments = { - "build": False, - "cleanup": False, - "run_tests": False, - "k": None, - "show_coverage": False, - "lint_code": False, - } - # `pypots-cli dev` must run under the project root dir - os.chdir(PROJECT_ROOT_DIR) - - @pytest.mark.xdist_group(name="cli-dev") - def test_0_build(self): - arguments = copy(self.default_arguments) - arguments["build"] = True - args = Namespace(**arguments) - dev_command_factory(args).run() - - @pytest.mark.xdist_group(name="cli-dev") - def test_1_run_tests(self): - arguments = copy(self.default_arguments) - arguments["run_tests"] = True - arguments["k"] = "try_to_find_a_non_existing_test_case" - args = Namespace(**arguments) - try: - dev_command_factory(args).run() - except RuntimeError: # try to find a non-existing test case, so RuntimeError will be raised - pass - except Exception as e: # other exceptions will cause an error and result in failed testing - raise e - - # Don't test --lint-code because Black will reformat the code and cause error when generating the coverage report - # @pytest.mark.xdist_group(name="cli-dev") - # def test_2_lint_code(self): - # arguments = copy(self.default_arguments) - # arguments["lint_code"] = True - # args = Namespace(**arguments) - # dev_command_factory(args).run() - - @pytest.mark.xdist_group(name="cli-dev") - def test_3_cleanup(self): - arguments = copy(self.default_arguments) - arguments["cleanup"] = True - args = Namespace(**arguments) - dev_command_factory(args).run() - - -@pytest.mark.xfail(reason="Allow tests for CLI to fail") -class TestPyPOTSCLIDoc(unittest.TestCase): - # set up the default arguments - default_arguments = { - "gene_rst": False, - "branch": "main", - "gene_html": False, - "view_doc": False, - "port": 9075, - "cleanup": False, - } - # `pypots-cli doc` must run under the project root dir - os.chdir(PROJECT_ROOT_DIR) - - @pytest.mark.xdist_group(name="cli-doc") - def test_0_gene_rst(self): - arguments = copy(self.default_arguments) - arguments["gene_rst"] = True - args = Namespace(**arguments) - doc_command_factory(args).run() - - logger.info("run again under a non-root dir") - try: - os.chdir(os.path.abspath(os.path.join(PROJECT_ROOT_DIR, "pypots"))) - doc_command_factory(args).run() - except RuntimeError: # try to run under a non-root dir, so RuntimeError will be raised - pass - except Exception as e: # other exceptions will cause an error and result in failed testing - raise e - finally: - os.chdir(PROJECT_ROOT_DIR) - - @pytest.mark.xdist_group(name="cli-doc") - def 
test_1_gene_html(self): - arguments = copy(self.default_arguments) - arguments["gene_html"] = True - args = Namespace(**arguments) - try: - doc_command_factory(args).run() - except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below - logger.error(e) - - @pytest.mark.xdist_group(name="cli-doc") - @time_out(2, callback_func) # wait for two seconds - def test_2_view_doc(self): - arguments = copy(self.default_arguments) - arguments["view_doc"] = True - args = Namespace(**arguments) - try: - doc_command_factory(args).run() - except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below - logger.error(e) - - @pytest.mark.xdist_group(name="cli-doc") - def test_3_cleanup(self): - arguments = copy(self.default_arguments) - arguments["cleanup"] = True - args = Namespace(**arguments) - doc_command_factory(args).run() - - -@pytest.mark.xfail(reason="Allow tests for CLI to fail") -class TestPyPOTSCLIEnv(unittest.TestCase): - # set up the default arguments - default_arguments = { - "install": "optional", - "tool": "conda", - } - - # `pypots-cli env` must run under the project root dir - os.chdir(PROJECT_ROOT_DIR) - - @pytest.mark.xdist_group(name="cli-env") - def test_0_install_with_conda(self): - arguments = copy(self.default_arguments) - arguments["tool"] = "conda" - args = Namespace(**arguments) - try: - env_command_factory(args).run() - except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below - logger.error(e) - - @pytest.mark.xdist_group(name="cli-env") - def test_1_install_with_pip(self): - arguments = copy(self.default_arguments) - arguments["tool"] = "pip" - args = Namespace(**arguments) - try: - env_command_factory(args).run() - except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below - logger.error(e) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_imputation.py b/tests/test_imputation.py deleted file mode 100644 index 64a0b1ff..00000000 --- a/tests/test_imputation.py +++ /dev/null @@ -1,503 +0,0 @@ -""" -Test cases for imputation models. 
-""" - -# Created by Wenjie Du -# License: GPL-v3 - - -import os.path -import unittest - -import numpy as np -import pytest - -from pypots.imputation import ( - SAITS, - Transformer, - USGAN, - GPVAE, - BRITS, - MRNN, - LOCF, -) -from pypots.optim import Adam -from pypots.utils.logging import logger -from pypots.utils.metrics import cal_mae -from tests.global_test_config import ( - DATA, - RESULT_SAVING_DIR, - check_tb_and_model_checkpoints_existence, -) - -EPOCH = 5 - -TRAIN_SET = {"X": DATA["train_X"]} -VAL_SET = { - "X": DATA["val_X"], - "X_intact": DATA["val_X_intact"], - "indicating_mask": DATA["val_X_indicating_mask"], -} -TEST_SET = {"X": DATA["test_X"]} - -RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation") - - -class TestSAITS(unittest.TestCase): - logger.info("Running tests for an imputation model SAITS...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS") - model_save_name = "saved_saits_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=2, - d_model=256, - d_inner=128, - n_heads=4, - d_k=64, - d_v=64, - dropout=0.1, - epochs=EPOCH, - saving_path=saving_path, - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="imputation-saits") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-saits") - def test_1_impute(self): - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-saits") - def test_2_parameters(self): - assert hasattr(self.saits, "model") and self.saits.model is not None - - assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None - - assert hasattr(self.saits, "best_loss") - self.assertNotEqual(self.saits.best_loss, float("inf")) - - assert ( - hasattr(self.saits, "best_model_dict") - and self.saits.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-saits") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.saits) - - # save the trained model into file, and check if the path exists - self.saits.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.saits.load_model(saved_model_path) - - -class TestTransformer(unittest.TestCase): - logger.info("Running tests for an imputation model Transformer...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer") - model_save_name = "saved_transformer_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a Transformer model - transformer = Transformer( - DATA["n_steps"], - DATA["n_features"], - n_layers=2, - d_model=256, - d_inner=128, - n_heads=4, - d_k=64, - 
d_v=64, - dropout=0.1, - epochs=EPOCH, - saving_path=saving_path, - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_0_fit(self): - self.transformer.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_1_impute(self): - imputed_X = self.transformer.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"Transformer test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_2_parameters(self): - assert hasattr(self.transformer, "model") and self.transformer.model is not None - - assert ( - hasattr(self.transformer, "optimizer") - and self.transformer.optimizer is not None - ) - - assert hasattr(self.transformer, "best_loss") - self.assertNotEqual(self.transformer.best_loss, float("inf")) - - assert ( - hasattr(self.transformer, "best_model_dict") - and self.transformer.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.transformer) - - # save the trained model into file, and check if the path exists - self.transformer.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.transformer.load_model(saved_model_path) - - -class TestUSGAN(unittest.TestCase): - logger.info("Running tests for an imputation model US-GAN...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "US-GAN") - model_save_name = "saved_USGAN_model.pypots" - - # initialize an Adam optimizer - G_optimizer = Adam(lr=0.001, weight_decay=1e-5) - D_optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a US-GAN model - us_gan = USGAN( - DATA["n_steps"], - DATA["n_features"], - 256, - epochs=EPOCH, - saving_path=saving_path, - G_optimizer=G_optimizer, - D_optimizer=D_optimizer, - ) - - @pytest.mark.xdist_group(name="imputation-usgan") - def test_0_fit(self): - self.us_gan.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-usgan") - def test_1_impute(self): - imputed_X = self.us_gan.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"US-GAN test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-usgan") - def test_2_parameters(self): - assert hasattr(self.us_gan, "model") and self.us_gan.model is not None - - assert ( - hasattr(self.us_gan, "G_optimizer") and self.us_gan.G_optimizer is not None - ) - assert ( - hasattr(self.us_gan, "D_optimizer") and self.us_gan.D_optimizer is not None - ) - - assert hasattr(self.us_gan, "best_loss") - self.assertNotEqual(self.us_gan.best_loss, float("inf")) - - assert ( - hasattr(self.us_gan, "best_model_dict") - and self.us_gan.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-usgan") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.us_gan) - - # save the trained model into file, and check if the path exists - self.us_gan.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.us_gan.load_model(saved_model_path) - - -class TestGPVAE(unittest.TestCase): - logger.info("Running tests for an imputation model GP-VAE...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "GP-VAE") - model_save_name = "saved_GPVAE_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a GP-VAE model - gp_vae = GPVAE( - DATA["n_steps"], - DATA["n_features"], - 256, - epochs=EPOCH, - saving_path=saving_path, - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="imputation-gpvae") - def test_0_fit(self): - self.gp_vae.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-gpvae") - def test_1_impute(self): - imputed_X = self.gp_vae.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"GP-VAE test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-gpvae") - def test_2_parameters(self): - assert hasattr(self.gp_vae, "model") and self.gp_vae.model is not None - - assert hasattr(self.gp_vae, "optimizer") and self.gp_vae.optimizer is not None - - assert hasattr(self.gp_vae, "best_loss") - self.assertNotEqual(self.gp_vae.best_loss, float("inf")) - - assert ( - hasattr(self.gp_vae, "best_model_dict") - and self.gp_vae.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-GPVAE") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.gp_vae) - - # save the trained model into file, and check if the path exists - self.gp_vae.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.gp_vae.load_model(saved_model_path) - - -class TestBRITS(unittest.TestCase): - logger.info("Running tests for an imputation model BRITS...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS") - model_save_name = "saved_BRITS_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a BRITS model - brits = BRITS( - DATA["n_steps"], - DATA["n_features"], - 256, - epochs=EPOCH, - saving_path=saving_path, - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="imputation-brits") - def test_0_fit(self): - self.brits.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-brits") - def test_1_impute(self): - imputed_X = self.brits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"BRITS test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-brits") - def test_2_parameters(self): - assert hasattr(self.brits, "model") and self.brits.model is not None - - assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None - - assert hasattr(self.brits, "best_loss") - self.assertNotEqual(self.brits.best_loss, float("inf")) - - assert ( - hasattr(self.brits, "best_model_dict") - and self.brits.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-brits") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.brits) - - # save the trained model into file, and check if the path exists - self.brits.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.brits.load_model(saved_model_path) - - -class TestMRNN(unittest.TestCase): - logger.info("Running tests for an imputation model MRNN...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN") - model_save_name = "saved_MRNN_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a MRNN model - mrnn = MRNN( - DATA["n_steps"], - DATA["n_features"], - 256, - epochs=EPOCH, - saving_path=saving_path, - optimizer=optimizer, - ) - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_0_fit(self): - self.mrnn.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_1_impute(self): - imputed_X = self.mrnn.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"MRNN test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_2_parameters(self): - assert hasattr(self.mrnn, "model") and self.mrnn.model is not None - - assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None - - assert hasattr(self.mrnn, "best_loss") - self.assertNotEqual(self.mrnn.best_loss, float("inf")) - - assert ( - hasattr(self.mrnn, "best_model_dict") - and self.mrnn.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.mrnn) - - # save the trained model into file, and check if the path exists - self.mrnn.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.mrnn.load_model(saved_model_path) - - -class TestLOCF(unittest.TestCase): - logger.info("Running tests for an imputation model LOCF...") - locf = LOCF(nan=0) - - @pytest.mark.xdist_group(name="imputation-locf") - def test_0_impute(self): - test_X_imputed = self.locf.impute(TEST_SET) - assert not np.isnan( - test_X_imputed - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"LOCF test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-locf") - def test_1_parameters(self): - assert hasattr(self.locf, "nan") and self.locf.nan is not None - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_optim.py b/tests/test_optim.py deleted file mode 100644 index 9be096fb..00000000 --- a/tests/test_optim.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -Test cases for optimizers. 
-""" - -# Created by Wenjie Du -# License: GLP-v3 - -import unittest - -import h5py -import numpy as np -import pytest - -from pypots.imputation import SAITS -from pypots.optim import Adam, AdamW, Adagrad, Adadelta, SGD, RMSprop -from pypots.utils.logging import logger -from pypots.utils.metrics import cal_mae -from tests.global_test_config import DATA - -TRAIN_SET = {"X": DATA["train_X"]} -VAL_SET = { - "X": DATA["val_X"], - "X_intact": DATA["val_X_intact"], - "indicating_mask": DATA["val_X_indicating_mask"], -} -TEST_SET = {"X": DATA["test_X"]} - - -EPOCHS = 3 - - -def save_data_set_into_h5(data, path): - with h5py.File(path, "w") as hf: - for i in data.keys(): - tp = int if i == "y" else "float32" - hf.create_dataset(i, data=data[i].astype(tp)) - - -class TestAdam(unittest.TestCase): - logger.info("Running tests for Adam...") - - # initialize an Adam optimizer - adam = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model for testing DatasetForMIT and BaseDataset - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=1, - d_model=128, - d_inner=64, - n_heads=2, - d_k=64, - d_v=64, - dropout=0.1, - optimizer=adam, - epochs=EPOCHS, - ) - - @pytest.mark.xdist_group(name="optim-adam") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - -class TestAdamW(unittest.TestCase): - logger.info("Running tests for AdamW...") - - # initialize an AdamW optimizer - adamw = AdamW(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model for testing DatasetForMIT and BaseDataset - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=1, - d_model=128, - d_inner=64, - n_heads=2, - d_k=64, - d_v=64, - dropout=0.1, - optimizer=adamw, - epochs=EPOCHS, - ) - - @pytest.mark.xdist_group(name="optim-adamw") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - -class TestAdagrad(unittest.TestCase): - logger.info("Running tests for Adagrad...") - - # initialize an Adagrad optimizer - adagrad = Adagrad(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model for testing DatasetForMIT and BaseDataset - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=1, - d_model=128, - d_inner=64, - n_heads=2, - d_k=64, - d_v=64, - dropout=0.1, - optimizer=adagrad, - epochs=EPOCHS, - ) - - @pytest.mark.xdist_group(name="optim-adagrad") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - -class TestAdadelta(unittest.TestCase): - logger.info("Running tests for Adadelta...") - - # initialize an Adadelta optimizer - adadelta = Adadelta(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model for testing DatasetForMIT and BaseDataset - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=1, - d_model=128, - d_inner=64, - n_heads=2, - d_k=64, - d_v=64, - dropout=0.1, - optimizer=adadelta, - epochs=EPOCHS, - ) - - @pytest.mark.xdist_group(name="optim-adadelta") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - -class TestSGD(unittest.TestCase): - logger.info("Running tests for SGD...") - - # initialize a SGD optimizer - sgd = SGD(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model for testing DatasetForMIT and BaseDataset - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=1, - d_model=128, - d_inner=64, - n_heads=2, - d_k=64, - d_v=64, - dropout=0.1, - optimizer=sgd, - epochs=EPOCHS, - ) - - @pytest.mark.xdist_group(name="optim-sgd") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - -class TestRMSprop(unittest.TestCase): - logger.info("Running tests for RMSprop...") - - # initialize a RMSprop optimizer - rmsprop = RMSprop(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model for testing DatasetForMIT and BaseDataset - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=1, - d_model=128, - d_inner=64, - n_heads=2, - d_k=64, - d_v=64, - dropout=0.1, - optimizer=rmsprop, - epochs=EPOCHS, - ) - - @pytest.mark.xdist_group(name="optim-rmsprop") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_training_on_multi_gpus.py b/tests/test_training_on_multi_gpus.py deleted file mode 100644 index b076cbfe..00000000 --- a/tests/test_training_on_multi_gpus.py +++ /dev/null @@ -1,783 +0,0 @@ -""" -Test cases for running models on multi cuda devices. 
-""" - -# Created by Wenjie Du -# License: GPL-v3 - - -import os.path -import unittest - -import numpy as np -import pytest -import torch - -from pypots.classification import BRITS, GRUD, Raindrop -from pypots.clustering import VaDER, CRLI -from pypots.forecasting import BTTF -from pypots.imputation import BRITS as ImputationBRITS -from pypots.imputation import ( - SAITS, - Transformer, - MRNN, - LOCF, -) -from pypots.optim import Adam -from pypots.utils.logging import logger -from pypots.utils.metrics import cal_binary_classification_metrics -from pypots.utils.metrics import cal_mae -from pypots.utils.metrics import cal_rand_index, cal_cluster_purity -from tests.global_test_config import ( - DATA, - RESULT_SAVING_DIR, - check_tb_and_model_checkpoints_existence, -) - -EPOCHS = 5 - -cuda_devices = [torch.device(i) for i in range(torch.cuda.device_count())] - -# set DEVICES to None if no cuda device is available, to avoid initialization failed while importing test classes -DEVICES = None if cuda_devices == [] else cuda_devices - -# global skip test if less than two cuda-enabled devices -LESS_THAN_TWO_DEVICES = len(cuda_devices) < 2 -pytestmark = pytest.mark.skipif( - LESS_THAN_TWO_DEVICES, reason="not enough cuda devices to run tests" -) - - -TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]} - -VAL_SET = { - "X": DATA["val_X"], - "X_intact": DATA["val_X_intact"], - "indicating_mask": DATA["val_X_indicating_mask"], - "y": DATA["val_y"], -} -TEST_SET = {"X": DATA["test_X"]} - -RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation") -RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification") -RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering") - - -class TestSAITS(unittest.TestCase): - logger.info("Running tests for an imputation model SAITS...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS") - model_save_name = "saved_saits_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a SAITS model - saits = SAITS( - DATA["n_steps"], - DATA["n_features"], - n_layers=2, - d_model=256, - d_inner=128, - n_heads=4, - d_k=64, - d_v=64, - dropout=0.1, - epochs=EPOCHS, - saving_path=saving_path, - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="imputation-saits") - def test_0_fit(self): - self.saits.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-saits") - def test_1_impute(self): - imputed_X = self.saits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"SAITS test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-saits") - def test_2_parameters(self): - assert hasattr(self.saits, "model") and self.saits.model is not None - - assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None - - assert hasattr(self.saits, "best_loss") - self.assertNotEqual(self.saits.best_loss, float("inf")) - - assert ( - hasattr(self.saits, "best_model_dict") - and self.saits.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-saits") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.saits) - - # save the trained model into file, and check if the path exists - self.saits.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.saits.load_model(saved_model_path) - - -class TestTransformer(unittest.TestCase): - logger.info("Running tests for an imputation model Transformer...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer") - model_save_name = "saved_transformer_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a Transformer model - transformer = Transformer( - DATA["n_steps"], - DATA["n_features"], - n_layers=2, - d_model=256, - d_inner=128, - n_heads=4, - d_k=64, - d_v=64, - dropout=0.1, - epochs=EPOCHS, - saving_path=saving_path, - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_0_fit(self): - self.transformer.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_1_impute(self): - imputed_X = self.transformer.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"Transformer test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_2_parameters(self): - assert hasattr(self.transformer, "model") and self.transformer.model is not None - - assert ( - hasattr(self.transformer, "optimizer") - and self.transformer.optimizer is not None - ) - - assert hasattr(self.transformer, "best_loss") - self.assertNotEqual(self.transformer.best_loss, float("inf")) - - assert ( - hasattr(self.transformer, "best_model_dict") - and self.transformer.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-transformer") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.transformer) - - # save the trained model into file, and check if the path exists - self.transformer.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.transformer.load_model(saved_model_path) - - -class TestImputationBRITS(unittest.TestCase): - logger.info("Running tests for an imputation model BRITS...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS") - model_save_name = "saved_BRITS_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a BRITS model - brits = ImputationBRITS( - DATA["n_steps"], - DATA["n_features"], - 256, - epochs=EPOCHS, - saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/BRITS", - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="imputation-brits") - def test_0_fit(self): - self.brits.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-brits") - def test_1_impute(self): - imputed_X = self.brits.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"BRITS test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-brits") - def test_2_parameters(self): - assert hasattr(self.brits, "model") and self.brits.model is not None - - assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None - - assert hasattr(self.brits, "best_loss") - self.assertNotEqual(self.brits.best_loss, float("inf")) - - assert ( - hasattr(self.brits, "best_model_dict") - and self.brits.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-brits") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.brits) - - # save the trained model into file, and check if the path exists - self.brits.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.brits.load_model(saved_model_path) - - -class TestMRNN(unittest.TestCase): - logger.info("Running tests for an imputation model MRNN...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN") - model_save_name = "saved_MRNN_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a MRNN model - mrnn = MRNN( - DATA["n_steps"], - DATA["n_features"], - 256, - epochs=EPOCHS, - saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/MRNN", - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_0_fit(self): - self.mrnn.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_1_impute(self): - imputed_X = self.mrnn.impute(TEST_SET) - assert not np.isnan( - imputed_X - ).any(), "Output still has missing values after running impute()." 
- test_MAE = cal_mae( - imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"MRNN test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_2_parameters(self): - assert hasattr(self.mrnn, "model") and self.mrnn.model is not None - - assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None - - assert hasattr(self.mrnn, "best_loss") - self.assertNotEqual(self.mrnn.best_loss, float("inf")) - - assert ( - hasattr(self.mrnn, "best_model_dict") - and self.mrnn.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="imputation-mrnn") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.mrnn) - - # save the trained model into file, and check if the path exists - self.mrnn.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.mrnn.load_model(saved_model_path) - - -class TestLOCF(unittest.TestCase): - logger.info("Running tests for an imputation model LOCF...") - locf = LOCF(nan=0) - - @pytest.mark.xdist_group(name="imputation-locf") - def test_0_impute(self): - test_X_imputed = self.locf.impute(TEST_SET) - assert not np.isnan( - test_X_imputed - ).any(), "Output still has missing values after running impute()." - test_MAE = cal_mae( - test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"] - ) - logger.info(f"LOCF test_MAE: {test_MAE}") - - @pytest.mark.xdist_group(name="imputation-locf") - def test_1_parameters(self): - assert hasattr(self.locf, "nan") and self.locf.nan is not None - - -class TestClassificationBRITS(unittest.TestCase): - logger.info("Running tests for a classification model BRITS...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS") - model_save_name = "saved_BRITS_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a BRITS model - brits = BRITS( - DATA["n_steps"], - DATA["n_features"], - n_classes=DATA["n_classes"], - rnn_hidden_size=256, - epochs=EPOCHS, - saving_path=saving_path, - model_saving_strategy="better", - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="classification-brits") - def test_0_fit(self): - self.brits.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="classification-brits") - def test_1_classify(self): - predictions = self.brits.classify(TEST_SET) - metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) - logger.info( - f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n' - ) - assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" - - @pytest.mark.xdist_group(name="classification-brits") - def test_2_parameters(self): - assert hasattr(self.brits, "model") and self.brits.model is not None - - assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None - - assert hasattr(self.brits, "best_loss") - self.assertNotEqual(self.brits.best_loss, float("inf")) - - assert ( - hasattr(self.brits, 
"best_model_dict") - and self.brits.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="classification-brits") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.brits) - - # save the trained model into file, and check if the path exists - self.brits.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.brits.load_model(saved_model_path) - - -class TestGRUD(unittest.TestCase): - logger.info("Running tests for a classification model GRUD...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD") - model_save_name = "saved_GRUD_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a GRUD model - grud = GRUD( - DATA["n_steps"], - DATA["n_features"], - n_classes=DATA["n_classes"], - rnn_hidden_size=256, - epochs=EPOCHS, - saving_path=saving_path, - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="classification-grud") - def test_0_fit(self): - self.grud.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="classification-grud") - def test_1_classify(self): - predictions = self.grud.classify(TEST_SET) - metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) - logger.info( - f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n' - ) - assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" - - @pytest.mark.xdist_group(name="classification-grud") - def test_2_parameters(self): - assert hasattr(self.grud, "model") and self.grud.model is not None - - assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None - - assert hasattr(self.grud, "best_loss") - self.assertNotEqual(self.grud.best_loss, float("inf")) - - assert ( - hasattr(self.grud, "best_model_dict") - and self.grud.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="classification-grud") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.grud) - - # save the trained model into file, and check if the path exists - self.grud.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.grud.load_model(saved_model_path) - - -class TestRaindrop(unittest.TestCase): - logger.info("Running tests for a classification model Raindrop...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop") - model_save_name = "saved_Raindrop_model.pypots" - - # initialize a Raindrop model - raindrop = Raindrop( - DATA["n_steps"], - DATA["n_features"], - DATA["n_classes"], - n_layers=2, 
- d_model=DATA["n_features"] * 4, - d_inner=256, - n_heads=2, - dropout=0.3, - d_static=0, - aggregation="mean", - sensor_wise_mask=False, - static=False, - epochs=EPOCHS, - saving_path=saving_path, - ) - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_0_fit(self): - self.raindrop.fit(TRAIN_SET, VAL_SET) - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_1_classify(self): - predictions = self.raindrop.classify(TEST_SET) - metrics = cal_binary_classification_metrics(predictions, DATA["test_y"]) - logger.info( - f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n' - ) - assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_2_parameters(self): - assert hasattr(self.raindrop, "model") and self.raindrop.model is not None - - assert ( - hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None - ) - - assert hasattr(self.raindrop, "best_loss") - self.assertNotEqual(self.raindrop.best_loss, float("inf")) - - assert ( - hasattr(self.raindrop, "best_model_dict") - and self.raindrop.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="classification-raindrop") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.raindrop) - - # save the trained model into file, and check if the path exists - self.raindrop.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.raindrop.load_model(saved_model_path) - - -class TestCRLI(unittest.TestCase): - logger.info("Running tests for a clustering model CRLI...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI") - model_save_name = "saved_CRLI_model.pypots" - - # initialize an Adam optimizer - G_optimizer = Adam(lr=0.001, weight_decay=1e-5) - D_optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a CRLI model - crli = CRLI( - n_steps=DATA["n_steps"], - n_features=DATA["n_features"], - n_clusters=DATA["n_classes"], - n_generator_layers=2, - rnn_hidden_size=128, - epochs=EPOCHS, - saving_path=saving_path, - G_optimizer=G_optimizer, - D_optimizer=D_optimizer, - ) - - @pytest.mark.xdist_group(name="clustering-crli") - def test_0_fit(self): - self.crli.fit(TRAIN_SET) - - @pytest.mark.xdist_group(name="clustering-crli") - def test_1_parameters(self): - assert hasattr(self.crli, "model") and self.crli.model is not None - - assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None - assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None - - assert hasattr(self.crli, "best_loss") - self.assertNotEqual(self.crli.best_loss, float("inf")) - - assert ( - hasattr(self.crli, "best_model_dict") - and self.crli.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="clustering-crli") - def test_2_cluster(self): - clustering = self.crli.cluster(TEST_SET) - RI = cal_rand_index(clustering, DATA["test_y"]) - CP = cal_cluster_purity(clustering, DATA["test_y"]) - 
logger.info(f"RI: {RI}\nCP: {CP}") - - @pytest.mark.xdist_group(name="clustering-crli") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.crli) - - # save the trained model into file, and check if the path exists - self.crli.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.crli.load_model(saved_model_path) - - -class TestVaDER(unittest.TestCase): - logger.info("Running tests for a clustering model Transformer...") - - # set the log and model saving path - saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "VaDER") - model_save_name = "saved_VaDER_model.pypots" - - # initialize an Adam optimizer - optimizer = Adam(lr=0.001, weight_decay=1e-5) - - # initialize a VaDER model - vader = VaDER( - n_steps=DATA["n_steps"], - n_features=DATA["n_features"], - n_clusters=DATA["n_classes"], - rnn_hidden_size=64, - d_mu_stddev=5, - pretrain_epochs=20, - epochs=EPOCHS, - saving_path=saving_path, - optimizer=optimizer, - num_workers=2, - device=DEVICES, - ) - - @pytest.mark.xdist_group(name="clustering-vader") - def test_0_fit(self): - self.vader.fit(TRAIN_SET) - - @pytest.mark.xdist_group(name="clustering-vader") - def test_1_cluster(self): - try: - clustering = self.vader.cluster(TEST_SET) - RI = cal_rand_index(clustering, DATA["test_y"]) - CP = cal_cluster_purity(clustering, DATA["test_y"]) - logger.info(f"RI: {RI}\nCP: {CP}") - except np.linalg.LinAlgError as e: - logger.error( - f"{e}\n" - "Got singular matrix, please try to retrain the model to fix this" - ) - - @pytest.mark.xdist_group(name="clustering-vader") - def test_2_parameters(self): - assert hasattr(self.vader, "model") and self.vader.model is not None - - assert hasattr(self.vader, "optimizer") and self.vader.optimizer is not None - - assert hasattr(self.vader, "best_loss") - self.assertNotEqual(self.vader.best_loss, float("inf")) - - assert ( - hasattr(self.vader, "best_model_dict") - and self.vader.best_model_dict is not None - ) - - @pytest.mark.xdist_group(name="clustering-vader") - def test_3_saving_path(self): - # whether the root saving dir exists, which should be created by save_log_into_tb_file - assert os.path.exists( - self.saving_path - ), f"file {self.saving_path} does not exist" - - # check if the tensorboard file and model checkpoints exist - check_tb_and_model_checkpoints_existence(self.vader) - - # save the trained model into file, and check if the path exists - self.vader.save_model( - saving_dir=self.saving_path, file_name=self.model_save_name - ) - - # test loading the saved model, not necessary, but need to test - saved_model_path = os.path.join(self.saving_path, self.model_save_name) - self.vader.load_model(saved_model_path) - - -class TestBTTF(unittest.TestCase): - logger.info("Running tests for a forecasting model BTTF...") - - # initialize a BTTF model - pred_step = 4 - bttf = BTTF( - n_steps=DATA["n_steps"] - pred_step, - n_features=10, - pred_step=pred_step, - rank=10, - time_lags=[1, 2, 3, 5, 5 + 1, 5 + 2, 10, 10 + 1, 10 + 2], - burn_iter=5, - gibbs_iter=5, - multi_step=1, - ) - - @pytest.mark.xdist_group(name="forecasting-bttf") - def 
test_0_forecasting(self):
-        predictions = self.bttf.forecast({"X": DATA["test_X"][:, : -self.pred_step]})
-        logger.info(f"prediction shape: {predictions.shape}")
-        mae = cal_mae(predictions, DATA["test_X_intact"][:, -self.pred_step :])
-        logger.info(f"prediction MAE: {mae}")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/utils/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
diff --git a/tests/test_utils.py b/tests/utils/logging.py
similarity index 64%
rename from tests/test_utils.py
rename to tests/utils/logging.py
index 0fd48ec8..113f0dde 100644
--- a/tests/test_utils.py
+++ b/tests/utils/logging.py
@@ -1,5 +1,5 @@
 """
-Test cases for the functions and classes in package `pypots.utils`.
+Test cases for the functions and classes in package `pypots.utils.logging`.
 """
 
 # Created by Wenjie Du
@@ -9,10 +9,7 @@
 import shutil
 import unittest
 
-import torch
-
 from pypots.utils.logging import Logger
-from pypots.utils.random import set_random_seed
 
 
 class TestLogging(unittest.TestCase):
@@ -49,25 +46,5 @@ def test_saving_log_into_file(self):
         shutil.rmtree("test_log", ignore_errors=True)
 
 
-class TestRandom(unittest.TestCase):
-    def test_set_random_seed(self):
-        random_state1 = torch.get_rng_state()
-        torch.rand(
-            1, 3
-        )  # randomly generate something, the random state will be reset, so two states should be varying
-        random_state2 = torch.get_rng_state()
-        assert not torch.equal(
-            random_state1, random_state2
-        ), "The random seed hasn't set, so two random states should be different."
-
-        set_random_seed(26)
-        random_state1 = torch.get_rng_state()
-        set_random_seed(26)
-        random_state2 = torch.get_rng_state()
-        assert torch.equal(
-            random_state1, random_state2
-        ), "The random seed has been set, two random states are not the same."
-
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/utils/random.py b/tests/utils/random.py
new file mode 100644
index 00000000..0d1a0ca0
--- /dev/null
+++ b/tests/utils/random.py
@@ -0,0 +1,36 @@
+"""
+Test cases for the functions and classes in package `pypots.utils.random`.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+import unittest
+
+import torch
+
+from pypots.utils.random import set_random_seed
+
+
+class TestRandom(unittest.TestCase):
+    def test_set_random_seed(self):
+        random_state1 = torch.get_rng_state()
+        torch.rand(
+            1, 3
+        )  # draw something to advance the RNG state, so the two captured states should differ
+        random_state2 = torch.get_rng_state()
+        assert not torch.equal(
+            random_state1, random_state2
+        ), "The random seed hasn't been set, so the two random states should differ."
+
+        set_random_seed(26)
+        random_state1 = torch.get_rng_state()
+        set_random_seed(26)
+        random_state2 = torch.get_rng_state()
+        assert torch.equal(
+            random_state1, random_state2
+        ), "The random seed has been set, so the two random states should be identical."
+
+
+if __name__ == "__main__":
+    unittest.main()
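
A side note on the deleted tests/test_optim.py above: it trained one identical SAITS model per optimizer, six times over, which is why this patch splits the cases into per-optimizer modules under tests/optim/. For comparison, below is a hedged sketch of the same coverage expressed as a single parameterized test; it reuses only names visible in this diff (SAITS, DATA, cal_mae, the six optimizer classes), and the class name TestAllOptimizers is illustrative, not part of the patch.

import unittest

import numpy as np

from pypots.imputation import SAITS
from pypots.optim import Adam, AdamW, Adagrad, Adadelta, SGD, RMSprop
from pypots.utils.metrics import cal_mae
from tests.global_test_config import DATA

TRAIN_SET = {"X": DATA["train_X"]}
VAL_SET = {
    "X": DATA["val_X"],
    "X_intact": DATA["val_X_intact"],
    "indicating_mask": DATA["val_X_indicating_mask"],
}
TEST_SET = {"X": DATA["test_X"]}


class TestAllOptimizers(unittest.TestCase):
    def test_fit_with_every_optimizer(self):
        # each optimizer gets a fresh small SAITS model and its own subTest report
        for optimizer_class in (Adam, AdamW, Adagrad, Adadelta, SGD, RMSprop):
            with self.subTest(optimizer=optimizer_class.__name__):
                saits = SAITS(
                    DATA["n_steps"],
                    DATA["n_features"],
                    n_layers=1,
                    d_model=128,
                    d_inner=64,
                    n_heads=2,
                    d_k=64,
                    d_v=64,
                    dropout=0.1,
                    optimizer=optimizer_class(lr=0.001, weight_decay=1e-5),
                    epochs=3,
                )
                saits.fit(TRAIN_SET, VAL_SET)
                imputed_X = saits.impute(TEST_SET)
                assert not np.isnan(imputed_X).any(), "imputation output contains NaNs"
                cal_mae(imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"])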
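
The save_data_set_into_h5 helper defined in the deleted tests/test_optim.py casts the label key "y" to int and every other key to float32 before writing. A minimal round-trip sketch of that behaviour, with an illustrative file name and toy arrays:

import h5py
import numpy as np


def save_data_set_into_h5(data, path):
    # same logic as the deleted helper: labels as int, features as float32
    with h5py.File(path, "w") as hf:
        for i in data.keys():
            tp = int if i == "y" else "float32"
            hf.create_dataset(i, data=data[i].astype(tp))


toy_set = {"X": np.random.rand(4, 3), "y": np.array([0, 1, 1, 0])}
save_data_set_into_h5(toy_set, "toy_set.h5")  # hypothetical path

with h5py.File("toy_set.h5", "r") as hf:
    assert hf["X"].dtype.kind == "f"  # features stored as floats
    assert hf["y"].dtype.kind == "i"  # labels stored as integers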
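
On the BTTF case in the deleted multi-GPU suite: the model is constructed with n_steps = DATA["n_steps"] - pred_step, fed the test series minus its last pred_step steps, and its forecast is scored against exactly those held-out steps. A toy illustration of the slicing arithmetic (array values are made up):

import numpy as np

pred_step = 4
X = np.arange(2 * 10).reshape(2, 10)  # 2 samples, 10 time steps of toy values

history = X[:, :-pred_step]  # the first 6 steps, given to the model
target = X[:, -pred_step:]   # the last 4 steps, compared against the forecast
assert history.shape[1] + target.shape[1] == X.shape[1]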
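
Finally, the reseeding behaviour asserted by the new tests/utils/random.py can be condensed into a short usage sketch; the seed value 26 is the one the test itself uses:

import torch

from pypots.utils.random import set_random_seed

set_random_seed(26)
a = torch.rand(2, 2)
set_random_seed(26)
b = torch.rand(2, 2)
assert torch.equal(a, b)  # the same seed reproduces the same draws bit for bit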