diff --git a/.github/workflows/testing_ci.yml b/.github/workflows/testing_ci.yml
index d339afe5..4cdfe5bc 100644
--- a/.github/workflows/testing_ci.yml
+++ b/.github/workflows/testing_ci.yml
@@ -15,43 +15,61 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
- shell: bash -l {0}
+ shell: bash {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
- python-version: ["3.7", "3.8", "3.9", "3.10"]
+ python-version: ["3.7", "3.10"]
+ torch-version: ["1.13.1"]
steps:
- name: Check out the repo code
uses: actions/checkout@v3
- - name: Set up Conda
- uses: conda-incubator/setup-miniconda@v2
+ - name: Determine the Python version
+ uses: haya14busa/action-cond@v1
+ id: condval
with:
- activate-environment: pypots-test
- python-version: ${{ matrix.python-version }}
- environment-file: tests/environment_for_conda_test.yml
- auto-activate-base: false
+ cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
+ # Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
+ if_true: "3.7.16"
+ if_false: ${{ matrix.python-version }}
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ steps.condval.outputs.value }}
+ check-latest: true
+ cache: pip
+ cache-dependency-path: |
+ setup.cfg
+
+ - name: Install PyTorch ${{ matrix.torch-version }}+cpu
+ # we have to install torch in advance because torch_sparse needs it for compilation,
+ # refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
+ run: |
+ which python
+ which pip
+ python -m pip install --upgrade pip
+ pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
+ python -c "import torch; print('PyTorch:', torch.__version__)"
+
+ - name: Install other dependencies
+ run: |
+ pip install pypots
+ pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
+ pip install -e ".[dev]"
- name: Fetch the test environment details
run: |
which python
- conda info
- conda list
+ pip list
- name: Test with pytest
run: |
- # run tests separately here due to Segmentation Fault in test_clustering when run all in
- # one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
- python -m pytest -rA tests/test_classification.py -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_imputation.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_clustering.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_forecasting.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_optim.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_data.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_utils.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- python -m pytest -rA tests/test_cli.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ rm -rf tests/__pycache__
+ python -m pytest -rA tests/*/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
- name: Generate the LCOV report
run: |
@@ -61,4 +79,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- path-to-lcov: 'coverage.lcov'
+ path-to-lcov: "coverage.lcov"
diff --git a/.github/workflows/testing_daily.yml b/.github/workflows/testing_daily.yml
index f0b3ba61..5e41630f 100644
--- a/.github/workflows/testing_daily.yml
+++ b/.github/workflows/testing_daily.yml
@@ -10,61 +10,43 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
- shell: bash {0}
+ shell: bash -l {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
- python-version: ["3.7", "3.8", "3.9", "3.10"]
- torch-version: ["1.13.1"]
+ python-version: ["3.7", "3.10"]
steps:
- name: Check out the repo code
uses: actions/checkout@v3
- - name: Determine the Python version
- uses: haya14busa/action-cond@v1
- id: condval
+ - name: Set up Conda
+ uses: conda-incubator/setup-miniconda@v2
with:
- cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
- # Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
- if_true: "3.7.16"
- if_false: ${{ matrix.python-version }}
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: ${{ steps.condval.outputs.value }}
- check-latest: true
- cache: pip
- cache-dependency-path: |
- setup.cfg
-
- - name: Install PyTorch ${{ matrix.torch-version }}+cpu
- # we have to install torch in advance because torch_sparse needs it for compilation,
- # refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
- run: |
- which python
- which pip
- python -m pip install --upgrade pip
- pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
- python -c "import torch; print('PyTorch:', torch.__version__)"
-
- - name: Install other dependencies
- run: |
- pip install pypots
- pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
- pip install -e ".[dev]"
+ activate-environment: pypots-test
+ python-version: ${{ matrix.python-version }}
+ environment-file: tests/environment_for_conda_test.yml
+ auto-activate-base: false
- name: Fetch the test environment details
run: |
which python
- pip list
+ conda info
+ conda list
- name: Test with pytest
run: |
- coverage run --source=pypots -m pytest --ignore tests/test_training_on_multi_gpus.py
- # ignore the test_training_on_multi_gpus.py because it requires multiple GPUs which are not available on GitHub Actions
+ # run tests separately here due to Segmentation Fault in test_clustering when run all in
+ # one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
+ python -m pytest -rA tests/classification/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/imputation/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/clustering/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/forecasting/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/optim/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/data/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/utils/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+ python -m pytest -rA tests/cli/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- name: Generate the LCOV report
run: |
@@ -74,4 +56,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- path-to-lcov: "coverage.lcov"
+ path-to-lcov: 'coverage.lcov'
diff --git a/.gitignore b/.gitignore
index 0841fdef..51294f38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,8 @@ docs/_build
.coverage
.pytest_cache
*__pycache__*
-*testing_results*
+*test*
# ignore specific kinds of files like all PDFs
*.pdf
+*.ipynb
diff --git a/README.md b/README.md
index 7b591634..9c86f08a 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,8 @@
-##
Welcome to PyPOTS
+Welcome to PyPOTS
+
**A Python Toolbox for Data Mining on Partially-Observed Time Series
**
@@ -161,6 +162,8 @@ PyPOTS supports imputation, classification, clustering, and forecasting tasks on
| **Type** | **Abbr.** | **Full name of the algorithm/model/paper** | **Year** |
| Neural Net | SAITS | Self-Attention-based Imputation for Time Series [^1] | 2023 |
| Neural Net | Transformer | Attention is All you Need [^2]; Self-Attention-based Imputation for Time Series [^1];Note: proposed in [^2], and re-implemented as an imputation model in [^1]. | 2017 |
+| Neural Net | US-GAN | Generative Semi-supervised Learning for Multivariate Time Series Imputation [^10] | 2021 |
+| Neural Net | GP-VAE | GP-VAE: Deep Probabilistic Time Series Imputation [^11] | 2020 |
| Neural Net | BRITS | Bidirectional Recurrent Imputation for Time Series [^3] | 2018 |
| Neural Net | M-RNN | Multi-directional Recurrent Neural Network [^9] | 2019 |
| Naive | LOCF | Last Observation Carried Forward | - |
@@ -253,7 +256,7 @@ We care about the feedback from our users, so we're building PyPOTS community on
If you have any suggestions or want to contribute ideas or share time-series related papers, join us and tell.
PyPOTS community is open, transparent, and surely friendly. Let's work together to build and improve PyPOTS!
-
+[//]: # (Use APA reference style below)
[^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). *Expert systems with applications*.
[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS 2017*.
[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS 2018*.
@@ -263,7 +266,8 @@ PyPOTS community is open, transparent, and surely friendly. Let's work together
[^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). *GigaScience*.
[^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). *IEEE transactions on pattern analysis and machine intelligence*.
[^9]: Yoon, J., Zame, W. R., & van der Schaar, M. (2019). [Estimating Missing Data in Temporal Data Streams Using Multi-Directional Recurrent Neural Networks](https://ieeexplore.ieee.org/document/8485748). *IEEE Transactions on Biomedical Engineering*.
-
+[^10]: Miao, X., Wu, Y., Wang, J., Gao, Y., Mao, X., & Yin, J. (2021). [Generative Semi-supervised Learning for Multivariate Time Series Imputation](https://ojs.aaai.org/index.php/AAAI/article/view/17086). *AAAI 2021*.
+[^11]: Fortuin, V., Baranchuk, D., Raetsch, G. & Mandt, S.. (2020). [GP-VAE: Deep Probabilistic Time Series Imputation](https://proceedings.mlr.press/v108/fortuin20a.html). *AISTATS 2020*.
🏠 Visits
@@ -271,4 +275,4 @@ PyPOTS community is open, transparent, and surely friendly. Let's work together
-
\ No newline at end of file
+
diff --git a/docs/about_us.rst b/docs/about_us.rst
index aaaab944..370a3e0d 100644
--- a/docs/about_us.rst
+++ b/docs/about_us.rst
@@ -33,5 +33,5 @@ PyPOTS exists thanks to all the nice people (sorted by contribution time) who co
.. raw:: html
-
+
diff --git a/docs/pypots.data.rst b/docs/pypots.data.rst
index d792d6aa..fe7c4678 100644
--- a/docs/pypots.data.rst
+++ b/docs/pypots.data.rst
@@ -10,6 +10,15 @@ pypots.data.base module
:show-inheritance:
:inherited-members:
+pypots.data.saving module
+-----------------------------
+
+.. automodule:: pypots.data.saving
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :inherited-members:
+
pypots.data.generating module
-----------------------------
diff --git a/docs/pypots.forecasting.rst b/docs/pypots.forecasting.rst
index 2ae67b85..c4ac76b7 100644
--- a/docs/pypots.forecasting.rst
+++ b/docs/pypots.forecasting.rst
@@ -1,11 +1,31 @@
pypots.forecasting package
==========================
+Subpackages
+-----------
-pypots.forecasting.bttf module
+.. toctree::
+ :maxdepth: 4
+
+ pypots.forecasting.bttf
+ pypots.forecasting.template
+
+Submodules
+----------
+
+pypots.forecasting.base module
------------------------------
-.. automodule:: pypots.forecasting.bttf
+.. automodule:: pypots.forecasting.base
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :inherited-members:
+
+Module contents
+---------------
+
+.. automodule:: pypots.forecasting
:members:
:undoc-members:
:show-inheritance:
diff --git a/docs/pypots.imputation.rst b/docs/pypots.imputation.rst
index 0e31f8c8..a33e0fdf 100644
--- a/docs/pypots.imputation.rst
+++ b/docs/pypots.imputation.rst
@@ -19,6 +19,24 @@ pypots.imputation.transformer module
:show-inheritance:
:inherited-members:
+pypots.imputation.usgan module
+------------------------------
+
+.. automodule:: pypots.imputation.usgan
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :inherited-members:
+
+pypots.imputation.gpvae module
+------------------------------
+
+.. automodule:: pypots.imputation.gpvae
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :inherited-members:
+
pypots.imputation.brits module
------------------------------
diff --git a/pypots/base.py b/pypots/base.py
index f55033e3..7a12fe94 100644
--- a/pypots/base.py
+++ b/pypots/base.py
@@ -96,7 +96,9 @@ def _setup_device(self, device: Union[None, str, torch.device, list]):
self.device = device
elif isinstance(device, list):
if len(device) == 0:
- raise ValueError("The list of devices should have at least 1 device, but got 0.")
+ raise ValueError(
+ "The list of devices should have at least 1 device, but got 0."
+ )
elif len(device) == 1:
return self._setup_device(device[0])
# parallely training on multiple CUDA devices
@@ -176,7 +178,6 @@ def _send_data_to_given_device(self, data):
if isinstance(self.device, torch.device): # single device
data = map(lambda x: x.to(self.device), data)
else: # parallely training on multiple devices
-
# randomly choose one device to balance the workload
# device = np.random.choice(self.device)
diff --git a/pypots/classification/base.py b/pypots/classification/base.py
index a30fd698..a16dbc01 100644
--- a/pypots/classification/base.py
+++ b/pypots/classification/base.py
@@ -256,7 +256,6 @@ def _train_model(
training_loader: DataLoader,
val_loader: DataLoader = None,
) -> None:
-
# each training starts from the very beginning, so reset the loss and model dict here
self.best_loss = float("inf")
self.best_model_dict = None
diff --git a/pypots/classification/grud/data.py b/pypots/classification/grud/data.py
index 52186017..edf1d4d0 100644
--- a/pypots/classification/grud/data.py
+++ b/pypots/classification/grud/data.py
@@ -123,7 +123,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()
- X = torch.from_numpy(self.file_handle["X"][idx])
+ X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X_filledLOCF = self.locf._locf_torch(X.unsqueeze(dim=0)).squeeze()
X = torch.nan_to_num(X)
diff --git a/pypots/classification/raindrop/modules.py b/pypots/classification/raindrop/modules.py
index 76a992ef..191ff9c7 100644
--- a/pypots/classification/raindrop/modules.py
+++ b/pypots/classification/raindrop/modules.py
@@ -174,7 +174,6 @@ def forward(
edge_attr: OptTensor = None,
return_attention_weights=None,
) -> Tuple[torch.Tensor, Any]:
-
r"""
Args:
return_attention_weights (bool, optional): If set to :obj:`True`,
diff --git a/pypots/classification/template/dataset.py b/pypots/classification/template/data.py
similarity index 100%
rename from pypots/classification/template/dataset.py
rename to pypots/classification/template/data.py
diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py
index 324e6718..fd9b7f0d 100644
--- a/pypots/clustering/base.py
+++ b/pypots/clustering/base.py
@@ -244,7 +244,6 @@ def _train_model(
training_loader: DataLoader,
val_loader: DataLoader = None,
) -> None:
-
"""
Parameters
diff --git a/pypots/clustering/crli/model.py b/pypots/clustering/crli/model.py
index b5e2e14a..8b7a63a1 100644
--- a/pypots/clustering/crli/model.py
+++ b/pypots/clustering/crli/model.py
@@ -226,7 +226,6 @@ def __init__(
saving_path: Optional[str] = None,
model_saving_strategy: Optional[str] = "best",
):
-
super().__init__(
n_clusters,
batch_size,
diff --git a/pypots/clustering/template/dataset.py b/pypots/clustering/template/data.py
similarity index 100%
rename from pypots/clustering/template/dataset.py
rename to pypots/clustering/template/data.py
diff --git a/pypots/clustering/vader/data.py b/pypots/clustering/vader/data.py
index a3b2f91d..a8910b44 100644
--- a/pypots/clustering/vader/data.py
+++ b/pypots/clustering/vader/data.py
@@ -6,12 +6,12 @@
# License: GLP-v3
-from typing import Union
+from typing import Union, Iterable
-from ..crli.data import DatasetForCRLI
+from ...data.base import BaseDataset
-class DatasetForVaDER(DatasetForCRLI):
+class DatasetForVaDER(BaseDataset):
"""Dataset class for model VaDER.
Parameters
@@ -45,3 +45,9 @@ def __init__(
file_type: str = "h5py",
):
super().__init__(data, return_labels, file_type)
+
+ def _fetch_data_from_array(self, idx: int) -> Iterable:
+ return super()._fetch_data_from_array(idx)
+
+ def _fetch_data_from_file(self, idx: int) -> Iterable:
+ return super()._fetch_data_from_file(idx)
diff --git a/pypots/clustering/vader/model.py b/pypots/clustering/vader/model.py
index f2912cce..5a44da85 100644
--- a/pypots/clustering/vader/model.py
+++ b/pypots/clustering/vader/model.py
@@ -184,7 +184,6 @@ def forward(
) = self.get_results(X, missing_mask)
if not training and not pretrain:
-
results = {
"mu_tilde": mu_tilde,
"mu": mu_c,
@@ -403,7 +402,6 @@ def _train_model(
training_loader: DataLoader,
val_loader: DataLoader = None,
) -> None:
-
# each training starts from the very beginning, so reset the loss and model dict here
self.best_loss = float("inf")
self.best_model_dict = None
diff --git a/pypots/data/base.py b/pypots/data/base.py
index 86b15fc2..1bef9f9c 100644
--- a/pypots/data/base.py
+++ b/pypots/data/base.py
@@ -204,13 +204,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
The collated data sample, a list including all necessary sample info.
"""
- X = self.X[idx]
- missing_mask = ~torch.isnan(X)
+ X = self.X[idx].to(torch.float32)
+ missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
- X.to(torch.float32),
- missing_mask.to(torch.float32),
+ X,
+ missing_mask,
]
if self.y is not None and self.return_labels:
@@ -279,13 +279,13 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()
- X = torch.from_numpy(self.file_handle["X"][idx])
- missing_mask = ~torch.isnan(X)
+ X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
+ missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
- X.to(torch.float32),
- missing_mask.to(torch.float32),
+ X,
+ missing_mask,
]
# if the dataset has labels and is for training, then fetch it from the file
diff --git a/pypots/data/saving.py b/pypots/data/saving.py
index 8581ad50..61138df2 100644
--- a/pypots/data/saving.py
+++ b/pypots/data/saving.py
@@ -14,7 +14,11 @@
from pypots.utils.logging import logger
-def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
+def save_dict_into_h5(
+ data_dict: dict,
+ saving_dir: str,
+ saving_name: str = "datasets.h5",
+) -> None:
"""Save the given data (in a dictionary) into the given h5 file.
Parameters
@@ -25,6 +29,9 @@ def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
saving_dir : str,
The h5 file to save the data.
+ saving_name : str, optional (default="datasets.h5")
+ The final name of the saved h5 file.
+
"""
def save_set(handle, name, data):
@@ -36,7 +43,7 @@ def save_set(handle, name, data):
handle.create_dataset(name, data=data)
create_dir_if_not_exist(saving_dir)
- saving_path = os.path.join(saving_dir, "datasets.h5")
+ saving_path = os.path.join(saving_dir, saving_name)
with h5py.File(saving_path, "w") as hf:
for k, v in data_dict.items():
save_set(hf, k, v)
diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py
index 5188999b..079f5925 100644
--- a/pypots/forecasting/base.py
+++ b/pypots/forecasting/base.py
@@ -242,7 +242,6 @@ def _train_model(
training_loader: DataLoader,
val_loader: DataLoader = None,
) -> None:
-
# each training starts from the very beginning, so reset the loss and model dict here
self.best_loss = float("inf")
self.best_model_dict = None
diff --git a/pypots/forecasting/template/dataset.py b/pypots/forecasting/template/data.py
similarity index 100%
rename from pypots/forecasting/template/dataset.py
rename to pypots/forecasting/template/data.py
diff --git a/pypots/imputation/__init__.py b/pypots/imputation/__init__.py
index 9de8d0bc..a6c4dcd8 100644
--- a/pypots/imputation/__init__.py
+++ b/pypots/imputation/__init__.py
@@ -6,10 +6,12 @@
# License: GPL-v3
from .brits import BRITS
+from .gpvae import GPVAE
from .locf import LOCF
+from .mrnn import MRNN
from .saits import SAITS
from .transformer import Transformer
-from .mrnn import MRNN
+from .usgan import USGAN
__all__ = [
"SAITS",
@@ -17,4 +19,6 @@
"BRITS",
"MRNN",
"LOCF",
+ "GPVAE",
+ "USGAN",
]
diff --git a/pypots/imputation/brits/data.py b/pypots/imputation/brits/data.py
index f39e411c..342ede98 100644
--- a/pypots/imputation/brits/data.py
+++ b/pypots/imputation/brits/data.py
@@ -59,14 +59,14 @@ def __init__(
self.processed_data = {
"forward": {
- "X": forward_X,
- "missing_mask": forward_missing_mask,
- "delta": forward_delta,
+ "X": forward_X.to(torch.float32),
+ "missing_mask": forward_missing_mask.to(torch.float32),
+ "delta": forward_delta.to(torch.float32),
},
"backward": {
- "X": backward_X,
- "missing_mask": backward_missing_mask,
- "delta": backward_delta,
+ "X": backward_X.to(torch.float32),
+ "missing_mask": backward_missing_mask.to(torch.float32),
+ "delta": backward_delta.to(torch.float32),
},
}
@@ -101,13 +101,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
- self.processed_data["forward"]["X"][idx].to(torch.float32),
- self.processed_data["forward"]["missing_mask"][idx].to(torch.float32),
- self.processed_data["forward"]["delta"][idx].to(torch.float32),
+ self.processed_data["forward"]["X"][idx],
+ self.processed_data["forward"]["missing_mask"][idx],
+ self.processed_data["forward"]["delta"][idx],
# for backward
- self.processed_data["backward"]["X"][idx].to(torch.float32),
- self.processed_data["backward"]["missing_mask"][idx].to(torch.float32),
- self.processed_data["backward"]["delta"][idx].to(torch.float32),
+ self.processed_data["backward"]["X"][idx],
+ self.processed_data["backward"]["missing_mask"][idx],
+ self.processed_data["backward"]["delta"][idx],
]
if self.y is not None and self.return_labels:
@@ -133,7 +133,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()
- X = torch.from_numpy(self.file_handle["X"][idx])
+ X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
diff --git a/pypots/imputation/gpvae/__init__.py b/pypots/imputation/gpvae/__init__.py
new file mode 100644
index 00000000..f5ffb05e
--- /dev/null
+++ b/pypots/imputation/gpvae/__init__.py
@@ -0,0 +1,12 @@
+"""
+The package of the partially-observed time-series imputation method GP-VAE.
+"""
+
+# Created by Jun Wang
+# License: GLP-v3
+
+from .model import GPVAE
+
+__all__ = [
+ "GPVAE",
+]
diff --git a/pypots/imputation/gpvae/data.py b/pypots/imputation/gpvae/data.py
new file mode 100644
index 00000000..8bb9be8c
--- /dev/null
+++ b/pypots/imputation/gpvae/data.py
@@ -0,0 +1,132 @@
+"""
+Dataset class for model GP-VAE.
+"""
+
+# Created by Jun Wang and Wenjie Du
+# License: GLP-v3
+
+from typing import Union, Iterable
+
+import torch
+
+from ...data.base import BaseDataset
+
+
+class DatasetForGPVAE(BaseDataset):
+ """Dataset class for GP-VAE.
+
+ Parameters
+ ----------
+ data : dict or str,
+ The dataset for model input, should be a dictionary including keys as 'X' and 'y',
+ or a path string locating a data file.
+ If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
+ which is time-series data for input, can contain missing values, and y should be array-like of shape
+ [n_samples], which is classification labels of X.
+ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
+ key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
+
+ return_labels : bool, default = True,
+ Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example,
+ during training of classification models, the Dataset class will return labels in __getitem__() for model input.
+ Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we
+ need the defined Dataset class for all training/validating/testing stages. For those big datasets stored in h5
+ files, they already have both X and y saved. But we don't read labels from the file for validating and testing
+ with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for
+ distinction.
+
+ file_type : str, default = "h5py"
+ The type of the given file if train_set and val_set are path strings.
+ """
+
+ def __init__(
+ self,
+ data: Union[dict, str],
+ return_labels: bool = True,
+ file_type: str = "h5py",
+ ):
+ super().__init__(data, return_labels, file_type)
+
+ if not isinstance(self.data, str):
+ # calculate all delta here.
+ missing_mask = (~torch.isnan(self.X)).type(torch.float32)
+ X = torch.nan_to_num(self.X).to(torch.float32)
+
+ self.processed_data = {
+ "X": X,
+ "missing_mask": missing_mask,
+ }
+
+ def _fetch_data_from_array(self, idx: int) -> Iterable:
+ """Fetch data from self.X if it is given.
+
+ Parameters
+ ----------
+ idx : int,
+ The index of the sample to be return.
+
+ Returns
+ -------
+ sample : list,
+ A list contains
+
+ index : int tensor,
+ The index of the sample.
+
+ X : tensor,
+ The feature vector for model input.
+
+ missing_mask : tensor,
+ The mask indicates all missing values in X.
+
+ delta : tensor,
+ The delta matrix contains time gaps of missing values.
+
+ label (optional) : tensor,
+ The target label of the time-series sample.
+ """
+ sample = [
+ torch.tensor(idx),
+ # for forward
+ self.processed_data["X"][idx],
+ self.processed_data["missing_mask"][idx],
+ ]
+
+ if self.y is not None and self.return_labels:
+ sample.append(self.y[idx].to(torch.long))
+
+ return sample
+
+ def _fetch_data_from_file(self, idx: int) -> Iterable:
+ """Fetch data with the lazy-loading strategy, i.e. only loading data from the file while requesting for samples.
+ Here the opened file handle doesn't load the entire dataset into RAM but only load the currently accessed slice.
+
+ Parameters
+ ----------
+ idx : int,
+ The index of the sample to be return.
+
+ Returns
+ -------
+ sample : list,
+ The collated data sample, a list including all necessary sample info.
+ """
+
+ if self.file_handle is None:
+ self.file_handle = self._open_file_handle()
+
+ X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
+ missing_mask = (~torch.isnan(X)).to(torch.float32)
+ X = torch.nan_to_num(X)
+
+ sample = [
+ torch.tensor(idx),
+ X,
+ missing_mask,
+ ]
+
+ # if the dataset has labels and is for training, then fetch it from the file
+ if "y" in self.file_handle.keys() and self.return_labels:
+ sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long))
+
+ return sample
diff --git a/pypots/imputation/gpvae/model.py b/pypots/imputation/gpvae/model.py
new file mode 100644
index 00000000..6b613d4d
--- /dev/null
+++ b/pypots/imputation/gpvae/model.py
@@ -0,0 +1,446 @@
+"""
+The implementation of GP-VAE for the partially-observed time-series imputation task.
+
+Refer to the paper Fortuin V, Baranchuk D, Rätsch G, et al.
+GP-VAE: Deep probabilistic time series imputation. AISTATS. PMLR, 2020: 1651-1661.
+
+"""
+
+# Created by Jun Wang and Wenjie Du
+# License: GPL-v3
+
+
+from typing import Union, Optional
+
+import h5py
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+
+from .data import DatasetForGPVAE
+from .modules import (
+ Encoder,
+ rbf_kernel,
+ diffusion_kernel,
+ matern_kernel,
+ cauchy_kernel,
+ Decoder,
+)
+from ..base import BaseNNImputer
+from ...optim.adam import Adam
+from ...optim.base import Optimizer
+
+
+class _GPVAE(nn.Module):
+ """model GPVAE with Gaussian Process prior
+
+ Parameters
+ ----------
+ input_dim : int,
+ the feature dimension of the input
+
+ time_length : int,
+ the length of each time series
+
+ latent_dim : int,
+ the feature dimension of the latent embedding
+
+ encoder_sizes : tuple,
+ the tuple of the network size in encoder
+
+ decoder_sizes : tuple,
+ the tuple of the network size in decoder
+
+ beta : float,
+ the weight of the KL divergence
+
+ M : int,
+ the number of Monte Carlo samples for ELBO estimation
+
+ K : int,
+ the number of importance weights for IWAE model
+
+ kernel : str,
+ the Gaussian Process kernel ["cauchy", "diffusion", "rbf", "matern"]
+
+ sigma : float,
+ the scale parameter for a kernel function
+
+ length_scale : float,
+ the length scale parameter for a kernel function
+
+ kernel_scales : int,
+ the number of different length scales over latent space dimensions
+ """
+
+ def __init__(
+ self,
+ input_dim,
+ time_length,
+ latent_dim,
+ encoder_sizes=(64, 64),
+ decoder_sizes=(64, 64),
+ beta=1,
+ M=1,
+ K=1,
+ kernel="cauchy",
+ sigma=1.0,
+ length_scale=7.0,
+ kernel_scales=1,
+ window_size=24,
+ ):
+ super().__init__()
+ self.kernel = kernel
+ self.sigma = sigma
+ self.length_scale = length_scale
+ self.kernel_scales = kernel_scales
+
+ self.input_dim = input_dim
+ self.time_length = time_length
+ self.latent_dim = latent_dim
+ self.beta = beta
+ self.encoder = Encoder(input_dim, latent_dim, encoder_sizes, window_size)
+ self.decoder = Decoder(latent_dim, input_dim, decoder_sizes)
+ self.M = M
+ self.K = K
+
+ # Precomputed KL components for efficiency
+ self.prior = self._init_prior()
+ # self.pz_scale_inv = None
+ # self.pz_scale_log_abs_determinant = None
+
+ def encode(self, x):
+ return self.encoder(x)
+
+ def decode(self, z):
+ if not torch.is_tensor(z):
+ z = torch.tensor(z).float()
+ num_dim = len(z.shape)
+ assert num_dim > 2
+ return self.decoder(torch.transpose(z, num_dim - 1, num_dim - 2))
+
+ def forward(self, inputs, training=True):
+ x = inputs["X"]
+ m_mask = inputs["missing_mask"]
+ x = x.repeat(self.M * self.K, 1, 1)
+ if m_mask is not None:
+ m_mask = m_mask.repeat(self.M * self.K, 1, 1)
+ m_mask = m_mask.type(torch.bool)
+
+ # pz = self.prior()
+ qz_x = self.encode(x)
+ z = qz_x.rsample()
+ px_z = self.decode(z)
+
+ nll = -px_z.log_prob(x)
+ nll = torch.where(torch.isfinite(nll), nll, torch.zeros_like(nll))
+ if m_mask is not None:
+ nll = torch.where(m_mask, nll, torch.zeros_like(nll))
+ nll = nll.sum(dim=(1, 2))
+
+ if self.K > 1:
+ kl = qz_x.log_prob(z) - self.prior.log_prob(z)
+ kl = torch.where(torch.isfinite(kl), kl, torch.zeros_like(kl))
+ kl = kl.sum(1)
+
+ weights = -nll - kl
+ weights = torch.reshape(weights, [self.M, self.K, -1])
+
+ elbo = torch.logsumexp(weights, dim=1)
+ elbo = elbo.mean()
+ else:
+ kl = self.kl_divergence(qz_x, self.prior)
+ kl = torch.where(torch.isfinite(kl), kl, torch.zeros_like(kl))
+ kl = kl.sum(1)
+
+ elbo = -nll - self.beta * kl
+ elbo = elbo.mean()
+
+ imputed_data = self.decode(self.encode(x).mean).mean * ~m_mask + x * m_mask
+
+ if not training:
+ # if not in training mode, return the classification result only
+ return {
+ "imputed_data": imputed_data,
+ }
+
+ results = {
+ "loss": -elbo.mean(),
+ "imputed_data": imputed_data,
+ }
+ return results
+
+ @staticmethod
+ def kl_divergence(a, b):
+ # TODO: different from the author's implementation
+ return torch.distributions.kl.kl_divergence(a, b)
+
+ def _init_prior(self):
+ # Compute kernel matrices for each latent dimension
+ kernel_matrices = []
+ for i in range(self.kernel_scales):
+ if self.kernel == "rbf":
+ kernel_matrices.append(
+ rbf_kernel(self.time_length, self.length_scale / 2**i)
+ )
+ elif self.kernel == "diffusion":
+ kernel_matrices.append(
+ diffusion_kernel(self.time_length, self.length_scale / 2**i)
+ )
+ elif self.kernel == "matern":
+ kernel_matrices.append(
+ matern_kernel(self.time_length, self.length_scale / 2**i)
+ )
+ elif self.kernel == "cauchy":
+ kernel_matrices.append(
+ cauchy_kernel(
+ self.time_length, self.sigma, self.length_scale / 2**i
+ )
+ )
+
+ # Combine kernel matrices for each latent dimension
+ tiled_matrices = []
+ total = 0
+ for i in range(self.kernel_scales):
+ if i == self.kernel_scales - 1:
+ multiplier = self.latent_dim - total
+ else:
+ multiplier = int(np.ceil(self.latent_dim / self.kernel_scales))
+ total += multiplier
+ tiled_matrices.append(
+ torch.unsqueeze(kernel_matrices[i], 0).repeat(multiplier, 1, 1)
+ )
+ kernel_matrix_tiled = torch.cat(tiled_matrices)
+ assert len(kernel_matrix_tiled) == self.latent_dim
+ prior = torch.distributions.MultivariateNormal(
+ loc=torch.zeros(self.latent_dim, self.time_length),
+ covariance_matrix=kernel_matrix_tiled,
+ )
+
+ return prior
+
+
+class GPVAE(BaseNNImputer):
+ """The PyTorch implementation of the GPVAE model :cite:``.
+
+ Parameters
+ ----------
+ beta:
+ The weight of KL divergence in EBLO.
+
+ kernel:
+ The type of kernel function chosen in the Gaussain Process Proir. ["cauchy", "diffusion", "rbf", "matern"]
+
+ batch_size :
+ The batch size for training and evaluating the model.
+
+ epochs :
+ The number of epochs for training the model.
+
+ patience :
+ The patience for the early-stopping mechanism. Given a positive integer, the training process will be
+ stopped when the model does not perform better after that number of epochs.
+ Leaving it default as None will disable the early-stopping.
+
+ optimizer :
+ The optimizer for model training.
+ If not given, will use a default Adam optimizer.
+
+ num_workers :
+ The number of subprocesses to use for data loading.
+ `0` means data loading will be in the main process, i.e. there won't be subprocesses.
+
+ device :
+ The device for the model to run on. It can be a string, a :class:`torch.device` object, or a list of them.
+ If not given, will try to use CUDA devices first (will use the default CUDA device if there are multiple),
+ then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models.
+ If given a list of devices, e.g. ['cuda:0', 'cuda:1'], or [torch.device('cuda:0'), torch.device('cuda:1')] , the
+ model will be parallely trained on the multiple devices (so far only support parallel training on CUDA devices).
+ Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.
+
+ saving_path :
+ The path for automatically saving model checkpoints and tensorboard files (i.e. loss values recorded during
+ training into a tensorboard file). Will not save if not given.
+
+ model_saving_strategy :
+ The strategy to save model checkpoints. It has to be one of [None, "best", "better"].
+ No model will be saved when it is set as None.
+ The "best" strategy will only automatically save the best model after the training finished.
+ The "better" strategy will automatically save the model during training whenever the model performs
+ better than in previous epochs.
+
+ Attributes
+ ----------
+ model : :class:`torch.nn.Module`
+ The underlying GPVAE model.
+
+ optimizer : :class:`pypots.optim.Optimizer`
+ The optimizer for model training.
+
+ """
+
+ def __init__(
+ self,
+ n_steps: int,
+ n_features: int,
+ latent_size: int,
+ encoder_sizes: tuple = (64, 64),
+ decoder_sizes: tuple = (64, 64),
+ kernel: str = "cauchy",
+ beta: float = 0.2,
+ M: int = 1,
+ K: int = 1,
+ sigma: float = 1.0,
+ length_scale: float = 7.0,
+ kernel_scales: int = 1,
+ window_size: int = 3,
+ batch_size: int = 32,
+ epochs: int = 100,
+ patience: int = None,
+ optimizer: Optional[Optimizer] = Adam(),
+ num_workers: int = 0,
+ device: Optional[Union[str, torch.device, list]] = None,
+ saving_path: str = None,
+ model_saving_strategy: Optional[str] = "best",
+ ):
+ super().__init__(
+ batch_size,
+ epochs,
+ patience,
+ num_workers,
+ device,
+ saving_path,
+ model_saving_strategy,
+ )
+
+ self.n_steps = n_steps
+ self.n_features = n_features
+ self.latent_size = latent_size
+ self.kernel = kernel
+ self.encoder_sizes = encoder_sizes
+ self.decoder_sizes = decoder_sizes
+ self.beta = beta
+ self.M = M
+ self.K = K
+ self.sigma = sigma
+ self.length_scale = length_scale
+ self.kernel_scales = kernel_scales
+
+ # set up the model
+ self.model = _GPVAE(
+ input_dim=self.n_features,
+ time_length=self.n_steps,
+ latent_dim=self.latent_size,
+ kernel=self.kernel,
+ encoder_sizes=self.encoder_sizes,
+ decoder_sizes=self.decoder_sizes,
+ beta=self.beta,
+ M=self.M,
+ K=self.K,
+ sigma=self.sigma,
+ length_scale=self.length_scale,
+ kernel_scales=self.kernel_scales,
+ window_size=window_size,
+ )
+ self._send_model_to_given_device()
+ self._print_model_size()
+
+ # set up the optimizer
+ self.optimizer = optimizer
+ self.optimizer.init_optimizer(self.model.parameters())
+
+ def _assemble_input_for_training(self, data: list) -> dict:
+ # fetch data
+ (
+ indices,
+ X,
+ missing_mask,
+ ) = self._send_data_to_given_device(data)
+
+ # assemble input data
+ inputs = {
+ "indices": indices,
+ "X": X,
+ "missing_mask": missing_mask,
+ }
+
+ return inputs
+
+ def _assemble_input_for_validating(self, data: list) -> dict:
+ return self._assemble_input_for_training(data)
+
+ def _assemble_input_for_testing(self, data: list) -> dict:
+ return self._assemble_input_for_validating(data)
+
+ def fit(
+ self,
+ train_set: Union[dict, str],
+ val_set: Optional[Union[dict, str]] = None,
+ file_type: str = "h5py",
+ ) -> None:
+ # Step 1: wrap the input data with classes Dataset and DataLoader
+ training_set = DatasetForGPVAE(
+ train_set, return_labels=False, file_type=file_type
+ )
+ training_loader = DataLoader(
+ training_set,
+ batch_size=self.batch_size,
+ shuffle=True,
+ num_workers=self.num_workers,
+ )
+ val_loader = None
+ if val_set is not None:
+ if isinstance(val_set, str):
+ with h5py.File(val_set, "r") as hf:
+ # Here we read the whole validation set from the file to mask a portion for validation.
+ # In PyPOTS, using a file usually because the data is too big. However, the validation set is
+ # generally shouldn't be too large. For example, we have 1 billion samples for model training.
+ # We won't take 20% of them as the validation set because we want as much as possible data for the
+ # training stage to enhance the model's generalization ability. Therefore, 100,000 representative
+ # samples will be enough to validate the model.
+ val_set = {
+ "X": hf["X"][:],
+ "X_intact": hf["X_intact"][:],
+ "indicating_mask": hf["indicating_mask"][:],
+ }
+ val_set = DatasetForGPVAE(val_set, return_labels=False, file_type=file_type)
+ val_loader = DataLoader(
+ val_set,
+ batch_size=self.batch_size,
+ shuffle=False,
+ num_workers=self.num_workers,
+ )
+
+ # Step 2: train the model and freeze it
+ self._train_model(training_loader, val_loader)
+ self.model.load_state_dict(self.best_model_dict)
+ self.model.eval() # set the model as eval status to freeze it.
+
+ # Step 3: save the model if necessary
+ self._auto_save_model_if_necessary(training_finished=True)
+
+ def impute(
+ self,
+ X: Union[dict, str],
+ file_type="h5py",
+ ) -> np.ndarray:
+ self.model.eval() # set the model as eval status to freeze it.
+ test_set = DatasetForGPVAE(X, return_labels=False, file_type=file_type)
+ test_loader = DataLoader(
+ test_set,
+ batch_size=self.batch_size,
+ shuffle=False,
+ num_workers=self.num_workers,
+ )
+ imputation_collector = []
+
+ with torch.no_grad():
+ for idx, data in enumerate(test_loader):
+ inputs = self._assemble_input_for_testing(data)
+ results = self.model.forward(inputs, training=False)
+ imputed_data = results["imputed_data"]
+ imputation_collector.append(imputed_data)
+
+ imputation_collector = torch.cat(imputation_collector)
+ return imputation_collector.cpu().detach().numpy()
diff --git a/pypots/imputation/gpvae/modules.py b/pypots/imputation/gpvae/modules.py
new file mode 100644
index 00000000..5ad81e09
--- /dev/null
+++ b/pypots/imputation/gpvae/modules.py
@@ -0,0 +1,261 @@
+"""
+The implementation of GP-VAE for the partially-observed time-series imputation task.
+
+Refer to the paper Fortuin V, Baranchuk D, Rätsch G, et al.
+GP-VAE: Deep probabilistic time series imputation. AISTATS. PMLR, 2020: 1651-1661.
+
+
+"""
+
+# Created by Jun Wang and Wenjie Du
+# License: GPL-v3
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def rbf_kernel(T, length_scale):
+ xs = torch.arange(T).float()
+ xs_in = torch.unsqueeze(xs, 0)
+ xs_out = torch.unsqueeze(xs, 1)
+ distance_matrix = (xs_in - xs_out) ** 2
+ distance_matrix_scaled = distance_matrix / length_scale**2
+ kernel_matrix = torch.exp(-distance_matrix_scaled)
+ return kernel_matrix
+
+
+def diffusion_kernel(T, length_scale):
+ assert length_scale < 0.5, (
+ "length_scale has to be smaller than 0.5 for the "
+ "kernel matrix to be diagonally dominant"
+ )
+ sigmas = torch.ones(T, T) * length_scale
+ sigmas_tridiag = torch.diagonal(sigmas, offset=0, dim1=-2, dim2=-1)
+ sigmas_tridiag += torch.diagonal(sigmas, offset=1, dim1=-2, dim2=-1)
+ sigmas_tridiag += torch.diagonal(sigmas, offset=-1, dim1=-2, dim2=-1)
+ kernel_matrix = sigmas_tridiag + torch.eye(T) * (1.0 - length_scale)
+ return kernel_matrix
+
+
+def matern_kernel(T, length_scale):
+ xs = torch.arange(T).float()
+ xs_in = torch.unsqueeze(xs, 0)
+ xs_out = torch.unsqueeze(xs, 1)
+ distance_matrix = torch.abs(xs_in - xs_out)
+ distance_matrix_scaled = distance_matrix / torch.sqrt(length_scale).type(
+ torch.float32
+ )
+ kernel_matrix = torch.exp(-distance_matrix_scaled)
+ return kernel_matrix
+
+
+def cauchy_kernel(T, sigma, length_scale):
+ xs = torch.arange(T).float()
+ xs_in = torch.unsqueeze(xs, 0)
+ xs_out = torch.unsqueeze(xs, 1)
+ distance_matrix = (xs_in - xs_out) ** 2
+ distance_matrix_scaled = distance_matrix / length_scale**2
+ kernel_matrix = sigma / (distance_matrix_scaled + 1.0)
+
+ alpha = 0.001
+ eye = torch.eye(kernel_matrix.shape[-1])
+ return kernel_matrix + alpha * eye
+
+
+def make_nn(input_size, output_size, hidden_sizes):
+ """This function used to creates fully connected neural network.
+
+ Parameters
+ ----------
+ input_size : int,
+ the dimension of input embeddings
+
+ output_size : int,
+ the dimension of out embeddings
+
+ hidden_sizes : tuple,
+ the tuple of hidden layer sizes, and the tuple length sets the number of hidden layers
+
+ Returns
+ -------
+ output: tensor
+ the processing embeddings
+ """
+ layers = []
+ for i in range(len(hidden_sizes)):
+ if i == 0:
+ layers.append(
+ nn.Linear(in_features=input_size, out_features=hidden_sizes[i])
+ )
+ else:
+ layers.append(
+ nn.Linear(in_features=hidden_sizes[i - 1], out_features=hidden_sizes[i])
+ )
+ layers.append(nn.ReLU())
+ layers.append(nn.Linear(in_features=hidden_sizes[-1], out_features=output_size))
+ return nn.Sequential(*layers)
+
+
+class CustomConv1d(torch.nn.Conv1d):
+ def __init(self, in_channels, out_channels, kernel_size, padding):
+ super().__init__(in_channels, out_channels, kernel_size, padding)
+
+ def forward(self, x):
+ if len(x.shape) > 2:
+ shape = list(np.arange(len(x.shape)))
+ new_shape = [0, shape[-1]] + shape[1:-1]
+ out = super(CustomConv1d, self).forward(x.permute(*new_shape))
+ shape = list(np.arange(len(out.shape)))
+ new_shape = [0, shape[-1]] + shape[1:-1]
+ if self.kernel_size[0] % 2 == 0:
+ out = F.pad(out, (0, -1), "constant", 0)
+ return out.permute(new_shape)
+
+ return super(CustomConv1d, self).forward(x)
+
+
+def make_cnn(input_size, output_size, hidden_sizes, kernel_size=3):
+ """This function used to construct neural network consisting of
+ one 1d-convolutional layer that utilizes temporal dependencies,
+ fully connected network
+
+ Parameters
+ ----------
+ input_size : int,
+ the dimension of input embeddings
+
+ output_size : int,
+ the dimension of out embeddings
+
+ hidden_sizes : tuple,
+ the tuple of hidden layer sizes, and the tuple length sets the number of hidden layers,
+
+ kernel_size : int
+ kernel size for convolutional layer
+
+ Returns
+ -------
+ output: tensor
+ the processing embeddings
+ """
+ padding = kernel_size // 2
+
+ cnn_layer = CustomConv1d(
+ input_size, hidden_sizes[0], kernel_size=kernel_size, padding=padding
+ )
+ layers = [cnn_layer]
+
+ for i, h in zip(hidden_sizes, hidden_sizes[1:]):
+ layers.extend([nn.Linear(i, h), nn.ReLU()])
+ if isinstance(output_size, tuple):
+ net = nn.Sequential(*layers)
+ return [net] + [nn.Linear(hidden_sizes[-1], o) for o in output_size]
+
+ layers.append(nn.Linear(hidden_sizes[-1], output_size))
+ return nn.Sequential(*layers)
+
+
+class Encoder(nn.Module):
+ def __init__(self, input_size, z_size, hidden_sizes=(128, 128), window_size=24):
+ """This module is an encoder with 1d-convolutional network and multivariate Normal posterior used by GP-VAE with
+ proposed banded covariance matrix
+
+ Parameters
+ ----------
+ input_size : int,
+ the feature dimension of the input
+
+ z_size : int,
+ the feature dimension of the output latent embedding
+
+ hidden_sizes : tuple,
+ the tuple of the hidden layer sizes, and the tuple length sets the number of hidden layers
+
+ window_size : int
+ the kernel size for the Conv1D layer
+ """
+ super().__init__()
+ self.z_size = int(z_size)
+ self.input_size = input_size
+ self.net, self.mu_layer, self.logvar_layer = make_cnn(
+ input_size, (z_size, z_size * 2), hidden_sizes, window_size
+ )
+
+ def forward(self, x):
+ mapped = self.net(x)
+ batch_size = mapped.size(0)
+ time_length = mapped.size(1)
+
+ num_dim = len(mapped.shape)
+ mu = self.mu_layer(mapped)
+ logvar = self.logvar_layer(mapped)
+ mapped_mean = torch.transpose(mu, num_dim - 1, num_dim - 2)
+ mapped_covar = torch.transpose(logvar, num_dim - 1, num_dim - 2)
+ mapped_covar = torch.sigmoid(mapped_covar)
+ mapped_reshaped = mapped_covar.reshape(batch_size, self.z_size, 2 * time_length)
+
+ dense_shape = [batch_size, self.z_size, time_length, time_length]
+ idxs_1 = np.repeat(np.arange(batch_size), self.z_size * (2 * time_length - 1))
+ idxs_2 = np.tile(
+ np.repeat(np.arange(self.z_size), (2 * time_length - 1)), batch_size
+ )
+ idxs_3 = np.tile(
+ np.concatenate([np.arange(time_length), np.arange(time_length - 1)]),
+ batch_size * self.z_size,
+ )
+ idxs_4 = np.tile(
+ np.concatenate([np.arange(time_length), np.arange(1, time_length)]),
+ batch_size * self.z_size,
+ )
+ idxs_all = np.stack([idxs_1, idxs_2, idxs_3, idxs_4], axis=1)
+
+ mapped_values = mapped_reshaped[:, :, :-1].reshape(-1)
+ prec_sparse = torch.sparse_coo_tensor(
+ torch.LongTensor(idxs_all).t().to(mapped.device),
+ (mapped_values).to(mapped.device),
+ (dense_shape),
+ )
+ prec_sparse = prec_sparse.coalesce()
+ prec_tril = prec_sparse.to_dense()
+ eye = (
+ torch.eye(prec_tril.shape[-1])
+ .unsqueeze(0)
+ .repeat(prec_tril.shape[0], prec_tril.shape[1], 1, 1)
+ .to(mapped.device)
+ )
+ prec_tril = prec_tril + eye
+ cov_tril = torch.linalg.solve_triangular(prec_tril, eye, upper=True)
+ cov_tril = torch.where(
+ torch.isfinite(cov_tril), cov_tril, torch.zeros_like(cov_tril)
+ ).to(mapped.device)
+
+ num_dim = len(cov_tril.shape)
+ cov_tril_lower = torch.transpose(cov_tril, num_dim - 1, num_dim - 2)
+
+ z_dist = torch.distributions.MultivariateNormal(
+ loc=mapped_mean, scale_tril=cov_tril_lower
+ )
+ return z_dist
+
+
+class Decoder(nn.Module):
+ def __init__(self, input_size, output_size, hidden_sizes=(256, 256)):
+ """This module is a decoder with Gaussian output distribution.
+
+ Parameters
+ ----------
+ output_size : int,
+ the feature dimension of the output
+
+ hidden_sizes: tuple
+ the tuple of hidden layer sizes, and the tuple length sets the number of hidden layers.
+ """
+ super().__init__()
+ self.net = make_nn(input_size, output_size, hidden_sizes)
+
+ def forward(self, x):
+ mu = self.net(x)
+ var = torch.ones_like(mu)
+ return torch.distributions.Normal(mu, var)
diff --git a/pypots/imputation/mrnn/module.py b/pypots/imputation/mrnn/module.py
index 873d2d73..a143d121 100644
--- a/pypots/imputation/mrnn/module.py
+++ b/pypots/imputation/mrnn/module.py
@@ -18,7 +18,7 @@
class FCN_Regression(nn.Module):
def __init__(self, feature_num, rnn_hid_size):
- super(FCN_Regression, self).__init__()
+ super().__init__()
self.feat_reg = FeatureRegression(rnn_hid_size * 2)
self.U = Parameter(torch.Tensor(feature_num, feature_num))
self.V1 = Parameter(torch.Tensor(feature_num, feature_num))
diff --git a/pypots/imputation/saits/data.py b/pypots/imputation/saits/data.py
index 2fb80bc3..5ff679a5 100644
--- a/pypots/imputation/saits/data.py
+++ b/pypots/imputation/saits/data.py
@@ -88,15 +88,15 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
indicating_mask : tensor.
The mask indicates artificially missing values in X.
"""
- X = self.X[idx]
+ X = self.X[idx].to(torch.float32)
X_intact, X, missing_mask, indicating_mask = mcar(X, rate=self.rate)
sample = [
torch.tensor(idx),
- X_intact.to(torch.float32),
- X.to(torch.float32),
- missing_mask.to(torch.float32),
- indicating_mask.to(torch.float32),
+ X_intact,
+ X,
+ missing_mask,
+ indicating_mask,
]
if self.y is not None and self.return_labels:
@@ -122,15 +122,15 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()
- X = torch.from_numpy(self.file_handle["X"][idx])
+ X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
X_intact, X, missing_mask, indicating_mask = mcar(X, rate=self.rate)
sample = [
torch.tensor(idx),
- X_intact.to(torch.float32),
- X.to(torch.float32),
- missing_mask.to(torch.float32),
- indicating_mask.to(torch.float32),
+ X_intact,
+ X,
+ missing_mask,
+ indicating_mask,
]
# if the dataset has labels and is for training, then fetch it from the file
diff --git a/pypots/imputation/template/dataset.py b/pypots/imputation/template/data.py
similarity index 100%
rename from pypots/imputation/template/dataset.py
rename to pypots/imputation/template/data.py
diff --git a/pypots/imputation/usgan/__init__.py b/pypots/imputation/usgan/__init__.py
new file mode 100644
index 00000000..fb388d94
--- /dev/null
+++ b/pypots/imputation/usgan/__init__.py
@@ -0,0 +1,12 @@
+"""
+The package of the partially-observed time-series imputation method USGAN.
+"""
+
+# Created by Jun Wang
+# License: GLP-v3
+
+from .model import USGAN
+
+__all__ = [
+ "USGAN",
+]
diff --git a/pypots/imputation/usgan/data.py b/pypots/imputation/usgan/data.py
new file mode 100644
index 00000000..bd012c30
--- /dev/null
+++ b/pypots/imputation/usgan/data.py
@@ -0,0 +1,46 @@
+"""
+Dataset class for model USGAN.
+"""
+
+# Created by Jun Wang and Wenjie Du
+# License: GLP-v3
+
+from typing import Union
+
+from ..brits.data import DatasetForBRITS
+
+
+class DatasetForUSGAN(DatasetForBRITS):
+ """Dataset class for USGAN, the same with the one for BRITS.
+
+ Parameters
+ ----------
+ data : dict or str,
+ The dataset for model input, should be a dictionary including keys as 'X' and 'y',
+ or a path string locating a data file.
+ If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
+ which is time-series data for input, can contain missing values, and y should be array-like of shape
+ [n_samples], which is classification labels of X.
+ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
+ key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
+
+ return_labels : bool, default = True,
+ Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example,
+ during training of classification models, the Dataset class will return labels in __getitem__() for model input.
+ Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we
+ need the defined Dataset class for all training/validating/testing stages. For those big datasets stored in h5
+ files, they already have both X and y saved. But we don't read labels from the file for validating and testing
+ with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for
+ distinction.
+
+ file_type : str, default = "h5py"
+ The type of the given file if train_set and val_set are path strings.
+ """
+
+ def __init__(
+ self,
+ data: Union[dict, str],
+ return_labels: bool = True,
+ file_type: str = "h5py",
+ ):
+ super().__init__(data, return_labels, file_type)
diff --git a/pypots/imputation/usgan/model.py b/pypots/imputation/usgan/model.py
new file mode 100644
index 00000000..c171d810
--- /dev/null
+++ b/pypots/imputation/usgan/model.py
@@ -0,0 +1,539 @@
+"""
+The implementation of USGAN for the partially-observed time-series imputation task.
+
+Refer to the paper "Miao, X., Wu, Y., Wang, J., Gao, Y., Mao, X., & Yin, J. (2021).
+Generative Semi-supervised Learning for Multivariate Time Series Imputation. AAAI 2021."
+
+"""
+
+# Created by Jun Wang and Wenjie Du
+# License: GPL-v3
+
+from typing import Union, Optional
+
+import h5py
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+
+from .data import DatasetForUSGAN
+from ..base import BaseNNImputer
+from ..brits.model import _BRITS
+from ...optim.adam import Adam
+from ...optim.base import Optimizer
+from ...utils.logging import logger
+
+
+class Discriminator(nn.Module):
+ """model Discriminator: built on BiRNN
+
+ Parameters
+ ----------
+ n_features :
+ the feature dimension of the input
+
+ rnn_hidden_size :
+ the hidden size of the RNN cell
+
+ hint_rate :
+ the hint rate for the input imputed_data
+
+ dropout_rate :
+ the dropout rate for the output layer
+
+ device :
+ specify running the model on which device, CPU/GPU
+
+ """
+
+ def __init__(
+ self,
+ n_features: int,
+ rnn_hidden_size: int,
+ hint_rate: float = 0.7,
+ dropout_rate: float = 0.0,
+ device: Union[str, torch.device] = "cpu",
+ ):
+ super().__init__()
+ self.hint_rate = hint_rate
+ self.device = device
+ self.biRNN = nn.GRU(
+ n_features * 2, rnn_hidden_size, bidirectional=True, batch_first=True
+ ).to(device)
+ self.dropout = nn.Dropout(dropout_rate).to(device)
+ self.read_out = nn.Linear(rnn_hidden_size * 2, n_features).to(device)
+
+ def forward(
+ self,
+ imputed_X: torch.Tensor,
+ missing_mask: torch.Tensor,
+ ) -> torch.Tensor:
+ """Forward processing of USGAN Discriminator.
+
+ Parameters
+ ----------
+ imputed_X : torch.Tensor,
+ The original X with missing parts already imputed.
+
+ missing_mask : torch.Tensor,
+ The missing mask of X.
+
+ Returns
+ -------
+ logits : torch.Tensor,
+ the logits of the probability of being the true value.
+
+ """
+
+ hint = (
+ torch.rand_like(missing_mask, dtype=torch.float, device=self.device)
+ < self.hint_rate
+ )
+ hint = hint.int()
+ h = hint * missing_mask + (1 - hint) * 0.5
+ x_in = torch.cat([imputed_X, h], dim=-1)
+
+ out, _ = self.biRNN(x_in)
+ logits = self.read_out(self.dropout(out))
+ return logits
+
+
+class _USGAN(nn.Module):
+ """model USGAN:
+ USGAN consists of a generator, a discriminator, which are all built on bidirectional recurrent neural networks.
+
+ Attributes
+ ----------
+ n_steps :
+ sequence length (number of time steps)
+
+ n_features :
+ number of features (input dimensions)
+
+ rnn_hidden_size :
+ the hidden size of the RNN cell
+
+ lambda_mse :
+ the weigth of the reconstruction loss
+
+ hint_rate :
+ the hint rate for the discriminator
+
+ dropout_rate :
+ the dropout rate for the last layer in Discriminator
+
+ device :
+ specify running the model on which device, CPU/GPU
+
+ """
+
+ def __init__(
+ self,
+ n_steps: int,
+ n_features: int,
+ rnn_hidden_size: int,
+ lambda_mse: float,
+ hint_rate: float = 0.7,
+ dropout_rate: float = 0.0,
+ device: Union[str, torch.device] = "cpu",
+ ):
+ super().__init__()
+ self.generator = _BRITS(n_steps, n_features, rnn_hidden_size, device)
+ self.discriminator = Discriminator(
+ n_features,
+ rnn_hidden_size,
+ hint_rate=hint_rate,
+ dropout_rate=dropout_rate,
+ device=device,
+ )
+
+ self.lambda_mse = lambda_mse
+ self.device = device
+
+ def forward(
+ self,
+ inputs: dict,
+ training_object: str = "generator",
+ training: bool = True,
+ ) -> dict:
+ assert training_object in [
+ "generator",
+ "discriminator",
+ ], 'training_object should be "generator" or "discriminator"'
+
+ forward_X = inputs["forward"]["X"]
+ forward_missing_mask = inputs["forward"]["missing_mask"]
+ losses = {}
+ results = self.generator(inputs, training=training)
+ inputs["discrimination"] = self.discriminator(forward_X, forward_missing_mask)
+ if not training:
+ # if only run imputation operation, then no need to calculate loss
+ return results
+
+ if training_object == "discriminator":
+ l_D = F.binary_cross_entropy_with_logits(
+ inputs["discrimination"], forward_missing_mask
+ )
+ losses["discrimination_loss"] = l_D
+ else:
+ inputs["discrimination"] = inputs["discrimination"].detach()
+ l_G = F.binary_cross_entropy_with_logits(
+ inputs["discrimination"],
+ 1 - forward_missing_mask,
+ weight=1 - forward_missing_mask,
+ )
+ loss_gene = l_G + self.lambda_mse * results["loss"]
+ losses["generation_loss"] = loss_gene
+
+ losses["imputed_data"] = results["imputed_data"]
+ return losses
+
+
+class USGAN(BaseNNImputer):
+ """The PyTorch implementation of the CRLI model :cite:`ma2021CRLI`.
+
+ Parameters
+ ----------
+ n_steps :
+ The number of time steps in the time-series data sample.
+
+ n_features :
+ The number of features in the time-series data sample.
+
+ rnn_hidden_size :
+ the hidden size of the RNN cell
+
+ lambda_mse :
+ the weight of the reconstruction loss
+
+ hint_rate :
+ the hint rate for the discriminator
+
+ dropout_rate :
+ the dropout rate for the last layer in Discriminator
+
+ G_steps :
+ The number of steps to train the generator in each iteration.
+
+ D_steps :
+ The number of steps to train the discriminator in each iteration.
+
+ batch_size :
+ The batch size for training and evaluating the model.
+
+ epochs :
+ The number of epochs for training the model.
+
+ patience :
+ The patience for the early-stopping mechanism. Given a positive integer, the training process will be
+ stopped when the model does not perform better after that number of epochs.
+ Leaving it default as None will disable the early-stopping.
+
+ G_optimizer :
+ The optimizer for the generator training.
+ If not given, will use a default Adam optimizer.
+
+ D_optimizer :
+ The optimizer for the discriminator training.
+ If not given, will use a default Adam optimizer.
+
+ num_workers :
+ The number of subprocesses to use for data loading.
+ `0` means data loading will be in the main process, i.e. there won't be subprocesses.
+
+ device :
+ The device for the model to run on. It can be a string, a :class:`torch.device` object, or a list of them.
+ If not given, will try to use CUDA devices first (will use the default CUDA device if there are multiple),
+ then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models.
+ If given a list of devices, e.g. ['cuda:0', 'cuda:1'], or [torch.device('cuda:0'), torch.device('cuda:1')] , the
+ model will be parallely trained on the multiple devices (so far only support parallel training on CUDA devices).
+ Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.
+
+ saving_path :
+ The path for automatically saving model checkpoints and tensorboard files (i.e. loss values recorded during
+ training into a tensorboard file). Will not save if not given.
+
+ model_saving_strategy :
+ The strategy to save model checkpoints. It has to be one of [None, "best", "better"].
+ No model will be saved when it is set as None.
+ The "best" strategy will only automatically save the best model after the training finished.
+ The "better" strategy will automatically save the model during training whenever the model performs
+ better than in previous epochs.
+
+ Attributes
+ ----------
+ model : :class:`torch.nn.Module`
+ The underlying CRLI model.
+
+ optimizer : :class:`pypots.optim.Optimizer`
+ The optimizer for model training.
+
+ """
+
+ def __init__(
+ self,
+ n_steps: int,
+ n_features: int,
+ rnn_hidden_size: int,
+ lambda_mse: float = 1,
+ hint_rate: float = 0.7,
+ dropout_rate: float = 0.0,
+ G_steps: int = 1,
+ D_steps: int = 1,
+ batch_size: int = 32,
+ epochs: int = 100,
+ patience: Optional[int] = None,
+ G_optimizer: Optional[Optimizer] = Adam(),
+ D_optimizer: Optional[Optimizer] = Adam(),
+ num_workers: int = 0,
+ device: Optional[Union[str, torch.device, list]] = None,
+ saving_path: Optional[str] = None,
+ model_saving_strategy: Optional[str] = "best",
+ ):
+ super().__init__(
+ batch_size,
+ epochs,
+ patience,
+ num_workers,
+ device,
+ saving_path,
+ model_saving_strategy,
+ )
+ assert G_steps > 0 and D_steps > 0, "G_steps and D_steps should both >0"
+
+ self.n_steps = n_steps
+ self.n_features = n_features
+ self.G_steps = G_steps
+ self.D_steps = D_steps
+
+ # set up the model
+ self.model = _USGAN(
+ n_steps,
+ n_features,
+ rnn_hidden_size,
+ lambda_mse,
+ hint_rate,
+ dropout_rate,
+ self.device,
+ )
+ self._send_model_to_given_device()
+ self._print_model_size()
+
+ # set up the optimizer
+ self.G_optimizer = G_optimizer
+ self.G_optimizer.init_optimizer(self.model.generator.parameters())
+ self.D_optimizer = D_optimizer
+ self.D_optimizer.init_optimizer(self.model.discriminator.parameters())
+
+ def _assemble_input_for_training(self, data: list) -> dict:
+ # fetch data
+ (
+ indices,
+ X,
+ missing_mask,
+ deltas,
+ back_X,
+ back_missing_mask,
+ back_deltas,
+ ) = self._send_data_to_given_device(data)
+
+ # assemble input data
+ inputs = {
+ "indices": indices,
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
+ "backward": {
+ "X": back_X,
+ "missing_mask": back_missing_mask,
+ "deltas": back_deltas,
+ },
+ }
+
+ return inputs
+
+ def _assemble_input_for_validating(self, data: list) -> dict:
+ return self._assemble_input_for_training(data)
+
+ def _assemble_input_for_testing(self, data: list) -> dict:
+ return self._assemble_input_for_validating(data)
+
+ def _train_model(
+ self,
+ training_loader: DataLoader,
+ val_loader: DataLoader = None,
+ ) -> None:
+ # each training starts from the very beginning, so reset the loss and model dict here
+ self.best_loss = float("inf")
+ self.best_model_dict = None
+
+ try:
+ training_step = 0
+ epoch_train_loss_G_collector = []
+ epoch_train_loss_D_collector = []
+ for epoch in range(self.epochs):
+ self.model.train()
+ for idx, data in enumerate(training_loader):
+ training_step += 1
+ inputs = self._assemble_input_for_training(data)
+
+ step_train_loss_G_collector = []
+ step_train_loss_D_collector = []
+
+ if idx % self.G_steps == 0:
+ self.G_optimizer.zero_grad()
+ results = self.model.forward(
+ inputs, training_object="generator"
+ )
+ results["generation_loss"].backward()
+ self.G_optimizer.step()
+ step_train_loss_G_collector.append(
+ results["generation_loss"].item()
+ )
+
+ if idx % self.D_steps == 0:
+ self.D_optimizer.zero_grad()
+ results = self.model.forward(
+ inputs, training_object="discriminator"
+ )
+ results["discrimination_loss"].backward(retain_graph=True)
+ self.D_optimizer.step()
+ step_train_loss_D_collector.append(
+ results["discrimination_loss"].item()
+ )
+
+ mean_step_train_D_loss = np.mean(step_train_loss_D_collector)
+ mean_step_train_G_loss = np.mean(step_train_loss_G_collector)
+
+ epoch_train_loss_D_collector.append(mean_step_train_D_loss)
+ epoch_train_loss_G_collector.append(mean_step_train_G_loss)
+
+ # save training loss logs into the tensorboard file for every step if in need
+ # Note: the `training_step` is not the actual number of steps that Discriminator and Generator get
+ # trained, the actual number should be D_steps*training_step and G_steps*training_step accordingly
+ if self.summary_writer is not None:
+ loss_results = {
+ "generation_loss": mean_step_train_G_loss,
+ "discrimination_loss": mean_step_train_D_loss,
+ }
+ self._save_log_into_tb_file(
+ training_step, "training", loss_results
+ )
+ mean_epoch_train_D_loss = np.mean(epoch_train_loss_D_collector)
+ mean_epoch_train_G_loss = np.mean(epoch_train_loss_G_collector)
+ logger.info(
+ f"epoch {epoch}: "
+ f"training loss_generator {mean_epoch_train_G_loss:.4f}, "
+ f"train loss_discriminator {mean_epoch_train_D_loss:.4f}"
+ )
+ mean_loss = mean_epoch_train_G_loss
+
+ if mean_loss < self.best_loss:
+ self.best_loss = mean_loss
+ self.best_model_dict = self.model.state_dict()
+ self.patience = self.original_patience
+ # save the model if necessary
+ self._auto_save_model_if_necessary(
+ training_finished=False,
+ saving_name=f"{self.__class__.__name__}_epoch{epoch}_loss{mean_loss}",
+ )
+ else:
+ self.patience -= 1
+ if self.patience == 0:
+ logger.info(
+ "Exceeded the training patience. Terminating the training procedure..."
+ )
+ break
+ except Exception as e:
+ logger.error(f"Exception: {e}")
+ if self.best_model_dict is None:
+ raise RuntimeError(
+ "Training got interrupted. Model was not trained. Please investigate the error printed above."
+ )
+ else:
+ RuntimeWarning(
+ "Training got interrupted. Please investigate the error printed above.\n"
+ "Model got trained and will load the best checkpoint so far for testing.\n"
+ "If you don't want it, please try fit() again."
+ )
+
+ if np.equal(self.best_loss, float("inf")):
+ raise ValueError("Something is wrong. best_loss is Nan after training.")
+
+ logger.info("Finished training.")
+
+ def fit(
+ self,
+ train_set: Union[dict, str],
+ val_set: Optional[Union[dict, str]] = None,
+ file_type: str = "h5py",
+ ) -> None:
+ # Step 1: wrap the input data with classes Dataset and DataLoader
+ training_set = DatasetForUSGAN(
+ train_set, return_labels=False, file_type=file_type
+ )
+ training_loader = DataLoader(
+ training_set,
+ batch_size=self.batch_size,
+ shuffle=True,
+ num_workers=self.num_workers,
+ )
+ val_loader = None
+ if val_set is not None:
+ if isinstance(val_set, str):
+ with h5py.File(val_set, "r") as hf:
+ # Here we read the whole validation set from the file to mask a portion for validation.
+ # In PyPOTS, using a file usually because the data is too big. However, the validation set is
+ # generally shouldn't be too large. For example, we have 1 billion samples for model training.
+ # We won't take 20% of them as the validation set because we want as much as possible data for the
+ # training stage to enhance the model's generalization ability. Therefore, 100,000 representative
+ # samples will be enough to validate the model.
+ val_set = {
+ "X": hf["X"][:],
+ "X_intact": hf["X_intact"][:],
+ "indicating_mask": hf["indicating_mask"][:],
+ }
+ val_set = DatasetForUSGAN(val_set, return_labels=False, file_type=file_type)
+ val_loader = DataLoader(
+ val_set,
+ batch_size=self.batch_size,
+ shuffle=False,
+ num_workers=self.num_workers,
+ )
+
+ # Step 2: train the model and freeze it
+ self._train_model(training_loader, val_loader)
+ self.model.load_state_dict(self.best_model_dict)
+ self.model.eval() # set the model as eval status to freeze it.
+
+ # Step 3: save the model if necessary
+ self._auto_save_model_if_necessary(training_finished=True)
+
+ def impute(
+ self,
+ X: Union[dict, str],
+ file_type="h5py",
+ ) -> np.ndarray:
+ self.model.eval() # set the model as eval status to freeze it.
+ test_set = DatasetForUSGAN(X, return_labels=False, file_type=file_type)
+ test_loader = DataLoader(
+ test_set,
+ batch_size=self.batch_size,
+ shuffle=False,
+ num_workers=self.num_workers,
+ )
+ imputation_collector = []
+
+ with torch.no_grad():
+ for idx, data in enumerate(test_loader):
+ inputs = self._assemble_input_for_testing(data)
+ results = self.model.forward(inputs, training=False)
+ imputed_data = results["imputed_data"]
+ imputation_collector.append(imputed_data)
+
+ imputation_collector = torch.cat(imputation_collector)
+ return imputation_collector.cpu().detach().numpy()
diff --git a/pypots/utils/metrics.py b/pypots/utils/metrics.py
index 85efb54d..cc349b50 100644
--- a/pypots/utils/metrics.py
+++ b/pypots/utils/metrics.py
@@ -574,73 +574,90 @@ def cal_cluster_purity(
return cluster_purity
-def cal_silhouette(
- latent_rep: np.ndarray,
- class_predictions: np.ndarray
-) -> float:
+def cal_silhouette(X: np.ndarray, predicted_labels: np.ndarray) -> float:
"""Compute the mean Silhouette Coefficient of all samples.
Parameters
----------
- latent_rep :
- Latent representation learned by a clusterer.
+ X : array-like of shape (n_samples_a, n_features)
+ A feature array, or learned latent representation, that can be used for clustering.
- class_predictions :
- Clustering results returned by a clusterer.
+ predicted_labels : array-like of shape (n_samples)
+ Predicted labels for each sample.
Returns
-------
- silhouette :
+ silhouette_score : float
Mean Silhouette Coefficient for all samples.
"""
- silhouette = metrics.silhouette_score(latent_rep, class_predictions)
- return silhouette
+ silhouette_score = metrics.silhouette_score(X, predicted_labels)
+ return silhouette_score
-def cal_chs(
- latent_rep: np.ndarray,
- class_predictions: np.ndarray
-) -> float:
+def cal_chs(X: np.ndarray, predicted_labels: np.ndarray) -> float:
"""Compute the Calinski and Harabasz score (also known as the Variance Ratio Criterion).
- Parameters
- ----------
- latent_rep :
- Latent representation learned by a clusterer.
-
- class_predictions :
- Clustering results returned by a clusterer.
+ X : array-like of shape (n_samples_a, n_features)
+ A feature array, or learned latent representation, that can be used for clustering.
+ predicted_labels : array-like of shape (n_samples)
+ Predicted labels for each sample.
Returns
-------
- chs :
+ calinski_harabasz_score : float
The resulting Calinski-Harabasz score.
"""
- chs = metrics.calinski_harabasz_score(latent_rep, class_predictions)
- return chs
+ calinski_harabasz_score = metrics.calinski_harabasz_score(X, predicted_labels)
+ return calinski_harabasz_score
-def cal_dbs(
- latent_rep: np.ndarray,
- class_predictions: np.ndarray
-) -> float:
+def cal_dbs(X: np.ndarray, predicted_labels: np.ndarray) -> float:
"""Compute the Davies-Bouldin score.
Parameters
----------
- latent_rep :
- Latent representation learned by a clusterer.
+ X : array-like of shape (n_samples_a, n_features)
+ A feature array, or learned latent representation, that can be used for clustering.
- class_predictions :
- Clustering results returned by a clusterer.
+ predicted_labels : array-like of shape (n_samples)
+ Predicted labels for each sample.
Returns
-------
- dbs :
+ davies_bouldin_score : float
The resulting Davies-Bouldin score.
"""
- dbs = metrics.davies_bouldin_score(latent_rep, class_predictions)
- return dbs
+ davies_bouldin_score = metrics.davies_bouldin_score(X, predicted_labels)
+ return davies_bouldin_score
+
+
+def cal_internal_cluster_validation_metrics(X, predicted_labels):
+ """Computer all internal cluster validation metrics available in PyPOTS and return as a dictionary.
+
+ Parameters
+ ----------
+ X : array-like of shape (n_samples_a, n_features)
+ A feature array, or learned latent representation, that can be used for clustering.
+
+ predicted_labels : array-like of shape (n_samples)
+ Predicted labels for each sample.
+
+ Returns
+ -------
+ internal_cluster_validation_metrics : dict
+ A dictionary contains all internal cluster validation metrics available in PyPOTS.
+ """
+
+ silhouette_score = cal_silhouette(X, predicted_labels)
+ calinski_harabasz_score = cal_chs(X, predicted_labels)
+ davies_bouldin_score = cal_dbs(X, predicted_labels)
+
+ internal_cluster_validation_metrics = {
+ "silhouette_score": silhouette_score,
+ "calinski_harabasz_score": calinski_harabasz_score,
+ "davies_bouldin_score": davies_bouldin_score,
+ }
+ return internal_cluster_validation_metrics
diff --git a/tests/classification/__init__.py b/tests/classification/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/classification/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/classification/brits.py b/tests/classification/brits.py
new file mode 100644
index 00000000..b1905c39
--- /dev/null
+++ b/tests/classification/brits.py
@@ -0,0 +1,106 @@
+"""
+Test cases for BRITS classification model.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+import unittest
+
+import pytest
+
+from pypots.classification import BRITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_binary_classification_metrics
+from tests.classification.config import (
+ EPOCHS,
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_CLASSIFICATION,
+)
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestBRITS(unittest.TestCase):
+ logger.info("Running tests for a classification model BRITS...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS")
+ model_save_name = "saved_BRITS_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a BRITS model
+ brits = BRITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_classes=DATA["n_classes"],
+ rnn_hidden_size=256,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ model_saving_strategy="better",
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="classification-brits")
+ def test_0_fit(self):
+ self.brits.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="classification-brits")
+ def test_1_classify(self):
+ predictions = self.brits.classify(TEST_SET)
+ metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
+ logger.info(
+ f'ROC_AUC: {metrics["roc_auc"]}, \n'
+ f'PR_AUC: {metrics["pr_auc"]},\n'
+ f'F1: {metrics["f1"]},\n'
+ f'Precision: {metrics["precision"]},\n'
+ f'Recall: {metrics["recall"]},\n'
+ )
+ assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
+
+ @pytest.mark.xdist_group(name="classification-brits")
+ def test_2_parameters(self):
+ assert hasattr(self.brits, "model") and self.brits.model is not None
+
+ assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
+
+ assert hasattr(self.brits, "best_loss")
+ self.assertNotEqual(self.brits.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.brits, "best_model_dict")
+ and self.brits.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="classification-brits")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.brits)
+
+ # save the trained model into file, and check if the path exists
+ self.brits.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.brits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/classification/config.py b/tests/classification/config.py
new file mode 100644
index 00000000..35b17029
--- /dev/null
+++ b/tests/classification/config.py
@@ -0,0 +1,21 @@
+"""
+Test configs for classification models.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+
+from tests.global_test_config import (
+ DATA,
+ RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]}
+VAL_SET = {"X": DATA["val_X"], "y": DATA["val_y"]}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification")
diff --git a/tests/classification/grud.py b/tests/classification/grud.py
new file mode 100644
index 00000000..a662cb70
--- /dev/null
+++ b/tests/classification/grud.py
@@ -0,0 +1,105 @@
+"""
+Test cases for GRUD classification model.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+import unittest
+
+import pytest
+
+from pypots.classification import GRUD
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_binary_classification_metrics
+from tests.classification.config import (
+ EPOCHS,
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_CLASSIFICATION,
+)
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestGRUD(unittest.TestCase):
+ logger.info("Running tests for a classification model GRUD...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD")
+ model_save_name = "saved_GRUD_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a GRUD model
+ grud = GRUD(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_classes=DATA["n_classes"],
+ rnn_hidden_size=256,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="classification-grud")
+ def test_0_fit(self):
+ self.grud.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="classification-grud")
+ def test_1_classify(self):
+ predictions = self.grud.classify(TEST_SET)
+ metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
+ logger.info(
+ f'ROC_AUC: {metrics["roc_auc"]}, \n'
+ f'PR_AUC: {metrics["pr_auc"]},\n'
+ f'F1: {metrics["f1"]},\n'
+ f'Precision: {metrics["precision"]},\n'
+ f'Recall: {metrics["recall"]},\n'
+ )
+ assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
+
+ @pytest.mark.xdist_group(name="classification-grud")
+ def test_2_parameters(self):
+ assert hasattr(self.grud, "model") and self.grud.model is not None
+
+ assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None
+
+ assert hasattr(self.grud, "best_loss")
+ self.assertNotEqual(self.grud.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.grud, "best_model_dict")
+ and self.grud.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="classification-grud")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.grud)
+
+ # save the trained model into file, and check if the path exists
+ self.grud.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.grud.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/classification/raindrop.py b/tests/classification/raindrop.py
new file mode 100644
index 00000000..277164dc
--- /dev/null
+++ b/tests/classification/raindrop.py
@@ -0,0 +1,110 @@
+"""
+Test cases for Raindrop classification model.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+import unittest
+
+import pytest
+
+from pypots.classification import Raindrop
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_binary_classification_metrics
+from tests.classification.config import (
+ EPOCHS,
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_CLASSIFICATION,
+)
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestRaindrop(unittest.TestCase):
+ logger.info("Running tests for a classification model Raindrop...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop")
+ model_save_name = "saved_Raindrop_model.pypots"
+
+ # initialize a Raindrop model
+ raindrop = Raindrop(
+ DATA["n_steps"],
+ DATA["n_features"],
+ DATA["n_classes"],
+ n_layers=2,
+ d_model=DATA["n_features"] * 4,
+ d_inner=256,
+ n_heads=2,
+ dropout=0.3,
+ d_static=0,
+ aggregation="mean",
+ sensor_wise_mask=False,
+ static=False,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="classification-raindrop")
+ def test_0_fit(self):
+ self.raindrop.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="classification-raindrop")
+ def test_1_classify(self):
+ predictions = self.raindrop.classify(TEST_SET)
+ metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
+ logger.info(
+ f'ROC_AUC: {metrics["roc_auc"]}, \n'
+ f'PR_AUC: {metrics["pr_auc"]},\n'
+ f'F1: {metrics["f1"]},\n'
+ f'Precision: {metrics["precision"]},\n'
+ f'Recall: {metrics["recall"]},\n'
+ )
+ assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
+
+ @pytest.mark.xdist_group(name="classification-raindrop")
+ def test_2_parameters(self):
+ assert hasattr(self.raindrop, "model") and self.raindrop.model is not None
+
+ assert (
+ hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None
+ )
+
+ assert hasattr(self.raindrop, "best_loss")
+ self.assertNotEqual(self.raindrop.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.raindrop, "best_model_dict")
+ and self.raindrop.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="classification-raindrop")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.raindrop)
+
+ # save the trained model into file, and check if the path exists
+ self.raindrop.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.raindrop.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/cli/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/cli/config.py b/tests/cli/config.py
new file mode 100644
index 00000000..defdb211
--- /dev/null
+++ b/tests/cli/config.py
@@ -0,0 +1,11 @@
+"""
+Test configs for CLI tools.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+
+
+PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), "../../.."))
diff --git a/tests/cli/dev.py b/tests/cli/dev.py
new file mode 100644
index 00000000..4387be29
--- /dev/null
+++ b/tests/cli/dev.py
@@ -0,0 +1,92 @@
+"""
+Test cases for the functions and classes in package `pypots.cli.dev`.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+import threading
+import unittest
+from argparse import Namespace
+from copy import copy
+
+import pytest
+
+from pypots.cli.dev import dev_command_factory
+from tests.cli.config import PROJECT_ROOT_DIR
+
+
+def callback_func():
+ raise TimeoutError("Time out.")
+
+
+def time_out(interval, callback):
+ def decorator(func):
+ def wrapper(*args, **kwargs):
+ t = threading.Thread(target=func, args=args, kwargs=kwargs)
+ t.setDaemon(True)
+ t.start()
+ t.join(interval) # wait for interval seconds
+ if t.is_alive():
+ return threading.Timer(0, callback).start() # invoke callback()
+ else:
+ return
+
+ return wrapper
+
+ return decorator
+
+
+@pytest.mark.xfail(reason="Allow tests for CLI to fail")
+class TestPyPOTSCLIDev(unittest.TestCase):
+ # set up the default arguments
+ default_arguments = {
+ "build": False,
+ "cleanup": False,
+ "run_tests": False,
+ "k": None,
+ "show_coverage": False,
+ "lint_code": False,
+ }
+ # `pypots-cli dev` must run under the project root dir
+ os.chdir(PROJECT_ROOT_DIR)
+
+ @pytest.mark.xdist_group(name="cli-dev")
+ def test_0_build(self):
+ arguments = copy(self.default_arguments)
+ arguments["build"] = True
+ args = Namespace(**arguments)
+ dev_command_factory(args).run()
+
+ @pytest.mark.xdist_group(name="cli-dev")
+ def test_1_run_tests(self):
+ arguments = copy(self.default_arguments)
+ arguments["run_tests"] = True
+ arguments["k"] = "try_to_find_a_non_existing_test_case"
+ args = Namespace(**arguments)
+ try:
+ dev_command_factory(args).run()
+ except RuntimeError: # try to find a non-existing test case, so RuntimeError will be raised
+ pass
+ except Exception as e: # other exceptions will cause an error and result in failed testing
+ raise e
+
+ # Don't test --lint-code because Black will reformat the code and cause error when generating the coverage report
+ # @pytest.mark.xdist_group(name="cli-dev")
+ # def test_2_lint_code(self):
+ # arguments = copy(self.default_arguments)
+ # arguments["lint_code"] = True
+ # args = Namespace(**arguments)
+ # dev_command_factory(args).run()
+
+ @pytest.mark.xdist_group(name="cli-dev")
+ def test_3_cleanup(self):
+ arguments = copy(self.default_arguments)
+ arguments["cleanup"] = True
+ args = Namespace(**arguments)
+ dev_command_factory(args).run()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/cli/doc.py b/tests/cli/doc.py
new file mode 100644
index 00000000..85e4e190
--- /dev/null
+++ b/tests/cli/doc.py
@@ -0,0 +1,104 @@
+"""
+Test cases for the functions and classes in package `pypots.cli.doc`.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+import threading
+import unittest
+from argparse import Namespace
+from copy import copy
+
+import pytest
+
+from pypots.cli.doc import doc_command_factory
+from pypots.utils.logging import logger
+from tests.cli.config import PROJECT_ROOT_DIR
+
+
+def callback_func():
+ raise TimeoutError("Time out.")
+
+
+def time_out(interval, callback):
+ def decorator(func):
+ def wrapper(*args, **kwargs):
+ t = threading.Thread(target=func, args=args, kwargs=kwargs)
+ t.setDaemon(True)
+ t.start()
+ t.join(interval) # wait for interval seconds
+ if t.is_alive():
+ return threading.Timer(0, callback).start() # invoke callback()
+ else:
+ return
+
+ return wrapper
+
+ return decorator
+
+
+@pytest.mark.xfail(reason="Allow tests for CLI to fail")
+class TestPyPOTSCLIDoc(unittest.TestCase):
+ # set up the default arguments
+ default_arguments = {
+ "gene_rst": False,
+ "branch": "main",
+ "gene_html": False,
+ "view_doc": False,
+ "port": 9075,
+ "cleanup": False,
+ }
+ # `pypots-cli doc` must run under the project root dir
+ os.chdir(PROJECT_ROOT_DIR)
+
+ @pytest.mark.xdist_group(name="cli-doc")
+ def test_0_gene_rst(self):
+ arguments = copy(self.default_arguments)
+ arguments["gene_rst"] = True
+ args = Namespace(**arguments)
+ doc_command_factory(args).run()
+
+ logger.info("run again under a non-root dir")
+ try:
+ os.chdir(os.path.abspath(os.path.join(PROJECT_ROOT_DIR, "pypots")))
+ doc_command_factory(args).run()
+ except RuntimeError: # try to run under a non-root dir, so RuntimeError will be raised
+ pass
+ except Exception as e: # other exceptions will cause an error and result in failed testing
+ raise e
+ finally:
+ os.chdir(PROJECT_ROOT_DIR)
+
+ @pytest.mark.xdist_group(name="cli-doc")
+ def test_1_gene_html(self):
+ arguments = copy(self.default_arguments)
+ arguments["gene_html"] = True
+ args = Namespace(**arguments)
+ try:
+ doc_command_factory(args).run()
+ except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
+ logger.error(e)
+
+ @pytest.mark.xdist_group(name="cli-doc")
+ @time_out(2, callback_func) # wait for two seconds
+ def test_2_view_doc(self):
+ arguments = copy(self.default_arguments)
+ arguments["view_doc"] = True
+ args = Namespace(**arguments)
+ try:
+ doc_command_factory(args).run()
+ except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
+ logger.error(e)
+
+ @pytest.mark.xdist_group(name="cli-doc")
+ def test_3_cleanup(self):
+ arguments = copy(self.default_arguments)
+ arguments["cleanup"] = True
+ args = Namespace(**arguments)
+ doc_command_factory(args).run()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/cli/env.py b/tests/cli/env.py
new file mode 100644
index 00000000..36b5b20e
--- /dev/null
+++ b/tests/cli/env.py
@@ -0,0 +1,49 @@
+"""
+Test cases for the functions and classes in package `pypots.cli.env`.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+import unittest
+from argparse import Namespace
+from copy import copy
+
+import pytest
+
+from pypots.cli.env import env_command_factory
+from pypots.utils.logging import logger
+from tests.cli.config import PROJECT_ROOT_DIR
+
+
+@pytest.mark.xfail(reason="Allow tests for CLI to fail")
+class TestPyPOTSCLIEnv(unittest.TestCase):
+ # set up the default arguments
+ default_arguments = {
+ "install": "optional",
+ "tool": "conda",
+ }
+
+ # `pypots-cli env` must run under the project root dir
+ os.chdir(PROJECT_ROOT_DIR)
+
+ @pytest.mark.xdist_group(name="cli-env")
+ def test_0_install_with_conda(self):
+ arguments = copy(self.default_arguments)
+ arguments["tool"] = "conda"
+ args = Namespace(**arguments)
+ try:
+ env_command_factory(args).run()
+ except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
+ logger.error(e)
+
+ @pytest.mark.xdist_group(name="cli-env")
+ def test_1_install_with_pip(self):
+ arguments = copy(self.default_arguments)
+ arguments["tool"] = "pip"
+ args = Namespace(**arguments)
+ try:
+ env_command_factory(args).run()
+ except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
+ logger.error(e)
diff --git a/tests/clustering/__init__.py b/tests/clustering/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/clustering/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/clustering/config.py b/tests/clustering/config.py
new file mode 100644
index 00000000..aa43d7dd
--- /dev/null
+++ b/tests/clustering/config.py
@@ -0,0 +1,22 @@
+"""
+Test configs for clustering models.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+
+from tests.global_test_config import (
+ DATA,
+ RESULT_SAVING_DIR,
+)
+
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {"X": DATA["val_X"]}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering")
diff --git a/tests/clustering/crli.py b/tests/clustering/crli.py
new file mode 100644
index 00000000..923911fd
--- /dev/null
+++ b/tests/clustering/crli.py
@@ -0,0 +1,103 @@
+"""
+Test cases for CRLI clustering model.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+
+import os
+import unittest
+
+import pytest
+
+from pypots.clustering import CRLI
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_rand_index, cal_cluster_purity
+from tests.clustering.config import (
+ EPOCHS,
+ TRAIN_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_CLUSTERING,
+)
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestCRLI(unittest.TestCase):
+ logger.info("Running tests for a clustering model CRLI...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI")
+ model_save_name = "saved_CRLI_model.pypots"
+
+ # initialize an Adam optimizer
+ G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+ D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a CRLI model
+ crli = CRLI(
+ n_steps=DATA["n_steps"],
+ n_features=DATA["n_features"],
+ n_clusters=DATA["n_classes"],
+ n_generator_layers=2,
+ rnn_hidden_size=128,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ G_optimizer=G_optimizer,
+ D_optimizer=D_optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="clustering-crli")
+ def test_0_fit(self):
+ self.crli.fit(TRAIN_SET)
+
+ @pytest.mark.xdist_group(name="clustering-crli")
+ def test_1_parameters(self):
+ assert hasattr(self.crli, "model") and self.crli.model is not None
+
+ assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None
+ assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None
+
+ assert hasattr(self.crli, "best_loss")
+ self.assertNotEqual(self.crli.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.crli, "best_model_dict")
+ and self.crli.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="clustering-crli")
+ def test_2_cluster(self):
+ clustering = self.crli.cluster(TEST_SET)
+ RI = cal_rand_index(clustering, DATA["test_y"])
+ CP = cal_cluster_purity(clustering, DATA["test_y"])
+ logger.info(f"RI: {RI}\nCP: {CP}")
+
+ @pytest.mark.xdist_group(name="clustering-crli")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.crli)
+
+ # save the trained model into file, and check if the path exists
+ self.crli.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.crli.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_clustering.py b/tests/clustering/vader.py
similarity index 51%
rename from tests/test_clustering.py
rename to tests/clustering/vader.py
index bbd4d014..71a6a91d 100644
--- a/tests/test_clustering.py
+++ b/tests/clustering/vader.py
@@ -1,5 +1,5 @@
"""
-Test cases for clustering models.
+Test cases for VaDER clustering model.
"""
# Created by Wenjie Du
@@ -12,94 +12,22 @@
import numpy as np
import pytest
-from pypots.clustering import VaDER, CRLI
+from pypots.clustering import VaDER
from pypots.optim import Adam
from pypots.utils.logging import logger
from pypots.utils.metrics import cal_rand_index, cal_cluster_purity
+from tests.clustering.config import (
+ EPOCHS,
+ TRAIN_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_CLUSTERING,
+)
from tests.global_test_config import (
DATA,
- RESULT_SAVING_DIR,
+ DEVICE,
check_tb_and_model_checkpoints_existence,
)
-EPOCHS = 5
-
-TRAIN_SET = {"X": DATA["train_X"]}
-VAL_SET = {"X": DATA["val_X"]}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering")
-
-
-class TestCRLI(unittest.TestCase):
- logger.info("Running tests for a clustering model CRLI...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI")
- model_save_name = "saved_CRLI_model.pypots"
-
- # initialize an Adam optimizer
- G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
- D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a CRLI model
- crli = CRLI(
- n_steps=DATA["n_steps"],
- n_features=DATA["n_features"],
- n_clusters=DATA["n_classes"],
- n_generator_layers=2,
- rnn_hidden_size=128,
- epochs=EPOCHS,
- saving_path=saving_path,
- G_optimizer=G_optimizer,
- D_optimizer=D_optimizer,
- )
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_0_fit(self):
- self.crli.fit(TRAIN_SET)
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_1_parameters(self):
- assert hasattr(self.crli, "model") and self.crli.model is not None
-
- assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None
- assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None
-
- assert hasattr(self.crli, "best_loss")
- self.assertNotEqual(self.crli.best_loss, float("inf"))
-
- assert (
- hasattr(self.crli, "best_model_dict")
- and self.crli.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_2_cluster(self):
- clustering = self.crli.cluster(TEST_SET)
- RI = cal_rand_index(clustering, DATA["test_y"])
- CP = cal_cluster_purity(clustering, DATA["test_y"])
- logger.info(f"RI: {RI}\nCP: {CP}")
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.crli)
-
- # save the trained model into file, and check if the path exists
- self.crli.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.crli.load_model(saved_model_path)
-
class TestVaDER(unittest.TestCase):
logger.info("Running tests for a clustering model Transformer...")
@@ -120,8 +48,9 @@ class TestVaDER(unittest.TestCase):
d_mu_stddev=5,
pretrain_epochs=20,
epochs=EPOCHS,
- saving_path=saving_path,
optimizer=optimizer,
+ saving_path=saving_path,
+ device=DEVICE,
)
@pytest.mark.xdist_group(name="clustering-vader")
diff --git a/tests/data/__init__.py b/tests/data/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/data/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/test_data.py b/tests/data/lazy_loading_strategy.py
similarity index 56%
rename from tests/test_data.py
rename to tests/data/lazy_loading_strategy.py
index 27531098..8db1080c 100644
--- a/tests/test_data.py
+++ b/tests/data/lazy_loading_strategy.py
@@ -8,31 +8,28 @@
import os
import unittest
-import h5py
import pytest
from pypots.classification import BRITS, GRUD
+from pypots.data.saving import save_dict_into_h5
from pypots.imputation import SAITS
-from tests.global_test_config import DATA, DATA_SAVING_DIR
from pypots.utils.logging import logger
+from tests.global_test_config import DATA, DATA_SAVING_DIR
-
-TRAIN_SET = f"{DATA_SAVING_DIR}/train_set.h5"
-VAL_SET = f"{DATA_SAVING_DIR}/val_set.h5"
-TEST_SET = f"{DATA_SAVING_DIR}/test_set.h5"
-IMPUTATION_TRAIN_SET = f"{DATA_SAVING_DIR}/imputation_train_set.h5"
-IMPUTATION_VAL_SET = f"{DATA_SAVING_DIR}/imputation_val_set.h5"
+TRAIN_SET_NAME = "train_set.h5"
+TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/{TRAIN_SET_NAME}"
+VAL_SET_NAME = "val_set.h5"
+VAL_SET_PATH = f"{DATA_SAVING_DIR}/{VAL_SET_NAME}"
+TEST_SET_NAME = "test_set.h5"
+TEST_SET_PATH = f"{DATA_SAVING_DIR}/{TEST_SET_NAME}"
+IMPUTATION_TRAIN_SET_NAME = "imputation_train_set.h5"
+IMPUTATION_TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/{IMPUTATION_TRAIN_SET_NAME}"
+IMPUTATION_VAL_SET_NAME = "imputation_val_set.h5"
+IMPUTATION_VAL_SET_PATH = f"{DATA_SAVING_DIR}/{IMPUTATION_VAL_SET_NAME}"
EPOCHS = 1
-def save_data_set_into_h5(data, path):
- with h5py.File(path, "w") as hf:
- for i in data.keys():
- tp = int if i == "y" else "float32"
- hf.create_dataset(i, data=data[i].astype(tp))
-
-
class TestLazyLoadingClasses(unittest.TestCase):
logger.info("Running tests for Dataset classes with lazy-loading strategy...")
@@ -73,53 +70,63 @@ def test_0_save_datasets_into_files(self):
# create the dir for saving files
os.makedirs(DATA_SAVING_DIR, exist_ok=True)
- if not os.path.exists(TRAIN_SET):
- save_data_set_into_h5(
- {"X": DATA["train_X"], "y": DATA["train_y"].astype(int)}, TRAIN_SET
+ if not os.path.exists(TRAIN_SET_PATH):
+ save_dict_into_h5(
+ {"X": DATA["train_X"], "y": DATA["train_y"].astype(float)},
+ DATA_SAVING_DIR,
+ TRAIN_SET_NAME,
)
- if not os.path.exists(VAL_SET):
- save_data_set_into_h5(
- {"X": DATA["val_X"], "y": DATA["val_y"].astype(int)}, VAL_SET
+ if not os.path.exists(VAL_SET_PATH):
+ save_dict_into_h5(
+ {"X": DATA["val_X"], "y": DATA["val_y"].astype(float)},
+ DATA_SAVING_DIR,
+ VAL_SET_NAME,
)
- if not os.path.exists(IMPUTATION_TRAIN_SET):
- save_data_set_into_h5({"X": DATA["train_X"]}, IMPUTATION_TRAIN_SET)
+ if not os.path.exists(IMPUTATION_TRAIN_SET_PATH):
+ save_dict_into_h5(
+ {"X": DATA["train_X"]}, DATA_SAVING_DIR, IMPUTATION_TRAIN_SET_NAME
+ )
- if not os.path.exists(IMPUTATION_VAL_SET):
- save_data_set_into_h5(
+ if not os.path.exists(IMPUTATION_VAL_SET_PATH):
+ save_dict_into_h5(
{
"X": DATA["val_X"],
"X_intact": DATA["val_X_intact"],
"indicating_mask": DATA["val_X_indicating_mask"],
},
- IMPUTATION_VAL_SET,
+ DATA_SAVING_DIR,
+ IMPUTATION_VAL_SET_NAME,
)
- if not os.path.exists(TEST_SET):
- save_data_set_into_h5(
+ if not os.path.exists(TEST_SET_PATH):
+ save_dict_into_h5(
{
"X": DATA["test_X"],
"X_intact": DATA["test_X_intact"],
"indicating_mask": DATA["test_X_indicating_mask"],
},
- TEST_SET,
+ DATA_SAVING_DIR,
+ TEST_SET_NAME,
)
@pytest.mark.xdist_group(name="data-lazy-loading")
def test_1_DatasetForMIT_BaseDataset(self):
- self.saits.fit(train_set=IMPUTATION_TRAIN_SET, val_set=IMPUTATION_VAL_SET)
- _ = self.saits.impute(X=TEST_SET)
+ self.saits.fit(
+ train_set=IMPUTATION_TRAIN_SET_PATH, val_set=IMPUTATION_VAL_SET_PATH
+ )
+ _ = self.saits.impute(X=TEST_SET_PATH)
@pytest.mark.xdist_group(name="data-lazy-loading")
def test_2_DatasetForBRITS(self):
- self.brits.fit(train_set=TRAIN_SET, val_set=VAL_SET)
- _ = self.brits.classify(X=TEST_SET)
+ self.brits.fit(train_set=TRAIN_SET_PATH, val_set=VAL_SET_PATH)
+ _ = self.brits.classify(X=TEST_SET_PATH)
@pytest.mark.xdist_group(name="data-lazy-loading")
def test_3_DatasetForGRUD(self):
- self.grud.fit(train_set=TRAIN_SET, val_set=VAL_SET)
- _ = self.grud.classify(X=TEST_SET)
+ self.grud.fit(train_set=TRAIN_SET_PATH, val_set=VAL_SET_PATH)
+ _ = self.grud.classify(X=TEST_SET_PATH)
if __name__ == "__main__":
diff --git a/tests/forecasting/__init__.py b/tests/forecasting/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/forecasting/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/test_forecasting.py b/tests/forecasting/bttf.py
similarity index 78%
rename from tests/test_forecasting.py
rename to tests/forecasting/bttf.py
index d2e8e14b..8e6946e7 100644
--- a/tests/test_forecasting.py
+++ b/tests/forecasting/bttf.py
@@ -1,5 +1,5 @@
"""
-Test cases for forecasting models.
+Test cases for BTTF forecasting model.
"""
# Created by Wenjie Du
@@ -12,12 +12,13 @@
from pypots.forecasting import BTTF
from pypots.utils.logging import logger
from pypots.utils.metrics import cal_mae
+from tests.forecasting.config import (
+ TEST_SET,
+ TEST_SET_INTACT,
+ N_PRED_STEP,
+)
from tests.global_test_config import DATA
-EPOCHS = 5
-N_PRED_STEP = 4
-TEST_SET = {"X": DATA["test_X"][:, :-N_PRED_STEP]}
-
class TestBTTF(unittest.TestCase):
logger.info("Running tests for a forecasting model BTTF...")
@@ -37,8 +38,7 @@ class TestBTTF(unittest.TestCase):
@pytest.mark.xdist_group(name="forecasting-bttf")
def test_0_forecasting(self):
predictions = self.bttf.forecast(TEST_SET)
- logger.info(f"prediction shape: {predictions.shape}")
- mae = cal_mae(predictions, DATA["test_X_intact"][:, -N_PRED_STEP:])
+ mae = cal_mae(predictions, TEST_SET_INTACT["X"][:, -N_PRED_STEP:])
logger.info(f"prediction MAE: {mae}")
diff --git a/tests/forecasting/config.py b/tests/forecasting/config.py
new file mode 100644
index 00000000..0a2a9e78
--- /dev/null
+++ b/tests/forecasting/config.py
@@ -0,0 +1,23 @@
+"""
+Test configs for forecasting models.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+
+from tests.global_test_config import (
+ DATA,
+ RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+N_PRED_STEP = 4
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {"X": DATA["val_X"]}
+TEST_SET = {"X": DATA["test_X"][:, :-N_PRED_STEP]}
+TEST_SET_INTACT = {"X": DATA["test_X_intact"]}
+
+RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "forecasting")
diff --git a/tests/global_test_config.py b/tests/global_test_config.py
index f3349483..5e152734 100644
--- a/tests/global_test_config.py
+++ b/tests/global_test_config.py
@@ -7,7 +7,10 @@
import os
+import torch
+
from pypots.data.generating import gene_incomplete_random_walk_dataset
+from pypots.utils.logging import logger
# Generate the unified data for testing and cache it first, DATA here is a singleton
# Otherwise, file lock will cause bug if running test parallely with pytest-xdist.
@@ -20,6 +23,16 @@
RESULT_SAVING_DIR = "testing_results"
+# set DEVICES to None if no cuda device is available, to avoid initialization failed while importing test classes
+cuda_devices = [torch.device(i) for i in range(torch.cuda.device_count())]
+if len(cuda_devices) > 2:
+ logger.info("❗️Detected multiple cuda devices, using all of them to run testing.")
+ DEVICE = cuda_devices
+else:
+ # if having no multiple cuda devices, leave it as None to use the default device
+ DEVICE = None
+
+
def check_tb_and_model_checkpoints_existence(model):
# check the tensorboard file existence
saved_files = os.listdir(model.saving_path)
diff --git a/tests/imputation/__init__.py b/tests/imputation/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/imputation/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/imputation/brits.py b/tests/imputation/brits.py
new file mode 100644
index 00000000..bf0a70c3
--- /dev/null
+++ b/tests/imputation/brits.py
@@ -0,0 +1,104 @@
+"""
+Test cases for BRITS imputation model.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import BRITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_IMPUTATION,
+ EPOCHS,
+)
+
+
+class TestBRITS(unittest.TestCase):
+ logger.info("Running tests for an imputation model BRITS...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS")
+ model_save_name = "saved_BRITS_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a BRITS model
+ brits = BRITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ 256,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="imputation-brits")
+ def test_0_fit(self):
+ self.brits.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="imputation-brits")
+ def test_1_impute(self):
+ imputed_X = self.brits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"BRITS test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-brits")
+ def test_2_parameters(self):
+ assert hasattr(self.brits, "model") and self.brits.model is not None
+
+ assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
+
+ assert hasattr(self.brits, "best_loss")
+ self.assertNotEqual(self.brits.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.brits, "best_model_dict")
+ and self.brits.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="imputation-brits")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.brits)
+
+ # save the trained model into file, and check if the path exists
+ self.brits.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.brits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/imputation/config.py b/tests/imputation/config.py
new file mode 100644
index 00000000..c225598b
--- /dev/null
+++ b/tests/imputation/config.py
@@ -0,0 +1,25 @@
+"""
+Test configs for imputation models.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import os
+
+from tests.global_test_config import (
+ DATA,
+ RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {
+ "X": DATA["val_X"],
+ "X_intact": DATA["val_X_intact"],
+ "indicating_mask": DATA["val_X_indicating_mask"],
+}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
diff --git a/tests/imputation/gpvae.py b/tests/imputation/gpvae.py
new file mode 100644
index 00000000..9c59c5b2
--- /dev/null
+++ b/tests/imputation/gpvae.py
@@ -0,0 +1,104 @@
+"""
+Test cases for GP-VAE imputation model.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import GPVAE
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_IMPUTATION,
+ EPOCHS,
+)
+
+
+class TestGPVAE(unittest.TestCase):
+ logger.info("Running tests for an imputation model GP-VAE...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "GP-VAE")
+ model_save_name = "saved_GPVAE_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a GP-VAE model
+ gp_vae = GPVAE(
+ DATA["n_steps"],
+ DATA["n_features"],
+ 256,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="imputation-gpvae")
+ def test_0_fit(self):
+ self.gp_vae.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="imputation-gpvae")
+ def test_1_impute(self):
+ imputed_X = self.gp_vae.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"GP-VAE test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-gpvae")
+ def test_2_parameters(self):
+ assert hasattr(self.gp_vae, "model") and self.gp_vae.model is not None
+
+ assert hasattr(self.gp_vae, "optimizer") and self.gp_vae.optimizer is not None
+
+ assert hasattr(self.gp_vae, "best_loss")
+ self.assertNotEqual(self.gp_vae.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.gp_vae, "best_model_dict")
+ and self.gp_vae.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="imputation-gpvae")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.gp_vae)
+
+ # save the trained model into file, and check if the path exists
+ self.gp_vae.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.gp_vae.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/imputation/locf.py b/tests/imputation/locf.py
new file mode 100644
index 00000000..8e54fbe0
--- /dev/null
+++ b/tests/imputation/locf.py
@@ -0,0 +1,46 @@
+"""
+Test cases for LOCF imputation method.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import LOCF
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+)
+from tests.imputation.config import (
+ TEST_SET,
+)
+
+
+class TestLOCF(unittest.TestCase):
+ logger.info("Running tests for an imputation model LOCF...")
+ locf = LOCF(nan=0)
+
+ @pytest.mark.xdist_group(name="imputation-locf")
+ def test_0_impute(self):
+ test_X_imputed = self.locf.impute(TEST_SET)
+ assert not np.isnan(
+ test_X_imputed
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"LOCF test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-locf")
+ def test_1_parameters(self):
+ assert hasattr(self.locf, "nan") and self.locf.nan is not None
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/imputation/mrnn.py b/tests/imputation/mrnn.py
new file mode 100644
index 00000000..681a9121
--- /dev/null
+++ b/tests/imputation/mrnn.py
@@ -0,0 +1,104 @@
+"""
+Test cases for MRNN imputation model.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import MRNN
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_IMPUTATION,
+ EPOCHS,
+)
+
+
+class TestMRNN(unittest.TestCase):
+ logger.info("Running tests for an imputation model MRNN...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN")
+ model_save_name = "saved_MRNN_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a MRNN model
+ mrnn = MRNN(
+ DATA["n_steps"],
+ DATA["n_features"],
+ 256,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="imputation-mrnn")
+ def test_0_fit(self):
+ self.mrnn.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="imputation-mrnn")
+ def test_1_impute(self):
+ imputed_X = self.mrnn.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"MRNN test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-mrnn")
+ def test_2_parameters(self):
+ assert hasattr(self.mrnn, "model") and self.mrnn.model is not None
+
+ assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None
+
+ assert hasattr(self.mrnn, "best_loss")
+ self.assertNotEqual(self.mrnn.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.mrnn, "best_model_dict")
+ and self.mrnn.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="imputation-mrnn")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.mrnn)
+
+ # save the trained model into file, and check if the path exists
+ self.mrnn.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.mrnn.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/imputation/saits.py b/tests/imputation/saits.py
new file mode 100644
index 00000000..647e8657
--- /dev/null
+++ b/tests/imputation/saits.py
@@ -0,0 +1,110 @@
+"""
+Test cases for SAITS imputation model.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_IMPUTATION,
+ EPOCHS,
+)
+
+
+class TestSAITS(unittest.TestCase):
+ logger.info("Running tests for an imputation model SAITS...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS")
+ model_save_name = "saved_saits_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=2,
+ d_model=256,
+ d_inner=128,
+ n_heads=4,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="imputation-saits")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="imputation-saits")
+ def test_1_impute(self):
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-saits")
+ def test_2_parameters(self):
+ assert hasattr(self.saits, "model") and self.saits.model is not None
+
+ assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None
+
+ assert hasattr(self.saits, "best_loss")
+ self.assertNotEqual(self.saits.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.saits, "best_model_dict")
+ and self.saits.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="imputation-saits")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.saits)
+
+ # save the trained model into file, and check if the path exists
+ self.saits.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.saits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/imputation/transformer.py b/tests/imputation/transformer.py
new file mode 100644
index 00000000..965b2cf7
--- /dev/null
+++ b/tests/imputation/transformer.py
@@ -0,0 +1,113 @@
+"""
+Test cases for Transformer imputation model.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import Transformer
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_IMPUTATION,
+ EPOCHS,
+)
+
+
+class TestTransformer(unittest.TestCase):
+ logger.info("Running tests for an imputation model Transformer...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer")
+ model_save_name = "saved_transformer_model.pypots"
+
+ # initialize an Adam optimizer
+ optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a Transformer model
+ transformer = Transformer(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=2,
+ d_model=256,
+ d_inner=128,
+ n_heads=4,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ optimizer=optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="imputation-transformer")
+ def test_0_fit(self):
+ self.transformer.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="imputation-transformer")
+ def test_1_impute(self):
+ imputed_X = self.transformer.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"Transformer test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-transformer")
+ def test_2_parameters(self):
+ assert hasattr(self.transformer, "model") and self.transformer.model is not None
+
+ assert (
+ hasattr(self.transformer, "optimizer")
+ and self.transformer.optimizer is not None
+ )
+
+ assert hasattr(self.transformer, "best_loss")
+ self.assertNotEqual(self.transformer.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.transformer, "best_model_dict")
+ and self.transformer.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="imputation-transformer")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.transformer)
+
+ # save the trained model into file, and check if the path exists
+ self.transformer.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.transformer.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/imputation/usgan.py b/tests/imputation/usgan.py
new file mode 100644
index 00000000..c91a17a1
--- /dev/null
+++ b/tests/imputation/usgan.py
@@ -0,0 +1,111 @@
+"""
+Test cases for US-GAN imputation model.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import USGAN
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+ DATA,
+ DEVICE,
+ check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+ TRAIN_SET,
+ VAL_SET,
+ TEST_SET,
+ RESULT_SAVING_DIR_FOR_IMPUTATION,
+ EPOCHS,
+)
+
+
+class TestUSGAN(unittest.TestCase):
+ logger.info("Running tests for an imputation model US-GAN...")
+
+ # set the log and model saving path
+ saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "US-GAN")
+ model_save_name = "saved_USGAN_model.pypots"
+
+ # initialize an Adam optimizer
+ G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+ D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a US-GAN model
+ us_gan = USGAN(
+ DATA["n_steps"],
+ DATA["n_features"],
+ 256,
+ epochs=EPOCHS,
+ saving_path=saving_path,
+ G_optimizer=G_optimizer,
+ D_optimizer=D_optimizer,
+ device=DEVICE,
+ )
+
+ @pytest.mark.xdist_group(name="imputation-usgan")
+ def test_0_fit(self):
+ self.us_gan.fit(TRAIN_SET, VAL_SET)
+
+ @pytest.mark.xdist_group(name="imputation-usgan")
+ def test_1_impute(self):
+ imputed_X = self.us_gan.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"US-GAN test_MAE: {test_MAE}")
+
+ @pytest.mark.xdist_group(name="imputation-usgan")
+ def test_2_parameters(self):
+ assert hasattr(self.us_gan, "model") and self.us_gan.model is not None
+
+ assert (
+ hasattr(self.us_gan, "G_optimizer") and self.us_gan.G_optimizer is not None
+ )
+ assert (
+ hasattr(self.us_gan, "D_optimizer") and self.us_gan.D_optimizer is not None
+ )
+
+ assert hasattr(self.us_gan, "best_loss")
+ self.assertNotEqual(self.us_gan.best_loss, float("inf"))
+
+ assert (
+ hasattr(self.us_gan, "best_model_dict")
+ and self.us_gan.best_model_dict is not None
+ )
+
+ @pytest.mark.xdist_group(name="imputation-usgan")
+ def test_3_saving_path(self):
+ # whether the root saving dir exists, which should be created by save_log_into_tb_file
+ assert os.path.exists(
+ self.saving_path
+ ), f"file {self.saving_path} does not exist"
+
+ # check if the tensorboard file and model checkpoints exist
+ check_tb_and_model_checkpoints_existence(self.us_gan)
+
+ # save the trained model into file, and check if the path exists
+ self.us_gan.save_model(
+ saving_dir=self.saving_path, file_name=self.model_save_name
+ )
+
+ # test loading the saved model, not necessary, but need to test
+ saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+ self.us_gan.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/optim/__init__.py b/tests/optim/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/optim/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/optim/adadelta.py b/tests/optim/adadelta.py
new file mode 100644
index 00000000..b69e5ea4
--- /dev/null
+++ b/tests/optim/adadelta.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adadelta.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adadelta
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdadelta(unittest.TestCase):
+ logger.info("Running tests for Adadelta...")
+
+ # initialize an Adadelta optimizer
+ adadelta = Adadelta(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=1,
+ d_model=128,
+ d_inner=64,
+ n_heads=2,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ optimizer=adadelta,
+ epochs=EPOCHS,
+ )
+
+ @pytest.mark.xdist_group(name="optim-adadelta")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/optim/adagrad.py b/tests/optim/adagrad.py
new file mode 100644
index 00000000..21b4696a
--- /dev/null
+++ b/tests/optim/adagrad.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adagrad.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adagrad
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdagrad(unittest.TestCase):
+ logger.info("Running tests for Adagrad...")
+
+ # initialize an Adagrad optimizer
+ adagrad = Adagrad(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=1,
+ d_model=128,
+ d_inner=64,
+ n_heads=2,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ optimizer=adagrad,
+ epochs=EPOCHS,
+ )
+
+ @pytest.mark.xdist_group(name="optim-adagrad")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/optim/adam.py b/tests/optim/adam.py
new file mode 100644
index 00000000..448f92b9
--- /dev/null
+++ b/tests/optim/adam.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adam.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdam(unittest.TestCase):
+ logger.info("Running tests for Adam...")
+
+ # initialize an Adam optimizer
+ adam = Adam(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=1,
+ d_model=128,
+ d_inner=64,
+ n_heads=2,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ optimizer=adam,
+ epochs=EPOCHS,
+ )
+
+ @pytest.mark.xdist_group(name="optim-adam")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/optim/adamw.py b/tests/optim/adamw.py
new file mode 100644
index 00000000..a7941f43
--- /dev/null
+++ b/tests/optim/adamw.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer AdamW.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import AdamW
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdamW(unittest.TestCase):
+ logger.info("Running tests for AdamW...")
+
+ # initialize an AdamW optimizer
+ adamw = AdamW(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=1,
+ d_model=128,
+ d_inner=64,
+ n_heads=2,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ optimizer=adamw,
+ epochs=EPOCHS,
+ )
+
+ @pytest.mark.xdist_group(name="optim-adamw")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/optim/config.py b/tests/optim/config.py
new file mode 100644
index 00000000..a0391027
--- /dev/null
+++ b/tests/optim/config.py
@@ -0,0 +1,19 @@
+"""
+Test configs for optimizers.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+from tests.global_test_config import DATA
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {
+ "X": DATA["val_X"],
+ "X_intact": DATA["val_X_intact"],
+ "indicating_mask": DATA["val_X_indicating_mask"],
+}
+TEST_SET = {"X": DATA["test_X"]}
+
+
+EPOCHS = 1
diff --git a/tests/optim/rmsprop.py b/tests/optim/rmsprop.py
new file mode 100644
index 00000000..1fe61a0d
--- /dev/null
+++ b/tests/optim/rmsprop.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer RMSprop.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import RMSprop
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestRMSprop(unittest.TestCase):
+ logger.info("Running tests for RMSprop...")
+
+ # initialize a RMSprop optimizer
+ rmsprop = RMSprop(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=1,
+ d_model=128,
+ d_inner=64,
+ n_heads=2,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ optimizer=rmsprop,
+ epochs=EPOCHS,
+ )
+
+ @pytest.mark.xdist_group(name="optim-rmsprop")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/optim/sgd.py b/tests/optim/sgd.py
new file mode 100644
index 00000000..4b1c1998
--- /dev/null
+++ b/tests/optim/sgd.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer SGD.
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import SGD
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestSGD(unittest.TestCase):
+ logger.info("Running tests for SGD...")
+
+ # initialize a SGD optimizer
+ sgd = SGD(lr=0.001, weight_decay=1e-5)
+
+ # initialize a SAITS model for testing DatasetForMIT and BaseDataset
+ saits = SAITS(
+ DATA["n_steps"],
+ DATA["n_features"],
+ n_layers=1,
+ d_model=128,
+ d_inner=64,
+ n_heads=2,
+ d_k=64,
+ d_v=64,
+ dropout=0.1,
+ optimizer=sgd,
+ epochs=EPOCHS,
+ )
+
+ @pytest.mark.xdist_group(name="optim-sgd")
+ def test_0_fit(self):
+ self.saits.fit(TRAIN_SET, VAL_SET)
+ imputed_X = self.saits.impute(TEST_SET)
+ assert not np.isnan(
+ imputed_X
+ ).any(), "Output still has missing values after running impute()."
+ test_MAE = cal_mae(
+ imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+ )
+ logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_classification.py b/tests/test_classification.py
deleted file mode 100644
index 2ef9c6d1..00000000
--- a/tests/test_classification.py
+++ /dev/null
@@ -1,256 +0,0 @@
-"""
-Test cases for classification models.
-"""
-
-# Created by Wenjie Du
-# License: GLP-v3
-
-import os
-import unittest
-
-import pytest
-
-from pypots.classification import BRITS, GRUD, Raindrop
-from pypots.optim import Adam
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_binary_classification_metrics
-from tests.global_test_config import (
- DATA,
- RESULT_SAVING_DIR,
- check_tb_and_model_checkpoints_existence,
-)
-
-EPOCHS = 5
-
-TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]}
-VAL_SET = {"X": DATA["val_X"], "y": DATA["val_y"]}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification")
-
-
-class TestBRITS(unittest.TestCase):
- logger.info("Running tests for a classification model BRITS...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS")
- model_save_name = "saved_BRITS_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a BRITS model
- brits = BRITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_classes=DATA["n_classes"],
- rnn_hidden_size=256,
- epochs=EPOCHS,
- saving_path=saving_path,
- model_saving_strategy="better",
- optimizer=optimizer,
- )
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_0_fit(self):
- self.brits.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_1_classify(self):
- predictions = self.brits.classify(TEST_SET)
- metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
- logger.info(
- f'ROC_AUC: {metrics["roc_auc"]}, \n'
- f'PR_AUC: {metrics["pr_auc"]},\n'
- f'F1: {metrics["f1"]},\n'
- f'Precision: {metrics["precision"]},\n'
- f'Recall: {metrics["recall"]},\n'
- )
- assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_2_parameters(self):
- assert hasattr(self.brits, "model") and self.brits.model is not None
-
- assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
- assert hasattr(self.brits, "best_loss")
- self.assertNotEqual(self.brits.best_loss, float("inf"))
-
- assert (
- hasattr(self.brits, "best_model_dict")
- and self.brits.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.brits)
-
- # save the trained model into file, and check if the path exists
- self.brits.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.brits.load_model(saved_model_path)
-
-
-class TestGRUD(unittest.TestCase):
- logger.info("Running tests for a classification model GRUD...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD")
- model_save_name = "saved_GRUD_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a GRUD model
- grud = GRUD(
- DATA["n_steps"],
- DATA["n_features"],
- n_classes=DATA["n_classes"],
- rnn_hidden_size=256,
- epochs=EPOCHS,
- saving_path=saving_path,
- optimizer=optimizer,
- )
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_0_fit(self):
- self.grud.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_1_classify(self):
- predictions = self.grud.classify(TEST_SET)
- metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
- logger.info(
- f'ROC_AUC: {metrics["roc_auc"]}, \n'
- f'PR_AUC: {metrics["pr_auc"]},\n'
- f'F1: {metrics["f1"]},\n'
- f'Precision: {metrics["precision"]},\n'
- f'Recall: {metrics["recall"]},\n'
- )
- assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_2_parameters(self):
- assert hasattr(self.grud, "model") and self.grud.model is not None
-
- assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None
-
- assert hasattr(self.grud, "best_loss")
- self.assertNotEqual(self.grud.best_loss, float("inf"))
-
- assert (
- hasattr(self.grud, "best_model_dict")
- and self.grud.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.grud)
-
- # save the trained model into file, and check if the path exists
- self.grud.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.grud.load_model(saved_model_path)
-
-
-class TestRaindrop(unittest.TestCase):
- logger.info("Running tests for a classification model Raindrop...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop")
- model_save_name = "saved_Raindrop_model.pypots"
-
- # initialize a Raindrop model
- raindrop = Raindrop(
- DATA["n_steps"],
- DATA["n_features"],
- DATA["n_classes"],
- n_layers=2,
- d_model=DATA["n_features"] * 4,
- d_inner=256,
- n_heads=2,
- dropout=0.3,
- d_static=0,
- aggregation="mean",
- sensor_wise_mask=False,
- static=False,
- epochs=EPOCHS,
- saving_path=saving_path,
- )
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_0_fit(self):
- self.raindrop.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_1_classify(self):
- predictions = self.raindrop.classify(TEST_SET)
- metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
- logger.info(
- f'ROC_AUC: {metrics["roc_auc"]}, \n'
- f'PR_AUC: {metrics["pr_auc"]},\n'
- f'F1: {metrics["f1"]},\n'
- f'Precision: {metrics["precision"]},\n'
- f'Recall: {metrics["recall"]},\n'
- )
- assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_2_parameters(self):
- assert hasattr(self.raindrop, "model") and self.raindrop.model is not None
-
- assert (
- hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None
- )
-
- assert hasattr(self.raindrop, "best_loss")
- self.assertNotEqual(self.raindrop.best_loss, float("inf"))
-
- assert (
- hasattr(self.raindrop, "best_model_dict")
- and self.raindrop.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.raindrop)
-
- # save the trained model into file, and check if the path exists
- self.raindrop.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.raindrop.load_model(saved_model_path)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/test_cli.py b/tests/test_cli.py
deleted file mode 100644
index 4e9e9927..00000000
--- a/tests/test_cli.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""
-Test cases for the functions and classes in package `pypots.cli`.
-"""
-
-# Created by Wenjie Du
-# License: GLP-v3
-
-import os
-import threading
-import unittest
-from argparse import Namespace
-from copy import copy
-
-import pytest
-
-from pypots.cli.dev import dev_command_factory
-from pypots.cli.doc import doc_command_factory
-from pypots.cli.env import env_command_factory
-from pypots.utils.logging import logger
-
-PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), "../.."))
-
-
-def callback_func():
- raise TimeoutError("Time out.")
-
-
-def time_out(interval, callback):
- def decorator(func):
- def wrapper(*args, **kwargs):
- t = threading.Thread(target=func, args=args, kwargs=kwargs)
- t.setDaemon(True)
- t.start()
- t.join(interval) # wait for interval seconds
- if t.is_alive():
- return threading.Timer(0, callback).start() # invoke callback()
- else:
- return
-
- return wrapper
-
- return decorator
-
-
-@pytest.mark.xfail(reason="Allow tests for CLI to fail")
-class TestPyPOTSCLIDev(unittest.TestCase):
- # set up the default arguments
- default_arguments = {
- "build": False,
- "cleanup": False,
- "run_tests": False,
- "k": None,
- "show_coverage": False,
- "lint_code": False,
- }
- # `pypots-cli dev` must run under the project root dir
- os.chdir(PROJECT_ROOT_DIR)
-
- @pytest.mark.xdist_group(name="cli-dev")
- def test_0_build(self):
- arguments = copy(self.default_arguments)
- arguments["build"] = True
- args = Namespace(**arguments)
- dev_command_factory(args).run()
-
- @pytest.mark.xdist_group(name="cli-dev")
- def test_1_run_tests(self):
- arguments = copy(self.default_arguments)
- arguments["run_tests"] = True
- arguments["k"] = "try_to_find_a_non_existing_test_case"
- args = Namespace(**arguments)
- try:
- dev_command_factory(args).run()
- except RuntimeError: # try to find a non-existing test case, so RuntimeError will be raised
- pass
- except Exception as e: # other exceptions will cause an error and result in failed testing
- raise e
-
- # Don't test --lint-code because Black will reformat the code and cause error when generating the coverage report
- # @pytest.mark.xdist_group(name="cli-dev")
- # def test_2_lint_code(self):
- # arguments = copy(self.default_arguments)
- # arguments["lint_code"] = True
- # args = Namespace(**arguments)
- # dev_command_factory(args).run()
-
- @pytest.mark.xdist_group(name="cli-dev")
- def test_3_cleanup(self):
- arguments = copy(self.default_arguments)
- arguments["cleanup"] = True
- args = Namespace(**arguments)
- dev_command_factory(args).run()
-
-
-@pytest.mark.xfail(reason="Allow tests for CLI to fail")
-class TestPyPOTSCLIDoc(unittest.TestCase):
- # set up the default arguments
- default_arguments = {
- "gene_rst": False,
- "branch": "main",
- "gene_html": False,
- "view_doc": False,
- "port": 9075,
- "cleanup": False,
- }
- # `pypots-cli doc` must run under the project root dir
- os.chdir(PROJECT_ROOT_DIR)
-
- @pytest.mark.xdist_group(name="cli-doc")
- def test_0_gene_rst(self):
- arguments = copy(self.default_arguments)
- arguments["gene_rst"] = True
- args = Namespace(**arguments)
- doc_command_factory(args).run()
-
- logger.info("run again under a non-root dir")
- try:
- os.chdir(os.path.abspath(os.path.join(PROJECT_ROOT_DIR, "pypots")))
- doc_command_factory(args).run()
- except RuntimeError: # try to run under a non-root dir, so RuntimeError will be raised
- pass
- except Exception as e: # other exceptions will cause an error and result in failed testing
- raise e
- finally:
- os.chdir(PROJECT_ROOT_DIR)
-
- @pytest.mark.xdist_group(name="cli-doc")
- def test_1_gene_html(self):
- arguments = copy(self.default_arguments)
- arguments["gene_html"] = True
- args = Namespace(**arguments)
- try:
- doc_command_factory(args).run()
- except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
- logger.error(e)
-
- @pytest.mark.xdist_group(name="cli-doc")
- @time_out(2, callback_func) # wait for two seconds
- def test_2_view_doc(self):
- arguments = copy(self.default_arguments)
- arguments["view_doc"] = True
- args = Namespace(**arguments)
- try:
- doc_command_factory(args).run()
- except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
- logger.error(e)
-
- @pytest.mark.xdist_group(name="cli-doc")
- def test_3_cleanup(self):
- arguments = copy(self.default_arguments)
- arguments["cleanup"] = True
- args = Namespace(**arguments)
- doc_command_factory(args).run()
-
-
-@pytest.mark.xfail(reason="Allow tests for CLI to fail")
-class TestPyPOTSCLIEnv(unittest.TestCase):
- # set up the default arguments
- default_arguments = {
- "install": "optional",
- "tool": "conda",
- }
-
- # `pypots-cli env` must run under the project root dir
- os.chdir(PROJECT_ROOT_DIR)
-
- @pytest.mark.xdist_group(name="cli-env")
- def test_0_install_with_conda(self):
- arguments = copy(self.default_arguments)
- arguments["tool"] = "conda"
- args = Namespace(**arguments)
- try:
- env_command_factory(args).run()
- except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
- logger.error(e)
-
- @pytest.mark.xdist_group(name="cli-env")
- def test_1_install_with_pip(self):
- arguments = copy(self.default_arguments)
- arguments["tool"] = "pip"
- args = Namespace(**arguments)
- try:
- env_command_factory(args).run()
- except Exception as e: # somehow we have some error when testing on Windows, so just print and pass below
- logger.error(e)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/test_imputation.py b/tests/test_imputation.py
deleted file mode 100644
index 6094ce62..00000000
--- a/tests/test_imputation.py
+++ /dev/null
@@ -1,356 +0,0 @@
-"""
-Test cases for imputation models.
-"""
-
-# Created by Wenjie Du
-# License: GPL-v3
-
-
-import os.path
-import unittest
-
-import numpy as np
-import pytest
-
-from pypots.imputation import (
- SAITS,
- Transformer,
- BRITS,
- MRNN,
- LOCF,
-)
-from pypots.optim import Adam
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_mae
-from tests.global_test_config import (
- DATA,
- RESULT_SAVING_DIR,
- check_tb_and_model_checkpoints_existence,
-)
-
-EPOCH = 5
-
-TRAIN_SET = {"X": DATA["train_X"]}
-VAL_SET = {
- "X": DATA["val_X"],
- "X_intact": DATA["val_X_intact"],
- "indicating_mask": DATA["val_X_indicating_mask"],
-}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
-
-
-class TestSAITS(unittest.TestCase):
- logger.info("Running tests for an imputation model SAITS...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS")
- model_save_name = "saved_saits_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=2,
- d_model=256,
- d_inner=128,
- n_heads=4,
- d_k=64,
- d_v=64,
- dropout=0.1,
- epochs=EPOCH,
- saving_path=saving_path,
- optimizer=optimizer,
- )
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_1_impute(self):
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_2_parameters(self):
- assert hasattr(self.saits, "model") and self.saits.model is not None
-
- assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None
-
- assert hasattr(self.saits, "best_loss")
- self.assertNotEqual(self.saits.best_loss, float("inf"))
-
- assert (
- hasattr(self.saits, "best_model_dict")
- and self.saits.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.saits)
-
- # save the trained model into file, and check if the path exists
- self.saits.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.saits.load_model(saved_model_path)
-
-
-class TestTransformer(unittest.TestCase):
- logger.info("Running tests for an imputation model Transformer...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer")
- model_save_name = "saved_transformer_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a Transformer model
- transformer = Transformer(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=2,
- d_model=256,
- d_inner=128,
- n_heads=4,
- d_k=64,
- d_v=64,
- dropout=0.1,
- epochs=EPOCH,
- saving_path=saving_path,
- optimizer=optimizer,
- )
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_0_fit(self):
- self.transformer.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_1_impute(self):
- imputed_X = self.transformer.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"Transformer test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_2_parameters(self):
- assert hasattr(self.transformer, "model") and self.transformer.model is not None
-
- assert (
- hasattr(self.transformer, "optimizer")
- and self.transformer.optimizer is not None
- )
-
- assert hasattr(self.transformer, "best_loss")
- self.assertNotEqual(self.transformer.best_loss, float("inf"))
-
- assert (
- hasattr(self.transformer, "best_model_dict")
- and self.transformer.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.transformer)
-
- # save the trained model into file, and check if the path exists
- self.transformer.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.transformer.load_model(saved_model_path)
-
-
-class TestBRITS(unittest.TestCase):
- logger.info("Running tests for an imputation model BRITS...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS")
- model_save_name = "saved_BRITS_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a BRITS model
- brits = BRITS(
- DATA["n_steps"],
- DATA["n_features"],
- 256,
- epochs=EPOCH,
- saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/BRITS",
- optimizer=optimizer,
- )
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_0_fit(self):
- self.brits.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_1_impute(self):
- imputed_X = self.brits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"BRITS test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_2_parameters(self):
- assert hasattr(self.brits, "model") and self.brits.model is not None
-
- assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
- assert hasattr(self.brits, "best_loss")
- self.assertNotEqual(self.brits.best_loss, float("inf"))
-
- assert (
- hasattr(self.brits, "best_model_dict")
- and self.brits.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.brits)
-
- # save the trained model into file, and check if the path exists
- self.brits.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.brits.load_model(saved_model_path)
-
-
-class TestMRNN(unittest.TestCase):
- logger.info("Running tests for an imputation model MRNN...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN")
- model_save_name = "saved_MRNN_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a MRNN model
- mrnn = MRNN(
- DATA["n_steps"],
- DATA["n_features"],
- 256,
- epochs=EPOCH,
- saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/MRNN",
- optimizer=optimizer,
- )
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_0_fit(self):
- self.mrnn.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_1_impute(self):
- imputed_X = self.mrnn.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"MRNN test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_2_parameters(self):
- assert hasattr(self.mrnn, "model") and self.mrnn.model is not None
-
- assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None
-
- assert hasattr(self.mrnn, "best_loss")
- self.assertNotEqual(self.mrnn.best_loss, float("inf"))
-
- assert (
- hasattr(self.mrnn, "best_model_dict")
- and self.mrnn.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.mrnn)
-
- # save the trained model into file, and check if the path exists
- self.mrnn.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.mrnn.load_model(saved_model_path)
-
-
-class TestLOCF(unittest.TestCase):
- logger.info("Running tests for an imputation model LOCF...")
- locf = LOCF(nan=0)
-
- @pytest.mark.xdist_group(name="imputation-locf")
- def test_0_impute(self):
- test_X_imputed = self.locf.impute(TEST_SET)
- assert not np.isnan(
- test_X_imputed
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"LOCF test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-locf")
- def test_1_parameters(self):
- assert hasattr(self.locf, "nan") and self.locf.nan is not None
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/test_optim.py b/tests/test_optim.py
deleted file mode 100644
index 9be096fb..00000000
--- a/tests/test_optim.py
+++ /dev/null
@@ -1,244 +0,0 @@
-"""
-Test cases for optimizers.
-"""
-
-# Created by Wenjie Du
-# License: GLP-v3
-
-import unittest
-
-import h5py
-import numpy as np
-import pytest
-
-from pypots.imputation import SAITS
-from pypots.optim import Adam, AdamW, Adagrad, Adadelta, SGD, RMSprop
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_mae
-from tests.global_test_config import DATA
-
-TRAIN_SET = {"X": DATA["train_X"]}
-VAL_SET = {
- "X": DATA["val_X"],
- "X_intact": DATA["val_X_intact"],
- "indicating_mask": DATA["val_X_indicating_mask"],
-}
-TEST_SET = {"X": DATA["test_X"]}
-
-
-EPOCHS = 3
-
-
-def save_data_set_into_h5(data, path):
- with h5py.File(path, "w") as hf:
- for i in data.keys():
- tp = int if i == "y" else "float32"
- hf.create_dataset(i, data=data[i].astype(tp))
-
-
-class TestAdam(unittest.TestCase):
- logger.info("Running tests for Adam...")
-
- # initialize an Adam optimizer
- adam = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model for testing DatasetForMIT and BaseDataset
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=1,
- d_model=128,
- d_inner=64,
- n_heads=2,
- d_k=64,
- d_v=64,
- dropout=0.1,
- optimizer=adam,
- epochs=EPOCHS,
- )
-
- @pytest.mark.xdist_group(name="optim-adam")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestAdamW(unittest.TestCase):
- logger.info("Running tests for AdamW...")
-
- # initialize an AdamW optimizer
- adamw = AdamW(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model for testing DatasetForMIT and BaseDataset
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=1,
- d_model=128,
- d_inner=64,
- n_heads=2,
- d_k=64,
- d_v=64,
- dropout=0.1,
- optimizer=adamw,
- epochs=EPOCHS,
- )
-
- @pytest.mark.xdist_group(name="optim-adamw")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestAdagrad(unittest.TestCase):
- logger.info("Running tests for Adagrad...")
-
- # initialize an Adagrad optimizer
- adagrad = Adagrad(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model for testing DatasetForMIT and BaseDataset
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=1,
- d_model=128,
- d_inner=64,
- n_heads=2,
- d_k=64,
- d_v=64,
- dropout=0.1,
- optimizer=adagrad,
- epochs=EPOCHS,
- )
-
- @pytest.mark.xdist_group(name="optim-adagrad")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestAdadelta(unittest.TestCase):
- logger.info("Running tests for Adadelta...")
-
- # initialize an Adadelta optimizer
- adadelta = Adadelta(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model for testing DatasetForMIT and BaseDataset
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=1,
- d_model=128,
- d_inner=64,
- n_heads=2,
- d_k=64,
- d_v=64,
- dropout=0.1,
- optimizer=adadelta,
- epochs=EPOCHS,
- )
-
- @pytest.mark.xdist_group(name="optim-adadelta")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestSGD(unittest.TestCase):
- logger.info("Running tests for SGD...")
-
- # initialize a SGD optimizer
- sgd = SGD(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model for testing DatasetForMIT and BaseDataset
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=1,
- d_model=128,
- d_inner=64,
- n_heads=2,
- d_k=64,
- d_v=64,
- dropout=0.1,
- optimizer=sgd,
- epochs=EPOCHS,
- )
-
- @pytest.mark.xdist_group(name="optim-sgd")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestRMSprop(unittest.TestCase):
- logger.info("Running tests for RMSprop...")
-
- # initialize a RMSprop optimizer
- rmsprop = RMSprop(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model for testing DatasetForMIT and BaseDataset
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=1,
- d_model=128,
- d_inner=64,
- n_heads=2,
- d_k=64,
- d_v=64,
- dropout=0.1,
- optimizer=rmsprop,
- epochs=EPOCHS,
- )
-
- @pytest.mark.xdist_group(name="optim-rmsprop")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/test_training_on_multi_gpus.py b/tests/test_training_on_multi_gpus.py
deleted file mode 100644
index b076cbfe..00000000
--- a/tests/test_training_on_multi_gpus.py
+++ /dev/null
@@ -1,783 +0,0 @@
-"""
-Test cases for running models on multi cuda devices.
-"""
-
-# Created by Wenjie Du
-# License: GPL-v3
-
-
-import os.path
-import unittest
-
-import numpy as np
-import pytest
-import torch
-
-from pypots.classification import BRITS, GRUD, Raindrop
-from pypots.clustering import VaDER, CRLI
-from pypots.forecasting import BTTF
-from pypots.imputation import BRITS as ImputationBRITS
-from pypots.imputation import (
- SAITS,
- Transformer,
- MRNN,
- LOCF,
-)
-from pypots.optim import Adam
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_binary_classification_metrics
-from pypots.utils.metrics import cal_mae
-from pypots.utils.metrics import cal_rand_index, cal_cluster_purity
-from tests.global_test_config import (
- DATA,
- RESULT_SAVING_DIR,
- check_tb_and_model_checkpoints_existence,
-)
-
-EPOCHS = 5
-
-cuda_devices = [torch.device(i) for i in range(torch.cuda.device_count())]
-
-# set DEVICES to None if no cuda device is available, to avoid initialization failed while importing test classes
-DEVICES = None if cuda_devices == [] else cuda_devices
-
-# global skip test if less than two cuda-enabled devices
-LESS_THAN_TWO_DEVICES = len(cuda_devices) < 2
-pytestmark = pytest.mark.skipif(
- LESS_THAN_TWO_DEVICES, reason="not enough cuda devices to run tests"
-)
-
-
-TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]}
-
-VAL_SET = {
- "X": DATA["val_X"],
- "X_intact": DATA["val_X_intact"],
- "indicating_mask": DATA["val_X_indicating_mask"],
- "y": DATA["val_y"],
-}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
-RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification")
-RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering")
-
-
-class TestSAITS(unittest.TestCase):
- logger.info("Running tests for an imputation model SAITS...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS")
- model_save_name = "saved_saits_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a SAITS model
- saits = SAITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=2,
- d_model=256,
- d_inner=128,
- n_heads=4,
- d_k=64,
- d_v=64,
- dropout=0.1,
- epochs=EPOCHS,
- saving_path=saving_path,
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_0_fit(self):
- self.saits.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_1_impute(self):
- imputed_X = self.saits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"SAITS test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_2_parameters(self):
- assert hasattr(self.saits, "model") and self.saits.model is not None
-
- assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None
-
- assert hasattr(self.saits, "best_loss")
- self.assertNotEqual(self.saits.best_loss, float("inf"))
-
- assert (
- hasattr(self.saits, "best_model_dict")
- and self.saits.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-saits")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.saits)
-
- # save the trained model into file, and check if the path exists
- self.saits.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.saits.load_model(saved_model_path)
-
-
-class TestTransformer(unittest.TestCase):
- logger.info("Running tests for an imputation model Transformer...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer")
- model_save_name = "saved_transformer_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a Transformer model
- transformer = Transformer(
- DATA["n_steps"],
- DATA["n_features"],
- n_layers=2,
- d_model=256,
- d_inner=128,
- n_heads=4,
- d_k=64,
- d_v=64,
- dropout=0.1,
- epochs=EPOCHS,
- saving_path=saving_path,
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_0_fit(self):
- self.transformer.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_1_impute(self):
- imputed_X = self.transformer.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"Transformer test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_2_parameters(self):
- assert hasattr(self.transformer, "model") and self.transformer.model is not None
-
- assert (
- hasattr(self.transformer, "optimizer")
- and self.transformer.optimizer is not None
- )
-
- assert hasattr(self.transformer, "best_loss")
- self.assertNotEqual(self.transformer.best_loss, float("inf"))
-
- assert (
- hasattr(self.transformer, "best_model_dict")
- and self.transformer.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-transformer")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.transformer)
-
- # save the trained model into file, and check if the path exists
- self.transformer.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.transformer.load_model(saved_model_path)
-
-
-class TestImputationBRITS(unittest.TestCase):
- logger.info("Running tests for an imputation model BRITS...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS")
- model_save_name = "saved_BRITS_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a BRITS model
- brits = ImputationBRITS(
- DATA["n_steps"],
- DATA["n_features"],
- 256,
- epochs=EPOCHS,
- saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/BRITS",
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_0_fit(self):
- self.brits.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_1_impute(self):
- imputed_X = self.brits.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"BRITS test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_2_parameters(self):
- assert hasattr(self.brits, "model") and self.brits.model is not None
-
- assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
- assert hasattr(self.brits, "best_loss")
- self.assertNotEqual(self.brits.best_loss, float("inf"))
-
- assert (
- hasattr(self.brits, "best_model_dict")
- and self.brits.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-brits")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.brits)
-
- # save the trained model into file, and check if the path exists
- self.brits.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.brits.load_model(saved_model_path)
-
-
-class TestMRNN(unittest.TestCase):
- logger.info("Running tests for an imputation model MRNN...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN")
- model_save_name = "saved_MRNN_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a MRNN model
- mrnn = MRNN(
- DATA["n_steps"],
- DATA["n_features"],
- 256,
- epochs=EPOCHS,
- saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/MRNN",
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_0_fit(self):
- self.mrnn.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_1_impute(self):
- imputed_X = self.mrnn.impute(TEST_SET)
- assert not np.isnan(
- imputed_X
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"MRNN test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_2_parameters(self):
- assert hasattr(self.mrnn, "model") and self.mrnn.model is not None
-
- assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None
-
- assert hasattr(self.mrnn, "best_loss")
- self.assertNotEqual(self.mrnn.best_loss, float("inf"))
-
- assert (
- hasattr(self.mrnn, "best_model_dict")
- and self.mrnn.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="imputation-mrnn")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.mrnn)
-
- # save the trained model into file, and check if the path exists
- self.mrnn.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.mrnn.load_model(saved_model_path)
-
-
-class TestLOCF(unittest.TestCase):
- logger.info("Running tests for an imputation model LOCF...")
- locf = LOCF(nan=0)
-
- @pytest.mark.xdist_group(name="imputation-locf")
- def test_0_impute(self):
- test_X_imputed = self.locf.impute(TEST_SET)
- assert not np.isnan(
- test_X_imputed
- ).any(), "Output still has missing values after running impute()."
- test_MAE = cal_mae(
- test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
- )
- logger.info(f"LOCF test_MAE: {test_MAE}")
-
- @pytest.mark.xdist_group(name="imputation-locf")
- def test_1_parameters(self):
- assert hasattr(self.locf, "nan") and self.locf.nan is not None
-
-
-class TestClassificationBRITS(unittest.TestCase):
- logger.info("Running tests for a classification model BRITS...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS")
- model_save_name = "saved_BRITS_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a BRITS model
- brits = BRITS(
- DATA["n_steps"],
- DATA["n_features"],
- n_classes=DATA["n_classes"],
- rnn_hidden_size=256,
- epochs=EPOCHS,
- saving_path=saving_path,
- model_saving_strategy="better",
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_0_fit(self):
- self.brits.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_1_classify(self):
- predictions = self.brits.classify(TEST_SET)
- metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
- logger.info(
- f'ROC_AUC: {metrics["roc_auc"]}, \n'
- f'PR_AUC: {metrics["pr_auc"]},\n'
- f'F1: {metrics["f1"]},\n'
- f'Precision: {metrics["precision"]},\n'
- f'Recall: {metrics["recall"]},\n'
- )
- assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_2_parameters(self):
- assert hasattr(self.brits, "model") and self.brits.model is not None
-
- assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
- assert hasattr(self.brits, "best_loss")
- self.assertNotEqual(self.brits.best_loss, float("inf"))
-
- assert (
- hasattr(self.brits, "best_model_dict")
- and self.brits.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="classification-brits")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.brits)
-
- # save the trained model into file, and check if the path exists
- self.brits.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.brits.load_model(saved_model_path)
-
-
-class TestGRUD(unittest.TestCase):
- logger.info("Running tests for a classification model GRUD...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD")
- model_save_name = "saved_GRUD_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a GRUD model
- grud = GRUD(
- DATA["n_steps"],
- DATA["n_features"],
- n_classes=DATA["n_classes"],
- rnn_hidden_size=256,
- epochs=EPOCHS,
- saving_path=saving_path,
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_0_fit(self):
- self.grud.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_1_classify(self):
- predictions = self.grud.classify(TEST_SET)
- metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
- logger.info(
- f'ROC_AUC: {metrics["roc_auc"]}, \n'
- f'PR_AUC: {metrics["pr_auc"]},\n'
- f'F1: {metrics["f1"]},\n'
- f'Precision: {metrics["precision"]},\n'
- f'Recall: {metrics["recall"]},\n'
- )
- assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_2_parameters(self):
- assert hasattr(self.grud, "model") and self.grud.model is not None
-
- assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None
-
- assert hasattr(self.grud, "best_loss")
- self.assertNotEqual(self.grud.best_loss, float("inf"))
-
- assert (
- hasattr(self.grud, "best_model_dict")
- and self.grud.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="classification-grud")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.grud)
-
- # save the trained model into file, and check if the path exists
- self.grud.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.grud.load_model(saved_model_path)
-
-
-class TestRaindrop(unittest.TestCase):
- logger.info("Running tests for a classification model Raindrop...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop")
- model_save_name = "saved_Raindrop_model.pypots"
-
- # initialize a Raindrop model
- raindrop = Raindrop(
- DATA["n_steps"],
- DATA["n_features"],
- DATA["n_classes"],
- n_layers=2,
- d_model=DATA["n_features"] * 4,
- d_inner=256,
- n_heads=2,
- dropout=0.3,
- d_static=0,
- aggregation="mean",
- sensor_wise_mask=False,
- static=False,
- epochs=EPOCHS,
- saving_path=saving_path,
- )
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_0_fit(self):
- self.raindrop.fit(TRAIN_SET, VAL_SET)
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_1_classify(self):
- predictions = self.raindrop.classify(TEST_SET)
- metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
- logger.info(
- f'ROC_AUC: {metrics["roc_auc"]}, \n'
- f'PR_AUC: {metrics["pr_auc"]},\n'
- f'F1: {metrics["f1"]},\n'
- f'Precision: {metrics["precision"]},\n'
- f'Recall: {metrics["recall"]},\n'
- )
- assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_2_parameters(self):
- assert hasattr(self.raindrop, "model") and self.raindrop.model is not None
-
- assert (
- hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None
- )
-
- assert hasattr(self.raindrop, "best_loss")
- self.assertNotEqual(self.raindrop.best_loss, float("inf"))
-
- assert (
- hasattr(self.raindrop, "best_model_dict")
- and self.raindrop.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="classification-raindrop")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.raindrop)
-
- # save the trained model into file, and check if the path exists
- self.raindrop.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.raindrop.load_model(saved_model_path)
-
-
-class TestCRLI(unittest.TestCase):
- logger.info("Running tests for a clustering model CRLI...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI")
- model_save_name = "saved_CRLI_model.pypots"
-
- # initialize an Adam optimizer
- G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
- D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a CRLI model
- crli = CRLI(
- n_steps=DATA["n_steps"],
- n_features=DATA["n_features"],
- n_clusters=DATA["n_classes"],
- n_generator_layers=2,
- rnn_hidden_size=128,
- epochs=EPOCHS,
- saving_path=saving_path,
- G_optimizer=G_optimizer,
- D_optimizer=D_optimizer,
- )
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_0_fit(self):
- self.crli.fit(TRAIN_SET)
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_1_parameters(self):
- assert hasattr(self.crli, "model") and self.crli.model is not None
-
- assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None
- assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None
-
- assert hasattr(self.crli, "best_loss")
- self.assertNotEqual(self.crli.best_loss, float("inf"))
-
- assert (
- hasattr(self.crli, "best_model_dict")
- and self.crli.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_2_cluster(self):
- clustering = self.crli.cluster(TEST_SET)
- RI = cal_rand_index(clustering, DATA["test_y"])
- CP = cal_cluster_purity(clustering, DATA["test_y"])
- logger.info(f"RI: {RI}\nCP: {CP}")
-
- @pytest.mark.xdist_group(name="clustering-crli")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.crli)
-
- # save the trained model into file, and check if the path exists
- self.crli.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.crli.load_model(saved_model_path)
-
-
-class TestVaDER(unittest.TestCase):
- logger.info("Running tests for a clustering model Transformer...")
-
- # set the log and model saving path
- saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "VaDER")
- model_save_name = "saved_VaDER_model.pypots"
-
- # initialize an Adam optimizer
- optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
- # initialize a VaDER model
- vader = VaDER(
- n_steps=DATA["n_steps"],
- n_features=DATA["n_features"],
- n_clusters=DATA["n_classes"],
- rnn_hidden_size=64,
- d_mu_stddev=5,
- pretrain_epochs=20,
- epochs=EPOCHS,
- saving_path=saving_path,
- optimizer=optimizer,
- num_workers=2,
- device=DEVICES,
- )
-
- @pytest.mark.xdist_group(name="clustering-vader")
- def test_0_fit(self):
- self.vader.fit(TRAIN_SET)
-
- @pytest.mark.xdist_group(name="clustering-vader")
- def test_1_cluster(self):
- try:
- clustering = self.vader.cluster(TEST_SET)
- RI = cal_rand_index(clustering, DATA["test_y"])
- CP = cal_cluster_purity(clustering, DATA["test_y"])
- logger.info(f"RI: {RI}\nCP: {CP}")
- except np.linalg.LinAlgError as e:
- logger.error(
- f"{e}\n"
- "Got singular matrix, please try to retrain the model to fix this"
- )
-
- @pytest.mark.xdist_group(name="clustering-vader")
- def test_2_parameters(self):
- assert hasattr(self.vader, "model") and self.vader.model is not None
-
- assert hasattr(self.vader, "optimizer") and self.vader.optimizer is not None
-
- assert hasattr(self.vader, "best_loss")
- self.assertNotEqual(self.vader.best_loss, float("inf"))
-
- assert (
- hasattr(self.vader, "best_model_dict")
- and self.vader.best_model_dict is not None
- )
-
- @pytest.mark.xdist_group(name="clustering-vader")
- def test_3_saving_path(self):
- # whether the root saving dir exists, which should be created by save_log_into_tb_file
- assert os.path.exists(
- self.saving_path
- ), f"file {self.saving_path} does not exist"
-
- # check if the tensorboard file and model checkpoints exist
- check_tb_and_model_checkpoints_existence(self.vader)
-
- # save the trained model into file, and check if the path exists
- self.vader.save_model(
- saving_dir=self.saving_path, file_name=self.model_save_name
- )
-
- # test loading the saved model, not necessary, but need to test
- saved_model_path = os.path.join(self.saving_path, self.model_save_name)
- self.vader.load_model(saved_model_path)
-
-
-class TestBTTF(unittest.TestCase):
- logger.info("Running tests for a forecasting model BTTF...")
-
- # initialize a BTTF model
- pred_step = 4
- bttf = BTTF(
- n_steps=DATA["n_steps"] - pred_step,
- n_features=10,
- pred_step=pred_step,
- rank=10,
- time_lags=[1, 2, 3, 5, 5 + 1, 5 + 2, 10, 10 + 1, 10 + 2],
- burn_iter=5,
- gibbs_iter=5,
- multi_step=1,
- )
-
- @pytest.mark.xdist_group(name="forecasting-bttf")
- def test_0_forecasting(self):
- predictions = self.bttf.forecast({"X": DATA["test_X"][:, : -self.pred_step]})
- logger.info(f"prediction shape: {predictions.shape}")
- mae = cal_mae(predictions, DATA["test_X_intact"][:, -self.pred_step :])
- logger.info(f"prediction MAE: {mae}")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/utils/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du
+# License: GLP-v3
diff --git a/tests/test_utils.py b/tests/utils/logging.py
similarity index 64%
rename from tests/test_utils.py
rename to tests/utils/logging.py
index 0fd48ec8..113f0dde 100644
--- a/tests/test_utils.py
+++ b/tests/utils/logging.py
@@ -1,5 +1,5 @@
"""
-Test cases for the functions and classes in package `pypots.utils`.
+Test cases for the functions and classes in package `pypots.utils.logging`.
"""
# Created by Wenjie Du
@@ -9,10 +9,7 @@
import shutil
import unittest
-import torch
-
from pypots.utils.logging import Logger
-from pypots.utils.random import set_random_seed
class TestLogging(unittest.TestCase):
@@ -49,25 +46,5 @@ def test_saving_log_into_file(self):
shutil.rmtree("test_log", ignore_errors=True)
-class TestRandom(unittest.TestCase):
- def test_set_random_seed(self):
- random_state1 = torch.get_rng_state()
- torch.rand(
- 1, 3
- ) # randomly generate something, the random state will be reset, so two states should be varying
- random_state2 = torch.get_rng_state()
- assert not torch.equal(
- random_state1, random_state2
- ), "The random seed hasn't set, so two random states should be different."
-
- set_random_seed(26)
- random_state1 = torch.get_rng_state()
- set_random_seed(26)
- random_state2 = torch.get_rng_state()
- assert torch.equal(
- random_state1, random_state2
- ), "The random seed has been set, two random states are not the same."
-
-
if __name__ == "__main__":
unittest.main()
diff --git a/tests/utils/random.py b/tests/utils/random.py
new file mode 100644
index 00000000..0d1a0ca0
--- /dev/null
+++ b/tests/utils/random.py
@@ -0,0 +1,36 @@
+"""
+Test cases for the functions and classes in package `pypots.utils.random`.
+"""
+
+# Created by Wenjie Du
+# License: GPL-v3
+
+import unittest
+
+import torch
+
+from pypots.utils.random import set_random_seed
+
+
+class TestRandom(unittest.TestCase):
+ def test_set_random_seed(self):
+ random_state1 = torch.get_rng_state()
+ torch.rand(
+ 1, 3
+ ) # randomly generate something, the random state will be reset, so two states should be varying
+ random_state2 = torch.get_rng_state()
+ assert not torch.equal(
+ random_state1, random_state2
+ ), "The random seed hasn't set, so two random states should be different."
+
+ set_random_seed(26)
+ random_state1 = torch.get_rng_state()
+ set_random_seed(26)
+ random_state2 = torch.get_rng_state()
+ assert torch.equal(
+ random_state1, random_state2
+ ), "The random seed has been set, two random states are not the same."
+
+
+if __name__ == "__main__":
+ unittest.main()