diff --git a/.github/workflows/testing_ci.yml b/.github/workflows/testing_ci.yml
index d339afe5..4cdfe5bc 100644
--- a/.github/workflows/testing_ci.yml
+++ b/.github/workflows/testing_ci.yml
@@ -15,43 +15,61 @@ jobs:
         runs-on: ${{ matrix.os }}
         defaults:
             run:
-                shell: bash -l {0}
+                shell: bash {0}
         strategy:
             fail-fast: false
             matrix:
                 os: [ubuntu-latest, windows-latest, macOS-latest]
-                python-version: ["3.7", "3.8", "3.9", "3.10"]
+                python-version: ["3.7", "3.10"]
+                torch-version: ["1.13.1"]
 
         steps:
             - name: Check out the repo code
               uses: actions/checkout@v3
 
-            - name: Set up Conda
-              uses: conda-incubator/setup-miniconda@v2
+            - name: Determine the Python version
+              uses: haya14busa/action-cond@v1
+              id: condval
               with:
-                  activate-environment: pypots-test
-                  python-version: ${{ matrix.python-version }}
-                  environment-file: tests/environment_for_conda_test.yml
-                  auto-activate-base: false
+                cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
+                # Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
+                if_true: "3.7.16"
+                if_false: ${{ matrix.python-version }}
+
+            - name: Set up Python
+              uses: actions/setup-python@v4
+              with:
+                python-version: ${{ steps.condval.outputs.value }}
+                check-latest: true
+                cache: pip
+                cache-dependency-path: |
+                  setup.cfg
+
+            - name: Install PyTorch ${{ matrix.torch-version }}+cpu
+              # we have to install torch in advance because torch_sparse needs it for compilation,
+              # refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
+              run: |
+                  which python
+                  which pip
+                  python -m pip install --upgrade pip
+                  pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
+                  python -c "import torch; print('PyTorch:', torch.__version__)"
+
+            - name: Install other dependencies
+              run: |
+                  pip install pypots
+                  pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
+                  pip install -e ".[dev]"
 
             - name: Fetch the test environment details
               run: |
                   which python
-                  conda info
-                  conda list
+                  pip list
 
             - name: Test with pytest
               run: |
-                  # run tests separately here due to Segmentation Fault in test_clustering when run all in
-                  # one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
-                  python -m pytest -rA tests/test_classification.py -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_imputation.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_clustering.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_forecasting.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_optim.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_data.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_utils.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
-                  python -m pytest -rA tests/test_cli.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  rm -rf tests/__pycache__
+                  python -m pytest -rA tests/*/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
 
             - name: Generate the LCOV report
               run: |
@@ -61,4 +79,4 @@ jobs:
               uses: coverallsapp/github-action@master
               with:
                   github-token: ${{ secrets.GITHUB_TOKEN }}
-                  path-to-lcov: 'coverage.lcov'
+                  path-to-lcov: "coverage.lcov"
diff --git a/.github/workflows/testing_daily.yml b/.github/workflows/testing_daily.yml
index f0b3ba61..5e41630f 100644
--- a/.github/workflows/testing_daily.yml
+++ b/.github/workflows/testing_daily.yml
@@ -10,61 +10,43 @@ jobs:
         runs-on: ${{ matrix.os }}
         defaults:
             run:
-                shell: bash {0}
+                shell: bash -l {0}
         strategy:
             fail-fast: false
             matrix:
                 os: [ubuntu-latest, windows-latest, macOS-latest]
-                python-version: ["3.7", "3.8", "3.9", "3.10"]
-                torch-version: ["1.13.1"]
+                python-version: ["3.7", "3.10"]
 
         steps:
             - name: Check out the repo code
               uses: actions/checkout@v3
 
-            - name: Determine the Python version
-              uses: haya14busa/action-cond@v1
-              id: condval
+            - name: Set up Conda
+              uses: conda-incubator/setup-miniconda@v2
               with:
-                cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
-                # Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
-                if_true: "3.7.16"
-                if_false: ${{ matrix.python-version }}
-
-            - name: Set up Python
-              uses: actions/setup-python@v4
-              with:
-                python-version: ${{ steps.condval.outputs.value }}
-                check-latest: true
-                cache: pip
-                cache-dependency-path: |
-                  setup.cfg
-
-            - name: Install PyTorch ${{ matrix.torch-version }}+cpu
-              # we have to install torch in advance because torch_sparse needs it for compilation,
-              # refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
-              run: |
-                  which python
-                  which pip
-                  python -m pip install --upgrade pip
-                  pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
-                  python -c "import torch; print('PyTorch:', torch.__version__)"
-
-            - name: Install other dependencies
-              run: |
-                  pip install pypots
-                  pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
-                  pip install -e ".[dev]"
+                  activate-environment: pypots-test
+                  python-version: ${{ matrix.python-version }}
+                  environment-file: tests/environment_for_conda_test.yml
+                  auto-activate-base: false
 
             - name: Fetch the test environment details
               run: |
                   which python
-                  pip list
+                  conda info
+                  conda list
 
             - name: Test with pytest
               run: |
-                  coverage run --source=pypots -m pytest --ignore tests/test_training_on_multi_gpus.py
-                  # ignore the test_training_on_multi_gpus.py because it requires multiple GPUs which are not available on GitHub Actions
+                  # Run the test suites separately here because running them all in one `pytest` command causes a
+                  # Segmentation Fault (SF) in test_clustering on MacOS. The bug has not been tracked down yet, so this is a trade-off to avoid the SF.
+                  python -m pytest -rA tests/classification/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/imputation/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/clustering/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/forecasting/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/optim/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/data/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/utils/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
+                  python -m pytest -rA tests/cli/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
 
             - name: Generate the LCOV report
               run: |
@@ -74,4 +56,4 @@ jobs:
               uses: coverallsapp/github-action@master
               with:
                   github-token: ${{ secrets.GITHUB_TOKEN }}
-                  path-to-lcov: "coverage.lcov"
+                  path-to-lcov: 'coverage.lcov'
diff --git a/.gitignore b/.gitignore
index 0841fdef..51294f38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,8 @@ docs/_build
 .coverage
 .pytest_cache
 *__pycache__*
-*testing_results*
+*test*
 
 # ignore specific kinds of files like all PDFs
 *.pdf
+*.ipynb
diff --git a/README.md b/README.md
index 7b591634..9c86f08a 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,8 @@
     <img src="https://pypots.com/figs/pypots_logos/PyPOTS_logo_FFBG.svg?sanitize=true" width="200" align="right">
 </a>
 
-## <p align="center">Welcome to PyPOTS</p>
+<h2 align="center">Welcome to PyPOTS</h2>
+
 **<p align="center">A Python Toolbox for Data Mining on Partially-Observed Time Series</p>**
 
 <p align="center">
@@ -161,6 +162,8 @@ PyPOTS supports imputation, classification, clustering, and forecasting tasks on
 |        **Type**        |  **Abbr.**  |                                                                    **Full name of the algorithm/model/paper**                                                                     | **Year** |
 |       Neural Net       |    SAITS    |                                                               Self-Attention-based Imputation for Time Series [^1]                                                                |   2023   |
 |       Neural Net       | Transformer | Attention is All you Need [^2];<br>Self-Attention-based Imputation for Time Series [^1];<br><sub>Note: proposed in [^2], and re-implemented as an imputation model in [^1].</sub> |   2017   |
+|       Neural Net       |   US-GAN    |                                                 Generative Semi-supervised Learning for Multivariate Time Series Imputation [^10]                                                 |   2021   |
+|       Neural Net       |   GP-VAE    |                                                              GP-VAE: Deep Probabilistic Time Series Imputation [^11]                                                              |   2020   |
 |       Neural Net       |    BRITS    |                                                              Bidirectional Recurrent Imputation for Time Series [^3]                                                              |   2018   |
 |       Neural Net       |    M-RNN    |                                                                  Multi-directional Recurrent Neural Network [^9]                                                                  |   2019   |
 |         Naive          |    LOCF     |                                                                         Last Observation Carried Forward                                                                          |    -     |
@@ -253,7 +256,7 @@ We care about the feedback from our users, so we're building PyPOTS community on
 If you have any suggestions or want to contribute ideas or share time-series related papers, join us and tell.
 PyPOTS community is open, transparent, and surely friendly. Let's work together to build and improve PyPOTS!
 
-
+[//]: # (Use APA reference style below)
 [^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). *Expert systems with applications*.
 [^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS 2017*.
 [^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS 2018*.
@@ -263,7 +266,8 @@ PyPOTS community is open, transparent, and surely friendly. Let's work together
 [^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). *GigaScience*.
 [^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). *IEEE transactions on pattern analysis and machine intelligence*.
 [^9]: Yoon, J., Zame, W. R., & van der Schaar, M. (2019). [Estimating Missing Data in Temporal Data Streams Using Multi-Directional Recurrent Neural Networks](https://ieeexplore.ieee.org/document/8485748). *IEEE Transactions on Biomedical Engineering*.
-
+[^10]: Miao, X., Wu, Y., Wang, J., Gao, Y., Mao, X., & Yin, J. (2021). [Generative Semi-supervised Learning for Multivariate Time Series Imputation](https://ojs.aaai.org/index.php/AAAI/article/view/17086). *AAAI 2021*.
+[^11]: Fortuin, V., Baranchuk, D., Raetsch, G., & Mandt, S. (2020). [GP-VAE: Deep Probabilistic Time Series Imputation](https://proceedings.mlr.press/v108/fortuin20a.html). *AISTATS 2020*.
 
 <details>
 <summary>🏠 Visits</summary>
@@ -271,4 +275,4 @@ PyPOTS community is open, transparent, and surely friendly. Let's work together
     <img alt="PyPOTS visits" align="left" src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FPyPOTS%2FPyPOTS&count_bg=%23009A0A&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=Visits%20since%20May%202022&edge_flat=false">
 </a>
 </details>
-<br>
\ No newline at end of file
+<br>
diff --git a/docs/about_us.rst b/docs/about_us.rst
index aaaab944..370a3e0d 100644
--- a/docs/about_us.rst
+++ b/docs/about_us.rst
@@ -33,5 +33,5 @@ PyPOTS exists thanks to all the nice people (sorted by contribution time) who co
 
 .. raw:: html
 
-    <object data="https://pypots.com/figs/PyPOTS_contributors.svg">
+    <object data="https://pypots.com/figs/pypots_logos/PyPOTS_contributors.svg">
     </object>
diff --git a/docs/pypots.data.rst b/docs/pypots.data.rst
index d792d6aa..fe7c4678 100644
--- a/docs/pypots.data.rst
+++ b/docs/pypots.data.rst
@@ -10,6 +10,15 @@ pypots.data.base module
    :show-inheritance:
    :inherited-members:
 
+pypots.data.saving module
+-----------------------------
+
+.. automodule:: pypots.data.saving
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :inherited-members:
+
 pypots.data.generating module
 -----------------------------
 
diff --git a/docs/pypots.forecasting.rst b/docs/pypots.forecasting.rst
index 2ae67b85..c4ac76b7 100644
--- a/docs/pypots.forecasting.rst
+++ b/docs/pypots.forecasting.rst
@@ -1,11 +1,31 @@
 pypots.forecasting package
 ==========================
 
+Subpackages
+-----------
 
-pypots.forecasting.bttf module
+.. toctree::
+   :maxdepth: 4
+
+   pypots.forecasting.bttf
+   pypots.forecasting.template
+
+Submodules
+----------
+
+pypots.forecasting.base module
 ------------------------------
 
-.. automodule:: pypots.forecasting.bttf
+.. automodule:: pypots.forecasting.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :inherited-members:
+
+Module contents
+---------------
+
+.. automodule:: pypots.forecasting
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/pypots.imputation.rst b/docs/pypots.imputation.rst
index 0e31f8c8..a33e0fdf 100644
--- a/docs/pypots.imputation.rst
+++ b/docs/pypots.imputation.rst
@@ -19,6 +19,24 @@ pypots.imputation.transformer module
    :show-inheritance:
    :inherited-members:
 
+pypots.imputation.usgan module
+------------------------------
+
+.. automodule:: pypots.imputation.usgan
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :inherited-members:
+
+pypots.imputation.gpvae module
+------------------------------
+
+.. automodule:: pypots.imputation.gpvae
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :inherited-members:
+
 pypots.imputation.brits module
 ------------------------------
 
diff --git a/pypots/base.py b/pypots/base.py
index f55033e3..7a12fe94 100644
--- a/pypots/base.py
+++ b/pypots/base.py
@@ -96,7 +96,9 @@ def _setup_device(self, device: Union[None, str, torch.device, list]):
                 self.device = device
             elif isinstance(device, list):
                 if len(device) == 0:
-                    raise ValueError("The list of devices should have at least 1 device, but got 0.")
+                    raise ValueError(
+                        "The list of devices should have at least 1 device, but got 0."
+                    )
                 elif len(device) == 1:
                     return self._setup_device(device[0])
                 # parallely training on multiple CUDA devices
@@ -176,7 +178,6 @@ def _send_data_to_given_device(self, data):
         if isinstance(self.device, torch.device):  # single device
             data = map(lambda x: x.to(self.device), data)
         else:  # parallely training on multiple devices
-
             # randomly choose one device to balance the workload
             # device = np.random.choice(self.device)
 
diff --git a/pypots/classification/base.py b/pypots/classification/base.py
index a30fd698..a16dbc01 100644
--- a/pypots/classification/base.py
+++ b/pypots/classification/base.py
@@ -256,7 +256,6 @@ def _train_model(
         training_loader: DataLoader,
         val_loader: DataLoader = None,
     ) -> None:
-
         # each training starts from the very beginning, so reset the loss and model dict here
         self.best_loss = float("inf")
         self.best_model_dict = None
diff --git a/pypots/classification/grud/data.py b/pypots/classification/grud/data.py
index 52186017..edf1d4d0 100644
--- a/pypots/classification/grud/data.py
+++ b/pypots/classification/grud/data.py
@@ -123,7 +123,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
         if self.file_handle is None:
             self.file_handle = self._open_file_handle()
 
-        X = torch.from_numpy(self.file_handle["X"][idx])
+        X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
         missing_mask = (~torch.isnan(X)).to(torch.float32)
         X_filledLOCF = self.locf._locf_torch(X.unsqueeze(dim=0)).squeeze()
         X = torch.nan_to_num(X)
diff --git a/pypots/classification/raindrop/modules.py b/pypots/classification/raindrop/modules.py
index 76a992ef..191ff9c7 100644
--- a/pypots/classification/raindrop/modules.py
+++ b/pypots/classification/raindrop/modules.py
@@ -174,7 +174,6 @@ def forward(
         edge_attr: OptTensor = None,
         return_attention_weights=None,
     ) -> Tuple[torch.Tensor, Any]:
-
         r"""
         Args:
             return_attention_weights (bool, optional): If set to :obj:`True`,
diff --git a/pypots/classification/template/dataset.py b/pypots/classification/template/data.py
similarity index 100%
rename from pypots/classification/template/dataset.py
rename to pypots/classification/template/data.py
diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py
index 324e6718..fd9b7f0d 100644
--- a/pypots/clustering/base.py
+++ b/pypots/clustering/base.py
@@ -244,7 +244,6 @@ def _train_model(
         training_loader: DataLoader,
         val_loader: DataLoader = None,
     ) -> None:
-
         """
 
         Parameters
diff --git a/pypots/clustering/crli/model.py b/pypots/clustering/crli/model.py
index b5e2e14a..8b7a63a1 100644
--- a/pypots/clustering/crli/model.py
+++ b/pypots/clustering/crli/model.py
@@ -226,7 +226,6 @@ def __init__(
         saving_path: Optional[str] = None,
         model_saving_strategy: Optional[str] = "best",
     ):
-
         super().__init__(
             n_clusters,
             batch_size,
diff --git a/pypots/clustering/template/dataset.py b/pypots/clustering/template/data.py
similarity index 100%
rename from pypots/clustering/template/dataset.py
rename to pypots/clustering/template/data.py
diff --git a/pypots/clustering/vader/data.py b/pypots/clustering/vader/data.py
index a3b2f91d..a8910b44 100644
--- a/pypots/clustering/vader/data.py
+++ b/pypots/clustering/vader/data.py
@@ -6,12 +6,12 @@
 # License: GLP-v3
 
 
-from typing import Union
+from typing import Union, Iterable
 
-from ..crli.data import DatasetForCRLI
+from ...data.base import BaseDataset
 
 
-class DatasetForVaDER(DatasetForCRLI):
+class DatasetForVaDER(BaseDataset):
     """Dataset class for model VaDER.
 
     Parameters
@@ -45,3 +45,9 @@ def __init__(
         file_type: str = "h5py",
     ):
         super().__init__(data, return_labels, file_type)
+
+    def _fetch_data_from_array(self, idx: int) -> Iterable:
+        return super()._fetch_data_from_array(idx)
+
+    def _fetch_data_from_file(self, idx: int) -> Iterable:
+        return super()._fetch_data_from_file(idx)
diff --git a/pypots/clustering/vader/model.py b/pypots/clustering/vader/model.py
index f2912cce..5a44da85 100644
--- a/pypots/clustering/vader/model.py
+++ b/pypots/clustering/vader/model.py
@@ -184,7 +184,6 @@ def forward(
         ) = self.get_results(X, missing_mask)
 
         if not training and not pretrain:
-
             results = {
                 "mu_tilde": mu_tilde,
                 "mu": mu_c,
@@ -403,7 +402,6 @@ def _train_model(
         training_loader: DataLoader,
         val_loader: DataLoader = None,
     ) -> None:
-
         # each training starts from the very beginning, so reset the loss and model dict here
         self.best_loss = float("inf")
         self.best_model_dict = None
diff --git a/pypots/data/base.py b/pypots/data/base.py
index 86b15fc2..1bef9f9c 100644
--- a/pypots/data/base.py
+++ b/pypots/data/base.py
@@ -204,13 +204,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
             The collated data sample, a list including all necessary sample info.
         """
 
-        X = self.X[idx]
-        missing_mask = ~torch.isnan(X)
+        X = self.X[idx].to(torch.float32)
+        missing_mask = (~torch.isnan(X)).to(torch.float32)
         X = torch.nan_to_num(X)
         sample = [
             torch.tensor(idx),
-            X.to(torch.float32),
-            missing_mask.to(torch.float32),
+            X,
+            missing_mask,
         ]
 
         if self.y is not None and self.return_labels:
@@ -279,13 +279,13 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
         if self.file_handle is None:
             self.file_handle = self._open_file_handle()
 
-        X = torch.from_numpy(self.file_handle["X"][idx])
-        missing_mask = ~torch.isnan(X)
+        X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
+        missing_mask = (~torch.isnan(X)).to(torch.float32)
         X = torch.nan_to_num(X)
         sample = [
             torch.tensor(idx),
-            X.to(torch.float32),
-            missing_mask.to(torch.float32),
+            X,
+            missing_mask,
         ]
 
         # if the dataset has labels and is for training, then fetch it from the file
diff --git a/pypots/data/saving.py b/pypots/data/saving.py
index 8581ad50..61138df2 100644
--- a/pypots/data/saving.py
+++ b/pypots/data/saving.py
@@ -14,7 +14,11 @@
 from pypots.utils.logging import logger
 
 
-def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
+def save_dict_into_h5(
+    data_dict: dict,
+    saving_dir: str,
+    saving_name: str = "datasets.h5",
+) -> None:
     """Save the given data (in a dictionary) into the given h5 file.
 
     Parameters
@@ -25,6 +29,9 @@ def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
     saving_dir : str,
         The h5 file to save the data.
 
+    saving_name : str, optional (default="datasets.h5")
+        The final name of the saved h5 file.
+
     """
 
     def save_set(handle, name, data):
@@ -36,7 +43,7 @@ def save_set(handle, name, data):
             handle.create_dataset(name, data=data)
 
     create_dir_if_not_exist(saving_dir)
-    saving_path = os.path.join(saving_dir, "datasets.h5")
+    saving_path = os.path.join(saving_dir, saving_name)
     with h5py.File(saving_path, "w") as hf:
         for k, v in data_dict.items():
             save_set(hf, k, v)
diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py
index 5188999b..079f5925 100644
--- a/pypots/forecasting/base.py
+++ b/pypots/forecasting/base.py
@@ -242,7 +242,6 @@ def _train_model(
         training_loader: DataLoader,
         val_loader: DataLoader = None,
     ) -> None:
-
         # each training starts from the very beginning, so reset the loss and model dict here
         self.best_loss = float("inf")
         self.best_model_dict = None
diff --git a/pypots/forecasting/template/dataset.py b/pypots/forecasting/template/data.py
similarity index 100%
rename from pypots/forecasting/template/dataset.py
rename to pypots/forecasting/template/data.py
diff --git a/pypots/imputation/__init__.py b/pypots/imputation/__init__.py
index 9de8d0bc..a6c4dcd8 100644
--- a/pypots/imputation/__init__.py
+++ b/pypots/imputation/__init__.py
@@ -6,10 +6,12 @@
 # License: GPL-v3
 
 from .brits import BRITS
+from .gpvae import GPVAE
 from .locf import LOCF
+from .mrnn import MRNN
 from .saits import SAITS
 from .transformer import Transformer
-from .mrnn import MRNN
+from .usgan import USGAN
 
 __all__ = [
     "SAITS",
@@ -17,4 +19,6 @@
     "BRITS",
     "MRNN",
     "LOCF",
+    "GPVAE",
+    "USGAN",
 ]
diff --git a/pypots/imputation/brits/data.py b/pypots/imputation/brits/data.py
index f39e411c..342ede98 100644
--- a/pypots/imputation/brits/data.py
+++ b/pypots/imputation/brits/data.py
@@ -59,14 +59,14 @@ def __init__(
 
             self.processed_data = {
                 "forward": {
-                    "X": forward_X,
-                    "missing_mask": forward_missing_mask,
-                    "delta": forward_delta,
+                    "X": forward_X.to(torch.float32),
+                    "missing_mask": forward_missing_mask.to(torch.float32),
+                    "delta": forward_delta.to(torch.float32),
                 },
                 "backward": {
-                    "X": backward_X,
-                    "missing_mask": backward_missing_mask,
-                    "delta": backward_delta,
+                    "X": backward_X.to(torch.float32),
+                    "missing_mask": backward_missing_mask.to(torch.float32),
+                    "delta": backward_delta.to(torch.float32),
                 },
             }
 
@@ -101,13 +101,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
         sample = [
             torch.tensor(idx),
             # for forward
-            self.processed_data["forward"]["X"][idx].to(torch.float32),
-            self.processed_data["forward"]["missing_mask"][idx].to(torch.float32),
-            self.processed_data["forward"]["delta"][idx].to(torch.float32),
+            self.processed_data["forward"]["X"][idx],
+            self.processed_data["forward"]["missing_mask"][idx],
+            self.processed_data["forward"]["delta"][idx],
             # for backward
-            self.processed_data["backward"]["X"][idx].to(torch.float32),
-            self.processed_data["backward"]["missing_mask"][idx].to(torch.float32),
-            self.processed_data["backward"]["delta"][idx].to(torch.float32),
+            self.processed_data["backward"]["X"][idx],
+            self.processed_data["backward"]["missing_mask"][idx],
+            self.processed_data["backward"]["delta"][idx],
         ]
 
         if self.y is not None and self.return_labels:
@@ -133,7 +133,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
         if self.file_handle is None:
             self.file_handle = self._open_file_handle()
 
-        X = torch.from_numpy(self.file_handle["X"][idx])
+        X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
         missing_mask = (~torch.isnan(X)).to(torch.float32)
         X = torch.nan_to_num(X)
 
diff --git a/pypots/imputation/gpvae/__init__.py b/pypots/imputation/gpvae/__init__.py
new file mode 100644
index 00000000..f5ffb05e
--- /dev/null
+++ b/pypots/imputation/gpvae/__init__.py
@@ -0,0 +1,12 @@
+"""
+The package of the partially-observed time-series imputation method GP-VAE.
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk>
+# License: GPL-v3
+
+from .model import GPVAE
+
+__all__ = [
+    "GPVAE",
+]
diff --git a/pypots/imputation/gpvae/data.py b/pypots/imputation/gpvae/data.py
new file mode 100644
index 00000000..8bb9be8c
--- /dev/null
+++ b/pypots/imputation/gpvae/data.py
@@ -0,0 +1,132 @@
+"""
+Dataset class for model GP-VAE.
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk> and Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+from typing import Union, Iterable
+
+import torch
+
+from ...data.base import BaseDataset
+
+
+class DatasetForGPVAE(BaseDataset):
+    """Dataset class for GP-VAE.
+
+    Parameters
+    ----------
+    data : dict or str,
+        The dataset for model input, should be a dictionary including keys as 'X' and 'y',
+        or a path string locating a data file.
+        If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
+        which is time-series data for input, can contain missing values, and y should be array-like of shape
+        [n_samples], which is classification labels of X.
+        If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
+        key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
+
+    return_labels : bool, default = True,
+        Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example,
+        during training of classification models, the Dataset class will return labels in __getitem__() for model input.
+        Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we
+        need the defined Dataset class for all training/validating/testing stages. For those big datasets stored in h5
+        files, they already have both X and y saved. But we don't read labels from the file for validating and testing
+        with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for
+        distinction.
+
+    file_type : str, default = "h5py"
+        The type of the given file if train_set and val_set are path strings.
+    """
+
+    def __init__(
+        self,
+        data: Union[dict, str],
+        return_labels: bool = True,
+        file_type: str = "h5py",
+    ):
+        super().__init__(data, return_labels, file_type)
+        # NOTE(review): self.data and self.X are presumably set by BaseDataset.__init__ — confirm
+        if not isinstance(self.data, str):
+            # data is not a file path: build the float32 mask (1 = observed, 0 = NaN) and replace NaNs once
+            missing_mask = (~torch.isnan(self.X)).type(torch.float32)
+            X = torch.nan_to_num(self.X).to(torch.float32)
+            # cache the tensors that _fetch_data_from_array() indexes per sample
+            self.processed_data = {
+                "X": X,
+                "missing_mask": missing_mask,
+            }
+
+    def _fetch_data_from_array(self, idx: int) -> Iterable:
+        """Fetch data from self.X if it is given.
+
+        Parameters
+        ----------
+        idx : int,
+            The index of the sample to be returned.
+
+        Returns
+        -------
+        sample : list,
+            A list contains
+
+            index : int tensor,
+                The index of the sample.
+
+            X : tensor,
+                The feature vector for model input (float32), taken from the
+                pre-processed data in which NaN values are already replaced.
+
+            missing_mask : tensor,
+                The float32 mask of X, in which 1 marks observed values and
+                0 marks missing ones.
+
+            label (optional) : tensor,
+                The target label of the time-series sample, cast to torch.long.
+                Only appended when labels exist and return_labels is True.
+        """
+        sample = [
+            torch.tensor(idx),
+            # the pre-processed features and their observation mask
+            self.processed_data["X"][idx],
+            self.processed_data["missing_mask"][idx],
+        ]
+
+        if self.y is not None and self.return_labels:
+            sample.append(self.y[idx].to(torch.long))
+
+        return sample
+
+    def _fetch_data_from_file(self, idx: int) -> Iterable:
+        """Fetch data with the lazy-loading strategy, i.e. only loading data from the file while requesting for samples.
+        Here the opened file handle doesn't load the entire dataset into RAM but only load the currently accessed slice.
+
+        Parameters
+        ----------
+        idx : int,
+            The index of the sample to be returned.
+
+        Returns
+        -------
+        sample : list,
+            The collated data sample: [index, X, missing_mask], plus the label if it is requested and stored.
+        """
+        # the file handle is opened lazily, on the first access
+        if self.file_handle is None:
+            self.file_handle = self._open_file_handle()
+        # read one sample as float32, derive its mask (1 = observed) and then replace the NaNs
+        X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
+        missing_mask = (~torch.isnan(X)).to(torch.float32)
+        X = torch.nan_to_num(X)
+
+        sample = [
+            torch.tensor(idx),
+            X,
+            missing_mask,
+        ]
+
+        # if the dataset has labels and is for training, then fetch it from the file
+        if "y" in self.file_handle.keys() and self.return_labels:
+            sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long))
+
+        return sample
diff --git a/pypots/imputation/gpvae/model.py b/pypots/imputation/gpvae/model.py
new file mode 100644
index 00000000..6b613d4d
--- /dev/null
+++ b/pypots/imputation/gpvae/model.py
@@ -0,0 +1,446 @@
+"""
+The implementation of GP-VAE for the partially-observed time-series imputation task.
+
+Refer to the paper Fortuin V, Baranchuk D, Rätsch G, et al.
+GP-VAE: Deep probabilistic time series imputation. AISTATS. PMLR, 2020: 1651-1661.
+
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk> and Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+from typing import Union, Optional
+
+import h5py
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+
+from .data import DatasetForGPVAE
+from .modules import (
+    Encoder,
+    rbf_kernel,
+    diffusion_kernel,
+    matern_kernel,
+    cauchy_kernel,
+    Decoder,
+)
+from ..base import BaseNNImputer
+from ...optim.adam import Adam
+from ...optim.base import Optimizer
+
+
+class _GPVAE(nn.Module):
+    """model GPVAE with Gaussian Process prior
+
+    Parameters
+    ----------
+    input_dim : int,
+        the feature dimension of the input
+
+    time_length : int,
+        the length of each time series
+
+    latent_dim : int,
+        the feature dimension of the latent embedding
+
+    encoder_sizes : tuple,
+        the tuple of the network size in encoder
+
+    decoder_sizes : tuple,
+        the tuple of the network size in decoder
+
+    beta : float,
+        the weight of the KL divergence
+
+    M : int,
+        the number of Monte Carlo samples for ELBO estimation
+
+    K : int,
+        the number of importance weights for IWAE model
+
+    kernel : str,
+        the Gaussian Process kernel ["cauchy", "diffusion", "rbf", "matern"]
+
+    sigma : float,
+        the scale parameter for a kernel function
+
+    length_scale : float,
+        the length scale parameter for a kernel function
+
+    kernel_scales : int,
+        the number of different length scales over latent space dimensions
+    """
+
+    def __init__(
+        self,
+        input_dim,
+        time_length,
+        latent_dim,
+        encoder_sizes=(64, 64),
+        decoder_sizes=(64, 64),
+        beta=1,
+        M=1,
+        K=1,
+        kernel="cauchy",
+        sigma=1.0,
+        length_scale=7.0,
+        kernel_scales=1,
+        window_size=24,
+    ):
+        super().__init__()
+        self.kernel = kernel
+        self.sigma = sigma
+        self.length_scale = length_scale
+        self.kernel_scales = kernel_scales
+        # the kernel settings above are read by _init_prior(); the network settings follow
+        self.input_dim = input_dim
+        self.time_length = time_length
+        self.latent_dim = latent_dim
+        self.beta = beta
+        self.encoder = Encoder(input_dim, latent_dim, encoder_sizes, window_size)
+        self.decoder = Decoder(latent_dim, input_dim, decoder_sizes)
+        self.M = M
+        self.K = K
+
+        # the Gaussian Process prior is built once here and reused by every forward pass
+        self.prior = self._init_prior()
+        # NOTE(review): the prior is a plain attribute holding a distribution object, so moving
+        # the module with .to(device) presumably leaves its tensors where they were created
+
+    def encode(self, x):
+        return self.encoder(x)  # the distribution returned by the Encoder module
+
+    def decode(self, z):
+        # array-likes are converted to float tensors; the last two axes are swapped
+        latent = z if torch.is_tensor(z) else torch.tensor(z).float()
+        n_axes = latent.dim()
+        assert n_axes > 2
+        return self.decoder(latent.transpose(n_axes - 1, n_axes - 2))
+
+    def forward(self, inputs, training=True):
+        """Return the negative ELBO loss (training only) and the imputed batch."""
+        x_in = inputs["X"]
+        mask_in = inputs["missing_mask"]
+        observed = None if mask_in is None else mask_in.type(torch.bool)
+        # replicate the batch M * K times for the Monte Carlo / importance samples
+        x = x_in.repeat(self.M * self.K, 1, 1)
+        m_mask = None if observed is None else observed.repeat(self.M * self.K, 1, 1)
+
+        # the prior is not a registered buffer, so nn.Module.to() never moves it;
+        # rebuild it on the device of the inputs when the two differ (e.g. on CUDA)
+        if self.prior.loc.device != x.device:
+            self.prior = torch.distributions.MultivariateNormal(
+                loc=self.prior.loc.to(x.device),
+                scale_tril=self.prior.scale_tril.to(x.device),
+            )
+
+        qz_x = self.encode(x)
+        z = qz_x.rsample()
+        px_z = self.decode(z)
+
+        # reconstruction term, counted on finite and observed entries only
+        nll = -px_z.log_prob(x)
+        nll = torch.where(torch.isfinite(nll), nll, torch.zeros_like(nll))
+        if m_mask is not None:
+            nll = torch.where(m_mask, nll, torch.zeros_like(nll))
+        nll = nll.sum(dim=(1, 2))
+
+        if self.K > 1:
+            # importance-weighted bound with a sample-based estimate of the KL term
+            kl = qz_x.log_prob(z) - self.prior.log_prob(z)
+            kl = torch.where(torch.isfinite(kl), kl, torch.zeros_like(kl))
+            weights = torch.reshape(-nll - kl.sum(1), [self.M, self.K, -1])
+            elbo = torch.logsumexp(weights, dim=1).mean()
+        else:
+            kl = self.kl_divergence(qz_x, self.prior)
+            kl = torch.where(torch.isfinite(kl), kl, torch.zeros_like(kl))
+            elbo = (-nll - self.beta * kl.sum(1)).mean()
+
+        # impute from the original (un-replicated) batch so that the output keeps the
+        # shape of the input even when M * K > 1; observed values are kept unchanged
+        imputed_data = self.decode(self.encode(x_in).mean).mean
+        if observed is not None:
+            imputed_data = imputed_data * ~observed + x_in * observed
+
+        # the loss is only returned in training mode
+        results = {"imputed_data": imputed_data}
+        if training:
+            results = {"loss": -elbo, "imputed_data": imputed_data}
+        return results
+
+    @staticmethod
+    def kl_divergence(a, b):
+        # KL(a || b) via torch.distributions. TODO: different from the author's implementation
+        return torch.distributions.kl.kl_divergence(a, b)
+
+    def _init_prior(self):
+        """Build the zero-mean Gaussian Process prior over the latent time series.
+
+        One [time_length, time_length] kernel matrix is built per kernel scale (the
+        i-th scale uses length_scale / 2**i); the matrices are then tiled so that
+        each latent dimension gets one of them as its covariance matrix.
+
+        Raises
+        ------
+        ValueError
+            If self.kernel is not one of "rbf", "diffusion", "matern", "cauchy".
+        """
+        # Compute the kernel matrix of each kernel scale
+        kernel_matrices = []
+        for i in range(self.kernel_scales):
+            scale = self.length_scale / 2**i
+            if self.kernel == "rbf":
+                matrix = rbf_kernel(self.time_length, scale)
+            elif self.kernel == "diffusion":
+                matrix = diffusion_kernel(self.time_length, scale)
+            elif self.kernel == "matern":
+                matrix = matern_kernel(self.time_length, scale)
+            elif self.kernel == "cauchy":
+                matrix = cauchy_kernel(self.time_length, self.sigma, scale)
+            else:
+                # an unknown kernel name would otherwise leave kernel_matrices empty
+                raise ValueError(f"unsupported kernel: {self.kernel!r}")
+            kernel_matrices.append(matrix)
+
+        # Tile the matrices over the latent dimensions; the last scale takes the rest
+        per_scale = int(np.ceil(self.latent_dim / self.kernel_scales))
+        counts = [per_scale] * (self.kernel_scales - 1)
+        counts.append(self.latent_dim - sum(counts))
+        kernel_matrix_tiled = torch.cat(
+            [k.unsqueeze(0).repeat(n, 1, 1) for k, n in zip(kernel_matrices, counts)]
+        )
+        assert len(kernel_matrix_tiled) == self.latent_dim
+        prior = torch.distributions.MultivariateNormal(
+            loc=torch.zeros(self.latent_dim, self.time_length),
+            covariance_matrix=kernel_matrix_tiled,
+        )
+
+        return prior
+
+
+class GPVAE(BaseNNImputer):
+    """The PyTorch implementation of the GP-VAE model (Fortuin et al., AISTATS 2020).
+
+    Parameters
+    ----------
+    beta:
+        The weight of KL divergence in ELBO.
+
+    kernel:
+        The type of kernel function chosen in the Gaussian Process prior. ["cauchy", "diffusion", "rbf", "matern"]
+
+    batch_size :
+        The batch size for training and evaluating the model.
+
+    epochs :
+        The number of epochs for training the model.
+
+    patience :
+        The patience for the early-stopping mechanism. Given a positive integer, the training process will be
+        stopped when the model does not perform better after that number of epochs.
+        Leaving it default as None will disable the early-stopping.
+
+    optimizer :
+        The optimizer for model training.
+        If not given, will use a default Adam optimizer.
+
+    num_workers :
+        The number of subprocesses to use for data loading.
+        `0` means data loading will be in the main process, i.e. there won't be subprocesses.
+
+    device :
+        The device for the model to run on. It can be a string, a :class:`torch.device` object, or a list of them.
+        If not given, will try to use CUDA devices first (will use the default CUDA device if there are multiple),
+        then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models.
+        If given a list of devices, e.g. ['cuda:0', 'cuda:1'], or [torch.device('cuda:0'), torch.device('cuda:1')] , the
+        model will be parallely trained on the multiple devices (so far only support parallel training on CUDA devices).
+        Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.
+
+    saving_path :
+        The path for automatically saving model checkpoints and tensorboard files (i.e. loss values recorded during
+        training into a tensorboard file). Will not save if not given.
+
+    model_saving_strategy :
+        The strategy to save model checkpoints. It has to be one of [None, "best", "better"].
+        No model will be saved when it is set as None.
+        The "best" strategy will only automatically save the best model after the training finished.
+        The "better" strategy will automatically save the model during training whenever the model performs
+        better than in previous epochs.
+
+    Attributes
+    ----------
+    model : :class:`torch.nn.Module`
+        The underlying GPVAE model.
+
+    optimizer : :class:`pypots.optim.Optimizer`
+        The optimizer for model training.
+
+    """
+
+    def __init__(
+        self,
+        n_steps: int,
+        n_features: int,
+        latent_size: int,
+        encoder_sizes: tuple = (64, 64),
+        decoder_sizes: tuple = (64, 64),
+        kernel: str = "cauchy",
+        beta: float = 0.2,
+        M: int = 1,
+        K: int = 1,
+        sigma: float = 1.0,
+        length_scale: float = 7.0,
+        kernel_scales: int = 1,
+        window_size: int = 3,
+        batch_size: int = 32,
+        epochs: int = 100,
+        patience: int = None,
+        optimizer: Optional[Optimizer] = Adam(),
+        num_workers: int = 0,
+        device: Optional[Union[str, torch.device, list]] = None,
+        saving_path: str = None,
+        model_saving_strategy: Optional[str] = "best",
+    ):
+        super().__init__(
+            batch_size,
+            epochs,
+            patience,
+            num_workers,
+            device,
+            saving_path,
+            model_saving_strategy,
+        )
+        # hyperparameters of the GP-VAE network, kept as attributes
+        self.n_steps = n_steps
+        self.n_features = n_features
+        self.latent_size = latent_size
+        self.kernel = kernel
+        self.encoder_sizes = encoder_sizes
+        self.decoder_sizes = decoder_sizes
+        self.beta = beta
+        self.M = M
+        self.K = K
+        self.sigma = sigma
+        self.length_scale = length_scale
+        self.kernel_scales = kernel_scales
+
+        # set up the model (window_size is only forwarded to the network, it is not stored)
+        self.model = _GPVAE(
+            input_dim=self.n_features,
+            time_length=self.n_steps,
+            latent_dim=self.latent_size,
+            kernel=self.kernel,
+            encoder_sizes=self.encoder_sizes,
+            decoder_sizes=self.decoder_sizes,
+            beta=self.beta,
+            M=self.M,
+            K=self.K,
+            sigma=self.sigma,
+            length_scale=self.length_scale,
+            kernel_scales=self.kernel_scales,
+            window_size=window_size,
+        )
+        self._send_model_to_given_device()
+        self._print_model_size()
+        # set up the optimizer. NOTE(review): the default Adam() is created once, when the signature
+        # is evaluated, so instances relying on the default presumably share one object — confirm
+        self.optimizer = optimizer
+        self.optimizer.init_optimizer(self.model.parameters())
+
+    def _assemble_input_for_training(self, data: list) -> dict:
+        """Pass one batch through self._send_data_to_given_device() and wrap it.
+
+        Parameters
+        ----------
+        data : list,
+            A batch holding, in this order, indices, X and missing_mask.
+
+        Returns
+        -------
+        inputs : dict,
+            The batch keyed by "indices", "X" and "missing_mask".
+        """
+        on_device = self._send_data_to_given_device(data)
+        indices, X, missing_mask = on_device
+        return {"indices": indices, "X": X, "missing_mask": missing_mask}
+
+    def _assemble_input_for_validating(self, data: list) -> dict:
+        return self._assemble_input_for_training(data)  # same inputs as in training
+
+    def _assemble_input_for_testing(self, data: list) -> dict:
+        return self._assemble_input_for_validating(data)  # same inputs as in validation
+
+    def fit(
+        self,
+        train_set: Union[dict, str],
+        val_set: Optional[Union[dict, str]] = None,
+        file_type: str = "h5py",
+    ) -> None:
+        # Step 1: wrap the input data with classes Dataset and DataLoader
+        training_set = DatasetForGPVAE(
+            train_set, return_labels=False, file_type=file_type
+        )
+        training_loader = DataLoader(
+            training_set,
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=self.num_workers,
+        )
+        val_loader = None
+        if val_set is not None:
+            if isinstance(val_set, str):
+                with h5py.File(val_set, "r") as hf:
+                    # Here we read the whole validation set from the file to mask a portion for validation.
+                    # In PyPOTS, a file is usually used because the data is too big. However, the validation
+                    # set generally shouldn't be too large, so loading all of it into memory is acceptable.
+                    # NOTE(review): X_intact and indicating_mask are loaded here, while DatasetForGPVAE
+                    # only yields indices, X and missing_mask — confirm how the validation step in
+                    # _train_model() obtains them.
+                    val_set = {
+                        "X": hf["X"][:],
+                        "X_intact": hf["X_intact"][:],
+                        "indicating_mask": hf["indicating_mask"][:],
+                    }
+            val_set = DatasetForGPVAE(val_set, return_labels=False, file_type=file_type)
+            val_loader = DataLoader(
+                val_set,
+                batch_size=self.batch_size,
+                shuffle=False,
+                num_workers=self.num_workers,
+            )
+
+        # Step 2: train the model and freeze it
+        self._train_model(training_loader, val_loader)
+        self.model.load_state_dict(self.best_model_dict)
+        self.model.eval()  # set the model as eval status to freeze it.
+
+        # Step 3: save the model if necessary
+        self._auto_save_model_if_necessary(training_finished=True)
+
+    def impute(
+        self,
+        X: Union[dict, str],
+        file_type="h5py",
+    ) -> np.ndarray:
+        """Impute the missing values in X and return the result as a numpy array.
+
+        X is wrapped in a DatasetForGPVAE (no labels) and imputed batch by batch.
+        """
+        self.model.eval()  # set the model as eval status to freeze it.
+        loader = DataLoader(
+            DatasetForGPVAE(X, return_labels=False, file_type=file_type),
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=self.num_workers,
+        )
+        batches = []
+        with torch.no_grad():
+            for batch in loader:
+                outputs = self.model.forward(
+                    self._assemble_input_for_testing(batch), training=False
+                )
+                batches.append(outputs["imputed_data"])
+        return torch.cat(batches).cpu().detach().numpy()
diff --git a/pypots/imputation/gpvae/modules.py b/pypots/imputation/gpvae/modules.py
new file mode 100644
index 00000000..5ad81e09
--- /dev/null
+++ b/pypots/imputation/gpvae/modules.py
@@ -0,0 +1,261 @@
+"""
+The implementation of GP-VAE for the partially-observed time-series imputation task.
+
+Refer to the paper Fortuin V, Baranchuk D, Rätsch G, et al.
+GP-VAE: Deep probabilistic time series imputation. AISTATS. PMLR, 2020: 1651-1661.
+
+
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk> and Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def rbf_kernel(T, length_scale):
+    """Return the [T, T] RBF kernel matrix exp(-(i - j)^2 / length_scale^2)."""
+    steps = torch.arange(T).float()
+    # pairwise squared distances between the time indices, via broadcasting
+    squared_gaps = (steps.unsqueeze(0) - steps.unsqueeze(1)) ** 2
+    scaled_gaps = squared_gaps / length_scale**2
+    # the kernel decays with the scaled squared distance
+    return torch.exp(-scaled_gaps)
+
+
+def diffusion_kernel(T, length_scale):
+    """Return the [T, T] diffusion kernel matrix, which is tridiagonal: 1 on the
+    main diagonal and length_scale on the two adjacent diagonals."""
+    assert length_scale < 0.5, (
+        "length_scale has to be smaller than 0.5 for the "
+        "kernel matrix to be diagonally dominant"
+    )
+    # a band of width 1: keeps only the main diagonal and its two neighbours
+    band = torch.ones(T, T).tril(diagonal=1).triu(diagonal=-1)
+    sigmas_tridiag = band * length_scale
+    return sigmas_tridiag + torch.eye(T) * (1.0 - length_scale)
+
+
+def matern_kernel(T, length_scale):
+    """Return the [T, T] Matern kernel matrix exp(-|i - j| / sqrt(length_scale))."""
+    xs = torch.arange(T).float()
+    xs_in = torch.unsqueeze(xs, 0)
+    xs_out = torch.unsqueeze(xs, 1)
+    distance_matrix = torch.abs(xs_in - xs_out)
+    # torch.sqrt() only accepts tensors, while length_scale is usually a plain number
+    root = torch.sqrt(torch.as_tensor(length_scale, dtype=torch.float32))
+    distance_matrix_scaled = distance_matrix / root
+    return torch.exp(-distance_matrix_scaled)
+
+
+def cauchy_kernel(T, sigma, length_scale):
+    """Return the [T, T] Cauchy kernel sigma / ((i - j)^2 / length_scale^2 + 1),
+    with a small constant (0.001) added on its diagonal."""
+    steps = torch.arange(T).float()
+    squared_gaps = (steps.unsqueeze(0) - steps.unsqueeze(1)) ** 2
+    scaled_gaps = squared_gaps / length_scale**2
+    kernel_matrix = sigma / (scaled_gaps + 1.0)
+
+    # the constant added on the diagonal
+    jitter = 0.001 * torch.eye(kernel_matrix.shape[-1])
+    return kernel_matrix + jitter
+
+
+def make_nn(input_size, output_size, hidden_sizes):
+    """This function creates a fully connected neural network.
+
+    Every hidden layer is a Linear layer followed by a ReLU; the final Linear layer
+    maps to output_size and has no activation. With empty hidden_sizes, the network
+    is a single Linear layer from input_size to output_size.
+
+    Parameters
+    ----------
+    input_size : int,
+        the dimension of input embeddings
+
+    output_size : int,
+        the dimension of out embeddings
+
+    hidden_sizes : tuple,
+        the tuple of hidden layer sizes, and the tuple length sets the number of hidden layers
+
+    Returns
+    -------
+    net : torch.nn.Sequential
+        the assembled network (a module, not a tensor)
+    """
+    # chain the sizes so that consecutive pairs give each layer's (in, out) features
+    sizes = [input_size, *hidden_sizes]
+    layers = []
+    for in_features, out_features in zip(sizes, sizes[1:]):
+        layers.append(nn.Linear(in_features=in_features, out_features=out_features))
+        layers.append(nn.ReLU())
+    # sizes[-1] is input_size when there is no hidden layer
+    layers.append(nn.Linear(in_features=sizes[-1], out_features=output_size))
+    return nn.Sequential(*layers)
+
+
+class CustomConv1d(torch.nn.Conv1d):
+    """A Conv1d for inputs whose channels are on the last axis, e.g. [batch, time,
+    features]; the output is permuted back. Constructed like torch.nn.Conv1d."""
+
+    def forward(self, x):
+        if len(x.shape) > 2:
+            shape = list(np.arange(len(x.shape)))
+            new_shape = [0, shape[-1]] + shape[1:-1]
+            out = super(CustomConv1d, self).forward(x.permute(*new_shape))
+            shape = list(np.arange(len(out.shape)))
+            new_shape = [0, shape[-1]] + shape[1:-1]
+            if self.kernel_size[0] % 2 == 0:
+                out = F.pad(out, (0, -1), "constant", 0)  # drop the last step
+            return out.permute(new_shape)
+
+        return super(CustomConv1d, self).forward(x)
+
+
+def make_cnn(input_size, output_size, hidden_sizes, kernel_size=3):
+    """Construct a network made of one 1d-convolutional layer, which utilizes
+    temporal dependencies, followed by fully connected layers.
+
+    Parameters
+    ----------
+    input_size : int,
+        the dimension of input embeddings
+
+    output_size : int or tuple of int,
+        the dimension of out embeddings; a tuple requests one output layer per entry
+
+    hidden_sizes : tuple,
+        the tuple of hidden layer sizes; the first one is the number of output
+        channels of the convolution
+
+    kernel_size : int
+        kernel size for convolutional layer, which is padded by kernel_size // 2
+
+    Returns
+    -------
+    net : torch.nn.Sequential or list
+        the whole network if output_size is not a tuple; otherwise a list holding the
+        shared network followed by one Linear output layer per entry of output_size
+    """
+    conv = CustomConv1d(
+        input_size, hidden_sizes[0], kernel_size=kernel_size, padding=kernel_size // 2
+    )
+    layers = [conv]
+    # each further hidden layer is a Linear layer followed by a ReLU
+    for fan_in, fan_out in zip(hidden_sizes, hidden_sizes[1:]):
+        layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
+
+    last_hidden = hidden_sizes[-1]
+    if not isinstance(output_size, tuple):
+        layers.append(nn.Linear(last_hidden, output_size))
+        return nn.Sequential(*layers)
+    shared = nn.Sequential(*layers)
+    return [shared] + [nn.Linear(last_hidden, size) for size in output_size]
+
+
+class Encoder(nn.Module):
+    def __init__(self, input_size, z_size, hidden_sizes=(128, 128), window_size=24):
+        """This module is an encoder with 1d-convolutional network and multivariate Normal posterior used by GP-VAE with
+        proposed banded covariance matrix
+
+        Parameters
+        ----------
+        input_size : int,
+            the feature dimension of the input
+
+        z_size : int,
+            the feature dimension of the output latent embedding
+
+        hidden_sizes : tuple,
+            the tuple of the hidden layer sizes, and the tuple length sets the number of hidden layers
+
+        window_size : int
+            the kernel size for the Conv1D layer
+        """
+        super().__init__()
+        self.z_size = int(z_size)
+        self.input_size = input_size
+        self.net, self.mu_layer, self.logvar_layer = make_cnn(
+            input_size, (z_size, z_size * 2), hidden_sizes, window_size
+        )
+
+    def forward(self, x):
+        mapped = self.net(x)
+        batch_size = mapped.size(0)
+        time_length = mapped.size(1)
+        # two heads: z_size values per step for the mean, 2 * z_size for the covariance
+        num_dim = len(mapped.shape)
+        mu = self.mu_layer(mapped)
+        logvar = self.logvar_layer(mapped)
+        mapped_mean = torch.transpose(mu, num_dim - 1, num_dim - 2)
+        mapped_covar = torch.transpose(logvar, num_dim - 1, num_dim - 2)
+        mapped_covar = torch.sigmoid(mapped_covar)
+        mapped_reshaped = mapped_covar.reshape(batch_size, self.z_size, 2 * time_length)
+        # indices of an upper-bidiagonal matrix: T entries (t, t), then T - 1 entries (t, t + 1)
+        dense_shape = [batch_size, self.z_size, time_length, time_length]
+        idxs_1 = np.repeat(np.arange(batch_size), self.z_size * (2 * time_length - 1))
+        idxs_2 = np.tile(
+            np.repeat(np.arange(self.z_size), (2 * time_length - 1)), batch_size
+        )
+        idxs_3 = np.tile(
+            np.concatenate([np.arange(time_length), np.arange(time_length - 1)]),
+            batch_size * self.z_size,
+        )
+        idxs_4 = np.tile(
+            np.concatenate([np.arange(time_length), np.arange(1, time_length)]),
+            batch_size * self.z_size,
+        )
+        idxs_all = np.stack([idxs_1, idxs_2, idxs_3, idxs_4], axis=1)
+        # 2T values are available but only 2T - 1 positions exist, so the last one is dropped
+        mapped_values = mapped_reshaped[:, :, :-1].reshape(-1)
+        prec_sparse = torch.sparse_coo_tensor(
+            torch.LongTensor(idxs_all).t().to(mapped.device),
+            (mapped_values).to(mapped.device),
+            (dense_shape),
+        )
+        prec_sparse = prec_sparse.coalesce()
+        prec_tril = prec_sparse.to_dense()
+        eye = (
+            torch.eye(prec_tril.shape[-1])
+            .unsqueeze(0)
+            .repeat(prec_tril.shape[0], prec_tril.shape[1], 1, 1)
+            .to(mapped.device)
+        )
+        prec_tril = prec_tril + eye
+        cov_tril = torch.linalg.solve_triangular(prec_tril, eye, upper=True)  # solves prec_tril @ X = I
+        cov_tril = torch.where(
+            torch.isfinite(cov_tril), cov_tril, torch.zeros_like(cov_tril)
+        ).to(mapped.device)
+        # the transpose of the upper-triangular solution is passed as scale_tril
+        num_dim = len(cov_tril.shape)
+        cov_tril_lower = torch.transpose(cov_tril, num_dim - 1, num_dim - 2)
+
+        z_dist = torch.distributions.MultivariateNormal(
+            loc=mapped_mean, scale_tril=cov_tril_lower
+        )
+        return z_dist
+
+
+class Decoder(nn.Module):
+    def __init__(self, input_size, output_size, hidden_sizes=(256, 256)):
+        """Decoder with a Gaussian output distribution of fixed unit scale.
+
+        Parameters
+        ----------
+        input_size : int,
+            the feature dimension of the input
+        output_size : int,
+            the feature dimension of the output
+        hidden_sizes: tuple
+            the tuple of hidden layer sizes, and the tuple length sets the number of hidden layers.
+        """
+        super().__init__()
+        self.net = make_nn(input_size, output_size, hidden_sizes)
+
+    def forward(self, x):
+        mean = self.net(x)
+        return torch.distributions.Normal(mean, torch.ones_like(mean))
diff --git a/pypots/imputation/mrnn/module.py b/pypots/imputation/mrnn/module.py
index 873d2d73..a143d121 100644
--- a/pypots/imputation/mrnn/module.py
+++ b/pypots/imputation/mrnn/module.py
@@ -18,7 +18,7 @@
 
 class FCN_Regression(nn.Module):
     def __init__(self, feature_num, rnn_hid_size):
-        super(FCN_Regression, self).__init__()
+        super().__init__()
         self.feat_reg = FeatureRegression(rnn_hid_size * 2)
         self.U = Parameter(torch.Tensor(feature_num, feature_num))
         self.V1 = Parameter(torch.Tensor(feature_num, feature_num))
diff --git a/pypots/imputation/saits/data.py b/pypots/imputation/saits/data.py
index 2fb80bc3..5ff679a5 100644
--- a/pypots/imputation/saits/data.py
+++ b/pypots/imputation/saits/data.py
@@ -88,15 +88,15 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
             indicating_mask : tensor.
                 The mask indicates artificially missing values in X.
         """
-        X = self.X[idx]
+        X = self.X[idx].to(torch.float32)
         X_intact, X, missing_mask, indicating_mask = mcar(X, rate=self.rate)
 
         sample = [
             torch.tensor(idx),
-            X_intact.to(torch.float32),
-            X.to(torch.float32),
-            missing_mask.to(torch.float32),
-            indicating_mask.to(torch.float32),
+            X_intact,
+            X,
+            missing_mask,
+            indicating_mask,
         ]
 
         if self.y is not None and self.return_labels:
@@ -122,15 +122,15 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
         if self.file_handle is None:
             self.file_handle = self._open_file_handle()
 
-        X = torch.from_numpy(self.file_handle["X"][idx])
+        X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
         X_intact, X, missing_mask, indicating_mask = mcar(X, rate=self.rate)
 
         sample = [
             torch.tensor(idx),
-            X_intact.to(torch.float32),
-            X.to(torch.float32),
-            missing_mask.to(torch.float32),
-            indicating_mask.to(torch.float32),
+            X_intact,
+            X,
+            missing_mask,
+            indicating_mask,
         ]
 
         # if the dataset has labels and is for training, then fetch it from the file
diff --git a/pypots/imputation/template/dataset.py b/pypots/imputation/template/data.py
similarity index 100%
rename from pypots/imputation/template/dataset.py
rename to pypots/imputation/template/data.py
diff --git a/pypots/imputation/usgan/__init__.py b/pypots/imputation/usgan/__init__.py
new file mode 100644
index 00000000..fb388d94
--- /dev/null
+++ b/pypots/imputation/usgan/__init__.py
@@ -0,0 +1,12 @@
+"""
+The package of the partially-observed time-series imputation method USGAN.
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk>
+# License: GPL-v3
+
+from .model import USGAN
+
+__all__ = [
+    "USGAN",
+]
diff --git a/pypots/imputation/usgan/data.py b/pypots/imputation/usgan/data.py
new file mode 100644
index 00000000..bd012c30
--- /dev/null
+++ b/pypots/imputation/usgan/data.py
@@ -0,0 +1,46 @@
+"""
+Dataset class for model USGAN.
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk> and Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+from typing import Union
+
+from ..brits.data import DatasetForBRITS
+
+
+class DatasetForUSGAN(DatasetForBRITS):
+    """Dataset class for USGAN, the same with the one for BRITS.
+
+    Parameters
+    ----------
+    data : dict or str,
+        The dataset for model input, should be a dictionary including keys as 'X' and 'y',
+        or a path string locating a data file.
+        If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
+        which is time-series data for input, can contain missing values, and y should be array-like of shape
+        [n_samples], which is classification labels of X.
+        If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
+        key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
+
+    return_labels : bool, default = True,
+        Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example,
+        during training of classification models, the Dataset class will return labels in __getitem__() for model input.
+        Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we
+        need the defined Dataset class for all training/validating/testing stages. For those big datasets stored in h5
+        files, they already have both X and y saved. But we don't read labels from the file for validating and testing
+        with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for
+        distinction.
+
+    file_type : str, default = "h5py"
+        The type of the given file if train_set and val_set are path strings.
+    """
+
+    def __init__(
+        self,
+        data: Union[dict, str],
+        return_labels: bool = True,
+        file_type: str = "h5py",
+    ):
+        super().__init__(data, return_labels, file_type)
diff --git a/pypots/imputation/usgan/model.py b/pypots/imputation/usgan/model.py
new file mode 100644
index 00000000..c171d810
--- /dev/null
+++ b/pypots/imputation/usgan/model.py
@@ -0,0 +1,539 @@
+"""
+The implementation of USGAN for the partially-observed time-series imputation task.
+
+Refer to the paper "Miao, X., Wu, Y., Wang, J., Gao, Y., Mao, X., & Yin, J. (2021).
+Generative Semi-supervised Learning for Multivariate Time Series Imputation. AAAI 2021."
+
+"""
+
+# Created by Jun Wang <jwangfx@connect.ust.hk> and Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+from typing import Union, Optional
+
+import h5py
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+
+from .data import DatasetForUSGAN
+from ..base import BaseNNImputer
+from ..brits.model import _BRITS
+from ...optim.adam import Adam
+from ...optim.base import Optimizer
+from ...utils.logging import logger
+
+
+class Discriminator(nn.Module):
+    """model Discriminator: built on BiRNN
+
+    Parameters
+    ----------
+    n_features :
+        the feature dimension of the input
+
+    rnn_hidden_size :
+        the hidden size of the RNN cell
+
+    hint_rate :
+        the hint rate for the input imputed_data
+
+    dropout_rate :
+        the dropout rate for the output layer
+
+    device :
+        specify running the model on which device, CPU/GPU
+
+    """
+
+    def __init__(
+        self,
+        n_features: int,
+        rnn_hidden_size: int,
+        hint_rate: float = 0.7,
+        dropout_rate: float = 0.0,
+        device: Union[str, torch.device] = "cpu",
+    ):
+        super().__init__()
+        self.hint_rate = hint_rate
+        self.device = device
+        self.biRNN = nn.GRU(
+            n_features * 2, rnn_hidden_size, bidirectional=True, batch_first=True
+        ).to(device)
+        self.dropout = nn.Dropout(dropout_rate).to(device)
+        self.read_out = nn.Linear(rnn_hidden_size * 2, n_features).to(device)
+
+    def forward(
+        self,
+        imputed_X: torch.Tensor,
+        missing_mask: torch.Tensor,
+    ) -> torch.Tensor:
+        """Forward processing of USGAN Discriminator.
+
+        Parameters
+        ----------
+        imputed_X : torch.Tensor,
+            The original X with missing parts already imputed.
+
+        missing_mask : torch.Tensor,
+            The missing mask of X.
+
+        Returns
+        -------
+        logits : torch.Tensor,
+            the logits of the probability of being the true value.
+
+        """
+
+        hint = (
+            torch.rand_like(missing_mask, dtype=torch.float, device=self.device)
+            < self.hint_rate
+        )
+        hint = hint.int()
+        h = hint * missing_mask + (1 - hint) * 0.5
+        x_in = torch.cat([imputed_X, h], dim=-1)
+
+        out, _ = self.biRNN(x_in)
+        logits = self.read_out(self.dropout(out))
+        return logits
+
+
+class _USGAN(nn.Module):
+    """model USGAN:
+    USGAN consists of a generator and a discriminator, which are both built on bidirectional recurrent neural networks.
+
+    Attributes
+    ----------
+    n_steps :
+        sequence length (number of time steps)
+
+    n_features :
+        number of features (input dimensions)
+
+    rnn_hidden_size :
+        the hidden size of the RNN cell
+
+    lambda_mse :
+        the weight of the reconstruction loss
+
+    hint_rate :
+        the hint rate for the discriminator
+
+    dropout_rate :
+        the dropout rate for the last layer in Discriminator
+
+    device :
+        specify running the model on which device, CPU/GPU
+
+    """
+
+    def __init__(
+        self,
+        n_steps: int,
+        n_features: int,
+        rnn_hidden_size: int,
+        lambda_mse: float,
+        hint_rate: float = 0.7,
+        dropout_rate: float = 0.0,
+        device: Union[str, torch.device] = "cpu",
+    ):
+        super().__init__()
+        self.generator = _BRITS(n_steps, n_features, rnn_hidden_size, device)
+        self.discriminator = Discriminator(
+            n_features,
+            rnn_hidden_size,
+            hint_rate=hint_rate,
+            dropout_rate=dropout_rate,
+            device=device,
+        )
+
+        self.lambda_mse = lambda_mse
+        self.device = device
+
+    def forward(
+        self,
+        inputs: dict,
+        training_object: str = "generator",
+        training: bool = True,
+    ) -> dict:
+        """Run the generator; when training, also compute the requested loss."""
+        assert training_object in [
+            "generator",
+            "discriminator",
+        ], 'training_object should be "generator" or "discriminator"'
+
+        results = self.generator(inputs, training=training)
+        if not training:
+            # imputation only: the discriminator is not needed and no loss is calculated
+            return results
+
+        forward_missing_mask = inputs["forward"]["missing_mask"]
+        imputed_data = results["imputed_data"]
+        losses = {"imputed_data": imputed_data}
+        if training_object == "discriminator":
+            # detach: the discriminator step must not backpropagate into the generator
+            logits = self.discriminator(imputed_data.detach(), forward_missing_mask)
+            inputs["discrimination"] = logits
+            losses["discrimination_loss"] = F.binary_cross_entropy_with_logits(
+                logits, forward_missing_mask
+            )
+        else:
+            # keep the graph attached so the adversarial term can update the generator
+            logits = self.discriminator(imputed_data, forward_missing_mask)
+            inputs["discrimination"] = logits
+            l_G = F.binary_cross_entropy_with_logits(
+                logits, 1 - forward_missing_mask, weight=1 - forward_missing_mask
+            )
+            losses["generation_loss"] = l_G + self.lambda_mse * results["loss"]
+
+        return losses
+
+
+class USGAN(BaseNNImputer):
+    """The PyTorch implementation of the USGAN model (Miao et al., AAAI 2021).
+
+    Parameters
+    ----------
+    n_steps :
+        The number of time steps in the time-series data sample.
+
+    n_features :
+        The number of features in the time-series data sample.
+
+    rnn_hidden_size :
+        the hidden size of the RNN cell
+
+    lambda_mse :
+        the weight of the reconstruction loss
+
+    hint_rate :
+        the hint rate for the discriminator
+
+    dropout_rate :
+        the dropout rate for the last layer in Discriminator
+
+    G_steps :
+        The generator is trained on every ``G_steps``-th batch, i.e. when the batch index is divisible by it.
+
+    D_steps :
+        The discriminator is trained on every ``D_steps``-th batch, i.e. when the batch index is divisible by it.
+
+    batch_size :
+        The batch size for training and evaluating the model.
+
+    epochs :
+        The number of epochs for training the model.
+
+    patience :
+        The patience for the early-stopping mechanism. Given a positive integer, the training process will be
+        stopped when the model does not perform better after that number of epochs.
+        Leaving it default as None will disable the early-stopping.
+
+    G_optimizer :
+        The optimizer for the generator training.
+        If not given, will use a default Adam optimizer.
+
+    D_optimizer :
+        The optimizer for the discriminator training.
+        If not given, will use a default Adam optimizer.
+
+    num_workers :
+        The number of subprocesses to use for data loading.
+        `0` means data loading will be in the main process, i.e. there won't be subprocesses.
+
+    device :
+        The device for the model to run on. It can be a string, a :class:`torch.device` object, or a list of them.
+        If not given, will try to use CUDA devices first (will use the default CUDA device if there are multiple),
+        then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models.
+        If given a list of devices, e.g. ['cuda:0', 'cuda:1'], or [torch.device('cuda:0'), torch.device('cuda:1')] , the
+        model will be parallely trained on the multiple devices (so far only support parallel training on CUDA devices).
+        Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.
+
+    saving_path :
+        The path for automatically saving model checkpoints and tensorboard files (i.e. loss values recorded during
+        training into a tensorboard file). Will not save if not given.
+
+    model_saving_strategy :
+        The strategy to save model checkpoints. It has to be one of [None, "best", "better"].
+        No model will be saved when it is set as None.
+        The "best" strategy will only automatically save the best model after the training finished.
+        The "better" strategy will automatically save the model during training whenever the model performs
+        better than in previous epochs.
+
+    Attributes
+    ----------
+    model : :class:`torch.nn.Module`
+        The underlying USGAN model.
+
+    G_optimizer, D_optimizer : :class:`pypots.optim.Optimizer`
+        The optimizers for training the generator and the discriminator respectively.
+
+    """
+
+    def __init__(
+        self,
+        n_steps: int,
+        n_features: int,
+        rnn_hidden_size: int,
+        lambda_mse: float = 1,
+        hint_rate: float = 0.7,
+        dropout_rate: float = 0.0,
+        G_steps: int = 1,
+        D_steps: int = 1,
+        batch_size: int = 32,
+        epochs: int = 100,
+        patience: Optional[int] = None,
+        G_optimizer: Optional[Optimizer] = None,
+        D_optimizer: Optional[Optimizer] = None,
+        num_workers: int = 0,
+        device: Optional[Union[str, torch.device, list]] = None,
+        saving_path: Optional[str] = None,
+        model_saving_strategy: Optional[str] = "best",
+    ):
+        super().__init__(
+            batch_size,
+            epochs,
+            patience,
+            num_workers,
+            device,
+            saving_path,
+            model_saving_strategy,
+        )
+        assert G_steps > 0 and D_steps > 0, "G_steps and D_steps should both >0"
+
+        self.n_steps = n_steps
+        self.n_features = n_features
+        self.G_steps = G_steps
+        self.D_steps = D_steps
+
+        # set up the model: a BRITS generator plus a BiRNN discriminator
+        self.model = _USGAN(
+            n_steps,
+            n_features,
+            rnn_hidden_size,
+            lambda_mse,
+            hint_rate,
+            dropout_rate,
+            self.device,
+        )
+        self._send_model_to_given_device()
+        self._print_model_size()
+
+        # set up the optimizers (a fresh Adam per instance when none is given)
+        self.G_optimizer = Adam() if G_optimizer is None else G_optimizer
+        self.G_optimizer.init_optimizer(self.model.generator.parameters())
+        self.D_optimizer = Adam() if D_optimizer is None else D_optimizer
+        self.D_optimizer.init_optimizer(self.model.discriminator.parameters())
+
+    def _assemble_input_for_training(self, data: list) -> dict:
+        # fetch data
+        (
+            indices,
+            X,
+            missing_mask,
+            deltas,
+            back_X,
+            back_missing_mask,
+            back_deltas,
+        ) = self._send_data_to_given_device(data)
+
+        # assemble input data
+        inputs = {
+            "indices": indices,
+            "forward": {
+                "X": X,
+                "missing_mask": missing_mask,
+                "deltas": deltas,
+            },
+            "backward": {
+                "X": back_X,
+                "missing_mask": back_missing_mask,
+                "deltas": back_deltas,
+            },
+        }
+
+        return inputs
+
+    def _assemble_input_for_validating(self, data: list) -> dict:
+        return self._assemble_input_for_training(data)
+
+    def _assemble_input_for_testing(self, data: list) -> dict:
+        return self._assemble_input_for_validating(data)
+
+    def _train_model(
+        self,
+        training_loader: DataLoader,
+        val_loader: DataLoader = None,
+    ) -> None:
+        """Alternately train the generator and the discriminator of USGAN.
+
+        The best model is selected by the mean generator training loss of each epoch and kept in
+        `self.best_model_dict`. Note that `val_loader` is accepted but not used by this method.
+        """
+        # each training starts from the very beginning, so reset the loss and model dict here
+        self.best_loss = float("inf")
+        self.best_model_dict = None
+
+        try:
+            training_step = 0
+            for epoch in range(self.epochs):
+                # reset per epoch so that the epoch means only cover the current epoch
+                epoch_train_loss_G_collector = []
+                epoch_train_loss_D_collector = []
+                self.model.train()
+                for idx, data in enumerate(training_loader):
+                    training_step += 1
+                    inputs = self._assemble_input_for_training(data)
+                    # only holds the losses of the parts trained on this batch, so no mean of an
+                    # empty list (NaN) can leak into the epoch loss when G_steps or D_steps > 1
+                    loss_results = {}
+
+                    if idx % self.G_steps == 0:
+                        self.G_optimizer.zero_grad()
+                        results = self.model.forward(
+                            inputs, training_object="generator"
+                        )
+                        results["generation_loss"].backward()
+                        self.G_optimizer.step()
+                        step_loss_G = results["generation_loss"].item()
+                        loss_results["generation_loss"] = step_loss_G
+                        epoch_train_loss_G_collector.append(step_loss_G)
+
+                    if idx % self.D_steps == 0:
+                        self.D_optimizer.zero_grad()
+                        results = self.model.forward(
+                            inputs, training_object="discriminator"
+                        )
+                        results["discrimination_loss"].backward(retain_graph=True)
+                        self.D_optimizer.step()
+                        step_loss_D = results["discrimination_loss"].item()
+                        loss_results["discrimination_loss"] = step_loss_D
+                        epoch_train_loss_D_collector.append(step_loss_D)
+
+                    # save training loss logs into the tensorboard file for every step if in need
+                    # Note: `training_step` counts batches, while the Generator and the Discriminator
+                    # are only trained on every G_steps-th and D_steps-th batch respectively
+                    if self.summary_writer is not None and loss_results:
+                        self._save_log_into_tb_file(
+                            training_step, "training", loss_results
+                        )
+                mean_epoch_train_D_loss = np.mean(epoch_train_loss_D_collector)
+                mean_epoch_train_G_loss = np.mean(epoch_train_loss_G_collector)
+                logger.info(
+                    f"epoch {epoch}: "
+                    f"training loss_generator {mean_epoch_train_G_loss:.4f}, "
+                    f"train loss_discriminator {mean_epoch_train_D_loss:.4f}"
+                )
+                mean_loss = mean_epoch_train_G_loss
+
+                if mean_loss < self.best_loss:
+                    self.best_loss = mean_loss
+                    # snapshot by value: the tensors of state_dict() share storage with the live model
+                    self.best_model_dict = {
+                        name: tensor.detach().clone()
+                        for name, tensor in self.model.state_dict().items()
+                    }
+                    self.patience = self.original_patience
+                    # save the model if necessary
+                    self._auto_save_model_if_necessary(
+                        training_finished=False,
+                        saving_name=f"{self.__class__.__name__}_epoch{epoch}_loss{mean_loss}",
+                    )
+                else:
+                    self.patience -= 1
+                    if self.patience == 0:
+                        logger.info(
+                            "Exceeded the training patience. Terminating the training procedure..."
+                        )
+                        break
+        except Exception as e:
+            logger.error(f"Exception: {e}")
+            if self.best_model_dict is None:
+                raise RuntimeError(
+                    "Training got interrupted. Model was not trained. Please investigate the error printed above."
+                )
+            else:
+                logger.warning(
+                    "Training got interrupted. Please investigate the error printed above.\n"
+                    "Model got trained and will load the best checkpoint so far for testing.\n"
+                    "If you don't want it, please try fit() again."
+                )
+
+        if np.equal(self.best_loss, float("inf")):
+            raise ValueError("Something is wrong. best_loss is Nan after training.")
+
+        logger.info("Finished training.")
+
+    def fit(
+        self,
+        train_set: Union[dict, str],
+        val_set: Optional[Union[dict, str]] = None,
+        file_type: str = "h5py",
+    ) -> None:
+        # Step 1: wrap the input data with classes Dataset and DataLoader
+        training_set = DatasetForUSGAN(
+            train_set, return_labels=False, file_type=file_type
+        )
+        training_loader = DataLoader(
+            training_set,
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=self.num_workers,
+        )
+        val_loader = None
+        if val_set is not None:
+            if isinstance(val_set, str):
+                with h5py.File(val_set, "r") as hf:
+                    # Here we read the whole validation set from the file to mask a portion for validation.
+                    # In PyPOTS, using a file usually because the data is too big. However, the validation set is
+                    # generally shouldn't be too large. For example, we have 1 billion samples for model training.
+                    # We won't take 20% of them as the validation set because we want as much as possible data for the
+                    # training stage to enhance the model's generalization ability. Therefore, 100,000 representative
+                    # samples will be enough to validate the model.
+                    val_set = {
+                        "X": hf["X"][:],
+                        "X_intact": hf["X_intact"][:],
+                        "indicating_mask": hf["indicating_mask"][:],
+                    }
+            val_set = DatasetForUSGAN(val_set, return_labels=False, file_type=file_type)
+            val_loader = DataLoader(
+                val_set,
+                batch_size=self.batch_size,
+                shuffle=False,
+                num_workers=self.num_workers,
+            )
+
+        # Step 2: train the model and freeze it
+        self._train_model(training_loader, val_loader)
+        self.model.load_state_dict(self.best_model_dict)
+        self.model.eval()  # set the model as eval status to freeze it.
+
+        # Step 3: save the model if necessary
+        self._auto_save_model_if_necessary(training_finished=True)
+
+    def impute(
+        self,
+        X: Union[dict, str],
+        file_type="h5py",
+    ) -> np.ndarray:
+        self.model.eval()  # set the model as eval status to freeze it.
+        test_set = DatasetForUSGAN(X, return_labels=False, file_type=file_type)
+        test_loader = DataLoader(
+            test_set,
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=self.num_workers,
+        )
+        imputation_collector = []
+
+        with torch.no_grad():
+            for idx, data in enumerate(test_loader):
+                inputs = self._assemble_input_for_testing(data)
+                results = self.model.forward(inputs, training=False)
+                imputed_data = results["imputed_data"]
+                imputation_collector.append(imputed_data)
+
+        imputation_collector = torch.cat(imputation_collector)
+        return imputation_collector.cpu().detach().numpy()
diff --git a/pypots/utils/metrics.py b/pypots/utils/metrics.py
index 85efb54d..cc349b50 100644
--- a/pypots/utils/metrics.py
+++ b/pypots/utils/metrics.py
@@ -574,73 +574,93 @@ def cal_cluster_purity(
     return cluster_purity
 
 
-def cal_silhouette(
-    latent_rep: np.ndarray,
-    class_predictions: np.ndarray
-) -> float:
+def cal_silhouette(X: np.ndarray, predicted_labels: np.ndarray) -> float:
     """Compute the mean Silhouette Coefficient of all samples.
 
     Parameters
     ----------
-    latent_rep :
-        Latent representation learned by a clusterer.
+    X : array-like of shape (n_samples_a, n_features)
+        A feature array, or learned latent representation, that can be used for clustering.
 
-    class_predictions :
-        Clustering results returned by a clusterer.
+    predicted_labels : array-like of shape (n_samples)
+        Predicted labels for each sample.
 
     Returns
     -------
-    silhouette :
+    silhouette_score : float
         Mean Silhouette Coefficient for all samples.
 
     """
-    silhouette = metrics.silhouette_score(latent_rep, class_predictions)
-    return silhouette
+    silhouette_score = metrics.silhouette_score(X, predicted_labels)
+    return silhouette_score
 
 
-def cal_chs(
-    latent_rep: np.ndarray,
-    class_predictions: np.ndarray
-) -> float:
+def cal_chs(X: np.ndarray, predicted_labels: np.ndarray) -> float:
     """Compute the Calinski and Harabasz score (also known as the Variance Ratio Criterion).
 
     Parameters
     ----------
-    latent_rep :
-        Latent representation learned by a clusterer.
-
-    class_predictions :
-        Clustering results returned by a clusterer.
+    X : array-like of shape (n_samples_a, n_features)
+        A feature array, or learned latent representation, that can be used for clustering.
 
+    predicted_labels : array-like of shape (n_samples)
+        Predicted labels for each sample.
+
     Returns
     -------
-    chs :
+    calinski_harabasz_score : float
         The resulting Calinski-Harabasz score.
 
     """
-    chs = metrics.calinski_harabasz_score(latent_rep, class_predictions)
-    return chs
+    calinski_harabasz_score = metrics.calinski_harabasz_score(X, predicted_labels)
+    return calinski_harabasz_score
 
 
-def cal_dbs(
-    latent_rep: np.ndarray,
-    class_predictions: np.ndarray
-) -> float:
+def cal_dbs(X: np.ndarray, predicted_labels: np.ndarray) -> float:
     """Compute the Davies-Bouldin score.
 
     Parameters
     ----------
-    latent_rep :
-        Latent representation learned by a clusterer.
+    X : array-like of shape (n_samples_a, n_features)
+        A feature array, or learned latent representation, that can be used for clustering.
 
-    class_predictions :
-        Clustering results returned by a clusterer.
+    predicted_labels : array-like of shape (n_samples)
+        Predicted labels for each sample.
 
     Returns
     -------
-    dbs :
+    davies_bouldin_score : float
         The resulting Davies-Bouldin score.
 
     """
-    dbs = metrics.davies_bouldin_score(latent_rep, class_predictions)
-    return dbs
+    davies_bouldin_score = metrics.davies_bouldin_score(X, predicted_labels)
+    return davies_bouldin_score
+
+
+def cal_internal_cluster_validation_metrics(X, predicted_labels):
+    """Compute all internal cluster validation metrics available in PyPOTS and return as a dictionary.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples_a, n_features)
+        A feature array, or learned latent representation, that can be used for clustering.
+
+    predicted_labels : array-like of shape (n_samples)
+        Predicted labels for each sample.
+
+    Returns
+    -------
+    internal_cluster_validation_metrics : dict
+        A dictionary contains all internal cluster validation metrics available in PyPOTS.
+    """
+
+    silhouette_score = cal_silhouette(X, predicted_labels)
+    calinski_harabasz_score = cal_chs(X, predicted_labels)
+    davies_bouldin_score = cal_dbs(X, predicted_labels)
+
+    internal_cluster_validation_metrics = {
+        "silhouette_score": silhouette_score,
+        "calinski_harabasz_score": calinski_harabasz_score,
+        "davies_bouldin_score": davies_bouldin_score,
+    }
+    return internal_cluster_validation_metrics
diff --git a/tests/classification/__init__.py b/tests/classification/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/classification/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/classification/brits.py b/tests/classification/brits.py
new file mode 100644
index 00000000..b1905c39
--- /dev/null
+++ b/tests/classification/brits.py
@@ -0,0 +1,106 @@
+"""
+Test cases for BRITS classification model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+import unittest
+
+import pytest
+
+from pypots.classification import BRITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_binary_classification_metrics
+from tests.classification.config import (
+    EPOCHS,
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_CLASSIFICATION,
+)
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestBRITS(unittest.TestCase):
+    logger.info("Running tests for a classification model BRITS...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS")
+    model_save_name = "saved_BRITS_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a BRITS model
+    brits = BRITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_classes=DATA["n_classes"],
+        rnn_hidden_size=256,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        model_saving_strategy="better",
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="classification-brits")
+    def test_0_fit(self):
+        self.brits.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="classification-brits")
+    def test_1_classify(self):
+        predictions = self.brits.classify(TEST_SET)
+        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
+        logger.info(
+            f'ROC_AUC: {metrics["roc_auc"]}, \n'
+            f'PR_AUC: {metrics["pr_auc"]},\n'
+            f'F1: {metrics["f1"]},\n'
+            f'Precision: {metrics["precision"]},\n'
+            f'Recall: {metrics["recall"]},\n'
+        )
+        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
+
+    @pytest.mark.xdist_group(name="classification-brits")
+    def test_2_parameters(self):
+        assert hasattr(self.brits, "model") and self.brits.model is not None
+
+        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
+
+        assert hasattr(self.brits, "best_loss")
+        self.assertNotEqual(self.brits.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.brits, "best_model_dict")
+            and self.brits.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="classification-brits")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.brits)
+
+        # save the trained model into file, and check if the path exists
+        self.brits.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # test loading the saved model, not necessary, but need to test
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.brits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/classification/config.py b/tests/classification/config.py
new file mode 100644
index 00000000..35b17029
--- /dev/null
+++ b/tests/classification/config.py
@@ -0,0 +1,21 @@
+"""
+Test configs for classification models.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+
+from tests.global_test_config import (
+    DATA,
+    RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]}
+VAL_SET = {"X": DATA["val_X"], "y": DATA["val_y"]}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification")
diff --git a/tests/classification/grud.py b/tests/classification/grud.py
new file mode 100644
index 00000000..a662cb70
--- /dev/null
+++ b/tests/classification/grud.py
@@ -0,0 +1,105 @@
+"""
+Test cases for GRUD classification model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+import unittest
+
+import pytest
+
+from pypots.classification import GRUD
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_binary_classification_metrics
+from tests.classification.config import (
+    EPOCHS,
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_CLASSIFICATION,
+)
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestGRUD(unittest.TestCase):
+    logger.info("Running tests for a classification model GRUD...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD")
+    model_save_name = "saved_GRUD_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a GRUD model
+    grud = GRUD(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_classes=DATA["n_classes"],
+        rnn_hidden_size=256,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="classification-grud")
+    def test_0_fit(self):
+        self.grud.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="classification-grud")
+    def test_1_classify(self):
+        predictions = self.grud.classify(TEST_SET)
+        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
+        logger.info(
+            f'ROC_AUC: {metrics["roc_auc"]}, \n'
+            f'PR_AUC: {metrics["pr_auc"]},\n'
+            f'F1: {metrics["f1"]},\n'
+            f'Precision: {metrics["precision"]},\n'
+            f'Recall: {metrics["recall"]},\n'
+        )
+        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
+
+    @pytest.mark.xdist_group(name="classification-grud")
+    def test_2_parameters(self):
+        assert hasattr(self.grud, "model") and self.grud.model is not None
+
+        assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None
+
+        assert hasattr(self.grud, "best_loss")
+        self.assertNotEqual(self.grud.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.grud, "best_model_dict")
+            and self.grud.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="classification-grud")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.grud)
+
+        # save the trained model into file, and check if the path exists
+        self.grud.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # test loading the saved model, not necessary, but need to test
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.grud.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/classification/raindrop.py b/tests/classification/raindrop.py
new file mode 100644
index 00000000..277164dc
--- /dev/null
+++ b/tests/classification/raindrop.py
@@ -0,0 +1,110 @@
+"""
+Test cases for Raindrop classification model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+import unittest
+
+import pytest
+
+from pypots.classification import Raindrop
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_binary_classification_metrics
+from tests.classification.config import (
+    EPOCHS,
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_CLASSIFICATION,
+)
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestRaindrop(unittest.TestCase):
+    logger.info("Running tests for a classification model Raindrop...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop")
+    model_save_name = "saved_Raindrop_model.pypots"
+
+    # initialize a Raindrop model
+    raindrop = Raindrop(
+        DATA["n_steps"],
+        DATA["n_features"],
+        DATA["n_classes"],
+        n_layers=2,
+        d_model=DATA["n_features"] * 4,
+        d_inner=256,
+        n_heads=2,
+        dropout=0.3,
+        d_static=0,
+        aggregation="mean",
+        sensor_wise_mask=False,
+        static=False,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="classification-raindrop")
+    def test_0_fit(self):
+        self.raindrop.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="classification-raindrop")
+    def test_1_classify(self):
+        predictions = self.raindrop.classify(TEST_SET)
+        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
+        logger.info(
+            f'ROC_AUC: {metrics["roc_auc"]}, \n'
+            f'PR_AUC: {metrics["pr_auc"]},\n'
+            f'F1: {metrics["f1"]},\n'
+            f'Precision: {metrics["precision"]},\n'
+            f'Recall: {metrics["recall"]},\n'
+        )
+        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
+
+    @pytest.mark.xdist_group(name="classification-raindrop")
+    def test_2_parameters(self):
+        assert hasattr(self.raindrop, "model") and self.raindrop.model is not None
+
+        assert (
+            hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None
+        )
+
+        assert hasattr(self.raindrop, "best_loss")
+        self.assertNotEqual(self.raindrop.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.raindrop, "best_model_dict")
+            and self.raindrop.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="classification-raindrop")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.raindrop)
+
+        # save the trained model into file, and check if the path exists
+        self.raindrop.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # test loading the saved model, not necessary, but need to test
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.raindrop.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/cli/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/cli/config.py b/tests/cli/config.py
new file mode 100644
index 00000000..defdb211
--- /dev/null
+++ b/tests/cli/config.py
@@ -0,0 +1,11 @@
+"""
+Test configs for CLI tools.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+
+
+PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), "../../.."))
diff --git a/tests/cli/dev.py b/tests/cli/dev.py
new file mode 100644
index 00000000..4387be29
--- /dev/null
+++ b/tests/cli/dev.py
@@ -0,0 +1,92 @@
+"""
+Test cases for the functions and classes in package `pypots.cli.dev`.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+import threading
+import unittest
+from argparse import Namespace
+from copy import copy
+
+import pytest
+
+from pypots.cli.dev import dev_command_factory
+from tests.cli.config import PROJECT_ROOT_DIR
+
+
+def callback_func():
+    raise TimeoutError("Time out.")
+
+
+def time_out(interval, callback):
+    # Decorator factory: run `func` in a daemon thread and wait up to `interval` seconds;
+    # if it is still running, start `callback` on a Timer thread (not the caller's thread).
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            # daemon=True replaces setDaemon(), which is deprecated since Python 3.10
+            t = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
+            t.start()
+            t.join(interval)  # wait for interval seconds
+            if t.is_alive():
+                threading.Timer(0, callback).start()  # invoke callback()
+
+        return wrapper
+
+    return decorator
+
+
+@pytest.mark.xfail(reason="Allow tests for CLI to fail")
+class TestPyPOTSCLIDev(unittest.TestCase):
+    # set up the default arguments
+    default_arguments = {
+        "build": False,
+        "cleanup": False,
+        "run_tests": False,
+        "k": None,
+        "show_coverage": False,
+        "lint_code": False,
+    }
+    # `pypots-cli dev` must run under the project root dir
+    os.chdir(PROJECT_ROOT_DIR)
+
+    @pytest.mark.xdist_group(name="cli-dev")
+    def test_0_build(self):
+        arguments = copy(self.default_arguments)
+        arguments["build"] = True
+        args = Namespace(**arguments)
+        dev_command_factory(args).run()
+
+    @pytest.mark.xdist_group(name="cli-dev")
+    def test_1_run_tests(self):
+        # select a test case that does not exist; a RuntimeError from the run is tolerated
+        arguments = copy(self.default_arguments)
+        arguments["run_tests"] = True
+        arguments["k"] = "try_to_find_a_non_existing_test_case"
+        args = Namespace(**arguments)
+        try:
+            dev_command_factory(args).run()
+        except RuntimeError:
+            # any other exception type propagates on its own and fails the test
+            pass
+
+    # Don't test --lint-code because Black will reformat the code and cause error when generating the coverage report
+    # @pytest.mark.xdist_group(name="cli-dev")
+    # def test_2_lint_code(self):
+    #     arguments = copy(self.default_arguments)
+    #     arguments["lint_code"] = True
+    #     args = Namespace(**arguments)
+    #     dev_command_factory(args).run()
+
+    @pytest.mark.xdist_group(name="cli-dev")
+    def test_3_cleanup(self):
+        arguments = copy(self.default_arguments)
+        arguments["cleanup"] = True
+        args = Namespace(**arguments)
+        dev_command_factory(args).run()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/cli/doc.py b/tests/cli/doc.py
new file mode 100644
index 00000000..85e4e190
--- /dev/null
+++ b/tests/cli/doc.py
@@ -0,0 +1,104 @@
+"""
+Test cases for the functions and classes in package `pypots.cli.doc`.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+import threading
+import unittest
+from argparse import Namespace
+from copy import copy
+
+import pytest
+
+from pypots.cli.doc import doc_command_factory
+from pypots.utils.logging import logger
+from tests.cli.config import PROJECT_ROOT_DIR
+
+
+def callback_func():
+    raise TimeoutError("Time out.")
+
+
+def time_out(interval, callback):
+    # Decorator factory: run `func` in a daemon thread and wait up to `interval` seconds;
+    # if it is still running, start `callback` on a Timer thread (not the caller's thread).
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            # daemon=True replaces setDaemon(), which is deprecated since Python 3.10
+            t = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
+            t.start()
+            t.join(interval)  # wait for interval seconds
+            if t.is_alive():
+                threading.Timer(0, callback).start()  # invoke callback()
+
+        return wrapper
+
+    return decorator
+
+
+@pytest.mark.xfail(reason="Allow tests for CLI to fail")
+class TestPyPOTSCLIDoc(unittest.TestCase):
+    # set up the default arguments
+    default_arguments = {
+        "gene_rst": False,
+        "branch": "main",
+        "gene_html": False,
+        "view_doc": False,
+        "port": 9075,
+        "cleanup": False,
+    }
+    # `pypots-cli doc` must run under the project root dir
+    os.chdir(PROJECT_ROOT_DIR)
+
+    @pytest.mark.xdist_group(name="cli-doc")
+    def test_0_gene_rst(self):
+        # first run with gene_rst enabled from the dir the class body chdir'ed into
+        arguments = copy(self.default_arguments)
+        arguments["gene_rst"] = True
+        args = Namespace(**arguments)
+        doc_command_factory(args).run()
+
+        logger.info("run again under a non-root dir")
+        try:
+            os.chdir(os.path.abspath(os.path.join(PROJECT_ROOT_DIR, "pypots")))
+            doc_command_factory(args).run()
+        except RuntimeError:
+            # tolerated under a non-root dir; other exceptions propagate and fail the test
+            pass
+        finally:
+            os.chdir(PROJECT_ROOT_DIR)
+
+    @pytest.mark.xdist_group(name="cli-doc")
+    def test_1_gene_html(self):
+        arguments = copy(self.default_arguments)
+        arguments["gene_html"] = True
+        args = Namespace(**arguments)
+        try:
+            doc_command_factory(args).run()
+        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
+            logger.error(e)
+
+    @pytest.mark.xdist_group(name="cli-doc")
+    @time_out(2, callback_func)  # wait for two seconds
+    def test_2_view_doc(self):
+        arguments = copy(self.default_arguments)
+        arguments["view_doc"] = True
+        args = Namespace(**arguments)
+        try:
+            doc_command_factory(args).run()
+        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
+            logger.error(e)
+
+    @pytest.mark.xdist_group(name="cli-doc")
+    def test_3_cleanup(self):
+        arguments = copy(self.default_arguments)
+        arguments["cleanup"] = True
+        args = Namespace(**arguments)
+        doc_command_factory(args).run()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/cli/env.py b/tests/cli/env.py
new file mode 100644
index 00000000..36b5b20e
--- /dev/null
+++ b/tests/cli/env.py
@@ -0,0 +1,49 @@
+"""
+Test cases for the functions and classes in package `pypots.cli.env`.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+import unittest
+from argparse import Namespace
+from copy import copy
+
+import pytest
+
+from pypots.cli.env import env_command_factory
+from pypots.utils.logging import logger
+from tests.cli.config import PROJECT_ROOT_DIR
+
+
+@pytest.mark.xfail(reason="Allow tests for CLI to fail")
+class TestPyPOTSCLIEnv(unittest.TestCase):
+    # set up the default arguments
+    default_arguments = {
+        "install": "optional",
+        "tool": "conda",
+    }
+
+    # `pypots-cli env` must run under the project root dir
+    os.chdir(PROJECT_ROOT_DIR)
+
+    @pytest.mark.xdist_group(name="cli-env")
+    def test_0_install_with_conda(self):
+        arguments = copy(self.default_arguments)
+        arguments["tool"] = "conda"
+        args = Namespace(**arguments)
+        try:
+            env_command_factory(args).run()
+        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
+            logger.error(e)
+
+    @pytest.mark.xdist_group(name="cli-env")
+    def test_1_install_with_pip(self):
+        arguments = copy(self.default_arguments)
+        arguments["tool"] = "pip"
+        args = Namespace(**arguments)
+        try:
+            env_command_factory(args).run()
+        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
+            logger.error(e)
diff --git a/tests/clustering/__init__.py b/tests/clustering/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/clustering/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/clustering/config.py b/tests/clustering/config.py
new file mode 100644
index 00000000..aa43d7dd
--- /dev/null
+++ b/tests/clustering/config.py
@@ -0,0 +1,22 @@
+"""
+Test configs for clustering models.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+
+from tests.global_test_config import (
+    DATA,
+    RESULT_SAVING_DIR,
+)
+
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {"X": DATA["val_X"]}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering")
diff --git a/tests/clustering/crli.py b/tests/clustering/crli.py
new file mode 100644
index 00000000..923911fd
--- /dev/null
+++ b/tests/clustering/crli.py
@@ -0,0 +1,103 @@
+"""
+Test cases for CRLI clustering model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os
+import unittest
+
+import pytest
+
+from pypots.clustering import CRLI
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_rand_index, cal_cluster_purity
+from tests.clustering.config import (
+    EPOCHS,
+    TRAIN_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_CLUSTERING,
+)
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+
+
+class TestCRLI(unittest.TestCase):
+    logger.info("Running tests for a clustering model CRLI...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI")
+    model_save_name = "saved_CRLI_model.pypots"
+
+    # initialize an Adam optimizer
+    G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+    D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a CRLI model
+    crli = CRLI(
+        n_steps=DATA["n_steps"],
+        n_features=DATA["n_features"],
+        n_clusters=DATA["n_classes"],
+        n_generator_layers=2,
+        rnn_hidden_size=128,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        G_optimizer=G_optimizer,
+        D_optimizer=D_optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="clustering-crli")
+    def test_0_fit(self):
+        self.crli.fit(TRAIN_SET)
+
+    @pytest.mark.xdist_group(name="clustering-crli")
+    def test_1_parameters(self):
+        assert hasattr(self.crli, "model") and self.crli.model is not None
+
+        assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None
+        assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None
+
+        assert hasattr(self.crli, "best_loss")
+        self.assertNotEqual(self.crli.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.crli, "best_model_dict")
+            and self.crli.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="clustering-crli")
+    def test_2_cluster(self):
+        clustering = self.crli.cluster(TEST_SET)
+        RI = cal_rand_index(clustering, DATA["test_y"])
+        CP = cal_cluster_purity(clustering, DATA["test_y"])
+        logger.info(f"RI: {RI}\nCP: {CP}")
+
+    @pytest.mark.xdist_group(name="clustering-crli")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.crli)
+
+        # save the trained model into file, and check if the path exists
+        self.crli.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # test loading the saved model, not necessary, but need to test
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.crli.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_clustering.py b/tests/clustering/vader.py
similarity index 51%
rename from tests/test_clustering.py
rename to tests/clustering/vader.py
index bbd4d014..71a6a91d 100644
--- a/tests/test_clustering.py
+++ b/tests/clustering/vader.py
@@ -1,5 +1,5 @@
 """
-Test cases for clustering models.
+Test cases for VaDER clustering model.
 """
 
 # Created by Wenjie Du <wenjay.du@gmail.com>
@@ -12,94 +12,22 @@
 import numpy as np
 import pytest
 
-from pypots.clustering import VaDER, CRLI
+from pypots.clustering import VaDER
 from pypots.optim import Adam
 from pypots.utils.logging import logger
 from pypots.utils.metrics import cal_rand_index, cal_cluster_purity
+from tests.clustering.config import (
+    EPOCHS,
+    TRAIN_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_CLUSTERING,
+)
 from tests.global_test_config import (
     DATA,
-    RESULT_SAVING_DIR,
+    DEVICE,
     check_tb_and_model_checkpoints_existence,
 )
 
-EPOCHS = 5
-
-TRAIN_SET = {"X": DATA["train_X"]}
-VAL_SET = {"X": DATA["val_X"]}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering")
-
-
-class TestCRLI(unittest.TestCase):
-    logger.info("Running tests for a clustering model CRLI...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI")
-    model_save_name = "saved_CRLI_model.pypots"
-
-    # initialize an Adam optimizer
-    G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
-    D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a CRLI model
-    crli = CRLI(
-        n_steps=DATA["n_steps"],
-        n_features=DATA["n_features"],
-        n_clusters=DATA["n_classes"],
-        n_generator_layers=2,
-        rnn_hidden_size=128,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        G_optimizer=G_optimizer,
-        D_optimizer=D_optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_0_fit(self):
-        self.crli.fit(TRAIN_SET)
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_1_parameters(self):
-        assert hasattr(self.crli, "model") and self.crli.model is not None
-
-        assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None
-        assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None
-
-        assert hasattr(self.crli, "best_loss")
-        self.assertNotEqual(self.crli.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.crli, "best_model_dict")
-            and self.crli.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_2_cluster(self):
-        clustering = self.crli.cluster(TEST_SET)
-        RI = cal_rand_index(clustering, DATA["test_y"])
-        CP = cal_cluster_purity(clustering, DATA["test_y"])
-        logger.info(f"RI: {RI}\nCP: {CP}")
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.crli)
-
-        # save the trained model into file, and check if the path exists
-        self.crli.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.crli.load_model(saved_model_path)
-
 
 class TestVaDER(unittest.TestCase):
     logger.info("Running tests for a clustering model Transformer...")
@@ -120,8 +48,9 @@ class TestVaDER(unittest.TestCase):
         d_mu_stddev=5,
         pretrain_epochs=20,
         epochs=EPOCHS,
-        saving_path=saving_path,
         optimizer=optimizer,
+        saving_path=saving_path,
+        device=DEVICE,
     )
 
     @pytest.mark.xdist_group(name="clustering-vader")
diff --git a/tests/data/__init__.py b/tests/data/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/data/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/test_data.py b/tests/data/lazy_loading_strategy.py
similarity index 56%
rename from tests/test_data.py
rename to tests/data/lazy_loading_strategy.py
index 27531098..8db1080c 100644
--- a/tests/test_data.py
+++ b/tests/data/lazy_loading_strategy.py
@@ -8,31 +8,28 @@
 import os
 import unittest
 
-import h5py
 import pytest
 
 from pypots.classification import BRITS, GRUD
+from pypots.data.saving import save_dict_into_h5
 from pypots.imputation import SAITS
-from tests.global_test_config import DATA, DATA_SAVING_DIR
 from pypots.utils.logging import logger
+from tests.global_test_config import DATA, DATA_SAVING_DIR
 
-
-TRAIN_SET = f"{DATA_SAVING_DIR}/train_set.h5"
-VAL_SET = f"{DATA_SAVING_DIR}/val_set.h5"
-TEST_SET = f"{DATA_SAVING_DIR}/test_set.h5"
-IMPUTATION_TRAIN_SET = f"{DATA_SAVING_DIR}/imputation_train_set.h5"
-IMPUTATION_VAL_SET = f"{DATA_SAVING_DIR}/imputation_val_set.h5"
+TRAIN_SET_NAME = "train_set.h5"
+TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/{TRAIN_SET_NAME}"
+VAL_SET_NAME = "val_set.h5"
+VAL_SET_PATH = f"{DATA_SAVING_DIR}/{VAL_SET_NAME}"
+TEST_SET_NAME = "test_set.h5"
+TEST_SET_PATH = f"{DATA_SAVING_DIR}/{TEST_SET_NAME}"
+IMPUTATION_TRAIN_SET_NAME = "imputation_train_set.h5"
+IMPUTATION_TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/{IMPUTATION_TRAIN_SET_NAME}"
+IMPUTATION_VAL_SET_NAME = "imputation_val_set.h5"
+IMPUTATION_VAL_SET_PATH = f"{DATA_SAVING_DIR}/{IMPUTATION_VAL_SET_NAME}"
 
 EPOCHS = 1
 
 
-def save_data_set_into_h5(data, path):
-    with h5py.File(path, "w") as hf:
-        for i in data.keys():
-            tp = int if i == "y" else "float32"
-            hf.create_dataset(i, data=data[i].astype(tp))
-
-
 class TestLazyLoadingClasses(unittest.TestCase):
     logger.info("Running tests for Dataset classes with lazy-loading strategy...")
 
@@ -73,53 +70,63 @@ def test_0_save_datasets_into_files(self):
         # create the dir for saving files
         os.makedirs(DATA_SAVING_DIR, exist_ok=True)
 
-        if not os.path.exists(TRAIN_SET):
-            save_data_set_into_h5(
-                {"X": DATA["train_X"], "y": DATA["train_y"].astype(int)}, TRAIN_SET
+        if not os.path.exists(TRAIN_SET_PATH):
+            save_dict_into_h5(
+                {"X": DATA["train_X"], "y": DATA["train_y"].astype(float)},
+                DATA_SAVING_DIR,
+                TRAIN_SET_NAME,
             )
 
-        if not os.path.exists(VAL_SET):
-            save_data_set_into_h5(
-                {"X": DATA["val_X"], "y": DATA["val_y"].astype(int)}, VAL_SET
+        if not os.path.exists(VAL_SET_PATH):
+            save_dict_into_h5(
+                {"X": DATA["val_X"], "y": DATA["val_y"].astype(float)},
+                DATA_SAVING_DIR,
+                VAL_SET_NAME,
             )
 
-        if not os.path.exists(IMPUTATION_TRAIN_SET):
-            save_data_set_into_h5({"X": DATA["train_X"]}, IMPUTATION_TRAIN_SET)
+        if not os.path.exists(IMPUTATION_TRAIN_SET_PATH):
+            save_dict_into_h5(
+                {"X": DATA["train_X"]}, DATA_SAVING_DIR, IMPUTATION_TRAIN_SET_NAME
+            )
 
-        if not os.path.exists(IMPUTATION_VAL_SET):
-            save_data_set_into_h5(
+        if not os.path.exists(IMPUTATION_VAL_SET_PATH):
+            save_dict_into_h5(
                 {
                     "X": DATA["val_X"],
                     "X_intact": DATA["val_X_intact"],
                     "indicating_mask": DATA["val_X_indicating_mask"],
                 },
-                IMPUTATION_VAL_SET,
+                DATA_SAVING_DIR,
+                IMPUTATION_VAL_SET_NAME,
             )
 
-        if not os.path.exists(TEST_SET):
-            save_data_set_into_h5(
+        if not os.path.exists(TEST_SET_PATH):
+            save_dict_into_h5(
                 {
                     "X": DATA["test_X"],
                     "X_intact": DATA["test_X_intact"],
                     "indicating_mask": DATA["test_X_indicating_mask"],
                 },
-                TEST_SET,
+                DATA_SAVING_DIR,
+                TEST_SET_NAME,
             )
 
     @pytest.mark.xdist_group(name="data-lazy-loading")
     def test_1_DatasetForMIT_BaseDataset(self):
-        self.saits.fit(train_set=IMPUTATION_TRAIN_SET, val_set=IMPUTATION_VAL_SET)
-        _ = self.saits.impute(X=TEST_SET)
+        self.saits.fit(
+            train_set=IMPUTATION_TRAIN_SET_PATH, val_set=IMPUTATION_VAL_SET_PATH
+        )
+        _ = self.saits.impute(X=TEST_SET_PATH)
 
     @pytest.mark.xdist_group(name="data-lazy-loading")
     def test_2_DatasetForBRITS(self):
-        self.brits.fit(train_set=TRAIN_SET, val_set=VAL_SET)
-        _ = self.brits.classify(X=TEST_SET)
+        self.brits.fit(train_set=TRAIN_SET_PATH, val_set=VAL_SET_PATH)
+        _ = self.brits.classify(X=TEST_SET_PATH)
 
     @pytest.mark.xdist_group(name="data-lazy-loading")
     def test_3_DatasetForGRUD(self):
-        self.grud.fit(train_set=TRAIN_SET, val_set=VAL_SET)
-        _ = self.grud.classify(X=TEST_SET)
+        self.grud.fit(train_set=TRAIN_SET_PATH, val_set=VAL_SET_PATH)
+        _ = self.grud.classify(X=TEST_SET_PATH)
 
 
 if __name__ == "__main__":
diff --git a/tests/forecasting/__init__.py b/tests/forecasting/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/forecasting/__init__.py
@@ -0,0 +1,6 @@
+"""
+
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/test_forecasting.py b/tests/forecasting/bttf.py
similarity index 78%
rename from tests/test_forecasting.py
rename to tests/forecasting/bttf.py
index d2e8e14b..8e6946e7 100644
--- a/tests/test_forecasting.py
+++ b/tests/forecasting/bttf.py
@@ -1,5 +1,5 @@
 """
-Test cases for forecasting models.
+Test cases for BTTF forecasting model.
 """
 
 # Created by Wenjie Du <wenjay.du@gmail.com>
@@ -12,12 +12,13 @@
 from pypots.forecasting import BTTF
 from pypots.utils.logging import logger
 from pypots.utils.metrics import cal_mae
+from tests.forecasting.config import (
+    TEST_SET,
+    TEST_SET_INTACT,
+    N_PRED_STEP,
+)
 from tests.global_test_config import DATA
 
-EPOCHS = 5
-N_PRED_STEP = 4
-TEST_SET = {"X": DATA["test_X"][:, :-N_PRED_STEP]}
-
 
 class TestBTTF(unittest.TestCase):
     logger.info("Running tests for a forecasting model BTTF...")
@@ -37,8 +38,7 @@ class TestBTTF(unittest.TestCase):
     @pytest.mark.xdist_group(name="forecasting-bttf")
     def test_0_forecasting(self):
         predictions = self.bttf.forecast(TEST_SET)
-        logger.info(f"prediction shape: {predictions.shape}")
-        mae = cal_mae(predictions, DATA["test_X_intact"][:, -N_PRED_STEP:])
+        mae = cal_mae(predictions, TEST_SET_INTACT["X"][:, -N_PRED_STEP:])
         logger.info(f"prediction MAE: {mae}")
 
 
diff --git a/tests/forecasting/config.py b/tests/forecasting/config.py
new file mode 100644
index 00000000..0a2a9e78
--- /dev/null
+++ b/tests/forecasting/config.py
@@ -0,0 +1,23 @@
+"""
+Test configs for forecasting models.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+
+from tests.global_test_config import (
+    DATA,
+    RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+N_PRED_STEP = 4
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {"X": DATA["val_X"]}
+TEST_SET = {"X": DATA["test_X"][:, :-N_PRED_STEP]}
+TEST_SET_INTACT = {"X": DATA["test_X_intact"]}
+RESULT_SAVING_DIR_FOR_FORECASTING = os.path.join(RESULT_SAVING_DIR, "forecasting")
+RESULT_SAVING_DIR_FOR_CLASSIFICATION = RESULT_SAVING_DIR_FOR_FORECASTING  # legacy alias
diff --git a/tests/global_test_config.py b/tests/global_test_config.py
index f3349483..5e152734 100644
--- a/tests/global_test_config.py
+++ b/tests/global_test_config.py
@@ -7,7 +7,10 @@
 
 import os
 
+import torch
+
 from pypots.data.generating import gene_incomplete_random_walk_dataset
+from pypots.utils.logging import logger
 
 # Generate the unified data for testing and cache it first, DATA here is a singleton
 # Otherwise, file lock will cause bug if running test parallely with pytest-xdist.
@@ -20,6 +23,16 @@
 RESULT_SAVING_DIR = "testing_results"
 
 
+# DEVICE is None when fewer than two cuda devices exist, to avoid initialization failures while importing test classes
+cuda_devices = [torch.device(i) for i in range(torch.cuda.device_count())]
+if len(cuda_devices) > 1:
+    logger.info("❗️Detected multiple cuda devices, using all of them to run testing.")
+    DEVICE = cuda_devices
+else:
+    # with zero or one cuda device, leave it as None to use the default device
+    DEVICE = None
+
+
 def check_tb_and_model_checkpoints_existence(model):
     # check the tensorboard file existence
     saved_files = os.listdir(model.saving_path)
diff --git a/tests/imputation/__init__.py b/tests/imputation/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/imputation/__init__.py
@@ -0,0 +1,6 @@
+"""
+Test cases for imputation models.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/imputation/brits.py b/tests/imputation/brits.py
new file mode 100644
index 00000000..bf0a70c3
--- /dev/null
+++ b/tests/imputation/brits.py
@@ -0,0 +1,104 @@
+"""
+Test cases for BRITS imputation model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import BRITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_IMPUTATION,
+    EPOCHS,
+)
+
+
+class TestBRITS(unittest.TestCase):
+    logger.info("Running tests for an imputation model BRITS...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS")
+    model_save_name = "saved_BRITS_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a BRITS model
+    brits = BRITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        256,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="imputation-brits")
+    def test_0_fit(self):
+        self.brits.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="imputation-brits")
+    def test_1_impute(self):
+        imputed_X = self.brits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"BRITS test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-brits")
+    def test_2_parameters(self):
+        assert hasattr(self.brits, "model") and self.brits.model is not None
+
+        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
+
+        assert hasattr(self.brits, "best_loss")
+        self.assertNotEqual(self.brits.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.brits, "best_model_dict")
+            and self.brits.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-brits")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.brits)
+
+        # save the trained model into file, and check if the path exists
+        self.brits.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # reload the model from the file just saved to verify that load_model() works
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.brits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/config.py b/tests/imputation/config.py
new file mode 100644
index 00000000..c225598b
--- /dev/null
+++ b/tests/imputation/config.py
@@ -0,0 +1,25 @@
+"""
+Test configs for imputation models.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import os
+
+from tests.global_test_config import (
+    DATA,
+    RESULT_SAVING_DIR,
+)
+
+EPOCHS = 5
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {
+    "X": DATA["val_X"],
+    "X_intact": DATA["val_X_intact"],
+    "indicating_mask": DATA["val_X_indicating_mask"],
+}
+TEST_SET = {"X": DATA["test_X"]}
+
+RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
diff --git a/tests/imputation/gpvae.py b/tests/imputation/gpvae.py
new file mode 100644
index 00000000..9c59c5b2
--- /dev/null
+++ b/tests/imputation/gpvae.py
@@ -0,0 +1,104 @@
+"""
+Test cases for GP-VAE imputation model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import GPVAE
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_IMPUTATION,
+    EPOCHS,
+)
+
+
+class TestGPVAE(unittest.TestCase):
+    logger.info("Running tests for an imputation model GP-VAE...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "GP-VAE")
+    model_save_name = "saved_GPVAE_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a GP-VAE model
+    gp_vae = GPVAE(
+        DATA["n_steps"],
+        DATA["n_features"],
+        256,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="imputation-gpvae")
+    def test_0_fit(self):
+        self.gp_vae.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="imputation-gpvae")
+    def test_1_impute(self):
+        imputed_X = self.gp_vae.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"GP-VAE test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-gpvae")
+    def test_2_parameters(self):
+        assert hasattr(self.gp_vae, "model") and self.gp_vae.model is not None
+
+        assert hasattr(self.gp_vae, "optimizer") and self.gp_vae.optimizer is not None
+
+        assert hasattr(self.gp_vae, "best_loss")
+        self.assertNotEqual(self.gp_vae.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.gp_vae, "best_model_dict")
+            and self.gp_vae.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-gpvae")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.gp_vae)
+
+        # save the trained model into file, and check if the path exists
+        self.gp_vae.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # reload the model from the file just saved to verify that load_model() works
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.gp_vae.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/locf.py b/tests/imputation/locf.py
new file mode 100644
index 00000000..8e54fbe0
--- /dev/null
+++ b/tests/imputation/locf.py
@@ -0,0 +1,46 @@
+"""
+Test cases for LOCF imputation method.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import LOCF
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+)
+from tests.imputation.config import (
+    TEST_SET,
+)
+
+
+class TestLOCF(unittest.TestCase):
+    logger.info("Running tests for an imputation model LOCF...")
+    locf = LOCF(nan=0)
+
+    @pytest.mark.xdist_group(name="imputation-locf")
+    def test_0_impute(self):
+        test_X_imputed = self.locf.impute(TEST_SET)
+        assert not np.isnan(
+            test_X_imputed
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"LOCF test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-locf")
+    def test_1_parameters(self):
+        assert hasattr(self.locf, "nan") and self.locf.nan is not None
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/mrnn.py b/tests/imputation/mrnn.py
new file mode 100644
index 00000000..681a9121
--- /dev/null
+++ b/tests/imputation/mrnn.py
@@ -0,0 +1,104 @@
+"""
+Test cases for MRNN imputation model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import MRNN
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_IMPUTATION,
+    EPOCHS,
+)
+
+
+class TestMRNN(unittest.TestCase):
+    logger.info("Running tests for an imputation model MRNN...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN")
+    model_save_name = "saved_MRNN_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a MRNN model
+    mrnn = MRNN(
+        DATA["n_steps"],
+        DATA["n_features"],
+        256,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="imputation-mrnn")
+    def test_0_fit(self):
+        self.mrnn.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="imputation-mrnn")
+    def test_1_impute(self):
+        imputed_X = self.mrnn.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"MRNN test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-mrnn")
+    def test_2_parameters(self):
+        assert hasattr(self.mrnn, "model") and self.mrnn.model is not None
+
+        assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None
+
+        assert hasattr(self.mrnn, "best_loss")
+        self.assertNotEqual(self.mrnn.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.mrnn, "best_model_dict")
+            and self.mrnn.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-mrnn")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.mrnn)
+
+        # save the trained model into file, and check if the path exists
+        self.mrnn.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # reload the model from the file just saved to verify that load_model() works
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.mrnn.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/saits.py b/tests/imputation/saits.py
new file mode 100644
index 00000000..647e8657
--- /dev/null
+++ b/tests/imputation/saits.py
@@ -0,0 +1,110 @@
+"""
+Test cases for SAITS imputation model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_IMPUTATION,
+    EPOCHS,
+)
+
+
+class TestSAITS(unittest.TestCase):
+    logger.info("Running tests for an imputation model SAITS...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS")
+    model_save_name = "saved_saits_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=2,
+        d_model=256,
+        d_inner=128,
+        n_heads=4,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="imputation-saits")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="imputation-saits")
+    def test_1_impute(self):
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-saits")
+    def test_2_parameters(self):
+        assert hasattr(self.saits, "model") and self.saits.model is not None
+
+        assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None
+
+        assert hasattr(self.saits, "best_loss")
+        self.assertNotEqual(self.saits.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.saits, "best_model_dict")
+            and self.saits.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-saits")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.saits)
+
+        # save the trained model into file, and check if the path exists
+        self.saits.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # reload the model from the file just saved to verify that load_model() works
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.saits.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/transformer.py b/tests/imputation/transformer.py
new file mode 100644
index 00000000..965b2cf7
--- /dev/null
+++ b/tests/imputation/transformer.py
@@ -0,0 +1,113 @@
+"""
+Test cases for Transformer imputation model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import Transformer
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_IMPUTATION,
+    EPOCHS,
+)
+
+
+class TestTransformer(unittest.TestCase):
+    logger.info("Running tests for an imputation model Transformer...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer")
+    model_save_name = "saved_transformer_model.pypots"
+
+    # initialize an Adam optimizer
+    optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a Transformer model
+    transformer = Transformer(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=2,
+        d_model=256,
+        d_inner=128,
+        n_heads=4,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        optimizer=optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="imputation-transformer")
+    def test_0_fit(self):
+        self.transformer.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="imputation-transformer")
+    def test_1_impute(self):
+        imputed_X = self.transformer.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"Transformer test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-transformer")
+    def test_2_parameters(self):
+        assert hasattr(self.transformer, "model") and self.transformer.model is not None
+
+        assert (
+            hasattr(self.transformer, "optimizer")
+            and self.transformer.optimizer is not None
+        )
+
+        assert hasattr(self.transformer, "best_loss")
+        self.assertNotEqual(self.transformer.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.transformer, "best_model_dict")
+            and self.transformer.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-transformer")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.transformer)
+
+        # save the trained model into file, and check if the path exists
+        self.transformer.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # reload the model from the file just saved to verify that load_model() works
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.transformer.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/imputation/usgan.py b/tests/imputation/usgan.py
new file mode 100644
index 00000000..c91a17a1
--- /dev/null
+++ b/tests/imputation/usgan.py
@@ -0,0 +1,111 @@
+"""
+Test cases for US-GAN imputation model.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+
+import os.path
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import USGAN
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import (
+    DATA,
+    DEVICE,
+    check_tb_and_model_checkpoints_existence,
+)
+from tests.imputation.config import (
+    TRAIN_SET,
+    VAL_SET,
+    TEST_SET,
+    RESULT_SAVING_DIR_FOR_IMPUTATION,
+    EPOCHS,
+)
+
+
+class TestUSGAN(unittest.TestCase):
+    logger.info("Running tests for an imputation model US-GAN...")
+
+    # set the log and model saving path
+    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "US-GAN")
+    model_save_name = "saved_USGAN_model.pypots"
+
+    # initialize two Adam optimizers, passed to the model as G_optimizer and D_optimizer
+    G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+    D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a US-GAN model
+    us_gan = USGAN(
+        DATA["n_steps"],
+        DATA["n_features"],
+        256,
+        epochs=EPOCHS,
+        saving_path=saving_path,
+        G_optimizer=G_optimizer,
+        D_optimizer=D_optimizer,
+        device=DEVICE,
+    )
+
+    @pytest.mark.xdist_group(name="imputation-usgan")
+    def test_0_fit(self):
+        self.us_gan.fit(TRAIN_SET, VAL_SET)
+
+    @pytest.mark.xdist_group(name="imputation-usgan")
+    def test_1_impute(self):
+        imputed_X = self.us_gan.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"US-GAN test_MAE: {test_MAE}")
+
+    @pytest.mark.xdist_group(name="imputation-usgan")
+    def test_2_parameters(self):
+        assert hasattr(self.us_gan, "model") and self.us_gan.model is not None
+
+        assert (
+            hasattr(self.us_gan, "G_optimizer") and self.us_gan.G_optimizer is not None
+        )
+        assert (
+            hasattr(self.us_gan, "D_optimizer") and self.us_gan.D_optimizer is not None
+        )
+
+        assert hasattr(self.us_gan, "best_loss")
+        self.assertNotEqual(self.us_gan.best_loss, float("inf"))
+
+        assert (
+            hasattr(self.us_gan, "best_model_dict")
+            and self.us_gan.best_model_dict is not None
+        )
+
+    @pytest.mark.xdist_group(name="imputation-usgan")
+    def test_3_saving_path(self):
+        # whether the root saving dir exists, which should be created by save_log_into_tb_file
+        assert os.path.exists(
+            self.saving_path
+        ), f"file {self.saving_path} does not exist"
+
+        # check if the tensorboard file and model checkpoints exist
+        check_tb_and_model_checkpoints_existence(self.us_gan)
+
+        # save the trained model into file, and check if the path exists
+        self.us_gan.save_model(
+            saving_dir=self.saving_path, file_name=self.model_save_name
+        )
+
+        # reload the model from the file just saved to verify that load_model() works
+        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
+        self.us_gan.load_model(saved_model_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/__init__.py b/tests/optim/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/optim/__init__.py
@@ -0,0 +1,6 @@
+"""
+Test cases for optimizers.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/optim/adadelta.py b/tests/optim/adadelta.py
new file mode 100644
index 00000000..b69e5ea4
--- /dev/null
+++ b/tests/optim/adadelta.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adadelta.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adadelta
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdadelta(unittest.TestCase):
+    logger.info("Running tests for Adadelta...")
+
+    # initialize an Adadelta optimizer
+    adadelta = Adadelta(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model that is trained with the Adadelta optimizer created above
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=adadelta,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-adadelta")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/adagrad.py b/tests/optim/adagrad.py
new file mode 100644
index 00000000..21b4696a
--- /dev/null
+++ b/tests/optim/adagrad.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adagrad.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adagrad
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdagrad(unittest.TestCase):
+    logger.info("Running tests for Adagrad...")
+
+    # initialize an Adagrad optimizer
+    adagrad = Adagrad(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model that is trained with the Adagrad optimizer created above
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=adagrad,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-adagrad")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/adam.py b/tests/optim/adam.py
new file mode 100644
index 00000000..448f92b9
--- /dev/null
+++ b/tests/optim/adam.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer Adam.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import Adam
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdam(unittest.TestCase):
+    logger.info("Running tests for Adam...")
+
+    # initialize an Adam optimizer
+    adam = Adam(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model that is trained with the Adam optimizer created above
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=adam,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-adam")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/adamw.py b/tests/optim/adamw.py
new file mode 100644
index 00000000..a7941f43
--- /dev/null
+++ b/tests/optim/adamw.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer AdamW.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import AdamW
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestAdamW(unittest.TestCase):
+    logger.info("Running tests for AdamW...")
+
+    # initialize an AdamW optimizer
+    adamw = AdamW(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model that is trained with the AdamW optimizer created above
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=adamw,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-adamw")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/config.py b/tests/optim/config.py
new file mode 100644
index 00000000..a0391027
--- /dev/null
+++ b/tests/optim/config.py
@@ -0,0 +1,19 @@
+"""
+Test configs for optimizers.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+from tests.global_test_config import DATA
+
+TRAIN_SET = {"X": DATA["train_X"]}
+VAL_SET = {
+    "X": DATA["val_X"],
+    "X_intact": DATA["val_X_intact"],
+    "indicating_mask": DATA["val_X_indicating_mask"],
+}
+TEST_SET = {"X": DATA["test_X"]}
+
+
+EPOCHS = 1
diff --git a/tests/optim/rmsprop.py b/tests/optim/rmsprop.py
new file mode 100644
index 00000000..1fe61a0d
--- /dev/null
+++ b/tests/optim/rmsprop.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer RMSprop.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import RMSprop
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestRMSprop(unittest.TestCase):
+    logger.info("Running tests for RMSprop...")
+
+    # initialize an RMSprop optimizer
+    rmsprop = RMSprop(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model that will be trained with the RMSprop optimizer under test
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=rmsprop,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-rmsprop")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/optim/sgd.py b/tests/optim/sgd.py
new file mode 100644
index 00000000..4b1c1998
--- /dev/null
+++ b/tests/optim/sgd.py
@@ -0,0 +1,56 @@
+"""
+Test cases for the optimizer SGD.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import numpy as np
+import pytest
+
+from pypots.imputation import SAITS
+from pypots.optim import SGD
+from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
+from tests.global_test_config import DATA
+from tests.optim.config import EPOCHS, TEST_SET, TRAIN_SET, VAL_SET
+
+
+class TestSGD(unittest.TestCase):
+    logger.info("Running tests for SGD...")
+
+    # initialize an SGD optimizer
+    sgd = SGD(lr=0.001, weight_decay=1e-5)
+
+    # initialize a SAITS model that will be trained with the SGD optimizer under test
+    saits = SAITS(
+        DATA["n_steps"],
+        DATA["n_features"],
+        n_layers=1,
+        d_model=128,
+        d_inner=64,
+        n_heads=2,
+        d_k=64,
+        d_v=64,
+        dropout=0.1,
+        optimizer=sgd,
+        epochs=EPOCHS,
+    )
+
+    @pytest.mark.xdist_group(name="optim-sgd")
+    def test_0_fit(self):
+        self.saits.fit(TRAIN_SET, VAL_SET)
+        imputed_X = self.saits.impute(TEST_SET)
+        assert not np.isnan(
+            imputed_X
+        ).any(), "Output still has missing values after running impute()."
+        test_MAE = cal_mae(
+            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
+        )
+        logger.info(f"SAITS test_MAE: {test_MAE}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_classification.py b/tests/test_classification.py
deleted file mode 100644
index 2ef9c6d1..00000000
--- a/tests/test_classification.py
+++ /dev/null
@@ -1,256 +0,0 @@
-"""
-Test cases for classification models.
-"""
-
-# Created by Wenjie Du <wenjay.du@gmail.com>
-# License: GLP-v3
-
-import os
-import unittest
-
-import pytest
-
-from pypots.classification import BRITS, GRUD, Raindrop
-from pypots.optim import Adam
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_binary_classification_metrics
-from tests.global_test_config import (
-    DATA,
-    RESULT_SAVING_DIR,
-    check_tb_and_model_checkpoints_existence,
-)
-
-EPOCHS = 5
-
-TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]}
-VAL_SET = {"X": DATA["val_X"], "y": DATA["val_y"]}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification")
-
-
-class TestBRITS(unittest.TestCase):
-    logger.info("Running tests for a classification model BRITS...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS")
-    model_save_name = "saved_BRITS_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a BRITS model
-    brits = BRITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_classes=DATA["n_classes"],
-        rnn_hidden_size=256,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        model_saving_strategy="better",
-        optimizer=optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_0_fit(self):
-        self.brits.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_1_classify(self):
-        predictions = self.brits.classify(TEST_SET)
-        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
-        logger.info(
-            f'ROC_AUC: {metrics["roc_auc"]}, \n'
-            f'PR_AUC: {metrics["pr_auc"]},\n'
-            f'F1: {metrics["f1"]},\n'
-            f'Precision: {metrics["precision"]},\n'
-            f'Recall: {metrics["recall"]},\n'
-        )
-        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_2_parameters(self):
-        assert hasattr(self.brits, "model") and self.brits.model is not None
-
-        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
-        assert hasattr(self.brits, "best_loss")
-        self.assertNotEqual(self.brits.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.brits, "best_model_dict")
-            and self.brits.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.brits)
-
-        # save the trained model into file, and check if the path exists
-        self.brits.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.brits.load_model(saved_model_path)
-
-
-class TestGRUD(unittest.TestCase):
-    logger.info("Running tests for a classification model GRUD...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD")
-    model_save_name = "saved_GRUD_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a GRUD model
-    grud = GRUD(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_classes=DATA["n_classes"],
-        rnn_hidden_size=256,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        optimizer=optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_0_fit(self):
-        self.grud.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_1_classify(self):
-        predictions = self.grud.classify(TEST_SET)
-        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
-        logger.info(
-            f'ROC_AUC: {metrics["roc_auc"]}, \n'
-            f'PR_AUC: {metrics["pr_auc"]},\n'
-            f'F1: {metrics["f1"]},\n'
-            f'Precision: {metrics["precision"]},\n'
-            f'Recall: {metrics["recall"]},\n'
-        )
-        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_2_parameters(self):
-        assert hasattr(self.grud, "model") and self.grud.model is not None
-
-        assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None
-
-        assert hasattr(self.grud, "best_loss")
-        self.assertNotEqual(self.grud.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.grud, "best_model_dict")
-            and self.grud.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.grud)
-
-        # save the trained model into file, and check if the path exists
-        self.grud.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.grud.load_model(saved_model_path)
-
-
-class TestRaindrop(unittest.TestCase):
-    logger.info("Running tests for a classification model Raindrop...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop")
-    model_save_name = "saved_Raindrop_model.pypots"
-
-    # initialize a Raindrop model
-    raindrop = Raindrop(
-        DATA["n_steps"],
-        DATA["n_features"],
-        DATA["n_classes"],
-        n_layers=2,
-        d_model=DATA["n_features"] * 4,
-        d_inner=256,
-        n_heads=2,
-        dropout=0.3,
-        d_static=0,
-        aggregation="mean",
-        sensor_wise_mask=False,
-        static=False,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-    )
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_0_fit(self):
-        self.raindrop.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_1_classify(self):
-        predictions = self.raindrop.classify(TEST_SET)
-        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
-        logger.info(
-            f'ROC_AUC: {metrics["roc_auc"]}, \n'
-            f'PR_AUC: {metrics["pr_auc"]},\n'
-            f'F1: {metrics["f1"]},\n'
-            f'Precision: {metrics["precision"]},\n'
-            f'Recall: {metrics["recall"]},\n'
-        )
-        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_2_parameters(self):
-        assert hasattr(self.raindrop, "model") and self.raindrop.model is not None
-
-        assert (
-            hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None
-        )
-
-        assert hasattr(self.raindrop, "best_loss")
-        self.assertNotEqual(self.raindrop.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.raindrop, "best_model_dict")
-            and self.raindrop.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.raindrop)
-
-        # save the trained model into file, and check if the path exists
-        self.raindrop.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.raindrop.load_model(saved_model_path)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_cli.py b/tests/test_cli.py
deleted file mode 100644
index 4e9e9927..00000000
--- a/tests/test_cli.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""
-Test cases for the functions and classes in package `pypots.cli`.
-"""
-
-# Created by Wenjie Du <wenjay.du@gmail.com>
-# License: GLP-v3
-
-import os
-import threading
-import unittest
-from argparse import Namespace
-from copy import copy
-
-import pytest
-
-from pypots.cli.dev import dev_command_factory
-from pypots.cli.doc import doc_command_factory
-from pypots.cli.env import env_command_factory
-from pypots.utils.logging import logger
-
-PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), "../.."))
-
-
-def callback_func():
-    raise TimeoutError("Time out.")
-
-
-def time_out(interval, callback):
-    def decorator(func):
-        def wrapper(*args, **kwargs):
-            t = threading.Thread(target=func, args=args, kwargs=kwargs)
-            t.setDaemon(True)
-            t.start()
-            t.join(interval)  # wait for interval seconds
-            if t.is_alive():
-                return threading.Timer(0, callback).start()  # invoke callback()
-            else:
-                return
-
-        return wrapper
-
-    return decorator
-
-
-@pytest.mark.xfail(reason="Allow tests for CLI to fail")
-class TestPyPOTSCLIDev(unittest.TestCase):
-    # set up the default arguments
-    default_arguments = {
-        "build": False,
-        "cleanup": False,
-        "run_tests": False,
-        "k": None,
-        "show_coverage": False,
-        "lint_code": False,
-    }
-    # `pypots-cli dev` must run under the project root dir
-    os.chdir(PROJECT_ROOT_DIR)
-
-    @pytest.mark.xdist_group(name="cli-dev")
-    def test_0_build(self):
-        arguments = copy(self.default_arguments)
-        arguments["build"] = True
-        args = Namespace(**arguments)
-        dev_command_factory(args).run()
-
-    @pytest.mark.xdist_group(name="cli-dev")
-    def test_1_run_tests(self):
-        arguments = copy(self.default_arguments)
-        arguments["run_tests"] = True
-        arguments["k"] = "try_to_find_a_non_existing_test_case"
-        args = Namespace(**arguments)
-        try:
-            dev_command_factory(args).run()
-        except RuntimeError:  # try to find a non-existing test case, so RuntimeError will be raised
-            pass
-        except Exception as e:  # other exceptions will cause an error and result in failed testing
-            raise e
-
-    # Don't test --lint-code because Black will reformat the code and cause error when generating the coverage report
-    # @pytest.mark.xdist_group(name="cli-dev")
-    # def test_2_lint_code(self):
-    #     arguments = copy(self.default_arguments)
-    #     arguments["lint_code"] = True
-    #     args = Namespace(**arguments)
-    #     dev_command_factory(args).run()
-
-    @pytest.mark.xdist_group(name="cli-dev")
-    def test_3_cleanup(self):
-        arguments = copy(self.default_arguments)
-        arguments["cleanup"] = True
-        args = Namespace(**arguments)
-        dev_command_factory(args).run()
-
-
-@pytest.mark.xfail(reason="Allow tests for CLI to fail")
-class TestPyPOTSCLIDoc(unittest.TestCase):
-    # set up the default arguments
-    default_arguments = {
-        "gene_rst": False,
-        "branch": "main",
-        "gene_html": False,
-        "view_doc": False,
-        "port": 9075,
-        "cleanup": False,
-    }
-    # `pypots-cli doc` must run under the project root dir
-    os.chdir(PROJECT_ROOT_DIR)
-
-    @pytest.mark.xdist_group(name="cli-doc")
-    def test_0_gene_rst(self):
-        arguments = copy(self.default_arguments)
-        arguments["gene_rst"] = True
-        args = Namespace(**arguments)
-        doc_command_factory(args).run()
-
-        logger.info("run again under a non-root dir")
-        try:
-            os.chdir(os.path.abspath(os.path.join(PROJECT_ROOT_DIR, "pypots")))
-            doc_command_factory(args).run()
-        except RuntimeError:  # try to run under a non-root dir, so RuntimeError will be raised
-            pass
-        except Exception as e:  # other exceptions will cause an error and result in failed testing
-            raise e
-        finally:
-            os.chdir(PROJECT_ROOT_DIR)
-
-    @pytest.mark.xdist_group(name="cli-doc")
-    def test_1_gene_html(self):
-        arguments = copy(self.default_arguments)
-        arguments["gene_html"] = True
-        args = Namespace(**arguments)
-        try:
-            doc_command_factory(args).run()
-        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
-            logger.error(e)
-
-    @pytest.mark.xdist_group(name="cli-doc")
-    @time_out(2, callback_func)  # wait for two seconds
-    def test_2_view_doc(self):
-        arguments = copy(self.default_arguments)
-        arguments["view_doc"] = True
-        args = Namespace(**arguments)
-        try:
-            doc_command_factory(args).run()
-        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
-            logger.error(e)
-
-    @pytest.mark.xdist_group(name="cli-doc")
-    def test_3_cleanup(self):
-        arguments = copy(self.default_arguments)
-        arguments["cleanup"] = True
-        args = Namespace(**arguments)
-        doc_command_factory(args).run()
-
-
-@pytest.mark.xfail(reason="Allow tests for CLI to fail")
-class TestPyPOTSCLIEnv(unittest.TestCase):
-    # set up the default arguments
-    default_arguments = {
-        "install": "optional",
-        "tool": "conda",
-    }
-
-    # `pypots-cli env` must run under the project root dir
-    os.chdir(PROJECT_ROOT_DIR)
-
-    @pytest.mark.xdist_group(name="cli-env")
-    def test_0_install_with_conda(self):
-        arguments = copy(self.default_arguments)
-        arguments["tool"] = "conda"
-        args = Namespace(**arguments)
-        try:
-            env_command_factory(args).run()
-        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
-            logger.error(e)
-
-    @pytest.mark.xdist_group(name="cli-env")
-    def test_1_install_with_pip(self):
-        arguments = copy(self.default_arguments)
-        arguments["tool"] = "pip"
-        args = Namespace(**arguments)
-        try:
-            env_command_factory(args).run()
-        except Exception as e:  # somehow we have some error when testing on Windows, so just print and pass below
-            logger.error(e)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_imputation.py b/tests/test_imputation.py
deleted file mode 100644
index 6094ce62..00000000
--- a/tests/test_imputation.py
+++ /dev/null
@@ -1,356 +0,0 @@
-"""
-Test cases for imputation models.
-"""
-
-# Created by Wenjie Du <wenjay.du@gmail.com>
-# License: GPL-v3
-
-
-import os.path
-import unittest
-
-import numpy as np
-import pytest
-
-from pypots.imputation import (
-    SAITS,
-    Transformer,
-    BRITS,
-    MRNN,
-    LOCF,
-)
-from pypots.optim import Adam
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_mae
-from tests.global_test_config import (
-    DATA,
-    RESULT_SAVING_DIR,
-    check_tb_and_model_checkpoints_existence,
-)
-
-EPOCH = 5
-
-TRAIN_SET = {"X": DATA["train_X"]}
-VAL_SET = {
-    "X": DATA["val_X"],
-    "X_intact": DATA["val_X_intact"],
-    "indicating_mask": DATA["val_X_indicating_mask"],
-}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
-
-
-class TestSAITS(unittest.TestCase):
-    logger.info("Running tests for an imputation model SAITS...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS")
-    model_save_name = "saved_saits_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=2,
-        d_model=256,
-        d_inner=128,
-        n_heads=4,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        epochs=EPOCH,
-        saving_path=saving_path,
-        optimizer=optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_1_impute(self):
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_2_parameters(self):
-        assert hasattr(self.saits, "model") and self.saits.model is not None
-
-        assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None
-
-        assert hasattr(self.saits, "best_loss")
-        self.assertNotEqual(self.saits.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.saits, "best_model_dict")
-            and self.saits.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.saits)
-
-        # save the trained model into file, and check if the path exists
-        self.saits.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.saits.load_model(saved_model_path)
-
-
-class TestTransformer(unittest.TestCase):
-    logger.info("Running tests for an imputation model Transformer...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer")
-    model_save_name = "saved_transformer_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a Transformer model
-    transformer = Transformer(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=2,
-        d_model=256,
-        d_inner=128,
-        n_heads=4,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        epochs=EPOCH,
-        saving_path=saving_path,
-        optimizer=optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_0_fit(self):
-        self.transformer.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_1_impute(self):
-        imputed_X = self.transformer.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"Transformer test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_2_parameters(self):
-        assert hasattr(self.transformer, "model") and self.transformer.model is not None
-
-        assert (
-            hasattr(self.transformer, "optimizer")
-            and self.transformer.optimizer is not None
-        )
-
-        assert hasattr(self.transformer, "best_loss")
-        self.assertNotEqual(self.transformer.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.transformer, "best_model_dict")
-            and self.transformer.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.transformer)
-
-        # save the trained model into file, and check if the path exists
-        self.transformer.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.transformer.load_model(saved_model_path)
-
-
-class TestBRITS(unittest.TestCase):
-    logger.info("Running tests for an imputation model BRITS...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS")
-    model_save_name = "saved_BRITS_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a BRITS model
-    brits = BRITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        256,
-        epochs=EPOCH,
-        saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/BRITS",
-        optimizer=optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_0_fit(self):
-        self.brits.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_1_impute(self):
-        imputed_X = self.brits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"BRITS test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_2_parameters(self):
-        assert hasattr(self.brits, "model") and self.brits.model is not None
-
-        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
-        assert hasattr(self.brits, "best_loss")
-        self.assertNotEqual(self.brits.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.brits, "best_model_dict")
-            and self.brits.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.brits)
-
-        # save the trained model into file, and check if the path exists
-        self.brits.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.brits.load_model(saved_model_path)
-
-
-class TestMRNN(unittest.TestCase):
-    logger.info("Running tests for an imputation model MRNN...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN")
-    model_save_name = "saved_MRNN_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a MRNN model
-    mrnn = MRNN(
-        DATA["n_steps"],
-        DATA["n_features"],
-        256,
-        epochs=EPOCH,
-        saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/MRNN",
-        optimizer=optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_0_fit(self):
-        self.mrnn.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_1_impute(self):
-        imputed_X = self.mrnn.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"MRNN test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_2_parameters(self):
-        assert hasattr(self.mrnn, "model") and self.mrnn.model is not None
-
-        assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None
-
-        assert hasattr(self.mrnn, "best_loss")
-        self.assertNotEqual(self.mrnn.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.mrnn, "best_model_dict")
-            and self.mrnn.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.mrnn)
-
-        # save the trained model into file, and check if the path exists
-        self.mrnn.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.mrnn.load_model(saved_model_path)
-
-
-class TestLOCF(unittest.TestCase):
-    logger.info("Running tests for an imputation model LOCF...")
-    locf = LOCF(nan=0)
-
-    @pytest.mark.xdist_group(name="imputation-locf")
-    def test_0_impute(self):
-        test_X_imputed = self.locf.impute(TEST_SET)
-        assert not np.isnan(
-            test_X_imputed
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"LOCF test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-locf")
-    def test_1_parameters(self):
-        assert hasattr(self.locf, "nan") and self.locf.nan is not None
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_optim.py b/tests/test_optim.py
deleted file mode 100644
index 9be096fb..00000000
--- a/tests/test_optim.py
+++ /dev/null
@@ -1,244 +0,0 @@
-"""
-Test cases for optimizers.
-"""
-
-# Created by Wenjie Du <wenjay.du@gmail.com>
-# License: GLP-v3
-
-import unittest
-
-import h5py
-import numpy as np
-import pytest
-
-from pypots.imputation import SAITS
-from pypots.optim import Adam, AdamW, Adagrad, Adadelta, SGD, RMSprop
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_mae
-from tests.global_test_config import DATA
-
-TRAIN_SET = {"X": DATA["train_X"]}
-VAL_SET = {
-    "X": DATA["val_X"],
-    "X_intact": DATA["val_X_intact"],
-    "indicating_mask": DATA["val_X_indicating_mask"],
-}
-TEST_SET = {"X": DATA["test_X"]}
-
-
-EPOCHS = 3
-
-
-def save_data_set_into_h5(data, path):
-    with h5py.File(path, "w") as hf:
-        for i in data.keys():
-            tp = int if i == "y" else "float32"
-            hf.create_dataset(i, data=data[i].astype(tp))
-
-
-class TestAdam(unittest.TestCase):
-    logger.info("Running tests for Adam...")
-
-    # initialize an Adam optimizer
-    adam = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=1,
-        d_model=128,
-        d_inner=64,
-        n_heads=2,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        optimizer=adam,
-        epochs=EPOCHS,
-    )
-
-    @pytest.mark.xdist_group(name="optim-adam")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestAdamW(unittest.TestCase):
-    logger.info("Running tests for AdamW...")
-
-    # initialize an AdamW optimizer
-    adamw = AdamW(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=1,
-        d_model=128,
-        d_inner=64,
-        n_heads=2,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        optimizer=adamw,
-        epochs=EPOCHS,
-    )
-
-    @pytest.mark.xdist_group(name="optim-adamw")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestAdagrad(unittest.TestCase):
-    logger.info("Running tests for Adagrad...")
-
-    # initialize an Adagrad optimizer
-    adagrad = Adagrad(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=1,
-        d_model=128,
-        d_inner=64,
-        n_heads=2,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        optimizer=adagrad,
-        epochs=EPOCHS,
-    )
-
-    @pytest.mark.xdist_group(name="optim-adagrad")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestAdadelta(unittest.TestCase):
-    logger.info("Running tests for Adadelta...")
-
-    # initialize an Adadelta optimizer
-    adadelta = Adadelta(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=1,
-        d_model=128,
-        d_inner=64,
-        n_heads=2,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        optimizer=adadelta,
-        epochs=EPOCHS,
-    )
-
-    @pytest.mark.xdist_group(name="optim-adadelta")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestSGD(unittest.TestCase):
-    logger.info("Running tests for SGD...")
-
-    # initialize a SGD optimizer
-    sgd = SGD(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=1,
-        d_model=128,
-        d_inner=64,
-        n_heads=2,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        optimizer=sgd,
-        epochs=EPOCHS,
-    )
-
-    @pytest.mark.xdist_group(name="optim-sgd")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-class TestRMSprop(unittest.TestCase):
-    logger.info("Running tests for RMSprop...")
-
-    # initialize a RMSprop optimizer
-    rmsprop = RMSprop(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model for testing DatasetForMIT and BaseDataset
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=1,
-        d_model=128,
-        d_inner=64,
-        n_heads=2,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        optimizer=rmsprop,
-        epochs=EPOCHS,
-    )
-
-    @pytest.mark.xdist_group(name="optim-rmsprop")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_training_on_multi_gpus.py b/tests/test_training_on_multi_gpus.py
deleted file mode 100644
index b076cbfe..00000000
--- a/tests/test_training_on_multi_gpus.py
+++ /dev/null
@@ -1,783 +0,0 @@
-"""
-Test cases for running models on multi cuda devices.
-"""
-
-# Created by Wenjie Du <wenjay.du@gmail.com>
-# License: GPL-v3
-
-
-import os.path
-import unittest
-
-import numpy as np
-import pytest
-import torch
-
-from pypots.classification import BRITS, GRUD, Raindrop
-from pypots.clustering import VaDER, CRLI
-from pypots.forecasting import BTTF
-from pypots.imputation import BRITS as ImputationBRITS
-from pypots.imputation import (
-    SAITS,
-    Transformer,
-    MRNN,
-    LOCF,
-)
-from pypots.optim import Adam
-from pypots.utils.logging import logger
-from pypots.utils.metrics import cal_binary_classification_metrics
-from pypots.utils.metrics import cal_mae
-from pypots.utils.metrics import cal_rand_index, cal_cluster_purity
-from tests.global_test_config import (
-    DATA,
-    RESULT_SAVING_DIR,
-    check_tb_and_model_checkpoints_existence,
-)
-
-EPOCHS = 5
-
-cuda_devices = [torch.device(i) for i in range(torch.cuda.device_count())]
-
-# set DEVICES to None if no cuda device is available, to avoid initialization failed while importing test classes
-DEVICES = None if cuda_devices == [] else cuda_devices
-
-# global skip test if less than two cuda-enabled devices
-LESS_THAN_TWO_DEVICES = len(cuda_devices) < 2
-pytestmark = pytest.mark.skipif(
-    LESS_THAN_TWO_DEVICES, reason="not enough cuda devices to run tests"
-)
-
-
-TRAIN_SET = {"X": DATA["train_X"], "y": DATA["train_y"]}
-
-VAL_SET = {
-    "X": DATA["val_X"],
-    "X_intact": DATA["val_X_intact"],
-    "indicating_mask": DATA["val_X_indicating_mask"],
-    "y": DATA["val_y"],
-}
-TEST_SET = {"X": DATA["test_X"]}
-
-RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation")
-RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification")
-RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering")
-
-
-class TestSAITS(unittest.TestCase):
-    logger.info("Running tests for an imputation model SAITS...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "SAITS")
-    model_save_name = "saved_saits_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a SAITS model
-    saits = SAITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=2,
-        d_model=256,
-        d_inner=128,
-        n_heads=4,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_0_fit(self):
-        self.saits.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_1_impute(self):
-        imputed_X = self.saits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"SAITS test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_2_parameters(self):
-        assert hasattr(self.saits, "model") and self.saits.model is not None
-
-        assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None
-
-        assert hasattr(self.saits, "best_loss")
-        self.assertNotEqual(self.saits.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.saits, "best_model_dict")
-            and self.saits.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-saits")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.saits)
-
-        # save the trained model into file, and check if the path exists
-        self.saits.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.saits.load_model(saved_model_path)
-
-
-class TestTransformer(unittest.TestCase):
-    logger.info("Running tests for an imputation model Transformer...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "Transformer")
-    model_save_name = "saved_transformer_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a Transformer model
-    transformer = Transformer(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_layers=2,
-        d_model=256,
-        d_inner=128,
-        n_heads=4,
-        d_k=64,
-        d_v=64,
-        dropout=0.1,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_0_fit(self):
-        self.transformer.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_1_impute(self):
-        imputed_X = self.transformer.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"Transformer test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_2_parameters(self):
-        assert hasattr(self.transformer, "model") and self.transformer.model is not None
-
-        assert (
-            hasattr(self.transformer, "optimizer")
-            and self.transformer.optimizer is not None
-        )
-
-        assert hasattr(self.transformer, "best_loss")
-        self.assertNotEqual(self.transformer.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.transformer, "best_model_dict")
-            and self.transformer.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-transformer")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.transformer)
-
-        # save the trained model into file, and check if the path exists
-        self.transformer.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.transformer.load_model(saved_model_path)
-
-
-class TestImputationBRITS(unittest.TestCase):
-    logger.info("Running tests for an imputation model BRITS...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "BRITS")
-    model_save_name = "saved_BRITS_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a BRITS model
-    brits = ImputationBRITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        256,
-        epochs=EPOCHS,
-        saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/BRITS",
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_0_fit(self):
-        self.brits.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_1_impute(self):
-        imputed_X = self.brits.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"BRITS test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_2_parameters(self):
-        assert hasattr(self.brits, "model") and self.brits.model is not None
-
-        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
-        assert hasattr(self.brits, "best_loss")
-        self.assertNotEqual(self.brits.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.brits, "best_model_dict")
-            and self.brits.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-brits")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.brits)
-
-        # save the trained model into file, and check if the path exists
-        self.brits.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.brits.load_model(saved_model_path)
-
-
-class TestMRNN(unittest.TestCase):
-    logger.info("Running tests for an imputation model MRNN...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_IMPUTATION, "MRNN")
-    model_save_name = "saved_MRNN_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a MRNN model
-    mrnn = MRNN(
-        DATA["n_steps"],
-        DATA["n_features"],
-        256,
-        epochs=EPOCHS,
-        saving_path=f"{RESULT_SAVING_DIR_FOR_IMPUTATION}/MRNN",
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_0_fit(self):
-        self.mrnn.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_1_impute(self):
-        imputed_X = self.mrnn.impute(TEST_SET)
-        assert not np.isnan(
-            imputed_X
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            imputed_X, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"MRNN test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_2_parameters(self):
-        assert hasattr(self.mrnn, "model") and self.mrnn.model is not None
-
-        assert hasattr(self.mrnn, "optimizer") and self.mrnn.optimizer is not None
-
-        assert hasattr(self.mrnn, "best_loss")
-        self.assertNotEqual(self.mrnn.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.mrnn, "best_model_dict")
-            and self.mrnn.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="imputation-mrnn")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.mrnn)
-
-        # save the trained model into file, and check if the path exists
-        self.mrnn.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.mrnn.load_model(saved_model_path)
-
-
-class TestLOCF(unittest.TestCase):
-    logger.info("Running tests for an imputation model LOCF...")
-    locf = LOCF(nan=0)
-
-    @pytest.mark.xdist_group(name="imputation-locf")
-    def test_0_impute(self):
-        test_X_imputed = self.locf.impute(TEST_SET)
-        assert not np.isnan(
-            test_X_imputed
-        ).any(), "Output still has missing values after running impute()."
-        test_MAE = cal_mae(
-            test_X_imputed, DATA["test_X_intact"], DATA["test_X_indicating_mask"]
-        )
-        logger.info(f"LOCF test_MAE: {test_MAE}")
-
-    @pytest.mark.xdist_group(name="imputation-locf")
-    def test_1_parameters(self):
-        assert hasattr(self.locf, "nan") and self.locf.nan is not None
-
-
-class TestClassificationBRITS(unittest.TestCase):
-    logger.info("Running tests for a classification model BRITS...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "BRITS")
-    model_save_name = "saved_BRITS_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a BRITS model
-    brits = BRITS(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_classes=DATA["n_classes"],
-        rnn_hidden_size=256,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        model_saving_strategy="better",
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_0_fit(self):
-        self.brits.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_1_classify(self):
-        predictions = self.brits.classify(TEST_SET)
-        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
-        logger.info(
-            f'ROC_AUC: {metrics["roc_auc"]}, \n'
-            f'PR_AUC: {metrics["pr_auc"]},\n'
-            f'F1: {metrics["f1"]},\n'
-            f'Precision: {metrics["precision"]},\n'
-            f'Recall: {metrics["recall"]},\n'
-        )
-        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_2_parameters(self):
-        assert hasattr(self.brits, "model") and self.brits.model is not None
-
-        assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None
-
-        assert hasattr(self.brits, "best_loss")
-        self.assertNotEqual(self.brits.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.brits, "best_model_dict")
-            and self.brits.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="classification-brits")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.brits)
-
-        # save the trained model into file, and check if the path exists
-        self.brits.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.brits.load_model(saved_model_path)
-
-
-class TestGRUD(unittest.TestCase):
-    logger.info("Running tests for a classification model GRUD...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "GRUD")
-    model_save_name = "saved_GRUD_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a GRUD model
-    grud = GRUD(
-        DATA["n_steps"],
-        DATA["n_features"],
-        n_classes=DATA["n_classes"],
-        rnn_hidden_size=256,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_0_fit(self):
-        self.grud.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_1_classify(self):
-        predictions = self.grud.classify(TEST_SET)
-        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
-        logger.info(
-            f'ROC_AUC: {metrics["roc_auc"]}, \n'
-            f'PR_AUC: {metrics["pr_auc"]},\n'
-            f'F1: {metrics["f1"]},\n'
-            f'Precision: {metrics["precision"]},\n'
-            f'Recall: {metrics["recall"]},\n'
-        )
-        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_2_parameters(self):
-        assert hasattr(self.grud, "model") and self.grud.model is not None
-
-        assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None
-
-        assert hasattr(self.grud, "best_loss")
-        self.assertNotEqual(self.grud.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.grud, "best_model_dict")
-            and self.grud.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="classification-grud")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.grud)
-
-        # save the trained model into file, and check if the path exists
-        self.grud.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.grud.load_model(saved_model_path)
-
-
-class TestRaindrop(unittest.TestCase):
-    logger.info("Running tests for a classification model Raindrop...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLASSIFICATION, "Raindrop")
-    model_save_name = "saved_Raindrop_model.pypots"
-
-    # initialize a Raindrop model
-    raindrop = Raindrop(
-        DATA["n_steps"],
-        DATA["n_features"],
-        DATA["n_classes"],
-        n_layers=2,
-        d_model=DATA["n_features"] * 4,
-        d_inner=256,
-        n_heads=2,
-        dropout=0.3,
-        d_static=0,
-        aggregation="mean",
-        sensor_wise_mask=False,
-        static=False,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-    )
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_0_fit(self):
-        self.raindrop.fit(TRAIN_SET, VAL_SET)
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_1_classify(self):
-        predictions = self.raindrop.classify(TEST_SET)
-        metrics = cal_binary_classification_metrics(predictions, DATA["test_y"])
-        logger.info(
-            f'ROC_AUC: {metrics["roc_auc"]}, \n'
-            f'PR_AUC: {metrics["pr_auc"]},\n'
-            f'F1: {metrics["f1"]},\n'
-            f'Precision: {metrics["precision"]},\n'
-            f'Recall: {metrics["recall"]},\n'
-        )
-        assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5"
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_2_parameters(self):
-        assert hasattr(self.raindrop, "model") and self.raindrop.model is not None
-
-        assert (
-            hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None
-        )
-
-        assert hasattr(self.raindrop, "best_loss")
-        self.assertNotEqual(self.raindrop.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.raindrop, "best_model_dict")
-            and self.raindrop.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="classification-raindrop")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.raindrop)
-
-        # save the trained model into file, and check if the path exists
-        self.raindrop.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.raindrop.load_model(saved_model_path)
-
-
-class TestCRLI(unittest.TestCase):
-    logger.info("Running tests for a clustering model CRLI...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "CRLI")
-    model_save_name = "saved_CRLI_model.pypots"
-
-    # initialize an Adam optimizer
-    G_optimizer = Adam(lr=0.001, weight_decay=1e-5)
-    D_optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a CRLI model
-    crli = CRLI(
-        n_steps=DATA["n_steps"],
-        n_features=DATA["n_features"],
-        n_clusters=DATA["n_classes"],
-        n_generator_layers=2,
-        rnn_hidden_size=128,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        G_optimizer=G_optimizer,
-        D_optimizer=D_optimizer,
-    )
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_0_fit(self):
-        self.crli.fit(TRAIN_SET)
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_1_parameters(self):
-        assert hasattr(self.crli, "model") and self.crli.model is not None
-
-        assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None
-        assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None
-
-        assert hasattr(self.crli, "best_loss")
-        self.assertNotEqual(self.crli.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.crli, "best_model_dict")
-            and self.crli.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_2_cluster(self):
-        clustering = self.crli.cluster(TEST_SET)
-        RI = cal_rand_index(clustering, DATA["test_y"])
-        CP = cal_cluster_purity(clustering, DATA["test_y"])
-        logger.info(f"RI: {RI}\nCP: {CP}")
-
-    @pytest.mark.xdist_group(name="clustering-crli")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.crli)
-
-        # save the trained model into file, and check if the path exists
-        self.crli.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.crli.load_model(saved_model_path)
-
-
-class TestVaDER(unittest.TestCase):
-    logger.info("Running tests for a clustering model Transformer...")
-
-    # set the log and model saving path
-    saving_path = os.path.join(RESULT_SAVING_DIR_FOR_CLUSTERING, "VaDER")
-    model_save_name = "saved_VaDER_model.pypots"
-
-    # initialize an Adam optimizer
-    optimizer = Adam(lr=0.001, weight_decay=1e-5)
-
-    # initialize a VaDER model
-    vader = VaDER(
-        n_steps=DATA["n_steps"],
-        n_features=DATA["n_features"],
-        n_clusters=DATA["n_classes"],
-        rnn_hidden_size=64,
-        d_mu_stddev=5,
-        pretrain_epochs=20,
-        epochs=EPOCHS,
-        saving_path=saving_path,
-        optimizer=optimizer,
-        num_workers=2,
-        device=DEVICES,
-    )
-
-    @pytest.mark.xdist_group(name="clustering-vader")
-    def test_0_fit(self):
-        self.vader.fit(TRAIN_SET)
-
-    @pytest.mark.xdist_group(name="clustering-vader")
-    def test_1_cluster(self):
-        try:
-            clustering = self.vader.cluster(TEST_SET)
-            RI = cal_rand_index(clustering, DATA["test_y"])
-            CP = cal_cluster_purity(clustering, DATA["test_y"])
-            logger.info(f"RI: {RI}\nCP: {CP}")
-        except np.linalg.LinAlgError as e:
-            logger.error(
-                f"{e}\n"
-                "Got singular matrix, please try to retrain the model to fix this"
-            )
-
-    @pytest.mark.xdist_group(name="clustering-vader")
-    def test_2_parameters(self):
-        assert hasattr(self.vader, "model") and self.vader.model is not None
-
-        assert hasattr(self.vader, "optimizer") and self.vader.optimizer is not None
-
-        assert hasattr(self.vader, "best_loss")
-        self.assertNotEqual(self.vader.best_loss, float("inf"))
-
-        assert (
-            hasattr(self.vader, "best_model_dict")
-            and self.vader.best_model_dict is not None
-        )
-
-    @pytest.mark.xdist_group(name="clustering-vader")
-    def test_3_saving_path(self):
-        # whether the root saving dir exists, which should be created by save_log_into_tb_file
-        assert os.path.exists(
-            self.saving_path
-        ), f"file {self.saving_path} does not exist"
-
-        # check if the tensorboard file and model checkpoints exist
-        check_tb_and_model_checkpoints_existence(self.vader)
-
-        # save the trained model into file, and check if the path exists
-        self.vader.save_model(
-            saving_dir=self.saving_path, file_name=self.model_save_name
-        )
-
-        # test loading the saved model, not necessary, but need to test
-        saved_model_path = os.path.join(self.saving_path, self.model_save_name)
-        self.vader.load_model(saved_model_path)
-
-
-class TestBTTF(unittest.TestCase):
-    logger.info("Running tests for a forecasting model BTTF...")
-
-    # initialize a BTTF model
-    pred_step = 4
-    bttf = BTTF(
-        n_steps=DATA["n_steps"] - pred_step,
-        n_features=10,
-        pred_step=pred_step,
-        rank=10,
-        time_lags=[1, 2, 3, 5, 5 + 1, 5 + 2, 10, 10 + 1, 10 + 2],
-        burn_iter=5,
-        gibbs_iter=5,
-        multi_step=1,
-    )
-
-    @pytest.mark.xdist_group(name="forecasting-bttf")
-    def test_0_forecasting(self):
-        predictions = self.bttf.forecast({"X": DATA["test_X"][:, : -self.pred_step]})
-        logger.info(f"prediction shape: {predictions.shape}")
-        mae = cal_mae(predictions, DATA["test_X_intact"][:, -self.pred_step :])
-        logger.info(f"prediction MAE: {mae}")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
new file mode 100644
index 00000000..f0b4685e
--- /dev/null
+++ b/tests/utils/__init__.py
@@ -0,0 +1,6 @@
+"""
+Test cases for the functions and classes in package `pypots.utils`.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
diff --git a/tests/test_utils.py b/tests/utils/logging.py
similarity index 64%
rename from tests/test_utils.py
rename to tests/utils/logging.py
index 0fd48ec8..113f0dde 100644
--- a/tests/test_utils.py
+++ b/tests/utils/logging.py
@@ -1,5 +1,5 @@
 """
-Test cases for the functions and classes in package `pypots.utils`.
+Test cases for the functions and classes in package `pypots.utils.logging`.
 """
 
 # Created by Wenjie Du <wenjay.du@gmail.com>
@@ -9,10 +9,7 @@
 import shutil
 import unittest
 
-import torch
-
 from pypots.utils.logging import Logger
-from pypots.utils.random import set_random_seed
 
 
 class TestLogging(unittest.TestCase):
@@ -49,25 +46,5 @@ def test_saving_log_into_file(self):
         shutil.rmtree("test_log", ignore_errors=True)
 
 
-class TestRandom(unittest.TestCase):
-    def test_set_random_seed(self):
-        random_state1 = torch.get_rng_state()
-        torch.rand(
-            1, 3
-        )  # randomly generate something, the random state will be reset, so two states should be varying
-        random_state2 = torch.get_rng_state()
-        assert not torch.equal(
-            random_state1, random_state2
-        ), "The random seed hasn't set, so two random states should be different."
-
-        set_random_seed(26)
-        random_state1 = torch.get_rng_state()
-        set_random_seed(26)
-        random_state2 = torch.get_rng_state()
-        assert torch.equal(
-            random_state1, random_state2
-        ), "The random seed has been set, two random states are not the same."
-
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/utils/random.py b/tests/utils/random.py
new file mode 100644
index 00000000..0d1a0ca0
--- /dev/null
+++ b/tests/utils/random.py
@@ -0,0 +1,36 @@
+"""
+Test cases for the functions and classes in package `pypots.utils.random`.
+"""
+
+# Created by Wenjie Du <wenjay.du@gmail.com>
+# License: GPL-v3
+
+import unittest
+
+import torch
+
+from pypots.utils.random import set_random_seed
+
+
+class TestRandom(unittest.TestCase):
+    def test_set_random_seed(self):
+        random_state1 = torch.get_rng_state()
+        torch.rand(
+            1, 3
+        )  # draw some random numbers; the RNG state captured afterwards is expected to differ from the one above
+        random_state2 = torch.get_rng_state()
+        assert not torch.equal(
+            random_state1, random_state2
+        ), "The random seed hasn't set, so two random states should be different."
+
+        set_random_seed(26)  # seeding twice with the same value is expected to yield identical RNG states
+        random_state1 = torch.get_rng_state()
+        set_random_seed(26)
+        random_state2 = torch.get_rng_state()
+        assert torch.equal(
+            random_state1, random_state2
+        ), "The random seed has been set, two random states are not the same."
+
+
+if __name__ == "__main__":
+    unittest.main()