Skip to content

Commit

Permalink
Refactor testing cases (#189)
Browse files Browse the repository at this point in the history
* refactor: clear up testing cases;

* refactor: refactor code in Dataset classes for models;

* refactor: adjust testing workflows according to refactored test cases;

* fix: turn missing_mask into torch.float;

* fix: error in BTTF testing case;

* feat: using pip to manage dependencies in CI testing workflow, and using conda in Daily testing workflow;
  • Loading branch information
WenjieDu authored Sep 21, 2023
1 parent 9bfffa1 commit ca6e2cd
Show file tree
Hide file tree
Showing 59 changed files with 2,111 additions and 2,227 deletions.
59 changes: 38 additions & 21 deletions .github/workflows/testing_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,60 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -l {0}
shell: bash {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.10"]
torch-version: ["1.13.1"]

steps:
- name: Check out the repo code
uses: actions/checkout@v3

- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
- name: Determine the Python version
uses: haya14busa/action-cond@v1
id: condval
with:
activate-environment: pypots-test
python-version: ${{ matrix.python-version }}
environment-file: tests/environment_for_conda_test.yml
auto-activate-base: false
cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
# Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
if_true: "3.7.16"
if_false: ${{ matrix.python-version }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ steps.condval.outputs.value }}
check-latest: true
cache: pip
cache-dependency-path: |
setup.cfg
- name: Install PyTorch ${{ matrix.torch-version }}+cpu
# we have to install torch in advance because torch_sparse needs it for compilation,
# refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
run: |
which python
which pip
python -m pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
python -c "import torch; print('PyTorch:', torch.__version__)"
- name: Install other dependencies
run: |
pip install pypots
pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
pip install -e ".[dev]"
- name: Fetch the test environment details
run: |
which python
conda info
conda list
pip list
- name: Test with pytest
run: |
# run tests separately here due to Segmentation Fault in test_clustering when run all in
# one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
python -m pytest -rA tests/test_classification.py -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_imputation.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_clustering.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_forecasting.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_optim.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_data.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_utils.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_cli.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
coverage run --source=pypots -m pytest -rA tests/*/*
- name: Generate the LCOV report
run: |
Expand All @@ -61,4 +78,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: 'coverage.lcov'
path-to-lcov: "coverage.lcov"
60 changes: 21 additions & 39 deletions .github/workflows/testing_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,61 +10,43 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash {0}
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
torch-version: ["1.13.1"]
python-version: ["3.7", "3.10"]

steps:
- name: Check out the repo code
uses: actions/checkout@v3

- name: Determine the Python version
uses: haya14busa/action-cond@v1
id: condval
- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
with:
cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
# Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
if_true: "3.7.16"
if_false: ${{ matrix.python-version }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ steps.condval.outputs.value }}
check-latest: true
cache: pip
cache-dependency-path: |
setup.cfg
- name: Install PyTorch ${{ matrix.torch-version }}+cpu
# we have to install torch in advance because torch_sparse needs it for compilation,
# refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
run: |
which python
which pip
python -m pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
python -c "import torch; print('PyTorch:', torch.__version__)"
- name: Install other dependencies
run: |
pip install pypots
pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
pip install -e ".[dev]"
activate-environment: pypots-test
python-version: ${{ matrix.python-version }}
environment-file: tests/environment_for_conda_test.yml
auto-activate-base: false

- name: Fetch the test environment details
run: |
which python
pip list
conda info
conda list
- name: Test with pytest
run: |
coverage run --source=pypots -m pytest --ignore tests/test_training_on_multi_gpus.py
# ignore the test_training_on_multi_gpus.py because it requires multiple GPUs which are not available on GitHub Actions
# run tests separately here due to Segmentation Fault in test_clustering when run all in
# one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
python -m pytest -rA tests/classification/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/imputation/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/clustering/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/forecasting/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/optim/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/data/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/utils/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/cli/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- name: Generate the LCOV report
run: |
Expand All @@ -74,4 +56,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: "coverage.lcov"
path-to-lcov: 'coverage.lcov'
24 changes: 22 additions & 2 deletions docs/pypots.forecasting.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
pypots.forecasting package
==========================

Subpackages
-----------

pypots.forecasting.bttf module
.. toctree::
:maxdepth: 4

pypots.forecasting.bttf
pypots.forecasting.template

Submodules
----------

pypots.forecasting.base module
------------------------------

.. automodule:: pypots.forecasting.bttf
.. automodule:: pypots.forecasting.base
:members:
:undoc-members:
:show-inheritance:
:inherited-members:

Module contents
---------------

.. automodule:: pypots.forecasting
:members:
:undoc-members:
:show-inheritance:
Expand Down
2 changes: 1 addition & 1 deletion pypots/classification/grud/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X_filledLOCF = self.locf._locf_torch(X.unsqueeze(dim=0)).squeeze()
X = torch.nan_to_num(X)
Expand Down
File renamed without changes.
File renamed without changes.
12 changes: 9 additions & 3 deletions pypots/clustering/vader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
# License: GPL-v3


from typing import Union
from typing import Union, Iterable

from ..crli.data import DatasetForCRLI
from ...data.base import BaseDataset


class DatasetForVaDER(DatasetForCRLI):
class DatasetForVaDER(BaseDataset):
"""Dataset class for model VaDER.
Parameters
Expand Down Expand Up @@ -45,3 +45,9 @@ def __init__(
file_type: str = "h5py",
):
super().__init__(data, return_labels, file_type)

def _fetch_data_from_array(self, idx: int) -> Iterable:
return super()._fetch_data_from_array(idx)

def _fetch_data_from_file(self, idx: int) -> Iterable:
return super()._fetch_data_from_file(idx)
16 changes: 8 additions & 8 deletions pypots/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
The collated data sample, a list including all necessary sample info.
"""

X = self.X[idx]
missing_mask = ~torch.isnan(X)
X = self.X[idx].to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
X.to(torch.float32),
missing_mask.to(torch.float32),
X,
missing_mask,
]

if self.y is not None and self.return_labels:
Expand Down Expand Up @@ -279,13 +279,13 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
missing_mask = ~torch.isnan(X)
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
X.to(torch.float32),
missing_mask.to(torch.float32),
X,
missing_mask,
]

# if the dataset has labels and is for training, then fetch it from the file
Expand Down
11 changes: 9 additions & 2 deletions pypots/data/saving.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
from pypots.utils.logging import logger


def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
def save_dict_into_h5(
data_dict: dict,
saving_dir: str,
saving_name: str = "datasets.h5",
) -> None:
"""Save the given data (in a dictionary) into the given h5 file.
Parameters
Expand All @@ -25,6 +29,9 @@ def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
saving_dir : str,
The h5 file to save the data.
saving_name : str, optional (default="datasets.h5")
The final name of the saved h5 file.
"""

def save_set(handle, name, data):
Expand All @@ -36,7 +43,7 @@ def save_set(handle, name, data):
handle.create_dataset(name, data=data)

create_dir_if_not_exist(saving_dir)
saving_path = os.path.join(saving_dir, "datasets.h5")
saving_path = os.path.join(saving_dir, saving_name)
with h5py.File(saving_path, "w") as hf:
for k, v in data_dict.items():
save_set(hf, k, v)
Expand Down
File renamed without changes.
26 changes: 13 additions & 13 deletions pypots/imputation/brits/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ def __init__(

self.processed_data = {
"forward": {
"X": forward_X,
"missing_mask": forward_missing_mask,
"delta": forward_delta,
"X": forward_X.to(torch.float32),
"missing_mask": forward_missing_mask.to(torch.float32),
"delta": forward_delta.to(torch.float32),
},
"backward": {
"X": backward_X,
"missing_mask": backward_missing_mask,
"delta": backward_delta,
"X": backward_X.to(torch.float32),
"missing_mask": backward_missing_mask.to(torch.float32),
"delta": backward_delta.to(torch.float32),
},
}

Expand Down Expand Up @@ -101,13 +101,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
self.processed_data["forward"]["X"][idx].to(torch.float32),
self.processed_data["forward"]["missing_mask"][idx].to(torch.float32),
self.processed_data["forward"]["delta"][idx].to(torch.float32),
self.processed_data["forward"]["X"][idx],
self.processed_data["forward"]["missing_mask"][idx],
self.processed_data["forward"]["delta"][idx],
# for backward
self.processed_data["backward"]["X"][idx].to(torch.float32),
self.processed_data["backward"]["missing_mask"][idx].to(torch.float32),
self.processed_data["backward"]["delta"][idx].to(torch.float32),
self.processed_data["backward"]["X"][idx],
self.processed_data["backward"]["missing_mask"][idx],
self.processed_data["backward"]["delta"][idx],
]

if self.y is not None and self.return_labels:
Expand All @@ -133,7 +133,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)

Expand Down
9 changes: 4 additions & 5 deletions pypots/imputation/gpvae/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import torch

from ...data.base import BaseDataset
from ...data.utils import torch_parse_delta


class DatasetForGPVAE(BaseDataset):
Expand Down Expand Up @@ -51,7 +50,7 @@ def __init__(
if not isinstance(self.data, str):
# calculate all delta here.
missing_mask = (~torch.isnan(self.X)).type(torch.float32)
X = torch.nan_to_num(self.X)
X = torch.nan_to_num(self.X).to(torch.float32)

self.processed_data = {
"X": X,
Expand Down Expand Up @@ -89,8 +88,8 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
self.processed_data["X"][idx].to(torch.float32),
self.processed_data["missing_mask"][idx].to(torch.float32),
self.processed_data["X"][idx],
self.processed_data["missing_mask"][idx],
]

if self.y is not None and self.return_labels:
Expand All @@ -116,7 +115,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)

Expand Down
Loading

0 comments on commit ca6e2cd

Please sign in to comment.