Skip to content

Commit

Permalink
Refactor testing cases (#189)
Browse files Browse the repository at this point in the history
* refactor: clear up testing cases;

* refactor: refactor code in Dataset classes for models;

* refactor: adjust testing workflows according to refactored test cases;

* fix: turn missing_mask into torch.float;

* fix: error in BTTF testing case;

* feat: using pip to manage dependencies in CI testing workflow, and using conda in Daily testing workflow;
  • Loading branch information
WenjieDu authored Sep 21, 2023
1 parent 9bfffa1 commit ca6e2cd
Show file tree
Hide file tree
Showing 59 changed files with 2,111 additions and 2,227 deletions.
59 changes: 38 additions & 21 deletions .github/workflows/testing_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,60 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -l {0}
shell: bash {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.10"]
torch-version: ["1.13.1"]

steps:
- name: Check out the repo code
uses: actions/checkout@v3

- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
- name: Determine the Python version
uses: haya14busa/action-cond@v1
id: condval
with:
activate-environment: pypots-test
python-version: ${{ matrix.python-version }}
environment-file: tests/environment_for_conda_test.yml
auto-activate-base: false
cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
# Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
if_true: "3.7.16"
if_false: ${{ matrix.python-version }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ steps.condval.outputs.value }}
check-latest: true
cache: pip
cache-dependency-path: |
setup.cfg
- name: Install PyTorch ${{ matrix.torch-version }}+cpu
# we have to install torch in advance because torch_sparse needs it for compilation,
# refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
run: |
which python
which pip
python -m pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
python -c "import torch; print('PyTorch:', torch.__version__)"
- name: Install other dependencies
run: |
pip install pypots
pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
pip install -e ".[dev]"
- name: Fetch the test environment details
run: |
which python
conda info
conda list
pip list
- name: Test with pytest
run: |
# run tests separately here due to Segmentation Fault in test_clustering when run all in
# one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
python -m pytest -rA tests/test_classification.py -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_imputation.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_clustering.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_forecasting.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_optim.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_data.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_utils.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_cli.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
coverage run --source=pypots -m pytest -rA tests/*/*
- name: Generate the LCOV report
run: |
Expand All @@ -61,4 +78,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: 'coverage.lcov'
path-to-lcov: "coverage.lcov"
60 changes: 21 additions & 39 deletions .github/workflows/testing_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,61 +10,43 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash {0}
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
torch-version: ["1.13.1"]
python-version: ["3.7", "3.10"]

steps:
- name: Check out the repo code
uses: actions/checkout@v3

- name: Determine the Python version
uses: haya14busa/action-cond@v1
id: condval
- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
with:
cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
# Note: the latest 3.7 subversion 3.7.17 for MacOS has "ModuleNotFoundError: No module named '_bz2'"
if_true: "3.7.16"
if_false: ${{ matrix.python-version }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ steps.condval.outputs.value }}
check-latest: true
cache: pip
cache-dependency-path: |
setup.cfg
- name: Install PyTorch ${{ matrix.torch-version }}+cpu
# we have to install torch in advance because torch_sparse needs it for compilation,
# refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
run: |
which python
which pip
python -m pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
python -c "import torch; print('PyTorch:', torch.__version__)"
- name: Install other dependencies
run: |
pip install pypots
pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
pip install -e ".[dev]"
activate-environment: pypots-test
python-version: ${{ matrix.python-version }}
environment-file: tests/environment_for_conda_test.yml
auto-activate-base: false

- name: Fetch the test environment details
run: |
which python
pip list
conda info
conda list
- name: Test with pytest
run: |
coverage run --source=pypots -m pytest --ignore tests/test_training_on_multi_gpus.py
# ignore the test_training_on_multi_gpus.py because it requires multiple GPUs which are not available on GitHub Actions
# run tests separately here due to Segmentation Fault in test_clustering when run all in
# one command with `pytest` on MacOS. Bugs not caught, so this is a trade-off to avoid SF.
python -m pytest -rA tests/classification/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/imputation/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/clustering/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/forecasting/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/optim/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/data/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/utils/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/cli/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
- name: Generate the LCOV report
run: |
Expand All @@ -74,4 +56,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: "coverage.lcov"
path-to-lcov: 'coverage.lcov'
24 changes: 22 additions & 2 deletions docs/pypots.forecasting.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
pypots.forecasting package
==========================

Subpackages
-----------

pypots.forecasting.bttf module
.. toctree::
:maxdepth: 4

pypots.forecasting.bttf
pypots.forecasting.template

Submodules
----------

pypots.forecasting.base module
------------------------------

.. automodule:: pypots.forecasting.bttf
.. automodule:: pypots.forecasting.base
:members:
:undoc-members:
:show-inheritance:
:inherited-members:

Module contents
---------------

.. automodule:: pypots.forecasting
:members:
:undoc-members:
:show-inheritance:
Expand Down
2 changes: 1 addition & 1 deletion pypots/classification/grud/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X_filledLOCF = self.locf._locf_torch(X.unsqueeze(dim=0)).squeeze()
X = torch.nan_to_num(X)
Expand Down
File renamed without changes.
File renamed without changes.
12 changes: 9 additions & 3 deletions pypots/clustering/vader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
# License: GPL-v3


from typing import Union
from typing import Union, Iterable

from ..crli.data import DatasetForCRLI
from ...data.base import BaseDataset


class DatasetForVaDER(DatasetForCRLI):
class DatasetForVaDER(BaseDataset):
"""Dataset class for model VaDER.
Parameters
Expand Down Expand Up @@ -45,3 +45,9 @@ def __init__(
file_type: str = "h5py",
):
super().__init__(data, return_labels, file_type)

def _fetch_data_from_array(self, idx: int) -> Iterable:
return super()._fetch_data_from_array(idx)

def _fetch_data_from_file(self, idx: int) -> Iterable:
return super()._fetch_data_from_file(idx)
16 changes: 8 additions & 8 deletions pypots/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
The collated data sample, a list including all necessary sample info.
"""

X = self.X[idx]
missing_mask = ~torch.isnan(X)
X = self.X[idx].to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
X.to(torch.float32),
missing_mask.to(torch.float32),
X,
missing_mask,
]

if self.y is not None and self.return_labels:
Expand Down Expand Up @@ -279,13 +279,13 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
missing_mask = ~torch.isnan(X)
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
X.to(torch.float32),
missing_mask.to(torch.float32),
X,
missing_mask,
]

# if the dataset has labels and is for training, then fetch it from the file
Expand Down
11 changes: 9 additions & 2 deletions pypots/data/saving.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
from pypots.utils.logging import logger


def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
def save_dict_into_h5(
data_dict: dict,
saving_dir: str,
saving_name: str = "datasets.h5",
) -> None:
"""Save the given data (in a dictionary) into the given h5 file.
Parameters
Expand All @@ -25,6 +29,9 @@ def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
saving_dir : str,
The h5 file to save the data.
saving_name : str, optional (default="datasets.h5")
The final name of the saved h5 file.
"""

def save_set(handle, name, data):
Expand All @@ -36,7 +43,7 @@ def save_set(handle, name, data):
handle.create_dataset(name, data=data)

create_dir_if_not_exist(saving_dir)
saving_path = os.path.join(saving_dir, "datasets.h5")
saving_path = os.path.join(saving_dir, saving_name)
with h5py.File(saving_path, "w") as hf:
for k, v in data_dict.items():
save_set(hf, k, v)
Expand Down
File renamed without changes.
26 changes: 13 additions & 13 deletions pypots/imputation/brits/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ def __init__(

self.processed_data = {
"forward": {
"X": forward_X,
"missing_mask": forward_missing_mask,
"delta": forward_delta,
"X": forward_X.to(torch.float32),
"missing_mask": forward_missing_mask.to(torch.float32),
"delta": forward_delta.to(torch.float32),
},
"backward": {
"X": backward_X,
"missing_mask": backward_missing_mask,
"delta": backward_delta,
"X": backward_X.to(torch.float32),
"missing_mask": backward_missing_mask.to(torch.float32),
"delta": backward_delta.to(torch.float32),
},
}

Expand Down Expand Up @@ -101,13 +101,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
self.processed_data["forward"]["X"][idx].to(torch.float32),
self.processed_data["forward"]["missing_mask"][idx].to(torch.float32),
self.processed_data["forward"]["delta"][idx].to(torch.float32),
self.processed_data["forward"]["X"][idx],
self.processed_data["forward"]["missing_mask"][idx],
self.processed_data["forward"]["delta"][idx],
# for backward
self.processed_data["backward"]["X"][idx].to(torch.float32),
self.processed_data["backward"]["missing_mask"][idx].to(torch.float32),
self.processed_data["backward"]["delta"][idx].to(torch.float32),
self.processed_data["backward"]["X"][idx],
self.processed_data["backward"]["missing_mask"][idx],
self.processed_data["backward"]["delta"][idx],
]

if self.y is not None and self.return_labels:
Expand All @@ -133,7 +133,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)

Expand Down
9 changes: 4 additions & 5 deletions pypots/imputation/gpvae/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import torch

from ...data.base import BaseDataset
from ...data.utils import torch_parse_delta


class DatasetForGPVAE(BaseDataset):
Expand Down Expand Up @@ -51,7 +50,7 @@ def __init__(
if not isinstance(self.data, str):
# calculate all delta here.
missing_mask = (~torch.isnan(self.X)).type(torch.float32)
X = torch.nan_to_num(self.X)
X = torch.nan_to_num(self.X).to(torch.float32)

self.processed_data = {
"X": X,
Expand Down Expand Up @@ -89,8 +88,8 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
self.processed_data["X"][idx].to(torch.float32),
self.processed_data["missing_mask"][idx].to(torch.float32),
self.processed_data["X"][idx],
self.processed_data["missing_mask"][idx],
]

if self.y is not None and self.return_labels:
Expand All @@ -116,7 +115,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)

Expand Down
Loading

0 comments on commit ca6e2cd

Please sign in to comment.