Merge pull request #357 from WenjieDu/dev

Implement forecasting CSDI and update the templates

WenjieDu authored Apr 18, 2024
2 parents cb1ae37 + 54a54cc commit 64d23fc
Showing 115 changed files with 2,780 additions and 1,237 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing_ci.yml
@@ -78,8 +78,8 @@ jobs:
- name: Test with pytest
run: |
- python tests/global_test_config.py
rm -rf testing_results && rm -rf tests/__pycache__ && rm -rf tests/*/__pycache__
+ python tests/global_test_config.py
python -m pytest -rA tests/*/* -s -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
- name: Generate the LCOV report
7 changes: 5 additions & 2 deletions .readthedocs.yaml
@@ -31,6 +31,9 @@ build:
- pip install ./TSDB_repo && pip install ./PyGrinder_repo && pip install .

post_install:
+ # To fix the exception: This documentation is not using `furo.css` as the stylesheet.
+ # If you have set `html_style` in your conf.py file, remove it.
+ - pip install sphinx==7.2.6
+ # this docutils version fixes issue#102, put it in post_install to avoid being
+ # overwritten by other versions (like 0.19) while installing other packages
  - pip install docutils==0.20
- # this version fixes issue#102, put it in post_install to avoid being
- # overwritten by other versions (like 0.19) while installing other packages
1 change: 1 addition & 0 deletions README.md
@@ -228,6 +228,7 @@ the same as we did in [SAITS paper](https://arxiv.org/pdf/2202.08516).**
| Neural Net | VaDER | Variational Deep Embedding with Recurrence [^7] | 2019 |
| ***`Forecasting`*** | 🚥 | 🚥 | 🚥 |
| **Type** | **Abbr.** | **Full name of the algorithm/model/paper** | **Year** |
+ | Neural Net | CSDI | Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation [^12] | 2021 |
| Probabilistic | BTTF | Bayesian Temporal Tensor Factorization [^8] | 2021 |
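
With this commit, CSDI is exposed for forecasting as well as imputation. Below is a minimal sketch of how the new forecasting model might be called; it assumes the constructor mirrors `pypots.imputation.CSDI`, so the hyperparameter names and the exact keys expected in `train_set` are illustrative, not the definitive API.

```python
# Hedged sketch of the new forecasting CSDI; hyperparameter names are assumed
# to mirror pypots.imputation.CSDI and may differ in the released API.
import numpy as np
from pypots.forecasting import CSDI

n_samples, n_steps, n_features, n_pred_steps = 64, 48, 5, 12

# Toy partially observed history: NaNs mark the missing values.
X = np.random.randn(n_samples, n_steps, n_features)
X[np.random.rand(*X.shape) < 0.1] = np.nan

model = CSDI(
    n_steps=n_steps,
    n_features=n_features,
    n_pred_steps=n_pred_steps,      # forecasting horizon (assumed parameter name)
    n_pred_features=n_features,     # forecast all features (assumed parameter name)
    n_layers=2,
    n_heads=2,
    n_channels=32,
    d_time_embedding=64,
    d_feature_embedding=32,
    d_diffusion_embedding=64,
    epochs=5,
)
model.fit(train_set={"X": X})               # dict input, per the pypots/base.py docstrings
results = model.predict(test_set={"X": X})  # expected to carry a 'forecasting' key
```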


2 changes: 1 addition & 1 deletion docs/index.rst
@@ -193,7 +193,7 @@ Imputation Neural Net FEDformer (Frequency Enhanced De
Imputation Neural Net Informer (Beyond Efficient Transformer for Long Sequence Time-Series Forecasting) 2021 :cite:`zhou2021informer`
Imputation Neural Net Autoformer (Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting) 2021 :cite:`wu2021autoformer`
Imputation Neural Net US-GAN (Unsupervised GAN for Multivariate Time Series Imputation) 2021 :cite:`miao2021SSGAN`
- Imputation Neural Net CSDI (Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation) 2021 :cite:`tashiro2021csdi`
+ Imputation, Forecasting Neural Net CSDI (Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation) 2021 :cite:`tashiro2021csdi`
Imputation Neural Net GP-VAE (Gaussian Process Variational Autoencoder) 2020 :cite:`fortuin2020gpvae`
Imputation, Classification Neural Net BRITS (Bidirectional Recurrent Imputation for Time Series) 2018 :cite:`cao2018BRITS`
Imputation Neural Net M-RNN (Multi-directional Recurrent Neural Network) 2019 :cite:`yoon2019MRNN`
4 changes: 2 additions & 2 deletions docs/pypots.data.rst
@@ -1,10 +1,10 @@
pypots.data package
===================

- pypots.data.base
+ pypots.data.dataset
-----------------------

- .. automodule:: pypots.data.base
+ .. automodule:: pypots.data.dataset
:members:
:undoc-members:
:show-inheritance:
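
The docs now track the module rename from `pypots.data.base` to `pypots.data.dataset`. Downstream imports need the same one-line update; a hedged sketch follows, assuming `BaseDataset` is among the names that moved with the module.

```python
# Updating an import after the pypots.data.base -> pypots.data.dataset rename.
# BaseDataset is assumed to be re-exported from the renamed module.
# from pypots.data.base import BaseDataset     # before this commit
from pypots.data.dataset import BaseDataset    # after this commit
```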
20 changes: 10 additions & 10 deletions pypots/base.py
@@ -337,13 +337,13 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> None:
"""Train the classifier on the given data.
Parameters
----------
- train_set : dict or str
+ train_set :
The dataset for model training, should be a dictionary including keys as 'X',
or a path string locating a data file supported by PyPOTS (e.g. h5 file).
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -352,7 +352,7 @@ def fit(
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
- val_set : dict or str
+ val_set :
The dataset for model validating, should be a dictionary including keys as 'X',
or a path string locating a data file supported by PyPOTS (e.g. h5 file).
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -361,7 +361,7 @@ def fit(
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
- file_type : str
+ file_type :
The type of the given file if train_set and val_set are path strings.
"""
@@ -371,13 +371,13 @@ def predict(
def predict(
self,
test_set: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> dict:
"""Make predictions for the input data with the trained model.
Parameters
----------
- test_set : dict or str
+ test_set :
The dataset for model testing, should be a dictionary including keys as 'X',
or a path string locating a data file supported by PyPOTS (e.g. h5 file).
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -386,12 +386,12 @@ def predict(
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
- file_type : str
+ file_type :
The type of the given file if test_set is a path string.
Returns
-------
- result_dict: dict
+ result_dict :
Prediction results in a Python Dictionary for the given samples.
It should be a dictionary including keys as 'imputation', 'classification', 'clustering', and 'forecasting'.
Only the keys for tasks supported by the model will be returned.
@@ -512,14 +512,14 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> None:
raise NotImplementedError

@abstractmethod
def predict(
self,
test_set: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> dict:
raise NotImplementedError
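
The docstrings above describe two interchangeable input forms, an in-memory dict or a path to an HDF5 file, with `file_type` now defaulting to `"hdf5"` instead of `"h5py"`. A hedged sketch of that calling convention, using the BRITS classifier as a stand-in for any concrete model (its hyperparameter values here are illustrative):

```python
# Hedged sketch of the fit/predict convention documented above.
import numpy as np
from pypots.classification import BRITS

X = np.random.randn(100, 24, 7)             # [n_samples, n_steps, n_features]
X[np.random.rand(*X.shape) < 0.1] = np.nan  # PyPOTS models accept missing values
y = np.random.randint(0, 2, size=100)

model = BRITS(n_steps=24, n_features=7, rnn_hidden_size=32, n_classes=2, epochs=5)
model.fit(train_set={"X": X, "y": y})       # dict input; an HDF5 file path works too
result_dict = model.predict(test_set={"X": X})
# Only keys for tasks the model supports are present:
predictions = result_dict["classification"]
```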
24 changes: 12 additions & 12 deletions pypots/classification/base.py
@@ -72,7 +72,7 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> None:
"""Train the classifier on the given data.
@@ -106,15 +106,15 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> dict:
raise NotImplementedError

@abstractmethod
def classify(
self,
X: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> np.ndarray:
"""Classify the input data with the trained model.
@@ -214,12 +214,12 @@ def __init__(
self.n_classes = n_classes

@abstractmethod
- def _assemble_input_for_training(self, data) -> dict:
+ def _assemble_input_for_training(self, data: list) -> dict:
"""Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list,
+ data :
Input data from dataloader, should be list.
Returns
@@ -230,12 +230,12 @@ def _assemble_input_for_training(self, data) -> dict:
raise NotImplementedError
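
For context, a concrete subclass implements these hooks by unpacking the batch the dataloader yields and naming its fields. A hedged sketch follows; the tensor order is assumed for illustration and is not BRITS's actual layout.

```python
# Hedged sketch of a concrete _assemble_input_for_training implementation;
# the order of tensors in the dataloader batch is assumed, not actual.
def _assemble_input_for_training(self, data: list) -> dict:
    indices, X, missing_mask, deltas, y = data  # unpack the dataloader batch
    return {
        "X": X,
        "missing_mask": missing_mask,
        "deltas": deltas,
        "y": y,
    }
```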

@abstractmethod
- def _assemble_input_for_validating(self, data) -> dict:
+ def _assemble_input_for_validating(self, data: list) -> dict:
"""Assemble the given data into a dictionary for validating input.
Parameters
----------
- data : list,
+ data :
Data output from dataloader, should be list.
Returns
@@ -246,7 +246,7 @@ def _assemble_input_for_validating(self, data) -> dict:
raise NotImplementedError

@abstractmethod
- def _assemble_input_for_testing(self, data) -> dict:
+ def _assemble_input_for_testing(self, data: list) -> dict:
"""Assemble the given data into a dictionary for testing input.
Notes
@@ -259,7 +259,7 @@ def _assemble_input_for_testing(self, data) -> dict:
Parameters
----------
- data : list,
+ data :
Data output from dataloader, should be list.
Returns
@@ -386,7 +386,7 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> None:
"""Train the classifier on the given data.
@@ -420,15 +420,15 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> dict:
raise NotImplementedError

@abstractmethod
def classify(
self,
X: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> np.ndarray:
"""Classify the input data with the trained model.
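
The fit/predict/classify docstrings in this file all accept, besides a dict, a path to an HDF5 file holding dict-like key-value pairs. A hedged sketch of producing such a file with h5py; the filename and array shapes are illustrative.

```python
# Writing an HDF5 dataset file of the kind the docstrings describe,
# with 'X' and 'y' keys; filename and shapes are illustrative.
import h5py
import numpy as np

X = np.random.randn(100, 24, 7)        # [n_samples, n_steps, n_features]
y = np.random.randint(0, 2, size=100)

with h5py.File("train.h5", "w") as f:
    f.create_dataset("X", data=X)
    f.create_dataset("y", data=y)

# The file can then be passed by path:
# model.fit(train_set="train.h5", file_type="hdf5")
```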
17 changes: 11 additions & 6 deletions pypots/classification/brits/data.py
@@ -17,7 +17,7 @@ class DatasetForBRITS(DatasetForBRITS_Imputation):
Parameters
----------
- data : dict or str,
+ data :
The dataset for model input, should be a dictionary including keys as 'X' and 'y',
or a path string locating a data file.
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -26,7 +26,7 @@ class DatasetForBRITS(DatasetForBRITS_Imputation):
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.
- return_labels : bool, default = True,
+ return_y :
Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example,
during training of classification models, the Dataset class will return labels in __getitem__() for model input.
Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we
@@ -35,14 +35,19 @@ class DatasetForBRITS(DatasetForBRITS_Imputation):
with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for
distinction.
- file_type : str, default = "h5py"
+ file_type :
The type of the given file if train_set and val_set are path strings.
"""

def __init__(
self,
data: Union[dict, str],
- return_labels: bool = True,
- file_type: str = "h5py",
+ return_y: bool = True,
+ file_type: str = "hdf5",
):
- super().__init__(data, False, return_labels, file_type)
+ super().__init__(
+     data=data,
+     return_X_ori=False,
+     return_y=return_y,
+     file_type=file_type,
+ )
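
Callers that previously passed `return_labels=` must switch to `return_y=`, and the default `file_type` string changes as well. A hedged usage sketch:

```python
# The renamed keyword in action: return_labels= becomes return_y=,
# and file_type's default string is now "hdf5".
import numpy as np
from pypots.classification.brits.data import DatasetForBRITS

X = np.random.randn(100, 24, 7)
X[np.random.rand(*X.shape) < 0.1] = np.nan
y = np.random.randint(0, 2, size=100)

train_dataset = DatasetForBRITS({"X": X, "y": y}, return_y=True, file_type="hdf5")
sample = train_dataset[0]  # includes the label because return_y=True
```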
8 changes: 4 additions & 4 deletions pypots/classification/brits/model.py
@@ -208,7 +208,7 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> None:
# Step 1: wrap the input data with classes Dataset and DataLoader
training_set = DatasetForBRITS(train_set, file_type=file_type)
@@ -239,10 +239,10 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> dict:
self.model.eval() # set the model as eval status to freeze it.
- test_set = DatasetForBRITS(test_set, return_labels=False, file_type=file_type)
+ test_set = DatasetForBRITS(test_set, return_y=False, file_type=file_type)
test_loader = DataLoader(
test_set,
batch_size=self.batch_size,
@@ -267,7 +267,7 @@ def predict(
def classify(
self,
X: Union[dict, str],
- file_type: str = "h5py",
+ file_type: str = "hdf5",
) -> np.ndarray:
"""Classify the input data with the trained model.