-
-
Notifications
You must be signed in to change notification settings - Fork 122
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #215 from WenjieDu/wdu_dev
Replacing PyCorruptor with PyGrinder
- Loading branch information
Showing
16 changed files
with
92 additions
and
147 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,7 @@ Required Dependencies | |
* tensorboard | ||
* h5py | ||
* tsdb | ||
* pycorruptor | ||
* pygrinder | ||
|
||
|
||
Optional Dependencies | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,127 +5,33 @@ | |
# Created by Wenjie Du <[email protected]> | ||
# License: GPL-v3 | ||
|
||
|
||
from typing import Union | ||
|
||
import numpy as np | ||
import pycorruptor as corruptor | ||
import torch | ||
from tsdb import ( | ||
pickle_load as _pickle_load, | ||
pickle_dump as _pickle_dump, | ||
) | ||
|
||
pickle_load = _pickle_load | ||
pickle_dump = _pickle_dump | ||
|
||
|
||
def cal_missing_rate(X: Union[np.ndarray, torch.Tensor, list]) -> float: | ||
"""Calculate the missing rate of the given data. | ||
def _parse_delta_torch(missing_mask: torch.Tensor) -> torch.Tensor: | ||
"""Generate the time-gap matrix (i.e. the delta metrix) from the missing mask. | ||
Please refer to :cite:`che2018GRUD` for its math definition. | ||
Parameters | ||
---------- | ||
X : | ||
The data to calculate missing rate. | ||
missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features] | ||
Binary masks indicate missing data (0 means missing values, 1 means observed values). | ||
Returns | ||
------- | ||
missing_rate : | ||
The missing rate of the given data. | ||
""" | ||
missing_rate = corruptor.cal_missing_rate(X) | ||
return missing_rate | ||
|
||
|
||
def masked_fill( | ||
X: Union[np.ndarray, torch.Tensor, list], | ||
mask: Union[np.ndarray, torch.Tensor, list], | ||
value: float, | ||
) -> Union[np.ndarray, torch.Tensor]: | ||
"""Fill the masked values in ``X`` according to ``mask`` with the given ``value``. | ||
delta : | ||
The delta matrix indicates the time gaps between observed values. | ||
With the same shape of missing_mask. | ||
Parameters | ||
References | ||
---------- | ||
X : | ||
The data to be filled. | ||
mask : | ||
The mask for filling the given data. | ||
value : | ||
The value to fill the masked values. | ||
Returns | ||
------- | ||
filled_X : | ||
The filled data. | ||
""" | ||
filled_X = corruptor.masked_fill(X, mask, value) | ||
return filled_X | ||
|
||
|
||
def mcar( | ||
X: Union[np.ndarray, torch.Tensor, list], | ||
rate: float, | ||
nan: float = 0, | ||
) -> Union[np.ndarray, torch.Tensor]: | ||
"""Generate missing values in the given data with MCAR (Missing Completely At Random) mechanism. | ||
Parameters | ||
---------- | ||
X : | ||
Data vector. If X has any missing values, they should be numpy.nan. | ||
rate : | ||
Artificially missing rate, rate of the observed values which will be artificially masked as missing. | ||
Note that, | ||
`rate` = (number of artificially missing values) / np.sum(~np.isnan(self.data)), | ||
not (number of artificially missing values) / np.product(self.data.shape), | ||
considering that the given data may already contain missing values, | ||
the latter way may be confusing because if the original missing rate >= `rate`, | ||
the function will do nothing, i.e. it won't play the role it has to be. | ||
nan : | ||
Value used to fill NaN values. | ||
Returns | ||
------- | ||
X_intact : array, | ||
Original data with missing values (nan) filled with given parameter `nan`, with observed values intact. | ||
X_intact is for loss calculation in the masked imputation task. | ||
X : array, | ||
Original X with artificial missing values. X is for model input. | ||
Both originally-missing and artificially-missing values are filled with given parameter `nan`. | ||
missing_mask : array, | ||
The mask indicates all missing values in X. | ||
In it, 1 indicates observed values, and 0 indicates missing values. | ||
.. [1] `Che, Zhengping, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan Liu. | ||
"Recurrent neural networks for multivariate time series with missing values." | ||
Scientific reports 8, no. 1 (2018): 6085. | ||
<https://www.nature.com/articles/s41598-018-24271-9.pdf>`_ | ||
indicating_mask : array, | ||
The mask indicates the artificially-missing values in X, namely missing parts different from X_intact. | ||
In it, 1 indicates artificially missing values, and other values are indicated as 0. | ||
""" | ||
X = corruptor.mcar(X, rate, nan) | ||
return X | ||
|
||
|
||
def torch_parse_delta(missing_mask: torch.Tensor) -> torch.Tensor: | ||
"""Generate time-gap (delta) matrix from missing masks. | ||
Please refer to :cite:`che2018GRUD` for its math definition. | ||
Parameters | ||
---------- | ||
missing_mask : | ||
Binary masks indicate missing values. Shape of [n_steps, n_features] or [n_samples, n_steps, n_features] | ||
Returns | ||
------- | ||
delta | ||
Delta matrix indicates time gaps of missing values. | ||
""" | ||
|
||
def cal_delta_for_single_sample(mask: torch.Tensor) -> torch.Tensor: | ||
|
@@ -156,18 +62,28 @@ def cal_delta_for_single_sample(mask: torch.Tensor) -> torch.Tensor: | |
return delta | ||
|
||
|
||
def numpy_parse_delta(missing_mask: np.ndarray) -> np.ndarray: | ||
"""Generate time-gap (delta) matrix from missing masks. Please refer to :cite:`che2018GRUD` for its math definition. | ||
def _parse_delta_numpy(missing_mask: np.ndarray) -> np.ndarray: | ||
"""Generate the time-gap matrix (i.e. the delta metrix) from the missing mask. | ||
Please refer to :cite:`che2018GRUD` for its math definition. | ||
Parameters | ||
---------- | ||
missing_mask : | ||
Binary masks indicate missing values. Shape of [n_steps, n_features] or [n_samples, n_steps, n_features]. | ||
missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features] | ||
Binary masks indicate missing data (0 means missing values, 1 means observed values). | ||
Returns | ||
------- | ||
delta | ||
Delta matrix indicates time gaps of missing values. | ||
delta : | ||
The delta matrix indicates the time gaps between observed values. | ||
With the same shape of missing_mask. | ||
References | ||
---------- | ||
.. [1] `Che, Zhengping, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan Liu. | ||
"Recurrent neural networks for multivariate time series with missing values." | ||
Scientific reports 8, no. 1 (2018): 6085. | ||
<https://www.nature.com/articles/s41598-018-24271-9.pdf>`_ | ||
""" | ||
|
||
def cal_delta_for_single_sample(mask: np.ndarray) -> np.ndarray: | ||
|
@@ -194,6 +110,40 @@ def cal_delta_for_single_sample(mask: np.ndarray) -> np.ndarray: | |
return delta | ||
|
||
|
||
def parse_delta(
    missing_mask: Union[np.ndarray, torch.Tensor]
) -> Union[np.ndarray, torch.Tensor]:
    """Generate the time-gap matrix (i.e. the delta matrix) from the missing mask.

    This is a type-dispatching entry point: a ``numpy.ndarray`` mask is handed to
    ``_parse_delta_numpy`` and a ``torch.Tensor`` mask to ``_parse_delta_torch``.
    Please refer to :cite:`che2018GRUD` for its math definition.

    Parameters
    ----------
    missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features]
        Binary masks indicate missing data (0 means missing values, 1 means observed values).

    Returns
    -------
    delta :
        The delta matrix indicates the time gaps between observed values.
        With the same shape as missing_mask.

    Raises
    ------
    RuntimeError
        If ``missing_mask`` is neither a ``numpy.ndarray`` nor a ``torch.Tensor``.

    References
    ----------
    .. [1] `Che, Zhengping, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan Liu.
        "Recurrent neural networks for multivariate time series with missing values."
        Scientific reports 8, no. 1 (2018): 6085.
        <https://www.nature.com/articles/s41598-018-24271-9.pdf>`_

    """
    if isinstance(missing_mask, np.ndarray):
        return _parse_delta_numpy(missing_mask)
    if isinstance(missing_mask, torch.Tensor):
        return _parse_delta_torch(missing_mask)

    # The exception type stays RuntimeError so existing callers that catch it keep
    # working; the message now tells the caller what was actually passed in.
    raise RuntimeError(
        "missing_mask must be a numpy.ndarray or a torch.Tensor, "
        f"but got {type(missing_mask)}"
    )
|
||
|
||
def sliding_window(time_series, window_len, sliding_len=None): | ||
"""Generate time series samples with sliding window method, truncating windows from time-series data | ||
with a given sequence length. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.