refactor: removing modules from tsdb and pygrinder, now directly using them;
Showing 7 changed files with 79 additions and 134 deletions.
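In practice the change is that call sites now use the upstream packages directly instead of the thin wrappers removed below. A minimal sketch of the intended usage, with function names taken from the removed wrapper bodies (`pygrinder.cal_missing_rate`, `pygrinder.masked_fill`, `tsdb.pickle_load`/`pickle_dump`); exact signatures and the mask convention depend on the pygrinder/tsdb versions pinned at the time, so treat this as illustrative rather than authoritative:

```python
import numpy as np
import pygrinder
import tsdb

# Toy data with some missing values: [n_samples, n_steps, n_features].
X = np.random.randn(16, 24, 10)
X[X < -1.5] = np.nan

# Previously wrapped by this module's cal_missing_rate() and masked_fill();
# now the pygrinder functions are called directly, as the wrappers did internally.
rate = pygrinder.cal_missing_rate(X)
fill_mask = np.isnan(X).astype("float32")        # assumption: 1 marks positions to fill
X_filled = pygrinder.masked_fill(X, fill_mask, 0.0)

# Previously re-exported here as pickle_load / pickle_dump; now imported from tsdb.
tsdb.pickle_dump(X_filled, "toy_data.pkl")
X_reloaded = tsdb.pickle_load("toy_data.pkl")
```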
```
@@ -5,127 +5,33 @@
# Created by Wenjie Du <[email protected]>
# License: GLP-v3


from typing import Union

import numpy as np
import pygrinder
import torch
from tsdb import (
    pickle_load as _pickle_load,
    pickle_dump as _pickle_dump,
)

pickle_load = _pickle_load
pickle_dump = _pickle_dump


def cal_missing_rate(X: Union[np.ndarray, torch.Tensor, list]) -> float:
    """Calculate the missing rate of the given data.
    Parameters
    ----------
    X :
        The data to calculate missing rate.

    Returns
    -------
    missing_rate :
        The missing rate of the given data.
    """
    missing_rate = pygrinder.cal_missing_rate(X)
    return missing_rate


def masked_fill(
    X: Union[np.ndarray, torch.Tensor, list],
    mask: Union[np.ndarray, torch.Tensor, list],
    value: float,
) -> Union[np.ndarray, torch.Tensor]:
    """Fill the masked values in ``X`` according to ``mask`` with the given ``value``.

    Parameters
    ----------
    X :
        The data to be filled.
    mask :
        The mask for filling the given data.
    value :
        The value to fill the masked values.
    Returns
    -------
    filled_X :
        The filled data.
    """
    filled_X = pygrinder.masked_fill(X, mask, value)
    return filled_X


def mcar(
    X: Union[np.ndarray, torch.Tensor, list],
    p: float,
    nan: float = 0,
) -> Union[np.ndarray, torch.Tensor]:
    """Create completely random missing values (MCAR case).
def _parse_delta_torch(missing_mask: torch.Tensor) -> torch.Tensor:
    """Generate the time-gap matrix (i.e. the delta metrix) from the missing mask.
    Please refer to :cite:`che2018GRUD` for its math definition.
    Parameters
    ----------
    X : array,
        Data vector. If X has any missing values, they should be numpy.nan.
    p : float, in (0,1),
        The probability that values may be masked as missing completely at random.
        Note that the values are randomly selected no matter if they are originally missing or observed.
        If the selected values are originally missing, they will be kept as missing.
        If the selected values are originally observed, they will be masked as missing.
        Therefore, if the given X already contains missing data, the final missing rate in the output X could be
        in range [original_missing_rate, original_missing_rate+rate], but not strictly equal to
        `original_missing_rate+rate`. Because the selected values to be artificially masked out may be originally
        missing, and the masking operation on the values will do nothing.
    nan : int/float, optional, default=0
        Value used to fill NaN values.
    missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features]
        Binary masks indicate missing data (0 means missing values, 1 means observed values).
    Returns
    -------
    X_intact : array,
        Original data with missing values (nan) filled with given parameter `nan`, with observed values intact.
        X_intact is for loss calculation in the masked imputation task.
    X : array,
        Original X with artificial missing values. X is for model input.
        Both originally-missing and artificially-missing values are filled with given parameter `nan`.
    delta :
        The delta matrix indicates the time gaps between observed values.
        With the same shape of missing_mask.
    missing_mask : array,
        The mask indicates all missing values in X.
        In it, 1 indicates observed values, and 0 indicates missing values.
    indicating_mask : array,
        The mask indicates the artificially-missing values in X, namely missing parts different from X_intact.
        In it, 1 indicates artificially missing values, and other values are indicated as 0.
    """
    X = pygrinder.mcar(X, p, nan)
    return X
```
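The `mcar()` docstring above makes a point that is easy to miss: positions are selected completely at random, so already-missing positions can be selected again, and the final missing rate ends up somewhere in `[original_missing_rate, original_missing_rate + p]` rather than exactly at the upper bound. A toy illustration of that behaviour in plain NumPy (not pygrinder itself):

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 10))
X[rng.random(X.shape) < 0.2] = np.nan      # ~20% originally missing

p = 0.3
hits = rng.random(X.shape) < p             # MCAR: every position equally likely to be selected
X_corrupted = X.copy()
X_corrupted[hits] = np.nan                 # hits on already-missing positions change nothing

original_rate = np.isnan(X).mean()
final_rate = np.isnan(X_corrupted).mean()
# Expected final rate is roughly 0.2 + 0.3 * (1 - 0.2) = 0.44, i.e. inside
# [original_rate, original_rate + p] but below the upper bound, because some
# of the selected positions were already missing.
print(original_rate, final_rate)
```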
```
def torch_parse_delta(missing_mask: torch.Tensor) -> torch.Tensor:
    """Generate time-gap (delta) matrix from missing masks.
    Please refer to :cite:`che2018GRUD` for its math definition.
    Parameters
    References
    ----------
    missing_mask :
        Binary masks indicate missing values. Shape of [n_steps, n_features] or [n_samples, n_steps, n_features]
    .. [1] `Che, Zhengping, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan Liu.
        "Recurrent neural networks for multivariate time series with missing values."
        Scientific reports 8, no. 1 (2018): 6085.
        <https://www.nature.com/articles/s41598-018-24271-9.pdf>`_
    Returns
    -------
    delta
        Delta matrix indicates time gaps of missing values.
    """

    def cal_delta_for_single_sample(mask: torch.Tensor) -> torch.Tensor:

@@ -156,18 +62,28 @@ def cal_delta_for_single_sample(mask: torch.Tensor) -> torch.Tensor:
    return delta


def numpy_parse_delta(missing_mask: np.ndarray) -> np.ndarray:
    """Generate time-gap (delta) matrix from missing masks. Please refer to :cite:`che2018GRUD` for its math definition.
def _parse_delta_numpy(missing_mask: np.ndarray) -> np.ndarray:
    """Generate the time-gap matrix (i.e. the delta metrix) from the missing mask.
    Please refer to :cite:`che2018GRUD` for its math definition.
    Parameters
    ----------
    missing_mask :
        Binary masks indicate missing values. Shape of [n_steps, n_features] or [n_samples, n_steps, n_features].
    missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features]
        Binary masks indicate missing data (0 means missing values, 1 means observed values).
    Returns
    -------
    delta
        Delta matrix indicates time gaps of missing values.
    delta :
        The delta matrix indicates the time gaps between observed values.
        With the same shape of missing_mask.
    References
    ----------
    .. [1] `Che, Zhengping, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan Liu.
        "Recurrent neural networks for multivariate time series with missing values."
        Scientific reports 8, no. 1 (2018): 6085.
        <https://www.nature.com/articles/s41598-018-24271-9.pdf>`_
    """

    def cal_delta_for_single_sample(mask: np.ndarray) -> np.ndarray:

@@ -194,6 +110,40 @@ def cal_delta_for_single_sample(mask: np.ndarray) -> np.ndarray:
    return delta
```
```
def parse_delta(
    missing_mask: Union[np.ndarray, torch.Tensor]
) -> Union[np.ndarray, torch.Tensor]:
    """Generate the time-gap matrix (i.e. the delta metrix) from the missing mask.
    Please refer to :cite:`che2018GRUD` for its math definition.
    Parameters
    ----------
    missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features]
        Binary masks indicate missing data (0 means missing values, 1 means observed values).
    Returns
    -------
    delta :
        The delta matrix indicates the time gaps between observed values.
        With the same shape of missing_mask.
    References
    ----------
    .. [1] `Che, Zhengping, Sanjay Purushotham, Kyunghyun Cho, David Sontag, and Yan Liu.
        "Recurrent neural networks for multivariate time series with missing values."
        Scientific reports 8, no. 1 (2018): 6085.
        <https://www.nature.com/articles/s41598-018-24271-9.pdf>`_
    """
    if isinstance(missing_mask, np.ndarray):
        delta = _parse_delta_numpy(missing_mask)
    elif isinstance(missing_mask, torch.Tensor):
        delta = _parse_delta_torch(missing_mask)
    else:
        raise RuntimeError
    return delta
```
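For intuition on what `parse_delta` produces: per the Che et al. (GRU-D) definition cited in the docstring, with unit-spaced time steps the delta is 0 at the first step, 1 when the previous step was observed, and otherwise 1 plus the previous delta, so it counts how many steps have passed since the last observation. A minimal NumPy sketch of that recurrence for a single `[n_steps, n_features]` mask (an illustration of the definition, not the library's exact implementation):

```python
import numpy as np

def delta_from_mask(missing_mask: np.ndarray) -> np.ndarray:
    """Delta recurrence from GRU-D, assuming unit-spaced time steps.
    missing_mask: [n_steps, n_features], 1 = observed, 0 = missing."""
    n_steps, n_features = missing_mask.shape
    delta = np.zeros((n_steps, n_features))
    for t in range(1, n_steps):
        # One step elapsed; carry over the accumulated gap where the previous step was missing.
        delta[t] = 1 + (1 - missing_mask[t - 1]) * delta[t - 1]
    return delta

mask = np.array([[1], [0], [0], [1], [0]])   # one feature, observed at steps 0 and 3
print(delta_from_mask(mask).ravel())         # -> [0. 1. 2. 3. 1.]
```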
```
def sliding_window(time_series, window_len, sliding_len=None):
    """Generate time series samples with sliding window method, truncating windows from time-series data
    with a given sequence length.
```
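The excerpt is cut off here, but the idea behind `sliding_window` is to truncate a long series into fixed-length windows, stepping forward by `sliding_len` each time (presumably defaulting to `window_len`, i.e. non-overlapping windows, when not given). A rough sketch under those assumptions, not the function's actual implementation:

```python
import numpy as np

def sliding_window_sketch(time_series, window_len, sliding_len=None):
    """Cut [n_steps, n_features] data into [n_samples, window_len, n_features] windows.
    Assumes sliding_len defaults to window_len (non-overlapping) and drops any tail
    that does not fill a complete window."""
    sliding_len = window_len if sliding_len is None else sliding_len
    starts = range(0, time_series.shape[0] - window_len + 1, sliding_len)
    return np.stack([time_series[s: s + window_len] for s in starts])

series = np.arange(20).reshape(10, 2)              # 10 steps, 2 features
samples = sliding_window_sketch(series, window_len=4, sliding_len=2)
print(samples.shape)                               # -> (4, 4, 2)
```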