Skip to content

Commit

Permalink
matrix-profile-foundation#38 Dealing with missing values before runni…
Browse files Browse the repository at this point in the history
…ng algorithms.
  • Loading branch information
LuYueee committed Sep 12, 2020
1 parent 9069dca commit e14860d
Showing 1 changed file with 191 additions and 0 deletions.
191 changes: 191 additions & 0 deletions matrixprofile/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,194 @@
from matrixprofile.algorithms.skimp import maximum_subsequence


def is_subsequence_constant(subsequence):
    """
    Check whether every value of a time series subsequence is identical.

    Parameters
    ----------
    subsequence : array_like
        The time series subsequence to inspect.

    Returns
    -------
    is_constant : bool
        True when all elements compare equal to the first element of the
        subsequence, False otherwise.

    Raises
    ------
    ValueError
        If ``subsequence`` is not array like.
    """
    if core.is_array_like(subsequence):
        values = core.to_np_array(subsequence)
        # Every element is compared against the first one; a single
        # mismatch is enough to make the subsequence non-constant.
        first_value = values[0]
        return np.all(values == first_value)

    raise ValueError('subsequence is not array like!')


def add_noise_to_series(series):
    """
    Return a copy of the given time series with small random noise added.

    The noise is drawn independently per element from a uniform
    distribution over ``[0, 0.0000009)``; the input is not modified.

    Parameters
    ----------
    series : array_like
        The time series (or subsequence) to perturb.

    Returns
    -------
    noisy : array_like
        The element-wise sum of the series and the generated noise.

    Raises
    ------
    ValueError
        If ``series`` is not array like.
    """
    if not core.is_array_like(series):
        raise ValueError('series is not array like!')

    noise_low, noise_high = 0, 0.0000009

    values = np.copy(core.to_np_array(series))
    jitter = np.random.uniform(noise_low, noise_high, size=len(values))

    return values + jitter


def impute_missing(ts, window, method='mean', direction='forward'):
    """
    Imputes missing data (NaN / infinite values) in a time series.

    The series is scanned with a sliding window. Whenever a window contains
    missing values, they are replaced by a statistic (mean, median, minimum
    or maximum) of the non-missing values found in that same window. A
    missing first or last element is filled beforehand with the nearest
    non-missing value of the series, so that every window of size two or
    more has at least one value to compute the statistic from.

    Parameters
    ----------
    ts : array_like
        The time series to be handled. It is copied; the input is never
        modified.
    window : int
        The window size to compute the mean/median/minimum value/maximum
        value. When it exceeds the length of the series no window is
        evaluated and only the first/last elements are repaired.
    method : string, Default = 'mean'
        A string indicating the data imputation method, which should be
        'mean', 'median', 'min' or 'max'.
    direction : string, Default = 'forward'
        A string indicating the data imputation direction, which should be
        'forward', 'fwd', 'f', 'backward', 'bwd', 'b'. Forward scans the
        windows from the start of the series to its end; backward scans the
        same windows from the end of the series to its start.

    Returns
    -------
    temp : array_like
        The time series after being imputed missing data. An empty input
        yields an empty copy.

    Raises
    ------
    ValueError
        If ``ts`` is not array like, if ``method`` or ``direction`` is not
        one of the supported options, or if ``ts`` holds no non-missing
        value at all to impute from.
    """
    method_map = {
        'mean': np.mean,
        'median': np.median,
        'min': np.min,
        'max': np.max
    }

    forward_directions = ['forward', 'fwd', 'f']
    backward_directions = ['backward', 'bwd', 'b']
    directions = forward_directions + backward_directions

    if not core.is_array_like(ts):
        raise ValueError('ts is not array like!')

    if method not in method_map:
        raise ValueError('invalid imputation method! valid include options: {}'.format(', '.join(method_map.keys())))

    if direction not in directions:
        raise ValueError('invalid imputation direction! valid include options: ' + ', '.join(directions))

    temp = np.copy(core.to_np_array(ts))

    # Nothing to impute in an empty series.
    if temp.size == 0:
        return temp

    func = method_map[method]
    nan_infs = core.nan_inf_indices(temp)
    valid_values = temp[~nan_infs]

    # Without a single usable value there is nothing to impute from; fail
    # with a clear message rather than an IndexError on the lookups below.
    if len(valid_values) == 0:
        raise ValueError('ts does not contain any non-missing value to impute from!')

    # Deal with missing data at the beginning and end of time series by
    # copying the nearest non-missing value into the endpoint.
    if not np.isfinite(temp[0]):
        temp[0] = valid_values[0]

    if not np.isfinite(temp[-1]):
        temp[-1] = valid_values[-1]

    nan_infs = core.nan_inf_indices(temp)

    # Both directions visit exactly the same full-size windows; only the
    # visiting order differs, which makes backward the mirror of forward.
    last_start = len(temp) - window
    if direction in forward_directions:
        starts = range(0, last_start + 1)
    else:
        starts = range(last_start, -1, -1)

    for start in starts:
        end = start + window
        missing = nan_infs[start:end]

        if not np.any(missing):
            continue

        # Slices are views, so the assignment writes straight into temp.
        values = temp[start:end]
        values[missing] = func(values[~missing])

        # Only this window changed, so refreshing its part of the mask is
        # enough to keep nan_infs in sync with temp.
        nan_infs[start:end] = core.nan_inf_indices(values)

    return temp


def preprocess(ts, window, impute_method='mean', impute_direction='forward', add_noise=True):
    """
    Prepare a time series by imputing missing data and, optionally, adding
    noise to constant windows.

    Missing values are imputed first via ``impute_missing``. Afterwards,
    when ``add_noise`` is enabled, every window of the series that is
    found to be constant is replaced by a noisy version of itself.

    Parameters
    ----------
    ts : array_like
        The time series to be preprocessed. It is copied; the input is not
        modified.
    window : int
        The window size used both for computing the imputation statistic
        and for detecting constant subsequences.
    impute_method : string, Default = 'mean'
        The data imputation method, which should be 'mean', 'median',
        'min' or 'max'.
    impute_direction : string, Default = 'forward'
        The data imputation direction, which should be 'forward', 'fwd',
        'f', 'backward', 'bwd' or 'b'.
    add_noise : bool, Default = True
        Whether noise is added to constant windows of the time series.

    Returns
    -------
    temp : array_like
        The time series after being preprocessed.

    Raises
    ------
    ValueError
        If ``ts`` is not array like.
    """
    if not core.is_array_like(ts):
        raise ValueError('ts is not array like!')

    series = core.to_np_array(ts)
    series = np.copy(series)

    # Step 1: fill in the missing values.
    series = impute_missing(
        series,
        window,
        method=impute_method,
        direction=impute_direction
    )

    if not add_noise:
        return series

    # Step 2: perturb every window whose values are all identical.
    window_count = len(series) - window + 1
    for begin in range(window_count):
        stop = begin + window
        chunk = series[begin:stop]

        if is_subsequence_constant(chunk):
            series[begin:stop] = add_noise_to_series(chunk)

    return series


def compute(ts, windows=None, query=None, sample_pct=1, threshold=0.98,
n_jobs=1):
"""
Expand Down Expand Up @@ -83,6 +271,9 @@ def compute(ts, windows=None, query=None, sample_pct=1, threshold=0.98,
if core.is_array_like(windows) and len(windows) == 1:
windows = windows[0]

# preprocess the time series
ts = preprocess(ts, 4)

# compute the upper window and pmp
if no_windows and has_threshold:
profile = maximum_subsequence(ts, threshold, include_pmp=True)
Expand Down

0 comments on commit e14860d

Please sign in to comment.