From 8614499c20438bdb9d4c921344baf39123854e50 Mon Sep 17 00:00:00 2001 From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Tue, 13 Nov 2018 14:51:23 +0300 Subject: [PATCH] Processing for NaN values #231 (#236) --- analytics/analytics/models/drop_model.py | 13 ++++++++++--- analytics/analytics/models/general_model.py | 5 ++--- analytics/analytics/models/jump_model.py | 12 ++++++++++-- analytics/analytics/models/peak_model.py | 12 ++++++++++-- analytics/analytics/models/trough_model.py | 15 ++++++++++++--- analytics/analytics/utils/common.py | 14 ++++++++++++++ 6 files changed, 58 insertions(+), 13 deletions(-) diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py index 0f653df..7888ac1 100644 --- a/analytics/analytics/models/drop_model.py +++ b/analytics/analytics/models/drop_model.py @@ -39,8 +39,8 @@ class DropModel(Model): segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_data = data[segment_from_index: segment_to_index + 1] - - if len(segment_data) == 0: + percent_of_nans = segment_data.count(np.NaN) / len(segment_data) + if percent_of_nans > 0 or len(segment_data) == 0: continue segment_min = min(segment_data) segment_max = max(segment_data) @@ -164,6 +164,14 @@ class DropModel(Model): for segment in segments: if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']): convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1] + percent_of_nans = convol_data.count(np.NaN) / len(convol_data) + if percent_of_nans > 0.5: + delete_list.append(segment) + continue + elif 0 < percent_of_nans <= 0.5: + nan_list = utils.find_nan_indexes(convol_data) + convol_data = utils.nan_to_zero(convol_data, nan_list) + pattern_data = utils.nan_to_zero(pattern_data, nan_list) conv = 
scipy.signal.fftconvolve(convol_data, pattern_data) upper_bound = self.state['convolve_max'] * 1.2 lower_bound = self.state['convolve_min'] * 0.8 @@ -181,5 +189,4 @@ class DropModel(Model): for item in delete_list: segments.remove(item) - return set(segments) diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py index dc74e3e..077be97 100644 --- a/analytics/analytics/models/general_model.py +++ b/analytics/analytics/models/general_model.py @@ -36,9 +36,9 @@ class GeneralModel(Model): if segment['labeled']: segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) - segment_data = data[segment_from_index: segment_to_index + 1] - if len(segment_data) == 0: + percent_of_nans = segment_data.count(np.NaN) / len(segment_data) + if percent_of_nans > 0 or len(segment_data) == 0: continue x = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2) self.ipats.append(x) @@ -110,7 +110,6 @@ class GeneralModel(Model): if len(segments) == 0 or len(self.ipats) == 0: return [] delete_list = [] - for val in segments: if self.all_conv[val] < self.state['convolve_min'] * 0.8: delete_list.append(val) diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py index ac0aef2..369bfb3 100644 --- a/analytics/analytics/models/jump_model.py +++ b/analytics/analytics/models/jump_model.py @@ -39,7 +39,8 @@ class JumpModel(Model): for segment in segments: if segment['labeled']: segment_from_index, segment_to_index, segment_data = parse_segment(segment, dataframe) - if len(segment_data) == 0: + percent_of_nans = segment_data.count(np.NaN) / len(segment_data) + if percent_of_nans > 0 or len(segment_data) == 0: continue segment_min = min(segment_data) segment_max = max(segment_data) @@ -169,6 +170,14 @@ class JumpModel(Model): for segment in segments: if 
segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']): convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1] + percent_of_nans = convol_data.count(np.NaN) / len(convol_data) + if percent_of_nans > 0.5: + delete_list.append(segment) + continue + elif 0 < percent_of_nans <= 0.5: + nan_list = utils.find_nan_indexes(convol_data) + convol_data = utils.nan_to_zero(convol_data, nan_list) + pattern_data = utils.nan_to_zero(pattern_data, nan_list) conv = scipy.signal.fftconvolve(convol_data, pattern_data) try: if max(conv) > upper_bound or max(conv) < lower_bound: @@ -179,7 +188,6 @@ class JumpModel(Model): delete_list.append(segment) else: delete_list.append(segment) - for item in delete_list: segments.remove(item) diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py index 678aba5..8356b73 100644 --- a/analytics/analytics/models/peak_model.py +++ b/analytics/analytics/models/peak_model.py @@ -37,7 +37,8 @@ class PeakModel(Model): segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_data = data[segment_from_index: segment_to_index + 1] - if len(segment_data) == 0: + percent_of_nans = segment_data.count(np.NaN) / len(segment_data) + if percent_of_nans > 0 or len(segment_data) == 0: continue segment_min = min(segment_data) segment_max = max(segment_data) @@ -125,6 +126,14 @@ class PeakModel(Model): if segment > self.state['WINDOW_SIZE']: convol_data = data[segment - self.state['WINDOW_SIZE']: segment + self.state['WINDOW_SIZE'] + 1] convol_data = convol_data - min(convol_data) + percent_of_nans = convol_data.count(np.NaN) / len(convol_data) + if percent_of_nans > 0.5: + delete_list.append(segment) + continue + elif 0 < percent_of_nans <= 0.5: + nan_list = utils.find_nan_indexes(convol_data) + convol_data 
= utils.nan_to_zero(convol_data, nan_list) + pattern_data = utils.nan_to_zero(pattern_data, nan_list) conv = scipy.signal.fftconvolve(convol_data, pattern_data) if max(conv) > self.state['convolve_max'] * 1.05 or max(conv) < self.state['convolve_min'] * 0.95: delete_list.append(segment) @@ -134,5 +143,4 @@ class PeakModel(Model): delete_list.append(segment) for item in delete_list: segments.remove(item) - return set(segments) diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py index e56d9ca..4cebc5c 100644 --- a/analytics/analytics/models/trough_model.py +++ b/analytics/analytics/models/trough_model.py @@ -36,9 +36,9 @@ class TroughModel(Model): if segment['labeled']: segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) - segment_data = data[segment_from_index: segment_to_index + 1] - if len(segment_data) == 0: + percent_of_nans = segment_data.count(np.NaN) / len(segment_data) + if percent_of_nans > 0 or len(segment_data) == 0: continue segment_min = min(segment_data) segment_max = max(segment_data) @@ -64,7 +64,8 @@ class TroughModel(Model): segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_data = data[segment_from_index: segment_to_index + 1] - if len(segment_data) == 0: + percent_of_nans = segment_data.count(np.NaN) / len(segment_data) + if percent_of_nans > 0 or len(segment_data) == 0: continue del_min_index = segment_data.idxmin() deleted_trough = data[del_min_index - self.state['WINDOW_SIZE']: del_min_index + self.state['WINDOW_SIZE'] + 1] @@ -126,6 +127,14 @@ class TroughModel(Model): if segment > self.state['WINDOW_SIZE']: convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] 
+ 1] convol_data = convol_data - min(convol_data) + percent_of_nans = convol_data.count(np.NaN) / len(convol_data) + if percent_of_nans > 0.5: + delete_list.append(segment) + continue + elif 0 < percent_of_nans <= 0.5: + nan_list = utils.find_nan_indexes(convol_data) + convol_data = utils.nan_to_zero(convol_data, nan_list) + pattern_data = utils.nan_to_zero(pattern_data, nan_list) conv = scipy.signal.fftconvolve(convol_data, pattern_data) if max(conv) > self.state['convolve_max'] * 1.1 or max(conv) < self.state['convolve_min'] * 0.9: delete_list.append(segment) diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py index a34d720..2eb8720 100644 --- a/analytics/analytics/utils/common.py +++ b/analytics/analytics/utils/common.py @@ -250,3 +250,17 @@ def best_pat(pat_list, data, dir): ind = i new_pat_list.append(ind) return new_pat_list + +def find_nan_indexes(segment): + nan_list = np.isnan(segment) + nan_indexes = [] + for i, val in enumerate(nan_list): + if val: + nan_indexes.append(i) + return nan_indexes + +def nan_to_zero(segment, nan_list): + for val in nan_list: + segment[val] = 0 + return segment +