From 99c9431f5d3d0c5080201c95361498b76bdc809f Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Mon, 21 Jan 2019 23:54:35 +0300
Subject: [PATCH] IndexError: list index out of range if segment has NaN #242
 (#349)

add threshold in model
---
 analytics/analytics/models/drop_model.py    | 13 ++++++-------
 analytics/analytics/models/general_model.py | 13 ++++++-------
 analytics/analytics/models/jump_model.py    | 13 ++++++-------
 analytics/analytics/models/model.py         | 21 ++++++++++++++++-----
 analytics/analytics/models/peak_model.py    | 13 ++++++-------
 analytics/analytics/models/trough_model.py  | 14 ++++++--------
 analytics/analytics/utils/segments.py       |  2 +-
 7 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index e60fb79..0b8f1c2 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -37,10 +37,9 @@ class DropModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0 or len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 confidence = utils.find_confidence(segment_data)
                 confidences.append(confidence)
                 segment_cent_index, drop_height, drop_length = utils.find_parameters(segment_data, segment_from_index, 'drop')
@@ -57,9 +56,9 @@ class DropModel(Model):
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                if len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 segment_cent_index = utils.find_parameters(segment_data, segment_from_index, 'drop')[0]
                 deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
                 deleted_drop = utils.subtract_min_without_nan(deleted_drop)
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index 265bcd8..cdc94d1 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -34,10 +34,9 @@ class GeneralModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0 or len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 center_ind = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
                 self.ipats.append(center_ind)
                 segment_data = utils.get_interval(data, center_ind, self.state['WINDOW_SIZE'])
@@ -50,9 +49,9 @@ class GeneralModel(Model):
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                if len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 del_mid_index = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
                 deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
                 deleted_pat = utils.subtract_min_without_nan(segment_data)
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index 85c8e0d..a666fc9 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -38,10 +38,9 @@ class JumpModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0 or len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 confidence = utils.find_confidence(segment_data)
                 confidences.append(confidence)
                 segment_cent_index, jump_height, jump_length = utils.find_parameters(segment_data, segment_from_index, 'jump')
@@ -58,9 +57,9 @@ class JumpModel(Model):
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                if len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 segment_cent_index = utils.find_parameters(segment_data, segment_from_index, 'jump')[0]
                 deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
                 deleted_jump = utils.subtract_min_without_nan(labeled_jump)
diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py
index 0f8ed13..6d57cda 100644
--- a/analytics/analytics/models/model.py
+++ b/analytics/analytics/models/model.py
@@ -24,18 +24,29 @@ class Model(ABC):
 
         self.segments = segments
         segment_length_list = []
+        filtered_segments = []
         for segment in self.segments:
-            if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-
+            if segment['labeled'] or segment['deleted']:
+                parse_segment_dict = utils.parse_segment(segment, dataframe)
+                segment_from_index = parse_segment_dict.get('from')
+                segment_to_index = parse_segment_dict.get('to')
+                segment_data = parse_segment_dict.get('data')
+                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
+                if percent_of_nans > 0.1 or len(segment_data) == 0:
+                    continue
+                if percent_of_nans > 0:
+                    nan_list = utils.find_nan_indexes(segment_data)
+                    segment_data = utils.nan_to_zero(segment_data, nan_list)
+                segment.update({'from': segment_from_index, 'to': segment_to_index, 'data': segment_data})
                 segment_length = abs(segment_to_index - segment_from_index)
                 segment_length_list.append(segment_length)
+                filtered_segments.append(segment)
+
         if len(segment_length_list) > 0:
             self.state['WINDOW_SIZE'] = math.ceil(max(segment_length_list) / 2)
         else:
             self.state['WINDOW_SIZE'] = 0
-        self.do_fit(dataframe, segments)
+        self.do_fit(dataframe, filtered_segments)
         return self.state
 
     def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py
index 98642e3..99b5af8 100644
--- a/analytics/analytics/models/peak_model.py
+++ b/analytics/analytics/models/peak_model.py
@@ -35,10 +35,9 @@ class PeakModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0 or len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 confidence = utils.find_confidence(segment_data)
                 confidences.append(confidence)
                 segment_max_index = segment_data.idxmax()
@@ -53,9 +52,9 @@ class PeakModel(Model):
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                if len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 del_max_index = segment_data.idxmax()
                 deleted_peak = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
                 deleted_peak = utils.subtract_min_without_nan(deleted_peak)
diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py
index 2d062a0..4e9639d 100644
--- a/analytics/analytics/models/trough_model.py
+++ b/analytics/analytics/models/trough_model.py
@@ -35,10 +35,9 @@ class TroughModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0 or len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 confidence = utils.find_confidence(segment_data)
                 confidences.append(confidence)
                 segment_min_index = segment_data.idxmin()
@@ -53,10 +52,9 @@ class TroughModel(Model):
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0 or len(segment_data) == 0:
-                    continue
+                segment_from_index = segment.get('from')
+                segment_to_index = segment.get('to')
+                segment_data = segment.get('data')
                 del_min_index = segment_data.idxmin()
                 deleted_trough = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
                 deleted_trough = utils.subtract_min_without_nan(deleted_trough)
diff --git a/analytics/analytics/utils/segments.py b/analytics/analytics/utils/segments.py
index 8d75067..9455529 100644
--- a/analytics/analytics/utils/segments.py
+++ b/analytics/analytics/utils/segments.py
@@ -6,4 +6,4 @@ def parse_segment(segment: dict, dataframe: pd.DataFrame):
     start = timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
     end = timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
     data = dataframe['value'][start: end + 1]
-    return start, end, data
+    return {'from': start, 'to': end, 'data': data}
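
Note on the change: parse_segment now returns a dict ({'from': ..., 'to': ..., 'data': ...}) instead of a tuple, and the NaN handling moves into Model.fit, where it runs once for both labeled and deleted segments: a segment is skipped when it is empty or more than 10% of its values are NaN, and any remaining NaNs are zeroed before the segment reaches do_fit. Below is a minimal standalone sketch of that filtering logic in plain pandas; clean_segment and NAN_THRESHOLD are illustrative names, not part of the repository, and fillna(0) stands in for the repo's find_nan_indexes/nan_to_zero helpers.

    import pandas as pd

    NAN_THRESHOLD = 0.1  # the 10% limit this patch hard-codes in Model.fit

    def clean_segment(segment_data: pd.Series):
        """Return a NaN-free copy of the segment data, or None if the segment should be skipped."""
        if len(segment_data) == 0:
            return None
        percent_of_nans = segment_data.isnull().sum() / len(segment_data)
        if percent_of_nans > NAN_THRESHOLD:
            return None  # too sparse to learn a reliable pattern from
        if percent_of_nans > 0:
            # zero out the few remaining NaNs, roughly what find_nan_indexes + nan_to_zero do
            segment_data = segment_data.fillna(0)
        return segment_data

    # usage: only segments whose data survives the check are handed to do_fit
    values = pd.Series([1.0, 2.0, float('nan'), 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0])
    print(clean_segment(values))      # 1 NaN out of 11 (~9%) -> NaN zeroed, segment kept
    print(clean_segment(values[:3]))  # 1 NaN out of 3 (~33%) -> None, segment skipped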