From 5ab3ff64dd9830754130d2cd5bdae0ab5e980fd6 Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Tue, 15 Jan 2019 15:50:01 +0300
Subject: [PATCH] Move data cropping to the models #335 (#336)

---
 analytics/analytics/analytic_unit_manager.py | 3 ---
 analytics/analytics/models/drop_model.py     | 6 ++++--
 analytics/analytics/models/general_model.py  | 6 ++++--
 analytics/analytics/models/jump_model.py     | 6 ++++--
 analytics/analytics/models/peak_model.py     | 6 ++++--
 analytics/analytics/models/trough_model.py   | 6 ++++--
 analytics/analytics/utils/common.py          | 6 +++++-
 analytics/tests/test_dataset.py              | 8 ++++----
 8 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/analytics/analytics/analytic_unit_manager.py b/analytics/analytics/analytic_unit_manager.py
index 4025951..b2489ae 100644
--- a/analytics/analytics/analytic_unit_manager.py
+++ b/analytics/analytics/analytic_unit_manager.py
@@ -33,9 +33,6 @@ def prepare_data(data: list):
     data = pd.DataFrame(data, columns=['timestamp', 'value'])
     data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
     data.fillna(value = np.nan, inplace = True)
-    if not np.isnan(data['value'].min()):
-        data['value'] = data['value'] - min(data['value'])
-
     return data
 
 
diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index 78d010b..e60fb79 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -28,7 +28,8 @@ class DropModel(Model):
         }
 
     def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         confidences = []
         convolve_list = []
         drop_height_list = []
@@ -101,7 +102,8 @@ class DropModel(Model):
         self.state['conv_del_max'] = self.state['WINDOW_SIZE']
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         possible_drops = utils.find_drop(data, self.state['DROP_HEIGHT'], self.state['DROP_LENGTH'] + 1)
 
         return self.__filter_detection(possible_drops, data)
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index 58389e2..265bcd8 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -28,7 +28,8 @@ class GeneralModel(Model):
         self.all_conv = []
 
     def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         convolve_list = []
         patterns_list = []
         for segment in segments:
@@ -79,7 +80,8 @@ class GeneralModel(Model):
         self.state['conv_del_max'] = self.state['WINDOW_SIZE']
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         pat_data = self.model_gen
         y = max(pat_data)
 
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index 77c9f88..85c8e0d 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -29,7 +29,8 @@ class JumpModel(Model):
         }
 
     def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         confidences = []
         convolve_list = []
         jump_height_list = []
@@ -102,7 +103,8 @@ class JumpModel(Model):
         self.state['conv_del_max'] = self.state['WINDOW_SIZE']
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
 
         return self.__filter_detection(possible_jumps, data)
diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py
index a0c8561..98642e3 100644
--- a/analytics/analytics/models/peak_model.py
+++ b/analytics/analytics/models/peak_model.py
@@ -28,7 +28,8 @@ class PeakModel(Model):
         }
 
     def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         confidences = []
         convolve_list = []
         patterns_list = []
@@ -87,7 +88,8 @@ class PeakModel(Model):
         self.state['conv_del_max'] = self.state['WINDOW_SIZE']
 
     def do_detect(self, dataframe: pd.DataFrame):
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data
         all_maxs = argrelextrema(np.array(data), np.greater)[0]
 
diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py
index da2f8cc..2d062a0 100644
--- a/analytics/analytics/models/trough_model.py
+++ b/analytics/analytics/models/trough_model.py
@@ -28,7 +28,8 @@ class TroughModel(Model):
         }
 
    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         confidences = []
         convolve_list = []
         patterns_list = []
@@ -88,7 +89,8 @@ class TroughModel(Model):
         self.state['conv_del_max'] = self.state['WINDOW_SIZE']
 
     def do_detect(self, dataframe: pd.DataFrame):
-        data = dataframe['value']
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
         window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data
         all_mins = argrelextrema(np.array(data), np.less)[0]
 
diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py
index 2c97029..8ab1b26 100644
--- a/analytics/analytics/utils/common.py
+++ b/analytics/analytics/utils/common.py
@@ -273,4 +273,8 @@ def pattern_intersection(segment_data: list, median: float, pattern_type: str) -
     return [x for (idx, x) in enumerate(center_index) if idx not in delete_index]
 
 
-
+def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:
+    data_min = data['value'].min()
+    if not np.isnan(data_min) and data_min > 0:
+        data['value'] = data['value'] - data_min
+    return data
diff --git a/analytics/tests/test_dataset.py b/analytics/tests/test_dataset.py
index ac66919..047a4c8 100644
--- a/analytics/tests/test_dataset.py
+++ b/analytics/tests/test_dataset.py
@@ -121,10 +121,10 @@ class TestDataset(unittest.TestCase):
         data_none = [[1523889000000, None], [1523889000001, None], [1523889000002, None]]
         return_data_nan = prepare_data(data_nan)
         return_data_none = prepare_data(data_none)
-        for item in return_data_nan:
-            self.assertTrue(np.isnan(item.value))
-        for item in return_data_none:
-            self.assertTrue(np.isnan(item.value))
+        for item in return_data_nan.value:
+            self.assertTrue(np.isnan(item))
+        for item in return_data_none.value:
+            self.assertTrue(np.isnan(item))
 
 if __name__ == '__main__':
     unittest.main()
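Note (not part of the patch): a minimal sketch of what the cut_dataframe helper introduced above does. The function body below is copied from the patch; the sample DataFrames and their values are made up for illustration only.

# Illustrative usage of the cut_dataframe helper added to analytics/utils/common.py.
# Sample inputs below are hypothetical, not taken from the repository.
import numpy as np
import pandas as pd

def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:
    # Shift the 'value' column down by its minimum, but only when that
    # minimum is a real number and strictly positive; otherwise leave
    # the data untouched.
    data_min = data['value'].min()
    if not np.isnan(data_min) and data_min > 0:
        data['value'] = data['value'] - data_min
    return data

# Series with a positive minimum of 10.0 gets cropped to start at 0.0.
df = pd.DataFrame({'timestamp': [0, 1, 2], 'value': [10.0, 12.5, 11.0]})
print(cut_dataframe(df)['value'].tolist())      # [0.0, 2.5, 1.0]

# An all-NaN series (as exercised by test_dataset.py) is returned unchanged.
df_nan = pd.DataFrame({'timestamp': [0, 1], 'value': [np.nan, np.nan]})
print(cut_dataframe(df_nan)['value'].tolist())  # [nan, nan]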