diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index 72b17f8..db75089 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -67,8 +67,8 @@ class DropModel(Model):
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         possible_drops = utils.find_drop(data, self.state['DROP_HEIGHT'], self.state['DROP_LENGTH'] + 1)
-
-        return self.__filter_detection(possible_drops, data)
+        result = self.__filter_detection(possible_drops, data)
+        return [(val - 1, val + 1) for val in result]
 
     def __filter_detection(self, segments: list, data: list):
         delete_list = []
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index 577df07..b5b1555 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -77,7 +77,7 @@ class GeneralModel(Model):
         filtered = self.__filter_detection(all_corr_peaks, data)
         filtered = list(filtered)
         logging.debug('Method do_detect completed correctly for analytic unit: {}'.format(AnalyticUnitId))
-        return set(item + window_size for item in filtered)
+        return [(item, item + window_size * 2) for item in filtered]
 
     def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]:
         if not self.state.get('pattern_center'):
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index 68d04bd..c81ff39 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -68,8 +68,8 @@ class JumpModel(Model):
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
-
-        return self.__filter_detection(possible_jumps, data)
+        result = self.__filter_detection(possible_jumps, data)
+        return [(val - 1, val + 1) for val in result]
 
     def __filter_detection(self, segments, data):
         delete_list = []
diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py
index 93cdab3..57a79db 100644
--- a/analytics/analytics/models/model.py
+++ b/analytics/analytics/models/model.py
@@ -104,8 +104,8 @@ class Model(ABC):
         }
         result = self.do_detect(dataframe, id)
         segments = [(
-            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x - 1]),
-            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x + 1])
+            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]),
+            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]),
         ) for x in result]
         if not self.state:
             logging.warning('Return empty self.state after detect')
diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py
index 1af6711..6a58c8d 100644
--- a/analytics/analytics/models/peak_model.py
+++ b/analytics/analytics/models/peak_model.py
@@ -80,8 +80,9 @@ class PeakModel(Model):
         for i in all_maxs:
             if data[i] > extrema_list[i]:
                 segments.append(i)
-
-        return self.__filter_detection(segments, data)
+        result = self.__filter_detection(segments, data)
+        result = utils.get_borders_of_peaks(result, data, self.state.get('WINDOW_SIZE'), self.state.get('confidence'))
+        return result
 
     def __filter_detection(self, segments: list, data: list) -> list:
         delete_list = []
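With this change every do_detect implementation returns (start_index, end_index) tuples instead of bare center indices, and Model.detect maps both borders to timestamps. A minimal sketch of the new contract; the dataframe, the result values, and the standalone convert_pd_timestamp_to_ms below are illustrative stand-ins, not repository code:

import pandas as pd

# Toy dataframe in the same shape the models consume.
timestamps = pd.date_range('2019-01-01', periods=10, freq='s')
dataframe = pd.DataFrame({'timestamp': timestamps, 'value': range(10)})

def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int:
    # Mirrors utils.convert_pd_timestamp_to_ms, assuming Timestamp.value is nanoseconds.
    return int(timestamp.value) // 1_000_000

# What a detector such as JumpModel.do_detect now returns: index pairs, not centers.
result = [(2, 4), (6, 8)]
segments = [(
    convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]),
    convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]),
) for x in result]
print(segments)  # [(start_ms, end_ms), ...] pairs ready for the server
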
diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py
index e7a7d5d..f78e7ca 100644
--- a/analytics/analytics/models/trough_model.py
+++ b/analytics/analytics/models/trough_model.py
@@ -80,8 +80,9 @@ class TroughModel(Model):
         for i in all_mins:
             if data[i] < extrema_list[i]:
                 segments.append(i)
-
-        return self.__filter_detection(segments, data)
+        result = self.__filter_detection(segments, data)
+        result = utils.get_borders_of_peaks(result, data, self.state.get('WINDOW_SIZE'), self.state.get('confidence'), inverse = True)
+        return result
 
     def __filter_detection(self, segments: list, data: list) -> list:
         delete_list = []
diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py
index e13e9cb..a854d36 100644
--- a/analytics/analytics/utils/common.py
+++ b/analytics/analytics/utils/common.py
@@ -6,14 +6,14 @@ from scipy.signal import argrelextrema
 from scipy.stats import gaussian_kde
 from scipy.stats.stats import pearsonr
 import math
-from typing import Union, List, Generator
+from typing import Union, List, Generator, Tuple
 import utils
 import logging
 from itertools import islice
 from collections import deque
 
 SHIFT_FACTOR = 0.05
-CONFIDENCE_FACTOR = 0.2
+CONFIDENCE_FACTOR = 0.5
 SMOOTHING_FACTOR = 5
 
 def exponential_smoothing(series, alpha):
@@ -191,7 +191,12 @@ def find_extremum_index(segment: np.ndarray, selector: bool) -> int:
     else:
         return segment.argmin()
 
-def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
+def get_interval(data: pd.Series, center: int, window_size: int, normalization = False) -> pd.Series:
+    """
+    Get an interval of length 2 * window_size:
+    window_size to the left and window_size to the right of center
+    If normalization == True, subtract the interval's minimum from it
+    """
     if center >= len(data):
         logging.warning('Pattern center {} is out of data with len {}'.format(center, len(data)))
         return []
@@ -201,7 +206,58 @@
         left_bound = 0
     if right_bound > len(data):
         right_bound = len(data)
-    return data[left_bound: right_bound]
+    result_interval = data[left_bound: right_bound]
+    if normalization:
+        result_interval = subtract_min_without_nan(result_interval)
+    return result_interval
+
+def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> List[Tuple[int, int]]:
+    """
+    Find the start and end of each peak pattern
+    max_border_factor scales the window in which a border is searched
+    If inverse == True, segments are inverted first (trough -> peak / peak -> trough)
+    """
+    if len(pattern_centers) == 0:
+        return []
+    border_list = []
+    window_size = math.ceil(max_border_factor * window_size)
+    for center in pattern_centers:
+        current_pattern = get_interval(data, center, window_size, True)
+        if inverse:
+            current_pattern = inverse_segment(current_pattern)
+        current_pattern = current_pattern - confidence
+        left_segment = current_pattern[:window_size]   # part of the window left of the center
+        right_segment = current_pattern[window_size:]  # the center and everything right of it
+        left_border = get_end_of_segment(left_segment, descending = False)
+        right_border = get_end_of_segment(right_segment)
+        border_list.append((left_border, right_border))
+    return border_list
+
+def get_end_of_segment(segment: pd.Series, skip_positive_values = True, descending = True) -> int:
+    """
+    Find the end of the descending or ascending part of a pattern
+    The allowable error is 1 index
+    """
+    if not descending:
+        segment = segment.iloc[::-1]
+    if len(segment) == 0:
+        return 1
+    for idx in range(1, len(segment) - 1):
+        if skip_positive_values and segment.values[idx] > 0:
+            continue
+        if segment.values[idx] >= segment.values[idx - 1]:
+            return segment.index[idx - 1]
+    return segment.index[-1]
+
+def inverse_segment(segment: pd.Series) -> pd.Series:
+    """
+    Convert a trough segment to a peak and vice versa
+    """
+    if len(segment) > 0:
+        rev_val = max(segment.values)
+        for idx in range(len(segment)):
+            segment.values[idx] = math.fabs(segment.values[idx] - rev_val)
+    return segment
 
 def subtract_min_without_nan(segment: pd.Series) -> pd.Series:
     if len(segment) == 0:
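A usage sketch of the new border helper, reusing the series and expected result from the test suite below; it assumes the script runs with analytics/analytics on the path so that utils resolves, as the tests do:

import pandas as pd
import utils  # the analytics utils package shown above

# A series with three peaks centered at indices 4, 12 and 22.
data = pd.Series([1, 0, 1, 2, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 2, 2, 1, 0, 1, 2, 3, 4, 5, 3, 2, 1, 0])
borders = utils.get_borders_of_peaks([4, 12, 22], data, window_size = 3, confidence = 1.5)
print(borders)  # [(1, 7), (9, 15), (19, 25)]

For troughs, passing inverse = True flips each window with inverse_segment first, so the same descent-tracing logic applies.
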
diff --git a/analytics/tests/test_dataset.py b/analytics/tests/test_dataset.py
index b59a1d4..50ce9de 100644
--- a/analytics/tests/test_dataset.py
+++ b/analytics/tests/test_dataset.py
@@ -180,7 +180,8 @@ class TestDataset(unittest.TestCase):
         for _ in range(2):
             model.do_detect(dataframe,'test')
         max_pattern_index = max(model.do_detect(dataframe, 'test'))
-        self.assertLessEqual(max_pattern_index, result)
+        self.assertLessEqual(max_pattern_index[0], result)
+
 
     def test_peak_model_for_cache(self):
         cache = {
diff --git a/analytics/tests/test_utils.py b/analytics/tests/test_utils.py
index a5f230e..b8a3144 100644
--- a/analytics/tests/test_utils.py
+++ b/analytics/tests/test_utils.py
@@ -16,7 +16,7 @@ class TestUtils(unittest.TestCase):
     def test_confidence_all_normal_value(self):
         segment = [1, 2, 0, 6, 8, 5, 3]
         utils_result = utils.find_confidence(segment)[0]
-        result = 1.6
+        result = 4.0
         self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
 
     def test_confidence_all_nan_value(self):
@@ -26,7 +26,7 @@
     def test_confidence_with_nan_value(self):
         data = [np.NaN, np.NaN, 0, 8]
         utils_result = utils.find_confidence(data)[0]
-        result = 1.6
+        result = 4.0
         self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
 
     def test_interval_all_normal_value(self):
@@ -248,6 +248,38 @@ class TestUtils(unittest.TestCase):
         corr_data = list(corr_data)
         self.assertGreaterEqual(len(corr_data), result)
 
+    def test_inverse_segment(self):
+        data = pd.Series([1,2,3,4,3,2,1])
+        result = pd.Series([3,2,1,0,1,2,3])
+        utils_result = utils.inverse_segment(data)
+        for ind, val in enumerate(utils_result):
+            self.assertEqual(val, result[ind])
+
+    def test_get_end_of_segment_equal(self):
+        data = pd.Series([5,4,3,2,1,0,0,0])
+        result_list = [4, 5, 6]
+        self.assertIn(utils.get_end_of_segment(data, False), result_list)
+
+    def test_get_end_of_segment_greater(self):
+        data = pd.Series([5,4,3,2,1,0,1,2,3])
+        result_list = [4, 5, 6]
+        self.assertIn(utils.get_end_of_segment(data, False), result_list)
+
+    def test_get_borders_of_peaks(self):
+        data = pd.Series([1,0,1,2,3,2,1,0,0,1,2,3,4,3,2,2,1,0,1,2,3,4,5,3,2,1,0])
+        pattern_center = [4, 12, 22]
+        ws = 3
+        confidence = 1.5
+        result = [(1, 7), (9, 15), (19, 25)]
+        self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence), result)
+
+    def test_get_borders_of_peaks_for_trough(self):
+        data = pd.Series([4,4,5,5,3,1,3,5,5,6,3,2])
+        pattern_center = [5]
+        ws = 5
+        confidence = 3
+        result = [(3, 7)]
+        self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence, inverse = True), result)
 
 if __name__ == '__main__':
     unittest.main()
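
The two updated confidence expectations follow directly from the CONFIDENCE_FACTOR bump from 0.2 to 0.5. A sketch of the arithmetic, assuming find_confidence scales the segment's value range; the function body here is a hypothetical reconstruction, not the repository code:

import numpy as np

CONFIDENCE_FACTOR = 0.5  # was 0.2

def find_confidence_sketch(segment) -> float:
    # Hypothetical: confidence as a fixed fraction of the NaN-safe value range.
    values = np.array(segment, dtype = float)
    value_range = np.nanmax(values) - np.nanmin(values)
    return CONFIDENCE_FACTOR * value_range

print(find_confidence_sketch([1, 2, 0, 6, 8, 5, 3]))  # 4.0 (0.5 * 8; was 1.6 with 0.2)
print(find_confidence_sketch([np.nan, np.nan, 0, 8]))  # 4.0, NaNs are ignored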