From ae06749b8d519ad61a38f60c4ac136b5c2c35e16 Mon Sep 17 00:00:00 2001 From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Mon, 27 May 2019 22:18:49 +0300 Subject: [PATCH] Refactoring of Peak and Trough models #424 (#680) --- .../analytics/analytic_types/learning_info.py | 17 ++ analytics/analytics/models/__init__.py | 5 +- analytics/analytics/models/custom_model.py | 18 ++- analytics/analytics/models/drop_model.py | 21 +-- analytics/analytics/models/general_model.py | 11 +- analytics/analytics/models/jump_model.py | 21 +-- analytics/analytics/models/model.py | 36 ++--- analytics/analytics/models/peak_model.py | 147 ++++------------- analytics/analytics/models/triangle_model.py | 119 ++++++++++++++ analytics/analytics/models/trough_model.py | 148 ++++-------------- analytics/analytics/utils/common.py | 17 +- analytics/tests/test_manager.py | 2 +- 12 files changed, 262 insertions(+), 300 deletions(-) create mode 100644 analytics/analytics/analytic_types/learning_info.py create mode 100644 analytics/analytics/models/triangle_model.py diff --git a/analytics/analytics/analytic_types/learning_info.py b/analytics/analytics/analytic_types/learning_info.py new file mode 100644 index 0000000..1f499b8 --- /dev/null +++ b/analytics/analytics/analytic_types/learning_info.py @@ -0,0 +1,17 @@ +import utils.meta + +@utils.meta.JSONClass +class LearningInfo: + + def __init__(self): + super().__init__() + self.confidence = [] + self.patterns_list = [] + self.pattern_width = [] + self.pattern_height = [] + self.pattern_timestamp = [] + self.segment_center_list = [] + self.patterns_value = [] + + def __str__(self): + return str(self.to_json()) \ No newline at end of file diff --git a/analytics/analytics/models/__init__.py b/analytics/analytics/models/__init__.py index 646427e..f7aaec2 100644 --- a/analytics/analytics/models/__init__.py +++ b/analytics/analytics/models/__init__.py @@ -1,7 +1,8 @@ from models.model import Model, ModelState, AnalyticSegment +from models.triangle_model import TriangleModel, TriangleModelState from models.drop_model import DropModel, DropModelState -from models.peak_model import PeakModel, PeakModelState +from models.peak_model import PeakModel from models.jump_model import JumpModel, JumpModelState from models.custom_model import CustomModel -from models.trough_model import TroughModel, TroughModelState +from models.trough_model import TroughModel from models.general_model import GeneralModel, GeneralModelState diff --git a/analytics/analytics/models/custom_model.py b/analytics/analytics/models/custom_model.py index ab7a856..d7db419 100644 --- a/analytics/analytics/models/custom_model.py +++ b/analytics/analytics/models/custom_model.py @@ -1,7 +1,10 @@ -from models import Model, AnalyticSegment +from models import Model, AnalyticSegment, ModelState +from analytic_types import AnalyticUnitId, ModelCache +from analytic_types.learning_info import LearningInfo import utils + import pandas as pd -from typing import List +from typing import List, Optional class CustomModel(Model): @@ -10,9 +13,18 @@ class CustomModel(Model): dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], - learning_info: dict + learning_info: LearningInfo ) -> None: pass def do_detect(self, dataframe: pd.DataFrame) -> list: return [] + + def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: + pass + + def get_model_type(self) -> (str, bool): + pass + + def get_state(self, cache: Optional[ModelCache] = None) 
-> ModelState: + pass diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py index 3053d8e..0460d05 100644 --- a/analytics/analytics/models/drop_model.py +++ b/analytics/analytics/models/drop_model.py @@ -9,7 +9,8 @@ import utils import utils.meta import numpy as np import pandas as pd -from analytic_types import AnalyticUnitId +from analytic_types import AnalyticUnitId, TimeSeries +from analytic_types.learning_info import LearningInfo @utils.meta.JSONClass class DropModelState(ModelState): @@ -48,17 +49,17 @@ class DropModel(Model): dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], - learning_info: dict + learning_info: LearningInfo ) -> None: data = utils.cut_dataframe(dataframe) data = data['value'] window_size = self.state.window_size last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list'])) - self.state.pattern_model = utils.get_av_model(learning_info['patterns_list']) + self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list)) + self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info['patterns_value'] + height_list = learning_info.patterns_value del_conv_list = [] delete_pattern_timestamp = [] @@ -70,18 +71,18 @@ class DropModel(Model): del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.state.pattern_model) if len(del_conv_drop): del_conv_list.append(max(del_conv_drop)) - self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list) - self.state.drop_height = int(min(learning_info['pattern_height'], default = 1)) - self.state.drop_length = int(max(learning_info['pattern_width'], default = 1)) + self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) + self.state.drop_height = int(min(learning_info.pattern_height, default = 1)) + self.state.drop_length = int(max(learning_info.pattern_width, default = 1)) - def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]: + def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: data = utils.cut_dataframe(dataframe) data = data['value'] possible_drops = utils.find_drop(data, self.state.drop_height, self.state.drop_length + 1) result = self.__filter_detection(possible_drops, data) return [(val - 1, val + 1) for val in result] - def __filter_detection(self, segments: list, data: list): + def __filter_detection(self, segments: List[int], data: list): delete_list = [] variance_error = self.state.window_size close_patterns = utils.close_filtering(segments, variance_error) diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py index d2911bd..d2dd5ff 100644 --- a/analytics/analytics/models/general_model.py +++ b/analytics/analytics/models/general_model.py @@ -16,7 +16,8 @@ import logging from typing import Optional, List, Tuple import math -from analytic_types import AnalyticUnitId +from analytic_types import AnalyticUnitId, TimeSeries +from analytic_types.learning_info import LearningInfo PEARSON_FACTOR = 0.7 @@ -48,13 +49,13 @@ class GeneralModel(Model): dataframe: pd.DataFrame, labeled_segments: 
List[AnalyticSegment], deleted_segments: List[AnalyticSegment], - learning_info: dict + learning_info: LearningInfo ) -> None: data = utils.cut_dataframe(dataframe) data = data['value'] last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list'])) - self.state.pattern_model = utils.get_av_model(learning_info['patterns_list']) + self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list)) + self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) @@ -71,7 +72,7 @@ class GeneralModel(Model): self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3) self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size) - def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]: + def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: data = utils.cut_dataframe(dataframe) data = data['value'] pat_data = self.state.pattern_model diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py index 7a6b056..71fb84e 100644 --- a/analytics/analytics/models/jump_model.py +++ b/analytics/analytics/models/jump_model.py @@ -10,7 +10,8 @@ from typing import Optional, List, Tuple import math from scipy.signal import argrelextrema from scipy.stats import gaussian_kde -from analytic_types import AnalyticUnitId +from analytic_types import AnalyticUnitId, TimeSeries +from analytic_types.learning_info import LearningInfo @utils.meta.JSONClass @@ -49,17 +50,17 @@ class JumpModel(Model): dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], - learning_info: dict + learning_info: LearningInfo ) -> None: data = utils.cut_dataframe(dataframe) data = data['value'] window_size = self.state.window_size last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list'])) - self.state.pattern_model = utils.get_av_model(learning_info['patterns_list']) + self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list)) + self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info['patterns_value'] + height_list = learning_info.patterns_value del_conv_list = [] delete_pattern_timestamp = [] @@ -71,18 +72,18 @@ class JumpModel(Model): del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.state.pattern_model) if len(del_conv_jump): del_conv_list.append(max(del_conv_jump)) - self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list) - self.state.jump_height = float(min(learning_info['pattern_height'], default = 1)) - self.state.jump_length = int(max(learning_info['pattern_width'], default = 1)) + self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) + self.state.jump_height = 
float(min(learning_info.pattern_height, default = 1)) + self.state.jump_length = int(max(learning_info.pattern_width, default = 1)) - def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]: + def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: data = utils.cut_dataframe(dataframe) data = data['value'] possible_jumps = utils.find_jump(data, self.state.jump_height, self.state.jump_length + 1) result = self.__filter_detection(possible_jumps, data) return [(val - 1, val + 1) for val in result] - def __filter_detection(self, segments, data): + def __filter_detection(self, segments: List[int], data: pd.Series): delete_list = [] variance_error = self.state.window_size close_patterns = utils.close_filtering(segments, variance_error) diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py index e72bf31..bba07ec 100644 --- a/analytics/analytics/models/model.py +++ b/analytics/analytics/models/model.py @@ -6,8 +6,9 @@ from typing import Optional, List, Tuple import pandas as pd import math import logging -from analytic_types import AnalyticUnitId, ModelCache +from analytic_types import AnalyticUnitId, ModelCache, TimeSeries from analytic_types.segment import Segment +from analytic_types.learning_info import LearningInfo import utils.meta @@ -96,12 +97,12 @@ class Model(ABC): dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], - learning_info: dict + learning_info: LearningInfo ) -> None: pass @abstractmethod - def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]: + def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: pass @abstractmethod @@ -146,7 +147,6 @@ class Model(ABC): if self.state.window_size == 0: self.state.window_size = math.ceil(max_length / 2) if max_length else 0 model, model_type = self.get_model_type() - # TODO: learning_info: dict -> class learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type) self.do_fit(dataframe, labeled, deleted, learning_info) logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id)) @@ -176,37 +176,29 @@ class Model(ABC): def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: str, model_type: bool) -> dict: logging.debug('Start parsing segments') - learning_info = { - 'confidence': [], - 'patterns_list': [], - 'pattern_width': [], - 'pattern_height': [], - 'pattern_timestamp': [], - 'segment_center_list': [], - 'patterns_value': [], - } + learning_info = LearningInfo() data = dataframe['value'] for segment in labeled: confidence = utils.find_confidence(segment.data)[0] - learning_info['confidence'].append(confidence) + learning_info.confidence.append(confidence) segment_center = segment.center_index - learning_info['segment_center_list'].append(segment_center) - learning_info['pattern_timestamp'].append(segment.pattern_timestamp) + learning_info.segment_center_list.append(segment_center) + learning_info.pattern_timestamp.append(segment.pattern_timestamp) aligned_segment = utils.get_interval(data, segment_center, self.state.window_size) aligned_segment = utils.subtract_min_without_nan(aligned_segment) if len(aligned_segment) == 0: logging.warning('cant add segment to learning because segment is empty where segments center is: {}, window_size: {}, and len_data: {}'.format( segment_center, self.state.window_size, len(data))) continue - 
learning_info['patterns_list'].append(aligned_segment) + learning_info.patterns_list.append(aligned_segment) if model == 'peak' or model == 'trough': - learning_info['pattern_height'].append(utils.find_confidence(aligned_segment)[1]) - learning_info['patterns_value'].append(aligned_segment.values.max()) + learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1]) + learning_info.patterns_value.append(aligned_segment.values.max()) if model == 'jump' or model == 'drop': pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model) - learning_info['pattern_height'].append(pattern_height) - learning_info['pattern_width'].append(pattern_length) - learning_info['patterns_value'].append(aligned_segment.values[self.state.window_size]) + learning_info.pattern_height.append(pattern_height) + learning_info.pattern_width.append(pattern_length) + learning_info.patterns_value.append(aligned_segment.values[self.state.window_size]) logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info)) return learning_info diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py index 2f993bf..dc60d7e 100644 --- a/analytics/analytics/models/peak_model.py +++ b/analytics/analytics/models/peak_model.py @@ -1,36 +1,14 @@ -from models import Model, ModelState, AnalyticSegment +from analytic_types import TimeSeries +from models import TriangleModel +import utils import scipy.signal -from scipy.fftpack import fft from scipy.signal import argrelextrema from typing import Optional, List, Tuple -import utils -import utils.meta import numpy as np import pandas as pd -from analytic_types import AnalyticUnitId - -SMOOTHING_COEFF = 2400 -EXP_SMOOTHING_FACTOR = 0.01 - -@utils.meta.JSONClass -class PeakModelState(ModelState): - - def __init__( - self, - confidence: float = 0, - height_max: float = 0, - height_min: float = 0, - **kwargs - ): - super().__init__(**kwargs) - self.confidence = confidence - self.height_max = height_max - self.height_min = height_min - - -class PeakModel(Model): +class PeakModel(TriangleModel): def get_model_type(self) -> (str, bool): model = 'peak' @@ -42,98 +20,27 @@ class PeakModel(Model): segment = data[start: end] return segment.idxmax() - def get_state(self, cache: Optional[dict] = None) -> PeakModelState: - return PeakModelState.from_json(cache) - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: dict - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = self.state.window_size - last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list'])) - self.state.pattern_model = utils.get_av_model(learning_info['patterns_list']) - convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info['patterns_value'] - - del_conv_list = [] - delete_pattern_width = [] - delete_pattern_height = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - del_max_index = segment.center_index - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted = utils.get_interval(data, del_max_index, window_size) - deleted = utils.subtract_min_without_nan(deleted) - 
del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model) - if len(del_conv): del_conv_list.append(max(del_conv)) - delete_pattern_height.append(utils.find_confidence(deleted)[1]) - - self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list) - - def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data - all_maxs = argrelextrema(np.array(data), np.greater)[0] - - extrema_list = [] - for i in utils.exponential_smoothing(data + self.state.confidence, EXP_SMOOTHING_FACTOR): - extrema_list.append(i) - + def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: + pattern_list = [] + for val in close_patterns: + max_val = data[val[0]] + ind = val[0] + for i in val: + if data[i] > max_val: + max_val = data[i] + ind = i + pattern_list.append(ind) + return pattern_list + + def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: + return argrelextrema(data.values, np.greater)[0] + + def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: + return utils.exponential_smoothing(data + self.state.confidence, alpha) + + def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]: segments = [] - for i in all_maxs: - if data[i] > extrema_list[i]: - segments.append(i) - result = self.__filter_detection(segments, data) - result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence) - return result - - def __filter_detection(self, segments: list, data: list) -> list: - delete_list = [] - variance_error = self.state.window_size - close_patterns = utils.close_filtering(segments, variance_error) - segments = utils.best_pattern(close_patterns, data, 'max') - - if len(segments) == 0 or len(self.state.pattern_model) == 0: - return [] - pattern_data = self.state.pattern_model - up_height = self.state.height_max * (1 + self.HEIGHT_ERROR) - low_height = self.state.height_min * (1 - self.HEIGHT_ERROR) - up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR) - low_conv = self.state.convolve_min * (1 - self.CONV_ERROR) - up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR) - low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR) - for segment in segments: - if segment > self.state.window_size: - convol_data = utils.get_interval(data, segment, self.state.window_size) - convol_data = utils.subtract_min_without_nan(convol_data) - percent_of_nans = convol_data.isnull().sum() / len(convol_data) - if percent_of_nans > 0.5: - delete_list.append(segment) - continue - elif 0 < percent_of_nans <= 0.5: - nan_list = utils.find_nan_indexes(convol_data) - convol_data = utils.nan_to_zero(convol_data, nan_list) - pattern_data = utils.nan_to_zero(pattern_data, nan_list) - conv = scipy.signal.fftconvolve(convol_data, pattern_data) - pattern_height = convol_data.values[self.state.window_size] - if pattern_height > up_height or pattern_height < low_height: - delete_list.append(segment) - continue - if max(conv) > up_conv or max(conv) < low_conv: - delete_list.append(segment) - continue - if max(conv) < up_del_conv and max(conv) > low_del_conv: - delete_list.append(segment) - else: - delete_list.append(segment) - for item in delete_list: - segments.remove(item) - return set(segments) + for idx in peak_indexes: + if data[idx] > 
smoothed_data[idx]: + segments.append(idx) + return segments diff --git a/analytics/analytics/models/triangle_model.py b/analytics/analytics/models/triangle_model.py new file mode 100644 index 0000000..9782dad --- /dev/null +++ b/analytics/analytics/models/triangle_model.py @@ -0,0 +1,119 @@ +from analytic_types import AnalyticUnitId, TimeSeries +from analytic_types.learning_info import LearningInfo +from models import Model, ModelState, AnalyticSegment +import utils +import utils.meta + +import scipy.signal +from scipy.fftpack import fft +from typing import Optional, List, Tuple +import numpy as np +import pandas as pd + + +EXP_SMOOTHING_FACTOR = 0.01 + + +@utils.meta.JSONClass +class TriangleModelState(ModelState): + + def __init__( + self, + confidence: float = 0, + height_max: float = 0, + height_min: float = 0, + **kwargs + ): + super().__init__(**kwargs) + self.confidence = confidence + self.height_max = height_max + self.height_min = height_min + +class TriangleModel(Model): + + def get_state(self, cache: Optional[dict] = None) -> TriangleModelState: + return TriangleModelState.from_json(cache) + + def do_fit( + self, + dataframe: pd.DataFrame, + labeled_segments: List[AnalyticSegment], + deleted_segments: List[AnalyticSegment], + learning_info: LearningInfo + ) -> None: + data = utils.cut_dataframe(dataframe) + data = data['value'] + self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list)) + self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) + convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) + correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) + height_list = learning_info.patterns_value + + del_conv_list = [] + delete_pattern_width = [] + delete_pattern_height = [] + delete_pattern_timestamp = [] + for segment in deleted_segments: + delete_pattern_timestamp.append(segment.pattern_timestamp) + deleted = utils.get_interval(data, segment.center_index, self.state.window_size) + deleted = utils.subtract_min_without_nan(deleted) + del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model) + if len(del_conv): + del_conv_list.append(max(del_conv)) + delete_pattern_height.append(utils.find_confidence(deleted)[1]) + + self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list) + + def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: + data = utils.cut_dataframe(dataframe) + data = data['value'] + + all_extremum_indexes = self.get_extremum_indexes(data) + smoothed_data = self.get_smoothed_data(data, self.state.confidence, EXP_SMOOTHING_FACTOR) + segments = self.get_possible_segments(data, smoothed_data, all_extremum_indexes) + result = self.__filter_detection(segments, data) + result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence) + return result + + def __filter_detection(self, segments: List[int], data: pd.Series) -> list: + delete_list = [] + variance_error = self.state.window_size + close_patterns = utils.close_filtering(segments, variance_error) + segments = self.get_best_pattern(close_patterns, data) + + if len(segments) == 0 or len(self.state.pattern_model) == 0: + return [] + pattern_data = self.state.pattern_model + up_height = self.state.height_max * (1 + self.HEIGHT_ERROR) + low_height = self.state.height_min * (1 - self.HEIGHT_ERROR) + up_conv = 
self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR) + low_conv = self.state.convolve_min * (1 - self.CONV_ERROR) + up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR) + low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR) + for segment in segments: + if segment > self.state.window_size: + convol_data = utils.get_interval(data, segment, self.state.window_size) + convol_data = utils.subtract_min_without_nan(convol_data) + percent_of_nans = convol_data.isnull().sum() / len(convol_data) + if percent_of_nans > 0.5: + delete_list.append(segment) + continue + elif 0 < percent_of_nans <= 0.5: + nan_list = utils.find_nan_indexes(convol_data) + convol_data = utils.nan_to_zero(convol_data, nan_list) + pattern_data = utils.nan_to_zero(pattern_data, nan_list) + conv = scipy.signal.fftconvolve(convol_data, pattern_data) + pattern_height = convol_data.values.max() + if pattern_height > up_height or pattern_height < low_height: + delete_list.append(segment) + continue + if max(conv) > up_conv or max(conv) < low_conv: + delete_list.append(segment) + continue + if max(conv) < up_del_conv and max(conv) > low_del_conv: + delete_list.append(segment) + else: + delete_list.append(segment) + for item in delete_list: + segments.remove(item) + return set(segments) diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py index 47838b7..541cf79 100644 --- a/analytics/analytics/models/trough_model.py +++ b/analytics/analytics/models/trough_model.py @@ -1,36 +1,14 @@ -from models import Model, ModelState, AnalyticSegment +from analytic_types import TimeSeries +from models import TriangleModel +import utils import scipy.signal -from scipy.fftpack import fft from scipy.signal import argrelextrema from typing import Optional, List, Tuple -import utils -import utils.meta import numpy as np import pandas as pd -from analytic_types import AnalyticUnitId - -SMOOTHING_COEFF = 2400 -EXP_SMOOTHING_FACTOR = 0.01 - -@utils.meta.JSONClass -class TroughModelState(ModelState): - - def __init__( - self, - confidence: float = 0, - height_max: float = 0, - height_min: float = 0, - **kwargs - ): - super().__init__(**kwargs) - self.confidence = confidence - self.height_max = height_max - self.height_min = height_min - - -class TroughModel(Model): +class TroughModel(TriangleModel): def get_model_type(self) -> (str, bool): model = 'trough' @@ -42,99 +20,27 @@ class TroughModel(Model): segment = data[start: end] return segment.idxmin() - def get_state(self, cache: Optional[dict] = None) -> TroughModelState: - return TroughModelState.from_json(cache) - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: dict - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = self.state.window_size - last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list'])) - self.state.pattern_model = utils.get_av_model(learning_info['patterns_list']) - convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info['patterns_value'] - - del_conv_list = [] - delete_pattern_width = [] - delete_pattern_height = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - 
del_min_index = segment.center_index - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted = utils.get_interval(data, del_min_index, window_size) - deleted = utils.subtract_min_without_nan(deleted) - del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model) - if len(del_conv): del_conv_list.append(max(del_conv)) - delete_pattern_height.append(utils.find_confidence(deleted)[1]) - - self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list) - - def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data - all_mins = argrelextrema(np.array(data), np.less)[0] - - extrema_list = [] - for i in utils.exponential_smoothing(data - self.state.confidence, EXP_SMOOTHING_FACTOR): - extrema_list.append(i) - + def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: + pattern_list = [] + for val in close_patterns: + min_val = data[val[0]] + ind = val[0] + for i in val: + if data[i] < min_val: + min_val = data[i] + ind = i + pattern_list.append(ind) + return pattern_list + + def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: + return argrelextrema(data.values, np.less)[0] + + def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: + return utils.exponential_smoothing(data - self.state.confidence, alpha) + + def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, trough_indexes: List[int]) -> List[int]: segments = [] - for i in all_mins: - if data[i] < extrema_list[i]: - segments.append(i) - result = self.__filter_detection(segments, data) - result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence, inverse = True) - return result - - def __filter_detection(self, segments: list, data: list) -> list: - delete_list = [] - variance_error = self.state.window_size - close_patterns = utils.close_filtering(segments, variance_error) - segments = utils.best_pattern(close_patterns, data, 'min') - if len(segments) == 0 or len(self.state.pattern_center) == 0: - segments = [] - return segments - pattern_data = self.state.pattern_model - up_height = self.state.height_max * (1 + self.HEIGHT_ERROR) - low_height = self.state.height_min * (1 - self.HEIGHT_ERROR) - up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR) - low_conv = self.state.convolve_min * (1 - self.CONV_ERROR) - up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR) - low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR) - for segment in segments: - if segment > self.state.window_size: - convol_data = utils.get_interval(data, segment, self.state.window_size) - convol_data = utils.subtract_min_without_nan(convol_data) - percent_of_nans = convol_data.isnull().sum() / len(convol_data) - if percent_of_nans > 0.5: - delete_list.append(segment) - continue - elif 0 < percent_of_nans <= 0.5: - nan_list = utils.find_nan_indexes(convol_data) - convol_data = utils.nan_to_zero(convol_data, nan_list) - pattern_data = utils.nan_to_zero(pattern_data, nan_list) - conv = scipy.signal.fftconvolve(convol_data, pattern_data) - pattern_height = convol_data.values.max() - if pattern_height > up_height or pattern_height < low_height: - delete_list.append(segment) - continue - if max(conv) > up_conv or max(conv) < low_conv: - delete_list.append(segment) - continue - if max(conv) < up_del_conv 
and max(conv) > low_del_conv: - delete_list.append(segment) - else: - delete_list.append(segment) - for item in delete_list: - segments.remove(item) - - return set(segments) + for idx in trough_indexes: + if data[idx] < smoothed_data[idx]: + segments.append(idx) + return segments diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py index 5bd8c4e..7191b9a 100644 --- a/analytics/analytics/utils/common.py +++ b/analytics/analytics/utils/common.py @@ -59,7 +59,10 @@ def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) pattern_list.append(i) return pattern_list -def find_jump(data, height, lenght): +def find_jump(data, height, lenght) -> List[int]: + ''' + Find jump indexes + ''' j_list = [] for i in range(len(data)-lenght-1): for x in range(1, lenght): @@ -67,7 +70,10 @@ def find_jump(data, height, lenght): j_list.append(i) return(j_list) -def find_drop(data, height, length): +def find_drop(data, height, length) -> List[int]: + ''' + Find drop indexes + ''' d_list = [] for i in range(len(data)-length-1): for x in range(1, length): @@ -116,7 +122,7 @@ def get_same_length(patterns_list): pat.extend(added_values) return patterns_list -def close_filtering(pattern_list: List[int], win_size: int) -> List[Tuple[int, int]]: +def close_filtering(pattern_list: List[int], win_size: int) -> TimeSeries: if len(pattern_list) == 0: return [] s = [[pattern_list[0]]] @@ -152,7 +158,7 @@ def find_interval(dataframe: pd.DataFrame) -> int: delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0]) return delta -def get_start_and_end_of_segments(segments: List[List[int]]) -> List[Tuple[int, int]]: +def get_start_and_end_of_segments(segments: List[List[int]]) -> TimeSeries: ''' find start and end of segment: [1, 2, 3, 4] -> [1, 4] if segment is 1 index - it will be doubled: [7] -> [7, 7] @@ -168,7 +174,6 @@ def get_start_and_end_of_segments(segments: List[List[int]]) -> List[Tuple[int, result.append(segment) return result - def best_pattern(pattern_list: list, data: pd.Series, dir: str) -> list: new_pattern_list = [] for val in pattern_list: @@ -261,7 +266,7 @@ def get_interval(data: pd.Series, center: int, window_size: int, normalization = result_interval = subtract_min_without_nan(result_interval) return result_interval -def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> List[Tuple[int, int]]: +def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> TimeSeries: """ Find start and end of patterns for peak max_border_factor - final border of pattern diff --git a/analytics/tests/test_manager.py b/analytics/tests/test_manager.py index 813f5c2..3fc3fed 100644 --- a/analytics/tests/test_manager.py +++ b/analytics/tests/test_manager.py @@ -1,5 +1,5 @@ from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel -from models import PeakModelState, DropModelState, TroughModelState, JumpModelState, GeneralModelState +from models import DropModelState, JumpModelState, GeneralModelState import utils.meta import aiounittest from analytic_unit_manager import AnalyticUnitManager
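
The first new file, analytic_types/learning_info.py, replaces the ad-hoc learning_info dict that get_parameters_from_segments used to build with a small LearningInfo container, so do_fit implementations switch from string-keyed lookups (learning_info['patterns_list']) to attribute access (learning_info.patterns_list). As a rough, plain-Python illustration of what that container provides — the dataclass form, the name LearningInfoSketch, and the assumption that the project's utils.meta.JSONClass decorator essentially exposes the instance fields as a dict are mine, not part of the patch:

# Hypothetical stand-in for the new LearningInfo class, written with dataclasses for
# illustration. Field names match the patch; to_json() assumes the JSONClass decorator
# mainly serializes the instance fields to a dict, which is inferred, not shown here.
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List

@dataclass
class LearningInfoSketch:
    confidence: List[float] = field(default_factory=list)
    patterns_list: List[Any] = field(default_factory=list)
    pattern_width: List[int] = field(default_factory=list)
    pattern_height: List[float] = field(default_factory=list)
    pattern_timestamp: List[Any] = field(default_factory=list)
    segment_center_list: List[int] = field(default_factory=list)
    patterns_value: List[float] = field(default_factory=list)

    def to_json(self) -> Dict[str, Any]:
        return asdict(self)

# Callers now use attribute access instead of string keys:
info = LearningInfoSketch()
info.segment_center_list.append(42)
info.patterns_value.append(3.5)
print(info.to_json())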
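
The core of the refactor is the new TriangleModel base class: the shared do_fit/do_detect/__filter_detection pipeline moves there, and PeakModel/TroughModel keep only the extremum-specific hooks (get_model_type, find_segment_center, get_extremum_indexes, get_smoothed_data, get_possible_segments, get_best_pattern). The sketch below is a simplified, self-contained illustration of that template-method split; the hook names mirror the diff, but the class names, the detect() entry point and the pandas ewm smoothing are stand-ins for the project's Model API and utils helpers, so this is not the project's actual code:

# Simplified sketch of the TriangleModel template-method split introduced by this patch.
# Hook names follow the diff; the smoothing/selection bodies are illustrative stand-ins.
from abc import ABC, abstractmethod
from typing import List

import numpy as np
import pandas as pd
from scipy.signal import argrelextrema

EXP_SMOOTHING_FACTOR = 0.01

class TriangleSketch(ABC):
    def __init__(self, confidence: float = 0.0):
        self.confidence = confidence

    def detect(self, data: pd.Series) -> List[int]:
        # Shared pipeline: find raw extrema, smooth the series, then keep only the
        # extrema that cross the smoothed line (subclasses decide the direction).
        extremum_indexes = self.get_extremum_indexes(data)
        smoothed = self.get_smoothed_data(data, self.confidence, EXP_SMOOTHING_FACTOR)
        return self.get_possible_segments(data, smoothed, extremum_indexes)

    @abstractmethod
    def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: ...

    @abstractmethod
    def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: ...

    @abstractmethod
    def get_possible_segments(self, data: pd.Series, smoothed: pd.Series, indexes) -> List[int]: ...

class PeakSketch(TriangleSketch):
    def get_extremum_indexes(self, data):
        return argrelextrema(data.values, np.greater)[0]

    def get_smoothed_data(self, data, confidence, alpha):
        # The real code calls utils.exponential_smoothing(data + confidence, alpha);
        # pandas ewm is used here only to keep the sketch self-contained.
        return (data + confidence).ewm(alpha=alpha).mean()

    def get_possible_segments(self, data, smoothed, indexes):
        return [int(i) for i in indexes if data[i] > smoothed[i]]

class TroughSketch(TriangleSketch):
    def get_extremum_indexes(self, data):
        return argrelextrema(data.values, np.less)[0]

    def get_smoothed_data(self, data, confidence, alpha):
        return (data - confidence).ewm(alpha=alpha).mean()

    def get_possible_segments(self, data, smoothed, indexes):
        return [int(i) for i in indexes if data[i] < smoothed[i]]

if __name__ == '__main__':
    series = pd.Series([0, 1, 5, 1, 0, -1, -6, -1, 0, 2, 7, 2, 0])
    print('peaks:', PeakSketch(confidence=0.5).detect(series))      # -> [2, 10]
    print('troughs:', TroughSketch(confidence=0.5).detect(series))  # -> [6]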
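
Within TriangleModel.__filter_detection, raw detections are first grouped by utils.close_filtering (indexes closer than window_size end up in one group) and each group is then reduced to a single index by the new get_best_pattern hook: the highest value for PeakModel, the lowest for TroughModel. A minimal sketch of that group-then-select step, with the grouping re-implemented here for illustration only (group_close and best_peak_per_group are made-up names, and the real close_filtering may differ in edge cases):

# Illustrative re-implementation of the group-then-select step in __filter_detection.
from typing import List
import pandas as pd

def group_close(indexes: List[int], win_size: int) -> List[List[int]]:
    # Put detection indexes that are within win_size of the previous one into one group.
    groups: List[List[int]] = []
    for idx in sorted(indexes):
        if groups and idx - groups[-1][-1] <= win_size:
            groups[-1].append(idx)
        else:
            groups.append([idx])
    return groups

def best_peak_per_group(groups: List[List[int]], data: pd.Series) -> List[int]:
    # A peak model keeps the highest point of each group; a trough model would keep
    # the lowest (min instead of max).
    return [max(group, key=lambda i: data[i]) for group in groups]

data = pd.Series([0, 4, 5, 4, 0, 0, 3, 6, 3, 0])
detections = [1, 2, 3, 7]
print(best_peak_per_group(group_close(detections, win_size=2), data))  # -> [2, 7]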