From b53b49dcf74766a6ecd495a92cbbe2f0a148dbb4 Mon Sep 17 00:00:00 2001 From: Alexander Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Tue, 7 Apr 2020 19:32:26 +0300 Subject: [PATCH] Refactoring of Jump and Drop models (#865) * add stair model * add stair model method * add types * fix * add tests for get stair * fix * fix imports * add todo * fixes * get stair indexes to stair model * fixes * remove old methods * use enum * fix get_model_type * remove exception * list(set) -> utils.remove_duplicates * refactor get_stair * fixes * fixes 2 * fixes 3 * todo --- analytics/analytics/models/__init__.py | 7 +- analytics/analytics/models/custom_model.py | 4 +- analytics/analytics/models/drop_model.py | 125 +--------------- analytics/analytics/models/general_model.py | 10 +- analytics/analytics/models/jump_model.py | 127 +--------------- analytics/analytics/models/model.py | 36 +++-- analytics/analytics/models/peak_model.py | 8 +- analytics/analytics/models/stair_model.py | 147 +++++++++++++++++++ analytics/analytics/models/triangle_model.py | 4 +- analytics/analytics/models/trough_model.py | 8 +- analytics/analytics/utils/common.py | 26 +--- analytics/tests/test_manager.py | 2 +- analytics/tests/test_models.py | 43 ++++++ analytics/tests/test_utils.py | 30 +--- 14 files changed, 257 insertions(+), 320 deletions(-) create mode 100644 analytics/analytics/models/stair_model.py create mode 100644 analytics/tests/test_models.py diff --git a/analytics/analytics/models/__init__.py b/analytics/analytics/models/__init__.py index f7aaec2..1241fec 100644 --- a/analytics/analytics/models/__init__.py +++ b/analytics/analytics/models/__init__.py @@ -1,8 +1,9 @@ -from models.model import Model, ModelState, AnalyticSegment +from models.model import Model, ModelState, AnalyticSegment, ModelType, ExtremumType from models.triangle_model import TriangleModel, TriangleModelState -from models.drop_model import DropModel, DropModelState +from models.stair_model import StairModel, StairModelState +from models.drop_model import DropModel from models.peak_model import PeakModel -from models.jump_model import JumpModel, JumpModelState +from models.jump_model import JumpModel from models.custom_model import CustomModel from models.trough_model import TroughModel from models.general_model import GeneralModel, GeneralModelState diff --git a/analytics/analytics/models/custom_model.py b/analytics/analytics/models/custom_model.py index d7db419..37fa039 100644 --- a/analytics/analytics/models/custom_model.py +++ b/analytics/analytics/models/custom_model.py @@ -1,4 +1,4 @@ -from models import Model, AnalyticSegment, ModelState +from models import Model, AnalyticSegment, ModelState, ModelType from analytic_types import AnalyticUnitId, ModelCache from analytic_types.learning_info import LearningInfo import utils @@ -23,7 +23,7 @@ class CustomModel(Model): def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: pass - def get_model_type(self) -> (str, bool): + def get_model_type(self) -> ModelType: pass def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py index 0460d05..f38db6b 100644 --- a/analytics/analytics/models/drop_model.py +++ b/analytics/analytics/models/drop_model.py @@ -1,122 +1,9 @@ -from models import Model, ModelState, AnalyticSegment +from models import StairModel, ModelType, ExtremumType -import scipy.signal -from scipy.fftpack import fft -from scipy.signal import argrelextrema -from scipy.stats import gaussian_kde -from typing import Optional, List, Tuple -import utils -import utils.meta -import numpy as np -import pandas as pd -from analytic_types import AnalyticUnitId, TimeSeries -from analytic_types.learning_info import LearningInfo +class DropModel(StairModel): -@utils.meta.JSONClass -class DropModelState(ModelState): + def get_model_type(self) -> ModelType: + return ModelType.DROP - def __init__( - self, - confidence: float = 0, - drop_height: float = 0, - drop_length: float = 0, - **kwargs - ): - super().__init__(**kwargs) - self.confidence = confidence - self.drop_height = drop_height - self.drop_length = drop_length - - -class DropModel(Model): - - def get_model_type(self) -> (str, bool): - model = 'drop' - type_model = False - return (model, type_model) - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - data = dataframe['value'] - segment = data[start: end] - segment_center_index = utils.find_pattern_center(segment, start, 'drop') - return segment_center_index - - def get_state(self, cache: Optional[dict] = None) -> DropModelState: - return DropModelState.from_json(cache) - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = self.state.window_size - last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list)) - self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) - convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info.patterns_value - - del_conv_list = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - segment_cent_index = segment.center_index - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted_drop = utils.get_interval(data, segment_cent_index, window_size) - deleted_drop = utils.subtract_min_without_nan(deleted_drop) - del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.state.pattern_model) - if len(del_conv_drop): del_conv_list.append(max(del_conv_drop)) - - self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) - self.state.drop_height = int(min(learning_info.pattern_height, default = 1)) - self.state.drop_length = int(max(learning_info.pattern_width, default = 1)) - - def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: - data = utils.cut_dataframe(dataframe) - data = data['value'] - possible_drops = utils.find_drop(data, self.state.drop_height, self.state.drop_length + 1) - result = self.__filter_detection(possible_drops, data) - return [(val - 1, val + 1) for val in result] - - def __filter_detection(self, segments: List[int], data: list): - delete_list = [] - variance_error = self.state.window_size - close_patterns = utils.close_filtering(segments, variance_error) - segments = utils.best_pattern(close_patterns, data, 'min') - if len(segments) == 0 or len(self.state.pattern_center) == 0: - segments = [] - return segments - pattern_data = self.state.pattern_model - for segment in segments: - if segment > self.state.window_size and segment < (len(data) - self.state.window_size): - convol_data = utils.get_interval(data, segment, self.state.window_size) - percent_of_nans = convol_data.isnull().sum() / len(convol_data) - if len(convol_data) == 0 or percent_of_nans > 0.5: - delete_list.append(segment) - continue - elif 0 < percent_of_nans <= 0.5: - nan_list = utils.find_nan_indexes(convol_data) - convol_data = utils.nan_to_zero(convol_data, nan_list) - pattern_data = utils.nan_to_zero(pattern_data, nan_list) - conv = scipy.signal.fftconvolve(convol_data, pattern_data) - upper_bound = self.state.convolve_max * 1.2 - lower_bound = self.state.convolve_min * 0.8 - delete_up_bound = self.state.conv_del_max * 1.02 - delete_low_bound = self.state.conv_del_min * 0.98 - try: - if max(conv) > upper_bound or max(conv) < lower_bound: - delete_list.append(segment) - elif max(conv) < delete_up_bound and max(conv) > delete_low_bound: - delete_list.append(segment) - except ValueError: - delete_list.append(segment) - else: - delete_list.append(segment) - - for item in delete_list: - segments.remove(item) - return set(segments) + def get_extremum_type(self) -> ExtremumType: + return ExtremumType.MIN diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py index d2dd5ff..0671502 100644 --- a/analytics/analytics/models/general_model.py +++ b/analytics/analytics/models/general_model.py @@ -1,5 +1,5 @@ from analytic_types import AnalyticUnitId -from models import Model, ModelState, AnalyticSegment +from models import Model, ModelState, AnalyticSegment, ModelType from typing import Union, List, Generator import utils import utils.meta @@ -30,10 +30,8 @@ class GeneralModelState(ModelState): class GeneralModel(Model): - def get_model_type(self) -> (str, bool): - model = 'general' - type_model = True - return (model, type_model) + def get_model_type(self) -> ModelType: + return ModelType.GENERAL def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: data = dataframe['value'] @@ -54,7 +52,7 @@ class GeneralModel(Model): data = utils.cut_dataframe(dataframe) data = data['value'] last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list)) + self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py index 71fb84e..5195fac 100644 --- a/analytics/analytics/models/jump_model.py +++ b/analytics/analytics/models/jump_model.py @@ -1,124 +1,9 @@ -from models import Model, ModelState, AnalyticSegment +from models import StairModel, ModelType, ExtremumType -import utils -import utils.meta -import numpy as np -import pandas as pd -import scipy.signal -from scipy.fftpack import fft -from typing import Optional, List, Tuple -import math -from scipy.signal import argrelextrema -from scipy.stats import gaussian_kde -from analytic_types import AnalyticUnitId, TimeSeries -from analytic_types.learning_info import LearningInfo +class JumpModel(StairModel): + def get_model_type(self) -> ModelType: + return ModelType.JUMP -@utils.meta.JSONClass -class JumpModelState(ModelState): - def __init__( - self, - confidence: float = 0, - jump_height: float = 0, - jump_length: float = 0, - **kwargs - ): - super().__init__(**kwargs) - self.confidence = confidence - self.jump_height = jump_height - self.jump_length = jump_length - - -class JumpModel(Model): - - def get_model_type(self) -> (str, bool): - model = 'jump' - type_model = True - return (model, type_model) - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - data = dataframe['value'] - segment = data[start: end] - segment_center_index = utils.find_pattern_center(segment, start, 'jump') - return segment_center_index - - def get_state(self, cache: Optional[dict] = None) -> JumpModelState: - return JumpModelState.from_json(cache) - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = self.state.window_size - last_pattern_center = self.state.pattern_center - self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list)) - self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) - convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info.patterns_value - - del_conv_list = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - segment_cent_index = segment.center_index - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted_jump = utils.get_interval(data, segment_cent_index, window_size) - deleted_jump = utils.subtract_min_without_nan(deleted_jump) - del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.state.pattern_model) - if len(del_conv_jump): del_conv_list.append(max(del_conv_jump)) - - self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) - self.state.jump_height = float(min(learning_info.pattern_height, default = 1)) - self.state.jump_length = int(max(learning_info.pattern_width, default = 1)) - - def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: - data = utils.cut_dataframe(dataframe) - data = data['value'] - possible_jumps = utils.find_jump(data, self.state.jump_height, self.state.jump_length + 1) - result = self.__filter_detection(possible_jumps, data) - return [(val - 1, val + 1) for val in result] - - def __filter_detection(self, segments: List[int], data: pd.Series): - delete_list = [] - variance_error = self.state.window_size - close_patterns = utils.close_filtering(segments, variance_error) - segments = utils.best_pattern(close_patterns, data, 'max') - - if len(segments) == 0 or len(self.state.pattern_center) == 0: - segments = [] - return segments - pattern_data = self.state.pattern_model - upper_bound = self.state.convolve_max * 1.2 - lower_bound = self.state.convolve_min * 0.8 - delete_up_bound = self.state.conv_del_max * 1.02 - delete_low_bound = self.state.conv_del_min * 0.98 - for segment in segments: - if segment > self.state.window_size and segment < (len(data) - self.state.window_size): - convol_data = utils.get_interval(data, segment, self.state.window_size) - percent_of_nans = convol_data.isnull().sum() / len(convol_data) - if len(convol_data) == 0 or percent_of_nans > 0.5: - delete_list.append(segment) - continue - elif 0 < percent_of_nans <= 0.5: - nan_list = utils.find_nan_indexes(convol_data) - convol_data = utils.nan_to_zero(convol_data, nan_list) - pattern_data = utils.nan_to_zero(pattern_data, nan_list) - conv = scipy.signal.fftconvolve(convol_data, pattern_data) - try: - if max(conv) > upper_bound or max(conv) < lower_bound: - delete_list.append(segment) - elif max(conv) < delete_up_bound and max(conv) > delete_low_bound: - delete_list.append(segment) - except ValueError: - delete_list.append(segment) - else: - delete_list.append(segment) - for item in delete_list: - segments.remove(item) - - return set(segments) + def get_extremum_type(self) -> ExtremumType: + return ExtremumType.MAX diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py index a749585..dba057d 100644 --- a/analytics/analytics/models/model.py +++ b/analytics/analytics/models/model.py @@ -1,4 +1,9 @@ +from analytic_types import AnalyticUnitId, ModelCache, TimeSeries +from analytic_types.segment import Segment +from analytic_types.learning_info import LearningInfo + import utils +import utils.meta from abc import ABC, abstractmethod from attrdict import AttrDict @@ -6,11 +11,18 @@ from typing import Optional, List, Tuple import pandas as pd import math import logging -from analytic_types import AnalyticUnitId, ModelCache, TimeSeries -from analytic_types.segment import Segment -from analytic_types.learning_info import LearningInfo +from enum import Enum -import utils.meta +class ModelType(Enum): + JUMP = 'jump' + DROP = 'drop' + PEAK = 'peak' + TROUGH = 'trough' + GENERAL = 'general' + +class ExtremumType(Enum): + MAX = 'max' + MIN = 'min' class AnalyticSegment(Segment): ''' @@ -121,7 +133,7 @@ class Model(ABC): pass @abstractmethod - def get_model_type(self) -> (str, bool): + def get_model_type(self) -> ModelType: pass @abstractmethod @@ -160,8 +172,7 @@ class Model(ABC): if self.state.window_size == 0: self.state.window_size = math.ceil(max_length / 2) if max_length else 0 - model, model_type = self.get_model_type() - learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type) + learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, self.get_model_type()) self.do_fit(dataframe, labeled, deleted, learning_info) logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id)) return self.state @@ -181,14 +192,14 @@ class Model(ABC): 'cache': self.state, } - def _update_fiting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None: + def _update_fitting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None: state.confidence = float(min(confidences, default = 1.5)) state.convolve_min, state.convolve_max = utils.get_min_max(convolve_list, state.window_size) state.conv_del_min, state.conv_del_max = utils.get_min_max(del_conv_list, 0) if height_list is not None: state.height_min, state.height_max = utils.get_min_max(height_list, 0) - def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: str, model_type: bool) -> dict: + def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: ModelType) -> dict: logging.debug('Start parsing segments') learning_info = LearningInfo() data = dataframe['value'] @@ -205,11 +216,12 @@ class Model(ABC): segment_center, self.state.window_size, len(data))) continue learning_info.patterns_list.append(aligned_segment) - if model == 'peak' or model == 'trough': + # TODO: use Triangle/Stair types + if model == ModelType.PEAK or model == ModelType.TROUGH: learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1]) learning_info.patterns_value.append(aligned_segment.values.max()) - if model == 'jump' or model == 'drop': - pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model) + if model == ModelType.JUMP or model == ModelType.DROP: + pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model.value) learning_info.pattern_height.append(pattern_height) learning_info.pattern_width.append(pattern_length) learning_info.patterns_value.append(aligned_segment.values[self.state.window_size]) diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py index dc60d7e..843f291 100644 --- a/analytics/analytics/models/peak_model.py +++ b/analytics/analytics/models/peak_model.py @@ -1,5 +1,5 @@ from analytic_types import TimeSeries -from models import TriangleModel +from models import TriangleModel, ModelType import utils import scipy.signal @@ -10,10 +10,8 @@ import pandas as pd class PeakModel(TriangleModel): - def get_model_type(self) -> (str, bool): - model = 'peak' - type_model = True - return (model, type_model) + def get_model_type(self) -> ModelType: + return ModelType.PEAK def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: data = dataframe['value'] diff --git a/analytics/analytics/models/stair_model.py b/analytics/analytics/models/stair_model.py new file mode 100644 index 0000000..96549af --- /dev/null +++ b/analytics/analytics/models/stair_model.py @@ -0,0 +1,147 @@ +from models import Model, ModelState, AnalyticSegment, ModelType + +from analytic_types import TimeSeries +from analytic_types.learning_info import LearningInfo + +from scipy.fftpack import fft +from typing import Optional, List +from enum import Enum +import scipy.signal +import utils +import utils.meta +import pandas as pd +import numpy as np +import operator + +POSITIVE_SEGMENT_MEASUREMENT_ERROR = 0.2 +NEGATIVE_SEGMENT_MEASUREMENT_ERROR = 0.02 + +@utils.meta.JSONClass +class StairModelState(ModelState): + + def __init__( + self, + confidence: float = 0, + stair_height: float = 0, + stair_length: float = 0, + **kwargs + ): + super().__init__(**kwargs) + self.confidence = confidence + self.stair_height = stair_height + self.stair_length = stair_length + + +class StairModel(Model): + + def get_state(self, cache: Optional[dict] = None) -> StairModelState: + return StairModelState.from_json(cache) + + def get_stair_indexes(self, data: pd.Series, height: float, length: int) -> List[int]: + """Get list of start stair segment indexes. + + Keyword arguments: + data -- data, that contains stair (jump or drop) segments + length -- maximum count of values in the stair + height -- the difference between stair max_line and min_line(see utils.find_parameters) + """ + indexes = [] + for i in range(len(data) - length - 1): + is_stair = self.is_stair_in_segment(data.values[i:i + length + 1], height) + if is_stair == True: + indexes.append(i) + return indexes + + def is_stair_in_segment(self, segment: np.ndarray, height: float) -> bool: + if len(segment) < 2: + return False + comparison_operator = operator.ge + if self.get_model_type() == ModelType.DROP: + comparison_operator = operator.le + height = -height + return comparison_operator(max(segment[1:]), segment[0] + height) + + def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: + data = dataframe['value'] + segment = data[start: end] + segment_center_index = utils.find_pattern_center(segment, start, self.get_model_type().value) + return segment_center_index + + def do_fit( + self, + dataframe: pd.DataFrame, + labeled_segments: List[AnalyticSegment], + deleted_segments: List[AnalyticSegment], + learning_info: LearningInfo + ) -> None: + data = utils.cut_dataframe(dataframe) + data = data['value'] + window_size = self.state.window_size + last_pattern_center = self.state.pattern_center + self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) + self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) + convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) + correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) + height_list = learning_info.patterns_value + + del_conv_list = [] + delete_pattern_timestamp = [] + for segment in deleted_segments: + segment_cent_index = segment.center_index + delete_pattern_timestamp.append(segment.pattern_timestamp) + deleted_stair = utils.get_interval(data, segment_cent_index, window_size) + deleted_stair = utils.subtract_min_without_nan(deleted_stair) + del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model) + if len(del_conv_stair) > 0: + del_conv_list.append(max(del_conv_stair)) + + self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) + self.state.stair_height = int(min(learning_info.pattern_height, default = 1)) + self.state.stair_length = int(max(learning_info.pattern_width, default = 1)) + + def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: + data = utils.cut_dataframe(dataframe) + data = data['value'] + possible_stairs = self.get_stair_indexes(data, self.state.stair_height, self.state.stair_length + 1) + result = self.__filter_detection(possible_stairs, data) + return [(val - 1, val + 1) for val in result] + + def __filter_detection(self, segments_indexes: List[int], data: list): + delete_list = [] + variance_error = self.state.window_size + close_segments = utils.close_filtering(segments_indexes, variance_error) + segments_indexes = utils.best_pattern(close_segments, data, self.get_extremum_type().value) + if len(segments_indexes) == 0 or len(self.state.pattern_center) == 0: + return [] + pattern_data = self.state.pattern_model + for segment_index in segments_indexes: + if segment_index <= self.state.window_size or segment_index >= (len(data) - self.state.window_size): + delete_list.append(segment_index) + continue + convol_data = utils.get_interval(data, segment_index, self.state.window_size) + percent_of_nans = convol_data.isnull().sum() / len(convol_data) + if len(convol_data) == 0 or percent_of_nans > 0.5: + delete_list.append(segment_index) + continue + elif 0 < percent_of_nans <= 0.5: + nan_list = utils.find_nan_indexes(convol_data) + convol_data = utils.nan_to_zero(convol_data, nan_list) + pattern_data = utils.nan_to_zero(pattern_data, nan_list) + conv = scipy.signal.fftconvolve(convol_data, pattern_data) + if len(conv) == 0: + delete_list.append(segment_index) + continue + upper_bound = self.state.convolve_max * (1 + POSITIVE_SEGMENT_MEASUREMENT_ERROR) + lower_bound = self.state.convolve_min * (1 - POSITIVE_SEGMENT_MEASUREMENT_ERROR) + delete_up_bound = self.state.conv_del_max * (1 + NEGATIVE_SEGMENT_MEASUREMENT_ERROR) + delete_low_bound = self.state.conv_del_min * (1 - NEGATIVE_SEGMENT_MEASUREMENT_ERROR) + max_conv = max(conv) + if max_conv > upper_bound or max_conv < lower_bound: + delete_list.append(segment_index) + elif max_conv < delete_up_bound and max_conv > delete_low_bound: + delete_list.append(segment_index) + + for item in delete_list: + segments_indexes.remove(item) + segments_indexes = utils.remove_duplicates_and_sort(segments_indexes) + return segments_indexes diff --git a/analytics/analytics/models/triangle_model.py b/analytics/analytics/models/triangle_model.py index 9782dad..5c4c017 100644 --- a/analytics/analytics/models/triangle_model.py +++ b/analytics/analytics/models/triangle_model.py @@ -43,7 +43,7 @@ class TriangleModel(Model): ) -> None: data = utils.cut_dataframe(dataframe) data = data['value'] - self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list)) + self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list) self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) @@ -62,7 +62,7 @@ class TriangleModel(Model): del_conv_list.append(max(del_conv)) delete_pattern_height.append(utils.find_confidence(deleted)[1]) - self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list) + self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list) def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: data = utils.cut_dataframe(dataframe) diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py index 541cf79..39116f1 100644 --- a/analytics/analytics/models/trough_model.py +++ b/analytics/analytics/models/trough_model.py @@ -1,5 +1,5 @@ from analytic_types import TimeSeries -from models import TriangleModel +from models import TriangleModel, ModelType import utils import scipy.signal @@ -10,10 +10,8 @@ import pandas as pd class TroughModel(TriangleModel): - def get_model_type(self) -> (str, bool): - model = 'trough' - type_model = False - return (model, type_model) + def get_model_type(self) -> ModelType: + return ModelType.TROUGH def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: data = dataframe['value'] diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py index 50bb9e0..07ff9ff 100644 --- a/analytics/analytics/utils/common.py +++ b/analytics/analytics/utils/common.py @@ -55,28 +55,6 @@ def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) pattern_list.append(i) return pattern_list -def find_jump(data, height: float, lenght: int) -> List[int]: - ''' - Find jump indexes - ''' - j_list = [] - for i in range(len(data)-lenght-1): - for x in range(1, lenght): - if(data[i + x] > data[i] + height): - j_list.append(i) - return(j_list) - -def find_drop(data, height: float, length: int) -> List[int]: - ''' - Find drop indexes - ''' - d_list = [] - for i in range(len(data)-length-1): - for x in range(1, length): - if(data[i + x] < data[i] - height): - d_list.append(i) - return(d_list) - def timestamp_to_index(dataframe: pd.DataFrame, timestamp: int): data = dataframe['timestamp'] idx, = np.where(data >= timestamp) @@ -459,3 +437,7 @@ def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame: def get_min_max(array: list, default): return float(min(array, default=default)), float(max(array, default=default)) +def remove_duplicates_and_sort(array: list) -> list: + array = list(frozenset(array)) + array.sort() + return array diff --git a/analytics/tests/test_manager.py b/analytics/tests/test_manager.py index 3fc3fed..1886828 100644 --- a/analytics/tests/test_manager.py +++ b/analytics/tests/test_manager.py @@ -1,5 +1,5 @@ from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel -from models import DropModelState, JumpModelState, GeneralModelState +from models import GeneralModelState import utils.meta import aiounittest from analytic_unit_manager import AnalyticUnitManager diff --git a/analytics/tests/test_models.py b/analytics/tests/test_models.py new file mode 100644 index 0000000..11d4d19 --- /dev/null +++ b/analytics/tests/test_models.py @@ -0,0 +1,43 @@ +import unittest +import pandas as pd +import numpy as np +import models + +class TestModel(unittest.TestCase): + + def test_stair_model_get_indexes(self): + drop_model = models.DropModel() + jump_model = models.JumpModel() + drop_data = pd.Series([4, 4, 4, 1, 1, 1, 5, 5, 2, 2, 2]) + jump_data = pd.Series([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5]) + jump_data_one_stair = pd.Series([1, 3, 3]) + drop_data_one_stair = pd.Series([4, 2, 1]) + height = 2 + length = 2 + expected_result = [2, 7] + drop_model_result = drop_model.get_stair_indexes(drop_data, height, length) + jump_model_result = jump_model.get_stair_indexes(jump_data, height, length) + drop_one_stair_result = drop_model.get_stair_indexes(drop_data_one_stair, height, 1) + jump_one_stair_result = jump_model.get_stair_indexes(jump_data_one_stair, height, 1) + for val in expected_result: + self.assertIn(val, drop_model_result) + self.assertIn(val, jump_model_result) + self.assertEqual(0, drop_one_stair_result[0]) + self.assertEqual(0, jump_one_stair_result[0]) + + def test_stair_model_get_indexes_corner_cases(self): + drop_model = models.DropModel() + jump_model = models.JumpModel() + empty_data = pd.Series([]) + nan_data = pd.Series([np.nan, np.nan, np.nan, np.nan]) + height, length = 2, 2 + length_zero, height_zero = 0, 0 + expected_result = [] + drop_empty_data_result = drop_model.get_stair_indexes(empty_data, height, length) + drop_nan_data_result = drop_model.get_stair_indexes(nan_data, height_zero, length_zero) + jump_empty_data_result = jump_model.get_stair_indexes(empty_data, height, length) + jump_nan_data_result = jump_model.get_stair_indexes(nan_data, height_zero, length_zero) + self.assertEqual(drop_empty_data_result, expected_result) + self.assertEqual(drop_nan_data_result, expected_result) + self.assertEqual(jump_empty_data_result, expected_result) + self.assertEqual(jump_nan_data_result, expected_result) diff --git a/analytics/tests/test_utils.py b/analytics/tests/test_utils.py index b691069..6faf993 100644 --- a/analytics/tests/test_utils.py +++ b/analytics/tests/test_utils.py @@ -137,28 +137,6 @@ class TestUtils(unittest.TestCase): patterns_list = [[1, 1, 1], [2, 2, 2],[3,3,3]] result = [2.0, 2.0, 2.0] self.assertEqual(utils.get_av_model(patterns_list), result) - - def test_find_jump_nan_data(self): - data = [np.nan, np.nan, np.nan, np.nan] - data = pd.Series(data) - length = 2 - height = 3 - length_zero = 0 - height_zero = 0 - result = [] - self.assertEqual(utils.find_jump(data, height, length), result) - self.assertEqual(utils.find_jump(data, height_zero, length_zero), result) - - def test_find_drop_nan_data(self): - data = [np.nan, np.nan, np.nan, np.nan] - data = pd.Series(data) - length = 2 - height = 3 - length_zero = 0 - height_zero = 0 - result = [] - self.assertEqual(utils.find_drop(data, height, length), result) - self.assertEqual(utils.find_drop(data, height_zero, length_zero), result) def test_get_distribution_density(self): segment = [1, 1, 1, 3, 5, 5, 5] @@ -369,5 +347,13 @@ class TestUtils(unittest.TestCase): expected_result = [{ 'from': 100, 'to': 200 }] self.assertEqual(meta_result, expected_result) + def test_remove_duplicates_and_sort(self): + a1 = [1, 3, 5] + a2 = [8, 3, 6] + expected_result = [1, 3, 5, 6, 8] + utils_result = utils.remove_duplicates_and_sort(a1+a2) + self.assertEqual(utils_result, expected_result) + self.assertEqual([], []) + if __name__ == '__main__': unittest.main()