
Refactoring of Peak and Trough models #424 (#680)

Alexandr Velikiy authored 6 years ago, committed by Evgeny Smyshlyaev
commit ae06749b8d
12 changed files:

  1. analytics/analytics/analytic_types/learning_info.py (17 lines changed)
  2. analytics/analytics/models/__init__.py (5 lines changed)
  3. analytics/analytics/models/custom_model.py (18 lines changed)
  4. analytics/analytics/models/drop_model.py (21 lines changed)
  5. analytics/analytics/models/general_model.py (11 lines changed)
  6. analytics/analytics/models/jump_model.py (21 lines changed)
  7. analytics/analytics/models/model.py (36 lines changed)
  8. analytics/analytics/models/peak_model.py (147 lines changed)
  9. analytics/analytics/models/triangle_model.py (119 lines changed)
  10. analytics/analytics/models/trough_model.py (146 lines changed)
  11. analytics/analytics/utils/common.py (17 lines changed)
  12. analytics/tests/test_manager.py (2 lines changed)

analytics/analytics/analytic_types/learning_info.py (17 lines changed)

@@ -0,0 +1,17 @@
+import utils.meta
+
+@utils.meta.JSONClass
+class LearningInfo:
+
+    def __init__(self):
+        super().__init__()
+        self.confidence = []
+        self.patterns_list = []
+        self.pattern_width = []
+        self.pattern_height = []
+        self.pattern_timestamp = []
+        self.segment_center_list = []
+        self.patterns_value = []
+
+    def __str__(self):
+        return str(self.to_json())
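
The new LearningInfo container replaces the ad-hoc dict previously built in Model.get_parameters_from_segments (see model.py below). A minimal usage sketch, illustrative only and not part of the commit, assuming the @utils.meta.JSONClass decorator supplies the to_json() that __str__ relies on:

from analytic_types.learning_info import LearningInfo

info = LearningInfo()
info.confidence.append(0.5)            # per-segment confidence values
info.segment_center_list.append(120)   # index of a labeled segment's center
info.patterns_value.append(3.7)        # value at the pattern center
print(info)                            # __str__ delegates to to_json(), assumed to be added by @utils.meta.JSONClass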

analytics/analytics/models/__init__.py (5 lines changed)

@@ -1,7 +1,8 @@
 from models.model import Model, ModelState, AnalyticSegment
+from models.triangle_model import TriangleModel, TriangleModelState
 from models.drop_model import DropModel, DropModelState
-from models.peak_model import PeakModel, PeakModelState
+from models.peak_model import PeakModel
 from models.jump_model import JumpModel, JumpModelState
 from models.custom_model import CustomModel
-from models.trough_model import TroughModel, TroughModelState
+from models.trough_model import TroughModel
 from models.general_model import GeneralModel, GeneralModelState

analytics/analytics/models/custom_model.py (18 lines changed)

@@ -1,7 +1,10 @@
-from models import Model, AnalyticSegment
+from models import Model, AnalyticSegment, ModelState
+
+from analytic_types import AnalyticUnitId, ModelCache
+from analytic_types.learning_info import LearningInfo

 import utils
 import pandas as pd
-from typing import List
+from typing import List, Optional

 class CustomModel(Model):
@@ -10,9 +13,18 @@ class CustomModel(Model):
         dataframe: pd.DataFrame,
         labeled_segments: List[AnalyticSegment],
         deleted_segments: List[AnalyticSegment],
-        learning_info: dict
+        learning_info: LearningInfo
     ) -> None:
         pass

     def do_detect(self, dataframe: pd.DataFrame) -> list:
         return []
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        pass
+
+    def get_model_type(self) -> (str, bool):
+        pass
+
+    def get_state(self, cache: Optional[ModelCache] = None) -> ModelState:
+        pass

analytics/analytics/models/drop_model.py (21 lines changed)

@@ -9,7 +9,8 @@ import utils
 import utils.meta
 import numpy as np
 import pandas as pd
-from analytic_types import AnalyticUnitId
+from analytic_types import AnalyticUnitId, TimeSeries
+from analytic_types.learning_info import LearningInfo

 @utils.meta.JSONClass
 class DropModelState(ModelState):
@@ -48,17 +49,17 @@ class DropModel(Model):
         dataframe: pd.DataFrame,
         labeled_segments: List[AnalyticSegment],
         deleted_segments: List[AnalyticSegment],
-        learning_info: dict
+        learning_info: LearningInfo
     ) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         window_size = self.state.window_size
         last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
-        self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
+        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
+        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
         convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
         correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info['patterns_value']
+        height_list = learning_info.patterns_value
         del_conv_list = []
         delete_pattern_timestamp = []
@@ -70,18 +71,18 @@ class DropModel(Model):
             del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.state.pattern_model)
             if len(del_conv_drop): del_conv_list.append(max(del_conv_drop))

-        self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list)
-        self.state.drop_height = int(min(learning_info['pattern_height'], default = 1))
-        self.state.drop_length = int(max(learning_info['pattern_width'], default = 1))
+        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
+        self.state.drop_height = int(min(learning_info.pattern_height, default = 1))
+        self.state.drop_length = int(max(learning_info.pattern_width, default = 1))

-    def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         possible_drops = utils.find_drop(data, self.state.drop_height, self.state.drop_length + 1)
         result = self.__filter_detection(possible_drops, data)
         return [(val - 1, val + 1) for val in result]

-    def __filter_detection(self, segments: list, data: list):
+    def __filter_detection(self, segments: List[int], data: list):
         delete_list = []
         variance_error = self.state.window_size
         close_patterns = utils.close_filtering(segments, variance_error)

analytics/analytics/models/general_model.py (11 lines changed)

@@ -16,7 +16,8 @@ import logging
 from typing import Optional, List, Tuple
 import math
-from analytic_types import AnalyticUnitId
+from analytic_types import AnalyticUnitId, TimeSeries
+from analytic_types.learning_info import LearningInfo

 PEARSON_FACTOR = 0.7
@@ -48,13 +49,13 @@ class GeneralModel(Model):
         dataframe: pd.DataFrame,
         labeled_segments: List[AnalyticSegment],
         deleted_segments: List[AnalyticSegment],
-        learning_info: dict
+        learning_info: LearningInfo
     ) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
-        self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
+        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
+        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
         convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
         correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
@@ -71,7 +72,7 @@ class GeneralModel(Model):
         self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3)
         self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size)

-    def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         pat_data = self.state.pattern_model

analytics/analytics/models/jump_model.py (21 lines changed)

@@ -10,7 +10,8 @@ from typing import Optional, List, Tuple
 import math
 from scipy.signal import argrelextrema
 from scipy.stats import gaussian_kde
-from analytic_types import AnalyticUnitId
+from analytic_types import AnalyticUnitId, TimeSeries
+from analytic_types.learning_info import LearningInfo

 @utils.meta.JSONClass
@@ -49,17 +50,17 @@ class JumpModel(Model):
         dataframe: pd.DataFrame,
         labeled_segments: List[AnalyticSegment],
         deleted_segments: List[AnalyticSegment],
-        learning_info: dict
+        learning_info: LearningInfo
     ) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         window_size = self.state.window_size
         last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
-        self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
+        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
+        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
         convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
         correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info['patterns_value']
+        height_list = learning_info.patterns_value
         del_conv_list = []
         delete_pattern_timestamp = []
@@ -71,18 +72,18 @@ class JumpModel(Model):
             del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.state.pattern_model)
             if len(del_conv_jump): del_conv_list.append(max(del_conv_jump))

-        self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list)
-        self.state.jump_height = float(min(learning_info['pattern_height'], default = 1))
-        self.state.jump_length = int(max(learning_info['pattern_width'], default = 1))
+        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
+        self.state.jump_height = float(min(learning_info.pattern_height, default = 1))
+        self.state.jump_length = int(max(learning_info.pattern_width, default = 1))

-    def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         possible_jumps = utils.find_jump(data, self.state.jump_height, self.state.jump_length + 1)
         result = self.__filter_detection(possible_jumps, data)
         return [(val - 1, val + 1) for val in result]

-    def __filter_detection(self, segments, data):
+    def __filter_detection(self, segments: List[int], data: pd.Series):
         delete_list = []
         variance_error = self.state.window_size
         close_patterns = utils.close_filtering(segments, variance_error)

analytics/analytics/models/model.py (36 lines changed)

@@ -6,8 +6,9 @@ from typing import Optional, List, Tuple
 import pandas as pd
 import math
 import logging
-from analytic_types import AnalyticUnitId, ModelCache
+from analytic_types import AnalyticUnitId, ModelCache, TimeSeries
 from analytic_types.segment import Segment
+from analytic_types.learning_info import LearningInfo

 import utils.meta
@@ -96,12 +97,12 @@ class Model(ABC):
         dataframe: pd.DataFrame,
         labeled_segments: List[AnalyticSegment],
         deleted_segments: List[AnalyticSegment],
-        learning_info: dict
+        learning_info: LearningInfo
     ) -> None:
         pass

     @abstractmethod
-    def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
         pass

     @abstractmethod
@@ -146,7 +147,6 @@ class Model(ABC):
         if self.state.window_size == 0:
             self.state.window_size = math.ceil(max_length / 2) if max_length else 0
         model, model_type = self.get_model_type()
-        # TODO: learning_info: dict -> class
         learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type)
         self.do_fit(dataframe, labeled, deleted, learning_info)
         logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id))
@@ -176,37 +176,29 @@ class Model(ABC):
     def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: str, model_type: bool) -> dict:
         logging.debug('Start parsing segments')
-        learning_info = {
-            'confidence': [],
-            'patterns_list': [],
-            'pattern_width': [],
-            'pattern_height': [],
-            'pattern_timestamp': [],
-            'segment_center_list': [],
-            'patterns_value': [],
-        }
+        learning_info = LearningInfo()
         data = dataframe['value']
         for segment in labeled:
             confidence = utils.find_confidence(segment.data)[0]
-            learning_info['confidence'].append(confidence)
+            learning_info.confidence.append(confidence)
             segment_center = segment.center_index
-            learning_info['segment_center_list'].append(segment_center)
-            learning_info['pattern_timestamp'].append(segment.pattern_timestamp)
+            learning_info.segment_center_list.append(segment_center)
+            learning_info.pattern_timestamp.append(segment.pattern_timestamp)
             aligned_segment = utils.get_interval(data, segment_center, self.state.window_size)
             aligned_segment = utils.subtract_min_without_nan(aligned_segment)
             if len(aligned_segment) == 0:
                 logging.warning('cant add segment to learning because segment is empty where segments center is: {}, window_size: {}, and len_data: {}'.format(
                     segment_center, self.state.window_size, len(data)))
                 continue
-            learning_info['patterns_list'].append(aligned_segment)
+            learning_info.patterns_list.append(aligned_segment)
             if model == 'peak' or model == 'trough':
-                learning_info['pattern_height'].append(utils.find_confidence(aligned_segment)[1])
-                learning_info['patterns_value'].append(aligned_segment.values.max())
+                learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1])
+                learning_info.patterns_value.append(aligned_segment.values.max())
             if model == 'jump' or model == 'drop':
                 pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model)
-                learning_info['pattern_height'].append(pattern_height)
-                learning_info['pattern_width'].append(pattern_length)
-                learning_info['patterns_value'].append(aligned_segment.values[self.state.window_size])
+                learning_info.pattern_height.append(pattern_height)
+                learning_info.pattern_width.append(pattern_length)
+                learning_info.patterns_value.append(aligned_segment.values[self.state.window_size])
         logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info))
         return learning_info

analytics/analytics/models/peak_model.py (147 lines changed)

@@ -1,36 +1,14 @@
-from models import Model, ModelState, AnalyticSegment
+from analytic_types import TimeSeries
+from models import TriangleModel
+import utils
 import scipy.signal
-from scipy.fftpack import fft
 from scipy.signal import argrelextrema
 from typing import Optional, List, Tuple
-import utils
-import utils.meta
 import numpy as np
 import pandas as pd
-from analytic_types import AnalyticUnitId
-
-SMOOTHING_COEFF = 2400
-EXP_SMOOTHING_FACTOR = 0.01
-
-@utils.meta.JSONClass
-class PeakModelState(ModelState):
-
-    def __init__(
-        self,
-        confidence: float = 0,
-        height_max: float = 0,
-        height_min: float = 0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.confidence = confidence
-        self.height_max = height_max
-        self.height_min = height_min
-
-class PeakModel(Model):
+class PeakModel(TriangleModel):

     def get_model_type(self) -> (str, bool):
         model = 'peak'
@@ -42,98 +20,27 @@ class PeakModel(Model):
         segment = data[start: end]
         return segment.idxmax()

-    def get_state(self, cache: Optional[dict] = None) -> PeakModelState:
-        return PeakModelState.from_json(cache)
-
-    def do_fit(
-        self,
-        dataframe: pd.DataFrame,
-        labeled_segments: List[AnalyticSegment],
-        deleted_segments: List[AnalyticSegment],
-        learning_info: dict
-    ) -> None:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = self.state.window_size
-        last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
-        self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
-        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info['patterns_value']
-        del_conv_list = []
-        delete_pattern_width = []
-        delete_pattern_height = []
-        delete_pattern_timestamp = []
-        for segment in deleted_segments:
-            del_max_index = segment.center_index
-            delete_pattern_timestamp.append(segment.pattern_timestamp)
-            deleted = utils.get_interval(data, del_max_index, window_size)
-            deleted = utils.subtract_min_without_nan(deleted)
-            del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
-            if len(del_conv): del_conv_list.append(max(del_conv))
-            delete_pattern_height.append(utils.find_confidence(deleted)[1])
-        self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list)
-
-    def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data
-        all_maxs = argrelextrema(np.array(data), np.greater)[0]
-        extrema_list = []
-        for i in utils.exponential_smoothing(data + self.state.confidence, EXP_SMOOTHING_FACTOR):
-            extrema_list.append(i)
-        segments = []
-        for i in all_maxs:
-            if data[i] > extrema_list[i]:
-                segments.append(i)
-        result = self.__filter_detection(segments, data)
-        result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence)
-        return result
-
-    def __filter_detection(self, segments: list, data: list) -> list:
-        delete_list = []
-        variance_error = self.state.window_size
-        close_patterns = utils.close_filtering(segments, variance_error)
-        segments = utils.best_pattern(close_patterns, data, 'max')
-        if len(segments) == 0 or len(self.state.pattern_model) == 0:
-            return []
-        pattern_data = self.state.pattern_model
-        up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
-        low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
-        up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
-        low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
-        up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
-        low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
-        for segment in segments:
-            if segment > self.state.window_size:
-                convol_data = utils.get_interval(data, segment, self.state.window_size)
-                convol_data = utils.subtract_min_without_nan(convol_data)
-                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if percent_of_nans > 0.5:
-                    delete_list.append(segment)
-                    continue
-                elif 0 < percent_of_nans <= 0.5:
-                    nan_list = utils.find_nan_indexes(convol_data)
-                    convol_data = utils.nan_to_zero(convol_data, nan_list)
-                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
-                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
-                pattern_height = convol_data.values[self.state.window_size]
-                if pattern_height > up_height or pattern_height < low_height:
-                    delete_list.append(segment)
-                    continue
-                if max(conv) > up_conv or max(conv) < low_conv:
-                    delete_list.append(segment)
-                    continue
-                if max(conv) < up_del_conv and max(conv) > low_del_conv:
-                    delete_list.append(segment)
-            else:
-                delete_list.append(segment)
-        for item in delete_list:
-            segments.remove(item)
-        return set(segments)
+    def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]:
+        pattern_list = []
+        for val in close_patterns:
+            max_val = data[val[0]]
+            ind = val[0]
+            for i in val:
+                if data[i] > max_val:
+                    max_val = data[i]
+                    ind = i
+            pattern_list.append(ind)
+        return pattern_list
+
+    def get_extremum_indexes(self, data: pd.Series) -> np.ndarray:
+        return argrelextrema(data.values, np.greater)[0]
+
+    def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series:
+        return utils.exponential_smoothing(data + self.state.confidence, alpha)
+
+    def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]:
+        segments = []
+        for idx in peak_indexes:
+            if data[idx] > smoothed_data[idx]:
+                segments.append(idx)
+        return segments

analytics/analytics/models/triangle_model.py (119 lines changed)

@@ -0,0 +1,119 @@
+from analytic_types import AnalyticUnitId, TimeSeries
+from analytic_types.learning_info import LearningInfo
+
+from models import Model, ModelState, AnalyticSegment
+
+import utils
+import utils.meta
+import scipy.signal
+from scipy.fftpack import fft
+from typing import Optional, List, Tuple
+
+import numpy as np
+import pandas as pd
+
+EXP_SMOOTHING_FACTOR = 0.01
+
+@utils.meta.JSONClass
+class TriangleModelState(ModelState):
+
+    def __init__(
+        self,
+        confidence: float = 0,
+        height_max: float = 0,
+        height_min: float = 0,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.confidence = confidence
+        self.height_max = height_max
+        self.height_min = height_min
+
+class TriangleModel(Model):
+
+    def get_state(self, cache: Optional[dict] = None) -> TriangleModelState:
+        return TriangleModelState.from_json(cache)
+
+    def do_fit(
+        self,
+        dataframe: pd.DataFrame,
+        labeled_segments: List[AnalyticSegment],
+        deleted_segments: List[AnalyticSegment],
+        learning_info: LearningInfo
+    ) -> None:
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
+        self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list))
+        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
+        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
+        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
+        height_list = learning_info.patterns_value
+
+        del_conv_list = []
+        delete_pattern_width = []
+        delete_pattern_height = []
+        delete_pattern_timestamp = []
+        for segment in deleted_segments:
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
+            deleted = utils.get_interval(data, segment.center_index, self.state.window_size)
+            deleted = utils.subtract_min_without_nan(deleted)
+            del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
+            if len(del_conv):
+                del_conv_list.append(max(del_conv))
+            delete_pattern_height.append(utils.find_confidence(deleted)[1])
+
+        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)
+
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
+
+        all_extremum_indexes = self.get_extremum_indexes(data)
+        smoothed_data = self.get_smoothed_data(data, self.state.confidence, EXP_SMOOTHING_FACTOR)
+        segments = self.get_possible_segments(data, smoothed_data, all_extremum_indexes)
+        result = self.__filter_detection(segments, data)
+        result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence)
+        return result
+
+    def __filter_detection(self, segments: List[int], data: pd.Series) -> list:
+        delete_list = []
+        variance_error = self.state.window_size
+        close_patterns = utils.close_filtering(segments, variance_error)
+        segments = self.get_best_pattern(close_patterns, data)
+
+        if len(segments) == 0 or len(self.state.pattern_model) == 0:
+            return []
+        pattern_data = self.state.pattern_model
+        up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
+        low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
+        up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
+        low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
+        up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
+        low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
+        for segment in segments:
+            if segment > self.state.window_size:
+                convol_data = utils.get_interval(data, segment, self.state.window_size)
+                convol_data = utils.subtract_min_without_nan(convol_data)
+                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
+                if percent_of_nans > 0.5:
+                    delete_list.append(segment)
+                    continue
+                elif 0 < percent_of_nans <= 0.5:
+                    nan_list = utils.find_nan_indexes(convol_data)
+                    convol_data = utils.nan_to_zero(convol_data, nan_list)
+                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
+                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
+                pattern_height = convol_data.values.max()
+                if pattern_height > up_height or pattern_height < low_height:
+                    delete_list.append(segment)
+                    continue
+                if max(conv) > up_conv or max(conv) < low_conv:
+                    delete_list.append(segment)
+                    continue
+                if max(conv) < up_del_conv and max(conv) > low_del_conv:
+                    delete_list.append(segment)
+            else:
+                delete_list.append(segment)
+        for item in delete_list:
+            segments.remove(item)
+        return set(segments)
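
TriangleModel is the shared template for peak-like patterns: do_detect above delegates to get_extremum_indexes, get_smoothed_data and get_possible_segments, and __filter_detection delegates to get_best_pattern. The sketch below shows a conforming subclass; it is hypothetical (not part of the commit) and simply mirrors what PeakModel does in the next file, so the hook contract is easier to see. Concrete models also implement get_model_type and find_segment_center from the base Model class.

from typing import List
import numpy as np
import pandas as pd
from scipy.signal import argrelextrema

import utils
from analytic_types import TimeSeries
from models import TriangleModel

class ExamplePeakLikeModel(TriangleModel):
    # Hypothetical subclass for illustration; PeakModel/TroughModel below are the real ones.

    def get_extremum_indexes(self, data: pd.Series) -> np.ndarray:
        # candidate pattern centers: local maxima of the raw series
        return argrelextrema(data.values, np.greater)[0]

    def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series:
        # baseline the candidates are compared against
        return utils.exponential_smoothing(data + self.state.confidence, alpha)

    def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]:
        # keep only extrema that rise above the smoothed baseline
        return [idx for idx in peak_indexes if data[idx] > smoothed_data[idx]]

    def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]:
        # one representative (the highest point) per group of nearby candidates
        return [max(group, key=lambda i: data[i]) for group in close_patterns]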

analytics/analytics/models/trough_model.py (146 lines changed)

@@ -1,36 +1,14 @@
-from models import Model, ModelState, AnalyticSegment
+from analytic_types import TimeSeries
+from models import TriangleModel
+import utils
 import scipy.signal
-from scipy.fftpack import fft
 from scipy.signal import argrelextrema
 from typing import Optional, List, Tuple
-import utils
-import utils.meta
 import numpy as np
 import pandas as pd
-from analytic_types import AnalyticUnitId
-
-SMOOTHING_COEFF = 2400
-EXP_SMOOTHING_FACTOR = 0.01
-
-@utils.meta.JSONClass
-class TroughModelState(ModelState):
-
-    def __init__(
-        self,
-        confidence: float = 0,
-        height_max: float = 0,
-        height_min: float = 0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.confidence = confidence
-        self.height_max = height_max
-        self.height_min = height_min
-
-class TroughModel(Model):
+class TroughModel(TriangleModel):

     def get_model_type(self) -> (str, bool):
         model = 'trough'
@@ -42,99 +20,27 @@ class TroughModel(Model):
         segment = data[start: end]
         return segment.idxmin()

-    def get_state(self, cache: Optional[dict] = None) -> TroughModelState:
-        return TroughModelState.from_json(cache)
-
-    def do_fit(
-        self,
-        dataframe: pd.DataFrame,
-        labeled_segments: List[AnalyticSegment],
-        deleted_segments: List[AnalyticSegment],
-        learning_info: dict
-    ) -> None:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = self.state.window_size
-        last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
-        self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
-        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info['patterns_value']
-        del_conv_list = []
-        delete_pattern_width = []
-        delete_pattern_height = []
-        delete_pattern_timestamp = []
-        for segment in deleted_segments:
-            del_min_index = segment.center_index
-            delete_pattern_timestamp.append(segment.pattern_timestamp)
-            deleted = utils.get_interval(data, del_min_index, window_size)
-            deleted = utils.subtract_min_without_nan(deleted)
-            del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
-            if len(del_conv): del_conv_list.append(max(del_conv))
-            delete_pattern_height.append(utils.find_confidence(deleted)[1])
-        self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list)
-
-    def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data
-        all_mins = argrelextrema(np.array(data), np.less)[0]
-        extrema_list = []
-        for i in utils.exponential_smoothing(data - self.state.confidence, EXP_SMOOTHING_FACTOR):
-            extrema_list.append(i)
-        segments = []
-        for i in all_mins:
-            if data[i] < extrema_list[i]:
-                segments.append(i)
-        result = self.__filter_detection(segments, data)
-        result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence, inverse = True)
-        return result
-
-    def __filter_detection(self, segments: list, data: list) -> list:
-        delete_list = []
-        variance_error = self.state.window_size
-        close_patterns = utils.close_filtering(segments, variance_error)
-        segments = utils.best_pattern(close_patterns, data, 'min')
-        if len(segments) == 0 or len(self.state.pattern_center) == 0:
-            segments = []
-            return segments
-        pattern_data = self.state.pattern_model
-        up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
-        low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
-        up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
-        low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
-        up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
-        low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
-        for segment in segments:
-            if segment > self.state.window_size:
-                convol_data = utils.get_interval(data, segment, self.state.window_size)
-                convol_data = utils.subtract_min_without_nan(convol_data)
-                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if percent_of_nans > 0.5:
-                    delete_list.append(segment)
-                    continue
-                elif 0 < percent_of_nans <= 0.5:
-                    nan_list = utils.find_nan_indexes(convol_data)
-                    convol_data = utils.nan_to_zero(convol_data, nan_list)
-                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
-                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
-                pattern_height = convol_data.values.max()
-                if pattern_height > up_height or pattern_height < low_height:
-                    delete_list.append(segment)
-                    continue
-                if max(conv) > up_conv or max(conv) < low_conv:
-                    delete_list.append(segment)
-                    continue
-                if max(conv) < up_del_conv and max(conv) > low_del_conv:
-                    delete_list.append(segment)
-            else:
-                delete_list.append(segment)
-        for item in delete_list:
-            segments.remove(item)
-        return set(segments)
+    def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]:
+        pattern_list = []
+        for val in close_patterns:
+            min_val = data[val[0]]
+            ind = val[0]
+            for i in val:
+                if data[i] < min_val:
+                    min_val = data[i]
+                    ind = i
+            pattern_list.append(ind)
+        return pattern_list
+
+    def get_extremum_indexes(self, data: pd.Series) -> np.ndarray:
+        return argrelextrema(data.values, np.less)[0]
+
+    def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series:
+        return utils.exponential_smoothing(data - self.state.confidence, alpha)
+
+    def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, trough_indexes: List[int]) -> List[int]:
+        segments = []
+        for idx in trough_indexes:
+            if data[idx] < smoothed_data[idx]:
+                segments.append(idx)
+        return segments

analytics/analytics/utils/common.py (17 lines changed)

@@ -59,7 +59,10 @@ def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str)
            pattern_list.append(i)
    return pattern_list

-def find_jump(data, height, lenght):
+def find_jump(data, height, lenght) -> List[int]:
+    '''
+    Find jump indexes
+    '''
    j_list = []
    for i in range(len(data)-lenght-1):
        for x in range(1, lenght):
@@ -67,7 +70,10 @@ def find_jump(data, height, lenght):
            j_list.append(i)
    return(j_list)

-def find_drop(data, height, length):
+def find_drop(data, height, length) -> List[int]:
+    '''
+    Find drop indexes
+    '''
    d_list = []
    for i in range(len(data)-length-1):
        for x in range(1, length):
@@ -116,7 +122,7 @@ def get_same_length(patterns_list):
        pat.extend(added_values)
    return patterns_list

-def close_filtering(pattern_list: List[int], win_size: int) -> List[Tuple[int, int]]:
+def close_filtering(pattern_list: List[int], win_size: int) -> TimeSeries:
    if len(pattern_list) == 0:
        return []
    s = [[pattern_list[0]]]
@@ -152,7 +158,7 @@ def find_interval(dataframe: pd.DataFrame) -> int:
    delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0])
    return delta

-def get_start_and_end_of_segments(segments: List[List[int]]) -> List[Tuple[int, int]]:
+def get_start_and_end_of_segments(segments: List[List[int]]) -> TimeSeries:
    '''
    find start and end of segment: [1, 2, 3, 4] -> [1, 4]
    if segment is 1 index - it will be doubled: [7] -> [7, 7]
@@ -168,7 +174,6 @@ def get_start_and_end_of_segments(segments: List[List[int]]) -> List[Tuple[int, int]]:
        result.append(segment)
    return result
-
 def best_pattern(pattern_list: list, data: pd.Series, dir: str) -> list:
    new_pattern_list = []
    for val in pattern_list:
@@ -261,7 +266,7 @@ def get_interval(data: pd.Series, center: int, window_size: int, normalization =
    result_interval = subtract_min_without_nan(result_interval)
    return result_interval

-def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> List[Tuple[int, int]]:
+def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> TimeSeries:
    """
    Find start and end of patterns for peak
    max_border_factor - final border of pattern
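
The return-type changes in this file (and the do_detect signatures above) replace List[Tuple[int, int]] with the TimeSeries alias imported from analytic_types. Its definition is not part of this diff; judging from the annotations it replaces, it is presumably something like:

from typing import List, Tuple

# Assumed alias (analytic_types/__init__.py is not shown in this diff):
# each detected pattern is a (start_index, end_index) pair.
TimeSeries = List[Tuple[int, int]]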

analytics/tests/test_manager.py (2 lines changed)

@@ -1,5 +1,5 @@
 from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel
-from models import PeakModelState, DropModelState, TroughModelState, JumpModelState, GeneralModelState
+from models import DropModelState, JumpModelState, GeneralModelState
 import utils.meta
 import aiounittest
 from analytic_unit_manager import AnalyticUnitManager
