
Refactoring of Peak and Trough models #424 (#680)

Authored by Alexandr Velikiy 6 years ago; committed by Evgeny Smyshlyaev
commit ae06749b8d
  1. analytics/analytics/analytic_types/learning_info.py (17 changed lines)
  2. analytics/analytics/models/__init__.py (5 changed lines)
  3. analytics/analytics/models/custom_model.py (18 changed lines)
  4. analytics/analytics/models/drop_model.py (21 changed lines)
  5. analytics/analytics/models/general_model.py (11 changed lines)
  6. analytics/analytics/models/jump_model.py (21 changed lines)
  7. analytics/analytics/models/model.py (36 changed lines)
  8. analytics/analytics/models/peak_model.py (147 changed lines)
  9. analytics/analytics/models/triangle_model.py (119 changed lines)
  10. analytics/analytics/models/trough_model.py (146 changed lines)
  11. analytics/analytics/utils/common.py (17 changed lines)
  12. analytics/tests/test_manager.py (2 changed lines)

analytics/analytics/analytic_types/learning_info.py (17 changed lines)

@@ -0,0 +1,17 @@
import utils.meta

@utils.meta.JSONClass
class LearningInfo:

    def __init__(self):
        super().__init__()
        self.confidence = []
        self.patterns_list = []
        self.pattern_width = []
        self.pattern_height = []
        self.pattern_timestamp = []
        self.segment_center_list = []
        self.patterns_value = []

    def __str__(self):
        return str(self.to_json())
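Note: with LearningInfo in place, the models below read the learning data through attributes (learning_info.confidence, learning_info.patterns_list) instead of dict keys. A minimal usage sketch; it assumes only what the file above shows, namely that utils.meta.JSONClass supplies to_json():

    info = LearningInfo()
    info.confidence.append(1.5)
    info.segment_center_list.append(42)
    print(info)  # __str__ serializes the collected fields via to_json()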

analytics/analytics/models/__init__.py (5 changed lines)

@@ -1,7 +1,8 @@
from models.model import Model, ModelState, AnalyticSegment
from models.triangle_model import TriangleModel, TriangleModelState
from models.drop_model import DropModel, DropModelState
from models.peak_model import PeakModel, PeakModelState
from models.peak_model import PeakModel
from models.jump_model import JumpModel, JumpModelState
from models.custom_model import CustomModel
from models.trough_model import TroughModel, TroughModelState
from models.trough_model import TroughModel
from models.general_model import GeneralModel, GeneralModelState

analytics/analytics/models/custom_model.py (18 changed lines)

@@ -1,7 +1,10 @@
from models import Model, AnalyticSegment
from models import Model, AnalyticSegment, ModelState
from analytic_types import AnalyticUnitId, ModelCache
from analytic_types.learning_info import LearningInfo
import utils
import pandas as pd
from typing import List
from typing import List, Optional
class CustomModel(Model):
@@ -10,9 +13,18 @@ class CustomModel(Model):
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
learning_info: LearningInfo
) -> None:
pass
def do_detect(self, dataframe: pd.DataFrame) -> list:
return []
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
pass
def get_model_type(self) -> (str, bool):
pass
def get_state(self, cache: Optional[ModelCache] = None) -> ModelState:
pass
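Note: CustomModel now stubs out find_segment_center, get_model_type and get_state, presumably because Model declares them with @abstractmethod (the model.py hunk below shows do_fit and do_detect marked that way), and a subclass that leaves an abstract method unimplemented cannot be instantiated. A small self-contained illustration of that rule (class names here are made up for the example):

    from abc import ABC, abstractmethod

    class Base(ABC):
        @abstractmethod
        def get_model_type(self) -> tuple: ...

    class Incomplete(Base):
        pass

    class Complete(Base):
        def get_model_type(self) -> tuple:
            return ('custom', True)

    Complete()        # fine
    # Incomplete()    # TypeError: Can't instantiate abstract class Incomplete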

analytics/analytics/models/drop_model.py (21 changed lines)

@@ -9,7 +9,8 @@ import utils
import utils.meta
import numpy as np
import pandas as pd
from analytic_types import AnalyticUnitId
from analytic_types import AnalyticUnitId, TimeSeries
from analytic_types.learning_info import LearningInfo
@utils.meta.JSONClass
class DropModelState(ModelState):
@@ -48,17 +49,17 @@ class DropModel(Model):
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
learning_info: LearningInfo
) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
window_size = self.state.window_size
last_pattern_center = self.state.pattern_center
self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
height_list = learning_info['patterns_value']
height_list = learning_info.patterns_value
del_conv_list = []
delete_pattern_timestamp = []
@@ -70,18 +71,18 @@ class DropModel(Model):
del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.state.pattern_model)
if len(del_conv_drop): del_conv_list.append(max(del_conv_drop))
self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list)
self.state.drop_height = int(min(learning_info['pattern_height'], default = 1))
self.state.drop_length = int(max(learning_info['pattern_width'], default = 1))
self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
self.state.drop_height = int(min(learning_info.pattern_height, default = 1))
self.state.drop_length = int(max(learning_info.pattern_width, default = 1))
def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
data = utils.cut_dataframe(dataframe)
data = data['value']
possible_drops = utils.find_drop(data, self.state.drop_height, self.state.drop_length + 1)
result = self.__filter_detection(possible_drops, data)
return [(val - 1, val + 1) for val in result]
def __filter_detection(self, segments: list, data: list):
def __filter_detection(self, segments: List[int], data: list):
delete_list = []
variance_error = self.state.window_size
close_patterns = utils.close_filtering(segments, variance_error)

analytics/analytics/models/general_model.py (11 changed lines)

@@ -16,7 +16,8 @@ import logging
from typing import Optional, List, Tuple
import math
from analytic_types import AnalyticUnitId
from analytic_types import AnalyticUnitId, TimeSeries
from analytic_types.learning_info import LearningInfo
PEARSON_FACTOR = 0.7
@@ -48,13 +49,13 @@ class GeneralModel(Model):
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
learning_info: LearningInfo
) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
last_pattern_center = self.state.pattern_center
self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
@@ -71,7 +72,7 @@ class GeneralModel(Model):
self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3)
self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size)
def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
data = utils.cut_dataframe(dataframe)
data = data['value']
pat_data = self.state.pattern_model

analytics/analytics/models/jump_model.py (21 changed lines)

@@ -10,7 +10,8 @@ from typing import Optional, List, Tuple
import math
from scipy.signal import argrelextrema
from scipy.stats import gaussian_kde
from analytic_types import AnalyticUnitId
from analytic_types import AnalyticUnitId, TimeSeries
from analytic_types.learning_info import LearningInfo
@utils.meta.JSONClass
@@ -49,17 +50,17 @@ class JumpModel(Model):
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
learning_info: LearningInfo
) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
window_size = self.state.window_size
last_pattern_center = self.state.pattern_center
self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
height_list = learning_info['patterns_value']
height_list = learning_info.patterns_value
del_conv_list = []
delete_pattern_timestamp = []
@@ -71,18 +72,18 @@ class JumpModel(Model):
del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.state.pattern_model)
if len(del_conv_jump): del_conv_list.append(max(del_conv_jump))
self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list)
self.state.jump_height = float(min(learning_info['pattern_height'], default = 1))
self.state.jump_length = int(max(learning_info['pattern_width'], default = 1))
self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
self.state.jump_height = float(min(learning_info.pattern_height, default = 1))
self.state.jump_length = int(max(learning_info.pattern_width, default = 1))
def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
data = utils.cut_dataframe(dataframe)
data = data['value']
possible_jumps = utils.find_jump(data, self.state.jump_height, self.state.jump_length + 1)
result = self.__filter_detection(possible_jumps, data)
return [(val - 1, val + 1) for val in result]
def __filter_detection(self, segments, data):
def __filter_detection(self, segments: List[int], data: pd.Series):
delete_list = []
variance_error = self.state.window_size
close_patterns = utils.close_filtering(segments, variance_error)

analytics/analytics/models/model.py (36 changed lines)

@@ -6,8 +6,9 @@ from typing import Optional, List, Tuple
import pandas as pd
import math
import logging
from analytic_types import AnalyticUnitId, ModelCache
from analytic_types import AnalyticUnitId, ModelCache, TimeSeries
from analytic_types.segment import Segment
from analytic_types.learning_info import LearningInfo
import utils.meta
@@ -96,12 +97,12 @@ class Model(ABC):
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
learning_info: LearningInfo
) -> None:
pass
@abstractmethod
def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
pass
@abstractmethod
@@ -146,7 +147,6 @@ class Model(ABC):
if self.state.window_size == 0:
self.state.window_size = math.ceil(max_length / 2) if max_length else 0
model, model_type = self.get_model_type()
# TODO: learning_info: dict -> class
learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type)
self.do_fit(dataframe, labeled, deleted, learning_info)
logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id))
@@ -176,37 +176,29 @@ class Model(ABC):
def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: str, model_type: bool) -> dict:
logging.debug('Start parsing segments')
learning_info = {
'confidence': [],
'patterns_list': [],
'pattern_width': [],
'pattern_height': [],
'pattern_timestamp': [],
'segment_center_list': [],
'patterns_value': [],
}
learning_info = LearningInfo()
data = dataframe['value']
for segment in labeled:
confidence = utils.find_confidence(segment.data)[0]
learning_info['confidence'].append(confidence)
learning_info.confidence.append(confidence)
segment_center = segment.center_index
learning_info['segment_center_list'].append(segment_center)
learning_info['pattern_timestamp'].append(segment.pattern_timestamp)
learning_info.segment_center_list.append(segment_center)
learning_info.pattern_timestamp.append(segment.pattern_timestamp)
aligned_segment = utils.get_interval(data, segment_center, self.state.window_size)
aligned_segment = utils.subtract_min_without_nan(aligned_segment)
if len(aligned_segment) == 0:
logging.warning('cant add segment to learning because segment is empty where segments center is: {}, window_size: {}, and len_data: {}'.format(
segment_center, self.state.window_size, len(data)))
continue
learning_info['patterns_list'].append(aligned_segment)
learning_info.patterns_list.append(aligned_segment)
if model == 'peak' or model == 'trough':
learning_info['pattern_height'].append(utils.find_confidence(aligned_segment)[1])
learning_info['patterns_value'].append(aligned_segment.values.max())
learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1])
learning_info.patterns_value.append(aligned_segment.values.max())
if model == 'jump' or model == 'drop':
pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model)
learning_info['pattern_height'].append(pattern_height)
learning_info['pattern_width'].append(pattern_length)
learning_info['patterns_value'].append(aligned_segment.values[self.state.window_size])
learning_info.pattern_height.append(pattern_height)
learning_info.pattern_width.append(pattern_length)
learning_info.patterns_value.append(aligned_segment.values[self.state.window_size])
logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info))
return learning_info
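Note: fit() in Model stays a template method: it derives window_size, asks the subclass for its model type, builds a LearningInfo via get_parameters_from_segments, and passes it to the subclass-specific do_fit. One small leftover visible in this hunk is that get_parameters_from_segments is still annotated "-> dict" even though it now returns a LearningInfo. A condensed view of the flow, taken from the lines above (labeled/deleted preparation omitted):

    model, model_type = self.get_model_type()
    learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type)
    self.do_fit(dataframe, labeled, deleted, learning_info)  # learning_info is now a LearningInfo, not a dict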

analytics/analytics/models/peak_model.py (147 changed lines)

@@ -1,36 +1,14 @@
from models import Model, ModelState, AnalyticSegment
from analytic_types import TimeSeries
from models import TriangleModel
import utils
import scipy.signal
from scipy.fftpack import fft
from scipy.signal import argrelextrema
from typing import Optional, List, Tuple
import utils
import utils.meta
import numpy as np
import pandas as pd
from analytic_types import AnalyticUnitId
SMOOTHING_COEFF = 2400
EXP_SMOOTHING_FACTOR = 0.01
@utils.meta.JSONClass
class PeakModelState(ModelState):
def __init__(
self,
confidence: float = 0,
height_max: float = 0,
height_min: float = 0,
**kwargs
):
super().__init__(**kwargs)
self.confidence = confidence
self.height_max = height_max
self.height_min = height_min
class PeakModel(Model):
class PeakModel(TriangleModel):
def get_model_type(self) -> (str, bool):
model = 'peak'
@@ -42,98 +20,27 @@ class PeakModel(Model):
segment = data[start: end]
return segment.idxmax()
def get_state(self, cache: Optional[dict] = None) -> PeakModelState:
return PeakModelState.from_json(cache)
def do_fit(
self,
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
window_size = self.state.window_size
last_pattern_center = self.state.pattern_center
self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
height_list = learning_info['patterns_value']
del_conv_list = []
delete_pattern_width = []
delete_pattern_height = []
delete_pattern_timestamp = []
for segment in deleted_segments:
del_max_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted = utils.get_interval(data, del_max_index, window_size)
deleted = utils.subtract_min_without_nan(deleted)
del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
if len(del_conv): del_conv_list.append(max(del_conv))
delete_pattern_height.append(utils.find_confidence(deleted)[1])
self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list)
def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
data = utils.cut_dataframe(dataframe)
data = data['value']
window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data
all_maxs = argrelextrema(np.array(data), np.greater)[0]
extrema_list = []
for i in utils.exponential_smoothing(data + self.state.confidence, EXP_SMOOTHING_FACTOR):
extrema_list.append(i)
def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]:
pattern_list = []
for val in close_patterns:
max_val = data[val[0]]
ind = val[0]
for i in val:
if data[i] > max_val:
max_val = data[i]
ind = i
pattern_list.append(ind)
return pattern_list
def get_extremum_indexes(self, data: pd.Series) -> np.ndarray:
return argrelextrema(data.values, np.greater)[0]
def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series:
return utils.exponential_smoothing(data + self.state.confidence, alpha)
def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]:
segments = []
for i in all_maxs:
if data[i] > extrema_list[i]:
segments.append(i)
result = self.__filter_detection(segments, data)
result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence)
return result
def __filter_detection(self, segments: list, data: list) -> list:
delete_list = []
variance_error = self.state.window_size
close_patterns = utils.close_filtering(segments, variance_error)
segments = utils.best_pattern(close_patterns, data, 'max')
if len(segments) == 0 or len(self.state.pattern_model) == 0:
return []
pattern_data = self.state.pattern_model
up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
for segment in segments:
if segment > self.state.window_size:
convol_data = utils.get_interval(data, segment, self.state.window_size)
convol_data = utils.subtract_min_without_nan(convol_data)
percent_of_nans = convol_data.isnull().sum() / len(convol_data)
if percent_of_nans > 0.5:
delete_list.append(segment)
continue
elif 0 < percent_of_nans <= 0.5:
nan_list = utils.find_nan_indexes(convol_data)
convol_data = utils.nan_to_zero(convol_data, nan_list)
pattern_data = utils.nan_to_zero(pattern_data, nan_list)
conv = scipy.signal.fftconvolve(convol_data, pattern_data)
pattern_height = convol_data.values[self.state.window_size]
if pattern_height > up_height or pattern_height < low_height:
delete_list.append(segment)
continue
if max(conv) > up_conv or max(conv) < low_conv:
delete_list.append(segment)
continue
if max(conv) < up_del_conv and max(conv) > low_del_conv:
delete_list.append(segment)
else:
delete_list.append(segment)
for item in delete_list:
segments.remove(item)
return set(segments)
for idx in peak_indexes:
if data[idx] > smoothed_data[idx]:
segments.append(idx)
return segments
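Note: the peak-specific hooks above keep a local maximum only if it rises above the exponentially smoothed series that was shifted up by the learned confidence. utils.exponential_smoothing itself is not part of this diff; a generic sketch of what such a smoother typically looks like (initialization and NaN handling may differ from the repository's implementation):

    import pandas as pd

    def exponential_smoothing(series: pd.Series, alpha: float) -> pd.Series:
        # classic single-exponential smoothing: s[0] = x[0], s[t] = alpha * x[t] + (1 - alpha) * s[t-1]
        result = [series.iloc[0]]
        for value in series.iloc[1:]:
            result.append(alpha * value + (1 - alpha) * result[-1])
        return pd.Series(result, index=series.index)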

analytics/analytics/models/triangle_model.py (119 changed lines)

@@ -0,0 +1,119 @@
from analytic_types import AnalyticUnitId, TimeSeries
from analytic_types.learning_info import LearningInfo
from models import Model, ModelState, AnalyticSegment
import utils
import utils.meta
import scipy.signal
from scipy.fftpack import fft
from typing import Optional, List, Tuple
import numpy as np
import pandas as pd

EXP_SMOOTHING_FACTOR = 0.01

@utils.meta.JSONClass
class TriangleModelState(ModelState):

    def __init__(
        self,
        confidence: float = 0,
        height_max: float = 0,
        height_min: float = 0,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.confidence = confidence
        self.height_max = height_max
        self.height_min = height_min

class TriangleModel(Model):

    def get_state(self, cache: Optional[dict] = None) -> TriangleModelState:
        return TriangleModelState.from_json(cache)

    def do_fit(
        self,
        dataframe: pd.DataFrame,
        labeled_segments: List[AnalyticSegment],
        deleted_segments: List[AnalyticSegment],
        learning_info: LearningInfo
    ) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list))
        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
        height_list = learning_info.patterns_value

        del_conv_list = []
        delete_pattern_width = []
        delete_pattern_height = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted = utils.get_interval(data, segment.center_index, self.state.window_size)
            deleted = utils.subtract_min_without_nan(deleted)
            del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
            if len(del_conv):
                del_conv_list.append(max(del_conv))
            delete_pattern_height.append(utils.find_confidence(deleted)[1])

        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)

    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
        data = utils.cut_dataframe(dataframe)
        data = data['value']

        all_extremum_indexes = self.get_extremum_indexes(data)
        smoothed_data = self.get_smoothed_data(data, self.state.confidence, EXP_SMOOTHING_FACTOR)
        segments = self.get_possible_segments(data, smoothed_data, all_extremum_indexes)
        result = self.__filter_detection(segments, data)
        result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence)
        return result

    def __filter_detection(self, segments: List[int], data: pd.Series) -> list:
        delete_list = []
        variance_error = self.state.window_size
        close_patterns = utils.close_filtering(segments, variance_error)
        segments = self.get_best_pattern(close_patterns, data)

        if len(segments) == 0 or len(self.state.pattern_model) == 0:
            return []
        pattern_data = self.state.pattern_model
        up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
        low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
        up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
        low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
        up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
        low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
        for segment in segments:
            if segment > self.state.window_size:
                convol_data = utils.get_interval(data, segment, self.state.window_size)
                convol_data = utils.subtract_min_without_nan(convol_data)
                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                if percent_of_nans > 0.5:
                    delete_list.append(segment)
                    continue
                elif 0 < percent_of_nans <= 0.5:
                    nan_list = utils.find_nan_indexes(convol_data)
                    convol_data = utils.nan_to_zero(convol_data, nan_list)
                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
                pattern_height = convol_data.values.max()
                if pattern_height > up_height or pattern_height < low_height:
                    delete_list.append(segment)
                    continue
                if max(conv) > up_conv or max(conv) < low_conv:
                    delete_list.append(segment)
                    continue
                if max(conv) < up_del_conv and max(conv) > low_del_conv:
                    delete_list.append(segment)
            else:
                delete_list.append(segment)
        for item in delete_list:
            segments.remove(item)

        return set(segments)
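Note: TriangleModel is the shared template: do_detect finds extrema, smooths the series, filters candidates and converts the survivors into borders, while the direction-specific steps are delegated to the hooks that PeakModel (above) and TroughModel (below) override. A sketch of the hook surface a peak-like subclass covers; it assumes the repository's TriangleModel and utils are importable, and the bodies mirror PeakModel in this commit rather than defining a new model:

    import numpy as np
    import pandas as pd
    from scipy.signal import argrelextrema
    from typing import List
    import utils

    class ExampleMaxModel(TriangleModel):
        # find_segment_center and get_model_type from Model must also be provided

        def get_extremum_indexes(self, data: pd.Series) -> np.ndarray:
            return argrelextrema(data.values, np.greater)[0]

        def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series:
            return utils.exponential_smoothing(data + confidence, alpha)

        def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, indexes: List[int]) -> List[int]:
            return [i for i in indexes if data[i] > smoothed_data[i]]

        def get_best_pattern(self, close_patterns, data: pd.Series) -> List[int]:
            # within each group of nearby candidates keep the index with the largest value
            return [max(group, key=lambda i: data[i]) for group in close_patterns]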

analytics/analytics/models/trough_model.py (146 changed lines)

@@ -1,36 +1,14 @@
from models import Model, ModelState, AnalyticSegment
from analytic_types import TimeSeries
from models import TriangleModel
import utils
import scipy.signal
from scipy.fftpack import fft
from scipy.signal import argrelextrema
from typing import Optional, List, Tuple
import utils
import utils.meta
import numpy as np
import pandas as pd
from analytic_types import AnalyticUnitId
SMOOTHING_COEFF = 2400
EXP_SMOOTHING_FACTOR = 0.01
@utils.meta.JSONClass
class TroughModelState(ModelState):
def __init__(
self,
confidence: float = 0,
height_max: float = 0,
height_min: float = 0,
**kwargs
):
super().__init__(**kwargs)
self.confidence = confidence
self.height_max = height_max
self.height_min = height_min
class TroughModel(Model):
class TroughModel(TriangleModel):
def get_model_type(self) -> (str, bool):
model = 'trough'
@@ -42,99 +20,27 @@ class TroughModel(Model):
segment = data[start: end]
return segment.idxmin()
def get_state(self, cache: Optional[dict] = None) -> TroughModelState:
return TroughModelState.from_json(cache)
def do_fit(
self,
dataframe: pd.DataFrame,
labeled_segments: List[AnalyticSegment],
deleted_segments: List[AnalyticSegment],
learning_info: dict
) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
window_size = self.state.window_size
last_pattern_center = self.state.pattern_center
self.state.pattern_center = list(set(last_pattern_center + learning_info['segment_center_list']))
self.state.pattern_model = utils.get_av_model(learning_info['patterns_list'])
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
height_list = learning_info['patterns_value']
del_conv_list = []
delete_pattern_width = []
delete_pattern_height = []
delete_pattern_timestamp = []
for segment in deleted_segments:
del_min_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted = utils.get_interval(data, del_min_index, window_size)
deleted = utils.subtract_min_without_nan(deleted)
del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
if len(del_conv): del_conv_list.append(max(del_conv))
delete_pattern_height.append(utils.find_confidence(deleted)[1])
self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list)
def do_detect(self, dataframe: pd.DataFrame) -> List[Tuple[int, int]]:
data = utils.cut_dataframe(dataframe)
data = data['value']
window_size = int(len(data)/SMOOTHING_COEFF) #test ws on flat data
all_mins = argrelextrema(np.array(data), np.less)[0]
extrema_list = []
for i in utils.exponential_smoothing(data - self.state.confidence, EXP_SMOOTHING_FACTOR):
extrema_list.append(i)
segments = []
for i in all_mins:
if data[i] < extrema_list[i]:
segments.append(i)
result = self.__filter_detection(segments, data)
result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence, inverse = True)
return result
def __filter_detection(self, segments: list, data: list) -> list:
delete_list = []
variance_error = self.state.window_size
close_patterns = utils.close_filtering(segments, variance_error)
segments = utils.best_pattern(close_patterns, data, 'min')
if len(segments) == 0 or len(self.state.pattern_center) == 0:
def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]:
pattern_list = []
for val in close_patterns:
min_val = data[val[0]]
ind = val[0]
for i in val:
if data[i] < min_val:
min_val = data[i]
ind = i
pattern_list.append(ind)
return pattern_list
def get_extremum_indexes(self, data: pd.Series) -> np.ndarray:
return argrelextrema(data.values, np.less)[0]
def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series:
return utils.exponential_smoothing(data - self.state.confidence, alpha)
def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, trough_indexes: List[int]) -> List[int]:
segments = []
for idx in trough_indexes:
if data[idx] < smoothed_data[idx]:
segments.append(idx)
return segments
pattern_data = self.state.pattern_model
up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
for segment in segments:
if segment > self.state.window_size:
convol_data = utils.get_interval(data, segment, self.state.window_size)
convol_data = utils.subtract_min_without_nan(convol_data)
percent_of_nans = convol_data.isnull().sum() / len(convol_data)
if percent_of_nans > 0.5:
delete_list.append(segment)
continue
elif 0 < percent_of_nans <= 0.5:
nan_list = utils.find_nan_indexes(convol_data)
convol_data = utils.nan_to_zero(convol_data, nan_list)
pattern_data = utils.nan_to_zero(pattern_data, nan_list)
conv = scipy.signal.fftconvolve(convol_data, pattern_data)
pattern_height = convol_data.values.max()
if pattern_height > up_height or pattern_height < low_height:
delete_list.append(segment)
continue
if max(conv) > up_conv or max(conv) < low_conv:
delete_list.append(segment)
continue
if max(conv) < up_del_conv and max(conv) > low_del_conv:
delete_list.append(segment)
else:
delete_list.append(segment)
for item in delete_list:
segments.remove(item)
return set(segments)

analytics/analytics/utils/common.py (17 changed lines)

@@ -59,7 +59,10 @@ def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str)
pattern_list.append(i)
return pattern_list
def find_jump(data, height, lenght):
def find_jump(data, height, lenght) -> List[int]:
'''
Find jump indexes
'''
j_list = []
for i in range(len(data)-lenght-1):
for x in range(1, lenght):
@@ -67,7 +70,10 @@ def find_jump(data, height, lenght):
j_list.append(i)
return(j_list)
def find_drop(data, height, length):
def find_drop(data, height, length) -> List[int]:
'''
Find drop indexes
'''
d_list = []
for i in range(len(data)-length-1):
for x in range(1, length):
@@ -116,7 +122,7 @@ def get_same_length(patterns_list):
pat.extend(added_values)
return patterns_list
def close_filtering(pattern_list: List[int], win_size: int) -> List[Tuple[int, int]]:
def close_filtering(pattern_list: List[int], win_size: int) -> TimeSeries:
if len(pattern_list) == 0:
return []
s = [[pattern_list[0]]]
@@ -152,7 +158,7 @@ def find_interval(dataframe: pd.DataFrame) -> int:
delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0])
return delta
def get_start_and_end_of_segments(segments: List[List[int]]) -> List[Tuple[int, int]]:
def get_start_and_end_of_segments(segments: List[List[int]]) -> TimeSeries:
'''
find start and end of segment: [1, 2, 3, 4] -> [1, 4]
if segment is 1 index - it will be doubled: [7] -> [7, 7]
@@ -168,7 +174,6 @@ def get_start_and_end_of_segments(segments: List[List[int]]) -> List[Tuple[int,
result.append(segment)
return result
def best_pattern(pattern_list: list, data: pd.Series, dir: str) -> list:
new_pattern_list = []
for val in pattern_list:
@@ -261,7 +266,7 @@ def get_interval(data: pd.Series, center: int, window_size: int, normalization =
result_interval = subtract_min_without_nan(result_interval)
return result_interval
def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> List[Tuple[int, int]]:
def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> TimeSeries:
"""
Find start and end of patterns for peak
max_border_factor - final border of pattern
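Note: several detection-related signatures in this commit switch from List[Tuple[int, int]] to analytic_types.TimeSeries. The alias itself is defined in analytic_types/__init__.py and is not part of this diff; judging from the annotations it replaces, it is assumed to describe a list of (start_index, end_index) pairs:

    from typing import List, Tuple

    # assumed shape of the alias, inferred from the replaced annotations above
    TimeSeries = List[Tuple[int, int]]

    def example_detection() -> TimeSeries:
        return [(10, 12), (57, 59)]  # each pair is a detected segment's start and end index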

analytics/tests/test_manager.py (2 changed lines)

@@ -1,5 +1,5 @@
from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel
from models import PeakModelState, DropModelState, TroughModelState, JumpModelState, GeneralModelState
from models import DropModelState, JumpModelState, GeneralModelState
import utils.meta
import aiounittest
from analytic_unit_manager import AnalyticUnitManager
