
Refactoring of Jump and Drop models (#865)

* add stair model

* add stair model method

* add types

* fix

* add tests for get stair

* fix

* fix imports

* add todo

* fixes

* get stair indexes to stair model

* fixes

* remove old methods

* use enum

* fix get_model_type

* remove exception

* list(set) -> utils.remove_duplicates

* refactor get_stair

* fixes

* fixes 2

* fixes 3

* todo
Alexander Velikiy committed b53b49dcf7 5 years ago (via GitHub)
  1. analytics/analytics/models/__init__.py (7 changed lines)
  2. analytics/analytics/models/custom_model.py (4 changed lines)
  3. analytics/analytics/models/drop_model.py (125 changed lines)
  4. analytics/analytics/models/general_model.py (10 changed lines)
  5. analytics/analytics/models/jump_model.py (127 changed lines)
  6. analytics/analytics/models/model.py (36 changed lines)
  7. analytics/analytics/models/peak_model.py (8 changed lines)
  8. analytics/analytics/models/stair_model.py (147 changed lines)
  9. analytics/analytics/models/triangle_model.py (4 changed lines)
  10. analytics/analytics/models/trough_model.py (8 changed lines)
  11. analytics/analytics/utils/common.py (26 changed lines)
  12. analytics/tests/test_manager.py (2 changed lines)
  13. analytics/tests/test_models.py (43 changed lines)
  14. analytics/tests/test_utils.py (30 changed lines)

analytics/analytics/models/__init__.py (7 changed lines)

@@ -1,8 +1,9 @@
-from models.model import Model, ModelState, AnalyticSegment
+from models.model import Model, ModelState, AnalyticSegment, ModelType, ExtremumType
 from models.triangle_model import TriangleModel, TriangleModelState
-from models.drop_model import DropModel, DropModelState
+from models.stair_model import StairModel, StairModelState
+from models.drop_model import DropModel
 from models.peak_model import PeakModel
-from models.jump_model import JumpModel, JumpModelState
+from models.jump_model import JumpModel
 from models.custom_model import CustomModel
 from models.trough_model import TroughModel
 from models.general_model import GeneralModel, GeneralModelState

analytics/analytics/models/custom_model.py (4 changed lines)

@@ -1,4 +1,4 @@
-from models import Model, AnalyticSegment, ModelState
+from models import Model, AnalyticSegment, ModelState, ModelType
 from analytic_types import AnalyticUnitId, ModelCache
 from analytic_types.learning_info import LearningInfo
 import utils
@@ -23,7 +23,7 @@ class CustomModel(Model):
     def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
         pass

-    def get_model_type(self) -> (str, bool):
+    def get_model_type(self) -> ModelType:
         pass

     def get_state(self, cache: Optional[ModelCache] = None) -> ModelState:

analytics/analytics/models/drop_model.py (125 changed lines)

@@ -1,122 +1,9 @@
-from models import Model, ModelState, AnalyticSegment
-
-import scipy.signal
-from scipy.fftpack import fft
-from scipy.signal import argrelextrema
-from scipy.stats import gaussian_kde
-from typing import Optional, List, Tuple
-import utils
-import utils.meta
-import numpy as np
-import pandas as pd
-
-from analytic_types import AnalyticUnitId, TimeSeries
-from analytic_types.learning_info import LearningInfo
-
-
-@utils.meta.JSONClass
-class DropModelState(ModelState):
-
-    def __init__(
-        self,
-        confidence: float = 0,
-        drop_height: float = 0,
-        drop_length: float = 0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.confidence = confidence
-        self.drop_height = drop_height
-        self.drop_length = drop_length
-
-
-class DropModel(Model):
-
-    def get_model_type(self) -> (str, bool):
-        model = 'drop'
-        type_model = False
-        return (model, type_model)
-
-    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
-        data = dataframe['value']
-        segment = data[start: end]
-        segment_center_index = utils.find_pattern_center(segment, start, 'drop')
-        return segment_center_index
-
-    def get_state(self, cache: Optional[dict] = None) -> DropModelState:
-        return DropModelState.from_json(cache)
-
-    def do_fit(
-        self,
-        dataframe: pd.DataFrame,
-        labeled_segments: List[AnalyticSegment],
-        deleted_segments: List[AnalyticSegment],
-        learning_info: LearningInfo
-    ) -> None:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = self.state.window_size
-        last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
-        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
-        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info.patterns_value
-
-        del_conv_list = []
-        delete_pattern_timestamp = []
-        for segment in deleted_segments:
-            segment_cent_index = segment.center_index
-            delete_pattern_timestamp.append(segment.pattern_timestamp)
-            deleted_drop = utils.get_interval(data, segment_cent_index, window_size)
-            deleted_drop = utils.subtract_min_without_nan(deleted_drop)
-            del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.state.pattern_model)
-            if len(del_conv_drop): del_conv_list.append(max(del_conv_drop))
-
-        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
-        self.state.drop_height = int(min(learning_info.pattern_height, default = 1))
-        self.state.drop_length = int(max(learning_info.pattern_width, default = 1))
-
-    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        possible_drops = utils.find_drop(data, self.state.drop_height, self.state.drop_length + 1)
-        result = self.__filter_detection(possible_drops, data)
-        return [(val - 1, val + 1) for val in result]
-
-    def __filter_detection(self, segments: List[int], data: list):
-        delete_list = []
-        variance_error = self.state.window_size
-        close_patterns = utils.close_filtering(segments, variance_error)
-        segments = utils.best_pattern(close_patterns, data, 'min')
-        if len(segments) == 0 or len(self.state.pattern_center) == 0:
-            segments = []
-            return segments
-        pattern_data = self.state.pattern_model
-        for segment in segments:
-            if segment > self.state.window_size and segment < (len(data) - self.state.window_size):
-                convol_data = utils.get_interval(data, segment, self.state.window_size)
-                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if len(convol_data) == 0 or percent_of_nans > 0.5:
-                    delete_list.append(segment)
-                    continue
-                elif 0 < percent_of_nans <= 0.5:
-                    nan_list = utils.find_nan_indexes(convol_data)
-                    convol_data = utils.nan_to_zero(convol_data, nan_list)
-                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
-                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
-                upper_bound = self.state.convolve_max * 1.2
-                lower_bound = self.state.convolve_min * 0.8
-                delete_up_bound = self.state.conv_del_max * 1.02
-                delete_low_bound = self.state.conv_del_min * 0.98
-                try:
-                    if max(conv) > upper_bound or max(conv) < lower_bound:
-                        delete_list.append(segment)
-                    elif max(conv) < delete_up_bound and max(conv) > delete_low_bound:
-                        delete_list.append(segment)
-                except ValueError:
-                    delete_list.append(segment)
-            else:
-                delete_list.append(segment)
-        for item in delete_list:
-            segments.remove(item)
-        return set(segments)
+from models import StairModel, ModelType, ExtremumType
+
+
+class DropModel(StairModel):
+
+    def get_model_type(self) -> ModelType:
+        return ModelType.DROP
+
+    def get_extremum_type(self) -> ExtremumType:
+        return ExtremumType.MIN

analytics/analytics/models/general_model.py (10 changed lines)

@@ -1,5 +1,5 @@
 from analytic_types import AnalyticUnitId
-from models import Model, ModelState, AnalyticSegment
+from models import Model, ModelState, AnalyticSegment, ModelType
 from typing import Union, List, Generator
 import utils
 import utils.meta
@@ -30,10 +30,8 @@ class GeneralModelState(ModelState):
 class GeneralModel(Model):

-    def get_model_type(self) -> (str, bool):
-        model = 'general'
-        type_model = True
-        return (model, type_model)
+    def get_model_type(self) -> ModelType:
+        return ModelType.GENERAL

     def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
         data = dataframe['value']
@@ -54,7 +52,7 @@ class GeneralModel(Model):
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
+        self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list)
         self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
         convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
         correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)

analytics/analytics/models/jump_model.py (127 changed lines)

@@ -1,124 +1,9 @@
-from models import Model, ModelState, AnalyticSegment
-
-import utils
-import utils.meta
-import numpy as np
-import pandas as pd
-import scipy.signal
-from scipy.fftpack import fft
-from typing import Optional, List, Tuple
-import math
-from scipy.signal import argrelextrema
-from scipy.stats import gaussian_kde
-
-from analytic_types import AnalyticUnitId, TimeSeries
-from analytic_types.learning_info import LearningInfo
-
-
-@utils.meta.JSONClass
-class JumpModelState(ModelState):
-
-    def __init__(
-        self,
-        confidence: float = 0,
-        jump_height: float = 0,
-        jump_length: float = 0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.confidence = confidence
-        self.jump_height = jump_height
-        self.jump_length = jump_length
-
-
-class JumpModel(Model):
-
-    def get_model_type(self) -> (str, bool):
-        model = 'jump'
-        type_model = True
-        return (model, type_model)
-
-    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
-        data = dataframe['value']
-        segment = data[start: end]
-        segment_center_index = utils.find_pattern_center(segment, start, 'jump')
-        return segment_center_index
-
-    def get_state(self, cache: Optional[dict] = None) -> JumpModelState:
-        return JumpModelState.from_json(cache)
-
-    def do_fit(
-        self,
-        dataframe: pd.DataFrame,
-        labeled_segments: List[AnalyticSegment],
-        deleted_segments: List[AnalyticSegment],
-        learning_info: LearningInfo
-    ) -> None:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = self.state.window_size
-        last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
-        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
-        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info.patterns_value
-
-        del_conv_list = []
-        delete_pattern_timestamp = []
-        for segment in deleted_segments:
-            segment_cent_index = segment.center_index
-            delete_pattern_timestamp.append(segment.pattern_timestamp)
-            deleted_jump = utils.get_interval(data, segment_cent_index, window_size)
-            deleted_jump = utils.subtract_min_without_nan(deleted_jump)
-            del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.state.pattern_model)
-            if len(del_conv_jump): del_conv_list.append(max(del_conv_jump))
-
-        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
-        self.state.jump_height = float(min(learning_info.pattern_height, default = 1))
-        self.state.jump_length = int(max(learning_info.pattern_width, default = 1))
-
-    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        possible_jumps = utils.find_jump(data, self.state.jump_height, self.state.jump_length + 1)
-        result = self.__filter_detection(possible_jumps, data)
-        return [(val - 1, val + 1) for val in result]
-
-    def __filter_detection(self, segments: List[int], data: pd.Series):
-        delete_list = []
-        variance_error = self.state.window_size
-        close_patterns = utils.close_filtering(segments, variance_error)
-        segments = utils.best_pattern(close_patterns, data, 'max')
-        if len(segments) == 0 or len(self.state.pattern_center) == 0:
-            segments = []
-            return segments
-        pattern_data = self.state.pattern_model
-        upper_bound = self.state.convolve_max * 1.2
-        lower_bound = self.state.convolve_min * 0.8
-        delete_up_bound = self.state.conv_del_max * 1.02
-        delete_low_bound = self.state.conv_del_min * 0.98
-        for segment in segments:
-            if segment > self.state.window_size and segment < (len(data) - self.state.window_size):
-                convol_data = utils.get_interval(data, segment, self.state.window_size)
-                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if len(convol_data) == 0 or percent_of_nans > 0.5:
-                    delete_list.append(segment)
-                    continue
-                elif 0 < percent_of_nans <= 0.5:
-                    nan_list = utils.find_nan_indexes(convol_data)
-                    convol_data = utils.nan_to_zero(convol_data, nan_list)
-                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
-                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
-                try:
-                    if max(conv) > upper_bound or max(conv) < lower_bound:
-                        delete_list.append(segment)
-                    elif max(conv) < delete_up_bound and max(conv) > delete_low_bound:
-                        delete_list.append(segment)
-                except ValueError:
-                    delete_list.append(segment)
-            else:
-                delete_list.append(segment)
-        for item in delete_list:
-            segments.remove(item)
-        return set(segments)
+from models import StairModel, ModelType, ExtremumType
+
+
+class JumpModel(StairModel):
+
+    def get_model_type(self) -> ModelType:
+        return ModelType.JUMP
+
+    def get_extremum_type(self) -> ExtremumType:
+        return ExtremumType.MAX

analytics/analytics/models/model.py (36 changed lines)

@@ -1,4 +1,9 @@
+from analytic_types import AnalyticUnitId, ModelCache, TimeSeries
+from analytic_types.segment import Segment
+from analytic_types.learning_info import LearningInfo
+
 import utils
+import utils.meta
 from abc import ABC, abstractmethod
 from attrdict import AttrDict
@@ -6,11 +11,18 @@ from typing import Optional, List, Tuple
 import pandas as pd
 import math
 import logging
-from analytic_types import AnalyticUnitId, ModelCache, TimeSeries
-from analytic_types.segment import Segment
-from analytic_types.learning_info import LearningInfo
-import utils.meta
+from enum import Enum
+
+
+class ModelType(Enum):
+    JUMP = 'jump'
+    DROP = 'drop'
+    PEAK = 'peak'
+    TROUGH = 'trough'
+    GENERAL = 'general'
+
+
+class ExtremumType(Enum):
+    MAX = 'max'
+    MIN = 'min'

 class AnalyticSegment(Segment):
     '''
@@ -121,7 +133,7 @@ class Model(ABC):
         pass

     @abstractmethod
-    def get_model_type(self) -> (str, bool):
+    def get_model_type(self) -> ModelType:
         pass

     @abstractmethod
@@ -160,8 +172,7 @@ class Model(ABC):
         if self.state.window_size == 0:
             self.state.window_size = math.ceil(max_length / 2) if max_length else 0
-        model, model_type = self.get_model_type()
-        learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type)
+        learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, self.get_model_type())
         self.do_fit(dataframe, labeled, deleted, learning_info)
         logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id))
         return self.state
@@ -181,14 +192,14 @@ class Model(ABC):
             'cache': self.state,
         }

-    def _update_fiting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None:
+    def _update_fitting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None:
         state.confidence = float(min(confidences, default = 1.5))
         state.convolve_min, state.convolve_max = utils.get_min_max(convolve_list, state.window_size)
         state.conv_del_min, state.conv_del_max = utils.get_min_max(del_conv_list, 0)
         if height_list is not None:
             state.height_min, state.height_max = utils.get_min_max(height_list, 0)

-    def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: str, model_type: bool) -> dict:
+    def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: ModelType) -> dict:
         logging.debug('Start parsing segments')
         learning_info = LearningInfo()
         data = dataframe['value']
@@ -205,11 +216,12 @@ class Model(ABC):
                     segment_center, self.state.window_size, len(data)))
                 continue
             learning_info.patterns_list.append(aligned_segment)
-            if model == 'peak' or model == 'trough':
+            # TODO: use Triangle/Stair types
+            if model == ModelType.PEAK or model == ModelType.TROUGH:
                 learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1])
                 learning_info.patterns_value.append(aligned_segment.values.max())
-            if model == 'jump' or model == 'drop':
-                pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model)
+            if model == ModelType.JUMP or model == ModelType.DROP:
+                pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model.value)
                 learning_info.pattern_height.append(pattern_height)
                 learning_info.pattern_width.append(pattern_length)
             learning_info.patterns_value.append(aligned_segment.values[self.state.window_size])
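For context, ModelType replaces the former (str, bool) return of get_model_type(), where the string named the pattern and the bool marked whether it peaks upward. A minimal sketch of how call sites change, assuming nothing beyond the enum defined in the diff above (the bare variables stand in for the elided surrounding code):

    from enum import Enum

    class ModelType(Enum):
        JUMP = 'jump'
        DROP = 'drop'

    # Old style: callers unpacked an anonymous tuple and compared raw strings.
    model, type_model = 'jump', True
    if model == 'jump' or model == 'drop':
        pass

    # New style: one typed value; .value recovers the string where a helper
    # such as utils.find_parameters still expects it.
    model = ModelType.JUMP
    if model in (ModelType.JUMP, ModelType.DROP):
        print(model.value)  # -> 'jump'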

analytics/analytics/models/peak_model.py (8 changed lines)

@@ -1,5 +1,5 @@
 from analytic_types import TimeSeries
-from models import TriangleModel
+from models import TriangleModel, ModelType
 import utils
 import scipy.signal
@@ -10,10 +10,8 @@ import pandas as pd
 class PeakModel(TriangleModel):

-    def get_model_type(self) -> (str, bool):
-        model = 'peak'
-        type_model = True
-        return (model, type_model)
+    def get_model_type(self) -> ModelType:
+        return ModelType.PEAK

     def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
         data = dataframe['value']

analytics/analytics/models/stair_model.py (147 changed lines)

@@ -0,0 +1,147 @@
+from models import Model, ModelState, AnalyticSegment, ModelType
+from analytic_types import TimeSeries
+from analytic_types.learning_info import LearningInfo
+
+from scipy.fftpack import fft
+from typing import Optional, List
+from enum import Enum
+import scipy.signal
+import utils
+import utils.meta
+import pandas as pd
+import numpy as np
+import operator
+
+POSITIVE_SEGMENT_MEASUREMENT_ERROR = 0.2
+NEGATIVE_SEGMENT_MEASUREMENT_ERROR = 0.02
+
+
+@utils.meta.JSONClass
+class StairModelState(ModelState):
+
+    def __init__(
+        self,
+        confidence: float = 0,
+        stair_height: float = 0,
+        stair_length: float = 0,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.confidence = confidence
+        self.stair_height = stair_height
+        self.stair_length = stair_length
+
+
+class StairModel(Model):
+
+    def get_state(self, cache: Optional[dict] = None) -> StairModelState:
+        return StairModelState.from_json(cache)
+
+    def get_stair_indexes(self, data: pd.Series, height: float, length: int) -> List[int]:
+        """Get list of start stair segment indexes.
+
+        Keyword arguments:
+        data -- data that contains stair (jump or drop) segments
+        length -- maximum count of values in the stair
+        height -- the difference between stair max_line and min_line (see utils.find_parameters)
+        """
+        indexes = []
+        for i in range(len(data) - length - 1):
+            is_stair = self.is_stair_in_segment(data.values[i:i + length + 1], height)
+            if is_stair == True:
+                indexes.append(i)
+        return indexes
+
+    def is_stair_in_segment(self, segment: np.ndarray, height: float) -> bool:
+        if len(segment) < 2:
+            return False
+        comparison_operator = operator.ge
+        if self.get_model_type() == ModelType.DROP:
+            comparison_operator = operator.le
+            height = -height
+        return comparison_operator(max(segment[1:]), segment[0] + height)
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        segment_center_index = utils.find_pattern_center(segment, start, self.get_model_type().value)
+        return segment_center_index
+
+    def do_fit(
+        self,
+        dataframe: pd.DataFrame,
+        labeled_segments: List[AnalyticSegment],
+        deleted_segments: List[AnalyticSegment],
+        learning_info: LearningInfo
+    ) -> None:
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
+        window_size = self.state.window_size
+        last_pattern_center = self.state.pattern_center
+        self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list)
+        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
+        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
+        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
+        height_list = learning_info.patterns_value
+
+        del_conv_list = []
+        delete_pattern_timestamp = []
+        for segment in deleted_segments:
+            segment_cent_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
+            deleted_stair = utils.get_interval(data, segment_cent_index, window_size)
+            deleted_stair = utils.subtract_min_without_nan(deleted_stair)
+            del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model)
+            if len(del_conv_stair) > 0:
+                del_conv_list.append(max(del_conv_stair))
+
+        self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
+        self.state.stair_height = int(min(learning_info.pattern_height, default = 1))
+        self.state.stair_length = int(max(learning_info.pattern_width, default = 1))
+
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
+        possible_stairs = self.get_stair_indexes(data, self.state.stair_height, self.state.stair_length + 1)
+        result = self.__filter_detection(possible_stairs, data)
+        return [(val - 1, val + 1) for val in result]
+
+    def __filter_detection(self, segments_indexes: List[int], data: list):
+        delete_list = []
+        variance_error = self.state.window_size
+        close_segments = utils.close_filtering(segments_indexes, variance_error)
+        segments_indexes = utils.best_pattern(close_segments, data, self.get_extremum_type().value)
+        if len(segments_indexes) == 0 or len(self.state.pattern_center) == 0:
+            return []
+        pattern_data = self.state.pattern_model
+        for segment_index in segments_indexes:
+            if segment_index <= self.state.window_size or segment_index >= (len(data) - self.state.window_size):
+                delete_list.append(segment_index)
+                continue
+            convol_data = utils.get_interval(data, segment_index, self.state.window_size)
+            percent_of_nans = convol_data.isnull().sum() / len(convol_data)
+            if len(convol_data) == 0 or percent_of_nans > 0.5:
+                delete_list.append(segment_index)
+                continue
+            elif 0 < percent_of_nans <= 0.5:
+                nan_list = utils.find_nan_indexes(convol_data)
+                convol_data = utils.nan_to_zero(convol_data, nan_list)
+                pattern_data = utils.nan_to_zero(pattern_data, nan_list)
+            conv = scipy.signal.fftconvolve(convol_data, pattern_data)
+            if len(conv) == 0:
+                delete_list.append(segment_index)
+                continue
+            upper_bound = self.state.convolve_max * (1 + POSITIVE_SEGMENT_MEASUREMENT_ERROR)
+            lower_bound = self.state.convolve_min * (1 - POSITIVE_SEGMENT_MEASUREMENT_ERROR)
+            delete_up_bound = self.state.conv_del_max * (1 + NEGATIVE_SEGMENT_MEASUREMENT_ERROR)
+            delete_low_bound = self.state.conv_del_min * (1 - NEGATIVE_SEGMENT_MEASUREMENT_ERROR)
+            max_conv = max(conv)
+            if max_conv > upper_bound or max_conv < lower_bound:
+                delete_list.append(segment_index)
+            elif max_conv < delete_up_bound and max_conv > delete_low_bound:
+                delete_list.append(segment_index)
+        for item in delete_list:
+            segments_indexes.remove(item)
+        segments_indexes = utils.remove_duplicates_and_sort(segments_indexes)
+        return segments_indexes
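The core of the new class is the is_stair_in_segment predicate, which unifies the deleted find_jump/find_drop loops: a jump exists when some value after the first rises at least height above it, and a drop flips both the comparison operator and the sign of height. A standalone sketch of that logic (the is_drop flag is illustrative; the committed code dispatches on get_model_type() instead):

    import operator
    import numpy as np

    def is_stair_in_segment(segment: np.ndarray, height: float, is_drop: bool) -> bool:
        # Mirrors StairModel.is_stair_in_segment from the diff above.
        if len(segment) < 2:
            return False
        compare = operator.ge
        if is_drop:
            compare = operator.le
            height = -height
        return compare(max(segment[1:]), segment[0] + height)

    print(is_stair_in_segment(np.array([1, 1, 4]), 2, is_drop=False))  # True: rise of 3 >= 2
    print(is_stair_in_segment(np.array([4, 1, 1]), 2, is_drop=True))   # True: fall of 3 >= 2
    print(is_stair_in_segment(np.array([1, 2, 2]), 2, is_drop=False))  # False: rise of only 1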

analytics/analytics/models/triangle_model.py (4 changed lines)

@@ -43,7 +43,7 @@ class TriangleModel(Model):
     ) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
-        self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list))
+        self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list)
         self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
         convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
         correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
@@ -62,7 +62,7 @@ class TriangleModel(Model):
             del_conv_list.append(max(del_conv))
             delete_pattern_height.append(utils.find_confidence(deleted)[1])

-        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)
+        self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)

     def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
         data = utils.cut_dataframe(dataframe)

analytics/analytics/models/trough_model.py (8 changed lines)

@@ -1,5 +1,5 @@
 from analytic_types import TimeSeries
-from models import TriangleModel
+from models import TriangleModel, ModelType
 import utils
 import scipy.signal
@@ -10,10 +10,8 @@ import pandas as pd
 class TroughModel(TriangleModel):

-    def get_model_type(self) -> (str, bool):
-        model = 'trough'
-        type_model = False
-        return (model, type_model)
+    def get_model_type(self) -> ModelType:
+        return ModelType.TROUGH

     def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
         data = dataframe['value']

analytics/analytics/utils/common.py (26 changed lines)

@@ -55,28 +55,6 @@ def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str)
         pattern_list.append(i)
     return pattern_list

-def find_jump(data, height: float, lenght: int) -> List[int]:
-    '''
-    Find jump indexes
-    '''
-    j_list = []
-    for i in range(len(data)-lenght-1):
-        for x in range(1, lenght):
-            if(data[i + x] > data[i] + height):
-                j_list.append(i)
-    return(j_list)
-
-def find_drop(data, height: float, length: int) -> List[int]:
-    '''
-    Find drop indexes
-    '''
-    d_list = []
-    for i in range(len(data)-length-1):
-        for x in range(1, length):
-            if(data[i + x] < data[i] - height):
-                d_list.append(i)
-    return(d_list)
-
 def timestamp_to_index(dataframe: pd.DataFrame, timestamp: int):
     data = dataframe['timestamp']
     idx, = np.where(data >= timestamp)
@@ -459,3 +437,7 @@ def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:

 def get_min_max(array: list, default):
     return float(min(array, default=default)), float(max(array, default=default))
+
+def remove_duplicates_and_sort(array: list) -> list:
+    array = list(frozenset(array))
+    array.sort()
+    return array
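remove_duplicates_and_sort is a small but deliberate replacement for the scattered list(set(...)) calls: besides deduplicating, it returns a deterministic sorted order, which the pattern_center index lists rely on downstream. A quick standalone check of its behavior, assuming hashable list items:

    def remove_duplicates_and_sort(array: list) -> list:
        # Same body as the new utils helper: dedupe, then sort ascending.
        array = list(frozenset(array))
        array.sort()
        return array

    assert remove_duplicates_and_sort([8, 3, 6] + [1, 3, 5]) == [1, 3, 5, 6, 8]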

analytics/tests/test_manager.py (2 changed lines)

@@ -1,5 +1,5 @@
 from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel
-from models import DropModelState, JumpModelState, GeneralModelState
+from models import GeneralModelState
 import utils.meta
 import aiounittest
 from analytic_unit_manager import AnalyticUnitManager

analytics/tests/test_models.py (43 changed lines)

@@ -0,0 +1,43 @@
+import unittest
+import pandas as pd
+import numpy as np
+
+import models
+
+
+class TestModel(unittest.TestCase):
+
+    def test_stair_model_get_indexes(self):
+        drop_model = models.DropModel()
+        jump_model = models.JumpModel()
+        drop_data = pd.Series([4, 4, 4, 1, 1, 1, 5, 5, 2, 2, 2])
+        jump_data = pd.Series([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5])
+        jump_data_one_stair = pd.Series([1, 3, 3])
+        drop_data_one_stair = pd.Series([4, 2, 1])
+        height = 2
+        length = 2
+        expected_result = [2, 7]
+        drop_model_result = drop_model.get_stair_indexes(drop_data, height, length)
+        jump_model_result = jump_model.get_stair_indexes(jump_data, height, length)
+        drop_one_stair_result = drop_model.get_stair_indexes(drop_data_one_stair, height, 1)
+        jump_one_stair_result = jump_model.get_stair_indexes(jump_data_one_stair, height, 1)
+        for val in expected_result:
+            self.assertIn(val, drop_model_result)
+            self.assertIn(val, jump_model_result)
+        self.assertEqual(0, drop_one_stair_result[0])
+        self.assertEqual(0, jump_one_stair_result[0])
+
+    def test_stair_model_get_indexes_corner_cases(self):
+        drop_model = models.DropModel()
+        jump_model = models.JumpModel()
+        empty_data = pd.Series([])
+        nan_data = pd.Series([np.nan, np.nan, np.nan, np.nan])
+        height, length = 2, 2
+        length_zero, height_zero = 0, 0
+        expected_result = []
+        drop_empty_data_result = drop_model.get_stair_indexes(empty_data, height, length)
+        drop_nan_data_result = drop_model.get_stair_indexes(nan_data, height_zero, length_zero)
+        jump_empty_data_result = jump_model.get_stair_indexes(empty_data, height, length)
+        jump_nan_data_result = jump_model.get_stair_indexes(nan_data, height_zero, length_zero)
+        self.assertEqual(drop_empty_data_result, expected_result)
+        self.assertEqual(drop_nan_data_result, expected_result)
+        self.assertEqual(jump_empty_data_result, expected_result)
+        self.assertEqual(jump_nan_data_result, expected_result)

analytics/tests/test_utils.py (30 changed lines)

@@ -138,28 +138,6 @@ class TestUtils(unittest.TestCase):
         result = [2.0, 2.0, 2.0]
         self.assertEqual(utils.get_av_model(patterns_list), result)

-    def test_find_jump_nan_data(self):
-        data = [np.nan, np.nan, np.nan, np.nan]
-        data = pd.Series(data)
-        length = 2
-        height = 3
-        length_zero = 0
-        height_zero = 0
-        result = []
-        self.assertEqual(utils.find_jump(data, height, length), result)
-        self.assertEqual(utils.find_jump(data, height_zero, length_zero), result)
-
-    def test_find_drop_nan_data(self):
-        data = [np.nan, np.nan, np.nan, np.nan]
-        data = pd.Series(data)
-        length = 2
-        height = 3
-        length_zero = 0
-        height_zero = 0
-        result = []
-        self.assertEqual(utils.find_drop(data, height, length), result)
-        self.assertEqual(utils.find_drop(data, height_zero, length_zero), result)
-
     def test_get_distribution_density(self):
         segment = [1, 1, 1, 3, 5, 5, 5]
         segment = pd.Series(segment)
@@ -369,5 +347,13 @@ class TestUtils(unittest.TestCase):
         expected_result = [{ 'from': 100, 'to': 200 }]
         self.assertEqual(meta_result, expected_result)

+    def test_remove_duplicates_and_sort(self):
+        a1 = [1, 3, 5]
+        a2 = [8, 3, 6]
+        expected_result = [1, 3, 5, 6, 8]
+        utils_result = utils.remove_duplicates_and_sort(a1 + a2)
+        self.assertEqual(utils_result, expected_result)
+        self.assertEqual([], [])
+
 if __name__ == '__main__':
     unittest.main()
