* add stair model
* add stair model method
* add types
* fix
* add tests for get stair
* fix
* fix imports
* add todo
* fixes
* get stair indexes to stair model
* fixes
* remove old methods
* use enum
* fix get_model_type
* remove exception
* list(set) -> utils.remove_duplicates
* refactor get_stair
* fixes
* fixes 2
* fixes 3
* todo
Alexander Velikiy · 5 years ago · committed by GitHub
14 changed files with 257 additions and 320 deletions
models/__init__.py
@@ -1,8 +1,9 @@
-from models.model import Model, ModelState, AnalyticSegment
+from models.model import Model, ModelState, AnalyticSegment, ModelType, ExtremumType
 from models.triangle_model import TriangleModel, TriangleModelState
-from models.drop_model import DropModel, DropModelState
+from models.stair_model import StairModel, StairModelState
+from models.drop_model import DropModel
 from models.peak_model import PeakModel
-from models.jump_model import JumpModel, JumpModelState
+from models.jump_model import JumpModel
 from models.custom_model import CustomModel
 from models.trough_model import TroughModel
 from models.general_model import GeneralModel, GeneralModelState
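`ModelType` and `ExtremumType` are exported from `models/model.py`, one of the 14 changed files whose hunk is not captured here. A minimal sketch of what those enums plausibly contain, inferred from how the new code uses them (`get_model_type().value` replaces the hard-coded 'drop'/'jump' strings, and `get_extremum_type().value` the 'min'/'max' strings); the real definitions may differ:

from enum import Enum

# Hypothetical reconstruction; only DROP/JUMP and MIN/MAX are actually
# exercised by the hunks shown in this diff.
class ModelType(Enum):
    DROP = 'drop'
    JUMP = 'jump'
    # presumably also PEAK, TROUGH, GENERAL, CUSTOM for the other models

class ExtremumType(Enum):
    MIN = 'min'
    MAX = 'max'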
models/drop_model.py
@@ -1,122 +1,9 @@
-from models import Model, ModelState, AnalyticSegment
-
-import scipy.signal
-from scipy.fftpack import fft
-from scipy.signal import argrelextrema
-from scipy.stats import gaussian_kde
-from typing import Optional, List, Tuple
-import utils
-import utils.meta
-import numpy as np
-import pandas as pd
-from analytic_types import AnalyticUnitId, TimeSeries
-from analytic_types.learning_info import LearningInfo
-
-@utils.meta.JSONClass
-class DropModelState(ModelState):
-
-    def __init__(
-        self,
-        confidence: float = 0,
-        drop_height: float = 0,
-        drop_length: float = 0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.confidence = confidence
-        self.drop_height = drop_height
-        self.drop_length = drop_length
-
-
-class DropModel(Model):
-
-    def get_model_type(self) -> (str, bool):
-        model = 'drop'
-        type_model = False
-        return (model, type_model)
-
-    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
-        data = dataframe['value']
-        segment = data[start: end]
-        segment_center_index = utils.find_pattern_center(segment, start, 'drop')
-        return segment_center_index
-
-    def get_state(self, cache: Optional[dict] = None) -> DropModelState:
-        return DropModelState.from_json(cache)
-
-    def do_fit(
-        self,
-        dataframe: pd.DataFrame,
-        labeled_segments: List[AnalyticSegment],
-        deleted_segments: List[AnalyticSegment],
-        learning_info: LearningInfo
-    ) -> None:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = self.state.window_size
-        last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
-        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
-        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info.patterns_value
-
-        del_conv_list = []
-        delete_pattern_timestamp = []
-        for segment in deleted_segments:
-            segment_cent_index = segment.center_index
-            delete_pattern_timestamp.append(segment.pattern_timestamp)
-            deleted_drop = utils.get_interval(data, segment_cent_index, window_size)
-            deleted_drop = utils.subtract_min_without_nan(deleted_drop)
-            del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.state.pattern_model)
-            if len(del_conv_drop): del_conv_list.append(max(del_conv_drop))
-
-        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
-        self.state.drop_height = int(min(learning_info.pattern_height, default = 1))
-        self.state.drop_length = int(max(learning_info.pattern_width, default = 1))
-
-    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        possible_drops = utils.find_drop(data, self.state.drop_height, self.state.drop_length + 1)
-        result = self.__filter_detection(possible_drops, data)
-        return [(val - 1, val + 1) for val in result]
-
-    def __filter_detection(self, segments: List[int], data: list):
-        delete_list = []
-        variance_error = self.state.window_size
-        close_patterns = utils.close_filtering(segments, variance_error)
-        segments = utils.best_pattern(close_patterns, data, 'min')
-        if len(segments) == 0 or len(self.state.pattern_center) == 0:
-            segments = []
-            return segments
-        pattern_data = self.state.pattern_model
-        for segment in segments:
-            if segment > self.state.window_size and segment < (len(data) - self.state.window_size):
-                convol_data = utils.get_interval(data, segment, self.state.window_size)
-                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if len(convol_data) == 0 or percent_of_nans > 0.5:
-                    delete_list.append(segment)
-                    continue
-                elif 0 < percent_of_nans <= 0.5:
-                    nan_list = utils.find_nan_indexes(convol_data)
-                    convol_data = utils.nan_to_zero(convol_data, nan_list)
-                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
-                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
-                upper_bound = self.state.convolve_max * 1.2
-                lower_bound = self.state.convolve_min * 0.8
-                delete_up_bound = self.state.conv_del_max * 1.02
-                delete_low_bound = self.state.conv_del_min * 0.98
-                try:
-                    if max(conv) > upper_bound or max(conv) < lower_bound:
-                        delete_list.append(segment)
-                    elif max(conv) < delete_up_bound and max(conv) > delete_low_bound:
-                        delete_list.append(segment)
-                except ValueError:
-                    delete_list.append(segment)
-            else:
-                delete_list.append(segment)
-
-        for item in delete_list:
-            segments.remove(item)
-        return set(segments)
+from models import StairModel, ModelType, ExtremumType
+
+class DropModel(StairModel):
+
+    def get_model_type(self) -> ModelType:
+        return ModelType.DROP
+
+    def get_extremum_type(self) -> ExtremumType:
+        return ExtremumType.MIN
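The removed logic does not disappear: the whole fit/detect pipeline moves into the new StairModel base class (its hunk appears below), and DropModel keeps only the two properties the base class branches on. A minimal sketch of that dispatch, assuming the enum string values from the earlier sketch:

import models

# How StairModel (added below in this commit) consumes the two overrides;
# the enum string values are the assumption from the earlier enum sketch.
model = models.DropModel()
print(model.get_model_type().value)      # 'drop' -> fed to utils.find_pattern_center
print(model.get_extremum_type().value)   # 'min'  -> fed to utils.best_pattern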
models/jump_model.py
@@ -1,124 +1,9 @@
-from models import Model, ModelState, AnalyticSegment
-
-import utils
-import utils.meta
-import numpy as np
-import pandas as pd
-import scipy.signal
-from scipy.fftpack import fft
-from typing import Optional, List, Tuple
-import math
-from scipy.signal import argrelextrema
-from scipy.stats import gaussian_kde
-from analytic_types import AnalyticUnitId, TimeSeries
-from analytic_types.learning_info import LearningInfo
-
-@utils.meta.JSONClass
-class JumpModelState(ModelState):
-
-    def __init__(
-        self,
-        confidence: float = 0,
-        jump_height: float = 0,
-        jump_length: float = 0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.confidence = confidence
-        self.jump_height = jump_height
-        self.jump_length = jump_length
-
-
-class JumpModel(Model):
-
-    def get_model_type(self) -> (str, bool):
-        model = 'jump'
-        type_model = True
-        return (model, type_model)
-
-    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
-        data = dataframe['value']
-        segment = data[start: end]
-        segment_center_index = utils.find_pattern_center(segment, start, 'jump')
-        return segment_center_index
-
-    def get_state(self, cache: Optional[dict] = None) -> JumpModelState:
-        return JumpModelState.from_json(cache)
-
-    def do_fit(
-        self,
-        dataframe: pd.DataFrame,
-        labeled_segments: List[AnalyticSegment],
-        deleted_segments: List[AnalyticSegment],
-        learning_info: LearningInfo
-    ) -> None:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        window_size = self.state.window_size
-        last_pattern_center = self.state.pattern_center
-        self.state.pattern_center = list(set(last_pattern_center + learning_info.segment_center_list))
-        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
-        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
-        height_list = learning_info.patterns_value
-
-        del_conv_list = []
-        delete_pattern_timestamp = []
-        for segment in deleted_segments:
-            segment_cent_index = segment.center_index
-            delete_pattern_timestamp.append(segment.pattern_timestamp)
-            deleted_jump = utils.get_interval(data, segment_cent_index, window_size)
-            deleted_jump = utils.subtract_min_without_nan(deleted_jump)
-            del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.state.pattern_model)
-            if len(del_conv_jump): del_conv_list.append(max(del_conv_jump))
-
-        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
-        self.state.jump_height = float(min(learning_info.pattern_height, default = 1))
-        self.state.jump_length = int(max(learning_info.pattern_width, default = 1))
-
-    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
-        data = utils.cut_dataframe(dataframe)
-        data = data['value']
-        possible_jumps = utils.find_jump(data, self.state.jump_height, self.state.jump_length + 1)
-        result = self.__filter_detection(possible_jumps, data)
-        return [(val - 1, val + 1) for val in result]
-
-    def __filter_detection(self, segments: List[int], data: pd.Series):
-        delete_list = []
-        variance_error = self.state.window_size
-        close_patterns = utils.close_filtering(segments, variance_error)
-        segments = utils.best_pattern(close_patterns, data, 'max')
-
-        if len(segments) == 0 or len(self.state.pattern_center) == 0:
-            segments = []
-            return segments
-        pattern_data = self.state.pattern_model
-        upper_bound = self.state.convolve_max * 1.2
-        lower_bound = self.state.convolve_min * 0.8
-        delete_up_bound = self.state.conv_del_max * 1.02
-        delete_low_bound = self.state.conv_del_min * 0.98
-        for segment in segments:
-            if segment > self.state.window_size and segment < (len(data) - self.state.window_size):
-                convol_data = utils.get_interval(data, segment, self.state.window_size)
-                percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if len(convol_data) == 0 or percent_of_nans > 0.5:
-                    delete_list.append(segment)
-                    continue
-                elif 0 < percent_of_nans <= 0.5:
-                    nan_list = utils.find_nan_indexes(convol_data)
-                    convol_data = utils.nan_to_zero(convol_data, nan_list)
-                    pattern_data = utils.nan_to_zero(pattern_data, nan_list)
-                conv = scipy.signal.fftconvolve(convol_data, pattern_data)
-                try:
-                    if max(conv) > upper_bound or max(conv) < lower_bound:
-                        delete_list.append(segment)
-                    elif max(conv) < delete_up_bound and max(conv) > delete_low_bound:
-                        delete_list.append(segment)
-                except ValueError:
-                    delete_list.append(segment)
-            else:
-                delete_list.append(segment)
-        for item in delete_list:
-            segments.remove(item)
-
-        return set(segments)
+from models import StairModel, ModelType, ExtremumType
+
+class JumpModel(StairModel):
+
+    def get_model_type(self) -> ModelType:
+        return ModelType.JUMP
+
+    def get_extremum_type(self) -> ExtremumType:
+        return ExtremumType.MAX
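The `* 1.2`, `* 0.8`, `* 1.02` and `* 0.98` factors in the removed `__filter_detection` survive in the new StairModel below as the named constants POSITIVE_SEGMENT_MEASUREMENT_ERROR and NEGATIVE_SEGMENT_MEASUREMENT_ERROR. A worked example of the filter logic; all numbers here are invented for illustration, in the models they come from the fitted state:

# convolve_* come from labeled (positive) segments, conv_del_* from deleted ones
convolve_max, convolve_min = 100.0, 50.0
conv_del_max, conv_del_min = 30.0, 20.0

upper_bound = convolve_max * 1.2         # 120.0
lower_bound = convolve_min * 0.8         # 40.0
delete_up_bound = conv_del_max * 1.02    # 30.6
delete_low_bound = conv_del_min * 0.98   # 19.6

def rejected(max_conv: float) -> bool:
    # reject a candidate whose convolution peak is unlike the labeled
    # patterns, or too similar to an explicitly deleted one
    return (max_conv > upper_bound or max_conv < lower_bound
            or delete_low_bound < max_conv < delete_up_bound)

print(rejected(90.0))   # False: kept as a detection
print(rejected(25.0))   # True: below lower_bound (also inside the deleted band)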
models/stair_model.py
@@ -0,0 +1,147 @@
+from models import Model, ModelState, AnalyticSegment, ModelType
+
+from analytic_types import TimeSeries
+from analytic_types.learning_info import LearningInfo
+
+from scipy.fftpack import fft
+from typing import Optional, List
+from enum import Enum
+import scipy.signal
+import utils
+import utils.meta
+import pandas as pd
+import numpy as np
+import operator
+
+POSITIVE_SEGMENT_MEASUREMENT_ERROR = 0.2
+NEGATIVE_SEGMENT_MEASUREMENT_ERROR = 0.02
+
+@utils.meta.JSONClass
+class StairModelState(ModelState):
+
+    def __init__(
+        self,
+        confidence: float = 0,
+        stair_height: float = 0,
+        stair_length: float = 0,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.confidence = confidence
+        self.stair_height = stair_height
+        self.stair_length = stair_length
+
+
+class StairModel(Model):
+
+    def get_state(self, cache: Optional[dict] = None) -> StairModelState:
+        return StairModelState.from_json(cache)
+
+    def get_stair_indexes(self, data: pd.Series, height: float, length: int) -> List[int]:
+        """Get list of start stair segment indexes.
+
+        Keyword arguments:
+        data -- data, that contains stair (jump or drop) segments
+        length -- maximum count of values in the stair
+        height -- the difference between stair max_line and min_line (see utils.find_parameters)
+        """
+        indexes = []
+        for i in range(len(data) - length - 1):
+            is_stair = self.is_stair_in_segment(data.values[i:i + length + 1], height)
+            if is_stair == True:
+                indexes.append(i)
+        return indexes
+
+    def is_stair_in_segment(self, segment: np.ndarray, height: float) -> bool:
+        if len(segment) < 2:
+            return False
+        comparison_operator = operator.ge
+        if self.get_model_type() == ModelType.DROP:
+            comparison_operator = operator.le
+            height = -height
+        return comparison_operator(max(segment[1:]), segment[0] + height)
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        segment_center_index = utils.find_pattern_center(segment, start, self.get_model_type().value)
+        return segment_center_index
+
+    def do_fit(
+        self,
+        dataframe: pd.DataFrame,
+        labeled_segments: List[AnalyticSegment],
+        deleted_segments: List[AnalyticSegment],
+        learning_info: LearningInfo
+    ) -> None:
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
+        window_size = self.state.window_size
+        last_pattern_center = self.state.pattern_center
+        self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list)
+        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
+        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
+        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
+        height_list = learning_info.patterns_value
+
+        del_conv_list = []
+        delete_pattern_timestamp = []
+        for segment in deleted_segments:
+            segment_cent_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
+            deleted_stair = utils.get_interval(data, segment_cent_index, window_size)
+            deleted_stair = utils.subtract_min_without_nan(deleted_stair)
+            del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model)
+            if len(del_conv_stair) > 0:
+                del_conv_list.append(max(del_conv_stair))
+
+        self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
+        self.state.stair_height = int(min(learning_info.pattern_height, default = 1))
+        self.state.stair_length = int(max(learning_info.pattern_width, default = 1))
+
+    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
+        data = utils.cut_dataframe(dataframe)
+        data = data['value']
+        possible_stairs = self.get_stair_indexes(data, self.state.stair_height, self.state.stair_length + 1)
+        result = self.__filter_detection(possible_stairs, data)
+        return [(val - 1, val + 1) for val in result]
+
+    def __filter_detection(self, segments_indexes: List[int], data: list):
+        delete_list = []
+        variance_error = self.state.window_size
+        close_segments = utils.close_filtering(segments_indexes, variance_error)
+        segments_indexes = utils.best_pattern(close_segments, data, self.get_extremum_type().value)
+        if len(segments_indexes) == 0 or len(self.state.pattern_center) == 0:
+            return []
+        pattern_data = self.state.pattern_model
+        for segment_index in segments_indexes:
+            if segment_index <= self.state.window_size or segment_index >= (len(data) - self.state.window_size):
+                delete_list.append(segment_index)
+                continue
+            convol_data = utils.get_interval(data, segment_index, self.state.window_size)
+            percent_of_nans = convol_data.isnull().sum() / len(convol_data)
+            if len(convol_data) == 0 or percent_of_nans > 0.5:
+                delete_list.append(segment_index)
+                continue
+            elif 0 < percent_of_nans <= 0.5:
+                nan_list = utils.find_nan_indexes(convol_data)
+                convol_data = utils.nan_to_zero(convol_data, nan_list)
+                pattern_data = utils.nan_to_zero(pattern_data, nan_list)
+            conv = scipy.signal.fftconvolve(convol_data, pattern_data)
+            if len(conv) == 0:
+                delete_list.append(segment_index)
+                continue
+            upper_bound = self.state.convolve_max * (1 + POSITIVE_SEGMENT_MEASUREMENT_ERROR)
+            lower_bound = self.state.convolve_min * (1 - POSITIVE_SEGMENT_MEASUREMENT_ERROR)
+            delete_up_bound = self.state.conv_del_max * (1 + NEGATIVE_SEGMENT_MEASUREMENT_ERROR)
+            delete_low_bound = self.state.conv_del_min * (1 - NEGATIVE_SEGMENT_MEASUREMENT_ERROR)
+            max_conv = max(conv)
+            if max_conv > upper_bound or max_conv < lower_bound:
+                delete_list.append(segment_index)
+            elif max_conv < delete_up_bound and max_conv > delete_low_bound:
+                delete_list.append(segment_index)
+
+        for item in delete_list:
+            segments_indexes.remove(item)
+        segments_indexes = utils.remove_duplicates_and_sort(segments_indexes)
+        return segments_indexes
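utils.remove_duplicates_and_sort, which replaces the old list(set(...)) calls (see the commit message item "list(set) -> utils.remove_duplicates"), is not shown in this diff. A minimal sketch assuming it does exactly what its call sites need, a duplicate-free ordered list:

# Hypothetical sketch of the helper; the real implementation lives in utils
# and is not part of the hunks captured here.
def remove_duplicates_and_sort(values: list) -> list:
    return sorted(set(values))

Compared with list(set(...)), the result is also deterministic and ordered, and it stays a list, so the list.remove calls above keep working.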
@@ -0,0 +1,43 @@
+import unittest
+import pandas as pd
+import numpy as np
+import models
+
+class TestModel(unittest.TestCase):
+
+    def test_stair_model_get_indexes(self):
+        drop_model = models.DropModel()
+        jump_model = models.JumpModel()
+        drop_data = pd.Series([4, 4, 4, 1, 1, 1, 5, 5, 2, 2, 2])
+        jump_data = pd.Series([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5])
+        jump_data_one_stair = pd.Series([1, 3, 3])
+        drop_data_one_stair = pd.Series([4, 2, 1])
+        height = 2
+        length = 2
+        expected_result = [2, 7]
+        drop_model_result = drop_model.get_stair_indexes(drop_data, height, length)
+        jump_model_result = jump_model.get_stair_indexes(jump_data, height, length)
+        drop_one_stair_result = drop_model.get_stair_indexes(drop_data_one_stair, height, 1)
+        jump_one_stair_result = jump_model.get_stair_indexes(jump_data_one_stair, height, 1)
+        for val in expected_result:
+            self.assertIn(val, drop_model_result)
+            self.assertIn(val, jump_model_result)
+        self.assertEqual(0, drop_one_stair_result[0])
+        self.assertEqual(0, jump_one_stair_result[0])
+
+    def test_stair_model_get_indexes_corner_cases(self):
+        drop_model = models.DropModel()
+        jump_model = models.JumpModel()
+        empty_data = pd.Series([])
+        nan_data = pd.Series([np.nan, np.nan, np.nan, np.nan])
+        height, length = 2, 2
+        length_zero, height_zero = 0, 0
+        expected_result = []
+        drop_empty_data_result = drop_model.get_stair_indexes(empty_data, height, length)
+        drop_nan_data_result = drop_model.get_stair_indexes(nan_data, height_zero, length_zero)
+        jump_empty_data_result = jump_model.get_stair_indexes(empty_data, height, length)
+        jump_nan_data_result = jump_model.get_stair_indexes(nan_data, height_zero, length_zero)
+        self.assertEqual(drop_empty_data_result, expected_result)
+        self.assertEqual(drop_nan_data_result, expected_result)
+        self.assertEqual(jump_empty_data_result, expected_result)
+        self.assertEqual(jump_nan_data_result, expected_result)
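A worked trace of the single-stair assertions above, following is_stair_in_segment from the new stair_model.py: with length=1, get_stair_indexes iterates range(3 - 1 - 1) == range(1), so only the window at index 0 is checked.

import operator

# drop_data_one_stair = [4, 2, 1] with height=2, length=1 checks only [4, 2];
# the DropModel branch uses operator.le with the height negated:
segment = [4, 2]
height = 2
print(operator.le(max(segment[1:]), segment[0] + (-height)))   # le(2, 2) -> True

# jump_data_one_stair = [1, 3, 3] checks only [1, 3] via operator.ge:
print(operator.ge(max([3]), 1 + height))                       # ge(3, 3) -> True

Both comparisons hold with equality, which is why each test asserts that index 0 is returned.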