From d399d0b84a6ab79f16d795dcf87a3c3772f100ae Mon Sep 17 00:00:00 2001
From: Evgeny Smyshlyaev
Date: Wed, 23 Jan 2019 22:04:41 +0300
Subject: [PATCH] Remove duplicates from models #357 (#363)

refactoring
---
 analytics/.vscode/launch.json               |  8 +-
 analytics/.vscode/settings.json             |  5 +-
 analytics/analytics/models/custom_model.py  |  2 +-
 analytics/analytics/models/drop_model.py    | 82 ++++++---------------
 analytics/analytics/models/general_model.py | 49 +++---------
 analytics/analytics/models/jump_model.py    | 81 ++++++--------------
 analytics/analytics/models/model.py         | 72 ++++++++++++------
 analytics/analytics/models/peak_model.py    | 73 ++++++------------
 analytics/analytics/models/trough_model.py  | 76 ++++++-------------
 analytics/analytics/utils/common.py         |  7 +-
 analytics/requirements.txt                  |  1 +
 11 files changed, 165 insertions(+), 291 deletions(-)

diff --git a/analytics/.vscode/launch.json b/analytics/.vscode/launch.json
index 49615ea..c846f44 100644
--- a/analytics/.vscode/launch.json
+++ b/analytics/.vscode/launch.json
@@ -8,7 +8,13 @@
             "name": "Python: Current File",
             "type": "python",
             "request": "launch",
-            "program": "${workspaceFolder}\\bin\\server"
+            "program": "${workspaceFolder}\\bin\\server",
+            "windows": {
+                "program": "${workspaceFolder}\\bin\\server"
+            },
+            "linux": {
+                "program": "${workspaceFolder}/bin/server"
+            }
         }
     ]
 }
diff --git a/analytics/.vscode/settings.json b/analytics/.vscode/settings.json
index ea1baa9..67cf3df 100644
--- a/analytics/.vscode/settings.json
+++ b/analytics/.vscode/settings.json
@@ -1,6 +1,6 @@
 {
 "python.pythonPath": "python",
-"python.linting.enabled": false,
+"python.linting.enabled": true,
 "terminal.integrated.shell.windows": "C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\powershell.exe",
 "editor.tabSize": 4,
 "editor.insertSpaces": true,
@@ -13,5 +13,6 @@
 ],
 "python.unitTest.pyTestEnabled": false,
 "python.unitTest.nosetestsEnabled": false,
-"python.unitTest.unittestEnabled": true
+"python.unitTest.unittestEnabled": true,
+"python.linting.pylintEnabled": true
 }
diff --git a/analytics/analytics/models/custom_model.py b/analytics/analytics/models/custom_model.py
index 485d750..20f7852 100644
--- a/analytics/analytics/models/custom_model.py
+++ b/analytics/analytics/models/custom_model.py
@@ -4,7 +4,7 @@ import pandas as pd
 
 
 class CustomModel(Model):
-    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
+    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         pass
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index 0b8f1c2..1a5e34b 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -27,7 +27,7 @@ class DropModel(Model):
             'conv_del_max': 55000,
         }
 
-    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
+    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
@@ -35,70 +35,32 @@ class DropModel(Model):
         drop_height_list = []
         drop_length_list = []
         patterns_list = []
-        for segment in segments:
-            if segment['labeled']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                confidence = utils.find_confidence(segment_data)
-                confidences.append(confidence)
-                segment_cent_index, drop_height, drop_length = utils.find_parameters(segment_data, segment_from_index, 'drop')
-                drop_height_list.append(drop_height)
-                drop_length_list.append(drop_length)
-                self.idrops.append(segment_cent_index)
-                labeled_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
-                labeled_drop = utils.subtract_min_without_nan(labeled_drop)
-                patterns_list.append(labeled_drop)
+
+        for segment in labeled_segments:
+            confidence = utils.find_confidence(segment.data)
+            confidences.append(confidence)
+            segment_cent_index, drop_height, drop_length = utils.find_parameters(segment.data, segment.start, 'drop')
+            drop_height_list.append(drop_height)
+            drop_length_list.append(drop_length)
+            self.idrops.append(segment_cent_index)
+            labeled_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+            labeled_drop = utils.subtract_min_without_nan(labeled_drop)
+            patterns_list.append(labeled_drop)
 
         self.model_drop = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
-        for segment in segments:
-            if segment['deleted']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                segment_cent_index = utils.find_parameters(segment_data, segment_from_index, 'drop')[0]
-                deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
-                deleted_drop = utils.subtract_min_without_nan(deleted_drop)
-                del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.model_drop)
-                del_conv_list.append(max(del_conv_drop))
-
-        if len(confidences) > 0:
-            self.state['confidence'] = float(min(confidences))
-        else:
-            self.state['confidence'] = 1.5
-
-        if len(convolve_list) > 0:
-            self.state['convolve_max'] = float(max(convolve_list))
-        else:
-            self.state['convolve_max'] = self.state['WINDOW_SIZE']
-
-        if len(convolve_list) > 0:
-            self.state['convolve_min'] = float(min(convolve_list))
-        else:
-            self.state['convolve_min'] = self.state['WINDOW_SIZE']
-
-        if len(drop_height_list) > 0:
-            self.state['DROP_HEIGHT'] = int(min(drop_height_list))
-        else:
-            self.state['DROP_HEIGHT'] = 1
-
-        if len(drop_length_list) > 0:
-            self.state['DROP_LENGTH'] = int(max(drop_length_list))
-        else:
-            self.state['DROP_LENGTH'] = 1
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_min'] = float(min(del_conv_list))
-        else:
-            self.state['conv_del_min'] = self.state['WINDOW_SIZE']
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_max'] = float(max(del_conv_list))
-        else:
-            self.state['conv_del_max'] = self.state['WINDOW_SIZE']
+        for segment in deleted_segments:
+            segment_cent_index = utils.find_parameters(segment.data, segment.start, 'drop')[0]
+            deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+            deleted_drop = utils.subtract_min_without_nan(deleted_drop)
+            del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.model_drop)
+            if len(del_conv_drop): del_conv_list.append(max(del_conv_drop))
+
+        self._update_fitting_result(self.state, confidences, convolve_list, del_conv_list)
+        self.state['DROP_HEIGHT'] = int(min(drop_height_list, default=1))
+        self.state['DROP_LENGTH'] = int(max(drop_length_list, default=1))
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
         data = utils.cut_dataframe(dataframe)
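
Note on the DROP_HEIGHT/DROP_LENGTH lines above: the refactored models lean on the default keyword argument that Python's built-in min() and max() accept since 3.4, which is what lets the deleted length checks go away. A quick illustration with made-up values:

    >>> min([], default=1)          # empty input: no ValueError, falls back to 1
    1
    >>> min([3.5, 7.0], default=1)  # non-empty input: the default is ignored
    3.5
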
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index cdc94d1..e6fef80 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -27,17 +27,13 @@ class GeneralModel(Model):
         }
         self.all_conv = []
 
-    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
+    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         convolve_list = []
         patterns_list = []
-        for segment in segments:
-            if segment['labeled']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                center_ind = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
+        for segment in labeled_segments:
+            center_ind = segment.start + math.ceil(segment.length / 2)
             self.ipats.append(center_ind)
             segment_data = utils.get_interval(data, center_ind, self.state['WINDOW_SIZE'])
             segment_data = utils.subtract_min_without_nan(segment_data)
@@ -47,36 +43,15 @@ class GeneralModel(Model):
         convolve_list = utils.get_convolve(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
-        for segment in segments:
-            if segment['deleted']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                del_mid_index = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
-                deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
-                deleted_pat = utils.subtract_min_without_nan(segment_data)
-                del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.model_gen)
-                del_conv_list.append(max(del_conv_pat))
-
-        if len(convolve_list) > 0:
-            self.state['convolve_max'] = float(max(convolve_list))
-        else:
-            self.state['convolve_max'] = self.state['WINDOW_SIZE'] / 3
-
-        if len(convolve_list) > 0:
-            self.state['convolve_min'] = float(min(convolve_list))
-        else:
-            self.state['convolve_min'] = self.state['WINDOW_SIZE'] / 3
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_min'] = float(min(del_conv_list))
-        else:
-            self.state['conv_del_min'] = self.state['WINDOW_SIZE']
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_max'] = float(max(del_conv_list))
-        else:
-            self.state['conv_del_max'] = self.state['WINDOW_SIZE']
+        for segment in deleted_segments:
+            del_mid_index = segment.start + math.ceil(segment.length / 2)
+            deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
+            deleted_pat = utils.subtract_min_without_nan(deleted_pat)
+            del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.model_gen)
+            if len(del_conv_pat): del_conv_list.append(max(del_conv_pat))
+
+        self.state['convolve_min'], self.state['convolve_max'] = utils.get_min_max(convolve_list, self.state['WINDOW_SIZE'] / 3)
+        self.state['conv_del_min'], self.state['conv_del_max'] = utils.get_min_max(del_conv_list, self.state['WINDOW_SIZE'])
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
         data = utils.cut_dataframe(dataframe)
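
For orientation, the center-index arithmetic above is the same computation as before, just expressed through the new Segment fields (the numbers below are made up; Segment defines length as abs(end - start)):

    import math

    start, end = 10, 17
    old_center = start + math.ceil((end - start) / 2)    # 14, old from/to form
    new_center = start + math.ceil(abs(end - start) / 2) # 14, via segment.length
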
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index a666fc9..8a6207e 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -28,7 +28,7 @@ class JumpModel(Model):
             'conv_del_max': 55000,
         }
 
-    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
+    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
@@ -36,70 +36,31 @@ class JumpModel(Model):
         jump_height_list = []
         jump_length_list = []
         patterns_list = []
-        for segment in segments:
-            if segment['labeled']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                confidence = utils.find_confidence(segment_data)
-                confidences.append(confidence)
-                segment_cent_index, jump_height, jump_length = utils.find_parameters(segment_data, segment_from_index, 'jump')
-                jump_height_list.append(jump_height)
-                jump_length_list.append(jump_length)
-                self.ijumps.append(segment_cent_index)
-                labeled_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
-                labeled_jump = utils.subtract_min_without_nan(labeled_jump)
-                patterns_list.append(labeled_jump)
+        for segment in labeled_segments:
+            confidence = utils.find_confidence(segment.data)
+            confidences.append(confidence)
+            segment_cent_index, jump_height, jump_length = utils.find_parameters(segment.data, segment.start, 'jump')
+            jump_height_list.append(jump_height)
+            jump_length_list.append(jump_length)
+            self.ijumps.append(segment_cent_index)
+            labeled_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+            labeled_jump = utils.subtract_min_without_nan(labeled_jump)
+            patterns_list.append(labeled_jump)
 
         self.model_jump = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
-        for segment in segments:
-            if segment['deleted']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                segment_cent_index = utils.find_parameters(segment_data, segment_from_index, 'jump')[0]
-                deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
-                deleted_jump = utils.subtract_min_without_nan(labeled_jump)
-                del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.model_jump)
-                del_conv_list.append(max(del_conv_jump))
-
-        if len(confidences) > 0:
-            self.state['confidence'] = float(min(confidences))
-        else:
-            self.state['confidence'] = 1.5
-
-        if len(convolve_list) > 0:
-            self.state['convolve_max'] = float(max(convolve_list))
-        else:
-            self.state['convolve_max'] = self.state['WINDOW_SIZE']
-
-        if len(convolve_list) > 0:
-            self.state['convolve_min'] = float(min(convolve_list))
-        else:
-            self.state['convolve_min'] = self.state['WINDOW_SIZE']
-
-        if len(jump_height_list) > 0:
-            self.state['JUMP_HEIGHT'] = float(min(jump_height_list))
-        else:
-            self.state['JUMP_HEIGHT'] = 1
-
-        if len(jump_length_list) > 0:
-            self.state['JUMP_LENGTH'] = int(max(jump_length_list))
-        else:
-            self.state['JUMP_LENGTH'] = 1
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_min'] = float(min(del_conv_list))
-        else:
-            self.state['conv_del_min'] = self.state['WINDOW_SIZE']
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_max'] = float(max(del_conv_list))
-        else:
-            self.state['conv_del_max'] = self.state['WINDOW_SIZE']
+        for segment in deleted_segments:
+            segment_cent_index = utils.find_parameters(segment.data, segment.start, 'jump')[0]
+            deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+            deleted_jump = utils.subtract_min_without_nan(deleted_jump)
+            del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.model_jump)
+            if len(del_conv_jump): del_conv_list.append(max(del_conv_jump))
+
+        self._update_fitting_result(self.state, confidences, convolve_list, del_conv_list)
+        self.state['JUMP_HEIGHT'] = float(min(jump_height_list, default=1))
+        self.state['JUMP_LENGTH'] = int(max(jump_length_list, default=1))
 
     def do_detect(self, dataframe: pd.DataFrame) -> list:
         data = utils.cut_dataframe(dataframe)
diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py
index 6d57cda..9e8951a 100644
--- a/analytics/analytics/models/model.py
+++ b/analytics/analytics/models/model.py
@@ -1,12 +1,37 @@
 import utils
 
 from abc import ABC, abstractmethod
+from attrdict import AttrDict
 from typing import Optional
 
 import pandas as pd
 import math
 
 ModelCache = dict
 
+class Segment(AttrDict):
+
+    __percent_of_nans = 0
+
+    def __init__(self, dataframe: pd.DataFrame, segment_map: dict):
+        self.update(segment_map)
+        self.start = utils.timestamp_to_index(dataframe, pd.to_datetime(self['from'], unit='ms'))
+        self.end = utils.timestamp_to_index(dataframe, pd.to_datetime(self['to'], unit='ms'))
+        self.length = abs(self.end - self.start)
+
+        assert len(dataframe['value']) >= self.end + 1, \
+            'segment {}-{} out of dataframe length={}'.format(self.start, self.end+1, len(dataframe['value']))
+
+        self.data = dataframe['value'][self.start: self.end + 1]
+
+    @property
+    def percent_of_nans(self):
+        if not self.__percent_of_nans:
+            self.__percent_of_nans = self.data.isnull().sum() / len(self.data)
+        return self.__percent_of_nans
+
+    def convert_nan_to_zero(self):
+        nan_list = utils.find_nan_indexes(self.data)
+        self.data = utils.nan_to_zero(self.data, nan_list)
 
 class Model(ABC):
@@ -22,31 +47,24 @@ class Model(ABC):
         if type(cache) is ModelCache:
             self.state = cache
 
-        self.segments = segments
-        segment_length_list = []
-        filtered_segments = []
-        for segment in self.segments:
-            if segment['labeled'] or segment['deleted']:
-                parse_segment_dict = utils.parse_segment(segment, dataframe)
-                segment_from_index = parse_segment_dict.get('from')
-                segment_to_index = parse_segment_dict.get('to')
-                segment_data = parse_segment_dict.get('data')
-                percent_of_nans = segment_data.isnull().sum() / len(segment_data)
-                if percent_of_nans > 0.1 or len(segment_data) == 0:
+        max_length = 0
+        labeled = []
+        deleted = []
+        for segment_map in segments:
+            if segment_map['labeled'] or segment_map['deleted']:
+                segment = Segment(dataframe, segment_map)
+                if segment.percent_of_nans > 0.1 or len(segment.data) == 0:
                     continue
-                if percent_of_nans > 0:
-                    nan_list = utils.find_nan_indexes(segment_data)
-                    segment_data = utils.nan_to_zero(segment_data, nan_list)
-                segment.update({'from': segment_from_index, 'to': segment_to_index, 'data': segment_data})
-                segment_length = abs(segment_to_index - segment_from_index)
-                segment_length_list.append(segment_length)
-                filtered_segments.append(segment)
+                if segment.percent_of_nans > 0:
+                    segment.convert_nan_to_zero()
+
+                max_length = max(segment.length, max_length)
+                if segment.labeled: labeled.append(segment)
+                if segment.deleted: deleted.append(segment)
 
-        if len(segment_length_list) > 0:
-            self.state['WINDOW_SIZE'] = math.ceil(max(segment_length_list) / 2)
-        else:
-            self.state['WINDOW_SIZE'] = 0
-        self.do_fit(dataframe, filtered_segments)
+
+        self.state['WINDOW_SIZE'] = math.ceil(max_length / 2) if max_length else 0
+        self.do_fit(dataframe, labeled, deleted)
         return self.state
 
     def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
@@ -64,3 +82,11 @@
             'segments': segments,
             'cache': self.state
         }
+
+    def _update_fitting_result(self, state: dict, confidences: list, convolve_list: list, del_conv_list: list) -> None:
+        if type(state) is dict:
+            state['confidence'] = float(min(confidences, default=1.5))
+            state['convolve_min'], state['convolve_max'] = utils.get_min_max(convolve_list, state['WINDOW_SIZE'])
+            state['conv_del_min'], state['conv_del_max'] = utils.get_min_max(del_conv_list, state['WINDOW_SIZE'])
+        else:
+            raise ValueError('got non-dict as state for update fitting result: {}'.format(state))
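
The Segment class introduced above builds on the attrdict package (added to requirements.txt at the end of this patch): every mapping key doubles as an attribute, which is why fit() can construct a Segment from a raw segment map and then test segment.labeled / segment.deleted directly. A minimal standalone demonstration, separate from the patch itself:

    from attrdict import AttrDict

    segment = AttrDict({'labeled': True, 'deleted': False})
    assert segment.labeled == segment['labeled']   # key and attribute access agree
    segment.start = 42                             # attribute writes become keys
    assert segment['start'] == 42
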
diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py
index 99b5af8..30fc17d 100644
--- a/analytics/analytics/models/peak_model.py
+++ b/analytics/analytics/models/peak_model.py
@@ -17,7 +17,7 @@ class PeakModel(Model):
         super()
         self.segments = []
         self.ipeaks = []
-        self.model_peak = []
+        self.model = []
         self.state = {
             'confidence': 1.5,
             'convolve_max': 570000,
@@ -27,64 +27,33 @@ class PeakModel(Model):
             'conv_del_max': 55000,
         }
 
-    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
+    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
         convolve_list = []
         patterns_list = []
-        for segment in segments:
-            if segment['labeled']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                confidence = utils.find_confidence(segment_data)
-                confidences.append(confidence)
-                segment_max_index = segment_data.idxmax()
-                self.ipeaks.append(segment_max_index)
-                labeled_peak = utils.get_interval(data, segment_max_index, self.state['WINDOW_SIZE'])
-                labeled_peak = utils.subtract_min_without_nan(labeled_peak)
-                patterns_list.append(labeled_peak)
-
-        self.model_peak = utils.get_av_model(patterns_list)
-        convolve_list = utils.get_convolve(self.ipeaks, self.model_peak, data, self.state['WINDOW_SIZE'])
+        for segment in labeled_segments:
+            confidence = utils.find_confidence(segment.data)
+            confidences.append(confidence)
+            segment_max_index = segment.data.idxmax()
+            self.ipeaks.append(segment_max_index)
+            labeled = utils.get_interval(data, segment_max_index, self.state['WINDOW_SIZE'])
+            labeled = utils.subtract_min_without_nan(labeled)
+            patterns_list.append(labeled)
+
+        self.model = utils.get_av_model(patterns_list)
+        convolve_list = utils.get_convolve(self.ipeaks, self.model, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
-        for segment in segments:
-            if segment['deleted']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                del_max_index = segment_data.idxmax()
-                deleted_peak = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
-                deleted_peak = utils.subtract_min_without_nan(deleted_peak)
-                del_conv_peak = scipy.signal.fftconvolve(deleted_peak, self.model_peak)
-                del_conv_list.append(max(del_conv_peak))
-
-        if len(confidences) > 0:
-            self.state['confidence'] = float(min(confidences))
-        else:
-            self.state['confidence'] = 1.5
-
-        if len(convolve_list) > 0:
-            self.state['convolve_max'] = float(max(convolve_list))
-        else:
-            self.state['convolve_max'] = self.state['WINDOW_SIZE']
-
-        if len(convolve_list) > 0:
-            self.state['convolve_min'] = float(min(convolve_list))
-        else:
-            self.state['convolve_min'] = self.state['WINDOW_SIZE']
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_min'] = float(min(del_conv_list))
-        else:
-            self.state['conv_del_min'] = self.state['WINDOW_SIZE']
+        for segment in deleted_segments:
+            del_max_index = segment.data.idxmax()
+            deleted = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
+            deleted = utils.subtract_min_without_nan(deleted)
+            del_conv = scipy.signal.fftconvolve(deleted, self.model)
+            if len(del_conv): del_conv_list.append(max(del_conv))
 
-        if len(del_conv_list) > 0:
-            self.state['conv_del_max'] = float(max(del_conv_list))
-        else:
-            self.state['conv_del_max'] = self.state['WINDOW_SIZE']
+        self._update_fitting_result(self.state, confidences, convolve_list, del_conv_list)
 
     def do_detect(self, dataframe: pd.DataFrame):
         data = utils.cut_dataframe(dataframe)
@@ -111,7 +80,7 @@ class PeakModel(Model):
 
         if len(segments) == 0 or len(self.ipeaks) == 0:
             return []
-        pattern_data = self.model_peak
+        pattern_data = self.model
         for segment in segments:
             if segment > self.state['WINDOW_SIZE']:
                 convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
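
A detail worth noting in the loops above: pandas Series.idxmax() and Series.idxmin() return the index label of the extremum, not its positional offset, which is what lets the peak and trough models hand the result straight to utils.get_interval as a center index. For example:

    import pandas as pd

    s = pd.Series([1.0, 9.0, 3.0], index=[10, 11, 12])
    s.idxmax()   # 11, label of the maximum
    s.idxmin()   # 10, label of the minimum
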
diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py
index 4e9639d..0b89591 100644
--- a/analytics/analytics/models/trough_model.py
+++ b/analytics/analytics/models/trough_model.py
@@ -17,7 +17,7 @@ class TroughModel(Model):
         super()
         self.segments = []
         self.itroughs = []
-        self.model_trough = []
+        self.model = []
         self.state = {
             'confidence': 1.5,
             'convolve_max': 570000,
@@ -27,64 +27,34 @@ class TroughModel(Model):
             'conv_del_max': 55000,
         }
 
-    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
+    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
         convolve_list = []
         patterns_list = []
-        for segment in segments:
-            if segment['labeled']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                confidence = utils.find_confidence(segment_data)
-                confidences.append(confidence)
-                segment_min_index = segment_data.idxmin()
-                self.itroughs.append(segment_min_index)
-                labeled_trough = utils.get_interval(data, segment_min_index, self.state['WINDOW_SIZE'])
-                labeled_trough = utils.subtract_min_without_nan(labeled_trough)
-                patterns_list.append(labeled_trough)
-
-        self.model_trough = utils.get_av_model(patterns_list)
-        convolve_list = utils.get_convolve(self.itroughs, self.model_trough, data, self.state['WINDOW_SIZE'])
+
+        for segment in labeled_segments:
+            confidence = utils.find_confidence(segment.data)
+            confidences.append(confidence)
+            segment_min_index = segment.data.idxmin()
+            self.itroughs.append(segment_min_index)
+            labeled = utils.get_interval(data, segment_min_index, self.state['WINDOW_SIZE'])
+            labeled = utils.subtract_min_without_nan(labeled)
+            patterns_list.append(labeled)
+
+        self.model = utils.get_av_model(patterns_list)
+        convolve_list = utils.get_convolve(self.itroughs, self.model, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
-        for segment in segments:
-            if segment['deleted']:
-                segment_from_index = segment.get('from')
-                segment_to_index = segment.get('to')
-                segment_data = segment.get('data')
-                del_min_index = segment_data.idxmin()
-                deleted_trough = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
-                deleted_trough = utils.subtract_min_without_nan(deleted_trough)
-                del_conv_trough = scipy.signal.fftconvolve(deleted_trough, self.model_trough)
-                del_conv_list.append(max(del_conv_trough))
-
-        if len(confidences) > 0:
-            self.state['confidence'] = float(min(confidences))
-        else:
-            self.state['confidence'] = 1.5
-
-        if len(convolve_list) > 0:
-            self.state['convolve_max'] = float(max(convolve_list))
-        else:
-            self.state['convolve_max'] = self.state['WINDOW_SIZE']
-
-        if len(convolve_list) > 0:
-            self.state['convolve_min'] = float(min(convolve_list))
-        else:
-            self.state['convolve_min'] = self.state['WINDOW_SIZE']
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_min'] = float(min(del_conv_list))
-        else:
-            self.state['conv_del_min'] = self.state['WINDOW_SIZE']
-
-        if len(del_conv_list) > 0:
-            self.state['conv_del_max'] = float(max(del_conv_list))
-        else:
-            self.state['conv_del_max'] = self.state['WINDOW_SIZE']
+        for segment in deleted_segments:
+            del_min_index = segment.data.idxmin()
+            deleted = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
+            deleted = utils.subtract_min_without_nan(deleted)
+            del_conv = scipy.signal.fftconvolve(deleted, self.model)
+            if len(del_conv): del_conv_list.append(max(del_conv))
+
+        self._update_fitting_result(self.state, confidences, convolve_list, del_conv_list)
 
     def do_detect(self, dataframe: pd.DataFrame):
         data = utils.cut_dataframe(dataframe)
@@ -111,7 +81,7 @@ class TroughModel(Model):
         if len(segments) == 0 or len(self.itroughs) == 0 :
             segments = []
             return segments
-        pattern_data = self.model_trough
+        pattern_data = self.model
        for segment in segments:
             if segment > self.state['WINDOW_SIZE']:
                 convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py
index 8ab1b26..33c97b7 100644
--- a/analytics/analytics/utils/common.py
+++ b/analytics/analytics/utils/common.py
@@ -37,11 +37,11 @@ def segments_box(segments):
     max_time = pd.to_datetime(max_time, unit='ms')
     return min_time, max_time
 
-def find_pattern(data: pd.Series, height: float, lenght: int, pattern_type: str) -> list:
+def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) -> list:
     pattern_list = []
     right_bound = len(data) - length - 1
     for i in range(right_bound):
-        for x in range(1, lenght):
+        for x in range(1, length):
             if pattern_type == 'jump':
                 if(data[i + x] > data[i] + height):
                     pattern_list.append(i)
@@ -278,3 +278,6 @@ def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:
     if not np.isnan(data_min) and data_min > 0:
         data['value'] = data['value'] - data_min
     return data
+
+def get_min_max(array, default):
+    return float(min(array, default=default)), float(max(array, default=default))
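
The new get_min_max helper covers both branches of the deleted if/else cascades in a single expression; its behaviour on empty and non-empty input (values here are illustrative):

    get_min_max([], 30)          # (30.0, 30.0): empty input falls back to the default
    get_min_max([5, 2, 9], 30)   # (2.0, 9.0): the default is ignored
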
diff --git a/analytics/requirements.txt b/analytics/requirements.txt
index 4fa84a6..49de724 100644
--- a/analytics/requirements.txt
+++ b/analytics/requirements.txt
@@ -1,4 +1,5 @@
 altgraph==0.15
+attrdict==2.0.0
 future==0.16.0
 macholib==1.9
 numpy==1.14.5
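
Taken together, the patch changes the contract between Model.fit and its subclasses: fit() now wraps the raw segment maps in Segment objects, drops segments that are empty or more than 10% NaN, zero-fills the remaining NaNs, derives WINDOW_SIZE from the longest segment, and only then calls do_fit with separate labeled and deleted lists. A hypothetical minimal subclass, mirroring custom_model.py, shows the resulting interface:

    class NoopModel(Model):
        def do_fit(self, dataframe, labeled_segments, deleted_segments):
            # both lists hold pre-filtered Segment objects;
            # self.state['WINDOW_SIZE'] has already been set by fit()
            pass

        def do_detect(self, dataframe):
            return []
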