From 4c2efac97e7d8454582b82b6bb8b557344b9a1ce Mon Sep 17 00:00:00 2001 From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Tue, 19 Mar 2019 17:47:54 +0300 Subject: [PATCH] Keyerror pattern model #471 (#476) --- .../analytics/detectors/pattern_detector.py | 2 ++ analytics/analytics/models/general_model.py | 13 ++++++------ analytics/analytics/models/model.py | 21 ++++++++++++++----- analytics/tests/test_dataset.py | 2 +- server/src/services/data_puller.ts | 4 +++- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/analytics/analytics/detectors/pattern_detector.py b/analytics/analytics/detectors/pattern_detector.py index bdbc560..af895ae 100644 --- a/analytics/analytics/detectors/pattern_detector.py +++ b/analytics/analytics/detectors/pattern_detector.py @@ -43,6 +43,8 @@ class PatternDetector(Detector): def train(self, dataframe: pd.DataFrame, segments: list, cache: Optional[models.ModelCache]) -> models.ModelCache: # TODO: pass only part of dataframe that has segments new_cache = self.model.fit(dataframe, segments, cache) + if new_cache == None or len(new_cache) == 0: + logging.warning('new_cache is empty with data: {}, segments: {}, cache: {}'.format(dataframe, segments, cache)) return { 'cache': new_cache } diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py index 82c5b93..11a1b18 100644 --- a/analytics/analytics/models/general_model.py +++ b/analytics/analytics/models/general_model.py @@ -65,23 +65,24 @@ class GeneralModel(Model): def do_detect(self, dataframe: pd.DataFrame) -> list: data = utils.cut_dataframe(dataframe) data = data['value'] - pat_data = self.state['pattern_model'] + pat_data = self.state.get('pattern_model', []) if pat_data.count(0) == len(pat_data): raise ValueError('Labeled patterns must not be empty') self.all_conv = [] self.all_corr = [] - for i in range(self.state['WINDOW_SIZE'], len(data) - self.state['WINDOW_SIZE']): - watch_data = data[i - self.state['WINDOW_SIZE']: i + self.state['WINDOW_SIZE'] + 1] + window_size = self.state.get('WINDOW_SIZE', 0) + for i in range(window_size, len(data) - window_size): + watch_data = data[i - window_size: i + window_size + 1] watch_data = utils.subtract_min_without_nan(watch_data) conv = scipy.signal.fftconvolve(watch_data, pat_data) correlation = pearsonr(watch_data, pat_data) self.all_corr.append(correlation[0]) self.all_conv.append(max(conv)) - all_conv_peaks = utils.peak_finder(self.all_conv, self.state['WINDOW_SIZE'] * 2) - all_corr_peaks = utils.peak_finder(self.all_corr, self.state['WINDOW_SIZE'] * 2) + all_conv_peaks = utils.peak_finder(self.all_conv, window_size * 2) + all_corr_peaks = utils.peak_finder(self.all_corr, window_size * 2) filtered = self.__filter_detection(all_corr_peaks, data) - return set(item + self.state['WINDOW_SIZE'] for item in filtered) + return set(item + window_size for item in filtered) def __filter_detection(self, segments: list, data: list): if len(segments) == 0 or len(self.state.get('pattern_center', [])) == 0: diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py index 08cf5be..2d15591 100644 --- a/analytics/analytics/models/model.py +++ b/analytics/analytics/models/model.py @@ -5,6 +5,7 @@ from attrdict import AttrDict from typing import Optional import pandas as pd import math +import logging ModelCache = dict @@ -64,7 +65,7 @@ class Model(ABC): def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[ModelCache]) -> ModelCache: data = dataframe['value'] - if type(cache) is ModelCache and cache: + if cache != None and len(cache) > 0: self.state = cache max_length = 0 labeled = [] @@ -84,21 +85,31 @@ class Model(ABC): model, model_type = self.get_model_type() learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, model, model_type) self.do_fit(dataframe, labeled, deleted, learning_info) + logging.debug('fit complete successful with self.state: {}'.format(self.state)) return self.state def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict: - if type(cache) is ModelCache: + #If cache is None or empty dict - default parameters will be used instead + if cache != None and len(cache) > 0: self.state = cache - + else: + logging.debug('get empty cache in detect') + if not self.state: + logging.warning('self.state is empty - skip do_detect') + return { + 'segments': [], + 'cache': {}, + } result = self.do_detect(dataframe) segments = [( utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x - 1]), utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x + 1]) ) for x in result] - + if not self.state: + logging.warning('return empty self.state after detect') return { 'segments': segments, - 'cache': self.state + 'cache': self.state, } def _update_fiting_result(self, state: dict, confidences: list, convolve_list: list, del_conv_list: list, height_list: list) -> None: diff --git a/analytics/tests/test_dataset.py b/analytics/tests/test_dataset.py index 1b9a1d0..6d694eb 100644 --- a/analytics/tests/test_dataset.py +++ b/analytics/tests/test_dataset.py @@ -309,7 +309,7 @@ class TestDataset(unittest.TestCase): 'conv_del_min': 0, 'conv_del_max': 0, } - ws = random.randint(0, int(len(data['value']/2))) + ws = random.randint(1, int(len(data['value']/2))) pattern_model = create_random_model(ws) convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) confidence = 0.2 * (data['value'].max() - data['value'].min()) diff --git a/server/src/services/data_puller.ts b/server/src/services/data_puller.ts index ee9b535..2f6724c 100644 --- a/server/src/services/data_puller.ts +++ b/server/src/services/data_puller.ts @@ -82,7 +82,9 @@ export class DataPuller { throw Error(`data puller can't push unit: ${unit} data: ${data}`); } let task = new AnalyticsTask(unit.id, AnalyticsTaskType.PUSH, data); - + if(_.isEmpty(data.cache)) { + console.log('push empty cache to analytics') + } try { this.analyticsService.sendTask(task); let fromTime = new Date(data.from).toLocaleTimeString();