from analytic_types import TimeSeries
from analytic_types.learning_info import LearningInfo
from models import Model, ModelState, AnalyticSegment, ModelType

from typing import Generator, List, Optional, Tuple
import math

import pandas as pd
import scipy.signal

import utils
import utils.meta

PEARSON_FACTOR = 0.7


@utils.meta.JSONClass
class GeneralModelState(ModelState):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)


class GeneralModel(Model):
    """Matches arbitrary labeled patterns via correlation and FFT convolution."""

    def get_model_type(self) -> ModelType:
        return ModelType.GENERAL

    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
        # The center of a labeled segment is simply its midpoint index.
        return start + math.ceil((end - start) / 2)

    def get_state(self, cache: Optional[dict] = None) -> GeneralModelState:
        return GeneralModelState.from_json(cache)

    def do_fit(
        self,
        dataframe: pd.DataFrame,
        labeled_segments: List[AnalyticSegment],
        deleted_segments: List[AnalyticSegment],
        learning_info: LearningInfo
    ) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']

        # Merge the new segment centers with those already learned and rebuild
        # the averaged pattern model.
        last_pattern_center = self.state.pattern_center
        self.state.pattern_center = utils.remove_duplicates_and_sort(
            last_pattern_center + learning_info.segment_center_list)
        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
        convolve_list = utils.get_convolve(
            self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
        correlation_list = utils.get_correlation(
            self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)

        # Record the convolution responses of segments the user deleted so
        # similar windows can be suppressed during detection.
        del_conv_list = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            del_mid_index = segment.center_index
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted_pat = utils.get_interval(data, del_mid_index, self.state.window_size)
            deleted_pat = utils.subtract_min_without_nan(deleted_pat)
            del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state.pattern_model)
            if len(del_conv_pat):
                del_conv_list.append(max(del_conv_pat))

        self.state.convolve_min, self.state.convolve_max = utils.get_min_max(
            convolve_list, self.state.window_size / 3)
        self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(
            del_conv_list, self.state.window_size)

    def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        pat_data = self.state.pattern_model
        if pat_data.count(0) == len(pat_data):
            raise ValueError('Labeled patterns must not be empty')

        # Correlate the whole series against the pattern model, take the
        # correlation peaks, then filter out weak candidates.
        window_size = self.state.window_size
        all_corr = utils.get_correlation_gen(data, window_size, pat_data)
        all_corr_peaks = utils.find_peaks(all_corr, window_size * 2)
        filtered = self.__filter_detection(all_corr_peaks, data)
        return [(item, item + window_size * 2) for item in filtered]

    def __filter_detection(
        self,
        segments: Generator[Tuple[int, float], None, None],
        data: pd.Series
    ) -> Generator[int, None, None]:
        if not self.state.pattern_center:
            return
        window_size = self.state.window_size
        pattern_model = self.state.pattern_model
        for ind, val in segments:
            watch_data = data[ind - window_size: ind + window_size + 1]
            watch_data = utils.subtract_min_without_nan(watch_data)
            convolve_segment = scipy.signal.fftconvolve(watch_data, pattern_model)
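            # Keep a candidate only if its convolution response is close to the
            # responses seen on labeled segments, its correlation value clears
            # PEARSON_FACTOR, and it does not fall inside the response band of
            # segments the user explicitly deleted.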
            if len(convolve_segment) == 0:
                continue
            watch_conv = max(convolve_segment)
            if watch_conv < self.state.convolve_min * 0.8 or val < PEARSON_FACTOR:
                continue
            if self.state.conv_del_min * 0.98 < watch_conv < self.state.conv_del_max * 1.02:
                continue
            yield ind
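

# A minimal, self-contained sketch of the matching idea used above: slide a
# window over a series, normalize it the way __filter_detection does, and score
# it against an averaged pattern with scipy.signal.fftconvolve. The toy series
# and pattern are made up for illustration; this block is not part of the
# analytics pipeline.
if __name__ == '__main__':
    import numpy as np

    pattern = np.array([0., 1., 2., 1., 0.])  # toy averaged pattern model
    series = np.concatenate([np.zeros(10), pattern * 3, np.zeros(10)])
    window_size = len(pattern) // 2

    scores = []
    for center in range(window_size, len(series) - window_size):
        window = series[center - window_size: center + window_size + 1]
        window = window - np.nanmin(window)  # mirrors subtract_min_without_nan
        conv = scipy.signal.fftconvolve(window, pattern)
        scores.append((center, conv.max()))

    best_center, best_score = max(scores, key=lambda item: item[1])
    print(f'best match around index {best_center} (score {best_score:.1f})')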