From 922f1f3d1126b3f8854b7e5d258bca499e997d65 Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Mon, 27 Aug 2018 07:10:03 +0300
Subject: [PATCH] WIP: Jump model v8 (#115)

* new jump model with height

* antipeak

* universal n/w jumps
---
 analytics/models/jump_model.py | 130 +++++++++++++++++----------------
 analytics/models/step_model.py |  71 ++++++++++++------
 analytics/utils/__init__.py    |  48 ++++++++++++
 3 files changed, 164 insertions(+), 85 deletions(-)

diff --git a/analytics/models/jump_model.py b/analytics/models/jump_model.py
index 68289ec..c8ec265 100644
--- a/analytics/models/jump_model.py
+++ b/analytics/models/jump_model.py
@@ -6,61 +6,68 @@ import scipy.signal
 from scipy.fftpack import fft
 from scipy.signal import argrelextrema
 import math
+from scipy.stats import gaussian_kde
+from scipy.stats import norm
 
-WINDOW_SIZE = 120
+WINDOW_SIZE = 240
 
 class JumpModel(Model):
 
     def __init__(self):
         super()
+        self.segments = []
+        self.ijumps = []
         self.state = {
             'confidence': 1.5,
-            'convolve_max': WINDOW_SIZE
+            'convolve_max': WINDOW_SIZE,
+            'JUMP_HEIGHT': 1,
+            'JUMP_LENGTH': 1,
         }
 
     def fit(self, dataframe, segments):
         self.segments = segments
-        #self.alpha_finder()
         data = dataframe['value']
         confidences = []
         convolve_list = []
+        jump_height_list = []
+        jump_length_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_data = data[segment['start'] : segment['finish'] + 1]
+                segment_data = data[segment['start'] : segment['finish'] + 1].reset_index(drop=True)
                 segment_min = min(segment_data)
                 segment_max = max(segment_data)
                 confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window=4).mean() # smooth the segment
-                kde_segment = flat_data.dropna().plot.kde() # distribution density
-                ax_list = kde_segment.get_lines()[0].get_xydata() # take the coordinates of the kde
-                mids = argrelextrema(np.array(ax_list), np.less)[0]
-                maxs = argrelextrema(np.array(ax_list), np.greater)[0]
-                min_peak = maxs[0]
-                max_peak = maxs[1]
-                min_line = ax_list[min_peak, 0]
-                max_line = ax_list[max_peak, 0]
-                sigm_heidht = max_line - min_line
-                pat_sigm = utils.logistic_sigmoid(-WINDOW_SIZE, WINDOW_SIZE, 1, sigm_heidht)
-                for i in range(0, len(pat_sigm)):
-                    pat_sigm[i] = pat_sigm[i] + min_line
-                cen_ind = utils.intersection_segment(flat_segment, mids[0]) # find all intersections with the median
-                c = [] # choose the correct intersection by convolution
-                jump_center = utils.find_jump_center(cen_ind)
-
-                segment_cent_index = jump_center - 4
+                flat_segment = segment_data.rolling(window=5).mean()
+                pdf = gaussian_kde(flat_segment.dropna())
+                x = np.linspace(flat_segment.dropna().min() - 1, flat_segment.dropna().max() + 1, len(flat_segment.dropna()))
+                y = pdf(x)
+                ax_list = []
+                for i in range(len(x)):
+                    ax_list.append([x[i], y[i]])
+                ax_list = np.array(ax_list, np.float32)
+                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
+                peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
+                min_peak_index = peaks_kde[0]
+                max_peak_index = peaks_kde[1]
+                segment_median = ax_list[antipeaks_kde[0], 0]
+                segment_min_line = ax_list[min_peak_index, 0]
+                segment_max_line = ax_list[max_peak_index, 0]
+                jump_height = 0.9 * (segment_max_line - segment_min_line)
+                jump_height_list.append(jump_height)
+                jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
+                jump_length_list.append(jump_length)
+                cen_ind = utils.intersection_segment(flat_segment, segment_median) # find all intersections with the median
+                #cen_ind = utils.find_ind_median(segment_median, flat_segment)
+                jump_center = cen_ind[0]
+                segment_cent_index = jump_center - 5 + segment['start']
+                self.ijumps.append(segment_cent_index)
                 labeled_drop = data[segment_cent_index - WINDOW_SIZE : segment_cent_index + WINDOW_SIZE]
-                labeled_min = min(labeled_drop)
-                for value in labeled_drop: # cut off the minimum
-                    value = value - labeled_min
-                labeled_max = max(labeled_drop)
-                for value in labeled_drop: # normalize
-                    value = value / labeled_max
+                labeled_drop = labeled_drop - min(labeled_drop) # shift the pattern to a zero baseline
                 convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
-                convolve_list.append(max(convolve)) # convolve the pattern with itself
-                # TODO: add convolve with alpha sigmoid
-                # TODO: add size of jump rise
-
+                convolve_list.append(max(convolve))
 
         if len(confidences) > 0:
             self.state['confidence'] = min(confidences)
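# ---------------------------------------------------------------------------
# A minimal, standalone sketch of the KDE trick used in JumpModel.fit()
# above: for a segment that jumps between two flat levels the value
# distribution is bimodal, so the two maxima of the estimated density are
# the plateau levels, and the antipeak between them plays the role of
# segment_median. The synthetic data and names below are illustrative,
# not part of the patch.
# ---------------------------------------------------------------------------
import numpy as np
from scipy.signal import argrelextrema
from scipy.stats import gaussian_kde

np.random.seed(42)  # reproducible demo
segment = np.concatenate([np.random.normal(1.0, 0.05, 200),   # lower plateau
                          np.random.normal(5.0, 0.05, 200)])  # upper plateau
pdf = gaussian_kde(segment)
x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment))
y = pdf(x)
peaks = argrelextrema(y, np.greater)[0]    # the two modes of the density
antipeaks = argrelextrema(y, np.less)[0]   # the dip between the modes
lower_level, upper_level = x[peaks[0]], x[peaks[-1]]
split_level = x[antipeaks[0]]              # ~ segment_median in fit()
jump_height = 0.9 * (upper_level - lower_level)
print(lower_level, split_level, upper_level, jump_height)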
@@ -70,14 +77,23 @@ class JumpModel(Model):
         if len(convolve_list) > 0:
             self.state['convolve_max'] = max(convolve_list)
         else:
-            self.state['convolve_max'] = WINDOW_SIZE # the max convolution metric equals the margin (WINDOW_SIZE)
+            self.state['convolve_max'] = WINDOW_SIZE
+
+        if len(jump_height_list) > 0:
+            self.state['JUMP_HEIGHT'] = min(jump_height_list)
+        else:
+            self.state['JUMP_HEIGHT'] = 1
+
+        if len(jump_length_list) > 0:
+            self.state['JUMP_LENGTH'] = max(jump_length_list)
+        else:
+            self.state['JUMP_LENGTH'] = 1
 
     def predict(self, dataframe):
         data = dataframe['value']
         result = self.__predict(data)
         result.sort()
-
         if len(self.segments) > 0:
             result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
         return result
@@ -86,50 +102,38 @@ class JumpModel(Model):
         window_size = 24
         all_max_flatten_data = data.rolling(window=window_size).mean()
         all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
-        possible_jumps = utils.find_all_jumps(all_max_flatten_data, 50, self.state['confidence'])
-
-        '''
-        for i in utils.exponential_smoothing(data + self.state['confidence'], 0.02):
-            extrema_list.append(i)
-
-        segments = []
-        for i in all_mins:
-            if all_max_flatten_data[i] > extrema_list[i]:
-                segments.append(i - window_size)
-        '''
+
+        possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
 
         return [(x - 1, x + 1) for x in self.__filter_prediction(possible_jumps, all_max_flatten_data)]
 
     def __filter_prediction(self, segments, all_max_flatten_data):
         delete_list = []
         variance_error = int(0.004 * len(all_max_flatten_data))
-        if variance_error > 200:
-            variance_error = 200
+        if variance_error > 50:
+            variance_error = 50
         for i in range(1, len(segments)):
             if segments[i] < segments[i - 1] + variance_error:
                 delete_list.append(segments[i])
         for item in delete_list:
             segments.remove(item)
-
-        # TODO: change the second delete list, adapt it for the convolution with the sigmoid
-        # TODO: write the filtering of jump patterns: look at each segment, trim it,
-        # normalize it and compare it with the chosen pattern
 
         delete_list = []
-        pattern_data = all_max_flatten_data[segments[0] - WINDOW_SIZE : segments[0] + WINDOW_SIZE]
+        if len(segments) == 0 or len(self.ijumps) == 0:
+            return []
+        pattern_data = all_max_flatten_data[self.ijumps[0] - WINDOW_SIZE : self.ijumps[0] + WINDOW_SIZE]
         for segment in segments:
-            convol_data = all_max_flatten_data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
-            conv = scipy.signal.fftconvolve(pattern_data, convol_data)
-            if max(conv) > self.state['convolve_max'] * 1.1 or max(conv) < self.state['convolve_max'] * 0.9:
+            if segment > WINDOW_SIZE:
+                convol_data = all_max_flatten_data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+                conv = scipy.signal.fftconvolve(pattern_data, convol_data)
+                if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
+                    delete_list.append(segment)
+            else:
                 delete_list.append(segment)
         for item in delete_list:
             segments.remove(item)
+
+        for ijump in self.ijumps:
+            segments.append(ijump)
 
-        return segments
-
-    def alpha_finder(self, data):
-        """
-        search for the alpha of the logistic sigmoid
-        """
-        pass
+        return segments
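# ---------------------------------------------------------------------------
# A standalone sketch of the convolution filter in __filter_prediction()
# above. fit() stores max(fftconvolve(p, p)) for the labeled pattern p;
# a candidate window w survives only if max(fftconvolve(p, w)) lands
# inside a band around that reference value (the patch uses 0.8..1.2).
# keep_candidate() and the toy series are illustrative assumptions.
# ---------------------------------------------------------------------------
import numpy as np
import scipy.signal

def keep_candidate(pattern, window, convolve_max, band=0.2):
    conv = scipy.signal.fftconvolve(pattern, window)
    return convolve_max * (1 - band) <= max(conv) <= convolve_max * (1 + band)

pattern = np.concatenate([np.zeros(240), np.ones(240)])  # idealized jump
convolve_max = max(scipy.signal.fftconvolve(pattern, pattern))
similar = np.concatenate([np.zeros(230), np.ones(250)])  # slightly shifted jump
noise = np.random.rand(480)                              # no jump at all
print(keep_candidate(pattern, similar, convolve_max))    # expected: True
print(keep_candidate(pattern, noise, convolve_max))      # expected: False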
diff --git a/analytics/models/step_model.py b/analytics/models/step_model.py
index d9d78f5..828c717 100644
--- a/analytics/models/step_model.py
+++ b/analytics/models/step_model.py
@@ -10,10 +10,10 @@ import pickle
 
 class StepModel(Model):
 
-
     def __init__(self):
         super()
         self.segments = []
+        self.idrops = []
         self.state = {
             'confidence': 1.5,
             'convolve_max': 570000
@@ -21,19 +21,26 @@ class StepModel(Model):
 
     def fit(self, dataframe, segments):
         self.segments = segments
-        data = dataframe['value']
+        #dataframe = dataframe.iloc[::-1]
+        d_min = min(dataframe['value'])
+        for i in range(0, len(dataframe['value'])):
+            dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min
+        data = dataframe['value']
+        new_data = []
+        for val in data:
+            new_data.append(val)
         confidences = []
         convolve_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_data = data[segment['start'] : segment['finish'] + 1]
+                segment_data = new_data[segment['start'] : segment['finish'] + 1]
                 segment_min = min(segment_data)
                 segment_max = max(segment_data)
-                confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window=5).mean()
-
-                segment_min_index = flat_segment.idxmin() - 5
-                labeled_drop = data[segment_min_index - 120 : segment_min_index + 120]
+                confidences.append(0.4 * (segment_max - segment_min))
+                flat_segment = segment_data # .rolling(window=5).mean()
+                segment_min_index = flat_segment.index(min(flat_segment)) - 5 + segment['start']
+                self.idrops.append(segment_min_index)
+                labeled_drop = new_data[segment_min_index - 240 : segment_min_index + 240]
                 convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
                 convolve_list.append(max(convolve))
 
@@ -47,7 +54,13 @@ class StepModel(Model):
         else:
             self.state['convolve_max'] = 570000
 
-    def predict(self, dataframe):
+    async def predict(self, dataframe):
+        #dataframe = dataframe.iloc[::-1]
+        d_min = min(dataframe['value'])
+        for i in range(0, len(dataframe['value'])):
+            dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min
+
         data = dataframe['value']
         result = self.__predict(data)
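# ---------------------------------------------------------------------------
# fit() and predict() above shift the whole series so that its minimum
# becomes zero, one row at a time via dataframe.loc. An equivalent
# vectorized form (a sketch, not part of the patch):
# ---------------------------------------------------------------------------
import pandas as pd

dataframe = pd.DataFrame({'value': [3.0, 5.0, 2.0, 4.0]})
dataframe['value'] = dataframe['value'] - dataframe['value'].min()
print(dataframe['value'].tolist())  # [1.0, 3.0, 0.0, 2.0]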
@@ -60,24 +73,30 @@ class StepModel(Model):
     def __predict(self, data):
         window_size = 24
         all_max_flatten_data = data.rolling(window=window_size).mean()
+        new_flat_data = []
+        for val in all_max_flatten_data:
+            new_flat_data.append(val)
+
         all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
+
         extrema_list = []
-
-        for i in utils.exponential_smoothing(data - self.state['confidence'], 0.03):
+        for i in utils.exponential_smoothing(data - self.state['confidence'], 0.01):
             extrema_list.append(i)
+        #extrema_list = extrema_list[::-1]
 
         segments = []
         for i in all_mins:
-            if all_max_flatten_data[i] < extrema_list[i]:
-                segments.append(i - window_size)
+            if new_flat_data[i] < extrema_list[i]:
+                segments.append(i) # -window_size
 
-        return [(x - 1, x + 1) for x in self.__filter_prediction(segments, all_max_flatten_data)]
+        return [(x - 1, x + 1) for x in self.__filter_prediction(segments, new_flat_data)]
 
-    def __filter_prediction(self, segments, all_max_flatten_data):
+    def __filter_prediction(self, segments, new_flat_data):
         delete_list = []
-        variance_error = int(0.004 * len(all_max_flatten_data))
-        if variance_error > 200:
-            variance_error = 200
+        variance_error = int(0.004 * len(new_flat_data))
+        if variance_error > 100:
+            variance_error = 100
         for i in range(1, len(segments)):
             if segments[i] < segments[i - 1] + variance_error:
                 delete_list.append(segments[i])
@@ -85,11 +104,19 @@ class StepModel(Model):
             segments.remove(item)
 
         delete_list = []
-        pattern_data = all_max_flatten_data[segments[0] - 120 : segments[0] + 120]
+        pattern_data = new_flat_data[self.idrops[0] - 240 : self.idrops[0] + 240]
         for segment in segments:
-            convol_data = all_max_flatten_data[segment - 120 : segment + 120]
-            conv = scipy.signal.fftconvolve(pattern_data, convol_data)
-            if max(conv) > self.state['convolve_max'] * 1.1 or max(conv) < self.state['convolve_max'] * 0.9:
+            if segment > 240:
+                convol_data = new_flat_data[segment - 240 : segment + 240]
+                conv = scipy.signal.fftconvolve(pattern_data, convol_data)
+                if conv[480] > self.state['convolve_max'] * 1.2 or conv[480] < self.state['convolve_max'] * 0.9:
+                    delete_list.append(segment)
+            else:
                 delete_list.append(segment)
         for item in delete_list:
             segments.remove(item)
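# ---------------------------------------------------------------------------
# A sketch of the detection rule in StepModel.__predict() above: a point
# is a drop candidate when a local minimum of the rolling mean falls below
# an exponentially smoothed copy of the series shifted down by 'confidence'.
# exp_smooth() is a stand-in for utils.exponential_smoothing(), whose
# source is not part of this patch; the data is synthetic.
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd
from scipy.signal import argrelextrema

def exp_smooth(values, alpha):
    result = [values[0]]
    for value in values[1:]:
        result.append(alpha * value + (1 - alpha) * result[-1])
    return np.array(result)

np.random.seed(0)
data = pd.Series(np.concatenate([np.full(100, 5.0), np.full(100, 1.0)])
                 + np.random.normal(0, 0.01, 200))  # a step down at index 100
confidence = 1.5
flat = data.rolling(window=24).mean()
threshold = exp_smooth((data - confidence).values, 0.01)
all_mins = argrelextrema(np.array(flat), np.less)[0]
candidates = [int(i) for i in all_mins if flat[i] < threshold[i]]
print(candidates[:5])  # indices shortly after the drop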
diff --git a/analytics/utils/__init__.py b/analytics/utils/__init__.py
index ca5b90e..ef8ee74 100644
--- a/analytics/utils/__init__.py
+++ b/analytics/utils/__init__.py
@@ -81,6 +81,13 @@ def logistic_sigmoid_distribution(self, x1, x2, alpha, height):
 def logistic_sigmoid(x, alpha, height):
     return height / (1 + math.exp(-x * alpha))
 
+def MyLogisticSigmoid(interval, alpha, height):
+    distribution = []
+    for i in range(-interval, interval):
+        F = height / (1 + math.exp(-i * alpha))
+        distribution.append(F)
+    return distribution
+
 def find_one_jump(data, x, size, height, err):
     l = []
     for i in range(x + 1, x + size):
@@ -108,3 +115,44 @@ def find_jump_center(cen_ind):
         if i > 0 and cx > c[i - 1]:
             jump_center = x
     return jump_center
+
+def find_ind_median(median, segment_data):
+    f = np.full(len(segment_data), median)
+    g = np.array(segment_data)
+    # indices where the segment crosses the median line
+    idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1)
+    return idx
+
+def find_jump_length(segment_data, min_line, max_line):
+    f = np.full(len(segment_data), min_line)
+    l = np.full(len(segment_data), max_line)
+    g = np.array(segment_data)
+    # crossings with the lower and the upper plateau lines
+    idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1)
+    idl = np.argwhere(np.diff(np.sign(l - g)) != 0).reshape(-1)
+    if (idl[0] - idx[-1] + 1) > 0:
+        return idl[0] - idx[-1] + 1
+    else:
+        return 0 # the jump length could not be determined
+
+def find_jump(data, height, length):
+    j_list = []
+    for i in range(len(data) - length - 1):
+        for x in range(1, length):
+            if data[i + x] > data[i] + height:
+                j_list.append(i)
+                break # one hit per start index is enough
+    return j_list
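# ---------------------------------------------------------------------------
# A quick, self-contained check of the find_jump() helper added above
# (the function body is copied from the patch as fixed there).
# ---------------------------------------------------------------------------
def find_jump(data, height, length):
    j_list = []
    for i in range(len(data) - length - 1):
        for x in range(1, length):
            if data[i + x] > data[i] + height:
                j_list.append(i)
                break  # one hit per start index is enough
    return j_list

series = [0, 0, 0, 5, 5, 5, 0, 0]
# JumpModel passes JUMP_LENGTH + 1, so length=3 checks the next 2 points
print(find_jump(series, height=3, length=3))  # -> [1, 2]; nearby duplicates
                                              # are thinned out later by
                                              # __filter_prediction()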