From 6f9f53cb1b089d8606820ce7cf0be6ef5cc97ef5 Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Wed, 29 Aug 2018 14:51:14 +0300
Subject: [PATCH] WIP: Jump and drop v1 (#119)

* new jump model with height
* antipeak
* universal n/w jumps
* jump and drop models
* rm unneeded lines and trailing spaces
---
 analytics/models/jump_model.py |  45 ++++++------
 analytics/models/step_model.py | 123 ++++++++++++++++++++-------------
 analytics/utils/__init__.py    |  40 +++++++++++
 3 files changed, 139 insertions(+), 69 deletions(-)

diff --git a/analytics/models/jump_model.py b/analytics/models/jump_model.py
index e0dd95b..3ae3b37 100644
--- a/analytics/models/jump_model.py
+++ b/analytics/models/jump_model.py
@@ -10,7 +10,7 @@
 from scipy.stats import gaussian_kde
 from scipy.stats import norm
 
-WINDOW_SIZE = 240
+WINDOW_SIZE = 400
 
 class JumpModel(Model):
@@ -24,7 +24,7 @@ class JumpModel(Model):
             'JUMP_HEIGHT': 1,
             'JUMP_LENGTH': 1,
         }
-    
+
     def fit(self, dataframe, segments):
         self.segments = segments
         data = dataframe['value']
@@ -32,14 +32,13 @@
         convolve_list = []
         jump_height_list = []
         jump_length_list = []
-        print(segments)
         for segment in segments:
             if segment['labeled']:
                 segment_data = data.loc[segment['from'] : segment['to'] + 1].reset_index(drop=True)
                 segment_min = min(segment_data)
                 segment_max = max(segment_data)
                 confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window=5).mean() 
+                flat_segment = segment_data.rolling(window=5).mean()
                 pdf = gaussian_kde(flat_segment.dropna())
                 x = np.linspace(flat_segment.dropna().min() - 1, flat_segment.dropna().max() + 1, len(flat_segment.dropna()))
                 y = pdf(x)
@@ -47,11 +46,11 @@
                 for i in range(len(x)):
                     ax_list.append([x[i], y[i]])
                 ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0] 
+                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
                 peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
                 min_peak_index = peaks_kde[0]
                 max_peak_index = peaks_kde[1]
-                segment_median = ax_list[antipeaks_kde[0], 0] 
+                segment_median = ax_list[antipeaks_kde[0], 0]
                 segment_min_line = ax_list[min_peak_index, 0]
                 segment_max_line = ax_list[max_peak_index, 0]
                 jump_height = 0.9 * (segment_max_line - segment_min_line)
@@ -68,7 +67,7 @@
                 for value in labeled_drop:
                     value = value - labeled_min
                 convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
-                convolve_list.append(max(convolve)) 
+                convolve_list.append(max(convolve))
 
         if len(confidences) > 0:
             self.state['confidence'] = min(confidences)
@@ -79,17 +78,17 @@
             self.state['convolve_max'] = max(convolve_list)
         else:
             self.state['convolve_max'] = WINDOW_SIZE
-        
+
         if len(jump_height_list) > 0:
             self.state['JUMP_HEIGHT'] = min(jump_height_list)
         else:
             self.state['JUMP_HEIGHT'] = 1
-        
+
         if len(jump_length_list) > 0:
             self.state['JUMP_LENGTH'] = max(jump_length_list)
         else:
-            self.state['JUMP_LENGTH'] = 1 
-    
+            self.state['JUMP_LENGTH'] = 1
+
     def predict(self, dataframe):
         data = dataframe['value']
@@ -100,17 +99,16 @@
         return result
 
     def __predict(self, data):
-        window_size = 24
-        all_max_flatten_data = data.rolling(window=window_size).mean()
-        all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
-        
+        #window_size = 24
+        #all_max_flatten_data = data.rolling(window=window_size).mean()
+        #all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
         possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
-        return [(x - 1, x + 1) for x in self.__filter_prediction(possible_jumps, all_max_flatten_data)]
+        return [(x - 1, x + 1) for x in self.__filter_prediction(possible_jumps, data)]
 
-    def __filter_prediction(self, segments, all_max_flatten_data):
+    def __filter_prediction(self, segments, data):
         delete_list = []
-        variance_error = int(0.004 * len(all_max_flatten_data))
+        variance_error = int(0.004 * len(data))
         if variance_error > 50:
             variance_error = 50
         for i in range(1, len(segments)):
@@ -122,10 +120,12 @@
         if len(segments) == 0 or len(self.ijumps) == 0 :
             segments = []
             return segments
-        pattern_data = all_max_flatten_data[self.ijumps[0] - WINDOW_SIZE : self.ijumps[0] + WINDOW_SIZE]
+
+        pattern_data = data[self.ijumps[0] - WINDOW_SIZE : self.ijumps[0] + WINDOW_SIZE]
         for segment in segments:
-            if segment > WINDOW_SIZE:
-                convol_data = all_max_flatten_data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+            if segment > WINDOW_SIZE and segment < (len(data) - WINDOW_SIZE):
+                convol_data = data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+
                 conv = scipy.signal.fftconvolve(pattern_data, convol_data)
                 if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
                     delete_list.append(segment)
@@ -133,8 +133,9 @@
                 delete_list.append(segment)
         for item in delete_list:
             segments.remove(item)
-        
+
        for ijump in self.ijumps:
            segments.append(ijump)
+
        return segments
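For context, a sketch of the flow this file now follows: JumpModel.__predict asks utils.find_jump for raw candidate indices, then __filter_prediction prunes near-duplicates and convolution mismatches. find_jump's body is not shown in this patch, so the helper below is an assumption that mirrors the find_drop implementation added in analytics/utils/__init__.py with the comparison inverted; the series and numbers are synthetic:

    import numpy as np

    def find_jump_sketch(data, height, length):
        # Record i whenever a sample within the next `length` points
        # rises more than `height` above data[i].
        j_list = []
        for i in range(len(data) - length - 1):
            for x in range(1, length):
                if data[i + x] > data[i] + height:
                    j_list.append(i)
        return j_list

    series = np.concatenate([np.zeros(50), np.full(50, 5.0)])  # step up at index 50
    print(find_jump_sketch(series, height=2.0, length=4))
    # [47, 48, 48, 49, 49, 49] -- near-duplicate hits, which is why
    # __filter_prediction drops candidates closer together than variance_error.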
diff --git a/analytics/models/step_model.py b/analytics/models/step_model.py
index cff3b56..9b95c46 100644
--- a/analytics/models/step_model.py
+++ b/analytics/models/step_model.py
@@ -3,11 +3,13 @@ from models import Model
 import scipy.signal
 from scipy.fftpack import fft
 from scipy.signal import argrelextrema
+from scipy.stats import gaussian_kde
 import utils
 
 import numpy as np
 import pandas as pd
 
+WINDOW_SIZE = 400
 
 class StepModel(Model):
     def __init__(self):
@@ -16,31 +18,61 @@
         self.idrops = []
         self.state = {
             'confidence': 1.5,
-            'convolve_max': 570000
+            'convolve_max': WINDOW_SIZE,
+            'DROP_HEIGHT': 1,
+            'DROP_LENGTH': 1,
         }
 
     def fit(self, dataframe, segments):
         self.segments = segments
-        #dataframe = dataframe.iloc[::-1]
         d_min = min(dataframe['value'])
         for i in range(0,len(dataframe['value'])):
-            dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min 
-        data = dataframe['value'] 
+            dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min
+        data = dataframe['value']
+
         confidences = []
         convolve_list = []
+        drop_height_list = []
+        drop_length_list = []
         for segment in segments:
             if segment['labeled']:
                 segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
                 segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
-                segment_data = data[segment_from_index : segment_to_index + 1]
+                segment_data = data[segment_from_index : segment_to_index + 1].reset_index(drop=True)
                 segment_min = min(segment_data)
                 segment_max = max(segment_data)
-                confidences.append( 0.4*(segment_max - segment_min))
-                flat_segment = segment_data #.rolling(window=5).mean()
-                segment_min_index = flat_segment.idxmin() - 5
-                self.idrops.append(segment_min_index)
-                labeled_drop = data[segment_min_index - 240 : segment_min_index + 240]
+                confidences.append(0.20 * (segment_max - segment_min))
+                flat_segment = segment_data.rolling(window=5).mean()
+                pdf = gaussian_kde(flat_segment.dropna())
+                x = np.linspace(flat_segment.dropna().min(), flat_segment.dropna().max(), len(flat_segment.dropna()))
+                y = pdf(x)
+                ax_list = []
+                for i in range(len(x)):
+                    ax_list.append([x[i], y[i]])
+                ax_list = np.array(ax_list, np.float32)
+                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
+                peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
+                min_peak_index = peaks_kde[0]
+                max_peak_index = peaks_kde[1]
+                segment_median = ax_list[antipeaks_kde[0], 0]
+                segment_min_line = ax_list[min_peak_index, 0]
+                segment_max_line = ax_list[max_peak_index, 0]
+                #print(segment_min_line, segment_max_line)
+                drop_height = 0.95 * (segment_max_line - segment_min_line)
+                drop_height_list.append(drop_height)
+                drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
+                #print(drop_length)
+                drop_length_list.append(drop_length)
+                cen_ind = utils.drop_intersection(flat_segment, segment_median)  # finds all intersections with the median
+                drop_center = cen_ind[0]
+                segment_cent_index = drop_center - 5 + segment['start']
+                self.idrops.append(segment_cent_index)
+                labeled_drop = data[segment_cent_index - WINDOW_SIZE : segment_cent_index + WINDOW_SIZE]
+                labeled_min = min(labeled_drop)
+                for value in labeled_drop:
+                    value = value - labeled_min
+
                 convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
                 convolve_list.append(max(convolve))
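The density estimation introduced above is the heart of both fit() methods: the KDE of a two-plateau segment has one mode per plateau and an antimode at the crossing level, so the distance between the modes estimates the jump/drop height and the antimode gives the median line. A condensed, runnable illustration of that idea; it applies argrelextrema to the density values directly rather than to the [x, y] pair array built in the patch, and the segment is synthetic:

    import numpy as np
    from scipy.signal import argrelextrema
    from scipy.stats import gaussian_kde

    rng = np.random.RandomState(0)
    # Two-level segment: plateau at 1.0, then at 5.0, with mild noise.
    segment = np.concatenate([np.full(100, 1.0), np.full(100, 5.0)])
    segment = segment + rng.normal(0.0, 0.05, 200)

    pdf = gaussian_kde(segment)
    x = np.linspace(segment.min(), segment.max(), 200)
    y = pdf(x)

    peaks = argrelextrema(y, np.greater)[0]   # density maxima ~ the two plateau levels
    antipeaks = argrelextrema(y, np.less)[0]  # density minimum ~ the median line
    low_line, high_line = x[peaks[0]], x[peaks[1]]
    print(0.95 * (high_line - low_line))      # ~3.8, the estimated drop height
    print(x[antipeaks[0]])                    # ~3.0, the median crossing level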
@@ -52,11 +84,20 @@
         if len(convolve_list) > 0:
             self.state['convolve_max'] = max(convolve_list)
         else:
-            self.state['convolve_max'] = 570000
+            self.state['convolve_max'] = WINDOW_SIZE
+
+        if len(drop_height_list) > 0:
+            self.state['DROP_HEIGHT'] = min(drop_height_list)
+        else:
+            self.state['DROP_HEIGHT'] = 1
+
+        if len(drop_length_list) > 0:
+            self.state['DROP_LENGTH'] = max(drop_length_list)
+        else:
+            self.state['DROP_LENGTH'] = 1
 
     async def predict(self, dataframe):
-        #dataframe = dataframe.iloc[::-1]
         d_min = min(dataframe['value'])
         for i in range(0,len(dataframe['value'])):
             dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min
@@ -71,54 +112,42 @@
         return result
 
     def __predict(self, data):
-        window_size = 24
-        all_max_flatten_data = data.rolling(window=window_size).mean()
-        new_flat_data = []
-        for val in all_max_flatten_data:
-            new_flat_data.append(val)
-
-        all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
-
-        extrema_list = []
-        for i in utils.exponential_smoothing(data - self.state['confidence'], 0.01):
-            extrema_list.append(i)
-        #extrema_list = extrema_list[::-1]
-
-        segments = []
-        for i in all_mins:
-            if new_flat_data[i] < extrema_list[i]:
-                segments.append(i) #-window_size
-
-        return [(x - 1, x + 1) for x in self.__filter_prediction(segments, new_flat_data)]
-
-    def __filter_prediction(self, segments, new_flat_data):
+        #window_size = 24
+        #all_max_flatten_data = data.rolling(window=window_size).mean()
+        #all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
+        #print(self.state['DROP_HEIGHT'],self.state['DROP_LENGTH'] )
+        possible_drops = utils.find_drop(data, self.state['DROP_HEIGHT'], self.state['DROP_LENGTH'] + 1)
+        return [(x - 1, x + 1) for x in self.__filter_prediction(possible_drops, data)]
+
+    def __filter_prediction(self, segments, data):
         delete_list = []
-        variance_error = int(0.004 * len(new_flat_data))
-        if variance_error > 100:
-            variance_error = 100
+        variance_error = int(0.004 * len(data))
+        if variance_error > 50:
+            variance_error = 50
+
         for i in range(1, len(segments)):
             if segments[i] < segments[i - 1] + variance_error:
                 delete_list.append(segments[i])
         for item in delete_list:
             segments.remove(item)
-        delete_list = []
-        print(self.idrops[0])
-        pattern_data = new_flat_data[self.idrops[0] - 240 : self.idrops[0] + 240]
-        print(self.state['convolve_max'])
+
+        if len(segments) == 0 or len(self.idrops) == 0 :
+            segments = []
+            return segments
+        pattern_data = data[self.idrops[0] - WINDOW_SIZE : self.idrops[0] + WINDOW_SIZE]
         for segment in segments:
-            if segment > 240:
-                convol_data = new_flat_data[segment - 240 : segment + 240]
+            if segment > WINDOW_SIZE and segment < (len(data) - WINDOW_SIZE):
+                convol_data = data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
                 conv = scipy.signal.fftconvolve(pattern_data, convol_data)
-                if conv[480] > self.state['convolve_max'] * 1.2 or conv[480] < self.state['convolve_max'] * 0.9:
+                if conv[WINDOW_SIZE*2] > self.state['convolve_max'] * 1.2 or conv[WINDOW_SIZE*2] < self.state['convolve_max'] * 0.8:
                     delete_list.append(segment)
-                    print(segment, conv[480], 0)
-                else:
-                    print(segment, conv[480], 1)
             else:
                 delete_list.append(segment)
         for item in delete_list:
             segments.remove(item)
+        #print(segments)
+        for idrop in self.idrops:
+            segments.append(idrop)
 
         return segments
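Both __filter_prediction methods now share one criterion: convolve a window around the first labeled pattern with a window around each candidate, and keep the candidate only while the response stays within 20% of the convolve_max learned in fit(). A self-contained sketch of that test (JumpModel compares max(conv) while StepModel samples conv[WINDOW_SIZE*2]; the max(conv) variant and the data below are illustrative):

    import numpy as np
    from scipy.signal import fftconvolve

    WINDOW_SIZE = 400  # the constant both models now share

    def passes_convolve_filter(data, center, pattern, convolve_max):
        # Candidates too close to either edge cannot supply a full window.
        if center <= WINDOW_SIZE or center >= len(data) - WINDOW_SIZE:
            return False
        window = data[center - WINDOW_SIZE : center + WINDOW_SIZE]
        conv = fftconvolve(pattern, window)
        # Accept only responses within +/-20% of the fitted maximum.
        return 0.8 * convolve_max <= max(conv) <= 1.2 * convolve_max

    rng = np.random.RandomState(1)
    series = rng.normal(0.0, 1.0, 2000)
    pattern = series[1000 - WINDOW_SIZE : 1000 + WINDOW_SIZE]
    convolve_max = max(fftconvolve(pattern, pattern))
    print(passes_convolve_filter(series, 1000, pattern, convolve_max))  # True
    print(passes_convolve_filter(series, 100, pattern, convolve_max))   # False, too near the edge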
diff --git a/analytics/utils/__init__.py b/analytics/utils/__init__.py
index 2b3d857..4321d23 100644
--- a/analytics/utils/__init__.py
+++ b/analytics/utils/__init__.py
@@ -158,6 +158,46 @@ def find_jump(data, height, lenght):
                 j_list.append(i)
     return(j_list)
 
+def find_drop_length(segment_data, min_line, max_line):
+    x = np.arange(0, len(segment_data))
+    f = []
+    l = []
+    for i in range(len(segment_data)):
+        f.append(min_line)
+        l.append(max_line)
+    f = np.array(f)
+    l = np.array(l)
+    g = []
+    for i in segment_data:
+        g.append(i)
+    g = np.array(g)
+    idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1) + 0  # crossings of min_line
+    idl = np.argwhere(np.diff(np.sign(l - g)) != 0).reshape(-1) + 0  # crossings of max_line
+    if (idx[0] - idl[-1] + 1) > 0:
+        return idx[0] - idl[-1] + 1
+    else:
+        return print("unexpected drop length")
+
+def drop_intersection(segment_data, median_line):
+    x = np.arange(0, len(segment_data))
+    f = []
+    for i in range(len(segment_data)):
+        f.append(median_line)
+    f = np.array(f)
+    g = []
+    for i in segment_data:
+        g.append(i)
+    g = np.array(g)
+    idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1) + 0
+    return idx
+
+def find_drop(data, height, length):
+    d_list = []
+    for i in range(len(data)-length-1):
+        for x in range(1, length):
+            if(data[i+x] < data[i] - height):
+                d_list.append(i+36)
+    return(d_list)
 
 def timestamp_to_index(dataframe, timestamp):
     data = dataframe['timestamp']
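All three new helpers reduce to the same sign-change trick: compare the segment against a horizontal line and take the indices where the difference changes sign. A condensed equivalent of drop_intersection for illustration (np.full replaces the element-by-element list building above; the toy segment is made up):

    import numpy as np
    import pandas as pd

    def drop_intersection_sketch(segment_data, median_line):
        # Indices where the segment crosses the horizontal median line.
        f = np.full(len(segment_data), median_line)
        g = np.asarray(segment_data)
        return np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1)

    segment = pd.Series(np.concatenate([np.full(30, 10.0), np.full(30, 2.0)]))
    print(drop_intersection_sketch(segment, median_line=6.0))
    # [29] -- the drop crosses the median between samples 29 and 30;
    # StepModel.fit takes the first crossing as the drop center.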