From d389cf3adb8bc6eedfccc0dc319f8239799a5d74 Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Wed, 28 Nov 2018 09:35:44 +0300
Subject: [PATCH] Move repeating code from models to utils #270 (#272)

Move the repeating peak / trough / jump / drop / general model code to
utils and refactor it.
---
 analytics/analytics/models/drop_model.py    | 70 +++---------------
 analytics/analytics/models/general_model.py | 32 +++-----
 analytics/analytics/models/jump_model.py    | 67 +++--------------
 analytics/analytics/models/peak_model.py    | 33 +++------
 analytics/analytics/models/trough_model.py  | 33 +++------
 analytics/analytics/utils/common.py         | 81 ++++++++++++++++++++-
 analytics/analytics/utils/segments.py       |  2 +-
 7 files changed, 135 insertions(+), 183 deletions(-)

diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index 0fa1175..46e3897 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -36,78 +36,32 @@ class DropModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window = 5).mean()
-                pdf = gaussian_kde(flat_segment.dropna())
-                max_drop = max(flat_segment.dropna())
-                min_drop = min(flat_segment.dropna())
-                x = np.linspace(flat_segment.dropna().min() - 1, flat_segment.dropna().max() + 1, len(flat_segment.dropna()))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
-                try:
-                    min_peak_index = peaks_kde[0]
-                    segment_min_line = ax_list[min_peak_index, 0]
-                    max_peak_index = peaks_kde[1]
-                    segment_max_line = ax_list[max_peak_index, 0]
-                    segment_median = ax_list[antipeaks_kde[0], 0]
-                except IndexError:
-                    segment_max_line = max_drop
-                    segment_min_line = min_drop
-                    segment_median = (max_drop - min_drop) / 2 + min_drop
-                drop_height = 0.95 * (segment_max_line - segment_min_line)
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
+                segment_cent_index, drop_height, drop_length = utils.find_drop_parameters(segment_data, segment_from_index)
                 drop_height_list.append(drop_height)
-                drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
                 drop_length_list.append(drop_length)
-                cen_ind = utils.drop_intersection(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                drop_center = cen_ind[0]
-                segment_cent_index = drop_center - 5 + segment_from_index
                 self.idrops.append(segment_cent_index)
-                labeled_drop = data[segment_cent_index - self.state['WINDOW_SIZE']: segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_drop = labeled_drop - min(labeled_drop)
+                labeled_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                labeled_drop = utils.subtract_min_without_nan(labeled_drop)
                 patterns_list.append(labeled_drop)
 
         self.model_drop = utils.get_av_model(patterns_list)
-        for idrop in self.idrops:
-            labeled_drop = data[idrop - self.state['WINDOW_SIZE']: idrop + self.state['WINDOW_SIZE'] + 1]
-            labeled_drop = labeled_drop - min(labeled_drop)
-            auto_convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
-            convolve_drop = scipy.signal.fftconvolve(labeled_drop, self.model_drop)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_drop))
+        convolve_list = utils.get_convolve(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
-                flat_segment = segment_data.rolling(window = 5).mean()
-                flat_segment_dropna = flat_segment.dropna()
-                pdf = gaussian_kde(flat_segment_dropna)
-                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                segment_median = ax_list[antipeaks_kde[0], 0]
-                cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                drop_center = cen_ind[0] # or -1? test
-                segment_cent_index = drop_center - 5 + segment_from_index
-                deleted_drop = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_drop = deleted_drop - min(labeled_drop)
+                segment_cent_index = utils.find_drop_parameters(segment_data, segment_from_index)[0]
+                deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                deleted_drop = utils.subtract_min_without_nan(deleted_drop)
                 del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.model_drop)
                 del_conv_list.append(max(del_conv_drop))
 
@@ -163,7 +117,7 @@ class DropModel(Model):
         pattern_data = self.model_drop
         for segment in segments:
             if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
-                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)
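The two window helpers introduced here replace the hand-rolled slicing and baseline subtraction repeated in every model. As a minimal standalone sketch (the bodies mirror the ones added to utils/common.py further down; the series is toy data, not from the project):

    import numpy as np
    import pandas as pd

    def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
        # 2 * window_size + 1 points centered on `center`
        return data[center - window_size: center + window_size + 1]

    def subtract_min_without_nan(segment: pd.Series) -> pd.Series:
        # shift the pattern to a zero baseline, but only if the minimum is defined
        if not np.isnan(min(segment)):
            segment = segment - min(segment)
        return segment

    data = pd.Series([5.0, 5.5, 5.0, 1.0, 1.5, 1.25, 1.0, 5.0, 5.5])
    window = get_interval(data, center=4, window_size=2)          # points 2..6
    print(subtract_min_without_nan(window).tolist())              # [4.0, 0.0, 0.5, 0.25, 0.0]

With these in place, every model's fit path reduces to the same shape: parse_segment, find_confidence, locate a center index, get_interval, subtract_min_without_nan.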
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index 9e2896a..40d08a5 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -34,39 +34,28 @@ class GeneralModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                x = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
-                self.ipats.append(x)
-                segment_data = data[x - self.state['WINDOW_SIZE'] : x + self.state['WINDOW_SIZE']]
-                segment_min = min(segment_data)
-                segment_data = segment_data - segment_min
+                center_ind = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
+                self.ipats.append(center_ind)
+                segment_data = utils.get_interval(data, center_ind, self.state['WINDOW_SIZE'])
+                segment_data = utils.subtract_min_without_nan(segment_data)
                 patterns_list.append(segment_data)
 
         self.model_gen = utils.get_av_model(patterns_list)
-        for ipat in self.ipats: #labeled segments
-            labeled_data = data[ipat - self.state['WINDOW_SIZE']: ipat + self.state['WINDOW_SIZE'] + 1]
-            labeled_data = labeled_data - min(labeled_data)
-            auto_convolve = scipy.signal.fftconvolve(labeled_data, labeled_data)
-            convolve_data = scipy.signal.fftconvolve(labeled_data, self.model_gen)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_data))
+        convolve_list = utils.get_convolve(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
                 del_mid_index = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
-                deleted_pat = data[del_mid_index - self.state['WINDOW_SIZE']: del_mid_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_pat = deleted_pat - min(deleted_pat)
+                deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
+                deleted_pat = utils.subtract_min_without_nan(deleted_pat)
                 del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.model_gen)
                 del_conv_list.append(max(del_conv_pat))
 
@@ -97,8 +86,7 @@ class GeneralModel(Model):
 
         for i in range(self.state['WINDOW_SIZE'] * 2, len(data)):
             watch_data = data[i - self.state['WINDOW_SIZE'] * 2: i]
-            w = min(watch_data)
-            watch_data = watch_data - w
+            watch_data = utils.subtract_min_without_nan(watch_data)
            conv = scipy.signal.fftconvolve(watch_data, pat_data)
             self.all_conv.append(max(conv))
         all_conv_peaks = utils.peak_finder(self.all_conv, self.state['WINDOW_SIZE'] * 2)
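get_convolve, now shared by all five models, records two numbers per labeled pattern: the pattern's auto-convolution maximum and the maximum of its convolution with the averaged model; the models later derive their detection bounds from the spread of this list. A standalone rendering with inlined helpers and illustrative toy data (not the module itself):

    import numpy as np
    import pandas as pd
    import scipy.signal

    def get_convolve(segments, av_model, data, window_size):
        convolve_list = []
        for center in segments:
            labeled = data[center - window_size: center + window_size + 1]
            labeled = labeled - min(labeled)
            # auto-convolution max, then cross-convolution max with the model
            convolve_list.append(max(scipy.signal.fftconvolve(labeled, labeled)))
            convolve_list.append(max(scipy.signal.fftconvolve(labeled, av_model)))
        return convolve_list

    data = pd.Series(np.concatenate([np.zeros(10), [0.5, 1.0, 0.5], np.zeros(10)]))
    av_model = np.array([0.5, 1.0, 0.5])   # stand-in for utils.get_av_model output
    print(get_convolve([11], av_model, data, window_size=1))   # ~[0.25, 0.5]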
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index ed2f5ca..a18a7cb 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -42,73 +42,28 @@ class JumpModel(Model):
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window = 5).mean()
-                flat_segment_dropna = flat_segment.dropna()
-                min_jump = min(flat_segment_dropna)
-                max_jump = max(flat_segment_dropna)
-                pdf = gaussian_kde(flat_segment_dropna)
-                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
-                try:
-                    min_peak_index = peaks_kde[0]
-                    segment_min_line = ax_list[min_peak_index, 0]
-                    max_peak_index = peaks_kde[1]
-                    segment_max_line = ax_list[max_peak_index, 0]
-                    segment_median = ax_list[antipeaks_kde[0], 0]
-                except IndexError:
-                    segment_max_line = max_jump
-                    segment_min_line = min_jump
-                    segment_median = (max_jump - min_jump) / 2 + min_jump
-                jump_height = 0.95 * (segment_max_line - segment_min_line)
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
+                segment_cent_index, jump_height, jump_length = utils.find_jump_parameters(segment_data, segment_from_index)
                 jump_height_list.append(jump_height)
-                jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
                 jump_length_list.append(jump_length)
-                cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                jump_center = cen_ind[0]
-                segment_cent_index = jump_center - 5 + segment_from_index
                 self.ijumps.append(segment_cent_index)
-                labeled_jump = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_jump = labeled_jump - min(labeled_jump)
+                labeled_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                labeled_jump = utils.subtract_min_without_nan(labeled_jump)
                 patterns_list.append(labeled_jump)
 
         self.model_jump = utils.get_av_model(patterns_list)
-        for ijump in self.ijumps:
-            labeled_jump = data[ijump - self.state['WINDOW_SIZE']: ijump + self.state['WINDOW_SIZE'] + 1]
-            labeled_jump = labeled_jump - min(labeled_jump)
-            auto_convolve = scipy.signal.fftconvolve(labeled_jump, labeled_jump)
-            convolve_jump = scipy.signal.fftconvolve(labeled_jump, self.model_jump)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_jump))
+        convolve_list = utils.get_convolve(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
-                flat_segment = segment_data.rolling(window = 5).mean()
-                flat_segment_dropna = flat_segment.dropna()
-                pdf = gaussian_kde(flat_segment_dropna)
-                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                segment_median = ax_list[antipeaks_kde[0], 0]
-                cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                jump_center = cen_ind[0]
-                segment_cent_index = jump_center - 5 + segment_from_index
-                deleted_jump = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_jump = deleted_jump - min(labeled_jump)
+                segment_cent_index = utils.find_jump_parameters(segment_data, segment_from_index)[0]
+                deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                deleted_jump = utils.subtract_min_without_nan(deleted_jump)
                 del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.model_jump)
                 del_conv_list.append(max(del_conv_jump))
 
@@ -169,7 +124,7 @@ class JumpModel(Model):
         delete_low_bound = self.state['conv_del_min'] * 0.98
         for segment in segments:
             if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
-                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)
diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py
index 7a0ec64..139e998 100644
--- a/analytics/analytics/models/peak_model.py
+++ b/analytics/analytics/models/peak_model.py
@@ -34,41 +34,30 @@ class PeakModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.2 * (segment_max - segment_min))
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
                 segment_max_index = segment_data.idxmax()
                 self.ipeaks.append(segment_max_index)
-                labeled_peak = data[segment_max_index - self.state['WINDOW_SIZE']: segment_max_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_peak = labeled_peak - min(labeled_peak)
+                labeled_peak = utils.get_interval(data, segment_max_index, self.state['WINDOW_SIZE'])
+                labeled_peak = utils.subtract_min_without_nan(labeled_peak)
                 patterns_list.append(labeled_peak)
 
         self.model_peak = utils.get_av_model(patterns_list)
-        for ipeak in self.ipeaks: #labeled segments
-            labeled_peak = data[ipeak - self.state['WINDOW_SIZE']: ipeak + self.state['WINDOW_SIZE'] + 1]
-            labeled_peak = labeled_peak - min(labeled_peak)
-            auto_convolve = scipy.signal.fftconvolve(labeled_peak, labeled_peak)
-            convolve_peak = scipy.signal.fftconvolve(labeled_peak, self.model_peak)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_peak))
+        convolve_list = utils.get_convolve(self.ipeaks, self.model_peak, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
                 del_max_index = segment_data.idxmax()
-                deleted_peak = data[del_max_index - self.state['WINDOW_SIZE']: del_max_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_peak = deleted_peak - min(deleted_peak)
+                deleted_peak = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
+                deleted_peak = utils.subtract_min_without_nan(deleted_peak)
                 del_conv_peak = scipy.signal.fftconvolve(deleted_peak, self.model_peak)
                 del_conv_list.append(max(del_conv_peak))
 
@@ -124,8 +113,8 @@ class PeakModel(Model):
         pattern_data = self.model_peak
         for segment in segments:
             if segment > self.state['WINDOW_SIZE']:
-                convol_data = data[segment - self.state['WINDOW_SIZE']: segment + self.state['WINDOW_SIZE'] + 1]
-                convol_data = convol_data - min(convol_data)
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
+                convol_data = utils.subtract_min_without_nan(convol_data)
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)
diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py
index 7799fa7..e361469 100644
--- a/analytics/analytics/models/trough_model.py
+++ b/analytics/analytics/models/trough_model.py
@@ -34,42 +34,31 @@ class TroughModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.2 * (segment_max - segment_min))
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
                 segment_min_index = segment_data.idxmin()
                 self.itroughs.append(segment_min_index)
-                labeled_trough = data[segment_min_index - self.state['WINDOW_SIZE'] : segment_min_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_trough = labeled_trough - min(labeled_trough)
+                labeled_trough = utils.get_interval(data, segment_min_index, self.state['WINDOW_SIZE'])
+                labeled_trough = utils.subtract_min_without_nan(labeled_trough)
                 patterns_list.append(labeled_trough)
 
         self.model_trough = utils.get_av_model(patterns_list)
-        for itrough in self.itroughs:
-            labeled_trough = data[itrough - self.state['WINDOW_SIZE']: itrough + self.state['WINDOW_SIZE'] + 1]
-            labeled_trough = labeled_trough - min(labeled_trough)
-            auto_convolve = scipy.signal.fftconvolve(labeled_trough, labeled_trough)
-            convolve_trough = scipy.signal.fftconvolve(labeled_trough, self.model_trough)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_trough))
+        convolve_list = utils.get_convolve(self.itroughs, self.model_trough, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
                 del_min_index = segment_data.idxmin()
-                deleted_trough = data[del_min_index - self.state['WINDOW_SIZE']: del_min_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_trough = deleted_trough - min(deleted_trough)
+                deleted_trough = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
+                deleted_trough = utils.subtract_min_without_nan(deleted_trough)
                 del_conv_trough = scipy.signal.fftconvolve(deleted_trough, self.model_trough)
                 del_conv_list.append(max(del_conv_trough))
 
@@ -125,8 +114,8 @@ class TroughModel(Model):
         pattern_data = self.model_trough
         for segment in segments:
             if segment > self.state['WINDOW_SIZE']:
-                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
-                convol_data = convol_data - min(convol_data)
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
+                convol_data = utils.subtract_min_without_nan(convol_data)
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)
diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py
index e2efe7e..cb39109 100644
--- a/analytics/analytics/utils/common.py
+++ b/analytics/analytics/utils/common.py
@@ -1,6 +1,10 @@
 import numpy as np
 import pandas as pd
-
+import scipy.signal
+from scipy.fftpack import fft
+from scipy.signal import argrelextrema
+from scipy.stats import gaussian_kde
+import utils
 
 def exponential_smoothing(series, alpha):
     result = [series[0]]
@@ -275,4 +279,77 @@ def nan_to_zero(segment, nan_list):
     for val in nan_list:
         segment[val] = 0
     return segment
-
+
+def find_confidence(segment: pd.Series) -> float:
+    segment_min = min(segment)
+    segment_max = max(segment)
+    return 0.2 * (segment_max - segment_min)
+
+def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
+    left_bound = center - window_size
+    right_bound = center + window_size + 1
+    return data[left_bound: right_bound]
+
+def subtract_min_without_nan(segment: pd.Series) -> pd.Series:
+    if not np.isnan(min(segment)):
+        segment = segment - min(segment)
+    return segment
+
+def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
+    labeled_segment = []
+    convolve_list = []
+    for segment in segments:
+        labeled_segment = utils.get_interval(data, segment, window_size)
+        labeled_segment = utils.subtract_min_without_nan(labeled_segment)
+        auto_convolve = scipy.signal.fftconvolve(labeled_segment, labeled_segment)
+        convolve_segment = scipy.signal.fftconvolve(labeled_segment, av_model)
+        convolve_list.append(max(auto_convolve))
+        convolve_list.append(max(convolve_segment))
+    return convolve_list
+
+
+def find_jump_parameters(segment_data: pd.Series, segment_from_index: int):
+    flat_segment = segment_data.rolling(window=5).mean()
+    flat_segment_dropna = flat_segment.dropna()
+    segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
+    jump_height = 0.95 * (segment_max_line - segment_min_line)
+    jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
+    cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) # finds all intersections with the median
+    jump_center = cen_ind[0]
+    segment_cent_index = jump_center - 5 + segment_from_index
+    return segment_cent_index, jump_height, jump_length
+
+
+def find_drop_parameters(segment_data: pd.Series, segment_from_index: int):
+    flat_segment = segment_data.rolling(window=5).mean()
+    flat_segment_dropna = flat_segment.dropna()
+    segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
+    drop_height = 0.95 * (segment_max_line - segment_min_line)
+    drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
+    cen_ind = utils.drop_intersection(flat_segment.tolist(), segment_median) # finds all intersections with the median
+    drop_center = cen_ind[0]
+    segment_cent_index = drop_center - 5 + segment_from_index
+    return segment_cent_index, drop_height, drop_length
+
+
+def get_distribution_density(segment: pd.Series) -> tuple:
+    min_jump = min(segment)
+    max_jump = max(segment)
+    pdf = gaussian_kde(segment)
+    x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment))
+    y = pdf(x)
+    ax_list = list(zip(x, y))
+    ax_list = np.array(ax_list, np.float32)
+    antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
+    peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
+    try:
+        min_peak_index = peaks_kde[0]
+        segment_min_line = ax_list[min_peak_index, 0]
+        max_peak_index = peaks_kde[1]
+        segment_max_line = ax_list[max_peak_index, 0]
+        segment_median = ax_list[antipeaks_kde[0], 0]
+    except IndexError:
+        segment_max_line = max_jump
+        segment_min_line = min_jump
+        segment_median = (max_jump - min_jump) / 2 + min_jump
+    return segment_median, segment_max_line, segment_min_line
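get_distribution_density drives both find_jump_parameters and find_drop_parameters: on a flattened two-level segment, the first two peaks of the KDE give the lower and upper plateau lines, and the first antipeak between them approximates the median. A synthetic check (function body condensed from the hunk above; the exact numbers depend on the random noise):

    import numpy as np
    import pandas as pd
    from scipy.signal import argrelextrema
    from scipy.stats import gaussian_kde

    def get_distribution_density(segment: pd.Series) -> tuple:
        min_jump = min(segment)
        max_jump = max(segment)
        pdf = gaussian_kde(segment)
        x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment))
        y = pdf(x)
        ax_list = np.array(list(zip(x, y)), np.float32)
        antipeaks_kde = argrelextrema(ax_list, np.less)[0]
        peaks_kde = argrelextrema(ax_list, np.greater)[0]
        try:
            segment_min_line = ax_list[peaks_kde[0], 0]    # x of the first density peak
            segment_max_line = ax_list[peaks_kde[1], 0]    # x of the second density peak
            segment_median = ax_list[antipeaks_kde[0], 0]  # x of the antipeak between them
        except IndexError:
            # unimodal fallback: use the raw extremes and their midpoint
            segment_max_line = max_jump
            segment_min_line = min_jump
            segment_median = (max_jump - min_jump) / 2 + min_jump
        return segment_median, segment_max_line, segment_min_line

    rng = np.random.RandomState(0)
    segment = pd.Series(np.concatenate([rng.normal(1.0, 0.05, 150),
                                        rng.normal(5.0, 0.05, 150)]))
    median, max_line, min_line = get_distribution_density(segment)
    print(round(median, 1), round(max_line, 1), round(min_line, 1))  # ~ 3.0 5.0 1.0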
diff --git a/analytics/analytics/utils/segments.py b/analytics/analytics/utils/segments.py
index 1d969d2..8d75067 100644
--- a/analytics/analytics/utils/segments.py
+++ b/analytics/analytics/utils/segments.py
@@ -2,7 +2,7 @@ import pandas as pd
 
 from utils.common import timestamp_to_index
 
-def parse_segment(segment, dataframe):
+def parse_segment(segment: dict, dataframe: pd.DataFrame):
     start = timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
     end = timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
     data = dataframe['value'][start: end + 1]
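parse_segment is the single entry point the models now use to turn a labeled segment into indices plus a value slice. A toy run follows; note that timestamp_to_index is stubbed here with the behavior the models appear to rely on (the real helper is utils.common.timestamp_to_index, unchanged by this patch), and the trailing return is implied by how the models unpack the result:

    import pandas as pd

    def timestamp_to_index(dataframe: pd.DataFrame, timestamp) -> int:
        # stub: index of the first row at or after `timestamp`
        return int(dataframe[dataframe['timestamp'] >= timestamp].index[0])

    def parse_segment(segment: dict, dataframe: pd.DataFrame):
        start = timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
        end = timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
        data = dataframe['value'][start: end + 1]
        return start, end, data

    dataframe = pd.DataFrame({
        'timestamp': pd.date_range('2018-11-28', periods=5, freq='s'),
        'value': [0.0, 1.0, 5.0, 1.0, 0.0],
    })
    ms = dataframe['timestamp'].astype('int64') // 10**6   # epoch milliseconds
    segment = {'from': int(ms[1]), 'to': int(ms[3]), 'labeled': True, 'deleted': False}
    start, end, data = parse_segment(segment, dataframe)
    print(start, end, data.tolist())   # 1 3 [1.0, 5.0, 1.0]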