Browse Source

The center of drop and jump patterns is shifted from the real value #291 (#292)

* fix centers in jumps and drops

* coeff

* add simple docstring

* fix utils

* fix spaces

* fix names
pull/1/head
Alexandr Velikiy 6 years ago committed by Alexey Velikiy
parent
commit
6641a5e52c
  1. 56
      analytics/analytics/utils/common.py

56
analytics/analytics/utils/common.py

@ -7,6 +7,9 @@ from scipy.stats import gaussian_kde
from typing import Union from typing import Union
import utils import utils
SHIFT_FACTOR = 0.05
CONFIDENCE_FACTOR = 0.2
def exponential_smoothing(series, alpha): def exponential_smoothing(series, alpha):
result = [series[0]] result = [series[0]]
if np.isnan(result): if np.isnan(result):
@ -62,9 +65,9 @@ def segments_box(segments):
max_time = pd.to_datetime(max_time, unit='ms') max_time = pd.to_datetime(max_time, unit='ms')
return min_time, max_time return min_time, max_time
def intersection_segment(data, median): def find_intersections(data: pd.Series, median: float) -> list:
""" """
Finds all intersections between flatten data and median Finds all intersections between drop pattern data and median
""" """
cen_ind = [] cen_ind = []
for i in range(1, len(data) - 1): for i in range(1, len(data) - 1):
@ -181,18 +184,21 @@ def find_drop_length(segment_data, min_line, max_line):
print("retard alert!") print("retard alert!")
return 0 return 0
def drop_intersection(segment_data, median_line): def find_drop_intersections(segment_data: pd.Series, median_line: float) -> list:
x = np.arange(0, len(segment_data)) """
f = [] Finds all intersections between flatten data and median
for i in range(len(segment_data)): """
f.append(median_line) cen_ind = []
f = np.array(f) for i in range(1, len(segment_data)-1):
g = [] if segment_data[i - 1] > median_line and segment_data[i + 1] < median_line:
for i in segment_data: cen_ind.append(i)
g.append(i) # Delete close values except the last one
g = np.array(g) del_ind = []
idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1) + 0 for i in range(1, len(cen_ind)):
return idx if cen_ind[i] == cen_ind[i - 1] + 1:
del_ind.append(i - 1)
return [x for (idx, x) in enumerate(cen_ind) if idx not in del_ind]
def find_drop(data, height, length): def find_drop(data, height, length):
d_list = [] d_list = []
@ -219,7 +225,6 @@ def peak_finder(data, size):
def ar_mean(numbers): def ar_mean(numbers):
return float(sum(numbers)) / max(len(numbers), 1) return float(sum(numbers)) / max(len(numbers), 1)
def get_av_model(patterns_list): def get_av_model(patterns_list):
if len(patterns_list) == 0: if len(patterns_list) == 0:
return [] return []
@ -295,7 +300,7 @@ def find_confidence(segment: pd.Series) -> float:
segment = utils.check_nan_values(segment) segment = utils.check_nan_values(segment)
segment_min = min(segment) segment_min = min(segment)
segment_max = max(segment) segment_max = max(segment)
return 0.2 * (segment_max - segment_min) return CONFIDENCE_FACTOR * (segment_max - segment_min)
def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series: def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
left_bound = center - window_size left_bound = center - window_size
@ -329,31 +334,28 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i
convolve_list.append(max(convolve_segment)) convolve_list.append(max(convolve_segment))
return convolve_list return convolve_list
def find_jump_parameters(segment_data: pd.Series, segment_from_index: int): def find_jump_parameters(segment_data: pd.Series, segment_from_index: int):
flat_segment = segment_data.rolling(window=5).mean() flat_segment = segment_data.rolling(window=5).mean()
flat_segment_dropna = flat_segment.dropna() flat_segment_dropna = flat_segment.dropna()
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna) segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
jump_height = 0.95 * (segment_max_line - segment_min_line) jump_height = (1 - SHIFT_FACTOR) * (segment_max_line - segment_min_line)
jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line) # finds all intersections with median jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line) # finds all intersections with median
cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) cen_ind = utils.find_intersections(segment_data.tolist(), segment_median)
jump_center = cen_ind[0] jump_center = cen_ind[0]
segment_cent_index = jump_center - 5 + segment_from_index segment_cent_index = jump_center + segment_from_index
return segment_cent_index, jump_height, jump_length return segment_cent_index, jump_height, jump_length
def find_drop_parameters(segment_data: pd.Series, segment_from_index: int): def find_drop_parameters(segment_data: pd.Series, segment_from_index: int):
flat_segment = segment_data.rolling(window=5).mean() flat_segment = segment_data.rolling(window=5).mean()
flat_segment_dropna = flat_segment.dropna() flat_segment_dropna = flat_segment.dropna()
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna) segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
drop_height = 0.95 * (segment_max_line - segment_min_line) drop_height = (1 - SHIFT_FACTOR) * (segment_max_line - segment_min_line)
drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line) drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
cen_ind = utils.drop_intersection(flat_segment.tolist(), segment_median) cen_ind = utils.find_drop_intersections(segment_data.tolist(), segment_median)
drop_center = cen_ind[0] drop_center = cen_ind[0]
segment_cent_index = drop_center - 5 + segment_from_index segment_cent_index = drop_center + segment_from_index
return segment_cent_index, drop_height, drop_length return segment_cent_index, drop_height, drop_length
def get_distribution_density(segment: pd.Series) -> float: def get_distribution_density(segment: pd.Series) -> float:
min_jump = min(segment) min_jump = min(segment)
max_jump = max(segment) max_jump = max(segment)
@ -371,7 +373,7 @@ def get_distribution_density(segment: pd.Series) -> float:
segment_max_line = ax_list[max_peak_index, 0] segment_max_line = ax_list[max_peak_index, 0]
segment_median = ax_list[antipeaks_kde[0], 0] segment_median = ax_list[antipeaks_kde[0], 0]
except IndexError: except IndexError:
segment_max_line = max_jump segment_max_line = max_jump * (1 - SHIFT_FACTOR)
segment_min_line = min_jump segment_min_line = min_jump * (1 - SHIFT_FACTOR)
segment_median = (max_jump - min_jump) / 2 + min_jump segment_median = (max_jump - min_jump) / 2 + min_jump
return segment_median, segment_max_line, segment_min_line return segment_median, segment_max_line, segment_min_line

Loading…
Cancel
Save