Browse Source

The center of drop and jump patterns is shifted from the real value #291 (#292)

* fix centers in jumps and drops

* coeff

* add simple docstring

* fix utils

* fix spaces

* fix names
pull/1/head
Alexandr Velikiy 6 years ago committed by Alexey Velikiy
parent
commit
6641a5e52c
  1. 58
      analytics/analytics/utils/common.py

58
analytics/analytics/utils/common.py

@ -7,6 +7,9 @@ from scipy.stats import gaussian_kde
from typing import Union
import utils
# Fraction taken off the max/min line span when computing jump and drop heights
# (applied as `1 - SHIFT_FACTOR`); also scales the fallback max/min lines in
# get_distribution_density.
SHIFT_FACTOR = 0.05
# Fraction of a segment's value range (max - min) returned by find_confidence.
CONFIDENCE_FACTOR = 0.2
def exponential_smoothing(series, alpha):
result = [series[0]]
if np.isnan(result):
@ -62,12 +65,12 @@ def segments_box(segments):
max_time = pd.to_datetime(max_time, unit='ms')
return min_time, max_time
def intersection_segment(data, median):
def find_intersections(data: pd.Series, median: float) -> list:
"""
Finds all intersections between flatten data and median
Finds all intersections between drop pattern data and median
"""
cen_ind = []
for i in range(1, len(data)-1):
for i in range(1, len(data) - 1):
if data[i - 1] < median and data[i + 1] > median:
cen_ind.append(i)
del_ind = []
@ -181,18 +184,21 @@ def find_drop_length(segment_data, min_line, max_line):
print("retard alert!")
return 0
def drop_intersection(segment_data, median_line):
    """
    Finds the positions where segment data changes side relative to the median line.

    Args:
        segment_data: iterable of numeric values.
        median_line: scalar level the values are compared against.

    Returns:
        1-D numpy array of indices i for which sign(median_line - value) differs
        between positions i and i + 1. A value exactly equal to median_line has
        sign 0, so touching the line is reported as well.
    """
    values = np.asarray(list(segment_data))
    # Broadcasting the scalar replaces the hand-built constant array of medians
    side = np.sign(median_line - values)
    return np.argwhere(np.diff(side) != 0).reshape(-1)
def find_drop_intersections(segment_data: pd.Series, median_line: float) -> list:
    """
    Finds all downward intersections between drop pattern data and median

    Position i counts as an intersection when the previous value is strictly
    above median_line and the next value is strictly below it. The first and
    last positions are never reported because they lack a neighbour.

    Args:
        segment_data: values of the segment (pd.Series or any sequence).
        median_line: level that the data has to cross from above.

    Returns:
        List of positional indices. From every run of adjacent indices only
        the last one is kept. Empty list when nothing crosses the median.
    """
    # Work on a plain list so lookups are positional: integer indexing on a
    # pd.Series is label-based and fails when the index does not start at 0
    values = list(segment_data)
    crossings = [
        i for i in range(1, len(values) - 1)
        if values[i - 1] > median_line and values[i + 1] < median_line
    ]
    # Delete close values except the last one
    last_pos = len(crossings) - 1
    return [
        idx for pos, idx in enumerate(crossings)
        if pos == last_pos or crossings[pos + 1] != idx + 1
    ]
def find_drop(data, height, length):
d_list = []
@ -219,7 +225,6 @@ def peak_finder(data, size):
def ar_mean(numbers):
    """
    Returns the arithmetic mean of numbers as a float; 0.0 for an empty collection
    """
    count = len(numbers)
    total = float(sum(numbers))
    # An empty collection divides by 1 instead of 0
    return total / (count or 1)
def get_av_model(patterns_list):
if len(patterns_list) == 0:
return []
@ -295,7 +300,7 @@ def find_confidence(segment: pd.Series) -> float:
segment = utils.check_nan_values(segment)
segment_min = min(segment)
segment_max = max(segment)
return 0.2 * (segment_max - segment_min)
return CONFIDENCE_FACTOR * (segment_max - segment_min)
def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
left_bound = center - window_size
@ -329,31 +334,28 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i
convolve_list.append(max(convolve_segment))
return convolve_list
def find_jump_parameters(segment_data: pd.Series, segment_from_index: int):
    """
    Computes center index, height and length of a jump pattern in a segment

    Args:
        segment_data: values of the labeled segment.
        segment_from_index: offset added to the center found inside the
            segment to produce the returned center index.

    Returns:
        Tuple (segment_cent_index, jump_height, jump_length).

    Raises:
        IndexError: if no intersection with the median is found
            (cen_ind is empty).
    """
    # Min/max/median lines are estimated on data smoothed by a 5-point rolling mean
    flat_segment = segment_data.rolling(window=5).mean()
    flat_segment_dropna = flat_segment.dropna()
    segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
    jump_height = (1 - SHIFT_FACTOR) * (segment_max_line - segment_min_line)
    jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
    # Finds all intersections with median on the raw (unsmoothed) data, so the
    # found index needs no correction for the rolling window
    cen_ind = utils.find_intersections(segment_data.tolist(), segment_median)
    jump_center = cen_ind[0]
    segment_cent_index = jump_center + segment_from_index
    return segment_cent_index, jump_height, jump_length
def find_drop_parameters(segment_data: pd.Series, segment_from_index: int):
    """
    Computes center index, height and length of a drop pattern in a segment

    Args:
        segment_data: values of the labeled segment.
        segment_from_index: offset added to the center found inside the
            segment to produce the returned center index.

    Returns:
        Tuple (segment_cent_index, drop_height, drop_length).

    Raises:
        IndexError: if no intersection with the median is found
            (cen_ind is empty).
    """
    # Min/max/median lines are estimated on data smoothed by a 5-point rolling mean
    flat_segment = segment_data.rolling(window=5).mean()
    flat_segment_dropna = flat_segment.dropna()
    segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
    drop_height = (1 - SHIFT_FACTOR) * (segment_max_line - segment_min_line)
    drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
    # Intersections are searched on the raw (unsmoothed) data, so the found
    # index needs no correction for the rolling window
    cen_ind = utils.find_drop_intersections(segment_data.tolist(), segment_median)
    drop_center = cen_ind[0]
    segment_cent_index = drop_center + segment_from_index
    return segment_cent_index, drop_height, drop_length
def get_distribution_density(segment: pd.Series) -> float:
min_jump = min(segment)
max_jump = max(segment)
@ -371,7 +373,7 @@ def get_distribution_density(segment: pd.Series) -> float:
segment_max_line = ax_list[max_peak_index, 0]
segment_median = ax_list[antipeaks_kde[0], 0]
except IndexError:
segment_max_line = max_jump
segment_min_line = min_jump
segment_max_line = max_jump * (1 - SHIFT_FACTOR)
segment_min_line = min_jump * (1 - SHIFT_FACTOR)
segment_median = (max_jump - min_jump) / 2 + min_jump
return segment_median, segment_max_line, segment_min_line

Loading…
Cancel
Save