From 6641a5e52c21c850da2d5ba1548af7c4d0147c3f Mon Sep 17 00:00:00 2001 From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Fri, 7 Dec 2018 00:19:44 +0300 Subject: [PATCH] The center of drop and jump patterns is shifted from real value #291 (#292) * fix centers in jumps and drops * coeff * add simple docstring * fix utils * fix spaces * fix names --- analytics/analytics/utils/common.py | 58 +++++++++++++++-------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py index 92f1616..000aadc 100644 --- a/analytics/analytics/utils/common.py +++ b/analytics/analytics/utils/common.py @@ -7,6 +7,9 @@ from scipy.stats import gaussian_kde from typing import Union import utils +SHIFT_FACTOR = 0.05 +CONFIDENCE_FACTOR = 0.2 + def exponential_smoothing(series, alpha): result = [series[0]] if np.isnan(result): @@ -62,12 +65,12 @@ def segments_box(segments): max_time = pd.to_datetime(max_time, unit='ms') return min_time, max_time -def intersection_segment(data, median): +def find_intersections(data: pd.Series, median: float) -> list: """ - Finds all intersections between flatten data and median + Finds all intersections between drop pattern data and median """ cen_ind = [] - for i in range(1, len(data)-1): + for i in range(1, len(data) - 1): if data[i - 1] < median and data[i + 1] > median: cen_ind.append(i) del_ind = [] @@ -181,18 +184,21 @@ def find_drop_length(segment_data, min_line, max_line): print("retard alert!") return 0 -def drop_intersection(segment_data, median_line): - x = np.arange(0, len(segment_data)) - f = [] - for i in range(len(segment_data)): - f.append(median_line) - f = np.array(f) - g = [] - for i in segment_data: - g.append(i) - g = np.array(g) - idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1) + 0 - return idx +def find_drop_intersections(segment_data: pd.Series, median_line: float) -> list: + """ + Finds all intersections between flatten data and median + """ + cen_ind = [] + for i in range(1, len(segment_data)-1): + if segment_data[i - 1] > median_line and segment_data[i + 1] < median_line: + cen_ind.append(i) + # Delete close values except the last one + del_ind = [] + for i in range(1, len(cen_ind)): + if cen_ind[i] == cen_ind[i - 1] + 1: + del_ind.append(i - 1) + + return [x for (idx, x) in enumerate(cen_ind) if idx not in del_ind] def find_drop(data, height, length): d_list = [] @@ -219,7 +225,6 @@ def peak_finder(data, size): def ar_mean(numbers): return float(sum(numbers)) / max(len(numbers), 1) - def get_av_model(patterns_list): if len(patterns_list) == 0: return [] @@ -295,7 +300,7 @@ def find_confidence(segment: pd.Series) -> float: segment = utils.check_nan_values(segment) segment_min = min(segment) segment_max = max(segment) - return 0.2 * (segment_max - segment_min) + return CONFIDENCE_FACTOR * (segment_max - segment_min) def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series: left_bound = center - window_size @@ -329,31 +334,28 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i convolve_list.append(max(convolve_segment)) return convolve_list - def find_jump_parameters(segment_data: pd.Series, segment_from_index: int): flat_segment = segment_data.rolling(window=5).mean() flat_segment_dropna = flat_segment.dropna() segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna) - jump_height = 0.95 * (segment_max_line - segment_min_line) + jump_height = (1 - SHIFT_FACTOR) * (segment_max_line - segment_min_line) jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line) # finds all interseprions with median - cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) + cen_ind = utils.find_intersections(segment_data.tolist(), segment_median) jump_center = cen_ind[0] - segment_cent_index = jump_center - 5 + segment_from_index + segment_cent_index = jump_center + segment_from_index return segment_cent_index, jump_height, jump_length - def find_drop_parameters(segment_data: pd.Series, segment_from_index: int): flat_segment = segment_data.rolling(window=5).mean() flat_segment_dropna = flat_segment.dropna() segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna) - drop_height = 0.95 * (segment_max_line - segment_min_line) + drop_height = (1 - SHIFT_FACTOR) * (segment_max_line - segment_min_line) drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line) - cen_ind = utils.drop_intersection(flat_segment.tolist(), segment_median) + cen_ind = utils.find_drop_intersections(segment_data.tolist(), segment_median) drop_center = cen_ind[0] - segment_cent_index = drop_center - 5 + segment_from_index + segment_cent_index = drop_center + segment_from_index return segment_cent_index, drop_height, drop_length - def get_distribution_density(segment: pd.Series) -> float: min_jump = min(segment) max_jump = max(segment) @@ -371,7 +373,7 @@ def get_distribution_density(segment: pd.Series) -> float: segment_max_line = ax_list[max_peak_index, 0] segment_median = ax_list[antipeaks_kde[0], 0] except IndexError: - segment_max_line = max_jump - segment_min_line = min_jump + segment_max_line = max_jump * (1 - SHIFT_FACTOR) + segment_min_line = min_jump * (1 - SHIFT_FACTOR) segment_median = (max_jump - min_jump) / 2 + min_jump return segment_median, segment_max_line, segment_min_line