diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index 1a5e34b..41fd4d6 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -26,33 +26,45 @@ class DropModel(Model):
             'conv_del_min': 54000,
             'conv_del_max': 55000,
         }
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        segment_center_index = utils.find_pattern_center(segment, start, 'drop')
+        return segment_center_index
 
     def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
         convolve_list = []
+        correlation_list = []
         drop_height_list = []
         drop_length_list = []
         patterns_list = []
-
+        pattern_timestamp = []
         for segment in labeled_segments:
-            confidence = utils.find_confidence(segment.data)
+            confidence = utils.find_confidence(segment.data)[0]
             confidences.append(confidence)
-            segment_cent_index, drop_height, drop_length = utils.find_parameters(segment.data, segment.start, 'drop')
+            segment_cent_index = segment.center_index
+            drop_height, drop_length = utils.find_parameters(segment.data, segment.start, 'drop')
             drop_height_list.append(drop_height)
             drop_length_list.append(drop_length)
             self.idrops.append(segment_cent_index)
+            pattern_timestamp.append(segment.pattern_timestamp)
             labeled_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
             labeled_drop = utils.subtract_min_without_nan(labeled_drop)
             patterns_list.append(labeled_drop)
 
         self.model_drop = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
+        correlation_list = utils.get_correlation(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
+        delete_pattern_timestamp = []
         for segment in deleted_segments:
-            segment_cent_index = utils.find_parameters(segment.data, segment.start, 'drop')[0]
+            segment_cent_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
             deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
             deleted_drop = utils.subtract_min_without_nan(deleted_drop)
             del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.model_drop)
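Note: drop-center detection now lives in `find_segment_center`, which delegates to `utils.find_pattern_center` (see the `common.py` hunk below), so `do_fit` reads the precomputed `segment.center_index` instead of recomputing it. A minimal standalone sketch of the median-crossing idea behind the `'drop'` case, using a plain median rather than the repo's density estimate (the helper name here is hypothetical):

```python
import pandas as pd

def drop_center(segment: pd.Series, offset: int = 0) -> int:
    # index of the first crossing from above the median to at-or-below it
    median = segment.median()
    values = segment.tolist()
    for i in range(1, len(values)):
        if values[i - 1] > median and values[i] <= median:
            return offset + i
    return offset + len(values) // 2  # fallback: geometric center

print(drop_center(pd.Series([5.0, 5.0, 5.0, 3.0, 1.0, 1.0])))  # -> 3
```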
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index e6fef80..31ae277 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -26,25 +26,37 @@ class GeneralModel(Model):
             'conv_del_max': 120,
         }
         self.all_conv = []
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        center_ind = start + math.ceil((end - start) / 2)
+        return center_ind
 
     def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         convolve_list = []
+        correlation_list = []
         patterns_list = []
+        pattern_timestamp = []
         for segment in labeled_segments:
-            center_ind = segment.start + math.ceil(segment.length / 2)
+            center_ind = segment.center_index
             self.ipats.append(center_ind)
+            pattern_timestamp.append(segment.pattern_timestamp)
             segment_data = utils.get_interval(data, center_ind, self.state['WINDOW_SIZE'])
             segment_data = utils.subtract_min_without_nan(segment_data)
             patterns_list.append(segment_data)
 
         self.model_gen = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
+        correlation_list = utils.get_correlation(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
+        delete_pattern_timestamp = []
         for segment in deleted_segments:
-            del_mid_index = segment.start + math.ceil(segment.length / 2)
+            del_mid_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
             deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
             deleted_pat = utils.subtract_min_without_nan(deleted_pat)
             del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.model_gen)
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index 8a6207e..8b15731 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -27,32 +27,45 @@ class JumpModel(Model):
             'conv_del_min': 54000,
             'conv_del_max': 55000,
         }
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        segment_center_index = utils.find_pattern_center(segment, start, 'jump')
+        return segment_center_index
 
     def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
         convolve_list = []
+        correlation_list = []
         jump_height_list = []
        jump_length_list = []
         patterns_list = []
+        pattern_timestamp = []
         for segment in labeled_segments:
-            confidence = utils.find_confidence(segment.data)
+            confidence = utils.find_confidence(segment.data)[0]
             confidences.append(confidence)
-            segment_cent_index, jump_height, jump_length = utils.find_parameters(segment.data, segment.start, 'jump')
+            segment_cent_index = segment.center_index
+            jump_height, jump_length = utils.find_parameters(segment.data, segment.start, 'jump')
             jump_height_list.append(jump_height)
             jump_length_list.append(jump_length)
             self.ijumps.append(segment_cent_index)
+            pattern_timestamp.append(segment.pattern_timestamp)
             labeled_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
             labeled_jump = utils.subtract_min_without_nan(labeled_jump)
             patterns_list.append(labeled_jump)
 
         self.model_jump = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
+        correlation_list = utils.get_correlation(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
+        delete_pattern_timestamp = []
         for segment in deleted_segments:
-            segment_cent_index = utils.find_parameters(segment.data, segment.start, 'jump')[0]
+            segment_cent_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
             deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
             deleted_jump = utils.subtract_min_without_nan(deleted_jump)
             del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.model_jump)
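The general model keeps the simplest center: the midpoint of the labeled window, rounded up, now computed once in `find_segment_center` instead of inline in `do_fit`. A quick worked check of that formula:

```python
import math

def midpoint(start: int, end: int) -> int:
    # start + ceil((end - start) / 2), as in GeneralModel.find_segment_center
    return start + math.ceil((end - start) / 2)

assert midpoint(10, 17) == 14  # odd-length window rounds up
assert midpoint(0, 6) == 3
```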
diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py
index 9e8951a..c98cf63 100644
--- a/analytics/analytics/models/model.py
+++ b/analytics/analytics/models/model.py
@@ -12,12 +12,19 @@ class Segment(AttrDict):
 
     __percent_of_nans = 0
 
-    def __init__(self, dataframe: pd.DataFrame, segment_map: dict):
+    def __init__(self, dataframe: pd.DataFrame, segment_map: dict, center_finder = None):
         self.update(segment_map)
         self.start = utils.timestamp_to_index(dataframe, pd.to_datetime(self['from'], unit='ms'))
         self.end = utils.timestamp_to_index(dataframe, pd.to_datetime(self['to'], unit='ms'))
         self.length = abs(self.end - self.start)
+        if callable(center_finder):
+            self.center_index = center_finder(dataframe, self.start, self.end)
+            self.pattern_timestamp = dataframe['timestamp'][self.center_index]
+        else:
+            self.center_index = self.start + math.ceil(self.length / 2)
+            self.pattern_timestamp = dataframe['timestamp'][self.center_index]
+
         assert len(dataframe['value']) >= self.end + 1, \
             'segment {}-{} out of dataframe length={}'.format(self.start, self.end+1, len(dataframe['value']))
 
@@ -43,6 +50,10 @@ class Model(ABC):
     def do_detect(self, dataframe: pd.DataFrame) -> list:
         pass
 
+    @abstractmethod
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        pass
+
     def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[ModelCache]) -> ModelCache:
         if type(cache) is ModelCache:
             self.state = cache
@@ -52,12 +63,11 @@ class Model(ABC):
         deleted = []
         for segment_map in segments:
             if segment_map['labeled'] or segment_map['deleted']:
-                segment = Segment(dataframe, segment_map)
+                segment = Segment(dataframe, segment_map, self.find_segment_center)
                 if segment.percent_of_nans > 0.1 or len(segment.data) == 0:
                     continue
                 if segment.percent_of_nans > 0:
                     segment.convert_nan_to_zero()
-                max_length = max(segment.length, max_length)
                 if segment.labeled: labeled.append(segment)
                 if segment.deleted: deleted.append(segment)
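`Segment` now accepts an optional `center_finder` callable and records both `center_index` and the matching `pattern_timestamp`, falling back to the geometric midpoint when no finder is given. A standalone sketch of that injection pattern (field names mirror the diff, but the class name and sample data are invented for illustration):

```python
import math
import pandas as pd

class SegmentSketch:
    def __init__(self, dataframe: pd.DataFrame, start: int, end: int, center_finder=None):
        self.start, self.end = start, end
        self.length = abs(end - start)
        if callable(center_finder):
            # model-specific center, e.g. a peak's argmax
            self.center_index = center_finder(dataframe, start, end)
        else:
            # default: geometric midpoint of the segment
            self.center_index = start + math.ceil(self.length / 2)
        self.pattern_timestamp = dataframe['timestamp'][self.center_index]

df = pd.DataFrame({'timestamp': [100, 200, 300, 400], 'value': [0, 9, 1, 0]})
s = SegmentSketch(df, 0, 3, lambda d, a, b: d['value'][a:b].idxmax())
print(s.center_index, s.pattern_timestamp)  # -> 1 200
```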
diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py
index 30fc17d..c63ee01 100644
--- a/analytics/analytics/models/peak_model.py
+++ b/analytics/analytics/models/peak_model.py
@@ -26,32 +26,51 @@ class PeakModel(Model):
             'conv_del_min': 54000,
             'conv_del_max': 55000,
         }
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        return segment.idxmax()
 
     def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
         convolve_list = []
+        correlation_list = []
         patterns_list = []
+        pattern_width = []
+        pattern_height = []
+        pattern_timestamp = []
         for segment in labeled_segments:
-            confidence = utils.find_confidence(segment.data)
+            confidence = utils.find_confidence(segment.data)[0]
             confidences.append(confidence)
-            segment_max_index = segment.data.idxmax()
+            segment_max_index = segment.center_index
             self.ipeaks.append(segment_max_index)
+            pattern_timestamp.append(segment.pattern_timestamp)
             labeled = utils.get_interval(data, segment_max_index, self.state['WINDOW_SIZE'])
             labeled = utils.subtract_min_without_nan(labeled)
             patterns_list.append(labeled)
+            pattern_height.append(utils.find_confidence(labeled)[1])
+            pattern_width.append(utils.find_width(labeled, True))
 
         self.model = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.ipeaks, self.model, data, self.state['WINDOW_SIZE'])
+        correlation_list = utils.get_correlation(self.ipeaks, self.model, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
+        delete_pattern_width = []
+        delete_pattern_height = []
+        delete_pattern_timestamp = []
         for segment in deleted_segments:
-            del_max_index = segment.data.idxmax()
+            del_max_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
             deleted = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
             deleted = utils.subtract_min_without_nan(deleted)
             del_conv = scipy.signal.fftconvolve(deleted, self.model)
             if len(del_conv): del_conv_list.append(max(del_conv))
+            delete_pattern_height.append(utils.find_confidence(deleted)[1])
+            delete_pattern_width.append(utils.find_width(deleted, True))
 
         self._update_fiting_result(self.state, confidences, convolve_list, del_conv_list)
diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py
index 0b89591..c2e9bf7 100644
--- a/analytics/analytics/models/trough_model.py
+++ b/analytics/analytics/models/trough_model.py
@@ -26,33 +26,51 @@ class TroughModel(Model):
             'conv_del_min': 54000,
             'conv_del_max': 55000,
         }
+
+    def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
+        data = dataframe['value']
+        segment = data[start: end]
+        return segment.idxmin()
 
     def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         confidences = []
         convolve_list = []
+        correlation_list = []
         patterns_list = []
-
+        pattern_width = []
+        pattern_height = []
+        pattern_timestamp = []
         for segment in labeled_segments:
-            confidence = utils.find_confidence(segment.data)
+            confidence = utils.find_confidence(segment.data)[0]
             confidences.append(confidence)
-            segment_min_index = segment.data.idxmin()
+            segment_min_index = segment.center_index
             self.itroughs.append(segment_min_index)
+            pattern_timestamp.append(segment.pattern_timestamp)
             labeled = utils.get_interval(data, segment_min_index, self.state['WINDOW_SIZE'])
             labeled = utils.subtract_min_without_nan(labeled)
             patterns_list.append(labeled)
+            pattern_height.append(utils.find_confidence(labeled)[1])
+            pattern_width.append(utils.find_width(labeled, False))
 
         self.model = utils.get_av_model(patterns_list)
         convolve_list = utils.get_convolve(self.itroughs, self.model, data, self.state['WINDOW_SIZE'])
+        correlation_list = utils.get_correlation(self.itroughs, self.model, data, self.state['WINDOW_SIZE'])
 
         del_conv_list = []
+        delete_pattern_width = []
+        delete_pattern_height = []
+        delete_pattern_timestamp = []
         for segment in deleted_segments:
-            del_min_index = segment.data.idxmin()
+            del_min_index = segment.center_index
+            delete_pattern_timestamp.append(segment.pattern_timestamp)
             deleted = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
             deleted = utils.subtract_min_without_nan(deleted)
             del_conv = scipy.signal.fftconvolve(deleted, self.model)
             if len(del_conv): del_conv_list.append(max(del_conv))
+            delete_pattern_height.append(utils.find_confidence(deleted)[1])
+            delete_pattern_width.append(utils.find_width(deleted, False))
 
         self._update_fiting_result(self.state, confidences, convolve_list, del_conv_list)
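Peak and trough centers are simply `idxmax()`/`idxmin()` on the window slice. This works because pandas preserves the original index on a slice, so the returned label is a position in the full series, not an offset into the slice:

```python
import pandas as pd

data = pd.Series([5, 1, 2, 9, 3, 0])
segment = data[2:5]        # labels 2, 3, 4 are kept on the slice
print(segment.idxmax())    # -> 3, global position of the 9
print(segment.idxmin())    # -> 4, global position of the 3
```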
diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py
index 33c97b7..259e263 100644
--- a/analytics/analytics/utils/common.py
+++ b/analytics/analytics/utils/common.py
@@ -4,6 +4,7 @@ import scipy.signal
 from scipy.fftpack import fft
 from scipy.signal import argrelextrema
 from scipy.stats import gaussian_kde
+from scipy.stats.stats import pearsonr
 from typing import Union
 
 import utils
@@ -154,11 +155,38 @@ def nan_to_zero(segment: Union[pd.Series, list], nan_list: list) -> Union[pd.Ser
             segment[val] = 0
     return segment
 
-def find_confidence(segment: pd.Series) -> float:
+def find_confidence(segment: pd.Series) -> (float, float):
     segment = utils.check_nan_values(segment)
     segment_min = min(segment)
     segment_max = max(segment)
-    return CONFIDENCE_FACTOR * (segment_max - segment_min)
+    height = segment_max - segment_min
+    if height:
+        return (CONFIDENCE_FACTOR * height, height)
+    else:
+        return (0, 0)
+
+def find_width(pattern: pd.Series, selector) -> int:
+    pattern = pattern.values
+    center = utils.find_extremum_index(pattern, selector)
+    pattern_left = pattern[:center]
+    pattern_right = pattern[center:]
+    left_extremum_index = utils.find_last_extremum(pattern_left, selector)
+    right_extremum_index = utils.find_extremum_index(pattern_right, not selector)
+    left_width = center - left_extremum_index
+    right_width = right_extremum_index + 1
+    return right_width + left_width
+
+def find_last_extremum(segment: np.ndarray, selector: bool) -> int:
+    segment = segment[::-1]
+    first_extremum_ind = find_extremum_index(segment, not selector)
+    last_extremum_ind = len(segment) - first_extremum_ind - 1
+    return last_extremum_ind
+
+def find_extremum_index(segment: np.ndarray, selector: bool) -> int:
+    if selector:
+        return segment.argmax()
+    else:
+        return segment.argmin()
 
 def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
     left_bound = center - window_size
@@ -192,6 +220,19 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i
         convolve_list.append(max(convolve_segment))
     return convolve_list
 
+def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
+    labeled_segment = []
+    correlation_list = []
+    p_value_list = []
+    for segment in segments:
+        labeled_segment = utils.get_interval(data, segment, window_size)
+        labeled_segment = utils.subtract_min_without_nan(labeled_segment)
+        labeled_segment = utils.check_nan_values(labeled_segment)
+        correlation = pearsonr(labeled_segment, av_model)
+        correlation_list.append(correlation[0])
+        p_value_list.append(correlation[1])
+    return correlation_list
+
 def get_distribution_density(segment: pd.Series) -> float:
     if len(segment) < 2:
         return (0, 0, 0)
@@ -224,10 +265,14 @@ def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type:
     segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment)
     height = 0.95 * (segment_max_line - segment_min_line)
     length = utils.find_length(segment_data, segment_min_line, segment_max_line, pat_type)
-    cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pat_type)
+    return height, length
+
+def find_pattern_center(segment_data: pd.Series, segment_from_index: int, pattern_type: str):
+    segment_median = utils.get_distribution_density(segment_data)[0]
+    cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pattern_type)
     pat_center = cen_ind[0]
     segment_cent_index = pat_center + segment_from_index
-    return segment_cent_index, height, length
+    return segment_cent_index
 
 def find_length(segment_data: pd.Series, segment_min_line: float, segment_max_line: float, pat_type: str) -> int:
     x_abscissa = np.arange(0, len(segment_data))
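`get_correlation` mirrors `get_convolve` but scores each labeled window with `scipy.stats.pearsonr` against the averaged model (the import path `scipy.stats.stats` is the legacy alias of `scipy.stats`). A standalone sketch of the per-segment computation; the sample arrays are invented:

```python
import numpy as np
from scipy.stats import pearsonr

av_model = np.array([0.0, 2.0, 5.0, 2.0, 0.0])  # averaged pattern shape
window = np.array([1.0, 3.5, 6.0, 2.8, 1.2])    # one labeled window
window = window - window.min()                  # analogue of subtract_min_without_nan
r, p_value = pearsonr(window, av_model)         # pearsonr returns (r, p-value)
print(round(r, 3))                              # near 1.0 for a well-matched window
```

Note that `p_value_list` is collected but never returned; only the correlation coefficients reach the caller.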
diff --git a/analytics/tests/test_utils.py b/analytics/tests/test_utils.py
index faaad63..03bd30f 100644
--- a/analytics/tests/test_utils.py
+++ b/analytics/tests/test_utils.py
@@ -14,17 +14,17 @@ class TestUtils(unittest.TestCase):
 
     def test_confidence_all_normal_value(self):
         segment = [1, 2, 0, 6, 8, 5, 3]
-        utils_result = utils.find_confidence(segment)
+        utils_result = utils.find_confidence(segment)[0]
         result = 1.6
         self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
 
     def test_confidence_all_nan_value(self):
         segment = [np.NaN, np.NaN, np.NaN, np.NaN]
-        self.assertEqual(utils.find_confidence(segment), 0)
+        self.assertEqual(utils.find_confidence(segment)[0], 0)
 
     def test_confidence_with_nan_value(self):
         data = [np.NaN, np.NaN, 0, 8]
-        utils_result = utils.find_confidence(data)
+        utils_result = utils.find_confidence(data)[0]
         result = 1.6
         self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
 
@@ -91,39 +91,39 @@ class TestUtils(unittest.TestCase):
         segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
         segment = pd.Series(segment)
         jump_center = [10, 11]
-        self.assertIn(utils.find_parameters(segment, 0, 'jump')[0], jump_center)
+        self.assertIn(utils.find_pattern_center(segment, 0, 'jump'), jump_center)
 
     def test_find_jump_parameters_height(self):
         segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
         segment = pd.Series(segment)
         jump_height = [3.5, 4]
-        self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_height[0])
-        self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_height[1])
+        self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[0])
+        self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[1])
 
     def test_find_jump_parameters_length(self):
         segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
         segment = pd.Series(segment)
         jump_length = 2
-        self.assertEqual(utils.find_parameters(segment, 0, 'jump')[2], jump_length)
+        self.assertEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_length)
 
     def test_find_drop_parameters_center(self):
         segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
         segment = pd.Series(segment)
         drop_center = [14, 15, 16]
-        self.assertIn(utils.find_parameters(segment, 0, 'drop')[0], drop_center)
+        self.assertIn(utils.find_pattern_center(segment, 0, 'drop'), drop_center)
 
     def test_find_drop_parameters_height(self):
         segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
         segment = pd.Series(segment)
         drop_height = [3.5, 4]
-        self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_height[0])
-        self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_height[1])
+        self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[0])
+        self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[1])
 
     def test_find_drop_parameters_length(self):
         segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
         segment = pd.Series(segment)
         drop_length = 2
-        self.assertEqual(utils.find_parameters(segment, 0, 'drop')[2], drop_length)
+        self.assertEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_length)
 
     def test_get_av_model_empty_data(self):
         patterns_list = []
@@ -189,6 +189,13 @@ class TestUtils(unittest.TestCase):
         utils_result_segment = utils.get_distribution_density(segment)
         self.assertEqual(len(utils_result_data), 3)
         self.assertEqual(utils_result_segment, (0, 0, 0))
+
+    def test_find_pattern_jump_center(self):
+        data = [1.0, 1.0, 1.0, 5.0, 5.0, 5.0]
+        data = pd.Series(data)
+        median = 3.0
+        result = 3
+        self.assertEqual(result, utils.find_pattern_center(data, 0, 'jump'))
 
 if __name__ == '__main__':
     unittest.main()
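The new `test_find_pattern_jump_center` fixture is easy to verify by hand: the median of the two-level series sits between the levels, and the expected center is the first sample above it (the test's `median = 3.0` local documents this value but is otherwise unused). A quick standalone check consistent with that fixture:

```python
import pandas as pd

data = pd.Series([1.0, 1.0, 1.0, 5.0, 5.0, 5.0])
print(data.median())                    # -> 3.0, midway between the two levels
print((data > data.median()).idxmax())  # -> 3, first sample above the median
```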