From cbbc8c6cdd82ca22f1d31fd333096b19a50803d7 Mon Sep 17 00:00:00 2001 From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Thu, 20 Dec 2018 06:46:06 +0300 Subject: [PATCH] Error: ValueError - dataset input should have multiple elements #325 (#327) --- analytics/analytics/utils/common.py | 11 ++++-- analytics/tests/test_dataset.py | 12 +++++++ analytics/tests/test_utils.py | 52 +++++++++++++++++++++++------ 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py index 1c7f5a2..2c97029 100644 --- a/analytics/analytics/utils/common.py +++ b/analytics/analytics/utils/common.py @@ -9,6 +9,7 @@ import utils SHIFT_FACTOR = 0.05 CONFIDENCE_FACTOR = 0.2 +SMOOTHING_FACTOR = 5 def exponential_smoothing(series, alpha): result = [series[0]] @@ -192,6 +193,8 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i return convolve_list def get_distribution_density(segment: pd.Series) -> float: + if len(segment) < 2: + return (0, 0, 0) min_jump = min(segment) max_jump = max(segment) pdf = gaussian_kde(segment) @@ -214,9 +217,11 @@ def get_distribution_density(segment: pd.Series) -> float: return segment_median, segment_max_line, segment_min_line def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> [int, float, int]: - flat_segment = segment_data.rolling(window=5).mean() - flat_segment_dropna = flat_segment.dropna() - segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna) + segment = segment_data + if len(segment_data) > SMOOTHING_FACTOR * 3: + flat_segment = segment_data.rolling(window = SMOOTHING_FACTOR).mean() + segment = flat_segment.dropna() + segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment) height = 0.95 * (segment_max_line - segment_min_line) length = utils.find_length(segment_data, segment_min_line, segment_max_line, pat_type) cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pat_type) diff --git a/analytics/tests/test_dataset.py b/analytics/tests/test_dataset.py index 77d35bd..0e2ed4f 100644 --- a/analytics/tests/test_dataset.py +++ b/analytics/tests/test_dataset.py @@ -88,6 +88,18 @@ class TestDataset(unittest.TestCase): model.fit(dataframe, segments, dict()) except ValueError: self.fail('Model {} raised unexpectedly'.format(model_name)) + + def test_value_error_dataset_input_should_have_multiple_elements(self): + data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,3.0,3.0,2.0,7.0,8.0,9.0,8.0,7.0,6.0] + dataframe = create_dataframe(data_val) + segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000007, 'to': 1523889000011, 'labeled': True, 'deleted': False}] + + try: + model = models.JumpModel() + model_name = model.__class__.__name__ + model.fit(dataframe, segments, dict()) + except ValueError: + self.fail('Model {} raised unexpectedly'.format(model_name)) if __name__ == '__main__': unittest.main() diff --git a/analytics/tests/test_utils.py b/analytics/tests/test_utils.py index 10f3e5c..faaad63 100644 --- a/analytics/tests/test_utils.py +++ b/analytics/tests/test_utils.py @@ -4,6 +4,8 @@ import numpy as np import pandas as pd import math +RELATIVE_TOLERANCE = 1e-1 + class TestUtils(unittest.TestCase): #example test for test's workflow purposes @@ -14,8 +16,7 @@ class TestUtils(unittest.TestCase): segment = [1, 2, 0, 6, 8, 5, 3] utils_result = utils.find_confidence(segment) result = 1.6 - relative_tolerance = 1e-2 - self.assertTrue(math.isclose(utils_result, result, rel_tol = relative_tolerance)) + self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE)) def test_confidence_all_nan_value(self): segment = [np.NaN, np.NaN, np.NaN, np.NaN] @@ -25,8 +26,7 @@ class TestUtils(unittest.TestCase): data = [np.NaN, np.NaN, 0, 8] utils_result = utils.find_confidence(data) result = 1.6 - relative_tolerance = 1e-2 - self.assertTrue(math.isclose(utils_result, result, rel_tol = relative_tolerance)) + self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE)) def test_interval_all_normal_value(self): data = [1, 2, 1, 2, 4, 1, 2, 4, 5, 6] @@ -87,12 +87,6 @@ class TestUtils(unittest.TestCase): self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result) self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size_zero), result) - def test_get_distribution_density(self): - segment = [1, 1, 1, 3, 5, 5, 5] - segment = pd.Series(segment) - result = (3, 5, 1) - self.assertEqual(utils.get_distribution_density(segment), result) - def test_find_jump_parameters_center(self): segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] segment = pd.Series(segment) @@ -157,6 +151,44 @@ class TestUtils(unittest.TestCase): result = [] self.assertEqual(utils.find_drop(data, height, length), result) self.assertEqual(utils.find_drop(data, height_zero, length_zero), result) + + def test_get_distribution_density(self): + segment = [1, 1, 1, 3, 5, 5, 5] + segment = pd.Series(segment) + result = (3, 5, 1) + self.assertEqual(utils.get_distribution_density(segment), result) + + def test_get_distribution_density_right(self): + data = [1.0, 5.0, 5.0, 4.0] + data = pd.Series(data) + median = 3.0 + max_line = 5.0 + min_line = 1.0 + utils_result = utils.get_distribution_density(data) + self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE)) + self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE)) + self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE)) + + def test_get_distribution_density_left(self): + data = [1.0, 1.0, 2.0, 1.0, 5.0] + data = pd.Series(data) + median = 3.0 + max_line = 5.0 + min_line = 1.0 + utils_result = utils.get_distribution_density(data) + self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE)) + self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE)) + self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE)) + + def test_get_distribution_density_short_data(self): + data = [1.0, 5.0] + data = pd.Series(data) + segment = [1.0] + segment = pd.Series(segment) + utils_result_data = utils.get_distribution_density(data) + utils_result_segment = utils.get_distribution_density(segment) + self.assertEqual(len(utils_result_data), 3) + self.assertEqual(utils_result_segment, (0, 0, 0)) if __name__ == '__main__': unittest.main()