Browse Source

Error: ValueError - dataset input should have multiple elements #325 (#327)

pull/1/head
Alexandr Velikiy 6 years ago committed by Alexey Velikiy
parent
commit
cbbc8c6cdd
  1. 11
      analytics/analytics/utils/common.py
  2. 12
      analytics/tests/test_dataset.py
  3. 52
      analytics/tests/test_utils.py

11
analytics/analytics/utils/common.py

@ -9,6 +9,7 @@ import utils
SHIFT_FACTOR = 0.05 SHIFT_FACTOR = 0.05
CONFIDENCE_FACTOR = 0.2 CONFIDENCE_FACTOR = 0.2
SMOOTHING_FACTOR = 5
def exponential_smoothing(series, alpha): def exponential_smoothing(series, alpha):
result = [series[0]] result = [series[0]]
@ -192,6 +193,8 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i
return convolve_list return convolve_list
def get_distribution_density(segment: pd.Series) -> float: def get_distribution_density(segment: pd.Series) -> float:
if len(segment) < 2:
return (0, 0, 0)
min_jump = min(segment) min_jump = min(segment)
max_jump = max(segment) max_jump = max(segment)
pdf = gaussian_kde(segment) pdf = gaussian_kde(segment)
@ -214,9 +217,11 @@ def get_distribution_density(segment: pd.Series) -> float:
return segment_median, segment_max_line, segment_min_line return segment_median, segment_max_line, segment_min_line
def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> [int, float, int]: def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> [int, float, int]:
flat_segment = segment_data.rolling(window=5).mean() segment = segment_data
flat_segment_dropna = flat_segment.dropna() if len(segment_data) > SMOOTHING_FACTOR * 3:
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna) flat_segment = segment_data.rolling(window = SMOOTHING_FACTOR).mean()
segment = flat_segment.dropna()
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment)
height = 0.95 * (segment_max_line - segment_min_line) height = 0.95 * (segment_max_line - segment_min_line)
length = utils.find_length(segment_data, segment_min_line, segment_max_line, pat_type) length = utils.find_length(segment_data, segment_min_line, segment_max_line, pat_type)
cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pat_type) cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pat_type)

12
analytics/tests/test_dataset.py

@ -89,6 +89,18 @@ class TestDataset(unittest.TestCase):
except ValueError: except ValueError:
self.fail('Model {} raised unexpectedly'.format(model_name)) self.fail('Model {} raised unexpectedly'.format(model_name))
def test_value_error_dataset_input_should_have_multiple_elements(self):
    # Regression test for issue #325 ("dataset input should have multiple
    # elements"): fitting JumpModel on this labeled segment must not raise
    # ValueError.
    data_val = [
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0,
        1.0, 2.0, 3.0, 4.0, 5.0,
        3.0, 3.0, 2.0, 7.0, 8.0, 9.0, 8.0, 7.0, 6.0,
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000007,
        'to': 1523889000011,
        'labeled': True,
        'deleted': False,
    }]
    # Build the model outside the ``try`` so ``model_name`` is always bound
    # when the handler formats its failure message; only ``fit`` is the call
    # under test.
    model = models.JumpModel()
    model_name = model.__class__.__name__
    try:
        model.fit(dataframe, segments, dict())
    except ValueError:
        self.fail('Model {} raised unexpectedly'.format(model_name))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

52
analytics/tests/test_utils.py

@ -4,6 +4,8 @@ import numpy as np
import pandas as pd import pandas as pd
import math import math
RELATIVE_TOLERANCE = 1e-1
class TestUtils(unittest.TestCase): class TestUtils(unittest.TestCase):
#example test for test's workflow purposes #example test for test's workflow purposes
@ -14,8 +16,7 @@ class TestUtils(unittest.TestCase):
segment = [1, 2, 0, 6, 8, 5, 3] segment = [1, 2, 0, 6, 8, 5, 3]
utils_result = utils.find_confidence(segment) utils_result = utils.find_confidence(segment)
result = 1.6 result = 1.6
relative_tolerance = 1e-2 self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
self.assertTrue(math.isclose(utils_result, result, rel_tol = relative_tolerance))
def test_confidence_all_nan_value(self): def test_confidence_all_nan_value(self):
segment = [np.NaN, np.NaN, np.NaN, np.NaN] segment = [np.NaN, np.NaN, np.NaN, np.NaN]
@ -25,8 +26,7 @@ class TestUtils(unittest.TestCase):
data = [np.NaN, np.NaN, 0, 8] data = [np.NaN, np.NaN, 0, 8]
utils_result = utils.find_confidence(data) utils_result = utils.find_confidence(data)
result = 1.6 result = 1.6
relative_tolerance = 1e-2 self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
self.assertTrue(math.isclose(utils_result, result, rel_tol = relative_tolerance))
def test_interval_all_normal_value(self): def test_interval_all_normal_value(self):
data = [1, 2, 1, 2, 4, 1, 2, 4, 5, 6] data = [1, 2, 1, 2, 4, 1, 2, 4, 5, 6]
@ -87,12 +87,6 @@ class TestUtils(unittest.TestCase):
self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result) self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result)
self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size_zero), result) self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size_zero), result)
def test_get_distribution_density(self):
    # Two flat levels (1 and 5) joined by a single sample at 3.
    series = pd.Series([1, 1, 1, 3, 5, 5, 5])
    # The helper returns (median, max line, min line), in that order.
    expected = (3, 5, 1)
    actual = utils.get_distribution_density(series)
    self.assertEqual(actual, expected)
def test_find_jump_parameters_center(self): def test_find_jump_parameters_center(self):
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
segment = pd.Series(segment) segment = pd.Series(segment)
@ -158,5 +152,43 @@ class TestUtils(unittest.TestCase):
self.assertEqual(utils.find_drop(data, height, length), result) self.assertEqual(utils.find_drop(data, height, length), result)
self.assertEqual(utils.find_drop(data, height_zero, length_zero), result) self.assertEqual(utils.find_drop(data, height_zero, length_zero), result)
def test_get_distribution_density(self):
    # Step-shaped series: three samples at 1, one at 3, three at 5.
    # Expected tuple is (median, max line, min line).
    self.assertEqual(
        utils.get_distribution_density(pd.Series([1, 1, 1, 3, 5, 5, 5])),
        (3, 5, 1),
    )
def test_get_distribution_density_right(self):
    # Short series with most samples near the upper level.
    series = pd.Series([1.0, 5.0, 5.0, 4.0])
    # Expected values in the order the helper returns them:
    # median, max line, min line.
    expected = (3.0, 5.0, 1.0)
    actual = utils.get_distribution_density(series)
    # Index explicitly so a too-short result still raises, as before.
    for position, wanted in enumerate(expected):
        self.assertTrue(math.isclose(actual[position], wanted, rel_tol=RELATIVE_TOLERANCE))
def test_get_distribution_density_left(self):
    # Short series with most samples near the lower level.
    density = utils.get_distribution_density(pd.Series([1.0, 1.0, 2.0, 1.0, 5.0]))
    # (index into the result, expected value): median, max line, min line.
    checks = ((0, 3.0), (1, 5.0), (2, 1.0))
    for index, expected_value in checks:
        is_close = math.isclose(density[index], expected_value, rel_tol=RELATIVE_TOLERANCE)
        self.assertTrue(is_close)
def test_get_distribution_density_short_data(self):
    # Two points is the smallest input that is still processed normally;
    # a single point takes the short-input path and yields (0, 0, 0).
    two_points = pd.Series([1.0, 5.0])
    one_point = pd.Series([1.0])
    density_two = utils.get_distribution_density(two_points)
    density_one = utils.get_distribution_density(one_point)
    self.assertEqual(len(density_two), 3)
    self.assertEqual(density_one, (0, 0, 0))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save