diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py
index 93617ef..5ef90df 100644
--- a/analytics/analytics/models/drop_model.py
+++ b/analytics/analytics/models/drop_model.py
@@ -81,7 +81,7 @@ class DropModel(Model):
             if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
                 convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if percent_of_nans > 0.5:
+                if len(convol_data) == 0 or percent_of_nans > 0.5:
                     delete_list.append(segment)
                     continue
                 elif 0 < percent_of_nans <= 0.5:
diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py
index 36ee142..8465c33 100644
--- a/analytics/analytics/models/general_model.py
+++ b/analytics/analytics/models/general_model.py
@@ -63,7 +63,8 @@ class GeneralModel(Model):
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         pat_data = self.model_gen
-        y = max(pat_data)
+        if pat_data.count(0) == len(pat_data):
+            raise ValueError('Labeled patterns must not be empty')
 
         for i in range(self.state['WINDOW_SIZE'] * 2, len(data)):
             watch_data = data[i - self.state['WINDOW_SIZE'] * 2: i]
diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py
index e1d0da1..deb08f8 100644
--- a/analytics/analytics/models/jump_model.py
+++ b/analytics/analytics/models/jump_model.py
@@ -87,7 +87,7 @@ class JumpModel(Model):
             if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
                 convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
-                if percent_of_nans > 0.5:
+                if len(convol_data) == 0 or percent_of_nans > 0.5:
                     delete_list.append(segment)
                     continue
                 elif 0 < percent_of_nans <= 0.5:
diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py
index 8e7002e..98a0614 100644
--- a/analytics/analytics/utils/common.py
+++ b/analytics/analytics/utils/common.py
@@ -5,6 +5,7 @@ from scipy.fftpack import fft
 from scipy.signal import argrelextrema
 from scipy.stats import gaussian_kde
 from scipy.stats.stats import pearsonr
+import math
 from typing import Union
 
 import utils
@@ -81,7 +82,6 @@ def ar_mean(numbers):
 def get_av_model(patterns_list):
     if len(patterns_list) == 0:
         return []
-    x = len(patterns_list[0])
     if len(patterns_list) > 1 and len(patterns_list[1]) != x:
         raise NameError(
@@ -210,8 +210,10 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: i
         labeled_segment = utils.check_nan_values(labeled_segment)
         auto_convolve = scipy.signal.fftconvolve(labeled_segment, labeled_segment)
         convolve_segment = scipy.signal.fftconvolve(labeled_segment, av_model)
-        convolve_list.append(max(auto_convolve))
-        convolve_list.append(max(convolve_segment))
+        if len(auto_convolve) > 0:
+            convolve_list.append(max(auto_convolve))
+        if len(convolve_segment) > 0:
+            convolve_list.append(max(convolve_segment))
     return convolve_list
 
 def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
@@ -228,7 +230,7 @@
     return correlation_list
 
 def get_distribution_density(segment: pd.Series) -> float:
-    if len(segment) < 2:
+    if len(segment) < 2 or len(segment.nonzero()[0]) == 0:
         return (0, 0, 0)
     min_jump = min(segment)
     max_jump = max(segment)
@@ -264,8 +266,11 @@ def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type:
 def find_pattern_center(segment_data: pd.Series, segment_from_index: int, pattern_type: str):
     segment_median = utils.get_distribution_density(segment_data)[0]
     cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pattern_type)
-    pat_center = cen_ind[0]
-    segment_cent_index = pat_center + segment_from_index
+    if len(cen_ind) > 0:
+        pat_center = cen_ind[0]
+        segment_cent_index = pat_center + segment_from_index
+    else:
+        segment_cent_index = math.ceil((len(segment_data)) / 2)
     return segment_cent_index
 
 def find_length(segment_data: pd.Series, segment_min_line: float, segment_max_line: float, pat_type: str) -> int:
diff --git a/analytics/tests/test_dataset.py b/analytics/tests/test_dataset.py
index 047a4c8..487bac7 100644
--- a/analytics/tests/test_dataset.py
+++ b/analytics/tests/test_dataset.py
@@ -90,6 +90,32 @@ class TestDataset(unittest.TestCase):
         except ValueError:
             self.fail('Model {} raised unexpectedly'.format(model_name))
 
+    def test_jump_empty_segment(self):
+        data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        dataframe = create_dataframe(data_val)
+        segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False},
+                    {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}]
+
+        try:
+            model = models.JumpModel()
+            model_name = model.__class__.__name__
+            model.fit(dataframe, segments, dict())
+        except ValueError:
+            self.fail('Model {} raised unexpectedly'.format(model_name))
+
+    def test_drop_empty_segment(self):
+        data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        dataframe = create_dataframe(data_val)
+        segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False},
+                    {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}]
+
+        try:
+            model = models.DropModel()
+            model_name = model.__class__.__name__
+            model.fit(dataframe, segments, dict())
+        except ValueError:
+            self.fail('Model {} raised unexpectedly'.format(model_name))
+
     def test_value_error_dataset_input_should_have_multiple_elements(self):
         data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,3.0,3.0,2.0,7.0,8.0,9.0,8.0,7.0,6.0]
         dataframe = create_dataframe(data_val)
@@ -125,6 +151,22 @@ class TestDataset(unittest.TestCase):
             self.assertTrue(np.isnan(item))
         for item in return_data_none.value:
             self.assertTrue(np.isnan(item))
+
+    def test_three_value_segment(self):
+        data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 4.0]
+        dataframe = create_dataframe(data_val)
+        segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}]
+
+        model_instances = [
+            models.GeneralModel(),
+            models.PeakModel(),
+        ]
+        try:
+            for model in model_instances:
+                model_name = model.__class__.__name__
+                model.fit(dataframe, segments, dict())
+        except ValueError:
+            self.fail('Model {} raised unexpectedly'.format(model_name))
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/analytics/tests/test_utils.py b/analytics/tests/test_utils.py
index 03bd30f..c1d24f7 100644
--- a/analytics/tests/test_utils.py
+++ b/analytics/tests/test_utils.py
@@ -196,6 +196,17 @@ class TestUtils(unittest.TestCase):
         median = 3.0
         result = 3
         self.assertEqual(result, utils.find_pattern_center(data, 0, 'jump'))
+
+    def test_get_convolve_wrong_index(self):
+        data = [1.0, 5.0, 2.0, 1.0, 6.0, 2.0]
+        data = pd.Series(data)
+        segments = [1, 11]
+        av_model = [0.0, 4.0, 0.0]
+        window_size = 1
+        try:
+            utils.get_convolve(segments, av_model, data, window_size)
+        except ValueError:
+            self.fail('Method get_convolve raised unexpectedly')
 
 if __name__ == '__main__':
     unittest.main()