
Error: too many values to unpack #721 (#725)

rozetko committed 5 years ago (via GitHub)
commit 96db3bdef8
7 changed files:

1. analytics/analytics/analytic_unit_manager.py (2 lines changed)
2. analytics/analytics/detectors/anomaly_detector.py (118 lines changed)
3. analytics/analytics/detectors/detector.py (11 lines changed)
4. analytics/analytics/detectors/pattern_detector.py (2 lines changed)
5. analytics/analytics/detectors/threshold_detector.py (6 lines changed)
6. analytics/analytics/utils/common.py (3 lines changed)
7. analytics/tests/test_detectors.py (96 lines changed)

analytics/analytics/analytic_unit_manager.py (2 lines changed)

@@ -17,7 +17,7 @@ def get_detector_by_type(
     if detector_type == 'pattern':
         return detectors.PatternDetector(analytic_unit_type, analytic_unit_id)
     elif detector_type == 'threshold':
-        return detectors.ThresholdDetector()
+        return detectors.ThresholdDetector(analytic_unit_id)
     elif detector_type == 'anomaly':
         return detectors.AnomalyDetector(analytic_unit_id)

analytics/analytics/detectors/anomaly_detector.py (118 lines changed)

@@ -1,8 +1,6 @@
 from enum import Enum
 import logging
 import numpy as np
-import operator
-from collections import OrderedDict
 import pandas as pd
 import math
 from typing import Optional, Union, List, Tuple
@@ -27,12 +25,12 @@ class Bound(Enum):
 class AnomalyDetector(ProcessingDetector):
 
     def __init__(self, analytic_unit_id: AnalyticUnitId):
-        self.analytic_unit_id = analytic_unit_id
+        super().__init__(analytic_unit_id)
         self.bucket = DataBucket()
 
     def train(self, dataframe: pd.DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache:
         segments = payload.get('segments')
-        enable_bounds: str = payload.get('enableBounds') or 'ALL'
+        enable_bounds = Bound(payload.get('enableBounds') or 'ALL')
         prepared_segments = []
         time_step = utils.find_interval(dataframe)
@@ -40,7 +38,7 @@ class AnomalyDetector(ProcessingDetector):
             'confidence': payload['confidence'],
             'alpha': payload['alpha'],
             'timeStep': time_step,
-            'enableBounds': enable_bounds
+            'enableBounds': enable_bounds.value
         }
 
         if segments is not None:
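
Note on the enableBounds handling: train() now stores the enum's string value in the cache, and detect()/process_data() parse it back with Bound(...), so an unexpected value fails immediately instead of silently behaving like 'ALL'. A minimal sketch of that round-trip, assuming the Bound members carry the string values 'ALL', 'UPPER' and 'LOWER' (the default 'ALL' in the diff suggests this, but the exact values are an assumption):

    from enum import Enum

    # Assumed to mirror the Bound enum in anomaly_detector.py; the values are a guess
    # based on the default 'ALL' used in the diff.
    class Bound(Enum):
        ALL = 'ALL'
        UPPER = 'UPPER'
        LOWER = 'LOWER'

    payload = {'enableBounds': 'UPPER'}

    enable_bounds = Bound(payload.get('enableBounds') or 'ALL')  # Bound.UPPER
    cache_entry = enable_bounds.value                            # 'UPPER' is what goes into the cache
    restored = Bound(cache_entry)                                # parsed back on detect()

    Bound('sideways')  # raises ValueError: 'sideways' is not a valid Bound
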
@@ -65,55 +63,53 @@
             'cache': new_cache
         }
 
-    # TODO: ModelCache -> ModelState
+    # TODO: ModelCache -> DetectorState
     def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
-        if cache == None:
-            raise f'Analytic unit {self.analytic_unit_id} got empty cache'
         data = dataframe['value']
-        time_step = cache['timeStep']
-        segments = cache.get('segments')
-        enable_bounds: str = cache.get('enableBounds') or 'ALL'
-        smoothed_data = utils.exponential_smoothing(data, cache['alpha'])
 
-        # TODO: use class for cache to avoid using string literals and Bound.TYPE.value
-        bounds = OrderedDict()
-        bounds[Bound.LOWER.value] = ( smoothed_data - cache['confidence'], operator.lt )
-        bounds[Bound.UPPER.value] = ( smoothed_data + cache['confidence'], operator.gt )
+        # TODO: use class for cache to avoid using string literals
+        alpha = self.get_value_from_cache(cache, 'alpha', required = True)
+        confidence = self.get_value_from_cache(cache, 'confidence', required = True)
+        segments = self.get_value_from_cache(cache, 'segments')
+        enable_bounds = Bound(self.get_value_from_cache(cache, 'enableBounds') or 'ALL')
 
-        if enable_bounds == Bound.LOWER.value:
-            del bounds[Bound.UPPER.value]
+        smoothed_data = utils.exponential_smoothing(data, alpha)
 
-        if enable_bounds == Bound.UPPER.value:
-            del bounds[Bound.LOWER.value]
+        lower_bound = smoothed_data - confidence
+        upper_bound = smoothed_data + confidence
 
         if segments is not None:
-            seasonality = cache.get('seasonality')
-            assert seasonality is not None and seasonality > 0, \
+            time_step = self.get_value_from_cache(cache, 'timeStep', required = True)
+            seasonality = self.get_value_from_cache(cache, 'seasonality', required = True)
+            assert seasonality > 0, \
                 f'{self.analytic_unit_id} got invalid seasonality {seasonality}'
 
             data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0])
-            data_second_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][1])
 
             for segment in segments:
                 seasonality_index = seasonality // time_step
                 season_count = math.ceil(abs(segment['from'] - data_start_time) / seasonality)
                 start_seasonal_segment = segment['from'] + seasonality * season_count
                 seasonality_offset = (abs(start_seasonal_segment - data_start_time) % seasonality) // time_step
-                #TODO: upper and lower bounds for segment_data
                 segment_data = pd.Series(segment['data'])
 
-                for bound_type, bound_data in bounds.items():
-                    bound_data, _ = bound_data
-                    bounds[bound_type] = self.add_season_to_data(bound_data, segment_data, seasonality_offset, seasonality_index, bound_type)
-                    assert len(smoothed_data) == len(bounds[bound_type]), \
-                        f'len smoothed {len(smoothed_data)} != len seasonality {len(bounds[bound_type])}'
+                lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER)
+                upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER)
 
         anomaly_indexes = []
         for idx, val in enumerate(data.values):
-            for bound_type, bound_data in bounds.items():
-                bound_data, comparator = bound_data
-                if comparator(val, bound_data.values[idx]):
-                    anomaly_indexes.append(data.index[idx])
+            if val > upper_bound.values[idx]:
+                if enable_bounds == Bound.UPPER or enable_bounds == Bound.ALL:
+                    anomaly_indexes.append(data.index[idx])
+
+            if val < lower_bound.values[idx]:
+                if enable_bounds == Bound.LOWER or enable_bounds == Bound.ALL:
+                    anomaly_indexes.append(data.index[idx])
+
         # TODO: use Segment in utils
         segments = utils.close_filtering(anomaly_indexes, 1)
         segments = utils.get_start_and_end_of_segments(segments)
@@ -176,34 +172,27 @@ class AnomalyDetector(ProcessingDetector):
         result.segments = utils.merge_intersecting_segments(result.segments, time_step)
         return result
 
-    # TODO: ModelCache -> ModelState (don't use string literals)
+    # TODO: remove duplication with detect()
     def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> AnomalyProcessingResult:
-        segments = cache.get('segments')
-        enable_bounds: str = cache.get('enableBounds') or 'ALL'
+        segments = self.get_value_from_cache(cache, 'segments')
+        alpha = self.get_value_from_cache(cache, 'alpha', required = True)
+        confidence = self.get_value_from_cache(cache, 'confidence', required = True)
+        enable_bounds = Bound(self.get_value_from_cache(cache, 'enableBounds') or 'ALL')
 
         # TODO: exponential_smoothing should return dataframe with related timestamps
-        smoothed_data = utils.exponential_smoothing(dataframe['value'], cache['alpha'])
-        bounds = OrderedDict()
-        bounds[Bound.LOWER.value] = smoothed_data - cache['confidence']
-        bounds[Bound.UPPER.value] = smoothed_data + cache['confidence']
+        smoothed_data = utils.exponential_smoothing(dataframe['value'], alpha)
 
-        if enable_bounds == Bound.LOWER.value:
-            del bounds[Bound.UPPER.value]
+        lower_bound = smoothed_data - confidence
+        upper_bound = smoothed_data + confidence
 
-        if enable_bounds == Bound.UPPER.value:
-            del bounds[Bound.LOWER.value]
-
-        # TODO: remove duplication with detect()
         if segments is not None:
-            seasonality = cache.get('seasonality')
-            assert seasonality is not None and seasonality > 0, \
+            seasonality = self.get_value_from_cache(cache, 'seasonality', required = True)
+            assert seasonality > 0, \
                 f'{self.analytic_unit_id} got invalid seasonality {seasonality}'
 
             data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0])
-            time_step = cache['timeStep']
+            time_step = self.get_value_from_cache(cache, 'timeStep', required = True)
 
             for segment in segments:
                 seasonality_index = seasonality // time_step
@@ -212,19 +201,22 @@ class AnomalyDetector(ProcessingDetector):
                 start_seasonal_segment = segment['from'] + seasonality * season_count
                 seasonality_offset = (abs(start_seasonal_segment - data_start_time) % seasonality) // time_step
                 segment_data = pd.Series(segment['data'])
 
-                for bound_type, bound_data in bounds.items():
-                    bounds[bound_type] = self.add_season_to_data(bound_data, segment_data, seasonality_offset, seasonality_index, bound_type)
-                    assert len(smoothed_data) == len(bounds[bound_type]), \
-                        f'len smoothed {len(smoothed_data)} != len seasonality {len(bounds[bound_type])}'
+                lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER)
+                upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER)
 
         # TODO: support multiple segments
         timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
-        result_bounds = {}
-        for bound_type, bound_data in bounds.items():
-            result_bounds[bound_type] = list(zip(timestamps, bound_data.values.tolist()))
-        result = AnomalyProcessingResult(lower_bound=result_bounds.get(Bound.LOWER.value), upper_bound=result_bounds.get(Bound.UPPER.value))
-        return result
+        lower_bound_timeseries = list(zip(timestamps, lower_bound.values.tolist()))
+        upper_bound_timeseries = list(zip(timestamps, upper_bound.values.tolist()))
+
+        if enable_bounds == Bound.ALL:
+            return AnomalyProcessingResult(lower_bound_timeseries, upper_bound_timeseries)
+        elif enable_bounds == Bound.UPPER:
+            return AnomalyProcessingResult(upper_bound = upper_bound_timeseries)
+        elif enable_bounds == Bound.LOWER:
+            return AnomalyProcessingResult(lower_bound = lower_bound_timeseries)
 
     def add_season_to_data(self, data: pd.Series, segment: pd.Series, offset: int, seasonality: int, bound_type: Bound) -> pd.Series:
         #data - smoothed data to which seasonality will be added
@@ -236,14 +228,14 @@ class AnomalyDetector(ProcessingDetector):
                 #TODO: add seasonality for non empty parts
                 continue
             if (idx - offset) % seasonality == 0:
-                if bound_type == Bound.UPPER.value:
+                if bound_type == Bound.UPPER:
                     upper_segment_bound = self.get_bounds_for_segment(segment)[0]
                     data = data.add(pd.Series(upper_segment_bound.values, index = segment.index + idx), fill_value = 0)
-                elif bound_type == Bound.LOWER.value:
+                elif bound_type == Bound.LOWER:
                     lower_segment_bound = self.get_bounds_for_segment(segment)[1]
                     data = data.add(pd.Series(lower_segment_bound.values * -1, index = segment.index + idx), fill_value = 0)
                 else:
-                    raise ValueError(f'unknown {bound_type}')
+                    raise ValueError(f'unknown bound type: {bound_type.value}')
         return data[:len_smoothed_data]
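
The detect() rewrite above is the crux of the fix: in the old code the bounds OrderedDict started out holding (series, comparator) tuples, but the seasonal loop wrote plain series back into it, so the later "bound_data, comparator = bound_data" ended up unpacking a whole pandas Series, which is consistent with the "too many values to unpack" error in #721. Replacing the dict with explicit lower_bound/upper_bound variables removes the unpacking entirely. A minimal reproduction of that failure mode (illustrative values only, not code from the repo):

    import operator
    import pandas as pd

    smoothed = pd.Series([0.0, 0.1, 0.29])

    # What the old detect() stored initially: (series, comparator) pairs.
    bound = (smoothed + 2, operator.gt)
    series_only = smoothed + 2        # what the seasonal loop wrote back instead

    data, comparator = bound          # fine: a 2-tuple unpacks into two names
    data, comparator = series_only    # ValueError: too many values to unpack (expected 2)
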

analytics/analytics/detectors/detector.py (11 lines changed)

@@ -2,13 +2,16 @@ from abc import ABC, abstractmethod
 from pandas import DataFrame
 from typing import Optional, Union, List
 
-from analytic_types import ModelCache, TimeSeries
+from analytic_types import ModelCache, TimeSeries, AnalyticUnitId
 from analytic_types.detector_typing import DetectionResult, ProcessingResult
 from analytic_types.segment import Segment
 
 class Detector(ABC):
 
+    def __init__(self, analytic_unit_id: AnalyticUnitId):
+        self.analytic_unit_id = analytic_unit_id
+
     @abstractmethod
     def train(self, dataframe: DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache:
         """
@@ -39,6 +42,12 @@ class Detector(ABC):
             result.cache = detection.cache
         return result
 
+    def get_value_from_cache(self, cache: ModelCache, key: str, required = False):
+        value = cache.get(key)
+        if value == None and required:
+            raise ValueError(f'Missing required "{key}" field in cache for analytic unit {self.analytic_unit_id}')
+        return value
+
 
 class ProcessingDetector(Detector):
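
The new base-class constructor and get_value_from_cache() helper are what let every detector report missing cache fields with its own analytic unit id (and why ThresholdDetector now takes an id). A standalone sketch of the intended usage; the ToyDetector class and the cache contents are made up for illustration, only the helper itself mirrors the diff:

    class ToyDetector:
        # Trimmed-down stand-in for the Detector base class in the diff.
        def __init__(self, analytic_unit_id: str):
            self.analytic_unit_id = analytic_unit_id

        def get_value_from_cache(self, cache: dict, key: str, required: bool = False):
            value = cache.get(key)
            if value is None and required:
                raise ValueError(f'Missing required "{key}" field in cache for analytic unit {self.analytic_unit_id}')
            return value

    detector = ToyDetector('test_id')
    cache = {'alpha': 0.1, 'confidence': 2}

    detector.get_value_from_cache(cache, 'alpha', required=True)     # 0.1
    detector.get_value_from_cache(cache, 'segments')                 # None: optional key, no error
    detector.get_value_from_cache(cache, 'timeStep', required=True)  # raises ValueError naming 'test_id'
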

analytics/analytics/detectors/pattern_detector.py (2 lines changed)

@@ -41,7 +41,7 @@ class PatternDetector(Detector):
     DEFAULT_WINDOW_SIZE = 1
 
     def __init__(self, pattern_type: str, analytic_unit_id: AnalyticUnitId):
-        self.analytic_unit_id = analytic_unit_id
+        super().__init__(analytic_unit_id)
         self.pattern_type = pattern_type
         self.model = resolve_model_by_pattern(self.pattern_type)
         self.bucket = DataBucket()

analytics/analytics/detectors/threshold_detector.py (6 lines changed)

@@ -5,7 +5,7 @@ import pandas as pd
 import numpy as np
 from typing import Optional, List
 
-from analytic_types import ModelCache
+from analytic_types import ModelCache, AnalyticUnitId
 from analytic_types.detector_typing import DetectionResult
 from analytic_types.segment import Segment
 from detectors import Detector
@@ -20,8 +20,8 @@ class ThresholdDetector(Detector):
     WINDOW_SIZE = 3
 
-    def __init__(self):
-        pass
+    def __init__(self, analytic_unit_id: AnalyticUnitId):
+        super().__init__(analytic_unit_id)
 
     def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache:
         time_step = utils.find_interval(dataframe)

analytics/analytics/utils/common.py (3 lines changed)

@@ -36,6 +36,9 @@ def exponential_smoothing(series: pd.Series, alpha: float, last_smoothed_value:
             series.values[n] = result[n]
         else:
             result.append(alpha * series[n] + (1 - alpha) * result[n - 1])
+
+    assert len(result) == len(series), \
+        f'len of smoothed data {len(result)} != len of original dataset {len(series)}'
     return pd.Series(result, index = series.index)
 
 def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) -> list:

analytics/tests/test_detectors.py (96 lines changed)

@@ -2,7 +2,7 @@ import unittest
 import pandas as pd
 
 from detectors import pattern_detector, threshold_detector, anomaly_detector
-from analytic_types.detector_typing import DetectionResult
+from analytic_types.detector_typing import DetectionResult, ProcessingResult
 
 class TestPatternDetector(unittest.TestCase):
@@ -10,10 +10,9 @@ class TestPatternDetector(unittest.TestCase):
         data = [[0,1], [1,2]]
         dataframe = pd.DataFrame(data, columns=['timestamp', 'values'])
-        cache = {'windowSize': 10}
+        cache = { 'windowSize': 10 }
         detector = pattern_detector.PatternDetector('GENERAL', 'test_id')
 
         with self.assertRaises(ValueError):
             detector.detect(dataframe, cache)
@@ -22,8 +21,8 @@ class TestThresholdDetector(unittest.TestCase):
     def test_invalid_cache(self):
 
-        detector = threshold_detector.ThresholdDetector()
+        detector = threshold_detector.ThresholdDetector('test_id')
 
         with self.assertRaises(ValueError):
             detector.detect([], None)
@@ -33,7 +32,7 @@
 class TestAnomalyDetector(unittest.TestCase):
 
-    def test_dataframe(self):
+    def test_detect(self):
         data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1]
         data_ind = [1523889000000 + i for i in range(len(data_val))]
         data = {'timestamp': data_ind, 'value': data_val}
@@ -45,8 +44,91 @@ class TestAnomalyDetector(unittest.TestCase):
             'timeStep': 1
         }
         detector = anomaly_detector.AnomalyDetector('test_id')
-        detect_result: DetectionResult = detector.detect(dataframe, cache)
 
+        detect_result: DetectionResult = detector.detect(dataframe, cache)
         detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments))
         result = [{ 'from': 1523889000005.0, 'to': 1523889000005.0 }]
         self.assertEqual(result, detected_segments)
+
+        cache = {
+            'confidence': 2,
+            'alpha': 0.1,
+            'timeStep': 1,
+            'seasonality': 4,
+            'segments': [{ 'from': 1523889000001, 'to': 1523889000002, 'data': [10] }]
+        }
+        detect_result: DetectionResult = detector.detect(dataframe, cache)
+        detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments))
+        result = []
+        self.assertEqual(result, detected_segments)
+
+    def test_process_data(self):
+        data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1]
+        data_ind = [1523889000000 + i for i in range(len(data_val))]
+        data = {'timestamp': data_ind, 'value': data_val}
+        dataframe = pd.DataFrame(data = data)
+        dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms')
+        cache = {
+            'confidence': 2,
+            'alpha': 0.1,
+            'timeStep': 1
+        }
+        detector = anomaly_detector.AnomalyDetector('test_id')
+        detect_result: ProcessingResult = detector.process_data(dataframe, cache)
+        expected_result = {
+            'lowerBound': [
+                (1523889000000, -2.0),
+                (1523889000001, -1.9),
+                (1523889000002, -1.71),
+                (1523889000003, -1.6389999999999998),
+                (1523889000004, -1.4750999999999999),
+                (1523889000005, -0.5275899999999998),
+                (1523889000006, -0.5748309999999996),
+                (1523889000007, -0.5173478999999996),
+                (1523889000008, -0.5656131099999995)
+            ],
+            'upperBound': [
+                (1523889000000, 2.0),
+                (1523889000001, 2.1),
+                (1523889000002, 2.29),
+                (1523889000003, 2.361),
+                (1523889000004, 2.5249),
+                (1523889000005, 3.47241),
+                (1523889000006, 3.4251690000000004),
+                (1523889000007, 3.4826521),
+                (1523889000008, 3.4343868900000007)
+            ]}
+        self.assertEqual(detect_result.to_json(), expected_result)
+
+        cache = {
+            'confidence': 2,
+            'alpha': 0.1,
+            'timeStep': 1,
+            'seasonality': 5,
+            'segments': [{ 'from': 1523889000001, 'to': 1523889000002, 'data': [1] }]
+        }
+        detect_result: ProcessingResult = detector.process_data(dataframe, cache)
+        expected_result = {
+            'lowerBound': [
+                (1523889000000, -2.0),
+                (1523889000001, -2.9),
+                (1523889000002, -1.71),
+                (1523889000003, -1.6389999999999998),
+                (1523889000004, -1.4750999999999999),
+                (1523889000005, -0.5275899999999998),
+                (1523889000006, -1.5748309999999996),
+                (1523889000007, -0.5173478999999996),
+                (1523889000008, -0.5656131099999995)
+            ],
+            'upperBound': [
+                (1523889000000, 2.0),
+                (1523889000001, 3.1),
+                (1523889000002, 2.29),
+                (1523889000003, 2.361),
+                (1523889000004, 2.5249),
+                (1523889000005, 3.47241),
+                (1523889000006, 4.425169),
+                (1523889000007, 3.4826521),
+                (1523889000008, 3.4343868900000007)
+            ]}
+        self.assertEqual(detect_result.to_json(), expected_result)
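
As a quick sanity check on the expected values in test_process_data: with alpha = 0.1 and confidence = 2, the same smoothing recurrence used in utils.exponential_smoothing (seeded with the first value, ignoring its NaN handling) gives smoothed values 0, 0.1, 0.29, ..., so the bounds start at 2.0 / 2.1 / 2.29 and -2.0 / -1.9 / -1.71, exactly what the non-seasonal fixture above encodes. A small standalone script reproducing those numbers:

    values = [0, 1, 2, 1, 2, 10, 1, 2, 1]
    alpha, confidence = 0.1, 2

    # Exponential smoothing: seed with the first value, then blend each new point
    # with the previous smoothed value.
    smoothed = [float(values[0])]
    for n in range(1, len(values)):
        smoothed.append(alpha * values[n] + (1 - alpha) * smoothed[n - 1])

    upper = [round(v + confidence, 6) for v in smoothed]
    lower = [round(v - confidence, 6) for v in smoothed]

    print(upper)  # [2.0, 2.1, 2.29, 2.361, 2.5249, 3.47241, 3.425169, 3.482652, 3.434387]
    print(lower)  # [-2.0, -1.9, -1.71, -1.639, -1.4751, -0.52759, -0.574831, -0.517348, -0.565613]
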
