Authored by Alexandr Velikiy 6 years ago, committed by Evgeny Smyshlyaev
4 changed files with 82 additions and 1 deletion
@@ -1,3 +1,4 @@
 from detectors.detector import Detector
 from detectors.pattern_detector import PatternDetector
 from detectors.threshold_detector import ThresholdDetector
+from detectors.anomaly_detector import AnomalyDetector
@@ -0,0 +1,78 @@
import logging
import pandas as pd
from typing import Optional, Union, List, Tuple

from analytic_types.data_bucket import DataBucket
from detectors import Detector
from models import ModelCache
import utils

logger = logging.getLogger('ANOMALY_DETECTOR')

class AnomalyDetector(Detector):

    def __init__(self, *args, **kwargs):
        self.bucket = DataBucket()

    def train(self, dataframe: pd.DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache:
        return {
            'cache': {
                'confidence': payload['confidence'],
                'alpha': payload['alpha']
            }
        }

    def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
        data = dataframe['value']
        last_values = None
        if cache is not None:
            # train() does not store 'last_values' yet, so use .get to avoid a KeyError
            last_values = cache.get('last_values')

        # TODO: detection code here
        smooth_data = utils.exponential_smoothing(data, cache['alpha'])
        upper_bound = utils.exponential_smoothing(data + cache['confidence'], cache['alpha'])
        lower_bound = utils.exponential_smoothing(data - cache['confidence'], cache['alpha'])

        segments = []
        for idx, val in enumerate(data.values):
            if val > upper_bound[idx] or val < lower_bound[idx]:
                segments.append(idx)

        # use positional indexing: with a default integer index,
        # dataframe['timestamp'][-1] raises a KeyError
        last_detection_time = dataframe['timestamp'].iloc[-1]
        return {
            'cache': cache,
            'segments': segments,
            'lastDetectionTime': last_detection_time
        }
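Note: `utils.exponential_smoothing` is defined elsewhere in the repository and is not part of this diff. A minimal sketch of classic single exponential smoothing, consistent with how `detect` calls it (a pd.Series in, a positionally indexable series out); the name and signature are taken from the call site, the body is an assumption:

import pandas as pd

def exponential_smoothing(series: pd.Series, alpha: float) -> pd.Series:
    # classic single exponential smoothing (sketch, not the repo's actual code):
    # s[0] = x[0]; s[t] = alpha * x[t] + (1 - alpha) * s[t - 1]
    smoothed = [series.values[0]]
    for value in series.values[1:]:
        smoothed.append(alpha * value + (1 - alpha) * smoothed[-1])
    # default RangeIndex so positional lookups like upper_bound[idx] work
    return pd.Series(smoothed)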
    def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[dict]:
        # return the result so the Optional[dict] annotation holds
        return self.detect(data, cache)

    def __smooth_data(self, dataframe: pd.DataFrame) -> List[Tuple[int, float]]:
        '''
        Smooth data using exponential smoothing / moving average / weighted average
        '''

    def __get_confidence_window(self, smooth_data: pd.Series, confidence: float) -> Tuple[pd.Series, pd.Series]:
        '''
        Build a confidence interval above and below the smoothed data
        '''
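Both private helpers above are left as docstring-only stubs in this commit. One plausible body for `__get_confidence_window`, mirroring the `data +/- confidence` arithmetic already used in `detect` (an assumption, not the author's implementation):

def __get_confidence_window(self, smooth_data: pd.Series, confidence: float) -> Tuple[pd.Series, pd.Series]:
    # shift the smoothed series up and down by a constant margin (sketch)
    return smooth_data + confidence, smooth_data - confidence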
    def __get_dependency_level(self, alpha: float) -> int:
        '''
        Get the number of previous values that significantly affect the next smoothed value
        '''
        # find the smallest level where the weight (1 - alpha) ** level drops below 0.1
        for level in range(1, 100):
            if (1 - alpha) ** level < 0.1:
                break
        return level
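For example, with `alpha` = 0.5 the loop exits at level 4: 0.5 ** 3 = 0.125 is still at least 0.1, while 0.5 ** 4 = 0.0625 drops below it. In closed form the level is roughly ceil(log(0.1) / log(1 - alpha)).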
    def get_window_size(self, cache: Optional[ModelCache]) -> int:
        if cache is None:
            raise ValueError('anomaly detector got None cache')

        # TODO: calculate value based on `alpha` value from cache
        return 1
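For context, a hypothetical round trip through the new detector; the column names 'value' and 'timestamp' and the payload keys 'confidence' and 'alpha' are what `train` and `detect` expect, everything else is made up for illustration:

import pandas as pd
from detectors.anomaly_detector import AnomalyDetector

df = pd.DataFrame({
    'timestamp': [1, 2, 3, 4, 5],
    'value': [1.0, 1.1, 5.0, 1.2, 1.0]
})

detector = AnomalyDetector()
training_result = detector.train(df, {'confidence': 1.0, 'alpha': 0.5}, None)
detection = detector.detect(df, training_result['cache'])
# detection['segments'] holds indices of values outside the smoothed bounds,
# detection['lastDetectionTime'] the last processed timestamp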