# hastic-server/analytics/analytics/detectors/threshold_detector.py

import logging as log

import operator
import pandas as pd
import numpy as np
from typing import Optional, List

from analytic_types import ModelCache, AnalyticUnitId
from analytic_types.detector import DetectionResult, ProcessingResult
from analytic_types.segment import Segment
from detectors import ProcessingDetector
from time import time
import utils


# Module-level logger registered under the fixed name 'THRESHOLD_DETECTOR'.
logger = log.getLogger('THRESHOLD_DETECTOR')


class ThresholdDetector(ProcessingDetector):
    """Detector that flags data points violating a fixed threshold.

    A threshold is a `value` plus a `condition`. The condition is either one
    of the comparison operators listed in `_COMPARATORS` or the special
    string 'NO_DATA', which flags rows whose value is null instead of
    comparing against `value`.
    """

    # Returned as-is by `get_window_size`.
    # NOTE(review): presumably the number of points the caller keeps around
    # each processed chunk - confirm against the caller.
    WINDOW_SIZE = 3

    # Maps a condition string to the comparison that is applied as
    # `comparator(current_value, threshold_value)`. Built once here instead
    # of once per processed row.
    _COMPARATORS = {
        '>': operator.gt,
        '<': operator.lt,
        '=': operator.eq,
        '>=': operator.ge,
        '<=': operator.le
    }

    def __init__(self, analytic_unit_id: AnalyticUnitId):
        super().__init__(analytic_unit_id)

    def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache:
        """Build the detector cache from the threshold settings.

        No model is fitted: the threshold's 'value' and 'condition' are
        stored together with the interval reported by `utils.find_interval`
        for `dataframe`. The incoming `cache` is not used.

        Returns a dict with a single 'cache' key holding 'value',
        'condition' and 'timeStep'.
        Raises KeyError if `threshold` lacks 'value' or 'condition'.
        """
        time_step = utils.find_interval(dataframe)
        return {
            'cache': {
                'value': threshold['value'],
                'condition': threshold['condition'],
                'timeStep': time_step
            }
        }

    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> Optional[DetectionResult]:
        """Emit a single-point segment for every row that violates the threshold.

        `dataframe` must provide 'value' and 'timestamp' columns; `cache`
        must provide 'value' and 'condition'.

        Returns None for an empty dataframe, otherwise a DetectionResult
        built from the unchanged cache, the detected segments and the
        timestamp (in ms) of the last row.

        Raises ValueError if `cache` is None or empty, or if a non-null
        value has to be checked against a condition that is neither
        'NO_DATA' nor a key of `_COMPARATORS`.
        """
        if cache is None or cache == {}:
            raise ValueError('Threshold detector error: cannot detect before learning')
        if len(dataframe) == 0:
            return None

        value = cache['value']
        condition = cache['condition']
        no_data_mode = condition == 'NO_DATA'
        comparator = self._COMPARATORS.get(condition)

        segments = []
        for _, row in dataframe.iterrows():
            current_value = row['value']
            current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
            # TODO: merge segments
            if pd.isnull(current_value):
                # Missing values are only reported for the NO_DATA condition
                # and are never compared against the threshold.
                if no_data_mode:
                    segment = Segment(current_timestamp, current_timestamp)
                    segment.message = 'NO_DATA detected'
                    segments.append(segment)
                continue

            # A present value can never violate a NO_DATA threshold. Without
            # this guard the row would reach the condition check below and
            # fail, because 'NO_DATA' is not a comparison operator.
            if no_data_mode:
                continue

            # Explicit raise instead of `assert`, which is stripped under
            # `python -O`. Checked lazily so that data consisting only of
            # null values is still accepted for any condition.
            if comparator is None:
                raise ValueError(f'condition {condition} not allowed')

            if comparator(current_value, value):
                segment = Segment(current_timestamp, current_timestamp)
                segment.message = f"{current_value} {condition} threshold's value {value}"
                segments.append(segment)

        last_entry = dataframe.iloc[-1]
        last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
        return DetectionResult(cache, segments, last_detection_time)

    def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
        """Run `detect` on `data` and return its result, or None if the result is falsy."""
        result = self.detect(data, cache)
        return result if result else None

    def get_window_size(self, cache: Optional[ModelCache]) -> int:
        """Return the fixed `WINDOW_SIZE`; `cache` is not consulted."""
        return self.WINDOW_SIZE

    def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult:
        """Combine several detection results into one.

        Segments of all detections are collected and passed through
        `utils.merge_intersecting_segments` together with the 'timeStep'
        taken from the first detection's cache. The cache and the last
        detection time are taken from the last detection in the list.

        An empty `detections` list yields an empty DetectionResult.
        """
        result = DetectionResult()
        if not detections:
            # Nothing to merge, and there is no first cache to read the
            # time step from.
            return result

        time_step = detections[0].cache['timeStep']
        for detection in detections:
            result.segments.extend(detection.segments)
            result.last_detection_time = detection.last_detection_time
            result.cache = detection.cache
        result.segments = utils.merge_intersecting_segments(result.segments, time_step)
        return result

    def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult:
        """Build the constant threshold line for the timestamps of `dataframe`.

        Returns an empty ProcessingResult for the 'NO_DATA' condition.
        Otherwise every timestamp is paired with the threshold value and the
        pairs are returned as `upper_bound` for '>', '>=' and '=', or as
        `lower_bound` for '<' and '<='.

        `dataframe` is not modified.

        Raises ValueError for any other condition.
        """
        value = self.get_value_from_cache(cache, 'value', required = True)
        condition = self.get_value_from_cache(cache, 'condition', required = True)

        if condition == 'NO_DATA':
            return ProcessingResult()

        # Pair each timestamp with the threshold directly. Writing the
        # threshold into the 'value' column instead would overwrite the
        # caller's data and coerce the threshold to the column's dtype.
        timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
        result_series = [(timestamp, value) for timestamp in timestamps]

        if condition in ['>', '>=', '=']:
            return ProcessingResult(upper_bound = result_series)

        if condition in ['<', '<=']:
            return ProcessingResult(lower_bound = result_series)

        raise ValueError(f'{condition} condition not supported')
Fix webhooks (#341) 5 years ago			`import logging as log`
Threshold detector #324 (#330) 5 years ago
Segment info #693 (#700) 5 years ago			`import operator`
Threshold detector #324 (#330) 5 years ago			`import pandas as pd`
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`import numpy as np`
Segment intersection in anomaly detector #615 (#616) 5 years ago			`from typing import Optional, List`
Threshold detector #324 (#330) 5 years ago
Error: too many values to unpack #721 (#725) 5 years ago			`from analytic_types import ModelCache, AnalyticUnitId`
Rename modules and types (#860) 4 years ago			`from analytic_types.detector import DetectionResult, ProcessingResult`
Segment class #636 (#637) 5 years ago			`from analytic_types.segment import Segment`
Threshold line for HSR #727 (#729) 5 years ago			`from detectors import ProcessingDetector`
Fix webhooks (#341) 5 years ago			`from time import time`
Merge threshold segments #624 (#646) 5 years ago			`import utils`
Threshold detector #324 (#330) 5 years ago

Fix webhooks (#341) 5 years ago			`logger = log.getLogger('THRESHOLD_DETECTOR')`
Threshold detector #324 (#330) 5 years ago

Threshold line for HSR #727 (#729) 5 years ago			`class ThresholdDetector(ProcessingDetector):`
Threshold detector #324 (#330) 5 years ago
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`WINDOW_SIZE = 3`

Error: too many values to unpack #721 (#725) 5 years ago			`def __init__(self, analytic_unit_id: AnalyticUnitId):`
			`super().__init__(analytic_unit_id)`
Threshold detector #324 (#330) 5 years ago
			`def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache:`
Error: Can't find interval length of data #688 (#689) 5 years ago			`time_step = utils.find_interval(dataframe)`
Threshold detector #324 (#330) 5 years ago			`return {`
			`'cache': {`
			`'value': threshold['value'],`
Error: Can't find interval length of data #688 (#689) 5 years ago			`'condition': threshold['condition'],`
			`'timeStep': time_step`
Threshold detector #324 (#330) 5 years ago			`}`
			`}`

Make class for detection result (#634) 5 years ago			`def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult:`
Dataframe for detection less than two window size (#532) 2*WINDOW_SIZE checks 5 years ago			`if cache is None or cache == {}:`
			`raise ValueError('Threshold detector error: cannot detect before learning')`
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`if len(dataframe) == 0:`
			`return None`

Threshold detector #324 (#330) 5 years ago			`value = cache['value']`
			`condition = cache['condition']`

No data threshold #370 (#396) "No data" threshold support 5 years ago			`segments = []`
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`for index, row in dataframe.iterrows():`
Segment class #636 (#637) 5 years ago			`current_value = row['value']`
Merge threshold segments #624 (#646) 5 years ago			`current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])`
Segment class #636 (#637) 5 years ago			`segment = Segment(current_timestamp, current_timestamp)`
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`# TODO: merge segments`
Segment class #636 (#637) 5 years ago			`if pd.isnull(current_value):`
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`if condition == 'NO_DATA':`
Segment info #693 (#700) 5 years ago			`segment.message = 'NO_DATA detected'`
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`segments.append(segment)`
			`continue`
No data threshold #370 (#396) "No data" threshold support 5 years ago
Segment info #693 (#700) 5 years ago			`comparators = {`
			`'>': operator.gt,`
			`'<': operator.lt,`
			`'=': operator.eq,`
			`'>=': operator.ge,`
			`'<=': operator.le`
			`}`

			`assert condition in comparators.keys(), f'condition {condition} not allowed'`

			`if comparators[condition](current_value, value):`
			`segment.message = f"{current_value} {condition} threshold's value {value}"`
			`segments.append(segment)`
Threshold detector #324 (#330) 5 years ago
Detect thresholds on the whole dataset #505 (#625) 5 years ago			`last_entry = dataframe.iloc[-1]`
Merge threshold segments #624 (#646) 5 years ago			`last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])`
Error: Can't find interval length of data #688 (#689) 5 years ago			`return DetectionResult(cache, segments, last_detection_time)`
Make class for detection result (#634) 5 years ago
Threshold detector #324 (#330) 5 years ago
Make class for detection result (#634) 5 years ago			`def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:`
Missing segments in result #392 (#393) 5 years ago			`result = self.detect(data, cache)`
			`return result if result else None`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
			`def get_window_size(self, cache: Optional[ModelCache]) -> int:`
			`return self.WINDOW_SIZE`
Merge threshold segments #624 (#646) 5 years ago
Anomaly detector webhooks fix (#670) 5 years ago			`def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult:`
Merge threshold segments #624 (#646) 5 years ago			`result = DetectionResult()`
Error: Can't find interval length of data #688 (#689) 5 years ago			`time_step = detections[0].cache['timeStep']`
Merge threshold segments #624 (#646) 5 years ago			`for detection in detections:`
			`result.segments.extend(detection.segments)`
			`result.last_detection_time = detection.last_detection_time`
			`result.cache = detection.cache`
			`result.segments = utils.merge_intersecting_segments(result.segments, time_step)`
			`return result`
Threshold line for HSR #727 (#729) 5 years ago
			`def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult:`
			`data = dataframe['value']`
			`value = self.get_value_from_cache(cache, 'value', required = True)`
			`condition = self.get_value_from_cache(cache, 'condition', required = True)`

			`if condition == 'NO_DATA':`
			`return ProcessingResult()`

			`data.values[:] = value`
			`timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)`
			`result_series = list(zip(timestamps, data.values.tolist()))`

			`if condition in ['>', '>=', '=']:`
			`return ProcessingResult(upper_bound = result_series)`

			`if condition in ['<', '<=']:`
			`return ProcessingResult(lower_bound = result_series)`

			`raise ValueError(f'{condition} condition not supported')`