import logging as log
import operator
from time import time
from typing import Optional, List

import numpy as np
import pandas as pd

import utils
from analytic_types import ModelCache, AnalyticUnitId
from analytic_types.detector import DetectionResult, ProcessingResult
from analytic_types.segment import Segment
from detectors import ProcessingDetector

logger = log.getLogger('THRESHOLD_DETECTOR')


class ThresholdDetector(ProcessingDetector):
    """Detector that flags points where the value crosses a fixed threshold.

    The condition stored in the cache is one of '>', '<', '=', '>=', '<='
    (compare each point against the threshold value) or 'NO_DATA'
    (flag missing/NaN points).
    """

    WINDOW_SIZE = 3

    # Built once at class level; the original rebuilt this dict on every row
    # inside the detect() loop.
    COMPARATORS = {
        '>': operator.gt,
        '<': operator.lt,
        '=': operator.eq,
        '>=': operator.ge,
        '<=': operator.le
    }

    def __init__(self, analytic_unit_id: AnalyticUnitId):
        super().__init__(analytic_unit_id)

    def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache:
        """Build the detection cache: threshold value, condition and data time step.

        `threshold` must contain 'value' and 'condition' keys; the time step
        is measured from the dataframe's timestamps.
        """
        time_step = utils.find_interval(dataframe)
        return {
            'cache': {
                'value': threshold['value'],
                'condition': threshold['condition'],
                'timeStep': time_step
            }
        }

    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> Optional[DetectionResult]:
        """Return one single-point segment per row that violates the threshold.

        Returns None for an empty dataframe.
        Raises ValueError if the cache is missing or the condition is unknown.
        """
        if cache is None or cache == {}:
            raise ValueError('Threshold detector error: cannot detect before learning')
        if len(dataframe) == 0:
            return None

        value = cache['value']
        condition = cache['condition']
        # Validate once up front instead of asserting per row: `assert` is
        # stripped under `python -O`, and the original assert also fired for
        # the legitimate 'NO_DATA' condition on any non-null data point.
        if condition != 'NO_DATA' and condition not in self.COMPARATORS:
            raise ValueError(f'condition {condition} not allowed')

        segments = []
        for index, row in dataframe.iterrows():
            current_value = row['value']
            current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
            segment = Segment(current_timestamp, current_timestamp)
            # TODO: merge segments
            if pd.isnull(current_value):
                # Missing point: only the 'NO_DATA' condition reports it.
                if condition == 'NO_DATA':
                    segment.message = 'NO_DATA detected'
                    segments.append(segment)
                continue
            if condition == 'NO_DATA':
                # Non-null point under 'NO_DATA' is simply not an anomaly
                # (the original crashed with AssertionError here).
                continue
            if self.COMPARATORS[condition](current_value, value):
                segment.message = f"{current_value} {condition} threshold's value {value}"
                segments.append(segment)

        last_entry = dataframe.iloc[-1]
        last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
        return DetectionResult(cache, segments, last_detection_time)

    def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
        """Run detection on a chunk of streamed data; None when nothing detected."""
        result = self.detect(data, cache)
        return result if result else None

    def get_window_size(self, cache: Optional[ModelCache]) -> int:
        """Window size is fixed for this detector; the cache is ignored."""
        return self.WINDOW_SIZE

    def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult:
        """Merge chunk-level detection results into a single result.

        Segments from all chunks are concatenated and intersecting ones are
        merged using the time step stored in the cache; the cache and last
        detection time are taken from the final chunk.
        """
        result = DetectionResult()
        # Guard: the original raised IndexError on an empty list.
        if not detections:
            return result
        time_step = detections[0].cache['timeStep']
        for detection in detections:
            result.segments.extend(detection.segments)
            result.last_detection_time = detection.last_detection_time
            result.cache = detection.cache
        result.segments = utils.merge_intersecting_segments(result.segments, time_step)
        return result

    def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult:
        """Return the threshold as a constant bound series for visualization.

        Conditions '>', '>=', '=' produce an upper bound; '<', '<=' a lower
        bound; 'NO_DATA' produces an empty result.
        Raises ValueError for any other condition.
        """
        value = self.get_value_from_cache(cache, 'value', required = True)
        condition = self.get_value_from_cache(cache, 'condition', required = True)

        if condition == 'NO_DATA':
            return ProcessingResult()

        # Work on a copy: the original wrote the threshold value into the
        # caller's 'value' column in place, mutating the input dataframe.
        data = dataframe['value'].copy()
        data.values[:] = value
        timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
        result_series = list(zip(timestamps, data.values.tolist()))

        if condition in ['>', '>=', '=']:
            return ProcessingResult(upper_bound = result_series)
        if condition in ['<', '<=']:
            return ProcessingResult(lower_bound = result_series)
        raise ValueError(f'{condition} condition not supported')