You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

111 lines
4.0 KiB

import logging as log
import operator
import pandas as pd
import numpy as np
from typing import Optional, List
from analytic_types import ModelCache, AnalyticUnitId
from analytic_types.detector import DetectionResult, ProcessingResult
from analytic_types.segment import Segment
from detectors import ProcessingDetector
from time import time
import utils
# Module-level logger registered under the 'THRESHOLD_DETECTOR' name.
logger = log.getLogger('THRESHOLD_DETECTOR')
class ThresholdDetector(ProcessingDetector):
    """Detector that flags data points satisfying a fixed threshold condition.

    The cache built by ``train`` stores the threshold ``value``, the
    ``condition`` (one of the keys of ``COMPARATORS`` or ``'NO_DATA'``) and
    the ``timeStep`` of the training data.
    """

    WINDOW_SIZE = 3

    # Condition that matches missing (null) values instead of comparing numbers.
    NO_DATA_CONDITION = 'NO_DATA'

    # Maps a condition string to the comparison applied as op(point, threshold).
    # Kept at class level so it is not rebuilt for every inspected row.
    COMPARATORS = {
        '>': operator.gt,
        '<': operator.lt,
        '=': operator.eq,
        '>=': operator.ge,
        '<=': operator.le,
    }

    def __init__(self, analytic_unit_id: AnalyticUnitId):
        """Create a detector for the given analytic unit id."""
        super().__init__(analytic_unit_id)

    def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache:
        """Build the detector cache from the threshold settings.

        Args:
            dataframe: training data; passed to ``utils.find_interval`` to
                obtain the time step stored in the cache.
            threshold: mapping providing the ``'value'`` and ``'condition'`` keys.
            cache: not used by this detector.

        Returns:
            A dict whose ``'cache'`` entry holds ``value``, ``condition``
            and ``timeStep``.

        Raises:
            KeyError: if ``threshold`` lacks ``'value'`` or ``'condition'``.
        """
        time_step = utils.find_interval(dataframe)
        return {
            'cache': {
                'value': threshold['value'],
                'condition': threshold['condition'],
                'timeStep': time_step
            }
        }

    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> Optional[DetectionResult]:
        """Produce one single-point segment per row matching the threshold.

        Args:
            dataframe: data with ``'timestamp'`` and ``'value'`` columns.
            cache: learned cache providing ``'value'`` and ``'condition'``.

        Returns:
            ``None`` when ``dataframe`` is empty; otherwise a
            ``DetectionResult`` built from the cache, the matching segments
            and the timestamp (in ms) of the last row.

        Raises:
            ValueError: if ``cache`` is ``None``/empty, or if the condition
                is neither ``'NO_DATA'`` nor a key of ``COMPARATORS``.
        """
        if cache is None or cache == {}:
            raise ValueError('Threshold detector error: cannot detect before learning')
        if len(dataframe) == 0:
            return None

        value = cache['value']
        condition = cache['condition']

        # Validate once, up front, with a real exception: an `assert` is
        # stripped under `python -O`, and 'NO_DATA' must not be rejected just
        # because the data contains non-null points.
        no_data_mode = condition == self.NO_DATA_CONDITION
        if not no_data_mode and condition not in self.COMPARATORS:
            raise ValueError(f'condition {condition} not allowed')
        compare = None if no_data_mode else self.COMPARATORS[condition]

        segments = []
        # TODO: merge segments
        for timestamp, current_value in zip(dataframe['timestamp'], dataframe['value']):
            if pd.isnull(current_value):
                # Missing points only count as detections in NO_DATA mode.
                message = 'NO_DATA detected' if no_data_mode else None
            elif compare is not None and compare(current_value, value):
                message = f"{current_value} {condition} threshold's value {value}"
            else:
                message = None

            if message is None:
                continue

            current_timestamp = utils.convert_pd_timestamp_to_ms(timestamp)
            segment = Segment(current_timestamp, current_timestamp)
            segment.message = message
            segments.append(segment)

        last_detection_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'].iloc[-1])
        return DetectionResult(cache, segments, last_detection_time)

    def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
        """Run ``detect`` on ``data`` and return its result, or ``None`` if it is falsy."""
        result = self.detect(data, cache)
        return result if result else None

    def get_window_size(self, cache: Optional[ModelCache]) -> int:
        """Return the fixed ``WINDOW_SIZE``; ``cache`` is not consulted."""
        return self.WINDOW_SIZE

    def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult:
        """Combine several detection results into one.

        Segments of all results are concatenated and then passed through
        ``utils.merge_intersecting_segments`` using the ``'timeStep'`` from
        the first result's cache. The cache and last detection time are taken
        from the last result.

        Args:
            detections: results to combine, in order.

        Returns:
            The combined ``DetectionResult``; an empty ``DetectionResult()``
            when ``detections`` is empty.
        """
        result = DetectionResult()
        if not detections:
            # Nothing to combine; avoid indexing into an empty list.
            return result

        time_step = detections[0].cache['timeStep']
        for detection in detections:
            result.segments.extend(detection.segments)
            result.last_detection_time = detection.last_detection_time
            result.cache = detection.cache
        result.segments = utils.merge_intersecting_segments(result.segments, time_step)
        return result

    def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult:
        """Build the constant threshold line for the timestamps of ``dataframe``.

        Args:
            dataframe: data whose ``timestamp`` column defines the points of
                the returned series. It is not modified.
            cache: learned cache; ``'value'`` and ``'condition'`` are read via
                ``self.get_value_from_cache`` (defined outside this class —
                presumably on the base detector).

        Returns:
            An empty ``ProcessingResult`` for the ``'NO_DATA'`` condition;
            otherwise a ``ProcessingResult`` whose ``upper_bound`` (for
            ``'>'``, ``'>='``, ``'='``) or ``lower_bound`` (for ``'<'``,
            ``'<='``) is a list of ``(timestamp, threshold value)`` tuples.

        Raises:
            ValueError: if the condition is not one of the supported ones.
        """
        value = self.get_value_from_cache(cache, 'value', required=True)
        condition = self.get_value_from_cache(cache, 'condition', required=True)

        if condition == self.NO_DATA_CONDITION:
            return ProcessingResult()

        if condition in ('>', '>=', '='):
            bound_name = 'upper_bound'
        elif condition in ('<', '<='):
            bound_name = 'lower_bound'
        else:
            raise ValueError(f'{condition} condition not supported')

        # Pair every timestamp with the threshold directly instead of writing
        # the threshold into the dataframe's 'value' column: that would mutate
        # the caller's data, coerce the threshold to the column dtype, and
        # fail when the underlying array is read-only.
        timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
        result_series = [(timestamp, value) for timestamp in timestamps]
        return ProcessingResult(**{bound_name: result_series})