You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

112 lines
4.0 KiB

4 years ago
import logging as log
import operator
import pandas as pd
import numpy as np
from typing import Optional, List
from analytic_types import ModelCache, AnalyticUnitId
from analytic_types.detector import DetectionResult, ProcessingResult
from analytic_types.segment import Segment
from detectors import ProcessingDetector
from time import time
import utils
logger = log.getLogger('THRESHOLD_DETECTOR')
class ThresholdDetector(ProcessingDetector):
WINDOW_SIZE = 3
def __init__(self, analytic_unit_id: AnalyticUnitId):
super().__init__(analytic_unit_id)
def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache:
time_step = utils.find_interval(dataframe)
return {
'cache': {
'value': threshold['value'],
'condition': threshold['condition'],
'timeStep': time_step
}
}
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult:
if cache is None or cache == {}:
raise ValueError('Threshold detector error: cannot detect before learning')
if len(dataframe) == 0:
return None
value = cache['value']
condition = cache['condition']
segments = []
for index, row in dataframe.iterrows():
current_value = row['value']
current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
segment = Segment(current_timestamp, current_timestamp)
# TODO: merge segments
if pd.isnull(current_value):
if condition == 'NO_DATA':
segment.message = 'NO_DATA detected'
segments.append(segment)
continue
comparators = {
'>': operator.gt,
'<': operator.lt,
'=': operator.eq,
'>=': operator.ge,
'<=': operator.le
}
assert condition in comparators.keys(), f'condition {condition} not allowed'
if comparators[condition](current_value, value):
segment.message = f"{current_value} {condition} threshold's value {value}"
segments.append(segment)
last_entry = dataframe.iloc[-1]
last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
return DetectionResult(cache, segments, last_detection_time)
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
result = self.detect(data, cache)
return result if result else None
def get_window_size(self, cache: Optional[ModelCache]) -> int:
return self.WINDOW_SIZE
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult:
result = DetectionResult()
time_step = detections[0].cache['timeStep']
for detection in detections:
result.segments.extend(detection.segments)
result.last_detection_time = detection.last_detection_time
result.cache = detection.cache
result.segments = utils.merge_intersecting_segments(result.segments, time_step)
return result
def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult:
data = dataframe['value']
value = self.get_value_from_cache(cache, 'value', required = True)
condition = self.get_value_from_cache(cache, 'condition', required = True)
if condition == 'NO_DATA':
return ProcessingResult()
data.values[:] = value
timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
result_series = list(zip(timestamps, data.values.tolist()))
if condition in ['>', '>=', '=']:
return ProcessingResult(upper_bound = result_series)
if condition in ['<', '<=']:
return ProcessingResult(lower_bound = result_series)
raise ValueError(f'{condition} condition not supported')