|
|
|
import models
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
import logging
|
|
|
|
import config
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
from typing import Optional, Generator, List
|
|
|
|
|
|
|
|
from detectors import Detector
|
|
|
|
from analytic_types.data_bucket import DataBucket
|
|
|
|
from utils import convert_pd_timestamp_to_ms
|
|
|
|
from analytic_types import AnalyticUnitId, ModelCache
|
|
|
|
from analytic_types.detector import DetectionResult
|
|
|
|
from analytic_types.segment import Segment
|
|
|
|
import utils
|
|
|
|
|
|
|
|
# Module-level logger, registered under the 'PATTERN_DETECTOR' name.
logger = logging.getLogger('PATTERN_DETECTOR')
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_model_by_pattern(pattern: str) -> models.Model:
    """Create a fresh model instance for the given pattern name.

    Recognized names are 'GENERAL', 'PEAK', 'TROUGH', 'DROP', 'JUMP'
    and 'CUSTOM'; each maps to the corresponding class in ``models``.

    Raises:
        ValueError: if ``pattern`` is not one of the recognized names.
    """
    model_class_by_pattern = {
        'GENERAL': models.GeneralModel,
        'PEAK': models.PeakModel,
        'TROUGH': models.TroughModel,
        'DROP': models.DropModel,
        'JUMP': models.JumpModel,
        'CUSTOM': models.CustomModel,
    }
    model_class = model_class_by_pattern.get(pattern)
    if model_class is None:
        raise ValueError('Unknown pattern "%s"' % pattern)
    # Instantiate on every call so callers never share a model object.
    return model_class()
|
|
|
|
|
|
|
|
|
|
|
|
class PatternDetector(Detector):
    """Detector that delegates training and detection to a pattern model.

    The model is chosen from ``pattern_type`` by ``resolve_model_by_pattern``.
    Data passed to ``consume_data`` is accumulated in a ``DataBucket`` and
    detection runs on the bucket's contents.
    """

    # Lower bound for the bucket's maximum size (see consume_data).
    MIN_BUCKET_SIZE = 150
    # The bucket's maximum size is window_size times this factor,
    # unless MIN_BUCKET_SIZE is larger.
    BUCKET_WINDOW_SIZE_FACTOR = 5
    # Window size reported by get_window_size when the cache is missing
    # or has no 'windowSize' entry.
    DEFAULT_WINDOW_SIZE = 1

    def __init__(self, pattern_type: str, analytic_unit_id: AnalyticUnitId):
        """Create the detector, its model and an empty data bucket.

        Raises:
            ValueError: propagated from ``resolve_model_by_pattern`` when
                ``pattern_type`` is not a known pattern name.
        """
        super().__init__(analytic_unit_id)
        self.pattern_type = pattern_type
        self.model = resolve_model_by_pattern(self.pattern_type)
        self.bucket = DataBucket()

    def train(self, dataframe: pd.DataFrame, segments: List[Segment], cache: Optional[ModelCache]) -> ModelCache:
        """Fit the model on ``dataframe`` using ``segments``.

        The model state is first restored from ``cache``; the state returned
        by ``fit`` is serialized with ``to_json``.

        Returns:
            A dict with the serialized state under the 'cache' key.

        Raises:
            ValueError: if none of ``segments`` is labeled.
        """
        # TODO: pass only part of dataframe that has segments

        if not self.contains_labeled_segments(segments):
            msg = f'{self.analytic_unit_id} has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment'
            logger.error(msg)
            raise ValueError(msg)

        self.model.state = self.model.get_state(cache)
        new_state: models.ModelState = self.model.fit(dataframe, segments, self.analytic_unit_id)

        # time step is optional
        if len(dataframe) > 1:
            new_state.time_step = utils.find_interval(dataframe)

        new_cache = new_state.to_json()
        if len(new_cache) == 0:
            # Lazy %-args: the dataframe is only rendered if the record is emitted.
            logger.warning(
                'new_cache is empty with data: %s, segments: %s, cache: %s, analytic unit: %s',
                dataframe, segments, cache, self.analytic_unit_id
            )
        return {
            'cache': new_cache
        }

    def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
        """Run the model's detection on ``dataframe``.

        Returns:
            A ``DetectionResult`` built from the serialized model cache, the
            detected segments and the 'timestamp' value of the last row of
            ``dataframe`` converted with ``convert_pd_timestamp_to_ms``.

        Raises:
            ValueError: if ``cache`` is None, if the restored state has no
                window size, or if ``dataframe`` has fewer than two window
                sizes of rows.
        """
        logger.debug('Unit %s got %s data points for detection', self.analytic_unit_id, len(dataframe))
        # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)

        if cache is None:
            msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
            logger.error(msg)
            raise ValueError(msg)

        self.model.state = self.model.get_state(cache)
        window_size = self.model.state.window_size

        if window_size is None:
            message = f'{self.analytic_unit_id} got cache without window_size for detection'
            logger.error(message)
            raise ValueError(message)

        min_length = window_size * 2
        if len(dataframe) < min_length:
            message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points less than minimal length {min_length} points'
            logger.error(message)
            raise ValueError(message)

        detected = self.model.detect(dataframe, self.analytic_unit_id)

        segments = [Segment(segment[0], segment[1]) for segment in detected['segments']]
        new_cache = detected['cache'].to_json()
        last_dataframe_time = dataframe.iloc[-1]['timestamp']
        last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
        return DetectionResult(new_cache, segments, last_detection_time)

    def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
        """Append ``data`` to the bucket and run detection on the bucket.

        Rows containing NaN are dropped before appending.

        Returns:
            The detection result, or None when ``cache`` is None, when no
            rows remain after dropping NaN, or when the bucket holds fewer
            than two window sizes of data.
        """
        logger.debug('Start consume_data for analytic unit %s', self.analytic_unit_id)

        if cache is None:
            logger.debug(f'consume_data get invalid cache {cache} for task {self.analytic_unit_id}, skip')
            return None

        data_without_nan = data.dropna()
        if len(data_without_nan) == 0:
            return None

        # TODO: use ModelState
        # NOTE(review): raises KeyError when the cache has no 'windowSize',
        # unlike get_window_size which falls back to a default; confirm intended.
        window_size = cache['windowSize']
        bucket_max_size = max(window_size * self.BUCKET_WINDOW_SIZE_FACTOR, self.MIN_BUCKET_SIZE)

        self.bucket.set_max_size(bucket_max_size)
        self.bucket.append_data(data_without_nan)

        bucket_size = self.bucket.get_current_size()
        if bucket_size < window_size * 2:
            msg = f'{self.analytic_unit_id} bucket data {bucket_size} less than two window size {window_size * 2}, skip run detection from consume_data'
            logger.debug(msg)
            return None

        res = self.detect(self.bucket.data, cache)

        # Check the result before touching it: the debug line calls res.to_json().
        if not res:
            return None

        logger.debug('End consume_data for analytic unit: %s with res: %s', self.analytic_unit_id, res.to_json())
        return res

    def get_window_size(self, cache: Optional[ModelCache]) -> int:
        """Return the cache's 'windowSize', or DEFAULT_WINDOW_SIZE if unavailable."""
        if cache is None:
            return self.DEFAULT_WINDOW_SIZE
        # TODO: windowSize -> window_size
        return cache.get('windowSize', self.DEFAULT_WINDOW_SIZE)

    def contains_labeled_segments(self, segments: List[Segment]) -> bool:
        """Return True if at least one of ``segments`` is labeled."""
        return any(segment.labeled for segment in segments)
|