Browse Source

fix

pull/1/head
amper43 6 years ago
parent
commit
c0a0ee5f12
  1. 79
      analytics/analytics/analytic_unit_worker.py
  2. 2
      analytics/analytics/detectors/detector.py
  3. 52
      analytics/analytics/detectors/pattern_detector.py
  4. 2
      analytics/analytics/detectors/threshold_detector.py

79
analytics/analytics/analytic_unit_worker.py

@ -2,7 +2,7 @@ import config
import detectors import detectors
import logging import logging
import pandas as pd import pandas as pd
from typing import Optional, Union from typing import Optional, Union, Generator
from models import ModelCache from models import ModelCache
import concurrent.futures import concurrent.futures
import asyncio import asyncio
@ -35,12 +35,83 @@ class AnalyticUnitWorker:
raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT)) raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT))
async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict: async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
# TODO: return without await if cache is None:
return await self._detector.detect(data, cache) msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
logger.error(msg)
raise ValueError(msg)
window_size = cache['WINDOW_SIZE']
chunks = self.__get_data_chunks(data, window_size)
detection_result = {
'cache': None,
'segments': [],
'lastDetectionTime': None
}
for chunk in chunks:
await asyncio.sleep(0)
detected = self._detector.detect(data, cache)
if detected is not None:
detection_result['cache'] = detected['cache']
detection_result['lastDetectionTime'] = detected['lastDetectionTime']
detection_result['segments'].extend(detected['segments'])
return detection_result
def cancel(self): def cancel(self):
if self._training_future is not None: if self._training_future is not None:
self._training_future.cancel() self._training_future.cancel()
async def recieve_data(self, data: pd.DataFrame, cache: Optional[ModelCache]): async def recieve_data(self, data: pd.DataFrame, cache: Optional[ModelCache]):
return self._detector.recieve_data(data, cache) if cache is None:
msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
logger.error(msg)
raise ValueError(msg)
window_size = cache['WINDOW_SIZE']
chunks = self.__get_data_chunks(data, window_size)
detection_result = {
'cache': None,
'segments': [],
'lastDetectionTime': None
}
for chunk in chunks:
await asyncio.sleep(0)
detected = self._detector.recieve_data(data, cache)
if detected is not None:
detection_result['cache'] = detected['cache']
detection_result['lastDetectionTime'] = detected['lastDetectionTime']
detection_result['segments'].extend(detected['segments'])
return detection_result
def __get_data_chunks(self, dataframe: pd.DataFrame, window_size: int) -> Generator[pd.DataFrame, None, None]:
"""
TODO: fix description
Return generator, that yields dataframe's chunks. Chunks have 3 WINDOW_SIZE length and 2 WINDOW_SIZE step.
Example: recieved dataframe: [0, 1, 2, 3, 4, 5], returned chunks [0, 1, 2], [2, 3, 4], [4, 5].
"""
chunk_size = window_size * 100
intersection = window_size
data_len = len(dataframe)
if data_len < chunk_size:
return (chunk for chunk in (dataframe,))
def slices():
nonintersected = chunk_size - intersection
mod = data_len % nonintersected
chunks_number = data_len // nonintersected
offset = 0
for i in range(chunks_number):
yield slice(offset, offset + nonintersected + 1)
offset += nonintersected
yield slice(offset, offset + mod)
return (dataframe[chunk_slice] for chunk_slice in slices())

2
analytics/analytics/detectors/detector.py

@ -14,7 +14,7 @@ class Detector(ABC):
pass pass
@abstractmethod @abstractmethod
async def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> dict: def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> dict:
pass pass
@abstractmethod @abstractmethod

52
analytics/analytics/detectors/pattern_detector.py

@ -51,32 +51,13 @@ class PatternDetector(Detector):
'cache': new_cache 'cache': new_cache
} }
async def detect(self, dataframe: pd.DataFrame, cache: Optional[models.ModelCache]) -> dict: def detect(self, dataframe: pd.DataFrame, cache: Optional[models.ModelCache]) -> dict:
logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe))) logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe)))
# TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643) # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)
if not cache:
msg = f'{self.analytic_unit_id} detection got invalid cache {cache}, skip detection'
logger.error(msg)
raise ValueError(msg)
window_size = cache.get('WINDOW_SIZE')
if not window_size:
msg = f'{self.analytic_unit_id} detection got invalid window size {window_size}'
chunks = self.__get_data_chunks(dataframe, window_size)
segments = []
segment_parser = lambda segment: { 'from': segment[0], 'to': segment[1] }
for chunk in chunks:
await asyncio.sleep(0)
detected = self.model.detect(dataframe, self.analytic_unit_id, cache) detected = self.model.detect(dataframe, self.analytic_unit_id, cache)
for detected_segment in detected['segments']:
detected_segment = segment_parser(detected_segment)
if detected_segment not in segments:
segments.append(detected_segment)
segments = [{ 'from': segment[0], 'to': segment[1] } for segment in detected['segments']]
newCache = detected['cache'] newCache = detected['cache']
last_dataframe_time = dataframe.iloc[-1]['timestamp'] last_dataframe_time = dataframe.iloc[-1]['timestamp']
@ -98,7 +79,7 @@ class PatternDetector(Detector):
if cache == None: if cache == None:
logging.debug('Recieve_data cache is None for task {}'.format(self.analytic_unit_id)) logging.debug('Recieve_data cache is None for task {}'.format(self.analytic_unit_id))
cache = {} cache = {}
bucket_size = max(cache.get('WINDOW_SIZE', 0) * 3, self.MIN_BUCKET_SIZE) bucket_size = max(cache.get('WINDOW_SIZE', 0) * 5, self.MIN_BUCKET_SIZE)
res = self.detect(self.bucket.data, cache) res = self.detect(self.bucket.data, cache)
@ -111,30 +92,3 @@ class PatternDetector(Detector):
else: else:
return None return None
def __get_data_chunks(self, dataframe: pd.DataFrame, window_size: int) -> Generator[pd.DataFrame, None, None]:
"""
TODO: fix description
Return generator, that yields dataframe's chunks. Chunks have 3 WINDOW_SIZE length and 2 WINDOW_SIZE step.
Example: recieved dataframe: [0, 1, 2, 3, 4, 5], returned chunks [0, 1, 2], [2, 3, 4], [4, 5].
"""
chunk_size = window_size * 100
intersection = window_size
data_len = len(dataframe)
if data_len < chunk_size:
return (chunk for chunk in (dataframe,))
def slices():
nonintersected = chunk_size - intersection
mod = data_len % nonintersected
chunks_number = data_len // nonintersected
offset = 0
for i in range(chunks_number):
yield slice(offset, offset + nonintersected + 1)
offset += nonintersected
yield slice(offset, offset + mod)
return (dataframe[chunk_slice] for chunk_slice in slices())

2
analytics/analytics/detectors/threshold_detector.py

@ -25,7 +25,7 @@ class ThresholdDetector(Detector):
} }
} }
async def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict: def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
if cache == None: if cache == None:
raise 'Threshold detector error: cannot detect before learning' raise 'Threshold detector error: cannot detect before learning'
value = cache['value'] value = cache['value']

Loading…
Cancel
Save