
Revert "fix"

This reverts commit c0a0ee5f12.
pull/1/head
amper43 6 years ago
parent
commit
a1957005df
4 changed files:

  1. analytics/analytics/analytic_unit_worker.py (79 changes)
  2. analytics/analytics/detectors/detector.py (2 changes)
  3. analytics/analytics/detectors/pattern_detector.py (56 changes)
  4. analytics/analytics/detectors/threshold_detector.py (2 changes)

analytics/analytics/analytic_unit_worker.py (79 changes)

@@ -2,7 +2,7 @@ import config
 import detectors
 import logging
 import pandas as pd
-from typing import Optional, Union, Generator
+from typing import Optional, Union
 from models import ModelCache
 import concurrent.futures
 import asyncio
@@ -35,83 +35,12 @@ class AnalyticUnitWorker:
             raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT))
 
     async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
-        if cache is None:
-            msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
-            logger.error(msg)
-            raise ValueError(msg)
-
-        window_size = cache['WINDOW_SIZE']
-        chunks = self.__get_data_chunks(data, window_size)
-
-        detection_result = {
-            'cache': None,
-            'segments': [],
-            'lastDetectionTime': None
-        }
-
-        for chunk in chunks:
-            await asyncio.sleep(0)
-            detected = self._detector.detect(data, cache)
-            if detected is not None:
-                detection_result['cache'] = detected['cache']
-                detection_result['lastDetectionTime'] = detected['lastDetectionTime']
-                detection_result['segments'].extend(detected['segments'])
-
-        return detection_result
+        # TODO: return without await
+        return await self._detector.detect(data, cache)
 
     def cancel(self):
         if self._training_future is not None:
             self._training_future.cancel()
 
     async def recieve_data(self, data: pd.DataFrame, cache: Optional[ModelCache]):
-        if cache is None:
-            msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
-            logger.error(msg)
-            raise ValueError(msg)
-
-        window_size = cache['WINDOW_SIZE']
-        chunks = self.__get_data_chunks(data, window_size)
-
-        detection_result = {
-            'cache': None,
-            'segments': [],
-            'lastDetectionTime': None
-        }
-
-        for chunk in chunks:
-            await asyncio.sleep(0)
-            detected = self._detector.recieve_data(data, cache)
-            if detected is not None:
-                detection_result['cache'] = detected['cache']
-                detection_result['lastDetectionTime'] = detected['lastDetectionTime']
-                detection_result['segments'].extend(detected['segments'])
-
-        return detection_result
-
-    def __get_data_chunks(self, dataframe: pd.DataFrame, window_size: int) -> Generator[pd.DataFrame, None, None]:
-        """
-        TODO: fix description
-        Return a generator that yields chunks of the dataframe. Chunks have 3 * WINDOW_SIZE length and 2 * WINDOW_SIZE step.
-        Example: received dataframe [0, 1, 2, 3, 4, 5] yields chunks [0, 1, 2], [2, 3, 4], [4, 5].
-        """
-        chunk_size = window_size * 100
-        intersection = window_size
-        data_len = len(dataframe)
-
-        if data_len < chunk_size:
-            return (chunk for chunk in (dataframe,))
-
-        def slices():
-            nonintersected = chunk_size - intersection
-            mod = data_len % nonintersected
-            chunks_number = data_len // nonintersected
-
-            offset = 0
-            for i in range(chunks_number):
-                yield slice(offset, offset + nonintersected + 1)
-                offset += nonintersected
-
-            yield slice(offset, offset + mod)
-
-        return (dataframe[chunk_slice] for chunk_slice in slices())
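For reference, here is how the chunking removed here (and re-added to PatternDetector below) behaves in isolation. This is a minimal standalone sketch; the function name and sample sizes are mine, not code from this commit. It mirrors the same arithmetic: chunks of window_size * 100 rows whose starts advance by chunk_size - window_size, each slice one row longer than the step, plus a remainder chunk.

    import pandas as pd

    def get_data_chunks(dataframe: pd.DataFrame, window_size: int):
        chunk_size = window_size * 100   # full chunk length, in rows
        step = chunk_size - window_size  # distance between chunk starts
        data_len = len(dataframe)
        if data_len < chunk_size:
            yield dataframe              # short input: a single chunk, no splitting
            return
        offset = 0
        for _ in range(data_len // step):
            # each slice is step + 1 rows, so adjacent chunks share one boundary row
            yield dataframe[offset:offset + step + 1]
            offset += step
        yield dataframe[offset:offset + data_len % step]  # trailing remainder

    df = pd.DataFrame({'value': range(250)})
    print([len(c) for c in get_data_chunks(df, window_size=2)])
    # chunk_size = 200, step = 198 -> chunk lengths [199, 52]

Note that the docstring's claimed "3 * WINDOW_SIZE length and 2 * WINDOW_SIZE step" does not match this arithmetic; the in-code "TODO: fix description" flags exactly that mismatch.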

analytics/analytics/detectors/detector.py (2 changes)

@@ -14,7 +14,7 @@ class Detector(ABC):
         pass
 
     @abstractmethod
-    def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> dict:
+    async def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> dict:
         pass
 
     @abstractmethod
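The two-line change above turns detect into an abstract coroutine, so every concrete detector must be awaitable. A reduced sketch of the pattern (the NoopDetector subclass and its return shape are illustrative, not from this repo):

    from abc import ABC, abstractmethod
    from typing import Optional

    class Detector(ABC):
        @abstractmethod
        async def detect(self, dataframe, cache: Optional[dict]) -> dict:
            """Subclasses must implement detect as a coroutine."""

    class NoopDetector(Detector):
        async def detect(self, dataframe, cache: Optional[dict]) -> dict:
            # a trivial awaitable implementation satisfying the contract
            return {'segments': [], 'cache': cache, 'lastDetectionTime': None}

This is why AnalyticUnitWorker.do_detect above reverts to `return await self._detector.detect(data, cache)`.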

analytics/analytics/detectors/pattern_detector.py (56 changes)

@@ -51,13 +51,32 @@ class PatternDetector(Detector):
             'cache': new_cache
         }
 
-    def detect(self, dataframe: pd.DataFrame, cache: Optional[models.ModelCache]) -> dict:
+    async def detect(self, dataframe: pd.DataFrame, cache: Optional[models.ModelCache]) -> dict:
         logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe)))
         # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)
-        detected = self.model.detect(dataframe, self.analytic_unit_id, cache)
-        segments = [{ 'from': segment[0], 'to': segment[1] } for segment in detected['segments']]
+
+        if not cache:
+            msg = f'{self.analytic_unit_id} detection got invalid cache {cache}, skip detection'
+            logger.error(msg)
+            raise ValueError(msg)
+
+        window_size = cache.get('WINDOW_SIZE')
+
+        if not window_size:
+            msg = f'{self.analytic_unit_id} detection got invalid window size {window_size}'
+
+        chunks = self.__get_data_chunks(dataframe, window_size)
+
+        segments = []
+        segment_parser = lambda segment: { 'from': segment[0], 'to': segment[1] }
+        for chunk in chunks:
+            await asyncio.sleep(0)
+            detected = self.model.detect(dataframe, self.analytic_unit_id, cache)
+            for detected_segment in detected['segments']:
+                detected_segment = segment_parser(detected_segment)
+                if detected_segment not in segments:
+                    segments.append(detected_segment)
 
         newCache = detected['cache']
         last_dataframe_time = dataframe.iloc[-1]['timestamp']
@@ -79,7 +98,7 @@ class PatternDetector(Detector):
         if cache == None:
             logging.debug('Recieve_data cache is None for task {}'.format(self.analytic_unit_id))
             cache = {}
-        bucket_size = max(cache.get('WINDOW_SIZE', 0) * 5, self.MIN_BUCKET_SIZE)
+        bucket_size = max(cache.get('WINDOW_SIZE', 0) * 3, self.MIN_BUCKET_SIZE)
 
         res = self.detect(self.bucket.data, cache)
@@ -92,3 +111,30 @@ class PatternDetector(Detector):
         else:
             return None
+
+    def __get_data_chunks(self, dataframe: pd.DataFrame, window_size: int) -> Generator[pd.DataFrame, None, None]:
+        """
+        TODO: fix description
+        Return a generator that yields chunks of the dataframe. Chunks have 3 * WINDOW_SIZE length and 2 * WINDOW_SIZE step.
+        Example: received dataframe [0, 1, 2, 3, 4, 5] yields chunks [0, 1, 2], [2, 3, 4], [4, 5].
+        """
+        chunk_size = window_size * 100
+        intersection = window_size
+        data_len = len(dataframe)
+
+        if data_len < chunk_size:
+            return (chunk for chunk in (dataframe,))
+
+        def slices():
+            nonintersected = chunk_size - intersection
+            mod = data_len % nonintersected
+            chunks_number = data_len // nonintersected
+
+            offset = 0
+            for i in range(chunks_number):
+                yield slice(offset, offset + nonintersected + 1)
+                offset += nonintersected
+
+            yield slice(offset, offset + mod)
+
+        return (dataframe[chunk_slice] for chunk_slice in slices())
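The restored detect loop interleaves `await asyncio.sleep(0)` between chunks so a long detection keeps yielding to the event loop, and de-duplicates segments because overlapping chunks can report the same match twice. A compact sketch of just that loop, with a stand-in detect_one callback in place of self.model.detect (not this repo's API):

    import asyncio

    async def detect_in_chunks(chunks, detect_one):
        segments = []
        for chunk in chunks:
            await asyncio.sleep(0)  # give other event-loop tasks a turn
            for start, end in detect_one(chunk)['segments']:
                segment = {'from': start, 'to': end}
                if segment not in segments:  # overlapping chunks may repeat a segment
                    segments.append(segment)
        return segments

    fake_detect = lambda chunk: {'segments': [(chunk[0], chunk[-1])]}
    print(asyncio.run(detect_in_chunks([[0, 1, 2], [2, 3, 4]], fake_detect)))
    # -> [{'from': 0, 'to': 2}, {'from': 2, 'to': 4}]

One thing to notice in the diff above: inside the loop, self.model.detect is still called with the whole `dataframe` rather than `chunk`, so every iteration scans the full series.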

analytics/analytics/detectors/threshold_detector.py (2 changes)

@@ -25,7 +25,7 @@ class ThresholdDetector(Detector):
             }
         }
 
-    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
+    async def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
         if cache == None:
             raise 'Threshold detector error: cannot detect before learning'
         value = cache['value']
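One caveat visible in the context lines above: `raise 'Threshold detector error: ...'` raises a bare string, which Python 3 rejects with `TypeError: exceptions must derive from BaseException`. A hedged fix sketch, keeping the message but choosing ValueError myself (the repo may prefer another exception type):

    class ThresholdDetector:
        async def detect(self, dataframe, cache) -> dict:
            if cache is None:
                # Python 3 cannot raise a plain string; wrap it in an exception type
                raise ValueError('Threshold detector error: cannot detect before learning')
            value = cache['value']
            return {'segments': [], 'cache': cache}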
