# hastic-server/analytics/analytics/analytic_unit_worker.py

import config
import detectors
import logging
import pandas as pd
from typing import Optional, Union, Generator
from models import ModelCache
import concurrent.futures
import asyncio


# Module-level logger for this file, registered under the fixed name 'AnalyticUnitWorker'.
logger = logging.getLogger('AnalyticUnitWorker')


class AnalyticUnitWorker:
    """
    Runs training and detection of a single analytic unit on top of a detector.

    Training is submitted to the supplied executor and awaited with a timeout.
    Detection is executed in the calling coroutine, chunk by chunk, giving control
    back to the event loop before every chunk.
    """

    # Chunk length measured in windows. Must stay greater than 1: consecutive chunks
    # overlap by one window, so the step between chunks is (factor - 1) windows.
    _CHUNK_WINDOW_SIZE_FACTOR = 100

    def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor):
        """
        analytic_unit_id - identifier put into log and error messages.
        detector - object whose train / detect / recieve_data methods do the actual work.
        executor - executor that training calls are submitted to.
        """
        self.analytic_unit_id = analytic_unit_id
        self._detector = detector
        self._executor: concurrent.futures.Executor = executor
        # Set by do_train; kept so that cancel() can reach the latest training.
        self._training_future: Optional[asyncio.Future] = None

    async def do_train(
        self, payload: Union[list, dict], data: pd.DataFrame, cache: Optional[ModelCache]
    ) -> Optional[ModelCache]:
        """
        Submit detector.train(data, payload, cache) to the executor and wait for its result.

        Returns the value produced by detector.train, or None if the wait was cancelled
        (for example through cancel()).
        Raises Exception if no result arrives within config.LEARNING_TIMEOUT seconds.
        """
        cfuture: concurrent.futures.Future = self._executor.submit(
            self._detector.train, data, payload, cache
        )
        self._training_future = asyncio.wrap_future(cfuture)
        try:
            new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout=config.LEARNING_TIMEOUT)
            return new_cache
        except asyncio.CancelledError:
            # Cancellation is reported to the caller as "no new cache" instead of an error.
            return None
        except asyncio.TimeoutError:
            raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT))

    async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
        """
        Run detector.detect over data chunk by chunk and merge the results.

        Returns dict with 'cache', 'segments' and 'lastDetectionTime' keys.
        Raises ValueError if cache is None.
        """
        return await self.__process_in_chunks(self._detector.detect, data, cache)

    def cancel(self):
        """
        Cancel the future of the latest do_train call, if there was one.
        """
        if self._training_future is not None:
            self._training_future.cancel()

    async def recieve_data(self, data: pd.DataFrame, cache: Optional[ModelCache]):
        """
        Run detector.recieve_data over data chunk by chunk and merge the results.

        Returns dict with 'cache', 'segments' and 'lastDetectionTime' keys.
        Raises ValueError if cache is None.
        """
        return await self.__process_in_chunks(self._detector.recieve_data, data, cache)

    async def __process_in_chunks(self, handler, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
        """
        Shared implementation of do_detect and recieve_data.

        handler - detector method called as handler(chunk, cache); it returns either None
        or a dict with 'cache', 'segments' and 'lastDetectionTime' keys.
        'segments' of all chunks are concatenated, 'cache' and 'lastDetectionTime'
        are taken from the last chunk that returned a result.
        """
        if cache is None:
            msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
            logger.error(msg)
            raise ValueError(msg)

        window_size = cache['WINDOW_SIZE']

        detection_result = {
            'cache': None,
            'segments': [],
            'lastDetectionTime': None
        }

        for chunk in self.__get_data_chunks(data, window_size):
            # Let other coroutines run between chunks.
            await asyncio.sleep(0)
            # The handler gets the current chunk only; handing over the whole dataframe
            # here would redo the full detection once per chunk and duplicate segments.
            detected = handler(chunk, cache)
            if detected is not None:
                detection_result['cache'] = detected['cache']
                detection_result['lastDetectionTime'] = detected['lastDetectionTime']
                detection_result['segments'].extend(detected['segments'])

        return detection_result

    def __get_data_chunks(self, dataframe: pd.DataFrame, window_size: int) -> Generator[pd.DataFrame, None, None]:
        """
        Return generator that yields overlapping chunks of dataframe, in order.

        Every chunk is _CHUNK_WINDOW_SIZE_FACTOR * window_size rows long (the last one may be
        shorter) and starts window_size rows before the end of the previous chunk.
        Dataframe shorter than one chunk is yielded as a single chunk; the same is done
        when window_size is not positive, because no chunk length can be derived from it.
        Example for chunk length 3 and intersection 1:
        recieved dataframe: [0, 1, 2, 3, 4, 5], returned chunks [0, 1, 2], [2, 3, 4], [4, 5].
        """
        chunk_size = window_size * self._CHUNK_WINDOW_SIZE_FACTOR
        intersection = window_size
        data_len = len(dataframe)

        if window_size <= 0 or data_len < chunk_size:
            return (chunk for chunk in (dataframe,))

        # Positive, because window_size > 0 and the factor is greater than 1.
        step = chunk_size - intersection

        def chunks():
            offset = 0
            while True:
                # Slicing clips at the end of the dataframe, so the last chunk may be shorter.
                yield dataframe[offset:offset + chunk_size]
                if offset + chunk_size >= data_len:
                    # This chunk reached the last row: no empty or redundant tail chunk.
                    return
                offset += step

        return chunks()
Analytics server messaging #24 v2 (#49) * add zmq to deps * basic zmq usage & build system fxs * continue zmq integration & refactorings * server.py + logging * some commit * ping-pong server-analytics & pair type * packing zmq.node for production 6 years ago			`import config`
data_service, renamings and detectors imports 6 years ago			`import detectors`
Add src 6 years ago			`import logging`
Fix prediction (#118) 6 years ago			`import pandas as pd`
fix 5 years ago			`from typing import Optional, Union, Generator`
Error: detect missing cache #299 (#302) * Pass cache to detect * rename AnalyticUnitCache to ModelCache * Send .data from cache * Drop nans from bucket && set window size from cache && check cache None * Read proper payload on DETECT message 6 years ago			`from models import ModelCache`
asyncio.wait_for training_future 5 years ago			`import concurrent.futures`
Workers for analyticunits #203 (#265) * rm async from analytic_unit_worker + some refactorings in maager * AnalyticUnitManager * workers for analytic units 6 years ago			`import asyncio`
Add src 6 years ago
asyncio.wait_for training_future 5 years ago
analytics: detector class + more types + remove Model.(save/load) 6 years ago			`logger = logging.getLogger('AnalyticUnitWorker')`
Analytics server messaging #24 v2 (#49) * add zmq to deps * basic zmq usage & build system fxs * continue zmq integration & refactorings * server.py + logging * some commit * ping-pong server-analytics & pair type * packing zmq.node for production 6 years ago
Add src 6 years ago
analytics: detector class + more types + remove Model.(save/load) 6 years ago			`class AnalyticUnitWorker:`

asyncio.wait_for training_future 5 years ago			`def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor):`
One panel - one worker #62 6 years ago			`self.analytic_unit_id = analytic_unit_id`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`self._detector = detector`
asyncio.wait_for training_future 5 years ago			`self._executor: concurrent.futures.Executor = executor`
Timeout for learning #481 (#485) 5 years ago			`self._training_future: asyncio.Future = None`
Add src 6 years ago
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`async def do_train(`
Threshold detector #324 (#330) 6 years ago			`self, payload: Union[list, dict], data: pd.DataFrame, cache: Optional[ModelCache]`
optional type in do_train 5 years ago			`) -> Optional[ModelCache]:`
asyncio.wait_for training_future 5 years ago			`cfuture: concurrent.futures.Future = self._executor.submit(`
Timeout for learning #481 (#485) 5 years ago			`self._detector.train, data, payload, cache`
Workers for analyticunits #203 (#265) * rm async from analytic_unit_worker + some refactorings in maager * AnalyticUnitManager * workers for analytic units 6 years ago			`)`
asyncio.wait_for training_future 5 years ago			`self._training_future = asyncio.wrap_future(cfuture)`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`try:`
asyncio.wait_for training_future 5 years ago			`new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout = config.LEARNING_TIMEOUT)`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`return new_cache`
asyncio.wait_for training_future 5 years ago			`except asyncio.CancelledError:`
			`return None`
			`except asyncio.TimeoutError:`
Timeout for learning #481 (#485) 5 years ago			`raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT))`
trim trailing whitespaces 6 years ago
Error: detect missing cache #299 (#302) * Pass cache to detect * rename AnalyticUnitCache to ModelCache * Send .data from cache * Drop nans from bucket && set window size from cache && check cache None * Read proper payload on DETECT message 6 years ago			`async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:`
fix 5 years ago			`if cache is None:`
			`msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'`
			`logger.error(msg)`
			`raise ValueError(msg)`

			`window_size = cache['WINDOW_SIZE']`
			`chunks = self.__get_data_chunks(data, window_size)`

			`detection_result = {`
			`'cache': None,`
			`'segments': [],`
			`'lastDetectionTime': None`
			`}`

			`for chunk in chunks:`
			`await asyncio.sleep(0)`
			`detected = self._detector.detect(data, cache)`
			`if detected is not None:`
			`detection_result['cache'] = detected['cache']`
			`detection_result['lastDetectionTime'] = detected['lastDetectionTime']`
			`detection_result['segments'].extend(detected['segments'])`

			`return detection_result`
trim trailing whitespaces 6 years ago
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`def cancel(self):`
Timeout for learning #481 (#485) 5 years ago			`if self._training_future is not None:`
			`self._training_future.cancel()`
Analytic unit worker bucket #273 (#297) 6 years ago
Error: detect missing cache #299 (#302) * Pass cache to detect * rename AnalyticUnitCache to ModelCache * Send .data from cache * Drop nans from bucket && set window size from cache && check cache None * Read proper payload on DETECT message 6 years ago			`async def recieve_data(self, data: pd.DataFrame, cache: Optional[ModelCache]):`
fix 5 years ago			`if cache is None:`
			`msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'`
			`logger.error(msg)`
			`raise ValueError(msg)`

			`window_size = cache['WINDOW_SIZE']`
			`chunks = self.__get_data_chunks(data, window_size)`

			`detection_result = {`
			`'cache': None,`
			`'segments': [],`
			`'lastDetectionTime': None`
			`}`

			`for chunk in chunks:`
			`await asyncio.sleep(0)`
			`detected = self._detector.recieve_data(data, cache)`
			`if detected is not None:`
			`detection_result['cache'] = detected['cache']`
			`detection_result['lastDetectionTime'] = detected['lastDetectionTime']`
			`detection_result['segments'].extend(detected['segments'])`

			`return detection_result`

			`def __get_data_chunks(self, dataframe: pd.DataFrame, window_size: int) -> Generator[pd.DataFrame, None, None]:`
			`"""`
			`TODO: fix description`
			`Return generator, that yields dataframe's chunks. Chunks have 3 WINDOW_SIZE length and 2 WINDOW_SIZE step.`
			`Example: recieved dataframe: [0, 1, 2, 3, 4, 5], returned chunks [0, 1, 2], [2, 3, 4], [4, 5].`
			`"""`
			`chunk_size = window_size * 100`
			`intersection = window_size`

			`data_len = len(dataframe)`

			`if data_len < chunk_size:`
			`return (chunk for chunk in (dataframe,))`

			`def slices():`
			`nonintersected = chunk_size - intersection`
			`mod = data_len % nonintersected`
			`chunks_number = data_len // nonintersected`

			`offset = 0`
			`for i in range(chunks_number):`
			`yield slice(offset, offset + nonintersected + 1)`
			`offset += nonintersected`

			`yield slice(offset, offset + mod)`

			`return (dataframe[chunk_slice] for chunk_slice in slices())`