# hastic-server/analytics/analytics/analytic_unit_worker.py

import config
import detectors
import logging
import pandas as pd
from typing import Optional, Union, Generator, List
import concurrent.futures
import asyncio
import utils
from utils import get_intersected_chunks, get_chunks, prepare_data

from analytic_types import ModelCache
from analytic_types.detector_typing import DetectionResult

# Module-level logger for this file, registered under the name 'AnalyticUnitWorker'.
logger = logging.getLogger('AnalyticUnitWorker')


class AnalyticUnitWorker:
    """Drives training and detection of one detector for one analytic unit.

    Training is submitted to the executor passed to the constructor and
    awaited with a timeout; detection is performed chunk by chunk inside
    the calling coroutine, yielding to the event loop between chunks.
    """

    # Chunk length handed to the detector, as a multiple of its window size.
    CHUNK_WINDOW_SIZE_FACTOR = 100
    # Value passed to `get_intersected_chunks` as the chunk intersection in
    # `do_detect`, as a multiple of the detector's window size.
    CHUNK_INTERSECTION_FACTOR = 2

    assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \
        'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR'

    def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor):
        """Store the unit id, the detector to drive and the executor used for training."""
        self.analytic_unit_id = analytic_unit_id
        self._detector = detector
        self._executor: concurrent.futures.Executor = executor
        # None until `do_train` has been called at least once.
        self._training_future: Optional[asyncio.Future] = None

    async def do_train(
        self, payload: Union[list, dict], data: list, cache: Optional[ModelCache]
    ) -> Optional[ModelCache]:
        """Train the detector in the executor and return the resulting cache.

        `data` is converted with `prepare_data` and passed, together with
        `payload` and `cache`, to `detector.train` via `executor.submit`.

        Returns:
            The value produced by `detector.train`, or None when the wait
            was cancelled (for example through `cancel`).

        Raises:
            Exception: if training does not finish within
                `config.LEARNING_TIMEOUT` seconds.
        """
        dataframe = prepare_data(data)

        cfuture: concurrent.futures.Future = self._executor.submit(
            self._detector.train, dataframe, payload, cache
        )
        # Wrap the executor future so it can be awaited and cancelled from asyncio.
        self._training_future = asyncio.wrap_future(cfuture)
        try:
            new_cache: ModelCache = await asyncio.wait_for(
                self._training_future, timeout=config.LEARNING_TIMEOUT
            )
            return new_cache
        except asyncio.CancelledError:
            return None
        except asyncio.TimeoutError as error:
            # Chain explicitly so the traceback shows the timeout as the direct cause.
            raise Exception(
                'Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT)
            ) from error

    async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
        """Run `detector.detect` over intersecting chunks of `data`.

        Chunk size and intersection are the detector's window size multiplied
        by CHUNK_WINDOW_SIZE_FACTOR and CHUNK_INTERSECTION_FACTOR.

        Returns:
            `to_json()` of the accumulated result.
            NOTE(review): the annotation says DetectionResult, but the value
            returned is whatever `DetectionResult.to_json()` produces --
            confirm the intended type.
        """
        window_size = self._detector.get_window_size(cache)
        chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR
        chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR

        detection_result = await self.__process_chunks(
            get_intersected_chunks(data, chunk_intersection, chunk_size),
            self._detector.detect,
            cache
        )
        return detection_result.to_json()

    def cancel(self):
        """Cancel the training future created by `do_train`, if there is one."""
        if self._training_future is not None:
            self._training_future.cancel()

    async def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
        """Run `detector.consume_data` over non-intersecting chunks of `data`.

        Chunk size is the detector's window size multiplied by
        CHUNK_WINDOW_SIZE_FACTOR.

        Returns:
            None when the accumulated result has no `last_detection_time`;
            otherwise `to_json()` of the accumulated result.
            NOTE(review): as in `do_detect`, the annotation names
            DetectionResult while `to_json()` is what is returned -- confirm.
        """
        window_size = self._detector.get_window_size(cache)

        detection_result = await self.__process_chunks(
            get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR),
            self._detector.consume_data,
            cache
        )

        if detection_result.last_detection_time is None:
            return None
        return detection_result.to_json()

    async def __process_chunks(self, chunks, handle_chunk, cache: Optional[ModelCache]) -> DetectionResult:
        """Feed every chunk to `handle_chunk` and accumulate the merged result.

        `handle_chunk` is called as `handle_chunk(chunk_dataframe, cache)` and
        may return None for a chunk; such chunks contribute nothing.
        """
        detection_result = DetectionResult()

        for chunk in chunks:
            # Yield to the event loop between chunks so other tasks can run.
            await asyncio.sleep(0)
            chunk_dataframe = prepare_data(chunk)
            detected = handle_chunk(chunk_dataframe, cache)
            self.__append_detection_result(detection_result, detected)

        detection_result.segments = self._detector.merge_segments(detection_result.segments)
        return detection_result

    # TODO: move result concatenation to Detectors
    def __append_detection_result(self, detection_result: DetectionResult, new_chunk: DetectionResult):
        """Fold `new_chunk` into `detection_result` in place.

        Cache and last detection time are overwritten with the chunk's values;
        segments are appended. A `new_chunk` of None is ignored.
        """
        if new_chunk is not None:
            detection_result.cache = new_chunk.cache
            detection_result.last_detection_time = new_chunk.last_detection_time
            detection_result.segments.extend(new_chunk.segments)
Analytics server messaging #24 v2 (#49) * add zmq to deps * basic zmq usage & build system fxs * continue zmq integration & refactorings * server.py + logging * some commit * ping-pong server-analytics & pair type * packing zmq.node for production 6 years ago			`import config`
data_service, renamings and detectors imports 6 years ago			`import detectors`
Add src 6 years ago			`import logging`
Fix prediction (#118) 6 years ago			`import pandas as pd`
Segment intersection in anomaly detector #615 (#616) 5 years ago			`from typing import Optional, Union, Generator, List`
asyncio.wait_for training_future 5 years ago			`import concurrent.futures`
Workers for analyticunits #203 (#265) * rm async from analytic_unit_worker + some refactorings in maager * AnalyticUnitManager * workers for analytic units 6 years ago			`import asyncio`
Segment intersection in anomaly detector #615 (#616) 5 years ago			`import utils`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`from utils import get_intersected_chunks, get_chunks, prepare_data`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Make class for detection result (#634) 5 years ago			`from analytic_types import ModelCache`
			`from analytic_types.detector_typing import DetectionResult`
asyncio.wait_for training_future 5 years ago
analytics: detector class + more types + remove Model.(save/load) 6 years ago			`logger = logging.getLogger('AnalyticUnitWorker')`
Analytics server messaging #24 v2 (#49) * add zmq to deps * basic zmq usage & build system fxs * continue zmq integration & refactorings * server.py + logging * some commit * ping-pong server-analytics & pair type * packing zmq.node for production 6 years ago
Add src 6 years ago
analytics: detector class + more types + remove Model.(save/load) 6 years ago			`class AnalyticUnitWorker:`

Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`CHUNK_WINDOW_SIZE_FACTOR = 100`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`CHUNK_INTERSECTION_FACTOR = 2`

			`assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \`
			`'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR'`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
asyncio.wait_for training_future 5 years ago			`def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor):`
One panel - one worker #62 6 years ago			`self.analytic_unit_id = analytic_unit_id`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`self._detector = detector`
asyncio.wait_for training_future 5 years ago			`self._executor: concurrent.futures.Executor = executor`
Timeout for learning #481 (#485) 5 years ago			`self._training_future: asyncio.Future = None`
Add src 6 years ago
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`async def do_train(`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`self, payload: Union[list, dict], data: list, cache: Optional[ModelCache]`
optional type in do_train 5 years ago			`) -> Optional[ModelCache]:`
Non intersected chunks for consuming data #529 (#530) 5 years ago
			`dataframe = prepare_data(data)`

asyncio.wait_for training_future 5 years ago			`cfuture: concurrent.futures.Future = self._executor.submit(`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`self._detector.train, dataframe, payload, cache`
Workers for analyticunits #203 (#265) * rm async from analytic_unit_worker + some refactorings in maager * AnalyticUnitManager * workers for analytic units 6 years ago			`)`
asyncio.wait_for training_future 5 years ago			`self._training_future = asyncio.wrap_future(cfuture)`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`try:`
asyncio.wait_for training_future 5 years ago			`new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout = config.LEARNING_TIMEOUT)`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`return new_cache`
asyncio.wait_for training_future 5 years ago			`except asyncio.CancelledError:`
			`return None`
			`except asyncio.TimeoutError:`
Timeout for learning #481 (#485) 5 years ago			`raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT))`
trim trailing whitespaces 6 years ago
Make class for detection result (#634) 5 years ago			`async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:`
Non intersected chunks for consuming data #529 (#530) 5 years ago
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`window_size = self._detector.get_window_size(cache)`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR`
			`chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Make class for detection result (#634) 5 years ago			`detection_result = DetectionResult()`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Revert "Merge branch 'concatinate-chunks-for-anomaly-detector-#614'" This reverts commit c6eb1bd4d2e22dc47f080c2667daeba0968b46b4, reversing changes made to 74d45bf4f4b81a68ac861c37f3078c021e9b171c. 5 years ago			`for chunk in get_intersected_chunks(data, chunk_intersection, chunk_size):`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`await asyncio.sleep(0)`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`chunk_dataframe = prepare_data(chunk)`
			`detected = self._detector.detect(chunk_dataframe, cache)`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`self.__append_detection_result(detection_result, detected)`
Segment class #636 (#637) 5 years ago			`detection_result.segments = self._detector.merge_segments(detection_result.segments)`
Make class for detection result (#634) 5 years ago			`return detection_result.to_json()`
trim trailing whitespaces 6 years ago
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`def cancel(self):`
Timeout for learning #481 (#485) 5 years ago			`if self._training_future is not None:`
			`self._training_future.cancel()`
Analytic unit worker bucket #273 (#297) 6 years ago
Make class for detection result (#634) 5 years ago			`async def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`window_size = self._detector.get_window_size(cache)`

Make class for detection result (#634) 5 years ago			`detection_result = DetectionResult()`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Non intersected chunks for consuming data #529 (#530) 5 years ago			`for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR):`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`await asyncio.sleep(0)`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`chunk_dataframe = prepare_data(chunk)`
			`detected = self._detector.consume_data(chunk_dataframe, cache)`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`self.__append_detection_result(detection_result, detected)`
Segment intersection in anomaly detector #615 (#616) 5 years ago
Segment class #636 (#637) 5 years ago			`detection_result.segments = self._detector.merge_segments(detection_result.segments)`
Revert "Merge branch 'concatinate-chunks-for-anomaly-detector-#614'" This reverts commit c6eb1bd4d2e22dc47f080c2667daeba0968b46b4, reversing changes made to 74d45bf4f4b81a68ac861c37f3078c021e9b171c. 5 years ago
Make class for detection result (#634) 5 years ago			`if detection_result.last_detection_time is None:`
Dataframe for detection less than two window size (#532) 2*WINDOW_SIZE checks 5 years ago			`return None`
			`else:`
Make class for detection result (#634) 5 years ago			`return detection_result.to_json()`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Make class for detection result (#634) 5 years ago			`# TODO: move result concatenation to Detectors`
Segment class #636 (#637) 5 years ago			`def __append_detection_result(self, detection_result: DetectionResult, new_chunk: DetectionResult):`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`if new_chunk is not None:`
Make class for detection result (#634) 5 years ago			`detection_result.cache = new_chunk.cache`
			`detection_result.last_detection_time = new_chunk.last_detection_time`
			`detection_result.segments.extend(new_chunk.segments)`