hastic-server/analytics/analytics/analytic_unit_worker.py

import config
import detectors
import logging
import pandas as pd
from typing import Optional, Union, Generator, List, Tuple
import concurrent.futures
import asyncio
import utils
from utils import get_intersected_chunks, get_chunks, prepare_data

from analytic_types import ModelCache, TimeSeries
from analytic_types.detector import DetectionResult

# Module-level logger registered under the fixed name 'AnalyticUnitWorker'.
logger = logging.getLogger('AnalyticUnitWorker')


class AnalyticUnitWorker:
    """Drive training, detection and data processing for one analytic unit.

    The worker wraps a single detector. Training is submitted to the executor
    passed to the constructor and awaited with a timeout; detection, consuming
    and processing are performed chunk by chunk directly in the coroutine,
    with an ``asyncio.sleep(0)`` before every chunk.
    """

    # Chunk size, expressed as a multiple of the detector's window size.
    CHUNK_WINDOW_SIZE_FACTOR = 100
    # Intersection size handed to get_intersected_chunks, expressed as a
    # multiple of the detector's window size.
    CHUNK_INTERSECTION_FACTOR = 2

    assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \
        'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR'

    def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor):
        """Store the unit id, the detector and the executor used for training.

        Args:
            analytic_unit_id: identifier used in error messages.
            detector: detector every operation is delegated to.
            executor: executor that `do_train` submits the training call to.
        """
        self.analytic_unit_id = analytic_unit_id
        self._detector = detector
        self._executor: concurrent.futures.Executor = executor
        # Future of the most recently started training; None until do_train
        # has been called. `cancel` uses it to abort the training.
        self._training_future: Optional[asyncio.Future] = None

    async def do_train(
        self, payload: Union[list, dict], data: TimeSeries, cache: Optional[ModelCache]
    ) -> Optional[ModelCache]:
        """Train the detector in the executor and return the resulting cache.

        Args:
            payload: passed through to the detector's `train` unchanged.
            data: raw data, converted with `prepare_data` before training.
            cache: previous model cache, or None.

        Returns:
            The value returned by the detector's `train`, or None when the
            wait was cancelled.

        Raises:
            Exception: if training does not finish within
                `config.LEARNING_TIMEOUT` seconds.
        """
        dataframe = prepare_data(data)

        cfuture: concurrent.futures.Future = self._executor.submit(
            self._detector.train, dataframe, payload, cache
        )
        # Keep the asyncio-side future on the instance so `cancel` can reach it.
        self._training_future = asyncio.wrap_future(cfuture)
        try:
            return await asyncio.wait_for(self._training_future, timeout=config.LEARNING_TIMEOUT)
        except asyncio.CancelledError:
            # Cancellation (e.g. through `cancel`) is reported to the caller
            # as "no new cache" instead of being propagated.
            return None
        except asyncio.TimeoutError:
            raise Exception(f'Timeout ({config.LEARNING_TIMEOUT}s) exceeded while learning')

    async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
        """Run detection over `data` chunk by chunk and merge the results.

        Chunks are produced by `get_intersected_chunks` when the detector
        reports `is_detection_intersected()`, otherwise by `get_chunks`.

        Args:
            data: data to run detection on; every chunk goes through
                `prepare_data` before it reaches the detector.
            cache: model cache handed to the detector, or None.

        Returns:
            `to_json()` of the concatenated detection result.

        Raises:
            RuntimeError: if no chunk was produced, so there is nothing to
                concatenate.
        """
        window_size = self._detector.get_window_size(cache)
        chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR
        chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR

        # XXX: get_chunks(data, chunk_size) == get_intersected_chunks(data, 0, chunk_size)
        if self._detector.is_detection_intersected():
            chunks = get_intersected_chunks(data, chunk_intersection, chunk_size)
        else:
            chunks = get_chunks(data, chunk_size)

        detections: List[DetectionResult] = []
        for chunk in chunks:
            # Give other tasks on the event loop a turn between chunks.
            await asyncio.sleep(0)
            chunk_dataframe = prepare_data(chunk)
            detected: DetectionResult = self._detector.detect(chunk_dataframe, cache)
            detections.append(detected)

        if not detections:
            raise RuntimeError(f'do_detect for {self.analytic_unit_id} got empty detection results')

        detection_result = self._detector.concat_detection_results(detections)
        return detection_result.to_json()

    def cancel(self):
        """Cancel the current training future, if a training was ever started."""
        if self._training_future is not None:
            self._training_future.cancel()

    async def consume_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> Optional[dict]:
        """Feed `data` to the detector's `consume_data` chunk by chunk.

        Args:
            data: incoming data, split with `get_chunks` (no intersection).
            cache: model cache handed to the detector, or None.

        Returns:
            `to_json()` of the concatenated detection results, or None when
            the detector returned None for every chunk (or there were no
            chunks).
        """
        window_size = self._detector.get_window_size(cache)

        detections: List[DetectionResult] = []

        for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR):
            # Give other tasks on the event loop a turn between chunks.
            await asyncio.sleep(0)
            chunk_dataframe = prepare_data(chunk)
            detected = self._detector.consume_data(chunk_dataframe, cache)
            # The detector may return None for a chunk; such chunks are skipped.
            if detected is not None:
                detections.append(detected)

        if not detections:
            return None

        detection_result = self._detector.concat_detection_results(detections)
        return detection_result.to_json()

    async def process_data(self, data: TimeSeries, cache: ModelCache) -> dict:
        """Run the detector's `process_data` over `data` chunk by chunk.

        Args:
            data: data to process, split with `get_chunks`.
            cache: model cache; must not be None.

        Returns:
            `to_json()` of the concatenated processing results.

        Raises:
            AssertionError: if the detector is not a
                `detectors.ProcessingDetector` or `cache` is None.
            RuntimeError: if no chunk produced a processing result.
        """
        assert isinstance(self._detector, detectors.ProcessingDetector), \
            f'{self.analytic_unit_id} detector is not ProcessingDetector, can`t process data'
        assert cache is not None, f'{self.analytic_unit_id} got empty cache for processing data'

        processed_chunks = []
        window_size = self._detector.get_window_size(cache)
        for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR):
            # Give other tasks on the event loop a turn between chunks.
            await asyncio.sleep(0)
            chunk_dataframe = prepare_data(chunk)
            processed = self._detector.process_data(chunk_dataframe, cache)
            # Chunks for which the detector returned None are skipped.
            if processed is not None:
                processed_chunks.append(processed)

        if not processed_chunks:
            raise RuntimeError(f'process_data for {self.analytic_unit_id} got empty processing results')

        # TODO: maybe we should process all chunks inside of detector?
        result = self._detector.concat_processing_results(processed_chunks)
        return result.to_json()
Analytics server messaging #24 v2 (#49) * add zmq to deps * basic zmq usage & build system fxs * continue zmq integration & refactorings * server.py + logging * some commit * ping-pong server-analytics & pair type * packing zmq.node for production 6 years ago			`import config`
data_service, renamings and detectors imports 6 years ago			`import detectors`
Add src 6 years ago			`import logging`
Fix prediction (#118) 6 years ago			`import pandas as pd`
Endpoint for smoothing data #612 (#639) 5 years ago			`from typing import Optional, Union, Generator, List, Tuple`
asyncio.wait_for training_future 5 years ago			`import concurrent.futures`
Workers for analyticunits #203 (#265) * rm async from analytic_unit_worker + some refactorings in maager * AnalyticUnitManager * workers for analytic units 6 years ago			`import asyncio`
Segment intersection in anomaly detector #615 (#616) 5 years ago			`import utils`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`from utils import get_intersected_chunks, get_chunks, prepare_data`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Merge threshold segments #624 (#646) 5 years ago			`from analytic_types import ModelCache, TimeSeries`
Rename modules and types (#860) 4 years ago			`from analytic_types.detector import DetectionResult`
asyncio.wait_for training_future 5 years ago
analytics: detector class + more types + remove Model.(save/load) 6 years ago			`logger = logging.getLogger('AnalyticUnitWorker')`
Analytics server messaging #24 v2 (#49) * add zmq to deps * basic zmq usage & build system fxs * continue zmq integration & refactorings * server.py + logging * some commit * ping-pong server-analytics & pair type * packing zmq.node for production 6 years ago
Add src 6 years ago
analytics: detector class + more types + remove Model.(save/load) 6 years ago			`class AnalyticUnitWorker:`

Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`CHUNK_WINDOW_SIZE_FACTOR = 100`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`CHUNK_INTERSECTION_FACTOR = 2`

			`assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \`
			`'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR'`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
asyncio.wait_for training_future 5 years ago			`def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor):`
One panel - one worker #62 6 years ago			`self.analytic_unit_id = analytic_unit_id`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`self._detector = detector`
asyncio.wait_for training_future 5 years ago			`self._executor: concurrent.futures.Executor = executor`
Timeout for learning #481 (#485) 5 years ago			`self._training_future: asyncio.Future = None`
Add src 6 years ago
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`async def do_train(`
Change mutable default values to none #638 (#682) * add types, remove mutables from defaults * fix debug logging 5 years ago			`self, payload: Union[list, dict], data: TimeSeries, cache: Optional[ModelCache]`
optional type in do_train 5 years ago			`) -> Optional[ModelCache]:`
Non intersected chunks for consuming data #529 (#530) 5 years ago
			`dataframe = prepare_data(data)`

asyncio.wait_for training_future 5 years ago			`cfuture: concurrent.futures.Future = self._executor.submit(`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`self._detector.train, dataframe, payload, cache`
Workers for analyticunits #203 (#265) * rm async from analytic_unit_worker + some refactorings in maager * AnalyticUnitManager * workers for analytic units 6 years ago			`)`
asyncio.wait_for training_future 5 years ago			`self._training_future = asyncio.wrap_future(cfuture)`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`try:`
asyncio.wait_for training_future 5 years ago			`new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout = config.LEARNING_TIMEOUT)`
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`return new_cache`
asyncio.wait_for training_future 5 years ago			`except asyncio.CancelledError:`
			`return None`
			`except asyncio.TimeoutError:`
Timeout for learning #481 (#485) 5 years ago			`raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT))`
trim trailing whitespaces 6 years ago
Make class for detection result (#634) 5 years ago			`async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:`
Non intersected chunks for consuming data #529 (#530) 5 years ago
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`window_size = self._detector.get_window_size(cache)`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR`
			`chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Endpoint for smoothing data #612 (#639) 5 years ago			`detections: List[DetectionResult] = []`
			`chunks = []`
			`# XXX: get_chunks(data, chunk_size) == get_intersected_chunks(data, 0, chunk_size)`
			`if self._detector.is_detection_intersected():`
			`chunks = get_intersected_chunks(data, chunk_intersection, chunk_size)`
			`else:`
			`chunks = get_chunks(data, chunk_size)`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Endpoint for smoothing data #612 (#639) 5 years ago			`for chunk in chunks:`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`await asyncio.sleep(0)`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`chunk_dataframe = prepare_data(chunk)`
Endpoint for smoothing data #612 (#639) 5 years ago			`detected: DetectionResult = self._detector.detect(chunk_dataframe, cache)`
			`detections.append(detected)`

			`if len(detections) == 0:`
			`raise RuntimeError(f'do_detect for {self.analytic_unit_id} got empty detection results')`

Anomaly detector webhooks fix (#670) 5 years ago			`detection_result = self._detector.concat_detection_results(detections)`
Make class for detection result (#634) 5 years ago			`return detection_result.to_json()`
trim trailing whitespaces 6 years ago
Cancel learning on analytic unit deletion #266 (#269) * basic cancelation in analytics * cancelation task on node * basic cancelation in analytics 6 years ago			`def cancel(self):`
Timeout for learning #481 (#485) 5 years ago			`if self._training_future is not None:`
			`self._training_future.cancel()`
Analytic unit worker bucket #273 (#297) 6 years ago
Merge threshold segments #624 (#646) 5 years ago			`async def consume_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> Optional[dict]:`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`window_size = self._detector.get_window_size(cache)`

Endpoint for smoothing data #612 (#639) 5 years ago			`detections: List[DetectionResult] = []`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Non intersected chunks for consuming data #529 (#530) 5 years ago			`for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR):`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago			`await asyncio.sleep(0)`
Non intersected chunks for consuming data #529 (#530) 5 years ago			`chunk_dataframe = prepare_data(chunk)`
			`detected = self._detector.consume_data(chunk_dataframe, cache)`
Endpoint for smoothing data #612 (#639) 5 years ago			`if detected is not None:`
			`detections.append(detected)`
Revert "Merge branch 'concatinate-chunks-for-anomaly-detector-#614'" This reverts commit c6eb1bd4d2e22dc47f080c2667daeba0968b46b4, reversing changes made to 74d45bf4f4b81a68ac861c37f3078c021e9b171c. 5 years ago
Endpoint for smoothing data #612 (#639) 5 years ago			`if len(detections) == 0:`
Dataframe for detection less than two window size (#532) 2*WINDOW_SIZE checks 5 years ago			`return None`
			`else:`
Anomaly detector webhooks fix (#670) 5 years ago			`detection_result = self._detector.concat_detection_results(detections)`
Make class for detection result (#634) 5 years ago			`return detection_result.to_json()`
Send data to detection in chunks #489 (#503) * Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc) 5 years ago
Change mutable default values to none #638 (#682) * add types, remove mutables from defaults * fix debug logging 5 years ago			`async def process_data(self, data: TimeSeries, cache: ModelCache) -> dict:`
Anomaly analytic unit: send confidence bounds instead of smoothed data #656 (#657) 5 years ago			`assert isinstance(self._detector, detectors.ProcessingDetector), \`
			f'{self.analytic_unit_id} detector is not ProcessingDetector, can`t process data'
Endpoint for smoothing data #612 (#639) 5 years ago			`assert cache is not None, f'{self.analytic_unit_id} got empty cache for processing data'`

Anomaly analytic unit: send confidence bounds instead of smoothed data #656 (#657) 5 years ago			`processed_chunks = []`
Endpoint for smoothing data #612 (#639) 5 years ago			`window_size = self._detector.get_window_size(cache)`
			`for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR):`
			`await asyncio.sleep(0)`
			`chunk_dataframe = prepare_data(chunk)`
Anomaly analytic unit: send confidence bounds instead of smoothed data #656 (#657) 5 years ago			`processed = self._detector.process_data(chunk_dataframe, cache)`
Endpoint for smoothing data #612 (#639) 5 years ago			`if processed is not None:`
			`processed_chunks.append(processed)`

			`if len(processed_chunks) == 0:`
			`raise RuntimeError(f'process_data for {self.analytic_unit_id} got empty processing results')`

Anomaly analytic unit: send confidence bounds instead of smoothed data #656 (#657) 5 years ago			`# TODO: maybe we should process all chunks inside of detector?`
Endpoint for smoothing data #612 (#639) 5 years ago			`result = self._detector.concat_processing_results(processed_chunks)`
			`return result.to_json()`