diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f094b78 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "analytics"] + path = analytics + url = https://github.com/hastic/analytics diff --git a/analytics b/analytics new file mode 160000 index 0000000..8734258 --- /dev/null +++ b/analytics @@ -0,0 +1 @@ +Subproject commit 8734258c84f3278bbc14508e1222c73dda5f90cd diff --git a/analytics/.dockerignore b/analytics/.dockerignore deleted file mode 100644 index f53d18e..0000000 --- a/analytics/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -__pycache__ -.vscode diff --git a/analytics/.gitignore b/analytics/.gitignore deleted file mode 100644 index ade4385..0000000 --- a/analytics/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -build/ -dist/ -*.spec -__pycache__/ -test/ \ No newline at end of file diff --git a/analytics/.vscode/.env b/analytics/.vscode/.env deleted file mode 100644 index 506628b..0000000 --- a/analytics/.vscode/.env +++ /dev/null @@ -1 +0,0 @@ -PYTHONPATH=analytics diff --git a/analytics/.vscode/launch.json b/analytics/.vscode/launch.json deleted file mode 100644 index 065a4d4..0000000 --- a/analytics/.vscode/launch.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Attach (Remote Debug)", - "type": "python", - "request": "attach", - "port": 5679, - "host": "localhost", - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "/var/www/analytics" - } - ] - }, - { - "name": "Python: Current File", - "type": "python", - "request": "launch", - "windows": { - "program": "${workspaceFolder}\\bin\\server" - }, - "linux": { - "program": "${workspaceFolder}/bin/server" - } - } - ] -} diff --git a/analytics/.vscode/settings.json b/analytics/.vscode/settings.json deleted file mode 100644 index c9adcc0..0000000 --- a/analytics/.vscode/settings.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "terminal.integrated.shell.windows": "C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", - "editor.insertSpaces": true, - "files.eol": "\n", - "files.exclude": { - "**/__pycache__/": true, - "dist": true, - "build": true - }, - "[python]": { - "editor.tabSize": 4, - }, - "python.envFile": "${workspaceFolder}/.vscode/.env", - "python.pythonPath": "python", - "python.linting.enabled": true, - "python.testing.unittestArgs": [ "-v" ], - "python.testing.pytestEnabled": false, - "python.testing.nosetestsEnabled": false, - "python.testing.unittestEnabled": true, - "python.linting.pylintEnabled": true, - "python.jediEnabled": false -} diff --git a/analytics/Codestyle.md b/analytics/Codestyle.md deleted file mode 100644 index cf0e4fb..0000000 --- a/analytics/Codestyle.md +++ /dev/null @@ -1,27 +0,0 @@ -# Type hints - -Please use: https://www.python.org/dev/peps/pep-0484/ - -# Line endings - -We use LF everywhere - -# Imports - -You import local files first, than spesific liba and then standart libs. -So you import from something very scecific to something very common. -It allows you to pay attention on most important things from beginning. 
- ``` - -from data_provider import DataProvider -from anomaly_model import AnomalyModel -from pattern_detection_model import PatternDetectionModel - -import numpy as np - -from scipy.signal import argrelextrema - -import pickle - -``` \ No newline at end of file diff --git a/analytics/Dockerfile b/analytics/Dockerfile deleted file mode 100644 index 79d1265..0000000 --- a/analytics/Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -FROM python:3.6.6 - -COPY requirements.txt /requirements.txt - -RUN pip install -r /requirements.txt - -WORKDIR /var/www/analytics - -COPY . /var/www/analytics/ - - -CMD ["python", "-u", "bin/server"] diff --git a/analytics/README.md b/analytics/README.md deleted file mode 100644 index a966db5..0000000 --- a/analytics/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Hastic-server-analytics - -Python service which gets tasks from [hastic-server-node](https://github.com/hastic/hastic-server/tree/master/server) such as: - -* training statistical models -* detecting patterns in time series data - -## Architecture - -The service uses [asyncio](https://docs.python.org/3/library/asyncio.html), -[concurrency](https://docs.python.org/3.6/library/concurrent.futures.html#module-concurrent.futures) and -[pyzmq](https://pyzmq.readthedocs.io/en/latest/). diff --git a/analytics/analytics/analytic_types/__init__.py b/analytics/analytics/analytic_types/__init__.py deleted file mode 100644 index 17b89cd..0000000 --- a/analytics/analytics/analytic_types/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -This is the place where we put all classes and types -common to all analytics code - -For example, if you write something which is used -in analytic_unit_manager, it should be here. - -If you create something specific which is used only in one place, -like PatternDetectionCache, then it should not be here. -""" - -import pandas as pd -from typing import Union, List, Tuple - -AnalyticUnitId = str - -ModelCache = dict - -# TODO: explicit timestamp / value -TimeSeries = List[Tuple[int, float]] - -""" -Example: - -tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) -ts = TimeSeries([4, 5, 6], tsis) -""" -Timestamp = Union[str, pd.Timestamp] - -class TimeSeriesIndex(pd.DatetimeIndex): - def __new__(cls, *args, **kwargs): - return pd.DatetimeIndex.__new__(cls, *args, **kwargs) - -# TODO: make generic type for values.
See List definition for example of generic class -# TODO: constructor from DataFrame -# TODO: repleace TimeSeries (above) with this class: rename TimeSeries2 to TimeSeries -class TimeSeries2(pd.Series): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) diff --git a/analytics/analytics/analytic_types/cache.py b/analytics/analytics/analytic_types/cache.py deleted file mode 100644 index a21dc11..0000000 --- a/analytics/analytics/analytic_types/cache.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional, List, Dict - -from analytic_types.segment import AnomalyDetectorSegment -from analytic_types.detector import Bound - -from utils.meta import JSONClass, SerializableList - -@JSONClass -class AnomalyCache: - def __init__( - self, - alpha: float, - confidence: float, - enable_bounds: str, - seasonality: Optional[int] = None, - segments: Optional[List[Dict]] = None, - time_step: Optional[int] = None, - ): - self.alpha = alpha - self.confidence = confidence - self.enable_bounds = enable_bounds - if seasonality != None and seasonality < 0: - raise ValueError(f'Can`t create AnomalyCache: got invalid seasonality {seasonality}') - self.seasonality = seasonality - self.time_step = time_step - if segments != None: - anomaly_segments = map(AnomalyDetectorSegment.from_json, segments) - self.segments = SerializableList(anomaly_segments) - else: - self.segments = [] - - def set_segments(self, segments: List[AnomalyDetectorSegment]): - if len(segments) > 0: - self.segments = SerializableList(segments) - - def get_enabled_bounds(self) -> Bound: - #TODO: use class with to_json() - return Bound(self.enable_bounds) diff --git a/analytics/analytics/analytic_types/data_bucket.py b/analytics/analytics/analytic_types/data_bucket.py deleted file mode 100644 index 5eb3809..0000000 --- a/analytics/analytics/analytic_types/data_bucket.py +++ /dev/null @@ -1,14 +0,0 @@ -import pandas as pd - - -class DataBucket: - - def __init__(self): - self.data = pd.DataFrame([], columns=['timestamp', 'value']) - - def receive_data(self, data: pd.DataFrame): - self.data = self.data.append(data, ignore_index=True) - - def drop_data(self, count: int): - if count > 0: - self.data = self.data.iloc[count:] diff --git a/analytics/analytics/analytic_types/detector.py b/analytics/analytics/analytic_types/detector.py deleted file mode 100644 index 87585cc..0000000 --- a/analytics/analytics/analytic_types/detector.py +++ /dev/null @@ -1,47 +0,0 @@ -from analytic_types import ModelCache, TimeSeries -from analytic_types.segment import Segment - -from enum import Enum -from typing import List, Optional, Tuple - -import utils.meta - -class Bound(Enum): - ALL = 'ALL' - UPPER = 'UPPER' - LOWER = 'LOWER' - -class DetectionResult: - - def __init__( - self, - cache: Optional[ModelCache] = None, - segments: Optional[List[Segment]] = None, - last_detection_time: int = None - ): - if cache is None: - cache = {} - if segments is None: - segments = [] - self.cache = cache - self.segments = segments - self.last_detection_time = last_detection_time - - # TODO: use @utils.meta.JSONClass (now it can't serialize list of objects) - def to_json(self): - return { - 'cache': self.cache, - 'segments': list(map(lambda segment: segment.to_json(), self.segments)), - 'lastDetectionTime': self.last_detection_time - } - -@utils.meta.JSONClass -class ProcessingResult(): - - def __init__( - self, - lower_bound: Optional[TimeSeries] = None, - upper_bound: Optional[TimeSeries] = None, - ): - self.lower_bound = lower_bound - self.upper_bound = 
upper_bound diff --git a/analytics/analytics/analytic_types/learning_info.py b/analytics/analytics/analytic_types/learning_info.py deleted file mode 100644 index 1f499b8..0000000 --- a/analytics/analytics/analytic_types/learning_info.py +++ /dev/null @@ -1,17 +0,0 @@ -import utils.meta - -@utils.meta.JSONClass -class LearningInfo: - - def __init__(self): - super().__init__() - self.confidence = [] - self.patterns_list = [] - self.pattern_width = [] - self.pattern_height = [] - self.pattern_timestamp = [] - self.segment_center_list = [] - self.patterns_value = [] - - def __str__(self): - return str(self.to_json()) \ No newline at end of file diff --git a/analytics/analytics/analytic_types/segment.py b/analytics/analytics/analytic_types/segment.py deleted file mode 100644 index 8c45427..0000000 --- a/analytics/analytics/analytic_types/segment.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Optional - -import utils.meta - -@utils.meta.JSONClass -class Segment: - ''' - Used for segment manipulation instead of { 'from': ..., 'to': ... } dict - ''' - - def __init__( - self, - from_timestamp: int, - to_timestamp: int, - _id: Optional[str] = None, - analytic_unit_id: Optional[str] = None, - labeled: Optional[bool] = None, - deleted: Optional[bool] = None, - message: Optional[str] = None - ): - if to_timestamp < from_timestamp: - raise ValueError(f'Can`t create segment with to < from: {to_timestamp} < {from_timestamp}') - self.from_timestamp = from_timestamp - self.to_timestamp = to_timestamp - self._id = _id - self.analytic_unit_id = analytic_unit_id - self.labeled = labeled - self.deleted = deleted - self.message = message - -@utils.meta.JSONClass -class AnomalyDetectorSegment(Segment): - ''' - Used for segment manipulation instead of { 'from': ..., 'to': ..., 'data': ... 
} dict - ''' - - def __init__( - self, - from_timestamp: int, - to_timestamp: int, - data = [], - _id: Optional[str] = None, - analytic_unit_id: Optional[str] = None, - labeled: Optional[bool] = None, - deleted: Optional[bool] = None, - message: Optional[str] = None - ): - super().__init__( - from_timestamp, - to_timestamp, - _id, - analytic_unit_id, - labeled, - deleted, - message - ) - self.data = data diff --git a/analytics/analytics/analytic_unit_manager.py b/analytics/analytics/analytic_unit_manager.py deleted file mode 100644 index e99fd36..0000000 --- a/analytics/analytics/analytic_unit_manager.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import Dict -import logging as log -import traceback -from concurrent.futures import Executor, ThreadPoolExecutor - -from analytic_unit_worker import AnalyticUnitWorker -from analytic_types import AnalyticUnitId, ModelCache -from analytic_types.segment import Segment -import detectors - - -logger = log.getLogger('AnalyticUnitManager') - - -def get_detector_by_type( - detector_type: str, analytic_unit_type: str, analytic_unit_id: AnalyticUnitId -) -> detectors.Detector: - if detector_type == 'pattern': - return detectors.PatternDetector(analytic_unit_type, analytic_unit_id) - elif detector_type == 'threshold': - return detectors.ThresholdDetector(analytic_unit_id) - elif detector_type == 'anomaly': - return detectors.AnomalyDetector(analytic_unit_id) - - raise ValueError('Unknown detector type "%s"' % detector_type) - - -class AnalyticUnitManager: - - def __init__(self): - self.analytic_workers: Dict[AnalyticUnitId, AnalyticUnitWorker] = dict() - self.workers_executor = ThreadPoolExecutor() - - def __ensure_worker( - self, - analytic_unit_id: AnalyticUnitId, - detector_type: str, - analytic_unit_type: str - ) -> AnalyticUnitWorker: - if analytic_unit_id in self.analytic_workers: - # TODO: check that type is the same - return self.analytic_workers[analytic_unit_id] - detector = get_detector_by_type(detector_type, analytic_unit_type, analytic_unit_id) - worker = AnalyticUnitWorker(analytic_unit_id, detector, self.workers_executor) - self.analytic_workers[analytic_unit_id] = worker - return worker - - async def __handle_analytic_task(self, task: object) -> dict: - """ - returns payload or None - """ - analytic_unit_id: AnalyticUnitId = task['analyticUnitId'] - log.debug('Analytics get task with type: {} for unit: {}'.format(task['type'], analytic_unit_id)) - if task['type'] == 'CANCEL': - if analytic_unit_id in self.analytic_workers: - self.analytic_workers[analytic_unit_id].cancel() - return - - payload = task['payload'] - worker = self.__ensure_worker(analytic_unit_id, payload['detector'], payload['analyticUnitType']) - data = payload.get('data') - if task['type'] == 'PUSH': - # TODO: do it a better way - res = await worker.consume_data(data, payload['cache']) - if res: - res.update({ 'analyticUnitId': analytic_unit_id }) - return res - elif task['type'] == 'LEARN': - if 'segments' in payload: - segments = payload['segments'] - segments = [Segment.from_json(segment) for segment in segments] - return await worker.do_train(segments, data, payload['cache']) - elif 'threshold' in payload: - return await worker.do_train(payload['threshold'], data, payload['cache']) - elif 'anomaly' in payload: - return await worker.do_train(payload['anomaly'], data, payload['cache']) - else: - raise ValueError('No segments or threshold in LEARN payload') - elif task['type'] == 'DETECT': - return await worker.do_detect(data, payload['cache']) - elif task['type'] == 
'PROCESS': - return await worker.process_data(data, payload['cache']) - - raise ValueError('Unknown task type "%s"' % task['type']) - - async def handle_analytic_task(self, task: object): - try: - log.debug('Start handle_analytic_task with analytic unit: {}'.format(task['analyticUnitId'])) - result_payload = await self.__handle_analytic_task(task) - result_message = { - 'status': 'SUCCESS', - 'payload': result_payload - } - log.debug('End correctly handle_analytic_task with anatytic unit: {}'.format(task['analyticUnitId'])) - return result_message - except Exception as e: - error_text = traceback.format_exc() - logger.error("handle_analytic_task Exception: '%s'" % error_text) - # TODO: move result to a class which renders to json for messaging to analytics - return { - 'status': 'FAILED', - 'error': repr(e) - } diff --git a/analytics/analytics/analytic_unit_worker.py b/analytics/analytics/analytic_unit_worker.py deleted file mode 100644 index ad8b00f..0000000 --- a/analytics/analytics/analytic_unit_worker.py +++ /dev/null @@ -1,116 +0,0 @@ -import config -import detectors -import logging -import pandas as pd -from typing import Optional, Union, Generator, List, Tuple -import concurrent.futures -import asyncio -import utils -from utils import get_intersected_chunks, get_chunks, prepare_data - -from analytic_types import ModelCache, TimeSeries -from analytic_types.detector import DetectionResult - -logger = logging.getLogger('AnalyticUnitWorker') - - -class AnalyticUnitWorker: - - CHUNK_WINDOW_SIZE_FACTOR = 100 - CHUNK_INTERSECTION_FACTOR = 2 - - assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \ - 'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR' - - def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor): - self.analytic_unit_id = analytic_unit_id - self._detector = detector - self._executor: concurrent.futures.Executor = executor - self._training_future: asyncio.Future = None - - async def do_train( - self, payload: Union[list, dict], data: TimeSeries, cache: Optional[ModelCache] - ) -> Optional[ModelCache]: - - dataframe = prepare_data(data) - - cfuture: concurrent.futures.Future = self._executor.submit( - self._detector.train, dataframe, payload, cache - ) - self._training_future = asyncio.wrap_future(cfuture) - try: - new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout = config.LEARNING_TIMEOUT) - return new_cache - except asyncio.CancelledError: - return None - except asyncio.TimeoutError: - raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT)) - - async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: - - window_size = self._detector.get_window_size(cache) - chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR - chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR - - detections: List[DetectionResult] = [] - chunks = [] - # XXX: get_chunks(data, chunk_size) == get_intersected_chunks(data, 0, chunk_size) - if self._detector.is_detection_intersected(): - chunks = get_intersected_chunks(data, chunk_intersection, chunk_size) - else: - chunks = get_chunks(data, chunk_size) - - for chunk in chunks: - await asyncio.sleep(0) - chunk_dataframe = prepare_data(chunk) - detected: DetectionResult = self._detector.detect(chunk_dataframe, cache) - detections.append(detected) - - if len(detections) == 0: - raise RuntimeError(f'do_detect for {self.analytic_unit_id} got empty detection results') - 
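`AnalyticUnitWorker.do_detect` and `consume_data` above split the incoming series into chunks of `window_size * CHUNK_WINDOW_SIZE_FACTOR` points (optionally overlapping by `window_size * CHUNK_INTERSECTION_FACTOR`) and yield to the event loop between chunks. The `utils.get_chunks` / `utils.get_intersected_chunks` helpers are not part of this diff, so the following is only a hypothetical sketch of such generators, consistent with the noted equivalence `get_chunks(data, size) == get_intersected_chunks(data, 0, size)`:

```python
from typing import Generator, List, Tuple

TimeSeries = List[Tuple[int, float]]

def get_intersected_chunks(data: TimeSeries, intersection: int, chunk_size: int) -> Generator[TimeSeries, None, None]:
    # yield slices of chunk_size points; consecutive chunks share `intersection` points
    step = max(chunk_size - intersection, 1)
    for start in range(0, len(data), step):
        yield data[start:start + chunk_size]
        if start + chunk_size >= len(data):
            break

def get_chunks(data: TimeSeries, chunk_size: int) -> Generator[TimeSeries, None, None]:
    # non-overlapping chunks: same as get_intersected_chunks(data, 0, chunk_size)
    yield from get_intersected_chunks(data, 0, chunk_size)
```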
- detection_result = self._detector.concat_detection_results(detections) - return detection_result.to_json() - - def cancel(self): - if self._training_future is not None: - self._training_future.cancel() - - async def consume_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> Optional[dict]: - window_size = self._detector.get_window_size(cache) - - detections: List[DetectionResult] = [] - - for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR): - await asyncio.sleep(0) - chunk_dataframe = prepare_data(chunk) - detected = self._detector.consume_data(chunk_dataframe, cache) - if detected is not None: - detections.append(detected) - - if len(detections) == 0: - return None - else: - detection_result = self._detector.concat_detection_results(detections) - return detection_result.to_json() - - async def process_data(self, data: TimeSeries, cache: ModelCache) -> dict: - assert isinstance(self._detector, detectors.ProcessingDetector), \ - f'{self.analytic_unit_id} detector is not ProcessingDetector, can`t process data' - assert cache is not None, f'{self.analytic_unit_id} got empty cache for processing data' - - processed_chunks = [] - window_size = self._detector.get_window_size(cache) - for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR): - await asyncio.sleep(0) - chunk_dataframe = prepare_data(chunk) - processed = self._detector.process_data(chunk_dataframe, cache) - if processed is not None: - processed_chunks.append(processed) - - if len(processed_chunks) == 0: - raise RuntimeError(f'process_data for {self.analytic_unit_id} got empty processing results') - - # TODO: maybe we should process all chunks inside of detector? - result = self._detector.concat_processing_results(processed_chunks) - return result.to_json() diff --git a/analytics/analytics/config.py b/analytics/analytics/config.py deleted file mode 100644 index a833df9..0000000 --- a/analytics/analytics/config.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -import json - - -PARENT_FOLDER = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -CONFIG_FILE = os.path.join(PARENT_FOLDER, 'config.json') - - -config_exists = os.path.isfile(CONFIG_FILE) -if config_exists: - with open(CONFIG_FILE) as f: - config = json.load(f) -else: - print('Config file %s doesn`t exist, using defaults' % CONFIG_FILE) - - -def get_config_field(field: str, default_val = None): - if field in os.environ: - return os.environ[field] - - if config_exists and field in config and config[field] != '': - return config[field] - - if default_val is not None: - return default_val - - raise Exception('Please configure {}'.format(field)) - -HASTIC_SERVER_URL = get_config_field('HASTIC_SERVER_URL', 'ws://localhost:8002') -LEARNING_TIMEOUT = get_config_field('LEARNING_TIMEOUT', 120) diff --git a/analytics/analytics/detectors/__init__.py b/analytics/analytics/detectors/__init__.py deleted file mode 100644 index 370f0f2..0000000 --- a/analytics/analytics/detectors/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from detectors.detector import Detector, ProcessingDetector -from detectors.pattern_detector import PatternDetector -from detectors.threshold_detector import ThresholdDetector -from detectors.anomaly_detector import AnomalyDetector diff --git a/analytics/analytics/detectors/anomaly_detector.py b/analytics/analytics/detectors/anomaly_detector.py deleted file mode 100644 index 7885d01..0000000 --- a/analytics/analytics/detectors/anomaly_detector.py +++ /dev/null @@ -1,277 +0,0 @@ -from enum import 
Enum -import logging -import numpy as np -import pandas as pd -import math -from typing import Optional, Union, List, Tuple, Generator -import operator - -from analytic_types import AnalyticUnitId, ModelCache -from analytic_types.detector import DetectionResult, ProcessingResult, Bound -from analytic_types.data_bucket import DataBucket -from analytic_types.segment import Segment, AnomalyDetectorSegment -from analytic_types.cache import AnomalyCache -from detectors import Detector, ProcessingDetector -import utils - -MAX_DEPENDENCY_LEVEL = 100 -MIN_DEPENDENCY_FACTOR = 0.1 -BASIC_ALPHA = 0.5 -logger = logging.getLogger('ANOMALY_DETECTOR') - - -class AnomalyDetector(ProcessingDetector): - - def __init__(self, analytic_unit_id: AnalyticUnitId): - super().__init__(analytic_unit_id) - self.bucket = DataBucket() - - def train(self, dataframe: pd.DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache: - cache = AnomalyCache.from_json(payload) - cache.time_step = utils.find_interval(dataframe) - segments = cache.segments - - if len(segments) > 0: - seasonality = cache.seasonality - prepared_segments = [] - - for segment in segments: - segment_len = (int(segment.to_timestamp) - int(segment.from_timestamp)) - assert segment_len <= seasonality, \ - f'seasonality {seasonality} must be greater than segment length {segment_len}' - - from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment.from_timestamp, unit='ms')) - to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment.to_timestamp, unit='ms')) - segment_data = dataframe[from_index : to_index] - prepared_segments.append( - AnomalyDetectorSegment( - segment.from_timestamp, - segment.to_timestamp, - segment_data.value.tolist() - ) - ) - cache.set_segments(prepared_segments) - - return { - 'cache': cache.to_json() - } - - # TODO: ModelCache -> DetectorState - def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: - if cache == None: - raise f'Analytic unit {self.analytic_unit_id} got empty cache' - data = dataframe['value'] - - cache = AnomalyCache.from_json(cache) - segments = cache.segments - enabled_bounds = cache.get_enabled_bounds() - - smoothed_data = utils.exponential_smoothing(data, cache.alpha) - - lower_bound = smoothed_data - cache.confidence - upper_bound = smoothed_data + cache.confidence - - if len(segments) > 0: - data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0]) - - for segment in segments: - seasonality_index = cache.seasonality // cache.time_step - seasonality_offset = self.get_seasonality_offset( - segment.from_timestamp, - cache.seasonality, - data_start_time, - cache.time_step - ) - segment_data = pd.Series(segment.data) - - lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER) - upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER) - - detected_segments = list(self.detections_generator(dataframe, upper_bound, lower_bound, enabled_bounds)) - - last_dataframe_time = dataframe.iloc[-1]['timestamp'] - last_detection_time = utils.convert_pd_timestamp_to_ms(last_dataframe_time) - - return DetectionResult(cache.to_json(), detected_segments, last_detection_time) - - def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: - if cache is None: - msg = f'consume_data got invalid cache {cache} for task {self.analytic_unit_id}' - logging.debug(msg) - raise 
ValueError(msg) - - data_without_nan = data.dropna() - - if len(data_without_nan) == 0: - return None - - self.bucket.receive_data(data_without_nan) - - if len(self.bucket.data) >= self.get_window_size(cache): - return self.detect(self.bucket.data, cache) - - return None - - def is_detection_intersected(self) -> bool: - return False - - def get_window_size(self, cache: Optional[ModelCache]) -> int: - ''' - get the number of values that will affect the next value - ''' - - if cache is None: - raise ValueError('anomaly detector got None cache') - cache = AnomalyCache.from_json(cache) - - for level in range(1, MAX_DEPENDENCY_LEVEL): - if (1 - cache.alpha) ** level < MIN_DEPENDENCY_FACTOR: - break - - seasonality = 0 - if len(cache.segments) > 0: - seasonality = cache.seasonality // cache.time_step - return max(level, seasonality) - - def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: - result = DetectionResult() - time_step = detections[0].cache['timeStep'] - for detection in detections: - result.segments.extend(detection.segments) - result.last_detection_time = detection.last_detection_time - result.cache = detection.cache - result.segments = utils.merge_intersecting_segments(result.segments, time_step) - return result - - # TODO: remove duplication with detect() - def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult: - cache = AnomalyCache.from_json(cache) - segments = cache.segments - enabled_bounds = cache.get_enabled_bounds() - - # TODO: exponential_smoothing should return dataframe with related timestamps - smoothed_data = utils.exponential_smoothing(dataframe['value'], cache.alpha) - - lower_bound = smoothed_data - cache.confidence - upper_bound = smoothed_data + cache.confidence - - if len(segments) > 0: - data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0]) - - for segment in segments: - seasonality_index = cache.seasonality // cache.time_step - # TODO: move it to utils and add tests - seasonality_offset = self.get_seasonality_offset( - segment.from_timestamp, - cache.seasonality, - data_start_time, - cache.time_step - ) - segment_data = pd.Series(segment.data) - - lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER) - upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER) - - # TODO: support multiple segments - - timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp) - lower_bound_timeseries = list(zip(timestamps, lower_bound.values.tolist())) - upper_bound_timeseries = list(zip(timestamps, upper_bound.values.tolist())) - - if enabled_bounds == Bound.ALL: - return ProcessingResult(lower_bound_timeseries, upper_bound_timeseries) - elif enabled_bounds == Bound.UPPER: - return ProcessingResult(upper_bound = upper_bound_timeseries) - elif enabled_bounds == Bound.LOWER: - return ProcessingResult(lower_bound = lower_bound_timeseries) - - def add_season_to_data(self, data: pd.Series, segment: pd.Series, offset: int, seasonality: int, bound_type: Bound) -> pd.Series: - #data - smoothed data to which seasonality will be added - #if addition == True -> segment is added - #if addition == False -> segment is subtracted - len_smoothed_data = len(data) - for idx, _ in enumerate(data): - if idx - offset < 0: - #TODO: add seasonality for non empty parts - continue - if (idx - offset) % seasonality == 0: - if bound_type == Bound.UPPER: - upper_segment_bound = 
self.get_segment_bound(segment, Bound.UPPER) - data = data.add(pd.Series(upper_segment_bound.values, index = segment.index + idx), fill_value = 0) - elif bound_type == Bound.LOWER: - lower_segment_bound = self.get_segment_bound(segment, Bound.LOWER) - data = data.add(pd.Series(lower_segment_bound.values * -1, index = segment.index + idx), fill_value = 0) - else: - raise ValueError(f'unknown bound type: {bound_type.value}') - - return data[:len_smoothed_data] - - def get_segment_bound(self, segment: pd.Series, bound: Bound) -> pd.Series: - ''' - segment is divided by the median to determine its top or bottom part - the part is smoothed and raised above the segment or put down below the segment - ''' - if len(segment) < 2: - return segment - comparison_operator = operator.gt if bound == Bound.UPPER else operator.le - segment = segment - segment.min() - segment_median = segment.median() - part = [val if comparison_operator(val, segment_median) else segment_median for val in segment.values] - part = pd.Series(part, index = segment.index) - smoothed_part = utils.exponential_smoothing(part, BASIC_ALPHA) - difference = [abs(x - y) for x, y in zip(part, smoothed_part)] - max_diff = max(difference) - bound = [val + max_diff for val in smoothed_part.values] - bound = pd.Series(bound, index = segment.index) - return bound - - def get_seasonality_offset(self, from_timestamp: int, seasonality: int, data_start_time: int, time_step: int) -> int: - season_count = math.ceil(abs(from_timestamp - data_start_time) / seasonality) - start_seasonal_segment = from_timestamp + seasonality * season_count - seasonality_time_offset = abs(start_seasonal_segment - data_start_time) % seasonality - seasonality_offset = math.ceil(seasonality_time_offset / time_step) - return seasonality_offset - - def detections_generator( - self, - dataframe: pd.DataFrame, - upper_bound: pd.DataFrame, - lower_bound: pd.DataFrame, - enabled_bounds: Bound - ) -> Generator[Segment, None, Segment]: - in_segment = False - segment_start = 0 - bound: Bound = None - for idx, val in enumerate(dataframe['value'].values): - if val > upper_bound.values[idx]: - if enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL: - if not in_segment: - in_segment = True - segment_start = dataframe['timestamp'][idx] - bound = Bound.UPPER - continue - - if val < lower_bound.values[idx]: - if enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL: - if not in_segment: - in_segment = True - segment_start = dataframe['timestamp'][idx] - bound = Bound.LOWER - continue - - if in_segment: - segment_end = dataframe['timestamp'][idx - 1] - yield Segment( - utils.convert_pd_timestamp_to_ms(segment_start), - utils.convert_pd_timestamp_to_ms(segment_end), - message=f'{val} out of {str(bound.value)} bound' - ) - in_segment = False - else: - if in_segment: - segment_end = dataframe['timestamp'][idx] - return Segment( - utils.convert_pd_timestamp_to_ms(segment_start), - utils.convert_pd_timestamp_to_ms(segment_end), - message=f'{val} out of {str(bound.value)} bound' - ) diff --git a/analytics/analytics/detectors/detector.py b/analytics/analytics/detectors/detector.py deleted file mode 100644 index b6fbcdf..0000000 --- a/analytics/analytics/detectors/detector.py +++ /dev/null @@ -1,80 +0,0 @@ -from abc import ABC, abstractmethod -from pandas import DataFrame -from typing import Optional, Union, List - -from analytic_types import ModelCache, TimeSeries, AnalyticUnitId -from analytic_types.detector import DetectionResult, ProcessingResult -from analytic_types.segment 
import Segment - - -class Detector(ABC): - - def __init__(self, analytic_unit_id: AnalyticUnitId): - self.analytic_unit_id = analytic_unit_id - - @abstractmethod - def train(self, dataframe: DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache: - """ - Should be thread-safe to other detectors' train method - """ - pass - - @abstractmethod - def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> DetectionResult: - pass - - @abstractmethod - def consume_data(self, data: DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: - pass - - @abstractmethod - def get_window_size(self, cache: Optional[ModelCache]) -> int: - pass - - def is_detection_intersected(self) -> bool: - return True - - def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: - result = DetectionResult() - for detection in detections: - result.segments.extend(detection.segments) - result.last_detection_time = detection.last_detection_time - result.cache = detection.cache - return result - - def get_value_from_cache(self, cache: ModelCache, key: str, required = False): - value = cache.get(key) - if value == None and required: - raise ValueError(f'Missing required "{key}" field in cache for analytic unit {self.analytic_unit_id}') - return value - - -class ProcessingDetector(Detector): - - @abstractmethod - def process_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> ProcessingResult: - ''' - Data processing to receive additional time series that represents detector's settings - ''' - pass - - def concat_processing_results(self, processing_results: List[ProcessingResult]) -> Optional[ProcessingResult]: - ''' - Concatenate sequential ProcessingResults that received via - splitting dataset to chunks in analytic worker - ''' - - if len(processing_results) == 0: - return None - - united_result = ProcessingResult() - for result in processing_results: - if result.lower_bound is not None: - if united_result.lower_bound is None: united_result.lower_bound = [] - united_result.lower_bound.extend(result.lower_bound) - - if result.upper_bound is not None: - if united_result.upper_bound is None: united_result.upper_bound = [] - united_result.upper_bound.extend(result.upper_bound) - - return united_result diff --git a/analytics/analytics/detectors/pattern_detector.py b/analytics/analytics/detectors/pattern_detector.py deleted file mode 100644 index 3e3a949..0000000 --- a/analytics/analytics/detectors/pattern_detector.py +++ /dev/null @@ -1,147 +0,0 @@ -import models - -import asyncio -import logging -import config - -import pandas as pd -from typing import Optional, Generator, List - -from detectors import Detector -from analytic_types.data_bucket import DataBucket -from utils import convert_pd_timestamp_to_ms -from analytic_types import AnalyticUnitId, ModelCache -from analytic_types.detector import DetectionResult -from analytic_types.segment import Segment -import utils - -logger = logging.getLogger('PATTERN_DETECTOR') - - -def resolve_model_by_pattern(pattern: str) -> models.Model: - if pattern == 'GENERAL': - return models.GeneralModel() - if pattern == 'PEAK': - return models.PeakModel() - if pattern == 'TROUGH': - return models.TroughModel() - if pattern == 'DROP': - return models.DropModel() - if pattern == 'JUMP': - return models.JumpModel() - if pattern == 'CUSTOM': - return models.CustomModel() - raise ValueError('Unknown pattern "%s"' % pattern) - - -class PatternDetector(Detector): - - MIN_BUCKET_SIZE = 150 - 
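`PatternDetector.consume_data` below buffers incoming points in a `DataBucket`, runs detection only once at least two windows of data have accumulated, and then trims the bucket to `max(window_size * BUCKET_WINDOW_SIZE_FACTOR, MIN_BUCKET_SIZE)` points. A minimal self-contained sketch of that buffering policy (a stand-in `DataBucket` using `pd.concat` instead of the deprecated `DataFrame.append`, and a placeholder instead of a real detector) might look like this:

```python
import pandas as pd

class DataBucket:
    # stand-in mirroring analytic_types.data_bucket.DataBucket
    def __init__(self):
        self.data = pd.DataFrame([], columns=['timestamp', 'value'])

    def receive_data(self, data: pd.DataFrame):
        self.data = pd.concat([self.data, data], ignore_index=True)

    def drop_data(self, count: int):
        if count > 0:
            self.data = self.data.iloc[count:]

def consume_chunk(bucket: DataBucket, chunk: pd.DataFrame, window_size: int,
                  bucket_factor: int = 5, min_bucket_size: int = 150):
    bucket.receive_data(chunk.dropna())
    if len(bucket.data) < window_size * 2:
        return None  # not enough data for a detection window yet
    result = f'run detection over {len(bucket.data)} points'  # placeholder for detector.detect()
    # keep the bucket bounded so memory does not grow indefinitely
    bucket_size = max(window_size * bucket_factor, min_bucket_size)
    if len(bucket.data) > bucket_size:
        bucket.drop_data(len(bucket.data) - bucket_size)
    return result
```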
BUCKET_WINDOW_SIZE_FACTOR = 5 - DEFAULT_WINDOW_SIZE = 1 - - def __init__(self, pattern_type: str, analytic_unit_id: AnalyticUnitId): - super().__init__(analytic_unit_id) - self.pattern_type = pattern_type - self.model = resolve_model_by_pattern(self.pattern_type) - self.bucket = DataBucket() - - def train(self, dataframe: pd.DataFrame, segments: List[Segment], cache: Optional[ModelCache]) -> ModelCache: - # TODO: pass only part of dataframe that has segments - - if self.contains_labeled_segments(segments) == False: - msg = f'{self.analytic_unit_id} has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment' - logger.error(msg) - raise ValueError(msg) - - self.model.state: models.ModelState = self.model.get_state(cache) - new_cache: models.ModelState = self.model.fit(dataframe, segments, self.analytic_unit_id) - - # time step is optional - if len(dataframe) > 1: - new_cache.time_step = utils.find_interval(dataframe) - - new_cache = new_cache.to_json() - if len(new_cache) == 0: - logging.warning('new_cache is empty with data: {}, segments: {}, cache: {}, analytic unit: {}'.format(dataframe, segments, cache, self.analytic_unit_id)) - return { - 'cache': new_cache - } - - def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: - logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe))) - # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643) - - if cache is None: - msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection' - logger.error(msg) - raise ValueError(msg) - - self.model.state = self.model.get_state(cache) - window_size = self.model.state.window_size - - if window_size is None: - message = '{} got cache without window_size for detection'.format(self.analytic_unit_id) - logger.error(message) - raise ValueError(message) - - if len(dataframe) < window_size * 2: - message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points less than minimal length {window_size * 2} points' - logger.error(message) - raise ValueError(message) - - detected = self.model.detect(dataframe, self.analytic_unit_id) - - segments = [Segment(segment[0], segment[1]) for segment in detected['segments']] - new_cache = detected['cache'].to_json() - last_dataframe_time = dataframe.iloc[-1]['timestamp'] - last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time) - return DetectionResult(new_cache, segments, last_detection_time) - - def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: - logging.debug('Start consume_data for analytic unit {}'.format(self.analytic_unit_id)) - - if cache is None: - logging.debug(f'consume_data get invalid cache {cache} for task {self.analytic_unit_id}, skip') - return None - - data_without_nan = data.dropna() - - if len(data_without_nan) == 0: - return None - - self.bucket.receive_data(data_without_nan) - - # TODO: use ModelState - window_size = cache['windowSize'] - - bucket_len = len(self.bucket.data) - if bucket_len < window_size * 2: - msg = f'{self.analytic_unit_id} bucket data {bucket_len} less than two window size {window_size * 2}, skip run detection from consume_data' - logger.debug(msg) - return None - - res = self.detect(self.bucket.data, cache) - - bucket_size = max(window_size * self.BUCKET_WINDOW_SIZE_FACTOR, self.MIN_BUCKET_SIZE) - if bucket_len > bucket_size: - excess_data = bucket_len - bucket_size - 
self.bucket.drop_data(excess_data) - - logging.debug('End consume_data for analytic unit: {} with res: {}'.format(self.analytic_unit_id, str(res.to_json()))) - - if res: - return res - else: - return None - - def get_window_size(self, cache: Optional[ModelCache]) -> int: - if cache is None: return self.DEFAULT_WINDOW_SIZE - # TODO: windowSize -> window_size - return cache.get('windowSize', self.DEFAULT_WINDOW_SIZE) - - def contains_labeled_segments(self, segments: List[Segment]) -> bool: - for segment in segments: - if segment.labeled == True: - return True - return False diff --git a/analytics/analytics/detectors/threshold_detector.py b/analytics/analytics/detectors/threshold_detector.py deleted file mode 100644 index 385bd02..0000000 --- a/analytics/analytics/detectors/threshold_detector.py +++ /dev/null @@ -1,111 +0,0 @@ -import logging as log - -import operator -import pandas as pd -import numpy as np -from typing import Optional, List - -from analytic_types import ModelCache, AnalyticUnitId -from analytic_types.detector import DetectionResult, ProcessingResult -from analytic_types.segment import Segment -from detectors import ProcessingDetector -from time import time -import utils - - -logger = log.getLogger('THRESHOLD_DETECTOR') - - -class ThresholdDetector(ProcessingDetector): - - WINDOW_SIZE = 3 - - def __init__(self, analytic_unit_id: AnalyticUnitId): - super().__init__(analytic_unit_id) - - def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache: - time_step = utils.find_interval(dataframe) - return { - 'cache': { - 'value': threshold['value'], - 'condition': threshold['condition'], - 'timeStep': time_step - } - } - - def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult: - if cache is None or cache == {}: - raise ValueError('Threshold detector error: cannot detect before learning') - if len(dataframe) == 0: - return None - - value = cache['value'] - condition = cache['condition'] - - segments = [] - for index, row in dataframe.iterrows(): - current_value = row['value'] - current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp']) - segment = Segment(current_timestamp, current_timestamp) - # TODO: merge segments - if pd.isnull(current_value): - if condition == 'NO_DATA': - segment.message = 'NO_DATA detected' - segments.append(segment) - continue - - comparators = { - '>': operator.gt, - '<': operator.lt, - '=': operator.eq, - '>=': operator.ge, - '<=': operator.le - } - - assert condition in comparators.keys(), f'condition {condition} not allowed' - - if comparators[condition](current_value, value): - segment.message = f"{current_value} {condition} threshold's value {value}" - segments.append(segment) - - last_entry = dataframe.iloc[-1] - last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp']) - return DetectionResult(cache, segments, last_detection_time) - - - def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: - result = self.detect(data, cache) - return result if result else None - - def get_window_size(self, cache: Optional[ModelCache]) -> int: - return self.WINDOW_SIZE - - def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: - result = DetectionResult() - time_step = detections[0].cache['timeStep'] - for detection in detections: - result.segments.extend(detection.segments) - result.last_detection_time = detection.last_detection_time - result.cache = detection.cache - result.segments = 
utils.merge_intersecting_segments(result.segments, time_step) - return result - - def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult: - data = dataframe['value'] - value = self.get_value_from_cache(cache, 'value', required = True) - condition = self.get_value_from_cache(cache, 'condition', required = True) - - if condition == 'NO_DATA': - return ProcessingResult() - - data.values[:] = value - timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp) - result_series = list(zip(timestamps, data.values.tolist())) - - if condition in ['>', '>=', '=']: - return ProcessingResult(upper_bound = result_series) - - if condition in ['<', '<=']: - return ProcessingResult(lower_bound = result_series) - - raise ValueError(f'{condition} condition not supported') diff --git a/analytics/analytics/models/__init__.py b/analytics/analytics/models/__init__.py deleted file mode 100644 index 1241fec..0000000 --- a/analytics/analytics/models/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from models.model import Model, ModelState, AnalyticSegment, ModelType, ExtremumType -from models.triangle_model import TriangleModel, TriangleModelState -from models.stair_model import StairModel, StairModelState -from models.drop_model import DropModel -from models.peak_model import PeakModel -from models.jump_model import JumpModel -from models.custom_model import CustomModel -from models.trough_model import TroughModel -from models.general_model import GeneralModel, GeneralModelState diff --git a/analytics/analytics/models/custom_model.py b/analytics/analytics/models/custom_model.py deleted file mode 100644 index 37fa039..0000000 --- a/analytics/analytics/models/custom_model.py +++ /dev/null @@ -1,30 +0,0 @@ -from models import Model, AnalyticSegment, ModelState, ModelType -from analytic_types import AnalyticUnitId, ModelCache -from analytic_types.learning_info import LearningInfo -import utils - -import pandas as pd -from typing import List, Optional - - -class CustomModel(Model): - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - pass - - def do_detect(self, dataframe: pd.DataFrame) -> list: - return [] - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - pass - - def get_model_type(self) -> ModelType: - pass - - def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: - pass diff --git a/analytics/analytics/models/drop_model.py b/analytics/analytics/models/drop_model.py deleted file mode 100644 index f38db6b..0000000 --- a/analytics/analytics/models/drop_model.py +++ /dev/null @@ -1,9 +0,0 @@ -from models import StairModel, ModelType, ExtremumType - -class DropModel(StairModel): - - def get_model_type(self) -> ModelType: - return ModelType.DROP - - def get_extremum_type(self) -> ExtremumType: - return ExtremumType.MIN diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py deleted file mode 100644 index 0671502..0000000 --- a/analytics/analytics/models/general_model.py +++ /dev/null @@ -1,104 +0,0 @@ -from analytic_types import AnalyticUnitId -from models import Model, ModelState, AnalyticSegment, ModelType -from typing import Union, List, Generator -import utils -import utils.meta -import numpy as np -import pandas as pd -import scipy.signal -from scipy.fftpack import fft -from scipy.signal import argrelextrema -from scipy.stats.stats import pearsonr - 
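Both `ThresholdDetector` and `AnomalyDetector` merge per-chunk detections with `utils.merge_intersecting_segments(segments, time_step)`, which is not included in this diff. A plausible sketch, assuming that segments which overlap or lie within one `time_step` of each other should be folded into one, is shown below (with a trimmed stand-in for `Segment`):

```python
from typing import List, Optional

class Segment:
    # trimmed stand-in for analytic_types.segment.Segment
    def __init__(self, from_timestamp: int, to_timestamp: int, message: Optional[str] = None):
        self.from_timestamp = from_timestamp
        self.to_timestamp = to_timestamp
        self.message = message

def merge_intersecting_segments(segments: List[Segment], time_step: int) -> List[Segment]:
    # sort by start time and fold together segments that overlap
    # or sit within one time_step of each other
    segments = sorted(segments, key=lambda s: s.from_timestamp)
    merged: List[Segment] = []
    for segment in segments:
        if merged and segment.from_timestamp <= merged[-1].to_timestamp + time_step:
            merged[-1].to_timestamp = max(merged[-1].to_timestamp, segment.to_timestamp)
        else:
            merged.append(segment)
    return merged
```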
-from scipy.stats import gaussian_kde -from scipy.stats import norm -import logging - -from typing import Optional, List, Tuple -import math -from analytic_types import AnalyticUnitId, TimeSeries -from analytic_types.learning_info import LearningInfo - -PEARSON_FACTOR = 0.7 - - -@utils.meta.JSONClass -class GeneralModelState(ModelState): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - -class GeneralModel(Model): - - def get_model_type(self) -> ModelType: - return ModelType.GENERAL - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - data = dataframe['value'] - segment = data[start: end] - center_ind = start + math.ceil((end - start) / 2) - return center_ind - - def get_state(self, cache: Optional[dict] = None) -> GeneralModelState: - return GeneralModelState.from_json(cache) - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - last_pattern_center = self.state.pattern_center - self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) - self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) - convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) - - del_conv_list = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - del_mid_index = segment.center_index - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted_pat = utils.get_interval(data, del_mid_index, self.state.window_size) - deleted_pat = utils.subtract_min_without_nan(deleted_pat) - del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state.pattern_model) - if len(del_conv_pat): del_conv_list.append(max(del_conv_pat)) - - self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3) - self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size) - - def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: - data = utils.cut_dataframe(dataframe) - data = data['value'] - pat_data = self.state.pattern_model - if pat_data.count(0) == len(pat_data): - raise ValueError('Labeled patterns must not be empty') - - window_size = self.state.window_size - all_corr = utils.get_correlation_gen(data, window_size, pat_data) - all_corr_peaks = utils.find_peaks(all_corr, window_size * 2) - filtered = self.__filter_detection(all_corr_peaks, data) - filtered = list(filtered) - return [(item, item + window_size * 2) for item in filtered] - - def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]: - if not self.state.pattern_center: - return [] - window_size = self.state.window_size - pattern_model = self.state.pattern_model - for ind, val in segments: - watch_data = data[ind - window_size: ind + window_size + 1] - watch_data = utils.subtract_min_without_nan(watch_data) - convolve_segment = scipy.signal.fftconvolve(watch_data, pattern_model) - if len(convolve_segment) > 0: - watch_conv = max(convolve_segment) - else: - continue - if watch_conv < self.state.convolve_min * 0.8 or val < PEARSON_FACTOR: - continue - if watch_conv < self.state.conv_del_max * 1.02 
and watch_conv > self.state.conv_del_min * 0.98: - continue - yield ind diff --git a/analytics/analytics/models/jump_model.py b/analytics/analytics/models/jump_model.py deleted file mode 100644 index 5195fac..0000000 --- a/analytics/analytics/models/jump_model.py +++ /dev/null @@ -1,9 +0,0 @@ -from models import StairModel, ModelType, ExtremumType - -class JumpModel(StairModel): - - def get_model_type(self) -> ModelType: - return ModelType.JUMP - - def get_extremum_type(self) -> ExtremumType: - return ExtremumType.MAX diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py deleted file mode 100644 index dba057d..0000000 --- a/analytics/analytics/models/model.py +++ /dev/null @@ -1,230 +0,0 @@ -from analytic_types import AnalyticUnitId, ModelCache, TimeSeries -from analytic_types.segment import Segment -from analytic_types.learning_info import LearningInfo - -import utils -import utils.meta - -from abc import ABC, abstractmethod -from attrdict import AttrDict -from typing import Optional, List, Tuple -import pandas as pd -import math -import logging -from enum import Enum - -class ModelType(Enum): - JUMP = 'jump' - DROP = 'drop' - PEAK = 'peak' - TROUGH = 'trough' - GENERAL = 'general' - -class ExtremumType(Enum): - MAX = 'max' - MIN = 'min' - -class AnalyticSegment(Segment): - ''' - Segment with specific analytics fields used by models: - - `labeled` / `deleted` flags - - `from` / `to` / `center` indices - - `length` - - `data` - - etc - ''' - - def __init__( - self, - from_timestamp: int, - to_timestamp: int, - _id: str, - analytic_unit_id: str, - labeled: bool, - deleted: bool, - message: str, - dataframe: pd.DataFrame, - center_finder = None - ): - super().__init__( - from_timestamp, - to_timestamp, - _id, - analytic_unit_id, - labeled, - deleted, - message - ) - - self.from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(self.from_timestamp, unit='ms')) - self.to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(self.to_timestamp, unit='ms')) - self.length = abs(self.to_index - self.from_index) - self.__percent_of_nans = 0 - - if callable(center_finder): - self.center_index = center_finder(dataframe, self.from_index, self.to_index) - self.pattern_timestamp = dataframe['timestamp'][self.center_index] - else: - self.center_index = self.from_index + math.ceil(self.length / 2) - self.pattern_timestamp = dataframe['timestamp'][self.center_index] - - assert len(dataframe['value']) >= self.to_index + 1, \ - 'segment {}-{} out of dataframe length={}'.format(self.from_index, self.to_index + 1, len(dataframe['value'])) - - self.data = dataframe['value'][self.from_index: self.to_index + 1] - - @property - def percent_of_nans(self): - if not self.__percent_of_nans: - self.__percent_of_nans = self.data.isnull().sum() / len(self.data) - return self.__percent_of_nans - - def convert_nan_to_zero(self): - nan_list = utils.find_nan_indexes(self.data) - self.data = utils.nan_to_zero(self.data, nan_list) - - -@utils.meta.JSONClass -class ModelState(): - - def __init__( - self, - time_step: int = 0, - pattern_center: List[int] = None, - pattern_model: List[float] = None, - convolve_max: float = 0, - convolve_min: float = 0, - window_size: int = 0, - conv_del_min: float = 0, - conv_del_max: float = 0 - ): - self.time_step = time_step - self.pattern_center = pattern_center if pattern_center is not None else [] - self.pattern_model = pattern_model if pattern_model is not None else [] - self.convolve_max = convolve_max - self.convolve_min = convolve_min - 
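`GeneralModel` above scores a candidate window by FFT-based convolution of the min-subtracted window with the averaged pattern model and compares the peak against the learned `convolve_min` / `conv_del_*` bands. The following is a simplified standalone illustration of that scoring step; `get_av_model` here is an assumption modelled on how the code above uses it, not the project's actual utility:

```python
import numpy as np
import scipy.signal

def get_av_model(patterns: list) -> np.ndarray:
    # element-wise average of equally sized labeled pattern windows
    return np.mean(np.array(patterns), axis=0)

def pattern_score(window: np.ndarray, pattern_model: np.ndarray) -> float:
    # subtract the window minimum (as subtract_min_without_nan does),
    # then take the peak of the FFT-based convolution as a similarity score
    window = window - np.nanmin(window)
    return float(scipy.signal.fftconvolve(window, pattern_model).max())

pattern = get_av_model([[0.0, 1.0, 0.0], [0.0, 0.8, 0.2]])
print(pattern_score(np.array([5.0, 6.1, 5.0]), pattern))  # peak of the convolution for a similar shape
```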
self.window_size = window_size - self.conv_del_min = conv_del_min - self.conv_del_max = conv_del_max - - -class Model(ABC): - - HEIGHT_ERROR = 0.1 - CONV_ERROR = 0.2 - DEL_CONV_ERROR = 0.02 - - @abstractmethod - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - pass - - @abstractmethod - def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: - pass - - @abstractmethod - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - pass - - @abstractmethod - def get_model_type(self) -> ModelType: - pass - - @abstractmethod - def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: - pass - - def fit(self, dataframe: pd.DataFrame, segments: List[Segment], id: AnalyticUnitId) -> ModelState: - logging.debug('Start method fit for analytic unit {}'.format(id)) - data = dataframe['value'] - max_length = 0 - labeled = [] - deleted = [] - for segment_map in segments: - if segment_map.labeled or segment_map.deleted: - segment = AnalyticSegment( - segment_map.from_timestamp, - segment_map.to_timestamp, - segment_map._id, - segment_map.analytic_unit_id, - segment_map.labeled, - segment_map.deleted, - segment_map.message, - dataframe, - self.find_segment_center - ) - if segment.percent_of_nans > 0.1 or len(segment.data) == 0: - logging.debug(f'segment {segment.from_index}-{segment.to_index} skip because of invalid data') - continue - if segment.percent_of_nans > 0: - segment.convert_nan_to_zero() - max_length = max(segment.length, max_length) - if segment.labeled: labeled.append(segment) - if segment.deleted: deleted.append(segment) - - assert len(labeled) > 0, f'labeled list empty, skip fitting for {id}' - - if self.state.window_size == 0: - self.state.window_size = math.ceil(max_length / 2) if max_length else 0 - learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, self.get_model_type()) - self.do_fit(dataframe, labeled, deleted, learning_info) - logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id)) - return self.state - - def detect(self, dataframe: pd.DataFrame, id: AnalyticUnitId) -> dict: - logging.debug('Start method detect for analytic unit {}'.format(id)) - result = self.do_detect(dataframe) - segments = [( - utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]), - utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]), - ) for x in result] - if not self.state: - logging.warning('Return empty self.state after detect') - logging.debug('Method detect complete successful for analytic unit {}'.format(id)) - return { - 'segments': segments, - 'cache': self.state, - } - - def _update_fitting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None: - state.confidence = float(min(confidences, default = 1.5)) - state.convolve_min, state.convolve_max = utils.get_min_max(convolve_list, state.window_size) - state.conv_del_min, state.conv_del_max = utils.get_min_max(del_conv_list, 0) - if height_list is not None: - state.height_min, state.height_max = utils.get_min_max(height_list, 0) - - def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: ModelType) -> dict: - logging.debug('Start parsing segments') - learning_info = LearningInfo() - data = dataframe['value'] - for segment in labeled: - confidence = 
utils.find_confidence(segment.data)[0] - learning_info.confidence.append(confidence) - segment_center = segment.center_index - learning_info.segment_center_list.append(segment_center) - learning_info.pattern_timestamp.append(segment.pattern_timestamp) - aligned_segment = utils.get_interval(data, segment_center, self.state.window_size) - aligned_segment = utils.subtract_min_without_nan(aligned_segment) - if len(aligned_segment) == 0: - logging.warning('cant add segment to learning because segment is empty where segments center is: {}, window_size: {}, and len_data: {}'.format( - segment_center, self.state.window_size, len(data))) - continue - learning_info.patterns_list.append(aligned_segment) - # TODO: use Triangle/Stair types - if model == ModelType.PEAK or model == ModelType.TROUGH: - learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1]) - learning_info.patterns_value.append(aligned_segment.values.max()) - if model == ModelType.JUMP or model == ModelType.DROP: - pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model.value) - learning_info.pattern_height.append(pattern_height) - learning_info.pattern_width.append(pattern_length) - learning_info.patterns_value.append(aligned_segment.values[self.state.window_size]) - logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info)) - return learning_info - diff --git a/analytics/analytics/models/peak_model.py b/analytics/analytics/models/peak_model.py deleted file mode 100644 index 843f291..0000000 --- a/analytics/analytics/models/peak_model.py +++ /dev/null @@ -1,44 +0,0 @@ -from analytic_types import TimeSeries -from models import TriangleModel, ModelType -import utils - -import scipy.signal -from scipy.signal import argrelextrema -from typing import Optional, List, Tuple -import numpy as np -import pandas as pd - -class PeakModel(TriangleModel): - - def get_model_type(self) -> ModelType: - return ModelType.PEAK - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - data = dataframe['value'] - segment = data[start: end] - return segment.idxmax() - - def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: - pattern_list = [] - for val in close_patterns: - max_val = data[val[0]] - ind = val[0] - for i in val: - if data[i] > max_val: - max_val = data[i] - ind = i - pattern_list.append(ind) - return pattern_list - - def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: - return argrelextrema(data.values, np.greater)[0] - - def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: - return utils.exponential_smoothing(data + self.state.confidence, alpha) - - def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]: - segments = [] - for idx in peak_indexes: - if data[idx] > smoothed_data[idx]: - segments.append(idx) - return segments diff --git a/analytics/analytics/models/stair_model.py b/analytics/analytics/models/stair_model.py deleted file mode 100644 index 96549af..0000000 --- a/analytics/analytics/models/stair_model.py +++ /dev/null @@ -1,147 +0,0 @@ -from models import Model, ModelState, AnalyticSegment, ModelType - -from analytic_types import TimeSeries -from analytic_types.learning_info import LearningInfo - -from scipy.fftpack import fft -from typing import Optional, List -from enum import Enum -import scipy.signal -import utils -import utils.meta -import pandas as pd -import 
numpy as np -import operator - -POSITIVE_SEGMENT_MEASUREMENT_ERROR = 0.2 -NEGATIVE_SEGMENT_MEASUREMENT_ERROR = 0.02 - -@utils.meta.JSONClass -class StairModelState(ModelState): - - def __init__( - self, - confidence: float = 0, - stair_height: float = 0, - stair_length: float = 0, - **kwargs - ): - super().__init__(**kwargs) - self.confidence = confidence - self.stair_height = stair_height - self.stair_length = stair_length - - -class StairModel(Model): - - def get_state(self, cache: Optional[dict] = None) -> StairModelState: - return StairModelState.from_json(cache) - - def get_stair_indexes(self, data: pd.Series, height: float, length: int) -> List[int]: - """Get list of start stair segment indexes. - - Keyword arguments: - data -- data, that contains stair (jump or drop) segments - length -- maximum count of values in the stair - height -- the difference between stair max_line and min_line(see utils.find_parameters) - """ - indexes = [] - for i in range(len(data) - length - 1): - is_stair = self.is_stair_in_segment(data.values[i:i + length + 1], height) - if is_stair == True: - indexes.append(i) - return indexes - - def is_stair_in_segment(self, segment: np.ndarray, height: float) -> bool: - if len(segment) < 2: - return False - comparison_operator = operator.ge - if self.get_model_type() == ModelType.DROP: - comparison_operator = operator.le - height = -height - return comparison_operator(max(segment[1:]), segment[0] + height) - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - data = dataframe['value'] - segment = data[start: end] - segment_center_index = utils.find_pattern_center(segment, start, self.get_model_type().value) - return segment_center_index - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - window_size = self.state.window_size - last_pattern_center = self.state.pattern_center - self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) - self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) - convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) - height_list = learning_info.patterns_value - - del_conv_list = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - segment_cent_index = segment.center_index - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted_stair = utils.get_interval(data, segment_cent_index, window_size) - deleted_stair = utils.subtract_min_without_nan(deleted_stair) - del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model) - if len(del_conv_stair) > 0: - del_conv_list.append(max(del_conv_stair)) - - self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) - self.state.stair_height = int(min(learning_info.pattern_height, default = 1)) - self.state.stair_length = int(max(learning_info.pattern_width, default = 1)) - - def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: - data = utils.cut_dataframe(dataframe) - data = data['value'] - possible_stairs = self.get_stair_indexes(data, self.state.stair_height, self.state.stair_length + 1) - result = 
self.__filter_detection(possible_stairs, data) - return [(val - 1, val + 1) for val in result] - - def __filter_detection(self, segments_indexes: List[int], data: list): - delete_list = [] - variance_error = self.state.window_size - close_segments = utils.close_filtering(segments_indexes, variance_error) - segments_indexes = utils.best_pattern(close_segments, data, self.get_extremum_type().value) - if len(segments_indexes) == 0 or len(self.state.pattern_center) == 0: - return [] - pattern_data = self.state.pattern_model - for segment_index in segments_indexes: - if segment_index <= self.state.window_size or segment_index >= (len(data) - self.state.window_size): - delete_list.append(segment_index) - continue - convol_data = utils.get_interval(data, segment_index, self.state.window_size) - percent_of_nans = convol_data.isnull().sum() / len(convol_data) - if len(convol_data) == 0 or percent_of_nans > 0.5: - delete_list.append(segment_index) - continue - elif 0 < percent_of_nans <= 0.5: - nan_list = utils.find_nan_indexes(convol_data) - convol_data = utils.nan_to_zero(convol_data, nan_list) - pattern_data = utils.nan_to_zero(pattern_data, nan_list) - conv = scipy.signal.fftconvolve(convol_data, pattern_data) - if len(conv) == 0: - delete_list.append(segment_index) - continue - upper_bound = self.state.convolve_max * (1 + POSITIVE_SEGMENT_MEASUREMENT_ERROR) - lower_bound = self.state.convolve_min * (1 - POSITIVE_SEGMENT_MEASUREMENT_ERROR) - delete_up_bound = self.state.conv_del_max * (1 + NEGATIVE_SEGMENT_MEASUREMENT_ERROR) - delete_low_bound = self.state.conv_del_min * (1 - NEGATIVE_SEGMENT_MEASUREMENT_ERROR) - max_conv = max(conv) - if max_conv > upper_bound or max_conv < lower_bound: - delete_list.append(segment_index) - elif max_conv < delete_up_bound and max_conv > delete_low_bound: - delete_list.append(segment_index) - - for item in delete_list: - segments_indexes.remove(item) - segments_indexes = utils.remove_duplicates_and_sort(segments_indexes) - return segments_indexes diff --git a/analytics/analytics/models/triangle_model.py b/analytics/analytics/models/triangle_model.py deleted file mode 100644 index 5c4c017..0000000 --- a/analytics/analytics/models/triangle_model.py +++ /dev/null @@ -1,119 +0,0 @@ -from analytic_types import AnalyticUnitId, TimeSeries -from analytic_types.learning_info import LearningInfo -from models import Model, ModelState, AnalyticSegment -import utils -import utils.meta - -import scipy.signal -from scipy.fftpack import fft -from typing import Optional, List, Tuple -import numpy as np -import pandas as pd - - -EXP_SMOOTHING_FACTOR = 0.01 - - -@utils.meta.JSONClass -class TriangleModelState(ModelState): - - def __init__( - self, - confidence: float = 0, - height_max: float = 0, - height_min: float = 0, - **kwargs - ): - super().__init__(**kwargs) - self.confidence = confidence - self.height_max = height_max - self.height_min = height_min - -class TriangleModel(Model): - - def get_state(self, cache: Optional[dict] = None) -> TriangleModelState: - return TriangleModelState.from_json(cache) - - def do_fit( - self, - dataframe: pd.DataFrame, - labeled_segments: List[AnalyticSegment], - deleted_segments: List[AnalyticSegment], - learning_info: LearningInfo - ) -> None: - data = utils.cut_dataframe(dataframe) - data = data['value'] - self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list) - self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) - convolve_list = 
utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) - correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) - height_list = learning_info.patterns_value - - del_conv_list = [] - delete_pattern_width = [] - delete_pattern_height = [] - delete_pattern_timestamp = [] - for segment in deleted_segments: - delete_pattern_timestamp.append(segment.pattern_timestamp) - deleted = utils.get_interval(data, segment.center_index, self.state.window_size) - deleted = utils.subtract_min_without_nan(deleted) - del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model) - if len(del_conv): - del_conv_list.append(max(del_conv)) - delete_pattern_height.append(utils.find_confidence(deleted)[1]) - - self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list) - - def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: - data = utils.cut_dataframe(dataframe) - data = data['value'] - - all_extremum_indexes = self.get_extremum_indexes(data) - smoothed_data = self.get_smoothed_data(data, self.state.confidence, EXP_SMOOTHING_FACTOR) - segments = self.get_possible_segments(data, smoothed_data, all_extremum_indexes) - result = self.__filter_detection(segments, data) - result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence) - return result - - def __filter_detection(self, segments: List[int], data: pd.Series) -> list: - delete_list = [] - variance_error = self.state.window_size - close_patterns = utils.close_filtering(segments, variance_error) - segments = self.get_best_pattern(close_patterns, data) - - if len(segments) == 0 or len(self.state.pattern_model) == 0: - return [] - pattern_data = self.state.pattern_model - up_height = self.state.height_max * (1 + self.HEIGHT_ERROR) - low_height = self.state.height_min * (1 - self.HEIGHT_ERROR) - up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR) - low_conv = self.state.convolve_min * (1 - self.CONV_ERROR) - up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR) - low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR) - for segment in segments: - if segment > self.state.window_size: - convol_data = utils.get_interval(data, segment, self.state.window_size) - convol_data = utils.subtract_min_without_nan(convol_data) - percent_of_nans = convol_data.isnull().sum() / len(convol_data) - if percent_of_nans > 0.5: - delete_list.append(segment) - continue - elif 0 < percent_of_nans <= 0.5: - nan_list = utils.find_nan_indexes(convol_data) - convol_data = utils.nan_to_zero(convol_data, nan_list) - pattern_data = utils.nan_to_zero(pattern_data, nan_list) - conv = scipy.signal.fftconvolve(convol_data, pattern_data) - pattern_height = convol_data.values.max() - if pattern_height > up_height or pattern_height < low_height: - delete_list.append(segment) - continue - if max(conv) > up_conv or max(conv) < low_conv: - delete_list.append(segment) - continue - if max(conv) < up_del_conv and max(conv) > low_del_conv: - delete_list.append(segment) - else: - delete_list.append(segment) - for item in delete_list: - segments.remove(item) - return set(segments) diff --git a/analytics/analytics/models/trough_model.py b/analytics/analytics/models/trough_model.py deleted file mode 100644 index 39116f1..0000000 --- a/analytics/analytics/models/trough_model.py +++ /dev/null @@ -1,44 +0,0 @@ -from analytic_types import TimeSeries -from models import 
TriangleModel, ModelType -import utils - -import scipy.signal -from scipy.signal import argrelextrema -from typing import Optional, List, Tuple -import numpy as np -import pandas as pd - -class TroughModel(TriangleModel): - - def get_model_type(self) -> ModelType: - return ModelType.TROUGH - - def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: - data = dataframe['value'] - segment = data[start: end] - return segment.idxmin() - - def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: - pattern_list = [] - for val in close_patterns: - min_val = data[val[0]] - ind = val[0] - for i in val: - if data[i] < min_val: - min_val = data[i] - ind = i - pattern_list.append(ind) - return pattern_list - - def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: - return argrelextrema(data.values, np.less)[0] - - def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: - return utils.exponential_smoothing(data - self.state.confidence, alpha) - - def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, trough_indexes: List[int]) -> List[int]: - segments = [] - for idx in trough_indexes: - if data[idx] < smoothed_data[idx]: - segments.append(idx) - return segments diff --git a/analytics/analytics/server.py b/analytics/analytics/server.py deleted file mode 100644 index c32ed01..0000000 --- a/analytics/analytics/server.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import os - - -import config -import json -import logging -import asyncio -import traceback - -import services -from analytic_unit_manager import AnalyticUnitManager - - -server_service: services.ServerService = None -data_service: services.DataService = None -analytic_unit_manager: AnalyticUnitManager = None - -logger = logging.getLogger('SERVER') - - -async def handle_task(task: object): - try: - task_type = task['type'] - logger.info("Got {} task with id {}, analyticUnitId {}".format(task_type, task['_id'], task['analyticUnitId'])) - - task_result_payload = { - '_id': task['_id'], - 'task': task_type, - 'analyticUnitId': task['analyticUnitId'], - 'status': "IN_PROGRESS" - } - - if not task_type == 'PUSH': - message = services.server_service.ServerMessage('TASK_RESULT', task_result_payload) - await server_service.send_message_to_server(message) - - res = await analytic_unit_manager.handle_analytic_task(task) - res['_id'] = task['_id'] - - if not task_type == 'PUSH': - message = services.server_service.ServerMessage('TASK_RESULT', res) - await server_service.send_message_to_server(message) - - except Exception as e: - error_text = traceback.format_exc() - logger.error("handle_task Exception: '%s'" % error_text) - -async def handle_data(task: object): - res = await analytic_unit_manager.handle_analytic_task(task) - - if res['status'] == 'SUCCESS' and res['payload'] is not None: - res['_id'] = task['_id'] - message = services.server_service.ServerMessage('PUSH_DETECT', res) - await server_service.send_message_to_server(message) - -async def handle_message(message: services.ServerMessage): - if message.method == 'TASK': - await handle_task(message.payload) - if message.method == 'DATA': - await handle_data(message.payload) - -def init_services(): - global server_service - global data_service - global analytic_unit_manager - - logger.info("Starting services...") - logger.info("Server...") - server_service = services.ServerService() - logger.info("Ok") - logger.info("Data service...") - data_service = 
services.DataService(server_service) - logger.info("Ok") - logger.info("Analytic unit manager...") - analytic_unit_manager = AnalyticUnitManager() - logger.info("Ok") - -async def app_loop(): - async for message in server_service: - asyncio.ensure_future(handle_message(message)) - - -def run_server(): - loop = asyncio.get_event_loop() - #loop.set_debug(True) - logger.info("Ok") - init_services() - print('Analytics process is running') # we need to print to stdout and flush - sys.stdout.flush() # because node.js expects it - - loop.run_until_complete(app_loop()) diff --git a/analytics/analytics/services/__init__.py b/analytics/analytics/services/__init__.py deleted file mode 100644 index 8f5f5a4..0000000 --- a/analytics/analytics/services/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from services.server_service import ServerService, ServerMessage -from services.data_service import DataService diff --git a/analytics/analytics/services/data_service.py b/analytics/analytics/services/data_service.py deleted file mode 100644 index 9978243..0000000 --- a/analytics/analytics/services/data_service.py +++ /dev/null @@ -1,85 +0,0 @@ -from services.server_service import ServerMessage, ServerService - -import json -import asyncio - -""" -This is how you can save a file: - -async def test_file_save(): - async with data_service.open('filename') as f: - print('write content') - await f.write('test string') - - async with data_service.open('filename') as f: - content = await f.load() - print(content) - print('test file ok') -""" - - -LOCK_WAIT_SLEEP_TIMESPAN = 100 # mc - -class FileDescriptor: - def __init__(self, filename: str, data_service): - self.filename = filename - self.data_service = data_service - - async def write(self, content: str): - await self.data_service.save_file_content(self, content) - - async def load(self) -> str: - return await self.data_service.load_file_content(self) - - async def __aenter__(self): - await self.data_service.wait_and_lock(self) - return self - - async def __aexit__(self, *exc): - await self.data_service.unlock(self) - - -class DataService: - - def __init__(self, server_service: ServerService): - """Creates fs over network via server_service""" - self.server_service = server_service - self.locks = set() - - def open(self, filename: str) -> FileDescriptor: - return FileDescriptor(filename, self) - - async def wait_and_lock(self, file_descriptor: FileDescriptor): - filename = file_descriptor.filename - while True: - if filename in self.locks: - asyncio.sleep(LOCK_WAIT_SLEEP_TIMESPAN) - continue - else: - self.locks.add(filename) - break - - async def unlock(self, file_descriptor: FileDescriptor): - filename = file_descriptor.filename - self.locks.remove(filename) - - async def save_file_content(self, file_descriptor: FileDescriptor, content: str): - """ Saves json - serializable obj with file_descriptor.filename """ - self.__check_lock(file_descriptor) - message_payload = { - 'filename': file_descriptor.filename, - 'content': content - } - message = ServerMessage('FILE_SAVE', message_payload) - await self.server_service.send_request_to_server(message) - - async def load_file_content(self, file_descriptor: FileDescriptor) -> str: - self.__check_lock(file_descriptor) - message_payload = { 'filename': file_descriptor.filename } - message = ServerMessage('FILE_LOAD', message_payload) - return await self.server_service.send_request_to_server(message) - - def __check_lock(self, file_descriptor: FileDescriptor): - filename = file_descriptor.filename - if filename not in 
self.locks: - raise RuntimeError('No lock for file %s' % filename) diff --git a/analytics/analytics/services/server_service.py b/analytics/analytics/services/server_service.py deleted file mode 100644 index 039060a..0000000 --- a/analytics/analytics/services/server_service.py +++ /dev/null @@ -1,132 +0,0 @@ -import config - -import websockets - -import logging -import json -import asyncio -import traceback - -import utils.concurrent -import utils.meta - -from typing import Optional - -logger = logging.getLogger('SERVER_SERVICE') - - -PARSE_MESSAGE_OR_SAVE_LOOP_INTERRUPTED = False -SERVER_SOCKET_RECV_LOOP_INTERRUPTED = False - - -@utils.meta.JSONClass -class ServerMessage: - def __init__(self, method: str, payload: object = None, request_id: int = None): - # TODO: add error type / case - self.method = method - self.payload = payload - self.request_id = request_id - - -class ServerService(utils.concurrent.AsyncZmqActor): - - def __init__(self): - super(ServerService, self).__init__() - self.__aiter_inited = False - # this typing doesn't help vscode, maybe there is a mistake - self.__server_socket: Optional[websockets.Connect] = None - self.__request_next_id = 1 - self.__responses = dict() - self.start() - - async def send_message_to_server(self, message: ServerMessage): - # Following message will be sent to actor's self._on_message() - # We do it cuz we created self.__server_socket in self._run() method, - # which runs in the actor's thread, not the thread we created ServerService - - # in theory, we can try to use zmq.proxy: - # zmq.proxy(self.__actor_socket, self.__server_socket) - # and do here something like: - # self.__actor_socket.send_string(json.dumps(message.to_json())) - await self._put_message_to_thread(json.dumps(message.to_json())) - - async def send_request_to_server(self, message: ServerMessage) -> object: - if message.request_id is not None: - raise ValueError('Message can`t have request_id before it is scheduled') - request_id = message.request_id = self.__request_next_id - self.request_next_id = self.__request_next_id + 1 - asyncio.ensure_future(self.send_message_to_server(message)) - # you should await self.__responses[request_id] which should be a task, - # which you resolve somewhere else - while request_id not in self.__responses: - await asyncio.sleep(1) - response = self.__responses[request_id] - del self.__responses[request_id] - return response - - def __aiter__(self): - if self.__aiter_inited: - raise RuntimeError('Can`t iterate twice') - __aiter_inited = True - return self - - async def __anext__(self) -> ServerMessage: - while not PARSE_MESSAGE_OR_SAVE_LOOP_INTERRUPTED: - thread_message = await self._recv_message_from_thread() - server_message = self.__parse_message_or_save(thread_message) - if server_message is None: - continue - else: - return server_message - - async def _run_thread(self): - logger.info("Binding to %s ..." 
% config.HASTIC_SERVER_URL) - # TODO: consider to use async context for socket - await self.__server_socket_recv_loop() - - async def _on_message_to_thread(self, message: str): - await self.__server_socket.send(message) - - async def __server_socket_recv_loop(self): - while not SERVER_SOCKET_RECV_LOOP_INTERRUPTED: - received_string = await self.__reconnect_recv() - if received_string == 'PING': - asyncio.ensure_future(self.__handle_ping()) - else: - asyncio.ensure_future(self._send_message_from_thread(received_string)) - - async def __reconnect_recv(self) -> str: - while not SERVER_SOCKET_RECV_LOOP_INTERRUPTED: - try: - if self.__server_socket is None: - self.__server_socket = await websockets.connect(config.HASTIC_SERVER_URL) - first_message = await self.__server_socket.recv() - if first_message == 'EALREADYEXISTING': - raise ConnectionError('Can`t connect as a second analytics') - return await self.__server_socket.recv() - except (ConnectionRefusedError, websockets.ConnectionClosedError): - if not self.__server_socket is None: - self.__server_socket.close() - # TODO: this logic increases the number of ThreadPoolExecutor - self.__server_socket = None - # TODO: move to config - reconnect_delay = 3 - print('connection is refused or lost, trying to reconnect in %s seconds' % reconnect_delay) - await asyncio.sleep(reconnect_delay) - raise InterruptedError() - - async def __handle_ping(self): - # TODO: self.__server_socket can be None - await self.__server_socket.send('PONG') - - def __parse_message_or_save(self, text: str) -> Optional[ServerMessage]: - try: - message_object = json.loads(text) - message = ServerMessage.from_json(message_object) - if message.request_id is not None: - self.__responses[message_object['requestId']] = message.payload - return None - return message - except Exception: - error_text = traceback.format_exc() - logger.error("__handle_message Exception: '%s'" % error_text) diff --git a/analytics/analytics/utils/__init__.py b/analytics/analytics/utils/__init__.py deleted file mode 100644 index 21077e6..0000000 --- a/analytics/analytics/utils/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from utils.common import * -from utils.time import * -from utils.dataframe import * -from utils.meta import * diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py deleted file mode 100644 index 07ff9ff..0000000 --- a/analytics/analytics/utils/common.py +++ /dev/null @@ -1,443 +0,0 @@ -import numpy as np -import pandas as pd -import scipy.signal -from scipy.fftpack import fft -from scipy.signal import argrelextrema -from scipy.stats import gaussian_kde -from scipy.stats.stats import pearsonr -import math -from typing import Optional, Union, List, Generator, Tuple -import utils -import logging -from itertools import islice -from collections import deque -from analytic_types import TimeSeries -from analytic_types.segment import Segment - -SHIFT_FACTOR = 0.05 -CONFIDENCE_FACTOR = 0.5 -SMOOTHING_FACTOR = 5 -MEASUREMENT_ERROR = 0.05 - - -def exponential_smoothing(series: pd.Series, alpha: float, last_smoothed_value: Optional[float] = None) -> pd.Series: - if alpha < 0 or alpha > 1: - raise ValueError('Alpha must be within the boundaries: 0 <= alpha <= 1') - if len(series) < 2: - return series - if last_smoothed_value is None: - result = [series.values[0]] - else: - result = [float(last_smoothed_value)] - if np.isnan(result): - result = [0] - for n in range(1, len(series)): - if np.isnan(series[n]): - result.append((1 - alpha) * result[n - 1]) - series.values[n] = 
result[n] - else: - result.append(alpha * series[n] + (1 - alpha) * result[n - 1]) - - assert len(result) == len(series), \ - f'len of smoothed data {len(result)} != len of original dataset {len(series)}' - return pd.Series(result, index = series.index) - -def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) -> list: - pattern_list = [] - right_bound = len(data) - length - 1 - for i in range(right_bound): - for x in range(1, length): - if pattern_type == 'jump': - if(data[i + x] > data[i] + height): - pattern_list.append(i) - elif pattern_type == 'drop': - if(data[i + x] < data[i] - height): - pattern_list.append(i) - return pattern_list - -def timestamp_to_index(dataframe: pd.DataFrame, timestamp: int): - data = dataframe['timestamp'] - idx, = np.where(data >= timestamp) - if len(idx) > 0: - time_ind = int(idx[0]) - else: - raise ValueError('Dataframe doesn`t contain timestamp: {}'.format(timestamp)) - return time_ind - -def find_peaks(data: Generator[float, None, None], size: int) -> Generator[float, None, None]: - window = deque(islice(data, size * 2 + 1)) - for i, v in enumerate(data, size): - current = window[size] - #TODO: remove max() from loop - if current == max(window) and current != window[size + 1]: - yield i, current - window.append(v) - window.popleft() - -def ar_mean(numbers: List[float]): - return float(sum(numbers)) / max(len(numbers), 1) - -def get_av_model(patterns_list: list): - if not patterns_list: return [] - patterns_list = get_same_length(patterns_list) - value_list = list(map(list, zip(*patterns_list))) - return list(map(ar_mean, value_list)) - -def get_same_length(patterns_list: list): - for index in range(len(patterns_list)): - if type(patterns_list[index]) == pd.Series: - patterns_list[index] = patterns_list[index].tolist() - patterns_list = list(filter(None, patterns_list)) - max_length = max(map(len, patterns_list)) - for pat in patterns_list: - if len(pat) < max_length: - length_difference = max_length - len(pat) - added_values = list(0 for _ in range(length_difference)) - pat.extend(added_values) - return patterns_list - -def close_filtering(pattern_list: List[int], win_size: int) -> TimeSeries: - if len(pattern_list) == 0: - return [] - s = [[pattern_list[0]]] - k = 0 - for i in range(1, len(pattern_list)): - if pattern_list[i] - win_size <= s[k][-1]: - s[k].append(pattern_list[i]) - else: - k += 1 - s.append([pattern_list[i]]) - return s - -def merge_intersecting_segments(segments: List[Segment], time_step: int) -> List[Segment]: - ''' - Find intersecting segments in segments list and merge it. 
- ''' - if len(segments) < 2: - return segments - segments = sorted(segments, key = lambda segment: segment.from_timestamp) - previous_segment = segments[0] - for i in range(1, len(segments)): - if segments[i].from_timestamp <= previous_segment.to_timestamp + time_step: - segments[i].message = segments[-1].message - segments[i].from_timestamp = min(previous_segment.from_timestamp, segments[i].from_timestamp) - segments[i].to_timestamp = max(previous_segment.to_timestamp, segments[i].to_timestamp) - segments[i - 1] = None - previous_segment = segments[i] - segments = [x for x in segments if x is not None] - return segments - -def find_interval(dataframe: pd.DataFrame) -> int: - if len(dataframe) < 2: - raise ValueError('Can`t find interval: length of data must be at least 2') - delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0]) - return delta - -def get_start_and_end_of_segments(segments: List[List[int]]) -> TimeSeries: - ''' - find start and end of segment: [1, 2, 3, 4] -> [1, 4] - if segment is 1 index - it will be doubled: [7] -> [7, 7] - ''' - result = [] - for segment in segments: - if len(segment) == 0: - continue - elif len(segment) > 1: - segment = [segment[0], segment[-1]] - else: - segment = [segment[0], segment[0]] - result.append(segment) - return result - -def best_pattern(pattern_list: list, data: pd.Series, dir: str) -> list: - new_pattern_list = [] - for val in pattern_list: - max_val = data[val[0]] - min_val = data[val[0]] - ind = val[0] - for i in val: - if dir == 'max': - if data[i] > max_val: - max_val = data[i] - ind = i - else: - if data[i] < min_val: - min_val = data[i] - ind = i - new_pattern_list.append(ind) - return new_pattern_list - -def find_nan_indexes(segment: pd.Series) -> list: - nan_list = pd.isnull(segment) - nan_list = np.array(nan_list) - nan_indexes = np.where(nan_list == True)[0] - return list(nan_indexes) - -def check_nan_values(segment: Union[pd.Series, list]) -> Union[pd.Series, list]: - nan_list = utils.find_nan_indexes(segment) - if len(nan_list) > 0: - segment = utils.nan_to_zero(segment, nan_list) - return segment - -def nan_to_zero(segment: Union[pd.Series, list], nan_list: list) -> Union[pd.Series, list]: - if type(segment) == pd.Series: - for val in nan_list: - segment.values[val] = 0 - else: - for val in nan_list: - segment[val] = 0 - return segment - -def find_confidence(segment: pd.Series) -> (float, float): - segment = utils.check_nan_values(segment) - segment_min = min(segment) - segment_max = max(segment) - height = segment_max - segment_min - if height: - return (CONFIDENCE_FACTOR * height, height) - else: - return (0, 0) - -def find_width(pattern: pd.Series, selector: bool) -> int: - pattern = pattern.values - center = utils.find_extremum_index(pattern, selector) - pattern_left = pattern[:center] - pattern_right = pattern[center:] - left_extremum_index = utils.find_last_extremum(pattern_left, selector) - right_extremum_index = utils.find_extremum_index(pattern_right, not selector) - left_width = center - left_extremum_index - right_width = right_extremum_index + 1 - return right_width + left_width - -def find_last_extremum(segment: np.ndarray, selector: bool) -> int: - segment = segment[::-1] - first_extremum_ind = find_extremum_index(segment, not selector) - last_extremum_ind = len(segment) - first_extremum_ind - 1 - return last_extremum_ind - -def find_extremum_index(segment: np.ndarray, selector: bool) -> int: - if selector: - return segment.argmax() - else: - 
return segment.argmin() - -def get_interval(data: pd.Series, center: int, window_size: int, normalization = False) -> pd.Series: - """ - Get an interval with 2*window_size length - window_size to the left, window_size to the right of center - If normalization == True - subtract minimum from the interval - """ - if center >= len(data): - logging.warning('Pattern center {} is out of data with len {}'.format(center, len(data))) - return [] - left_bound = center - window_size - right_bound = center + window_size + 1 - if left_bound < 0: - left_bound = 0 - if right_bound > len(data): - right_bound = len(data) - result_interval = data[left_bound: right_bound] - if normalization: - result_interval = subtract_min_without_nan(result_interval) - return result_interval - -def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> TimeSeries: - """ - Find start and end of patterns for peak - max_border_factor - final border of pattern - if reverse == True - segments will be inversed (trough -> peak / peak -> trough) - """ - if len(pattern_centers) == 0: - return [] - border_list = [] - window_size = math.ceil(max_border_factor * window_size) - for center in pattern_centers: - current_pattern = get_interval(data, center, window_size, True) - if inverse: - current_pattern = inverse_segment(current_pattern) - current_pattern = current_pattern - confidence - left_segment = current_pattern[:window_size] # a.iloc[a.index < center] - right_segment = current_pattern[window_size:] # a.iloc[a.index >= center] - left_border = get_end_of_segment(left_segment, descending = False) - right_border = get_end_of_segment(right_segment) - border_list.append((left_border, right_border)) - return border_list - -def get_end_of_segment(segment: pd.Series, skip_positive_values = True, descending = True) -> int: - """ - Find end of descending or ascending part of pattern - Allowable error is 1 index - """ - if not descending: - segment = segment.iloc[::-1] - if len(segment) == 0: - return 1 - for idx in range(1, len(segment) - 1): - if skip_positive_values and segment.values[idx] > 0: - continue - if segment.values[idx] >= segment.values[idx - 1]: - return segment.index[idx - 1] - return segment.index[-1] - -def inverse_segment(segment: pd.Series) -> pd.Series: - """ - Сonvert trough to peak and virce versa - """ - if len(segment) > 0: - rev_val = max(segment.values) - for idx in range(len(segment)): - segment.values[idx] = math.fabs(segment.values[idx] - rev_val) - return segment - -def subtract_min_without_nan(segment: pd.Series) -> pd.Series: - if len(segment) == 0: - return [] - nan_list = utils.find_nan_indexes(segment) - if len(nan_list) > 0: - return segment - else: - segment = segment - min(segment) - return segment - -def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list: - labeled_segment = [] - convolve_list = [] - for segment in segments: - labeled_segment = utils.get_interval(data, segment, window_size) - labeled_segment = utils.subtract_min_without_nan(labeled_segment) - labeled_segment = utils.check_nan_values(labeled_segment) - auto_convolve = scipy.signal.fftconvolve(labeled_segment, labeled_segment) - convolve_segment = scipy.signal.fftconvolve(labeled_segment, av_model) - if len(auto_convolve) > 0: - convolve_list.append(max(auto_convolve)) - if len(convolve_segment) > 0: - convolve_list.append(max(convolve_segment)) - return convolve_list - -def get_correlation_gen(data: 
pd.Series, window_size: int, pattern_model: List[float]) -> Generator[float, None, None]: - #Get a new dataset by correlating between a sliding window in data and pattern_model - for i in range(window_size, len(data) - window_size): - watch_data = data[i - window_size: i + window_size + 1] - correlation = pearsonr(watch_data, pattern_model) - if len(correlation) > 0: - yield(correlation[0]) - -def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list: - labeled_segment = [] - correlation_list = [] - p_value_list = [] - for segment in segments: - labeled_segment = utils.get_interval(data, segment, window_size) - labeled_segment = utils.subtract_min_without_nan(labeled_segment) - labeled_segment = utils.check_nan_values(labeled_segment) - if len(labeled_segment) == 0 or len(labeled_segment) != len(av_model): - continue - correlation = pearsonr(labeled_segment, av_model) - if len(correlation) > 1: - correlation_list.append(correlation[0]) - p_value_list.append(correlation[1]) - return correlation_list - -def get_distribution_density(segment: pd.Series) -> float: - segment.dropna(inplace = True) - if len(segment) < 2 or len(segment.nonzero()[0]) == 0: - return (0, 0, 0) - min_jump = min(segment) - max_jump = max(segment) - pdf = gaussian_kde(segment) - x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment)) - y = pdf(x) - ax_list = list(zip(x, y)) - ax_list = np.array(ax_list, np.float32) - antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0] - peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0] - try: - min_peak_index = peaks_kde[0] - segment_min_line = ax_list[min_peak_index, 0] - max_peak_index = peaks_kde[1] - segment_max_line = ax_list[max_peak_index, 0] - segment_median = ax_list[antipeaks_kde[0], 0] - except IndexError: - segment_max_line = max_jump * (1 - SHIFT_FACTOR) - segment_min_line = min_jump * (1 - SHIFT_FACTOR) - segment_median = (max_jump - min_jump) / 2 + min_jump - return segment_median, segment_max_line, segment_min_line - -def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> [int, float, int]: - segment = segment_data - if len(segment_data) > SMOOTHING_FACTOR * 3: - flat_segment = segment_data.rolling(window = SMOOTHING_FACTOR).mean() - segment = flat_segment.dropna() - segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment) - height = 0.95 * (segment_max_line - segment_min_line) - length = utils.get_pattern_length(segment_data, segment_min_line, segment_max_line, pat_type) - return height, length - -def find_pattern_center(segment_data: pd.Series, segment_from_index: int, pattern_type: str): - segment_median = utils.get_distribution_density(segment_data)[0] - cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pattern_type) - if len(cen_ind) > 0: - pat_center = cen_ind[0] - segment_cent_index = pat_center + segment_from_index - else: - segment_cent_index = math.ceil((len(segment_data)) / 2) - return segment_cent_index - -def get_pattern_length(segment_data: pd.Series, segment_min_line: float, segment_max_line: float, pat_type: str) -> int: - # TODO: move function to jump & drop merged model - segment_max = max(segment_data) - segment_min = min(segment_data) - # TODO: use better way - if segment_min_line <= segment_min: - segment_min_line = segment_min * (1 + MEASUREMENT_ERROR) - if segment_max_line >= segment_max: - segment_max_line = segment_max * (1 - MEASUREMENT_ERROR) - min_line = [] - max_line = [] - for i in 
range(len(segment_data)): - min_line.append(segment_min_line) - max_line.append(segment_max_line) - min_line = np.array(min_line) - max_line = np.array(max_line) - segment_array = np.array(segment_data.tolist()) - idmin = np.argwhere(np.diff(np.sign(min_line - segment_array)) != 0).reshape(-1) - idmax = np.argwhere(np.diff(np.sign(max_line - segment_array)) != 0).reshape(-1) - if len(idmin) > 0 and len(idmax) > 0: - if pat_type == 'jump': - result_length = idmax[0] - idmin[-1] + 1 - elif pat_type == 'drop': - result_length = idmin[0] - idmax[-1] + 1 - return result_length if result_length > 0 else 0 - else: - return 0 - -def pattern_intersection(segment_data: list, median: float, pattern_type: str) -> list: - center_index = [] - if pattern_type == 'jump': - for i in range(1, len(segment_data) - 1): - if segment_data[i - 1] < median and segment_data[i + 1] > median: - center_index.append(i) - elif pattern_type == 'drop': - for i in range(1, len(segment_data) - 1): - if segment_data[i - 1] > median and segment_data[i + 1] < median: - center_index.append(i) - delete_index = [] - for i in range(1, len(center_index)): - if center_index[i] == center_index[i - 1] + 1: - delete_index.append(i - 1) - - return [x for (idx, x) in enumerate(center_index) if idx not in delete_index] - -def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame: - data_min = data['value'].min() - if not np.isnan(data_min) and data_min > 0: - data['value'] = data['value'] - data_min - return data - -def get_min_max(array: list, default): - return float(min(array, default=default)), float(max(array, default=default)) - -def remove_duplicates_and_sort(array: list) -> list: - array = list(frozenset(array)) - array.sort() - return array diff --git a/analytics/analytics/utils/concurrent.py b/analytics/analytics/utils/concurrent.py deleted file mode 100644 index 356c24e..0000000 --- a/analytics/analytics/utils/concurrent.py +++ /dev/null @@ -1,130 +0,0 @@ -import asyncio -import threading -import zmq -import zmq.asyncio -from abc import ABC, abstractmethod - - -# This const defines Thread <-> Actor zmq one-to-one connection -# We create a seperate zmq context, so zqm address 'inproc://xxx' doesn't matter -# It is default address and you may want to use AsyncZmqThread another way -ZMQ_THREAD_ACTOR_ADDR = 'inproc://xxx' - - -# Inherience order (threading.Thread, ABC) is essential. Otherwise it's a MRO error. -class AsyncZmqThread(threading.Thread, ABC): - """Class for wrapping zmq socket into a thread with it's own asyncio event loop - - """ - - def __init__(self, - zmq_context: zmq.asyncio.Context, - zmq_socket_addr: str, - zmq_socket_type = zmq.PAIR - ): - super(AsyncZmqThread, self).__init__() - self._zmq_context = zmq_context # you can use it in child classes - self.__zmq_socket_addr = zmq_socket_addr - self.__zmq_socket_type = zmq_socket_type - self.__asyncio_loop = None - self.__zmq_socket = None - - async def __message_recv_loop(self): - while True: - text = await self.__zmq_socket.recv_string() - asyncio.ensure_future(self._on_message_to_thread(text)) - - async def _send_message_from_thread(self, message: str): - await self.__zmq_socket.send_string(message) - - @abstractmethod - async def _on_message_to_thread(self, message: str): - """Override this method to receive messages""" - - @abstractmethod - async def _run_thread(self): - """Override this method to do some async work. - This method uses a separate thread. - - You can block yourself here if you don't do any await. 
- - Example: - - ``` - async def _run_thread(self): - i = 0 - while True: - await asyncio.sleep(1) - i += 1 - await self._send_message_from_thread(f'{self.name}: ping {i}') - ``` - """ - - def run(self): - self.__asyncio_loop = asyncio.new_event_loop() - asyncio.set_event_loop(self.__asyncio_loop) - self.__zmq_socket = self._zmq_context.socket(self.__zmq_socket_type) - self.__zmq_socket.connect(self.__zmq_socket_addr) - asyncio.ensure_future(self.__message_recv_loop()) - self.__asyncio_loop.run_until_complete(self._run_thread()) - - # TODO: implement stop signal handling - - -class AsyncZmqActor(AsyncZmqThread): - """Threaded and Async Actor model based on ZMQ inproc communication - - override following: - ``` - async def _run_thread(self) - async def _on_message_to_thread(self, message: str) - ``` - - both methods run in actor's thread - - you can call `self._send_message_from_thread('txt')` - - to receive it later in `self._recv_message_from_thread()`. - - Example: - - ``` - class MyActor(AsyncZmqActor): - async def _run_thread(self): - self.counter = 0 - # runs in a different thread - await self._send_message_from_thread('some_txt_message_to_actor') - - def async _on_message_to_thread(self, message): - # runs in Thread-actor - self.counter++ - - asyncZmqActor = MyActor() - asyncZmqActor.start() - ``` - """ - - def __init__(self): - super(AsyncZmqActor, self).__init__(zmq.asyncio.Context(), ZMQ_THREAD_ACTOR_ADDR) - - self.__actor_socket = self._zmq_context.socket(zmq.PAIR) - self.__actor_socket.bind(ZMQ_THREAD_ACTOR_ADDR) - - async def _put_message_to_thread(self, message: str): - """It "sends" `message` to thread, - - but we can't await it's `AsyncZmqThread._on_message_to_thread()` - - so it's "put", not "send" - """ - await self.__actor_socket.send_string(message) - - async def _recv_message_from_thread(self) -> str: - """Returns next message ``'txt'`` from thread sent by - - ``AsyncZmqActor._send_message_from_thread('txt')`` - - """ - return await self.__actor_socket.recv_string() - - # TODO: implement graceful stopping diff --git a/analytics/analytics/utils/dataframe.py b/analytics/analytics/utils/dataframe.py deleted file mode 100644 index 65e64a1..0000000 --- a/analytics/analytics/utils/dataframe.py +++ /dev/null @@ -1,63 +0,0 @@ -from itertools import chain -import pandas as pd -import numpy as np -from typing import Generator - -def prepare_data(data: list) -> pd.DataFrame: - """ - Takes list - - converts it into pd.DataFrame, - - converts 'timestamp' column to pd.Datetime, - - subtracts min value from the dataset - """ - data = pd.DataFrame(data, columns=['timestamp', 'value']) - data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms') - data.fillna(value = np.nan, inplace = True) - return data - -def get_intersected_chunks(data: list, intersection: int, chunk_size: int) -> Generator[list, None, None]: - """ - Returns generator that splits dataframe on intersected segments. - Intersection makes it able to detect pattern that present in dataframe on the border between chunks. - intersection - length of intersection. 
- chunk_size - length of chunk - """ - assert chunk_size > 0, 'chunk size must be great than zero' - assert intersection > 0, 'intersection length must be great than zero' - - data_len = len(data) - - if data_len <= chunk_size: - yield data - return - - nonintersected = chunk_size - intersection - - offset = 0 - while True: - left_values = data_len - offset - if left_values == 0: - break - if left_values <= chunk_size: - yield data[offset : data_len] - break - else: - yield data[offset: offset + chunk_size] - offset += min(nonintersected, left_values) - -def get_chunks(data: list, chunk_size: int) -> Generator[list, None, None]: - """ - Returns generator that splits dataframe on non-intersected segments. - chunk_size - length of chunk - """ - assert chunk_size > 0, 'chunk size must be great than zero' - - chunks_iterables = [iter(data)] * chunk_size - result_chunks = zip(*chunks_iterables) - partial_chunk_len = len(data) % chunk_size - - if partial_chunk_len != 0: - result_chunks = chain(result_chunks, [data[-partial_chunk_len:]]) - - for chunk in result_chunks: - yield list(chunk) diff --git a/analytics/analytics/utils/meta.py b/analytics/analytics/utils/meta.py deleted file mode 100644 index 59116a9..0000000 --- a/analytics/analytics/utils/meta.py +++ /dev/null @@ -1,81 +0,0 @@ -from inspect import signature, Parameter -from functools import wraps -from typing import Optional, List -import re - - -CAMEL_REGEX = re.compile(r'([A-Z])') -UNDERSCORE_REGEX = re.compile(r'_([a-z])') - -def camel_to_underscore(name): - #TODO: need to rename 'from'/'to' to 'from_timestamp'/'to_timestamp' everywhere(in analytics, server, panel) - if name == 'from' or name == 'to': - name += '_timestamp' - return CAMEL_REGEX.sub(lambda x: '_' + x.group(1).lower(), name) - -def underscore_to_camel(name): - if name == 'from_timestamp' or name == 'to_timestamp': - name = name.replace('_timestamp', '') - return UNDERSCORE_REGEX.sub(lambda x: x.group(1).upper(), name) - -def is_field_private(field_name: str) -> Optional[str]: - m = re.match(r'_[^(__)]+__', field_name) - return m is not None - -def serialize(obj): - if hasattr(obj, 'to_json') == True: - return obj.to_json() - else: - return obj - -def inited_params(target_init): - target_params = signature(target_init).parameters.values() - if len(target_params) < 1: - raise ValueError('init function mush have at least self parameter') - if len(target_params) == 1: - return target_init - _, *target_params = target_params # we will not use self any more - - @wraps(target_init) - def wrapped_init(wrapped_self, *wrapped_args, **wrapped_kwargs): - for tp in target_params: - if tp.default is Parameter.empty: - continue - setattr(wrapped_self, tp.name, tp.default) - - for tp, v in zip(target_params, wrapped_args): - setattr(wrapped_self, tp.name, v) - - for k, v in wrapped_kwargs.items(): - setattr(wrapped_self, k, v) - - target_init(wrapped_self, *wrapped_args, **wrapped_kwargs) - - return wrapped_init - -def JSONClass(target_class): - - def to_json(self) -> dict: - """ - returns a json representation of the class - where all None - values and private fileds are skipped - """ - return { - underscore_to_camel(k): serialize(v) for k, v in self.__dict__.items() - if v is not None and not is_field_private(k) - } - - def from_json(json_object: Optional[dict]) -> target_class: - if json_object is None: - json_object = {} - init_object = { camel_to_underscore(k): v for k, v in json_object.items() } - return target_class(**init_object) - - # target_class.__init__ = 
inited_params(target_class.__init__) - target_class.to_json = to_json - target_class.from_json = from_json - return target_class - -class SerializableList(List[dict]): - def to_json(self): - return list(map(lambda s: s.to_json(), self)) diff --git a/analytics/analytics/utils/time.py b/analytics/analytics/utils/time.py deleted file mode 100644 index 39b69d6..0000000 --- a/analytics/analytics/utils/time.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd -from typing import List - -def convert_sec_to_ms(sec) -> int: - return int(sec) * 1000 - -def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int: - # TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6 - return int(timestamp.value) // 1000000 - -def convert_series_to_timestamp_list(series: pd.Series) -> List[int]: - timestamps = map(lambda value: convert_pd_timestamp_to_ms(value), series) - return list(timestamps) diff --git a/analytics/bin/server b/analytics/bin/server deleted file mode 100755 index 640e29a..0000000 --- a/analytics/bin/server +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import os - -if sys.version_info[:3] < (3, 6, 5) or sys.version_info[:2] >= (3, 7): - sys.stderr.write('Required python is >= 3.6.5 and < 3.7.0 \n') - sys.stderr.write('Your python version is: %d.%d.%d\n' % sys.version_info[:3]) - sys.exit(1) - -# #TODO: make wrapper script that set PYTHONPATH instead -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) - -import logging - -root_logger = logging.getLogger() -root_logger.setLevel(logging.DEBUG) - - -logging_formatter = logging.Formatter("%(asctime)s [Analytics] [%(levelname)-5.5s] %(message)s") - -logging_handler = logging.StreamHandler(sys.stdout) -logging_handler.setLevel(logging.DEBUG) -logging_handler.setFormatter(logging_formatter) - -root_logger.addHandler(logging_handler) - - -from server import run_server - -if __name__ == "__main__": - run_server() diff --git a/analytics/pyinstaller_hooks/hook-pandas.py b/analytics/pyinstaller_hooks/hook-pandas.py deleted file mode 100644 index a03a947..0000000 --- a/analytics/pyinstaller_hooks/hook-pandas.py +++ /dev/null @@ -1 +0,0 @@ -hiddenimports=['pandas._libs.tslibs.timedeltas'] diff --git a/analytics/pyinstaller_hooks/hook-scipy.py b/analytics/pyinstaller_hooks/hook-scipy.py deleted file mode 100644 index 5c8766b..0000000 --- a/analytics/pyinstaller_hooks/hook-scipy.py +++ /dev/null @@ -1 +0,0 @@ -hiddenimports=['scipy._lib.messagestream'] diff --git a/analytics/requirements.txt b/analytics/requirements.txt deleted file mode 100644 index f3bb4a1..0000000 --- a/analytics/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -attrdict==2.0.0 -aiounittest==1.1.0 -numpy==1.14.5 -pandas==0.20.3 -pyzmq==18.0.1 -scipy==1.1.0 -websockets==8.1 \ No newline at end of file diff --git a/analytics/scripts/build-dist.sh b/analytics/scripts/build-dist.sh deleted file mode 100644 index c1e9fff..0000000 --- a/analytics/scripts/build-dist.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -cd .. 
-python3.6 -m PyInstaller --paths=analytics/ --additional-hooks-dir=pyinstaller_hooks bin/server diff --git a/analytics/tests/__init__.py b/analytics/tests/__init__.py deleted file mode 100644 index bcc8e88..0000000 --- a/analytics/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -import sys -import os - -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) diff --git a/analytics/tests/test_analytic_types.py b/analytics/tests/test_analytic_types.py deleted file mode 100644 index 89a261a..0000000 --- a/analytics/tests/test_analytic_types.py +++ /dev/null @@ -1,16 +0,0 @@ -from analytic_types import TimeSeriesIndex, TimeSeries2 - -import unittest - - -class TestDataset(unittest.TestCase): - def test_basic_timeseries_index(self): - tsi = TimeSeriesIndex(['2017-12-31 16:00:00-08:00']) - self.assertEqual(len(tsi), 1) - tsi2 = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) - self.assertEqual(len(tsi2), 3) - - def test_basic_timeseries(self): - tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) - ts = TimeSeries2([4, 5, 6], tsis) - self.assertEqual(len(ts), 3) diff --git a/analytics/tests/test_bucket.py b/analytics/tests/test_bucket.py deleted file mode 100644 index 8bd138c..0000000 --- a/analytics/tests/test_bucket.py +++ /dev/null @@ -1,38 +0,0 @@ -import unittest -import pandas as pd -import random -from typing import List - -from analytic_types.data_bucket import DataBucket -from tests.test_dataset import create_list_of_timestamps - -class TestBucket(unittest.TestCase): - - def test_receive_data(self): - bucket = DataBucket() - data_val = list(range(6)) - timestamp_list = create_list_of_timestamps(len(data_val)) - for val in data_val: - bucket.receive_data(get_pd_dataframe([val], [1523889000000 + val])) - for idx, row in bucket.data.iterrows(): - self.assertEqual(data_val[idx], row['value']) - self.assertEqual(timestamp_list[idx], row['timestamp']) - - def test_drop_data(self): - bucket = DataBucket() - data_val = list(range(10)) - timestamp_list = create_list_of_timestamps(len(data_val)) - bucket.receive_data(get_pd_dataframe(data_val, timestamp_list)) - bucket.drop_data(5) - expected_data = data_val[5:] - expected_timestamp = timestamp_list[5:] - self.assertEqual(expected_data, bucket.data['value'].tolist()) - self.assertEqual(expected_timestamp, bucket.data['timestamp'].tolist()) - -if __name__ == '__main__': - unittest.main() - -def get_pd_dataframe(value: List[int], timestamp: List[int]) -> pd.DataFrame: - if len(value) != len(timestamp): - raise ValueError(f'len(value) should be equal to len(timestamp)') - return pd.DataFrame({ 'value': value, 'timestamp': timestamp }) diff --git a/analytics/tests/test_dataset.py b/analytics/tests/test_dataset.py deleted file mode 100644 index d74f2bc..0000000 --- a/analytics/tests/test_dataset.py +++ /dev/null @@ -1,386 +0,0 @@ -import unittest -import pandas as pd -import numpy as np -from utils import prepare_data -import models -import random -import scipy.signal -from typing import List - -from analytic_types.segment import Segment - -class TestDataset(unittest.TestCase): - - def test_models_with_corrupted_dataframe(self): - data = [[1523889000000 + i, float('nan')] for i in range(10)] - dataframe = pd.DataFrame(data, columns=['timestamp', 'value']) - segments = [] - - model_instances = [ - models.JumpModel(), - models.DropModel(), - models.GeneralModel(), - models.PeakModel(), - models.TroughModel() - ] - - 
for model in model_instances: - model_name = model.__class__.__name__ - model.state = model.get_state(None) - with self.assertRaises(AssertionError): - model.fit(dataframe, segments, 'test') - - def test_peak_antisegments(self): - data_val = [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.PeakModel() - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_jump_antisegments(self): - data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 9.0, 1.0, 1.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000016, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': True}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.JumpModel() - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_trough_antisegments(self): - data_val = [9.0, 9.0, 9.0, 9.0, 7.0, 4.0, 7.0, 9.0, 9.0, 9.0, 5.0, 1.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.TroughModel() - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_drop_antisegments(self): - data_val = [9.0, 9.0, 9.0, 9.0, 9.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 1.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000016, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': True}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.DropModel() - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_general_antisegments(self): - data_val = [1.0, 2.0, 1.0, 2.0, 5.0, 6.0, 3.0, 2.0, 1.0, 1.0, 8.0, 9.0, 8.0, 1.0, 
2.0, 3.0, 2.0, 1.0, 1.0, 2.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.GeneralModel() - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_jump_empty_segment(self): - data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.JumpModel() - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_drop_empty_segment(self): - data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.DropModel() - model.state = model.get_state(None) - model_name = model.__class__.__name__ - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_value_error_dataset_input_should_have_multiple_elements(self): - data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,3.0,3.0,2.0,7.0,8.0,9.0,8.0,7.0,6.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000007, 'to': 1523889000011, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.JumpModel() - model.state = model.get_state(None) - model_name = model.__class__.__name__ - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_prepare_data_for_nonetype(self): - data = [[1523889000000, None], [1523889000001, None], [1523889000002, None]] - try: - data = prepare_data(data) - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_prepare_data_for_nan(self): - data = [[1523889000000, np.nan], [1523889000001, np.nan], [1523889000002, np.nan]] - try: - data = prepare_data(data) - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) 
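[Editor's note] The two `prepare_data` tests above only assert that `utils.prepare_data` accepts all-None / all-NaN input without raising, and the next test additionally expects the returned object to expose a `value` column full of NaNs. For readers without the analytics package at hand, a minimal sketch consistent with that contract might look like the code below; the real `prepare_data` in `analytics/utils` may normalize timestamps or handle rows differently, so treat this as an illustration only.

```
import pandas as pd

def prepare_data(data: list) -> pd.DataFrame:
    """Hedged sketch of utils.prepare_data: [[timestamp_ms, value], ...] -> DataFrame.

    None is coerced to NaN so downstream models see a single missing-value
    marker, which is what the surrounding tests check for.
    """
    dataframe = pd.DataFrame(data, columns=['timestamp', 'value'])
    dataframe['value'] = dataframe['value'].astype(float)  # None -> NaN
    return dataframe
```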
- - def test_prepare_data_output_fon_nan(self): - data_nan = [[1523889000000, np.nan], [1523889000001, np.nan], [1523889000002, np.nan]] - data_none = [[1523889000000, None], [1523889000001, None], [1523889000002, None]] - return_data_nan = prepare_data(data_nan) - return_data_none = prepare_data(data_none) - for item in return_data_nan.value: - self.assertTrue(np.isnan(item)) - for item in return_data_none.value: - self.assertTrue(np.isnan(item)) - - def test_three_value_segment(self): - data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 4.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - model_instances = [ - models.GeneralModel(), - models.PeakModel(), - ] - try: - for model in model_instances: - model_name = model.__class__.__name__ - model.state = model.get_state(None) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_general_for_two_labeling(self): - data_val = [1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 2.0, 1.0, 0, 0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000001, 'to': 1523889000003, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - model = models.GeneralModel() - model.state = model.get_state(None) - model.fit(dataframe, segments,'test') - result = len(data_val) + 1 - for _ in range(2): - model.do_detect(dataframe) - max_pattern_index = max(model.do_detect(dataframe)) - self.assertLessEqual(max_pattern_index[0], result) - - - def test_peak_model_for_cache(self): - cache = { - 'patternCenter': [1, 6], - 'patternModel': [1, 4, 0], - 'confidence': 2, - 'convolveMax': 8, - 'convolveMin': 7, - 'windowSize': 1, - 'convDelMin': 0, - 'convDelMax': 0, - 'heightMax': 4, - 'heightMin': 4, - } - data_val = [2.0, 5.0, 1.0, 1.0, 1.0, 2.0, 5.0, 1.0, 1.0, 2.0, 3.0, 7.0, 1.0, 1.0, 1.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - model = models.PeakModel() - model.state = model.get_state(cache) - result = model.fit(dataframe, segments, 'test') - self.assertEqual(len(result.pattern_center), 3) - - def test_trough_model_for_cache(self): - cache = { - 'patternCenter': [2, 6], - 'patternModel': [5, 0.5, 4], - 'confidence': 2, - 'convolveMax': 8, - 'convolveMin': 7, - 'window_size': 1, - 'convDelMin': 0, - 'convDelMax': 0, - } - data_val = [5.0, 5.0, 1.0, 4.0, 5.0, 5.0, 0.0, 4.0, 5.0, 5.0, 6.0, 1.0, 5.0, 5.0, 5.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - model = models.TroughModel() - model.state = model.get_state(cache) - result = model.fit(dataframe, segments, 'test') - self.assertEqual(len(result.pattern_center), 3) - - def test_jump_model_for_cache(self): - cache = { - 'patternCenter': [2, 6], - 
'patternModel': [5, 0.5, 4], - 'confidence': 2, - 'convolveMax': 8, - 'convolveMin': 7, - 'window_size': 1, - 'convDelMin': 0, - 'convDelMax': 0, - } - data_val = [1.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 152388900009, 'to': 1523889000013, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - model = models.JumpModel() - model.state = model.get_state(cache) - result = model.fit(dataframe, segments, 'test') - self.assertEqual(len(result.pattern_center), 3) - - def test_models_for_pattern_model_cache(self): - cache = { - 'patternCenter': [4, 12], - 'patternModel': [], - 'confidence': 2, - 'convolveMax': 8, - 'convolveMin': 7, - 'window_size': 2, - 'convDelMin': 0, - 'convDelMax': 0, - } - data_val = [5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 6.0, 6.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000024, 'labeled': True, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - - try: - model = models.DropModel() - model_name = model.__class__.__name__ - model.state = model.get_state(cache) - model.fit(dataframe, segments, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly'.format(model_name)) - - def test_problem_data_for_random_model(self): - problem_data = [2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, - 3.0, 3.0, 3.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, - 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 6.0, 7.0, 8.0, 8.0, 4.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, - 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, - 4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 5.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, - 2.0, 8.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] - data = create_dataframe(problem_data) - cache = { - 'patternCenter': [5, 50], - 'patternModel': [], - 'windowSize': 2, - 'convolveMin': 0, - 'convolveMax': 0, - 'convDelMin': 0, - 'convDelMax': 0, - } - max_ws = 20 - iteration = 1 - for ws in range(1, max_ws): - for _ in range(iteration): - pattern_model = create_random_model(ws) - convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) - cache['windowSize'] = ws - cache['patternModel'] = pattern_model - cache['convolveMin'] = max(convolve) - cache['convolveMax'] = max(convolve) - try: - model = models.GeneralModel() - model.state = model.get_state(cache) - model_name = model.__class__.__name__ - model.detect(data, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly with av_model {} and window size {}'.format(model_name, pattern_model, ws)) - - def test_random_dataset_for_random_model(self): - data = create_random_model(random.randint(1, 100)) - data = create_dataframe(data) - model_instances = [ - models.PeakModel(), - models.TroughModel() - ] - cache = { - 'patternCenter': [5, 50], - 'patternModel': [], - 'windowSize': 2, - 'convolveMin': 0, - 'convolveMax': 0, - 
'confidence': 0, - 'heightMax': 0, - 'heightMin': 0, - 'convDelMin': 0, - 'convDelMax': 0, - } - ws = random.randint(1, int(len(data['value']/2))) - pattern_model = create_random_model(ws) - convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) - confidence = 0.2 * (data['value'].max() - data['value'].min()) - cache['windowSize'] = ws - cache['patternModel'] = pattern_model - cache['convolveMin'] = max(convolve) - cache['convolveMax'] = max(convolve) - cache['confidence'] = confidence - cache['heightMax'] = data['value'].max() - cache['heightMin'] = confidence - try: - for model in model_instances: - model_name = model.__class__.__name__ - model.state = model.get_state(cache) - model.detect(data, 'test') - except ValueError: - self.fail('Model {} raised unexpectedly with dataset {} and cache {}'.format(model_name, data['value'], cache)) - -if __name__ == '__main__': - unittest.main() - -def create_dataframe(data_val: list) -> pd.DataFrame: - data_ind = create_list_of_timestamps(len(data_val)) - data = {'timestamp': data_ind, 'value': data_val} - dataframe = pd.DataFrame(data) - dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') - return dataframe - -def create_list_of_timestamps(length: int) -> List[int]: - return [1523889000000 + i for i in range(length)] - -def create_random_model(window_size: int) -> list: - return [random.randint(0, 100) for _ in range(window_size * 2 + 1)] diff --git a/analytics/tests/test_detectors.py b/analytics/tests/test_detectors.py deleted file mode 100644 index 01a811c..0000000 --- a/analytics/tests/test_detectors.py +++ /dev/null @@ -1,265 +0,0 @@ -import unittest -import pandas as pd - -from detectors import pattern_detector, threshold_detector, anomaly_detector -from analytic_types.detector import DetectionResult, ProcessingResult, Bound -from analytic_types.segment import Segment -from tests.test_dataset import create_dataframe, create_list_of_timestamps -from utils import convert_pd_timestamp_to_ms - -class TestPatternDetector(unittest.TestCase): - - def test_small_dataframe(self): - - data = [[0,1], [1,2]] - dataframe = pd.DataFrame(data, columns=['timestamp', 'values']) - cache = { 'windowSize': 10 } - - detector = pattern_detector.PatternDetector('GENERAL', 'test_id') - with self.assertRaises(ValueError): - detector.detect(dataframe, cache) - - def test_only_negative_segments(self): - data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] - data_ind = [1523889000000 + i for i in range(len(data_val))] - data = {'timestamp': data_ind, 'value': data_val} - dataframe = pd.DataFrame(data = data) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': False, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - cache = {} - detector = pattern_detector.PatternDetector('PEAK', 'test_id') - excepted_error_message = 'test_id has no positive labeled segments. 
Pattern detector needs at least 1 positive labeled segment' - - try: - detector.train(dataframe, segments, cache) - except ValueError as e: - self.assertEqual(str(e), excepted_error_message) - - def test_positive_and_negative_segments(self): - data_val = [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] - dataframe = create_dataframe(data_val) - segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}, - {'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000001, 'to': 1523889000003, 'labeled': False, 'deleted': False}] - segments = [Segment.from_json(segment) for segment in segments] - cache = {} - detector = pattern_detector.PatternDetector('PEAK', 'test_id') - try: - detector.train(dataframe, segments, cache) - except Exception as e: - self.fail('detector.train fail with error {}'.format(e)) - -class TestThresholdDetector(unittest.TestCase): - - def test_invalid_cache(self): - - detector = threshold_detector.ThresholdDetector('test_id') - - with self.assertRaises(ValueError): - detector.detect([], None) - - with self.assertRaises(ValueError): - detector.detect([], {}) - - -class TestAnomalyDetector(unittest.TestCase): - - def test_detect(self): - data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] - data_ind = [1523889000000 + i for i in range(len(data_val))] - data = {'timestamp': data_ind, 'value': data_val} - dataframe = pd.DataFrame(data = data) - dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') - cache = { - 'confidence': 2, - 'alpha': 0.1, - 'enableBounds': 'ALL', - 'timeStep': 1 - } - detector = anomaly_detector.AnomalyDetector('test_id') - - detect_result: DetectionResult = detector.detect(dataframe, cache) - detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) - result = [{ 'from': 1523889000005.0, 'to': 1523889000005.0 }] - self.assertEqual(result, detected_segments) - - cache = { - 'confidence': 2, - 'alpha': 0.1, - 'enableBounds': 'ALL', - 'timeStep': 1, - 'seasonality': 4, - 'segments': [{ 'from': 1523889000001, 'to': 1523889000002, 'data': [10] }] - } - detect_result: DetectionResult = detector.detect(dataframe, cache) - detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) - result = [] - self.assertEqual(result, detected_segments) - - def test_process_data(self): - data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] - data_ind = [1523889000000 + i for i in range(len(data_val))] - data = {'timestamp': data_ind, 'value': data_val} - dataframe = pd.DataFrame(data = data) - dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') - cache = { - 'confidence': 2, - 'alpha': 0.1, - 'enableBounds': 'ALL', - 'timeStep': 1 - } - detector = anomaly_detector.AnomalyDetector('test_id') - detect_result: ProcessingResult = detector.process_data(dataframe, cache) - expected_result = { - 'lowerBound': [ - (1523889000000, -2.0), - (1523889000001, -1.9), - (1523889000002, -1.71), - (1523889000003, -1.6389999999999998), - (1523889000004, -1.4750999999999999), - (1523889000005, -0.5275899999999998), - (1523889000006, -0.5748309999999996), - (1523889000007, -0.5173478999999996), - (1523889000008, -0.5656131099999995) - ], - 'upperBound': [ - (1523889000000, 2.0), - (1523889000001, 2.1), - (1523889000002, 2.29), - (1523889000003, 2.361), - (1523889000004, 2.5249), - 
(1523889000005, 3.47241), - (1523889000006, 3.4251690000000004), - (1523889000007, 3.4826521), - (1523889000008, 3.4343868900000007) - ]} - self.assertEqual(detect_result.to_json(), expected_result) - - cache = { - 'confidence': 2, - 'alpha': 0.1, - 'enableBounds': 'ALL', - 'timeStep': 1, - 'seasonality': 5, - 'segments': [{ 'from': 1523889000001, 'to': 1523889000002,'data': [1] }] - } - detect_result: ProcessingResult = detector.process_data(dataframe, cache) - expected_result = { - 'lowerBound': [ - (1523889000000, -2.0), - (1523889000001, -2.9), - (1523889000002, -1.71), - (1523889000003, -1.6389999999999998), - (1523889000004, -1.4750999999999999), - (1523889000005, -0.5275899999999998), - (1523889000006, -1.5748309999999996), - (1523889000007, -0.5173478999999996), - (1523889000008, -0.5656131099999995) - ], - 'upperBound': [ - (1523889000000, 2.0), - (1523889000001, 3.1), - (1523889000002, 2.29), - (1523889000003, 2.361), - (1523889000004, 2.5249), - (1523889000005, 3.47241), - (1523889000006, 4.425169), - (1523889000007, 3.4826521), - (1523889000008, 3.4343868900000007) - ]} - self.assertEqual(detect_result.to_json(), expected_result) - - def test_get_seasonality_offset(self): - detector = anomaly_detector.AnomalyDetector('test_id') - from_timestamp = 1573700973027 - seasonality = 3600000 - data_start_time = 1573698780000 - time_step = 30000 - detected_offset = detector.get_seasonality_offset(from_timestamp, seasonality, data_start_time, time_step) - expected_offset = 74 - self.assertEqual(detected_offset, expected_offset) - - def test_segment_generator(self): - detector = anomaly_detector.AnomalyDetector('test_id') - data = [1, 1, 5, 1, -4, 5, 5, 5, -3, 1] - timestamps = create_list_of_timestamps(len(data)) - dataframe = create_dataframe(data) - upper_bound = pd.Series([2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) - lower_bound = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - segments = list(detector.detections_generator(dataframe, upper_bound, lower_bound, enabled_bounds=Bound.ALL)) - - segments_borders = list(map(lambda s: [s.from_timestamp, s.to_timestamp], segments)) - self.assertEqual(segments_borders, [[timestamps[2], timestamps[2]], [timestamps[4], timestamps[8]]]) - - def test_consume_data(self): - cache = { - 'confidence': 2, - 'alpha': 0.1, - 'enableBounds': 'ALL', - 'timeStep': 1 - } - detector = anomaly_detector.AnomalyDetector('test_id') - - detect_result: DetectionResult = None - for val in range(22): - value = 1 if val != 10 else 5 - dataframe = pd.DataFrame({'value': [value], 'timestamp': [1523889000000 + val]}) - dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') - detect_result = detector.consume_data(dataframe, cache) - - detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) - result = [{ 'from': 1523889000010, 'to': 1523889000010 }] - self.assertEqual(result, detected_segments) - - def test_get_segment_bound(self): - detector = anomaly_detector.AnomalyDetector('test_id') - peak_segment = pd.Series([1,2,3,4,3,2,1]) - trough_segment = pd.Series([4,3,2,1,2,3,4]) - expected_peak_segment_results = { - 'max_value': 3, - 'min_value': 1.5 - } - expected_trough_segment_results = { - 'max_value': 3.5, - 'min_value': 2.75 - } - peak_detector_result_upper = detector.get_segment_bound(peak_segment, Bound.UPPER) - peak_detector_result_lower = detector.get_segment_bound(peak_segment, Bound.LOWER) - trough_detector_result_upper = detector.get_segment_bound(trough_segment, Bound.UPPER) - 
trough_detector_result_lower = detector.get_segment_bound(trough_segment, Bound.LOWER) - - self.assertGreaterEqual( - max(peak_detector_result_upper), - expected_peak_segment_results['max_value'] - ) - self.assertLessEqual( - max(peak_detector_result_lower), - expected_peak_segment_results['min_value'] - ) - self.assertGreaterEqual( - max(trough_detector_result_upper), - expected_trough_segment_results['max_value'] - ) - self.assertLessEqual( - max(trough_detector_result_lower), - expected_trough_segment_results['min_value'] - ) - - def test_get_segment_bound_corner_cases(self): - detector = anomaly_detector.AnomalyDetector('test_id') - empty_segment = pd.Series([]) - same_values_segment = pd.Series([2,2,2,2,2,2]) - empty_detector_result_upper = detector.get_segment_bound(empty_segment, Bound.UPPER) - empty_detector_result_lower = detector.get_segment_bound(empty_segment, Bound.LOWER) - same_values_detector_result_upper = detector.get_segment_bound(same_values_segment, Bound.UPPER) - same_values_detector_result_lower = detector.get_segment_bound(same_values_segment, Bound.LOWER) - - self.assertEqual(len(empty_detector_result_upper), 0) - self.assertEqual(len(empty_detector_result_lower), 0) - self.assertEqual(min(same_values_detector_result_upper), 0) - self.assertEqual(max(same_values_detector_result_upper), 0) - self.assertEqual(min(same_values_detector_result_lower), 0) - self.assertEqual(max(same_values_detector_result_lower), 0) - -if __name__ == '__main__': - unittest.main() diff --git a/analytics/tests/test_manager.py b/analytics/tests/test_manager.py deleted file mode 100644 index 1886828..0000000 --- a/analytics/tests/test_manager.py +++ /dev/null @@ -1,100 +0,0 @@ -from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel -from models import GeneralModelState -import utils.meta -import aiounittest -from analytic_unit_manager import AnalyticUnitManager -from collections import namedtuple - -TestData = namedtuple('TestData', ['uid', 'type', 'values', 'segments']) - -def get_random_id() -> str: - return str(id(list())) - -class TestDataset(aiounittest.AsyncTestCase): - - timestep = 50 #ms - - def _fill_task(self, uid, data, task_type, analytic_unit_type, segments=None, cache=None): - task = { - 'analyticUnitId': uid, - 'type': task_type, - 'payload': { - 'data': data, - 'from': data[0][0], - 'to': data[-1][0], - 'analyticUnitType': analytic_unit_type, - 'detector': 'pattern', - 'cache': cache - }, - '_id': get_random_id() - } - if segments: task['payload']['segments'] = segments - - return task - - def _convert_values(self, values) -> list: - from_t = 0 - to_t = len(values) * self.timestep - return list(zip(range(from_t, to_t, self.timestep), values)) - - def _index_to_test_time(self, idx) -> int: - return idx * self.timestep - - def _get_learn_task(self, test_data): - uid, analytic_unit_type, values, segments = test_data - data = self._convert_values(values) - segments = [{ - 'analyticUnitId': uid, - 'from': self._index_to_test_time(s[0]), - 'to': self._index_to_test_time(s[1]), - 'labeled': True, - 'deleted': False - } for s in segments] - return self._fill_task(uid, data, 'LEARN', analytic_unit_type, segments=segments) - - def _get_detect_task(self, test_data, cache): - uid, analytic_unit_type, values, _ = test_data - data = self._convert_values(values) - return self._fill_task(uid, data, 'DETECT', analytic_unit_type, cache=cache) - - def _get_test_dataset(self, pattern) -> tuple: - """ - pattern name: ([dataset values], [list of segments]) - - segment - 
(begin, end) - indexes in dataset values - returns dataset in format (data: List[int], segments: List[List[int]]) - """ - datasets = { - 'PEAK': ([0, 0, 1, 2, 3, 4, 3, 2, 1, 0, 0], [[2, 8]]), - 'JUMP': ([0, 0, 1, 2, 3, 4, 4, 4], [[1, 6]]), - 'DROP': ([4, 4, 4, 3, 2, 1, 0, 0], [[1, 6]]), - 'TROUGH': ([4, 4, 3, 2, 1, 0, 1, 2, 3, 4, 4], [[1, 9]]), - 'GENERAL': ([0, 0, 1, 2, 3, 4, 3, 2, 1, 0, 0], [[2, 8]]) - } - return datasets[pattern] - - async def _learn(self, task, manager=None) -> dict: - if not manager: manager = AnalyticUnitManager() - result = await manager.handle_analytic_task(task) - return result['payload']['cache'] - - async def _detect(self, task, manager=None) -> dict: - if not manager: manager = AnalyticUnitManager() - result = await manager.handle_analytic_task(task) - return result - - async def _test_detect(self, test_data, manager=None): - learn_task = self._get_learn_task(test_data) - cache = await self._learn(learn_task, manager) - detect_task = self._get_detect_task(test_data, cache) - result = await self._detect(detect_task, manager) - return result - - async def test_unit_manager(self): - test_data = TestData(get_random_id(), 'PEAK', [0,1,2,5,10,5,2,1,1,1,0,0,0,0], [[1,7]]) - manager = AnalyticUnitManager() - - with_manager = await self._test_detect(test_data, manager) - without_manager = await self._test_detect(test_data) - self.assertEqual(with_manager, without_manager) - diff --git a/analytics/tests/test_models.py b/analytics/tests/test_models.py deleted file mode 100644 index 11d4d19..0000000 --- a/analytics/tests/test_models.py +++ /dev/null @@ -1,43 +0,0 @@ -import unittest -import pandas as pd -import numpy as np -import models - -class TestModel(unittest.TestCase): - - def test_stair_model_get_indexes(self): - drop_model = models.DropModel() - jump_model = models.JumpModel() - drop_data = pd.Series([4, 4, 4, 1, 1, 1, 5, 5, 2, 2, 2]) - jump_data = pd.Series([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5]) - jump_data_one_stair = pd.Series([1, 3, 3]) - drop_data_one_stair = pd.Series([4, 2, 1]) - height = 2 - length = 2 - expected_result = [2, 7] - drop_model_result = drop_model.get_stair_indexes(drop_data, height, length) - jump_model_result = jump_model.get_stair_indexes(jump_data, height, length) - drop_one_stair_result = drop_model.get_stair_indexes(drop_data_one_stair, height, 1) - jump_one_stair_result = jump_model.get_stair_indexes(jump_data_one_stair, height, 1) - for val in expected_result: - self.assertIn(val, drop_model_result) - self.assertIn(val, jump_model_result) - self.assertEqual(0, drop_one_stair_result[0]) - self.assertEqual(0, jump_one_stair_result[0]) - - def test_stair_model_get_indexes_corner_cases(self): - drop_model = models.DropModel() - jump_model = models.JumpModel() - empty_data = pd.Series([]) - nan_data = pd.Series([np.nan, np.nan, np.nan, np.nan]) - height, length = 2, 2 - length_zero, height_zero = 0, 0 - expected_result = [] - drop_empty_data_result = drop_model.get_stair_indexes(empty_data, height, length) - drop_nan_data_result = drop_model.get_stair_indexes(nan_data, height_zero, length_zero) - jump_empty_data_result = jump_model.get_stair_indexes(empty_data, height, length) - jump_nan_data_result = jump_model.get_stair_indexes(nan_data, height_zero, length_zero) - self.assertEqual(drop_empty_data_result, expected_result) - self.assertEqual(drop_nan_data_result, expected_result) - self.assertEqual(jump_empty_data_result, expected_result) - self.assertEqual(jump_nan_data_result, expected_result) diff --git a/analytics/tests/test_utils.py 
b/analytics/tests/test_utils.py deleted file mode 100644 index 6faf993..0000000 --- a/analytics/tests/test_utils.py +++ /dev/null @@ -1,359 +0,0 @@ -from analytic_types.segment import Segment - -import utils -import unittest -import numpy as np -import pandas as pd -import math -import random - -RELATIVE_TOLERANCE = 1e-1 - -class TestUtils(unittest.TestCase): - - #example test for test's workflow purposes - def test_segment_parsion(self): - self.assertTrue(True) - - def test_confidence_all_normal_value(self): - segment = [1, 2, 0, 6, 8, 5, 3] - utils_result = utils.find_confidence(segment)[0] - result = 4.0 - self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE)) - - def test_confidence_all_nan_value(self): - segment = [np.nan, np.nan, np.nan, np.nan] - self.assertEqual(utils.find_confidence(segment)[0], 0) - - def test_confidence_with_nan_value(self): - data = [np.nan, np.nan, 0, 8] - utils_result = utils.find_confidence(data)[0] - result = 4.0 - self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE)) - - def test_interval_all_normal_value(self): - data = [1, 2, 1, 2, 4, 1, 2, 4, 5, 6] - data = pd.Series(data) - center = 4 - window_size = 2 - result = [1, 2, 4, 1, 2] - self.assertEqual(list(utils.get_interval(data, center, window_size)), result) - - def test_interval_wrong_ws(self): - data = [1, 2, 4, 1, 2, 4] - data = pd.Series(data) - center = 3 - window_size = 6 - result = [1, 2, 4, 1, 2, 4] - self.assertEqual(list(utils.get_interval(data, center, window_size)), result) - - def test_subtract_min_without_nan(self): - segment = [1, 2, 4, 1, 2, 4] - segment = pd.Series(segment) - result = [0, 1, 3, 0, 1, 3] - utils_result = list(utils.subtract_min_without_nan(segment)) - self.assertEqual(utils_result, result) - - def test_subtract_min_with_nan(self): - segment = [np.nan, 2, 4, 1, 2, 4] - segment = pd.Series(segment) - result = [2, 4, 1, 2, 4] - utils_result = list(utils.subtract_min_without_nan(segment)[1:]) - self.assertEqual(utils_result, result) - - def test_get_convolve(self): - data = [1, 2, 3, 2, 2, 0, 2, 3, 4, 3, 2, 1, 1, 2, 3, 4, 3, 2, 0] - data = pd.Series(data) - pattern_index = [2, 8, 15] - window_size = 2 - av_model = [1, 2, 3, 2, 1] - result = [] - self.assertNotEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result) - - def test_get_convolve_with_nan(self): - data = [1, 2, 3, 2, np.nan, 0, 2, 3, 4, np.nan, 2, 1, 1, 2, 3, 4, 3, np.nan, 0] - data = pd.Series(data) - pattern_index = [2, 8, 15] - window_size = 2 - av_model = [1, 2, 3, 2, 1] - result = utils.get_convolve(pattern_index, av_model, data, window_size) - for val in result: - self.assertFalse(np.isnan(val)) - - def test_get_convolve_empty_data(self): - data = [] - pattern_index = [] - window_size = 2 - window_size_zero = 0 - av_model = [] - result = [] - self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result) - self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size_zero), result) - - def test_find_jump_parameters_center(self): - segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] - segment = pd.Series(segment) - jump_center = [10, 11] - self.assertIn(utils.find_pattern_center(segment, 0, 'jump'), jump_center) - - def test_find_jump_parameters_height(self): - segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] - segment = pd.Series(segment) - jump_height = [3.5, 4] - 
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[0]) - self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[1]) - - def test_find_jump_parameters_length(self): - segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] - segment = pd.Series(segment) - jump_length = 2 - self.assertEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_length) - - def test_find_drop_parameters_center(self): - segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - segment = pd.Series(segment) - drop_center = [14, 15, 16] - self.assertIn(utils.find_pattern_center(segment, 0, 'drop'), drop_center) - - def test_find_drop_parameters_height(self): - segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - segment = pd.Series(segment) - drop_height = [3.5, 4] - self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[0]) - self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[1]) - - def test_find_drop_parameters_length(self): - segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - segment = pd.Series(segment) - drop_length = 2 - self.assertEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_length) - - def test_get_av_model_empty_data(self): - patterns_list = [] - result = [] - self.assertEqual(utils.get_av_model(patterns_list), result) - - def test_get_av_model_normal_data(self): - patterns_list = [[1, 1, 1], [2, 2, 2],[3,3,3]] - result = [2.0, 2.0, 2.0] - self.assertEqual(utils.get_av_model(patterns_list), result) - - def test_get_distribution_density(self): - segment = [1, 1, 1, 3, 5, 5, 5] - segment = pd.Series(segment) - result = (3, 5, 1) - self.assertEqual(utils.get_distribution_density(segment), result) - - def test_get_distribution_density_right(self): - data = [1.0, 5.0, 5.0, 4.0] - data = pd.Series(data) - median = 3.0 - max_line = 5.0 - min_line = 1.0 - utils_result = utils.get_distribution_density(data) - self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE)) - self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE)) - self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE)) - - def test_get_distribution_density_left(self): - data = [1.0, 1.0, 2.0, 1.0, 5.0] - data = pd.Series(data) - median = 3.0 - max_line = 5.0 - min_line = 1.0 - utils_result = utils.get_distribution_density(data) - self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE)) - self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE)) - self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE)) - - def test_get_distribution_density_short_data(self): - data = [1.0, 5.0] - data = pd.Series(data) - segment = [1.0] - segment = pd.Series(segment) - utils_result_data = utils.get_distribution_density(data) - utils_result_segment = utils.get_distribution_density(segment) - self.assertEqual(len(utils_result_data), 3) - self.assertEqual(utils_result_segment, (0, 0, 0)) - - def test_get_distribution_density_with_nans(self): - segment = [np.NaN, 1, 1, 1, np.NaN, 3, 5, 5, 5, np.NaN] - segment = pd.Series(segment) - result = (3, 5, 1) - self.assertEqual(utils.get_distribution_density(segment), result) - - def test_find_pattern_jump_center(self): - data = [1.0, 1.0, 1.0, 5.0, 5.0, 5.0] - data = pd.Series(data) - 
median = 3.0 - result = 3 - self.assertEqual(result, utils.find_pattern_center(data, 0, 'jump')) - - def test_get_convolve_wrong_index(self): - data = [1.0, 5.0, 2.0, 1.0, 6.0, 2.0] - data = pd.Series(data) - segemnts = [1, 11] - av_model = [0.0, 4.0, 0.0] - window_size = 1 - try: - utils.get_convolve(segemnts, av_model, data, window_size) - except ValueError: - self.fail('Method get_convolve raised unexpectedly') - - def test_get_av_model_for_different_length(self): - patterns_list = [[1.0, 1.0, 2.0], [4.0, 4.0], [2.0, 2.0, 2.0], [3.0, 3.0], []] - try: - utils.get_av_model(patterns_list) - except ValueError: - self.fail('Method get_convolve raised unexpectedly') - - def test_find_nan_indexes(self): - data = [1, 1, 1, 0, 0, np.nan, None, []] - data = pd.Series(data) - result = [5, 6] - self.assertEqual(utils.find_nan_indexes(data), result) - - def test_find_nan_indexes_normal_values(self): - data = [1, 1, 1, 0, 0, 0, 1, 1] - data = pd.Series(data) - result = [] - self.assertEqual(utils.find_nan_indexes(data), result) - - def test_find_nan_indexes_empty_values(self): - data = [] - result = [] - self.assertEqual(utils.find_nan_indexes(data), result) - - def test_create_correlation_data(self): - data = [random.randint(10, 999) for _ in range(10000)] - data = pd.Series(data) - pattern_model = [100, 200, 500, 300, 100] - ws = 2 - result = 6000 - corr_data = utils.get_correlation_gen(data, ws, pattern_model) - corr_data = list(corr_data) - self.assertGreaterEqual(len(corr_data), result) - - def test_inverse_segment(self): - data = pd.Series([1,2,3,4,3,2,1]) - result = pd.Series([3,2,1,0,1,2,3]) - utils_result = utils.inverse_segment(data) - for ind, val in enumerate(utils_result): - self.assertEqual(val, result[ind]) - - def test_get_end_of_segment_equal(self): - data = pd.Series([5,4,3,2,1,0,0,0]) - result_list = [4, 5, 6] - self.assertIn(utils.get_end_of_segment(data, False), result_list) - - def test_get_end_of_segment_greater(self): - data = pd.Series([5,4,3,2,1,0,1,2,3]) - result_list = [4, 5, 6] - self.assertIn(utils.get_end_of_segment(data, False), result_list) - - def test_get_borders_of_peaks(self): - data = pd.Series([1,0,1,2,3,2,1,0,0,1,2,3,4,3,2,2,1,0,1,2,3,4,5,3,2,1,0]) - pattern_center = [4, 12, 22] - ws = 3 - confidence = 1.5 - result = [(1, 7), (9, 15), (19, 25)] - self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence), result) - - def test_get_borders_of_peaks_for_trough(self): - data = pd.Series([4,4,5,5,3,1,3,5,5,6,3,2]) - pattern_center = [5] - ws = 5 - confidence = 3 - result = [(3, 7)] - self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence, inverse = True), result) - - def test_get_start_and_end_of_segments(self): - segments = [[1, 2, 3, 4], [5, 6, 7], [8], [], [12, 12]] - result = [[1, 4], [5, 7], [8, 8], [12, 12]] - utils_result = utils.get_start_and_end_of_segments(segments) - for got, expected in zip(utils_result, result): - self.assertEqual(got, expected) - - def test_get_start_and_end_of_segments_empty(self): - segments = [] - result = [] - utils_result = utils.get_start_and_end_of_segments(segments) - self.assertEqual(result, utils_result) - - def test_merge_intersecting_segments(self): - test_cases = [ - { - 'index': [Segment(10, 20), Segment(30, 40)], - 'result': [[10, 20], [30, 40]], - 'step': 0, - }, - { - 'index': [Segment(10, 20), Segment(13, 23), Segment(15, 17), Segment(20, 40)], - 'result': [[10, 40]], - 'step': 0, - }, - { - 'index': [], - 'result': [], - 'step': 0, - }, - { - 'index': [Segment(10, 
20)], - 'result': [[10, 20]], - 'step': 0, - }, - { - 'index': [Segment(10, 20), Segment(13, 23), Segment(25, 30), Segment(35, 40)], - 'result': [[10, 23], [25, 30], [35, 40]], - 'step': 0, - }, - { - 'index': [Segment(10, 50), Segment(5, 40), Segment(15, 25), Segment(6, 50)], - 'result': [[5, 50]], - 'step': 0, - }, - { - 'index': [Segment(5, 10), Segment(10, 20), Segment(25, 50)], - 'result': [[5, 20], [25, 50]], - 'step': 0, - }, - { - 'index': [Segment(20, 40), Segment(10, 15), Segment(50, 60)], - 'result': [[10, 15], [20, 40], [50, 60]], - 'step': 0, - }, - { - 'index': [Segment(20, 40), Segment(10, 20), Segment(50, 60)], - 'result': [[10, 40], [50, 60]], - 'step': 0, - }, - { - 'index': [Segment(10, 10), Segment(20, 20), Segment(30, 30)], - 'result': [[10, 30]], - 'step': 10, - }, - ] - - for case in test_cases: - utils_result = utils.merge_intersecting_segments(case['index'], case['step']) - for got, expected in zip(utils_result, case['result']): - self.assertEqual(got.from_timestamp, expected[0]) - self.assertEqual(got.to_timestamp, expected[1]) - - def test_serialize(self): - segment_list = [Segment(100,200)] - serialize_list = utils.meta.SerializableList(segment_list) - meta_result = utils.meta.serialize(serialize_list) - expected_result = [{ 'from': 100, 'to': 200 }] - self.assertEqual(meta_result, expected_result) - - def test_remove_duplicates_and_sort(self): - a1 = [1, 3, 5] - a2 = [8, 3, 6] - expected_result = [1, 3, 5, 6, 8] - utils_result = utils.remove_duplicates_and_sort(a1+a2) - self.assertEqual(utils_result, expected_result) - self.assertEqual([], []) - -if __name__ == '__main__': - unittest.main() diff --git a/analytics/tests/test_utils_dataframe.py b/analytics/tests/test_utils_dataframe.py deleted file mode 100644 index 2985d6f..0000000 --- a/analytics/tests/test_utils_dataframe.py +++ /dev/null @@ -1,43 +0,0 @@ -import unittest -from utils import get_intersected_chunks, get_chunks -import pandas as pd - - -class TestUtils(unittest.TestCase): - - def test_chunks_generator(self): - intersection = 2 - chunk_size = 4 - - cases = [ - (list(range(8)), [[0,1,2,3], [2,3,4,5], [4,5,6,7]]), - ([], [[]]), - (list(range(1)), [[0]]), - (list(range(4)), [[0,1,2,3]]), - (list(range(9)), [[0,1,2,3], [2,3,4,5], [4,5,6,7], [6,7,8]]) - ] - - for tested, expected in cases: - tested_chunks = get_intersected_chunks(tested, intersection, chunk_size) - self.assertSequenceEqual(tuple(tested_chunks), expected) - - - def test_non_intersected_chunks(self): - chunk_size = 4 - - cases = [ - (tuple(range(12)), [[0,1,2,3], [4,5,6,7], [8,9,10,11]]), - (tuple(range(9)), [[0,1,2,3], [4,5,6,7], [8]]), - (tuple(range(10)), [[0,1,2,3], [4,5,6,7], [8,9]]), - (tuple(range(11)), [[0,1,2,3], [4,5,6,7], [8,9,10]]), - ([], []), - (tuple(range(1)), [[0]]), - (tuple(range(4)), [[0,1,2,3]]) - ] - - for tested, expected in cases: - tested_chunks = list(get_chunks(tested, chunk_size)) - self.assertSequenceEqual(tested_chunks, expected) - -if __name__ == '__main__': - unittest.main() diff --git a/analytics/tools/analytic_model_tester.py b/analytics/tools/analytic_model_tester.py deleted file mode 100644 index cffbb75..0000000 --- a/analytics/tools/analytic_model_tester.py +++ /dev/null @@ -1,122 +0,0 @@ -import sys -ANALYTICS_PATH = '../analytics' -TESTS_PATH = '../tests' -sys.path.extend([ANALYTICS_PATH, TESTS_PATH]) - -import pandas as pd -import numpy as np -import utils -import test_dataset -from analytic_types.segment import Segment -from detectors import pattern_detector, threshold_detector, anomaly_detector 
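[Editor's note] Looking back at the chunking cases in `test_utils_dataframe.py` above: the expected outputs pin down the behaviour of `get_chunks` and `get_intersected_chunks` fairly tightly, so a reference sketch that satisfies those cases is given below. This is an assumption for illustration only; the shipped implementations in `analytics/utils` may differ in details such as generator vs. list output.

```
from typing import Generator, List, Sequence

def get_chunks(data: Sequence, chunk_size: int) -> Generator[List, None, None]:
    """Yield consecutive, non-overlapping chunks; the last one may be shorter."""
    for pos in range(0, len(data), chunk_size):
        yield list(data[pos:pos + chunk_size])

def get_intersected_chunks(data: Sequence, intersection: int, chunk_size: int) -> Generator[List, None, None]:
    """Yield overlapping windows stepping by (chunk_size - intersection).

    If the last full window does not reach the end of the data (or no full
    window fits at all), the remaining tail is yielded as a final short chunk,
    matching the expectations in test_chunks_generator.
    """
    step = chunk_size - intersection
    pos, covered_up_to, yielded_any = 0, 0, False
    while pos + chunk_size <= len(data):
        yield list(data[pos:pos + chunk_size])
        covered_up_to = pos + chunk_size
        pos += step
        yielded_any = True
    if not yielded_any or covered_up_to < len(data):
        yield list(data[pos:])
```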
- -# TODO: get_dataset -# TODO: get_segment -PEAK_DATASETS = [] -# dataset with 3 peaks -TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]) -# TODO: more convenient way to specify labeled segments -POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}] -NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}] - -class TesterSegment(): - - def __init__(self, start: int, end: int, labeled: bool): - self.start = start - self.end = end - self.labeled = labeled - - def get_segment(self): - return { - '_id': 'q', - 'analyticUnitId': 'q', - 'from': self.start, - 'to': self.end, - 'labeled': self.labeled, - 'deleted': not self.labeled - } - -class Metric(): - - def __init__(self, expected_result, detector_result): - self.expected_result = expected_result - self.detector_result = detector_result['segments'] - - def get_amount(self): - return len(self.detector_result) / len(self.expected_result) - - def get_accuracy(self): - correct_segment = 0 - invalid_segment = 0 - for segment in self.detector_result: - current_cs = correct_segment - for pattern in self.expected_result: - if pattern['from'] <= segment['from'] and pattern['to'] >= segment['to']: - correct_segment += 1 - break - if correct_segment == current_cs: - invalid_segment += 1 - non_detected = len(self.expected_result) - correct_segment - return (correct_segment, invalid_segment, non_detected) - -class ModelData(): - - def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str): - self.frame = frame - self.positive_segments = positive_segments - self.negative_segments = negative_segments - self.model_type = model_type - - def get_segments_for_detection(self, positive_amount, negative_amount): - segments = [] - for idx, bounds in enumerate(self.positive_segments): - if idx >= positive_amount: - break - segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment()) - - for idx, bounds in enumerate(self.negative_segments): - if idx >= negative_amount: - break - segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment()) - - return segments - - def get_all_correct_segments(self): - return self.positive_segments - -PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak') -PEAK_DATASETS.append(PEAK_DATA_1) - -def main(model_type: str) -> None: - table_metric = [] - if model_type == 'peak': - for data in PEAK_DATASETS: - dataset = data.frame - segments = data.get_segments_for_detection(1, 0) - segments = [Segment.from_json(segment) for segment in segments] - detector = pattern_detector.PatternDetector('PEAK', 'test_id') - training_result = detector.train(dataset, segments, {}) - cache = training_result['cache'] - detect_result = detector.detect(dataset, cache) - detect_result = detect_result.to_json() - peak_metric = Metric(data.get_all_correct_segments(), detect_result) - table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy())) - return table_metric - -if __name__ == '__main__': - ''' - This tool applies the model on datasets and verifies that the detection result corresponds to the correct values. 
- sys.argv[1] expects one of the models name -> see correct_name - ''' - # TODO: use enum - correct_name = ['peak', 'trough', 'jump', 'drop', 'general'] - if len(sys.argv) < 2: - print('Enter one of models name: {}'.format(correct_name)) - sys.exit(1) - model_type = str(sys.argv[1]).lower() - if model_type in correct_name: - print(main(model_type)) - else: - print('Enter one of models name: {}'.format(correct_name)) - - diff --git a/analytics/tools/send_zmq_message.py b/analytics/tools/send_zmq_message.py deleted file mode 100644 index f08be7f..0000000 --- a/analytics/tools/send_zmq_message.py +++ /dev/null @@ -1,104 +0,0 @@ -import zmq -import zmq.asyncio -import asyncio -import json -from uuid import uuid4 - -context = zmq.asyncio.Context() -socket = context.socket(zmq.PAIR) -socket.connect('tcp://0.0.0.0:8002') - -def create_message(): - message = { - "method": "DATA", - "payload": { - "_id": uuid4().hex, - "analyticUnitId": uuid4().hex, - "type": "PUSH", - "payload": { - "data": [ - [ - 1552652025000, - 12.499999999999998 - ], - [ - 1552652040000, - 12.500000000000002 - ], - [ - 1552652055000, - 12.499999999999996 - ], - [ - 1552652070000, - 12.500000000000002 - ], - [ - 1552652085000, - 12.499999999999998 - ], - [ - 1552652100000, - 12.5 - ], - [ - 1552652115000, - 12.83261113785909 - ] - ], - "from": 1552652025001, - "to": 1552652125541, - "analyticUnitType": "GENERAL", - "detector": "pattern", - "cache": { - "pattern_center": [ - 693 - ], - "pattern_model": [ - 1.7763568394002505e-15, - 5.329070518200751e-15, - 1.7763568394002505e-15, - 1.7763568394002505e-15, - 1.7763568394002505e-15, - 3.552713678800501e-15, - 1.7763568394002505e-15, - 3.552713678800501e-15, - 3.552713678800501e-15, - 1.7763568394002505e-15, - 1.7763568394002505e-15, - 0, - 1.7763568394002505e-15, - 1.7763568394002505e-15, - 0 - ], - "convolve_max": 7.573064690121713e-29, - "convolve_min": 7.573064690121713e-29, - "WINDOW_SIZE": 7, - "conv_del_min": 7, - "conv_del_max": 7 - } - } - } - } - - return json.dumps(message) - -async def handle_loop(): - while True: - received_bytes = await socket.recv() - text = received_bytes.decode('utf-8') - - print(text) - -async def send_detect(): - data = create_message().encode('utf-8') - await socket.send(data) - -if __name__ == "__main__": - loop = asyncio.get_event_loop() - socket.send(b'PING') - detects = [send_detect() for i in range(100)] - detects_group = asyncio.gather(*detects) - handle_group = asyncio.gather(handle_loop()) - common_group = asyncio.gather(handle_group, detects_group) - loop.run_until_complete(common_group)
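[Editor's note] `send_zmq_message.py` above expects a peer listening on `tcp://0.0.0.0:8002`. When the real analytics service (`bin/server`) is not running, a throwaway peer like the sketch below can be used to inspect what the tool sends. The PONG reply is a placeholder and not part of the actual hastic task protocol, so treat everything here as an assumption for local debugging only.

```
import zmq

def main() -> None:
    # Bind the address that send_zmq_message.py connects to and print traffic.
    context = zmq.Context()
    socket = context.socket(zmq.PAIR)
    socket.bind('tcp://0.0.0.0:8002')
    while True:
        message = socket.recv()              # raw bytes: b'PING' or a JSON-encoded task
        print(message.decode('utf-8'))
        socket.send(b'{"method": "PONG"}')   # placeholder reply to keep handle_loop printing

if __name__ == '__main__':
    main()
```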