CorpGlory Inc.
5 years ago
58 changed files with 4526 additions and 1 deletions
@ -0,0 +1,32 @@ |
|||||||
|
{ |
||||||
|
// Use IntelliSense to learn about possible attributes. |
||||||
|
// Hover to view descriptions of existing attributes. |
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 |
||||||
|
"version": "0.2.0", |
||||||
|
"configurations": [ |
||||||
|
{ |
||||||
|
"name": "Attach (Remote Debug)", |
||||||
|
"type": "python", |
||||||
|
"request": "attach", |
||||||
|
"port": 5679, |
||||||
|
"host": "localhost", |
||||||
|
"pathMappings": [ |
||||||
|
{ |
||||||
|
"localRoot": "${workspaceFolder}", |
||||||
|
"remoteRoot": "/var/www/analytics" |
||||||
|
} |
||||||
|
] |
||||||
|
}, |
||||||
|
{ |
||||||
|
"name": "Python: Current File", |
||||||
|
"type": "python", |
||||||
|
"request": "launch", |
||||||
|
"windows": { |
||||||
|
"program": "${workspaceFolder}\\bin\\server" |
||||||
|
}, |
||||||
|
"linux": { |
||||||
|
"program": "${workspaceFolder}/bin/server" |
||||||
|
} |
||||||
|
} |
||||||
|
] |
||||||
|
} |
@ -0,0 +1,22 @@ |
|||||||
|
{ |
||||||
|
"terminal.integrated.shell.windows": "C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", |
||||||
|
"editor.insertSpaces": true, |
||||||
|
"files.eol": "\n", |
||||||
|
"files.exclude": { |
||||||
|
"**/__pycache__/": true, |
||||||
|
"dist": true, |
||||||
|
"build": true |
||||||
|
}, |
||||||
|
"[python]": { |
||||||
|
"editor.tabSize": 4, |
||||||
|
}, |
||||||
|
"python.envFile": "${workspaceFolder}/.vscode/.env", |
||||||
|
"python.pythonPath": "python", |
||||||
|
"python.linting.enabled": true, |
||||||
|
"python.testing.unittestArgs": [ "-v" ], |
||||||
|
"python.testing.pytestEnabled": false, |
||||||
|
"python.testing.nosetestsEnabled": false, |
||||||
|
"python.testing.unittestEnabled": true, |
||||||
|
"python.linting.pylintEnabled": true, |
||||||
|
"python.jediEnabled": false |
||||||
|
} |
@ -0,0 +1,27 @@ |
|||||||
|
# Type hints |
||||||
|
|
||||||
|
Please use: https://www.python.org/dev/peps/pep-0484/ |
||||||
|
|
||||||
|
# Line endings |
||||||
|
|
||||||
|
We use LF everywhere |
||||||
|
|
||||||
|
# Imports |
||||||
|
|
||||||
|
You import local files first, than spesific liba and then standart libs. |
||||||
|
So you import from something very scecific to something very common. |
||||||
|
It allows you to pay attention on most important things from beginning. |
||||||
|
|
||||||
|
``` |
||||||
|
|
||||||
|
from data_provider import DataProvider |
||||||
|
from anomaly_model import AnomalyModel |
||||||
|
from pattern_detection_model import PatternDetectionModel |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
|
||||||
|
from scipy.signal import argrelextrema |
||||||
|
|
||||||
|
import pickle |
||||||
|
|
||||||
|
``` |
@ -0,0 +1,12 @@ |
|||||||
|
FROM python:3.6.6 |
||||||
|
|
||||||
|
COPY requirements.txt /requirements.txt |
||||||
|
|
||||||
|
RUN pip install -r /requirements.txt |
||||||
|
|
||||||
|
WORKDIR /var/www/analytics |
||||||
|
|
||||||
|
COPY . /var/www/analytics/ |
||||||
|
|
||||||
|
|
||||||
|
CMD ["python", "-u", "bin/server"] |
@ -1 +1,12 @@ |
|||||||
# analytics |
# Hastic-server-analytics |
||||||
|
|
||||||
|
Python service which gets tasks from [hastic-server-node](https://github.com/hastic/hastic-server/tree/master/server) like |
||||||
|
|
||||||
|
* trains statistical models |
||||||
|
* detect patterns in time series data |
||||||
|
|
||||||
|
## Arhitecture |
||||||
|
|
||||||
|
The service uses [asyncio](https://docs.python.org/3/library/asyncio.html), |
||||||
|
[concurrency](https://docs.python.org/3.6/library/concurrent.futures.html#module-concurrent.futures) and |
||||||
|
[pyzmq](https://pyzmq.readthedocs.io/en/latest/). |
||||||
|
@ -0,0 +1,39 @@ |
|||||||
|
""" |
||||||
|
It is the place where we put all classes and types |
||||||
|
common for all analytics code |
||||||
|
|
||||||
|
For example, if you write someting which is used |
||||||
|
in analytic_unit_manager, it should be here. |
||||||
|
|
||||||
|
If you create something spicific which is used only in one place, |
||||||
|
like PatternDetectionCache, then it should not be here. |
||||||
|
""" |
||||||
|
|
||||||
|
import pandas as pd |
||||||
|
from typing import Union, List, Tuple |
||||||
|
|
||||||
|
AnalyticUnitId = str |
||||||
|
|
||||||
|
ModelCache = dict |
||||||
|
|
||||||
|
# TODO: explicit timestamp / value |
||||||
|
TimeSeries = List[Tuple[int, float]] |
||||||
|
|
||||||
|
""" |
||||||
|
Example: |
||||||
|
|
||||||
|
tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) |
||||||
|
ts = TimeSeries([4, 5, 6], tsis) |
||||||
|
""" |
||||||
|
Timestamp = Union[str, pd.Timestamp] |
||||||
|
|
||||||
|
class TimeSeriesIndex(pd.DatetimeIndex): |
||||||
|
def __new__(cls, *args, **kwargs): |
||||||
|
return pd.DatetimeIndex.__new__(cls, *args, **kwargs) |
||||||
|
|
||||||
|
# TODO: make generic type for values. See List definition for example of generic class |
||||||
|
# TODO: constructor from DataFrame |
||||||
|
# TODO: repleace TimeSeries (above) with this class: rename TimeSeries2 to TimeSeries |
||||||
|
class TimeSeries2(pd.Series): |
||||||
|
def __init__(self, *args, **kwargs): |
||||||
|
super().__init__(*args, **kwargs) |
@ -0,0 +1,38 @@ |
|||||||
|
from typing import Optional, List, Dict |
||||||
|
|
||||||
|
from analytic_types.segment import AnomalyDetectorSegment |
||||||
|
from analytic_types.detector import Bound |
||||||
|
|
||||||
|
from utils.meta import JSONClass, SerializableList |
||||||
|
|
||||||
|
@JSONClass |
||||||
|
class AnomalyCache: |
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
alpha: float, |
||||||
|
confidence: float, |
||||||
|
enable_bounds: str, |
||||||
|
seasonality: Optional[int] = None, |
||||||
|
segments: Optional[List[Dict]] = None, |
||||||
|
time_step: Optional[int] = None, |
||||||
|
): |
||||||
|
self.alpha = alpha |
||||||
|
self.confidence = confidence |
||||||
|
self.enable_bounds = enable_bounds |
||||||
|
if seasonality != None and seasonality < 0: |
||||||
|
raise ValueError(f'Can`t create AnomalyCache: got invalid seasonality {seasonality}') |
||||||
|
self.seasonality = seasonality |
||||||
|
self.time_step = time_step |
||||||
|
if segments != None: |
||||||
|
anomaly_segments = map(AnomalyDetectorSegment.from_json, segments) |
||||||
|
self.segments = SerializableList(anomaly_segments) |
||||||
|
else: |
||||||
|
self.segments = [] |
||||||
|
|
||||||
|
def set_segments(self, segments: List[AnomalyDetectorSegment]): |
||||||
|
if len(segments) > 0: |
||||||
|
self.segments = SerializableList(segments) |
||||||
|
|
||||||
|
def get_enabled_bounds(self) -> Bound: |
||||||
|
#TODO: use class with to_json() |
||||||
|
return Bound(self.enable_bounds) |
@ -0,0 +1,14 @@ |
|||||||
|
import pandas as pd |
||||||
|
|
||||||
|
|
||||||
|
class DataBucket: |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
self.data = pd.DataFrame([], columns=['timestamp', 'value']) |
||||||
|
|
||||||
|
def receive_data(self, data: pd.DataFrame): |
||||||
|
self.data = self.data.append(data, ignore_index=True) |
||||||
|
|
||||||
|
def drop_data(self, count: int): |
||||||
|
if count > 0: |
||||||
|
self.data = self.data.iloc[count:] |
@ -0,0 +1,47 @@ |
|||||||
|
from analytic_types import ModelCache, TimeSeries |
||||||
|
from analytic_types.segment import Segment |
||||||
|
|
||||||
|
from enum import Enum |
||||||
|
from typing import List, Optional, Tuple |
||||||
|
|
||||||
|
import utils.meta |
||||||
|
|
||||||
|
class Bound(Enum): |
||||||
|
ALL = 'ALL' |
||||||
|
UPPER = 'UPPER' |
||||||
|
LOWER = 'LOWER' |
||||||
|
|
||||||
|
class DetectionResult: |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
cache: Optional[ModelCache] = None, |
||||||
|
segments: Optional[List[Segment]] = None, |
||||||
|
last_detection_time: int = None |
||||||
|
): |
||||||
|
if cache is None: |
||||||
|
cache = {} |
||||||
|
if segments is None: |
||||||
|
segments = [] |
||||||
|
self.cache = cache |
||||||
|
self.segments = segments |
||||||
|
self.last_detection_time = last_detection_time |
||||||
|
|
||||||
|
# TODO: use @utils.meta.JSONClass (now it can't serialize list of objects) |
||||||
|
def to_json(self): |
||||||
|
return { |
||||||
|
'cache': self.cache, |
||||||
|
'segments': list(map(lambda segment: segment.to_json(), self.segments)), |
||||||
|
'lastDetectionTime': self.last_detection_time |
||||||
|
} |
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class ProcessingResult(): |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
lower_bound: Optional[TimeSeries] = None, |
||||||
|
upper_bound: Optional[TimeSeries] = None, |
||||||
|
): |
||||||
|
self.lower_bound = lower_bound |
||||||
|
self.upper_bound = upper_bound |
@ -0,0 +1,17 @@ |
|||||||
|
import utils.meta |
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class LearningInfo: |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
super().__init__() |
||||||
|
self.confidence = [] |
||||||
|
self.patterns_list = [] |
||||||
|
self.pattern_width = [] |
||||||
|
self.pattern_height = [] |
||||||
|
self.pattern_timestamp = [] |
||||||
|
self.segment_center_list = [] |
||||||
|
self.patterns_value = [] |
||||||
|
|
||||||
|
def __str__(self): |
||||||
|
return str(self.to_json()) |
@ -0,0 +1,57 @@ |
|||||||
|
from typing import Optional |
||||||
|
|
||||||
|
import utils.meta |
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class Segment: |
||||||
|
''' |
||||||
|
Used for segment manipulation instead of { 'from': ..., 'to': ... } dict |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
from_timestamp: int, |
||||||
|
to_timestamp: int, |
||||||
|
_id: Optional[str] = None, |
||||||
|
analytic_unit_id: Optional[str] = None, |
||||||
|
labeled: Optional[bool] = None, |
||||||
|
deleted: Optional[bool] = None, |
||||||
|
message: Optional[str] = None |
||||||
|
): |
||||||
|
if to_timestamp < from_timestamp: |
||||||
|
raise ValueError(f'Can`t create segment with to < from: {to_timestamp} < {from_timestamp}') |
||||||
|
self.from_timestamp = from_timestamp |
||||||
|
self.to_timestamp = to_timestamp |
||||||
|
self._id = _id |
||||||
|
self.analytic_unit_id = analytic_unit_id |
||||||
|
self.labeled = labeled |
||||||
|
self.deleted = deleted |
||||||
|
self.message = message |
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class AnomalyDetectorSegment(Segment): |
||||||
|
''' |
||||||
|
Used for segment manipulation instead of { 'from': ..., 'to': ..., 'data': ... } dict |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
from_timestamp: int, |
||||||
|
to_timestamp: int, |
||||||
|
data = [], |
||||||
|
_id: Optional[str] = None, |
||||||
|
analytic_unit_id: Optional[str] = None, |
||||||
|
labeled: Optional[bool] = None, |
||||||
|
deleted: Optional[bool] = None, |
||||||
|
message: Optional[str] = None |
||||||
|
): |
||||||
|
super().__init__( |
||||||
|
from_timestamp, |
||||||
|
to_timestamp, |
||||||
|
_id, |
||||||
|
analytic_unit_id, |
||||||
|
labeled, |
||||||
|
deleted, |
||||||
|
message |
||||||
|
) |
||||||
|
self.data = data |
@ -0,0 +1,103 @@ |
|||||||
|
from typing import Dict |
||||||
|
import logging as log |
||||||
|
import traceback |
||||||
|
from concurrent.futures import Executor, ThreadPoolExecutor |
||||||
|
|
||||||
|
from analytic_unit_worker import AnalyticUnitWorker |
||||||
|
from analytic_types import AnalyticUnitId, ModelCache |
||||||
|
from analytic_types.segment import Segment |
||||||
|
import detectors |
||||||
|
|
||||||
|
|
||||||
|
logger = log.getLogger('AnalyticUnitManager') |
||||||
|
|
||||||
|
|
||||||
|
def get_detector_by_type( |
||||||
|
detector_type: str, analytic_unit_type: str, analytic_unit_id: AnalyticUnitId |
||||||
|
) -> detectors.Detector: |
||||||
|
if detector_type == 'pattern': |
||||||
|
return detectors.PatternDetector(analytic_unit_type, analytic_unit_id) |
||||||
|
elif detector_type == 'threshold': |
||||||
|
return detectors.ThresholdDetector(analytic_unit_id) |
||||||
|
elif detector_type == 'anomaly': |
||||||
|
return detectors.AnomalyDetector(analytic_unit_id) |
||||||
|
|
||||||
|
raise ValueError('Unknown detector type "%s"' % detector_type) |
||||||
|
|
||||||
|
|
||||||
|
class AnalyticUnitManager: |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
self.analytic_workers: Dict[AnalyticUnitId, AnalyticUnitWorker] = dict() |
||||||
|
self.workers_executor = ThreadPoolExecutor() |
||||||
|
|
||||||
|
def __ensure_worker( |
||||||
|
self, |
||||||
|
analytic_unit_id: AnalyticUnitId, |
||||||
|
detector_type: str, |
||||||
|
analytic_unit_type: str |
||||||
|
) -> AnalyticUnitWorker: |
||||||
|
if analytic_unit_id in self.analytic_workers: |
||||||
|
# TODO: check that type is the same |
||||||
|
return self.analytic_workers[analytic_unit_id] |
||||||
|
detector = get_detector_by_type(detector_type, analytic_unit_type, analytic_unit_id) |
||||||
|
worker = AnalyticUnitWorker(analytic_unit_id, detector, self.workers_executor) |
||||||
|
self.analytic_workers[analytic_unit_id] = worker |
||||||
|
return worker |
||||||
|
|
||||||
|
async def __handle_analytic_task(self, task: object) -> dict: |
||||||
|
""" |
||||||
|
returns payload or None |
||||||
|
""" |
||||||
|
analytic_unit_id: AnalyticUnitId = task['analyticUnitId'] |
||||||
|
log.debug('Analytics get task with type: {} for unit: {}'.format(task['type'], analytic_unit_id)) |
||||||
|
if task['type'] == 'CANCEL': |
||||||
|
if analytic_unit_id in self.analytic_workers: |
||||||
|
self.analytic_workers[analytic_unit_id].cancel() |
||||||
|
return |
||||||
|
|
||||||
|
payload = task['payload'] |
||||||
|
worker = self.__ensure_worker(analytic_unit_id, payload['detector'], payload['analyticUnitType']) |
||||||
|
data = payload.get('data') |
||||||
|
if task['type'] == 'PUSH': |
||||||
|
# TODO: do it a better way |
||||||
|
res = await worker.consume_data(data, payload['cache']) |
||||||
|
if res: |
||||||
|
res.update({ 'analyticUnitId': analytic_unit_id }) |
||||||
|
return res |
||||||
|
elif task['type'] == 'LEARN': |
||||||
|
if 'segments' in payload: |
||||||
|
segments = payload['segments'] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
return await worker.do_train(segments, data, payload['cache']) |
||||||
|
elif 'threshold' in payload: |
||||||
|
return await worker.do_train(payload['threshold'], data, payload['cache']) |
||||||
|
elif 'anomaly' in payload: |
||||||
|
return await worker.do_train(payload['anomaly'], data, payload['cache']) |
||||||
|
else: |
||||||
|
raise ValueError('No segments or threshold in LEARN payload') |
||||||
|
elif task['type'] == 'DETECT': |
||||||
|
return await worker.do_detect(data, payload['cache']) |
||||||
|
elif task['type'] == 'PROCESS': |
||||||
|
return await worker.process_data(data, payload['cache']) |
||||||
|
|
||||||
|
raise ValueError('Unknown task type "%s"' % task['type']) |
||||||
|
|
||||||
|
async def handle_analytic_task(self, task: object): |
||||||
|
try: |
||||||
|
log.debug('Start handle_analytic_task with analytic unit: {}'.format(task['analyticUnitId'])) |
||||||
|
result_payload = await self.__handle_analytic_task(task) |
||||||
|
result_message = { |
||||||
|
'status': 'SUCCESS', |
||||||
|
'payload': result_payload |
||||||
|
} |
||||||
|
log.debug('End correctly handle_analytic_task with anatytic unit: {}'.format(task['analyticUnitId'])) |
||||||
|
return result_message |
||||||
|
except Exception as e: |
||||||
|
error_text = traceback.format_exc() |
||||||
|
logger.error("handle_analytic_task Exception: '%s'" % error_text) |
||||||
|
# TODO: move result to a class which renders to json for messaging to analytics |
||||||
|
return { |
||||||
|
'status': 'FAILED', |
||||||
|
'error': repr(e) |
||||||
|
} |
@ -0,0 +1,116 @@ |
|||||||
|
import config |
||||||
|
import detectors |
||||||
|
import logging |
||||||
|
import pandas as pd |
||||||
|
from typing import Optional, Union, Generator, List, Tuple |
||||||
|
import concurrent.futures |
||||||
|
import asyncio |
||||||
|
import utils |
||||||
|
from utils import get_intersected_chunks, get_chunks, prepare_data |
||||||
|
|
||||||
|
from analytic_types import ModelCache, TimeSeries |
||||||
|
from analytic_types.detector import DetectionResult |
||||||
|
|
||||||
|
logger = logging.getLogger('AnalyticUnitWorker') |
||||||
|
|
||||||
|
|
||||||
|
class AnalyticUnitWorker: |
||||||
|
|
||||||
|
CHUNK_WINDOW_SIZE_FACTOR = 100 |
||||||
|
CHUNK_INTERSECTION_FACTOR = 2 |
||||||
|
|
||||||
|
assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \ |
||||||
|
'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR' |
||||||
|
|
||||||
|
def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor): |
||||||
|
self.analytic_unit_id = analytic_unit_id |
||||||
|
self._detector = detector |
||||||
|
self._executor: concurrent.futures.Executor = executor |
||||||
|
self._training_future: asyncio.Future = None |
||||||
|
|
||||||
|
async def do_train( |
||||||
|
self, payload: Union[list, dict], data: TimeSeries, cache: Optional[ModelCache] |
||||||
|
) -> Optional[ModelCache]: |
||||||
|
|
||||||
|
dataframe = prepare_data(data) |
||||||
|
|
||||||
|
cfuture: concurrent.futures.Future = self._executor.submit( |
||||||
|
self._detector.train, dataframe, payload, cache |
||||||
|
) |
||||||
|
self._training_future = asyncio.wrap_future(cfuture) |
||||||
|
try: |
||||||
|
new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout = config.LEARNING_TIMEOUT) |
||||||
|
return new_cache |
||||||
|
except asyncio.CancelledError: |
||||||
|
return None |
||||||
|
except asyncio.TimeoutError: |
||||||
|
raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT)) |
||||||
|
|
||||||
|
async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||||
|
|
||||||
|
window_size = self._detector.get_window_size(cache) |
||||||
|
chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR |
||||||
|
chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR |
||||||
|
|
||||||
|
detections: List[DetectionResult] = [] |
||||||
|
chunks = [] |
||||||
|
# XXX: get_chunks(data, chunk_size) == get_intersected_chunks(data, 0, chunk_size) |
||||||
|
if self._detector.is_detection_intersected(): |
||||||
|
chunks = get_intersected_chunks(data, chunk_intersection, chunk_size) |
||||||
|
else: |
||||||
|
chunks = get_chunks(data, chunk_size) |
||||||
|
|
||||||
|
for chunk in chunks: |
||||||
|
await asyncio.sleep(0) |
||||||
|
chunk_dataframe = prepare_data(chunk) |
||||||
|
detected: DetectionResult = self._detector.detect(chunk_dataframe, cache) |
||||||
|
detections.append(detected) |
||||||
|
|
||||||
|
if len(detections) == 0: |
||||||
|
raise RuntimeError(f'do_detect for {self.analytic_unit_id} got empty detection results') |
||||||
|
|
||||||
|
detection_result = self._detector.concat_detection_results(detections) |
||||||
|
return detection_result.to_json() |
||||||
|
|
||||||
|
def cancel(self): |
||||||
|
if self._training_future is not None: |
||||||
|
self._training_future.cancel() |
||||||
|
|
||||||
|
async def consume_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> Optional[dict]: |
||||||
|
window_size = self._detector.get_window_size(cache) |
||||||
|
|
||||||
|
detections: List[DetectionResult] = [] |
||||||
|
|
||||||
|
for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR): |
||||||
|
await asyncio.sleep(0) |
||||||
|
chunk_dataframe = prepare_data(chunk) |
||||||
|
detected = self._detector.consume_data(chunk_dataframe, cache) |
||||||
|
if detected is not None: |
||||||
|
detections.append(detected) |
||||||
|
|
||||||
|
if len(detections) == 0: |
||||||
|
return None |
||||||
|
else: |
||||||
|
detection_result = self._detector.concat_detection_results(detections) |
||||||
|
return detection_result.to_json() |
||||||
|
|
||||||
|
async def process_data(self, data: TimeSeries, cache: ModelCache) -> dict: |
||||||
|
assert isinstance(self._detector, detectors.ProcessingDetector), \ |
||||||
|
f'{self.analytic_unit_id} detector is not ProcessingDetector, can`t process data' |
||||||
|
assert cache is not None, f'{self.analytic_unit_id} got empty cache for processing data' |
||||||
|
|
||||||
|
processed_chunks = [] |
||||||
|
window_size = self._detector.get_window_size(cache) |
||||||
|
for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR): |
||||||
|
await asyncio.sleep(0) |
||||||
|
chunk_dataframe = prepare_data(chunk) |
||||||
|
processed = self._detector.process_data(chunk_dataframe, cache) |
||||||
|
if processed is not None: |
||||||
|
processed_chunks.append(processed) |
||||||
|
|
||||||
|
if len(processed_chunks) == 0: |
||||||
|
raise RuntimeError(f'process_data for {self.analytic_unit_id} got empty processing results') |
||||||
|
|
||||||
|
# TODO: maybe we should process all chunks inside of detector? |
||||||
|
result = self._detector.concat_processing_results(processed_chunks) |
||||||
|
return result.to_json() |
@ -0,0 +1,30 @@ |
|||||||
|
import os |
||||||
|
import json |
||||||
|
|
||||||
|
|
||||||
|
PARENT_FOLDER = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) |
||||||
|
CONFIG_FILE = os.path.join(PARENT_FOLDER, 'config.json') |
||||||
|
|
||||||
|
|
||||||
|
config_exists = os.path.isfile(CONFIG_FILE) |
||||||
|
if config_exists: |
||||||
|
with open(CONFIG_FILE) as f: |
||||||
|
config = json.load(f) |
||||||
|
else: |
||||||
|
print('Config file %s doesn`t exist, using defaults' % CONFIG_FILE) |
||||||
|
|
||||||
|
|
||||||
|
def get_config_field(field: str, default_val = None): |
||||||
|
if field in os.environ: |
||||||
|
return os.environ[field] |
||||||
|
|
||||||
|
if config_exists and field in config and config[field] != '': |
||||||
|
return config[field] |
||||||
|
|
||||||
|
if default_val is not None: |
||||||
|
return default_val |
||||||
|
|
||||||
|
raise Exception('Please configure {}'.format(field)) |
||||||
|
|
||||||
|
HASTIC_SERVER_URL = get_config_field('HASTIC_SERVER_URL', 'ws://localhost:8002') |
||||||
|
LEARNING_TIMEOUT = get_config_field('LEARNING_TIMEOUT', 120) |
@ -0,0 +1,4 @@ |
|||||||
|
from detectors.detector import Detector, ProcessingDetector |
||||||
|
from detectors.pattern_detector import PatternDetector |
||||||
|
from detectors.threshold_detector import ThresholdDetector |
||||||
|
from detectors.anomaly_detector import AnomalyDetector |
@ -0,0 +1,277 @@ |
|||||||
|
from enum import Enum |
||||||
|
import logging |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
import math |
||||||
|
from typing import Optional, Union, List, Tuple, Generator |
||||||
|
import operator |
||||||
|
|
||||||
|
from analytic_types import AnalyticUnitId, ModelCache |
||||||
|
from analytic_types.detector import DetectionResult, ProcessingResult, Bound |
||||||
|
from analytic_types.data_bucket import DataBucket |
||||||
|
from analytic_types.segment import Segment, AnomalyDetectorSegment |
||||||
|
from analytic_types.cache import AnomalyCache |
||||||
|
from detectors import Detector, ProcessingDetector |
||||||
|
import utils |
||||||
|
|
||||||
|
MAX_DEPENDENCY_LEVEL = 100 |
||||||
|
MIN_DEPENDENCY_FACTOR = 0.1 |
||||||
|
BASIC_ALPHA = 0.5 |
||||||
|
logger = logging.getLogger('ANOMALY_DETECTOR') |
||||||
|
|
||||||
|
|
||||||
|
class AnomalyDetector(ProcessingDetector): |
||||||
|
|
||||||
|
def __init__(self, analytic_unit_id: AnalyticUnitId): |
||||||
|
super().__init__(analytic_unit_id) |
||||||
|
self.bucket = DataBucket() |
||||||
|
|
||||||
|
def train(self, dataframe: pd.DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache: |
||||||
|
cache = AnomalyCache.from_json(payload) |
||||||
|
cache.time_step = utils.find_interval(dataframe) |
||||||
|
segments = cache.segments |
||||||
|
|
||||||
|
if len(segments) > 0: |
||||||
|
seasonality = cache.seasonality |
||||||
|
prepared_segments = [] |
||||||
|
|
||||||
|
for segment in segments: |
||||||
|
segment_len = (int(segment.to_timestamp) - int(segment.from_timestamp)) |
||||||
|
assert segment_len <= seasonality, \ |
||||||
|
f'seasonality {seasonality} must be greater than segment length {segment_len}' |
||||||
|
|
||||||
|
from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment.from_timestamp, unit='ms')) |
||||||
|
to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment.to_timestamp, unit='ms')) |
||||||
|
segment_data = dataframe[from_index : to_index] |
||||||
|
prepared_segments.append( |
||||||
|
AnomalyDetectorSegment( |
||||||
|
segment.from_timestamp, |
||||||
|
segment.to_timestamp, |
||||||
|
segment_data.value.tolist() |
||||||
|
) |
||||||
|
) |
||||||
|
cache.set_segments(prepared_segments) |
||||||
|
|
||||||
|
return { |
||||||
|
'cache': cache.to_json() |
||||||
|
} |
||||||
|
|
||||||
|
# TODO: ModelCache -> DetectorState |
||||||
|
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||||
|
if cache == None: |
||||||
|
raise f'Analytic unit {self.analytic_unit_id} got empty cache' |
||||||
|
data = dataframe['value'] |
||||||
|
|
||||||
|
cache = AnomalyCache.from_json(cache) |
||||||
|
segments = cache.segments |
||||||
|
enabled_bounds = cache.get_enabled_bounds() |
||||||
|
|
||||||
|
smoothed_data = utils.exponential_smoothing(data, cache.alpha) |
||||||
|
|
||||||
|
lower_bound = smoothed_data - cache.confidence |
||||||
|
upper_bound = smoothed_data + cache.confidence |
||||||
|
|
||||||
|
if len(segments) > 0: |
||||||
|
data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0]) |
||||||
|
|
||||||
|
for segment in segments: |
||||||
|
seasonality_index = cache.seasonality // cache.time_step |
||||||
|
seasonality_offset = self.get_seasonality_offset( |
||||||
|
segment.from_timestamp, |
||||||
|
cache.seasonality, |
||||||
|
data_start_time, |
||||||
|
cache.time_step |
||||||
|
) |
||||||
|
segment_data = pd.Series(segment.data) |
||||||
|
|
||||||
|
lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER) |
||||||
|
upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER) |
||||||
|
|
||||||
|
detected_segments = list(self.detections_generator(dataframe, upper_bound, lower_bound, enabled_bounds)) |
||||||
|
|
||||||
|
last_dataframe_time = dataframe.iloc[-1]['timestamp'] |
||||||
|
last_detection_time = utils.convert_pd_timestamp_to_ms(last_dataframe_time) |
||||||
|
|
||||||
|
return DetectionResult(cache.to_json(), detected_segments, last_detection_time) |
||||||
|
|
||||||
|
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||||
|
if cache is None: |
||||||
|
msg = f'consume_data got invalid cache {cache} for task {self.analytic_unit_id}' |
||||||
|
logging.debug(msg) |
||||||
|
raise ValueError(msg) |
||||||
|
|
||||||
|
data_without_nan = data.dropna() |
||||||
|
|
||||||
|
if len(data_without_nan) == 0: |
||||||
|
return None |
||||||
|
|
||||||
|
self.bucket.receive_data(data_without_nan) |
||||||
|
|
||||||
|
if len(self.bucket.data) >= self.get_window_size(cache): |
||||||
|
return self.detect(self.bucket.data, cache) |
||||||
|
|
||||||
|
return None |
||||||
|
|
||||||
|
def is_detection_intersected(self) -> bool: |
||||||
|
return False |
||||||
|
|
||||||
|
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||||
|
''' |
||||||
|
get the number of values that will affect the next value |
||||||
|
''' |
||||||
|
|
||||||
|
if cache is None: |
||||||
|
raise ValueError('anomaly detector got None cache') |
||||||
|
cache = AnomalyCache.from_json(cache) |
||||||
|
|
||||||
|
for level in range(1, MAX_DEPENDENCY_LEVEL): |
||||||
|
if (1 - cache.alpha) ** level < MIN_DEPENDENCY_FACTOR: |
||||||
|
break |
||||||
|
|
||||||
|
seasonality = 0 |
||||||
|
if len(cache.segments) > 0: |
||||||
|
seasonality = cache.seasonality // cache.time_step |
||||||
|
return max(level, seasonality) |
||||||
|
|
||||||
|
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: |
||||||
|
result = DetectionResult() |
||||||
|
time_step = detections[0].cache['timeStep'] |
||||||
|
for detection in detections: |
||||||
|
result.segments.extend(detection.segments) |
||||||
|
result.last_detection_time = detection.last_detection_time |
||||||
|
result.cache = detection.cache |
||||||
|
result.segments = utils.merge_intersecting_segments(result.segments, time_step) |
||||||
|
return result |
||||||
|
|
||||||
|
# TODO: remove duplication with detect() |
||||||
|
def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult: |
||||||
|
cache = AnomalyCache.from_json(cache) |
||||||
|
segments = cache.segments |
||||||
|
enabled_bounds = cache.get_enabled_bounds() |
||||||
|
|
||||||
|
# TODO: exponential_smoothing should return dataframe with related timestamps |
||||||
|
smoothed_data = utils.exponential_smoothing(dataframe['value'], cache.alpha) |
||||||
|
|
||||||
|
lower_bound = smoothed_data - cache.confidence |
||||||
|
upper_bound = smoothed_data + cache.confidence |
||||||
|
|
||||||
|
if len(segments) > 0: |
||||||
|
data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0]) |
||||||
|
|
||||||
|
for segment in segments: |
||||||
|
seasonality_index = cache.seasonality // cache.time_step |
||||||
|
# TODO: move it to utils and add tests |
||||||
|
seasonality_offset = self.get_seasonality_offset( |
||||||
|
segment.from_timestamp, |
||||||
|
cache.seasonality, |
||||||
|
data_start_time, |
||||||
|
cache.time_step |
||||||
|
) |
||||||
|
segment_data = pd.Series(segment.data) |
||||||
|
|
||||||
|
lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER) |
||||||
|
upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER) |
||||||
|
|
||||||
|
# TODO: support multiple segments |
||||||
|
|
||||||
|
timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp) |
||||||
|
lower_bound_timeseries = list(zip(timestamps, lower_bound.values.tolist())) |
||||||
|
upper_bound_timeseries = list(zip(timestamps, upper_bound.values.tolist())) |
||||||
|
|
||||||
|
if enabled_bounds == Bound.ALL: |
||||||
|
return ProcessingResult(lower_bound_timeseries, upper_bound_timeseries) |
||||||
|
elif enabled_bounds == Bound.UPPER: |
||||||
|
return ProcessingResult(upper_bound = upper_bound_timeseries) |
||||||
|
elif enabled_bounds == Bound.LOWER: |
||||||
|
return ProcessingResult(lower_bound = lower_bound_timeseries) |
||||||
|
|
||||||
|
def add_season_to_data(self, data: pd.Series, segment: pd.Series, offset: int, seasonality: int, bound_type: Bound) -> pd.Series: |
||||||
|
#data - smoothed data to which seasonality will be added |
||||||
|
#if addition == True -> segment is added |
||||||
|
#if addition == False -> segment is subtracted |
||||||
|
len_smoothed_data = len(data) |
||||||
|
for idx, _ in enumerate(data): |
||||||
|
if idx - offset < 0: |
||||||
|
#TODO: add seasonality for non empty parts |
||||||
|
continue |
||||||
|
if (idx - offset) % seasonality == 0: |
||||||
|
if bound_type == Bound.UPPER: |
||||||
|
upper_segment_bound = self.get_segment_bound(segment, Bound.UPPER) |
||||||
|
data = data.add(pd.Series(upper_segment_bound.values, index = segment.index + idx), fill_value = 0) |
||||||
|
elif bound_type == Bound.LOWER: |
||||||
|
lower_segment_bound = self.get_segment_bound(segment, Bound.LOWER) |
||||||
|
data = data.add(pd.Series(lower_segment_bound.values * -1, index = segment.index + idx), fill_value = 0) |
||||||
|
else: |
||||||
|
raise ValueError(f'unknown bound type: {bound_type.value}') |
||||||
|
|
||||||
|
return data[:len_smoothed_data] |
||||||
|
|
||||||
|
def get_segment_bound(self, segment: pd.Series, bound: Bound) -> pd.Series: |
||||||
|
''' |
||||||
|
segment is divided by the median to determine its top or bottom part |
||||||
|
the part is smoothed and raised above the segment or put down below the segment |
||||||
|
''' |
||||||
|
if len(segment) < 2: |
||||||
|
return segment |
||||||
|
comparison_operator = operator.gt if bound == Bound.UPPER else operator.le |
||||||
|
segment = segment - segment.min() |
||||||
|
segment_median = segment.median() |
||||||
|
part = [val if comparison_operator(val, segment_median) else segment_median for val in segment.values] |
||||||
|
part = pd.Series(part, index = segment.index) |
||||||
|
smoothed_part = utils.exponential_smoothing(part, BASIC_ALPHA) |
||||||
|
difference = [abs(x - y) for x, y in zip(part, smoothed_part)] |
||||||
|
max_diff = max(difference) |
||||||
|
bound = [val + max_diff for val in smoothed_part.values] |
||||||
|
bound = pd.Series(bound, index = segment.index) |
||||||
|
return bound |
||||||
|
|
||||||
|
def get_seasonality_offset(self, from_timestamp: int, seasonality: int, data_start_time: int, time_step: int) -> int: |
||||||
|
season_count = math.ceil(abs(from_timestamp - data_start_time) / seasonality) |
||||||
|
start_seasonal_segment = from_timestamp + seasonality * season_count |
||||||
|
seasonality_time_offset = abs(start_seasonal_segment - data_start_time) % seasonality |
||||||
|
seasonality_offset = math.ceil(seasonality_time_offset / time_step) |
||||||
|
return seasonality_offset |
||||||
|
|
||||||
|
def detections_generator( |
||||||
|
self, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
upper_bound: pd.DataFrame, |
||||||
|
lower_bound: pd.DataFrame, |
||||||
|
enabled_bounds: Bound |
||||||
|
) -> Generator[Segment, None, Segment]: |
||||||
|
in_segment = False |
||||||
|
segment_start = 0 |
||||||
|
bound: Bound = None |
||||||
|
for idx, val in enumerate(dataframe['value'].values): |
||||||
|
if val > upper_bound.values[idx]: |
||||||
|
if enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL: |
||||||
|
if not in_segment: |
||||||
|
in_segment = True |
||||||
|
segment_start = dataframe['timestamp'][idx] |
||||||
|
bound = Bound.UPPER |
||||||
|
continue |
||||||
|
|
||||||
|
if val < lower_bound.values[idx]: |
||||||
|
if enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL: |
||||||
|
if not in_segment: |
||||||
|
in_segment = True |
||||||
|
segment_start = dataframe['timestamp'][idx] |
||||||
|
bound = Bound.LOWER |
||||||
|
continue |
||||||
|
|
||||||
|
if in_segment: |
||||||
|
segment_end = dataframe['timestamp'][idx - 1] |
||||||
|
yield Segment( |
||||||
|
utils.convert_pd_timestamp_to_ms(segment_start), |
||||||
|
utils.convert_pd_timestamp_to_ms(segment_end), |
||||||
|
message=f'{val} out of {str(bound.value)} bound' |
||||||
|
) |
||||||
|
in_segment = False |
||||||
|
else: |
||||||
|
if in_segment: |
||||||
|
segment_end = dataframe['timestamp'][idx] |
||||||
|
return Segment( |
||||||
|
utils.convert_pd_timestamp_to_ms(segment_start), |
||||||
|
utils.convert_pd_timestamp_to_ms(segment_end), |
||||||
|
message=f'{val} out of {str(bound.value)} bound' |
||||||
|
) |
@ -0,0 +1,80 @@ |
|||||||
|
from abc import ABC, abstractmethod |
||||||
|
from pandas import DataFrame |
||||||
|
from typing import Optional, Union, List |
||||||
|
|
||||||
|
from analytic_types import ModelCache, TimeSeries, AnalyticUnitId |
||||||
|
from analytic_types.detector import DetectionResult, ProcessingResult |
||||||
|
from analytic_types.segment import Segment |
||||||
|
|
||||||
|
|
||||||
|
class Detector(ABC): |
||||||
|
|
||||||
|
def __init__(self, analytic_unit_id: AnalyticUnitId): |
||||||
|
self.analytic_unit_id = analytic_unit_id |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def train(self, dataframe: DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache: |
||||||
|
""" |
||||||
|
Should be thread-safe to other detectors' train method |
||||||
|
""" |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def consume_data(self, data: DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||||
|
pass |
||||||
|
|
||||||
|
def is_detection_intersected(self) -> bool: |
||||||
|
return True |
||||||
|
|
||||||
|
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: |
||||||
|
result = DetectionResult() |
||||||
|
for detection in detections: |
||||||
|
result.segments.extend(detection.segments) |
||||||
|
result.last_detection_time = detection.last_detection_time |
||||||
|
result.cache = detection.cache |
||||||
|
return result |
||||||
|
|
||||||
|
def get_value_from_cache(self, cache: ModelCache, key: str, required = False): |
||||||
|
value = cache.get(key) |
||||||
|
if value == None and required: |
||||||
|
raise ValueError(f'Missing required "{key}" field in cache for analytic unit {self.analytic_unit_id}') |
||||||
|
return value |
||||||
|
|
||||||
|
|
||||||
|
class ProcessingDetector(Detector): |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def process_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> ProcessingResult: |
||||||
|
''' |
||||||
|
Data processing to receive additional time series that represents detector's settings |
||||||
|
''' |
||||||
|
pass |
||||||
|
|
||||||
|
def concat_processing_results(self, processing_results: List[ProcessingResult]) -> Optional[ProcessingResult]: |
||||||
|
''' |
||||||
|
Concatenate sequential ProcessingResults that received via |
||||||
|
splitting dataset to chunks in analytic worker |
||||||
|
''' |
||||||
|
|
||||||
|
if len(processing_results) == 0: |
||||||
|
return None |
||||||
|
|
||||||
|
united_result = ProcessingResult() |
||||||
|
for result in processing_results: |
||||||
|
if result.lower_bound is not None: |
||||||
|
if united_result.lower_bound is None: united_result.lower_bound = [] |
||||||
|
united_result.lower_bound.extend(result.lower_bound) |
||||||
|
|
||||||
|
if result.upper_bound is not None: |
||||||
|
if united_result.upper_bound is None: united_result.upper_bound = [] |
||||||
|
united_result.upper_bound.extend(result.upper_bound) |
||||||
|
|
||||||
|
return united_result |
@ -0,0 +1,147 @@ |
|||||||
|
import models |
||||||
|
|
||||||
|
import asyncio |
||||||
|
import logging |
||||||
|
import config |
||||||
|
|
||||||
|
import pandas as pd |
||||||
|
from typing import Optional, Generator, List |
||||||
|
|
||||||
|
from detectors import Detector |
||||||
|
from analytic_types.data_bucket import DataBucket |
||||||
|
from utils import convert_pd_timestamp_to_ms |
||||||
|
from analytic_types import AnalyticUnitId, ModelCache |
||||||
|
from analytic_types.detector import DetectionResult |
||||||
|
from analytic_types.segment import Segment |
||||||
|
import utils |
||||||
|
|
||||||
|
logger = logging.getLogger('PATTERN_DETECTOR') |
||||||
|
|
||||||
|
|
||||||
|
def resolve_model_by_pattern(pattern: str) -> models.Model: |
||||||
|
if pattern == 'GENERAL': |
||||||
|
return models.GeneralModel() |
||||||
|
if pattern == 'PEAK': |
||||||
|
return models.PeakModel() |
||||||
|
if pattern == 'TROUGH': |
||||||
|
return models.TroughModel() |
||||||
|
if pattern == 'DROP': |
||||||
|
return models.DropModel() |
||||||
|
if pattern == 'JUMP': |
||||||
|
return models.JumpModel() |
||||||
|
if pattern == 'CUSTOM': |
||||||
|
return models.CustomModel() |
||||||
|
raise ValueError('Unknown pattern "%s"' % pattern) |
||||||
|
|
||||||
|
|
||||||
|
class PatternDetector(Detector): |
||||||
|
|
||||||
|
MIN_BUCKET_SIZE = 150 |
||||||
|
BUCKET_WINDOW_SIZE_FACTOR = 5 |
||||||
|
DEFAULT_WINDOW_SIZE = 1 |
||||||
|
|
||||||
|
def __init__(self, pattern_type: str, analytic_unit_id: AnalyticUnitId): |
||||||
|
super().__init__(analytic_unit_id) |
||||||
|
self.pattern_type = pattern_type |
||||||
|
self.model = resolve_model_by_pattern(self.pattern_type) |
||||||
|
self.bucket = DataBucket() |
||||||
|
|
||||||
|
def train(self, dataframe: pd.DataFrame, segments: List[Segment], cache: Optional[ModelCache]) -> ModelCache: |
||||||
|
# TODO: pass only part of dataframe that has segments |
||||||
|
|
||||||
|
if self.contains_labeled_segments(segments) == False: |
||||||
|
msg = f'{self.analytic_unit_id} has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment' |
||||||
|
logger.error(msg) |
||||||
|
raise ValueError(msg) |
||||||
|
|
||||||
|
self.model.state: models.ModelState = self.model.get_state(cache) |
||||||
|
new_cache: models.ModelState = self.model.fit(dataframe, segments, self.analytic_unit_id) |
||||||
|
|
||||||
|
# time step is optional |
||||||
|
if len(dataframe) > 1: |
||||||
|
new_cache.time_step = utils.find_interval(dataframe) |
||||||
|
|
||||||
|
new_cache = new_cache.to_json() |
||||||
|
if len(new_cache) == 0: |
||||||
|
logging.warning('new_cache is empty with data: {}, segments: {}, cache: {}, analytic unit: {}'.format(dataframe, segments, cache, self.analytic_unit_id)) |
||||||
|
return { |
||||||
|
'cache': new_cache |
||||||
|
} |
||||||
|
|
||||||
|
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||||
|
logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe))) |
||||||
|
# TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643) |
||||||
|
|
||||||
|
if cache is None: |
||||||
|
msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection' |
||||||
|
logger.error(msg) |
||||||
|
raise ValueError(msg) |
||||||
|
|
||||||
|
self.model.state = self.model.get_state(cache) |
||||||
|
window_size = self.model.state.window_size |
||||||
|
|
||||||
|
if window_size is None: |
||||||
|
message = '{} got cache without window_size for detection'.format(self.analytic_unit_id) |
||||||
|
logger.error(message) |
||||||
|
raise ValueError(message) |
||||||
|
|
||||||
|
if len(dataframe) < window_size * 2: |
||||||
|
message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points less than minimal length {window_size * 2} points' |
||||||
|
logger.error(message) |
||||||
|
raise ValueError(message) |
||||||
|
|
||||||
|
detected = self.model.detect(dataframe, self.analytic_unit_id) |
||||||
|
|
||||||
|
segments = [Segment(segment[0], segment[1]) for segment in detected['segments']] |
||||||
|
new_cache = detected['cache'].to_json() |
||||||
|
last_dataframe_time = dataframe.iloc[-1]['timestamp'] |
||||||
|
last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time) |
||||||
|
return DetectionResult(new_cache, segments, last_detection_time) |
||||||
|
|
||||||
|
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||||
|
logging.debug('Start consume_data for analytic unit {}'.format(self.analytic_unit_id)) |
||||||
|
|
||||||
|
if cache is None: |
||||||
|
logging.debug(f'consume_data get invalid cache {cache} for task {self.analytic_unit_id}, skip') |
||||||
|
return None |
||||||
|
|
||||||
|
data_without_nan = data.dropna() |
||||||
|
|
||||||
|
if len(data_without_nan) == 0: |
||||||
|
return None |
||||||
|
|
||||||
|
self.bucket.receive_data(data_without_nan) |
||||||
|
|
||||||
|
# TODO: use ModelState |
||||||
|
window_size = cache['windowSize'] |
||||||
|
|
||||||
|
bucket_len = len(self.bucket.data) |
||||||
|
if bucket_len < window_size * 2: |
||||||
|
msg = f'{self.analytic_unit_id} bucket data {bucket_len} less than two window size {window_size * 2}, skip run detection from consume_data' |
||||||
|
logger.debug(msg) |
||||||
|
return None |
||||||
|
|
||||||
|
res = self.detect(self.bucket.data, cache) |
||||||
|
|
||||||
|
bucket_size = max(window_size * self.BUCKET_WINDOW_SIZE_FACTOR, self.MIN_BUCKET_SIZE) |
||||||
|
if bucket_len > bucket_size: |
||||||
|
excess_data = bucket_len - bucket_size |
||||||
|
self.bucket.drop_data(excess_data) |
||||||
|
|
||||||
|
logging.debug('End consume_data for analytic unit: {} with res: {}'.format(self.analytic_unit_id, str(res.to_json()))) |
||||||
|
|
||||||
|
if res: |
||||||
|
return res |
||||||
|
else: |
||||||
|
return None |
||||||
|
|
||||||
|
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||||
|
if cache is None: return self.DEFAULT_WINDOW_SIZE |
||||||
|
# TODO: windowSize -> window_size |
||||||
|
return cache.get('windowSize', self.DEFAULT_WINDOW_SIZE) |
||||||
|
|
||||||
|
def contains_labeled_segments(self, segments: List[Segment]) -> bool: |
||||||
|
for segment in segments: |
||||||
|
if segment.labeled == True: |
||||||
|
return True |
||||||
|
return False |
@ -0,0 +1,111 @@ |
|||||||
|
import logging as log |
||||||
|
|
||||||
|
import operator |
||||||
|
import pandas as pd |
||||||
|
import numpy as np |
||||||
|
from typing import Optional, List |
||||||
|
|
||||||
|
from analytic_types import ModelCache, AnalyticUnitId |
||||||
|
from analytic_types.detector import DetectionResult, ProcessingResult |
||||||
|
from analytic_types.segment import Segment |
||||||
|
from detectors import ProcessingDetector |
||||||
|
from time import time |
||||||
|
import utils |
||||||
|
|
||||||
|
|
||||||
|
logger = log.getLogger('THRESHOLD_DETECTOR') |
||||||
|
|
||||||
|
|
||||||
|
class ThresholdDetector(ProcessingDetector): |
||||||
|
|
||||||
|
WINDOW_SIZE = 3 |
||||||
|
|
||||||
|
def __init__(self, analytic_unit_id: AnalyticUnitId): |
||||||
|
super().__init__(analytic_unit_id) |
||||||
|
|
||||||
|
def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache: |
||||||
|
time_step = utils.find_interval(dataframe) |
||||||
|
return { |
||||||
|
'cache': { |
||||||
|
'value': threshold['value'], |
||||||
|
'condition': threshold['condition'], |
||||||
|
'timeStep': time_step |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult: |
||||||
|
if cache is None or cache == {}: |
||||||
|
raise ValueError('Threshold detector error: cannot detect before learning') |
||||||
|
if len(dataframe) == 0: |
||||||
|
return None |
||||||
|
|
||||||
|
value = cache['value'] |
||||||
|
condition = cache['condition'] |
||||||
|
|
||||||
|
segments = [] |
||||||
|
for index, row in dataframe.iterrows(): |
||||||
|
current_value = row['value'] |
||||||
|
current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp']) |
||||||
|
segment = Segment(current_timestamp, current_timestamp) |
||||||
|
# TODO: merge segments |
||||||
|
if pd.isnull(current_value): |
||||||
|
if condition == 'NO_DATA': |
||||||
|
segment.message = 'NO_DATA detected' |
||||||
|
segments.append(segment) |
||||||
|
continue |
||||||
|
|
||||||
|
comparators = { |
||||||
|
'>': operator.gt, |
||||||
|
'<': operator.lt, |
||||||
|
'=': operator.eq, |
||||||
|
'>=': operator.ge, |
||||||
|
'<=': operator.le |
||||||
|
} |
||||||
|
|
||||||
|
assert condition in comparators.keys(), f'condition {condition} not allowed' |
||||||
|
|
||||||
|
if comparators[condition](current_value, value): |
||||||
|
segment.message = f"{current_value} {condition} threshold's value {value}" |
||||||
|
segments.append(segment) |
||||||
|
|
||||||
|
last_entry = dataframe.iloc[-1] |
||||||
|
last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp']) |
||||||
|
return DetectionResult(cache, segments, last_detection_time) |
||||||
|
|
||||||
|
|
||||||
|
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||||
|
result = self.detect(data, cache) |
||||||
|
return result if result else None |
||||||
|
|
||||||
|
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||||
|
return self.WINDOW_SIZE |
||||||
|
|
||||||
|
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: |
||||||
|
result = DetectionResult() |
||||||
|
time_step = detections[0].cache['timeStep'] |
||||||
|
for detection in detections: |
||||||
|
result.segments.extend(detection.segments) |
||||||
|
result.last_detection_time = detection.last_detection_time |
||||||
|
result.cache = detection.cache |
||||||
|
result.segments = utils.merge_intersecting_segments(result.segments, time_step) |
||||||
|
return result |
||||||
|
|
||||||
|
def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult: |
||||||
|
data = dataframe['value'] |
||||||
|
value = self.get_value_from_cache(cache, 'value', required = True) |
||||||
|
condition = self.get_value_from_cache(cache, 'condition', required = True) |
||||||
|
|
||||||
|
if condition == 'NO_DATA': |
||||||
|
return ProcessingResult() |
||||||
|
|
||||||
|
data.values[:] = value |
||||||
|
timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp) |
||||||
|
result_series = list(zip(timestamps, data.values.tolist())) |
||||||
|
|
||||||
|
if condition in ['>', '>=', '=']: |
||||||
|
return ProcessingResult(upper_bound = result_series) |
||||||
|
|
||||||
|
if condition in ['<', '<=']: |
||||||
|
return ProcessingResult(lower_bound = result_series) |
||||||
|
|
||||||
|
raise ValueError(f'{condition} condition not supported') |
@ -0,0 +1,9 @@ |
|||||||
|
from models.model import Model, ModelState, AnalyticSegment, ModelType, ExtremumType |
||||||
|
from models.triangle_model import TriangleModel, TriangleModelState |
||||||
|
from models.stair_model import StairModel, StairModelState |
||||||
|
from models.drop_model import DropModel |
||||||
|
from models.peak_model import PeakModel |
||||||
|
from models.jump_model import JumpModel |
||||||
|
from models.custom_model import CustomModel |
||||||
|
from models.trough_model import TroughModel |
||||||
|
from models.general_model import GeneralModel, GeneralModelState |
@ -0,0 +1,30 @@ |
|||||||
|
from models import Model, AnalyticSegment, ModelState, ModelType |
||||||
|
from analytic_types import AnalyticUnitId, ModelCache |
||||||
|
from analytic_types.learning_info import LearningInfo |
||||||
|
import utils |
||||||
|
|
||||||
|
import pandas as pd |
||||||
|
from typing import List, Optional |
||||||
|
|
||||||
|
|
||||||
|
class CustomModel(Model): |
||||||
|
def do_fit( |
||||||
|
self, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
labeled_segments: List[AnalyticSegment], |
||||||
|
deleted_segments: List[AnalyticSegment], |
||||||
|
learning_info: LearningInfo |
||||||
|
) -> None: |
||||||
|
pass |
||||||
|
|
||||||
|
def do_detect(self, dataframe: pd.DataFrame) -> list: |
||||||
|
return [] |
||||||
|
|
||||||
|
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||||
|
pass |
||||||
|
|
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
pass |
||||||
|
|
||||||
|
def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: |
||||||
|
pass |
@ -0,0 +1,9 @@ |
|||||||
|
from models import StairModel, ModelType, ExtremumType |
||||||
|
|
||||||
|
class DropModel(StairModel): |
||||||
|
|
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
return ModelType.DROP |
||||||
|
|
||||||
|
def get_extremum_type(self) -> ExtremumType: |
||||||
|
return ExtremumType.MIN |
@ -0,0 +1,104 @@ |
|||||||
|
from analytic_types import AnalyticUnitId |
||||||
|
from models import Model, ModelState, AnalyticSegment, ModelType |
||||||
|
from typing import Union, List, Generator |
||||||
|
import utils |
||||||
|
import utils.meta |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
import scipy.signal |
||||||
|
from scipy.fftpack import fft |
||||||
|
from scipy.signal import argrelextrema |
||||||
|
from scipy.stats.stats import pearsonr |
||||||
|
|
||||||
|
from scipy.stats import gaussian_kde |
||||||
|
from scipy.stats import norm |
||||||
|
import logging |
||||||
|
|
||||||
|
from typing import Optional, List, Tuple |
||||||
|
import math |
||||||
|
from analytic_types import AnalyticUnitId, TimeSeries |
||||||
|
from analytic_types.learning_info import LearningInfo |
||||||
|
|
||||||
|
PEARSON_FACTOR = 0.7 |
||||||
|
|
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class GeneralModelState(ModelState): |
||||||
|
def __init__(self, **kwargs): |
||||||
|
super().__init__(**kwargs) |
||||||
|
|
||||||
|
|
||||||
|
class GeneralModel(Model): |
||||||
|
|
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
return ModelType.GENERAL |
||||||
|
|
||||||
|
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||||
|
data = dataframe['value'] |
||||||
|
segment = data[start: end] |
||||||
|
center_ind = start + math.ceil((end - start) / 2) |
||||||
|
return center_ind |
||||||
|
|
||||||
|
def get_state(self, cache: Optional[dict] = None) -> GeneralModelState: |
||||||
|
return GeneralModelState.from_json(cache) |
||||||
|
|
||||||
|
def do_fit( |
||||||
|
self, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
labeled_segments: List[AnalyticSegment], |
||||||
|
deleted_segments: List[AnalyticSegment], |
||||||
|
learning_info: LearningInfo |
||||||
|
) -> None: |
||||||
|
data = utils.cut_dataframe(dataframe) |
||||||
|
data = data['value'] |
||||||
|
last_pattern_center = self.state.pattern_center |
||||||
|
self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) |
||||||
|
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) |
||||||
|
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||||
|
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||||
|
|
||||||
|
del_conv_list = [] |
||||||
|
delete_pattern_timestamp = [] |
||||||
|
for segment in deleted_segments: |
||||||
|
del_mid_index = segment.center_index |
||||||
|
delete_pattern_timestamp.append(segment.pattern_timestamp) |
||||||
|
deleted_pat = utils.get_interval(data, del_mid_index, self.state.window_size) |
||||||
|
deleted_pat = utils.subtract_min_without_nan(deleted_pat) |
||||||
|
del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state.pattern_model) |
||||||
|
if len(del_conv_pat): del_conv_list.append(max(del_conv_pat)) |
||||||
|
|
||||||
|
self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3) |
||||||
|
self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size) |
||||||
|
|
||||||
|
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||||
|
data = utils.cut_dataframe(dataframe) |
||||||
|
data = data['value'] |
||||||
|
pat_data = self.state.pattern_model |
||||||
|
if pat_data.count(0) == len(pat_data): |
||||||
|
raise ValueError('Labeled patterns must not be empty') |
||||||
|
|
||||||
|
window_size = self.state.window_size |
||||||
|
all_corr = utils.get_correlation_gen(data, window_size, pat_data) |
||||||
|
all_corr_peaks = utils.find_peaks(all_corr, window_size * 2) |
||||||
|
filtered = self.__filter_detection(all_corr_peaks, data) |
||||||
|
filtered = list(filtered) |
||||||
|
return [(item, item + window_size * 2) for item in filtered] |
||||||
|
|
||||||
|
def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]: |
||||||
|
if not self.state.pattern_center: |
||||||
|
return [] |
||||||
|
window_size = self.state.window_size |
||||||
|
pattern_model = self.state.pattern_model |
||||||
|
for ind, val in segments: |
||||||
|
watch_data = data[ind - window_size: ind + window_size + 1] |
||||||
|
watch_data = utils.subtract_min_without_nan(watch_data) |
||||||
|
convolve_segment = scipy.signal.fftconvolve(watch_data, pattern_model) |
||||||
|
if len(convolve_segment) > 0: |
||||||
|
watch_conv = max(convolve_segment) |
||||||
|
else: |
||||||
|
continue |
||||||
|
if watch_conv < self.state.convolve_min * 0.8 or val < PEARSON_FACTOR: |
||||||
|
continue |
||||||
|
if watch_conv < self.state.conv_del_max * 1.02 and watch_conv > self.state.conv_del_min * 0.98: |
||||||
|
continue |
||||||
|
yield ind |
@ -0,0 +1,9 @@ |
|||||||
|
from models import StairModel, ModelType, ExtremumType |
||||||
|
|
||||||
|
class JumpModel(StairModel): |
||||||
|
|
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
return ModelType.JUMP |
||||||
|
|
||||||
|
def get_extremum_type(self) -> ExtremumType: |
||||||
|
return ExtremumType.MAX |
@ -0,0 +1,230 @@ |
|||||||
|
from analytic_types import AnalyticUnitId, ModelCache, TimeSeries |
||||||
|
from analytic_types.segment import Segment |
||||||
|
from analytic_types.learning_info import LearningInfo |
||||||
|
|
||||||
|
import utils |
||||||
|
import utils.meta |
||||||
|
|
||||||
|
from abc import ABC, abstractmethod |
||||||
|
from attrdict import AttrDict |
||||||
|
from typing import Optional, List, Tuple |
||||||
|
import pandas as pd |
||||||
|
import math |
||||||
|
import logging |
||||||
|
from enum import Enum |
||||||
|
|
||||||
|
class ModelType(Enum): |
||||||
|
JUMP = 'jump' |
||||||
|
DROP = 'drop' |
||||||
|
PEAK = 'peak' |
||||||
|
TROUGH = 'trough' |
||||||
|
GENERAL = 'general' |
||||||
|
|
||||||
|
class ExtremumType(Enum): |
||||||
|
MAX = 'max' |
||||||
|
MIN = 'min' |
||||||
|
|
||||||
|
class AnalyticSegment(Segment): |
||||||
|
''' |
||||||
|
Segment with specific analytics fields used by models: |
||||||
|
- `labeled` / `deleted` flags |
||||||
|
- `from` / `to` / `center` indices |
||||||
|
- `length` |
||||||
|
- `data` |
||||||
|
- etc |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
from_timestamp: int, |
||||||
|
to_timestamp: int, |
||||||
|
_id: str, |
||||||
|
analytic_unit_id: str, |
||||||
|
labeled: bool, |
||||||
|
deleted: bool, |
||||||
|
message: str, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
center_finder = None |
||||||
|
): |
||||||
|
super().__init__( |
||||||
|
from_timestamp, |
||||||
|
to_timestamp, |
||||||
|
_id, |
||||||
|
analytic_unit_id, |
||||||
|
labeled, |
||||||
|
deleted, |
||||||
|
message |
||||||
|
) |
||||||
|
|
||||||
|
self.from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(self.from_timestamp, unit='ms')) |
||||||
|
self.to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(self.to_timestamp, unit='ms')) |
||||||
|
self.length = abs(self.to_index - self.from_index) |
||||||
|
self.__percent_of_nans = 0 |
||||||
|
|
||||||
|
if callable(center_finder): |
||||||
|
self.center_index = center_finder(dataframe, self.from_index, self.to_index) |
||||||
|
self.pattern_timestamp = dataframe['timestamp'][self.center_index] |
||||||
|
else: |
||||||
|
self.center_index = self.from_index + math.ceil(self.length / 2) |
||||||
|
self.pattern_timestamp = dataframe['timestamp'][self.center_index] |
||||||
|
|
||||||
|
assert len(dataframe['value']) >= self.to_index + 1, \ |
||||||
|
'segment {}-{} out of dataframe length={}'.format(self.from_index, self.to_index + 1, len(dataframe['value'])) |
||||||
|
|
||||||
|
self.data = dataframe['value'][self.from_index: self.to_index + 1] |
||||||
|
|
||||||
|
@property |
||||||
|
def percent_of_nans(self): |
||||||
|
if not self.__percent_of_nans: |
||||||
|
self.__percent_of_nans = self.data.isnull().sum() / len(self.data) |
||||||
|
return self.__percent_of_nans |
||||||
|
|
||||||
|
def convert_nan_to_zero(self): |
||||||
|
nan_list = utils.find_nan_indexes(self.data) |
||||||
|
self.data = utils.nan_to_zero(self.data, nan_list) |
||||||
|
|
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class ModelState(): |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
time_step: int = 0, |
||||||
|
pattern_center: List[int] = None, |
||||||
|
pattern_model: List[float] = None, |
||||||
|
convolve_max: float = 0, |
||||||
|
convolve_min: float = 0, |
||||||
|
window_size: int = 0, |
||||||
|
conv_del_min: float = 0, |
||||||
|
conv_del_max: float = 0 |
||||||
|
): |
||||||
|
self.time_step = time_step |
||||||
|
self.pattern_center = pattern_center if pattern_center is not None else [] |
||||||
|
self.pattern_model = pattern_model if pattern_model is not None else [] |
||||||
|
self.convolve_max = convolve_max |
||||||
|
self.convolve_min = convolve_min |
||||||
|
self.window_size = window_size |
||||||
|
self.conv_del_min = conv_del_min |
||||||
|
self.conv_del_max = conv_del_max |
||||||
|
|
||||||
|
|
||||||
|
class Model(ABC): |
||||||
|
|
||||||
|
HEIGHT_ERROR = 0.1 |
||||||
|
CONV_ERROR = 0.2 |
||||||
|
DEL_CONV_ERROR = 0.02 |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def do_fit( |
||||||
|
self, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
labeled_segments: List[AnalyticSegment], |
||||||
|
deleted_segments: List[AnalyticSegment], |
||||||
|
learning_info: LearningInfo |
||||||
|
) -> None: |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: |
||||||
|
pass |
||||||
|
|
||||||
|
def fit(self, dataframe: pd.DataFrame, segments: List[Segment], id: AnalyticUnitId) -> ModelState: |
||||||
|
logging.debug('Start method fit for analytic unit {}'.format(id)) |
||||||
|
data = dataframe['value'] |
||||||
|
max_length = 0 |
||||||
|
labeled = [] |
||||||
|
deleted = [] |
||||||
|
for segment_map in segments: |
||||||
|
if segment_map.labeled or segment_map.deleted: |
||||||
|
segment = AnalyticSegment( |
||||||
|
segment_map.from_timestamp, |
||||||
|
segment_map.to_timestamp, |
||||||
|
segment_map._id, |
||||||
|
segment_map.analytic_unit_id, |
||||||
|
segment_map.labeled, |
||||||
|
segment_map.deleted, |
||||||
|
segment_map.message, |
||||||
|
dataframe, |
||||||
|
self.find_segment_center |
||||||
|
) |
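# Skip segments with more than 10% NaN values or with no data at all;
# zero-fill NaNs in the remaining segments.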
||||||
|
if segment.percent_of_nans > 0.1 or len(segment.data) == 0: |
||||||
|
logging.debug(f'segment {segment.from_index}-{segment.to_index} skipped because of invalid data')
||||||
|
continue |
||||||
|
if segment.percent_of_nans > 0: |
||||||
|
segment.convert_nan_to_zero() |
||||||
|
max_length = max(segment.length, max_length) |
||||||
|
if segment.labeled: labeled.append(segment) |
||||||
|
if segment.deleted: deleted.append(segment) |
||||||
|
|
||||||
|
assert len(labeled) > 0, f'labeled list empty, skip fitting for {id}' |
||||||
|
|
||||||
|
if self.state.window_size == 0: |
||||||
|
self.state.window_size = math.ceil(max_length / 2) if max_length else 0 |
||||||
|
learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, self.get_model_type()) |
||||||
|
self.do_fit(dataframe, labeled, deleted, learning_info) |
||||||
|
logging.debug('fit completed successfully with self.state: {} for analytic unit: {}'.format(self.state, id))
||||||
|
return self.state |
||||||
|
|
||||||
|
def detect(self, dataframe: pd.DataFrame, id: AnalyticUnitId) -> dict: |
||||||
|
logging.debug('Start method detect for analytic unit {}'.format(id)) |
||||||
|
result = self.do_detect(dataframe) |
||||||
|
segments = [( |
||||||
|
utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]), |
||||||
|
utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]), |
||||||
|
) for x in result] |
||||||
|
if not self.state: |
||||||
|
logging.warning('Return empty self.state after detect') |
||||||
|
logging.debug('Method detect completed successfully for analytic unit {}'.format(id))
||||||
|
return { |
||||||
|
'segments': segments, |
||||||
|
'cache': self.state, |
||||||
|
} |
||||||
|
|
||||||
|
def _update_fitting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None: |
||||||
|
state.confidence = float(min(confidences, default = 1.5)) |
||||||
|
state.convolve_min, state.convolve_max = utils.get_min_max(convolve_list, state.window_size) |
||||||
|
state.conv_del_min, state.conv_del_max = utils.get_min_max(del_conv_list, 0) |
||||||
|
if height_list is not None: |
||||||
|
state.height_min, state.height_max = utils.get_min_max(height_list, 0) |
||||||
|
|
||||||
|
def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[AnalyticSegment], deleted: List[AnalyticSegment], model: ModelType) -> LearningInfo:
||||||
|
logging.debug('Start parsing segments') |
||||||
|
learning_info = LearningInfo() |
||||||
|
data = dataframe['value'] |
||||||
|
for segment in labeled: |
||||||
|
confidence = utils.find_confidence(segment.data)[0] |
||||||
|
learning_info.confidence.append(confidence) |
||||||
|
segment_center = segment.center_index |
||||||
|
learning_info.segment_center_list.append(segment_center) |
||||||
|
learning_info.pattern_timestamp.append(segment.pattern_timestamp) |
||||||
|
aligned_segment = utils.get_interval(data, segment_center, self.state.window_size) |
||||||
|
aligned_segment = utils.subtract_min_without_nan(aligned_segment) |
||||||
|
if len(aligned_segment) == 0: |
||||||
|
logging.warning('cannot add segment to learning because the segment is empty; segment center: {}, window_size: {}, data length: {}'.format(
||||||
|
segment_center, self.state.window_size, len(data))) |
||||||
|
continue |
||||||
|
learning_info.patterns_list.append(aligned_segment) |
||||||
|
# TODO: use Triangle/Stair types |
||||||
|
if model == ModelType.PEAK or model == ModelType.TROUGH: |
||||||
|
learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1]) |
||||||
|
learning_info.patterns_value.append(aligned_segment.values.max()) |
||||||
|
if model == ModelType.JUMP or model == ModelType.DROP: |
||||||
|
pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model.value) |
||||||
|
learning_info.pattern_height.append(pattern_height) |
||||||
|
learning_info.pattern_width.append(pattern_length) |
||||||
|
learning_info.patterns_value.append(aligned_segment.values[self.state.window_size]) |
||||||
|
logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info)) |
||||||
|
return learning_info |
||||||
|
|
@ -0,0 +1,44 @@ |
|||||||
|
from analytic_types import TimeSeries |
||||||
|
from models import TriangleModel, ModelType |
||||||
|
import utils |
||||||
|
|
||||||
|
import scipy.signal |
||||||
|
from scipy.signal import argrelextrema |
||||||
|
from typing import Optional, List, Tuple |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
class PeakModel(TriangleModel): |
||||||
|
|
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
return ModelType.PEAK |
||||||
|
|
||||||
|
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||||
|
data = dataframe['value'] |
||||||
|
segment = data[start: end] |
||||||
|
return segment.idxmax() |
||||||
|
|
||||||
|
def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: |
||||||
|
pattern_list = [] |
||||||
|
for val in close_patterns: |
||||||
|
max_val = data[val[0]] |
||||||
|
ind = val[0] |
||||||
|
for i in val: |
||||||
|
if data[i] > max_val: |
||||||
|
max_val = data[i] |
||||||
|
ind = i |
||||||
|
pattern_list.append(ind) |
||||||
|
return pattern_list |
||||||
|
|
||||||
|
def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: |
||||||
|
return argrelextrema(data.values, np.greater)[0] |
||||||
|
|
||||||
|
def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: |
||||||
|
return utils.exponential_smoothing(data + self.state.confidence, alpha) |
||||||
|
|
||||||
|
def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]: |
||||||
|
segments = [] |
||||||
|
for idx in peak_indexes: |
||||||
|
if data[idx] > smoothed_data[idx]: |
||||||
|
segments.append(idx) |
||||||
|
return segments |
@ -0,0 +1,147 @@ |
|||||||
|
from models import Model, ModelState, AnalyticSegment, ModelType |
||||||
|
|
||||||
|
from analytic_types import TimeSeries |
||||||
|
from analytic_types.learning_info import LearningInfo |
||||||
|
|
||||||
|
from scipy.fftpack import fft |
||||||
|
from typing import Optional, List |
||||||
|
from enum import Enum |
||||||
|
import scipy.signal |
||||||
|
import utils |
||||||
|
import utils.meta |
||||||
|
import pandas as pd |
||||||
|
import numpy as np |
||||||
|
import operator |
||||||
|
|
||||||
|
POSITIVE_SEGMENT_MEASUREMENT_ERROR = 0.2 |
||||||
|
NEGATIVE_SEGMENT_MEASUREMENT_ERROR = 0.02 |
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class StairModelState(ModelState): |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
confidence: float = 0, |
||||||
|
stair_height: float = 0, |
||||||
|
stair_length: float = 0, |
||||||
|
**kwargs |
||||||
|
): |
||||||
|
super().__init__(**kwargs) |
||||||
|
self.confidence = confidence |
||||||
|
self.stair_height = stair_height |
||||||
|
self.stair_length = stair_length |
||||||
|
|
||||||
|
|
||||||
|
class StairModel(Model): |
||||||
|
|
||||||
|
def get_state(self, cache: Optional[dict] = None) -> StairModelState: |
||||||
|
return StairModelState.from_json(cache) |
||||||
|
|
||||||
|
def get_stair_indexes(self, data: pd.Series, height: float, length: int) -> List[int]: |
||||||
|
"""Get list of start stair segment indexes. |
||||||
|
|
||||||
|
Keyword arguments: |
||||||
|
data -- data that contains stair (jump or drop) segments
||||||
|
length -- maximum count of values in the stair |
||||||
|
height -- the difference between the stair max_line and min_line (see utils.find_parameters)
||||||
|
""" |
||||||
|
indexes = [] |
||||||
|
for i in range(len(data) - length - 1): |
||||||
|
is_stair = self.is_stair_in_segment(data.values[i:i + length + 1], height) |
||||||
|
if is_stair:
||||||
|
indexes.append(i) |
||||||
|
return indexes |
||||||
|
|
||||||
|
def is_stair_in_segment(self, segment: np.ndarray, height: float) -> bool: |
||||||
|
if len(segment) < 2: |
||||||
|
return False |
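# For DROP models the check is mirrored below: the height is negated and the
# comparison flipped to <=, so the rest of the segment must stay at or below
# segment[0] - height.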
||||||
|
comparison_operator = operator.ge |
||||||
|
if self.get_model_type() == ModelType.DROP: |
||||||
|
comparison_operator = operator.le |
||||||
|
height = -height |
||||||
|
return comparison_operator(max(segment[1:]), segment[0] + height) |
||||||
|
|
||||||
|
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||||
|
data = dataframe['value'] |
||||||
|
segment = data[start: end] |
||||||
|
segment_center_index = utils.find_pattern_center(segment, start, self.get_model_type().value) |
||||||
|
return segment_center_index |
||||||
|
|
||||||
|
def do_fit( |
||||||
|
self, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
labeled_segments: List[AnalyticSegment], |
||||||
|
deleted_segments: List[AnalyticSegment], |
||||||
|
learning_info: LearningInfo |
||||||
|
) -> None: |
||||||
|
data = utils.cut_dataframe(dataframe) |
||||||
|
data = data['value'] |
||||||
|
window_size = self.state.window_size |
||||||
|
last_pattern_center = self.state.pattern_center |
||||||
|
self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) |
||||||
|
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) |
||||||
|
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) |
||||||
|
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) |
||||||
|
height_list = learning_info.patterns_value |
||||||
|
|
||||||
|
del_conv_list = [] |
||||||
|
delete_pattern_timestamp = [] |
||||||
|
for segment in deleted_segments: |
||||||
|
segment_cent_index = segment.center_index |
||||||
|
delete_pattern_timestamp.append(segment.pattern_timestamp) |
||||||
|
deleted_stair = utils.get_interval(data, segment_cent_index, window_size) |
||||||
|
deleted_stair = utils.subtract_min_without_nan(deleted_stair) |
||||||
|
del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model) |
||||||
|
if len(del_conv_stair) > 0: |
||||||
|
del_conv_list.append(max(del_conv_stair)) |
||||||
|
|
||||||
|
self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) |
||||||
|
self.state.stair_height = int(min(learning_info.pattern_height, default = 1)) |
||||||
|
self.state.stair_length = int(max(learning_info.pattern_width, default = 1)) |
||||||
|
|
||||||
|
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||||
|
data = utils.cut_dataframe(dataframe) |
||||||
|
data = data['value'] |
||||||
|
possible_stairs = self.get_stair_indexes(data, self.state.stair_height, self.state.stair_length + 1) |
||||||
|
result = self.__filter_detection(possible_stairs, data) |
||||||
|
return [(val - 1, val + 1) for val in result] |
||||||
|
|
||||||
|
def __filter_detection(self, segments_indexes: List[int], data: pd.Series) -> List[int]:
||||||
|
delete_list = [] |
||||||
|
variance_error = self.state.window_size |
||||||
|
close_segments = utils.close_filtering(segments_indexes, variance_error) |
||||||
|
segments_indexes = utils.best_pattern(close_segments, data, self.get_extremum_type().value) |
||||||
|
if len(segments_indexes) == 0 or len(self.state.pattern_center) == 0: |
||||||
|
return [] |
||||||
|
pattern_data = self.state.pattern_model |
||||||
|
for segment_index in segments_indexes: |
||||||
|
if segment_index <= self.state.window_size or segment_index >= (len(data) - self.state.window_size): |
||||||
|
delete_list.append(segment_index) |
||||||
|
continue |
||||||
|
convol_data = utils.get_interval(data, segment_index, self.state.window_size) |
||||||
|
percent_of_nans = convol_data.isnull().sum() / len(convol_data) |
||||||
|
if len(convol_data) == 0 or percent_of_nans > 0.5: |
||||||
|
delete_list.append(segment_index) |
||||||
|
continue |
||||||
|
elif 0 < percent_of_nans <= 0.5: |
||||||
|
nan_list = utils.find_nan_indexes(convol_data) |
||||||
|
convol_data = utils.nan_to_zero(convol_data, nan_list) |
||||||
|
pattern_data = utils.nan_to_zero(pattern_data, nan_list) |
||||||
|
conv = scipy.signal.fftconvolve(convol_data, pattern_data) |
||||||
|
if len(conv) == 0: |
||||||
|
delete_list.append(segment_index) |
||||||
|
continue |
||||||
|
upper_bound = self.state.convolve_max * (1 + POSITIVE_SEGMENT_MEASUREMENT_ERROR) |
||||||
|
lower_bound = self.state.convolve_min * (1 - POSITIVE_SEGMENT_MEASUREMENT_ERROR) |
||||||
|
delete_up_bound = self.state.conv_del_max * (1 + NEGATIVE_SEGMENT_MEASUREMENT_ERROR) |
||||||
|
delete_low_bound = self.state.conv_del_min * (1 - NEGATIVE_SEGMENT_MEASUREMENT_ERROR) |
||||||
|
max_conv = max(conv) |
||||||
|
if max_conv > upper_bound or max_conv < lower_bound: |
||||||
|
delete_list.append(segment_index) |
||||||
|
elif max_conv < delete_up_bound and max_conv > delete_low_bound: |
||||||
|
delete_list.append(segment_index) |
||||||
|
|
||||||
|
for item in delete_list: |
||||||
|
segments_indexes.remove(item) |
||||||
|
segments_indexes = utils.remove_duplicates_and_sort(segments_indexes) |
||||||
|
return segments_indexes |
@ -0,0 +1,119 @@ |
|||||||
|
from analytic_types import AnalyticUnitId, TimeSeries |
||||||
|
from analytic_types.learning_info import LearningInfo |
||||||
|
from models import Model, ModelState, AnalyticSegment |
||||||
|
import utils |
||||||
|
import utils.meta |
||||||
|
|
||||||
|
import scipy.signal |
||||||
|
from scipy.fftpack import fft |
||||||
|
from typing import Optional, List, Tuple |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
|
||||||
|
EXP_SMOOTHING_FACTOR = 0.01 |
||||||
|
|
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class TriangleModelState(ModelState): |
||||||
|
|
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
confidence: float = 0, |
||||||
|
height_max: float = 0, |
||||||
|
height_min: float = 0, |
||||||
|
**kwargs |
||||||
|
): |
||||||
|
super().__init__(**kwargs) |
||||||
|
self.confidence = confidence |
||||||
|
self.height_max = height_max |
||||||
|
self.height_min = height_min |
||||||
|
|
||||||
|
class TriangleModel(Model): |
||||||
|
|
||||||
|
def get_state(self, cache: Optional[dict] = None) -> TriangleModelState: |
||||||
|
return TriangleModelState.from_json(cache) |
||||||
|
|
||||||
|
def do_fit( |
||||||
|
self, |
||||||
|
dataframe: pd.DataFrame, |
||||||
|
labeled_segments: List[AnalyticSegment], |
||||||
|
deleted_segments: List[AnalyticSegment], |
||||||
|
learning_info: LearningInfo |
||||||
|
) -> None: |
||||||
|
data = utils.cut_dataframe(dataframe) |
||||||
|
data = data['value'] |
||||||
|
self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list) |
||||||
|
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) |
||||||
|
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||||
|
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||||
|
height_list = learning_info.patterns_value |
||||||
|
|
||||||
|
del_conv_list = [] |
||||||
|
delete_pattern_width = [] |
||||||
|
delete_pattern_height = [] |
||||||
|
delete_pattern_timestamp = [] |
||||||
|
for segment in deleted_segments: |
||||||
|
delete_pattern_timestamp.append(segment.pattern_timestamp) |
||||||
|
deleted = utils.get_interval(data, segment.center_index, self.state.window_size) |
||||||
|
deleted = utils.subtract_min_without_nan(deleted) |
||||||
|
del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model) |
||||||
|
if len(del_conv): |
||||||
|
del_conv_list.append(max(del_conv)) |
||||||
|
delete_pattern_height.append(utils.find_confidence(deleted)[1]) |
||||||
|
|
||||||
|
self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list) |
||||||
|
|
||||||
|
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
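# Detection pipeline: find local extrema, keep those that stand out from an
# exponentially smoothed baseline shifted by the learned confidence, filter them
# by height and convolution bounds, then compute the borders of each peak/trough.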
||||||
|
data = utils.cut_dataframe(dataframe) |
||||||
|
data = data['value'] |
||||||
|
|
||||||
|
all_extremum_indexes = self.get_extremum_indexes(data) |
||||||
|
smoothed_data = self.get_smoothed_data(data, self.state.confidence, EXP_SMOOTHING_FACTOR) |
||||||
|
segments = self.get_possible_segments(data, smoothed_data, all_extremum_indexes) |
||||||
|
result = self.__filter_detection(segments, data) |
||||||
|
result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence) |
||||||
|
return result |
||||||
|
|
||||||
|
def __filter_detection(self, segments: List[int], data: pd.Series) -> list: |
||||||
|
delete_list = [] |
||||||
|
variance_error = self.state.window_size |
||||||
|
close_patterns = utils.close_filtering(segments, variance_error) |
||||||
|
segments = self.get_best_pattern(close_patterns, data) |
||||||
|
|
||||||
|
if len(segments) == 0 or len(self.state.pattern_model) == 0: |
||||||
|
return [] |
||||||
|
pattern_data = self.state.pattern_model |
||||||
|
up_height = self.state.height_max * (1 + self.HEIGHT_ERROR) |
||||||
|
low_height = self.state.height_min * (1 - self.HEIGHT_ERROR) |
||||||
|
up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR) |
||||||
|
low_conv = self.state.convolve_min * (1 - self.CONV_ERROR) |
||||||
|
up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR) |
||||||
|
low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR) |
||||||
|
for segment in segments: |
||||||
|
if segment > self.state.window_size: |
||||||
|
convol_data = utils.get_interval(data, segment, self.state.window_size) |
||||||
|
convol_data = utils.subtract_min_without_nan(convol_data) |
||||||
|
percent_of_nans = convol_data.isnull().sum() / len(convol_data) |
||||||
|
if percent_of_nans > 0.5: |
||||||
|
delete_list.append(segment) |
||||||
|
continue |
||||||
|
elif 0 < percent_of_nans <= 0.5: |
||||||
|
nan_list = utils.find_nan_indexes(convol_data) |
||||||
|
convol_data = utils.nan_to_zero(convol_data, nan_list) |
||||||
|
pattern_data = utils.nan_to_zero(pattern_data, nan_list) |
||||||
|
conv = scipy.signal.fftconvolve(convol_data, pattern_data) |
||||||
|
pattern_height = convol_data.values.max() |
||||||
|
if pattern_height > up_height or pattern_height < low_height: |
||||||
|
delete_list.append(segment) |
||||||
|
continue |
||||||
|
if max(conv) > up_conv or max(conv) < low_conv: |
||||||
|
delete_list.append(segment) |
||||||
|
continue |
||||||
|
if max(conv) < up_del_conv and max(conv) > low_del_conv: |
||||||
|
delete_list.append(segment) |
||||||
|
else: |
||||||
|
delete_list.append(segment) |
||||||
|
for item in delete_list: |
||||||
|
segments.remove(item) |
||||||
|
return utils.remove_duplicates_and_sort(segments)
@ -0,0 +1,44 @@ |
|||||||
|
from analytic_types import TimeSeries |
||||||
|
from models import TriangleModel, ModelType |
||||||
|
import utils |
||||||
|
|
||||||
|
import scipy.signal |
||||||
|
from scipy.signal import argrelextrema |
||||||
|
from typing import Optional, List, Tuple |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
class TroughModel(TriangleModel): |
||||||
|
|
||||||
|
def get_model_type(self) -> ModelType: |
||||||
|
return ModelType.TROUGH |
||||||
|
|
||||||
|
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||||
|
data = dataframe['value'] |
||||||
|
segment = data[start: end] |
||||||
|
return segment.idxmin() |
||||||
|
|
||||||
|
def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: |
||||||
|
pattern_list = [] |
||||||
|
for val in close_patterns: |
||||||
|
min_val = data[val[0]] |
||||||
|
ind = val[0] |
||||||
|
for i in val: |
||||||
|
if data[i] < min_val: |
||||||
|
min_val = data[i] |
||||||
|
ind = i |
||||||
|
pattern_list.append(ind) |
||||||
|
return pattern_list |
||||||
|
|
||||||
|
def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: |
||||||
|
return argrelextrema(data.values, np.less)[0] |
||||||
|
|
||||||
|
def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: |
||||||
|
return utils.exponential_smoothing(data - self.state.confidence, alpha) |
||||||
|
|
||||||
|
def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, trough_indexes: List[int]) -> List[int]: |
||||||
|
segments = [] |
||||||
|
for idx in trough_indexes: |
||||||
|
if data[idx] < smoothed_data[idx]: |
||||||
|
segments.append(idx) |
||||||
|
return segments |
@ -0,0 +1,94 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
|
||||||
|
import sys |
||||||
|
import os |
||||||
|
|
||||||
|
|
||||||
|
import config |
||||||
|
import json |
||||||
|
import logging |
||||||
|
import asyncio |
||||||
|
import traceback |
||||||
|
|
||||||
|
import services |
||||||
|
from analytic_unit_manager import AnalyticUnitManager |
||||||
|
|
||||||
|
|
||||||
|
server_service: services.ServerService = None |
||||||
|
data_service: services.DataService = None |
||||||
|
analytic_unit_manager: AnalyticUnitManager = None |
||||||
|
|
||||||
|
logger = logging.getLogger('SERVER') |
||||||
|
|
||||||
|
|
||||||
|
async def handle_task(task: object): |
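# Acknowledge the task with an IN_PROGRESS status (non-PUSH tasks only),
# run it through the analytic unit manager and send the final result back.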
||||||
|
try: |
||||||
|
task_type = task['type'] |
||||||
|
logger.info("Got {} task with id {}, analyticUnitId {}".format(task_type, task['_id'], task['analyticUnitId'])) |
||||||
|
|
||||||
|
task_result_payload = { |
||||||
|
'_id': task['_id'], |
||||||
|
'task': task_type, |
||||||
|
'analyticUnitId': task['analyticUnitId'], |
||||||
|
'status': "IN_PROGRESS" |
||||||
|
} |
||||||
|
|
||||||
|
if task_type != 'PUSH':
||||||
|
message = services.server_service.ServerMessage('TASK_RESULT', task_result_payload) |
||||||
|
await server_service.send_message_to_server(message) |
||||||
|
|
||||||
|
res = await analytic_unit_manager.handle_analytic_task(task) |
||||||
|
res['_id'] = task['_id'] |
||||||
|
|
||||||
|
if task_type != 'PUSH':
||||||
|
message = services.server_service.ServerMessage('TASK_RESULT', res) |
||||||
|
await server_service.send_message_to_server(message) |
||||||
|
|
||||||
|
except Exception as e: |
||||||
|
error_text = traceback.format_exc() |
||||||
|
logger.error("handle_task Exception: '%s'" % error_text) |
||||||
|
|
||||||
|
async def handle_data(task: object): |
||||||
|
res = await analytic_unit_manager.handle_analytic_task(task) |
||||||
|
|
||||||
|
if res['status'] == 'SUCCESS' and res['payload'] is not None: |
||||||
|
res['_id'] = task['_id'] |
||||||
|
message = services.server_service.ServerMessage('PUSH_DETECT', res) |
||||||
|
await server_service.send_message_to_server(message) |
||||||
|
|
||||||
|
async def handle_message(message: services.ServerMessage): |
||||||
|
if message.method == 'TASK': |
||||||
|
await handle_task(message.payload) |
||||||
|
if message.method == 'DATA': |
||||||
|
await handle_data(message.payload) |
||||||
|
|
||||||
|
def init_services(): |
||||||
|
global server_service |
||||||
|
global data_service |
||||||
|
global analytic_unit_manager |
||||||
|
|
||||||
|
logger.info("Starting services...") |
||||||
|
logger.info("Server...") |
||||||
|
server_service = services.ServerService() |
||||||
|
logger.info("Ok") |
||||||
|
logger.info("Data service...") |
||||||
|
data_service = services.DataService(server_service) |
||||||
|
logger.info("Ok") |
||||||
|
logger.info("Analytic unit manager...") |
||||||
|
analytic_unit_manager = AnalyticUnitManager() |
||||||
|
logger.info("Ok") |
||||||
|
|
||||||
|
async def app_loop(): |
||||||
|
async for message in server_service: |
||||||
|
asyncio.ensure_future(handle_message(message)) |
||||||
|
|
||||||
|
|
||||||
|
def run_server(): |
||||||
|
loop = asyncio.get_event_loop() |
||||||
|
#loop.set_debug(True) |
||||||
|
logger.info("Ok") |
||||||
|
init_services() |
||||||
|
print('Analytics process is running') # we need to print to stdout and flush |
||||||
|
sys.stdout.flush() # because node.js expects it |
||||||
|
|
||||||
|
loop.run_until_complete(app_loop()) |
@ -0,0 +1,2 @@ |
|||||||
|
from services.server_service import ServerService, ServerMessage |
||||||
|
from services.data_service import DataService |
@ -0,0 +1,85 @@ |
|||||||
|
from services.server_service import ServerMessage, ServerService |
||||||
|
|
||||||
|
import json |
||||||
|
import asyncio |
||||||
|
|
||||||
|
""" |
||||||
|
This is how you can save a file: |
||||||
|
|
||||||
|
async def test_file_save(): |
||||||
|
async with data_service.open('filename') as f: |
||||||
|
print('write content') |
||||||
|
await f.write('test string') |
||||||
|
|
||||||
|
async with data_service.open('filename') as f: |
||||||
|
content = await f.load() |
||||||
|
print(content) |
||||||
|
print('test file ok') |
||||||
|
""" |
||||||
|
|
||||||
|
|
||||||
|
LOCK_WAIT_SLEEP_TIMESPAN = 100 # ms
||||||
|
|
||||||
|
class FileDescriptor: |
||||||
|
def __init__(self, filename: str, data_service): |
||||||
|
self.filename = filename |
||||||
|
self.data_service = data_service |
||||||
|
|
||||||
|
async def write(self, content: str): |
||||||
|
await self.data_service.save_file_content(self, content) |
||||||
|
|
||||||
|
async def load(self) -> str: |
||||||
|
return await self.data_service.load_file_content(self) |
||||||
|
|
||||||
|
async def __aenter__(self): |
||||||
|
await self.data_service.wait_and_lock(self) |
||||||
|
return self |
||||||
|
|
||||||
|
async def __aexit__(self, *exc): |
||||||
|
await self.data_service.unlock(self) |
||||||
|
|
||||||
|
|
||||||
|
class DataService: |
||||||
|
|
||||||
|
def __init__(self, server_service: ServerService): |
||||||
|
"""Creates fs over network via server_service""" |
||||||
|
self.server_service = server_service |
||||||
|
self.locks = set() |
||||||
|
|
||||||
|
def open(self, filename: str) -> FileDescriptor: |
||||||
|
return FileDescriptor(filename, self) |
||||||
|
|
||||||
|
async def wait_and_lock(self, file_descriptor: FileDescriptor): |
||||||
|
filename = file_descriptor.filename |
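# Cooperative in-process lock: poll until no other coroutine holds this
# filename, then claim it.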
||||||
|
while True: |
||||||
|
if filename in self.locks: |
||||||
|
await asyncio.sleep(LOCK_WAIT_SLEEP_TIMESPAN / 1000) # the timespan is in ms, asyncio.sleep expects seconds
||||||
|
continue |
||||||
|
else: |
||||||
|
self.locks.add(filename) |
||||||
|
break |
||||||
|
|
||||||
|
async def unlock(self, file_descriptor: FileDescriptor): |
||||||
|
filename = file_descriptor.filename |
||||||
|
self.locks.remove(filename) |
||||||
|
|
||||||
|
async def save_file_content(self, file_descriptor: FileDescriptor, content: str): |
||||||
|
""" Saves json - serializable obj with file_descriptor.filename """ |
||||||
|
self.__check_lock(file_descriptor) |
||||||
|
message_payload = { |
||||||
|
'filename': file_descriptor.filename, |
||||||
|
'content': content |
||||||
|
} |
||||||
|
message = ServerMessage('FILE_SAVE', message_payload) |
||||||
|
await self.server_service.send_request_to_server(message) |
||||||
|
|
||||||
|
async def load_file_content(self, file_descriptor: FileDescriptor) -> str: |
||||||
|
self.__check_lock(file_descriptor) |
||||||
|
message_payload = { 'filename': file_descriptor.filename } |
||||||
|
message = ServerMessage('FILE_LOAD', message_payload) |
||||||
|
return await self.server_service.send_request_to_server(message) |
||||||
|
|
||||||
|
def __check_lock(self, file_descriptor: FileDescriptor): |
||||||
|
filename = file_descriptor.filename |
||||||
|
if filename not in self.locks: |
||||||
|
raise RuntimeError('No lock for file %s' % filename) |
@ -0,0 +1,149 @@ |
|||||||
|
import config |
||||||
|
|
||||||
|
import websockets |
||||||
|
|
||||||
|
import logging |
||||||
|
import json |
||||||
|
import asyncio |
||||||
|
import traceback |
||||||
|
|
||||||
|
import utils.concurrent |
||||||
|
import utils.meta |
||||||
|
|
||||||
|
from typing import Optional |
||||||
|
|
||||||
|
logger = logging.getLogger('SERVER_SERVICE') |
||||||
|
|
||||||
|
|
||||||
|
PARSE_MESSAGE_OR_SAVE_LOOP_INTERRUPTED = False |
||||||
|
SERVER_SOCKET_RECV_LOOP_INTERRUPTED = False |
||||||
|
|
||||||
|
|
||||||
|
@utils.meta.JSONClass |
||||||
|
class ServerMessage: |
||||||
|
def __init__(self, method: str, payload: object = None, request_id: int = None): |
||||||
|
# TODO: add error type / case |
||||||
|
self.method = method |
||||||
|
self.payload = payload |
||||||
|
self.request_id = request_id |
||||||
|
|
||||||
|
|
||||||
|
class ServerService(utils.concurrent.AsyncZmqActor): |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
super(ServerService, self).__init__() |
||||||
|
self.__aiter_inited = False |
||||||
|
# the socket is the client protocol object returned by websockets.connect()
||||||
|
self.__server_socket: Optional[websockets.WebSocketClientProtocol] = None
||||||
|
self.__request_next_id = 1 |
||||||
|
self.__reconnecting = False |
||||||
|
self.__responses = dict() |
||||||
|
self.start() |
||||||
|
|
||||||
|
async def send_message_to_server(self, message: ServerMessage): |
||||||
|
# Following message will be sent to actor's self._on_message() |
||||||
|
# We do it because we created self.__server_socket in the self._run() method,
||||||
|
# which runs in the actor's thread, not in the thread that created ServerService
||||||
|
|
||||||
|
# in theory, we can try to use zmq.proxy: |
||||||
|
# zmq.proxy(self.__actor_socket, self.__server_socket) |
||||||
|
# and do here something like: |
||||||
|
# self.__actor_socket.send_string(json.dumps(message.to_json())) |
||||||
|
await self._put_message_to_thread(json.dumps(message.to_json())) |
||||||
|
|
||||||
|
async def send_request_to_server(self, message: ServerMessage) -> object: |
||||||
|
if message.request_id is not None: |
||||||
|
raise ValueError('Message can`t have request_id before it is scheduled') |
||||||
|
request_id = message.request_id = self.__request_next_id |
||||||
|
self.__request_next_id = self.__request_next_id + 1
||||||
|
asyncio.ensure_future(self.send_message_to_server(message)) |
||||||
|
# you should await self.__responses[request_id] which should be a task, |
||||||
|
# which you resolve somewhere else |
||||||
|
while request_id not in self.__responses: |
||||||
|
await asyncio.sleep(1) |
||||||
|
response = self.__responses[request_id] |
||||||
|
del self.__responses[request_id] |
||||||
|
return response |
||||||
|
|
||||||
|
def __aiter__(self): |
||||||
|
if self.__aiter_inited: |
||||||
|
raise RuntimeError('Can`t iterate twice') |
||||||
|
self.__aiter_inited = True
||||||
|
return self |
||||||
|
|
||||||
|
async def __anext__(self) -> ServerMessage: |
||||||
|
while not PARSE_MESSAGE_OR_SAVE_LOOP_INTERRUPTED: |
||||||
|
thread_message = await self._recv_message_from_thread() |
||||||
|
server_message = self.__parse_message_or_save(thread_message) |
||||||
|
if server_message is None: |
||||||
|
continue |
||||||
|
else: |
||||||
|
return server_message |
||||||
|
|
||||||
|
async def _run_thread(self): |
||||||
|
logger.info("Binding to %s ..." % config.HASTIC_SERVER_URL) |
||||||
|
# TODO: consider to use async context for socket |
||||||
|
await self.__server_socket_recv_loop() |
||||||
|
|
||||||
|
async def _on_message_to_thread(self, message: str): |
||||||
|
if self.__server_socket is None or self.__server_socket.closed: |
||||||
|
await self.__reconnect() |
||||||
|
await self.__server_socket.send(message) |
||||||
|
|
||||||
|
async def __server_socket_recv_loop(self): |
||||||
|
while not SERVER_SOCKET_RECV_LOOP_INTERRUPTED: |
||||||
|
received_string = await self.__reconnect_recv() |
||||||
|
if received_string == 'PING': |
||||||
|
asyncio.ensure_future(self.__handle_ping()) |
||||||
|
else: |
||||||
|
asyncio.ensure_future(self._send_message_from_thread(received_string)) |
||||||
|
|
||||||
|
async def __reconnect(self): |
||||||
|
if not self.__reconnecting: |
||||||
|
self.__reconnecting = True |
||||||
|
else: |
||||||
|
while self.__reconnecting: |
||||||
|
await asyncio.sleep(1) |
||||||
|
return |
||||||
|
|
||||||
|
if self.__server_socket is not None:
||||||
|
await self.__server_socket.close() |
||||||
|
self.__server_socket = await websockets.connect(config.HASTIC_SERVER_URL) |
||||||
|
first_message = await self.__server_socket.recv() |
||||||
|
if first_message == 'EALREADYEXISTING': |
||||||
|
raise ConnectionError('Can`t connect as a second analytics') |
||||||
|
self.__reconnecting = False |
||||||
|
|
||||||
|
async def __reconnect_recv(self) -> str: |
||||||
|
while not SERVER_SOCKET_RECV_LOOP_INTERRUPTED: |
||||||
|
try: |
||||||
|
if self.__server_socket is None or self.__server_socket.closed: |
||||||
|
await self.__reconnect() |
||||||
|
return await self.__server_socket.recv() |
||||||
|
except (ConnectionRefusedError, websockets.ConnectionClosedError): |
||||||
|
if self.__server_socket is not None:
||||||
|
await self.__server_socket.close() |
||||||
|
# TODO: this logic increases the number of ThreadPoolExecutor |
||||||
|
self.__server_socket = None |
||||||
|
# TODO: move to config |
||||||
|
reconnect_delay = 3 |
||||||
|
print('connection is refused or lost, trying to reconnect in %s seconds' % reconnect_delay) |
||||||
|
await asyncio.sleep(reconnect_delay) |
||||||
|
raise InterruptedError() |
||||||
|
|
||||||
|
async def __handle_ping(self): |
||||||
|
if self.__server_socket is None or self.__server_socket.closed: |
||||||
|
await self.__reconnect() |
||||||
|
await self.__server_socket.send('PONG') |
||||||
|
|
||||||
|
def __parse_message_or_save(self, text: str) -> Optional[ServerMessage]: |
||||||
|
try: |
||||||
|
message_object = json.loads(text) |
||||||
|
message = ServerMessage.from_json(message_object) |
||||||
|
if message.request_id is not None: |
||||||
|
self.__responses[message_object['requestId']] = message.payload |
||||||
|
return None |
||||||
|
return message |
||||||
|
except Exception: |
||||||
|
error_text = traceback.format_exc() |
||||||
|
logger.error("__handle_message Exception: '%s'" % error_text) |
@ -0,0 +1,4 @@ |
|||||||
|
from utils.common import * |
||||||
|
from utils.time import * |
||||||
|
from utils.dataframe import * |
||||||
|
from utils.meta import * |
@ -0,0 +1,443 @@ |
|||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
import scipy.signal |
||||||
|
from scipy.fftpack import fft |
||||||
|
from scipy.signal import argrelextrema |
||||||
|
from scipy.stats import gaussian_kde |
||||||
|
from scipy.stats.stats import pearsonr |
||||||
|
import math |
||||||
|
from typing import Optional, Union, List, Generator, Tuple |
||||||
|
import utils |
||||||
|
import logging |
||||||
|
from itertools import islice |
||||||
|
from collections import deque |
||||||
|
from analytic_types import TimeSeries |
||||||
|
from analytic_types.segment import Segment |
||||||
|
|
||||||
|
SHIFT_FACTOR = 0.05 |
||||||
|
CONFIDENCE_FACTOR = 0.5 |
||||||
|
SMOOTHING_FACTOR = 5 |
||||||
|
MEASUREMENT_ERROR = 0.05 |
||||||
|
|
||||||
|
|
||||||
|
def exponential_smoothing(series: pd.Series, alpha: float, last_smoothed_value: Optional[float] = None) -> pd.Series: |
||||||
|
if alpha < 0 or alpha > 1: |
||||||
|
raise ValueError('Alpha must be within the boundaries: 0 <= alpha <= 1') |
||||||
|
if len(series) < 2: |
||||||
|
return series |
||||||
|
if last_smoothed_value is None: |
||||||
|
result = [series.values[0]] |
||||||
|
else: |
||||||
|
result = [float(last_smoothed_value)] |
||||||
|
if np.isnan(result[0]):
||||||
|
result = [0] |
||||||
|
for n in range(1, len(series)): |
||||||
|
if np.isnan(series[n]): |
||||||
|
result.append((1 - alpha) * result[n - 1]) |
||||||
|
series.values[n] = result[n] |
||||||
|
else: |
||||||
|
result.append(alpha * series[n] + (1 - alpha) * result[n - 1]) |
||||||
|
|
||||||
|
assert len(result) == len(series), \ |
||||||
|
f'len of smoothed data {len(result)} != len of original dataset {len(series)}' |
||||||
|
return pd.Series(result, index = series.index) |
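# Illustrative example (hypothetical values, not from the original code):
# exponential_smoothing(pd.Series([1.0, 2.0, 3.0]), alpha=0.5)
# returns a series of approximately [1.0, 1.5, 2.25].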
||||||
|
|
||||||
|
def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) -> list: |
||||||
|
pattern_list = [] |
||||||
|
right_bound = len(data) - length - 1 |
||||||
|
for i in range(right_bound): |
||||||
|
for x in range(1, length): |
||||||
|
if pattern_type == 'jump': |
||||||
|
if(data[i + x] > data[i] + height): |
||||||
|
pattern_list.append(i) |
||||||
|
elif pattern_type == 'drop': |
||||||
|
if(data[i + x] < data[i] - height): |
||||||
|
pattern_list.append(i) |
||||||
|
return pattern_list |
||||||
|
|
||||||
|
def timestamp_to_index(dataframe: pd.DataFrame, timestamp: int): |
||||||
|
data = dataframe['timestamp'] |
||||||
|
idx, = np.where(data >= timestamp) |
||||||
|
if len(idx) > 0: |
||||||
|
time_ind = int(idx[0]) |
||||||
|
else: |
||||||
|
raise ValueError('Dataframe doesn`t contain timestamp: {}'.format(timestamp)) |
||||||
|
return time_ind |
||||||
|
|
||||||
|
def find_peaks(data: Generator[float, None, None], size: int) -> Generator[Tuple[int, float], None, None]:
||||||
|
window = deque(islice(data, size * 2 + 1)) |
||||||
|
for i, v in enumerate(data, size): |
||||||
|
current = window[size] |
||||||
|
#TODO: remove max() from loop |
||||||
|
if current == max(window) and current != window[size + 1]: |
||||||
|
yield i, current |
||||||
|
window.append(v) |
||||||
|
window.popleft() |
||||||
|
|
||||||
|
def ar_mean(numbers: List[float]) -> float:
||||||
|
return float(sum(numbers)) / max(len(numbers), 1) |
||||||
|
|
||||||
|
def get_av_model(patterns_list: list): |
||||||
|
if not patterns_list: return [] |
||||||
|
patterns_list = get_same_length(patterns_list) |
||||||
|
value_list = list(map(list, zip(*patterns_list))) |
||||||
|
return list(map(ar_mean, value_list)) |
||||||
|
|
||||||
|
def get_same_length(patterns_list: list): |
||||||
|
for index in range(len(patterns_list)): |
||||||
|
if type(patterns_list[index]) == pd.Series: |
||||||
|
patterns_list[index] = patterns_list[index].tolist() |
||||||
|
patterns_list = list(filter(None, patterns_list)) |
||||||
|
max_length = max(map(len, patterns_list)) |
||||||
|
for pat in patterns_list: |
||||||
|
if len(pat) < max_length: |
||||||
|
length_difference = max_length - len(pat) |
||||||
|
added_values = list(0 for _ in range(length_difference)) |
||||||
|
pat.extend(added_values) |
||||||
|
return patterns_list |
||||||
|
|
||||||
|
def close_filtering(pattern_list: List[int], win_size: int) -> TimeSeries: |
||||||
|
if len(pattern_list) == 0: |
||||||
|
return [] |
||||||
|
s = [[pattern_list[0]]] |
||||||
|
k = 0 |
||||||
|
for i in range(1, len(pattern_list)): |
||||||
|
if pattern_list[i] - win_size <= s[k][-1]: |
||||||
|
s[k].append(pattern_list[i]) |
||||||
|
else: |
||||||
|
k += 1 |
||||||
|
s.append([pattern_list[i]]) |
||||||
|
return s |
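# Illustrative example (hypothetical values, not from the original code):
# close_filtering([1, 2, 10], win_size=3) -> [[1, 2], [10]]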
||||||
|
|
||||||
|
def merge_intersecting_segments(segments: List[Segment], time_step: int) -> List[Segment]: |
||||||
|
''' |
||||||
|
Find intersecting segments in the segments list and merge them.
||||||
|
''' |
||||||
|
if len(segments) < 2: |
||||||
|
return segments |
||||||
|
segments = sorted(segments, key = lambda segment: segment.from_timestamp) |
||||||
|
previous_segment = segments[0] |
||||||
|
for i in range(1, len(segments)): |
||||||
|
if segments[i].from_timestamp <= previous_segment.to_timestamp + time_step: |
||||||
|
segments[i].message = segments[-1].message |
||||||
|
segments[i].from_timestamp = min(previous_segment.from_timestamp, segments[i].from_timestamp) |
||||||
|
segments[i].to_timestamp = max(previous_segment.to_timestamp, segments[i].to_timestamp) |
||||||
|
segments[i - 1] = None |
||||||
|
previous_segment = segments[i] |
||||||
|
segments = [x for x in segments if x is not None] |
||||||
|
return segments |
||||||
|
|
||||||
|
def find_interval(dataframe: pd.DataFrame) -> int: |
||||||
|
if len(dataframe) < 2: |
||||||
|
raise ValueError('Can`t find interval: length of data must be at least 2') |
||||||
|
delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0]) |
||||||
|
return delta |
||||||
|
|
||||||
|
def get_start_and_end_of_segments(segments: List[List[int]]) -> TimeSeries: |
||||||
|
''' |
||||||
|
find start and end of segment: [1, 2, 3, 4] -> [1, 4] |
||||||
|
if a segment has only one index, it is doubled: [7] -> [7, 7]
||||||
|
''' |
||||||
|
result = [] |
||||||
|
for segment in segments: |
||||||
|
if len(segment) == 0: |
||||||
|
continue |
||||||
|
elif len(segment) > 1: |
||||||
|
segment = [segment[0], segment[-1]] |
||||||
|
else: |
||||||
|
segment = [segment[0], segment[0]] |
||||||
|
result.append(segment) |
||||||
|
return result |
||||||
|
|
||||||
|
def best_pattern(pattern_list: list, data: pd.Series, dir: str) -> list: |
||||||
|
new_pattern_list = [] |
||||||
|
for val in pattern_list: |
||||||
|
max_val = data[val[0]] |
||||||
|
min_val = data[val[0]] |
||||||
|
ind = val[0] |
||||||
|
for i in val: |
||||||
|
if dir == 'max': |
||||||
|
if data[i] > max_val: |
||||||
|
max_val = data[i] |
||||||
|
ind = i |
||||||
|
else: |
||||||
|
if data[i] < min_val: |
||||||
|
min_val = data[i] |
||||||
|
ind = i |
||||||
|
new_pattern_list.append(ind) |
||||||
|
return new_pattern_list |
||||||
|
|
||||||
|
def find_nan_indexes(segment: pd.Series) -> list: |
||||||
|
nan_list = pd.isnull(segment) |
||||||
|
nan_list = np.array(nan_list) |
||||||
|
nan_indexes = np.where(nan_list)[0]
||||||
|
return list(nan_indexes) |
||||||
|
|
||||||
|
def check_nan_values(segment: Union[pd.Series, list]) -> Union[pd.Series, list]: |
||||||
|
nan_list = utils.find_nan_indexes(segment) |
||||||
|
if len(nan_list) > 0: |
||||||
|
segment = utils.nan_to_zero(segment, nan_list) |
||||||
|
return segment |
||||||
|
|
||||||
|
def nan_to_zero(segment: Union[pd.Series, list], nan_list: list) -> Union[pd.Series, list]: |
||||||
|
if type(segment) == pd.Series: |
||||||
|
for val in nan_list: |
||||||
|
segment.values[val] = 0 |
||||||
|
else: |
||||||
|
for val in nan_list: |
||||||
|
segment[val] = 0 |
||||||
|
return segment |
||||||
|
|
||||||
|
def find_confidence(segment: pd.Series) -> Tuple[float, float]:
||||||
|
segment = utils.check_nan_values(segment) |
||||||
|
segment_min = min(segment) |
||||||
|
segment_max = max(segment) |
||||||
|
height = segment_max - segment_min |
||||||
|
if height: |
||||||
|
return (CONFIDENCE_FACTOR * height, height) |
||||||
|
else: |
||||||
|
return (0, 0) |
||||||
|
|
||||||
|
def find_width(pattern: pd.Series, selector: bool) -> int: |
||||||
|
pattern = pattern.values |
||||||
|
center = utils.find_extremum_index(pattern, selector) |
||||||
|
pattern_left = pattern[:center] |
||||||
|
pattern_right = pattern[center:] |
||||||
|
left_extremum_index = utils.find_last_extremum(pattern_left, selector) |
||||||
|
right_extremum_index = utils.find_extremum_index(pattern_right, not selector) |
||||||
|
left_width = center - left_extremum_index |
||||||
|
right_width = right_extremum_index + 1 |
||||||
|
return right_width + left_width |
||||||
|
|
||||||
|
def find_last_extremum(segment: np.ndarray, selector: bool) -> int: |
||||||
|
segment = segment[::-1] |
||||||
|
first_extremum_ind = find_extremum_index(segment, not selector) |
||||||
|
last_extremum_ind = len(segment) - first_extremum_ind - 1 |
||||||
|
return last_extremum_ind |
||||||
|
|
||||||
|
def find_extremum_index(segment: np.ndarray, selector: bool) -> int: |
||||||
|
if selector: |
||||||
|
return segment.argmax() |
||||||
|
else: |
||||||
|
return segment.argmin() |
||||||
|
|
||||||
|
def get_interval(data: pd.Series, center: int, window_size: int, normalization = False) -> pd.Series: |
||||||
|
""" |
||||||
|
Get an interval of 2 * window_size + 1 points:
||||||
|
window_size to the left, window_size to the right of center |
||||||
|
If normalization == True - subtract minimum from the interval |
||||||
|
""" |
||||||
|
if center >= len(data): |
||||||
|
logging.warning('Pattern center {} is out of data with len {}'.format(center, len(data))) |
||||||
|
return [] |
||||||
|
left_bound = center - window_size |
||||||
|
right_bound = center + window_size + 1 |
||||||
|
if left_bound < 0: |
||||||
|
left_bound = 0 |
||||||
|
if right_bound > len(data): |
||||||
|
right_bound = len(data) |
||||||
|
result_interval = data[left_bound: right_bound] |
||||||
|
if normalization: |
||||||
|
result_interval = subtract_min_without_nan(result_interval) |
||||||
|
return result_interval |
||||||
|
|
||||||
|
def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> TimeSeries: |
||||||
|
""" |
||||||
|
Find start and end of patterns for peak |
||||||
|
max_border_factor - scales window_size when searching for the borders of a pattern
||||||
|
if inverse == True - segments will be inverted (trough -> peak / peak -> trough)
||||||
|
""" |
||||||
|
if len(pattern_centers) == 0: |
||||||
|
return [] |
||||||
|
border_list = [] |
||||||
|
window_size = math.ceil(max_border_factor * window_size) |
||||||
|
for center in pattern_centers: |
||||||
|
current_pattern = get_interval(data, center, window_size, True) |
||||||
|
if inverse: |
||||||
|
current_pattern = inverse_segment(current_pattern) |
||||||
|
current_pattern = current_pattern - confidence |
||||||
|
left_segment = current_pattern[:window_size] # a.iloc[a.index < center] |
||||||
|
right_segment = current_pattern[window_size:] # a.iloc[a.index >= center] |
||||||
|
left_border = get_end_of_segment(left_segment, descending = False) |
||||||
|
right_border = get_end_of_segment(right_segment) |
||||||
|
border_list.append((left_border, right_border)) |
||||||
|
return border_list |
||||||
|
|
||||||
|
def get_end_of_segment(segment: pd.Series, skip_positive_values = True, descending = True) -> int: |
||||||
|
""" |
||||||
|
Find end of descending or ascending part of pattern |
||||||
|
Allowable error is 1 index |
||||||
|
""" |
||||||
|
if not descending: |
||||||
|
segment = segment.iloc[::-1] |
||||||
|
if len(segment) == 0: |
||||||
|
return 1 |
||||||
|
for idx in range(1, len(segment) - 1): |
||||||
|
if skip_positive_values and segment.values[idx] > 0: |
||||||
|
continue |
||||||
|
if segment.values[idx] >= segment.values[idx - 1]: |
||||||
|
return segment.index[idx - 1] |
||||||
|
return segment.index[-1] |
||||||
|
|
||||||
|
def inverse_segment(segment: pd.Series) -> pd.Series: |
||||||
|
""" |
||||||
|
Convert a trough to a peak and vice versa
||||||
|
""" |
||||||
|
if len(segment) > 0: |
||||||
|
rev_val = max(segment.values) |
||||||
|
for idx in range(len(segment)): |
||||||
|
segment.values[idx] = math.fabs(segment.values[idx] - rev_val) |
||||||
|
return segment |
||||||
|
|
||||||
|
def subtract_min_without_nan(segment: pd.Series) -> pd.Series: |
||||||
|
if len(segment) == 0: |
||||||
|
return [] |
||||||
|
nan_list = utils.find_nan_indexes(segment) |
||||||
|
if len(nan_list) > 0: |
||||||
|
return segment |
||||||
|
else: |
||||||
|
segment = segment - min(segment) |
||||||
|
return segment |
||||||
|
|
||||||
|
def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list: |
||||||
|
labeled_segment = [] |
||||||
|
convolve_list = [] |
||||||
|
for segment in segments: |
||||||
|
labeled_segment = utils.get_interval(data, segment, window_size) |
||||||
|
labeled_segment = utils.subtract_min_without_nan(labeled_segment) |
||||||
|
labeled_segment = utils.check_nan_values(labeled_segment) |
||||||
|
auto_convolve = scipy.signal.fftconvolve(labeled_segment, labeled_segment) |
||||||
|
convolve_segment = scipy.signal.fftconvolve(labeled_segment, av_model) |
||||||
|
if len(auto_convolve) > 0: |
||||||
|
convolve_list.append(max(auto_convolve)) |
||||||
|
if len(convolve_segment) > 0: |
||||||
|
convolve_list.append(max(convolve_segment)) |
||||||
|
return convolve_list |
||||||
|
|
||||||
|
def get_correlation_gen(data: pd.Series, window_size: int, pattern_model: List[float]) -> Generator[float, None, None]: |
||||||
|
# Generate correlation values between a sliding window over the data and pattern_model
||||||
|
for i in range(window_size, len(data) - window_size): |
||||||
|
watch_data = data[i - window_size: i + window_size + 1] |
||||||
|
correlation = pearsonr(watch_data, pattern_model) |
||||||
|
if len(correlation) > 0: |
||||||
|
yield(correlation[0]) |
||||||
|
|
||||||
|
def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list: |
||||||
|
labeled_segment = [] |
||||||
|
correlation_list = [] |
||||||
|
p_value_list = [] |
||||||
|
for segment in segments: |
||||||
|
labeled_segment = utils.get_interval(data, segment, window_size) |
||||||
|
labeled_segment = utils.subtract_min_without_nan(labeled_segment) |
||||||
|
labeled_segment = utils.check_nan_values(labeled_segment) |
||||||
|
if len(labeled_segment) == 0 or len(labeled_segment) != len(av_model): |
||||||
|
continue |
||||||
|
correlation = pearsonr(labeled_segment, av_model) |
||||||
|
if len(correlation) > 1: |
||||||
|
correlation_list.append(correlation[0]) |
||||||
|
p_value_list.append(correlation[1]) |
||||||
|
return correlation_list |
||||||
|
|
||||||
|
def get_distribution_density(segment: pd.Series) -> Tuple[float, float, float]:
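# Estimate three levels of a stair-like segment from a Gaussian KDE of its values:
# the median crossing level and the lower/upper plateau lines; fall back to
# min/max-based estimates when the KDE has fewer than two peaks.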
||||||
|
segment.dropna(inplace = True) |
||||||
|
if len(segment) < 2 or len(segment.nonzero()[0]) == 0: |
||||||
|
return (0, 0, 0) |
||||||
|
min_jump = min(segment) |
||||||
|
max_jump = max(segment) |
||||||
|
pdf = gaussian_kde(segment) |
||||||
|
x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment)) |
||||||
|
y = pdf(x) |
||||||
|
ax_list = list(zip(x, y)) |
||||||
|
ax_list = np.array(ax_list, np.float32) |
||||||
|
antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0] |
||||||
|
peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0] |
||||||
|
try: |
||||||
|
min_peak_index = peaks_kde[0] |
||||||
|
segment_min_line = ax_list[min_peak_index, 0] |
||||||
|
max_peak_index = peaks_kde[1] |
||||||
|
segment_max_line = ax_list[max_peak_index, 0] |
||||||
|
segment_median = ax_list[antipeaks_kde[0], 0] |
||||||
|
except IndexError: |
||||||
|
segment_max_line = max_jump * (1 - SHIFT_FACTOR) |
||||||
|
segment_min_line = min_jump * (1 - SHIFT_FACTOR) |
||||||
|
segment_median = (max_jump - min_jump) / 2 + min_jump |
||||||
|
return segment_median, segment_max_line, segment_min_line |
||||||
|
|
||||||
|
def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> Tuple[float, int]:
||||||
|
segment = segment_data |
||||||
|
if len(segment_data) > SMOOTHING_FACTOR * 3: |
||||||
|
flat_segment = segment_data.rolling(window = SMOOTHING_FACTOR).mean() |
||||||
|
segment = flat_segment.dropna() |
||||||
|
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment) |
||||||
|
height = 0.95 * (segment_max_line - segment_min_line) |
||||||
|
length = utils.get_pattern_length(segment_data, segment_min_line, segment_max_line, pat_type) |
||||||
|
return height, length |
||||||
|
|
||||||
|
def find_pattern_center(segment_data: pd.Series, segment_from_index: int, pattern_type: str): |
||||||
|
segment_median = utils.get_distribution_density(segment_data)[0] |
||||||
|
cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pattern_type) |
||||||
|
if len(cen_ind) > 0: |
||||||
|
pat_center = cen_ind[0] |
||||||
|
segment_cent_index = pat_center + segment_from_index |
||||||
|
else: |
||||||
|
segment_cent_index = math.ceil((len(segment_data)) / 2) |
||||||
|
return segment_cent_index |
||||||
|
|
||||||
|
def get_pattern_length(segment_data: pd.Series, segment_min_line: float, segment_max_line: float, pat_type: str) -> int: |
||||||
|
# TODO: move function to jump & drop merged model |
||||||
|
segment_max = max(segment_data) |
||||||
|
segment_min = min(segment_data) |
||||||
|
# TODO: use better way |
||||||
|
if segment_min_line <= segment_min: |
||||||
|
segment_min_line = segment_min * (1 + MEASUREMENT_ERROR) |
||||||
|
if segment_max_line >= segment_max: |
||||||
|
segment_max_line = segment_max * (1 - MEASUREMENT_ERROR) |
||||||
|
min_line = [] |
||||||
|
max_line = [] |
||||||
|
for i in range(len(segment_data)): |
||||||
|
min_line.append(segment_min_line) |
||||||
|
max_line.append(segment_max_line) |
||||||
|
min_line = np.array(min_line) |
||||||
|
max_line = np.array(max_line) |
||||||
|
segment_array = np.array(segment_data.tolist()) |
||||||
|
idmin = np.argwhere(np.diff(np.sign(min_line - segment_array)) != 0).reshape(-1) |
||||||
|
idmax = np.argwhere(np.diff(np.sign(max_line - segment_array)) != 0).reshape(-1) |
||||||
|
if len(idmin) > 0 and len(idmax) > 0: |
||||||
|
if pat_type == 'jump': |
||||||
|
result_length = idmax[0] - idmin[-1] + 1 |
||||||
|
elif pat_type == 'drop': |
||||||
|
result_length = idmin[0] - idmax[-1] + 1 |
||||||
|
return result_length if result_length > 0 else 0 |
||||||
|
else: |
||||||
|
return 0 |
||||||
|
|
||||||
|
def pattern_intersection(segment_data: list, median: float, pattern_type: str) -> list: |
||||||
|
center_index = [] |
||||||
|
if pattern_type == 'jump': |
||||||
|
for i in range(1, len(segment_data) - 1): |
||||||
|
if segment_data[i - 1] < median and segment_data[i + 1] > median: |
||||||
|
center_index.append(i) |
||||||
|
elif pattern_type == 'drop': |
||||||
|
for i in range(1, len(segment_data) - 1): |
||||||
|
if segment_data[i - 1] > median and segment_data[i + 1] < median: |
||||||
|
center_index.append(i) |
||||||
|
delete_index = [] |
||||||
|
for i in range(1, len(center_index)): |
||||||
|
if center_index[i] == center_index[i - 1] + 1: |
||||||
|
delete_index.append(i - 1) |
||||||
|
|
||||||
|
return [x for (idx, x) in enumerate(center_index) if idx not in delete_index] |
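For reference, this is how the median-crossing detection behaves on a toy 'jump' series, assuming the analytics package is on PYTHONPATH so the function is reachable as `utils.pattern_intersection` (the same way find_pattern_center calls it):

```
import utils

segment = [1, 1, 1, 5, 5, 5, 1, 1, 5, 5]  # jumps around indexes 3 and 8
median = 3.0

# Consecutive crossing indexes are deduplicated, so one index per jump remains.
print(utils.pattern_intersection(segment, median, 'jump'))  # -> [3, 8]
```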
||||||
|
|
||||||
|
def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame: |
||||||
|
data_min = data['value'].min() |
||||||
|
if not np.isnan(data_min) and data_min > 0: |
||||||
|
data['value'] = data['value'] - data_min |
||||||
|
return data |
||||||
|
|
||||||
|
def get_min_max(array: list, default): |
||||||
|
return float(min(array, default=default)), float(max(array, default=default)) |
||||||
|
|
||||||
|
def remove_duplicates_and_sort(array: list) -> list: |
||||||
|
array = list(frozenset(array)) |
||||||
|
array.sort() |
||||||
|
return array |
@ -0,0 +1,130 @@ |
|||||||
|
import asyncio |
||||||
|
import threading |
||||||
|
import zmq |
||||||
|
import zmq.asyncio |
||||||
|
from abc import ABC, abstractmethod |
||||||
|
|
||||||
|
|
||||||
|
# This const defines Thread <-> Actor zmq one-to-one connection |
||||||
|
# We create a separate zmq context, so the zmq address 'inproc://xxx' doesn't matter
||||||
|
# This is only the default address; you may use AsyncZmqThread with a different one
||||||
|
ZMQ_THREAD_ACTOR_ADDR = 'inproc://xxx' |
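A plain synchronous sketch of the PAIR-over-inproc pattern described above; the classes below do the same thing with zmq.asyncio, and the address here is just an illustration:

```
import zmq

context = zmq.Context.instance()

actor_side = context.socket(zmq.PAIR)
actor_side.bind('inproc://example')      # for inproc, bind must happen before connect

thread_side = context.socket(zmq.PAIR)
thread_side.connect('inproc://example')  # one-to-one: exactly one peer on each side

actor_side.send_string('ping')
print(thread_side.recv_string())         # -> ping

thread_side.send_string('pong')
print(actor_side.recv_string())          # -> pong
```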
||||||
|
|
||||||
|
|
||||||
|
# Inheritance order (threading.Thread, ABC) is essential; otherwise it causes an MRO error.
||||||
|
class AsyncZmqThread(threading.Thread, ABC): |
||||||
|
"""Class for wrapping zmq socket into a thread with it's own asyncio event loop |
||||||
|
|
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
zmq_context: zmq.asyncio.Context, |
||||||
|
zmq_socket_addr: str, |
||||||
|
zmq_socket_type = zmq.PAIR |
||||||
|
): |
||||||
|
super(AsyncZmqThread, self).__init__() |
||||||
|
self._zmq_context = zmq_context # you can use it in child classes |
||||||
|
self.__zmq_socket_addr = zmq_socket_addr |
||||||
|
self.__zmq_socket_type = zmq_socket_type |
||||||
|
self.__asyncio_loop = None |
||||||
|
self.__zmq_socket = None |
||||||
|
|
||||||
|
async def __message_recv_loop(self): |
||||||
|
while True: |
||||||
|
text = await self.__zmq_socket.recv_string() |
||||||
|
asyncio.ensure_future(self._on_message_to_thread(text)) |
||||||
|
|
||||||
|
async def _send_message_from_thread(self, message: str): |
||||||
|
await self.__zmq_socket.send_string(message) |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
async def _on_message_to_thread(self, message: str): |
||||||
|
"""Override this method to receive messages""" |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
async def _run_thread(self): |
||||||
|
"""Override this method to do some async work. |
||||||
|
This method uses a separate thread. |
||||||
|
|
||||||
|
Note that this method blocks its thread if you never await anything.
||||||
|
|
||||||
|
Example: |
||||||
|
|
||||||
|
``` |
||||||
|
async def _run_thread(self): |
||||||
|
i = 0 |
||||||
|
while True: |
||||||
|
await asyncio.sleep(1) |
||||||
|
i += 1 |
||||||
|
await self._send_message_from_thread(f'{self.name}: ping {i}') |
||||||
|
``` |
||||||
|
""" |
||||||
|
|
||||||
|
def run(self): |
||||||
|
self.__asyncio_loop = asyncio.new_event_loop() |
||||||
|
asyncio.set_event_loop(self.__asyncio_loop) |
||||||
|
self.__zmq_socket = self._zmq_context.socket(self.__zmq_socket_type) |
||||||
|
self.__zmq_socket.connect(self.__zmq_socket_addr) |
||||||
|
asyncio.ensure_future(self.__message_recv_loop()) |
||||||
|
self.__asyncio_loop.run_until_complete(self._run_thread()) |
||||||
|
|
||||||
|
# TODO: implement stop signal handling |
||||||
|
|
||||||
|
|
||||||
|
class AsyncZmqActor(AsyncZmqThread): |
||||||
|
"""Threaded and Async Actor model based on ZMQ inproc communication |
||||||
|
|
||||||
|
override following: |
||||||
|
``` |
||||||
|
async def _run_thread(self) |
||||||
|
async def _on_message_to_thread(self, message: str) |
||||||
|
``` |
||||||
|
|
||||||
|
both methods run in actor's thread |
||||||
|
|
||||||
|
you can call `self._send_message_from_thread('txt')` |
||||||
|
|
||||||
|
to receive it later in `self._recv_message_from_thread()`. |
||||||
|
|
||||||
|
Example: |
||||||
|
|
||||||
|
``` |
||||||
|
class MyActor(AsyncZmqActor): |
||||||
|
async def _run_thread(self): |
||||||
|
self.counter = 0 |
||||||
|
# runs in a different thread |
||||||
|
await self._send_message_from_thread('some_txt_message_to_actor') |
||||||
|
|
||||||
|
async def _on_message_to_thread(self, message):
||||||
|
# runs in Thread-actor |
||||||
|
self.counter += 1
||||||
|
|
||||||
|
asyncZmqActor = MyActor() |
||||||
|
asyncZmqActor.start() |
||||||
|
``` |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
super(AsyncZmqActor, self).__init__(zmq.asyncio.Context(), ZMQ_THREAD_ACTOR_ADDR) |
||||||
|
|
||||||
|
self.__actor_socket = self._zmq_context.socket(zmq.PAIR) |
||||||
|
self.__actor_socket.bind(ZMQ_THREAD_ACTOR_ADDR) |
||||||
|
|
||||||
|
async def _put_message_to_thread(self, message: str): |
||||||
|
"""It "sends" `message` to thread, |
||||||
|
|
||||||
|
but we can't await its `AsyncZmqThread._on_message_to_thread()`
||||||
|
|
||||||
|
so it's "put", not "send" |
||||||
|
""" |
||||||
|
await self.__actor_socket.send_string(message) |
||||||
|
|
||||||
|
async def _recv_message_from_thread(self) -> str: |
||||||
|
"""Returns next message ``'txt'`` from thread sent by |
||||||
|
|
||||||
|
``AsyncZmqActor._send_message_from_thread('txt')`` |
||||||
|
|
||||||
|
""" |
||||||
|
return await self.__actor_socket.recv_string() |
||||||
|
|
||||||
|
# TODO: implement graceful stopping |
@ -0,0 +1,63 @@ |
|||||||
|
from itertools import chain |
||||||
|
import pandas as pd |
||||||
|
import numpy as np |
||||||
|
from typing import Generator |
||||||
|
|
||||||
|
def prepare_data(data: list) -> pd.DataFrame: |
||||||
|
""" |
||||||
|
Takes list |
||||||
|
- converts it into pd.DataFrame, |
||||||
|
- converts 'timestamp' column to pd.Datetime, |
||||||
|
- fills missing values with np.nan
||||||
|
""" |
||||||
|
data = pd.DataFrame(data, columns=['timestamp', 'value']) |
||||||
|
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms') |
||||||
|
data.fillna(value = np.nan, inplace = True) |
||||||
|
return data |
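A quick check of what prepare_data returns on a tiny payload; the import path matches the one the tests use:

```
from utils import prepare_data

raw = [[1523889000000, 1.0], [1523889000001, None], [1523889000002, 2.0]]
df = prepare_data(raw)

print(df.dtypes)                      # timestamp becomes datetime64[ns], value stays float64
print(df['value'].isnull().tolist())  # -> [False, True, False]: None is now np.nan
```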
||||||
|
|
||||||
|
def get_intersected_chunks(data: list, intersection: int, chunk_size: int) -> Generator[list, None, None]: |
||||||
|
""" |
||||||
|
Returns a generator that splits the data into intersected chunks.
||||||
|
The intersection makes it possible to detect patterns that lie on the border between chunks.
||||||
|
intersection - length of intersection. |
||||||
|
chunk_size - length of chunk |
||||||
|
""" |
||||||
|
assert chunk_size > 0, 'chunk size must be greater than zero'
||||||
|
assert intersection > 0, 'intersection length must be greater than zero'
||||||
|
|
||||||
|
data_len = len(data) |
||||||
|
|
||||||
|
if data_len <= chunk_size: |
||||||
|
yield data |
||||||
|
return |
||||||
|
|
||||||
|
nonintersected = chunk_size - intersection |
||||||
|
|
||||||
|
offset = 0 |
||||||
|
while True: |
||||||
|
left_values = data_len - offset |
||||||
|
if left_values == 0: |
||||||
|
break |
||||||
|
if left_values <= chunk_size: |
||||||
|
yield data[offset : data_len] |
||||||
|
break |
||||||
|
else: |
||||||
|
yield data[offset: offset + chunk_size] |
||||||
|
offset += min(nonintersected, left_values) |
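How the intersected chunking behaves on a small list, assuming the helper is exported from `utils` the same way prepare_data is:

```
from utils import get_intersected_chunks

# chunk_size=4 with intersection=2: each chunk shares its last 2 values with the next one.
for chunk in get_intersected_chunks(list(range(8)), intersection=2, chunk_size=4):
    print(chunk)
# -> [0, 1, 2, 3]
#    [2, 3, 4, 5]
#    [4, 5, 6, 7]
```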
||||||
|
|
||||||
|
def get_chunks(data: list, chunk_size: int) -> Generator[list, None, None]: |
||||||
|
""" |
||||||
|
Returns a generator that splits the data into non-intersected chunks.
||||||
|
chunk_size - length of chunk |
||||||
|
""" |
||||||
|
assert chunk_size > 0, 'chunk size must be greater than zero'
||||||
|
|
||||||
|
chunks_iterables = [iter(data)] * chunk_size |
||||||
|
result_chunks = zip(*chunks_iterables) |
||||||
|
partial_chunk_len = len(data) % chunk_size |
||||||
|
|
||||||
|
if partial_chunk_len != 0: |
||||||
|
result_chunks = chain(result_chunks, [data[-partial_chunk_len:]]) |
||||||
|
|
||||||
|
for chunk in result_chunks: |
||||||
|
yield list(chunk) |
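The non-intersected variant relies on the `zip(*[iter(data)] * chunk_size)` idiom to group the list into fixed-size chunks and appends the remainder separately; a small run, under the same import assumption as above:

```
from utils import get_chunks

for chunk in get_chunks(list(range(7)), 3):
    print(chunk)
# -> [0, 1, 2]
#    [3, 4, 5]
#    [6]
```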
@ -0,0 +1,81 @@ |
|||||||
|
from inspect import signature, Parameter |
||||||
|
from functools import wraps |
||||||
|
from typing import Optional, List |
||||||
|
import re |
||||||
|
|
||||||
|
|
||||||
|
CAMEL_REGEX = re.compile(r'([A-Z])') |
||||||
|
UNDERSCORE_REGEX = re.compile(r'_([a-z])') |
||||||
|
|
||||||
|
def camel_to_underscore(name): |
||||||
|
#TODO: need to rename 'from'/'to' to 'from_timestamp'/'to_timestamp' everywhere(in analytics, server, panel) |
||||||
|
if name == 'from' or name == 'to': |
||||||
|
name += '_timestamp' |
||||||
|
return CAMEL_REGEX.sub(lambda x: '_' + x.group(1).lower(), name) |
||||||
|
|
||||||
|
def underscore_to_camel(name): |
||||||
|
if name == 'from_timestamp' or name == 'to_timestamp': |
||||||
|
name = name.replace('_timestamp', '') |
||||||
|
return UNDERSCORE_REGEX.sub(lambda x: x.group(1).upper(), name) |
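A round-trip of the two naming helpers, including the special-cased 'from'/'to' fields; this assumes the module is `utils.meta`, which the tests import:

```
from utils.meta import camel_to_underscore, underscore_to_camel

print(camel_to_underscore('analyticUnitId'))    # -> analytic_unit_id
print(camel_to_underscore('from'))              # -> from_timestamp
print(underscore_to_camel('analytic_unit_id'))  # -> analyticUnitId
print(underscore_to_camel('from_timestamp'))    # -> from
```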
||||||
|
|
||||||
|
def is_field_private(field_name: str) -> bool:
||||||
|
m = re.match(r'_[^(__)]+__', field_name) |
||||||
|
return m is not None |
||||||
|
|
||||||
|
def serialize(obj): |
||||||
|
if hasattr(obj, 'to_json'):
||||||
|
return obj.to_json() |
||||||
|
else: |
||||||
|
return obj |
||||||
|
|
||||||
|
def inited_params(target_init): |
||||||
|
target_params = signature(target_init).parameters.values() |
||||||
|
if len(target_params) < 1: |
||||||
|
raise ValueError('init function must have at least the self parameter')
||||||
|
if len(target_params) == 1: |
||||||
|
return target_init |
||||||
|
_, *target_params = target_params # we will not use self any more |
||||||
|
|
||||||
|
@wraps(target_init) |
||||||
|
def wrapped_init(wrapped_self, *wrapped_args, **wrapped_kwargs): |
||||||
|
for tp in target_params: |
||||||
|
if tp.default is Parameter.empty: |
||||||
|
continue |
||||||
|
setattr(wrapped_self, tp.name, tp.default) |
||||||
|
|
||||||
|
for tp, v in zip(target_params, wrapped_args): |
||||||
|
setattr(wrapped_self, tp.name, v) |
||||||
|
|
||||||
|
for k, v in wrapped_kwargs.items(): |
||||||
|
setattr(wrapped_self, k, v) |
||||||
|
|
||||||
|
target_init(wrapped_self, *wrapped_args, **wrapped_kwargs) |
||||||
|
|
||||||
|
return wrapped_init |
||||||
|
|
||||||
|
def JSONClass(target_class): |
||||||
|
|
||||||
|
def to_json(self) -> dict: |
||||||
|
""" |
||||||
|
returns a JSON representation of the object
||||||
|
where all None values and private fields are skipped
||||||
|
""" |
||||||
|
return { |
||||||
|
underscore_to_camel(k): serialize(v) for k, v in self.__dict__.items() |
||||||
|
if v is not None and not is_field_private(k) |
||||||
|
} |
||||||
|
|
||||||
|
def from_json(json_object: Optional[dict]) -> target_class: |
||||||
|
if json_object is None: |
||||||
|
json_object = {} |
||||||
|
init_object = { camel_to_underscore(k): v for k, v in json_object.items() } |
||||||
|
return target_class(**init_object) |
||||||
|
|
||||||
|
# target_class.__init__ = inited_params(target_class.__init__) |
||||||
|
target_class.to_json = to_json |
||||||
|
target_class.from_json = from_json |
||||||
|
return target_class |
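A hypothetical class decorated with JSONClass, showing both directions of the conversion; the class name is made up for illustration, and the import assumes this module is `utils.meta`:

```
from utils.meta import JSONClass

@JSONClass
class ExampleSegment:
    def __init__(self, from_timestamp=None, to_timestamp=None, labeled=None):
        self.from_timestamp = from_timestamp
        self.to_timestamp = to_timestamp
        self.labeled = labeled

segment = ExampleSegment.from_json({'from': 1, 'to': 2})
print(segment.from_timestamp, segment.to_timestamp)  # -> 1 2

# 'labeled' is None, so it is skipped; underscore names go back to camelCase.
print(segment.to_json())  # -> {'from': 1, 'to': 2}
```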
||||||
|
|
||||||
|
class SerializableList(List[dict]): |
||||||
|
def to_json(self): |
||||||
|
return list(map(lambda s: s.to_json(), self)) |
@ -0,0 +1,13 @@ |
|||||||
|
import pandas as pd |
||||||
|
from typing import List |
||||||
|
|
||||||
|
def convert_sec_to_ms(sec) -> int: |
||||||
|
return int(sec) * 1000 |
||||||
|
|
||||||
|
def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int: |
||||||
|
# TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6 |
||||||
|
return int(timestamp.value) // 1000000 |
||||||
|
|
||||||
|
def convert_series_to_timestamp_list(series: pd.Series) -> List[int]: |
||||||
|
timestamps = map(lambda value: convert_pd_timestamp_to_ms(value), series) |
||||||
|
return list(timestamps) |
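A short check of the millisecond conversion helpers; convert_pd_timestamp_to_ms is imported from `utils` in the tests, and the series helper is assumed to be exported the same way:

```
import pandas as pd
from utils import convert_pd_timestamp_to_ms, convert_series_to_timestamp_list

ts = pd.Timestamp('2018-04-16 12:30:00')
print(convert_pd_timestamp_to_ms(ts))  # nanoseconds // 10**6 -> 1523881800000

series = pd.Series(pd.to_datetime([1523889000000, 1523889000001], unit='ms'))
print(convert_series_to_timestamp_list(series))  # -> [1523889000000, 1523889000001]
```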
@ -0,0 +1,32 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
|
||||||
|
import sys |
||||||
|
import os |
||||||
|
|
||||||
|
if sys.version_info[:3] < (3, 6, 5) or sys.version_info[:2] >= (3, 7): |
||||||
|
sys.stderr.write('Required python is >= 3.6.5 and < 3.7.0 \n') |
||||||
|
sys.stderr.write('Your python version is: %d.%d.%d\n' % sys.version_info[:3]) |
||||||
|
sys.exit(1) |
||||||
|
|
||||||
|
# #TODO: make wrapper script that set PYTHONPATH instead |
||||||
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) |
||||||
|
|
||||||
|
import logging |
||||||
|
|
||||||
|
root_logger = logging.getLogger() |
||||||
|
root_logger.setLevel(logging.DEBUG) |
||||||
|
|
||||||
|
|
||||||
|
logging_formatter = logging.Formatter("%(asctime)s [Analytics] [%(levelname)-5.5s] %(message)s") |
||||||
|
|
||||||
|
logging_handler = logging.StreamHandler(sys.stdout) |
||||||
|
logging_handler.setLevel(logging.DEBUG) |
||||||
|
logging_handler.setFormatter(logging_formatter) |
||||||
|
|
||||||
|
root_logger.addHandler(logging_handler) |
||||||
|
|
||||||
|
|
||||||
|
from server import run_server |
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
run_server() |
@ -0,0 +1 @@ |
|||||||
|
hiddenimports=['pandas._libs.tslibs.timedeltas'] |
@ -0,0 +1 @@ |
|||||||
|
hiddenimports=['scipy._lib.messagestream'] |
@ -0,0 +1,7 @@ |
|||||||
|
attrdict==2.0.0 |
||||||
|
aiounittest==1.1.0 |
||||||
|
numpy==1.14.5 |
||||||
|
pandas==0.20.3 |
||||||
|
pyzmq==18.0.1 |
||||||
|
scipy==1.1.0 |
||||||
|
websockets==8.1 |
@ -0,0 +1,3 @@ |
|||||||
|
#!/bin/bash |
||||||
|
cd .. |
||||||
|
python3.6 -m PyInstaller --paths=analytics/ --additional-hooks-dir=pyinstaller_hooks bin/server |
@ -0,0 +1,4 @@ |
|||||||
|
import sys |
||||||
|
import os |
||||||
|
|
||||||
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) |
@ -0,0 +1,16 @@ |
|||||||
|
from analytic_types import TimeSeriesIndex, TimeSeries2 |
||||||
|
|
||||||
|
import unittest |
||||||
|
|
||||||
|
|
||||||
|
class TestDataset(unittest.TestCase): |
||||||
|
def test_basic_timeseries_index(self): |
||||||
|
tsi = TimeSeriesIndex(['2017-12-31 16:00:00-08:00']) |
||||||
|
self.assertEqual(len(tsi), 1) |
||||||
|
tsi2 = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) |
||||||
|
self.assertEqual(len(tsi2), 3) |
||||||
|
|
||||||
|
def test_basic_timeseries(self): |
||||||
|
tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) |
||||||
|
ts = TimeSeries2([4, 5, 6], tsis) |
||||||
|
self.assertEqual(len(ts), 3) |
@ -0,0 +1,38 @@ |
|||||||
|
import unittest |
||||||
|
import pandas as pd |
||||||
|
import random |
||||||
|
from typing import List |
||||||
|
|
||||||
|
from analytic_types.data_bucket import DataBucket |
||||||
|
from tests.test_dataset import create_list_of_timestamps |
||||||
|
|
||||||
|
class TestBucket(unittest.TestCase): |
||||||
|
|
||||||
|
def test_receive_data(self): |
||||||
|
bucket = DataBucket() |
||||||
|
data_val = list(range(6)) |
||||||
|
timestamp_list = create_list_of_timestamps(len(data_val)) |
||||||
|
for val in data_val: |
||||||
|
bucket.receive_data(get_pd_dataframe([val], [1523889000000 + val])) |
||||||
|
for idx, row in bucket.data.iterrows(): |
||||||
|
self.assertEqual(data_val[idx], row['value']) |
||||||
|
self.assertEqual(timestamp_list[idx], row['timestamp']) |
||||||
|
|
||||||
|
def test_drop_data(self): |
||||||
|
bucket = DataBucket() |
||||||
|
data_val = list(range(10)) |
||||||
|
timestamp_list = create_list_of_timestamps(len(data_val)) |
||||||
|
bucket.receive_data(get_pd_dataframe(data_val, timestamp_list)) |
||||||
|
bucket.drop_data(5) |
||||||
|
expected_data = data_val[5:] |
||||||
|
expected_timestamp = timestamp_list[5:] |
||||||
|
self.assertEqual(expected_data, bucket.data['value'].tolist()) |
||||||
|
self.assertEqual(expected_timestamp, bucket.data['timestamp'].tolist()) |
||||||
|
|
||||||
|
if __name__ == '__main__': |
||||||
|
unittest.main() |
||||||
|
|
||||||
|
def get_pd_dataframe(value: List[int], timestamp: List[int]) -> pd.DataFrame: |
||||||
|
if len(value) != len(timestamp): |
||||||
|
raise ValueError('len(value) should be equal to len(timestamp)')
||||||
|
return pd.DataFrame({ 'value': value, 'timestamp': timestamp }) |
@ -0,0 +1,386 @@ |
|||||||
|
import unittest |
||||||
|
import pandas as pd |
||||||
|
import numpy as np |
||||||
|
from utils import prepare_data |
||||||
|
import models |
||||||
|
import random |
||||||
|
import scipy.signal |
||||||
|
from typing import List |
||||||
|
|
||||||
|
from analytic_types.segment import Segment |
||||||
|
|
||||||
|
class TestDataset(unittest.TestCase): |
||||||
|
|
||||||
|
def test_models_with_corrupted_dataframe(self): |
||||||
|
data = [[1523889000000 + i, float('nan')] for i in range(10)] |
||||||
|
dataframe = pd.DataFrame(data, columns=['timestamp', 'value']) |
||||||
|
segments = [] |
||||||
|
|
||||||
|
model_instances = [ |
||||||
|
models.JumpModel(), |
||||||
|
models.DropModel(), |
||||||
|
models.GeneralModel(), |
||||||
|
models.PeakModel(), |
||||||
|
models.TroughModel() |
||||||
|
] |
||||||
|
|
||||||
|
for model in model_instances: |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
with self.assertRaises(AssertionError): |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
|
||||||
|
def test_peak_antisegments(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.PeakModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_jump_antisegments(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 9.0, 1.0, 1.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000016, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': True}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.JumpModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_trough_antisegments(self): |
||||||
|
data_val = [9.0, 9.0, 9.0, 9.0, 7.0, 4.0, 7.0, 9.0, 9.0, 9.0, 5.0, 1.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.TroughModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_drop_antisegments(self): |
||||||
|
data_val = [9.0, 9.0, 9.0, 9.0, 9.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 1.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000016, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': True}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.DropModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_general_antisegments(self): |
||||||
|
data_val = [1.0, 2.0, 1.0, 2.0, 5.0, 6.0, 3.0, 2.0, 1.0, 1.0, 8.0, 9.0, 8.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.GeneralModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_jump_empty_segment(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.JumpModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_drop_empty_segment(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.DropModel() |
||||||
|
model.state = model.get_state(None) |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_value_error_dataset_input_should_have_multiple_elements(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,3.0,3.0,2.0,7.0,8.0,9.0,8.0,7.0,6.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000007, 'to': 1523889000011, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.JumpModel() |
||||||
|
model.state = model.get_state(None) |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_prepare_data_for_nonetype(self): |
||||||
|
data = [[1523889000000, None], [1523889000001, None], [1523889000002, None]] |
||||||
|
try: |
||||||
|
data = prepare_data(data) |
||||||
|
except ValueError: |
||||||
|
self.fail('prepare_data raised unexpectedly')
||||||
|
|
||||||
|
def test_prepare_data_for_nan(self): |
||||||
|
data = [[1523889000000, np.nan], [1523889000001, np.nan], [1523889000002, np.nan]] |
||||||
|
try: |
||||||
|
data = prepare_data(data) |
||||||
|
except ValueError: |
||||||
|
self.fail('prepare_data raised unexpectedly')
||||||
|
|
||||||
|
def test_prepare_data_output_fon_nan(self): |
||||||
|
data_nan = [[1523889000000, np.nan], [1523889000001, np.nan], [1523889000002, np.nan]] |
||||||
|
data_none = [[1523889000000, None], [1523889000001, None], [1523889000002, None]] |
||||||
|
return_data_nan = prepare_data(data_nan) |
||||||
|
return_data_none = prepare_data(data_none) |
||||||
|
for item in return_data_nan.value: |
||||||
|
self.assertTrue(np.isnan(item)) |
||||||
|
for item in return_data_none.value: |
||||||
|
self.assertTrue(np.isnan(item)) |
||||||
|
|
||||||
|
def test_three_value_segment(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 4.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
model_instances = [ |
||||||
|
models.GeneralModel(), |
||||||
|
models.PeakModel(), |
||||||
|
] |
||||||
|
try: |
||||||
|
for model in model_instances: |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_general_for_two_labeling(self): |
||||||
|
data_val = [1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 2.0, 1.0, 0, 0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000001, 'to': 1523889000003, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
model = models.GeneralModel() |
||||||
|
model.state = model.get_state(None) |
||||||
|
model.fit(dataframe, segments,'test') |
||||||
|
result = len(data_val) + 1 |
||||||
|
for _ in range(2): |
||||||
|
model.do_detect(dataframe) |
||||||
|
max_pattern_index = max(model.do_detect(dataframe)) |
||||||
|
self.assertLessEqual(max_pattern_index[0], result) |
||||||
|
|
||||||
|
|
||||||
|
def test_peak_model_for_cache(self): |
||||||
|
cache = { |
||||||
|
'patternCenter': [1, 6], |
||||||
|
'patternModel': [1, 4, 0], |
||||||
|
'confidence': 2, |
||||||
|
'convolveMax': 8, |
||||||
|
'convolveMin': 7, |
||||||
|
'windowSize': 1, |
||||||
|
'convDelMin': 0, |
||||||
|
'convDelMax': 0, |
||||||
|
'heightMax': 4, |
||||||
|
'heightMin': 4, |
||||||
|
} |
||||||
|
data_val = [2.0, 5.0, 1.0, 1.0, 1.0, 2.0, 5.0, 1.0, 1.0, 2.0, 3.0, 7.0, 1.0, 1.0, 1.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
model = models.PeakModel() |
||||||
|
model.state = model.get_state(cache) |
||||||
|
result = model.fit(dataframe, segments, 'test') |
||||||
|
self.assertEqual(len(result.pattern_center), 3) |
||||||
|
|
||||||
|
def test_trough_model_for_cache(self): |
||||||
|
cache = { |
||||||
|
'patternCenter': [2, 6], |
||||||
|
'patternModel': [5, 0.5, 4], |
||||||
|
'confidence': 2, |
||||||
|
'convolveMax': 8, |
||||||
|
'convolveMin': 7, |
||||||
|
'window_size': 1, |
||||||
|
'convDelMin': 0, |
||||||
|
'convDelMax': 0, |
||||||
|
} |
||||||
|
data_val = [5.0, 5.0, 1.0, 4.0, 5.0, 5.0, 0.0, 4.0, 5.0, 5.0, 6.0, 1.0, 5.0, 5.0, 5.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
model = models.TroughModel() |
||||||
|
model.state = model.get_state(cache) |
||||||
|
result = model.fit(dataframe, segments, 'test') |
||||||
|
self.assertEqual(len(result.pattern_center), 3) |
||||||
|
|
||||||
|
def test_jump_model_for_cache(self): |
||||||
|
cache = { |
||||||
|
'patternCenter': [2, 6], |
||||||
|
'patternModel': [5, 0.5, 4], |
||||||
|
'confidence': 2, |
||||||
|
'convolveMax': 8, |
||||||
|
'convolveMin': 7, |
||||||
|
'window_size': 1, |
||||||
|
'convDelMin': 0, |
||||||
|
'convDelMax': 0, |
||||||
|
} |
||||||
|
data_val = [1.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 152388900009, 'to': 1523889000013, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
model = models.JumpModel() |
||||||
|
model.state = model.get_state(cache) |
||||||
|
result = model.fit(dataframe, segments, 'test') |
||||||
|
self.assertEqual(len(result.pattern_center), 3) |
||||||
|
|
||||||
|
def test_models_for_pattern_model_cache(self): |
||||||
|
cache = { |
||||||
|
'patternCenter': [4, 12], |
||||||
|
'patternModel': [], |
||||||
|
'confidence': 2, |
||||||
|
'convolveMax': 8, |
||||||
|
'convolveMin': 7, |
||||||
|
'window_size': 2, |
||||||
|
'convDelMin': 0, |
||||||
|
'convDelMax': 0, |
||||||
|
} |
||||||
|
data_val = [5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 6.0, 6.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000024, 'labeled': True, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
|
||||||
|
try: |
||||||
|
model = models.DropModel() |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(cache) |
||||||
|
model.fit(dataframe, segments, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||||
|
|
||||||
|
def test_problem_data_for_random_model(self): |
||||||
|
problem_data = [2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, |
||||||
|
3.0, 3.0, 3.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, |
||||||
|
3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 6.0, 7.0, 8.0, 8.0, 4.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, |
||||||
|
4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, |
||||||
|
4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 5.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, |
||||||
|
2.0, 8.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] |
||||||
|
data = create_dataframe(problem_data) |
||||||
|
cache = { |
||||||
|
'patternCenter': [5, 50], |
||||||
|
'patternModel': [], |
||||||
|
'windowSize': 2, |
||||||
|
'convolveMin': 0, |
||||||
|
'convolveMax': 0, |
||||||
|
'convDelMin': 0, |
||||||
|
'convDelMax': 0, |
||||||
|
} |
||||||
|
max_ws = 20 |
||||||
|
iteration = 1 |
||||||
|
for ws in range(1, max_ws): |
||||||
|
for _ in range(iteration): |
||||||
|
pattern_model = create_random_model(ws) |
||||||
|
convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) |
||||||
|
cache['windowSize'] = ws |
||||||
|
cache['patternModel'] = pattern_model |
||||||
|
cache['convolveMin'] = max(convolve) |
||||||
|
cache['convolveMax'] = max(convolve) |
||||||
|
try: |
||||||
|
model = models.GeneralModel() |
||||||
|
model.state = model.get_state(cache) |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.detect(data, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly with av_model {} and window size {}'.format(model_name, pattern_model, ws)) |
||||||
|
|
||||||
|
def test_random_dataset_for_random_model(self): |
||||||
|
data = create_random_model(random.randint(1, 100)) |
||||||
|
data = create_dataframe(data) |
||||||
|
model_instances = [ |
||||||
|
models.PeakModel(), |
||||||
|
models.TroughModel() |
||||||
|
] |
||||||
|
cache = { |
||||||
|
'patternCenter': [5, 50], |
||||||
|
'patternModel': [], |
||||||
|
'windowSize': 2, |
||||||
|
'convolveMin': 0, |
||||||
|
'convolveMax': 0, |
||||||
|
'confidence': 0, |
||||||
|
'heightMax': 0, |
||||||
|
'heightMin': 0, |
||||||
|
'convDelMin': 0, |
||||||
|
'convDelMax': 0, |
||||||
|
} |
||||||
|
ws = random.randint(1, len(data['value']) // 2)
||||||
|
pattern_model = create_random_model(ws) |
||||||
|
convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) |
||||||
|
confidence = 0.2 * (data['value'].max() - data['value'].min()) |
||||||
|
cache['windowSize'] = ws |
||||||
|
cache['patternModel'] = pattern_model |
||||||
|
cache['convolveMin'] = max(convolve) |
||||||
|
cache['convolveMax'] = max(convolve) |
||||||
|
cache['confidence'] = confidence |
||||||
|
cache['heightMax'] = data['value'].max() |
||||||
|
cache['heightMin'] = confidence |
||||||
|
try: |
||||||
|
for model in model_instances: |
||||||
|
model_name = model.__class__.__name__ |
||||||
|
model.state = model.get_state(cache) |
||||||
|
model.detect(data, 'test') |
||||||
|
except ValueError: |
||||||
|
self.fail('Model {} raised unexpectedly with dataset {} and cache {}'.format(model_name, data['value'], cache)) |
||||||
|
|
||||||
|
if __name__ == '__main__': |
||||||
|
unittest.main() |
||||||
|
|
||||||
|
def create_dataframe(data_val: list) -> pd.DataFrame: |
||||||
|
data_ind = create_list_of_timestamps(len(data_val)) |
||||||
|
data = {'timestamp': data_ind, 'value': data_val} |
||||||
|
dataframe = pd.DataFrame(data) |
||||||
|
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||||
|
return dataframe |
||||||
|
|
||||||
|
def create_list_of_timestamps(length: int) -> List[int]: |
||||||
|
return [1523889000000 + i for i in range(length)] |
||||||
|
|
||||||
|
def create_random_model(window_size: int) -> list: |
||||||
|
return [random.randint(0, 100) for _ in range(window_size * 2 + 1)] |
@ -0,0 +1,265 @@ |
|||||||
|
import unittest |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
from detectors import pattern_detector, threshold_detector, anomaly_detector |
||||||
|
from analytic_types.detector import DetectionResult, ProcessingResult, Bound |
||||||
|
from analytic_types.segment import Segment |
||||||
|
from tests.test_dataset import create_dataframe, create_list_of_timestamps |
||||||
|
from utils import convert_pd_timestamp_to_ms |
||||||
|
|
||||||
|
class TestPatternDetector(unittest.TestCase): |
||||||
|
|
||||||
|
def test_small_dataframe(self): |
||||||
|
|
||||||
|
data = [[0,1], [1,2]] |
||||||
|
dataframe = pd.DataFrame(data, columns=['timestamp', 'values']) |
||||||
|
cache = { 'windowSize': 10 } |
||||||
|
|
||||||
|
detector = pattern_detector.PatternDetector('GENERAL', 'test_id') |
||||||
|
with self.assertRaises(ValueError): |
||||||
|
detector.detect(dataframe, cache) |
||||||
|
|
||||||
|
def test_only_negative_segments(self): |
||||||
|
data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] |
||||||
|
data_ind = [1523889000000 + i for i in range(len(data_val))] |
||||||
|
data = {'timestamp': data_ind, 'value': data_val} |
||||||
|
dataframe = pd.DataFrame(data = data) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': False, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
cache = {} |
||||||
|
detector = pattern_detector.PatternDetector('PEAK', 'test_id') |
||||||
|
expected_error_message = 'test_id has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment'
||||||
|
|
||||||
|
try: |
||||||
|
detector.train(dataframe, segments, cache) |
||||||
|
except ValueError as e: |
||||||
|
self.assertEqual(str(e), expected_error_message)
||||||
|
|
||||||
|
def test_positive_and_negative_segments(self): |
||||||
|
data_val = [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
||||||
|
dataframe = create_dataframe(data_val) |
||||||
|
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}, |
||||||
|
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000001, 'to': 1523889000003, 'labeled': False, 'deleted': False}] |
||||||
|
segments = [Segment.from_json(segment) for segment in segments] |
||||||
|
cache = {} |
||||||
|
detector = pattern_detector.PatternDetector('PEAK', 'test_id') |
||||||
|
try: |
||||||
|
detector.train(dataframe, segments, cache) |
||||||
|
except Exception as e: |
||||||
|
self.fail('detector.train fail with error {}'.format(e)) |
||||||
|
|
||||||
|
class TestThresholdDetector(unittest.TestCase): |
||||||
|
|
||||||
|
def test_invalid_cache(self): |
||||||
|
|
||||||
|
detector = threshold_detector.ThresholdDetector('test_id') |
||||||
|
|
||||||
|
with self.assertRaises(ValueError): |
||||||
|
detector.detect([], None) |
||||||
|
|
||||||
|
with self.assertRaises(ValueError): |
||||||
|
detector.detect([], {}) |
||||||
|
|
||||||
|
|
||||||
|
class TestAnomalyDetector(unittest.TestCase): |
||||||
|
|
||||||
|
def test_detect(self): |
||||||
|
data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] |
||||||
|
data_ind = [1523889000000 + i for i in range(len(data_val))] |
||||||
|
data = {'timestamp': data_ind, 'value': data_val} |
||||||
|
dataframe = pd.DataFrame(data = data) |
||||||
|
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||||
|
cache = { |
||||||
|
'confidence': 2, |
||||||
|
'alpha': 0.1, |
||||||
|
'enableBounds': 'ALL', |
||||||
|
'timeStep': 1 |
||||||
|
} |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
|
||||||
|
detect_result: DetectionResult = detector.detect(dataframe, cache) |
||||||
|
detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) |
||||||
|
result = [{ 'from': 1523889000005.0, 'to': 1523889000005.0 }] |
||||||
|
self.assertEqual(result, detected_segments) |
||||||
|
|
||||||
|
cache = { |
||||||
|
'confidence': 2, |
||||||
|
'alpha': 0.1, |
||||||
|
'enableBounds': 'ALL', |
||||||
|
'timeStep': 1, |
||||||
|
'seasonality': 4, |
||||||
|
'segments': [{ 'from': 1523889000001, 'to': 1523889000002, 'data': [10] }] |
||||||
|
} |
||||||
|
detect_result: DetectionResult = detector.detect(dataframe, cache) |
||||||
|
detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) |
||||||
|
result = [] |
||||||
|
self.assertEqual(result, detected_segments) |
||||||
|
|
||||||
|
def test_process_data(self): |
||||||
|
data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] |
||||||
|
data_ind = [1523889000000 + i for i in range(len(data_val))] |
||||||
|
data = {'timestamp': data_ind, 'value': data_val} |
||||||
|
dataframe = pd.DataFrame(data = data) |
||||||
|
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||||
|
cache = { |
||||||
|
'confidence': 2, |
||||||
|
'alpha': 0.1, |
||||||
|
'enableBounds': 'ALL', |
||||||
|
'timeStep': 1 |
||||||
|
} |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
detect_result: ProcessingResult = detector.process_data(dataframe, cache) |
||||||
|
expected_result = { |
||||||
|
'lowerBound': [ |
||||||
|
(1523889000000, -2.0), |
||||||
|
(1523889000001, -1.9), |
||||||
|
(1523889000002, -1.71), |
||||||
|
(1523889000003, -1.6389999999999998), |
||||||
|
(1523889000004, -1.4750999999999999), |
||||||
|
(1523889000005, -0.5275899999999998), |
||||||
|
(1523889000006, -0.5748309999999996), |
||||||
|
(1523889000007, -0.5173478999999996), |
||||||
|
(1523889000008, -0.5656131099999995) |
||||||
|
], |
||||||
|
'upperBound': [ |
||||||
|
(1523889000000, 2.0), |
||||||
|
(1523889000001, 2.1), |
||||||
|
(1523889000002, 2.29), |
||||||
|
(1523889000003, 2.361), |
||||||
|
(1523889000004, 2.5249), |
||||||
|
(1523889000005, 3.47241), |
||||||
|
(1523889000006, 3.4251690000000004), |
||||||
|
(1523889000007, 3.4826521), |
||||||
|
(1523889000008, 3.4343868900000007) |
||||||
|
]} |
||||||
|
self.assertEqual(detect_result.to_json(), expected_result) |
||||||
|
|
||||||
|
cache = { |
||||||
|
'confidence': 2, |
||||||
|
'alpha': 0.1, |
||||||
|
'enableBounds': 'ALL', |
||||||
|
'timeStep': 1, |
||||||
|
'seasonality': 5, |
||||||
|
'segments': [{ 'from': 1523889000001, 'to': 1523889000002,'data': [1] }] |
||||||
|
} |
||||||
|
detect_result: ProcessingResult = detector.process_data(dataframe, cache) |
||||||
|
expected_result = { |
||||||
|
'lowerBound': [ |
||||||
|
(1523889000000, -2.0), |
||||||
|
(1523889000001, -2.9), |
||||||
|
(1523889000002, -1.71), |
||||||
|
(1523889000003, -1.6389999999999998), |
||||||
|
(1523889000004, -1.4750999999999999), |
||||||
|
(1523889000005, -0.5275899999999998), |
||||||
|
(1523889000006, -1.5748309999999996), |
||||||
|
(1523889000007, -0.5173478999999996), |
||||||
|
(1523889000008, -0.5656131099999995) |
||||||
|
], |
||||||
|
'upperBound': [ |
||||||
|
(1523889000000, 2.0), |
||||||
|
(1523889000001, 3.1), |
||||||
|
(1523889000002, 2.29), |
||||||
|
(1523889000003, 2.361), |
||||||
|
(1523889000004, 2.5249), |
||||||
|
(1523889000005, 3.47241), |
||||||
|
(1523889000006, 4.425169), |
||||||
|
(1523889000007, 3.4826521), |
||||||
|
(1523889000008, 3.4343868900000007) |
||||||
|
]} |
||||||
|
self.assertEqual(detect_result.to_json(), expected_result) |
||||||
|
|
||||||
|
def test_get_seasonality_offset(self): |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
from_timestamp = 1573700973027 |
||||||
|
seasonality = 3600000 |
||||||
|
data_start_time = 1573698780000 |
||||||
|
time_step = 30000 |
||||||
|
detected_offset = detector.get_seasonality_offset(from_timestamp, seasonality, data_start_time, time_step) |
||||||
|
expected_offset = 74 |
||||||
|
self.assertEqual(detected_offset, expected_offset) |
||||||
|
|
||||||
|
def test_segment_generator(self): |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
data = [1, 1, 5, 1, -4, 5, 5, 5, -3, 1] |
||||||
|
timestamps = create_list_of_timestamps(len(data)) |
||||||
|
dataframe = create_dataframe(data) |
||||||
|
upper_bound = pd.Series([2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) |
||||||
|
lower_bound = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) |
||||||
|
segments = list(detector.detections_generator(dataframe, upper_bound, lower_bound, enabled_bounds=Bound.ALL)) |
||||||
|
|
||||||
|
segments_borders = list(map(lambda s: [s.from_timestamp, s.to_timestamp], segments)) |
||||||
|
self.assertEqual(segments_borders, [[timestamps[2], timestamps[2]], [timestamps[4], timestamps[8]]]) |
||||||
|
|
||||||
|
def test_consume_data(self): |
||||||
|
cache = { |
||||||
|
'confidence': 2, |
||||||
|
'alpha': 0.1, |
||||||
|
'enableBounds': 'ALL', |
||||||
|
'timeStep': 1 |
||||||
|
} |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
|
||||||
|
detect_result: DetectionResult = None |
||||||
|
for val in range(22): |
||||||
|
value = 1 if val != 10 else 5 |
||||||
|
dataframe = pd.DataFrame({'value': [value], 'timestamp': [1523889000000 + val]}) |
||||||
|
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||||
|
detect_result = detector.consume_data(dataframe, cache) |
||||||
|
|
||||||
|
detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) |
||||||
|
result = [{ 'from': 1523889000010, 'to': 1523889000010 }] |
||||||
|
self.assertEqual(result, detected_segments) |
||||||
|
|
||||||
|
def test_get_segment_bound(self): |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
peak_segment = pd.Series([1,2,3,4,3,2,1]) |
||||||
|
trough_segment = pd.Series([4,3,2,1,2,3,4]) |
||||||
|
expected_peak_segment_results = { |
||||||
|
'max_value': 3, |
||||||
|
'min_value': 1.5 |
||||||
|
} |
||||||
|
expected_trough_segment_results = { |
||||||
|
'max_value': 3.5, |
||||||
|
'min_value': 2.75 |
||||||
|
} |
||||||
|
peak_detector_result_upper = detector.get_segment_bound(peak_segment, Bound.UPPER) |
||||||
|
peak_detector_result_lower = detector.get_segment_bound(peak_segment, Bound.LOWER) |
||||||
|
trough_detector_result_upper = detector.get_segment_bound(trough_segment, Bound.UPPER) |
||||||
|
trough_detector_result_lower = detector.get_segment_bound(trough_segment, Bound.LOWER) |
||||||
|
|
||||||
|
self.assertGreaterEqual( |
||||||
|
max(peak_detector_result_upper), |
||||||
|
expected_peak_segment_results['max_value'] |
||||||
|
) |
||||||
|
self.assertLessEqual( |
||||||
|
max(peak_detector_result_lower), |
||||||
|
expected_peak_segment_results['min_value'] |
||||||
|
) |
||||||
|
self.assertGreaterEqual( |
||||||
|
max(trough_detector_result_upper), |
||||||
|
expected_trough_segment_results['max_value'] |
||||||
|
) |
||||||
|
self.assertLessEqual( |
||||||
|
max(trough_detector_result_lower), |
||||||
|
expected_trough_segment_results['min_value'] |
||||||
|
) |
||||||
|
|
||||||
|
def test_get_segment_bound_corner_cases(self): |
||||||
|
detector = anomaly_detector.AnomalyDetector('test_id') |
||||||
|
empty_segment = pd.Series([]) |
||||||
|
same_values_segment = pd.Series([2,2,2,2,2,2]) |
||||||
|
empty_detector_result_upper = detector.get_segment_bound(empty_segment, Bound.UPPER) |
||||||
|
empty_detector_result_lower = detector.get_segment_bound(empty_segment, Bound.LOWER) |
||||||
|
same_values_detector_result_upper = detector.get_segment_bound(same_values_segment, Bound.UPPER) |
||||||
|
same_values_detector_result_lower = detector.get_segment_bound(same_values_segment, Bound.LOWER) |
||||||
|
|
||||||
|
self.assertEqual(len(empty_detector_result_upper), 0) |
||||||
|
self.assertEqual(len(empty_detector_result_lower), 0) |
||||||
|
self.assertEqual(min(same_values_detector_result_upper), 0) |
||||||
|
self.assertEqual(max(same_values_detector_result_upper), 0) |
||||||
|
self.assertEqual(min(same_values_detector_result_lower), 0) |
||||||
|
self.assertEqual(max(same_values_detector_result_lower), 0) |
||||||
|
|
||||||
|
if __name__ == '__main__': |
||||||
|
unittest.main() |
@ -0,0 +1,100 @@ |
|||||||
|
from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel |
||||||
|
from models import GeneralModelState |
||||||
|
import utils.meta |
||||||
|
import aiounittest |
||||||
|
from analytic_unit_manager import AnalyticUnitManager |
||||||
|
from collections import namedtuple |
||||||
|
|
||||||
|
TestData = namedtuple('TestData', ['uid', 'type', 'values', 'segments']) |
||||||
|
|
||||||
|
def get_random_id() -> str: |
||||||
|
return str(id(list())) |
||||||
|
|
||||||
|
class TestDataset(aiounittest.AsyncTestCase): |
||||||
|
|
||||||
|
timestep = 50 #ms |
||||||
|
|
||||||
|
def _fill_task(self, uid, data, task_type, analytic_unit_type, segments=None, cache=None): |
||||||
|
task = { |
||||||
|
'analyticUnitId': uid, |
||||||
|
'type': task_type, |
||||||
|
'payload': { |
||||||
|
'data': data, |
||||||
|
'from': data[0][0], |
||||||
|
'to': data[-1][0], |
||||||
|
'analyticUnitType': analytic_unit_type, |
||||||
|
'detector': 'pattern', |
||||||
|
'cache': cache |
||||||
|
}, |
||||||
|
'_id': get_random_id() |
||||||
|
} |
||||||
|
if segments: task['payload']['segments'] = segments |
||||||
|
|
||||||
|
return task |
||||||
|
|
||||||
|
def _convert_values(self, values) -> list: |
||||||
|
from_t = 0 |
||||||
|
to_t = len(values) * self.timestep |
||||||
|
return list(zip(range(from_t, to_t, self.timestep), values)) |
||||||
|
|
||||||
|
def _index_to_test_time(self, idx) -> int: |
||||||
|
return idx * self.timestep |
||||||
|
|
||||||
|
def _get_learn_task(self, test_data): |
||||||
|
uid, analytic_unit_type, values, segments = test_data |
||||||
|
data = self._convert_values(values) |
||||||
|
segments = [{ |
||||||
|
'analyticUnitId': uid, |
||||||
|
'from': self._index_to_test_time(s[0]), |
||||||
|
'to': self._index_to_test_time(s[1]), |
||||||
|
'labeled': True, |
||||||
|
'deleted': False |
||||||
|
} for s in segments] |
||||||
|
return self._fill_task(uid, data, 'LEARN', analytic_unit_type, segments=segments) |
||||||
|
|
||||||
|
def _get_detect_task(self, test_data, cache): |
||||||
|
uid, analytic_unit_type, values, _ = test_data |
||||||
|
data = self._convert_values(values) |
||||||
|
return self._fill_task(uid, data, 'DETECT', analytic_unit_type, cache=cache) |
||||||
|
|
||||||
|
def _get_test_dataset(self, pattern) -> tuple: |
||||||
|
""" |
||||||
|
pattern name: ([dataset values], [list of segments]) |
||||||
|
|
||||||
|
segment - (begin, end) - indexes in dataset values |
||||||
|
returns dataset in format (data: List[int], segments: List[List[int]]) |
||||||
|
""" |
||||||
|
datasets = { |
||||||
|
'PEAK': ([0, 0, 1, 2, 3, 4, 3, 2, 1, 0, 0], [[2, 8]]), |
||||||
|
'JUMP': ([0, 0, 1, 2, 3, 4, 4, 4], [[1, 6]]), |
||||||
|
'DROP': ([4, 4, 4, 3, 2, 1, 0, 0], [[1, 6]]), |
||||||
|
'TROUGH': ([4, 4, 3, 2, 1, 0, 1, 2, 3, 4, 4], [[1, 9]]), |
||||||
|
'GENERAL': ([0, 0, 1, 2, 3, 4, 3, 2, 1, 0, 0], [[2, 8]]) |
||||||
|
} |
||||||
|
return datasets[pattern] |
||||||
|
|
||||||
|
async def _learn(self, task, manager=None) -> dict: |
||||||
|
if not manager: manager = AnalyticUnitManager() |
||||||
|
result = await manager.handle_analytic_task(task) |
||||||
|
return result['payload']['cache'] |
||||||
|
|
||||||
|
async def _detect(self, task, manager=None) -> dict: |
||||||
|
if not manager: manager = AnalyticUnitManager() |
||||||
|
result = await manager.handle_analytic_task(task) |
||||||
|
return result |
||||||
|
|
||||||
|
async def _test_detect(self, test_data, manager=None): |
||||||
|
learn_task = self._get_learn_task(test_data) |
||||||
|
cache = await self._learn(learn_task, manager) |
||||||
|
detect_task = self._get_detect_task(test_data, cache) |
||||||
|
result = await self._detect(detect_task, manager) |
||||||
|
return result |
||||||
|
|
||||||
|
async def test_unit_manager(self): |
||||||
|
test_data = TestData(get_random_id(), 'PEAK', [0,1,2,5,10,5,2,1,1,1,0,0,0,0], [[1,7]]) |
||||||
|
manager = AnalyticUnitManager() |
||||||
|
|
||||||
|
with_manager = await self._test_detect(test_data, manager) |
||||||
|
without_manager = await self._test_detect(test_data) |
||||||
|
self.assertEqual(with_manager, without_manager) |
||||||
|
|
@ -0,0 +1,43 @@ |
import unittest
import pandas as pd
import numpy as np
import models


class TestModel(unittest.TestCase):

    def test_stair_model_get_indexes(self):
        drop_model = models.DropModel()
        jump_model = models.JumpModel()
        drop_data = pd.Series([4, 4, 4, 1, 1, 1, 5, 5, 2, 2, 2])
        jump_data = pd.Series([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5])
        jump_data_one_stair = pd.Series([1, 3, 3])
        drop_data_one_stair = pd.Series([4, 2, 1])
        height = 2
        length = 2
        expected_result = [2, 7]
        drop_model_result = drop_model.get_stair_indexes(drop_data, height, length)
        jump_model_result = jump_model.get_stair_indexes(jump_data, height, length)
        drop_one_stair_result = drop_model.get_stair_indexes(drop_data_one_stair, height, 1)
        jump_one_stair_result = jump_model.get_stair_indexes(jump_data_one_stair, height, 1)
        for val in expected_result:
            self.assertIn(val, drop_model_result)
            self.assertIn(val, jump_model_result)
        self.assertEqual(0, drop_one_stair_result[0])
        self.assertEqual(0, jump_one_stair_result[0])

    def test_stair_model_get_indexes_corner_cases(self):
        drop_model = models.DropModel()
        jump_model = models.JumpModel()
        empty_data = pd.Series([])
        nan_data = pd.Series([np.nan, np.nan, np.nan, np.nan])
        height, length = 2, 2
        length_zero, height_zero = 0, 0
        expected_result = []
        drop_empty_data_result = drop_model.get_stair_indexes(empty_data, height, length)
        drop_nan_data_result = drop_model.get_stair_indexes(nan_data, height_zero, length_zero)
        jump_empty_data_result = jump_model.get_stair_indexes(empty_data, height, length)
        jump_nan_data_result = jump_model.get_stair_indexes(nan_data, height_zero, length_zero)
        self.assertEqual(drop_empty_data_result, expected_result)
        self.assertEqual(drop_nan_data_result, expected_result)
        self.assertEqual(jump_empty_data_result, expected_result)
        self.assertEqual(jump_nan_data_result, expected_result)
@ -0,0 +1,359 @@ |
|||||||
|
from analytic_types.segment import Segment |
||||||
|
|
||||||
|
import utils |
||||||
|
import unittest |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
import math |
||||||
|
import random |
||||||
|
|
||||||
|
RELATIVE_TOLERANCE = 1e-1 |
||||||
|
|
||||||
|
class TestUtils(unittest.TestCase): |
||||||
|
|
||||||
|
#example test for test's workflow purposes |
||||||
|
def test_segment_parsion(self): |
||||||
|
self.assertTrue(True) |
||||||
|
|
||||||
|
    def test_confidence_all_normal_value(self):
        segment = [1, 2, 0, 6, 8, 5, 3]
        utils_result = utils.find_confidence(segment)[0]
        result = 4.0
        self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))

    def test_confidence_all_nan_value(self):
        segment = [np.nan, np.nan, np.nan, np.nan]
        self.assertEqual(utils.find_confidence(segment)[0], 0)

    def test_confidence_with_nan_value(self):
        data = [np.nan, np.nan, 0, 8]
        utils_result = utils.find_confidence(data)[0]
        result = 4.0
        self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))

    def test_interval_all_normal_value(self):
        data = [1, 2, 1, 2, 4, 1, 2, 4, 5, 6]
        data = pd.Series(data)
        center = 4
        window_size = 2
        result = [1, 2, 4, 1, 2]
        self.assertEqual(list(utils.get_interval(data, center, window_size)), result)

    def test_interval_wrong_ws(self):
        data = [1, 2, 4, 1, 2, 4]
        data = pd.Series(data)
        center = 3
        window_size = 6
        result = [1, 2, 4, 1, 2, 4]
        self.assertEqual(list(utils.get_interval(data, center, window_size)), result)

    def test_subtract_min_without_nan(self):
        segment = [1, 2, 4, 1, 2, 4]
        segment = pd.Series(segment)
        result = [0, 1, 3, 0, 1, 3]
        utils_result = list(utils.subtract_min_without_nan(segment))
        self.assertEqual(utils_result, result)

    def test_subtract_min_with_nan(self):
        segment = [np.nan, 2, 4, 1, 2, 4]
        segment = pd.Series(segment)
        result = [2, 4, 1, 2, 4]
        utils_result = list(utils.subtract_min_without_nan(segment)[1:])
        self.assertEqual(utils_result, result)

    def test_get_convolve(self):
        data = [1, 2, 3, 2, 2, 0, 2, 3, 4, 3, 2, 1, 1, 2, 3, 4, 3, 2, 0]
        data = pd.Series(data)
        pattern_index = [2, 8, 15]
        window_size = 2
        av_model = [1, 2, 3, 2, 1]
        result = []
        self.assertNotEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result)

    def test_get_convolve_with_nan(self):
        data = [1, 2, 3, 2, np.nan, 0, 2, 3, 4, np.nan, 2, 1, 1, 2, 3, 4, 3, np.nan, 0]
        data = pd.Series(data)
        pattern_index = [2, 8, 15]
        window_size = 2
        av_model = [1, 2, 3, 2, 1]
        result = utils.get_convolve(pattern_index, av_model, data, window_size)
        for val in result:
            self.assertFalse(np.isnan(val))

    def test_get_convolve_empty_data(self):
        data = []
        pattern_index = []
        window_size = 2
        window_size_zero = 0
        av_model = []
        result = []
        self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result)
        self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size_zero), result)

    def test_find_jump_parameters_center(self):
        segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
        segment = pd.Series(segment)
        jump_center = [10, 11]
        self.assertIn(utils.find_pattern_center(segment, 0, 'jump'), jump_center)

    def test_find_jump_parameters_height(self):
        segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
        segment = pd.Series(segment)
        jump_height = [3.5, 4]
        self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[0])
        self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[1])

    def test_find_jump_parameters_length(self):
        segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
        segment = pd.Series(segment)
        jump_length = 2
        self.assertEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_length)

    def test_find_drop_parameters_center(self):
        segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        segment = pd.Series(segment)
        drop_center = [14, 15, 16]
        self.assertIn(utils.find_pattern_center(segment, 0, 'drop'), drop_center)

    def test_find_drop_parameters_height(self):
        segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        segment = pd.Series(segment)
        drop_height = [3.5, 4]
        self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[0])
        self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[1])

    def test_find_drop_parameters_length(self):
        segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        segment = pd.Series(segment)
        drop_length = 2
        self.assertEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_length)

    def test_get_av_model_empty_data(self):
        patterns_list = []
        result = []
        self.assertEqual(utils.get_av_model(patterns_list), result)

    def test_get_av_model_normal_data(self):
        patterns_list = [[1, 1, 1], [2, 2, 2], [3, 3, 3]]
        result = [2.0, 2.0, 2.0]
        self.assertEqual(utils.get_av_model(patterns_list), result)

    def test_get_distribution_density(self):
        segment = [1, 1, 1, 3, 5, 5, 5]
        segment = pd.Series(segment)
        result = (3, 5, 1)
        self.assertEqual(utils.get_distribution_density(segment), result)

    def test_get_distribution_density_right(self):
        data = [1.0, 5.0, 5.0, 4.0]
        data = pd.Series(data)
        median = 3.0
        max_line = 5.0
        min_line = 1.0
        utils_result = utils.get_distribution_density(data)
        self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE))
        self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE))
        self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE))

    def test_get_distribution_density_left(self):
        data = [1.0, 1.0, 2.0, 1.0, 5.0]
        data = pd.Series(data)
        median = 3.0
        max_line = 5.0
        min_line = 1.0
        utils_result = utils.get_distribution_density(data)
        self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE))
        self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE))
        self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE))

    def test_get_distribution_density_short_data(self):
        data = [1.0, 5.0]
        data = pd.Series(data)
        segment = [1.0]
        segment = pd.Series(segment)
        utils_result_data = utils.get_distribution_density(data)
        utils_result_segment = utils.get_distribution_density(segment)
        self.assertEqual(len(utils_result_data), 3)
        self.assertEqual(utils_result_segment, (0, 0, 0))

    def test_get_distribution_density_with_nans(self):
        segment = [np.NaN, 1, 1, 1, np.NaN, 3, 5, 5, 5, np.NaN]
        segment = pd.Series(segment)
        result = (3, 5, 1)
        self.assertEqual(utils.get_distribution_density(segment), result)

    def test_find_pattern_jump_center(self):
        data = [1.0, 1.0, 1.0, 5.0, 5.0, 5.0]
        data = pd.Series(data)
        median = 3.0
        result = 3
        self.assertEqual(result, utils.find_pattern_center(data, 0, 'jump'))

    def test_get_convolve_wrong_index(self):
        data = [1.0, 5.0, 2.0, 1.0, 6.0, 2.0]
        data = pd.Series(data)
        segments = [1, 11]
        av_model = [0.0, 4.0, 0.0]
        window_size = 1
        try:
            utils.get_convolve(segments, av_model, data, window_size)
        except ValueError:
            self.fail('Method get_convolve raised ValueError unexpectedly')

    def test_get_av_model_for_different_length(self):
        patterns_list = [[1.0, 1.0, 2.0], [4.0, 4.0], [2.0, 2.0, 2.0], [3.0, 3.0], []]
        try:
            utils.get_av_model(patterns_list)
        except ValueError:
            self.fail('Method get_av_model raised ValueError unexpectedly')

    def test_find_nan_indexes(self):
        data = [1, 1, 1, 0, 0, np.nan, None, []]
        data = pd.Series(data)
        result = [5, 6]
        self.assertEqual(utils.find_nan_indexes(data), result)

    def test_find_nan_indexes_normal_values(self):
        data = [1, 1, 1, 0, 0, 0, 1, 1]
        data = pd.Series(data)
        result = []
        self.assertEqual(utils.find_nan_indexes(data), result)

    def test_find_nan_indexes_empty_values(self):
        data = []
        result = []
        self.assertEqual(utils.find_nan_indexes(data), result)

    def test_create_correlation_data(self):
        data = [random.randint(10, 999) for _ in range(10000)]
        data = pd.Series(data)
        pattern_model = [100, 200, 500, 300, 100]
        ws = 2
        result = 6000
        corr_data = utils.get_correlation_gen(data, ws, pattern_model)
        corr_data = list(corr_data)
        self.assertGreaterEqual(len(corr_data), result)

    def test_inverse_segment(self):
        data = pd.Series([1, 2, 3, 4, 3, 2, 1])
        result = pd.Series([3, 2, 1, 0, 1, 2, 3])
        utils_result = utils.inverse_segment(data)
        for ind, val in enumerate(utils_result):
            self.assertEqual(val, result[ind])

    def test_get_end_of_segment_equal(self):
        data = pd.Series([5, 4, 3, 2, 1, 0, 0, 0])
        result_list = [4, 5, 6]
        self.assertIn(utils.get_end_of_segment(data, False), result_list)

    def test_get_end_of_segment_greater(self):
        data = pd.Series([5, 4, 3, 2, 1, 0, 1, 2, 3])
        result_list = [4, 5, 6]
        self.assertIn(utils.get_end_of_segment(data, False), result_list)

    def test_get_borders_of_peaks(self):
        data = pd.Series([1, 0, 1, 2, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 2, 2, 1, 0, 1, 2, 3, 4, 5, 3, 2, 1, 0])
        pattern_center = [4, 12, 22]
        ws = 3
        confidence = 1.5
        result = [(1, 7), (9, 15), (19, 25)]
        self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence), result)

    def test_get_borders_of_peaks_for_trough(self):
        data = pd.Series([4, 4, 5, 5, 3, 1, 3, 5, 5, 6, 3, 2])
        pattern_center = [5]
        ws = 5
        confidence = 3
        result = [(3, 7)]
        self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence, inverse = True), result)

    def test_get_start_and_end_of_segments(self):
        segments = [[1, 2, 3, 4], [5, 6, 7], [8], [], [12, 12]]
        result = [[1, 4], [5, 7], [8, 8], [12, 12]]
        utils_result = utils.get_start_and_end_of_segments(segments)
        for got, expected in zip(utils_result, result):
            self.assertEqual(got, expected)

    def test_get_start_and_end_of_segments_empty(self):
        segments = []
        result = []
        utils_result = utils.get_start_and_end_of_segments(segments)
        self.assertEqual(result, utils_result)

    def test_merge_intersecting_segments(self):
        test_cases = [
            {
                'index': [Segment(10, 20), Segment(30, 40)],
                'result': [[10, 20], [30, 40]],
                'step': 0,
            },
            {
                'index': [Segment(10, 20), Segment(13, 23), Segment(15, 17), Segment(20, 40)],
                'result': [[10, 40]],
                'step': 0,
            },
            {
                'index': [],
                'result': [],
                'step': 0,
            },
            {
                'index': [Segment(10, 20)],
                'result': [[10, 20]],
                'step': 0,
            },
            {
                'index': [Segment(10, 20), Segment(13, 23), Segment(25, 30), Segment(35, 40)],
                'result': [[10, 23], [25, 30], [35, 40]],
                'step': 0,
            },
            {
                'index': [Segment(10, 50), Segment(5, 40), Segment(15, 25), Segment(6, 50)],
                'result': [[5, 50]],
                'step': 0,
            },
            {
                'index': [Segment(5, 10), Segment(10, 20), Segment(25, 50)],
                'result': [[5, 20], [25, 50]],
                'step': 0,
            },
            {
                'index': [Segment(20, 40), Segment(10, 15), Segment(50, 60)],
                'result': [[10, 15], [20, 40], [50, 60]],
                'step': 0,
            },
            {
                'index': [Segment(20, 40), Segment(10, 20), Segment(50, 60)],
                'result': [[10, 40], [50, 60]],
                'step': 0,
            },
            {
                'index': [Segment(10, 10), Segment(20, 20), Segment(30, 30)],
                'result': [[10, 30]],
                'step': 10,
            },
        ]

        for case in test_cases:
            utils_result = utils.merge_intersecting_segments(case['index'], case['step'])
            for got, expected in zip(utils_result, case['result']):
                self.assertEqual(got.from_timestamp, expected[0])
                self.assertEqual(got.to_timestamp, expected[1])

    def test_serialize(self):
        segment_list = [Segment(100, 200)]
        serialize_list = utils.meta.SerializableList(segment_list)
        meta_result = utils.meta.serialize(serialize_list)
        expected_result = [{ 'from': 100, 'to': 200 }]
        self.assertEqual(meta_result, expected_result)

    def test_remove_duplicates_and_sort(self):
        a1 = [1, 3, 5]
        a2 = [8, 3, 6]
        expected_result = [1, 3, 5, 6, 8]
        utils_result = utils.remove_duplicates_and_sort(a1 + a2)
        self.assertEqual(utils_result, expected_result)
        self.assertEqual(utils.remove_duplicates_and_sort([]), [])


if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,43 @@ |
import unittest
from utils import get_intersected_chunks, get_chunks
import pandas as pd


class TestUtils(unittest.TestCase):

    def test_chunks_generator(self):
        intersection = 2
        chunk_size = 4

        cases = [
            (list(range(8)), [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]]),
            ([], [[]]),
            (list(range(1)), [[0]]),
            (list(range(4)), [[0, 1, 2, 3]]),
            (list(range(9)), [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7], [6, 7, 8]])
        ]

        for tested, expected in cases:
            tested_chunks = get_intersected_chunks(tested, intersection, chunk_size)
            self.assertSequenceEqual(tuple(tested_chunks), expected)

    def test_non_intersected_chunks(self):
        chunk_size = 4

        cases = [
            (tuple(range(12)), [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]),
            (tuple(range(9)), [[0, 1, 2, 3], [4, 5, 6, 7], [8]]),
            (tuple(range(10)), [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]),
            (tuple(range(11)), [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10]]),
            ([], []),
            (tuple(range(1)), [[0]]),
            (tuple(range(4)), [[0, 1, 2, 3]])
        ]

        for tested, expected in cases:
            tested_chunks = list(get_chunks(tested, chunk_size))
            self.assertSequenceEqual(tested_chunks, expected)


if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,122 @@ |
import sys
ANALYTICS_PATH = '../analytics'
TESTS_PATH = '../tests'
sys.path.extend([ANALYTICS_PATH, TESTS_PATH])

import pandas as pd
import numpy as np
import utils
import test_dataset
from analytic_types.segment import Segment
from detectors import pattern_detector, threshold_detector, anomaly_detector

# TODO: get_dataset
# TODO: get_segment
PEAK_DATASETS = []
# dataset with 3 peaks
TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1])
# TODO: more convenient way to specify labeled segments
POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}]
NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}]


class TesterSegment():

    def __init__(self, start: int, end: int, labeled: bool):
        self.start = start
        self.end = end
        self.labeled = labeled

    def get_segment(self):
        return {
            '_id': 'q',
            'analyticUnitId': 'q',
            'from': self.start,
            'to': self.end,
            'labeled': self.labeled,
            'deleted': not self.labeled
        }


class Metric():

    def __init__(self, expected_result, detector_result):
        self.expected_result = expected_result
        self.detector_result = detector_result['segments']

    def get_amount(self):
        return len(self.detector_result) / len(self.expected_result)

    def get_accuracy(self):
        correct_segment = 0
        invalid_segment = 0
        for segment in self.detector_result:
            current_cs = correct_segment
            for pattern in self.expected_result:
                if pattern['from'] <= segment['from'] and pattern['to'] >= segment['to']:
                    correct_segment += 1
                    break
            if correct_segment == current_cs:
                invalid_segment += 1
        non_detected = len(self.expected_result) - correct_segment
        return (correct_segment, invalid_segment, non_detected)


class ModelData():

    def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str):
        self.frame = frame
        self.positive_segments = positive_segments
        self.negative_segments = negative_segments
        self.model_type = model_type

    def get_segments_for_detection(self, positive_amount, negative_amount):
        segments = []
        for idx, bounds in enumerate(self.positive_segments):
            if idx >= positive_amount:
                break
            segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment())

        for idx, bounds in enumerate(self.negative_segments):
            if idx >= negative_amount:
                break
            segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment())

        return segments

    def get_all_correct_segments(self):
        return self.positive_segments


PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak')
PEAK_DATASETS.append(PEAK_DATA_1)


def main(model_type: str) -> list:
    table_metric = []
    if model_type == 'peak':
        for data in PEAK_DATASETS:
            dataset = data.frame
            segments = data.get_segments_for_detection(1, 0)
            segments = [Segment.from_json(segment) for segment in segments]
            detector = pattern_detector.PatternDetector('PEAK', 'test_id')
            training_result = detector.train(dataset, segments, {})
            cache = training_result['cache']
            detect_result = detector.detect(dataset, cache)
            detect_result = detect_result.to_json()
            peak_metric = Metric(data.get_all_correct_segments(), detect_result)
            table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy()))
    return table_metric

if __name__ == '__main__':
    '''
    This tool applies the models to the datasets and verifies that the detection result matches the expected segments.
    sys.argv[1] expects one of the model names -> see correct_name
    '''
    # TODO: use enum
    correct_name = ['peak', 'trough', 'jump', 'drop', 'general']
    if len(sys.argv) < 2:
        print('Enter one of the model names: {}'.format(correct_name))
        sys.exit(1)
    model_type = str(sys.argv[1]).lower()
    if model_type in correct_name:
        print(main(model_type))
    else:
        print('Enter one of the model names: {}'.format(correct_name))
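
# Example invocation (a sketch; the file name used below is an assumption, it is not shown in this diff):
#   python3 tools/analytic_model_tester.py peak
# This prints a list with one (amount, (correct, invalid, non_detected)) tuple per PEAK dataset.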
@ -0,0 +1,104 @@ |
import zmq
import zmq.asyncio
import asyncio
import json
from uuid import uuid4

context = zmq.asyncio.Context()
socket = context.socket(zmq.PAIR)
socket.connect('tcp://0.0.0.0:8002')


def create_message():
    message = {
        "method": "DATA",
        "payload": {
            "_id": uuid4().hex,
            "analyticUnitId": uuid4().hex,
            "type": "PUSH",
            "payload": {
                "data": [
                    [1552652025000, 12.499999999999998],
                    [1552652040000, 12.500000000000002],
                    [1552652055000, 12.499999999999996],
                    [1552652070000, 12.500000000000002],
                    [1552652085000, 12.499999999999998],
                    [1552652100000, 12.5],
                    [1552652115000, 12.83261113785909]
                ],
                "from": 1552652025001,
                "to": 1552652125541,
                "analyticUnitType": "GENERAL",
                "detector": "pattern",
                "cache": {
                    "pattern_center": [693],
                    "pattern_model": [
                        1.7763568394002505e-15,
                        5.329070518200751e-15,
                        1.7763568394002505e-15,
                        1.7763568394002505e-15,
                        1.7763568394002505e-15,
                        3.552713678800501e-15,
                        1.7763568394002505e-15,
                        3.552713678800501e-15,
                        3.552713678800501e-15,
                        1.7763568394002505e-15,
                        1.7763568394002505e-15,
                        0,
                        1.7763568394002505e-15,
                        1.7763568394002505e-15,
                        0
                    ],
                    "convolve_max": 7.573064690121713e-29,
                    "convolve_min": 7.573064690121713e-29,
                    "WINDOW_SIZE": 7,
                    "conv_del_min": 7,
                    "conv_del_max": 7
                }
            }
        }
    }

    return json.dumps(message)


async def handle_loop():
    while True:
        received_bytes = await socket.recv()
        text = received_bytes.decode('utf-8')

        print(text)


async def send_detect():
    data = create_message().encode('utf-8')
    await socket.send(data)


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    socket.send(b'PING')
    detects = [send_detect() for i in range(100)]
    detects_group = asyncio.gather(*detects)
    handle_group = asyncio.gather(handle_loop())
    common_group = asyncio.gather(handle_group, detects_group)
    loop.run_until_complete(common_group)
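
# Usage sketch (an assumption, not part of the original script): run this client while the
# analytics service is listening on the ZMQ PAIR socket configured above (tcp://0.0.0.0:8002),
# for example:
#   python3 zmq_client.py   # the file name is hypothetical
# It sends a PING plus 100 DATA tasks built by create_message() and prints every reply it receives.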