CorpGlory Inc.
5 years ago
58 changed files with 4526 additions and 1 deletion
@ -0,0 +1,32 @@
|
||||
{ |
||||
// Use IntelliSense to learn about possible attributes. |
||||
// Hover to view descriptions of existing attributes. |
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 |
||||
"version": "0.2.0", |
||||
"configurations": [ |
||||
{ |
||||
"name": "Attach (Remote Debug)", |
||||
"type": "python", |
||||
"request": "attach", |
||||
"port": 5679, |
||||
"host": "localhost", |
||||
"pathMappings": [ |
||||
{ |
||||
"localRoot": "${workspaceFolder}", |
||||
"remoteRoot": "/var/www/analytics" |
||||
} |
||||
] |
||||
}, |
||||
{ |
||||
"name": "Python: Current File", |
||||
"type": "python", |
||||
"request": "launch", |
||||
"windows": { |
||||
"program": "${workspaceFolder}\\bin\\server" |
||||
}, |
||||
"linux": { |
||||
"program": "${workspaceFolder}/bin/server" |
||||
} |
||||
} |
||||
] |
||||
} |
@ -0,0 +1,22 @@
|
||||
{ |
||||
"terminal.integrated.shell.windows": "C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", |
||||
"editor.insertSpaces": true, |
||||
"files.eol": "\n", |
||||
"files.exclude": { |
||||
"**/__pycache__/": true, |
||||
"dist": true, |
||||
"build": true |
||||
}, |
||||
"[python]": { |
||||
"editor.tabSize": 4, |
||||
}, |
||||
"python.envFile": "${workspaceFolder}/.vscode/.env", |
||||
"python.pythonPath": "python", |
||||
"python.linting.enabled": true, |
||||
"python.testing.unittestArgs": [ "-v" ], |
||||
"python.testing.pytestEnabled": false, |
||||
"python.testing.nosetestsEnabled": false, |
||||
"python.testing.unittestEnabled": true, |
||||
"python.linting.pylintEnabled": true, |
||||
"python.jediEnabled": false |
||||
} |
@ -0,0 +1,27 @@
|
||||
# Type hints |
||||
|
||||
Please use type hints (PEP 484): https://www.python.org/dev/peps/pep-0484/
||||
|
||||
# Line endings |
||||
|
||||
We use LF everywhere |
||||
|
||||
# Imports |
||||
|
||||
Import local files first, then specific libraries, and then standard libraries.
In other words, import from the most specific to the most common.
This keeps the most important dependencies at the top, where you see them first.
||||
|
||||
``` |
||||
|
||||
from data_provider import DataProvider |
||||
from anomaly_model import AnomalyModel |
||||
from pattern_detection_model import PatternDetectionModel |
||||
|
||||
import numpy as np |
||||
|
||||
from scipy.signal import argrelextrema |
||||
|
||||
import pickle |
||||
|
||||
``` |
@ -0,0 +1,12 @@
|
||||
FROM python:3.6.6 |
||||
|
||||
COPY requirements.txt /requirements.txt |
||||
|
||||
RUN pip install -r /requirements.txt |
||||
|
||||
WORKDIR /var/www/analytics |
||||
|
||||
COPY . /var/www/analytics/ |
||||
|
||||
|
||||
CMD ["python", "-u", "bin/server"] |
@ -1 +1,12 @@
|
||||
# analytics |
||||
# Hastic-server-analytics |
||||
|
||||
Python service which gets tasks from [hastic-server-node](https://github.com/hastic/hastic-server/tree/master/server) to:

* train statistical models
* detect patterns in time series data
||||
|
||||
## Architecture
||||
|
||||
The service uses [asyncio](https://docs.python.org/3/library/asyncio.html),
[concurrent.futures](https://docs.python.org/3.6/library/concurrent.futures.html#module-concurrent.futures) and
[pyzmq](https://pyzmq.readthedocs.io/en/latest/).
||||
|
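The actual entry point (`bin/server`) is not part of this excerpt; the sketch below only illustrates, under assumptions, how an asyncio + pyzmq loop could feed tasks to `AnalyticUnitManager`. The socket type, URL and message framing here are hypothetical:

```
import asyncio
import json

import zmq
import zmq.asyncio

from analytic_unit_manager import AnalyticUnitManager

async def serve(url: str = 'tcp://*:8002') -> None:
    context = zmq.asyncio.Context()
    socket = context.socket(zmq.PAIR)
    socket.bind(url)
    manager = AnalyticUnitManager()
    while True:
        message = await socket.recv_string()               # task JSON from hastic-server-node
        task = json.loads(message)
        result = await manager.handle_analytic_task(task)  # {'status': ..., 'payload': ...}
        await socket.send_string(json.dumps(result))

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(serve())
```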
@ -0,0 +1,39 @@
|
||||
""" |
||||
This is the place for classes and types
common to all analytics code.

For example, if you write something that is used
in analytic_unit_manager, it should be here.

If you create something specific that is used in only one place,
like PatternDetectionCache, it should not be here.
||||
""" |
||||
|
||||
import pandas as pd |
||||
from typing import Union, List, Tuple |
||||
|
||||
AnalyticUnitId = str |
||||
|
||||
ModelCache = dict |
||||
|
||||
# TODO: explicit timestamp / value |
||||
TimeSeries = List[Tuple[int, float]] |
||||
|
||||
""" |
||||
Example: |
||||
|
||||
tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) |
||||
ts = TimeSeries([4, 5, 6], tsis) |
||||
""" |
||||
Timestamp = Union[str, pd.Timestamp] |
||||
|
||||
class TimeSeriesIndex(pd.DatetimeIndex): |
||||
def __new__(cls, *args, **kwargs): |
||||
return pd.DatetimeIndex.__new__(cls, *args, **kwargs) |
||||
|
||||
# TODO: make generic type for values. See List definition for example of generic class |
||||
# TODO: constructor from DataFrame |
||||
# TODO: replace TimeSeries (above) with this class: rename TimeSeries2 to TimeSeries
||||
class TimeSeries2(pd.Series): |
||||
def __init__(self, *args, **kwargs): |
||||
super().__init__(*args, **kwargs) |
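To make the docstring example above concrete, here is how these types are meant to be used (a sketch; it assumes the module is importable as `analytic_types`):

```
from analytic_types import TimeSeries, TimeSeriesIndex, TimeSeries2

# TimeSeries is currently just a list of (timestamp, value) pairs:
ts: TimeSeries = [(1514764800000, 4.0), (1514768400000, 5.0)]

# TimeSeriesIndex / TimeSeries2 wrap the pandas equivalents,
# matching the docstring example above:
tsis = TimeSeriesIndex([
    '2017-12-31 16:00:00-08:00',
    '2017-12-31 17:00:00-08:00',
    '2017-12-31 18:00:00-08:00',
])
ts2 = TimeSeries2([4, 5, 6], tsis)
```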
@ -0,0 +1,38 @@
|
||||
from typing import Optional, List, Dict |
||||
|
||||
from analytic_types.segment import AnomalyDetectorSegment |
||||
from analytic_types.detector import Bound |
||||
|
||||
from utils.meta import JSONClass, SerializableList |
||||
|
||||
@JSONClass |
||||
class AnomalyCache: |
||||
def __init__( |
||||
self, |
||||
alpha: float, |
||||
confidence: float, |
||||
enable_bounds: str, |
||||
seasonality: Optional[int] = None, |
||||
segments: Optional[List[Dict]] = None, |
||||
time_step: Optional[int] = None, |
||||
): |
||||
self.alpha = alpha |
||||
self.confidence = confidence |
||||
self.enable_bounds = enable_bounds |
||||
if seasonality is not None and seasonality < 0:
||||
raise ValueError(f'Can`t create AnomalyCache: got invalid seasonality {seasonality}') |
||||
self.seasonality = seasonality |
||||
self.time_step = time_step |
||||
if segments is not None:
||||
anomaly_segments = map(AnomalyDetectorSegment.from_json, segments) |
||||
self.segments = SerializableList(anomaly_segments) |
||||
else: |
||||
self.segments = [] |
||||
|
||||
def set_segments(self, segments: List[AnomalyDetectorSegment]): |
||||
if len(segments) > 0: |
||||
self.segments = SerializableList(segments) |
||||
|
||||
def get_enabled_bounds(self) -> Bound: |
||||
#TODO: use class with to_json() |
||||
return Bound(self.enable_bounds) |
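A sketch of how this cache is round-tripped; `from_json` / `to_json` come from the `@JSONClass` decorator, and the camelCase key names below are an assumption based on keys like `'timeStep'` used elsewhere in the code:

```
payload = {
    'alpha': 0.1,
    'confidence': 2.0,
    'enableBounds': 'UPPER',      # one of the Bound enum values
    'seasonality': 3600000,       # milliseconds (hypothetical)
    'segments': [],
}

cache = AnomalyCache.from_json(payload)
assert cache.get_enabled_bounds() == Bound.UPPER
serialized = cache.to_json()      # stored by the server and passed back on detect
```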
@ -0,0 +1,14 @@
|
||||
import pandas as pd |
||||
|
||||
|
||||
class DataBucket: |
||||
|
||||
def __init__(self): |
||||
self.data = pd.DataFrame([], columns=['timestamp', 'value']) |
||||
|
||||
def receive_data(self, data: pd.DataFrame): |
||||
self.data = self.data.append(data, ignore_index=True) |
||||
|
||||
def drop_data(self, count: int): |
||||
if count > 0: |
||||
self.data = self.data.iloc[count:] |
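Usage is straightforward: detectors accumulate incoming chunks here and trim the oldest rows once they have been processed, e.g.:

```
import pandas as pd

bucket = DataBucket()
chunk = pd.DataFrame({'timestamp': [1, 2, 3], 'value': [0.5, 0.7, 0.2]})

bucket.receive_data(chunk)   # append incoming points
bucket.drop_data(2)          # drop the 2 oldest rows
assert len(bucket.data) == 1
```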
@ -0,0 +1,47 @@
|
||||
from analytic_types import ModelCache, TimeSeries |
||||
from analytic_types.segment import Segment |
||||
|
||||
from enum import Enum |
||||
from typing import List, Optional, Tuple |
||||
|
||||
import utils.meta |
||||
|
||||
class Bound(Enum): |
||||
ALL = 'ALL' |
||||
UPPER = 'UPPER' |
||||
LOWER = 'LOWER' |
||||
|
||||
class DetectionResult: |
||||
|
||||
def __init__( |
||||
self, |
||||
cache: Optional[ModelCache] = None, |
||||
segments: Optional[List[Segment]] = None, |
||||
last_detection_time: Optional[int] = None
||||
): |
||||
if cache is None: |
||||
cache = {} |
||||
if segments is None: |
||||
segments = [] |
||||
self.cache = cache |
||||
self.segments = segments |
||||
self.last_detection_time = last_detection_time |
||||
|
||||
# TODO: use @utils.meta.JSONClass (now it can't serialize list of objects) |
||||
def to_json(self): |
||||
return { |
||||
'cache': self.cache, |
||||
'segments': list(map(lambda segment: segment.to_json(), self.segments)), |
||||
'lastDetectionTime': self.last_detection_time |
||||
} |
||||
|
||||
@utils.meta.JSONClass |
||||
class ProcessingResult(): |
||||
|
||||
def __init__( |
||||
self, |
||||
lower_bound: Optional[TimeSeries] = None, |
||||
upper_bound: Optional[TimeSeries] = None, |
||||
): |
||||
self.lower_bound = lower_bound |
||||
self.upper_bound = upper_bound |
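For illustration, a detection result serializes to the shape the server expects; the timestamps here are hypothetical epoch milliseconds:

```
from analytic_types.segment import Segment

result = DetectionResult(
    cache={'timeStep': 60000},
    segments=[Segment(1500000000000, 1500000060000)],
    last_detection_time=1500000060000
)

result.to_json()
# {
#   'cache': {'timeStep': 60000},
#   'segments': [<Segment.to_json() output>],
#   'lastDetectionTime': 1500000060000
# }
```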
@ -0,0 +1,17 @@
|
||||
import utils.meta |
||||
|
||||
@utils.meta.JSONClass |
||||
class LearningInfo: |
||||
|
||||
def __init__(self): |
||||
super().__init__() |
||||
self.confidence = [] |
||||
self.patterns_list = [] |
||||
self.pattern_width = [] |
||||
self.pattern_height = [] |
||||
self.pattern_timestamp = [] |
||||
self.segment_center_list = [] |
||||
self.patterns_value = [] |
||||
|
||||
def __str__(self): |
||||
return str(self.to_json()) |
@ -0,0 +1,57 @@
|
||||
from typing import Optional |
||||
|
||||
import utils.meta |
||||
|
||||
@utils.meta.JSONClass |
||||
class Segment: |
||||
''' |
||||
Used for segment manipulation instead of { 'from': ..., 'to': ... } dict |
||||
''' |
||||
|
||||
def __init__( |
||||
self, |
||||
from_timestamp: int, |
||||
to_timestamp: int, |
||||
_id: Optional[str] = None, |
||||
analytic_unit_id: Optional[str] = None, |
||||
labeled: Optional[bool] = None, |
||||
deleted: Optional[bool] = None, |
||||
message: Optional[str] = None |
||||
): |
||||
if to_timestamp < from_timestamp: |
||||
raise ValueError(f'Can`t create segment with to < from: {to_timestamp} < {from_timestamp}') |
||||
self.from_timestamp = from_timestamp |
||||
self.to_timestamp = to_timestamp |
||||
self._id = _id |
||||
self.analytic_unit_id = analytic_unit_id |
||||
self.labeled = labeled |
||||
self.deleted = deleted |
||||
self.message = message |
||||
|
||||
@utils.meta.JSONClass |
||||
class AnomalyDetectorSegment(Segment): |
||||
''' |
||||
Used for segment manipulation instead of { 'from': ..., 'to': ..., 'data': ... } dict |
||||
''' |
||||
|
||||
def __init__( |
||||
self, |
||||
from_timestamp: int, |
||||
to_timestamp: int, |
||||
data: Optional[list] = None,
||||
_id: Optional[str] = None, |
||||
analytic_unit_id: Optional[str] = None, |
||||
labeled: Optional[bool] = None, |
||||
deleted: Optional[bool] = None, |
||||
message: Optional[str] = None |
||||
): |
||||
super().__init__( |
||||
from_timestamp, |
||||
to_timestamp, |
||||
_id, |
||||
analytic_unit_id, |
||||
labeled, |
||||
deleted, |
||||
message |
||||
) |
||||
self.data = data if data is not None else []
@ -0,0 +1,103 @@
|
||||
from typing import Dict |
||||
import logging as log |
||||
import traceback |
||||
from concurrent.futures import Executor, ThreadPoolExecutor |
||||
|
||||
from analytic_unit_worker import AnalyticUnitWorker |
||||
from analytic_types import AnalyticUnitId, ModelCache |
||||
from analytic_types.segment import Segment |
||||
import detectors |
||||
|
||||
|
||||
logger = log.getLogger('AnalyticUnitManager') |
||||
|
||||
|
||||
def get_detector_by_type( |
||||
detector_type: str, analytic_unit_type: str, analytic_unit_id: AnalyticUnitId |
||||
) -> detectors.Detector: |
||||
if detector_type == 'pattern': |
||||
return detectors.PatternDetector(analytic_unit_type, analytic_unit_id) |
||||
elif detector_type == 'threshold': |
||||
return detectors.ThresholdDetector(analytic_unit_id) |
||||
elif detector_type == 'anomaly': |
||||
return detectors.AnomalyDetector(analytic_unit_id) |
||||
|
||||
raise ValueError('Unknown detector type "%s"' % detector_type) |
||||
|
||||
|
||||
class AnalyticUnitManager: |
||||
|
||||
def __init__(self): |
||||
self.analytic_workers: Dict[AnalyticUnitId, AnalyticUnitWorker] = dict() |
||||
self.workers_executor = ThreadPoolExecutor() |
||||
|
||||
def __ensure_worker( |
||||
self, |
||||
analytic_unit_id: AnalyticUnitId, |
||||
detector_type: str, |
||||
analytic_unit_type: str |
||||
) -> AnalyticUnitWorker: |
||||
if analytic_unit_id in self.analytic_workers: |
||||
# TODO: check that type is the same |
||||
return self.analytic_workers[analytic_unit_id] |
||||
detector = get_detector_by_type(detector_type, analytic_unit_type, analytic_unit_id) |
||||
worker = AnalyticUnitWorker(analytic_unit_id, detector, self.workers_executor) |
||||
self.analytic_workers[analytic_unit_id] = worker |
||||
return worker |
||||
|
||||
async def __handle_analytic_task(self, task: object) -> dict: |
||||
""" |
||||
returns payload or None |
||||
""" |
||||
analytic_unit_id: AnalyticUnitId = task['analyticUnitId'] |
||||
logger.debug('Analytics got task with type: {} for unit: {}'.format(task['type'], analytic_unit_id))
||||
if task['type'] == 'CANCEL': |
||||
if analytic_unit_id in self.analytic_workers: |
||||
self.analytic_workers[analytic_unit_id].cancel() |
||||
return |
||||
|
||||
payload = task['payload'] |
||||
worker = self.__ensure_worker(analytic_unit_id, payload['detector'], payload['analyticUnitType']) |
||||
data = payload.get('data') |
||||
if task['type'] == 'PUSH': |
||||
# TODO: do it a better way |
||||
res = await worker.consume_data(data, payload['cache']) |
||||
if res: |
||||
res.update({ 'analyticUnitId': analytic_unit_id }) |
||||
return res |
||||
elif task['type'] == 'LEARN': |
||||
if 'segments' in payload: |
||||
segments = payload['segments'] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
return await worker.do_train(segments, data, payload['cache']) |
||||
elif 'threshold' in payload: |
||||
return await worker.do_train(payload['threshold'], data, payload['cache']) |
||||
elif 'anomaly' in payload: |
||||
return await worker.do_train(payload['anomaly'], data, payload['cache']) |
||||
else: |
||||
raise ValueError('No segments or threshold in LEARN payload') |
||||
elif task['type'] == 'DETECT': |
||||
return await worker.do_detect(data, payload['cache']) |
||||
elif task['type'] == 'PROCESS': |
||||
return await worker.process_data(data, payload['cache']) |
||||
|
||||
raise ValueError('Unknown task type "%s"' % task['type']) |
||||
|
||||
async def handle_analytic_task(self, task: object): |
||||
try: |
||||
logger.debug('Start handle_analytic_task with analytic unit: {}'.format(task['analyticUnitId']))
||||
result_payload = await self.__handle_analytic_task(task) |
||||
result_message = { |
||||
'status': 'SUCCESS', |
||||
'payload': result_payload |
||||
} |
||||
logger.debug('Finished handle_analytic_task for analytic unit: {}'.format(task['analyticUnitId']))
||||
return result_message |
||||
except Exception as e: |
||||
error_text = traceback.format_exc() |
||||
logger.error("handle_analytic_task Exception: '%s'" % error_text) |
||||
# TODO: move result to a class which renders to json for messaging to analytics |
||||
return { |
||||
'status': 'FAILED', |
||||
'error': repr(e) |
||||
} |
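For reference, a task handled by this manager looks roughly like the following; the field values are hypothetical, the real payloads are produced by hastic-server-node:

```
import asyncio

from analytic_unit_manager import AnalyticUnitManager

manager = AnalyticUnitManager()

task = {
    'analyticUnitId': 'u1',                    # hypothetical id
    'type': 'LEARN',
    'payload': {
        'detector': 'threshold',
        'analyticUnitType': 'THRESHOLD',
        'threshold': {'value': 10, 'condition': '>'},
        'data': [[1500000000000, 1.0], [1500000060000, 2.0]],
        'cache': None,
    },
}

result = asyncio.get_event_loop().run_until_complete(
    manager.handle_analytic_task(task)
)
# -> {'status': 'SUCCESS', 'payload': {'cache': {...}}}
```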
@ -0,0 +1,116 @@
|
||||
import config |
||||
import detectors |
||||
import logging |
||||
import pandas as pd |
||||
from typing import Optional, Union, Generator, List, Tuple |
||||
import concurrent.futures |
||||
import asyncio |
||||
import utils |
||||
from utils import get_intersected_chunks, get_chunks, prepare_data |
||||
|
||||
from analytic_types import ModelCache, TimeSeries |
||||
from analytic_types.detector import DetectionResult |
||||
|
||||
logger = logging.getLogger('AnalyticUnitWorker') |
||||
|
||||
|
||||
class AnalyticUnitWorker: |
||||
|
||||
CHUNK_WINDOW_SIZE_FACTOR = 100 |
||||
CHUNK_INTERSECTION_FACTOR = 2 |
||||
|
||||
assert CHUNK_WINDOW_SIZE_FACTOR > CHUNK_INTERSECTION_FACTOR, \ |
||||
'CHUNK_INTERSECTION_FACTOR should be less than CHUNK_WINDOW_SIZE_FACTOR' |
||||
|
||||
def __init__(self, analytic_unit_id: str, detector: detectors.Detector, executor: concurrent.futures.Executor): |
||||
self.analytic_unit_id = analytic_unit_id |
||||
self._detector = detector |
||||
self._executor: concurrent.futures.Executor = executor |
||||
self._training_future: Optional[asyncio.Future] = None
||||
|
||||
async def do_train( |
||||
self, payload: Union[list, dict], data: TimeSeries, cache: Optional[ModelCache] |
||||
) -> Optional[ModelCache]: |
||||
|
||||
dataframe = prepare_data(data) |
||||
|
||||
cfuture: concurrent.futures.Future = self._executor.submit( |
||||
self._detector.train, dataframe, payload, cache |
||||
) |
||||
self._training_future = asyncio.wrap_future(cfuture) |
||||
try: |
||||
new_cache: ModelCache = await asyncio.wait_for(self._training_future, timeout = config.LEARNING_TIMEOUT) |
||||
return new_cache |
||||
except asyncio.CancelledError: |
||||
return None |
||||
except asyncio.TimeoutError: |
||||
raise Exception('Timeout ({}s) exceeded while learning'.format(config.LEARNING_TIMEOUT)) |
||||
|
||||
async def do_detect(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
||||
|
||||
window_size = self._detector.get_window_size(cache) |
||||
chunk_size = window_size * self.CHUNK_WINDOW_SIZE_FACTOR |
||||
chunk_intersection = window_size * self.CHUNK_INTERSECTION_FACTOR |
||||
|
||||
detections: List[DetectionResult] = [] |
||||
chunks = [] |
||||
# XXX: get_chunks(data, chunk_size) == get_intersected_chunks(data, 0, chunk_size) |
||||
if self._detector.is_detection_intersected(): |
||||
chunks = get_intersected_chunks(data, chunk_intersection, chunk_size) |
||||
else: |
||||
chunks = get_chunks(data, chunk_size) |
||||
|
||||
for chunk in chunks: |
||||
await asyncio.sleep(0) |
||||
chunk_dataframe = prepare_data(chunk) |
||||
detected: DetectionResult = self._detector.detect(chunk_dataframe, cache) |
||||
detections.append(detected) |
||||
|
||||
if len(detections) == 0: |
||||
raise RuntimeError(f'do_detect for {self.analytic_unit_id} got empty detection results') |
||||
|
||||
detection_result = self._detector.concat_detection_results(detections) |
||||
return detection_result.to_json() |
||||
|
||||
def cancel(self): |
||||
if self._training_future is not None: |
||||
self._training_future.cancel() |
||||
|
||||
async def consume_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> Optional[dict]: |
||||
window_size = self._detector.get_window_size(cache) |
||||
|
||||
detections: List[DetectionResult] = [] |
||||
|
||||
for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR): |
||||
await asyncio.sleep(0) |
||||
chunk_dataframe = prepare_data(chunk) |
||||
detected = self._detector.consume_data(chunk_dataframe, cache) |
||||
if detected is not None: |
||||
detections.append(detected) |
||||
|
||||
if len(detections) == 0: |
||||
return None |
||||
else: |
||||
detection_result = self._detector.concat_detection_results(detections) |
||||
return detection_result.to_json() |
||||
|
||||
async def process_data(self, data: TimeSeries, cache: ModelCache) -> dict: |
||||
assert isinstance(self._detector, detectors.ProcessingDetector), \ |
||||
f'{self.analytic_unit_id} detector is not ProcessingDetector, can`t process data' |
||||
assert cache is not None, f'{self.analytic_unit_id} got empty cache for processing data' |
||||
|
||||
processed_chunks = [] |
||||
window_size = self._detector.get_window_size(cache) |
||||
for chunk in get_chunks(data, window_size * self.CHUNK_WINDOW_SIZE_FACTOR): |
||||
await asyncio.sleep(0) |
||||
chunk_dataframe = prepare_data(chunk) |
||||
processed = self._detector.process_data(chunk_dataframe, cache) |
||||
if processed is not None: |
||||
processed_chunks.append(processed) |
||||
|
||||
if len(processed_chunks) == 0: |
||||
raise RuntimeError(f'process_data for {self.analytic_unit_id} got empty processing results') |
||||
|
||||
# TODO: maybe we should process all chunks inside of detector? |
||||
result = self._detector.concat_processing_results(processed_chunks) |
||||
return result.to_json() |
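The chunking helpers come from `utils`, which is not part of this excerpt; below is a minimal sketch of the behaviour the worker relies on (the real implementations may differ):

```
from typing import Generator, List

def get_chunks(data: List, chunk_size: int) -> Generator[List, None, None]:
    # consecutive, non-overlapping chunks
    for start in range(0, len(data), chunk_size):
        yield data[start:start + chunk_size]

def get_intersected_chunks(data: List, intersection: int, chunk_size: int) -> Generator[List, None, None]:
    # chunks overlapping by `intersection` points, so a pattern crossing
    # a chunk boundary is still seen in full by the detector
    step = chunk_size - intersection   # assumes chunk_size > intersection
    for start in range(0, len(data), step):
        yield data[start:start + chunk_size]
        if start + chunk_size >= len(data):
            break
```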
@ -0,0 +1,30 @@
|
||||
import os |
||||
import json |
||||
|
||||
|
||||
PARENT_FOLDER = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) |
||||
CONFIG_FILE = os.path.join(PARENT_FOLDER, 'config.json') |
||||
|
||||
|
||||
config_exists = os.path.isfile(CONFIG_FILE) |
||||
if config_exists: |
||||
with open(CONFIG_FILE) as f: |
||||
config = json.load(f) |
||||
else: |
||||
print('Config file %s doesn`t exist, using defaults' % CONFIG_FILE) |
||||
|
||||
|
||||
def get_config_field(field: str, default_val = None): |
||||
if field in os.environ: |
||||
return os.environ[field] |
||||
|
||||
if config_exists and field in config and config[field] != '': |
||||
return config[field] |
||||
|
||||
if default_val is not None: |
||||
return default_val |
||||
|
||||
raise Exception('Please configure {}'.format(field)) |
||||
|
||||
HASTIC_SERVER_URL = get_config_field('HASTIC_SERVER_URL', 'ws://localhost:8002') |
||||
LEARNING_TIMEOUT = get_config_field('LEARNING_TIMEOUT', 120) |
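Resolution order is: environment variable, then `config.json`, then the provided default. For example:

```
import os

os.environ['LEARNING_TIMEOUT'] = '300'
get_config_field('LEARNING_TIMEOUT', 120)   # -> '300' (env values are strings)

del os.environ['LEARNING_TIMEOUT']
get_config_field('LEARNING_TIMEOUT', 120)   # -> 120 if not set in config.json

get_config_field('SOME_UNSET_FIELD')        # -> raises Exception (no default)
```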
@ -0,0 +1,4 @@
|
||||
from detectors.detector import Detector, ProcessingDetector |
||||
from detectors.pattern_detector import PatternDetector |
||||
from detectors.threshold_detector import ThresholdDetector |
||||
from detectors.anomaly_detector import AnomalyDetector |
@ -0,0 +1,277 @@
|
||||
from enum import Enum |
||||
import logging |
||||
import numpy as np |
||||
import pandas as pd |
||||
import math |
||||
from typing import Optional, Union, List, Tuple, Generator |
||||
import operator |
||||
|
||||
from analytic_types import AnalyticUnitId, ModelCache |
||||
from analytic_types.detector import DetectionResult, ProcessingResult, Bound |
||||
from analytic_types.data_bucket import DataBucket |
||||
from analytic_types.segment import Segment, AnomalyDetectorSegment |
||||
from analytic_types.cache import AnomalyCache |
||||
from detectors import Detector, ProcessingDetector |
||||
import utils |
||||
|
||||
MAX_DEPENDENCY_LEVEL = 100 |
||||
MIN_DEPENDENCY_FACTOR = 0.1 |
||||
BASIC_ALPHA = 0.5 |
||||
logger = logging.getLogger('ANOMALY_DETECTOR') |
||||
|
||||
|
||||
class AnomalyDetector(ProcessingDetector): |
||||
|
||||
def __init__(self, analytic_unit_id: AnalyticUnitId): |
||||
super().__init__(analytic_unit_id) |
||||
self.bucket = DataBucket() |
||||
|
||||
def train(self, dataframe: pd.DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache: |
||||
cache = AnomalyCache.from_json(payload) |
||||
cache.time_step = utils.find_interval(dataframe) |
||||
segments = cache.segments |
||||
|
||||
if len(segments) > 0: |
||||
seasonality = cache.seasonality |
||||
prepared_segments = [] |
||||
|
||||
for segment in segments: |
||||
segment_len = (int(segment.to_timestamp) - int(segment.from_timestamp)) |
||||
assert segment_len <= seasonality, \ |
||||
f'seasonality {seasonality} must be greater than or equal to segment length {segment_len}'
||||
|
||||
from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment.from_timestamp, unit='ms')) |
||||
to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment.to_timestamp, unit='ms')) |
||||
segment_data = dataframe[from_index : to_index] |
||||
prepared_segments.append( |
||||
AnomalyDetectorSegment( |
||||
segment.from_timestamp, |
||||
segment.to_timestamp, |
||||
segment_data.value.tolist() |
||||
) |
||||
) |
||||
cache.set_segments(prepared_segments) |
||||
|
||||
return { |
||||
'cache': cache.to_json() |
||||
} |
||||
|
||||
# TODO: ModelCache -> DetectorState |
||||
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||
if cache is None:
raise ValueError(f'Analytic unit {self.analytic_unit_id} got an empty cache')
||||
data = dataframe['value'] |
||||
|
||||
cache = AnomalyCache.from_json(cache) |
||||
segments = cache.segments |
||||
enabled_bounds = cache.get_enabled_bounds() |
||||
|
||||
smoothed_data = utils.exponential_smoothing(data, cache.alpha) |
||||
|
||||
lower_bound = smoothed_data - cache.confidence |
||||
upper_bound = smoothed_data + cache.confidence |
||||
|
||||
if len(segments) > 0: |
||||
data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0]) |
||||
|
||||
for segment in segments: |
||||
seasonality_index = cache.seasonality // cache.time_step |
||||
seasonality_offset = self.get_seasonality_offset( |
||||
segment.from_timestamp, |
||||
cache.seasonality, |
||||
data_start_time, |
||||
cache.time_step |
||||
) |
||||
segment_data = pd.Series(segment.data) |
||||
|
||||
lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER) |
||||
upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER) |
||||
|
||||
detected_segments = list(self.detections_generator(dataframe, upper_bound, lower_bound, enabled_bounds)) |
||||
|
||||
last_dataframe_time = dataframe.iloc[-1]['timestamp'] |
||||
last_detection_time = utils.convert_pd_timestamp_to_ms(last_dataframe_time) |
||||
|
||||
return DetectionResult(cache.to_json(), detected_segments, last_detection_time) |
||||
|
||||
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||
if cache is None: |
||||
msg = f'consume_data got invalid cache {cache} for task {self.analytic_unit_id}' |
||||
logger.error(msg)
||||
raise ValueError(msg) |
||||
|
||||
data_without_nan = data.dropna() |
||||
|
||||
if len(data_without_nan) == 0: |
||||
return None |
||||
|
||||
self.bucket.receive_data(data_without_nan) |
||||
|
||||
if len(self.bucket.data) >= self.get_window_size(cache): |
||||
return self.detect(self.bucket.data, cache) |
||||
|
||||
return None |
||||
|
||||
def is_detection_intersected(self) -> bool: |
||||
return False |
||||
|
||||
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||
''' |
||||
get the number of values that will affect the next value |
||||
''' |
||||
|
||||
if cache is None: |
||||
raise ValueError('anomaly detector got None cache') |
||||
cache = AnomalyCache.from_json(cache) |
||||
|
||||
for level in range(1, MAX_DEPENDENCY_LEVEL): |
||||
if (1 - cache.alpha) ** level < MIN_DEPENDENCY_FACTOR: |
||||
break |
||||
|
||||
seasonality = 0 |
||||
if len(cache.segments) > 0: |
||||
seasonality = cache.seasonality // cache.time_step |
||||
return max(level, seasonality) |
||||
|
||||
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: |
||||
result = DetectionResult() |
||||
time_step = detections[0].cache['timeStep'] |
||||
for detection in detections: |
||||
result.segments.extend(detection.segments) |
||||
result.last_detection_time = detection.last_detection_time |
||||
result.cache = detection.cache |
||||
result.segments = utils.merge_intersecting_segments(result.segments, time_step) |
||||
return result |
||||
|
||||
# TODO: remove duplication with detect() |
||||
def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult: |
||||
cache = AnomalyCache.from_json(cache) |
||||
segments = cache.segments |
||||
enabled_bounds = cache.get_enabled_bounds() |
||||
|
||||
# TODO: exponential_smoothing should return dataframe with related timestamps |
||||
smoothed_data = utils.exponential_smoothing(dataframe['value'], cache.alpha) |
||||
|
||||
lower_bound = smoothed_data - cache.confidence |
||||
upper_bound = smoothed_data + cache.confidence |
||||
|
||||
if len(segments) > 0: |
||||
data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0]) |
||||
|
||||
for segment in segments: |
||||
seasonality_index = cache.seasonality // cache.time_step |
||||
# TODO: move it to utils and add tests |
||||
seasonality_offset = self.get_seasonality_offset( |
||||
segment.from_timestamp, |
||||
cache.seasonality, |
||||
data_start_time, |
||||
cache.time_step |
||||
) |
||||
segment_data = pd.Series(segment.data) |
||||
|
||||
lower_bound = self.add_season_to_data(lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER) |
||||
upper_bound = self.add_season_to_data(upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER) |
||||
|
||||
# TODO: support multiple segments |
||||
|
||||
timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp) |
||||
lower_bound_timeseries = list(zip(timestamps, lower_bound.values.tolist())) |
||||
upper_bound_timeseries = list(zip(timestamps, upper_bound.values.tolist())) |
||||
|
||||
if enabled_bounds == Bound.ALL: |
||||
return ProcessingResult(lower_bound_timeseries, upper_bound_timeseries) |
||||
elif enabled_bounds == Bound.UPPER: |
||||
return ProcessingResult(upper_bound = upper_bound_timeseries) |
||||
elif enabled_bounds == Bound.LOWER: |
||||
return ProcessingResult(lower_bound = lower_bound_timeseries) |
||||
|
||||
def add_season_to_data(self, data: pd.Series, segment: pd.Series, offset: int, seasonality: int, bound_type: Bound) -> pd.Series: |
||||
# data - the smoothed series to which the seasonal pattern is applied
# for Bound.UPPER the segment's upper bound is added on top of the data
# for Bound.LOWER it is subtracted (note the `* -1` below)
||||
len_smoothed_data = len(data) |
||||
for idx, _ in enumerate(data): |
||||
if idx - offset < 0: |
||||
#TODO: add seasonality for non empty parts |
||||
continue |
||||
if (idx - offset) % seasonality == 0: |
||||
if bound_type == Bound.UPPER: |
||||
upper_segment_bound = self.get_segment_bound(segment, Bound.UPPER) |
||||
data = data.add(pd.Series(upper_segment_bound.values, index = segment.index + idx), fill_value = 0) |
||||
elif bound_type == Bound.LOWER: |
||||
lower_segment_bound = self.get_segment_bound(segment, Bound.LOWER) |
||||
data = data.add(pd.Series(lower_segment_bound.values * -1, index = segment.index + idx), fill_value = 0) |
||||
else: |
||||
raise ValueError(f'unknown bound type: {bound_type.value}') |
||||
|
||||
return data[:len_smoothed_data] |
||||
|
||||
def get_segment_bound(self, segment: pd.Series, bound: Bound) -> pd.Series: |
||||
''' |
||||
the segment is split at its median to take its top or bottom part;
that part is smoothed and shifted above (UPPER) or below (LOWER) the segment
||||
''' |
||||
if len(segment) < 2: |
||||
return segment |
||||
comparison_operator = operator.gt if bound == Bound.UPPER else operator.le |
||||
segment = segment - segment.min() |
||||
segment_median = segment.median() |
||||
part = [val if comparison_operator(val, segment_median) else segment_median for val in segment.values] |
||||
part = pd.Series(part, index = segment.index) |
||||
smoothed_part = utils.exponential_smoothing(part, BASIC_ALPHA) |
||||
difference = [abs(x - y) for x, y in zip(part, smoothed_part)] |
||||
max_diff = max(difference) |
||||
bound = [val + max_diff for val in smoothed_part.values] |
||||
bound = pd.Series(bound, index = segment.index) |
||||
return bound |
||||
|
||||
def get_seasonality_offset(self, from_timestamp: int, seasonality: int, data_start_time: int, time_step: int) -> int: |
||||
season_count = math.ceil(abs(from_timestamp - data_start_time) / seasonality) |
||||
start_seasonal_segment = from_timestamp + seasonality * season_count |
||||
seasonality_time_offset = abs(start_seasonal_segment - data_start_time) % seasonality |
||||
seasonality_offset = math.ceil(seasonality_time_offset / time_step) |
||||
return seasonality_offset |
||||
|
||||
def detections_generator( |
||||
self, |
||||
dataframe: pd.DataFrame, |
||||
upper_bound: pd.DataFrame, |
||||
lower_bound: pd.DataFrame, |
||||
enabled_bounds: Bound |
||||
) -> Generator[Segment, None, Segment]: |
||||
in_segment = False |
||||
segment_start = 0 |
||||
bound: Bound = None |
||||
for idx, val in enumerate(dataframe['value'].values): |
||||
if val > upper_bound.values[idx]: |
||||
if enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL: |
||||
if not in_segment: |
||||
in_segment = True |
||||
segment_start = dataframe['timestamp'][idx] |
||||
bound = Bound.UPPER |
||||
continue |
||||
|
||||
if val < lower_bound.values[idx]: |
||||
if enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL: |
||||
if not in_segment: |
||||
in_segment = True |
||||
segment_start = dataframe['timestamp'][idx] |
||||
bound = Bound.LOWER |
||||
continue |
||||
|
||||
if in_segment: |
||||
segment_end = dataframe['timestamp'][idx - 1] |
||||
yield Segment( |
||||
utils.convert_pd_timestamp_to_ms(segment_start), |
||||
utils.convert_pd_timestamp_to_ms(segment_end), |
||||
message=f'{val} out of {str(bound.value)} bound' |
||||
) |
||||
in_segment = False |
||||
else: |
||||
if in_segment: |
||||
segment_end = dataframe['timestamp'][idx] |
||||
return Segment( |
||||
utils.convert_pd_timestamp_to_ms(segment_start), |
||||
utils.convert_pd_timestamp_to_ms(segment_end), |
||||
message=f'{val} out of {str(bound.value)} bound' |
||||
) |
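To make the seasonality bookkeeping concrete, here is a small worked example of `get_seasonality_offset` with hypothetical numbers:

```
detector = AnomalyDetector('example-unit')   # hypothetical analytic unit id

# A labeled seasonal segment starts at t=100, the season length is 60,
# the incoming data starts at t=10 and points are 5 time units apart.
offset = detector.get_seasonality_offset(
    from_timestamp=100, seasonality=60, data_start_time=10, time_step=5
)
assert offset == 6
# The season is re-applied starting 6 points into the data,
# then every seasonality // time_step == 12 points (see detect() above).
```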
@ -0,0 +1,80 @@
|
||||
from abc import ABC, abstractmethod |
||||
from pandas import DataFrame |
||||
from typing import Optional, Union, List |
||||
|
||||
from analytic_types import ModelCache, TimeSeries, AnalyticUnitId |
||||
from analytic_types.detector import DetectionResult, ProcessingResult |
||||
from analytic_types.segment import Segment |
||||
|
||||
|
||||
class Detector(ABC): |
||||
|
||||
def __init__(self, analytic_unit_id: AnalyticUnitId): |
||||
self.analytic_unit_id = analytic_unit_id |
||||
|
||||
@abstractmethod |
||||
def train(self, dataframe: DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache: |
||||
""" |
||||
Should be thread-safe to other detectors' train method |
||||
""" |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def detect(self, dataframe: DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def consume_data(self, data: DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||
pass |
||||
|
||||
def is_detection_intersected(self) -> bool: |
||||
return True |
||||
|
||||
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: |
||||
result = DetectionResult() |
||||
for detection in detections: |
||||
result.segments.extend(detection.segments) |
||||
result.last_detection_time = detection.last_detection_time |
||||
result.cache = detection.cache |
||||
return result |
||||
|
||||
def get_value_from_cache(self, cache: ModelCache, key: str, required = False): |
||||
value = cache.get(key) |
||||
if value is None and required:
||||
raise ValueError(f'Missing required "{key}" field in cache for analytic unit {self.analytic_unit_id}') |
||||
return value |
||||
|
||||
|
||||
class ProcessingDetector(Detector): |
||||
|
||||
@abstractmethod |
||||
def process_data(self, data: TimeSeries, cache: Optional[ModelCache]) -> ProcessingResult: |
||||
''' |
||||
Data processing to receive additional time series that represents detector's settings |
||||
''' |
||||
pass |
||||
|
||||
def concat_processing_results(self, processing_results: List[ProcessingResult]) -> Optional[ProcessingResult]: |
||||
''' |
||||
Concatenate sequential ProcessingResults that received via |
||||
splitting dataset to chunks in analytic worker |
||||
''' |
||||
|
||||
if len(processing_results) == 0: |
||||
return None |
||||
|
||||
united_result = ProcessingResult() |
||||
for result in processing_results: |
||||
if result.lower_bound is not None: |
||||
if united_result.lower_bound is None: united_result.lower_bound = [] |
||||
united_result.lower_bound.extend(result.lower_bound) |
||||
|
||||
if result.upper_bound is not None: |
||||
if united_result.upper_bound is None: united_result.upper_bound = [] |
||||
united_result.upper_bound.extend(result.upper_bound) |
||||
|
||||
return united_result |
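A minimal sketch of what implementing this interface looks like; the detector below is a toy, not part of the codebase:

```
import pandas as pd
from typing import Optional, Union

from analytic_types import ModelCache
from analytic_types.detector import DetectionResult
from detectors import Detector

class NoopDetector(Detector):
    '''Toy detector that never reports anything; shows the minimal interface.'''

    def train(self, dataframe: pd.DataFrame, payload: Union[list, dict], cache: Optional[ModelCache]) -> ModelCache:
        return { 'cache': {} }

    def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
        return DetectionResult(cache, [], None)

    def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]:
        return None

    def get_window_size(self, cache: Optional[ModelCache]) -> int:
        return 1
```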
@ -0,0 +1,147 @@
|
||||
import models |
||||
|
||||
import asyncio |
||||
import logging |
||||
import config |
||||
|
||||
import pandas as pd |
||||
from typing import Optional, Generator, List |
||||
|
||||
from detectors import Detector |
||||
from analytic_types.data_bucket import DataBucket |
||||
from utils import convert_pd_timestamp_to_ms |
||||
from analytic_types import AnalyticUnitId, ModelCache |
||||
from analytic_types.detector import DetectionResult |
||||
from analytic_types.segment import Segment |
||||
import utils |
||||
|
||||
logger = logging.getLogger('PATTERN_DETECTOR') |
||||
|
||||
|
||||
def resolve_model_by_pattern(pattern: str) -> models.Model: |
||||
if pattern == 'GENERAL': |
||||
return models.GeneralModel() |
||||
if pattern == 'PEAK': |
||||
return models.PeakModel() |
||||
if pattern == 'TROUGH': |
||||
return models.TroughModel() |
||||
if pattern == 'DROP': |
||||
return models.DropModel() |
||||
if pattern == 'JUMP': |
||||
return models.JumpModel() |
||||
if pattern == 'CUSTOM': |
||||
return models.CustomModel() |
||||
raise ValueError('Unknown pattern "%s"' % pattern) |
||||
|
||||
|
||||
class PatternDetector(Detector): |
||||
|
||||
MIN_BUCKET_SIZE = 150 |
||||
BUCKET_WINDOW_SIZE_FACTOR = 5 |
||||
DEFAULT_WINDOW_SIZE = 1 |
||||
|
||||
def __init__(self, pattern_type: str, analytic_unit_id: AnalyticUnitId): |
||||
super().__init__(analytic_unit_id) |
||||
self.pattern_type = pattern_type |
||||
self.model = resolve_model_by_pattern(self.pattern_type) |
||||
self.bucket = DataBucket() |
||||
|
||||
def train(self, dataframe: pd.DataFrame, segments: List[Segment], cache: Optional[ModelCache]) -> ModelCache: |
||||
# TODO: pass only part of dataframe that has segments |
||||
|
||||
if not self.contains_labeled_segments(segments):
||||
msg = f'{self.analytic_unit_id} has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment' |
||||
logger.error(msg) |
||||
raise ValueError(msg) |
||||
|
||||
self.model.state: models.ModelState = self.model.get_state(cache) |
||||
new_cache: models.ModelState = self.model.fit(dataframe, segments, self.analytic_unit_id) |
||||
|
||||
# time step is optional |
||||
if len(dataframe) > 1: |
||||
new_cache.time_step = utils.find_interval(dataframe) |
||||
|
||||
new_cache = new_cache.to_json() |
||||
if len(new_cache) == 0: |
||||
logging.warning('new_cache is empty with data: {}, segments: {}, cache: {}, analytic unit: {}'.format(dataframe, segments, cache, self.analytic_unit_id)) |
||||
return { |
||||
'cache': new_cache |
||||
} |
||||
|
||||
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult: |
||||
logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe))) |
||||
# TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643) |
||||
|
||||
if cache is None: |
||||
msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection' |
||||
logger.error(msg) |
||||
raise ValueError(msg) |
||||
|
||||
self.model.state = self.model.get_state(cache) |
||||
window_size = self.model.state.window_size |
||||
|
||||
if window_size is None: |
||||
message = '{} got cache without window_size for detection'.format(self.analytic_unit_id) |
||||
logger.error(message) |
||||
raise ValueError(message) |
||||
|
||||
if len(dataframe) < window_size * 2: |
||||
message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points is less than the minimal length of {window_size * 2} points'
||||
logger.error(message) |
||||
raise ValueError(message) |
||||
|
||||
detected = self.model.detect(dataframe, self.analytic_unit_id) |
||||
|
||||
segments = [Segment(segment[0], segment[1]) for segment in detected['segments']] |
||||
new_cache = detected['cache'].to_json() |
||||
last_dataframe_time = dataframe.iloc[-1]['timestamp'] |
||||
last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time) |
||||
return DetectionResult(new_cache, segments, last_detection_time) |
||||
|
||||
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||
logging.debug('Start consume_data for analytic unit {}'.format(self.analytic_unit_id)) |
||||
|
||||
if cache is None: |
||||
logging.debug(f'consume_data got invalid cache {cache} for task {self.analytic_unit_id}, skip')
||||
return None |
||||
|
||||
data_without_nan = data.dropna() |
||||
|
||||
if len(data_without_nan) == 0: |
||||
return None |
||||
|
||||
self.bucket.receive_data(data_without_nan) |
||||
|
||||
# TODO: use ModelState |
||||
window_size = cache['windowSize'] |
||||
|
||||
bucket_len = len(self.bucket.data) |
||||
if bucket_len < window_size * 2: |
||||
msg = f'{self.analytic_unit_id} bucket length {bucket_len} is less than twice the window size ({window_size * 2}), skip detection in consume_data'
||||
logger.debug(msg) |
||||
return None |
||||
|
||||
res = self.detect(self.bucket.data, cache) |
||||
|
||||
bucket_size = max(window_size * self.BUCKET_WINDOW_SIZE_FACTOR, self.MIN_BUCKET_SIZE) |
||||
if bucket_len > bucket_size: |
||||
excess_data = bucket_len - bucket_size |
||||
self.bucket.drop_data(excess_data) |
||||
|
||||
logging.debug('End consume_data for analytic unit: {} with res: {}'.format(self.analytic_unit_id, str(res.to_json()))) |
||||
|
||||
return res
||||
|
||||
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||
if cache is None: return self.DEFAULT_WINDOW_SIZE |
||||
# TODO: windowSize -> window_size |
||||
return cache.get('windowSize', self.DEFAULT_WINDOW_SIZE) |
||||
|
||||
def contains_labeled_segments(self, segments: List[Segment]) -> bool: |
||||
for segment in segments: |
||||
if segment.labeled:
||||
return True |
||||
return False |
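For example, the window size falls back to the default until a trained cache (with the camelCase `windowSize` key) is available; the unit id below is hypothetical:

```
detector = PatternDetector('PEAK', 'unit-42')   # hypothetical unit id

detector.get_window_size(None)                  # -> DEFAULT_WINDOW_SIZE == 1
detector.get_window_size({'windowSize': 10})    # -> 10
```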
@ -0,0 +1,111 @@
|
||||
import logging as log |
||||
|
||||
import operator |
||||
import pandas as pd |
||||
import numpy as np |
||||
from typing import Optional, List |
||||
|
||||
from analytic_types import ModelCache, AnalyticUnitId |
||||
from analytic_types.detector import DetectionResult, ProcessingResult |
||||
from analytic_types.segment import Segment |
||||
from detectors import ProcessingDetector |
||||
from time import time |
||||
import utils |
||||
|
||||
|
||||
logger = log.getLogger('THRESHOLD_DETECTOR') |
||||
|
||||
|
||||
class ThresholdDetector(ProcessingDetector): |
||||
|
||||
WINDOW_SIZE = 3 |
||||
|
||||
def __init__(self, analytic_unit_id: AnalyticUnitId): |
||||
super().__init__(analytic_unit_id) |
||||
|
||||
def train(self, dataframe: pd.DataFrame, threshold: dict, cache: Optional[ModelCache]) -> ModelCache: |
||||
time_step = utils.find_interval(dataframe) |
||||
return { |
||||
'cache': { |
||||
'value': threshold['value'], |
||||
'condition': threshold['condition'], |
||||
'timeStep': time_step |
||||
} |
||||
} |
||||
|
||||
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult: |
||||
if cache is None or cache == {}: |
||||
raise ValueError('Threshold detector error: cannot detect before learning') |
||||
if len(dataframe) == 0: |
||||
return None |
||||
|
||||
value = cache['value'] |
||||
condition = cache['condition'] |
||||
|
||||
segments = [] |
||||
for index, row in dataframe.iterrows(): |
||||
current_value = row['value'] |
||||
current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp']) |
||||
segment = Segment(current_timestamp, current_timestamp) |
||||
# TODO: merge segments |
||||
if pd.isnull(current_value): |
||||
if condition == 'NO_DATA': |
||||
segment.message = 'NO_DATA detected' |
||||
segments.append(segment) |
||||
continue |
||||
|
||||
comparators = { |
||||
'>': operator.gt, |
||||
'<': operator.lt, |
||||
'=': operator.eq, |
||||
'>=': operator.ge, |
||||
'<=': operator.le |
||||
} |
||||
|
||||
assert condition in comparators.keys(), f'condition {condition} not allowed' |
||||
|
||||
if comparators[condition](current_value, value): |
||||
segment.message = f"{current_value} {condition} threshold's value {value}" |
||||
segments.append(segment) |
||||
|
||||
last_entry = dataframe.iloc[-1] |
||||
last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp']) |
||||
return DetectionResult(cache, segments, last_detection_time) |
||||
|
||||
|
||||
def consume_data(self, data: pd.DataFrame, cache: Optional[ModelCache]) -> Optional[DetectionResult]: |
||||
result = self.detect(data, cache) |
||||
return result
||||
|
||||
def get_window_size(self, cache: Optional[ModelCache]) -> int: |
||||
return self.WINDOW_SIZE |
||||
|
||||
def concat_detection_results(self, detections: List[DetectionResult]) -> DetectionResult: |
||||
result = DetectionResult() |
||||
time_step = detections[0].cache['timeStep'] |
||||
for detection in detections: |
||||
result.segments.extend(detection.segments) |
||||
result.last_detection_time = detection.last_detection_time |
||||
result.cache = detection.cache |
||||
result.segments = utils.merge_intersecting_segments(result.segments, time_step) |
||||
return result |
||||
|
||||
def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult: |
||||
data = dataframe['value'] |
||||
value = self.get_value_from_cache(cache, 'value', required = True) |
||||
condition = self.get_value_from_cache(cache, 'condition', required = True) |
||||
|
||||
if condition == 'NO_DATA': |
||||
return ProcessingResult() |
||||
|
||||
data.values[:] = value |
||||
timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp) |
||||
result_series = list(zip(timestamps, data.values.tolist())) |
||||
|
||||
if condition in ['>', '>=', '=']: |
||||
return ProcessingResult(upper_bound = result_series) |
||||
|
||||
if condition in ['<', '<=']: |
||||
return ProcessingResult(lower_bound = result_series) |
||||
|
||||
raise ValueError(f'{condition} condition not supported') |
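End to end, the threshold detector is the simplest detector; here is a small usage sketch with hypothetical data (`find_interval` and `convert_pd_timestamp_to_ms` come from `utils`, which is not shown here):

```
import pandas as pd

detector = ThresholdDetector('unit-1')            # hypothetical unit id
dataframe = pd.DataFrame({
    'timestamp': pd.to_datetime([0, 60, 120], unit='s'),
    'value': [5.0, 15.0, 7.0],
})

state = detector.train(dataframe, {'value': 10, 'condition': '>'}, None)
# -> {'cache': {'value': 10, 'condition': '>', 'timeStep': <interval in ms>}}

result = detector.detect(dataframe, state['cache'])
# -> one single-point segment for the 15.0 reading ("15.0 > threshold's value 10")
```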
@ -0,0 +1,9 @@
|
||||
from models.model import Model, ModelState, AnalyticSegment, ModelType, ExtremumType |
||||
from models.triangle_model import TriangleModel, TriangleModelState |
||||
from models.stair_model import StairModel, StairModelState |
||||
from models.drop_model import DropModel |
||||
from models.peak_model import PeakModel |
||||
from models.jump_model import JumpModel |
||||
from models.custom_model import CustomModel |
||||
from models.trough_model import TroughModel |
||||
from models.general_model import GeneralModel, GeneralModelState |
@ -0,0 +1,30 @@
|
||||
from models import Model, AnalyticSegment, ModelState, ModelType |
||||
from analytic_types import AnalyticUnitId, ModelCache |
||||
from analytic_types.learning_info import LearningInfo |
||||
import utils |
||||
|
||||
import pandas as pd |
||||
from typing import List, Optional |
||||
|
||||
|
||||
class CustomModel(Model): |
||||
def do_fit( |
||||
self, |
||||
dataframe: pd.DataFrame, |
||||
labeled_segments: List[AnalyticSegment], |
||||
deleted_segments: List[AnalyticSegment], |
||||
learning_info: LearningInfo |
||||
) -> None: |
||||
pass |
||||
|
||||
def do_detect(self, dataframe: pd.DataFrame) -> list: |
||||
return [] |
||||
|
||||
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||
pass |
||||
|
||||
def get_model_type(self) -> ModelType: |
||||
pass |
||||
|
||||
def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: |
||||
pass |
@ -0,0 +1,9 @@
|
||||
from models import StairModel, ModelType, ExtremumType |
||||
|
||||
class DropModel(StairModel): |
||||
|
||||
def get_model_type(self) -> ModelType: |
||||
return ModelType.DROP |
||||
|
||||
def get_extremum_type(self) -> ExtremumType: |
||||
return ExtremumType.MIN |
@ -0,0 +1,104 @@
|
||||
from analytic_types import AnalyticUnitId, TimeSeries
from analytic_types.learning_info import LearningInfo
from models import Model, ModelState, AnalyticSegment, ModelType
import utils
import utils.meta

import numpy as np
import pandas as pd
import scipy.signal
from scipy.fftpack import fft
from scipy.signal import argrelextrema
from scipy.stats import gaussian_kde, norm
from scipy.stats.stats import pearsonr

import logging
import math
from typing import Optional, Union, List, Tuple, Generator
||||
|
||||
PEARSON_FACTOR = 0.7 |
||||
|
||||
|
||||
@utils.meta.JSONClass |
||||
class GeneralModelState(ModelState): |
||||
def __init__(self, **kwargs): |
||||
super().__init__(**kwargs) |
||||
|
||||
|
||||
class GeneralModel(Model): |
||||
|
||||
def get_model_type(self) -> ModelType: |
||||
return ModelType.GENERAL |
||||
|
||||
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||
data = dataframe['value'] |
||||
segment = data[start: end] |
||||
center_ind = start + math.ceil((end - start) / 2) |
||||
return center_ind |
||||
|
||||
def get_state(self, cache: Optional[dict] = None) -> GeneralModelState: |
||||
return GeneralModelState.from_json(cache) |
||||
|
||||
def do_fit( |
||||
self, |
||||
dataframe: pd.DataFrame, |
||||
labeled_segments: List[AnalyticSegment], |
||||
deleted_segments: List[AnalyticSegment], |
||||
learning_info: LearningInfo |
||||
) -> None: |
||||
data = utils.cut_dataframe(dataframe) |
||||
data = data['value'] |
||||
last_pattern_center = self.state.pattern_center |
||||
self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) |
||||
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) |
||||
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||
|
||||
del_conv_list = [] |
||||
delete_pattern_timestamp = [] |
||||
for segment in deleted_segments: |
||||
del_mid_index = segment.center_index |
||||
delete_pattern_timestamp.append(segment.pattern_timestamp) |
||||
deleted_pat = utils.get_interval(data, del_mid_index, self.state.window_size) |
||||
deleted_pat = utils.subtract_min_without_nan(deleted_pat) |
||||
del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state.pattern_model) |
||||
if len(del_conv_pat): del_conv_list.append(max(del_conv_pat)) |
||||
|
||||
self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3) |
||||
self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size) |
||||
|
||||
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||
data = utils.cut_dataframe(dataframe) |
||||
data = data['value'] |
||||
pat_data = self.state.pattern_model |
||||
if pat_data.count(0) == len(pat_data): |
||||
raise ValueError('Labeled patterns must not be empty') |
||||
|
||||
window_size = self.state.window_size |
||||
all_corr = utils.get_correlation_gen(data, window_size, pat_data) |
||||
all_corr_peaks = utils.find_peaks(all_corr, window_size * 2) |
||||
filtered = self.__filter_detection(all_corr_peaks, data) |
||||
filtered = list(filtered) |
||||
return [(item, item + window_size * 2) for item in filtered] |
||||
|
||||
def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]: |
||||
if not self.state.pattern_center: |
||||
return [] |
||||
window_size = self.state.window_size |
||||
pattern_model = self.state.pattern_model |
||||
for ind, val in segments: |
||||
watch_data = data[ind - window_size: ind + window_size + 1] |
||||
watch_data = utils.subtract_min_without_nan(watch_data) |
||||
convolve_segment = scipy.signal.fftconvolve(watch_data, pattern_model) |
||||
if len(convolve_segment) > 0: |
||||
watch_conv = max(convolve_segment) |
||||
else: |
||||
continue |
||||
if watch_conv < self.state.convolve_min * 0.8 or val < PEARSON_FACTOR: |
||||
continue |
||||
if watch_conv < self.state.conv_del_max * 1.02 and watch_conv > self.state.conv_del_min * 0.98: |
||||
continue |
||||
yield ind |
@ -0,0 +1,9 @@
|
||||
from models import StairModel, ModelType, ExtremumType |
||||
|
||||
class JumpModel(StairModel): |
||||
|
||||
def get_model_type(self) -> ModelType: |
||||
return ModelType.JUMP |
||||
|
||||
def get_extremum_type(self) -> ExtremumType: |
||||
return ExtremumType.MAX |
@ -0,0 +1,230 @@
|
||||
from analytic_types import AnalyticUnitId, ModelCache, TimeSeries |
||||
from analytic_types.segment import Segment |
||||
from analytic_types.learning_info import LearningInfo |
||||
|
||||
import utils |
||||
import utils.meta |
||||
|
||||
from abc import ABC, abstractmethod |
||||
from attrdict import AttrDict |
||||
from typing import Optional, List, Tuple |
||||
import pandas as pd |
||||
import math |
||||
import logging |
||||
from enum import Enum |
||||
|
||||
class ModelType(Enum): |
||||
JUMP = 'jump' |
||||
DROP = 'drop' |
||||
PEAK = 'peak' |
||||
TROUGH = 'trough' |
||||
GENERAL = 'general' |
||||
|
||||
class ExtremumType(Enum): |
||||
MAX = 'max' |
||||
MIN = 'min' |
||||
|
||||
class AnalyticSegment(Segment): |
||||
''' |
||||
Segment with specific analytics fields used by models: |
||||
- `labeled` / `deleted` flags |
||||
- `from` / `to` / `center` indices |
||||
- `length` |
||||
- `data` |
||||
- etc |
||||
''' |
||||
|
||||
def __init__( |
||||
self, |
||||
from_timestamp: int, |
||||
to_timestamp: int, |
||||
_id: str, |
||||
analytic_unit_id: str, |
||||
labeled: bool, |
||||
deleted: bool, |
||||
message: str, |
||||
dataframe: pd.DataFrame, |
||||
center_finder = None |
||||
): |
||||
super().__init__( |
||||
from_timestamp, |
||||
to_timestamp, |
||||
_id, |
||||
analytic_unit_id, |
||||
labeled, |
||||
deleted, |
||||
message |
||||
) |
||||
|
||||
self.from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(self.from_timestamp, unit='ms')) |
||||
self.to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(self.to_timestamp, unit='ms')) |
||||
self.length = abs(self.to_index - self.from_index) |
||||
self.__percent_of_nans = 0 |
||||
|
||||
if callable(center_finder): |
||||
self.center_index = center_finder(dataframe, self.from_index, self.to_index) |
||||
self.pattern_timestamp = dataframe['timestamp'][self.center_index] |
||||
else: |
||||
self.center_index = self.from_index + math.ceil(self.length / 2) |
||||
self.pattern_timestamp = dataframe['timestamp'][self.center_index] |
||||
|
||||
assert len(dataframe['value']) >= self.to_index + 1, \ |
||||
'segment {}-{} out of dataframe length={}'.format(self.from_index, self.to_index + 1, len(dataframe['value'])) |
||||
|
||||
self.data = dataframe['value'][self.from_index: self.to_index + 1] |
||||
|
||||
@property |
||||
def percent_of_nans(self): |
||||
if not self.__percent_of_nans: |
||||
self.__percent_of_nans = self.data.isnull().sum() / len(self.data) |
||||
return self.__percent_of_nans |
||||
|
||||
def convert_nan_to_zero(self): |
||||
nan_list = utils.find_nan_indexes(self.data) |
||||
self.data = utils.nan_to_zero(self.data, nan_list) |
||||
|
||||
|
||||
@utils.meta.JSONClass |
||||
class ModelState(): |
||||
|
||||
def __init__( |
||||
self, |
||||
time_step: int = 0, |
||||
pattern_center: List[int] = None, |
||||
pattern_model: List[float] = None, |
||||
convolve_max: float = 0, |
||||
convolve_min: float = 0, |
||||
window_size: int = 0, |
||||
conv_del_min: float = 0, |
||||
conv_del_max: float = 0 |
||||
): |
||||
self.time_step = time_step |
||||
self.pattern_center = pattern_center if pattern_center is not None else [] |
||||
self.pattern_model = pattern_model if pattern_model is not None else [] |
||||
self.convolve_max = convolve_max |
||||
self.convolve_min = convolve_min |
||||
self.window_size = window_size |
||||
self.conv_del_min = conv_del_min |
||||
self.conv_del_max = conv_del_max |
||||
|
||||
|
||||
class Model(ABC): |
||||
|
||||
HEIGHT_ERROR = 0.1 |
||||
CONV_ERROR = 0.2 |
||||
DEL_CONV_ERROR = 0.02 |
||||
|
||||
@abstractmethod |
||||
def do_fit( |
||||
self, |
||||
dataframe: pd.DataFrame, |
||||
labeled_segments: List[AnalyticSegment], |
||||
deleted_segments: List[AnalyticSegment], |
||||
learning_info: LearningInfo |
||||
) -> None: |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def get_model_type(self) -> ModelType: |
||||
pass |
||||
|
||||
@abstractmethod |
||||
def get_state(self, cache: Optional[ModelCache] = None) -> ModelState: |
||||
pass |
||||
|
||||
def fit(self, dataframe: pd.DataFrame, segments: List[Segment], id: AnalyticUnitId) -> ModelState: |
||||
logging.debug('Start method fit for analytic unit {}'.format(id)) |
||||
data = dataframe['value'] |
||||
max_length = 0 |
||||
labeled = [] |
||||
deleted = [] |
||||
for segment_map in segments: |
||||
if segment_map.labeled or segment_map.deleted: |
||||
segment = AnalyticSegment( |
||||
segment_map.from_timestamp, |
||||
segment_map.to_timestamp, |
||||
segment_map._id, |
||||
segment_map.analytic_unit_id, |
||||
segment_map.labeled, |
||||
segment_map.deleted, |
||||
segment_map.message, |
||||
dataframe, |
||||
self.find_segment_center |
||||
) |
||||
if segment.percent_of_nans > 0.1 or len(segment.data) == 0: |
||||
logging.debug(f'segment {segment.from_index}-{segment.to_index} skipped because of invalid data')
||||
continue |
||||
if segment.percent_of_nans > 0: |
||||
segment.convert_nan_to_zero() |
||||
max_length = max(segment.length, max_length) |
||||
if segment.labeled: labeled.append(segment) |
||||
if segment.deleted: deleted.append(segment) |
||||
|
||||
assert len(labeled) > 0, f'labeled list empty, skip fitting for {id}' |
||||
|
||||
if self.state.window_size == 0: |
||||
self.state.window_size = math.ceil(max_length / 2) if max_length else 0 |
||||
learning_info = self.get_parameters_from_segments(dataframe, labeled, deleted, self.get_model_type()) |
||||
self.do_fit(dataframe, labeled, deleted, learning_info) |
||||
logging.debug('fit complete successful with self.state: {} for analytic unit: {}'.format(self.state, id)) |
||||
return self.state |
||||
|
||||
def detect(self, dataframe: pd.DataFrame, id: AnalyticUnitId) -> dict: |
||||
logging.debug('Start method detect for analytic unit {}'.format(id)) |
||||
result = self.do_detect(dataframe) |
||||
segments = [( |
||||
utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]), |
||||
utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]), |
||||
) for x in result] |
||||
if not self.state: |
||||
logging.warning('Return empty self.state after detect') |
||||
logging.debug('Method detect complete successful for analytic unit {}'.format(id)) |
||||
return { |
||||
'segments': segments, |
||||
'cache': self.state, |
||||
} |
||||
|
||||
def _update_fitting_result(self, state: ModelState, confidences: list, convolve_list: list, del_conv_list: list, height_list: Optional[list] = None) -> None: |
||||
state.confidence = float(min(confidences, default = 1.5)) |
||||
state.convolve_min, state.convolve_max = utils.get_min_max(convolve_list, state.window_size) |
||||
state.conv_del_min, state.conv_del_max = utils.get_min_max(del_conv_list, 0) |
||||
if height_list is not None: |
||||
state.height_min, state.height_max = utils.get_min_max(height_list, 0) |
||||
|
||||
def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: ModelType) -> dict: |
||||
logging.debug('Start parsing segments') |
||||
learning_info = LearningInfo() |
||||
data = dataframe['value'] |
||||
for segment in labeled: |
||||
confidence = utils.find_confidence(segment.data)[0] |
||||
learning_info.confidence.append(confidence) |
||||
segment_center = segment.center_index |
||||
learning_info.segment_center_list.append(segment_center) |
||||
learning_info.pattern_timestamp.append(segment.pattern_timestamp) |
||||
aligned_segment = utils.get_interval(data, segment_center, self.state.window_size) |
||||
aligned_segment = utils.subtract_min_without_nan(aligned_segment) |
||||
if len(aligned_segment) == 0: |
||||
logging.warning('cannot add segment to learning: segment is empty (segment center: {}, window_size: {}, len(data): {})'.format( |
||||
segment_center, self.state.window_size, len(data))) |
||||
continue |
||||
learning_info.patterns_list.append(aligned_segment) |
||||
# TODO: use Triangle/Stair types |
||||
if model == ModelType.PEAK or model == ModelType.TROUGH: |
||||
learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1]) |
||||
learning_info.patterns_value.append(aligned_segment.values.max()) |
||||
if model == ModelType.JUMP or model == ModelType.DROP: |
||||
pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model.value) |
||||
learning_info.pattern_height.append(pattern_height) |
||||
learning_info.pattern_width.append(pattern_length) |
||||
learning_info.patterns_value.append(aligned_segment.values[self.state.window_size]) |
||||
logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info)) |
||||
return learning_info |
||||
|
@ -0,0 +1,44 @@
|
||||
from analytic_types import TimeSeries |
||||
from models import TriangleModel, ModelType |
||||
import utils |
||||
|
||||
import scipy.signal |
||||
from scipy.signal import argrelextrema |
||||
from typing import Optional, List, Tuple |
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
class PeakModel(TriangleModel): |
||||
|
||||
def get_model_type(self) -> ModelType: |
||||
return ModelType.PEAK |
||||
|
||||
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||
data = dataframe['value'] |
||||
segment = data[start: end] |
||||
return segment.idxmax() |
||||
|
||||
def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: |
||||
pattern_list = [] |
||||
for val in close_patterns: |
||||
max_val = data[val[0]] |
||||
ind = val[0] |
||||
for i in val: |
||||
if data[i] > max_val: |
||||
max_val = data[i] |
||||
ind = i |
||||
pattern_list.append(ind) |
||||
return pattern_list |
||||
|
||||
def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: |
||||
return argrelextrema(data.values, np.greater)[0] |
||||
|
||||
def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: |
||||
return utils.exponential_smoothing(data + self.state.confidence, alpha) |
||||
|
||||
def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, peak_indexes: List[int]) -> List[int]: |
||||
segments = [] |
||||
for idx in peak_indexes: |
||||
if data[idx] > smoothed_data[idx]: |
||||
segments.append(idx) |
||||
return segments |
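For orientation, a rough standalone sketch (toy data, names are illustrative only and not part of this diff) of what get_best_pattern above does: each group of nearby candidate indexes is collapsed to the single index with the highest value; TroughModel mirrors this with the lowest value.

```
import pandas as pd

data = pd.Series([0.0, 1.0, 5.0, 1.0, 0.0, 2.0, 7.0, 2.0])
close_patterns = [[1, 2, 3], [5, 6, 7]]  # groups of nearby candidate indexes

# equivalent one-liner for the loop in PeakModel.get_best_pattern
best = [max(group, key=lambda i: data[i]) for group in close_patterns]
print(best)  # [2, 6]
```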
@ -0,0 +1,147 @@
|
||||
from models import Model, ModelState, AnalyticSegment, ModelType |
||||
|
||||
from analytic_types import TimeSeries |
||||
from analytic_types.learning_info import LearningInfo |
||||
|
||||
from scipy.fftpack import fft |
||||
from typing import Optional, List |
||||
from enum import Enum |
||||
import scipy.signal |
||||
import utils |
||||
import utils.meta |
||||
import pandas as pd |
||||
import numpy as np |
||||
import operator |
||||
|
||||
POSITIVE_SEGMENT_MEASUREMENT_ERROR = 0.2 |
||||
NEGATIVE_SEGMENT_MEASUREMENT_ERROR = 0.02 |
||||
|
||||
@utils.meta.JSONClass |
||||
class StairModelState(ModelState): |
||||
|
||||
def __init__( |
||||
self, |
||||
confidence: float = 0, |
||||
stair_height: float = 0, |
||||
stair_length: float = 0, |
||||
**kwargs |
||||
): |
||||
super().__init__(**kwargs) |
||||
self.confidence = confidence |
||||
self.stair_height = stair_height |
||||
self.stair_length = stair_length |
||||
|
||||
|
||||
class StairModel(Model): |
||||
|
||||
def get_state(self, cache: Optional[dict] = None) -> StairModelState: |
||||
return StairModelState.from_json(cache) |
||||
|
||||
def get_stair_indexes(self, data: pd.Series, height: float, length: int) -> List[int]: |
||||
"""Get list of start stair segment indexes. |
||||
|
||||
Keyword arguments: |
||||
data -- data that contains stair (jump or drop) segments |
||||
length -- maximum count of values in the stair |
||||
height -- the difference between the stair max_line and min_line (see utils.find_parameters) |
||||
""" |
||||
indexes = [] |
||||
for i in range(len(data) - length - 1): |
||||
is_stair = self.is_stair_in_segment(data.values[i:i + length + 1], height) |
||||
if is_stair: |
||||
indexes.append(i) |
||||
return indexes |
||||
|
||||
def is_stair_in_segment(self, segment: np.ndarray, height: float) -> bool: |
||||
if len(segment) < 2: |
||||
return False |
||||
comparison_operator = operator.ge |
||||
if self.get_model_type() == ModelType.DROP: |
||||
comparison_operator = operator.le |
||||
height = -height |
||||
return comparison_operator(max(segment[1:]), segment[0] + height) |
||||
|
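A minimal standalone sketch of the check above (hypothetical numbers, not from the repo): for a jump, some value after the first one must reach the first value plus height; for a drop the height is negated, so every later value must sit at least height below the start.

```
import numpy as np
import operator

def is_stair(window: np.ndarray, height: float, is_drop: bool = False) -> bool:
    # mirrors StairModel.is_stair_in_segment: compare the max of the tail with start + height
    if len(window) < 2:
        return False
    comparison = operator.le if is_drop else operator.ge
    if is_drop:
        height = -height
    return comparison(max(window[1:]), window[0] + height)

print(is_stair(np.array([1.0, 1.2, 3.5]), height=2.0))                # True: jump of >= 2
print(is_stair(np.array([5.0, 2.7, 2.5]), height=2.0, is_drop=True))  # True: drop of >= 2
```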
||||
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||
data = dataframe['value'] |
||||
segment = data[start: end] |
||||
segment_center_index = utils.find_pattern_center(segment, start, self.get_model_type().value) |
||||
return segment_center_index |
||||
|
||||
def do_fit( |
||||
self, |
||||
dataframe: pd.DataFrame, |
||||
labeled_segments: List[AnalyticSegment], |
||||
deleted_segments: List[AnalyticSegment], |
||||
learning_info: LearningInfo |
||||
) -> None: |
||||
data = utils.cut_dataframe(dataframe) |
||||
data = data['value'] |
||||
window_size = self.state.window_size |
||||
last_pattern_center = self.state.pattern_center |
||||
self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list) |
||||
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) |
||||
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size) |
||||
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size) |
||||
height_list = learning_info.patterns_value |
||||
|
||||
del_conv_list = [] |
||||
delete_pattern_timestamp = [] |
||||
for segment in deleted_segments: |
||||
segment_cent_index = segment.center_index |
||||
delete_pattern_timestamp.append(segment.pattern_timestamp) |
||||
deleted_stair = utils.get_interval(data, segment_cent_index, window_size) |
||||
deleted_stair = utils.subtract_min_without_nan(deleted_stair) |
||||
del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model) |
||||
if len(del_conv_stair) > 0: |
||||
del_conv_list.append(max(del_conv_stair)) |
||||
|
||||
self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list) |
||||
self.state.stair_height = int(min(learning_info.pattern_height, default = 1)) |
||||
self.state.stair_length = int(max(learning_info.pattern_width, default = 1)) |
||||
|
||||
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||
data = utils.cut_dataframe(dataframe) |
||||
data = data['value'] |
||||
possible_stairs = self.get_stair_indexes(data, self.state.stair_height, self.state.stair_length + 1) |
||||
result = self.__filter_detection(possible_stairs, data) |
||||
return [(val - 1, val + 1) for val in result] |
||||
|
||||
def __filter_detection(self, segments_indexes: List[int], data: list): |
||||
delete_list = [] |
||||
variance_error = self.state.window_size |
||||
close_segments = utils.close_filtering(segments_indexes, variance_error) |
||||
segments_indexes = utils.best_pattern(close_segments, data, self.get_extremum_type().value) |
||||
if len(segments_indexes) == 0 or len(self.state.pattern_center) == 0: |
||||
return [] |
||||
pattern_data = self.state.pattern_model |
||||
for segment_index in segments_indexes: |
||||
if segment_index <= self.state.window_size or segment_index >= (len(data) - self.state.window_size): |
||||
delete_list.append(segment_index) |
||||
continue |
||||
convol_data = utils.get_interval(data, segment_index, self.state.window_size) |
||||
percent_of_nans = convol_data.isnull().sum() / len(convol_data) |
||||
if len(convol_data) == 0 or percent_of_nans > 0.5: |
||||
delete_list.append(segment_index) |
||||
continue |
||||
elif 0 < percent_of_nans <= 0.5: |
||||
nan_list = utils.find_nan_indexes(convol_data) |
||||
convol_data = utils.nan_to_zero(convol_data, nan_list) |
||||
pattern_data = utils.nan_to_zero(pattern_data, nan_list) |
||||
conv = scipy.signal.fftconvolve(convol_data, pattern_data) |
||||
if len(conv) == 0: |
||||
delete_list.append(segment_index) |
||||
continue |
||||
upper_bound = self.state.convolve_max * (1 + POSITIVE_SEGMENT_MEASUREMENT_ERROR) |
||||
lower_bound = self.state.convolve_min * (1 - POSITIVE_SEGMENT_MEASUREMENT_ERROR) |
||||
delete_up_bound = self.state.conv_del_max * (1 + NEGATIVE_SEGMENT_MEASUREMENT_ERROR) |
||||
delete_low_bound = self.state.conv_del_min * (1 - NEGATIVE_SEGMENT_MEASUREMENT_ERROR) |
||||
max_conv = max(conv) |
||||
if max_conv > upper_bound or max_conv < lower_bound: |
||||
delete_list.append(segment_index) |
||||
elif max_conv < delete_up_bound and max_conv > delete_low_bound: |
||||
delete_list.append(segment_index) |
||||
|
||||
for item in delete_list: |
||||
segments_indexes.remove(item) |
||||
segments_indexes = utils.remove_duplicates_and_sort(segments_indexes) |
||||
return segments_indexes |
@ -0,0 +1,119 @@
|
||||
from analytic_types import AnalyticUnitId, TimeSeries |
||||
from analytic_types.learning_info import LearningInfo |
||||
from models import Model, ModelState, AnalyticSegment |
||||
import utils |
||||
import utils.meta |
||||
|
||||
import scipy.signal |
||||
from scipy.fftpack import fft |
||||
from typing import Optional, List, Tuple |
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
|
||||
EXP_SMOOTHING_FACTOR = 0.01 |
||||
|
||||
|
||||
@utils.meta.JSONClass |
||||
class TriangleModelState(ModelState): |
||||
|
||||
def __init__( |
||||
self, |
||||
confidence: float = 0, |
||||
height_max: float = 0, |
||||
height_min: float = 0, |
||||
**kwargs |
||||
): |
||||
super().__init__(**kwargs) |
||||
self.confidence = confidence |
||||
self.height_max = height_max |
||||
self.height_min = height_min |
||||
|
||||
class TriangleModel(Model): |
||||
|
||||
def get_state(self, cache: Optional[dict] = None) -> TriangleModelState: |
||||
return TriangleModelState.from_json(cache) |
||||
|
||||
def do_fit( |
||||
self, |
||||
dataframe: pd.DataFrame, |
||||
labeled_segments: List[AnalyticSegment], |
||||
deleted_segments: List[AnalyticSegment], |
||||
learning_info: LearningInfo |
||||
) -> None: |
||||
data = utils.cut_dataframe(dataframe) |
||||
data = data['value'] |
||||
self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list) |
||||
self.state.pattern_model = utils.get_av_model(learning_info.patterns_list) |
||||
convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||
correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size) |
||||
height_list = learning_info.patterns_value |
||||
|
||||
del_conv_list = [] |
||||
delete_pattern_width = [] |
||||
delete_pattern_height = [] |
||||
delete_pattern_timestamp = [] |
||||
for segment in deleted_segments: |
||||
delete_pattern_timestamp.append(segment.pattern_timestamp) |
||||
deleted = utils.get_interval(data, segment.center_index, self.state.window_size) |
||||
deleted = utils.subtract_min_without_nan(deleted) |
||||
del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model) |
||||
if len(del_conv): |
||||
del_conv_list.append(max(del_conv)) |
||||
delete_pattern_height.append(utils.find_confidence(deleted)[1]) |
||||
|
||||
self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list) |
||||
|
||||
def do_detect(self, dataframe: pd.DataFrame) -> TimeSeries: |
||||
data = utils.cut_dataframe(dataframe) |
||||
data = data['value'] |
||||
|
||||
all_extremum_indexes = self.get_extremum_indexes(data) |
||||
smoothed_data = self.get_smoothed_data(data, self.state.confidence, EXP_SMOOTHING_FACTOR) |
||||
segments = self.get_possible_segments(data, smoothed_data, all_extremum_indexes) |
||||
result = self.__filter_detection(segments, data) |
||||
result = utils.get_borders_of_peaks(result, data, self.state.window_size, self.state.confidence) |
||||
return result |
||||
|
||||
def __filter_detection(self, segments: List[int], data: pd.Series) -> list: |
||||
delete_list = [] |
||||
variance_error = self.state.window_size |
||||
close_patterns = utils.close_filtering(segments, variance_error) |
||||
segments = self.get_best_pattern(close_patterns, data) |
||||
|
||||
if len(segments) == 0 or len(self.state.pattern_model) == 0: |
||||
return [] |
||||
pattern_data = self.state.pattern_model |
||||
up_height = self.state.height_max * (1 + self.HEIGHT_ERROR) |
||||
low_height = self.state.height_min * (1 - self.HEIGHT_ERROR) |
||||
up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR) |
||||
low_conv = self.state.convolve_min * (1 - self.CONV_ERROR) |
||||
up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR) |
||||
low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR) |
||||
for segment in segments: |
||||
if segment > self.state.window_size: |
||||
convol_data = utils.get_interval(data, segment, self.state.window_size) |
||||
convol_data = utils.subtract_min_without_nan(convol_data) |
||||
percent_of_nans = convol_data.isnull().sum() / len(convol_data) |
||||
if percent_of_nans > 0.5: |
||||
delete_list.append(segment) |
||||
continue |
||||
elif 0 < percent_of_nans <= 0.5: |
||||
nan_list = utils.find_nan_indexes(convol_data) |
||||
convol_data = utils.nan_to_zero(convol_data, nan_list) |
||||
pattern_data = utils.nan_to_zero(pattern_data, nan_list) |
||||
conv = scipy.signal.fftconvolve(convol_data, pattern_data) |
||||
pattern_height = convol_data.values.max() |
||||
if pattern_height > up_height or pattern_height < low_height: |
||||
delete_list.append(segment) |
||||
continue |
||||
if max(conv) > up_conv or max(conv) < low_conv: |
||||
delete_list.append(segment) |
||||
continue |
||||
if max(conv) < up_del_conv and max(conv) > low_del_conv: |
||||
delete_list.append(segment) |
||||
else: |
||||
delete_list.append(segment) |
||||
for item in delete_list: |
||||
segments.remove(item) |
||||
return set(segments) |
@ -0,0 +1,44 @@
|
||||
from analytic_types import TimeSeries |
||||
from models import TriangleModel, ModelType |
||||
import utils |
||||
|
||||
import scipy.signal |
||||
from scipy.signal import argrelextrema |
||||
from typing import Optional, List, Tuple |
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
class TroughModel(TriangleModel): |
||||
|
||||
def get_model_type(self) -> ModelType: |
||||
return ModelType.TROUGH |
||||
|
||||
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int: |
||||
data = dataframe['value'] |
||||
segment = data[start: end] |
||||
return segment.idxmin() |
||||
|
||||
def get_best_pattern(self, close_patterns: TimeSeries, data: pd.Series) -> List[int]: |
||||
pattern_list = [] |
||||
for val in close_patterns: |
||||
min_val = data[val[0]] |
||||
ind = val[0] |
||||
for i in val: |
||||
if data[i] < min_val: |
||||
min_val = data[i] |
||||
ind = i |
||||
pattern_list.append(ind) |
||||
return pattern_list |
||||
|
||||
def get_extremum_indexes(self, data: pd.Series) -> np.ndarray: |
||||
return argrelextrema(data.values, np.less)[0] |
||||
|
||||
def get_smoothed_data(self, data: pd.Series, confidence: float, alpha: float) -> pd.Series: |
||||
return utils.exponential_smoothing(data - self.state.confidence, alpha) |
||||
|
||||
def get_possible_segments(self, data: pd.Series, smoothed_data: pd.Series, trough_indexes: List[int]) -> List[int]: |
||||
segments = [] |
||||
for idx in trough_indexes: |
||||
if data[idx] < smoothed_data[idx]: |
||||
segments.append(idx) |
||||
return segments |
@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3 |
||||
|
||||
import sys |
||||
import os |
||||
|
||||
|
||||
import config |
||||
import json |
||||
import logging |
||||
import asyncio |
||||
import traceback |
||||
|
||||
import services |
||||
from analytic_unit_manager import AnalyticUnitManager |
||||
|
||||
|
||||
server_service: services.ServerService = None |
||||
data_service: services.DataService = None |
||||
analytic_unit_manager: AnalyticUnitManager = None |
||||
|
||||
logger = logging.getLogger('SERVER') |
||||
|
||||
|
||||
async def handle_task(task: object): |
||||
try: |
||||
task_type = task['type'] |
||||
logger.info("Got {} task with id {}, analyticUnitId {}".format(task_type, task['_id'], task['analyticUnitId'])) |
||||
|
||||
task_result_payload = { |
||||
'_id': task['_id'], |
||||
'task': task_type, |
||||
'analyticUnitId': task['analyticUnitId'], |
||||
'status': "IN_PROGRESS" |
||||
} |
||||
|
||||
if not task_type == 'PUSH': |
||||
message = services.server_service.ServerMessage('TASK_RESULT', task_result_payload) |
||||
await server_service.send_message_to_server(message) |
||||
|
||||
res = await analytic_unit_manager.handle_analytic_task(task) |
||||
res['_id'] = task['_id'] |
||||
|
||||
if not task_type == 'PUSH': |
||||
message = services.server_service.ServerMessage('TASK_RESULT', res) |
||||
await server_service.send_message_to_server(message) |
||||
|
||||
except Exception as e: |
||||
error_text = traceback.format_exc() |
||||
logger.error("handle_task Exception: '%s'" % error_text) |
||||
|
||||
async def handle_data(task: object): |
||||
res = await analytic_unit_manager.handle_analytic_task(task) |
||||
|
||||
if res['status'] == 'SUCCESS' and res['payload'] is not None: |
||||
res['_id'] = task['_id'] |
||||
message = services.server_service.ServerMessage('PUSH_DETECT', res) |
||||
await server_service.send_message_to_server(message) |
||||
|
||||
async def handle_message(message: services.ServerMessage): |
||||
if message.method == 'TASK': |
||||
await handle_task(message.payload) |
||||
if message.method == 'DATA': |
||||
await handle_data(message.payload) |
||||
|
||||
def init_services(): |
||||
global server_service |
||||
global data_service |
||||
global analytic_unit_manager |
||||
|
||||
logger.info("Starting services...") |
||||
logger.info("Server...") |
||||
server_service = services.ServerService() |
||||
logger.info("Ok") |
||||
logger.info("Data service...") |
||||
data_service = services.DataService(server_service) |
||||
logger.info("Ok") |
||||
logger.info("Analytic unit manager...") |
||||
analytic_unit_manager = AnalyticUnitManager() |
||||
logger.info("Ok") |
||||
|
||||
async def app_loop(): |
||||
async for message in server_service: |
||||
asyncio.ensure_future(handle_message(message)) |
||||
|
||||
|
||||
def run_server(): |
||||
loop = asyncio.get_event_loop() |
||||
#loop.set_debug(True) |
||||
logger.info("Ok") |
||||
init_services() |
||||
print('Analytics process is running') # we need to print to stdout and flush |
||||
sys.stdout.flush() # because node.js expects it |
||||
|
||||
loop.run_until_complete(app_loop()) |
@ -0,0 +1,2 @@
|
||||
from services.server_service import ServerService, ServerMessage |
||||
from services.data_service import DataService |
@ -0,0 +1,85 @@
|
||||
from services.server_service import ServerMessage, ServerService |
||||
|
||||
import json |
||||
import asyncio |
||||
|
||||
""" |
||||
This is how you can save a file: |
||||
|
||||
async def test_file_save(): |
||||
async with data_service.open('filename') as f: |
||||
print('write content') |
||||
await f.write('test string') |
||||
|
||||
async with data_service.open('filename') as f: |
||||
content = await f.load() |
||||
print(content) |
||||
print('test file ok') |
||||
""" |
||||
|
||||
|
||||
LOCK_WAIT_SLEEP_TIMESPAN = 100 # ms |
||||
|
||||
class FileDescriptor: |
||||
def __init__(self, filename: str, data_service): |
||||
self.filename = filename |
||||
self.data_service = data_service |
||||
|
||||
async def write(self, content: str): |
||||
await self.data_service.save_file_content(self, content) |
||||
|
||||
async def load(self) -> str: |
||||
return await self.data_service.load_file_content(self) |
||||
|
||||
async def __aenter__(self): |
||||
await self.data_service.wait_and_lock(self) |
||||
return self |
||||
|
||||
async def __aexit__(self, *exc): |
||||
await self.data_service.unlock(self) |
||||
|
||||
|
||||
class DataService: |
||||
|
||||
def __init__(self, server_service: ServerService): |
||||
"""Creates fs over network via server_service""" |
||||
self.server_service = server_service |
||||
self.locks = set() |
||||
|
||||
def open(self, filename: str) -> FileDescriptor: |
||||
return FileDescriptor(filename, self) |
||||
|
||||
async def wait_and_lock(self, file_descriptor: FileDescriptor): |
||||
filename = file_descriptor.filename |
||||
while True: |
||||
if filename in self.locks: |
||||
await asyncio.sleep(LOCK_WAIT_SLEEP_TIMESPAN) |
||||
continue |
||||
else: |
||||
self.locks.add(filename) |
||||
break |
||||
|
||||
async def unlock(self, file_descriptor: FileDescriptor): |
||||
filename = file_descriptor.filename |
||||
self.locks.remove(filename) |
||||
|
||||
async def save_file_content(self, file_descriptor: FileDescriptor, content: str): |
||||
""" Saves json - serializable obj with file_descriptor.filename """ |
||||
self.__check_lock(file_descriptor) |
||||
message_payload = { |
||||
'filename': file_descriptor.filename, |
||||
'content': content |
||||
} |
||||
message = ServerMessage('FILE_SAVE', message_payload) |
||||
await self.server_service.send_request_to_server(message) |
||||
|
||||
async def load_file_content(self, file_descriptor: FileDescriptor) -> str: |
||||
self.__check_lock(file_descriptor) |
||||
message_payload = { 'filename': file_descriptor.filename } |
||||
message = ServerMessage('FILE_LOAD', message_payload) |
||||
return await self.server_service.send_request_to_server(message) |
||||
|
||||
def __check_lock(self, file_descriptor: FileDescriptor): |
||||
filename = file_descriptor.filename |
||||
if filename not in self.locks: |
||||
raise RuntimeError('No lock for file %s' % filename) |
@ -0,0 +1,149 @@
|
||||
import config |
||||
|
||||
import websockets |
||||
|
||||
import logging |
||||
import json |
||||
import asyncio |
||||
import traceback |
||||
|
||||
import utils.concurrent |
||||
import utils.meta |
||||
|
||||
from typing import Optional |
||||
|
||||
logger = logging.getLogger('SERVER_SERVICE') |
||||
|
||||
|
||||
PARSE_MESSAGE_OR_SAVE_LOOP_INTERRUPTED = False |
||||
SERVER_SOCKET_RECV_LOOP_INTERRUPTED = False |
||||
|
||||
|
||||
@utils.meta.JSONClass |
||||
class ServerMessage: |
||||
def __init__(self, method: str, payload: object = None, request_id: int = None): |
||||
# TODO: add error type / case |
||||
self.method = method |
||||
self.payload = payload |
||||
self.request_id = request_id |
||||
|
||||
|
||||
class ServerService(utils.concurrent.AsyncZmqActor): |
||||
|
||||
def __init__(self): |
||||
super(ServerService, self).__init__() |
||||
self.__aiter_inited = False |
||||
# this typing doesn't help vscode, maybe there is a mistake |
||||
self.__server_socket: Optional[websockets.Connect] = None |
||||
self.__request_next_id = 1 |
||||
self.__reconnecting = False |
||||
self.__responses = dict() |
||||
self.start() |
||||
|
||||
async def send_message_to_server(self, message: ServerMessage): |
||||
# Following message will be sent to actor's self._on_message() |
||||
# We do it cuz we created self.__server_socket in self._run() method, |
||||
# which runs in the actor's thread, not the thread we created ServerService |
||||
|
||||
# in theory, we can try to use zmq.proxy: |
||||
# zmq.proxy(self.__actor_socket, self.__server_socket) |
||||
# and do here something like: |
||||
# self.__actor_socket.send_string(json.dumps(message.to_json())) |
||||
await self._put_message_to_thread(json.dumps(message.to_json())) |
||||
|
||||
async def send_request_to_server(self, message: ServerMessage) -> object: |
||||
if message.request_id is not None: |
||||
raise ValueError('Message can`t have request_id before it is scheduled') |
||||
request_id = message.request_id = self.__request_next_id |
||||
self.__request_next_id = self.__request_next_id + 1 |
||||
asyncio.ensure_future(self.send_message_to_server(message)) |
||||
# you should await self.__responses[request_id] which should be a task, |
||||
# which you resolve somewhere else |
||||
while request_id not in self.__responses: |
||||
await asyncio.sleep(1) |
||||
response = self.__responses[request_id] |
||||
del self.__responses[request_id] |
||||
return response |
||||
|
||||
def __aiter__(self): |
||||
if self.__aiter_inited: |
||||
raise RuntimeError('Can`t iterate twice') |
||||
self.__aiter_inited = True |
||||
return self |
||||
|
||||
async def __anext__(self) -> ServerMessage: |
||||
while not PARSE_MESSAGE_OR_SAVE_LOOP_INTERRUPTED: |
||||
thread_message = await self._recv_message_from_thread() |
||||
server_message = self.__parse_message_or_save(thread_message) |
||||
if server_message is None: |
||||
continue |
||||
else: |
||||
return server_message |
||||
|
||||
async def _run_thread(self): |
||||
logger.info("Binding to %s ..." % config.HASTIC_SERVER_URL) |
||||
# TODO: consider to use async context for socket |
||||
await self.__server_socket_recv_loop() |
||||
|
||||
async def _on_message_to_thread(self, message: str): |
||||
if self.__server_socket is None or self.__server_socket.closed: |
||||
await self.__reconnect() |
||||
await self.__server_socket.send(message) |
||||
|
||||
async def __server_socket_recv_loop(self): |
||||
while not SERVER_SOCKET_RECV_LOOP_INTERRUPTED: |
||||
received_string = await self.__reconnect_recv() |
||||
if received_string == 'PING': |
||||
asyncio.ensure_future(self.__handle_ping()) |
||||
else: |
||||
asyncio.ensure_future(self._send_message_from_thread(received_string)) |
||||
|
||||
async def __reconnect(self): |
||||
if not self.__reconnecting: |
||||
self.__reconnecting = True |
||||
else: |
||||
while self.__reconnecting: |
||||
await asyncio.sleep(1) |
||||
return |
||||
|
||||
if self.__server_socket is not None: |
||||
await self.__server_socket.close() |
||||
self.__server_socket = await websockets.connect(config.HASTIC_SERVER_URL) |
||||
first_message = await self.__server_socket.recv() |
||||
if first_message == 'EALREADYEXISTING': |
||||
raise ConnectionError('Can`t connect as a second analytics') |
||||
self.__reconnecting = False |
||||
|
||||
async def __reconnect_recv(self) -> str: |
||||
while not SERVER_SOCKET_RECV_LOOP_INTERRUPTED: |
||||
try: |
||||
if self.__server_socket is None or self.__server_socket.closed: |
||||
await self.__reconnect() |
||||
return await self.__server_socket.recv() |
||||
except (ConnectionRefusedError, websockets.ConnectionClosedError): |
||||
if self.__server_socket is not None: |
||||
await self.__server_socket.close() |
||||
# TODO: this logic increases the number of ThreadPoolExecutor |
||||
self.__server_socket = None |
||||
# TODO: move to config |
||||
reconnect_delay = 3 |
||||
print('connection is refused or lost, trying to reconnect in %s seconds' % reconnect_delay) |
||||
await asyncio.sleep(reconnect_delay) |
||||
raise InterruptedError() |
||||
|
||||
async def __handle_ping(self): |
||||
if self.__server_socket is None or self.__server_socket.closed: |
||||
await self.__reconnect() |
||||
await self.__server_socket.send('PONG') |
||||
|
||||
def __parse_message_or_save(self, text: str) -> Optional[ServerMessage]: |
||||
try: |
||||
message_object = json.loads(text) |
||||
message = ServerMessage.from_json(message_object) |
||||
if message.request_id is not None: |
||||
self.__responses[message_object['requestId']] = message.payload |
||||
return None |
||||
return message |
||||
except Exception: |
||||
error_text = traceback.format_exc() |
||||
logger.error("__handle_message Exception: '%s'" % error_text) |
@ -0,0 +1,4 @@
|
||||
from utils.common import * |
||||
from utils.time import * |
||||
from utils.dataframe import * |
||||
from utils.meta import * |
@ -0,0 +1,443 @@
|
||||
import numpy as np |
||||
import pandas as pd |
||||
import scipy.signal |
||||
from scipy.fftpack import fft |
||||
from scipy.signal import argrelextrema |
||||
from scipy.stats import gaussian_kde |
||||
from scipy.stats.stats import pearsonr |
||||
import math |
||||
from typing import Optional, Union, List, Generator, Tuple |
||||
import utils |
||||
import logging |
||||
from itertools import islice |
||||
from collections import deque |
||||
from analytic_types import TimeSeries |
||||
from analytic_types.segment import Segment |
||||
|
||||
SHIFT_FACTOR = 0.05 |
||||
CONFIDENCE_FACTOR = 0.5 |
||||
SMOOTHING_FACTOR = 5 |
||||
MEASUREMENT_ERROR = 0.05 |
||||
|
||||
|
||||
def exponential_smoothing(series: pd.Series, alpha: float, last_smoothed_value: Optional[float] = None) -> pd.Series: |
||||
if alpha < 0 or alpha > 1: |
||||
raise ValueError('Alpha must be within the boundaries: 0 <= alpha <= 1') |
||||
if len(series) < 2: |
||||
return series |
||||
if last_smoothed_value is None: |
||||
result = [series.values[0]] |
||||
else: |
||||
result = [float(last_smoothed_value)] |
||||
if np.isnan(result): |
||||
result = [0] |
||||
for n in range(1, len(series)): |
||||
if np.isnan(series[n]): |
||||
result.append((1 - alpha) * result[n - 1]) |
||||
series.values[n] = result[n] |
||||
else: |
||||
result.append(alpha * series[n] + (1 - alpha) * result[n - 1]) |
||||
|
||||
assert len(result) == len(series), \ |
||||
f'len of smoothed data {len(result)} != len of original dataset {len(series)}' |
||||
return pd.Series(result, index = series.index) |
||||
|
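The function above is the standard single-exponential recurrence s[n] = alpha * x[n] + (1 - alpha) * s[n - 1], seeded with the first observation (NaNs just decay the previous value). A minimal sketch of the recurrence, assuming no NaNs:

```
import pandas as pd

def simple_exponential_smoothing(series: pd.Series, alpha: float) -> pd.Series:
    # same recurrence as exponential_smoothing above, without the NaN handling
    smoothed = [series.iloc[0]]
    for value in series.iloc[1:]:
        smoothed.append(alpha * value + (1 - alpha) * smoothed[-1])
    return pd.Series(smoothed, index=series.index)

print(simple_exponential_smoothing(pd.Series([1.0, 2.0, 3.0, 4.0]), alpha=0.5).tolist())
# [1.0, 1.5, 2.25, 3.125]
```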
||||
def find_pattern(data: pd.Series, height: float, length: int, pattern_type: str) -> list: |
||||
pattern_list = [] |
||||
right_bound = len(data) - length - 1 |
||||
for i in range(right_bound): |
||||
for x in range(1, length): |
||||
if pattern_type == 'jump': |
||||
if(data[i + x] > data[i] + height): |
||||
pattern_list.append(i) |
||||
elif pattern_type == 'drop': |
||||
if(data[i + x] < data[i] - height): |
||||
pattern_list.append(i) |
||||
return pattern_list |
||||
|
||||
def timestamp_to_index(dataframe: pd.DataFrame, timestamp: int): |
||||
data = dataframe['timestamp'] |
||||
idx, = np.where(data >= timestamp) |
||||
if len(idx) > 0: |
||||
time_ind = int(idx[0]) |
||||
else: |
||||
raise ValueError('Dataframe doesn`t contain timestamp: {}'.format(timestamp)) |
||||
return time_ind |
||||
|
||||
def find_peaks(data: Generator[float, None, None], size: int) -> Generator[float, None, None]: |
||||
window = deque(islice(data, size * 2 + 1)) |
||||
for i, v in enumerate(data, size): |
||||
current = window[size] |
||||
#TODO: remove max() from loop |
||||
if current == max(window) and current != window[size + 1]: |
||||
yield i, current |
||||
window.append(v) |
||||
window.popleft() |
||||
|
||||
def ar_mean(numbers: List[float]): |
||||
return float(sum(numbers)) / max(len(numbers), 1) |
||||
|
||||
def get_av_model(patterns_list: list): |
||||
if not patterns_list: return [] |
||||
patterns_list = get_same_length(patterns_list) |
||||
value_list = list(map(list, zip(*patterns_list))) |
||||
return list(map(ar_mean, value_list)) |
||||
|
||||
def get_same_length(patterns_list: list): |
||||
for index in range(len(patterns_list)): |
||||
if type(patterns_list[index]) == pd.Series: |
||||
patterns_list[index] = patterns_list[index].tolist() |
||||
patterns_list = list(filter(None, patterns_list)) |
||||
max_length = max(map(len, patterns_list)) |
||||
for pat in patterns_list: |
||||
if len(pat) < max_length: |
||||
length_difference = max_length - len(pat) |
||||
added_values = list(0 for _ in range(length_difference)) |
||||
pat.extend(added_values) |
||||
return patterns_list |
||||
|
||||
def close_filtering(pattern_list: List[int], win_size: int) -> TimeSeries: |
||||
if len(pattern_list) == 0: |
||||
return [] |
||||
s = [[pattern_list[0]]] |
||||
k = 0 |
||||
for i in range(1, len(pattern_list)): |
||||
if pattern_list[i] - win_size <= s[k][-1]: |
||||
s[k].append(pattern_list[i]) |
||||
else: |
||||
k += 1 |
||||
s.append([pattern_list[i]]) |
||||
return s |
||||
|
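Usage sketch for the grouping above (assuming the repo's utils package is on the path): an index joins the current group when it is within win_size of the group's last member.

```
from utils import close_filtering

print(close_filtering([1, 2, 10, 11, 30], win_size=3))
# [[1, 2], [10, 11], [30]]
```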
||||
def merge_intersecting_segments(segments: List[Segment], time_step: int) -> List[Segment]: |
||||
''' |
||||
Find intersecting segments in segments list and merge it. |
||||
''' |
||||
if len(segments) < 2: |
||||
return segments |
||||
segments = sorted(segments, key = lambda segment: segment.from_timestamp) |
||||
previous_segment = segments[0] |
||||
for i in range(1, len(segments)): |
||||
if segments[i].from_timestamp <= previous_segment.to_timestamp + time_step: |
||||
segments[i].message = segments[-1].message |
||||
segments[i].from_timestamp = min(previous_segment.from_timestamp, segments[i].from_timestamp) |
||||
segments[i].to_timestamp = max(previous_segment.to_timestamp, segments[i].to_timestamp) |
||||
segments[i - 1] = None |
||||
previous_segment = segments[i] |
||||
segments = [x for x in segments if x is not None] |
||||
return segments |
||||
|
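A rough standalone illustration of the merging rule (a minimal stand-in is used instead of the real Segment class, whose constructor is not shown in this diff): two segments are merged when the gap between them is at most time_step.

```
from typing import List, NamedTuple

class FakeSegment(NamedTuple):  # stand-in for analytic_types.segment.Segment
    from_timestamp: int
    to_timestamp: int

def merge(segments: List[FakeSegment], time_step: int) -> List[FakeSegment]:
    if not segments:
        return []
    segments = sorted(segments, key=lambda s: s.from_timestamp)
    merged = [segments[0]]
    for current in segments[1:]:
        last = merged[-1]
        if current.from_timestamp <= last.to_timestamp + time_step:
            merged[-1] = FakeSegment(last.from_timestamp, max(last.to_timestamp, current.to_timestamp))
        else:
            merged.append(current)
    return merged

print(merge([FakeSegment(0, 10), FakeSegment(12, 20), FakeSegment(40, 50)], time_step=3))
# [FakeSegment(from_timestamp=0, to_timestamp=20), FakeSegment(from_timestamp=40, to_timestamp=50)]
```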
||||
def find_interval(dataframe: pd.DataFrame) -> int: |
||||
if len(dataframe) < 2: |
||||
raise ValueError('Can`t find interval: length of data must be at least 2') |
||||
delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0]) |
||||
return delta |
||||
|
||||
def get_start_and_end_of_segments(segments: List[List[int]]) -> TimeSeries: |
||||
''' |
||||
find start and end of segment: [1, 2, 3, 4] -> [1, 4] |
||||
if a segment has only one index, it is doubled: [7] -> [7, 7] |
||||
''' |
||||
result = [] |
||||
for segment in segments: |
||||
if len(segment) == 0: |
||||
continue |
||||
elif len(segment) > 1: |
||||
segment = [segment[0], segment[-1]] |
||||
else: |
||||
segment = [segment[0], segment[0]] |
||||
result.append(segment) |
||||
return result |
||||
|
||||
def best_pattern(pattern_list: list, data: pd.Series, dir: str) -> list: |
||||
new_pattern_list = [] |
||||
for val in pattern_list: |
||||
max_val = data[val[0]] |
||||
min_val = data[val[0]] |
||||
ind = val[0] |
||||
for i in val: |
||||
if dir == 'max': |
||||
if data[i] > max_val: |
||||
max_val = data[i] |
||||
ind = i |
||||
else: |
||||
if data[i] < min_val: |
||||
min_val = data[i] |
||||
ind = i |
||||
new_pattern_list.append(ind) |
||||
return new_pattern_list |
||||
|
||||
def find_nan_indexes(segment: pd.Series) -> list: |
||||
nan_list = pd.isnull(segment) |
||||
nan_list = np.array(nan_list) |
||||
nan_indexes = np.where(nan_list == True)[0] |
||||
return list(nan_indexes) |
||||
|
||||
def check_nan_values(segment: Union[pd.Series, list]) -> Union[pd.Series, list]: |
||||
nan_list = utils.find_nan_indexes(segment) |
||||
if len(nan_list) > 0: |
||||
segment = utils.nan_to_zero(segment, nan_list) |
||||
return segment |
||||
|
||||
def nan_to_zero(segment: Union[pd.Series, list], nan_list: list) -> Union[pd.Series, list]: |
||||
if type(segment) == pd.Series: |
||||
for val in nan_list: |
||||
segment.values[val] = 0 |
||||
else: |
||||
for val in nan_list: |
||||
segment[val] = 0 |
||||
return segment |
||||
|
||||
def find_confidence(segment: pd.Series) -> Tuple[float, float]: |
||||
segment = utils.check_nan_values(segment) |
||||
segment_min = min(segment) |
||||
segment_max = max(segment) |
||||
height = segment_max - segment_min |
||||
if height: |
||||
return (CONFIDENCE_FACTOR * height, height) |
||||
else: |
||||
return (0, 0) |
||||
|
||||
def find_width(pattern: pd.Series, selector: bool) -> int: |
||||
pattern = pattern.values |
||||
center = utils.find_extremum_index(pattern, selector) |
||||
pattern_left = pattern[:center] |
||||
pattern_right = pattern[center:] |
||||
left_extremum_index = utils.find_last_extremum(pattern_left, selector) |
||||
right_extremum_index = utils.find_extremum_index(pattern_right, not selector) |
||||
left_width = center - left_extremum_index |
||||
right_width = right_extremum_index + 1 |
||||
return right_width + left_width |
||||
|
||||
def find_last_extremum(segment: np.ndarray, selector: bool) -> int: |
||||
segment = segment[::-1] |
||||
first_extremum_ind = find_extremum_index(segment, not selector) |
||||
last_extremum_ind = len(segment) - first_extremum_ind - 1 |
||||
return last_extremum_ind |
||||
|
||||
def find_extremum_index(segment: np.ndarray, selector: bool) -> int: |
||||
if selector: |
||||
return segment.argmax() |
||||
else: |
||||
return segment.argmin() |
||||
|
||||
def get_interval(data: pd.Series, center: int, window_size: int, normalization = False) -> pd.Series: |
||||
""" |
||||
Get an interval of 2 * window_size + 1 values: |
||||
window_size to the left, window_size to the right of center |
||||
If normalization == True - subtract minimum from the interval |
||||
""" |
||||
if center >= len(data): |
||||
logging.warning('Pattern center {} is out of data with len {}'.format(center, len(data))) |
||||
return [] |
||||
left_bound = center - window_size |
||||
right_bound = center + window_size + 1 |
||||
if left_bound < 0: |
||||
left_bound = 0 |
||||
if right_bound > len(data): |
||||
right_bound = len(data) |
||||
result_interval = data[left_bound: right_bound] |
||||
if normalization: |
||||
result_interval = subtract_min_without_nan(result_interval) |
||||
return result_interval |
||||
|
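Usage sketch (assuming the repo's utils is importable): the interval is clamped at the borders of the series, so near the edges it can be shorter than 2 * window_size + 1 points.

```
from utils import get_interval

import pandas as pd

data = pd.Series([0, 1, 2, 3, 4, 5, 6])
print(get_interval(data, center=3, window_size=2).tolist())  # [1, 2, 3, 4, 5]
print(get_interval(data, center=0, window_size=2).tolist())  # [0, 1, 2] (clamped on the left)
```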
||||
def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> TimeSeries: |
||||
""" |
||||
Find start and end of patterns for peak |
||||
max_border_factor - scales window_size to set how far from the center the borders are searched |
||||
if inverse == True, the segment is inverted before border detection (trough -> peak / peak -> trough) |
||||
""" |
||||
if len(pattern_centers) == 0: |
||||
return [] |
||||
border_list = [] |
||||
window_size = math.ceil(max_border_factor * window_size) |
||||
for center in pattern_centers: |
||||
current_pattern = get_interval(data, center, window_size, True) |
||||
if inverse: |
||||
current_pattern = inverse_segment(current_pattern) |
||||
current_pattern = current_pattern - confidence |
||||
left_segment = current_pattern[:window_size] # a.iloc[a.index < center] |
||||
right_segment = current_pattern[window_size:] # a.iloc[a.index >= center] |
||||
left_border = get_end_of_segment(left_segment, descending = False) |
||||
right_border = get_end_of_segment(right_segment) |
||||
border_list.append((left_border, right_border)) |
||||
return border_list |
||||
|
||||
def get_end_of_segment(segment: pd.Series, skip_positive_values = True, descending = True) -> int: |
||||
""" |
||||
Find end of descending or ascending part of pattern |
||||
Allowable error is 1 index |
||||
""" |
||||
if not descending: |
||||
segment = segment.iloc[::-1] |
||||
if len(segment) == 0: |
||||
return 1 |
||||
for idx in range(1, len(segment) - 1): |
||||
if skip_positive_values and segment.values[idx] > 0: |
||||
continue |
||||
if segment.values[idx] >= segment.values[idx - 1]: |
||||
return segment.index[idx - 1] |
||||
return segment.index[-1] |
||||
|
||||
def inverse_segment(segment: pd.Series) -> pd.Series: |
||||
""" |
||||
Convert a trough to a peak and vice versa |
||||
""" |
||||
if len(segment) > 0: |
||||
rev_val = max(segment.values) |
||||
for idx in range(len(segment)): |
||||
segment.values[idx] = math.fabs(segment.values[idx] - rev_val) |
||||
return segment |
||||
|
||||
def subtract_min_without_nan(segment: pd.Series) -> pd.Series: |
||||
if len(segment) == 0: |
||||
return [] |
||||
nan_list = utils.find_nan_indexes(segment) |
||||
if len(nan_list) > 0: |
||||
return segment |
||||
else: |
||||
segment = segment - min(segment) |
||||
return segment |
||||
|
||||
def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list: |
||||
labeled_segment = [] |
||||
convolve_list = [] |
||||
for segment in segments: |
||||
labeled_segment = utils.get_interval(data, segment, window_size) |
||||
labeled_segment = utils.subtract_min_without_nan(labeled_segment) |
||||
labeled_segment = utils.check_nan_values(labeled_segment) |
||||
auto_convolve = scipy.signal.fftconvolve(labeled_segment, labeled_segment) |
||||
convolve_segment = scipy.signal.fftconvolve(labeled_segment, av_model) |
||||
if len(auto_convolve) > 0: |
||||
convolve_list.append(max(auto_convolve)) |
||||
if len(convolve_segment) > 0: |
||||
convolve_list.append(max(convolve_segment)) |
||||
return convolve_list |
||||
|
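Why the maximum of the convolution is used as a score: when a window has the same shape as the (symmetric) pattern model, max(fftconvolve(window, model)) comes close to the pattern's auto-convolution maximum, while dissimilar windows score noticeably lower. A toy sketch with made-up numbers:

```
import scipy.signal

pattern = [0.0, 1.0, 2.0, 1.0, 0.0]   # symmetric toy pattern model
similar = [0.0, 1.1, 1.9, 1.0, 0.1]
flat = [0.5, 0.5, 0.5, 0.5, 0.5]

auto_max = max(scipy.signal.fftconvolve(pattern, pattern))
print(max(scipy.signal.fftconvolve(similar, pattern)) / auto_max)  # close to 1
print(max(scipy.signal.fftconvolve(flat, pattern)) / auto_max)     # noticeably smaller
```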
||||
def get_correlation_gen(data: pd.Series, window_size: int, pattern_model: List[float]) -> Generator[float, None, None]: |
||||
# Get a new dataset by correlating a sliding window over the data with pattern_model |
||||
for i in range(window_size, len(data) - window_size): |
||||
watch_data = data[i - window_size: i + window_size + 1] |
||||
correlation = pearsonr(watch_data, pattern_model) |
||||
if len(correlation) > 0: |
||||
yield(correlation[0]) |
||||
|
||||
def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list: |
||||
labeled_segment = [] |
||||
correlation_list = [] |
||||
p_value_list = [] |
||||
for segment in segments: |
||||
labeled_segment = utils.get_interval(data, segment, window_size) |
||||
labeled_segment = utils.subtract_min_without_nan(labeled_segment) |
||||
labeled_segment = utils.check_nan_values(labeled_segment) |
||||
if len(labeled_segment) == 0 or len(labeled_segment) != len(av_model): |
||||
continue |
||||
correlation = pearsonr(labeled_segment, av_model) |
||||
if len(correlation) > 1: |
||||
correlation_list.append(correlation[0]) |
||||
p_value_list.append(correlation[1]) |
||||
return correlation_list |
||||
|
||||
def get_distribution_density(segment: pd.Series) -> Tuple[float, float, float]: |
||||
segment.dropna(inplace = True) |
||||
if len(segment) < 2 or len(segment.nonzero()[0]) == 0: |
||||
return (0, 0, 0) |
||||
min_jump = min(segment) |
||||
max_jump = max(segment) |
||||
pdf = gaussian_kde(segment) |
||||
x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment)) |
||||
y = pdf(x) |
||||
ax_list = list(zip(x, y)) |
||||
ax_list = np.array(ax_list, np.float32) |
||||
antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0] |
||||
peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0] |
||||
try: |
||||
min_peak_index = peaks_kde[0] |
||||
segment_min_line = ax_list[min_peak_index, 0] |
||||
max_peak_index = peaks_kde[1] |
||||
segment_max_line = ax_list[max_peak_index, 0] |
||||
segment_median = ax_list[antipeaks_kde[0], 0] |
||||
except IndexError: |
||||
segment_max_line = max_jump * (1 - SHIFT_FACTOR) |
||||
segment_min_line = min_jump * (1 - SHIFT_FACTOR) |
||||
segment_median = (max_jump - min_jump) / 2 + min_jump |
||||
return segment_median, segment_max_line, segment_min_line |
||||
|
||||
def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> Tuple[float, int]: |
||||
segment = segment_data |
||||
if len(segment_data) > SMOOTHING_FACTOR * 3: |
||||
flat_segment = segment_data.rolling(window = SMOOTHING_FACTOR).mean() |
||||
segment = flat_segment.dropna() |
||||
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment) |
||||
height = 0.95 * (segment_max_line - segment_min_line) |
||||
length = utils.get_pattern_length(segment_data, segment_min_line, segment_max_line, pat_type) |
||||
return height, length |
||||
|
||||
def find_pattern_center(segment_data: pd.Series, segment_from_index: int, pattern_type: str): |
||||
segment_median = utils.get_distribution_density(segment_data)[0] |
||||
cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pattern_type) |
||||
if len(cen_ind) > 0: |
||||
pat_center = cen_ind[0] |
||||
segment_cent_index = pat_center + segment_from_index |
||||
else: |
||||
segment_cent_index = math.ceil((len(segment_data)) / 2) |
||||
return segment_cent_index |
||||
|
||||
def get_pattern_length(segment_data: pd.Series, segment_min_line: float, segment_max_line: float, pat_type: str) -> int: |
||||
# TODO: move function to jump & drop merged model |
||||
segment_max = max(segment_data) |
||||
segment_min = min(segment_data) |
||||
# TODO: use better way |
||||
if segment_min_line <= segment_min: |
||||
segment_min_line = segment_min * (1 + MEASUREMENT_ERROR) |
||||
if segment_max_line >= segment_max: |
||||
segment_max_line = segment_max * (1 - MEASUREMENT_ERROR) |
||||
min_line = [] |
||||
max_line = [] |
||||
for i in range(len(segment_data)): |
||||
min_line.append(segment_min_line) |
||||
max_line.append(segment_max_line) |
||||
min_line = np.array(min_line) |
||||
max_line = np.array(max_line) |
||||
segment_array = np.array(segment_data.tolist()) |
||||
idmin = np.argwhere(np.diff(np.sign(min_line - segment_array)) != 0).reshape(-1) |
||||
idmax = np.argwhere(np.diff(np.sign(max_line - segment_array)) != 0).reshape(-1) |
||||
if len(idmin) > 0 and len(idmax) > 0: |
||||
if pat_type == 'jump': |
||||
result_length = idmax[0] - idmin[-1] + 1 |
||||
elif pat_type == 'drop': |
||||
result_length = idmin[0] - idmax[-1] + 1 |
||||
return result_length if result_length > 0 else 0 |
||||
else: |
||||
return 0 |
||||
|
||||
def pattern_intersection(segment_data: list, median: float, pattern_type: str) -> list: |
||||
center_index = [] |
||||
if pattern_type == 'jump': |
||||
for i in range(1, len(segment_data) - 1): |
||||
if segment_data[i - 1] < median and segment_data[i + 1] > median: |
||||
center_index.append(i) |
||||
elif pattern_type == 'drop': |
||||
for i in range(1, len(segment_data) - 1): |
||||
if segment_data[i - 1] > median and segment_data[i + 1] < median: |
||||
center_index.append(i) |
||||
delete_index = [] |
||||
for i in range(1, len(center_index)): |
||||
if center_index[i] == center_index[i - 1] + 1: |
||||
delete_index.append(i - 1) |
||||
|
||||
return [x for (idx, x) in enumerate(center_index) if idx not in delete_index] |
||||
|
||||
def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame: |
||||
data_min = data['value'].min() |
||||
if not np.isnan(data_min) and data_min > 0: |
||||
data['value'] = data['value'] - data_min |
||||
return data |
||||
|
||||
def get_min_max(array: list, default): |
||||
return float(min(array, default=default)), float(max(array, default=default)) |
||||
|
||||
def remove_duplicates_and_sort(array: list) -> list: |
||||
array = list(frozenset(array)) |
||||
array.sort() |
||||
return array |
@ -0,0 +1,130 @@
|
||||
import asyncio |
||||
import threading |
||||
import zmq |
||||
import zmq.asyncio |
||||
from abc import ABC, abstractmethod |
||||
|
||||
|
||||
# This const defines Thread <-> Actor zmq one-to-one connection |
||||
# We create a separate zmq context, so the zmq address 'inproc://xxx' doesn't matter |
||||
# It is the default address; you may want to use AsyncZmqThread in another way |
||||
ZMQ_THREAD_ACTOR_ADDR = 'inproc://xxx' |
||||
|
||||
|
||||
# Inheritance order (threading.Thread, ABC) is essential. Otherwise it's an MRO error. |
||||
class AsyncZmqThread(threading.Thread, ABC): |
||||
"""Class for wrapping zmq socket into a thread with it's own asyncio event loop |
||||
|
||||
""" |
||||
|
||||
def __init__(self, |
||||
zmq_context: zmq.asyncio.Context, |
||||
zmq_socket_addr: str, |
||||
zmq_socket_type = zmq.PAIR |
||||
): |
||||
super(AsyncZmqThread, self).__init__() |
||||
self._zmq_context = zmq_context # you can use it in child classes |
||||
self.__zmq_socket_addr = zmq_socket_addr |
||||
self.__zmq_socket_type = zmq_socket_type |
||||
self.__asyncio_loop = None |
||||
self.__zmq_socket = None |
||||
|
||||
async def __message_recv_loop(self): |
||||
while True: |
||||
text = await self.__zmq_socket.recv_string() |
||||
asyncio.ensure_future(self._on_message_to_thread(text)) |
||||
|
||||
async def _send_message_from_thread(self, message: str): |
||||
await self.__zmq_socket.send_string(message) |
||||
|
||||
@abstractmethod |
||||
async def _on_message_to_thread(self, message: str): |
||||
"""Override this method to receive messages""" |
||||
|
||||
@abstractmethod |
||||
async def _run_thread(self): |
||||
"""Override this method to do some async work. |
||||
This method uses a separate thread. |
||||
|
||||
You can block yourself here if you don't do any await. |
||||
|
||||
Example: |
||||
|
||||
``` |
||||
async def _run_thread(self): |
||||
i = 0 |
||||
while True: |
||||
await asyncio.sleep(1) |
||||
i += 1 |
||||
await self._send_message_from_thread(f'{self.name}: ping {i}') |
||||
``` |
||||
""" |
||||
|
||||
def run(self): |
||||
self.__asyncio_loop = asyncio.new_event_loop() |
||||
asyncio.set_event_loop(self.__asyncio_loop) |
||||
self.__zmq_socket = self._zmq_context.socket(self.__zmq_socket_type) |
||||
self.__zmq_socket.connect(self.__zmq_socket_addr) |
||||
asyncio.ensure_future(self.__message_recv_loop()) |
||||
self.__asyncio_loop.run_until_complete(self._run_thread()) |
||||
|
||||
# TODO: implement stop signal handling |
||||
|
||||
|
||||
class AsyncZmqActor(AsyncZmqThread): |
||||
"""Threaded and Async Actor model based on ZMQ inproc communication |
||||
|
||||
override following: |
||||
``` |
||||
async def _run_thread(self) |
||||
async def _on_message_to_thread(self, message: str) |
||||
``` |
||||
|
||||
both methods run in actor's thread |
||||
|
||||
you can call `self._send_message_from_thread('txt')` |
||||
|
||||
to receive it later in `self._recv_message_from_thread()`. |
||||
|
||||
Example: |
||||
|
||||
``` |
||||
class MyActor(AsyncZmqActor): |
||||
async def _run_thread(self): |
||||
self.counter = 0 |
||||
# runs in a different thread |
||||
await self._send_message_from_thread('some_txt_message_to_actor') |
||||
|
||||
async def _on_message_to_thread(self, message): |
||||
# runs in the actor's thread |
||||
self.counter += 1 |
||||
|
||||
asyncZmqActor = MyActor() |
||||
asyncZmqActor.start() |
||||
``` |
||||
""" |
||||
|
||||
def __init__(self): |
||||
super(AsyncZmqActor, self).__init__(zmq.asyncio.Context(), ZMQ_THREAD_ACTOR_ADDR) |
||||
|
||||
self.__actor_socket = self._zmq_context.socket(zmq.PAIR) |
||||
self.__actor_socket.bind(ZMQ_THREAD_ACTOR_ADDR) |
||||
|
||||
async def _put_message_to_thread(self, message: str): |
||||
"""It "sends" `message` to thread, |
||||
|
||||
but we can't await it's `AsyncZmqThread._on_message_to_thread()` |
||||
|
||||
so it's "put", not "send" |
||||
""" |
||||
await self.__actor_socket.send_string(message) |
||||
|
||||
async def _recv_message_from_thread(self) -> str: |
||||
"""Returns next message ``'txt'`` from thread sent by |
||||
|
||||
``AsyncZmqActor._send_message_from_thread('txt')`` |
||||
|
||||
""" |
||||
return await self.__actor_socket.recv_string() |
||||
|
||||
# TODO: implement graceful stopping |
@ -0,0 +1,63 @@
|
||||
from itertools import chain |
||||
import pandas as pd |
||||
import numpy as np |
||||
from typing import Generator |
||||
|
||||
def prepare_data(data: list) -> pd.DataFrame: |
||||
""" |
||||
Takes a list of [timestamp, value] rows and |
||||
- converts it into a pd.DataFrame, |
||||
- converts the 'timestamp' column to pd.Datetime, |
||||
- normalizes missing values to NaN |
||||
""" |
||||
data = pd.DataFrame(data, columns=['timestamp', 'value']) |
||||
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms') |
||||
data.fillna(value = np.nan, inplace = True) |
||||
return data |
||||
|
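Usage sketch (hypothetical rows): the input is a list of [timestamp, value] pairs with millisecond timestamps; missing values end up as NaN.

```
from utils.dataframe import prepare_data

rows = [
    [1545730073000, 1.0],
    [1545730074000, None],   # becomes NaN
    [1545730075000, 3.0],
]
dataframe = prepare_data(rows)
print(dataframe.dtypes['timestamp'])  # datetime64[ns]
```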
||||
def get_intersected_chunks(data: list, intersection: int, chunk_size: int) -> Generator[list, None, None]: |
||||
""" |
||||
Returns a generator that splits the data into intersecting chunks. |
||||
The intersection makes it possible to detect a pattern that lies on the border between chunks. |
||||
intersection - length of intersection. |
||||
chunk_size - length of chunk |
||||
""" |
||||
assert chunk_size > 0, 'chunk size must be greater than zero' |
||||
assert intersection > 0, 'intersection length must be greater than zero' |
||||
|
||||
data_len = len(data) |
||||
|
||||
if data_len <= chunk_size: |
||||
yield data |
||||
return |
||||
|
||||
nonintersected = chunk_size - intersection |
||||
|
||||
offset = 0 |
||||
while True: |
||||
left_values = data_len - offset |
||||
if left_values == 0: |
||||
break |
||||
if left_values <= chunk_size: |
||||
yield data[offset : data_len] |
||||
break |
||||
else: |
||||
yield data[offset: offset + chunk_size] |
||||
offset += min(nonintersected, left_values) |
||||
|
||||
def get_chunks(data: list, chunk_size: int) -> Generator[list, None, None]: |
||||
""" |
||||
Returns a generator that splits the data into non-intersecting chunks. |
||||
chunk_size - length of chunk |
||||
""" |
||||
assert chunk_size > 0, 'chunk size must be greater than zero' |
||||
|
||||
chunks_iterables = [iter(data)] * chunk_size |
||||
result_chunks = zip(*chunks_iterables) |
||||
partial_chunk_len = len(data) % chunk_size |
||||
|
||||
if partial_chunk_len != 0: |
||||
result_chunks = chain(result_chunks, [data[-partial_chunk_len:]]) |
||||
|
||||
for chunk in result_chunks: |
||||
yield list(chunk) |
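A quick illustration of both chunking helpers (assuming utils.dataframe is importable): intersected chunks overlap by `intersection` points, so a pattern sitting on a chunk border is still fully contained in at least one chunk.

```
from utils.dataframe import get_chunks, get_intersected_chunks

data = list(range(10))
print(list(get_intersected_chunks(data, intersection=2, chunk_size=4)))
# [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7], [6, 7, 8, 9]]
print(list(get_chunks(data, chunk_size=4)))
# [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
```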
@ -0,0 +1,81 @@
|
||||
from inspect import signature, Parameter |
||||
from functools import wraps |
||||
from typing import Optional, List |
||||
import re |
||||
|
||||
|
||||
CAMEL_REGEX = re.compile(r'([A-Z])') |
||||
UNDERSCORE_REGEX = re.compile(r'_([a-z])') |
||||
|
||||
def camel_to_underscore(name): |
||||
#TODO: need to rename 'from'/'to' to 'from_timestamp'/'to_timestamp' everywhere(in analytics, server, panel) |
||||
if name == 'from' or name == 'to': |
||||
name += '_timestamp' |
||||
return CAMEL_REGEX.sub(lambda x: '_' + x.group(1).lower(), name) |
||||
|
||||
def underscore_to_camel(name): |
||||
if name == 'from_timestamp' or name == 'to_timestamp': |
||||
name = name.replace('_timestamp', '') |
||||
return UNDERSCORE_REGEX.sub(lambda x: x.group(1).upper(), name) |
||||
|
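These two helpers are inverses for the field names that cross the JSON boundary; for example (assuming utils.meta is importable):

```
from utils.meta import camel_to_underscore, underscore_to_camel

print(camel_to_underscore('analyticUnitId'))    # analytic_unit_id
print(underscore_to_camel('analytic_unit_id'))  # analyticUnitId
print(camel_to_underscore('from'))              # from_timestamp (special-cased)
print(underscore_to_camel('from_timestamp'))    # from (special-cased)
```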
||||
def is_field_private(field_name: str) -> bool: |
||||
m = re.match(r'_[^(__)]+__', field_name) |
||||
return m is not None |
||||
|
||||
def serialize(obj): |
||||
if hasattr(obj, 'to_json'): |
||||
return obj.to_json() |
||||
else: |
||||
return obj |
||||
|
||||
def inited_params(target_init): |
||||
target_params = signature(target_init).parameters.values() |
||||
if len(target_params) < 1: |
||||
raise ValueError('init function must have at least the self parameter') |
||||
if len(target_params) == 1: |
||||
return target_init |
||||
_, *target_params = target_params # we will not use self any more |
||||
|
||||
@wraps(target_init) |
||||
def wrapped_init(wrapped_self, *wrapped_args, **wrapped_kwargs): |
||||
for tp in target_params: |
||||
if tp.default is Parameter.empty: |
||||
continue |
||||
setattr(wrapped_self, tp.name, tp.default) |
||||
|
||||
for tp, v in zip(target_params, wrapped_args): |
||||
setattr(wrapped_self, tp.name, v) |
||||
|
||||
for k, v in wrapped_kwargs.items(): |
||||
setattr(wrapped_self, k, v) |
||||
|
||||
target_init(wrapped_self, *wrapped_args, **wrapped_kwargs) |
||||
|
||||
return wrapped_init |
||||
|
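A minimal sketch of what the decorator does, using a hypothetical class that is not part of this changeset: every constructor parameter (and its default) is copied onto the instance before the original __init__ runs.

```
class Detector:
    @inited_params
    def __init__(self, window_size, confidence=1.5):
        pass

d = Detector(10)
assert d.window_size == 10    # positional argument copied onto the instance
assert d.confidence == 1.5    # default value copied as well
```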
||||
def JSONClass(target_class): |
||||
|
||||
def to_json(self) -> dict: |
||||
""" |
||||
Returns a JSON representation of the object |
||||
where all None values and private fields are skipped |
||||
""" |
||||
return { |
||||
underscore_to_camel(k): serialize(v) for k, v in self.__dict__.items() |
||||
if v is not None and not is_field_private(k) |
||||
} |
||||
|
||||
def from_json(json_object: Optional[dict]) -> target_class: |
||||
if json_object is None: |
||||
json_object = {} |
||||
init_object = { camel_to_underscore(k): v for k, v in json_object.items() } |
||||
return target_class(**init_object) |
||||
|
||||
# target_class.__init__ = inited_params(target_class.__init__) |
||||
target_class.to_json = to_json |
||||
target_class.from_json = from_json |
||||
return target_class |
||||
|
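A sketch of the intended round trip with a hypothetical class (names chosen for illustration): snake_case attributes become camelCase JSON keys, None values and private fields are dropped, and from_json reverses the mapping.

```
@JSONClass
class TimeRange:
    def __init__(self, from_timestamp, to_timestamp, description=None):
        self.from_timestamp = from_timestamp
        self.to_timestamp = to_timestamp
        self.description = description

tr = TimeRange(1523889000000, 1523889000010)
# 'from_timestamp'/'to_timestamp' map to 'from'/'to'; description is None and is skipped
assert tr.to_json() == { 'from': 1523889000000, 'to': 1523889000010 }
restored = TimeRange.from_json({ 'from': 1523889000000, 'to': 1523889000010 })
assert restored.to_timestamp == 1523889000010
```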
||||
class SerializableList(List[dict]): |
||||
def to_json(self): |
||||
return list(map(lambda s: s.to_json(), self)) |
@ -0,0 +1,13 @@
|
||||
import pandas as pd |
||||
from typing import List |
||||
|
||||
def convert_sec_to_ms(sec) -> int: |
||||
return int(sec) * 1000 |
||||
|
||||
def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int: |
||||
# TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6 |
||||
return int(timestamp.value) // 1000000 |
||||
|
||||
def convert_series_to_timestamp_list(series: pd.Series) -> List[int]: |
||||
timestamps = map(lambda value: convert_pd_timestamp_to_ms(value), series) |
||||
return list(timestamps) |
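For example (a sketch; pandas stores timestamps in nanoseconds, while the service and its tests use unix milliseconds):

```
ts = pd.Timestamp('2018-04-16 14:30:00')
convert_pd_timestamp_to_ms(ts)                      # -> 1523889000000
convert_series_to_timestamp_list(pd.Series([ts]))   # -> [1523889000000]
```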
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python3 |
||||
|
||||
import sys |
||||
import os |
||||
|
||||
if sys.version_info[:3] < (3, 6, 5) or sys.version_info[:2] >= (3, 7): |
||||
sys.stderr.write('Required python is >= 3.6.5 and < 3.7.0 \n') |
||||
sys.stderr.write('Your python version is: %d.%d.%d\n' % sys.version_info[:3]) |
||||
sys.exit(1) |
||||
|
||||
# TODO: make a wrapper script that sets PYTHONPATH instead |
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) |
||||
|
||||
import logging |
||||
|
||||
root_logger = logging.getLogger() |
||||
root_logger.setLevel(logging.DEBUG) |
||||
|
||||
|
||||
logging_formatter = logging.Formatter("%(asctime)s [Analytics] [%(levelname)-5.5s] %(message)s") |
||||
|
||||
logging_handler = logging.StreamHandler(sys.stdout) |
||||
logging_handler.setLevel(logging.DEBUG) |
||||
logging_handler.setFormatter(logging_formatter) |
||||
|
||||
root_logger.addHandler(logging_handler) |
||||
|
||||
|
||||
from server import run_server |
||||
|
||||
if __name__ == "__main__": |
||||
run_server() |
@ -0,0 +1 @@
|
||||
hiddenimports=['pandas._libs.tslibs.timedeltas'] |
@ -0,0 +1 @@
|
||||
hiddenimports=['scipy._lib.messagestream'] |
@ -0,0 +1,7 @@
|
||||
attrdict==2.0.0 |
||||
aiounittest==1.1.0 |
||||
numpy==1.14.5 |
||||
pandas==0.20.3 |
||||
pyzmq==18.0.1 |
||||
scipy==1.1.0 |
||||
websockets==8.1 |
@ -0,0 +1,3 @@
|
||||
#!/bin/bash |
||||
cd .. |
||||
python3.6 -m PyInstaller --paths=analytics/ --additional-hooks-dir=pyinstaller_hooks bin/server |
@ -0,0 +1,4 @@
|
||||
import sys |
||||
import os |
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) |
@ -0,0 +1,16 @@
|
||||
from analytic_types import TimeSeriesIndex, TimeSeries2 |
||||
|
||||
import unittest |
||||
|
||||
|
||||
class TestDataset(unittest.TestCase): |
||||
def test_basic_timeseries_index(self): |
||||
tsi = TimeSeriesIndex(['2017-12-31 16:00:00-08:00']) |
||||
self.assertEqual(len(tsi), 1) |
||||
tsi2 = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) |
||||
self.assertEqual(len(tsi2), 3) |
||||
|
||||
def test_basic_timeseries(self): |
||||
tsis = TimeSeriesIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00', '2017-12-31 18:00:00-08:00']) |
||||
ts = TimeSeries2([4, 5, 6], tsis) |
||||
self.assertEqual(len(ts), 3) |
@ -0,0 +1,38 @@
|
||||
import unittest |
||||
import pandas as pd |
||||
import random |
||||
from typing import List |
||||
|
||||
from analytic_types.data_bucket import DataBucket |
||||
from tests.test_dataset import create_list_of_timestamps |
||||
|
||||
class TestBucket(unittest.TestCase): |
||||
|
||||
def test_receive_data(self): |
||||
bucket = DataBucket() |
||||
data_val = list(range(6)) |
||||
timestamp_list = create_list_of_timestamps(len(data_val)) |
||||
for val in data_val: |
||||
bucket.receive_data(get_pd_dataframe([val], [1523889000000 + val])) |
||||
for idx, row in bucket.data.iterrows(): |
||||
self.assertEqual(data_val[idx], row['value']) |
||||
self.assertEqual(timestamp_list[idx], row['timestamp']) |
||||
|
||||
def test_drop_data(self): |
||||
bucket = DataBucket() |
||||
data_val = list(range(10)) |
||||
timestamp_list = create_list_of_timestamps(len(data_val)) |
||||
bucket.receive_data(get_pd_dataframe(data_val, timestamp_list)) |
||||
bucket.drop_data(5) |
||||
expected_data = data_val[5:] |
||||
expected_timestamp = timestamp_list[5:] |
||||
self.assertEqual(expected_data, bucket.data['value'].tolist()) |
||||
self.assertEqual(expected_timestamp, bucket.data['timestamp'].tolist()) |
||||
|
||||
def get_pd_dataframe(value: List[int], timestamp: List[int]) -> pd.DataFrame: |
||||
if len(value) != len(timestamp): |
||||
raise ValueError('len(value) should be equal to len(timestamp)') |
||||
return pd.DataFrame({ 'value': value, 'timestamp': timestamp }) |
||||
 |
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,386 @@
|
||||
import unittest |
||||
import pandas as pd |
||||
import numpy as np |
||||
from utils import prepare_data |
||||
import models |
||||
import random |
||||
import scipy.signal |
||||
from typing import List |
||||
|
||||
from analytic_types.segment import Segment |
||||
|
||||
class TestDataset(unittest.TestCase): |
||||
|
||||
def test_models_with_corrupted_dataframe(self): |
||||
data = [[1523889000000 + i, float('nan')] for i in range(10)] |
||||
dataframe = pd.DataFrame(data, columns=['timestamp', 'value']) |
||||
segments = [] |
||||
|
||||
model_instances = [ |
||||
models.JumpModel(), |
||||
models.DropModel(), |
||||
models.GeneralModel(), |
||||
models.PeakModel(), |
||||
models.TroughModel() |
||||
] |
||||
|
||||
for model in model_instances: |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
with self.assertRaises(AssertionError): |
||||
model.fit(dataframe, segments, 'test') |
||||
|
||||
def test_peak_antisegments(self): |
||||
data_val = [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.PeakModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_jump_antisegments(self): |
||||
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 9.0, 1.0, 1.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000016, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': True}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.JumpModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_trough_antisegments(self): |
||||
data_val = [9.0, 9.0, 9.0, 9.0, 7.0, 4.0, 7.0, 9.0, 9.0, 9.0, 5.0, 1.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.TroughModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_drop_antisegments(self): |
||||
data_val = [9.0, 9.0, 9.0, 9.0, 9.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 1.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000016, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': True}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.DropModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_general_antisegments(self): |
||||
data_val = [1.0, 2.0, 1.0, 2.0, 5.0, 6.0, 3.0, 2.0, 1.0, 1.0, 8.0, 9.0, 8.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000003, 'to': 1523889000005, 'labeled': False, 'deleted': True}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.GeneralModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_jump_empty_segment(self): |
||||
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.JumpModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_drop_empty_segment(self): |
||||
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.DropModel() |
||||
model.state = model.get_state(None) |
||||
model_name = model.__class__.__name__ |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_value_error_dataset_input_should_have_multiple_elements(self): |
||||
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,3.0,3.0,2.0,7.0,8.0,9.0,8.0,7.0,6.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000007, 'to': 1523889000011, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.JumpModel() |
||||
model.state = model.get_state(None) |
||||
model_name = model.__class__.__name__ |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_prepare_data_for_nonetype(self): |
||||
data = [[1523889000000, None], [1523889000001, None], [1523889000002, None]] |
||||
try: |
||||
data = prepare_data(data) |
||||
except ValueError: |
||||
self.fail('prepare_data raised ValueError unexpectedly') |
||||
|
||||
def test_prepare_data_for_nan(self): |
||||
data = [[1523889000000, np.nan], [1523889000001, np.nan], [1523889000002, np.nan]] |
||||
try: |
||||
data = prepare_data(data) |
||||
except ValueError: |
||||
self.fail('prepare_data raised ValueError unexpectedly') |
||||
|
||||
def test_prepare_data_output_for_nan(self): |
||||
data_nan = [[1523889000000, np.nan], [1523889000001, np.nan], [1523889000002, np.nan]] |
||||
data_none = [[1523889000000, None], [1523889000001, None], [1523889000002, None]] |
||||
return_data_nan = prepare_data(data_nan) |
||||
return_data_none = prepare_data(data_none) |
||||
for item in return_data_nan.value: |
||||
self.assertTrue(np.isnan(item)) |
||||
for item in return_data_none.value: |
||||
self.assertTrue(np.isnan(item)) |
||||
|
||||
def test_three_value_segment(self): |
||||
data_val = [1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 4.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
model_instances = [ |
||||
models.GeneralModel(), |
||||
models.PeakModel(), |
||||
] |
||||
try: |
||||
for model in model_instances: |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_general_for_two_labeling(self): |
||||
data_val = [1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 2.0, 1.0, 0, 0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000001, 'to': 1523889000003, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
model = models.GeneralModel() |
||||
model.state = model.get_state(None) |
||||
model.fit(dataframe, segments,'test') |
||||
result = len(data_val) + 1 |
||||
for _ in range(2): |
||||
model.do_detect(dataframe) |
||||
max_pattern_index = max(model.do_detect(dataframe)) |
||||
self.assertLessEqual(max_pattern_index[0], result) |
||||
|
||||
|
||||
def test_peak_model_for_cache(self): |
||||
cache = { |
||||
'patternCenter': [1, 6], |
||||
'patternModel': [1, 4, 0], |
||||
'confidence': 2, |
||||
'convolveMax': 8, |
||||
'convolveMin': 7, |
||||
'windowSize': 1, |
||||
'convDelMin': 0, |
||||
'convDelMax': 0, |
||||
'heightMax': 4, |
||||
'heightMin': 4, |
||||
} |
||||
data_val = [2.0, 5.0, 1.0, 1.0, 1.0, 2.0, 5.0, 1.0, 1.0, 2.0, 3.0, 7.0, 1.0, 1.0, 1.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
model = models.PeakModel() |
||||
model.state = model.get_state(cache) |
||||
result = model.fit(dataframe, segments, 'test') |
||||
self.assertEqual(len(result.pattern_center), 3) |
||||
|
||||
def test_trough_model_for_cache(self): |
||||
cache = { |
||||
'patternCenter': [2, 6], |
||||
'patternModel': [5, 0.5, 4], |
||||
'confidence': 2, |
||||
'convolveMax': 8, |
||||
'convolveMin': 7, |
||||
'window_size': 1, |
||||
'convDelMin': 0, |
||||
'convDelMax': 0, |
||||
} |
||||
data_val = [5.0, 5.0, 1.0, 4.0, 5.0, 5.0, 0.0, 4.0, 5.0, 5.0, 6.0, 1.0, 5.0, 5.0, 5.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000010, 'to': 1523889000012, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
model = models.TroughModel() |
||||
model.state = model.get_state(cache) |
||||
result = model.fit(dataframe, segments, 'test') |
||||
self.assertEqual(len(result.pattern_center), 3) |
||||
|
||||
def test_jump_model_for_cache(self): |
||||
cache = { |
||||
'patternCenter': [2, 6], |
||||
'patternModel': [5, 0.5, 4], |
||||
'confidence': 2, |
||||
'convolveMax': 8, |
||||
'convolveMin': 7, |
||||
'window_size': 1, |
||||
'convDelMin': 0, |
||||
'convDelMax': 0, |
||||
} |
||||
data_val = [1.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000009, 'to': 1523889000013, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
model = models.JumpModel() |
||||
model.state = model.get_state(cache) |
||||
result = model.fit(dataframe, segments, 'test') |
||||
self.assertEqual(len(result.pattern_center), 3) |
||||
|
||||
def test_models_for_pattern_model_cache(self): |
||||
cache = { |
||||
'patternCenter': [4, 12], |
||||
'patternModel': [], |
||||
'confidence': 2, |
||||
'convolveMax': 8, |
||||
'convolveMin': 7, |
||||
'window_size': 2, |
||||
'convDelMin': 0, |
||||
'convDelMax': 0, |
||||
} |
||||
data_val = [5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0, 0, 0, 0, 0, 6.0, 6.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000024, 'labeled': True, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
|
||||
try: |
||||
model = models.DropModel() |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(cache) |
||||
model.fit(dataframe, segments, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly'.format(model_name)) |
||||
|
||||
def test_problem_data_for_random_model(self): |
||||
problem_data = [2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, |
||||
3.0, 3.0, 3.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, |
||||
3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 6.0, 7.0, 8.0, 8.0, 4.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, |
||||
4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, |
||||
4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 5.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, |
||||
2.0, 8.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] |
||||
data = create_dataframe(problem_data) |
||||
cache = { |
||||
'patternCenter': [5, 50], |
||||
'patternModel': [], |
||||
'windowSize': 2, |
||||
'convolveMin': 0, |
||||
'convolveMax': 0, |
||||
'convDelMin': 0, |
||||
'convDelMax': 0, |
||||
} |
||||
max_ws = 20 |
||||
iteration = 1 |
||||
for ws in range(1, max_ws): |
||||
for _ in range(iteration): |
||||
pattern_model = create_random_model(ws) |
||||
convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) |
||||
cache['windowSize'] = ws |
||||
cache['patternModel'] = pattern_model |
||||
cache['convolveMin'] = max(convolve) |
||||
cache['convolveMax'] = max(convolve) |
||||
try: |
||||
model = models.GeneralModel() |
||||
model.state = model.get_state(cache) |
||||
model_name = model.__class__.__name__ |
||||
model.detect(data, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly with av_model {} and window size {}'.format(model_name, pattern_model, ws)) |
||||
|
||||
def test_random_dataset_for_random_model(self): |
||||
data = create_random_model(random.randint(1, 100)) |
||||
data = create_dataframe(data) |
||||
model_instances = [ |
||||
models.PeakModel(), |
||||
models.TroughModel() |
||||
] |
||||
cache = { |
||||
'patternCenter': [5, 50], |
||||
'patternModel': [], |
||||
'windowSize': 2, |
||||
'convolveMin': 0, |
||||
'convolveMax': 0, |
||||
'confidence': 0, |
||||
'heightMax': 0, |
||||
'heightMin': 0, |
||||
'convDelMin': 0, |
||||
'convDelMax': 0, |
||||
} |
||||
ws = random.randint(1, len(data['value']) // 2) |
||||
pattern_model = create_random_model(ws) |
||||
convolve = scipy.signal.fftconvolve(pattern_model, pattern_model) |
||||
confidence = 0.2 * (data['value'].max() - data['value'].min()) |
||||
cache['windowSize'] = ws |
||||
cache['patternModel'] = pattern_model |
||||
cache['convolveMin'] = max(convolve) |
||||
cache['convolveMax'] = max(convolve) |
||||
cache['confidence'] = confidence |
||||
cache['heightMax'] = data['value'].max() |
||||
cache['heightMin'] = confidence |
||||
try: |
||||
for model in model_instances: |
||||
model_name = model.__class__.__name__ |
||||
model.state = model.get_state(cache) |
||||
model.detect(data, 'test') |
||||
except ValueError: |
||||
self.fail('Model {} raised unexpectedly with dataset {} and cache {}'.format(model_name, data['value'], cache)) |
||||
|
||||
def create_dataframe(data_val: list) -> pd.DataFrame: |
||||
data_ind = create_list_of_timestamps(len(data_val)) |
||||
data = {'timestamp': data_ind, 'value': data_val} |
||||
dataframe = pd.DataFrame(data) |
||||
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||
return dataframe |
||||
 |
||||
def create_list_of_timestamps(length: int) -> List[int]: |
||||
return [1523889000000 + i for i in range(length)] |
||||
 |
||||
def create_random_model(window_size: int) -> list: |
||||
return [random.randint(0, 100) for _ in range(window_size * 2 + 1)] |
||||
 |
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,265 @@
|
||||
import unittest |
||||
import pandas as pd |
||||
|
||||
from detectors import pattern_detector, threshold_detector, anomaly_detector |
||||
from analytic_types.detector import DetectionResult, ProcessingResult, Bound |
||||
from analytic_types.segment import Segment |
||||
from tests.test_dataset import create_dataframe, create_list_of_timestamps |
||||
from utils import convert_pd_timestamp_to_ms |
||||
|
||||
class TestPatternDetector(unittest.TestCase): |
||||
|
||||
def test_small_dataframe(self): |
||||
|
||||
data = [[0,1], [1,2]] |
||||
dataframe = pd.DataFrame(data, columns=['timestamp', 'values']) |
||||
cache = { 'windowSize': 10 } |
||||
|
||||
detector = pattern_detector.PatternDetector('GENERAL', 'test_id') |
||||
with self.assertRaises(ValueError): |
||||
detector.detect(dataframe, cache) |
||||
|
||||
def test_only_negative_segments(self): |
||||
data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] |
||||
data_ind = [1523889000000 + i for i in range(len(data_val))] |
||||
data = {'timestamp': data_ind, 'value': data_val} |
||||
dataframe = pd.DataFrame(data = data) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000019, 'to': 1523889000025, 'labeled': False, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000002, 'to': 1523889000008, 'labeled': False, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
cache = {} |
||||
detector = pattern_detector.PatternDetector('PEAK', 'test_id') |
||||
expected_error_message = 'test_id has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment' |
||||
|
||||
try: |
||||
detector.train(dataframe, segments, cache) |
||||
except ValueError as e: |
||||
self.assertEqual(str(e), expected_error_message) |
||||
|
||||
def test_positive_and_negative_segments(self): |
||||
data_val = [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
||||
dataframe = create_dataframe(data_val) |
||||
segments = [{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000004, 'to': 1523889000006, 'labeled': True, 'deleted': False}, |
||||
{'_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8', 'from': 1523889000001, 'to': 1523889000003, 'labeled': False, 'deleted': False}] |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
cache = {} |
||||
detector = pattern_detector.PatternDetector('PEAK', 'test_id') |
||||
try: |
||||
detector.train(dataframe, segments, cache) |
||||
except Exception as e: |
||||
self.fail('detector.train fail with error {}'.format(e)) |
||||
|
||||
class TestThresholdDetector(unittest.TestCase): |
||||
|
||||
def test_invalid_cache(self): |
||||
|
||||
detector = threshold_detector.ThresholdDetector('test_id') |
||||
|
||||
with self.assertRaises(ValueError): |
||||
detector.detect([], None) |
||||
|
||||
with self.assertRaises(ValueError): |
||||
detector.detect([], {}) |
||||
|
||||
|
||||
class TestAnomalyDetector(unittest.TestCase): |
||||
|
||||
def test_detect(self): |
||||
data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] |
||||
data_ind = [1523889000000 + i for i in range(len(data_val))] |
||||
data = {'timestamp': data_ind, 'value': data_val} |
||||
dataframe = pd.DataFrame(data = data) |
||||
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||
cache = { |
||||
'confidence': 2, |
||||
'alpha': 0.1, |
||||
'enableBounds': 'ALL', |
||||
'timeStep': 1 |
||||
} |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
|
||||
detect_result: DetectionResult = detector.detect(dataframe, cache) |
||||
detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) |
||||
result = [{ 'from': 1523889000005.0, 'to': 1523889000005.0 }] |
||||
self.assertEqual(result, detected_segments) |
||||
|
||||
cache = { |
||||
'confidence': 2, |
||||
'alpha': 0.1, |
||||
'enableBounds': 'ALL', |
||||
'timeStep': 1, |
||||
'seasonality': 4, |
||||
'segments': [{ 'from': 1523889000001, 'to': 1523889000002, 'data': [10] }] |
||||
} |
||||
detect_result: DetectionResult = detector.detect(dataframe, cache) |
||||
detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) |
||||
result = [] |
||||
self.assertEqual(result, detected_segments) |
||||
|
||||
def test_process_data(self): |
||||
data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1] |
||||
data_ind = [1523889000000 + i for i in range(len(data_val))] |
||||
data = {'timestamp': data_ind, 'value': data_val} |
||||
dataframe = pd.DataFrame(data = data) |
||||
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||
cache = { |
||||
'confidence': 2, |
||||
'alpha': 0.1, |
||||
'enableBounds': 'ALL', |
||||
'timeStep': 1 |
||||
} |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
detect_result: ProcessingResult = detector.process_data(dataframe, cache) |
||||
expected_result = { |
||||
'lowerBound': [ |
||||
(1523889000000, -2.0), |
||||
(1523889000001, -1.9), |
||||
(1523889000002, -1.71), |
||||
(1523889000003, -1.6389999999999998), |
||||
(1523889000004, -1.4750999999999999), |
||||
(1523889000005, -0.5275899999999998), |
||||
(1523889000006, -0.5748309999999996), |
||||
(1523889000007, -0.5173478999999996), |
||||
(1523889000008, -0.5656131099999995) |
||||
], |
||||
'upperBound': [ |
||||
(1523889000000, 2.0), |
||||
(1523889000001, 2.1), |
||||
(1523889000002, 2.29), |
||||
(1523889000003, 2.361), |
||||
(1523889000004, 2.5249), |
||||
(1523889000005, 3.47241), |
||||
(1523889000006, 3.4251690000000004), |
||||
(1523889000007, 3.4826521), |
||||
(1523889000008, 3.4343868900000007) |
||||
]} |
||||
self.assertEqual(detect_result.to_json(), expected_result) |
||||
|
||||
cache = { |
||||
'confidence': 2, |
||||
'alpha': 0.1, |
||||
'enableBounds': 'ALL', |
||||
'timeStep': 1, |
||||
'seasonality': 5, |
||||
'segments': [{ 'from': 1523889000001, 'to': 1523889000002,'data': [1] }] |
||||
} |
||||
detect_result: ProcessingResult = detector.process_data(dataframe, cache) |
||||
expected_result = { |
||||
'lowerBound': [ |
||||
(1523889000000, -2.0), |
||||
(1523889000001, -2.9), |
||||
(1523889000002, -1.71), |
||||
(1523889000003, -1.6389999999999998), |
||||
(1523889000004, -1.4750999999999999), |
||||
(1523889000005, -0.5275899999999998), |
||||
(1523889000006, -1.5748309999999996), |
||||
(1523889000007, -0.5173478999999996), |
||||
(1523889000008, -0.5656131099999995) |
||||
], |
||||
'upperBound': [ |
||||
(1523889000000, 2.0), |
||||
(1523889000001, 3.1), |
||||
(1523889000002, 2.29), |
||||
(1523889000003, 2.361), |
||||
(1523889000004, 2.5249), |
||||
(1523889000005, 3.47241), |
||||
(1523889000006, 4.425169), |
||||
(1523889000007, 3.4826521), |
||||
(1523889000008, 3.4343868900000007) |
||||
]} |
||||
self.assertEqual(detect_result.to_json(), expected_result) |
||||
|
||||
def test_get_seasonality_offset(self): |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
from_timestamp = 1573700973027 |
||||
seasonality = 3600000 |
||||
data_start_time = 1573698780000 |
||||
time_step = 30000 |
||||
detected_offset = detector.get_seasonality_offset(from_timestamp, seasonality, data_start_time, time_step) |
||||
expected_offset = 74 |
||||
self.assertEqual(detected_offset, expected_offset) |
||||
|
||||
def test_segment_generator(self): |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
data = [1, 1, 5, 1, -4, 5, 5, 5, -3, 1] |
||||
timestamps = create_list_of_timestamps(len(data)) |
||||
dataframe = create_dataframe(data) |
||||
upper_bound = pd.Series([2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) |
||||
lower_bound = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) |
||||
segments = list(detector.detections_generator(dataframe, upper_bound, lower_bound, enabled_bounds=Bound.ALL)) |
||||
|
||||
segments_borders = list(map(lambda s: [s.from_timestamp, s.to_timestamp], segments)) |
||||
self.assertEqual(segments_borders, [[timestamps[2], timestamps[2]], [timestamps[4], timestamps[8]]]) |
||||
|
||||
def test_consume_data(self): |
||||
cache = { |
||||
'confidence': 2, |
||||
'alpha': 0.1, |
||||
'enableBounds': 'ALL', |
||||
'timeStep': 1 |
||||
} |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
|
||||
detect_result: DetectionResult = None |
||||
for val in range(22): |
||||
value = 1 if val != 10 else 5 |
||||
dataframe = pd.DataFrame({'value': [value], 'timestamp': [1523889000000 + val]}) |
||||
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], unit='ms') |
||||
detect_result = detector.consume_data(dataframe, cache) |
||||
|
||||
detected_segments = list(map(lambda s: {'from': s.from_timestamp, 'to': s.to_timestamp}, detect_result.segments)) |
||||
result = [{ 'from': 1523889000010, 'to': 1523889000010 }] |
||||
self.assertEqual(result, detected_segments) |
||||
|
||||
def test_get_segment_bound(self): |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
peak_segment = pd.Series([1,2,3,4,3,2,1]) |
||||
trough_segment = pd.Series([4,3,2,1,2,3,4]) |
||||
expected_peak_segment_results = { |
||||
'max_value': 3, |
||||
'min_value': 1.5 |
||||
} |
||||
expected_trough_segment_results = { |
||||
'max_value': 3.5, |
||||
'min_value': 2.75 |
||||
} |
||||
peak_detector_result_upper = detector.get_segment_bound(peak_segment, Bound.UPPER) |
||||
peak_detector_result_lower = detector.get_segment_bound(peak_segment, Bound.LOWER) |
||||
trough_detector_result_upper = detector.get_segment_bound(trough_segment, Bound.UPPER) |
||||
trough_detector_result_lower = detector.get_segment_bound(trough_segment, Bound.LOWER) |
||||
|
||||
self.assertGreaterEqual( |
||||
max(peak_detector_result_upper), |
||||
expected_peak_segment_results['max_value'] |
||||
) |
||||
self.assertLessEqual( |
||||
max(peak_detector_result_lower), |
||||
expected_peak_segment_results['min_value'] |
||||
) |
||||
self.assertGreaterEqual( |
||||
max(trough_detector_result_upper), |
||||
expected_trough_segment_results['max_value'] |
||||
) |
||||
self.assertLessEqual( |
||||
max(trough_detector_result_lower), |
||||
expected_trough_segment_results['min_value'] |
||||
) |
||||
|
||||
def test_get_segment_bound_corner_cases(self): |
||||
detector = anomaly_detector.AnomalyDetector('test_id') |
||||
empty_segment = pd.Series([]) |
||||
same_values_segment = pd.Series([2,2,2,2,2,2]) |
||||
empty_detector_result_upper = detector.get_segment_bound(empty_segment, Bound.UPPER) |
||||
empty_detector_result_lower = detector.get_segment_bound(empty_segment, Bound.LOWER) |
||||
same_values_detector_result_upper = detector.get_segment_bound(same_values_segment, Bound.UPPER) |
||||
same_values_detector_result_lower = detector.get_segment_bound(same_values_segment, Bound.LOWER) |
||||
|
||||
self.assertEqual(len(empty_detector_result_upper), 0) |
||||
self.assertEqual(len(empty_detector_result_lower), 0) |
||||
self.assertEqual(min(same_values_detector_result_upper), 0) |
||||
self.assertEqual(max(same_values_detector_result_upper), 0) |
||||
self.assertEqual(min(same_values_detector_result_lower), 0) |
||||
self.assertEqual(max(same_values_detector_result_lower), 0) |
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,100 @@
|
||||
from models import PeakModel, DropModel, TroughModel, JumpModel, GeneralModel |
||||
from models import GeneralModelState |
||||
import utils.meta |
||||
import aiounittest |
||||
from analytic_unit_manager import AnalyticUnitManager |
||||
from collections import namedtuple |
||||
|
||||
TestData = namedtuple('TestData', ['uid', 'type', 'values', 'segments']) |
||||
|
||||
def get_random_id() -> str: |
||||
return str(id(list())) |
||||
|
||||
class TestDataset(aiounittest.AsyncTestCase): |
||||
|
||||
timestep = 50 #ms |
||||
|
||||
def _fill_task(self, uid, data, task_type, analytic_unit_type, segments=None, cache=None): |
||||
task = { |
||||
'analyticUnitId': uid, |
||||
'type': task_type, |
||||
'payload': { |
||||
'data': data, |
||||
'from': data[0][0], |
||||
'to': data[-1][0], |
||||
'analyticUnitType': analytic_unit_type, |
||||
'detector': 'pattern', |
||||
'cache': cache |
||||
}, |
||||
'_id': get_random_id() |
||||
} |
||||
if segments: task['payload']['segments'] = segments |
||||
|
||||
return task |
||||
|
||||
def _convert_values(self, values) -> list: |
||||
from_t = 0 |
||||
to_t = len(values) * self.timestep |
||||
return list(zip(range(from_t, to_t, self.timestep), values)) |
||||
|
||||
def _index_to_test_time(self, idx) -> int: |
||||
return idx * self.timestep |
||||
|
||||
def _get_learn_task(self, test_data): |
||||
uid, analytic_unit_type, values, segments = test_data |
||||
data = self._convert_values(values) |
||||
segments = [{ |
||||
'analyticUnitId': uid, |
||||
'from': self._index_to_test_time(s[0]), |
||||
'to': self._index_to_test_time(s[1]), |
||||
'labeled': True, |
||||
'deleted': False |
||||
} for s in segments] |
||||
return self._fill_task(uid, data, 'LEARN', analytic_unit_type, segments=segments) |
||||
|
||||
def _get_detect_task(self, test_data, cache): |
||||
uid, analytic_unit_type, values, _ = test_data |
||||
data = self._convert_values(values) |
||||
return self._fill_task(uid, data, 'DETECT', analytic_unit_type, cache=cache) |
||||
|
||||
def _get_test_dataset(self, pattern) -> tuple: |
||||
""" |
||||
pattern name: ([dataset values], [list of segments]) |
||||
|
||||
segment - (begin, end) - indexes in dataset values |
||||
returns dataset in format (data: List[int], segments: List[List[int]]) |
||||
""" |
||||
datasets = { |
||||
'PEAK': ([0, 0, 1, 2, 3, 4, 3, 2, 1, 0, 0], [[2, 8]]), |
||||
'JUMP': ([0, 0, 1, 2, 3, 4, 4, 4], [[1, 6]]), |
||||
'DROP': ([4, 4, 4, 3, 2, 1, 0, 0], [[1, 6]]), |
||||
'TROUGH': ([4, 4, 3, 2, 1, 0, 1, 2, 3, 4, 4], [[1, 9]]), |
||||
'GENERAL': ([0, 0, 1, 2, 3, 4, 3, 2, 1, 0, 0], [[2, 8]]) |
||||
} |
||||
return datasets[pattern] |
||||
|
||||
async def _learn(self, task, manager=None) -> dict: |
||||
if not manager: manager = AnalyticUnitManager() |
||||
result = await manager.handle_analytic_task(task) |
||||
return result['payload']['cache'] |
||||
|
||||
async def _detect(self, task, manager=None) -> dict: |
||||
if not manager: manager = AnalyticUnitManager() |
||||
result = await manager.handle_analytic_task(task) |
||||
return result |
||||
|
||||
async def _test_detect(self, test_data, manager=None): |
||||
learn_task = self._get_learn_task(test_data) |
||||
cache = await self._learn(learn_task, manager) |
||||
detect_task = self._get_detect_task(test_data, cache) |
||||
result = await self._detect(detect_task, manager) |
||||
return result |
||||
|
||||
async def test_unit_manager(self): |
||||
test_data = TestData(get_random_id(), 'PEAK', [0,1,2,5,10,5,2,1,1,1,0,0,0,0], [[1,7]]) |
||||
manager = AnalyticUnitManager() |
||||
|
||||
with_manager = await self._test_detect(test_data, manager) |
||||
without_manager = await self._test_detect(test_data) |
||||
self.assertEqual(with_manager, without_manager) |
||||
|
@ -0,0 +1,43 @@
|
||||
import unittest |
||||
import pandas as pd |
||||
import numpy as np |
||||
import models |
||||
|
||||
class TestModel(unittest.TestCase): |
||||
|
||||
def test_stair_model_get_indexes(self): |
||||
drop_model = models.DropModel() |
||||
jump_model = models.JumpModel() |
||||
drop_data = pd.Series([4, 4, 4, 1, 1, 1, 5, 5, 2, 2, 2]) |
||||
jump_data = pd.Series([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5]) |
||||
jump_data_one_stair = pd.Series([1, 3, 3]) |
||||
drop_data_one_stair = pd.Series([4, 2, 1]) |
||||
height = 2 |
||||
length = 2 |
||||
expected_result = [2, 7] |
||||
drop_model_result = drop_model.get_stair_indexes(drop_data, height, length) |
||||
jump_model_result = jump_model.get_stair_indexes(jump_data, height, length) |
||||
drop_one_stair_result = drop_model.get_stair_indexes(drop_data_one_stair, height, 1) |
||||
jump_one_stair_result = jump_model.get_stair_indexes(jump_data_one_stair, height, 1) |
||||
for val in expected_result: |
||||
self.assertIn(val, drop_model_result) |
||||
self.assertIn(val, jump_model_result) |
||||
self.assertEqual(0, drop_one_stair_result[0]) |
||||
self.assertEqual(0, jump_one_stair_result[0]) |
||||
|
||||
def test_stair_model_get_indexes_corner_cases(self): |
||||
drop_model = models.DropModel() |
||||
jump_model = models.JumpModel() |
||||
empty_data = pd.Series([]) |
||||
nan_data = pd.Series([np.nan, np.nan, np.nan, np.nan]) |
||||
height, length = 2, 2 |
||||
length_zero, height_zero = 0, 0 |
||||
expected_result = [] |
||||
drop_empty_data_result = drop_model.get_stair_indexes(empty_data, height, length) |
||||
drop_nan_data_result = drop_model.get_stair_indexes(nan_data, height_zero, length_zero) |
||||
jump_empty_data_result = jump_model.get_stair_indexes(empty_data, height, length) |
||||
jump_nan_data_result = jump_model.get_stair_indexes(nan_data, height_zero, length_zero) |
||||
self.assertEqual(drop_empty_data_result, expected_result) |
||||
self.assertEqual(drop_nan_data_result, expected_result) |
||||
self.assertEqual(jump_empty_data_result, expected_result) |
||||
self.assertEqual(jump_nan_data_result, expected_result) |
@ -0,0 +1,359 @@
|
||||
from analytic_types.segment import Segment |
||||
|
||||
import utils |
||||
import unittest |
||||
import numpy as np |
||||
import pandas as pd |
||||
import math |
||||
import random |
||||
|
||||
RELATIVE_TOLERANCE = 1e-1 |
||||
|
||||
class TestUtils(unittest.TestCase): |
||||
|
||||
# example test to verify the test workflow itself |
||||
def test_segment_parsing(self): |
||||
self.assertTrue(True) |
||||
|
||||
def test_confidence_all_normal_value(self): |
||||
segment = [1, 2, 0, 6, 8, 5, 3] |
||||
utils_result = utils.find_confidence(segment)[0] |
||||
result = 4.0 |
||||
self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE)) |
||||
|
||||
def test_confidence_all_nan_value(self): |
||||
segment = [np.nan, np.nan, np.nan, np.nan] |
||||
self.assertEqual(utils.find_confidence(segment)[0], 0) |
||||
|
||||
def test_confidence_with_nan_value(self): |
||||
data = [np.nan, np.nan, 0, 8] |
||||
utils_result = utils.find_confidence(data)[0] |
||||
result = 4.0 |
||||
self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE)) |
||||
|
||||
def test_interval_all_normal_value(self): |
||||
data = [1, 2, 1, 2, 4, 1, 2, 4, 5, 6] |
||||
data = pd.Series(data) |
||||
center = 4 |
||||
window_size = 2 |
||||
result = [1, 2, 4, 1, 2] |
||||
self.assertEqual(list(utils.get_interval(data, center, window_size)), result) |
||||
|
||||
def test_interval_wrong_ws(self): |
||||
data = [1, 2, 4, 1, 2, 4] |
||||
data = pd.Series(data) |
||||
center = 3 |
||||
window_size = 6 |
||||
result = [1, 2, 4, 1, 2, 4] |
||||
self.assertEqual(list(utils.get_interval(data, center, window_size)), result) |
||||
|
||||
def test_subtract_min_without_nan(self): |
||||
segment = [1, 2, 4, 1, 2, 4] |
||||
segment = pd.Series(segment) |
||||
result = [0, 1, 3, 0, 1, 3] |
||||
utils_result = list(utils.subtract_min_without_nan(segment)) |
||||
self.assertEqual(utils_result, result) |
||||
|
||||
def test_subtract_min_with_nan(self): |
||||
segment = [np.nan, 2, 4, 1, 2, 4] |
||||
segment = pd.Series(segment) |
||||
result = [2, 4, 1, 2, 4] |
||||
utils_result = list(utils.subtract_min_without_nan(segment)[1:]) |
||||
self.assertEqual(utils_result, result) |
||||
|
||||
def test_get_convolve(self): |
||||
data = [1, 2, 3, 2, 2, 0, 2, 3, 4, 3, 2, 1, 1, 2, 3, 4, 3, 2, 0] |
||||
data = pd.Series(data) |
||||
pattern_index = [2, 8, 15] |
||||
window_size = 2 |
||||
av_model = [1, 2, 3, 2, 1] |
||||
result = [] |
||||
self.assertNotEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result) |
||||
|
||||
def test_get_convolve_with_nan(self): |
||||
data = [1, 2, 3, 2, np.nan, 0, 2, 3, 4, np.nan, 2, 1, 1, 2, 3, 4, 3, np.nan, 0] |
||||
data = pd.Series(data) |
||||
pattern_index = [2, 8, 15] |
||||
window_size = 2 |
||||
av_model = [1, 2, 3, 2, 1] |
||||
result = utils.get_convolve(pattern_index, av_model, data, window_size) |
||||
for val in result: |
||||
self.assertFalse(np.isnan(val)) |
||||
|
||||
def test_get_convolve_empty_data(self): |
||||
data = [] |
||||
pattern_index = [] |
||||
window_size = 2 |
||||
window_size_zero = 0 |
||||
av_model = [] |
||||
result = [] |
||||
self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size), result) |
||||
self.assertEqual(utils.get_convolve(pattern_index, av_model, data, window_size_zero), result) |
||||
|
||||
def test_find_jump_parameters_center(self): |
||||
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] |
||||
segment = pd.Series(segment) |
||||
jump_center = [10, 11] |
||||
self.assertIn(utils.find_pattern_center(segment, 0, 'jump'), jump_center) |
||||
|
||||
def test_find_jump_parameters_height(self): |
||||
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] |
||||
segment = pd.Series(segment) |
||||
jump_height = [3.5, 4] |
||||
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[0]) |
||||
self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[1]) |
||||
|
||||
def test_find_jump_parameters_length(self): |
||||
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] |
||||
segment = pd.Series(segment) |
||||
jump_length = 2 |
||||
self.assertEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_length) |
||||
|
||||
def test_find_drop_parameters_center(self): |
||||
segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
||||
segment = pd.Series(segment) |
||||
drop_center = [14, 15, 16] |
||||
self.assertIn(utils.find_pattern_center(segment, 0, 'drop'), drop_center) |
||||
|
||||
def test_find_drop_parameters_height(self): |
||||
segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
||||
segment = pd.Series(segment) |
||||
drop_height = [3.5, 4] |
||||
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[0]) |
||||
self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[1]) |
||||
|
||||
def test_find_drop_parameters_length(self): |
||||
segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
||||
segment = pd.Series(segment) |
||||
drop_length = 2 |
||||
self.assertEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_length) |
||||
|
||||
def test_get_av_model_empty_data(self): |
||||
patterns_list = [] |
||||
result = [] |
||||
self.assertEqual(utils.get_av_model(patterns_list), result) |
||||
|
||||
def test_get_av_model_normal_data(self): |
||||
patterns_list = [[1, 1, 1], [2, 2, 2],[3,3,3]] |
||||
result = [2.0, 2.0, 2.0] |
||||
self.assertEqual(utils.get_av_model(patterns_list), result) |
||||
|
||||
def test_get_distribution_density(self): |
||||
segment = [1, 1, 1, 3, 5, 5, 5] |
||||
segment = pd.Series(segment) |
||||
result = (3, 5, 1) |
||||
self.assertEqual(utils.get_distribution_density(segment), result) |
||||
|
||||
def test_get_distribution_density_right(self): |
||||
data = [1.0, 5.0, 5.0, 4.0] |
||||
data = pd.Series(data) |
||||
median = 3.0 |
||||
max_line = 5.0 |
||||
min_line = 1.0 |
||||
utils_result = utils.get_distribution_density(data) |
||||
self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE)) |
||||
self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE)) |
||||
self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE)) |
||||
|
||||
def test_get_distribution_density_left(self): |
||||
data = [1.0, 1.0, 2.0, 1.0, 5.0] |
||||
data = pd.Series(data) |
||||
median = 3.0 |
||||
max_line = 5.0 |
||||
min_line = 1.0 |
||||
utils_result = utils.get_distribution_density(data) |
||||
self.assertTrue(math.isclose(utils_result[0], median, rel_tol = RELATIVE_TOLERANCE)) |
||||
self.assertTrue(math.isclose(utils_result[1], max_line, rel_tol = RELATIVE_TOLERANCE)) |
||||
self.assertTrue(math.isclose(utils_result[2], min_line, rel_tol = RELATIVE_TOLERANCE)) |
||||
|
||||
def test_get_distribution_density_short_data(self): |
||||
data = [1.0, 5.0] |
||||
data = pd.Series(data) |
||||
segment = [1.0] |
||||
segment = pd.Series(segment) |
||||
utils_result_data = utils.get_distribution_density(data) |
||||
utils_result_segment = utils.get_distribution_density(segment) |
||||
self.assertEqual(len(utils_result_data), 3) |
||||
self.assertEqual(utils_result_segment, (0, 0, 0)) |
||||
|
||||
def test_get_distribution_density_with_nans(self): |
||||
segment = [np.NaN, 1, 1, 1, np.NaN, 3, 5, 5, 5, np.NaN] |
||||
segment = pd.Series(segment) |
||||
result = (3, 5, 1) |
||||
self.assertEqual(utils.get_distribution_density(segment), result) |
||||
|
||||
def test_find_pattern_jump_center(self): |
||||
data = [1.0, 1.0, 1.0, 5.0, 5.0, 5.0] |
||||
data = pd.Series(data) |
||||
median = 3.0 |
||||
result = 3 |
||||
self.assertEqual(result, utils.find_pattern_center(data, 0, 'jump')) |
||||
|
||||
def test_get_convolve_wrong_index(self): |
||||
data = [1.0, 5.0, 2.0, 1.0, 6.0, 2.0] |
||||
data = pd.Series(data) |
||||
segments = [1, 11] |
||||
av_model = [0.0, 4.0, 0.0] |
||||
window_size = 1 |
||||
try: |
||||
utils.get_convolve(segments, av_model, data, window_size) |
||||
except ValueError: |
||||
self.fail('Method get_convolve raised unexpectedly') |
||||
|
||||
def test_get_av_model_for_different_length(self): |
||||
patterns_list = [[1.0, 1.0, 2.0], [4.0, 4.0], [2.0, 2.0, 2.0], [3.0, 3.0], []] |
||||
try: |
||||
utils.get_av_model(patterns_list) |
||||
except ValueError: |
||||
self.fail('Method get_av_model raised unexpectedly') |
||||
|
||||
def test_find_nan_indexes(self): |
||||
data = [1, 1, 1, 0, 0, np.nan, None, []] |
||||
data = pd.Series(data) |
||||
result = [5, 6] |
||||
self.assertEqual(utils.find_nan_indexes(data), result) |
||||
|
||||
def test_find_nan_indexes_normal_values(self): |
||||
data = [1, 1, 1, 0, 0, 0, 1, 1] |
||||
data = pd.Series(data) |
||||
result = [] |
||||
self.assertEqual(utils.find_nan_indexes(data), result) |
||||
|
||||
def test_find_nan_indexes_empty_values(self): |
||||
data = [] |
||||
result = [] |
||||
self.assertEqual(utils.find_nan_indexes(data), result) |
||||
|
||||
def test_create_correlation_data(self): |
||||
data = [random.randint(10, 999) for _ in range(10000)] |
||||
data = pd.Series(data) |
||||
pattern_model = [100, 200, 500, 300, 100] |
||||
ws = 2 |
||||
result = 6000 |
||||
corr_data = utils.get_correlation_gen(data, ws, pattern_model) |
||||
corr_data = list(corr_data) |
||||
self.assertGreaterEqual(len(corr_data), result) |
||||
|
||||
def test_inverse_segment(self): |
||||
data = pd.Series([1,2,3,4,3,2,1]) |
||||
result = pd.Series([3,2,1,0,1,2,3]) |
||||
utils_result = utils.inverse_segment(data) |
||||
for ind, val in enumerate(utils_result): |
||||
self.assertEqual(val, result[ind]) |
||||
|
||||
def test_get_end_of_segment_equal(self): |
||||
data = pd.Series([5,4,3,2,1,0,0,0]) |
||||
result_list = [4, 5, 6] |
||||
self.assertIn(utils.get_end_of_segment(data, False), result_list) |
||||
|
||||
def test_get_end_of_segment_greater(self): |
||||
data = pd.Series([5,4,3,2,1,0,1,2,3]) |
||||
result_list = [4, 5, 6] |
||||
self.assertIn(utils.get_end_of_segment(data, False), result_list) |
||||
|
||||
def test_get_borders_of_peaks(self): |
||||
data = pd.Series([1,0,1,2,3,2,1,0,0,1,2,3,4,3,2,2,1,0,1,2,3,4,5,3,2,1,0]) |
||||
pattern_center = [4, 12, 22] |
||||
ws = 3 |
||||
confidence = 1.5 |
||||
result = [(1, 7), (9, 15), (19, 25)] |
||||
self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence), result) |
||||
|
||||
def test_get_borders_of_peaks_for_trough(self): |
||||
data = pd.Series([4,4,5,5,3,1,3,5,5,6,3,2]) |
||||
pattern_center = [5] |
||||
ws = 5 |
||||
confidence = 3 |
||||
result = [(3, 7)] |
||||
self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence, inverse = True), result) |
||||
|
||||
def test_get_start_and_end_of_segments(self): |
||||
segments = [[1, 2, 3, 4], [5, 6, 7], [8], [], [12, 12]] |
||||
result = [[1, 4], [5, 7], [8, 8], [12, 12]] |
||||
utils_result = utils.get_start_and_end_of_segments(segments) |
||||
for got, expected in zip(utils_result, result): |
||||
self.assertEqual(got, expected) |
||||
|
||||
def test_get_start_and_end_of_segments_empty(self): |
||||
segments = [] |
||||
result = [] |
||||
utils_result = utils.get_start_and_end_of_segments(segments) |
||||
self.assertEqual(result, utils_result) |
||||
|
||||
def test_merge_intersecting_segments(self): |
||||
test_cases = [ |
||||
{ |
||||
'index': [Segment(10, 20), Segment(30, 40)], |
||||
'result': [[10, 20], [30, 40]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(10, 20), Segment(13, 23), Segment(15, 17), Segment(20, 40)], |
||||
'result': [[10, 40]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [], |
||||
'result': [], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(10, 20)], |
||||
'result': [[10, 20]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(10, 20), Segment(13, 23), Segment(25, 30), Segment(35, 40)], |
||||
'result': [[10, 23], [25, 30], [35, 40]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(10, 50), Segment(5, 40), Segment(15, 25), Segment(6, 50)], |
||||
'result': [[5, 50]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(5, 10), Segment(10, 20), Segment(25, 50)], |
||||
'result': [[5, 20], [25, 50]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(20, 40), Segment(10, 15), Segment(50, 60)], |
||||
'result': [[10, 15], [20, 40], [50, 60]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(20, 40), Segment(10, 20), Segment(50, 60)], |
||||
'result': [[10, 40], [50, 60]], |
||||
'step': 0, |
||||
}, |
||||
{ |
||||
'index': [Segment(10, 10), Segment(20, 20), Segment(30, 30)], |
||||
'result': [[10, 30]], |
||||
'step': 10, |
||||
}, |
||||
] |
||||
|
||||
for case in test_cases: |
||||
utils_result = utils.merge_intersecting_segments(case['index'], case['step']) |
||||
for got, expected in zip(utils_result, case['result']): |
||||
self.assertEqual(got.from_timestamp, expected[0]) |
||||
self.assertEqual(got.to_timestamp, expected[1]) |
||||
|
||||
def test_serialize(self): |
||||
segment_list = [Segment(100,200)] |
||||
serialize_list = utils.meta.SerializableList(segment_list) |
||||
meta_result = utils.meta.serialize(serialize_list) |
||||
expected_result = [{ 'from': 100, 'to': 200 }] |
||||
self.assertEqual(meta_result, expected_result) |
||||
|
||||
def test_remove_duplicates_and_sort(self): |
||||
a1 = [1, 3, 5] |
||||
a2 = [8, 3, 6] |
||||
expected_result = [1, 3, 5, 6, 8] |
||||
utils_result = utils.remove_duplicates_and_sort(a1+a2) |
||||
self.assertEqual(utils_result, expected_result) |
||||
self.assertEqual(utils.remove_duplicates_and_sort([]), []) |
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,43 @@
|
||||
import unittest |
||||
from utils import get_intersected_chunks, get_chunks |
||||
import pandas as pd |
||||
|
||||
|
||||
class TestUtils(unittest.TestCase): |
||||
|
||||
def test_chunks_generator(self): |
||||
intersection = 2 |
||||
chunk_size = 4 |
||||
|
||||
cases = [ |
||||
(list(range(8)), [[0,1,2,3], [2,3,4,5], [4,5,6,7]]), |
||||
([], [[]]), |
||||
(list(range(1)), [[0]]), |
||||
(list(range(4)), [[0,1,2,3]]), |
||||
(list(range(9)), [[0,1,2,3], [2,3,4,5], [4,5,6,7], [6,7,8]]) |
||||
] |
||||
|
||||
for tested, expected in cases: |
||||
tested_chunks = get_intersected_chunks(tested, intersection, chunk_size) |
||||
self.assertSequenceEqual(tuple(tested_chunks), expected) |
||||
|
||||
|
||||
def test_non_intersected_chunks(self): |
||||
chunk_size = 4 |
||||
|
||||
cases = [ |
||||
(tuple(range(12)), [[0,1,2,3], [4,5,6,7], [8,9,10,11]]), |
||||
(tuple(range(9)), [[0,1,2,3], [4,5,6,7], [8]]), |
||||
(tuple(range(10)), [[0,1,2,3], [4,5,6,7], [8,9]]), |
||||
(tuple(range(11)), [[0,1,2,3], [4,5,6,7], [8,9,10]]), |
||||
([], []), |
||||
(tuple(range(1)), [[0]]), |
||||
(tuple(range(4)), [[0,1,2,3]]) |
||||
] |
||||
|
||||
for tested, expected in cases: |
||||
tested_chunks = list(get_chunks(tested, chunk_size)) |
||||
self.assertSequenceEqual(tested_chunks, expected) |
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,122 @@
|
||||
import sys |
||||
ANALYTICS_PATH = '../analytics' |
||||
TESTS_PATH = '../tests' |
||||
sys.path.extend([ANALYTICS_PATH, TESTS_PATH]) |
||||
|
||||
import pandas as pd |
||||
import numpy as np |
||||
import utils |
||||
import test_dataset |
||||
from analytic_types.segment import Segment |
||||
from detectors import pattern_detector, threshold_detector, anomaly_detector |
||||
|
||||
# TODO: get_dataset |
||||
# TODO: get_segment |
||||
PEAK_DATASETS = [] |
||||
# dataset with 3 peaks |
||||
TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]) |
||||
# TODO: more convenient way to specify labeled segments |
||||
POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}] |
||||
NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}] |
||||
|
||||
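# Helper that renders a segment in the JSON shape the detectors expect ('deleted' is simply the opposite of 'labeled' here) |
||||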
class TesterSegment(): |
||||
|
||||
def __init__(self, start: int, end: int, labeled: bool): |
||||
self.start = start |
||||
self.end = end |
||||
self.labeled = labeled |
||||
|
||||
def get_segment(self): |
||||
return { |
||||
'_id': 'q', |
||||
'analyticUnitId': 'q', |
||||
'from': self.start, |
||||
'to': self.end, |
||||
'labeled': self.labeled, |
||||
'deleted': not self.labeled |
||||
} |
||||
|
||||
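# Compares the detector output against the expected (labeled) segments |
||||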
class Metric(): |
||||
|
||||
def __init__(self, expected_result, detector_result): |
||||
self.expected_result = expected_result |
||||
self.detector_result = detector_result['segments'] |
||||
|
||||
def get_amount(self): |
||||
return len(self.detector_result) / len(self.expected_result) |
||||
|
||||
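# Returns (correct, invalid, non_detected): a detected segment counts as correct if it lies entirely inside some expected segment |
||||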
def get_accuracy(self): |
||||
correct_segment = 0 |
||||
invalid_segment = 0 |
||||
for segment in self.detector_result: |
||||
current_cs = correct_segment |
||||
for pattern in self.expected_result: |
||||
if pattern['from'] <= segment['from'] and pattern['to'] >= segment['to']: |
||||
correct_segment += 1 |
||||
break |
||||
if correct_segment == current_cs: |
||||
invalid_segment += 1 |
||||
non_detected = len(self.expected_result) - correct_segment |
||||
return (correct_segment, invalid_segment, non_detected) |
||||
|
||||
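# Bundles a dataframe with its positive / negative labeled segments for one model type |
||||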
class ModelData(): |
||||
|
||||
def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str): |
||||
self.frame = frame |
||||
self.positive_segments = positive_segments |
||||
self.negative_segments = negative_segments |
||||
self.model_type = model_type |
||||
|
||||
def get_segments_for_detection(self, positive_amount, negative_amount): |
||||
segments = [] |
||||
for idx, bounds in enumerate(self.positive_segments): |
||||
if idx >= positive_amount: |
||||
break |
||||
segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment()) |
||||
|
||||
for idx, bounds in enumerate(self.negative_segments): |
||||
if idx >= negative_amount: |
||||
break |
||||
segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment()) |
||||
|
||||
return segments |
||||
|
||||
def get_all_correct_segments(self): |
||||
return self.positive_segments |
||||
|
||||
PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak') |
||||
PEAK_DATASETS.append(PEAK_DATA_1) |
||||
|
||||
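# Trains a detector on the labeled segments of each dataset, runs detection on the same dataframe |
||||
# and collects (amount, accuracy) tuples, see Metric above |
||||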
def main(model_type: str) -> list: |
||||
table_metric = [] |
||||
if model_type == 'peak': |
||||
for data in PEAK_DATASETS: |
||||
dataset = data.frame |
||||
segments = data.get_segments_for_detection(1, 0) |
||||
segments = [Segment.from_json(segment) for segment in segments] |
||||
detector = pattern_detector.PatternDetector('PEAK', 'test_id') |
||||
training_result = detector.train(dataset, segments, {}) |
||||
cache = training_result['cache'] |
||||
detect_result = detector.detect(dataset, cache) |
||||
detect_result = detect_result.to_json() |
||||
peak_metric = Metric(data.get_all_correct_segments(), detect_result) |
||||
table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy())) |
||||
return table_metric |
||||
|
||||
if __name__ == '__main__': |
||||
''' |
||||
This tool applies a model to the datasets and verifies that the detection result matches the expected segments. |
||||
sys.argv[1] expects one of the model names -> see correct_name |
||||
''' |
||||
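# Example invocation (hypothetical; the actual file name depends on where this script is saved): |
||||
#   python <this_script>.py peak |
||||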
# TODO: use enum |
||||
correct_name = ['peak', 'trough', 'jump', 'drop', 'general'] |
||||
if len(sys.argv) < 2: |
||||
print('Enter one of the model names: {}'.format(correct_name)) |
||||
sys.exit(1) |
||||
model_type = str(sys.argv[1]).lower() |
||||
if model_type in correct_name: |
||||
print(main(model_type)) |
||||
else: |
||||
print('Enter one of the model names: {}'.format(correct_name)) |
||||
|
||||
|
@ -0,0 +1,104 @@
|
||||
import zmq |
||||
import zmq.asyncio |
||||
import asyncio |
||||
import json |
||||
from uuid import uuid4 |
||||
|
||||
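# Connect to the analytics service over a ZeroMQ PAIR socket (address and port are hard-coded here) |
||||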
context = zmq.asyncio.Context() |
||||
socket = context.socket(zmq.PAIR) |
||||
socket.connect('tcp://0.0.0.0:8002') |
||||
|
||||
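# Builds one DATA message with a PUSH payload: a short time series plus a pre-computed pattern cache |
||||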
def create_message(): |
||||
message = { |
||||
"method": "DATA", |
||||
"payload": { |
||||
"_id": uuid4().hex, |
||||
"analyticUnitId": uuid4().hex, |
||||
"type": "PUSH", |
||||
"payload": { |
||||
"data": [ |
||||
[ |
||||
1552652025000, |
||||
12.499999999999998 |
||||
], |
||||
[ |
||||
1552652040000, |
||||
12.500000000000002 |
||||
], |
||||
[ |
||||
1552652055000, |
||||
12.499999999999996 |
||||
], |
||||
[ |
||||
1552652070000, |
||||
12.500000000000002 |
||||
], |
||||
[ |
||||
1552652085000, |
||||
12.499999999999998 |
||||
], |
||||
[ |
||||
1552652100000, |
||||
12.5 |
||||
], |
||||
[ |
||||
1552652115000, |
||||
12.83261113785909 |
||||
] |
||||
], |
||||
"from": 1552652025001, |
||||
"to": 1552652125541, |
||||
"analyticUnitType": "GENERAL", |
||||
"detector": "pattern", |
||||
"cache": { |
||||
"pattern_center": [ |
||||
693 |
||||
], |
||||
"pattern_model": [ |
||||
1.7763568394002505e-15, |
||||
5.329070518200751e-15, |
||||
1.7763568394002505e-15, |
||||
1.7763568394002505e-15, |
||||
1.7763568394002505e-15, |
||||
3.552713678800501e-15, |
||||
1.7763568394002505e-15, |
||||
3.552713678800501e-15, |
||||
3.552713678800501e-15, |
||||
1.7763568394002505e-15, |
||||
1.7763568394002505e-15, |
||||
0, |
||||
1.7763568394002505e-15, |
||||
1.7763568394002505e-15, |
||||
0 |
||||
], |
||||
"convolve_max": 7.573064690121713e-29, |
||||
"convolve_min": 7.573064690121713e-29, |
||||
"WINDOW_SIZE": 7, |
||||
"conv_del_min": 7, |
||||
"conv_del_max": 7 |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return json.dumps(message) |
||||
|
||||
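# Print every reply coming back from the service |
||||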
async def handle_loop(): |
||||
while True: |
||||
received_bytes = await socket.recv() |
||||
text = received_bytes.decode('utf-8') |
||||
|
||||
print(text) |
||||
|
||||
async def send_detect(): |
||||
data = create_message().encode('utf-8') |
||||
await socket.send(data) |
||||
|
||||
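# Fire 100 detect requests concurrently while printing replies; handle_loop never exits, so stop with Ctrl+C |
||||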
if __name__ == "__main__": |
||||
loop = asyncio.get_event_loop() |
||||
# run the PING handshake to completion before starting the detect tasks |
||||
loop.run_until_complete(socket.send(b'PING')) |
||||
detects = [send_detect() for i in range(100)] |
||||
detects_group = asyncio.gather(*detects) |
||||
handle_group = asyncio.gather(handle_loop()) |
||||
common_group = asyncio.gather(handle_group, detects_group) |
||||
loop.run_until_complete(common_group) |