From aa943595fc9b42b1b9da6e1e9f2c5320f37d75ba Mon Sep 17 00:00:00 2001 From: rozetko Date: Fri, 8 Feb 2019 19:39:20 +0300 Subject: [PATCH] Wrong time in threshold segments #403 (#405) * Add convert_pd_timestamp_to_ms function to utils * Use datapoint time in segment if it is available --- .../analytics/detectors/pattern_detector.py | 4 +- .../analytics/detectors/threshold_detector.py | 51 +++++++++---------- analytics/analytics/models/model.py | 5 +- analytics/analytics/utils/__init__.py | 1 + analytics/analytics/utils/common.py | 2 - analytics/analytics/utils/time.py | 8 +++ 6 files changed, 38 insertions(+), 33 deletions(-) create mode 100644 analytics/analytics/utils/time.py diff --git a/analytics/analytics/detectors/pattern_detector.py b/analytics/analytics/detectors/pattern_detector.py index 547690e..84f5290 100644 --- a/analytics/analytics/detectors/pattern_detector.py +++ b/analytics/analytics/detectors/pattern_detector.py @@ -9,6 +9,7 @@ from typing import Optional from detectors import Detector from buckets import DataBucket from models import ModelCache +from utils import convert_pd_timestamp_to_ms logger = logging.getLogger('PATTERN_DETECTOR') @@ -56,8 +57,7 @@ class PatternDetector(Detector): newCache = detected['cache'] last_dataframe_time = dataframe.iloc[-1]['timestamp'] - # TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6 - last_detection_time = last_dataframe_time.value / 1000000 + last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time) return { 'cache': newCache, 'segments': segments, diff --git a/analytics/analytics/detectors/threshold_detector.py b/analytics/analytics/detectors/threshold_detector.py index fc733e7..8f552c1 100644 --- a/analytics/analytics/detectors/threshold_detector.py +++ b/analytics/analytics/detectors/threshold_detector.py @@ -6,7 +6,7 @@ from typing import Optional from detectors import Detector from models import ModelCache from time import time -from utils import convert_sec_to_ms +from utils import convert_sec_to_ms, convert_pd_timestamp_to_ms logger = log.getLogger('THRESHOLD_DETECTOR') @@ -25,44 +25,43 @@ class ThresholdDetector(Detector): } } - def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict: + def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict: + if cache == None: + raise 'Threshold detector error: cannot detect before learning' value = cache['value'] condition = cache['condition'] now = convert_sec_to_ms(time()) - segment = ({'from': now, 'to': now}) segments = [] dataframe_without_nans = dataframe.dropna() if len(dataframe_without_nans) == 0: if condition == 'NO_DATA': - segments.append(segment) - return { - 'cache': cache, - 'segments': segments, - 'lastDetectionTime': now - } + segments.append({ 'from': now, 'to': now }) else: return None + else: + last_entry = dataframe_without_nans.iloc[-1] + last_time = convert_pd_timestamp_to_ms(last_entry['timestamp']) + last_value = last_entry['value'] + segment = { 'from': last_time, 'to': last_time } - last_entry = dataframe_without_nans.iloc[-1] - last_value = last_entry['value'] + if condition == '>': + if last_value > value: + segments.append(segment) + elif condition == '>=': + if last_value >= value: + segments.append(segment) + elif condition == '=': + if last_value == value: + segments.append(segment) + elif condition == '<=': + if last_value <= value: + segments.append(segment) + elif condition == '<': + if last_value < value: + segments.append(segment) - if condition == '>': - if 
last_value > value: - segments.append(segment) - elif condition == '>=': - if last_value >= value: - segments.append(segment) - elif condition == '=': - if last_value == value: - segments.append(segment) - elif condition == '<=': - if last_value <= value: - segments.append(segment) - elif condition == '<': - if last_value < value: - segments.append(segment) return { 'cache': cache, 'segments': segments, diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py index 246268d..b6948c0 100644 --- a/analytics/analytics/models/model.py +++ b/analytics/analytics/models/model.py @@ -86,10 +86,9 @@ class Model(ABC): self.state = cache result = self.do_detect(dataframe) - # TODO: convert from ns to ms more proper way (not dividing by 10^6) segments = [( - dataframe['timestamp'][x - 1].value / 1000000, - dataframe['timestamp'][x + 1].value / 1000000 + utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x - 1]), + utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x + 1]) ) for x in result] return { diff --git a/analytics/analytics/utils/__init__.py b/analytics/analytics/utils/__init__.py index d7ce3ce..afdd013 100644 --- a/analytics/analytics/utils/__init__.py +++ b/analytics/analytics/utils/__init__.py @@ -1,2 +1,3 @@ from utils.common import * from utils.segments import * +from utils.time import * diff --git a/analytics/analytics/utils/common.py b/analytics/analytics/utils/common.py index 98a0614..1f3302d 100644 --- a/analytics/analytics/utils/common.py +++ b/analytics/analytics/utils/common.py @@ -326,5 +326,3 @@ def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame: def get_min_max(array, default): return float(min(array, default=default)), float(max(array, default=default)) -def convert_sec_to_ms(sec): - return int(sec) * 1000 diff --git a/analytics/analytics/utils/time.py b/analytics/analytics/utils/time.py new file mode 100644 index 0000000..958685a --- /dev/null +++ b/analytics/analytics/utils/time.py @@ -0,0 +1,8 @@ +import pandas as pd + +def convert_sec_to_ms(sec) -> int: + return int(sec) * 1000 + +def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int: + # TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6 + return int(timestamp.value) / 1000000
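
Note on the conversion helpers: pandas stores pd.Timestamp.value as nanoseconds since the Unix epoch, so dividing by 10^6 yields milliseconds, which is the unit the detectors report in lastDetectionTime and in segment bounds. The sketch below mirrors the new analytics/analytics/utils/time.py helpers; the floor division and the sample timestamp are illustrative only and are not part of this patch:

    import pandas as pd

    def convert_sec_to_ms(sec) -> int:
        return int(sec) * 1000

    def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int:
        # pd.Timestamp.value is nanoseconds since the Unix epoch; 10^6 ns = 1 ms.
        # Floor division keeps the annotated int return type.
        return int(timestamp.value) // 1000000

    ts = pd.Timestamp('2019-02-08T19:39:20Z')
    print(convert_pd_timestamp_to_ms(ts))  # 1549654760000, i.e. ms since epoch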
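
Note on the detector behaviour: with this patch a threshold segment is stamped with the timestamp of the last non-NaN datapoint rather than with wall-clock time; only the NO_DATA condition still falls back to "now". Below is a minimal standalone sketch of that rule, assuming a dataframe with 'timestamp' (datetime64[ns]) and 'value' columns; the function name and the dict-based condition check are illustrative and not the code added above:

    import pandas as pd
    from time import time

    def last_point_segment(dataframe: pd.DataFrame, value: float, condition: str):
        now = int(time()) * 1000  # wall-clock fallback, in ms
        data = dataframe.dropna()
        if len(data) == 0:
            # no datapoints at all: only NO_DATA can fire, stamped with wall-clock time
            return {'from': now, 'to': now} if condition == 'NO_DATA' else None
        last = data.iloc[-1]
        last_time = int(last['timestamp'].value) // 1000000  # ns -> ms
        fired = {
            '>': last['value'] > value,
            '>=': last['value'] >= value,
            '=': last['value'] == value,
            '<=': last['value'] <= value,
            '<': last['value'] < value,
        }.get(condition, False)
        # the segment is anchored at the datapoint's own timestamp
        return {'from': last_time, 'to': last_time} if fired else None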