Browse Source

Wrong time in threshold segments #403 (#405)

* Add convert_pd_timestamp_to_ms function to utils

* Use datapoint time in segment if it is available
pull/1/head
rozetko 6 years ago committed by GitHub
parent
commit
aa943595fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      analytics/analytics/detectors/pattern_detector.py
  2. 19
      analytics/analytics/detectors/threshold_detector.py
  3. 5
      analytics/analytics/models/model.py
  4. 1
      analytics/analytics/utils/__init__.py
  5. 2
      analytics/analytics/utils/common.py
  6. 8
      analytics/analytics/utils/time.py

4
analytics/analytics/detectors/pattern_detector.py

@ -9,6 +9,7 @@ from typing import Optional
from detectors import Detector from detectors import Detector
from buckets import DataBucket from buckets import DataBucket
from models import ModelCache from models import ModelCache
from utils import convert_pd_timestamp_to_ms
logger = logging.getLogger('PATTERN_DETECTOR') logger = logging.getLogger('PATTERN_DETECTOR')
@ -56,8 +57,7 @@ class PatternDetector(Detector):
newCache = detected['cache'] newCache = detected['cache']
last_dataframe_time = dataframe.iloc[-1]['timestamp'] last_dataframe_time = dataframe.iloc[-1]['timestamp']
# TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6 last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
last_detection_time = last_dataframe_time.value / 1000000
return { return {
'cache': newCache, 'cache': newCache,
'segments': segments, 'segments': segments,

19
analytics/analytics/detectors/threshold_detector.py

@ -6,7 +6,7 @@ from typing import Optional
from detectors import Detector from detectors import Detector
from models import ModelCache from models import ModelCache
from time import time from time import time
from utils import convert_sec_to_ms from utils import convert_sec_to_ms, convert_pd_timestamp_to_ms
logger = log.getLogger('THRESHOLD_DETECTOR') logger = log.getLogger('THRESHOLD_DETECTOR')
@ -25,28 +25,26 @@ class ThresholdDetector(Detector):
} }
} }
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict: def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
if cache == None:
raise 'Threshold detector error: cannot detect before learning'
value = cache['value'] value = cache['value']
condition = cache['condition'] condition = cache['condition']
now = convert_sec_to_ms(time()) now = convert_sec_to_ms(time())
segment = ({'from': now, 'to': now})
segments = [] segments = []
dataframe_without_nans = dataframe.dropna() dataframe_without_nans = dataframe.dropna()
if len(dataframe_without_nans) == 0: if len(dataframe_without_nans) == 0:
if condition == 'NO_DATA': if condition == 'NO_DATA':
segments.append(segment) segments.append({ 'from': now, 'to': now })
return {
'cache': cache,
'segments': segments,
'lastDetectionTime': now
}
else: else:
return None return None
else:
last_entry = dataframe_without_nans.iloc[-1] last_entry = dataframe_without_nans.iloc[-1]
last_time = convert_pd_timestamp_to_ms(last_entry['timestamp'])
last_value = last_entry['value'] last_value = last_entry['value']
segment = { 'from': last_time, 'to': last_time }
if condition == '>': if condition == '>':
if last_value > value: if last_value > value:
@ -63,6 +61,7 @@ class ThresholdDetector(Detector):
elif condition == '<': elif condition == '<':
if last_value < value: if last_value < value:
segments.append(segment) segments.append(segment)
return { return {
'cache': cache, 'cache': cache,
'segments': segments, 'segments': segments,

5
analytics/analytics/models/model.py

@ -86,10 +86,9 @@ class Model(ABC):
self.state = cache self.state = cache
result = self.do_detect(dataframe) result = self.do_detect(dataframe)
# TODO: convert from ns to ms more proper way (not dividing by 10^6)
segments = [( segments = [(
dataframe['timestamp'][x - 1].value / 1000000, utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x - 1]),
dataframe['timestamp'][x + 1].value / 1000000 utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x + 1])
) for x in result] ) for x in result]
return { return {

1
analytics/analytics/utils/__init__.py

@ -1,2 +1,3 @@
from utils.common import * from utils.common import *
from utils.segments import * from utils.segments import *
from utils.time import *

2
analytics/analytics/utils/common.py

@ -326,5 +326,3 @@ def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:
def get_min_max(array, default): def get_min_max(array, default):
return float(min(array, default=default)), float(max(array, default=default)) return float(min(array, default=default)), float(max(array, default=default))
def convert_sec_to_ms(sec):
return int(sec) * 1000

8
analytics/analytics/utils/time.py

@ -0,0 +1,8 @@
import pandas as pd
def convert_sec_to_ms(sec) -> int:
    """Convert a time value in seconds to integer milliseconds.

    The input is truncated to whole seconds by ``int()`` *before* scaling,
    so any fractional part of ``sec`` is discarded (``1.9`` -> ``1000``).

    Args:
        sec: Number of seconds; anything accepted by ``int()``.

    Returns:
        Whole seconds expressed in milliseconds.
    """
    return int(sec) * 1000


def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int:
    """Convert a pandas ``Timestamp`` to integer milliseconds.

    Args:
        timestamp: Timestamp whose ``.value`` attribute holds an integer
            count of nanoseconds.

    Returns:
        The nanosecond count floor-divided down to milliseconds, as an
        ``int``.
    """
    # Floor division keeps the result an exact int; true division (`/`)
    # would yield a float and contradict the declared return type.
    return int(timestamp.value) // 1000000
Loading…
Cancel
Save