Browse Source

Wrong time in threshold segments #403 (#405)

* Add convert_pd_timestamp_to_ms function to utils

* Use datapoint time in segment if it is available
pull/1/head
rozetko 6 years ago committed by GitHub
parent
commit
aa943595fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      analytics/analytics/detectors/pattern_detector.py
  2. 51
      analytics/analytics/detectors/threshold_detector.py
  3. 5
      analytics/analytics/models/model.py
  4. 1
      analytics/analytics/utils/__init__.py
  5. 2
      analytics/analytics/utils/common.py
  6. 8
      analytics/analytics/utils/time.py

4
analytics/analytics/detectors/pattern_detector.py

@ -9,6 +9,7 @@ from typing import Optional
from detectors import Detector
from buckets import DataBucket
from models import ModelCache
from utils import convert_pd_timestamp_to_ms
logger = logging.getLogger('PATTERN_DETECTOR')
@ -56,8 +57,7 @@ class PatternDetector(Detector):
newCache = detected['cache']
last_dataframe_time = dataframe.iloc[-1]['timestamp']
# TODO: convert from nanoseconds to millisecond in a better way: not by dividing by 10^6
last_detection_time = last_dataframe_time.value / 1000000
last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
return {
'cache': newCache,
'segments': segments,

51
analytics/analytics/detectors/threshold_detector.py

@ -6,7 +6,7 @@ from typing import Optional
from detectors import Detector
from models import ModelCache
from time import time
from utils import convert_sec_to_ms
from utils import convert_sec_to_ms, convert_pd_timestamp_to_ms
logger = log.getLogger('THRESHOLD_DETECTOR')
@ -25,44 +25,43 @@ class ThresholdDetector(Detector):
}
}
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> dict:
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
if cache == None:
raise 'Threshold detector error: cannot detect before learning'
value = cache['value']
condition = cache['condition']
now = convert_sec_to_ms(time())
segment = ({'from': now, 'to': now})
segments = []
dataframe_without_nans = dataframe.dropna()
if len(dataframe_without_nans) == 0:
if condition == 'NO_DATA':
segments.append(segment)
return {
'cache': cache,
'segments': segments,
'lastDetectionTime': now
}
segments.append({ 'from': now, 'to': now })
else:
return None
else:
last_entry = dataframe_without_nans.iloc[-1]
last_time = convert_pd_timestamp_to_ms(last_entry['timestamp'])
last_value = last_entry['value']
segment = { 'from': last_time, 'to': last_time }
last_entry = dataframe_without_nans.iloc[-1]
last_value = last_entry['value']
if condition == '>':
if last_value > value:
segments.append(segment)
elif condition == '>=':
if last_value >= value:
segments.append(segment)
elif condition == '=':
if last_value == value:
segments.append(segment)
elif condition == '<=':
if last_value <= value:
segments.append(segment)
elif condition == '<':
if last_value < value:
segments.append(segment)
if condition == '>':
if last_value > value:
segments.append(segment)
elif condition == '>=':
if last_value >= value:
segments.append(segment)
elif condition == '=':
if last_value == value:
segments.append(segment)
elif condition == '<=':
if last_value <= value:
segments.append(segment)
elif condition == '<':
if last_value < value:
segments.append(segment)
return {
'cache': cache,
'segments': segments,

5
analytics/analytics/models/model.py

@ -86,10 +86,9 @@ class Model(ABC):
self.state = cache
result = self.do_detect(dataframe)
# TODO: convert from ns to ms in a more proper way (not by dividing by 10^6)
segments = [(
dataframe['timestamp'][x - 1].value / 1000000,
dataframe['timestamp'][x + 1].value / 1000000
utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x - 1]),
utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x + 1])
) for x in result]
return {

1
analytics/analytics/utils/__init__.py

@ -1,2 +1,3 @@
from utils.common import *
from utils.segments import *
from utils.time import *

2
analytics/analytics/utils/common.py

@ -326,5 +326,3 @@ def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:
def get_min_max(array, default):
    """Return the smallest and largest items of ``array`` as a pair of floats.

    ``default`` is passed to both ``min`` and ``max`` and is therefore used
    for each bound when ``array`` yields no items.
    """
    lowest = float(min(array, default=default))
    highest = float(max(array, default=default))
    return lowest, highest
def convert_sec_to_ms(sec):
    """Convert seconds to milliseconds.

    ``sec`` is first passed through ``int``, so any fractional part of a
    float is dropped before multiplying by 1000.
    """
    ms_per_second = 1000
    whole_seconds = int(sec)
    return whole_seconds * ms_per_second

8
analytics/analytics/utils/time.py

@ -0,0 +1,8 @@
import pandas as pd
def convert_sec_to_ms(sec) -> int:
    """Convert seconds to integer milliseconds.

    The input goes through ``int`` before scaling, so the fractional part
    of a float number of seconds is discarded rather than rounded.
    """
    truncated = int(sec)
    milliseconds = truncated * 1000
    return milliseconds
def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int:
    """Convert a pandas Timestamp to integer milliseconds.

    Reads ``timestamp.value`` (the timestamp as an integer count of
    nanoseconds) and scales it down to milliseconds.

    Args:
        timestamp: the pandas Timestamp to convert.

    Returns:
        Milliseconds as an ``int``; any sub-millisecond remainder is
        dropped by floor division.
    """
    # TODO: convert from nanoseconds to milliseconds in a better way: not by dividing by 10^6
    # Floor division keeps the result an int, matching the declared return
    # type; true division (`/`) would always produce a float.
    return int(timestamp.value) // 1000000
Loading…
Cancel
Save