
refactor common functions from detectors to utils folder

pull/1/head
Alexey Velikiy, 6 years ago
commit 7527faf068
6 changed files:

1. analytics/detectors/general_detector.py (31 changes)
2. analytics/detectors/jump_detector.py (20 changes)
3. analytics/detectors/pattern_detector.py (10 changes)
4. analytics/detectors/peaks_detector.py (33 changes)
5. analytics/detectors/step_detector.py (24 changes)
6. analytics/utils/__init__.py (61 changes)

analytics/detectors/general_detector.py (31 changes)

@@ -1,3 +1,4 @@
+import utils
 from grafana_data_provider import GrafanaDataProvider
 from data_preprocessor import data_preprocessor
 import pandas as pd
@@ -7,18 +8,12 @@ import config
 import os.path
 import json

 NANOSECONDS_IN_MS = 1000000
 logger = logging.getLogger('analytic_toolset')

-def anomalies_to_timestamp(anomalies):
-    for anomaly in anomalies:
-        anomaly['start'] = int(anomaly['start'].timestamp() * 1000)
-        anomaly['finish'] = int(anomaly['finish'].timestamp() * 1000)
-    return anomalies

 class GeneralDetector:
     def __init__(self, anomaly_name):
@@ -46,37 +41,27 @@ class GeneralDetector:
         self.__load_model()

-    def anomalies_box(self, anomalies):
-        max_time = 0
-        min_time = float("inf")
-        for anomaly in anomalies:
-            max_time = max(max_time, anomaly['finish'])
-            min_time = min(min_time, anomaly['start'])
-        min_time = pd.to_datetime(min_time, unit='ms')
-        max_time = pd.to_datetime(max_time, unit='ms')
-        return min_time, max_time
-
-    async def learn(self, anomalies):
+    async def learn(self, segments):
         logger.info("Start to learn for anomaly_name='%s'" % self.anomaly_name)
         confidence = 0.02
         dataframe = self.data_prov.get_dataframe()
         start_index, stop_index = 0, len(dataframe)
-        if len(anomalies) > 0:
+        if len(segments) > 0:
             confidence = 0.0
-            min_time, max_time = self.anomalies_box(anomalies)
+            min_time, max_time = utils.segments_box(segments)
             dataframe = dataframe[dataframe['timestamp'] <= max_time]
             dataframe = dataframe[dataframe['timestamp'] >= min_time]

         train_augmented = self.preprocessor.get_augmented_data(
             dataframe.index[0],
             dataframe.index[-1],
-            anomalies
+            segments
         )

         self.model = self.create_algorithm()
         await self.model.fit(train_augmented, confidence)

-        if len(anomalies) > 0:
+        if len(segments) > 0:
             last_dataframe_time = dataframe.iloc[-1]['timestamp']
             last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
         else:
@@ -112,7 +97,7 @@ class GeneralDetector:
             last_row = self.data_prov.get_data_range(stop_index - 1, stop_index)
             last_dataframe_time = last_row.iloc[0]['timestamp']

-        predicted_anomalies = anomalies_to_timestamp(predicted_anomalies)
+        predicted_anomalies = utils.anomalies_to_timestamp(predicted_anomalies)
         last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
         logger.info("Predicting is finished for anomaly type='%s'" % self.anomaly_name)

analytics/detectors/jump_detector.py (20 changes)

@@ -1,3 +1,4 @@
+import utils
 import numpy as np
 import pickle
 import scipy.signal
@@ -6,20 +7,6 @@ from scipy.signal import argrelextrema
 import math

-def is_intersect(target_segment, segments):
-    for segment in segments:
-        start = max(segment['start'], target_segment[0])
-        finish = min(segment['finish'], target_segment[1])
-        if start <= finish:
-            return True
-    return False
-
-def exponential_smoothing(series, alpha):
-    result = [series[0]]
-    for n in range(1, len(series)):
-        result.append(alpha * series[n] + (1 - alpha) * result[n-1])
-    return result

 class Jumpdetector:
     def __init__(self):
@@ -56,6 +43,7 @@ class Jumpdetector:
     async def fit(self, dataframe, segments):
+        #self.alpha_finder()
         data = dataframe['value']
         confidences = []
         convolve_list = []
@@ -131,7 +119,7 @@ class Jumpdetector:
         result.sort()
         if len(self.segments) > 0:
-            result = [segment for segment in result if not is_intersect(segment, self.segments)]
+            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
         return result

     def __predict(self, data):
@@ -140,7 +128,7 @@ class Jumpdetector:
         extrema_list = []

         # add all intersections of the exponential with the smoothed graph
-        for i in exponential_smoothing(data + self.confidence, 0.02):
+        for i in utils.exponential_smoothing(data + self.confidence, 0.02):
             extrema_list.append(i)

         segments = []
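
Since exponential_smoothing now lives in utils, a quick sanity check of its behavior helps: it is a standard single-exponential filter, s[0] = x[0] and s[n] = alpha * x[n] + (1 - alpha) * s[n-1]. A minimal sketch with made-up values:

    import utils

    series = [10, 10, 10, 20, 20, 20]
    print(utils.exponential_smoothing(series, 0.5))
    # [10, 10.0, 10.0, 15.0, 17.5, 18.75]
    # with alpha=0.5 each value moves halfway toward the newest
    # observation, so the jump at index 3 is smoothed out gradually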

analytics/detectors/pattern_detector.py (10 changes)

@@ -1,4 +1,5 @@
 import detectors
+import utils
 from grafana_data_provider import GrafanaDataProvider
@@ -14,15 +15,6 @@ import pandas as pd

 logger = logging.getLogger('analytic_toolset')

-def segments_box(segments):
-    max_time = 0
-    min_time = float("inf")
-    for segment in segments:
-        min_time = min(min_time, segment['start'])
-        max_time = max(max_time, segment['finish'])
-    min_time = pd.to_datetime(min_time, unit='ms')
-    max_time = pd.to_datetime(max_time, unit='ms')
-    return min_time, max_time

 def resolve_detector_by_pattern(pattern):
     if pattern == "peak":
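
Only the "peak" branch of resolve_detector_by_pattern is visible in the diff; a plausible shape for the dispatch, using the detector class names that appear elsewhere in this commit (every branch except "peak" is an assumption):

    def resolve_detector_by_pattern(pattern):
        if pattern == "peak":
            return detectors.PeaksDetector()
        if pattern == "jump":          # assumed branch
            return detectors.Jumpdetector()
        if pattern == "step":          # assumed branch
            return detectors.StepDetector()
        raise ValueError('Unknown pattern "%s"' % pattern)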

analytics/detectors/peaks_detector.py (33 changes)

@@ -1,37 +1,8 @@
+import utils
 from scipy import signal
 import numpy as np

-def find_steps(array, threshold):
-    """
-    Finds local maxima by segmenting array based on positions at which
-    the threshold value is crossed. Note that this thresholding is
-    applied after the absolute value of the array is taken. Thus,
-    the distinction between upward and downward steps is lost. However,
-    get_step_sizes can be used to determine directionality after the
-    fact.
-
-    Parameters
-    ----------
-    array : numpy array
-        1 dimensional array that represents time series of data points
-    threshold : int / float
-        Threshold value that defines a step
-
-    Returns
-    -------
-    steps : list
-        List of indices of the detected steps
-    """
-    steps = []
-    array = np.abs(array)
-    above_points = np.where(array > threshold, 1, 0)
-    ap_dif = np.diff(above_points)
-    cross_ups = np.where(ap_dif == 1)[0]
-    cross_dns = np.where(ap_dif == -1)[0]
-    for upi, dni in zip(cross_ups, cross_dns):
-        steps.append(np.argmax(array[upi:dni]) + upi)
-    return steps

 class PeaksDetector:
     def __init__(self):
         pass
@@ -80,7 +51,7 @@ class PeaksDetector:
         data = filtered
         data /= data.max()

-        result = find_steps(data, 0.1)
+        result = utils.find_steps(data, 0.1)
         return [(dataframe.index[x], dataframe.index[x + window_size]) for x in result]

     def save(self, model_filename):
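
utils.find_steps keeps the docstring's contract: it returns indices of local maxima inside each region where |array| exceeds the threshold. A small worked example (values are made up):

    import numpy as np
    import utils

    arr = np.array([0.0, 0.2, 0.9, 0.7, 0.1, 0.0, 0.6, 1.2, 0.8, 0.0])
    print(utils.find_steps(arr, 0.5))
    # [2, 7]: one index per above-threshold region (the maxima 0.9 and 1.2)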

analytics/detectors/step_detector.py (24 changes)

@@ -2,25 +2,11 @@ import scipy.signal
 from scipy.fftpack import fft
 from scipy.signal import argrelextrema

+import utils
 import numpy as np
 import pickle

-def is_intersect(target_segment, segments):
-    for segment in segments:
-        start = max(segment['start'], target_segment[0])
-        finish = min(segment['finish'], target_segment[1])
-        if start <= finish:
-            return True
-    return False
-
-def exponential_smoothing(series, alpha):
-    result = [series[0]]
-    for n in range(1, len(series)):
-        result.append(alpha * series[n] + (1 - alpha) * result[n-1])
-    return result

 class StepDetector:
     def __init__(self):
@@ -58,20 +44,20 @@ class StepDetector:
     async def predict(self, dataframe):
         data = dataframe['value']
-        result = self.__predict(data)
+        result = await self.__predict(data)
         result.sort()

         if len(self.segments) > 0:
-            result = [segment for segment in result if not is_intersect(segment, self.segments)]
+            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
         return result

-    def __predict(self, data):
+    async def __predict(self, data):
         window_size = 24
         all_max_flatten_data = data.rolling(window=window_size).mean()
         all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]

         extrema_list = []
-        for i in exponential_smoothing(data - self.confidence, 0.03):
+        for i in utils.exponential_smoothing(data - self.confidence, 0.03):
             extrema_list.append(i)

         segments = []
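
The filter in predict drops any candidate that overlaps a labeled segment. utils.is_intersect compares a (start, finish) candidate against segment dicts; a quick sketch with made-up bounds:

    import utils

    labeled = [{'start': 100, 'finish': 200}]
    print(utils.is_intersect((150, 250), labeled))  # True: overlaps 150..200
    print(utils.is_intersect((201, 250), labeled))  # False: starts after 200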

analytics/utils/__init__.py (61 changes)

@@ -0,0 +1,61 @@
+import numpy as np
+import pandas as pd  # needed by segments_box below (pd.to_datetime)
+
+
+def is_intersect(target_segment, segments):
+    for segment in segments:
+        start = max(segment['start'], target_segment[0])
+        finish = min(segment['finish'], target_segment[1])
+        if start <= finish:
+            return True
+    return False
+
+
+def exponential_smoothing(series, alpha):
+    result = [series[0]]
+    for n in range(1, len(series)):
+        result.append(alpha * series[n] + (1 - alpha) * result[n - 1])
+    return result
+
+
+def find_steps(array, threshold):
+    """
+    Finds local maxima by segmenting array based on positions at which
+    the threshold value is crossed. Note that this thresholding is
+    applied after the absolute value of the array is taken. Thus,
+    the distinction between upward and downward steps is lost. However,
+    get_step_sizes can be used to determine directionality after the
+    fact.
+
+    Parameters
+    ----------
+    array : numpy array
+        1 dimensional array that represents time series of data points
+    threshold : int / float
+        Threshold value that defines a step
+
+    Returns
+    -------
+    steps : list
+        List of indices of the detected steps
+    """
+    steps = []
+    array = np.abs(array)
+    above_points = np.where(array > threshold, 1, 0)
+    ap_dif = np.diff(above_points)
+    cross_ups = np.where(ap_dif == 1)[0]
+    cross_dns = np.where(ap_dif == -1)[0]
+    for upi, dni in zip(cross_ups, cross_dns):
+        steps.append(np.argmax(array[upi:dni]) + upi)
+    return steps
+
+
+def anomalies_to_timestamp(anomalies):
+    for anomaly in anomalies:
+        anomaly['start'] = int(anomaly['start'].timestamp() * 1000)
+        anomaly['finish'] = int(anomaly['finish'].timestamp() * 1000)
+    return anomalies
+
+
+def segments_box(segments):
+    max_time = 0
+    min_time = float("inf")
+    for segment in segments:
+        min_time = min(min_time, segment['start'])
+        max_time = max(max_time, segment['finish'])
+    min_time = pd.to_datetime(min_time, unit='ms')
+    max_time = pd.to_datetime(max_time, unit='ms')
+    return min_time, max_time
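
The one helper not exercised above is anomalies_to_timestamp, which converts pandas Timestamps back to millisecond epoch integers in place. A minimal sketch, assuming tz-naive Timestamps (which pandas treats as UTC in .timestamp()):

    import pandas as pd
    import utils

    anomalies = [{'start': pd.Timestamp('2018-09-04 12:00:00'),
                  'finish': pd.Timestamp('2018-09-04 12:05:00')}]
    print(utils.anomalies_to_timestamp(anomalies))
    # [{'start': 1536062400000, 'finish': 1536062700000}]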