
refactor common functions from detectors to utils folder

pull/1/head
Alexey Velikiy, 6 years ago
commit 7527faf068
6 changed files:

1. analytics/detectors/general_detector.py (31 changes)
2. analytics/detectors/jump_detector.py (20 changes)
3. analytics/detectors/pattern_detector.py (10 changes)
4. analytics/detectors/peaks_detector.py (33 changes)
5. analytics/detectors/step_detector.py (24 changes)
6. analytics/utils/__init__.py (61 changes)

analytics/detectors/general_detector.py (31 changes)

@@ -1,3 +1,4 @@
+import utils
 from grafana_data_provider import GrafanaDataProvider
 from data_preprocessor import data_preprocessor
 import pandas as pd
@@ -7,18 +8,12 @@ import config
 import os.path
 import json

 NANOSECONDS_IN_MS = 1000000
 logger = logging.getLogger('analytic_toolset')

-def anomalies_to_timestamp(anomalies):
-    for anomaly in anomalies:
-        anomaly['start'] = int(anomaly['start'].timestamp() * 1000)
-        anomaly['finish'] = int(anomaly['finish'].timestamp() * 1000)
-    return anomalies

 class GeneralDetector:
     def __init__(self, anomaly_name):
@@ -46,37 +41,27 @@ class GeneralDetector:
         self.__load_model()

-    def anomalies_box(self, anomalies):
-        max_time = 0
-        min_time = float("inf")
-        for anomaly in anomalies:
-            max_time = max(max_time, anomaly['finish'])
-            min_time = min(min_time, anomaly['start'])
-        min_time = pd.to_datetime(min_time, unit='ms')
-        max_time = pd.to_datetime(max_time, unit='ms')
-        return min_time, max_time
-
-    async def learn(self, anomalies):
+    async def learn(self, segments):
         logger.info("Start to learn for anomaly_name='%s'" % self.anomaly_name)
         confidence = 0.02
         dataframe = self.data_prov.get_dataframe()
         start_index, stop_index = 0, len(dataframe)
-        if len(anomalies) > 0:
+        if len(segments) > 0:
             confidence = 0.0
-            min_time, max_time = self.anomalies_box(anomalies)
+            min_time, max_time = utils.segments_box(segments)
             dataframe = dataframe[dataframe['timestamp'] <= max_time]
             dataframe = dataframe[dataframe['timestamp'] >= min_time]

         train_augmented = self.preprocessor.get_augmented_data(
             dataframe.index[0],
             dataframe.index[-1],
-            anomalies
+            segments
         )

         self.model = self.create_algorithm()
         await self.model.fit(train_augmented, confidence)

-        if len(anomalies) > 0:
+        if len(segments) > 0:
             last_dataframe_time = dataframe.iloc[-1]['timestamp']
             last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
         else:
@@ -112,7 +97,7 @@ class GeneralDetector:
             last_row = self.data_prov.get_data_range(stop_index - 1, stop_index)
             last_dataframe_time = last_row.iloc[0]['timestamp']

-        predicted_anomalies = anomalies_to_timestamp(predicted_anomalies)
+        predicted_anomalies = utils.anomalies_to_timestamp(predicted_anomalies)
         last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
         logger.info("Predicting is finished for anomaly type='%s'" % self.anomaly_name)

analytics/detectors/jump_detector.py (20 changes)

@@ -1,3 +1,4 @@
+import utils
 import numpy as np
 import pickle
 import scipy.signal
@@ -6,20 +7,6 @@ from scipy.signal import argrelextrema
 import math

-def is_intersect(target_segment, segments):
-    for segment in segments:
-        start = max(segment['start'], target_segment[0])
-        finish = min(segment['finish'], target_segment[1])
-        if start <= finish:
-            return True
-    return False
-
-def exponential_smoothing(series, alpha):
-    result = [series[0]]
-    for n in range(1, len(series)):
-        result.append(alpha * series[n] + (1 - alpha) * result[n-1])
-    return result

 class Jumpdetector:
     def __init__(self):
@@ -56,6 +43,7 @@ class Jumpdetector:
     async def fit(self, dataframe, segments):
+        #self.alpha_finder()
         data = dataframe['value']
         confidences = []
         convolve_list = []
@@ -131,7 +119,7 @@ class Jumpdetector:
         result.sort()
         if len(self.segments) > 0:
-            result = [segment for segment in result if not is_intersect(segment, self.segments)]
+            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
         return result

     def __predict(self, data):
@@ -140,7 +128,7 @@ class Jumpdetector:
         extrema_list = []

         # add all intersections of the exponential with the smoothed graph
-        for i in exponential_smoothing(data + self.confidence, 0.02):
+        for i in utils.exponential_smoothing(data + self.confidence, 0.02):
             extrema_list.append(i)

         segments = []
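
Since exponential_smoothing now lives in utils, a quick sanity check of its behavior helps: it is a standard single-exponential filter, s[0] = x[0] and s[n] = alpha * x[n] + (1 - alpha) * s[n-1]. A minimal sketch with made-up values:

    import utils

    series = [10, 10, 10, 20, 20, 20]
    print(utils.exponential_smoothing(series, 0.5))
    # [10, 10.0, 10.0, 15.0, 17.5, 18.75]
    # with alpha=0.5 each value moves halfway toward the newest
    # observation, so the jump at index 3 is smoothed out gradually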

analytics/detectors/pattern_detector.py (10 changes)

@@ -1,4 +1,5 @@
 import detectors
+import utils
 from grafana_data_provider import GrafanaDataProvider
@@ -14,15 +15,6 @@ import pandas as pd

 logger = logging.getLogger('analytic_toolset')

-def segments_box(segments):
-    max_time = 0
-    min_time = float("inf")
-    for segment in segments:
-        min_time = min(min_time, segment['start'])
-        max_time = max(max_time, segment['finish'])
-    min_time = pd.to_datetime(min_time, unit='ms')
-    max_time = pd.to_datetime(max_time, unit='ms')
-    return min_time, max_time

 def resolve_detector_by_pattern(pattern):
     if pattern == "peak":
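
Only the "peak" branch of resolve_detector_by_pattern is visible in the diff; a plausible shape for the dispatch, using the detector class names that appear elsewhere in this commit (every branch except "peak" is an assumption):

    def resolve_detector_by_pattern(pattern):
        if pattern == "peak":
            return detectors.PeaksDetector()
        if pattern == "jump":          # assumed branch
            return detectors.Jumpdetector()
        if pattern == "step":          # assumed branch
            return detectors.StepDetector()
        raise ValueError('Unknown pattern "%s"' % pattern)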

analytics/detectors/peaks_detector.py (33 changes)

@@ -1,37 +1,8 @@
+import utils
 from scipy import signal
 import numpy as np

-def find_steps(array, threshold):
-    """
-    Finds local maxima by segmenting array based on positions at which
-    the threshold value is crossed. Note that this thresholding is
-    applied after the absolute value of the array is taken. Thus,
-    the distinction between upward and downward steps is lost. However,
-    get_step_sizes can be used to determine directionality after the
-    fact.
-
-    Parameters
-    ----------
-    array : numpy array
-        1 dimensional array that represents time series of data points
-    threshold : int / float
-        Threshold value that defines a step
-
-    Returns
-    -------
-    steps : list
-        List of indices of the detected steps
-    """
-    steps = []
-    array = np.abs(array)
-    above_points = np.where(array > threshold, 1, 0)
-    ap_dif = np.diff(above_points)
-    cross_ups = np.where(ap_dif == 1)[0]
-    cross_dns = np.where(ap_dif == -1)[0]
-    for upi, dni in zip(cross_ups, cross_dns):
-        steps.append(np.argmax(array[upi:dni]) + upi)
-    return steps

 class PeaksDetector:
     def __init__(self):
         pass
@@ -80,7 +51,7 @@ class PeaksDetector:
         data = filtered
         data /= data.max()

-        result = find_steps(data, 0.1)
+        result = utils.find_steps(data, 0.1)
         return [(dataframe.index[x], dataframe.index[x + window_size]) for x in result]

     def save(self, model_filename):
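
utils.find_steps keeps the docstring's contract: it returns indices of local maxima inside each region where |array| exceeds the threshold. A small worked example (values are made up):

    import numpy as np
    import utils

    arr = np.array([0.0, 0.2, 0.9, 0.7, 0.1, 0.0, 0.6, 1.2, 0.8, 0.0])
    print(utils.find_steps(arr, 0.5))
    # [2, 7]: one index per above-threshold region (the maxima 0.9 and 1.2)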

analytics/detectors/step_detector.py (24 changes)

@@ -2,25 +2,11 @@ import scipy.signal
 from scipy.fftpack import fft
 from scipy.signal import argrelextrema

+import utils
 import numpy as np
 import pickle

-def is_intersect(target_segment, segments):
-    for segment in segments:
-        start = max(segment['start'], target_segment[0])
-        finish = min(segment['finish'], target_segment[1])
-        if start <= finish:
-            return True
-    return False
-
-def exponential_smoothing(series, alpha):
-    result = [series[0]]
-    for n in range(1, len(series)):
-        result.append(alpha * series[n] + (1 - alpha) * result[n-1])
-    return result

 class StepDetector:
     def __init__(self):
@@ -58,20 +44,20 @@ class StepDetector:
     async def predict(self, dataframe):
         data = dataframe['value']
-        result = self.__predict(data)
+        result = await self.__predict(data)
         result.sort()

         if len(self.segments) > 0:
-            result = [segment for segment in result if not is_intersect(segment, self.segments)]
+            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
         return result

-    def __predict(self, data):
+    async def __predict(self, data):
         window_size = 24
         all_max_flatten_data = data.rolling(window=window_size).mean()
         all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]

         extrema_list = []
-        for i in exponential_smoothing(data - self.confidence, 0.03):
+        for i in utils.exponential_smoothing(data - self.confidence, 0.03):
             extrema_list.append(i)

         segments = []
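
The filter in predict drops any candidate that overlaps a labeled segment. utils.is_intersect compares a (start, finish) candidate against segment dicts; a quick sketch with made-up bounds:

    import utils

    labeled = [{'start': 100, 'finish': 200}]
    print(utils.is_intersect((150, 250), labeled))  # True: overlaps 150..200
    print(utils.is_intersect((201, 250), labeled))  # False: starts after 200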

analytics/utils/__init__.py (61 changes)

@@ -0,0 +1,61 @@
+import numpy as np
+import pandas as pd  # needed by segments_box below (pd.to_datetime)
+
+
+def is_intersect(target_segment, segments):
+    for segment in segments:
+        start = max(segment['start'], target_segment[0])
+        finish = min(segment['finish'], target_segment[1])
+        if start <= finish:
+            return True
+    return False
+
+
+def exponential_smoothing(series, alpha):
+    result = [series[0]]
+    for n in range(1, len(series)):
+        result.append(alpha * series[n] + (1 - alpha) * result[n - 1])
+    return result
+
+
+def find_steps(array, threshold):
+    """
+    Finds local maxima by segmenting array based on positions at which
+    the threshold value is crossed. Note that this thresholding is
+    applied after the absolute value of the array is taken. Thus,
+    the distinction between upward and downward steps is lost. However,
+    get_step_sizes can be used to determine directionality after the
+    fact.
+
+    Parameters
+    ----------
+    array : numpy array
+        1 dimensional array that represents time series of data points
+    threshold : int / float
+        Threshold value that defines a step
+
+    Returns
+    -------
+    steps : list
+        List of indices of the detected steps
+    """
+    steps = []
+    array = np.abs(array)
+    above_points = np.where(array > threshold, 1, 0)
+    ap_dif = np.diff(above_points)
+    cross_ups = np.where(ap_dif == 1)[0]
+    cross_dns = np.where(ap_dif == -1)[0]
+    for upi, dni in zip(cross_ups, cross_dns):
+        steps.append(np.argmax(array[upi:dni]) + upi)
+    return steps
+
+
+def anomalies_to_timestamp(anomalies):
+    for anomaly in anomalies:
+        anomaly['start'] = int(anomaly['start'].timestamp() * 1000)
+        anomaly['finish'] = int(anomaly['finish'].timestamp() * 1000)
+    return anomalies
+
+
+def segments_box(segments):
+    max_time = 0
+    min_time = float("inf")
+    for segment in segments:
+        min_time = min(min_time, segment['start'])
+        max_time = max(max_time, segment['finish'])
+    min_time = pd.to_datetime(min_time, unit='ms')
+    max_time = pd.to_datetime(max_time, unit='ms')
+    return min_time, max_time
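
The one helper not exercised above is anomalies_to_timestamp, which converts pandas Timestamps back to millisecond epoch integers in place. A minimal sketch, assuming tz-naive Timestamps (which pandas treats as UTC in .timestamp()):

    import pandas as pd
    import utils

    anomalies = [{'start': pd.Timestamp('2018-09-04 12:00:00'),
                  'finish': pd.Timestamp('2018-09-04 12:05:00')}]
    print(utils.anomalies_to_timestamp(anomalies))
    # [{'start': 1536062400000, 'finish': 1536062700000}]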