9 changed files with 108 additions and 150 deletions
@@ -1 +0,0 @@
-from detectors.general_detector.general_detector import GeneralDetector
@@ -1,80 +0,0 @@
-from detectors.general_detector.supervised_algorithm import SupervisedAlgorithm
-from detectors import Detector
-from models import AnalyticUnitCache
-import utils
-
-import pandas as pd
-import logging
-import config
-import json
-from typing import Optional
-
-
-NANOSECONDS_IN_MS = 1000000
-
-logger = logging.getLogger('GENERAL_DETECTOR')
-
-
-class GeneralDetector(Detector):
-
-    def __init__(self):
-        self.model = None
-
-    async def train(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
-
-        confidence = 0.02
-        start_index, stop_index = 0, len(dataframe)
-        if len(segments) > 0:
-            confidence = 0.0
-            min_time, max_time = utils.segments_box(segments)
-            dataframe = dataframe[dataframe['timestamp'] <= max_time]
-            dataframe = dataframe[dataframe['timestamp'] >= min_time]
-
-        train_augmented = self.preprocessor.get_augmented_data(
-            dataframe.index[0],
-            dataframe.index[-1],
-            segments
-        )
-
-        self.model = SupervisedAlgorithm()
-        await self.model.fit(train_augmented, confidence)
-        if len(segments) > 0:
-            last_dataframe_time = dataframe.iloc[-1]['timestamp']
-            last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
-        else:
-            last_prediction_time = 0
-
-        logger.info("Learning is finished for anomaly_name='%s'" % self.anomaly_name)
-        return cache
-
-    async def predict(self, dataframe: pd.DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
-        logger.info("Start to predict for anomaly type='%s'" % self.anomaly_name)
-        last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')
-
-        start_index = self.data_prov.get_upper_bound(last_prediction_time)
-        stop_index = self.data_prov.size()
-        last_prediction_time = int(last_prediction_time.value / NANOSECONDS_IN_MS)
-
-        predicted_anomalies = []
-        if start_index < stop_index:
-            max_chunk_size = 50000
-            predicted = pd.Series()
-            for index in range(start_index, stop_index, max_chunk_size):
-                chunk_start = index
-                chunk_finish = min(index + max_chunk_size, stop_index)
-                predict_augmented = self.preprocessor.get_augmented_data(chunk_start, chunk_finish)
-
-                assert(len(predict_augmented) == chunk_finish - chunk_start)
-
-                predicted_current = await self.model.predict(predict_augmented)
-                predicted = pd.concat([predicted, predicted_current])
-            predicted_anomalies = self.preprocessor.inverse_transform_anomalies(predicted)
-
-            last_row = self.data_prov.get_data_range(stop_index - 1, stop_index)
-
-            last_dataframe_time = last_row.iloc[0]['timestamp']
-            predicted_anomalies = utils.anomalies_to_timestamp(predicted_anomalies)
-            last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
-
-        logger.info("Predicting is finished for anomaly type='%s'" % self.anomaly_name)
-        return predicted_anomalies, last_prediction_time
@@ -1,61 +0,0 @@
-import pickle
-from tsfresh.transformers.feature_selector import FeatureSelector
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.ensemble import IsolationForest
-import pandas as pd
-
-
-class SupervisedAlgorithm(object):
-    frame_size = 16
-    good_features = [
-        #"value__agg_linear_trend__f_agg_\"max\"__chunk_len_5__attr_\"intercept\"",
-        # "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20",
-        # "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5",
-        # "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_10",
-        # "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_20",
-        # "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_8__w_20",
-        # "value__fft_coefficient__coeff_3__attr_\"abs\"",
-        "time_of_day_column_x",
-        "time_of_day_column_y",
-        "value__abs_energy",
-        # "value__absolute_sum_of_changes",
-        # "value__sum_of_reoccurring_data_points",
-    ]
-    clf = None
-    scaler = None
-
-    def __init__(self):
-        self.features = []
-        self.col_to_max, self.col_to_min, self.col_to_median = None, None, None
-        self.augmented_path = None
-
-    async def fit(self, dataset, contamination=0.005):
-        dataset = dataset[self.good_features]
-        dataset = dataset[-100000:]
-
-        self.scaler = MinMaxScaler(feature_range=(-1, 1))
-        # self.clf = svm.OneClassSVM(nu=contamination, kernel="rbf", gamma=0.1)
-        self.clf = IsolationForest(contamination=contamination)
-
-        self.scaler.fit(dataset)
-
-        dataset = self.scaler.transform(dataset)
-        self.clf.fit(dataset)
-
-    async def predict(self, dataframe):
-        dataset = dataframe[self.good_features]
-        dataset = self.scaler.transform(dataset)
-        prediction = self.clf.predict(dataset)
-
-        # for i in range(len(dataset)):
-        #     print(str(dataset[i]) + " " + str(prediction[i]))
-
-        prediction = [x < 0.0 for x in prediction]
-        return pd.Series(prediction, index=dataframe.index)
-
-    def __select_features(self, x, y):
-        # feature_selector = FeatureSelector()
-        feature_selector = FeatureSelector()
-
-        feature_selector.fit(x, y)
-        return feature_selector.relevant_features
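For context on the removed SupervisedAlgorithm.predict (not part of the diff): scikit-learn's IsolationForest.predict() returns +1 for inliers and -1 for outliers, which is why the removed code marks anomalies with `x < 0.0`. A minimal standalone sketch of that convention:

import numpy as np
from sklearn.ensemble import IsolationForest

# Fit on mostly-normal data with one obvious outlier appended.
X = np.concatenate([np.random.normal(0, 1, (500, 1)), [[8.0]]])
clf = IsolationForest(contamination=0.01, random_state=0)
clf.fit(X)

labels = clf.predict(X)   # array of +1 (inlier) / -1 (outlier)
anomalies = labels < 0    # boolean mask, same convention as the removed code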
@@ -0,0 +1,97 @@
+from models import Model, AnalyticUnitCache
+
+import utils
+import numpy as np
+import pandas as pd
+import scipy.signal
+from scipy.fftpack import fft
+from scipy.signal import argrelextrema
+import math
+from scipy.stats import gaussian_kde
+from scipy.stats import norm
+from typing import Optional
+
+
+WINDOW_SIZE = 350
+
+
+class GeneralModel(Model):
+
+    def __init__(self):
+        super()
+        self.segments = []
+        self.ipats = []
+        self.state = {
+            'convolve_max': WINDOW_SIZE,
+        }
+        self.all_conv = []
+
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
+        self.segments = segments
+
+        data = dataframe['value']
+        convolve_list = []
+        for segment in segments:
+            if segment['labeled']:
+                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
+                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
+
+                segment_data = data[segment_from_index: segment_to_index + 1]
+                if len(segment_data) == 0:
+                    continue
+                self.ipats.append(segment_from_index + int((segment_to_index - segment_from_index) / 2))
+                segment_min = min(segment_data)
+                segment_data = segment_data - segment_min
+                segment_max = max(segment_data)
+                segment_data = segment_data / segment_max
+
+                convolve = scipy.signal.fftconvolve(segment_data, segment_data)
+                convolve_list.append(max(convolve))
+
+        if len(convolve_list) > 0:
+            self.state['convolve_max'] = float(max(convolve_list))
+        else:
+            self.state['convolve_max'] = WINDOW_SIZE / 3
+
+        return self.state
+
+    def do_predict(self, dataframe: pd.DataFrame):
+        data = dataframe['value']
+        pat_data = data[self.ipats[0] - WINDOW_SIZE: self.ipats[0] + WINDOW_SIZE]
+        x = min(pat_data)
+        pat_data = pat_data - x
+        y = max(pat_data)
+        pat_data = pat_data / y
+
+        for i in range(WINDOW_SIZE * 2, len(data)):
+            watch_data = data[i - WINDOW_SIZE * 2: i]
+            w = min(watch_data)
+            watch_data = watch_data - w
+            r = max(watch_data)
+            if r < y:
+                watch_data = watch_data / y
+            else:
+                watch_data = watch_data / r
+            conv = scipy.signal.fftconvolve(pat_data, watch_data)
+            self.all_conv.append(max(conv))
+        all_conv_peaks = utils.peak_finder(self.all_conv, WINDOW_SIZE * 2)
+
+        filtered = self.__filter_prediction(all_conv_peaks, data)
+        return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
+
+    def __filter_prediction(self, segments: list, data: list):
+        if len(segments) == 0 or len(self.ipats) == 0:
+            segments = []
+            return segments
+        delete_list = []
+
+        for val in segments:
+            if self.all_conv[val] < self.state['convolve_max'] * 0.8:
+                delete_list.append(val)
+
+        for item in delete_list:
+            segments.remove(item)
+
+        return segments
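Not part of the diff: a minimal, hypothetical usage sketch of the new GeneralModel. It assumes the analytics package modules (`models`, `utils`) are importable, that the added file is exposed as `general_model` (the file path is not shown in this view), and that segments carry parseable 'from'/'to' timestamps plus a 'labeled' flag, matching what fit() reads above.

import numpy as np
import pandas as pd

from general_model import GeneralModel  # assumed module name for the added file

# Synthetic series with one labeled pattern segment around index 1000.
timestamps = pd.date_range('2018-01-01', periods=2000, freq='S')
values = np.random.normal(0, 0.1, 2000)
values[950:1050] += 5.0  # the pattern shape inside the labeled segment
dataframe = pd.DataFrame({'timestamp': timestamps, 'value': values})

segments = [{
    'from': str(timestamps[800]),  # hypothetical segment bounds
    'to': str(timestamps[1200]),
    'labeled': True,
}]

model = GeneralModel()
cache = model.fit(dataframe, segments, None)  # returns the state dict with 'convolve_max'
predicted = model.do_predict(dataframe)       # list of (from_ts, to_ts) pairs in nanoseconds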