You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
70 lines
2.5 KiB
70 lines
2.5 KiB
7 years ago
|
import pickle
|
||
|
from tsfresh.transformers.feature_selector import FeatureSelector
|
||
|
from sklearn.preprocessing import MinMaxScaler
|
||
|
from sklearn.ensemble import IsolationForest
|
||
|
import pandas as pd
|
||
|
|
||
|
|
||
6 years ago
|
class SupervisedAlgorithm(object):
|
||
7 years ago
|
frame_size = 16
|
||
|
good_features = [
|
||
|
#"value__agg_linear_trend__f_agg_\"max\"__chunk_len_5__attr_\"intercept\"",
|
||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20",
|
||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5",
|
||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_10",
|
||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_20",
|
||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_8__w_20",
|
||
|
# "value__fft_coefficient__coeff_3__attr_\"abs\"",
|
||
|
"time_of_day_column_x",
|
||
|
"time_of_day_column_y",
|
||
|
"value__abs_energy",
|
||
|
# "value__absolute_sum_of_changes",
|
||
|
# "value__sum_of_reoccurring_data_points",
|
||
|
]
|
||
|
clf = None
|
||
|
scaler = None
|
||
|
|
||
|
def __init__(self):
|
||
|
self.features = []
|
||
|
self.col_to_max, self.col_to_min, self.col_to_median = None, None, None
|
||
|
self.augmented_path = None
|
||
|
|
||
6 years ago
|
async def fit(self, dataset, contamination=0.005):
|
||
7 years ago
|
dataset = dataset[self.good_features]
|
||
|
dataset = dataset[-100000:]
|
||
|
|
||
|
self.scaler = MinMaxScaler(feature_range=(-1, 1))
|
||
|
# self.clf = svm.OneClassSVM(nu=contamination, kernel="rbf", gamma=0.1)
|
||
|
self.clf = IsolationForest(contamination=contamination)
|
||
|
|
||
|
self.scaler.fit(dataset)
|
||
|
|
||
|
dataset = self.scaler.transform(dataset)
|
||
|
self.clf.fit(dataset)
|
||
|
|
||
6 years ago
|
async def predict(self, dataframe):
|
||
7 years ago
|
dataset = dataframe[self.good_features]
|
||
|
dataset = self.scaler.transform(dataset)
|
||
|
prediction = self.clf.predict(dataset)
|
||
|
|
||
|
# for i in range(len(dataset)):
|
||
|
# print(str(dataset[i]) + " " + str(prediction[i]))
|
||
|
|
||
|
prediction = [x < 0.0 for x in prediction]
|
||
|
return pd.Series(prediction, index=dataframe.index)
|
||
|
|
||
|
def save(self, model_filename):
|
||
|
with open(model_filename, 'wb') as file:
|
||
|
pickle.dump((self.clf, self.scaler), file)
|
||
|
|
||
|
def load(self, model_filename):
|
||
|
with open(model_filename, 'rb') as file:
|
||
|
self.clf, self.scaler = pickle.load(file)
|
||
|
|
||
|
def __select_features(self, x, y):
|
||
|
# feature_selector = FeatureSelector()
|
||
|
feature_selector = FeatureSelector()
|
||
|
|
||
|
feature_selector.fit(x, y)
|
||
|
return feature_selector.relevant_features
|