|
|
|
import numpy as np
|
|
|
|
import pickle
|
|
|
|
|
|
|
|
|
|
|
|
def find_segments(array, threshold):
    """Locate runs of ``array`` that lie strictly above ``threshold``.

    Args:
        array: 1-D array-like of numbers.
        threshold: Value a sample must exceed to count as "above".

    Returns:
        A list of ``(up, down)`` index pairs, one per run that both starts
        and ends inside the array. ``up`` is the index of the last sample
        at or below the threshold before the run; ``down`` is the index of
        the last sample of the run. A run already in progress at index 0 or
        still open at the end of the array is not reported.
    """
    above_points = np.where(np.asarray(array) > threshold, 1, 0)
    # diff[i] == 1  -> sample i+1 rises above the threshold,
    # diff[i] == -1 -> sample i+1 falls back to or below it.
    ap_dif = np.diff(above_points)
    cross_ups = np.where(ap_dif == 1)[0]
    cross_dns = np.where(ap_dif == -1)[0]

    # Crossings strictly alternate, so at most one down-crossing can precede
    # the first up-crossing (when the array starts above the threshold).
    # Drop it; otherwise every up would be paired with the previous run's down.
    if cross_ups.size and cross_dns.size and cross_dns[0] < cross_ups[0]:
        cross_dns = cross_dns[1:]

    # zip() silently discards a trailing up-crossing that never comes down.
    return list(zip(cross_ups, cross_dns))
|
|
|
|
|
|
|
|
|
|
|
|
def is_intersect(target_segment, segments):
    """Tell whether ``target_segment`` overlaps any entry of ``segments``.

    Args:
        target_segment: Indexable pair ``(start, finish)``.
        segments: Iterable of mappings with ``'start'`` and ``'finish'`` keys.

    Returns:
        True if at least one entry shares a point with the target (bounds
        are inclusive, so touching ends count), False otherwise.
    """
    return any(
        max(candidate['start'], target_segment[0])
        <= min(candidate['finish'], target_segment[1])
        for candidate in segments
    )
|
|
|
|
|
|
|
|
|
|
|
|
def calc_intersections(segments, finded_segments):
    """Count how many labeled segments are hit by a detected segment.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'finish'`` and
            ``'labeled'`` keys. Entries whose ``'labeled'`` value is falsy
            are ignored.
        finded_segments: Sequence of indexable ``(start, finish)`` pairs.

    Returns:
        A tuple ``(intersections, labeled)``: the number of labeled segments
        overlapping at least one detected pair (inclusive bounds), and the
        total number of labeled segments.
    """
    labeled_segments = [entry for entry in segments if entry['labeled']]

    def overlaps_any(entry):
        # Inclusive overlap test against every detected pair.
        return any(
            max(entry['start'], found[0]) <= min(entry['finish'], found[1])
            for found in finded_segments
        )

    hits = sum(1 for entry in labeled_segments if overlaps_any(entry))
    return hits, len(labeled_segments)
|
|
|
|
|
|
|
|
|
|
|
|
def cost_function(segments, finded_segments):
    """Report whether every labeled segment was hit by a detected segment.

    Args:
        segments: Labeled segments, as accepted by ``calc_intersections``.
        finded_segments: Detected ``(start, finish)`` pairs.

    Returns:
        True when the number of intersected labeled segments equals the
        number of labeled segments, False otherwise.
    """
    matched, labeled_total = calc_intersections(segments, finded_segments)
    return matched == labeled_total
|
|
|
|
|
|
|
|
|
|
|
|
def compress_segments(segments):
    """Merge overlapping or touching ``(start, finish)`` pairs.

    Args:
        segments: Iterable of indexable pairs, expected to be sorted by
            start (the merge only ever looks at the previous result).

    Returns:
        A new list in which each pair whose start is not beyond the previous
        pair's finish has been folded into that previous pair.
    """
    result = []
    for segment in segments:
        if not result or result[-1][1] < segment[0]:
            # Strictly after the previous span: start a new one.
            result.append(segment)
        else:
            # Keep the furthest finish so a segment nested inside the
            # previous span cannot shrink it.
            result[-1] = (result[-1][0], max(result[-1][1], segment[1]))
    return result
|
|
|
|
|
|
|
|
|
|
|
|
class StepDetector:
    """Detect step changes in a series by correlating it with a step kernel.

    The values are mean-centred, correlated with a kernel made of
    ``window_size // 2`` values of -1 followed by as many +1, scaled by the
    largest correlation seen while fitting, and thresholded on both signs.
    """

    def __init__(self, pattern):
        """Create an unfitted detector.

        Args:
            pattern: Stored as ``self.pattern``; nothing in this class
                reads it.
        """
        self.pattern = pattern
        self.mean = None         # mean of the training values
        self.window_size = None  # selected correlation window
        self.corr_max = None     # largest training correlation (scale factor)
        self.threshold = None    # threshold applied to the scaled correlation
        self.segments = []       # segments passed to fit(); excluded by predict()

    def fit(self, dataframe, segments, contamination=0.01):
        """Choose ``window_size``, ``corr_max`` and ``threshold`` from data.

        Args:
            dataframe: Object whose ``['value']`` entry converts to a 1-D
                numeric array (for example a pandas DataFrame).
            segments: Sequence of mappings with ``'start'``, ``'finish'`` and
                ``'labeled'`` keys. May be empty.
            contamination: Used only when ``segments`` is empty: a threshold
                is accepted while the total length of detected segments
                exceeds ``len(data) * contamination``.
        """
        # np.asarray works on every pandas version; Series.as_matrix() was
        # removed in pandas 1.0.
        array = np.asarray(dataframe['value'])
        self.mean = array.mean()
        self.segments = segments

        norm_data = array - self.mean
        self.__optimize(norm_data, segments, contamination)

    def predict(self, dataframe):
        """Return detected step segments for ``dataframe['value']``.

        Requires ``fit()`` or ``load()`` to have set ``mean``,
        ``window_size``, ``corr_max`` and ``threshold``.

        Args:
            dataframe: Object whose ``['value']`` entry converts to a 1-D
                numeric array.

        Returns:
            A sorted list of merged ``(start, finish)`` pairs, excluding any
            pair that intersects a segment in ``self.segments``.
        """
        array = np.asarray(dataframe['value'])
        norm_data = array - self.mean

        corr_res = self.__correlate(norm_data, self.window_size)
        corr_res /= self.corr_max

        result = self.__predict(corr_res, self.threshold)
        result.sort()
        result = compress_segments(result)

        if len(self.segments) > 0:
            result = [segment for segment in result
                      if not is_intersect(segment, self.segments)]
        return result

    @staticmethod
    def __correlate(data, window_size):
        """Correlate ``data`` with the step kernel for ``window_size``.

        Shared by fitting and prediction so the threshold is calibrated on
        exactly the signal that ``predict`` later thresholds.

        Returns:
            The 'valid' correlation divided by ``window_size`` and padded
            with ``window_size // 2`` zeros on each side.
        """
        step_size = window_size // 2
        pattern = np.concatenate([[-1] * step_size, [1] * step_size])
        corr_res = np.correlate(data, pattern, mode='valid') / window_size
        return np.concatenate((np.zeros(step_size), corr_res, np.zeros(step_size)))

    def __optimize(self, data, segments, contamination):
        """Try window sizes from 10 up to (not including) 100, keep the cheapest.

        Window sizes grow by a factor of 1.2 (truncated to int). After the
        search, the threshold is re-optimized for the winning window so that
        ``corr_max`` and ``threshold`` match ``window_size``.
        """
        window_size = 10
        mincost = None
        while window_size < 100:
            kernel_len = 2 * (window_size // 2)
            # The smallest window is always scored; larger ones are skipped
            # once the kernel no longer fits inside the data, since a 'valid'
            # correlation is not meaningful there.
            if mincost is not None and kernel_len > len(data):
                break
            cost = self.__optimize_threshold(data, window_size, segments, contamination)
            if mincost is None or cost < mincost:
                mincost = cost
                self.window_size = window_size
            window_size = int(window_size * 1.2)
        self.__optimize_threshold(data, self.window_size, segments, contamination)

    def __optimize_threshold(self, data, window_size, segments, contamination):
        """Bisect the threshold in [0, 1] for one window size.

        Sets ``self.corr_max`` and ``self.threshold`` as a side effect.

        Returns:
            The cost at the last threshold tried: the number of detected
            segments when labeled ``segments`` are given, otherwise the
            negated threshold.
        """
        corr_res = self.__correlate(data, window_size)
        self.corr_max = corr_res.max()
        corr_res /= self.corr_max

        iterations = 20
        lower = 0.
        upper = 1.
        cost = 0
        for _ in range(iterations):
            self.threshold = 0.5 * (lower + upper)
            result = self.__predict(corr_res, self.threshold)

            if len(segments) > 0:
                # Acceptable while every labeled segment is still detected.
                intersections, labeled = calc_intersections(segments, result)
                good = intersections == labeled
                cost = len(result)
            else:
                # Acceptable while enough of the series is still flagged.
                total_sum = sum(finish - start for start, finish in result)
                good = total_sum > len(data) * contamination
                cost = -self.threshold

            # Push the threshold as high as it can go while staying acceptable.
            if good:
                lower = self.threshold
            else:
                upper = self.threshold

        return cost

    def __predict(self, data, threshold):
        """Threshold ``data`` on both signs and return the found segments."""
        segments = find_segments(data, threshold)
        segments += find_segments(data * -1, threshold)
        # NOTE(review): the one-sample shift is kept from the original code;
        # presumably it compensates for the padded correlation being one
        # sample longer than the input - confirm.
        return [(x - 1, y - 1) for (x, y) in segments]

    def save(self, model_filename):
        """Pickle ``(mean, window_size, corr_max, threshold)`` to a file.

        ``self.segments`` and ``self.pattern`` are not stored.
        """
        with open(model_filename, 'wb') as file:
            pickle.dump((self.mean, self.window_size, self.corr_max, self.threshold), file)

    def load(self, model_filename):
        """Best-effort restore of the values written by ``save()``.

        If the file cannot be opened or decoded, the detector is left
        unchanged and no error is raised.
        """
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that come from a trusted source.
        try:
            with open(model_filename, 'rb') as file:
                self.mean, self.window_size, self.corr_max, self.threshold = pickle.load(file)
        except Exception:
            # Deliberately best-effort: a missing or corrupt model file leaves
            # the detector as it was. Unlike a bare ``except``, this does not
            # swallow KeyboardInterrupt or SystemExit.
            pass
|