- Subtract min value from dataset before passing to model
- Rename `StepModel` -> `DropModel`
- Use cache to save state in all models
- Return `Segment { 'from': <timestamp>, 'to': <timestamp> }` instead of `Segment { 'from': <index>, 'to': <index> }` in all models (see the illustrative sketch after the file summary below)
- Integrate new peaks model (from https://github.com/hastic/hastic-server/pull/123)
- Integrate new reverse-peaks model (from https://github.com/hastic/hastic-server/pull/123)
- Refactor: make the `predict` method in `Model` non-abstract and remove it from all children
- Refactor: add an abstract `do_predict` method to models
16 changed files with 396 additions and 198 deletions
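As a rough illustration of the new contract described above (illustrative shapes only, not code from this changeset): labeled segments passed into `fit` and the result returned from `predict` now carry epoch timestamps in their `from`/`to` fields, and the model's internal state travels back to the caller as a `cache` dict.

# Illustrative only; the timestamp values are made up.
labeled_segment = {'from': 1523889000000, 'to': 1523889062000, 'labeled': True}

prediction_result = {
    'segments': [
        {'from': 1523889000010, 'to': 1523889000070}  # Segment { 'from': <timestamp>, 'to': <timestamp> }
    ],
    'cache': {                                        # saved model state, e.g. for the peak models below
        'confidence': 1.5,
        'convolve_max': 570000
    }
}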
@@ -1,13 +1,15 @@
+from models import AnalyticUnitCache
 from abc import ABC, abstractmethod
 from pandas import DataFrame
+from typing import Optional
 
 
 class Detector(ABC):
 
     @abstractmethod
-    async def train(self, dataframe: DataFrame, segments: list, cache: dict) -> dict:
+    async def train(self, dataframe: DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
         pass
 
     @abstractmethod
-    async def predict(self, dataframe: DataFrame, cache: dict) -> dict:
+    async def predict(self, dataframe: DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
         pass
@@ -1,5 +1,8 @@
-from models.model import Model
-from models.step_model import StepModel
-from models.peaks_model import PeaksModel
+from models.model import Model, AnalyticUnitCache
+from models.drop_model import DropModel
+from models.peak_model import PeakModel
 from models.jump_model import JumpModel
 from models.custom_model import CustomModel
+
+from models.custom_model import CustomModel
+from models.reverse_peak_model import ReversePeakModel
@@ -1,13 +1,31 @@
+import utils
+
 from abc import ABC, abstractmethod
 from pandas import DataFrame
+from typing import Optional
 
+AnalyticUnitCache = dict
 
 class Model(ABC):
 
     @abstractmethod
-    def fit(self, dataframe: DataFrame, segments: list, cache: dict) -> dict:
+    def fit(self, dataframe: DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
         pass
 
     @abstractmethod
-    def predict(self, dataframe: DataFrame, cache: dict) -> dict:
+    def do_predict(self, dataframe: DataFrame):
         pass
+
+    def predict(self, dataframe: DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
+
+        result = self.do_predict(dataframe)
+        result.sort()
+
+        if len(self.segments) > 0:
+            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
+        return {
+            'segments': result,
+            'cache': self.state
+        }
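For context, a minimal sketch of how a concrete model is expected to plug into the refactored base class above: the subclass implements `fit` and `do_predict`, while the shared `predict` restores state from the cache, sorts and filters the raw result, and returns both the segments and the updated cache. `ThresholdModel` and the toy dataframe are hypothetical and not part of this PR; the sketch only assumes the `Model`/`AnalyticUnitCache` definitions from the hunk above and a pandas dataframe with `timestamp` and `value` columns, as used by the models below.

from models import Model, AnalyticUnitCache

import pandas as pd
import numpy as np
from typing import Optional


class ThresholdModel(Model):  # hypothetical example model

    def __init__(self):
        self.segments = []
        self.state = {'threshold': 0.0}

    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
        if type(cache) is AnalyticUnitCache:
            self.state = cache
        self.segments = segments
        self.state['threshold'] = float(dataframe['value'].mean())
        return self.state

    def do_predict(self, dataframe: pd.DataFrame):
        # return (from, to) timestamp pairs, matching what the peak models below produce
        hits = dataframe.index[dataframe['value'] > self.state['threshold']]
        return [(dataframe['timestamp'][i].value, dataframe['timestamp'][i].value) for i in hits]


df = pd.DataFrame({
    'timestamp': pd.date_range('2018-01-01', periods=5, freq='T'),
    'value': np.array([0.0, 1.0, 5.0, 1.0, 0.0])
})
model = ThresholdModel()
cache = model.fit(df, segments=[], cache=None)
result = model.predict(df, cache)  # {'segments': [(ts, ts)], 'cache': {'threshold': 1.4}}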
@@ -0,0 +1,113 @@
+from models import Model, AnalyticUnitCache
+
+import scipy.signal
+from scipy.fftpack import fft
+from scipy.signal import argrelextrema
+
+import utils
+import numpy as np
+import pandas as pd
+from typing import Optional
+
+WINDOW_SIZE = 240
+
+
+class PeakModel(Model):
+
+    def __init__(self):
+        super()
+        self.segments = []
+        self.ipeaks = []
+        self.state = {
+            'confidence': 1.5,
+            'convolve_max': 570000
+        }
+
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
+
+        self.segments = segments
+        data = dataframe['value']
+
+        confidences = []
+        convolve_list = []
+        for segment in segments:
+            if segment['labeled']:
+                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
+                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
+
+                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_min = min(segment_data)
+                segment_max = max(segment_data)
+                confidences.append(0.2 * (segment_max - segment_min))
+                flat_segment = segment_data.rolling(window=5).mean()
+                flat_segment = flat_segment.dropna()
+                segment_max_index = flat_segment.idxmax() # + segment['start']
+                self.ipeaks.append(segment_max_index)
+                labeled_drop = data[segment_max_index - WINDOW_SIZE: segment_max_index + WINDOW_SIZE]
+                labeled_min = min(labeled_drop)
+                for value in labeled_drop:
+                    value = value - labeled_min
+                convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
+                convolve_list.append(max(convolve))
+
+        if len(confidences) > 0:
+            self.state['confidence'] = float(min(confidences))
+        else:
+            self.state['confidence'] = 1.5
+
+        if len(convolve_list) > 0:
+            self.state['convolve_max'] = float(max(convolve_list))
+        else:
+            self.state['convolve_max'] = 570000
+
+        return self.state
+
+    def do_predict(self, dataframe: pd.DataFrame):
+        data = dataframe['value']
+        window_size = 24
+        all_max_flatten_data = data.rolling(window=window_size).mean()
+        all_maxs = argrelextrema(np.array(all_max_flatten_data), np.greater)[0]
+
+        extrema_list = []
+        for i in utils.exponential_smoothing(data + self.state['confidence'], 0.02):
+            extrema_list.append(i)
+
+        segments = []
+        for i in all_maxs:
+            if all_max_flatten_data[i] > extrema_list[i]:
+                segments.append(i+12)
+
+        filtered = self.__filter_prediction(segments, data)
+        return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
+
+    def __filter_prediction(self, segments: list, all_max_flatten_data: list):
+        delete_list = []
+        variance_error = int(0.004 * len(all_max_flatten_data))
+        if variance_error > 100:
+            variance_error = 100
+        for i in range(1, len(segments)):
+            if segments[i] < segments[i - 1] + variance_error:
+                delete_list.append(segments[i])
+        for item in delete_list:
+            segments.remove(item)
+
+        delete_list = []
+        if len(segments) == 0 or len(self.ipeaks) == 0:
+            segments = []
+            return segments
+
+        pattern_data = all_max_flatten_data[self.ipeaks[0] - WINDOW_SIZE: self.ipeaks[0] + WINDOW_SIZE]
+        for segment in segments:
+            if segment > WINDOW_SIZE:
+                convol_data = all_max_flatten_data[segment - WINDOW_SIZE: segment + WINDOW_SIZE]
+                conv = scipy.signal.fftconvolve(pattern_data, convol_data)
+                if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
+                    delete_list.append(segment)
+            else:
+                delete_list.append(segment)
+        for item in delete_list:
+            segments.remove(item)
+
+        return segments
@@ -1,59 +0,0 @@
-from models import Model
-
-import utils
-from scipy import signal
-import numpy as np
-import pandas as pd
-
-
-class PeaksModel(Model):
-
-    def __init__(self):
-        super()
-
-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: dict) -> dict:
-        pass
-
-    def predict(self, dataframe: pd.DataFrame, cache: dict) -> dict:
-        array = dataframe['value'].as_matrix()
-        window_size = 20
-        # window = np.ones(101)
-        # mean_filtered = signal.fftconvolve(
-        #     np.concatenate([np.zeros(window_size), array, np.zeros(window_size)]),
-        #     window,
-        #     mode='valid'
-        # )
-        # filtered = np.divide(array, mean_filtered / 101)
-
-        window = signal.general_gaussian(2 * window_size + 1, p=0.5, sig=5)
-        #print(window)
-        filtered = signal.fftconvolve(array, window, mode='valid')
-
-        # filtered = np.concatenate([
-        #     np.zeros(window_size),
-        #     filtered,
-        #     np.zeros(window_size)
-        # ])
-        filtered = filtered / np.sum(window)
-        array = array[window_size:-window_size]
-        filtered = np.subtract(array, filtered)
-
-        # filtered = np.convolve(array, step, mode='valid')
-        # print(len(array))
-        # print(len(filtered))
-
-        # step = np.hstack((np.ones(window_size), 0, -1*np.ones(window_size)))
-        #
-        # conv = np.convolve(array, step, mode='valid')
-        #
-        # conv = np.concatenate([
-        #     np.zeros(window_size),
-        #     conv,
-        #     np.zeros(window_size)])
-
-        #data = step_detect.t_scan(array, window=window_size)
-        data = filtered
-        data /= data.max()
-
-        result = utils.find_steps(data, 0.1)
-        return [(dataframe.index[x], dataframe.index[x + window_size]) for x in result]
@@ -0,0 +1,112 @@
+from models import Model, AnalyticUnitCache
+
+import scipy.signal
+from scipy.fftpack import fft
+from scipy.signal import argrelextrema
+
+import utils
+import numpy as np
+import pandas as pd
+from typing import Optional
+
+WINDOW_SIZE = 240
+
+class ReversePeakModel(Model):
+
+    def __init__(self):
+        super()
+        self.segments = []
+        self.ipeaks = []
+        self.state = {
+            'confidence': 1.5,
+            'convolve_max': 570000
+        }
+
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
+
+        self.segments = segments
+        data = dataframe['value']
+
+        confidences = []
+        convolve_list = []
+        for segment in segments:
+            if segment['labeled']:
+                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
+                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
+
+                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_min = min(segment_data)
+                segment_max = max(segment_data)
+                confidences.append(0.2 * (segment_max - segment_min))
+                flat_segment = segment_data.rolling(window=5).mean()
+                flat_segment = flat_segment.dropna()
+                segment_min_index = flat_segment.idxmin() #+ segment['start']
+                self.ipeaks.append(segment_min_index)
+                labeled_drop = data[segment_min_index - WINDOW_SIZE : segment_min_index + WINDOW_SIZE]
+                labeled_min = min(labeled_drop)
+                for value in labeled_drop:
+                    value = value - labeled_min
+                convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
+                convolve_list.append(max(convolve))
+
+        if len(confidences) > 0:
+            self.state['confidence'] = min(confidences)
+        else:
+            self.state['confidence'] = 1.5
+
+        if len(convolve_list) > 0:
+            self.state['convolve_max'] = max(convolve_list)
+        else:
+            self.state['convolve_max'] = 570000
+
+        return self.state
+
+    def do_predict(self, dataframe: pd.DataFrame):
+        data = dataframe['value']
+        window_size = 24
+        all_max_flatten_data = data.rolling(window=window_size).mean()
+        all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
+
+        extrema_list = []
+        for i in utils.exponential_smoothing(data - self.state['confidence'], 0.02):
+            extrema_list.append(i)
+
+        segments = []
+        for i in all_mins:
+            if all_max_flatten_data[i] < extrema_list[i]:
+                segments.append(i + 12)
+
+        filtered = self.__filter_prediction(segments, data)
+        return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
+
+    def __filter_prediction(self, segments: list, all_max_flatten_data: list):
+        delete_list = []
+        variance_error = int(0.004 * len(all_max_flatten_data))
+        if variance_error > 100:
+            variance_error = 100
+        for i in range(1, len(segments)):
+            if segments[i] < segments[i - 1] + variance_error:
+                delete_list.append(segments[i])
+        for item in delete_list:
+            segments.remove(item)
+
+        delete_list = []
+        if len(segments) == 0 or len(self.ipeaks) == 0:
+            segments = []
+            return segments
+
+        pattern_data = all_max_flatten_data[self.ipeaks[0] - WINDOW_SIZE : self.ipeaks[0] + WINDOW_SIZE]
+        for segment in segments:
+            if segment > WINDOW_SIZE:
+                convol_data = all_max_flatten_data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+                conv = scipy.signal.fftconvolve(pattern_data, convol_data)
+                if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
+                    delete_list.append(segment)
+            else:
+                delete_list.append(segment)
+        for item in delete_list:
+            segments.remove(item)
+
+        return segments