
Consider segment width in models #136 (#141)

* fit -> do_fit in all models && add self.segment_length

* Move converting indices to timestamps to Model class

* add flexible win size to all models
Alexandr Velikiy, 6 years ago (committed by rozetko)
commit 12c52f5ce9
7 changed files:

1. analytics/models/custom_model.py (3 changes)
2. analytics/models/drop_model.py (31 changes)
3. analytics/models/general_model.py (32 changes)
4. analytics/models/jump_model.py (33 changes)
5. analytics/models/model.py (33 changes)
6. analytics/models/peak_model.py (64 changes)
7. analytics/models/trough_model.py (71 changes)

analytics/models/custom_model.py (3 changes)

@@ -1,11 +1,10 @@
 from models import Model
 import utils
 import pandas as pd
-from typing import Optional

 class CustomModel(Model):

-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[dict]) -> dict:
+    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
         pass

     def do_predict(self, dataframe: pd.DataFrame) -> list:

analytics/models/drop_model.py (31 changes)

@@ -1,4 +1,4 @@
-from models import Model, AnalyticUnitCache
+from models import Model

 import scipy.signal
 from scipy.fftpack import fft
@@ -8,9 +8,7 @@ from scipy.stats import gaussian_kde
 import utils
 import numpy as np
 import pandas as pd
-from typing import Optional
-
-WINDOW_SIZE = 200

 class DropModel(Model):
     def __init__(self):
@@ -19,16 +17,13 @@ class DropModel(Model):
         self.idrops = []
         self.state = {
             'confidence': 1.5,
-            'convolve_max': WINDOW_SIZE,
+            'convolve_max': 200,
             'DROP_HEIGHT': 1,
             'DROP_LENGTH': 1,
+            'WINDOW_SIZE': 240,
         }

-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
-        if type(cache) is AnalyticUnitCache:
-            self.state = cache
-        self.segments = segments
+    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
         data = dataframe['value']
         confidences = []
         convolve_list = []
@@ -68,7 +63,7 @@ class DropModel(Model):
             drop_center = cen_ind[0]
             segment_cent_index = drop_center - 5 + segment_from_index
             self.idrops.append(segment_cent_index)
-            labeled_drop = data[segment_cent_index - WINDOW_SIZE : segment_cent_index + WINDOW_SIZE]
+            labeled_drop = data[segment_cent_index - self.state['WINDOW_SIZE']: segment_cent_index + self.state['WINDOW_SIZE']]
             labeled_min = min(labeled_drop)
             for value in labeled_drop:
                 value = value - labeled_min
@@ -84,7 +79,7 @@ class DropModel(Model):
         if len(convolve_list) > 0:
             self.state['convolve_max'] = float(max(convolve_list))
         else:
-            self.state['convolve_max'] = WINDOW_SIZE
+            self.state['convolve_max'] = self.state['WINDOW_SIZE']

         if len(drop_height_list) > 0:
             self.state['DROP_HEIGHT'] = int(min(drop_height_list))
@@ -96,15 +91,11 @@ class DropModel(Model):
         else:
             self.state['DROP_LENGTH'] = 1
-        return self.state

     def do_predict(self, dataframe: pd.DataFrame) -> list:
         data = dataframe['value']
         possible_drops = utils.find_drop(data, self.state['DROP_HEIGHT'], self.state['DROP_LENGTH'] + 1)
-        filtered = self.__filter_prediction(possible_drops, data)
-        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
-        return [(dataframe['timestamp'][x - 1].value / 1000000, dataframe['timestamp'][x + 1].value / 1000000) for x in filtered]
+        return self.__filter_prediction(possible_drops, data)

     def __filter_prediction(self, segments: list, data: list):
         delete_list = []
@@ -122,12 +113,12 @@ class DropModel(Model):
         if len(segments) == 0 or len(self.idrops) == 0 :
             segments = []
             return segments
-        pattern_data = data[self.idrops[0] - WINDOW_SIZE : self.idrops[0] + WINDOW_SIZE]
+        pattern_data = data[self.idrops[0] - self.state['WINDOW_SIZE'] : self.idrops[0] + self.state['WINDOW_SIZE']]
         for segment in segments:
-            if segment > WINDOW_SIZE and segment < (len(data) - WINDOW_SIZE):
-                convol_data = data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+            if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
+                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE']]
                 conv = scipy.signal.fftconvolve(pattern_data, convol_data)
-                if conv[WINDOW_SIZE*2] > self.state['convolve_max'] * 1.2 or conv[WINDOW_SIZE*2] < self.state['convolve_max'] * 0.8:
+                if conv[self.state['WINDOW_SIZE']*2] > self.state['convolve_max'] * 1.2 or conv[self.state['WINDOW_SIZE']*2] < self.state['convolve_max'] * 0.8:
                     delete_list.append(segment)
             else:
                 delete_list.append(segment)
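For context, the filter above scores each candidate by convolving it with the stored pattern window and keeps only scores within ±20% of the convolve_max learned in do_fit; JumpModel applies the same ±20% test but on max(conv) instead of the near-center sample. A minimal standalone sketch of the check (synthetic values; window_size and the function names are hypothetical):

```python
import numpy as np
import scipy.signal

window_size = 240  # stands in for self.state['WINDOW_SIZE']

def drop_score(pattern_data: np.ndarray, convol_data: np.ndarray) -> float:
    # full convolution of two 2*window_size windows; the model samples it
    # at index window_size * 2, near the fully-overlapped (aligned) lag
    conv = scipy.signal.fftconvolve(pattern_data, convol_data)
    return conv[window_size * 2]

def keep_candidate(score: float, convolve_max: float) -> bool:
    # mirror of the deletion test: anything outside the +/-20% band is dropped
    return convolve_max * 0.8 <= score <= convolve_max * 1.2
```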

analytics/models/general_model.py (32 changes)

@@ -1,4 +1,4 @@
-from models import Model, AnalyticUnitCache
+from models import Model

 import utils
 import numpy as np
@@ -9,11 +9,8 @@ from scipy.signal import argrelextrema
 import math
 from scipy.stats import gaussian_kde
 from scipy.stats import norm
-from typing import Optional
-
-WINDOW_SIZE = 150

 class GeneralModel(Model):
@@ -22,15 +19,12 @@ class GeneralModel(Model):
         self.segments = []
         self.ipats = []
         self.state = {
-            'convolve_max': WINDOW_SIZE,
+            'convolve_max': 200,
+            'WINDOW_SIZE': 240,
         }
         self.all_conv = []

-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
-        if type(cache) is AnalyticUnitCache:
-            self.state = cache
-        self.segments = segments
+    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
         data = dataframe['value']
         convolve_list = []
         for segment in segments:
@@ -43,7 +37,7 @@ class GeneralModel(Model):
                 continue
             x = segment_from_index + int((segment_to_index - segment_from_index) / 2)
             self.ipats.append(x)
-            segment_data = data[x - WINDOW_SIZE : x + WINDOW_SIZE]
+            segment_data = data[x - self.state['WINDOW_SIZE'] : x + self.state['WINDOW_SIZE']]
             segment_min = min(segment_data)
             segment_data = segment_data - segment_min
             convolve = scipy.signal.fftconvolve(segment_data, segment_data)
@@ -52,29 +46,25 @@ class GeneralModel(Model):
         if len(convolve_list) > 0:
             self.state['convolve_max'] = float(max(convolve_list))
         else:
-            self.state['convolve_max'] = WINDOW_SIZE / 3
-        return self.state
+            self.state['convolve_max'] = self.state['WINDOW_SIZE'] / 3

     def do_predict(self, dataframe: pd.DataFrame) -> list:
         data = dataframe['value']
-        pat_data = data[self.ipats[0] - WINDOW_SIZE: self.ipats[0] + WINDOW_SIZE]
+        pat_data = data[self.ipats[0] - self.state['WINDOW_SIZE']: self.ipats[0] + self.state['WINDOW_SIZE']]
         x = min(pat_data)
         pat_data = pat_data - x
         y = max(pat_data)

-        for i in range(WINDOW_SIZE * 2, len(data)):
-            watch_data = data[i - WINDOW_SIZE * 2: i]
+        for i in range(self.state['WINDOW_SIZE'] * 2, len(data)):
+            watch_data = data[i - self.state['WINDOW_SIZE'] * 2: i]
             w = min(watch_data)
             watch_data = watch_data - w
             conv = scipy.signal.fftconvolve(pat_data, watch_data)
             self.all_conv.append(max(conv))
-        all_conv_peaks = utils.peak_finder(self.all_conv, WINDOW_SIZE * 2)
+        all_conv_peaks = utils.peak_finder(self.all_conv, self.state['WINDOW_SIZE'] * 2)

         filtered = self.__filter_prediction(all_conv_peaks, data)
-        filtered = set(item + WINDOW_SIZE for item in filtered)
-        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
-        return [(dataframe['timestamp'][x - 1].value / 1000000, dataframe['timestamp'][x + 1].value / 1000000) for x in filtered]
+        return set(item + self.state['WINDOW_SIZE'] for item in filtered)

     def __filter_prediction(self, segments: list, data: list):
         if len(segments) == 0 or len(self.ipats) == 0:
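GeneralModel.do_predict scans the whole series with a window of 2 * WINDOW_SIZE samples, convolving each window against the zero-based labeled pattern and then peak-finding on the resulting score series. A minimal sketch of that scan, assuming plain numpy arrays (names hypothetical):

```python
import numpy as np
import scipy.signal

def convolution_scores(data: np.ndarray, pattern: np.ndarray, window_size: int) -> np.ndarray:
    # zero-base the pattern once, as do_predict does with pat_data
    pattern = pattern - pattern.min()
    scores = []
    for i in range(window_size * 2, len(data)):
        window = data[i - window_size * 2 : i]  # sliding 2*window_size window
        window = window - window.min()          # zero-base each window too
        scores.append(scipy.signal.fftconvolve(pattern, window).max())
    # local maxima of this series mark candidate pattern positions
    return np.array(scores)
```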

analytics/models/jump_model.py (33 changes)

@@ -1,18 +1,14 @@
-from models import Model, AnalyticUnitCache
+from models import Model

 import utils
 import numpy as np
 import pandas as pd
 import scipy.signal
 from scipy.fftpack import fft
+from scipy.signal import argrelextrema
 import math
-from scipy.signal import argrelextrema
 from scipy.stats import gaussian_kde
-from scipy.stats import norm
-from typing import Optional
-
-WINDOW_SIZE = 200

 class JumpModel(Model):
@@ -22,16 +18,12 @@ class JumpModel(Model):
         self.ijumps = []
         self.state = {
             'confidence': 1.5,
-            'convolve_max': WINDOW_SIZE,
+            'convolve_max': 230,
             'JUMP_HEIGHT': 1,
             'JUMP_LENGTH': 1,
         }

-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
-        if type(cache) is AnalyticUnitCache:
-            self.state = cache
-        self.segments = segments
+    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
         data = dataframe['value']
         confidences = []
         convolve_list = []
@@ -69,11 +61,10 @@ class JumpModel(Model):
             jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
             jump_length_list.append(jump_length)
             cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-            #cen_ind = utils.find_ind_median(segment_median, flat_segment)
             jump_center = cen_ind[0]
             segment_cent_index = jump_center - 5 + segment_from_index
             self.ijumps.append(segment_cent_index)
-            labeled_jump = data[segment_cent_index - WINDOW_SIZE : segment_cent_index + WINDOW_SIZE]
+            labeled_jump = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE']]
             labeled_min = min(labeled_jump)
             for value in labeled_jump:
                 value = value - labeled_min
@@ -88,7 +79,7 @@ class JumpModel(Model):
         if len(convolve_list) > 0:
             self.state['convolve_max'] = float(max(convolve_list))
         else:
-            self.state['convolve_max'] = WINDOW_SIZE
+            self.state['convolve_max'] = self.state['WINDOW_SIZE']

         if len(jump_height_list) > 0:
             self.state['JUMP_HEIGHT'] = int(min(jump_height_list))
@@ -100,15 +91,11 @@ class JumpModel(Model):
         else:
             self.state['JUMP_LENGTH'] = 1
-        return self.state

     def do_predict(self, dataframe: pd.DataFrame) -> list:
         data = dataframe['value']
         possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
-        filtered = self.__filter_prediction(possible_jumps, data)
-        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
-        return [(dataframe['timestamp'][x - 1].value / 1000000, dataframe['timestamp'][x + 1].value / 1000000) for x in filtered]
+        return self.__filter_prediction(possible_jumps, data)

     def __filter_prediction(self, segments, data):
         delete_list = []
@@ -125,10 +112,10 @@ class JumpModel(Model):
             segments = []
             return segments
-        pattern_data = data[self.ijumps[0] - WINDOW_SIZE : self.ijumps[0] + WINDOW_SIZE]
+        pattern_data = data[self.ijumps[0] - self.state['WINDOW_SIZE'] : self.ijumps[0] + self.state['WINDOW_SIZE']]
         for segment in segments:
-            if segment > WINDOW_SIZE and segment < (len(data) - WINDOW_SIZE):
-                convol_data = data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+            if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
+                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE']]
                 conv = scipy.signal.fftconvolve(pattern_data, convol_data)
                 if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:

analytics/models/model.py (33 changes)

@@ -1,28 +1,51 @@
 import utils

 from abc import ABC, abstractmethod
-from pandas import DataFrame
 from typing import Optional
+import pandas as pd
+import math

 AnalyticUnitCache = dict

 class Model(ABC):

     @abstractmethod
-    def fit(self, dataframe: DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+    def do_fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> None:
         pass

     @abstractmethod
-    def do_predict(self, dataframe: DataFrame) -> list:
+    def do_predict(self, dataframe: pd.DataFrame) -> list:
         pass

-    def predict(self, dataframe: DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
+        self.segments = segments
+        segment_length_list = []
+        for segment in self.segments:
+            if segment['labeled']:
+                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
+                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
+                segment_length = abs(segment_to_index - segment_from_index)
+                segment_length_list.append(segment_length)
+        self.state['WINDOW_SIZE'] = math.ceil(max(segment_length_list) / 2)
+        self.do_fit(dataframe, segments)
+        return self.state
+
+    def predict(self, dataframe: pd.DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
         if type(cache) is AnalyticUnitCache:
             self.state = cache

         result = self.do_predict(dataframe)
+        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
+        segments = [(
+            dataframe['timestamp'][x - 1].value / 1000000,
+            dataframe['timestamp'][x + 1].value / 1000000
+        ) for x in result]

         return {
-            'segments': result,
+            'segments': segments,
             'cache': self.state
         }
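The net effect of the refactor: Model.fit() is now a template method that restores the cache, derives WINDOW_SIZE as half the longest labeled segment, and delegates to the subclass's do_fit(); predict() converts the indices returned by do_predict() into millisecond timestamp pairs in one place. A minimal usage sketch, assuming the concrete models are re-exported by the models package (as the `from models import Model` lines above suggest); the synthetic data and segment values are hypothetical:

```python
import numpy as np
import pandas as pd
from models import PeakModel  # assumed export from the models package

# synthetic series: flat signal with one labeled peak around index 500
timestamps = pd.date_range('2018-01-01', periods=1000, freq='S')
values = np.zeros(1000)
values[490:510] = np.hanning(20)
dataframe = pd.DataFrame({'timestamp': timestamps, 'value': values})

# segments carry epoch milliseconds, matching pd.to_datetime(..., unit='ms') in fit()
to_ms = lambda ts: int(ts.value / 1e6)
segments = [{'from': to_ms(timestamps[480]), 'to': to_ms(timestamps[520]), 'labeled': True}]

model = PeakModel()
cache = model.fit(dataframe, segments, None)  # sets cache['WINDOW_SIZE'] = ceil(40 / 2)
result = model.predict(dataframe, cache)      # {'segments': [(from_ms, to_ms), ...], 'cache': {...}}
```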

analytics/models/peak_model.py (64 changes)

@@ -1,4 +1,4 @@
-from models import Model, AnalyticUnitCache
+from models import Model

 import scipy.signal
 from scipy.fftpack import fft
@@ -7,9 +7,6 @@ from scipy.signal import argrelextrema
 import utils
 import numpy as np
 import pandas as pd
-from typing import Optional
-
-WINDOW_SIZE = 240

 class PeakModel(Model):
@@ -20,16 +17,13 @@ class PeakModel(Model):
         self.ipeaks = []
         self.state = {
             'confidence': 1.5,
-            'convolve_max': 570000
+            'convolve_max': 570000,
+            'convolve_min': 530000,
+            'WINDOW_SIZE': 240,
         }

-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
-        if type(cache) is AnalyticUnitCache:
-            self.state = cache
-        self.segments = segments
+    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
         data = dataframe['value']
         confidences = []
         convolve_list = []
         for segment in segments:
@@ -43,16 +37,16 @@ class PeakModel(Model):
                 segment_min = min(segment_data)
                 segment_max = max(segment_data)
                 confidences.append(0.2 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window=5).mean()
-                flat_segment = flat_segment.dropna()
-                segment_max_index = flat_segment.idxmax() # + segment['start']
+                segment_max_index = segment_data.idxmax()
                 self.ipeaks.append(segment_max_index)
-                labeled_drop = data[segment_max_index - WINDOW_SIZE: segment_max_index + WINDOW_SIZE]
-                labeled_min = min(labeled_drop)
-                for value in labeled_drop:
-                    value = value - labeled_min
-                convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
-                convolve_list.append(max(convolve))
+                labeled_peak = data[segment_max_index - self.state['WINDOW_SIZE']: segment_max_index + self.state['WINDOW_SIZE']]
+                labeled_peak = labeled_peak - min(labeled_peak)
+                auto_convolve = scipy.signal.fftconvolve(labeled_peak, labeled_peak)
+                first_peak = data[self.ipeaks[0] - self.state['WINDOW_SIZE']: self.ipeaks[0] + self.state['WINDOW_SIZE']]
+                first_peak = first_peak - min(first_peak)
+                convolve_peak = scipy.signal.fftconvolve(labeled_peak, first_peak)
+                convolve_list.append(max(auto_convolve))
+                convolve_list.append(max(convolve_peak))

         if len(confidences) > 0:
             self.state['confidence'] = float(min(confidences))
@@ -62,9 +56,12 @@ class PeakModel(Model):
         if len(convolve_list) > 0:
             self.state['convolve_max'] = float(max(convolve_list))
         else:
-            self.state['convolve_max'] = 570000
-        return self.state
+            self.state['convolve_max'] = self.state['WINDOW_SIZE']
+
+        if len(convolve_list) > 0:
+            self.state['convolve_min'] = float(min(convolve_list))
+        else:
+            self.state['convolve_min'] = self.state['WINDOW_SIZE']

     def do_predict(self, dataframe: pd.DataFrame):
         data = dataframe['value']
@@ -80,15 +77,13 @@ class PeakModel(Model):
             if data[i] > extrema_list[i]:
                 segments.append(i)

-        filtered = self.__filter_prediction(segments, data)
-        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
-        return [(dataframe['timestamp'][x - 1].value / 1000000, dataframe['timestamp'][x + 1].value / 1000000) for x in filtered]
+        return self.__filter_prediction(segments, data)

     def __filter_prediction(self, segments: list, data: list) -> list:
         delete_list = []
         variance_error = int(0.004 * len(data))
-        if variance_error > 100:
-            variance_error = 100
+        if variance_error > 50:
+            variance_error = 50
         for i in range(1, len(segments)):
             if segments[i] < segments[i - 1] + variance_error:
                 delete_list.append(segments[i])
@@ -98,18 +93,19 @@ class PeakModel(Model):
             delete_list = []
         if len(segments) == 0 or len(self.ipeaks) == 0:
             return []

-        pattern_data = data[self.ipeaks[0] - WINDOW_SIZE: self.ipeaks[0] + WINDOW_SIZE]
+        pattern_data = data[self.ipeaks[0] - self.state['WINDOW_SIZE']: self.ipeaks[0] + self.state['WINDOW_SIZE']]
+        pattern_data = pattern_data - min(pattern_data)
         for segment in segments:
-            if segment > WINDOW_SIZE:
-                convol_data = data[segment - WINDOW_SIZE: segment + WINDOW_SIZE]
+            if segment > self.state['WINDOW_SIZE']:
+                convol_data = data[segment - self.state['WINDOW_SIZE']: segment + self.state['WINDOW_SIZE']]
+                convol_data = convol_data - min(convol_data)
                 conv = scipy.signal.fftconvolve(pattern_data, convol_data)
-                if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
+                if max(conv) > self.state['convolve_max'] * 1.05 or max(conv) < self.state['convolve_min'] * 0.95:
                     delete_list.append(segment)
             else:
                 delete_list.append(segment)
         # TODO: implement filtering
-        # for item in delete_list:
-        #     segments.remove(item)
+        for item in delete_list:
+            segments.remove(item)
         return set(segments)
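PeakModel (and TroughModel below, which mirrors it) now learns both a convolve_max and a convolve_min during fitting, so the filter keeps a candidate only when its correlation with the stored pattern lands inside the learned band, with 5% slack on either side. A minimal sketch of that acceptance test (names hypothetical, synthetic arrays assumed):

```python
import numpy as np
import scipy.signal

def in_convolve_band(pattern_data: np.ndarray, convol_data: np.ndarray,
                     convolve_min: float, convolve_max: float) -> bool:
    # zero-base both windows, as __filter_prediction now does
    pattern_data = pattern_data - pattern_data.min()
    convol_data = convol_data - convol_data.min()
    score = scipy.signal.fftconvolve(pattern_data, convol_data).max()
    # keep iff the score falls inside the learned band, +/-5% slack
    return convolve_min * 0.95 <= score <= convolve_max * 1.05
```

Compared with the old one-sided ±20% test against convolve_max alone, the two-sided band also rejects candidates whose correlation is suspiciously low relative to the weakest labeled example.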

analytics/models/trough_model.py (71 changes)

@@ -1,4 +1,4 @@
-from models import Model, AnalyticUnitCache
+from models import Model

 import scipy.signal
 from scipy.fftpack import fft
@@ -7,28 +7,23 @@ from scipy.signal import argrelextrema
 import utils
 import numpy as np
 import pandas as pd
-from typing import Optional
-
-WINDOW_SIZE = 240

 class TroughModel(Model):

     def __init__(self):
         super()
         self.segments = []
-        self.ipeaks = []
+        self.itroughs = []
         self.state = {
             'confidence': 1.5,
-            'convolve_max': 570000
+            'convolve_max': 570000,
+            'convolve_min': 530000,
+            'WINDOW_SIZE': 240,
         }

-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
-        if type(cache) is AnalyticUnitCache:
-            self.state = cache
-        self.segments = segments
+    def do_fit(self, dataframe: pd.DataFrame, segments: list) -> None:
         data = dataframe['value']
         confidences = []
         convolve_list = []
         for segment in segments:
@@ -42,16 +37,16 @@ class TroughModel(Model):
             segment_min = min(segment_data)
             segment_max = max(segment_data)
             confidences.append(0.2 * (segment_max - segment_min))
-            flat_segment = segment_data.rolling(window=5).mean()
-            flat_segment = flat_segment.dropna()
-            segment_min_index = flat_segment.idxmin() #+ segment['start']
-            self.ipeaks.append(segment_min_index)
-            labeled_drop = data[segment_min_index - WINDOW_SIZE : segment_min_index + WINDOW_SIZE]
-            labeled_min = min(labeled_drop)
-            for value in labeled_drop:
-                value = value - labeled_min
-            convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
-            convolve_list.append(max(convolve))
+            segment_min_index = segment_data.idxmin()
+            self.itroughs.append(segment_min_index)
+            labeled_trough = data[segment_min_index - self.state['WINDOW_SIZE'] : segment_min_index + self.state['WINDOW_SIZE']]
+            labeled_trough = labeled_trough - min(labeled_trough)
+            auto_convolve = scipy.signal.fftconvolve(labeled_trough, labeled_trough)
+            first_trough = data[self.itroughs[0] - self.state['WINDOW_SIZE']: self.itroughs[0] + self.state['WINDOW_SIZE']]
+            first_trough = first_trough - min(first_trough)
+            convolve_trough = scipy.signal.fftconvolve(labeled_trough, first_trough)
+            convolve_list.append(max(auto_convolve))
+            convolve_list.append(max(convolve_trough))

         if len(confidences) > 0:
             self.state['confidence'] = float(min(confidences))
@@ -61,9 +56,12 @@ class TroughModel(Model):
         if len(convolve_list) > 0:
             self.state['convolve_max'] = float(max(convolve_list))
         else:
-            self.state['convolve_max'] = 570000
-        return self.state
+            self.state['convolve_max'] = self.state['WINDOW_SIZE']
+
+        if len(convolve_list) > 0:
+            self.state['convolve_min'] = float(min(convolve_list))
+        else:
+            self.state['convolve_min'] = self.state['WINDOW_SIZE']

     def do_predict(self, dataframe: pd.DataFrame):
         data = dataframe['value']
@@ -78,16 +76,14 @@ class TroughModel(Model):
         for i in all_mins:
             if data[i] < extrema_list[i]:
                 segments.append(i)
-        test = dataframe['timestamp'][1].value

-        filtered = self.__filter_prediction(segments, data)
-        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
-        return [(dataframe['timestamp'][x - 1].value / 1000000, dataframe['timestamp'][x + 1].value / 1000000) for x in filtered]
+        return self.__filter_prediction(segments, data)

     def __filter_prediction(self, segments: list, data: list) -> list:
         delete_list = []
         variance_error = int(0.004 * len(data))
-        if variance_error > 100:
-            variance_error = 100
+        if variance_error > 50:
+            variance_error = 50
         for i in range(1, len(segments)):
             if segments[i] < segments[i - 1] + variance_error:
                 delete_list.append(segments[i])
@@ -95,21 +91,22 @@ class TroughModel(Model):
             segments.remove(item)
         delete_list = []
-        if len(segments) == 0 or len(self.ipeaks) == 0 :
+        if len(segments) == 0 or len(self.itroughs) == 0 :
             segments = []
             return segments

-        pattern_data = data[self.ipeaks[0] - WINDOW_SIZE : self.ipeaks[0] + WINDOW_SIZE]
+        pattern_data = data[self.itroughs[0] - self.state['WINDOW_SIZE'] : self.itroughs[0] + self.state['WINDOW_SIZE']]
+        pattern_data = pattern_data - min(pattern_data)
         for segment in segments:
-            if segment > WINDOW_SIZE:
-                convol_data = data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
+            if segment > self.state['WINDOW_SIZE']:
+                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE']]
+                convol_data = convol_data - min(convol_data)
                 conv = scipy.signal.fftconvolve(pattern_data, convol_data)
-                if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
+                if max(conv) > self.state['convolve_max'] * 1.05 or max(conv) < self.state['convolve_min'] * 0.95:
                     delete_list.append(segment)
             else:
                 delete_list.append(segment)
         # TODO: implement filtering
-        # for item in delete_list:
-        #     segments.remove(item)
+        for item in delete_list:
+            segments.remove(item)
         return set(segments)
