
Make all models work && add reverse peak model (#124)

- Subtract min value from dataset before passing to model

- Rename StepModel -> DropModel

- Use cache to save state in all models

- Return `Segment { 'from': <timestamp>, 'to': <timestamp> }` instead of `Segment { 'from': <index>, 'to': <index> }` in all models

- Integrate new peaks model (from https://github.com/hastic/hastic-server/pull/123)

- Integrate new reverse-peaks model (from https://github.com/hastic/hastic-server/pull/123)

- Refactor: make the `predict` method in `Model` non-abstract and remove it from all children

- Refactor: add an abstract `do_predict` method to models (a condensed sketch of the resulting base class follows this list)
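The net effect of the two refactor items is easiest to see in `analytics/models/model.py` (full diff below): `predict` becomes a concrete template method on `Model` that restores state from the cache, delegates detection to the abstract `do_predict`, drops results that intersect labeled segments, and returns the segments together with the updated cache. The sketch below condenses that pattern; the `ToyModel` subclass and its threshold logic are a hypothetical illustration added here for clarity and are not part of this commit.

from abc import ABC, abstractmethod
from pandas import DataFrame
from typing import Optional

import utils

AnalyticUnitCache = dict  # in this commit the cache is a plain dict

class Model(ABC):

    @abstractmethod
    def fit(self, dataframe: DataFrame, segments: list,
            cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
        pass

    @abstractmethod
    def do_predict(self, dataframe: DataFrame):
        pass

    def predict(self, dataframe: DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
        if type(cache) is AnalyticUnitCache:
            self.state = cache                    # restore learned state from the cache
        result = self.do_predict(dataframe)       # list of (from_timestamp, to_timestamp) tuples
        result.sort()
        if len(self.segments) > 0:                # drop predictions overlapping labeled segments
            result = [s for s in result if not utils.is_intersect(s, self.segments)]
        return {
            'segments': result,
            'cache': self.state
        }

# Hypothetical subclass, only to show what children implement after the refactor:
class ToyModel(Model):
    def __init__(self):
        self.segments = []
        self.state = { 'threshold': 0 }

    def fit(self, dataframe: DataFrame, segments: list,
            cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
        if type(cache) is AnalyticUnitCache:
            self.state = cache
        self.segments = segments
        # invented "learning" step: remember the mean value as a threshold
        self.state['threshold'] = float(dataframe['value'].mean())
        return self.state

    def do_predict(self, dataframe: DataFrame):
        # flag every point above the learned threshold as a one-point segment
        above = dataframe[dataframe['value'] > self.state['threshold']]
        return [(row['timestamp'].value, row['timestamp'].value) for _, row in above.iterrows()]

PatternDetector then calls `model.fit(...)` during LEARN and `model.predict(...)` during PREDICT, passing the cache through in both directions (see `analytics/detectors/pattern_detector.py` below).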
rozetko committed 6 years ago via GitHub (commit c2c3925979)
Changed files (16):
  1. analytics/analytic_unit_manager.py (21 lines changed)
  2. analytics/analytic_unit_worker.py (6 lines changed)
  3. analytics/detectors/detector.py (6 lines changed)
  4. analytics/detectors/general_detector/general_detector.py (8 lines changed)
  5. analytics/detectors/pattern_detector.py (24 lines changed)
  6. analytics/models/__init__.py (9 lines changed)
  7. analytics/models/custom_model.py (5 lines changed)
  8. analytics/models/drop_model.py (40 lines changed)
  9. analytics/models/jump_model.py (51 lines changed)
  10. analytics/models/model.py (22 lines changed)
  11. analytics/models/peak_model.py (113 lines changed)
  12. analytics/models/peaks_model.py (59 lines changed)
  13. analytics/models/reverse_peak_model.py (112 lines changed)
  14. analytics/utils/__init__.py (9 lines changed)
  15. server/src/controllers/analytics_controller.ts (73 lines changed)
  16. server/src/models/analytic_unit_cache_model.ts (34 lines changed)

analytics/analytic_unit_manager.py (21 lines changed)

@@ -33,12 +33,11 @@ async def handle_analytic_task(task):
         worker = ensure_worker(task['analyticUnitId'], payload['pattern'])
-        data = pd.DataFrame(payload['data'], columns=['timestamp', 'value'])
-        data['timestamp'] = pd.to_datetime(data['timestamp'])
+        data = prepare_data(payload['data'])
         result_payload = {}
-        if task['type'] == "LEARN":
+        if task['type'] == 'LEARN':
             result_payload = await worker.do_learn(payload['segments'], data, payload['cache'])
-        elif task['type'] == "PREDICT":
+        elif task['type'] == 'PREDICT':
             result_payload = await worker.do_predict(data, payload['cache'])
         else:
             raise ValueError('Unknown task type "%s"' % task['type'])
@@ -52,8 +51,20 @@ async def handle_analytic_task(task):
         logger.error("handle_analytic_task exception: '%s'" % error_text)
         # TODO: move result to a class which renders to json for messaging to analytics
         return {
-            'status': "FAILED",
+            'status': 'FAILED',
             'error': str(e)
         }
+
+def prepare_data(data: list):
+    """
+    Takes list
+    - converts it into pd.DataFrame,
+    - converts 'timestamp' column to pd.Datetime,
+    - subtracts min value from dataset
+    """
+    data = pd.DataFrame(data, columns=['timestamp', 'value'])
+    data['timestamp'] = pd.to_datetime(data['timestamp'])
+    data['value'] = data['value'] - min(data['value'])
+    return data

analytics/analytic_unit_worker.py (6 lines changed)

@@ -2,6 +2,8 @@ import config
 import detectors
 import logging
 import pandas as pd
+from typing import Optional
+from models import AnalyticUnitCache
 logger = logging.getLogger('AnalyticUnitWorker')
@@ -13,8 +15,8 @@ class AnalyticUnitWorker:
         self.analytic_unit_id = analytic_unit_id
         self.detector = detector
-    async def do_learn(self, segments: list, data: pd.DataFrame, cache: dict) -> dict:
+    async def do_learn(self, segments: list, data: pd.DataFrame, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
         return await self.detector.train(data, segments, cache)
-    async def do_predict(self, data: pd.DataFrame, cache: dict) -> dict:
+    async def do_predict(self, data: pd.DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
         return await self.detector.predict(data, cache)

analytics/detectors/detector.py (6 lines changed)

@@ -1,13 +1,15 @@
+from models import AnalyticUnitCache
 from abc import ABC, abstractmethod
 from pandas import DataFrame
+from typing import Optional
 class Detector(ABC):
     @abstractmethod
-    async def train(self, dataframe: DataFrame, segments: list, cache: dict) -> dict:
+    async def train(self, dataframe: DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
         pass
     @abstractmethod
-    async def predict(self, dataframe: DataFrame, cache: dict) -> dict:
+    async def predict(self, dataframe: DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
         pass

analytics/detectors/general_detector/general_detector.py (8 lines changed)

@@ -1,11 +1,13 @@
 from detectors.general_detector.supervised_algorithm import SupervisedAlgorithm
 from detectors import Detector
+from models import AnalyticUnitCache
 import utils
 import pandas as pd
 import logging
 import config
 import json
+from typing import Optional
 NANOSECONDS_IN_MS = 1000000
@@ -18,7 +20,7 @@ class GeneralDetector(Detector):
     def __init__(self):
         self.model = None
-    async def train(self, dataframe: pd.DataFrame, segments: list, cache: dict):
+    async def train(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
         confidence = 0.02
         start_index, stop_index = 0, len(dataframe)
@@ -43,9 +45,9 @@ class GeneralDetector(Detector):
            last_prediction_time = 0
        logger.info("Learning is finished for anomaly_name='%s'" % self.anomaly_name)
-       return last_prediction_time
+       return cache
-    async def predict(self, dataframe: pd.DataFrame, cache: dict):
+    async def predict(self, dataframe: pd.DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
        logger.info("Start to predict for anomaly type='%s'" % self.anomaly_name)
        last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')

analytics/detectors/pattern_detector.py (24 lines changed)

@@ -4,6 +4,7 @@ import logging
 import config
 import pandas as pd
+from typing import Optional
 from detectors import Detector
@@ -13,9 +14,11 @@ logger = logging.getLogger('PATTERN_DETECTOR')
 def resolve_model_by_pattern(pattern: str) -> models.Model:
     if pattern == 'PEAK':
-        return models.PeaksModel()
+        return models.PeakModel()
+    if pattern == 'REVERSE_PEAK':
+        return models.ReversePeakModel()
     if pattern == 'DROP':
-        return models.StepModel()
+        return models.DropModel()
     if pattern == 'JUMP':
         return models.JumpModel()
     if pattern == 'CUSTOM':
@@ -30,23 +33,24 @@ class PatternDetector(Detector):
         self.model = resolve_model_by_pattern(self.pattern_type)
         window_size = 100
-    async def train(self, dataframe: pd.DataFrame, segments: list, cache: dict):
+    async def train(self, dataframe: pd.DataFrame, segments: list, cache: Optional[models.AnalyticUnitCache]) -> models.AnalyticUnitCache:
         # TODO: pass only part of dataframe that has segments
-        self.model.fit(dataframe, segments, cache)
-        # TODO: save model after fit
+        new_cache = self.model.fit(dataframe, segments, cache)
         return {
-            'cache': cache
+            'cache': new_cache
         }
-    async def predict(self, dataframe: pd.DataFrame, cache: dict):
-        predicted = await self.model.predict(dataframe, cache)
+    async def predict(self, dataframe: pd.DataFrame, cache: Optional[models.AnalyticUnitCache]) -> dict:
+        # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)
+        predicted = self.model.predict(dataframe, cache)
-        segments = [{ 'from': segment[0], 'to': segment[1] } for segment in predicted]
+        segments = [{ 'from': segment[0], 'to': segment[1] } for segment in predicted['segments']]
+        newCache = predicted['cache']
         last_dataframe_time = dataframe.iloc[-1]['timestamp']
         last_prediction_time = last_dataframe_time.value
         return {
-            'cache': cache,
+            'cache': newCache,
             'segments': segments,
             'lastPredictionTime': last_prediction_time
         }

analytics/models/__init__.py (9 lines changed)

@@ -1,5 +1,8 @@
-from models.model import Model
-from models.step_model import StepModel
-from models.peaks_model import PeaksModel
+from models.model import Model, AnalyticUnitCache
+from models.drop_model import DropModel
+from models.peak_model import PeakModel
 from models.jump_model import JumpModel
 from models.custom_model import CustomModel
+from models.custom_model import CustomModel
+from models.reverse_peak_model import ReversePeakModel

analytics/models/custom_model.py (5 lines changed)

@@ -1,6 +1,7 @@
 from models import Model
 import utils
 import pandas as pd
+from typing import Optional
 # Paste your model here:
 class CustomModel(Model):
@@ -11,8 +12,8 @@ class CustomModel(Model):
         # It will be saved in filesystem and loaded after server restart
         self.state = {}
-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: dict) -> dict:
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[dict]) -> dict:
         pass
-    def predict(self, dataframe, cache: dict):
+    def predict(self, dataframe, cache: Optional[dict]):
         return []

analytics/models/step_model.py → analytics/models/drop_model.py (40 lines changed)

@@ -1,4 +1,4 @@
-from models import Model
+from models import Model, AnalyticUnitCache
 import scipy.signal
 from scipy.fftpack import fft
@@ -8,10 +8,11 @@ from scipy.stats import gaussian_kde
 import utils
 import numpy as np
 import pandas as pd
+from typing import Optional
 WINDOW_SIZE = 400
-class StepModel(Model):
+class DropModel(Model):
     def __init__(self):
         super()
         self.segments = []
@@ -23,13 +24,12 @@ class StepModel(Model):
             'DROP_LENGTH': 1,
         }
-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: dict) -> dict:
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
         self.segments = segments
-        d_min = min(dataframe['value'])
-        for i in range(0,len(dataframe['value'])):
-            dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min
-        data = dataframe['value']
+        data = dataframe['value']
         confidences = []
         convolve_list = []
         drop_height_list = []
@@ -76,46 +76,34 @@ class StepModel(Model):
                convolve_list.append(max(convolve))
         if len(confidences) > 0:
-            self.state['confidence'] = min(confidences)
+            self.state['confidence'] = float(min(confidences))
         else:
             self.state['confidence'] = 1.5
         if len(convolve_list) > 0:
-            self.state['convolve_max'] = max(convolve_list)
+            self.state['convolve_max'] = float(max(convolve_list))
         else:
             self.state['convolve_max'] = WINDOW_SIZE
         if len(drop_height_list) > 0:
-            self.state['DROP_HEIGHT'] = min(drop_height_list)
+            self.state['DROP_HEIGHT'] = int(min(drop_height_list))
         else:
             self.state['DROP_HEIGHT'] = 1
         if len(drop_length_list) > 0:
-            self.state['DROP_LENGTH'] = max(drop_length_list)
+            self.state['DROP_LENGTH'] = int(max(drop_length_list))
         else:
             self.state['DROP_LENGTH'] = 1
-    async def predict(self, dataframe: pd.DataFrame, cache: dict) -> dict:
-        d_min = min(dataframe['value'])
-        for i in range(0,len(dataframe['value'])):
-            dataframe.loc[i, 'value'] = dataframe.loc[i, 'value'] - d_min
-        result = await self.__predict(dataframe)
-        if len(self.segments) > 0:
-            return [segment for segment in result if not utils.is_intersect(segment, self.segments)]
-    async def __predict(self, dataframe):
-        #window_size = 24
-        #all_max_flatten_data = data.rolling(window=window_size).mean()
-        #all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
-        #print(self.state['DROP_HEIGHT'],self.state['DROP_LENGTH'])
+        return self.state
+    def do_predict(self, dataframe: pd.DataFrame):
         data = dataframe['value']
         possible_drops = utils.find_drop(data, self.state['DROP_HEIGHT'], self.state['DROP_LENGTH'] + 1)
         filtered = self.__filter_prediction(possible_drops, data)
         return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
-    def __filter_prediction(self, segments, data):
+    def __filter_prediction(self, segments: list, data: list):
         delete_list = []
         variance_error = int(0.004 * len(data))
         if variance_error > 50:

analytics/models/jump_model.py (51 lines changed)

@@ -1,4 +1,4 @@
-from models import Model
+from models import Model, AnalyticUnitCache
 import utils
 import numpy as np
@@ -9,6 +9,7 @@ from scipy.signal import argrelextrema
 import math
 from scipy.stats import gaussian_kde
 from scipy.stats import norm
+from typing import Optional
 WINDOW_SIZE = 400
@@ -26,8 +27,11 @@ class JumpModel(Model):
            'JUMP_LENGTH': 1,
        }
-    def fit(self, dataframe: pd.DataFrame, segments: list, cache: dict) -> dict:
+    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
         self.segments = segments
         data = dataframe['value']
         confidences = []
         convolve_list = []
@@ -35,13 +39,17 @@ class JumpModel(Model):
         jump_length_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_data = data.loc[segment['from'] : segment['to'] + 1].reset_index(drop=True)
+                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
+                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
+                segment_data = data.loc[segment_from_index : segment_to_index + 1].reset_index(drop=True)
                 segment_min = min(segment_data)
                 segment_max = max(segment_data)
                 confidences.append(0.20 * (segment_max - segment_min))
                 flat_segment = segment_data.rolling(window=5).mean()
-                pdf = gaussian_kde(flat_segment.dropna())
-                x = np.linspace(flat_segment.dropna().min() - 1, flat_segment.dropna().max() + 1, len(flat_segment.dropna()))
+                flat_segment_dropna = flat_segment.dropna()
+                pdf = gaussian_kde(flat_segment_dropna)
+                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
                 y = pdf(x)
                 ax_list = []
                 for i in range(len(x)):
@@ -56,12 +64,12 @@ class JumpModel(Model):
                 segment_max_line = ax_list[max_peak_index, 0]
                 jump_height = 0.9 * (segment_max_line - segment_min_line)
                 jump_height_list.append(jump_height)
-                jump_lenght = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
-                jump_length_list.append(jump_lenght)
+                jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
+                jump_length_list.append(jump_length)
                 cen_ind = utils.intersection_segment(flat_segment, segment_median) #finds all interseprions with median
                 #cen_ind = utils.find_ind_median(segment_median, flat_segment)
                 jump_center = cen_ind[0]
-                segment_cent_index = jump_center - 5 + segment['from']
+                segment_cent_index = jump_center - 5 + segment_from_index
                 self.ijumps.append(segment_cent_index)
                 labeled_drop = data[segment_cent_index - WINDOW_SIZE : segment_cent_index + WINDOW_SIZE]
                 labeled_min = min(labeled_drop)
@@ -71,41 +79,33 @@ class JumpModel(Model):
                convolve_list.append(max(convolve))
         if len(confidences) > 0:
-            self.state['confidence'] = min(confidences)
+            self.state['confidence'] = float(min(confidences))
         else:
             self.state['confidence'] = 1.5
         if len(convolve_list) > 0:
-            self.state['convolve_max'] = max(convolve_list)
+            self.state['convolve_max'] = float(max(convolve_list))
         else:
             self.state['convolve_max'] = WINDOW_SIZE
         if len(jump_height_list) > 0:
-            self.state['JUMP_HEIGHT'] = min(jump_height_list)
+            self.state['JUMP_HEIGHT'] = int(min(jump_height_list))
         else:
             self.state['JUMP_HEIGHT'] = 1
         if len(jump_length_list) > 0:
-            self.state['JUMP_LENGTH'] = max(jump_length_list)
+            self.state['JUMP_LENGTH'] = int(max(jump_length_list))
         else:
             self.state['JUMP_LENGTH'] = 1
-    def predict(self, dataframe: pd.DataFrame, cache: dict) -> dict:
-        data = dataframe['value']
-        result = self.__predict(data)
-        result.sort()
-        if len(self.segments) > 0:
-            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
-        return result
-    def __predict(self, data):
-        #window_size = 24
-        #all_max_flatten_data = data.rolling(window=window_size).mean()
-        #all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
+        return self.state
+    def do_predict(self, dataframe: pd.DataFrame):
+        data = dataframe['value']
         possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
-        return [(x - 1, x + 1) for x in self.__filter_prediction(possible_jumps, data)]
+        filtered = self.__filter_prediction(possible_jumps, data)
+        return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
     def __filter_prediction(self, segments, data):
         delete_list = []
@@ -138,5 +138,4 @@ class JumpModel(Model):
         for ijump in self.ijumps:
             segments.append(ijump)
         return segments

analytics/models/model.py (22 lines changed)

@@ -1,13 +1,31 @@
+import utils
 from abc import ABC, abstractmethod
 from pandas import DataFrame
+from typing import Optional
+AnalyticUnitCache = dict
 class Model(ABC):
     @abstractmethod
-    def fit(self, dataframe: DataFrame, segments: list, cache: dict) -> dict:
+    def fit(self, dataframe: DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
         pass
     @abstractmethod
-    def predict(self, dataframe: DataFrame, cache: dict) -> dict:
+    def do_predict(self, dataframe: DataFrame):
         pass
+    def predict(self, dataframe: DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
+        if type(cache) is AnalyticUnitCache:
+            self.state = cache
+        result = self.do_predict(dataframe)
+        result.sort()
+        if len(self.segments) > 0:
+            result = [segment for segment in result if not utils.is_intersect(segment, self.segments)]
+        return {
+            'segments': result,
+            'cache': self.state
+        }

analytics/models/peak_model.py (113 lines changed, new file)

@@ -0,0 +1,113 @@
from models import Model, AnalyticUnitCache
import scipy.signal
from scipy.fftpack import fft
from scipy.signal import argrelextrema
import utils
import numpy as np
import pandas as pd
from typing import Optional
WINDOW_SIZE = 240
class PeakModel(Model):
def __init__(self):
super()
self.segments = []
self.ipeaks = []
self.state = {
'confidence': 1.5,
'convolve_max': 570000
}
def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
if type(cache) is AnalyticUnitCache:
self.state = cache
self.segments = segments
data = dataframe['value']
confidences = []
convolve_list = []
for segment in segments:
if segment['labeled']:
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
segment_data = data[segment_from_index: segment_to_index + 1]
segment_min = min(segment_data)
segment_max = max(segment_data)
confidences.append(0.2 * (segment_max - segment_min))
flat_segment = segment_data.rolling(window=5).mean()
flat_segment = flat_segment.dropna()
segment_max_index = flat_segment.idxmax() # + segment['start']
self.ipeaks.append(segment_max_index)
labeled_drop = data[segment_max_index - WINDOW_SIZE: segment_max_index + WINDOW_SIZE]
labeled_min = min(labeled_drop)
for value in labeled_drop:
value = value - labeled_min
convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
convolve_list.append(max(convolve))
if len(confidences) > 0:
self.state['confidence'] = float(min(confidences))
else:
self.state['confidence'] = 1.5
if len(convolve_list) > 0:
self.state['convolve_max'] = float(max(convolve_list))
else:
self.state['convolve_max'] = 570000
return self.state
def do_predict(self, dataframe: pd.DataFrame):
data = dataframe['value']
window_size = 24
all_max_flatten_data = data.rolling(window=window_size).mean()
all_maxs = argrelextrema(np.array(all_max_flatten_data), np.greater)[0]
extrema_list = []
for i in utils.exponential_smoothing(data + self.state['confidence'], 0.02):
extrema_list.append(i)
segments = []
for i in all_maxs:
if all_max_flatten_data[i] > extrema_list[i]:
segments.append(i+12)
filtered = self.__filter_prediction(segments, data)
return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
def __filter_prediction(self, segments: list, all_max_flatten_data: list):
delete_list = []
variance_error = int(0.004 * len(all_max_flatten_data))
if variance_error > 100:
variance_error = 100
for i in range(1, len(segments)):
if segments[i] < segments[i - 1] + variance_error:
delete_list.append(segments[i])
for item in delete_list:
segments.remove(item)
delete_list = []
if len(segments) == 0 or len(self.ipeaks) == 0:
segments = []
return segments
pattern_data = all_max_flatten_data[self.ipeaks[0] - WINDOW_SIZE: self.ipeaks[0] + WINDOW_SIZE]
for segment in segments:
if segment > WINDOW_SIZE:
convol_data = all_max_flatten_data[segment - WINDOW_SIZE: segment + WINDOW_SIZE]
conv = scipy.signal.fftconvolve(pattern_data, convol_data)
if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
delete_list.append(segment)
else:
delete_list.append(segment)
for item in delete_list:
segments.remove(item)
return segments

analytics/models/peaks_model.py (59 lines changed, file removed)

@@ -1,59 +0,0 @@
from models import Model
import utils
from scipy import signal
import numpy as np
import pandas as pd
class PeaksModel(Model):
def __init__(self):
super()
def fit(self, dataframe: pd.DataFrame, segments: list, cache: dict) -> dict:
pass
def predict(self, dataframe: pd.DataFrame, cache: dict) -> dict:
array = dataframe['value'].as_matrix()
window_size = 20
# window = np.ones(101)
# mean_filtered = signal.fftconvolve(
# np.concatenate([np.zeros(window_size), array, np.zeros(window_size)]),
# window,
# mode='valid'
# )
# filtered = np.divide(array, mean_filtered / 101)
window = signal.general_gaussian(2 * window_size + 1, p=0.5, sig=5)
#print(window)
filtered = signal.fftconvolve(array, window, mode='valid')
# filtered = np.concatenate([
# np.zeros(window_size),
# filtered,
# np.zeros(window_size)
# ])
filtered = filtered / np.sum(window)
array = array[window_size:-window_size]
filtered = np.subtract(array, filtered)
# filtered = np.convolve(array, step, mode='valid')
# print(len(array))
# print(len(filtered))
# step = np.hstack((np.ones(window_size), 0, -1*np.ones(window_size)))
#
# conv = np.convolve(array, step, mode='valid')
#
# conv = np.concatenate([
# np.zeros(window_size),
# conv,
# np.zeros(window_size)])
#data = step_detect.t_scan(array, window=window_size)
data = filtered
data /= data.max()
result = utils.find_steps(data, 0.1)
return [(dataframe.index[x], dataframe.index[x + window_size]) for x in result]

analytics/models/reverse_peak_model.py (112 lines changed, new file)

@@ -0,0 +1,112 @@
from models import Model, AnalyticUnitCache
import scipy.signal
from scipy.fftpack import fft
from scipy.signal import argrelextrema
import utils
import numpy as np
import pandas as pd
from typing import Optional
WINDOW_SIZE = 240
class ReversePeakModel(Model):
def __init__(self):
super()
self.segments = []
self.ipeaks = []
self.state = {
'confidence': 1.5,
'convolve_max': 570000
}
def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
if type(cache) is AnalyticUnitCache:
self.state = cache
self.segments = segments
data = dataframe['value']
confidences = []
convolve_list = []
for segment in segments:
if segment['labeled']:
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from']))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to']))
segment_data = data[segment_from_index: segment_to_index + 1]
segment_min = min(segment_data)
segment_max = max(segment_data)
confidences.append(0.2 * (segment_max - segment_min))
flat_segment = segment_data.rolling(window=5).mean()
flat_segment = flat_segment.dropna()
segment_min_index = flat_segment.idxmin() #+ segment['start']
self.ipeaks.append(segment_min_index)
labeled_drop = data[segment_min_index - WINDOW_SIZE : segment_min_index + WINDOW_SIZE]
labeled_min = min(labeled_drop)
for value in labeled_drop:
value = value - labeled_min
convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
convolve_list.append(max(convolve))
if len(confidences) > 0:
self.state['confidence'] = min(confidences)
else:
self.state['confidence'] = 1.5
if len(convolve_list) > 0:
self.state['convolve_max'] = max(convolve_list)
else:
self.state['convolve_max'] = 570000
return self.state
def do_predict(self, dataframe: pd.DataFrame):
data = dataframe['value']
window_size = 24
all_max_flatten_data = data.rolling(window=window_size).mean()
all_mins = argrelextrema(np.array(all_max_flatten_data), np.less)[0]
extrema_list = []
for i in utils.exponential_smoothing(data - self.state['confidence'], 0.02):
extrema_list.append(i)
segments = []
for i in all_mins:
if all_max_flatten_data[i] < extrema_list[i]:
segments.append(i + 12)
filtered = self.__filter_prediction(segments, data)
return [(dataframe['timestamp'][x - 1].value, dataframe['timestamp'][x + 1].value) for x in filtered]
def __filter_prediction(self, segments: list, all_max_flatten_data: list):
delete_list = []
variance_error = int(0.004 * len(all_max_flatten_data))
if variance_error > 100:
variance_error = 100
for i in range(1, len(segments)):
if segments[i] < segments[i - 1] + variance_error:
delete_list.append(segments[i])
for item in delete_list:
segments.remove(item)
delete_list = []
if len(segments) == 0 or len(self.ipeaks) == 0 :
segments = []
return segments
pattern_data = all_max_flatten_data[self.ipeaks[0] - WINDOW_SIZE : self.ipeaks[0] + WINDOW_SIZE]
for segment in segments:
if segment > WINDOW_SIZE:
convol_data = all_max_flatten_data[segment - WINDOW_SIZE : segment + WINDOW_SIZE]
conv = scipy.signal.fftconvolve(pattern_data, convol_data)
if max(conv) > self.state['convolve_max'] * 1.2 or max(conv) < self.state['convolve_max'] * 0.8:
delete_list.append(segment)
else:
delete_list.append(segment)
for item in delete_list:
segments.remove(item)
return segments

analytics/utils/__init__.py (9 lines changed)

@@ -148,7 +148,8 @@ def find_jump_length(segment_data, min_line, max_line):
     if (idl[0] - idx[-1] + 1) > 0:
         return idl[0] - idx[-1] + 1
     else:
-        return print("retard alert!")
+        print("retard alert!")
+        return 0
 def find_jump(data, height, lenght):
     j_list = []
@@ -192,10 +193,10 @@ def drop_intersection(segment_data, median_line):
     idx = np.argwhere(np.diff(np.sign(f - g)) != 0).reshape(-1) + 0
     return idx
-def find_drop(data, height, lenght):
+def find_drop(data, height, length):
     d_list = []
-    for i in range(len(data)-lenght-1):
-        for x in range(1, lenght):
+    for i in range(len(data)-length-1):
+        for x in range(1, length):
             if(data[i+x] < data[i] - height):
                 d_list.append(i+36)
     return(d_list)

server/src/controllers/analytics_controller.ts (73 lines changed)

@@ -112,6 +112,8 @@ export async function runLearning(id: AnalyticUnit.AnalyticUnitId) {
     let oldCache = await AnalyticUnitCache.findById(id);
     if(oldCache !== null) {
       oldCache = oldCache.data;
+    } else {
+      await AnalyticUnitCache.create(id);
     }
     let task = new AnalyticsTask(
       id, AnalyticsTaskType.LEARN, { pattern, segments: segmentObjs, data, cache: oldCache }
@@ -121,7 +123,7 @@ export async function runLearning(id: AnalyticUnit.AnalyticUnitId) {
     if(result.status !== AnalyticUnit.AnalyticUnitStatus.SUCCESS) {
       throw new Error(result.error)
     }
-    AnalyticUnitCache.setData(id, result.payload.cache);
+    await AnalyticUnitCache.setData(id, result.payload.cache);
   } catch (err) {
     let message = err.message || JSON.stringify(err);
     await AnalyticUnit.setStatus(id, AnalyticUnit.AnalyticUnitStatus.FAILED, message);
@@ -129,34 +131,6 @@ export async function runLearning(id: AnalyticUnit.AnalyticUnitId) {
 }
-function processPredictionResult(analyticUnitId: AnalyticUnit.AnalyticUnitId, taskResult: any): {
-  lastPredictionTime: number,
-  segments: Segment.Segment[],
-  cache: any
-} {
-  let payload = taskResult.payload;
-  if(payload === undefined) {
-    throw new Error(`Missing payload in result: ${taskResult}`);
-  }
-  if(payload.segments === undefined || !Array.isArray(payload.segments)) {
-    throw new Error(`Missing segments in result or it is corrupted: ${JSON.stringify(payload)}`);
-  }
-  if(payload.lastPredictionTime === undefined || isNaN(+payload.lastPredictionTime)) {
-    throw new Error(
-      `Missing lastPredictionTime is result or it is corrupted: ${JSON.stringify(payload)}`
-    );
-  }
-  let segments = payload.segments.map(segment => new Segment.Segment(analyticUnitId, segment.from, segment.to, false));
-  return {
-    lastPredictionTime: payload.lastPredictionTime,
-    segments: segments,
-    cache: {}
-  };
-}
 export async function runPredict(id: AnalyticUnit.AnalyticUnitId) {
   let previousLastPredictionTime: number = undefined;
@@ -166,20 +140,26 @@ export async function runPredict(id: AnalyticUnit.AnalyticUnitId) {
     let pattern = unit.type;
     let segments = await Segment.findMany(id, { labeled: true });
-    if (segments.length < 2) {
+    if(segments.length < 2) {
       throw new Error('Need at least 2 labeled segments');
     }
     let { from, to } = getQueryRangeForLearningBySegments(segments);
     let data = await queryByMetric(unit.metric, unit.panelUrl, from, to);
-    if (data.length === 0) {
+    if(data.length === 0) {
       throw new Error('Empty data to predict on');
     }
+    let oldCache = await AnalyticUnitCache.findById(id);
+    if(oldCache !== null) {
+      oldCache = oldCache.data;
+    } else {
+      await AnalyticUnitCache.create(id);
+    }
     let task = new AnalyticsTask(
       id,
       AnalyticsTaskType.PREDICT,
-      { pattern, lastPredictionTime: unit.lastPredictionTime, data, cache: {} }
+      { pattern, lastPredictionTime: unit.lastPredictionTime, data, cache: oldCache }
     );
     let result = await runTask(task);
     if(result.status === AnalyticUnit.AnalyticUnitStatus.FAILED) {
@@ -200,6 +180,7 @@ export async function runPredict(id: AnalyticUnit.AnalyticUnitId) {
     }
     Segment.insertSegments(payload.segments);
+    AnalyticUnitCache.setData(id, payload.cache);
     AnalyticUnit.setPredictionTime(id, payload.lastPredictionTime);
     AnalyticUnit.setStatus(id, AnalyticUnit.AnalyticUnitStatus.READY);
   } catch(err) {
@@ -211,6 +192,34 @@ export async function runPredict(id: AnalyticUnit.AnalyticUnitId) {
   }
 }
+function processPredictionResult(analyticUnitId: AnalyticUnit.AnalyticUnitId, taskResult: any): {
+  lastPredictionTime: number,
+  segments: Segment.Segment[],
+  cache: any
+} {
+  let payload = taskResult.payload;
+  if (payload === undefined) {
+    throw new Error(`Missing payload in result: ${taskResult}`);
+  }
+  if (payload.segments === undefined || !Array.isArray(payload.segments)) {
+    throw new Error(`Missing segments in result or it is corrupted: ${JSON.stringify(payload)}`);
+  }
+  if (payload.lastPredictionTime === undefined || isNaN(+payload.lastPredictionTime)) {
+    throw new Error(
+      `Missing lastPredictionTime is result or it is corrupted: ${JSON.stringify(payload)}`
+    );
+  }
+  let segments = payload.segments.map(segment => new Segment.Segment(analyticUnitId, segment.from, segment.to, false));
+  return {
+    lastPredictionTime: payload.lastPredictionTime,
+    segments: segments,
+    cache: payload.cache
+  };
+}
 export function isAnalyticReady(): boolean {
   return analyticsService.ready;
 }

server/src/models/analytic_unit_cache_model.ts (34 lines changed)

@@ -5,40 +5,32 @@ import { Collection, makeDBQ } from '../services/data_service';
 let db = makeDBQ(Collection.ANALYTIC_UNIT_CACHES);
-export type AnalyticUnitCacheId = string;
 export class AnalyticUnitCache {
   public constructor(
-    public analyticUnitId: AnalyticUnitId,
-    public data?: any,
-    public id?: AnalyticUnitCacheId,
+    public id: AnalyticUnitId,
+    public data?: any
   ) {
-    if(analyticUnitId === undefined) {
-      throw new Error(`Missing field "analyticUnitId"`);
+    if(id === undefined) {
+      throw new Error(`Missing field "id"`);
     }
   }
   public toObject() {
     return {
-      _id: this.id,
-      analyticUnitId: this.analyticUnitId,
-      data: this.data
+      data: this.data || null,
+      _id: this.id
     };
   }
   static fromObject(obj: any): AnalyticUnitCache {
-    if(obj.method === undefined) {
-      throw new Error('No method in obj:' + obj);
-    }
     return new AnalyticUnitCache(
-      obj.method,
+      obj._id,
       obj.data,
-      obj._id
     );
   }
 }
-export async function findById(id: AnalyticUnitCacheId): Promise<AnalyticUnitCache> {
+export async function findById(id: AnalyticUnitId): Promise<AnalyticUnitCache> {
   let obj = await db.findOne(id);
   if(obj === null) {
     return null;
@@ -46,15 +38,15 @@ export async function findById(id: AnalyticUnitCacheId): Promise<AnalyticUnitCac
   return AnalyticUnitCache.fromObject(obj);
 }
-export async function create(unit: AnalyticUnitCache): Promise<AnalyticUnitCacheId> {
-  let obj = unit.toObject();
-  return db.insertOne(obj);
+export async function create(id: AnalyticUnitId): Promise<AnalyticUnitId> {
+  let cache = new AnalyticUnitCache(id);
+  return db.insertOne(cache.toObject());
 }
-export async function setData(id: AnalyticUnitCacheId, data: any) {
+export async function setData(id: AnalyticUnitId, data: any) {
   return db.updateOne(id, { data });
 }
-export async function remove(id: AnalyticUnitCacheId): Promise<void> {
+export async function remove(id: AnalyticUnitId): Promise<void> {
   await db.removeOne(id);
 }
