# hastic-server/analytics/detectors/pattern_detector.py

import detectors
import utils

from grafana_data_provider import GrafanaDataProvider

import logging
from urllib.parse import urlparse
import os.path
import json
import config

import pandas as pd


# Module-level logger. It is registered under the fixed name
# 'analytic_toolset' rather than this module's own name.
logger = logging.getLogger('analytic_toolset')


def resolve_detector_by_pattern(pattern):
    """Create a new detector instance for the given pattern name.

    Recognised names are 'peak', 'drop' and 'jump'; each maps to a class
    looked up on the ``detectors`` module and instantiated with no arguments.

    Raises:
        ValueError: if ``pattern`` is not one of the recognised names.
    """
    # (pattern name, detector class name on the `detectors` module)
    known_patterns = (
        ('peak', 'PeaksDetector'),
        ('drop', 'StepDetector'),
        ('jump', 'JumpDetector'),
    )
    for known_name, detector_class_name in known_patterns:
        if pattern == known_name:
            # Look the class up only on a match, so an unknown pattern
            # never touches the `detectors` module.
            detector_class = getattr(detectors, detector_class_name)
            return detector_class()
    raise ValueError('Unknown pattern "%s"' % pattern)


class PatternDetector:
    """Learn and detect one pattern type for a single analytic unit.

    Construction reads the unit's JSON config from
    ``config.ANALYTIC_UNITS_FOLDER``, builds a ``GrafanaDataProvider`` for the
    first metric target listed in that config, and loads a previously saved
    model from ``config.MODELS_FOLDER`` when one exists on disk.
    """

    def __init__(self, analytic_unit_id, pattern_type):
        """Set up the data provider and try to load an existing model.

        Args:
            analytic_unit_id: identifier used to locate the unit's config
                file (``<id>.json``) and its saved model (``<id>.m``).
            pattern_type: pattern name passed to
                ``resolve_detector_by_pattern`` when a model is created.
        """
        self.analytic_unit_id = analytic_unit_id
        self.pattern_type = pattern_type

        self.__load_anomaly_config()

        # Only scheme and host of the panel URL are kept as the origin.
        parsed_url = urlparse(self.anomaly_config['panelUrl'])
        origin = parsed_url.scheme + '://' + parsed_url.netloc

        datasource = self.anomaly_config['datasource']
        # Only the first target of the metric is used.
        metric_name = self.anomaly_config['metric']['targets'][0]

        target_filename = os.path.join(config.METRICS_FOLDER, metric_name + ".json")
        # NOTE: this mutates the datasource dict held in self.anomaly_config.
        datasource['origin'] = origin
        dataset_filename = os.path.join(config.DATASET_FOLDER, metric_name + ".csv")

        with open(target_filename, 'r') as target_file:
            target = json.load(target_file)

        self.data_prov = GrafanaDataProvider(datasource, target, dataset_filename)

        # Stays None until a model is loaded from disk or learn() is called.
        self.model = None
        self.__load_model(pattern_type)

    async def learn(self, segments):
        """Fit a fresh model on the full dataframe and persist it.

        Args:
            segments: labelled segments; they are passed through
                ``data_prov.transform_anomalies`` before fitting.

        Returns:
            Always 0.
        """
        self.model = resolve_detector_by_pattern(self.pattern_type)

        dataframe = self.data_prov.get_dataframe()
        segments = self.data_prov.transform_anomalies(segments)
        # TODO: pass only part of dataframe that has segments
        await self.model.fit(dataframe, segments)
        self.__save_model()
        return 0

    async def predict(self, last_prediction_time):
        """Detect pattern segments in the data after ``last_prediction_time``.

        Args:
            last_prediction_time: timestamp in milliseconds (it is parsed
                with ``pd.to_datetime(..., unit='ms')``).

        Returns:
            A ``(segments, last_prediction_time)`` tuple. ``segments`` is a
            list of ``{'start': ms, 'finish': ms}`` dicts with
            ``start <= finish``. The returned time is the millisecond
            timestamp of the last dataframe row, or the unchanged input when
            there is no model or no data to predict on.
        """
        if self.model is None:
            return [], last_prediction_time

        window_size = 100
        # Use a separate name so the caller's millisecond value can still be
        # returned untouched by the early exit below.
        last_time = pd.to_datetime(last_prediction_time, unit='ms')

        start_index = self.data_prov.get_upper_bound(last_time)
        # Step back by window_size rows, clamped to the start of the data.
        start_index = max(0, start_index - window_size)
        dataframe = self.data_prov.get_data_range(start_index)

        # With no rows there is nothing to predict, and `iloc[-1]` below would
        # raise IndexError; behave like the "no model" case instead.
        if len(dataframe) == 0:
            return [], last_prediction_time

        predicted_indexes = await self.model.predict(dataframe)
        predicted_indexes = [
            (x, y) for (x, y) in predicted_indexes
            if x >= start_index and y >= start_index
        ]

        predicted_times = self.data_prov.inverse_transform_indexes(predicted_indexes)
        segments = []
        for time_value in predicted_times:
            ts1 = int(time_value[0].timestamp() * 1000)
            ts2 = int(time_value[1].timestamp() * 1000)
            # The pair may arrive in either order; always emit start <= finish.
            segments.append({
                'start': min(ts1, ts2),
                'finish': max(ts1, ts2)
            })

        last_dataframe_time = dataframe.iloc[-1]['timestamp']
        return segments, int(last_dataframe_time.timestamp() * 1000)

    def synchronize_data(self):
        """Delegate to the data provider's ``synchronize()``."""
        self.data_prov.synchronize()

    def __load_anomaly_config(self):
        """Read ``<analytic_unit_id>.json`` into ``self.anomaly_config``."""
        config_filename = os.path.join(config.ANALYTIC_UNITS_FOLDER, self.analytic_unit_id + ".json")
        with open(config_filename, 'r') as config_file:
            self.anomaly_config = json.load(config_file)

    def __model_filename(self):
        """Return the path of this analytic unit's model file."""
        return os.path.join(config.MODELS_FOLDER, self.analytic_unit_id + ".m")

    def __save_model(self):
        """Save the current model to this unit's model file."""
        logger.info("Save model '%s'", self.analytic_unit_id)
        self.model.save(self.__model_filename())

    def __load_model(self, pattern):
        """Load a saved model into ``self.model`` if one exists on disk.

        The unit's own file (the one ``__save_model`` writes) is tried first.
        Previously only ``<pattern_type>.m`` was checked, so a model learned
        for a unit was never found again. That pattern-named file is kept as
        a fallback for backward compatibility. ``self.model`` is left
        unchanged when neither file exists.

        Args:
            pattern: pattern name used to create the detector that the file
                is loaded into.
        """
        logger.info("Load model '%s'", self.analytic_unit_id)
        legacy_filename = os.path.join(config.MODELS_FOLDER, self.pattern_type + ".m")
        for model_filename in (self.__model_filename(), legacy_filename):
            if os.path.exists(model_filename):
                self.model = resolve_detector_by_pattern(pattern)
                self.model.load(model_filename)
                return
detectors cleanup & jump_detector integration 6 years ago			`import detectors`
reafactor common functions from detectors to utils folder 6 years ago			`import utils`
detectors to folder 6 years ago
data_service, renamings and detectors imports 6 years ago			`from grafana_data_provider import GrafanaDataProvider`
detectors to folder 6 years ago
Add src 7 years ago			`import logging`
Pattern type handler (#6) 7 years ago			`from urllib.parse import urlparse`
Add src 7 years ago			`import os.path`
			`import json`
folders config++ 7 years ago			`import config`

Add src 7 years ago			`import pandas as pd`

folders config++ 7 years ago
Add src 7 years ago			`logger = logging.getLogger('analytic_toolset')`



detectors cleanup & jump_detector integration 6 years ago			`def resolve_detector_by_pattern(pattern):`
Jumpdetector -> JumpDetector & all_mins in JumpDetector.__predict 6 years ago			`if pattern == 'peak':`
detectors cleanup & jump_detector integration 6 years ago			`return detectors.PeaksDetector()`
Jumpdetector -> JumpDetector & all_mins in JumpDetector.__predict 6 years ago			`if pattern == 'drop':`
detectors cleanup & jump_detector integration 6 years ago			`return detectors.StepDetector()`
Jumpdetector -> JumpDetector & all_mins in JumpDetector.__predict 6 years ago			`if pattern == 'jump':`
			`return detectors.JumpDetector()`
detectors cleanup & jump_detector integration 6 years ago			`raise ValueError('Unknown pattern "%s"' % pattern)`
Add src 7 years ago
detectors cleanup & jump_detector integration 6 years ago
			`class PatternDetector:`
Add src 7 years ago
detectors to folder 6 years ago			`def __init__(self, analytic_unit_id, pattern_type):`
Use anomaly IDs #42 6 years ago			`self.analytic_unit_id = analytic_unit_id`
detectors to folder 6 years ago			`self.pattern_type = pattern_type`
Add src 7 years ago
			`self.__load_anomaly_config()`
Pattern type handler (#6) 7 years ago
			`parsedUrl = urlparse(self.anomaly_config['panelUrl'])`
			`origin = parsedUrl.scheme + '://' + parsedUrl.netloc`

			`datasource = self.anomaly_config['datasource']`
Add src 7 years ago			`metric_name = self.anomaly_config['metric']['targets'][0]`

folders config++ 7 years ago			`target_filename = os.path.join(config.METRICS_FOLDER, metric_name + ".json")`
Pattern type handler (#6) 7 years ago			`datasource['origin'] = origin`
folders config++ 7 years ago			`dataset_filename = os.path.join(config.DATASET_FOLDER, metric_name + ".csv")`
Add src 7 years ago
			`with open(target_filename, 'r') as file:`
			`target = json.load(file)`

data_service, renamings and detectors imports 6 years ago			`self.data_prov = GrafanaDataProvider(datasource, target, dataset_filename)`
Add src 7 years ago
			`self.model = None`
detectors to folder 6 years ago			`self.__load_model(pattern_type)`
Add src 7 years ago
asyncio usage (#88) * asyncio integration (buggy) PEAKS_DETECTION doesnt works 6 years ago			`async def learn(self, segments):`
detectors cleanup & jump_detector integration 6 years ago			`self.model = resolve_detector_by_pattern(self.pattern_type)`
Add src 7 years ago			`window_size = 200`

			`dataframe = self.data_prov.get_dataframe()`

			`segments = self.data_prov.transform_anomalies(segments)`
177-improve-drops-model 7 years ago			`# TODO: pass only part of dataframe that has segments`
detectors cleanup & jump_detector integration 6 years ago			`await self.model.fit(dataframe, segments)`
Add src 7 years ago			`self.__save_model()`
			`return 0`

asyncio usage (#88) * asyncio integration (buggy) PEAKS_DETECTION doesnt works 6 years ago			`async def predict(self, last_prediction_time):`
Add src 7 years ago			`if self.model is None:`
			`return [], last_prediction_time`

			`window_size = 100`
			`last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')`

			`start_index = self.data_prov.get_upper_bound(last_prediction_time)`
			`start_index = max(0, start_index - window_size)`
			`dataframe = self.data_prov.get_data_range(start_index)`

asyncio usage (#88) * asyncio integration (buggy) PEAKS_DETECTION doesnt works 6 years ago			`predicted_indexes = await self.model.predict(dataframe)`
Add src 7 years ago			`predicted_indexes = [(x, y) for (x, y) in predicted_indexes if x >= start_index and y >= start_index]`

			`predicted_times = self.data_prov.inverse_transform_indexes(predicted_indexes)`
			`segments = []`
			`for time_value in predicted_times:`
			`ts1 = int(time_value[0].timestamp() * 1000)`
			`ts2 = int(time_value[1].timestamp() * 1000)`
			`segments.append({`
Fix issue with start_time > finish_time 7 years ago			`'start': min(ts1, ts2),`
			`'finish': max(ts1, ts2)`
Add src 7 years ago			`})`

detectors cleanup & jump_detector integration 6 years ago			`last_dataframe_time = dataframe.iloc[-1]['timestamp']`
Add src 7 years ago			`last_prediction_time = int(last_dataframe_time.timestamp() * 1000)`
			`return segments, last_prediction_time`
			`# return predicted_anomalies, last_prediction_time`

			`def synchronize_data(self):`
			`self.data_prov.synchronize()`

			`def __load_anomaly_config(self):`
config paths fixs 6 years ago			`with open(os.path.join(config.ANALYTIC_UNITS_FOLDER, self.analytic_unit_id + ".json"), 'r') as config_file:`
Add src 7 years ago			`self.anomaly_config = json.load(config_file)`

			`def __save_model(self):`
Use anomaly IDs #42 6 years ago			`logger.info("Save model '%s'" % self.analytic_unit_id)`
			`model_filename = os.path.join(config.MODELS_FOLDER, self.analytic_unit_id + ".m")`
Add src 7 years ago			`self.model.save(model_filename)`

Pattern type handler (#6) 7 years ago			`def __load_model(self, pattern):`
Use anomaly IDs #42 6 years ago			`logger.info("Load model '%s'" % self.analytic_unit_id)`
detectors to folder 6 years ago			`model_filename = os.path.join(config.MODELS_FOLDER, self.pattern_type + ".m")`
Add src 7 years ago			`if os.path.exists(model_filename):`
detectors cleanup & jump_detector integration 6 years ago			`self.model = resolve_detector_by_pattern(pattern)`
Pattern type handler (#6) 7 years ago			`self.model.load(model_filename)`