From 10331e65ed5b0f35686c99d90dc823ab7f80e800 Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Tue, 16 Apr 2019 13:49:42 +0300
Subject: [PATCH 1/2] Tool for model testing #561 (#562)

---
 analytics/tools/analytic_model_tester.py | 137 +++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 analytics/tools/analytic_model_tester.py

diff --git a/analytics/tools/analytic_model_tester.py b/analytics/tools/analytic_model_tester.py
new file mode 100644
index 0000000..6a62c03
--- /dev/null
+++ b/analytics/tools/analytic_model_tester.py
@@ -0,0 +1,137 @@
+import sys
+ANALYTICS_PATH = '../analytics'
+TESTS_PATH = '../tests'
+sys.path.extend([ANALYTICS_PATH, TESTS_PATH])
+
+import pandas as pd
+import numpy as np
+import utils
+import models
+import test_dataset
+
+# TODO: get_dataset
+# TODO: get_segment
+PEAK_DATASETS = []
+# dataset with 3 peaks
+TEST_DATA = test_dataset.create_dataframe([0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1])
+POSITIVE_SEGMENTS = [(1523889000000, 1523889000006), (1523889000021, 1523889000027)]
+NEGATIVE_SEGMENTS = [(1523889000009, 1523889000017)]
+
+class Segment():
+    """Labeled range that serializes to the segment dict handed to a model."""
+
+    def __init__(self, start: int, end: int, labeled: bool):
+        self.start = start
+        self.end = end
+        self.labeled = labeled
+
+    def get_segment(self):
+        """Return the segment as a dict; 'deleted' is the inverse of 'labeled'."""
+        return {
+            '_id': 'q',
+            'analyticUnitId': 'q',
+            'from': self.start,
+            'to': self.end,
+            'labeled': self.labeled,
+            'deleted': not self.labeled
+        }
+
+class Metric():
+    """Compares the segments a model detected with the expected segments."""
+
+    def __init__(self, true_result, model_result):
+        # model_result is the detect() output; only its 'segments' entry is used
+        self.true_result = true_result
+        self.model_result = model_result['segments']
+
+    def get_amount(self):
+        """Return detected / expected segment count; true_result must not be empty."""
+        return len(self.model_result) / len(self.true_result)
+
+    def get_accuracy(self):
+        """Return (correct, invalid, non_detected) segment counts.
+
+        A detected segment is correct when it lies inside an expected one and
+        invalid otherwise; non_detected counts expected segments with no match.
+        """
+        correct_segment = 0
+        invalid_segment = 0
+        # Expected segments are tracked by index so that several detections
+        # inside one of them do not shrink non_detected more than once.
+        matched_patterns = set()
+        for segment in self.model_result:
+            for idx, pattern in enumerate(self.true_result):
+                if pattern[0] <= segment[0] and pattern[1] >= segment[1]:
+                    correct_segment += 1
+                    matched_patterns.add(idx)
+                    break
+            else:
+                invalid_segment += 1
+        non_detected = len(self.true_result) - len(matched_patterns)
+        return (correct_segment, invalid_segment, non_detected)
+
+class ModelData():
+    """A dataframe together with its labeled segments and the model type."""
+
+    def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str):
+        self.frame = frame
+        self.positive_segments = positive_segments
+        self.negative_segments = negative_segments
+        self.model_type = model_type
+
+    def get_segments_for_detection(self, positive_amount, negative_amount):
+        """Return dicts for the first positive_amount positive and negative_amount negative segments."""
+        segments = []
+        for idx, bounds in enumerate(self.positive_segments):
+            if idx >= positive_amount:
+                break
+            segments.append(Segment(bounds[0], bounds[1], True).get_segment())
+
+        for idx, bounds in enumerate(self.negative_segments):
+            if idx >= negative_amount:
+                break
+            segments.append(Segment(bounds[0], bounds[1], False).get_segment())
+
+        return segments
+
+    def get_all_correct_segments(self):
+        """Return the bounds a model is expected to detect (the positive segments)."""
+        return self.positive_segments
+
+PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak')
+PEAK_DATASETS.append(PEAK_DATA_1)
+
+def main(model_type: str) -> list:
+    """Fit and run the model on its datasets; return one (amount, accuracy) tuple per dataset.
+
+    Only 'peak' has datasets so far; any other model type gives an empty list.
+    """
+    table_metric = []
+    if model_type == 'peak':
+        for data in PEAK_DATASETS:
+            dataset = data.frame
+            segments = data.get_segments_for_detection(1, 0)
+            model = models.PeakModel()
+            cache = model.fit(dataset, segments, 'test', {})
+            detect_result = model.detect(dataset, 'test', cache)
+            peak_metric = Metric(data.get_all_correct_segments(), detect_result)
+            table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy()))
+    return table_metric
+
+if __name__ == '__main__':
+    '''
+    This tool applies the model on datasets and verifies that the detection result corresponds to the correct values.
+    sys.argv[1] expects one of the models name -> see correct_name
+    '''
+    # TODO: use enum
+    correct_name = ['peak', 'trough', 'jump', 'drop', 'general']
+    if len(sys.argv) < 2:
+        print('Enter one of models name: {}'.format(correct_name))
+        sys.exit(1)
+    model_type = str(sys.argv[1]).lower()
+    if model_type in correct_name:
+        print(main(model_type))
+    else:
+        print('Enter one of models name: {}'.format(correct_name))
+        # an unknown model name is a usage error as well, so exit non-zero
+        sys.exit(1)

From 6a690c09f0359d0de743edefcd706243d1f2594f Mon Sep 17 00:00:00 2001
From: Alexandr Velikiy <39257464+VargBurz@users.noreply.github.com>
Date: Tue, 16 Apr 2019 14:11:11 +0300
Subject: [PATCH 2/2] Add base class for models state #574 (#575)

---
 analytics/analytics/models/model.py | 44 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py
index f1c4bed..74e2d8a 100644
--- a/analytics/analytics/models/model.py
+++ b/analytics/analytics/models/model.py
@@ -2,7 +2,7 @@
 import utils
 from abc import ABC, abstractmethod
 from attrdict import AttrDict
-from typing import Optional
+from typing import Optional, List
 import pandas as pd
 import math
 import logging
@@ -42,6 +42,48 @@ class Segment(AttrDict):
         nan_list = utils.find_nan_indexes(self.data)
         self.data = utils.nan_to_zero(self.data, nan_list)
 
+class ModelState():
+    """Base container for a model's state, convertible to and from a dict."""
+
+    def __init__(
+        self,
+        pattern_center: Optional[List[int]] = None,
+        pattern_model: Optional[List[float]] = None,
+        convolve_max: float = 0,
+        convolve_min: float = 0,
+        window_size: int = 0,
+        conv_del_min: float = 0,
+        conv_del_max: float = 0
+    ):
+        # None stands for "empty list": a literal [] default would be created
+        # once and shared by every instance constructed without the argument.
+        self.pattern_center = [] if pattern_center is None else pattern_center
+        self.pattern_model = [] if pattern_model is None else pattern_model
+        self.convolve_max = convolve_max
+        self.convolve_min = convolve_min
+        self.window_size = window_size
+        self.conv_del_min = conv_del_min
+        self.conv_del_max = conv_del_max
+
+    def to_json(self) -> dict:
+        """Return the state as a dict keyed by the constructor argument names."""
+        return {
+            'pattern_center': self.pattern_center,
+            'pattern_model': self.pattern_model,
+            'convolve_max': self.convolve_max,
+            'convolve_min': self.convolve_min,
+            'window_size': self.window_size,
+            'conv_del_min': self.conv_del_min,
+            'conv_del_max': self.conv_del_max,
+        }
+
+    @staticmethod
+    def from_json(json: Optional[dict] = None):
+        """Build a ModelState from a to_json()-style dict; None gives the defaults."""
+        if json is None:
+            json = {}
+        return ModelState(**json)
+
 class Model(ABC):
 
     HEIGHT_ERROR = 0.1