From df41f4b7c4d29b6ddfab49f4c05515fadc65fff1 Mon Sep 17 00:00:00 2001 From: Alexander Velikiy <39257464+VargBurz@users.noreply.github.com> Date: Fri, 10 Apr 2020 20:49:47 +0300 Subject: [PATCH] Fix analytic model tester tool (#885) * model-> detector * train result * fix * add todo Co-Authored-By: rozetko Co-authored-by: rozetko --- analytics/tools/analytic_model_tester.py | 42 +++++++++++++----------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/analytics/tools/analytic_model_tester.py b/analytics/tools/analytic_model_tester.py index 6a62c03..cffbb75 100644 --- a/analytics/tools/analytic_model_tester.py +++ b/analytics/tools/analytic_model_tester.py @@ -6,18 +6,20 @@ sys.path.extend([ANALYTICS_PATH, TESTS_PATH]) import pandas as pd import numpy as np import utils -import models import test_dataset +from analytic_types.segment import Segment +from detectors import pattern_detector, threshold_detector, anomaly_detector # TODO: get_dataset # TODO: get_segment PEAK_DATASETS = [] # dataset with 3 peaks -TEST_DATA = test_dataset.create_dataframe([0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]) -POSITIVE_SEGMENTS = [(1523889000000, 1523889000006), (1523889000021, 1523889000027)] -NEGATIVE_SEGMENTS = [(1523889000009, 1523889000017)] +TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]) +# TODO: more convenient way to specify labeled segments +POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}] +NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}] -class Segment(): +class TesterSegment(): def __init__(self, start: int, end: int, labeled: bool): self.start = start @@ -36,25 +38,25 @@ class Segment(): class Metric(): - def __init__(self, true_result, model_result): - self.true_result = true_result - self.model_result = model_result['segments'] + def __init__(self, expected_result, detector_result): + self.expected_result = expected_result + self.detector_result = detector_result['segments'] def get_amount(self): - return len(self.model_result) / len(self.true_result) + return len(self.detector_result) / len(self.expected_result) def get_accuracy(self): correct_segment = 0 invalid_segment = 0 - for segment in self.model_result: + for segment in self.detector_result: current_cs = correct_segment - for pattern in self.true_result: - if pattern[0] <= segment[0] and pattern[1] >= segment[1]: + for pattern in self.expected_result: + if pattern['from'] <= segment['from'] and pattern['to'] >= segment['to']: correct_segment += 1 break if correct_segment == current_cs: invalid_segment += 1 - non_detected = len(self.true_result) - correct_segment + non_detected = len(self.expected_result) - correct_segment return (correct_segment, invalid_segment, non_detected) class ModelData(): @@ -70,12 +72,12 @@ class ModelData(): for idx, bounds in enumerate(self.positive_segments): if idx >= positive_amount: break - segments.append(Segment(bounds[0], bounds[1], True).get_segment()) + segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment()) for idx, bounds in enumerate(self.negative_segments): if idx >= negative_amount: break - segments.append(Segment(bounds[0], bounds[1], False).get_segment()) + segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment()) return segments @@ -91,9 +93,12 @@ def main(model_type: str) -> None: for data in PEAK_DATASETS: dataset = data.frame segments = data.get_segments_for_detection(1, 0) - model = models.PeakModel() - cache = model.fit(dataset, segments, 'test', {}) - detect_result = model.detect(dataset, 'test', cache) + segments = [Segment.from_json(segment) for segment in segments] + detector = pattern_detector.PatternDetector('PEAK', 'test_id') + training_result = detector.train(dataset, segments, {}) + cache = training_result['cache'] + detect_result = detector.detect(dataset, cache) + detect_result = detect_result.to_json() peak_metric = Metric(data.get_all_correct_segments(), detect_result) table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy())) return table_metric @@ -115,4 +120,3 @@ if __name__ == '__main__': print('Enter one of models name: {}'.format(correct_name)) -