Browse Source

Fix analytic model tester tool (#885)

* model -> detector

* train result

* fix

* add todo

Co-Authored-By: rozetko <rozetko@hastic.io>

Co-authored-by: rozetko <rozetko@hastic.io>
pull/1/head
Alexander Velikiy 4 years ago committed by GitHub
parent
commit
df41f4b7c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 42
      analytics/tools/analytic_model_tester.py

42
analytics/tools/analytic_model_tester.py

@ -6,18 +6,20 @@ sys.path.extend([ANALYTICS_PATH, TESTS_PATH])
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import utils import utils
import models
import test_dataset import test_dataset
from analytic_types.segment import Segment
from detectors import pattern_detector, threshold_detector, anomaly_detector
# TODO: get_dataset # TODO: get_dataset
# TODO: get_segment # TODO: get_segment
PEAK_DATASETS = [] PEAK_DATASETS = []
# dataset with 3 peaks # dataset with 3 peaks
TEST_DATA = test_dataset.create_dataframe([0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]) TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1])
POSITIVE_SEGMENTS = [(1523889000000, 1523889000006), (1523889000021, 1523889000027)] # TODO: more convenient way to specify labeled segments
NEGATIVE_SEGMENTS = [(1523889000009, 1523889000017)] POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}]
NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}]
class Segment(): class TesterSegment():
def __init__(self, start: int, end: int, labeled: bool): def __init__(self, start: int, end: int, labeled: bool):
self.start = start self.start = start
@ -36,25 +38,25 @@ class Segment():
class Metric(): class Metric():
def __init__(self, true_result, model_result): def __init__(self, expected_result, detector_result):
self.true_result = true_result self.expected_result = expected_result
self.model_result = model_result['segments'] self.detector_result = detector_result['segments']
def get_amount(self): def get_amount(self):
return len(self.model_result) / len(self.true_result) return len(self.detector_result) / len(self.expected_result)
def get_accuracy(self): def get_accuracy(self):
correct_segment = 0 correct_segment = 0
invalid_segment = 0 invalid_segment = 0
for segment in self.model_result: for segment in self.detector_result:
current_cs = correct_segment current_cs = correct_segment
for pattern in self.true_result: for pattern in self.expected_result:
if pattern[0] <= segment[0] and pattern[1] >= segment[1]: if pattern['from'] <= segment['from'] and pattern['to'] >= segment['to']:
correct_segment += 1 correct_segment += 1
break break
if correct_segment == current_cs: if correct_segment == current_cs:
invalid_segment += 1 invalid_segment += 1
non_detected = len(self.true_result) - correct_segment non_detected = len(self.expected_result) - correct_segment
return (correct_segment, invalid_segment, non_detected) return (correct_segment, invalid_segment, non_detected)
class ModelData(): class ModelData():
@ -70,12 +72,12 @@ class ModelData():
for idx, bounds in enumerate(self.positive_segments): for idx, bounds in enumerate(self.positive_segments):
if idx >= positive_amount: if idx >= positive_amount:
break break
segments.append(Segment(bounds[0], bounds[1], True).get_segment()) segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment())
for idx, bounds in enumerate(self.negative_segments): for idx, bounds in enumerate(self.negative_segments):
if idx >= negative_amount: if idx >= negative_amount:
break break
segments.append(Segment(bounds[0], bounds[1], False).get_segment()) segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment())
return segments return segments
@ -91,9 +93,12 @@ def main(model_type: str) -> None:
for data in PEAK_DATASETS: for data in PEAK_DATASETS:
dataset = data.frame dataset = data.frame
segments = data.get_segments_for_detection(1, 0) segments = data.get_segments_for_detection(1, 0)
model = models.PeakModel() segments = [Segment.from_json(segment) for segment in segments]
cache = model.fit(dataset, segments, 'test', {}) detector = pattern_detector.PatternDetector('PEAK', 'test_id')
detect_result = model.detect(dataset, 'test', cache) training_result = detector.train(dataset, segments, {})
cache = training_result['cache']
detect_result = detector.detect(dataset, cache)
detect_result = detect_result.to_json()
peak_metric = Metric(data.get_all_correct_segments(), detect_result) peak_metric = Metric(data.get_all_correct_segments(), detect_result)
table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy())) table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy()))
return table_metric return table_metric
@ -115,4 +120,3 @@ if __name__ == '__main__':
print('Enter one of models name: {}'.format(correct_name)) print('Enter one of models name: {}'.format(correct_name))

Loading…
Cancel
Save