Browse Source

Merge pull request #16 from hastic/tester-tool-update

Use tasks in analytic model tester tool
master
rozetko 5 years ago committed by GitHub
parent
commit
5a904dd7c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 135
      tools/analytic_model_tester.py

135
tools/analytic_model_tester.py

@ -3,21 +3,30 @@ ANALYTICS_PATH = '../analytics'
TESTS_PATH = '../tests' TESTS_PATH = '../tests'
sys.path.extend([ANALYTICS_PATH, TESTS_PATH]) sys.path.extend([ANALYTICS_PATH, TESTS_PATH])
import pandas as pd import asyncio
import numpy as np from typing import List, Tuple
import utils
import test_dataset
from analytic_types.segment import Segment
from detectors import pattern_detector, threshold_detector, anomaly_detector
from analytic_unit_manager import AnalyticUnitManager
START_TIMESTAMP = 1523889000000
# TODO: get_dataset # TODO: get_dataset
# TODO: get_segment # TODO: get_segment
PEAK_DATASETS = []
# dataset with 3 peaks # dataset with 3 peaks
TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]) TEST_DATA = [0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]
# TODO: more convenient way to specify labeled segments # TODO: more convenient way to specify labeled segments
POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}] POSITIVE_SEGMENTS = [{ 'from': 1, 'to': 7 }, { 'from': 22, 'to': 28 }]
NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}] NEGATIVE_SEGMENTS = [{ 'from': 11, 'to': 17 }]
DATA_MODELS = [
{
'type': 'peak',
'serie': TEST_DATA,
'segments': {
'positive': POSITIVE_SEGMENTS,
'negative': NEGATIVE_SEGMENTS
}
}
]
class TesterSegment(): class TesterSegment():
@ -30,8 +39,8 @@ class TesterSegment():
return { return {
'_id': 'q', '_id': 'q',
'analyticUnitId': 'q', 'analyticUnitId': 'q',
'from': self.start, 'from': START_TIMESTAMP + self.start,
'to': self.end, 'to': START_TIMESTAMP + self.end,
'labeled': self.labeled, 'labeled': self.labeled,
'deleted': not self.labeled 'deleted': not self.labeled
} }
@ -59,47 +68,76 @@ class Metric():
non_detected = len(self.expected_result) - correct_segment non_detected = len(self.expected_result) - correct_segment
return (correct_segment, invalid_segment, non_detected) return (correct_segment, invalid_segment, non_detected)
class ModelData(): class TestDataModel():
def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str): def __init__(self, data_values: List[float], positive_segments: List[dict], negative_segments: List[dict], model_type: str):
self.frame = frame self.data_values = data_values
self.positive_segments = positive_segments self.positive_segments = positive_segments
self.negative_segments = negative_segments self.negative_segments = negative_segments
self.model_type = model_type self.model_type = model_type
def get_segments_for_detection(self, positive_amount, negative_amount): def get_segments_for_detection(self, positive_amount: int, negative_amount: int):
segments = [] positive_segments = [segment for idx, segment in enumerate(self.get_positive_segments()) if idx < positive_amount]
for idx, bounds in enumerate(self.positive_segments): negative_segments = [segment for idx, segment in enumerate(self.get_negative_segments()) if idx < negative_amount]
if idx >= positive_amount: return positive_segments + negative_segments
break
segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment()) def get_formated_segments(self, segments: List[dict], positive: bool):
# TODO: add enum
for idx, bounds in enumerate(self.negative_segments): return [TesterSegment(segment['from'], segment['to'], positive).get_segment() for segment in segments]
if idx >= negative_amount:
break def get_positive_segments(self):
segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment()) return self.get_formated_segments(self.positive_segments, True)
return segments def get_negative_segments(self):
return self.get_formated_segments(self.negative_segments, False)
def get_all_correct_segments(self):
return self.positive_segments def get_timestamp_values_list(self) -> List[Tuple[int, float]]:
data_timestamp_list = [START_TIMESTAMP + i for i in range(len(self.data_values))]
PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak') return list(zip(data_timestamp_list, self.data_values))
PEAK_DATASETS.append(PEAK_DATA_1)
def get_task(self, task_type: str, cache = None) -> dict:
def main(model_type: str) -> None: data = self.get_timestamp_values_list()
start_timestamp, end_timestamp = data[0][0], data[-1][0]
analytic_unit_type = self.model_type.upper()
task = {
'analyticUnitId': 'testUnitId',
'type': task_type,
'payload': {
'data': data,
'from': start_timestamp,
'to': end_timestamp,
'analyticUnitType': analytic_unit_type,
'detector': 'pattern',
'cache': cache
},
'_id': 'testId'
}
# TODO: enum for task_type
if(task_type == 'LEARN'):
segments = self.get_segments_for_detection(1, 0)
task['payload']['segments'] = segments
return task
PEAK_DATA_MODELS = list(map(
lambda data_model: TestDataModel(
data_model['serie'],
data_model['segments']['positive'],
data_model['segments']['negative'],
data_model['type']
),
DATA_MODELS
))
async def main(model_type: str) -> None:
table_metric = [] table_metric = []
if model_type == 'peak': if model_type == 'peak':
for data in PEAK_DATASETS: for data_model in PEAK_DATA_MODELS:
dataset = data.frame manager = AnalyticUnitManager()
segments = data.get_segments_for_detection(1, 0) learning_task = data_model.get_task('LEARN')
segments = [Segment.from_json(segment) for segment in segments] learning_result = await manager.handle_analytic_task(learning_task)
detector = pattern_detector.PatternDetector('PEAK', 'test_id') detect_task = data_model.get_task('DETECT', learning_result['payload']['cache'])
training_result = detector.train(dataset, segments, {}) detect_result = await manager.handle_analytic_task(detect_task)
cache = training_result['cache'] peak_metric = Metric(data_model.get_positive_segments(), detect_result['payload'])
detect_result = detector.detect(dataset, cache)
detect_result = detect_result.to_json()
peak_metric = Metric(data.get_all_correct_segments(), detect_result)
table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy())) table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy()))
return table_metric return table_metric
@ -114,9 +152,10 @@ if __name__ == '__main__':
print('Enter one of models name: {}'.format(correct_name)) print('Enter one of models name: {}'.format(correct_name))
sys.exit(1) sys.exit(1)
model_type = str(sys.argv[1]).lower() model_type = str(sys.argv[1]).lower()
loop = asyncio.get_event_loop()
if model_type in correct_name: if model_type in correct_name:
print(main(model_type)) result = loop.run_until_complete(main(model_type))
print(result)
else: else:
print('Enter one of models name: {}'.format(correct_name)) print('Enter one of models name: {}'.format(correct_name))

Loading…
Cancel
Save