From 6f352b98ee520f1f37d1eba793f9cd639c506f63 Mon Sep 17 00:00:00 2001
From: VargBurz
Date: Thu, 28 May 2020 15:53:31 +0300
Subject: [PATCH 1/3] detector -> manager

---
 tools/analytic_model_tester.py | 107 ++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 41 deletions(-)

diff --git a/tools/analytic_model_tester.py b/tools/analytic_model_tester.py
index cffbb75..01cbf8c 100644
--- a/tools/analytic_model_tester.py
+++ b/tools/analytic_model_tester.py
@@ -5,19 +5,22 @@ sys.path.extend([ANALYTICS_PATH, TESTS_PATH])
 
 import pandas as pd
 import numpy as np
+import asyncio
+from typing import List, Tuple
+
 import utils
-import test_dataset
 from analytic_types.segment import Segment
-from detectors import pattern_detector, threshold_detector, anomaly_detector
+from analytic_unit_manager import AnalyticUnitManager
 
+START_TIMESTAMP = 1523889000000
 # TODO: get_dataset
 # TODO: get_segment
-PEAK_DATASETS = []
+PEAK_DATA_MODELS = []
 # dataset with 3 peaks
-TEST_DATA = test_dataset.create_dataframe([0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1])
+TEST_DATA = [0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]
 # TODO: more convenient way to specify labeled segments
-POSITIVE_SEGMENTS = [{'from': 1523889000001, 'to': 1523889000007}, {'from': 1523889000022, 'to': 1523889000028}]
-NEGATIVE_SEGMENTS = [{'from': 1523889000011, 'to': 1523889000017}]
+POSITIVE_SEGMENTS = [{ 'from': 1, 'to': 7 }, { 'from': 22, 'to': 28 }]
+NEGATIVE_SEGMENTS = [{ 'from': 11, 'to': 17 }]
 
 
 class TesterSegment():
@@ -30,8 +33,8 @@ class TesterSegment():
         return {
             '_id': 'q',
             'analyticUnitId': 'q',
-            'from': self.start,
-            'to': self.end,
+            'from': START_TIMESTAMP + self.start,
+            'to': START_TIMESTAMP + self.end,
             'labeled': self.labeled,
             'deleted': not self.labeled
         }
@@ -59,47 +62,68 @@ class Metric():
         non_detected = len(self.expected_result) - correct_segment
         return (correct_segment, invalid_segment, non_detected)
 
 
-class ModelData():
+class TestDataModel():
-    def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str):
-        self.frame = frame
+    def __init__(self, data_values: List[float], positive_segments, negative_segments, model_type: str):
+        self.data_values = data_values
         self.positive_segments = positive_segments
         self.negative_segments = negative_segments
         self.model_type = model_type
 
     def get_segments_for_detection(self, positive_amount, negative_amount):
-        segments = []
-        for idx, bounds in enumerate(self.positive_segments):
-            if idx >= positive_amount:
-                break
-            segments.append(TesterSegment(bounds['from'], bounds['to'], True).get_segment())
-
-        for idx, bounds in enumerate(self.negative_segments):
-            if idx >= negative_amount:
-                break
-            segments.append(TesterSegment(bounds['from'], bounds['to'], False).get_segment())
-
-        return segments
-
-    def get_all_correct_segments(self):
-        return self.positive_segments
+        positive_segments = [segment for idx, segment in enumerate(self.get_positive_segments()) if idx < positive_amount]
+        negative_segments = [segment for idx, segment in enumerate(self.get_negative_segments()) if idx < negative_amount]
+        return positive_segments + negative_segments
+
+    def get_formated_segments(self, segments, positive: bool):
+        return [TesterSegment(segment['from'], segment['to'], positive).get_segment() for segment in segments]
+
+    def get_positive_segments(self):
+        return self.get_formated_segments(self.positive_segments, True)
+
+    def get_negative_segments(self):
+        return self.get_formated_segments(self.negative_segments, False)
+
+    def get_timestamp_values_list(self) -> List[Tuple[int, float]]:
+        data_timestamp_list = [START_TIMESTAMP + i for i in range(len(self.data_values))]
+        return list(zip(data_timestamp_list, self.data_values))
+
+    def get_task(self, task_type: str, cache = None) -> dict:
+        data = self.get_timestamp_values_list()
+        start_timestamp, end_timestamp = data[0][0], data[-1][0]
+        analytic_unit_type = self.model_type.upper()
+        task = {
+            'analyticUnitId': 'testUnitId',
+            'type': task_type,
+            'payload': {
+                'data': data,
+                'from': start_timestamp,
+                'to': end_timestamp,
+                'analyticUnitType': analytic_unit_type,
+                'detector': 'pattern',
+                'cache': cache
+            },
+            '_id': 'testId'
+        }
+        # TODO: enum for task_type
+        if(task_type == 'LEARN'):
+            segments = self.get_segments_for_detection(1, 0)
+            task['payload']['segments'] = segments
+        return task
 
 
-PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak')
-PEAK_DATASETS.append(PEAK_DATA_1)
+PEAK_DATA_MODEL = TestDataModel(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak')
+PEAK_DATA_MODELS.append(PEAK_DATA_MODEL)
 
-def main(model_type: str) -> None:
+async def main(model_type: str) -> None:
     table_metric = []
     if model_type == 'peak':
-        for data in PEAK_DATASETS:
-            dataset = data.frame
-            segments = data.get_segments_for_detection(1, 0)
-            segments = [Segment.from_json(segment) for segment in segments]
-            detector = pattern_detector.PatternDetector('PEAK', 'test_id')
-            training_result = detector.train(dataset, segments, {})
-            cache = training_result['cache']
-            detect_result = detector.detect(dataset, cache)
-            detect_result = detect_result.to_json()
-            peak_metric = Metric(data.get_all_correct_segments(), detect_result)
+        for data_model in PEAK_DATA_MODELS:
+            manager = AnalyticUnitManager()
+            learning_task = data_model.get_task('LEARN')
+            learning_result = await manager.handle_analytic_task(learning_task)
+            detect_task = data_model.get_task('DETECT', learning_result['payload']['cache'])
+            detect_result = await manager.handle_analytic_task(detect_task)
+            peak_metric = Metric(data_model.get_positive_segments(), detect_result['payload'])
             table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy()))
     return table_metric
@@ -114,9 +138,10 @@ if __name__ == '__main__':
         print('Enter one of models name: {}'.format(correct_name))
         sys.exit(1)
     model_type = str(sys.argv[1]).lower()
+    loop = asyncio.get_event_loop()
    if model_type in correct_name:
-        print(main(model_type))
+        result = loop.run_until_complete(main(model_type))
+        print(result)
    else:
        print('Enter one of models name: {}'.format(correct_name))
 
-

From 14403e353f35e8ca629d51a4b1ca2cb650db1db7 Mon Sep 17 00:00:00 2001
From: vargburz
Date: Mon, 1 Jun 2020 15:31:41 +0300
Subject: [PATCH 2/3] map data models

---
 tools/analytic_model_tester.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/tools/analytic_model_tester.py b/tools/analytic_model_tester.py
index 01cbf8c..6b847a8 100644
--- a/tools/analytic_model_tester.py
+++ b/tools/analytic_model_tester.py
@@ -3,24 +3,30 @@ ANALYTICS_PATH = '../analytics'
 TESTS_PATH = '../tests'
 sys.path.extend([ANALYTICS_PATH, TESTS_PATH])
 
-import pandas as pd
-import numpy as np
 import asyncio
 from typing import List, Tuple
 
-import utils
-from analytic_types.segment import Segment
 from analytic_unit_manager import AnalyticUnitManager
 
 START_TIMESTAMP = 1523889000000
 # TODO: get_dataset
 # TODO: get_segment
-PEAK_DATA_MODELS = []
+
 # dataset with 3 peaks
 TEST_DATA = [0, 0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1]
 # TODO: more convenient way to specify labeled segments
 POSITIVE_SEGMENTS = [{ 'from': 1, 'to': 7 }, { 'from': 22, 'to': 28 }]
 NEGATIVE_SEGMENTS = [{ 'from': 11, 'to': 17 }]
+DATA_MODELS = [
+    {
+        'type': 'peak',
+        'serie': TEST_DATA,
+        'segments': {
+            'positive': POSITIVE_SEGMENTS,
+            'negative': NEGATIVE_SEGMENTS
+        }
+    }
+]
 
 
 class TesterSegment():
@@ -64,18 +70,19 @@ class Metric():
 
 class TestDataModel():
 
-    def __init__(self, data_values: List[float], positive_segments, negative_segments, model_type: str):
+    def __init__(self, data_values: List[float], positive_segments: List[dict], negative_segments: List[dict], model_type: str):
         self.data_values = data_values
         self.positive_segments = positive_segments
         self.negative_segments = negative_segments
         self.model_type = model_type
 
-    def get_segments_for_detection(self, positive_amount, negative_amount):
+    def get_segments_for_detection(self, positive_amount: int, negative_amount: int):
         positive_segments = [segment for idx, segment in enumerate(self.get_positive_segments()) if idx < positive_amount]
         negative_segments = [segment for idx, segment in enumerate(self.get_negative_segments()) if idx < negative_amount]
         return positive_segments + negative_segments
 
-    def get_formated_segments(self, segments, positive: bool):
+    def get_formated_segments(self, segments: List[dict], positive: bool):
+        # TODO: add enum
         return [TesterSegment(segment['from'], segment['to'], positive).get_segment() for segment in segments]
 
     def get_positive_segments(self):
         return self.get_formated_segments(self.positive_segments, True)
@@ -111,8 +118,14 @@ class TestDataModel():
             task['payload']['segments'] = segments
         return task
 
-PEAK_DATA_MODEL = TestDataModel(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak')
-PEAK_DATA_MODELS.append(PEAK_DATA_MODEL)
+PEAK_DATA_MODELS = list(map(
+    lambda data_model: TestDataModel(
+        data_model['serie'],
+        data_model['segments']['positive'],
+        data_model['segments']['negative'],
+        data_model['type']
+    )
+))
 
 async def main(model_type: str) -> None:
     table_metric = []

From 56c4976db201abe4c5080a1debcb95256fe15444 Mon Sep 17 00:00:00 2001
From: vargburz
Date: Mon, 1 Jun 2020 15:38:16 +0300
Subject: [PATCH 3/3] fix

---
 tools/analytic_model_tester.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/analytic_model_tester.py b/tools/analytic_model_tester.py
index 6b847a8..76e49bc 100644
--- a/tools/analytic_model_tester.py
+++ b/tools/analytic_model_tester.py
@@ -124,7 +124,8 @@ PEAK_DATA_MODELS = list(map(
         data_model['segments']['positive'],
         data_model['segments']['negative'],
         data_model['type']
-    )
+    ),
+    DATA_MODELS
 ))
 
 async def main(model_type: str) -> None:
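
Usage sketch (illustrative, a minimal standalone snippet rather than part of the patches): after this series the tester drives AnalyticUnitManager with LEARN/DETECT tasks instead of calling a detector directly. AnalyticUnitManager, handle_analytic_task, TestDataModel.get_task and PEAK_DATA_MODELS are taken from the diff; the helper name run_peak_model and the assumption that the snippet is executed from the tools/ directory (so the relative sys.path entries set up by analytic_model_tester resolve) are mine.

    # A minimal sketch, assuming it runs from tools/ next to analytic_model_tester.py.
    # Importing the tester first extends sys.path, so analytic_unit_manager becomes importable.
    import asyncio

    from analytic_model_tester import PEAK_DATA_MODELS
    from analytic_unit_manager import AnalyticUnitManager

    async def run_peak_model(data_model):
        manager = AnalyticUnitManager()
        # LEARN: train on one labeled positive segment and get the model cache back
        learn_result = await manager.handle_analytic_task(data_model.get_task('LEARN'))
        cache = learn_result['payload']['cache']
        # DETECT: rerun over the same series using the learned cache
        detect_result = await manager.handle_analytic_task(data_model.get_task('DETECT', cache))
        return detect_result['payload']

    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        print(loop.run_until_complete(run_peak_model(PEAK_DATA_MODELS[0])))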