
Merge branch 'master' of github.com:hastic/hastic-server

pull/1/head
Coin de Gamma, 5 years ago
commit 664ebe5f4d

Changed files:
  1. analytics/analytics/models/model.py (39 changed lines)
  2. analytics/tools/analytic_model_tester.py (118 changed lines)

analytics/analytics/models/model.py

@@ -2,7 +2,7 @@ import utils
 from abc import ABC, abstractmethod
 from attrdict import AttrDict
-from typing import Optional
+from typing import Optional, List
 import pandas as pd
 import math
 import logging
@@ -42,6 +42,43 @@ class Segment(AttrDict):
         nan_list = utils.find_nan_indexes(self.data)
         self.data = utils.nan_to_zero(self.data, nan_list)
+
+class ModelState():
+
+    def __init__(
+        self,
+        pattern_center: List[int] = [],
+        pattern_model: List[float] = [],
+        convolve_max: float = 0,
+        convolve_min: float = 0,
+        window_size: int = 0,
+        conv_del_min: float = 0,
+        conv_del_max: float = 0
+    ):
+        self.pattern_center = pattern_center
+        self.pattern_model = pattern_model
+        self.convolve_max = convolve_max
+        self.convolve_min = convolve_min
+        self.window_size = window_size
+        self.conv_del_min = conv_del_min
+        self.conv_del_max = conv_del_max
+
+    def to_json(self) -> dict:
+        return {
+            'pattern_center': self.pattern_center,
+            'pattern_model': self.pattern_model,
+            'convolve_max': self.convolve_max,
+            'convolve_min': self.convolve_min,
+            'window_size': self.window_size,
+            'conv_del_min': self.conv_del_min,
+            'conv_del_max': self.conv_del_max,
+        }
+
+    @staticmethod
+    def from_json(json: Optional[dict] = None):
+        if json is None:
+            json = {}
+        return ModelState(**json)
 
 class Model(ABC):
 
     HEIGHT_ERROR = 0.1
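
For reference, a minimal usage sketch of the ModelState class added above. Only the to_json/from_json round-trip comes from the diff; the import path and field values are assumptions for illustration.

# Minimal sketch: round-tripping a ModelState through to_json/from_json.
# The import path is assumed from the package layout (analytics/analytics/models/model.py).
from models.model import ModelState

state = ModelState(pattern_center=[12, 45], pattern_model=[0.1, 0.8, 0.1], window_size=3)
payload = state.to_json()                 # plain dict, ready to be stored as a JSON cache
restored = ModelState.from_json(payload)  # rebuilds an equivalent ModelState
empty = ModelState.from_json(None)        # a missing cache falls back to the defaults

assert restored.pattern_center == [12, 45]
assert empty.window_size == 0

One thing to note about this design: the mutable list defaults in __init__ ([]) are evaluated once and shared between all instances that rely on them, which is a common Python pitfall to keep in mind when mutating pattern_center or pattern_model in place.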

analytics/tools/analytic_model_tester.py

@@ -0,0 +1,118 @@
import sys
ANALYTICS_PATH = '../analytics'
TESTS_PATH = '../tests'
sys.path.extend([ANALYTICS_PATH, TESTS_PATH])
import pandas as pd
import numpy as np
import utils
import models
import test_dataset

# TODO: get_dataset
# TODO: get_segment

PEAK_DATASETS = []
# dataset with 3 peaks
TEST_DATA = test_dataset.create_dataframe([0, 3, 5, 7, 5, 3, 0, 0, 1, 0, 1, 4, 6, 8, 6, 4, 1, 0, 0, 0, 1, 0, 3, 5, 7, 5, 3, 0, 1, 1])
POSITIVE_SEGMENTS = [(1523889000000, 1523889000006), (1523889000021, 1523889000027)]
NEGATIVE_SEGMENTS = [(1523889000009, 1523889000017)]


class Segment():

    def __init__(self, start: int, end: int, labeled: bool):
        self.start = start
        self.end = end
        self.labeled = labeled

    def get_segment(self):
        return {
            '_id': 'q',
            'analyticUnitId': 'q',
            'from': self.start,
            'to': self.end,
            'labeled': self.labeled,
            'deleted': not self.labeled
        }


class Metric():

    def __init__(self, true_result, model_result):
        self.true_result = true_result
        self.model_result = model_result['segments']

    def get_amount(self):
        return len(self.model_result) / len(self.true_result)

    def get_accuracy(self):
        correct_segment = 0
        invalid_segment = 0
        for segment in self.model_result:
            current_cs = correct_segment
            for pattern in self.true_result:
                if pattern[0] <= segment[0] and pattern[1] >= segment[1]:
                    correct_segment += 1
                    break
            if correct_segment == current_cs:
                invalid_segment += 1
        non_detected = len(self.true_result) - correct_segment
        return (correct_segment, invalid_segment, non_detected)


class ModelData():

    def __init__(self, frame: pd.DataFrame, positive_segments, negative_segments, model_type: str):
        self.frame = frame
        self.positive_segments = positive_segments
        self.negative_segments = negative_segments
        self.model_type = model_type

    def get_segments_for_detection(self, positive_amount, negative_amount):
        segments = []
        for idx, bounds in enumerate(self.positive_segments):
            if idx >= positive_amount:
                break
            segments.append(Segment(bounds[0], bounds[1], True).get_segment())
        for idx, bounds in enumerate(self.negative_segments):
            if idx >= negative_amount:
                break
            segments.append(Segment(bounds[0], bounds[1], False).get_segment())
        return segments

    def get_all_correct_segments(self):
        return self.positive_segments


PEAK_DATA_1 = ModelData(TEST_DATA, POSITIVE_SEGMENTS, NEGATIVE_SEGMENTS, 'peak')
PEAK_DATASETS.append(PEAK_DATA_1)


def main(model_type: str) -> None:
    table_metric = []
    if model_type == 'peak':
        for data in PEAK_DATASETS:
            dataset = data.frame
            segments = data.get_segments_for_detection(1, 0)
            model = models.PeakModel()
            cache = model.fit(dataset, segments, 'test', {})
            detect_result = model.detect(dataset, 'test', cache)
            peak_metric = Metric(data.get_all_correct_segments(), detect_result)
            table_metric.append((peak_metric.get_amount(), peak_metric.get_accuracy()))
    return table_metric


if __name__ == '__main__':
    '''
    This tool applies the model to the datasets and verifies that the detection result matches the expected values.
    sys.argv[1] expects one of the model names -> see correct_name
    '''
    # TODO: use enum
    correct_name = ['peak', 'trough', 'jump', 'drop', 'general']
    if len(sys.argv) < 2:
        print('Enter one of the model names: {}'.format(correct_name))
        sys.exit(1)
    model_type = str(sys.argv[1]).lower()
    if model_type in correct_name:
        print(main(model_type))
    else:
        print('Enter one of the model names: {}'.format(correct_name))
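
As a rough illustration of what the Metric class above reports (not part of the commit): the segment tuples below are invented, and the dict only mimics the {'segments': [...]} shape that Metric expects from detect(), so the exact numbers are an assumption.

# Sketch of Metric behaviour, assuming the Metric class from analytic_model_tester.py is in scope
# (e.g. run inside that module). Segment bounds are invented for illustration.
true_segments = [(0, 6), (21, 27)]                  # labeled peaks (the expected answer)
detect_result = {'segments': [(1, 5), (10, 12)]}    # one detection inside (0, 6), one spurious

metric = Metric(true_segments, detect_result)
print(metric.get_amount())     # 1.0 -> the model returned as many segments as were labeled
print(metric.get_accuracy())   # (1, 1, 1) -> 1 correct, 1 invalid, 1 labeled peak not detected

# The tool itself is run from analytics/tools with the model name as the only argument:
#   python analytic_model_tester.py peak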