diff --git a/analytics/analytics/analytic_types/__init__.py b/analytics/analytics/analytic_types/__init__.py new file mode 100644 index 0000000..02159d8 --- /dev/null +++ b/analytics/analytics/analytic_types/__init__.py @@ -0,0 +1,14 @@ +""" +It is the place where we put all classes and types +which are common for all of the code. + +For example, if you write something which is used +in analytic_unit_manager, it should be here. + +If you create something specific which is used only in one place, +like PatternDetectionCache, then it should not be here. +""" + +from analytic_types.data_bucket import DataBucket + +AnalyticUnitId = str diff --git a/analytics/analytics/buckets/data_bucket.py b/analytics/analytics/analytic_types/data_bucket.py similarity index 99% rename from analytics/analytics/buckets/data_bucket.py rename to analytics/analytics/analytic_types/data_bucket.py index 6606984..0561872 100644 --- a/analytics/analytics/buckets/data_bucket.py +++ b/analytics/analytics/analytic_types/data_bucket.py @@ -1,5 +1,6 @@ import pandas as pd + class DataBucket(object): data: pd.DataFrame diff --git a/analytics/analytics/analytic_unit_manager.py b/analytics/analytics/analytic_unit_manager.py index 2061df9..a55d32c 100644 --- a/analytics/analytics/analytic_unit_manager.py +++ b/analytics/analytics/analytic_unit_manager.py @@ -4,14 +4,13 @@ import traceback from concurrent.futures import Executor, ThreadPoolExecutor from analytic_unit_worker import AnalyticUnitWorker +from analytic_types import AnalyticUnitId import detectors from models import ModelCache logger = log.getLogger('AnalyticUnitManager') -AnalyticUnitId = str - def get_detector_by_type( detector_type: str, analytic_unit_type: str, analytic_unit_id: AnalyticUnitId diff --git a/analytics/analytics/buckets/__init__.py b/analytics/analytics/buckets/__init__.py deleted file mode 100644 index d5481d4..0000000 --- a/analytics/analytics/buckets/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from buckets.data_bucket import 
DataBucket diff --git a/analytics/analytics/detectors/pattern_detector.py b/analytics/analytics/detectors/pattern_detector.py index 3223075..4c75c84 100644 --- a/analytics/analytics/detectors/pattern_detector.py +++ b/analytics/analytics/detectors/pattern_detector.py @@ -8,9 +8,10 @@ import pandas as pd from typing import Optional, Generator from detectors import Detector -from buckets import DataBucket +from analytic_types import DataBucket from models import ModelCache from utils import convert_pd_timestamp_to_ms +from analytic_types import AnalyticUnitId logger = logging.getLogger('PATTERN_DETECTOR') @@ -31,7 +32,7 @@ def resolve_model_by_pattern(pattern: str) -> models.Model: return models.CustomModel() raise ValueError('Unknown pattern "%s"' % pattern) -AnalyticUnitId = str + class PatternDetector(Detector): MIN_BUCKET_SIZE = 150 diff --git a/analytics/analytics/models/general_model.py b/analytics/analytics/models/general_model.py index b5b1555..5e12196 100644 --- a/analytics/analytics/models/general_model.py +++ b/analytics/analytics/models/general_model.py @@ -11,6 +11,8 @@ import math from scipy.stats import gaussian_kde from scipy.stats import norm import logging +from analytic_types import AnalyticUnitId + PEARSON_FACTOR = 0.7 @@ -39,8 +41,8 @@ class GeneralModel(Model): center_ind = start + math.ceil((end - start) / 2) return center_ind - def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict, AnalyticUnitId: str) -> None: - logging.debug('Start method do_fit for analytic unit: {}'.format(AnalyticUnitId)) + def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict, id: AnalyticUnitId) -> None: + logging.debug('Start method do_fit for analytic unit: {}'.format(id)) data = utils.cut_dataframe(dataframe) data = data['value'] last_pattern_center = self.state.get('pattern_center', []) @@ -61,10 +63,10 @@ class GeneralModel(Model): 
self.state['convolve_min'], self.state['convolve_max'] = utils.get_min_max(convolve_list, self.state['WINDOW_SIZE'] / 3) self.state['conv_del_min'], self.state['conv_del_max'] = utils.get_min_max(del_conv_list, self.state['WINDOW_SIZE']) - logging.debug('Method do_fit completed correctly for analytic unit: {}'.format(AnalyticUnitId)) + logging.debug('Method do_fit completed correctly for analytic unit: {}'.format(id)) - def do_detect(self, dataframe: pd.DataFrame, AnalyticUnitId: str) -> List[int]: - logging.debug('Start method do_detect for analytic unit: {}'.format(AnalyticUnitId)) + def do_detect(self, dataframe: pd.DataFrame, id: AnalyticUnitId) -> List[int]: + logging.debug('Start method do_detect for analytic unit: {}'.format(id)) data = utils.cut_dataframe(dataframe) data = data['value'] pat_data = self.state.get('pattern_model', []) @@ -76,7 +78,7 @@ class GeneralModel(Model): all_corr_peaks = utils.find_peaks(all_corr, window_size * 2) filtered = self.__filter_detection(all_corr_peaks, data) filtered = list(filtered) - logging.debug('Method do_detect completed correctly for analytic unit: {}'.format(AnalyticUnitId)) + logging.debug('Method do_detect completed correctly for analytic unit: {}'.format(id)) return [(item, item + window_size * 2) for item in filtered] def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]: diff --git a/analytics/analytics/models/model.py b/analytics/analytics/models/model.py index adbbe82..f1c4bed 100644 --- a/analytics/analytics/models/model.py +++ b/analytics/analytics/models/model.py @@ -6,6 +6,7 @@ from typing import Optional import pandas as pd import math import logging +from analytic_types import AnalyticUnitId ModelCache = dict @@ -62,9 +63,8 @@ class Model(ABC): @abstractmethod def get_model_type(self) -> (str, bool): pass - - # TODO: id: str -> id: AnalyticUnitId in all models - def fit(self, dataframe: pd.DataFrame, segments: list, id: str, cache: 
Optional[ModelCache]) -> ModelCache: + + def fit(self, dataframe: pd.DataFrame, segments: list, id: AnalyticUnitId, cache: Optional[ModelCache]) -> ModelCache: logging.debug('Start method fit for analytic unit {}'.format(id)) data = dataframe['value'] if cache != None and len(cache) > 0: