From 86c4433b65f8753e7f71f8380c6593aab4be6415 Mon Sep 17 00:00:00 2001 From: Evgeny Smyshlyaev Date: Fri, 1 Mar 2019 14:02:16 +0300 Subject: [PATCH] Analytic bucket size #446 (#451) --- analytics/analytics/buckets/data_bucket.py | 3 ++- analytics/analytics/detectors/pattern_detector.py | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/analytics/analytics/buckets/data_bucket.py b/analytics/analytics/buckets/data_bucket.py index ee791d1..6606984 100644 --- a/analytics/analytics/buckets/data_bucket.py +++ b/analytics/analytics/buckets/data_bucket.py @@ -11,4 +11,5 @@ class DataBucket(object): self.data = self.data.append(data, ignore_index=True) def drop_data(self, count: int): - self.data = self.data.iloc[count:] + if count > 0: + self.data = self.data.iloc[count:] diff --git a/analytics/analytics/detectors/pattern_detector.py b/analytics/analytics/detectors/pattern_detector.py index 84f5290..bdbc560 100644 --- a/analytics/analytics/detectors/pattern_detector.py +++ b/analytics/analytics/detectors/pattern_detector.py @@ -37,8 +37,7 @@ class PatternDetector(Detector): self.analytic_unit_id = analytic_unit_id self.pattern_type = pattern_type self.model = resolve_model_by_pattern(self.pattern_type) - self.max_window_size = 150 - self.window_size = 0 + self.min_bucket_size = 150 self.bucket = DataBucket() def train(self, dataframe: pd.DataFrame, segments: list, cache: Optional[models.ModelCache]) -> models.ModelCache: @@ -71,12 +70,13 @@ class PatternDetector(Detector): return None self.bucket.receive_data(data_without_nan) - if cache and self.window_size == 0: - self.window_size = cache['WINDOW_SIZE'] + if not cache: cache = {} + bucket_size = max(cache.get('WINDOW_SIZE', 0) * 3, self.min_bucket_size) res = self.detect(self.bucket.data, cache) - if len(self.bucket.data) >= self.window_size and cache != None: - excess_data = len(self.bucket.data) - self.max_window_size + + if len(self.bucket.data) > bucket_size: + excess_data = len(self.bucket.data) - bucket_size self.bucket.drop_data(excess_data) if res: