Browse Source

AnomalyDetector: multiple segments labeling #658 (#659)

pull/1/head
Alexandr Velikiy 6 years ago committed by rozetko
parent
commit
f5aa46fc5e
  1. 34
      analytics/analytics/detectors/anomaly_detector.py

34
analytics/analytics/detectors/anomaly_detector.py

@@ -87,18 +87,16 @@ class AnomalyDetector(ProcessingDetector):
             seasonality_index = seasonality // time_step
             #TODO: upper and lower bounds for segment_data
             segment_data = utils.exponential_smoothing(pd.Series(segment['data']), BASIC_ALPHA)
-            upper_seasonality_curve = self.add_season_to_data(
-                smoothed_data, segment_data, seasonality_offset, seasonality_index, True
+            upper_bound = self.add_season_to_data(
+                upper_bound, segment_data, seasonality_offset, seasonality_index, True
             )
-            lower_seasonality_curve = self.add_season_to_data(
-                smoothed_data, segment_data, seasonality_offset, seasonality_index, False
+            lower_bound = self.add_season_to_data(
+                lower_bound, segment_data, seasonality_offset, seasonality_index, False
             )
-            assert len(smoothed_data) == len(upper_seasonality_curve), \
-                f'len smoothed {len(smoothed_data)} != len seasonality {len(upper_seasonality_curve)}'
+            assert len(smoothed_data) == len(upper_bound) == len(lower_bound), \
+                f'len smoothed {len(smoothed_data)} != len seasonality {len(upper_bound)}'
         # TODO: use class for cache to avoid using string literals
-        upper_bound = upper_seasonality_curve + cache['confidence']
-        lower_bound = lower_seasonality_curve - cache['confidence']
         anomaly_indexes = []
         for idx, val in enumerate(data.values):
@@ -172,10 +170,11 @@ class AnomalyDetector(ProcessingDetector):
         # TODO: exponential_smoothing should return dataframe with related timestamps
         smoothed = utils.exponential_smoothing(dataframe['value'], cache['alpha'], cache.get('lastValue'))
-        upper_bound = smoothed + cache['confidence']
-        lower_bound = smoothed - cache['confidence']
         # TODO: remove duplication with detect()
+        upper_bound = dataframe['value'] + cache['confidence']
+        lower_bound = dataframe['value'] - cache['confidence']
         if segments is not None:
             seasonality = cache.get('seasonality')
             assert seasonality is not None and seasonality > 0, \
@@ -188,19 +187,16 @@ class AnomalyDetector(ProcessingDetector):
                 seasonality_offset = (abs(segment['from'] - data_start_time) % seasonality) // time_step
                 seasonality_index = seasonality // time_step
                 segment_data = utils.exponential_smoothing(pd.Series(segment['data']), BASIC_ALPHA)
-                upper_seasonality_curve = self.add_season_to_data(
-                    smoothed, segment_data, seasonality_offset, seasonality_index, True
+                upper_bound = self.add_season_to_data(
+                    upper_bound, segment_data, seasonality_offset, seasonality_index, True
                 )
-                lower_seasonality_curve = self.add_season_to_data(
-                    smoothed, segment_data, seasonality_offset, seasonality_index, False
+                lower_bound = self.add_season_to_data(
+                    lower_bound, segment_data, seasonality_offset, seasonality_index, False
                 )
-                assert len(smoothed) == len(upper_seasonality_curve), \
-                    f'len smoothed {len(smoothed)} != len seasonality {len(upper_seasonality_curve)}'
-                smoothed = upper_seasonality_curve
+                assert len(smoothed) == len(upper_bound) == len(lower_bound), \
+                    f'len smoothed {len(smoothed)} != len seasonality {len(upper_bound)}'
             # TODO: support multiple segments
-        upper_bound = upper_seasonality_curve + cache['confidence']
-        lower_bound = lower_seasonality_curve - cache['confidence']
         timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
         upper_bound_timeseries = list(zip(timestamps, upper_bound.values.tolist()))

Loading…
Cancel
Save