
Find start and end of peaks and troughs #506 (#507)

Alexandr Velikiy, 6 years ago, committed by rozetko
commit 0f1d7774fe
Changed files (lines changed):

1. analytics/analytics/models/drop_model.py (4)
2. analytics/analytics/models/general_model.py (2)
3. analytics/analytics/models/jump_model.py (4)
4. analytics/analytics/models/model.py (4)
5. analytics/analytics/models/peak_model.py (5)
6. analytics/analytics/models/trough_model.py (5)
7. analytics/analytics/utils/common.py (64)
8. analytics/tests/test_dataset.py (3)
9. analytics/tests/test_utils.py (36)

analytics/analytics/models/drop_model.py

@@ -67,8 +67,8 @@ class DropModel(Model):
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         possible_drops = utils.find_drop(data, self.state['DROP_HEIGHT'], self.state['DROP_LENGTH'] + 1)
-        return self.__filter_detection(possible_drops, data)
+        result = self.__filter_detection(possible_drops, data)
+        return [(val - 1, val + 1) for val in result]

     def __filter_detection(self, segments: list, data: list):
         delete_list = []
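The detector no longer returns bare indices: each filtered drop position is widened by one sample on either side, so callers receive (start, end) index pairs. JumpModel below gets the identical treatment. A minimal sketch of the new return shape, with made-up center indices:

    # hypothetical drop centers, as __filter_detection would return them
    drop_centers = [10, 42, 97]
    # each center becomes a (start, end) index pair, one sample on each side
    segments = [(val - 1, val + 1) for val in drop_centers]
    assert segments == [(9, 11), (41, 43), (96, 98)]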

analytics/analytics/models/general_model.py

@@ -77,7 +77,7 @@ class GeneralModel(Model):
         filtered = self.__filter_detection(all_corr_peaks, data)
         filtered = list(filtered)
         logging.debug('Method do_detect completed correctly for analytic unit: {}'.format(AnalyticUnitId))
-        return set(item + window_size for item in filtered)
+        return [(item, item + window_size * 2) for item in filtered]

     def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]:
         if not self.state.get('pattern_center'):
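The meaning of the return value changes here as well: `item` marks the start of a matched window, so where the old code shifted it by window_size to report a single center point, the new code reports the full matched span of length 2 * window_size. The return type also changes from a set of ints to a list of tuples, matching the other models.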

analytics/analytics/models/jump_model.py

@@ -68,8 +68,8 @@ class JumpModel(Model):
         data = utils.cut_dataframe(dataframe)
         data = data['value']
         possible_jumps = utils.find_jump(data, self.state['JUMP_HEIGHT'], self.state['JUMP_LENGTH'] + 1)
-        return self.__filter_detection(possible_jumps, data)
+        result = self.__filter_detection(possible_jumps, data)
+        return [(val - 1, val + 1) for val in result]

     def __filter_detection(self, segments, data):
         delete_list = []

analytics/analytics/models/model.py

@@ -104,8 +104,8 @@ class Model(ABC):
         }
         result = self.do_detect(dataframe, id)
         segments = [(
-            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x - 1]),
-            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x + 1])
+            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]),
+            utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]),
         ) for x in result]
         if not self.state:
             logging.warning('Return empty self.state after detect')
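Model.detect consumes the new tuple format directly: each (start, end) index pair from do_detect becomes a pair of Unix-millisecond timestamps. A minimal sketch, assuming convert_pd_timestamp_to_ms turns a pandas Timestamp into epoch milliseconds (its implementation is outside this diff):

    import pandas as pd

    def convert_pd_timestamp_to_ms(timestamp: pd.Timestamp) -> int:
        # assumed behaviour: nanoseconds since epoch -> milliseconds
        return int(timestamp.value) // 1000000

    dataframe = pd.DataFrame({
        'timestamp': pd.date_range('2019-01-01', periods=5, freq='1min')
    })
    result = [(1, 3)]  # hypothetical (start, end) pair from do_detect
    segments = [(
        convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]),
        convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]),
    ) for x in result]
    assert segments == [(1546300860000, 1546300980000)]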

analytics/analytics/models/peak_model.py

@@ -80,8 +80,9 @@ class PeakModel(Model):
         for i in all_maxs:
             if data[i] > extrema_list[i]:
                 segments.append(i)
-        return self.__filter_detection(segments, data)
+        result = self.__filter_detection(segments, data)
+        result = utils.get_borders_of_peaks(result, data, self.state.get('WINDOW_SIZE'), self.state.get('confidence'))
+        return result

     def __filter_detection(self, segments: list, data: list) -> list:
         delete_list = []

analytics/analytics/models/trough_model.py

@@ -80,8 +80,9 @@ class TroughModel(Model):
         for i in all_mins:
             if data[i] < extrema_list[i]:
                 segments.append(i)
-        return self.__filter_detection(segments, data)
+        result = self.__filter_detection(segments, data)
+        result = utils.get_borders_of_peaks(result, data, self.state.get('WINDOW_SIZE'), self.state.get('confidence'), inverse = True)
+        return result

     def __filter_detection(self, segments: list, data: list) -> list:
         delete_list = []
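PeakModel (above) and TroughModel previously returned the filtered extremum indices themselves; both now pass those indices through utils.get_borders_of_peaks (added below in common.py) so a detection spans the whole rise and fall of the pattern, with inverse = True flipping a trough into a peak first. The resulting (start, end) pairs are then converted to timestamps by Model.detect as shown above.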

analytics/analytics/utils/common.py

@@ -6,14 +6,14 @@ from scipy.signal import argrelextrema
 from scipy.stats import gaussian_kde
 from scipy.stats.stats import pearsonr
 import math
-from typing import Union, List, Generator
+from typing import Union, List, Generator, Tuple
 import utils
 import logging
 from itertools import islice
 from collections import deque

 SHIFT_FACTOR = 0.05
-CONFIDENCE_FACTOR = 0.2
+CONFIDENCE_FACTOR = 0.5
 SMOOTHING_FACTOR = 5

 def exponential_smoothing(series, alpha):
@@ -191,7 +191,12 @@ def find_extremum_index(segment: np.ndarray, selector: bool) -> int:
     else:
         return segment.argmin()

-def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
+def get_interval(data: pd.Series, center: int, window_size: int, normalization = False) -> pd.Series:
+    """
+    Get an interval of length 2 * window_size:
+    window_size to the left and window_size to the right of center.
+    If normalization == True, subtract the minimum from the interval.
+    """
     if center >= len(data):
         logging.warning('Pattern center {} is out of data with len {}'.format(center, len(data)))
         return []
@@ -201,7 +201,58 @@ def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
         left_bound = 0
     if right_bound > len(data):
         right_bound = len(data)
-    return data[left_bound: right_bound]
+    result_interval = data[left_bound: right_bound]
+    if normalization:
+        result_interval = subtract_min_without_nan(result_interval)
+    return result_interval
+
+def get_borders_of_peaks(pattern_centers: List[int], data: pd.Series, window_size: int, confidence: float, max_border_factor = 1.0, inverse = False) -> List[Tuple[int, int]]:
+    """
+    Find the start and end of the pattern around each peak center.
+    max_border_factor limits how far a border may lie from the center.
+    If inverse == True, segments are inverted first (trough -> peak / peak -> trough).
+    """
+    if len(pattern_centers) == 0:
+        return []
+    border_list = []
+    window_size = math.ceil(max_border_factor * window_size)
+    for center in pattern_centers:
+        current_pattern = get_interval(data, center, window_size, True)
+        if inverse:
+            current_pattern = inverse_segment(current_pattern)
+        current_pattern = current_pattern - confidence
+        left_segment = current_pattern[:window_size]
+        right_segment = current_pattern[window_size:]
+        left_border = get_end_of_segment(left_segment, descending = False)
+        right_border = get_end_of_segment(right_segment)
+        border_list.append((left_border, right_border))
+    return border_list
+
+def get_end_of_segment(segment: pd.Series, skip_positive_values = True, descending = True) -> int:
+    """
+    Find the end of the descending or ascending part of a pattern.
+    The allowable error is 1 index.
+    """
+    if not descending:
+        segment = segment.iloc[::-1]
+    if len(segment) == 0:
+        return 1
+    for idx in range(1, len(segment) - 1):
+        if skip_positive_values and segment.values[idx] > 0:
+            continue
+        if segment.values[idx] >= segment.values[idx - 1]:
+            return segment.index[idx - 1]
+    return segment.index[-1]
+
+def inverse_segment(segment: pd.Series) -> pd.Series:
+    """
+    Convert a trough to a peak and vice versa.
+    """
+    if len(segment) > 0:
+        rev_val = max(segment.values)
+        for idx in range(len(segment)):
+            segment.values[idx] = math.fabs(segment.values[idx] - rev_val)
+    return segment
+
 def subtract_min_without_nan(segment: pd.Series) -> pd.Series:
     if len(segment) == 0:
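The heart of the change is get_borders_of_peaks: each detected center is expanded into a min-normalized window, the confidence is subtracted, and get_end_of_segment walks outward on each side until the shifted values stop descending. A usage sketch, reusing the series from the new test below:

    import pandas as pd
    import utils  # analytics/analytics/utils/common.py

    data = pd.Series([1, 0, 1, 2, 3, 2, 1, 0, 0, 1, 2, 3, 4,
                      3, 2, 2, 1, 0, 1, 2, 3, 4, 5, 3, 2, 1, 0])
    centers = [4, 12, 22]  # peak indices, as a detector would report them
    borders = utils.get_borders_of_peaks(centers, data, window_size=3, confidence=1.5)
    # -> [(1, 7), (9, 15), (19, 25)]

For the first peak (center 4), the window normalized and shifted down by the confidence of 1.5 has descended below zero by indices 1 and 7, so the reported segment is (1, 7). Note that inverse_segment writes into segment.values, so it mutates the Series it is given as well as returning it; get_borders_of_peaks passes in a normalized interval, which is already a derived copy.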

analytics/tests/test_dataset.py

@@ -180,7 +180,8 @@ class TestDataset(unittest.TestCase):
         for _ in range(2):
             model.do_detect(dataframe, 'test')
         max_pattern_index = max(model.do_detect(dataframe, 'test'))
-        self.assertLessEqual(max_pattern_index, result)
+        self.assertLessEqual(max_pattern_index[0], result)

     def test_peak_model_for_cache(self):
         cache = {

analytics/tests/test_utils.py

@@ -16,7 +16,7 @@ class TestUtils(unittest.TestCase):
     def test_confidence_all_normal_value(self):
         segment = [1, 2, 0, 6, 8, 5, 3]
         utils_result = utils.find_confidence(segment)[0]
-        result = 1.6
+        result = 4.0
         self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))

     def test_confidence_all_nan_value(self):
@@ -26,7 +26,7 @@ class TestUtils(unittest.TestCase):
     def test_confidence_with_nan_value(self):
         data = [np.NaN, np.NaN, 0, 8]
         utils_result = utils.find_confidence(data)[0]
-        result = 1.6
+        result = 4.0
         self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))

     def test_interval_all_normal_value(self):
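The expected confidence values move from 1.6 to 4.0 in step with the CONFIDENCE_FACTOR change (0.2 -> 0.5) in common.py: both test inputs span a value range of 8 (max 8, min 0), consistent with find_confidence scaling that range by the factor, so 0.2 * 8 = 1.6 before and 0.5 * 8 = 4.0 after.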
@@ -248,6 +248,38 @@ class TestUtils(unittest.TestCase):
         corr_data = list(corr_data)
         self.assertGreaterEqual(len(corr_data), result)

+    def test_inverse_segment(self):
+        data = pd.Series([1, 2, 3, 4, 3, 2, 1])
+        result = pd.Series([3, 2, 1, 0, 1, 2, 3])
+        utils_result = utils.inverse_segment(data)
+        for ind, val in enumerate(utils_result):
+            self.assertEqual(val, result[ind])
+
+    def test_get_end_of_segment_equal(self):
+        data = pd.Series([5, 4, 3, 2, 1, 0, 0, 0])
+        result_list = [4, 5, 6]
+        self.assertIn(utils.get_end_of_segment(data, False), result_list)
+
+    def test_get_end_of_segment_greater(self):
+        data = pd.Series([5, 4, 3, 2, 1, 0, 1, 2, 3])
+        result_list = [4, 5, 6]
+        self.assertIn(utils.get_end_of_segment(data, False), result_list)
+
+    def test_get_borders_of_peaks(self):
+        data = pd.Series([1, 0, 1, 2, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 2, 2, 1, 0, 1, 2, 3, 4, 5, 3, 2, 1, 0])
+        pattern_center = [4, 12, 22]
+        ws = 3
+        confidence = 1.5
+        result = [(1, 7), (9, 15), (19, 25)]
+        self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence), result)
+
+    def test_get_borders_of_peaks_for_trough(self):
+        data = pd.Series([4, 4, 5, 5, 3, 1, 3, 5, 5, 6, 3, 2])
+        pattern_center = [5]
+        ws = 5
+        confidence = 3
+        result = [(3, 7)]
+        self.assertEqual(utils.get_borders_of_peaks(pattern_center, data, ws, confidence, inverse = True), result)

 if __name__ == '__main__':
     unittest.main()
