Browse Source

Increase the number of parameters obtained during learning #364 (#383)

Increase the number of parameters obtained during learning #364
pull/1/head
Alexandr Velikiy 5 years ago committed by Evgeny Smyshlyaev
parent
commit
6e7073b212
  1. 20
      analytics/analytics/models/drop_model.py
  2. 16
      analytics/analytics/models/general_model.py
  3. 19
      analytics/analytics/models/jump_model.py
  4. 16
      analytics/analytics/models/model.py
  5. 25
      analytics/analytics/models/peak_model.py
  6. 26
      analytics/analytics/models/trough_model.py
  7. 53
      analytics/analytics/utils/common.py
  8. 29
      analytics/tests/test_utils.py

20
analytics/analytics/models/drop_model.py

@@ -26,33 +26,45 @@ class DropModel(Model):
'conv_del_min': 54000,
'conv_del_max': 55000,
}
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
data = dataframe['value']
segment = data[start: end]
segment_center_index = utils.find_pattern_center(segment, start, 'drop')
return segment_center_index
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
confidences = []
convolve_list = []
correlation_list = []
drop_height_list = []
drop_length_list = []
patterns_list = []
pattern_timestamp = []
for segment in labeled_segments:
confidence = utils.find_confidence(segment.data)
confidence = utils.find_confidence(segment.data)[0]
confidences.append(confidence)
segment_cent_index, drop_height, drop_length = utils.find_parameters(segment.data, segment.start, 'drop')
segment_cent_index = segment.center_index
drop_height, drop_length = utils.find_parameters(segment.data, segment.start, 'drop')
drop_height_list.append(drop_height)
drop_length_list.append(drop_length)
self.idrops.append(segment_cent_index)
pattern_timestamp.append(segment.pattern_timestamp)
labeled_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
labeled_drop = utils.subtract_min_without_nan(labeled_drop)
patterns_list.append(labeled_drop)
self.model_drop = utils.get_av_model(patterns_list)
convolve_list = utils.get_convolve(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
correlation_list = utils.get_correlation(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
del_conv_list = []
delete_pattern_timestamp = []
for segment in deleted_segments:
segment_cent_index = utils.find_parameters(segment.data, segment.start, 'drop')[0]
segment_cent_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
deleted_drop = utils.subtract_min_without_nan(deleted_drop)
del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.model_drop)

16
analytics/analytics/models/general_model.py

@@ -26,25 +26,37 @@ class GeneralModel(Model):
'conv_del_max': 120,
}
self.all_conv = []
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
data = dataframe['value']
segment = data[start: end]
center_ind = start + math.ceil((end - start) / 2)
return center_ind
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
convolve_list = []
correlation_list = []
patterns_list = []
pattern_timestamp = []
for segment in labeled_segments:
center_ind = segment.start + math.ceil(segment.length / 2)
center_ind = segment.center_index
self.ipats.append(center_ind)
pattern_timestamp.append(segment.pattern_timestamp)
segment_data = utils.get_interval(data, center_ind, self.state['WINDOW_SIZE'])
segment_data = utils.subtract_min_without_nan(segment_data)
patterns_list.append(segment_data)
self.model_gen = utils.get_av_model(patterns_list)
convolve_list = utils.get_convolve(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
correlation_list = utils.get_correlation(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
del_conv_list = []
delete_pattern_timestamp = []
for segment in deleted_segments:
del_mid_index = segment.start + math.ceil(segment.length / 2)
del_mid_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
deleted_pat = utils.subtract_min_without_nan(deleted_pat)
del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.model_gen)

19
analytics/analytics/models/jump_model.py

@@ -27,32 +27,45 @@ class JumpModel(Model):
'conv_del_min': 54000,
'conv_del_max': 55000,
}
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
data = dataframe['value']
segment = data[start: end]
segment_center_index = utils.find_pattern_center(segment, start, 'jump')
return segment_center_index
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
confidences = []
convolve_list = []
correlation_list = []
jump_height_list = []
jump_length_list = []
patterns_list = []
pattern_timestamp = []
for segment in labeled_segments:
confidence = utils.find_confidence(segment.data)
confidence = utils.find_confidence(segment.data)[0]
confidences.append(confidence)
segment_cent_index, jump_height, jump_length = utils.find_parameters(segment.data, segment.start, 'jump')
segment_cent_index = segment.center_index
jump_height, jump_length = utils.find_parameters(segment.data, segment.start, 'jump')
jump_height_list.append(jump_height)
jump_length_list.append(jump_length)
self.ijumps.append(segment_cent_index)
pattern_timestamp.append(segment.pattern_timestamp)
labeled_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
labeled_jump = utils.subtract_min_without_nan(labeled_jump)
patterns_list.append(labeled_jump)
self.model_jump = utils.get_av_model(patterns_list)
convolve_list = utils.get_convolve(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
correlation_list = utils.get_correlation(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
del_conv_list = []
delete_pattern_timestamp = []
for segment in deleted_segments:
segment_cent_index = utils.find_parameters(segment.data, segment.start, 'jump')[0]
segment_cent_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
deleted_jump = utils.subtract_min_without_nan(deleted_jump)
del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.model_jump)

16
analytics/analytics/models/model.py

@@ -12,12 +12,19 @@ class Segment(AttrDict):
__percent_of_nans = 0
def __init__(self, dataframe: pd.DataFrame, segment_map: dict):
def __init__(self, dataframe: pd.DataFrame, segment_map: dict, center_finder = None):
self.update(segment_map)
self.start = utils.timestamp_to_index(dataframe, pd.to_datetime(self['from'], unit='ms'))
self.end = utils.timestamp_to_index(dataframe, pd.to_datetime(self['to'], unit='ms'))
self.length = abs(self.end - self.start)
if callable(center_finder):
self.center_index = center_finder(dataframe, self.start, self.end)
self.pattern_timestamp = dataframe['timestamp'][self.center_index]
else:
self.center_index = self.start + math.ceil(self.length / 2)
self.pattern_timestamp = dataframe['timestamp'][self.center_index]
assert len(dataframe['value']) >= self.end + 1, \
'segment {}-{} out of dataframe length={}'.format(self.start, self.end+1, len(dataframe['value']))
@@ -43,6 +50,10 @@ class Model(ABC):
def do_detect(self, dataframe: pd.DataFrame) -> list:
pass
@abstractmethod
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
pass
def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[ModelCache]) -> ModelCache:
if type(cache) is ModelCache:
self.state = cache
@@ -52,12 +63,11 @@ class Model(ABC):
deleted = []
for segment_map in segments:
if segment_map['labeled'] or segment_map['deleted']:
segment = Segment(dataframe, segment_map)
segment = Segment(dataframe, segment_map, self.find_segment_center)
if segment.percent_of_nans > 0.1 or len(segment.data) == 0:
continue
if segment.percent_of_nans > 0:
segment.convert_nan_to_zero()
max_length = max(segment.length, max_length)
if segment.labeled: labeled.append(segment)
if segment.deleted: deleted.append(segment)

25
analytics/analytics/models/peak_model.py

@@ -26,32 +26,51 @@ class PeakModel(Model):
'conv_del_min': 54000,
'conv_del_max': 55000,
}
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
data = dataframe['value']
segment = data[start: end]
return segment.idxmax()
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
confidences = []
convolve_list = []
correlation_list = []
patterns_list = []
pattern_width = []
pattern_height = []
pattern_timestamp = []
for segment in labeled_segments:
confidence = utils.find_confidence(segment.data)
confidence = utils.find_confidence(segment.data)[0]
confidences.append(confidence)
segment_max_index = segment.data.idxmax()
segment_max_index = segment.center_index
self.ipeaks.append(segment_max_index)
pattern_timestamp.append(segment.pattern_timestamp)
labeled = utils.get_interval(data, segment_max_index, self.state['WINDOW_SIZE'])
labeled = utils.subtract_min_without_nan(labeled)
patterns_list.append(labeled)
pattern_height.append(utils.find_confidence(labeled)[1])
pattern_width.append(utils.find_width(labeled, True))
self.model = utils.get_av_model(patterns_list)
convolve_list = utils.get_convolve(self.ipeaks, self.model, data, self.state['WINDOW_SIZE'])
correlation_list = utils.get_correlation(self.ipeaks, self.model, data, self.state['WINDOW_SIZE'])
del_conv_list = []
delete_pattern_width = []
delete_pattern_height = []
delete_pattern_timestamp = []
for segment in deleted_segments:
del_max_index = segment.data.idxmax()
del_max_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
deleted = utils.subtract_min_without_nan(deleted)
del_conv = scipy.signal.fftconvolve(deleted, self.model)
if len(del_conv): del_conv_list.append(max(del_conv))
delete_pattern_height.append(utils.find_confidence(deleted)[1])
delete_pattern_width.append(utils.find_width(deleted, True))
self._update_fiting_result(self.state, confidences, convolve_list, del_conv_list)

26
analytics/analytics/models/trough_model.py

@@ -26,33 +26,51 @@ class TroughModel(Model):
'conv_del_min': 54000,
'conv_del_max': 55000,
}
def find_segment_center(self, dataframe: pd.DataFrame, start: int, end: int) -> int:
data = dataframe['value']
segment = data[start: end]
return segment.idxmin()
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list) -> None:
data = utils.cut_dataframe(dataframe)
data = data['value']
confidences = []
convolve_list = []
correlation_list = []
patterns_list = []
pattern_width = []
pattern_height = []
pattern_timestamp = []
for segment in labeled_segments:
confidence = utils.find_confidence(segment.data)
confidence = utils.find_confidence(segment.data)[0]
confidences.append(confidence)
segment_min_index = segment.data.idxmin()
segment_min_index = segment.center_index
self.itroughs.append(segment_min_index)
pattern_timestamp.append(segment.pattern_timestamp)
labeled = utils.get_interval(data, segment_min_index, self.state['WINDOW_SIZE'])
labeled = utils.subtract_min_without_nan(labeled)
patterns_list.append(labeled)
pattern_height.append(utils.find_confidence(labeled)[1])
pattern_width.append(utils.find_width(labeled, False))
self.model = utils.get_av_model(patterns_list)
convolve_list = utils.get_convolve(self.itroughs, self.model, data, self.state['WINDOW_SIZE'])
correlation_list = utils.get_correlation(self.itroughs, self.model, data, self.state['WINDOW_SIZE'])
del_conv_list = []
delete_pattern_width = []
delete_pattern_height = []
delete_pattern_timestamp = []
for segment in deleted_segments:
del_min_index = segment.data.idxmin()
del_min_index = segment.center_index
delete_pattern_timestamp.append(segment.pattern_timestamp)
deleted = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
deleted = utils.subtract_min_without_nan(deleted)
del_conv = scipy.signal.fftconvolve(deleted, self.model)
if len(del_conv): del_conv_list.append(max(del_conv))
delete_pattern_height.append(utils.find_confidence(deleted)[1])
delete_pattern_width.append(utils.find_width(deleted, False))
self._update_fiting_result(self.state, confidences, convolve_list, del_conv_list)

53
analytics/analytics/utils/common.py

@@ -4,6 +4,7 @@ import scipy.signal
from scipy.fftpack import fft
from scipy.signal import argrelextrema
from scipy.stats import gaussian_kde
from scipy.stats.stats import pearsonr
from typing import Union
import utils
@@ -154,11 +155,38 @@ def nan_to_zero(segment: Union[pd.Series, list], nan_list: list) -> Union[pd.Series, list]:
segment[val] = 0
return segment
def find_confidence(segment: pd.Series) -> float:
def find_confidence(segment: pd.Series) -> (float, float):
segment = utils.check_nan_values(segment)
segment_min = min(segment)
segment_max = max(segment)
return CONFIDENCE_FACTOR * (segment_max - segment_min)
height = segment_max - segment_min
if height:
return (CONFIDENCE_FACTOR * height, height)
else:
return (0, 0)
def find_width(pattern: pd.Series, selector) -> int:
pattern = pattern.values
center = utils.find_extremum_index(pattern, selector)
pattern_left = pattern[:center]
pattern_right = pattern[center:]
left_extremum_index = utils.find_last_extremum(pattern_left, selector)
right_extremum_index = utils.find_extremum_index(pattern_right, not selector)
left_width = center - left_extremum_index
right_width = right_extremum_index + 1
return right_width + left_width
def find_last_extremum(segment: np.ndarray, selector: bool) -> int:
segment = segment[::-1]
first_extremum_ind = find_extremum_index(segment, not selector)
last_extremum_ind = len(segment) - first_extremum_ind - 1
return last_extremum_ind
def find_extremum_index(segment: np.ndarray, selector: bool) -> int:
if selector:
return segment.argmax()
else:
return segment.argmin()
def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
left_bound = center - window_size
@@ -192,6 +220,19 @@ def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
convolve_list.append(max(convolve_segment))
return convolve_list
def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
labeled_segment = []
correlation_list = []
p_value_list = []
for segment in segments:
labeled_segment = utils.get_interval(data, segment, window_size)
labeled_segment = utils.subtract_min_without_nan(labeled_segment)
labeled_segment = utils.check_nan_values(labeled_segment)
correlation = pearsonr(labeled_segment, av_model)
correlation_list.append(correlation[0])
p_value_list.append(correlation[1])
return correlation_list
def get_distribution_density(segment: pd.Series) -> float:
if len(segment) < 2:
return (0, 0, 0)
@@ -224,10 +265,14 @@ def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str):
segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(segment)
height = 0.95 * (segment_max_line - segment_min_line)
length = utils.find_length(segment_data, segment_min_line, segment_max_line, pat_type)
cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pat_type)
return height, length
def find_pattern_center(segment_data: pd.Series, segment_from_index: int, pattern_type: str):
segment_median = utils.get_distribution_density(segment_data)[0]
cen_ind = utils.pattern_intersection(segment_data.tolist(), segment_median, pattern_type)
pat_center = cen_ind[0]
segment_cent_index = pat_center + segment_from_index
return segment_cent_index, height, length
return segment_cent_index
def find_length(segment_data: pd.Series, segment_min_line: float, segment_max_line: float, pat_type: str) -> int:
x_abscissa = np.arange(0, len(segment_data))

29
analytics/tests/test_utils.py

@@ -14,17 +14,17 @@ class TestUtils(unittest.TestCase):
def test_confidence_all_normal_value(self):
segment = [1, 2, 0, 6, 8, 5, 3]
utils_result = utils.find_confidence(segment)
utils_result = utils.find_confidence(segment)[0]
result = 1.6
self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
def test_confidence_all_nan_value(self):
segment = [np.NaN, np.NaN, np.NaN, np.NaN]
self.assertEqual(utils.find_confidence(segment), 0)
self.assertEqual(utils.find_confidence(segment)[0], 0)
def test_confidence_with_nan_value(self):
data = [np.NaN, np.NaN, 0, 8]
utils_result = utils.find_confidence(data)
utils_result = utils.find_confidence(data)[0]
result = 1.6
self.assertTrue(math.isclose(utils_result, result, rel_tol = RELATIVE_TOLERANCE))
@@ -91,39 +91,39 @@ class TestUtils(unittest.TestCase):
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
segment = pd.Series(segment)
jump_center = [10, 11]
self.assertIn(utils.find_parameters(segment, 0, 'jump')[0], jump_center)
self.assertIn(utils.find_pattern_center(segment, 0, 'jump'), jump_center)
def test_find_jump_parameters_height(self):
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
segment = pd.Series(segment)
jump_height = [3.5, 4]
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_height[0])
self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_height[1])
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[0])
self.assertLessEqual(utils.find_parameters(segment, 0, 'jump')[0], jump_height[1])
def test_find_jump_parameters_length(self):
segment = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
segment = pd.Series(segment)
jump_length = 2
self.assertEqual(utils.find_parameters(segment, 0, 'jump')[2], jump_length)
self.assertEqual(utils.find_parameters(segment, 0, 'jump')[1], jump_length)
def test_find_drop_parameters_center(self):
segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
segment = pd.Series(segment)
drop_center = [14, 15, 16]
self.assertIn(utils.find_parameters(segment, 0, 'drop')[0], drop_center)
self.assertIn(utils.find_pattern_center(segment, 0, 'drop'), drop_center)
def test_find_drop_parameters_height(self):
segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
segment = pd.Series(segment)
drop_height = [3.5, 4]
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_height[0])
self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_height[1])
self.assertGreaterEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[0])
self.assertLessEqual(utils.find_parameters(segment, 0, 'drop')[0], drop_height[1])
def test_find_drop_parameters_length(self):
segment = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
segment = pd.Series(segment)
drop_length = 2
self.assertEqual(utils.find_parameters(segment, 0, 'drop')[2], drop_length)
self.assertEqual(utils.find_parameters(segment, 0, 'drop')[1], drop_length)
def test_get_av_model_empty_data(self):
patterns_list = []
@@ -189,6 +189,13 @@ class TestUtils(unittest.TestCase):
utils_result_segment = utils.get_distribution_density(segment)
self.assertEqual(len(utils_result_data), 3)
self.assertEqual(utils_result_segment, (0, 0, 0))
def test_find_pattern_jump_center(self):
data = [1.0, 1.0, 1.0, 5.0, 5.0, 5.0]
data = pd.Series(data)
median = 3.0
result = 3
self.assertEqual(result, utils.find_pattern_center(data, 0, 'jump'))
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save