Browse Source

Processing for NaN values #231 (#236)

pull/1/head
Alexandr Velikiy 6 years ago committed by rozetko
parent
commit
8614499c20
  1. 13
      analytics/analytics/models/drop_model.py
  2. 5
      analytics/analytics/models/general_model.py
  3. 12
      analytics/analytics/models/jump_model.py
  4. 12
      analytics/analytics/models/peak_model.py
  5. 15
      analytics/analytics/models/trough_model.py
  6. 14
      analytics/analytics/utils/common.py

13
analytics/analytics/models/drop_model.py

@ -39,8 +39,8 @@ class DropModel(Model):
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
segment_data = data[segment_from_index: segment_to_index + 1] segment_data = data[segment_from_index: segment_to_index + 1]
percent_of_nans = segment_data.count(np.NaN) / len(segment_data)
if len(segment_data) == 0: if percent_of_nans > 0 or len(segment_data) == 0:
continue continue
segment_min = min(segment_data) segment_min = min(segment_data)
segment_max = max(segment_data) segment_max = max(segment_data)
@ -164,6 +164,14 @@ class DropModel(Model):
for segment in segments: for segment in segments:
if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']): if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1] convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
percent_of_nans = convol_data.count(np.NaN) / len(convol_data)
if percent_of_nans > 0.5:
delete_list.append(segment)
continue
elif 0 < percent_of_nans <= 0.5:
nan_list = utils.find_nan_indexes(convol_data)
convol_data = utils.nan_to_zero(convol_data, nan_list)
pattern_data = utils.nan_to_zero(pattern_data, nan_list)
conv = scipy.signal.fftconvolve(convol_data, pattern_data) conv = scipy.signal.fftconvolve(convol_data, pattern_data)
upper_bound = self.state['convolve_max'] * 1.2 upper_bound = self.state['convolve_max'] * 1.2
lower_bound = self.state['convolve_min'] * 0.8 lower_bound = self.state['convolve_min'] * 0.8
@ -181,5 +189,4 @@ class DropModel(Model):
for item in delete_list: for item in delete_list:
segments.remove(item) segments.remove(item)
return set(segments) return set(segments)

5
analytics/analytics/models/general_model.py

@ -36,9 +36,9 @@ class GeneralModel(Model):
if segment['labeled']: if segment['labeled']:
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
segment_data = data[segment_from_index: segment_to_index + 1] segment_data = data[segment_from_index: segment_to_index + 1]
if len(segment_data) == 0: percent_of_nans = segment_data.count(np.NaN) / len(segment_data)
if percent_of_nans > 0 or len(segment_data) == 0:
continue continue
x = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2) x = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
self.ipats.append(x) self.ipats.append(x)
@ -110,7 +110,6 @@ class GeneralModel(Model):
if len(segments) == 0 or len(self.ipats) == 0: if len(segments) == 0 or len(self.ipats) == 0:
return [] return []
delete_list = [] delete_list = []
for val in segments: for val in segments:
if self.all_conv[val] < self.state['convolve_min'] * 0.8: if self.all_conv[val] < self.state['convolve_min'] * 0.8:
delete_list.append(val) delete_list.append(val)

12
analytics/analytics/models/jump_model.py

@ -39,7 +39,8 @@ class JumpModel(Model):
for segment in segments: for segment in segments:
if segment['labeled']: if segment['labeled']:
segment_from_index, segment_to_index, segment_data = parse_segment(segment, dataframe) segment_from_index, segment_to_index, segment_data = parse_segment(segment, dataframe)
if len(segment_data) == 0: percent_of_nans = segment_data.count(np.NaN) / len(segment_data)
if percent_of_nans > 0 or len(segment_data) == 0:
continue continue
segment_min = min(segment_data) segment_min = min(segment_data)
segment_max = max(segment_data) segment_max = max(segment_data)
@ -169,6 +170,14 @@ class JumpModel(Model):
for segment in segments: for segment in segments:
if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']): if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1] convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
percent_of_nans = convol_data.count(np.NaN) / len(convol_data)
if percent_of_nans > 0.5:
delete_list.append(segment)
continue
elif 0 < percent_of_nans <= 0.5:
nan_list = utils.find_nan_indexes(convol_data)
convol_data = utils.nan_to_zero(convol_data, nan_list)
pattern_data = utils.nan_to_zero(pattern_data, nan_list)
conv = scipy.signal.fftconvolve(convol_data, pattern_data) conv = scipy.signal.fftconvolve(convol_data, pattern_data)
try: try:
if max(conv) > upper_bound or max(conv) < lower_bound: if max(conv) > upper_bound or max(conv) < lower_bound:
@ -179,7 +188,6 @@ class JumpModel(Model):
delete_list.append(segment) delete_list.append(segment)
else: else:
delete_list.append(segment) delete_list.append(segment)
for item in delete_list: for item in delete_list:
segments.remove(item) segments.remove(item)

12
analytics/analytics/models/peak_model.py

@ -37,7 +37,8 @@ class PeakModel(Model):
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
segment_data = data[segment_from_index: segment_to_index + 1] segment_data = data[segment_from_index: segment_to_index + 1]
if len(segment_data) == 0: percent_of_nans = segment_data.count(np.NaN) / len(segment_data)
if percent_of_nans > 0 or len(segment_data) == 0:
continue continue
segment_min = min(segment_data) segment_min = min(segment_data)
segment_max = max(segment_data) segment_max = max(segment_data)
@ -125,6 +126,14 @@ class PeakModel(Model):
if segment > self.state['WINDOW_SIZE']: if segment > self.state['WINDOW_SIZE']:
convol_data = data[segment - self.state['WINDOW_SIZE']: segment + self.state['WINDOW_SIZE'] + 1] convol_data = data[segment - self.state['WINDOW_SIZE']: segment + self.state['WINDOW_SIZE'] + 1]
convol_data = convol_data - min(convol_data) convol_data = convol_data - min(convol_data)
percent_of_nans = convol_data.count(np.NaN) / len(convol_data)
if percent_of_nans > 0.5:
delete_list.append(segment)
continue
elif 0 < percent_of_nans <= 0.5:
nan_list = utils.find_nan_indexes(convol_data)
convol_data = utils.nan_to_zero(convol_data, nan_list)
pattern_data = utils.nan_to_zero(pattern_data, nan_list)
conv = scipy.signal.fftconvolve(convol_data, pattern_data) conv = scipy.signal.fftconvolve(convol_data, pattern_data)
if max(conv) > self.state['convolve_max'] * 1.05 or max(conv) < self.state['convolve_min'] * 0.95: if max(conv) > self.state['convolve_max'] * 1.05 or max(conv) < self.state['convolve_min'] * 0.95:
delete_list.append(segment) delete_list.append(segment)
@ -134,5 +143,4 @@ class PeakModel(Model):
delete_list.append(segment) delete_list.append(segment)
for item in delete_list: for item in delete_list:
segments.remove(item) segments.remove(item)
return set(segments) return set(segments)

15
analytics/analytics/models/trough_model.py

@ -36,9 +36,9 @@ class TroughModel(Model):
if segment['labeled']: if segment['labeled']:
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
segment_data = data[segment_from_index: segment_to_index + 1] segment_data = data[segment_from_index: segment_to_index + 1]
if len(segment_data) == 0: percent_of_nans = segment_data.count(np.NaN) / len(segment_data)
if percent_of_nans > 0 or len(segment_data) == 0:
continue continue
segment_min = min(segment_data) segment_min = min(segment_data)
segment_max = max(segment_data) segment_max = max(segment_data)
@ -64,7 +64,8 @@ class TroughModel(Model):
segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms')) segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms')) segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
segment_data = data[segment_from_index: segment_to_index + 1] segment_data = data[segment_from_index: segment_to_index + 1]
if len(segment_data) == 0: percent_of_nans = segment_data.count(np.NaN) / len(segment_data)
if percent_of_nans > 0 or len(segment_data) == 0:
continue continue
del_min_index = segment_data.idxmin() del_min_index = segment_data.idxmin()
deleted_trough = data[del_min_index - self.state['WINDOW_SIZE']: del_min_index + self.state['WINDOW_SIZE'] + 1] deleted_trough = data[del_min_index - self.state['WINDOW_SIZE']: del_min_index + self.state['WINDOW_SIZE'] + 1]
@ -126,6 +127,14 @@ class TroughModel(Model):
if segment > self.state['WINDOW_SIZE']: if segment > self.state['WINDOW_SIZE']:
convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1] convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
convol_data = convol_data - min(convol_data) convol_data = convol_data - min(convol_data)
percent_of_nans = convol_data.count(np.NaN) / len(convol_data)
if percent_of_nans > 0.5:
delete_list.append(segment)
continue
elif 0 < percent_of_nans <= 0.5:
nan_list = utils.find_nan_indexes(convol_data)
convol_data = utils.nan_to_zero(convol_data, nan_list)
pattern_data = utils.nan_to_zero(pattern_data, nan_list)
conv = scipy.signal.fftconvolve(convol_data, pattern_data) conv = scipy.signal.fftconvolve(convol_data, pattern_data)
if max(conv) > self.state['convolve_max'] * 1.1 or max(conv) < self.state['convolve_min'] * 0.9: if max(conv) > self.state['convolve_max'] * 1.1 or max(conv) < self.state['convolve_min'] * 0.9:
delete_list.append(segment) delete_list.append(segment)

14
analytics/analytics/utils/common.py

@ -250,3 +250,17 @@ def best_pat(pat_list, data, dir):
ind = i ind = i
new_pat_list.append(ind) new_pat_list.append(ind)
return new_pat_list return new_pat_list
def find_nan_indexes(segment):
    """Return the 0-based positional indexes of NaN values in *segment*.

    Args:
        segment: a numeric sequence (list, numpy array, or pandas Series)
            that ``np.isnan`` can evaluate elementwise.

    Returns:
        list[int]: positions (enumeration order) where the value is NaN.
    """
    # np.isnan yields an elementwise boolean mask; flatnonzero collects
    # the positions where the mask is True — equivalent to the manual
    # enumerate-and-append loop, but vectorized.
    return np.flatnonzero(np.isnan(segment)).tolist()
def nan_to_zero(segment, nan_list):
    """Zero out the entries of *segment* at the given indexes, in place.

    Args:
        segment: a mutable indexable sequence (list or pandas Series).
        nan_list: indexes whose entries should be replaced with 0
            (typically the output of ``find_nan_indexes``).

    Returns:
        The same *segment* object, mutated.
    """
    # NOTE(review): for a pandas Series sliced out of a larger frame,
    # integer assignment may resolve by label rather than position —
    # confirm callers pass indexes matching the Series' index.
    for idx in nan_list:
        segment[idx] = 0
    return segment

Loading…
Cancel
Save