
Move repeating code from models to utils #270 (#272)

peak / trough / jump / drop / general: move common code to utils (refactoring)
Alexandr Velikiy, 6 years ago, committed by Alexey Velikiy
commit d389cf3adb

Changed files:
1. analytics/analytics/models/drop_model.py (70 lines changed)
2. analytics/analytics/models/general_model.py (32 lines changed)
3. analytics/analytics/models/jump_model.py (67 lines changed)
4. analytics/analytics/models/peak_model.py (33 lines changed)
5. analytics/analytics/models/trough_model.py (33 lines changed)
6. analytics/analytics/utils/common.py (79 lines changed)
7. analytics/analytics/utils/segments.py (2 lines changed)
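
All five models repeated the same steps: slice a window around a segment's center, subtract the minimum, and build convolution scores against the averaged pattern. The commit moves those steps into utils/common.py and utils/segments.py. Below is a small self-contained sketch of the extracted behaviour (a simplified re-implementation for illustration, not the repository code; the names mirror the new helpers):

import numpy as np
import pandas as pd
import scipy.signal

def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
    # Window of 2 * window_size + 1 points around `center`, like utils.get_interval.
    return data[center - window_size: center + window_size + 1]

def subtract_min_without_nan(segment: pd.Series) -> pd.Series:
    # Shift the segment so its minimum becomes zero, unless the minimum is NaN.
    return segment if np.isnan(min(segment)) else segment - min(segment)

def get_convolve(centers: list, av_model, data: pd.Series, window_size: int) -> list:
    # For every labeled center, collect the max of the auto-convolution and the max of
    # the convolution with the averaged model, like utils.get_convolve.
    convolve_list = []
    for center in centers:
        labeled = subtract_min_without_nan(get_interval(data, center, window_size))
        convolve_list.append(max(scipy.signal.fftconvolve(labeled, labeled)))
        convolve_list.append(max(scipy.signal.fftconvolve(labeled, av_model)))
    return convolve_list

data = pd.Series(np.sin(np.linspace(0, 12, 300)) + 5)
av_model = subtract_min_without_nan(get_interval(data, 40, 10)).values
print(get_convolve([40, 200], av_model, data, 10))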

analytics/analytics/models/drop_model.py (70 lines changed)

@@ -36,78 +36,32 @@ class DropModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window = 5).mean()
-                pdf = gaussian_kde(flat_segment.dropna())
-                max_drop = max(flat_segment.dropna())
-                min_drop = min(flat_segment.dropna())
-                x = np.linspace(flat_segment.dropna().min() - 1, flat_segment.dropna().max() + 1, len(flat_segment.dropna()))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
-                try:
-                    min_peak_index = peaks_kde[0]
-                    segment_min_line = ax_list[min_peak_index, 0]
-                    max_peak_index = peaks_kde[1]
-                    segment_max_line = ax_list[max_peak_index, 0]
-                    segment_median = ax_list[antipeaks_kde[0], 0]
-                except IndexError:
-                    segment_max_line = max_drop
-                    segment_min_line = min_drop
-                    segment_median = (max_drop - min_drop) / 2 + min_drop
-                drop_height = 0.95 * (segment_max_line - segment_min_line)
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
+                segment_cent_index, drop_height, drop_length = utils.find_drop_parameters(segment_data, segment_from_index)
                 drop_height_list.append(drop_height)
-                drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
                 drop_length_list.append(drop_length)
-                cen_ind = utils.drop_intersection(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                drop_center = cen_ind[0]
-                segment_cent_index = drop_center - 5 + segment_from_index
                 self.idrops.append(segment_cent_index)
-                labeled_drop = data[segment_cent_index - self.state['WINDOW_SIZE']: segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_drop = labeled_drop - min(labeled_drop)
+                labeled_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                labeled_drop = utils.subtract_min_without_nan(labeled_drop)
                 patterns_list.append(labeled_drop)
         self.model_drop = utils.get_av_model(patterns_list)
-        for idrop in self.idrops:
-            labeled_drop = data[idrop - self.state['WINDOW_SIZE']: idrop + self.state['WINDOW_SIZE'] + 1]
-            labeled_drop = labeled_drop - min(labeled_drop)
-            auto_convolve = scipy.signal.fftconvolve(labeled_drop, labeled_drop)
-            convolve_drop = scipy.signal.fftconvolve(labeled_drop, self.model_drop)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_drop))
+        convolve_list = utils.get_convolve(self.idrops, self.model_drop, data, self.state['WINDOW_SIZE'])
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
-                flat_segment = segment_data.rolling(window = 5).mean()
-                flat_segment_dropna = flat_segment.dropna()
-                pdf = gaussian_kde(flat_segment_dropna)
-                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                segment_median = ax_list[antipeaks_kde[0], 0]
-                cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                drop_center = cen_ind[0] # or -1? test
-                segment_cent_index = drop_center - 5 + segment_from_index
-                deleted_drop = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_drop = deleted_drop - min(labeled_drop)
+                segment_cent_index = utils.find_drop_parameters(segment_data, segment_from_index)[0]
+                deleted_drop = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                deleted_drop = utils.subtract_min_without_nan(deleted_drop)
                 del_conv_drop = scipy.signal.fftconvolve(deleted_drop, self.model_drop)
                 del_conv_list.append(max(del_conv_drop))
@@ -163,7 +117,7 @@ class DropModel(Model):
         pattern_data = self.model_drop
         for segment in segments:
             if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
-                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)

analytics/analytics/models/general_model.py (32 lines changed)

@@ -34,39 +34,28 @@ class GeneralModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                x = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
-                self.ipats.append(x)
-                segment_data = data[x - self.state['WINDOW_SIZE'] : x + self.state['WINDOW_SIZE']]
-                segment_min = min(segment_data)
-                segment_data = segment_data - segment_min
+                center_ind = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
+                self.ipats.append(center_ind)
+                segment_data = utils.get_interval(data, center_ind, self.state['WINDOW_SIZE'])
+                segment_data = utils.subtract_min_without_nan(segment_data)
                 patterns_list.append(segment_data)
         self.model_gen = utils.get_av_model(patterns_list)
-        for ipat in self.ipats: #labeled segments
-            labeled_data = data[ipat - self.state['WINDOW_SIZE']: ipat + self.state['WINDOW_SIZE'] + 1]
-            labeled_data = labeled_data - min(labeled_data)
-            auto_convolve = scipy.signal.fftconvolve(labeled_data, labeled_data)
-            convolve_data = scipy.signal.fftconvolve(labeled_data, self.model_gen)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_data))
+        convolve_list = utils.get_convolve(self.ipats, self.model_gen, data, self.state['WINDOW_SIZE'])
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
                 del_mid_index = segment_from_index + math.ceil((segment_to_index - segment_from_index) / 2)
-                deleted_pat = data[del_mid_index - self.state['WINDOW_SIZE']: del_mid_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_pat = deleted_pat - min(deleted_pat)
+                deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
+                deleted_pat = utils.subtract_min_without_nan(segment_data)
                 del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.model_gen)
                 del_conv_list.append(max(del_conv_pat))
@@ -97,8 +86,7 @@ class GeneralModel(Model):
         for i in range(self.state['WINDOW_SIZE'] * 2, len(data)):
             watch_data = data[i - self.state['WINDOW_SIZE'] * 2: i]
-            w = min(watch_data)
-            watch_data = watch_data - w
+            watch_data = utils.subtract_min_without_nan(watch_data)
             conv = scipy.signal.fftconvolve(watch_data, pat_data)
             self.all_conv.append(max(conv))
         all_conv_peaks = utils.peak_finder(self.all_conv, self.state['WINDOW_SIZE'] * 2)

analytics/analytics/models/jump_model.py (67 lines changed)

@@ -42,73 +42,28 @@ class JumpModel(Model):
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.20 * (segment_max - segment_min))
-                flat_segment = segment_data.rolling(window = 5).mean()
-                flat_segment_dropna = flat_segment.dropna()
-                min_jump = min(flat_segment_dropna)
-                max_jump = max(flat_segment_dropna)
-                pdf = gaussian_kde(flat_segment_dropna)
-                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
-                try:
-                    min_peak_index = peaks_kde[0]
-                    segment_min_line = ax_list[min_peak_index, 0]
-                    max_peak_index = peaks_kde[1]
-                    segment_max_line = ax_list[max_peak_index, 0]
-                    segment_median = ax_list[antipeaks_kde[0], 0]
-                except IndexError:
-                    segment_max_line = max_jump
-                    segment_min_line = min_jump
-                    segment_median = (max_jump - min_jump) / 2 + min_jump
-                jump_height = 0.95 * (segment_max_line - segment_min_line)
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
+                segment_cent_index, jump_height, jump_length = utils.find_jump_parameters(segment_data, segment_from_index)
                 jump_height_list.append(jump_height)
-                jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
                 jump_length_list.append(jump_length)
-                cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                jump_center = cen_ind[0]
-                segment_cent_index = jump_center - 5 + segment_from_index
                 self.ijumps.append(segment_cent_index)
-                labeled_jump = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_jump = labeled_jump - min(labeled_jump)
+                labeled_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                labeled_jump = utils.subtract_min_without_nan(labeled_jump)
                 patterns_list.append(labeled_jump)
         self.model_jump = utils.get_av_model(patterns_list)
-        for ijump in self.ijumps:
-            labeled_jump = data[ijump - self.state['WINDOW_SIZE']: ijump + self.state['WINDOW_SIZE'] + 1]
-            labeled_jump = labeled_jump - min(labeled_jump)
-            auto_convolve = scipy.signal.fftconvolve(labeled_jump, labeled_jump)
-            convolve_jump = scipy.signal.fftconvolve(labeled_jump, self.model_jump)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_jump))
+        convolve_list = utils.get_convolve(self.ijumps, self.model_jump, data, self.state['WINDOW_SIZE'])
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
-                flat_segment = segment_data.rolling(window = 5).mean()
-                flat_segment_dropna = flat_segment.dropna()
-                pdf = gaussian_kde(flat_segment_dropna)
-                x = np.linspace(flat_segment_dropna.min() - 1, flat_segment_dropna.max() + 1, len(flat_segment_dropna))
-                y = pdf(x)
-                ax_list = list(zip(x, y))
-                ax_list = np.array(ax_list, np.float32)
-                antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
-                segment_median = ax_list[antipeaks_kde[0], 0]
-                cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) #finds all interseprions with median
-                jump_center = cen_ind[0]
-                segment_cent_index = jump_center - 5 + segment_from_index
-                deleted_jump = data[segment_cent_index - self.state['WINDOW_SIZE'] : segment_cent_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_jump = deleted_jump - min(labeled_jump)
+                segment_cent_index = utils.find_jump_parameters(segment_data, segment_from_index)[0]
+                deleted_jump = utils.get_interval(data, segment_cent_index, self.state['WINDOW_SIZE'])
+                deleted_jump = utils.subtract_min_without_nan(labeled_jump)
                 del_conv_jump = scipy.signal.fftconvolve(deleted_jump, self.model_jump)
                 del_conv_list.append(max(del_conv_jump))
@@ -169,7 +124,7 @@ class JumpModel(Model):
         delete_low_bound = self.state['conv_del_min'] * 0.98
         for segment in segments:
             if segment > self.state['WINDOW_SIZE'] and segment < (len(data) - self.state['WINDOW_SIZE']):
-                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)

analytics/analytics/models/peak_model.py (33 lines changed)

@@ -34,41 +34,30 @@ class PeakModel(Model):
         patterns_list = []
         for segment in segments:
             if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.2 * (segment_max - segment_min))
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
                 segment_max_index = segment_data.idxmax()
                 self.ipeaks.append(segment_max_index)
-                labeled_peak = data[segment_max_index - self.state['WINDOW_SIZE']: segment_max_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_peak = labeled_peak - min(labeled_peak)
+                labeled_peak = utils.get_interval(data, segment_max_index, self.state['WINDOW_SIZE'])
+                labeled_peak = utils.subtract_min_without_nan(labeled_peak)
                 patterns_list.append(labeled_peak)
         self.model_peak = utils.get_av_model(patterns_list)
-        for ipeak in self.ipeaks: #labeled segments
-            labeled_peak = data[ipeak - self.state['WINDOW_SIZE']: ipeak + self.state['WINDOW_SIZE'] + 1]
-            labeled_peak = labeled_peak - min(labeled_peak)
-            auto_convolve = scipy.signal.fftconvolve(labeled_peak, labeled_peak)
-            convolve_peak = scipy.signal.fftconvolve(labeled_peak, self.model_peak)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_peak))
+        convolve_list = utils.get_convolve(self.ipeaks, self.model_peak, data, self.state['WINDOW_SIZE'])
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = parse_segment(segment, dataframe)
                 if len(segment_data) == 0:
                     continue
                 del_max_index = segment_data.idxmax()
-                deleted_peak = data[del_max_index - self.state['WINDOW_SIZE']: del_max_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_peak = deleted_peak - min(deleted_peak)
+                deleted_peak = utils.get_interval(data, del_max_index, self.state['WINDOW_SIZE'])
+                deleted_peak = utils.subtract_min_without_nan(deleted_peak)
                 del_conv_peak = scipy.signal.fftconvolve(deleted_peak, self.model_peak)
                 del_conv_list.append(max(del_conv_peak))
@@ -124,8 +113,8 @@ class PeakModel(Model):
         pattern_data = self.model_peak
         for segment in segments:
             if segment > self.state['WINDOW_SIZE']:
-                convol_data = data[segment - self.state['WINDOW_SIZE']: segment + self.state['WINDOW_SIZE'] + 1]
-                convol_data = convol_data - min(convol_data)
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
+                convol_data = utils.subtract_min_without_nan(convol_data)
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)

analytics/analytics/models/trough_model.py (33 lines changed)

@@ -34,42 +34,31 @@ class TroughModel(Model):
         patterns_list = []
         for segment in segments:
            if segment['labeled']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
-                segment_min = min(segment_data)
-                segment_max = max(segment_data)
-                confidences.append(0.2 * (segment_max - segment_min))
+                confidence = utils.find_confidence(segment_data)
+                confidences.append(confidence)
                 segment_min_index = segment_data.idxmin()
                 self.itroughs.append(segment_min_index)
-                labeled_trough = data[segment_min_index - self.state['WINDOW_SIZE'] : segment_min_index + self.state['WINDOW_SIZE'] + 1]
-                labeled_trough = labeled_trough - min(labeled_trough)
+                labeled_trough = utils.get_interval(data, segment_min_index, self.state['WINDOW_SIZE'])
+                labeled_trough = utils.subtract_min_without_nan(labeled_trough)
                 patterns_list.append(labeled_trough)
         self.model_trough = utils.get_av_model(patterns_list)
-        for itrough in self.itroughs:
-            labeled_trough = data[itrough - self.state['WINDOW_SIZE']: itrough + self.state['WINDOW_SIZE'] + 1]
-            labeled_trough = labeled_trough - min(labeled_trough)
-            auto_convolve = scipy.signal.fftconvolve(labeled_trough, labeled_trough)
-            convolve_trough = scipy.signal.fftconvolve(labeled_trough, self.model_trough)
-            convolve_list.append(max(auto_convolve))
-            convolve_list.append(max(convolve_trough))
+        convolve_list = utils.get_convolve(self.itroughs, self.model_trough, data, self.state['WINDOW_SIZE'])
         del_conv_list = []
         for segment in segments:
             if segment['deleted']:
-                segment_from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
-                segment_to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
-                segment_data = data[segment_from_index: segment_to_index + 1]
+                segment_from_index, segment_to_index, segment_data = utils.parse_segment(segment, dataframe)
                 percent_of_nans = segment_data.isnull().sum() / len(segment_data)
                 if percent_of_nans > 0 or len(segment_data) == 0:
                     continue
                 del_min_index = segment_data.idxmin()
-                deleted_trough = data[del_min_index - self.state['WINDOW_SIZE']: del_min_index + self.state['WINDOW_SIZE'] + 1]
-                deleted_trough = deleted_trough - min(deleted_trough)
+                deleted_trough = utils.get_interval(data, del_min_index, self.state['WINDOW_SIZE'])
+                deleted_trough = utils.subtract_min_without_nan(deleted_trough)
                 del_conv_trough = scipy.signal.fftconvolve(deleted_trough, self.model_trough)
                 del_conv_list.append(max(del_conv_trough))
@@ -125,8 +114,8 @@ class TroughModel(Model):
         pattern_data = self.model_trough
         for segment in segments:
             if segment > self.state['WINDOW_SIZE']:
-                convol_data = data[segment - self.state['WINDOW_SIZE'] : segment + self.state['WINDOW_SIZE'] + 1]
-                convol_data = convol_data - min(convol_data)
+                convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
+                convol_data = utils.subtract_min_without_nan(convol_data)
                 percent_of_nans = convol_data.isnull().sum() / len(convol_data)
                 if percent_of_nans > 0.5:
                     delete_list.append(segment)

analytics/analytics/utils/common.py (79 lines changed)

@@ -1,6 +1,10 @@
 import numpy as np
 import pandas as pd
+import scipy.signal
+from scipy.fftpack import fft
+from scipy.signal import argrelextrema
+from scipy.stats import gaussian_kde
+import utils

 def exponential_smoothing(series, alpha):
     result = [series[0]]
@@ -276,3 +280,76 @@ def nan_to_zero(segment, nan_list):
         segment[val] = 0
     return segment
+
+def find_confidence(segment: pd.Series) -> float:
+    segment_min = min(segment)
+    segment_max = max(segment)
+    return 0.2 * (segment_max - segment_min)
+
+def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
+    left_bound = center - window_size
+    right_bound = center + window_size + 1
+    return data[left_bound: right_bound]
+
+def subtract_min_without_nan(segment: list) -> list:
+    if not np.isnan(min(segment)):
+        segment = segment - min(segment)
+    return segment
+
+def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
+    labeled_segment = []
+    convolve_list = []
+    for segment in segments:
+        labeled_segment = utils.get_interval(data, segment, window_size)
+        labeled_segment = utils.subtract_min_without_nan(labeled_segment)
+        auto_convolve = scipy.signal.fftconvolve(labeled_segment, labeled_segment)
+        convolve_segment = scipy.signal.fftconvolve(labeled_segment, av_model)
+        convolve_list.append(max(auto_convolve))
+        convolve_list.append(max(convolve_segment))
+    return convolve_list
+
+def find_jump_parameters(segment_data: pd.Series, segment_from_index: int):
+    flat_segment = segment_data.rolling(window=5).mean()
+    flat_segment_dropna = flat_segment.dropna()
+    segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
+    jump_height = 0.95 * (segment_max_line - segment_min_line)
+    jump_length = utils.find_jump_length(segment_data, segment_min_line, segment_max_line)
+    cen_ind = utils.intersection_segment(flat_segment.tolist(), segment_median) # finds all interseprions with median
+    jump_center = cen_ind[0]
+    segment_cent_index = jump_center - 5 + segment_from_index
+    return segment_cent_index, jump_height, jump_length
+
+def find_drop_parameters(segment_data: pd.Series, segment_from_index: int):
+    flat_segment = segment_data.rolling(window=5).mean()
+    flat_segment_dropna = flat_segment.dropna()
+    segment_median, segment_max_line, segment_min_line = utils.get_distribution_density(flat_segment_dropna)
+    drop_height = 0.95 * (segment_max_line - segment_min_line)
+    drop_length = utils.find_drop_length(segment_data, segment_min_line, segment_max_line)
+    cen_ind = utils.drop_intersection(flat_segment.tolist(), segment_median) # finds all interseprions with median
+    drop_center = cen_ind[0]
+    segment_cent_index = drop_center - 5 + segment_from_index
+    return segment_cent_index, drop_height, drop_length
+
+def get_distribution_density(segment: pd.Series) -> float:
+    min_jump = min(segment)
+    max_jump = max(segment)
+    pdf = gaussian_kde(segment)
+    x = np.linspace(segment.min() - 1, segment.max() + 1, len(segment))
+    y = pdf(x)
+    ax_list = list(zip(x, y))
+    ax_list = np.array(ax_list, np.float32)
+    antipeaks_kde = argrelextrema(np.array(ax_list), np.less)[0]
+    peaks_kde = argrelextrema(np.array(ax_list), np.greater)[0]
+    try:
+        min_peak_index = peaks_kde[0]
+        segment_min_line = ax_list[min_peak_index, 0]
+        max_peak_index = peaks_kde[1]
+        segment_max_line = ax_list[max_peak_index, 0]
+        segment_median = ax_list[antipeaks_kde[0], 0]
+    except IndexError:
+        segment_max_line = max_jump
+        segment_min_line = min_jump
+        segment_median = (max_jump - min_jump) / 2 + min_jump
+    return segment_median, segment_max_line, segment_min_line
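
A usage sketch for the new helpers, assuming it runs inside the analytics/analytics package so that `import utils` resolves; the series, the window size, and the comments on get_av_model (an existing helper the models call) are illustrative assumptions, not repository code:

import numpy as np
import pandas as pd
import utils  # assumes analytics/analytics is on the import path

# Synthetic series with a step-like jump around index 50.
data = pd.Series(np.concatenate([np.zeros(50), np.full(50, 10.0)]))
window_size = 10

confidence = utils.find_confidence(data)                      # 0.2 * (max - min)
center, height, length = utils.find_jump_parameters(data, 0)  # KDE-based jump description
pattern = utils.get_interval(data, center, window_size)       # 2 * window_size + 1 points
pattern = utils.subtract_min_without_nan(pattern)             # shift so the minimum is 0
model = utils.get_av_model([pattern])                         # averaged labeled pattern
convolve_list = utils.get_convolve([center], model, data, window_size)
print(confidence, center, height, length, max(convolve_list))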

analytics/analytics/utils/segments.py (2 lines changed)

@@ -2,7 +2,7 @@ import pandas as pd
 from utils.common import timestamp_to_index

-def parse_segment(segment, dataframe):
+def parse_segment(segment: dict, dataframe: pd.DataFrame):
     start = timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
     end = timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
     data = dataframe['value'][start: end + 1]
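
A sketch of how the models consume parse_segment (it returns the start index, the end index, and the sliced values); the dataframe layout with 'timestamp' and 'value' columns is an assumption taken from how the rest of the codebase builds its frames:

import pandas as pd
from utils.segments import parse_segment  # assumes analytics/analytics is on the import path

timestamps = pd.date_range('2018-01-01', periods=100, freq='S')
dataframe = pd.DataFrame({'timestamp': timestamps, 'value': range(100)})

segment = {
    'from': int(timestamps[10].timestamp() * 1000),  # milliseconds, as sent by the server
    'to': int(timestamps[20].timestamp() * 1000),
}
segment_from_index, segment_to_index, segment_data = parse_segment(segment, dataframe)
print(segment_from_index, segment_to_index, len(segment_data))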
