Browse Source

detector based on all patterns corr

pull/25/head
Alexey Velikiy 3 years ago
parent
commit
864c2e0b42
  1. 125
      server/src/services/analytic_service/pattern_detector.rs

125
server/src/services/analytic_service/pattern_detector.rs

@ -1,10 +1,10 @@
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct LearningResults { pub struct LearningResults {
model: Vec<f64>, // avg_min: f64, // model: Vec<f64>,
// avg_max: f64 patterns: Vec<Vec<f64>>,
} }
const CORR_THRESHOLD: f64 = 0.9; const CORR_THRESHOLD: f64 = 0.95;
#[derive(Clone)] #[derive(Clone)]
pub struct PatternDetector { pub struct PatternDetector {
@ -25,7 +25,6 @@ impl PatternDetector {
} }
pub async fn learn(reads: &Vec<Vec<(u64, f64)>>) -> LearningResults { pub async fn learn(reads: &Vec<Vec<(u64, f64)>>) -> LearningResults {
let size_avg = reads.iter().map(|r| r.len()).sum::<usize>() / reads.len(); let size_avg = reads.iter().map(|r| r.len()).sum::<usize>() / reads.len();
let mut stat = Vec::<(usize, f64)>::new(); let mut stat = Vec::<(usize, f64)>::new();
@ -33,71 +32,108 @@ impl PatternDetector {
stat.push((0usize, 0f64)); stat.push((0usize, 0f64));
} }
let mut patterns = Vec::<Vec<f64>>::new();
// for r in reads {
// let xs: Vec<f64> = r.iter().map(|e| e.1).map(nan_to_zero).collect();
// if xs.len() > size_avg {
// let offset = (xs.len() - size_avg) / 2;
// for i in 0..size_avg {
// stat[i].0 += 1;
// stat[i].1 += xs[i + offset];
// }
// } else {
// let offset = (size_avg - xs.len()) / 2;
// for i in 0..xs.len() {
// stat[i + offset].0 += 1;
// stat[i + offset].1 += xs[i];
// }
// }
// }
for r in reads { for r in reads {
let xs: Vec<f64> = r.iter().map(|e| e.1).map(nan_to_zero).collect(); let xs: Vec<f64> = r.iter().map(|e| e.1).map(nan_to_zero).collect();
if xs.len() > size_avg { patterns.push(xs);
let offset = (xs.len() - size_avg) / 2;
for i in 0..size_avg {
stat[i].0 += 1;
stat[i].1 += xs[i + offset];
}
} else {
let offset = (size_avg - xs.len()) / 2;
for i in 0..xs.len() {
stat[i + offset].0 += 1;
stat[i + offset].1 += xs[i];
}
}
} }
let model = stat.iter().map(|(c, v)| v / *c as f64).collect(); // let model = stat.iter().map(|(c, v)| v / *c as f64).collect();
LearningResults { model } LearningResults {
patterns
//model
}
} }
// TODO: get iterator instead of vector // TODO: get iterator instead of vector
pub fn detect(&self, ts: &Vec<(u64, f64)>) -> Vec<(u64, u64)> { pub fn detect(&self, ts: &Vec<(u64, f64)>) -> Vec<(u64, u64)> {
let mut results = Vec::new(); let mut results = Vec::new();
let mut i = 0; // let mut i = 0;
let m = &self.learning_results.model;
// TODO: here we ignoring gaps in data // let m = &self.learning_results.model;
while i < ts.len() - self.learning_results.model.len() {
let mut backet = Vec::<f64>::new();
for j in 0..m.len() { // // TODO: here we ignoring gaps in data
backet.push(nan_to_zero(ts[j + i].1)); // while i < ts.len() - self.learning_results.model.len() {
} // let mut backet = Vec::<f64>::new();
let c = PatternDetector::corr(&backet, &m); // for j in 0..m.len() {
// backet.push(nan_to_zero(ts[j + i].1));
// }
if c >= CORR_THRESHOLD { // let c = PatternDetector::corr_aligned(&backet, &m);
let from = ts[i].0;
let to = ts[i + backet.len() - 1].0; // if c >= CORR_THRESHOLD {
results.push((from, to)); // let from = ts[i].0;
} // let to = ts[i + backet.len() - 1].0;
// results.push((from, to));
// }
i += m.len(); // i += m.len();
// }
let pt = &self.learning_results.patterns;
for i in 0..ts.len() {
for p in pt {
if i + p.len() < ts.len() {
let mut backet = Vec::<f64>::new();
for j in 0..p.len() {
backet.push(nan_to_zero(ts[i + j].1));
}
if PatternDetector::corr_aligned(p, &backet) >= CORR_THRESHOLD {
results.push((ts[i].0, ts[i + p.len() - 1].0));
}
}
}
} }
return results; return results;
} }
fn corr(xs: &Vec<f64>, ys: &Vec<f64>) -> f64 { fn corr_aligned(xs: &Vec<f64>, ys: &Vec<f64>) -> f64 {
assert_eq!(xs.len(), ys.len());
let n = xs.len() as f64; let n = xs.len() as f64;
// TODO: compute it faster, with one iteration over x y let mut s_xs: f64 = 0f64;
let s_xs: f64 = xs.iter().sum(); let mut s_ys: f64 = 0f64;
let s_ys: f64 = ys.iter().sum(); let mut s_xsys: f64 = 0f64;
let s_xsys: f64 = xs.iter().zip(ys).map(|(xi, yi)| xi * yi).sum(); let mut s_xs_2: f64 = 0f64;
let s_xs_2: f64 = xs.iter().map(|xi| xi * xi).sum(); let mut s_ys_2: f64 = 0f64;
let s_ys_2: f64 = ys.iter().map(|yi| yi * yi).sum();
let min = xs.len().min(ys.len());
xs.iter()
.take(min)
.zip(ys.iter().take(min))
.for_each(|(xi, yi)| {
s_xs += xi;
s_ys += yi;
s_xsys += xi * yi;
s_xs_2 += xi * xi;
s_ys_2 += yi * yi;
});
let numerator: f64 = n * s_xsys - s_xs * s_ys; let numerator: f64 = n * s_xsys - s_xs * s_ys;
let denominator: f64 = ((n * s_xs_2 - s_xs * s_xs) * (n * s_ys_2 - s_ys * s_ys)).sqrt(); let denominator: f64 = ((n * s_xs_2 - s_xs * s_xs) * (n * s_ys_2 - s_ys * s_ys)).sqrt();
// TODO: case when denominator = 0
let result: f64 = numerator / denominator; let result: f64 = numerator / denominator;
// assert!(result.abs() <= 1.01); // assert!(result.abs() <= 1.01);
@ -108,5 +144,4 @@ impl PatternDetector {
return result; return result;
} }
} }

Loading…
Cancel
Save