diff --git a/server/src/services/analytic_service/pattern_detector.rs b/server/src/services/analytic_service/pattern_detector.rs
index 5549005..07ce0bf 100644
--- a/server/src/services/analytic_service/pattern_detector.rs
+++ b/server/src/services/analytic_service/pattern_detector.rs
@@ -1,10 +1,10 @@
 #[derive(Debug, Clone)]
 pub struct LearningResults {
-    model: Vec<f64>,
-    // avg_min: f64,
-    // avg_max: f64
+    // model: Vec<f64>,
+    patterns: Vec<Vec<f64>>,
 }
 
-const CORR_THRESHOLD: f64 = 0.9;
+const CORR_THRESHOLD: f64 = 0.95;
 
 #[derive(Clone)]
 pub struct PatternDetector {
@@ -25,7 +25,6 @@ impl PatternDetector {
     }
 
     pub async fn learn(reads: &Vec<Vec<(u64, f64)>>) -> LearningResults {
-        let size_avg = reads.iter().map(|r| r.len()).sum::<usize>() / reads.len();
 
         let mut stat = Vec::<(usize, f64)>::new();
 
@@ -33,71 +32,108 @@ impl PatternDetector {
             stat.push((0usize, 0f64));
         }
 
+        let mut patterns = Vec::<Vec<f64>>::new();
+
+        // for r in reads {
+        //     let xs: Vec<f64> = r.iter().map(|e| e.1).map(nan_to_zero).collect();
+        //     if xs.len() > size_avg {
+        //         let offset = (xs.len() - size_avg) / 2;
+        //         for i in 0..size_avg {
+        //             stat[i].0 += 1;
+        //             stat[i].1 += xs[i + offset];
+        //         }
+        //     } else {
+        //         let offset = (size_avg - xs.len()) / 2;
+        //         for i in 0..xs.len() {
+        //             stat[i + offset].0 += 1;
+        //             stat[i + offset].1 += xs[i];
+        //         }
+        //     }
+        // }
+
         for r in reads {
             let xs: Vec<f64> = r.iter().map(|e| e.1).map(nan_to_zero).collect();
-            if xs.len() > size_avg {
-                let offset = (xs.len() - size_avg) / 2;
-                for i in 0..size_avg {
-                    stat[i].0 += 1;
-                    stat[i].1 += xs[i + offset];
-                }
-            } else {
-                let offset = (size_avg - xs.len()) / 2;
-                for i in 0..xs.len() {
-                    stat[i + offset].0 += 1;
-                    stat[i + offset].1 += xs[i];
-                }
-            }
+            patterns.push(xs);
         }
 
-        let model = stat.iter().map(|(c, v)| v / *c as f64).collect();
+        // let model = stat.iter().map(|(c, v)| v / *c as f64).collect();
 
-        LearningResults { model }
+        LearningResults {
+            patterns
+            //model
+        }
     }
 
     // TODO: get iterator instead of vector
     pub fn detect(&self, ts: &Vec<(u64, f64)>) -> Vec<(u64, u64)> {
         let mut results = Vec::new();
 
-        let mut i = 0;
-        let m = &self.learning_results.model;
-
-        // TODO: here we ignoring gaps in data
-        while i < ts.len() - self.learning_results.model.len() {
-            let mut backet = Vec::<f64>::new();
-
-            for j in 0..m.len() {
-                backet.push(nan_to_zero(ts[j + i].1));
-            }
-
-            let c = PatternDetector::corr(&backet, &m);
-
-            if c >= CORR_THRESHOLD {
-                let from = ts[i].0;
-                let to = ts[i + backet.len() - 1].0;
-                results.push((from, to));
-            }
-
-            i += m.len();
+        // let mut i = 0;
+
+        // let m = &self.learning_results.model;
+
+        // // TODO: here we ignoring gaps in data
+        // while i < ts.len() - self.learning_results.model.len() {
+        //     let mut backet = Vec::<f64>::new();
+
+        //     for j in 0..m.len() {
+        //         backet.push(nan_to_zero(ts[j + i].1));
+        //     }
+
+        //     let c = PatternDetector::corr_aligned(&backet, &m);
+
+        //     if c >= CORR_THRESHOLD {
+        //         let from = ts[i].0;
+        //         let to = ts[i + backet.len() - 1].0;
+        //         results.push((from, to));
+        //     }
+
+        //     i += m.len();
+        // }
+
+        let pt = &self.learning_results.patterns;
+
+        for i in 0..ts.len() {
+            for p in pt {
+                if i + p.len() < ts.len() {
+                    let mut backet = Vec::<f64>::new();
+                    for j in 0..p.len() {
+                        backet.push(nan_to_zero(ts[i + j].1));
+                    }
+                    if PatternDetector::corr_aligned(p, &backet) >= CORR_THRESHOLD {
+                        results.push((ts[i].0, ts[i + p.len() - 1].0));
+                    }
+                }
+            }
         }
 
         return results;
     }
 
-    fn corr(xs: &Vec<f64>, ys: &Vec<f64>) -> f64 {
-
-        assert_eq!(xs.len(), ys.len());
-
+    fn corr_aligned(xs: &Vec<f64>, ys: &Vec<f64>) -> f64 {
         let n = xs.len() as f64;
 
-        // TODO: compute it faster, with one iteration over x y
-        let s_xs: f64 = xs.iter().sum();
-        let s_ys: f64 = ys.iter().sum();
-        let s_xsys: f64 = xs.iter().zip(ys).map(|(xi, yi)| xi * yi).sum();
-        let s_xs_2: f64 = xs.iter().map(|xi| xi * xi).sum();
-        let s_ys_2: f64 = ys.iter().map(|yi| yi * yi).sum();
+        let mut s_xs: f64 = 0f64;
+        let mut s_ys: f64 = 0f64;
+        let mut s_xsys: f64 = 0f64;
+        let mut s_xs_2: f64 = 0f64;
+        let mut s_ys_2: f64 = 0f64;
+
+        let min = xs.len().min(ys.len());
+        xs.iter()
+            .take(min)
+            .zip(ys.iter().take(min))
+            .for_each(|(xi, yi)| {
+                s_xs += xi;
+                s_ys += yi;
+                s_xsys += xi * yi;
+                s_xs_2 += xi * xi;
+                s_ys_2 += yi * yi;
+            });
 
         let numerator: f64 = n * s_xsys - s_xs * s_ys;
         let denominator: f64 = ((n * s_xs_2 - s_xs * s_xs) * (n * s_ys_2 - s_ys * s_ys)).sqrt();
 
+        // TODO: case when denominator = 0
+
         let result: f64 = numerator / denominator;
 
         // assert!(result.abs() <= 1.01);
@@ -108,5 +144,4 @@ impl PatternDetector {
 
         return result;
     }
-
 }
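Side note on the correlation core: the patched `corr_aligned` is a single-pass Pearson correlation over the overlapping prefix of the pattern and the window, and the new `detect` slides every stored pattern across the series and records segments whose correlation clears `CORR_THRESHOLD`. The standalone sketch below is not part of the patch; it mirrors that computation and adds the zero-variance guard flagged by the `// TODO: case when denominator = 0` comment. The `detect_one` helper, the threshold argument, and the data in `main` are illustrative only.

```rust
/// Pearson correlation over the overlapping prefix of two series.
/// Mirrors the patched `corr_aligned`, with an added guard for the
/// zero-variance case (the TODO in the patch): flat windows score 0.0.
fn corr_aligned(xs: &[f64], ys: &[f64]) -> f64 {
    let min = xs.len().min(ys.len());
    let n = min as f64;

    let (mut s_xs, mut s_ys, mut s_xsys, mut s_xs_2, mut s_ys_2) =
        (0f64, 0f64, 0f64, 0f64, 0f64);
    for (xi, yi) in xs.iter().take(min).zip(ys.iter().take(min)) {
        s_xs += xi;
        s_ys += yi;
        s_xsys += xi * yi;
        s_xs_2 += xi * xi;
        s_ys_2 += yi * yi;
    }

    let numerator = n * s_xsys - s_xs * s_ys;
    let denominator = ((n * s_xs_2 - s_xs * s_xs) * (n * s_ys_2 - s_ys * s_ys)).sqrt();

    // A zero-variance window has no defined correlation; treat it as
    // "no match" instead of dividing by zero and producing NaN.
    if denominator == 0.0 {
        return 0.0;
    }
    numerator / denominator
}

/// Slide one stored pattern over a (timestamp, value) series and report
/// the segments whose correlation clears the threshold, roughly the way
/// the new `detect` loop does for each stored pattern.
fn detect_one(pattern: &[f64], ts: &[(u64, f64)], threshold: f64) -> Vec<(u64, u64)> {
    let mut results = Vec::new();
    for i in 0..ts.len() {
        // Only score windows that fit entirely inside the series.
        if i + pattern.len() <= ts.len() {
            let window: Vec<f64> = ts[i..i + pattern.len()].iter().map(|e| e.1).collect();
            if corr_aligned(pattern, &window) >= threshold {
                results.push((ts[i].0, ts[i + pattern.len() - 1].0));
            }
        }
    }
    results
}

fn main() {
    // Illustrative data: a spike-shaped pattern and a series containing one spike.
    let pattern = vec![0.0, 1.0, 4.0, 1.0, 0.0];
    let ts: Vec<(u64, f64)> = vec![
        (0, 0.1), (1, 0.0), (2, 0.2), (3, 1.1), (4, 3.9),
        (5, 0.9), (6, 0.1), (7, 0.0), (8, 0.1), (9, 0.2),
    ];
    // Prints [(2, 6)]: the window starting at t=2 correlates above 0.95.
    println!("{:?}", detect_one(&pattern, &ts, 0.95));
}
```

Two small deviations from the patch are intentional in this sketch: `n` is taken from the overlap length (the patch keeps `n = xs.len()` even when only `min` elements are summed, which skews the statistics for windows shorter than the pattern), and the window bound uses `<=` so the final full window is also scored.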