Add src

rozetko committed af4ab07edb into pull/1/head, 6 years ago
  1. .gitignore (+3)
  2. README.md (+29)
  3. REST.md (+198)
  4. server/README.md (+27)
  5. server/build/dev-server.js (+10)
  6. server/build/webpack.base.conf.js (+52)
  7. server/build/webpack.dev.conf.js (+4)
  8. server/build/webpack.prod.conf.js (+3)
  9. server/package-lock.json (+5093)
  10. server/package.json (+32)
  11. server/src/config.ts (+9)
  12. server/src/index.ts (+31)
  13. server/src/routes/alerts.ts (+62)
  14. server/src/routes/anomalies.ts (+136)
  15. server/src/routes/segments.ts (+80)
  16. server/src/services/alerts.ts (+58)
  17. server/src/services/analytics.ts (+141)
  18. server/src/services/anomalyType.ts (+117)
  19. server/src/services/json.ts (+55)
  20. server/src/services/metrics.ts (+27)
  21. server/src/services/notification.ts (+140)
  22. server/src/services/segments.ts (+75)
  23. server/tsconfig.json (+10)
  24. src/.gitignore (+11)
  25. src/add_anomaly.py (+5)
  26. src/anomaly_model.py (+157)
  27. src/data_preprocessor.py (+255)
  28. src/data_provider.py (+220)
  29. src/learn.py (+52)
  30. src/pattern_detection_model.py (+127)
  31. src/peaks_detector.py (+71)
  32. src/predict.py (+83)
  33. src/prophet_algorithm.py (+46)
  34. src/step_detect.py (+231)
  35. src/step_detector.py (+188)
  36. src/supervised_algorithm.py (+71)
  37. src/worker.py (+131)

.gitignore (vendored, +3)

@@ -0,0 +1,3 @@
node_modules/
dist/
.vscode/

README.md (+29)

@@ -0,0 +1,29 @@
# Hastic server
Implementation of basic pattern recognition and unsupervised learning for anomaly detection.
Implementation of the analytic unit for Hastic.
See the [REST API](REST.md).
## Build & run
### Analytic unit
Python3 project
```
pip3 install pandas
pip3 install influxdb
```
### Server
Node.js project
```
cd server
npm install
npm run build
npm start
```

REST.md (+198)

@@ -0,0 +1,198 @@
# Hastic server REST API
## /anomalies
### Get anomalies
`GET /anomalies?id=<anomaly_id>[&name=<anomaly_name>]`
NOTE: `name` param is deprecated, use `id` instead
Return data format:
```
{
"name": "<anomaly_name>",
"metric": "<metric_id>",
"status": "<str>"
}
```
The `status` field can be one of:
- `learning`
- `ready`
- `failed`
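For illustration, fetching an anomaly from Node.js (18+) could look like the following TypeScript sketch; the base URL assumes the server's default port 8000:
```
// Fetch one anomaly by id and inspect its status.
async function getAnomaly(anomalyId: string) {
  const resp = await fetch(`http://localhost:8000/anomalies?id=${anomalyId}`);
  if (!resp.ok) {
    throw new Error(`GET /anomalies failed with ${resp.status}`);
  }
  const anomaly = await resp.json();
  console.log(anomaly.status); // 'learning' | 'ready' | 'failed'
  return anomaly;              // { name, metric, status }
}
```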
### Get anomaly status
`GET /anomalies/status?id=<anomaly_id>[&name=<anomaly_name>]`
NOTE: `name` param is deprecated, use `id` instead
Return data format:
```
{
"status": <str>
}
```
The `status` field can be one of:
- `learning`
- `ready`
- `failed`
### Add anomaly
`POST /anomalies`
Data format:
```
{
"name": "cpu_utilization_supervised",
"metric": {
"datasource": "influx accelerometer",
"targets": [
<targets>
]
},
"panelUrl": "http://grafana.example.com/d/oNZ35bWiz/new-dashboard-copy?panelId=2&fullscreen"
}
```
`targets` example:
```
{
"alias": "command",
"groupBy": [],
"measurement": "data",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"command"
],
"type": "field"
}
]
],
"tags": []
}
```
Return data format:
```
{
"anomaly_id": "<anomaly_id>"
}
```
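A creation request can be sketched in TypeScript as follows; the datasource, target and panel URL are taken from the examples above and are placeholders for real values:
```
// Create an anomaly from a single InfluxDB target and return the new id.
async function createAnomaly(): Promise<string> {
  const target = {
    alias: 'command',
    groupBy: [],
    measurement: 'data',
    orderByTime: 'ASC',
    policy: 'default',
    refId: 'A',
    resultFormat: 'time_series',
    select: [[{ params: ['command'], type: 'field' }]],
    tags: []
  };
  const resp = await fetch('http://localhost:8000/anomalies', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      name: 'cpu_utilization_supervised',
      metric: { datasource: 'influx accelerometer', targets: [target] },
      panelUrl: 'http://grafana.example.com/d/oNZ35bWiz/new-dashboard-copy?panelId=2&fullscreen'
    })
  });
  const { anomaly_id } = await resp.json();
  return anomaly_id; // learning starts on the server right after creation
}
```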
### Delete anomalies
`DELETE /anomalies`
Data format:
```
{
"id": "<anomaly_id>",
"name": "<anomaly_name>" // deprecated, use id instead
}
```
Return data format:
```
Success
```
## /segments
### Get segments
`GET /segments?anomaly_id=<anomaly_id>[&last_segment=<id>][&from=<time_from>][&to=<time_to>]`
Return data format:
```
{
"segments": [
{
"id": 0,
"start": 1392765184318,
"finish": 1397243699000,
"labeled": true
},
...
]
}
```
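The filters can be combined; a TypeScript sketch with a placeholder id and timestamps, assuming the default port 8000:
```
// List segments of an anomaly, newer than a known segment id and inside a time window.
async function getSegments(anomalyId: string) {
  const params = new URLSearchParams({
    anomaly_id: anomalyId,
    last_segment: '12',     // only segments with id > 12
    from: '1392765184318',  // only segments that finish after this time
    to: '1397243699000'     // only segments that start before this time
  });
  const resp = await fetch(`http://localhost:8000/segments?${params}`);
  const { segments } = await resp.json();
  return segments;
}
```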
### Update segments
`PATCH /segments`
Data format:
```
{
"anomaly_id": "<anomaly_id>",
"name": "<anomaly_name>", // deprecated, use id instead
"added_segments": [
{
"start": 1397164656000,
"finish": 1397243699000
},
...
],
"removed_segments": [3, 9]
}
```
Return data format:
```
{
"added_ids": [12, ...]
}
```
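An update that labels one new segment and removes two existing ones could be sketched like this (values are placeholders):
```
// Label a new segment and delete segments 3 and 9 in one request.
async function updateSegments(anomalyId: string) {
  const resp = await fetch('http://localhost:8000/segments', {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      anomaly_id: anomalyId,
      added_segments: [{ start: 1397164656000, finish: 1397243699000 }],
      removed_segments: [3, 9]
    })
  });
  const { added_ids } = await resp.json();
  return added_ids; // ids assigned to the newly added segments
}
```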
## /alerts
### Check if alert is enabled for an anomaly
`GET /alerts?anomaly_id=<anomaly_id>`
Return data format:
```
{
"enable": true
}
```
### Enable / disable alert for an anomaly
`POST /alerts`
Data format:
```
{
"anomaly_id": "<anomaly_id>",
"enable": true
}
```
Return data format:
```
{
"status": "Ok"
}
```
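Both alert endpoints together, as a TypeScript sketch (default port 8000 assumed):
```
// Turn alerting on for an anomaly, then read the flag back.
async function enableAlert(anomalyId: string): Promise<boolean> {
  await fetch('http://localhost:8000/alerts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ anomaly_id: anomalyId, enable: true })
  });
  const resp = await fetch(`http://localhost:8000/alerts?anomaly_id=${anomalyId}`);
  const { enable } = await resp.json();
  return enable; // true if alerting is now active
}
```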

server/README.md (+27)

@@ -0,0 +1,27 @@
# Hastic server
REST server for managing data for analytics.
Runs on port 8000.
# Build
```
npm install
npm run build
```
# Run
```
npm start
```
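Once started, the server listens on port 8000 and answers on the root route. A minimal reachability check in TypeScript (assuming Node 18+ for the global `fetch`):
```
// Smoke test: the root route replies with a short text message.
fetch('http://localhost:8000/')
  .then(resp => resp.text())
  .then(text => console.log(text)) // expected: 'Analytic unit works'
  .catch(err => console.error('server is not reachable', err));
```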
# Development
You should have the `nodemon` module installed to run the development server.
```
npm i -g nodemon
npm run dev
```

server/build/dev-server.js (+10)

@@ -0,0 +1,10 @@
const { spawn } = require('child_process');
const webpack = spawn('webpack', ['--config', 'build/webpack.dev.conf.js'], {
stdio: 'inherit',
shell: true
});
//webpack.stdout.pipe(process.stdout);
const nodemon = spawn('nodemon', ['../dist/server', '--watch', 'server.js']);
nodemon.stdout.pipe(process.stdout);

server/build/webpack.base.conf.js (+52)

@@ -0,0 +1,52 @@
const path = require('path');
const fs = require('fs');
const webpack = require('webpack');
function resolve(p) {
return path.join(__dirname, '/../', p);
}
module.exports = {
target: 'node',
node: {
__dirname: false,
__filename: false,
},
context: resolve('./src'),
entry: './index',
devtool: 'inline-source-map',
output: {
filename: "server.js",
path: resolve('dist')
},
externals: [
function(context, request, callback) {
if(request[0] == '.') {
callback();
} else {
callback(null, "require('" + request + "')");
}
}
],
plugins: [
new webpack.optimize.OccurrenceOrderPlugin(),
new webpack.HotModuleReplacementPlugin(),
new webpack.DefinePlugin({
'process.env.NODE_ENV': JSON.stringify('development')
})
],
resolve: {
extensions: [".ts", ".js"]
},
module: {
rules: [
{
test: /\.ts$/,
loader: "ts-loader",
exclude: /node_modules/
}
]
}
}

server/build/webpack.dev.conf.js (+4)

@@ -0,0 +1,4 @@
var base = require('./webpack.base.conf');
base.watch = true;
module.exports = base;

server/build/webpack.prod.conf.js (+3)

@@ -0,0 +1,3 @@
var base = require('./webpack.base.conf');
module.exports = base;

server/package-lock.json (generated, +5093)

File diff suppressed because it is too large

server/package.json (+32)

@@ -0,0 +1,32 @@
{
"name": "hastic-server",
"version": "1.0.0",
"description": "REST server for managing data for analytics",
"scripts": {
"start": "node dist/server.js",
"dev": "node build/dev-server.js",
"build": "webpack --config build/webpack.prod.conf.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/hastic/hastic-server.git"
},
"author": "CorpGlory",
"license": "ISC",
"bugs": {
"url": "https://github.com/hastic/hastic-server/issues"
},
"homepage": "https://github.com/hastic/hastic-server#readme",
"dependencies": {
"express": "^4.16.3",
"fast-csv": "^2.4.1",
"telegraf": "^3.21.0"
},
"devDependencies": {
"@types/express": "^4.11.1",
"nodemon": "^1.17.3",
"ts-loader": "^3.5.0",
"typescript": "^2.8.3",
"webpack": "^3.5.6"
}
}

server/src/config.ts (+9)

@@ -0,0 +1,9 @@
import * as path from 'path';
const DATA_PATH = path.join(__dirname, '../data');
const ANALYTICS_PATH = path.join(__dirname, '../../src');
const ANOMALIES_PATH = path.join(ANALYTICS_PATH, 'anomalies');
const SEGMENTS_PATH = path.join(ANALYTICS_PATH, 'segments');
const METRICS_PATH = path.join(ANALYTICS_PATH, 'metrics');
export { DATA_PATH, ANALYTICS_PATH, ANOMALIES_PATH, SEGMENTS_PATH, METRICS_PATH }

server/src/index.ts (+31)

@@ -0,0 +1,31 @@
import * as express from 'express';
import * as bodyParser from 'body-parser';
import { router as anomaliesRouter } from './routes/anomalies';
import { router as segmentsRouter } from './routes/segments';
import { router as alertsRouter } from './routes/alerts';
import { tgBotInit } from './services/notification';
const app = express();
const PORT = 8000;
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: true }));
app.use(function (req, res, next) {
res.header('Access-Control-Allow-Origin', '*');
res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, PATCH, OPTIONS');
res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
next();
});
app.use('/anomalies', anomaliesRouter);
app.use('/segments', segmentsRouter);
app.use('/alerts', alertsRouter);
app.use('/', (req, res) => { res.send('Analytic unit works') });
app.listen(PORT, () => {
console.log(`Server is running on :${PORT}`)
});
tgBotInit();

server/src/routes/alerts.ts (+62)

@@ -0,0 +1,62 @@
import * as express from 'express';
import {AnomalyId, getAnomalyIdByName, loadAnomalyById} from '../services/anomalyType';
import { getAlertsAnomalies, saveAlertsAnomalies } from '../services/alerts';
function getAlert(req, res) {
try {
let anomalyId: AnomalyId = req.query.anomaly_id;
let anomaly = loadAnomalyById(anomalyId)
if (anomaly == null) {
anomalyId = getAnomalyIdByName(anomalyId.toLowerCase());
}
let alertsAnomalies = getAlertsAnomalies();
let pos = alertsAnomalies.indexOf(anomalyId);
let enable: boolean = (pos !== -1);
res.status(200).send({
enable
});
} catch(e) {
res.status(500).send({
code: 500,
message: 'Internal error'
});
}
}
function changeAlert(req, res) {
try {
let anomalyId: AnomalyId = req.body.anomaly_id;
let enable: boolean = req.body.enable;
let anomaly = loadAnomalyById(anomalyId)
if (anomaly == null) {
anomalyId = getAnomalyIdByName(anomalyId.toLowerCase());
}
let alertsAnomalies = getAlertsAnomalies();
let pos: number = alertsAnomalies.indexOf(anomalyId);
if(enable && pos == -1) {
alertsAnomalies.push(anomalyId);
saveAlertsAnomalies(alertsAnomalies);
} else if(!enable && pos > -1) {
alertsAnomalies.splice(pos, 1);
saveAlertsAnomalies(alertsAnomalies);
}
res.status(200).send({
status: 'Ok'
});
} catch(e) {
res.status(500).send({
code: 500,
message: 'Internal error'
});
}
}
export const router = express.Router();
router.get('/', getAlert);
router.post('/', changeAlert);

server/src/routes/anomalies.ts (+136)

@@ -0,0 +1,136 @@
import * as express from 'express';
import {
Metric,
Anomaly,
saveAnomaly,
insertAnomaly, removeAnomaly, loadAnomalyByName, loadAnomalyById, getAnomalyIdByName
} from '../services/anomalyType';
import { runLearning } from '../services/analytics'
import { saveTargets } from '../services/metrics';
async function sendAnomalyTypeStatus(req, res) {
let id = req.query.id;
let name = req.query.name;
try {
let anomaly: Anomaly;
if(id !== undefined) {
anomaly = loadAnomalyById(id);
} else {
anomaly = loadAnomalyByName(name);
}
if(anomaly === null) {
res.status(404).send({
code: 404,
message: 'Not found'
});
return;
}
if(anomaly.status === undefined) {
throw new Error('No status for ' + name);
}
res.status(200).send({ status: anomaly.status });
} catch(e) {
console.error(e);
// TODO: better to send 404 when we know it isn't found
res.status(500).send({ error: 'Can`t return anything' });
}
}
async function getAnomaly(req, res) {
try {
let id = req.query.id;
let name = req.query.name;
let anomaly:Anomaly;
if(id !== undefined) {
anomaly = loadAnomalyById(id);
} else {
anomaly = loadAnomalyByName(name.toLowerCase());
}
if(anomaly === null) {
res.status(404).send({
code: 404,
message: 'Not found'
});
return;
}
let payload = JSON.stringify({
name: anomaly.name,
metric: anomaly.metric,
status: anomaly.status
});
res.status(200).send(payload)
} catch(e) {
console.error(e);
// TODO: better to send 404 when we know it isn't found
res.status(500).send('Can`t get anything');
}
}
async function createAnomaly(req, res) {
try {
const metric:Metric = {
datasource: req.body.metric.datasource,
targets: saveTargets(req.body.metric.targets)
};
const anomaly:Anomaly = {
name: req.body.name,
panelUrl: req.body.panelUrl,
metric: metric,
status: 'learning',
last_prediction_time: 0,
next_id: 0
};
let anomalyId = insertAnomaly(anomaly);
if(anomalyId === null) {
res.status(403).send({
code: 403,
message: 'Already exists'
});
return;
}
let payload = JSON.stringify({ anomaly_id: anomalyId })
res.status(200).send(payload);
runLearning(anomalyId);
} catch(e) {
res.status(500).send({
code: 500,
message: 'Internal error'
});
}
}
function deleteAnomaly(req, res) {
try {
let id = req.query.id;
let name = req.query.name;
if(id !== undefined) {
removeAnomaly(id);
} else {
removeAnomaly(name.toLowerCase());
}
res.status(200).send({
code: 200,
message: 'Success'
});
} catch(e) {
res.status(500).send({
code: 500,
message: 'Internal error'
});
}
}
export const router = express.Router();
router.get('/status', sendAnomalyTypeStatus);
router.get('/', getAnomaly);
router.post('/', createAnomaly);
router.delete('/', deleteAnomaly);

server/src/routes/segments.ts (+80)

@@ -0,0 +1,80 @@
import * as express from 'express';
import {
getLabeledSegments,
insertSegments,
removeSegments,
} from '../services/segments';
import {runLearning} from '../services/analytics';
import {Anomaly, AnomalyId, getAnomalyIdByName, loadAnomalyById} from '../services/anomalyType';
async function sendSegments(req, res) {
try {
let anomalyId: AnomalyId = req.query.anomaly_id;
let anomaly:Anomaly = loadAnomalyById(anomalyId);
if(anomaly === null) {
anomalyId = getAnomalyIdByName(anomalyId);
}
let lastSegmentId = req.query.last_segment;
let timeFrom = req.query.from;
let timeTo = req.query.to;
let segments = getLabeledSegments(anomalyId);
// Id filtering
if(lastSegmentId !== undefined) {
segments = segments.filter(el => el.id > lastSegmentId);
}
// Time filtering
if(timeFrom !== undefined) {
segments = segments.filter(el => el.finish > timeFrom);
}
if(timeTo !== undefined) {
segments = segments.filter(el => el.start < timeTo);
}
let payload = JSON.stringify({
segments
});
res.status(200).send(payload);
} catch(e) {
res.status(500).send({
code: 500,
message: 'Internal error'
});
}
}
async function updateSegments(req, res) {
try {
let segmentsUpdate = req.body;
let anomalyId = segmentsUpdate.anomaly_id;
let anomalyName = segmentsUpdate.name;
if(anomalyId === undefined) {
anomalyId = getAnomalyIdByName(anomalyName.toLowerCase());
}
let addedIds = insertSegments(anomalyId, segmentsUpdate.added_segments, true);
removeSegments(anomalyId, segmentsUpdate.removed_segments);
let payload = JSON.stringify({ added_ids: addedIds });
res.status(200).send(payload);
runLearning(anomalyId);
} catch(e) {
res.status(500).send({
code: 500,
message: 'Internal error'
});
}
}
export const router = express.Router();
router.get('/', sendSegments);
router.patch('/', updateSegments);

server/src/services/alerts.ts (+58)

@@ -0,0 +1,58 @@
import { getJsonDataSync, writeJsonDataSync } from './json';
import * as path from 'path';
import { AnomalyId } from './anomalyType';
import { ANOMALIES_PATH } from '../config';
import { runPredict } from './analytics';
import { sendNotification } from './notification';
import { getLabeledSegments } from './segments';
function getAlertsAnomalies() : AnomalyId[] {
return getJsonDataSync(path.join(ANOMALIES_PATH, `alerts_anomalies.json`));
}
function saveAlertsAnomalies(anomalies: AnomalyId[]) {
return writeJsonDataSync(path.join(ANOMALIES_PATH, `alerts_anomalies.json`), anomalies);
}
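// An alert for an anomaly is considered active while its latest segment
// finishes within `alertTimeout` ms of the current time; sendNotification()
// is called only on transitions between inactive and active.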
function processAlerts(anomalyId) {
let segments = getLabeledSegments(anomalyId);
const currentTime = new Date().getTime();
const activeAlert = activeAlerts.has(anomalyId);
let newActiveAlert = false;
if(segments.length > 0) {
let lastSegment = segments[segments.length - 1];
if(lastSegment.finish >= currentTime - alertTimeout) {
newActiveAlert = true;
}
}
if(!activeAlert && newActiveAlert) {
activeAlerts.add(anomalyId);
sendNotification(anomalyId, true);
} else if(activeAlert && !newActiveAlert) {
activeAlerts.delete(anomalyId);
sendNotification(anomalyId, false);
}
}
async function alertsTick() {
let alertsAnomalies = getAlertsAnomalies();
for (let anomalyId of alertsAnomalies) {
try {
await runPredict(anomalyId);
processAlerts(anomalyId);
} catch (e) {
console.error(e);
}
}
setTimeout(alertsTick, 5000);
}
const alertTimeout = 60000; // ms
const activeAlerts = new Set<string>();
setTimeout(alertsTick, 5000);
export { getAlertsAnomalies, saveAlertsAnomalies }

server/src/services/analytics.ts (+141)

@@ -0,0 +1,141 @@
import { spawn } from 'child_process'
import { ANALYTICS_PATH } from '../config'
import {
Anomaly,
AnomalyId, getAnomalyTypeInfo,
loadAnomalyById,
setAnomalyPredictionTime,
setAnomalyStatus
} from './anomalyType'
import { getTarget } from './metrics';
import { getLabeledSegments, insertSegments, removeSegments } from './segments';
import { split, map, mapSync } from 'event-stream'
const learnWorker = spawn('python3', ['worker.py'], { cwd: ANALYTICS_PATH })
learnWorker.stdout.pipe(split())
.pipe(
mapSync(function(line){
console.log(line)
onMessage(line)
})
);
learnWorker.stderr.on('data', data => console.error(`worker stderr: ${data}`));
const taskMap = {};
let nextTaskId = 0;
function onMessage(data) {
let response = JSON.parse(data);
let taskId = response.__task_id;
// let anomalyName = response.anomaly_name;
// let task = response.task;
let status = response.status;
if(status === 'success' || status === 'failed') {
if(taskId in taskMap) {
let resolver = taskMap[taskId];
resolver(response);
delete taskMap[taskId];
}
}
}
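// Each task is written to the worker's stdin as a single JSON line; the
// response is matched back to its caller via `__task_id` in onMessage() above.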
function runTask(task) : Promise<any> {
let anomaly:Anomaly = loadAnomalyById(task.anomaly_id);
task.metric = {
datasource: anomaly.metric.datasource,
targets: anomaly.metric.targets.map(t => getTarget(t))
};
task.__task_id = nextTaskId++;
let command = JSON.stringify(task)
learnWorker.stdin.write(`${command}\n`);
return new Promise<Object>((resolve, reject) => {
taskMap[task.__task_id] = resolve
})
}
async function runLearning(anomalyId:AnomalyId) {
let segments = getLabeledSegments(anomalyId);
setAnomalyStatus(anomalyId, 'learning');
let anomaly:Anomaly = loadAnomalyById(anomalyId);
let analyticsType = "anomalies";
let preset = undefined;
if (anomaly.name.includes("jumps")) {
analyticsType = "patterns";
preset = "steps"
}
if (anomaly.name.includes("cliffs") || anomaly.name.includes("drops")) {
analyticsType = "patterns";
preset = "cliffs"
}
if (anomaly.name.includes("peaks")) {
analyticsType = "patterns";
preset = "peaks"
}
let task = {
type: 'learn',
anomaly_id: anomalyId,
analytics_type: analyticsType,
preset,
segments: segments
};
let result = await runTask(task);
if (result.status === 'success') {
setAnomalyStatus(anomalyId, 'ready');
insertSegments(anomalyId, result.segments, false);
setAnomalyPredictionTime(anomalyId, result.last_prediction_time);
} else {
setAnomalyStatus(anomalyId, 'failed');
}
}
async function runPredict(anomalyId:AnomalyId) {
let anomaly:Anomaly = loadAnomalyById(anomalyId);
let analyticsType = "anomalies";
let preset = undefined;
if (anomaly.name.includes("jump")) {
analyticsType = "patterns";
preset = "steps"
}
if (anomaly.name.includes("cliffs") || anomaly.name.includes("drops")) {
analyticsType = "patterns";
preset = "cliffs"
}
if (anomaly.name.includes("peaks")) {
analyticsType = "patterns";
preset = "peaks"
}
let task = {
type: 'predict',
anomaly_id: anomalyId,
analytics_type: analyticsType,
preset,
last_prediction_time: anomaly.last_prediction_time
};
let result = await runTask(task);
if(result.status === 'failed') {
return [];
}
// Merging segments
let segments = getLabeledSegments(anomalyId);
if(segments.length > 0 && result.segments.length > 0) {
let lastOldSegment = segments[segments.length - 1];
let firstNewSegment = result.segments[0];
if(firstNewSegment.start <= lastOldSegment.finish) {
result.segments[0].start = lastOldSegment.start;
removeSegments(anomalyId, [lastOldSegment.id]);
}
}
insertSegments(anomalyId, result.segments, false);
setAnomalyPredictionTime(anomalyId, result.last_prediction_time);
return result.segments;
}
export { runLearning, runPredict }

server/src/services/anomalyType.ts (+117)

@@ -0,0 +1,117 @@
import * as path from 'path'
import { getJsonDataSync, writeJsonDataSync } from './json'
import { ANOMALIES_PATH } from '../config'
import * as fs from 'fs'
import * as crypto from 'crypto';
export type Metric = {
datasource: string,
targets: string[]
}
export type Anomaly = {
name: string,
panelUrl: string,
metric: Metric,
status: string,
last_prediction_time: number,
next_id: number
}
export type AnomalyId = string;
let anomaliesNameToIdMap = {};
function loadAnomaliesMap() {
let filename = path.join(ANOMALIES_PATH, `all_anomalies.json`);
anomaliesNameToIdMap = getJsonDataSync(filename);
}
function saveAnomaliesMap() {
let filename = path.join(ANOMALIES_PATH, `all_anomalies.json`);
writeJsonDataSync(filename, anomaliesNameToIdMap);
}
function getAnomalyIdByName(anomalyName:string) : AnomalyId {
loadAnomaliesMap();
anomalyName = anomalyName.toLowerCase();
if(anomalyName in anomaliesNameToIdMap) {
return anomaliesNameToIdMap[anomalyName];
}
return anomalyName;
}
function insertAnomaly(anomaly: Anomaly) : AnomalyId {
const hashString = anomaly.name + (new Date()).toString();
const anomalyId:AnomalyId = crypto.createHash('md5').update(hashString).digest('hex');
anomaliesNameToIdMap[anomaly.name] = anomalyId;
saveAnomaliesMap();
// return anomalyId
// const anomalyId:AnomalyId = anomaly.name;
let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
if(fs.existsSync(filename)) {
return null;
}
saveAnomaly(anomalyId, anomaly);
return anomalyId;
}
function removeAnomaly(anomalyId:AnomalyId) {
let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
fs.unlinkSync(filename);
}
function saveAnomaly(anomalyId: AnomalyId, anomaly: Anomaly) {
let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
return writeJsonDataSync(filename, anomaly);
}
function loadAnomalyById(anomalyId: AnomalyId) : Anomaly {
let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
if(!fs.existsSync(filename)) {
return null;
}
return getJsonDataSync(filename);
}
function loadAnomalyByName(anomalyName: string) : Anomaly {
let anomalyId = getAnomalyIdByName(anomalyName);
return loadAnomalyById(anomalyId);
}
function saveAnomalyTypeInfo(info) {
console.log('Saving');
let filename = path.join(ANOMALIES_PATH, `${info.name}.json`);
if(info.next_id === undefined) {
info.next_id = 0;
}
if(info.last_prediction_time === undefined) {
info.last_prediction_time = 0;
}
return writeJsonDataSync(filename, info);
}
function getAnomalyTypeInfo(name) {
return getJsonDataSync(path.join(ANOMALIES_PATH, `${name}.json`));
}
function setAnomalyStatus(anomalyId:AnomalyId, status:string) {
let info = loadAnomalyById(anomalyId);
info.status = status;
saveAnomaly(anomalyId, info);
}
function setAnomalyPredictionTime(anomalyId:AnomalyId, lastPredictionTime:number) {
let info = loadAnomalyById(anomalyId);
info.last_prediction_time = lastPredictionTime;
saveAnomaly(anomalyId, info);
}
export {
saveAnomaly, loadAnomalyById, loadAnomalyByName, insertAnomaly, removeAnomaly, saveAnomalyTypeInfo,
getAnomalyTypeInfo, getAnomalyIdByName, setAnomalyStatus, setAnomalyPredictionTime
}

server/src/services/json.ts (+55)

@@ -0,0 +1,55 @@
import * as fs from 'fs';
async function getJsonData(filename: string): Promise<Object> {
var data = await new Promise<string>((resolve, reject) => {
fs.readFile(filename, 'utf8', (err, data) => {
if(err) {
console.error(err);
reject('Can`t read file');
} else {
resolve(data);
}
});
});
try {
return JSON.parse(data);
} catch(e) {
console.error(e);
throw new Error('Wrong file format');
}
}
function writeJsonData(filename: string, data: Object) {
return new Promise((resolve, reject) => {
fs.writeFile(filename, JSON.stringify(data), 'utf8', (err) => {
if(err) {
console.error(err);
reject('Can`t write file');
} else {
resolve();
}
});
})
}
function getJsonDataSync(filename: string) {
let data = fs.readFileSync(filename, 'utf8');
try {
return JSON.parse(data);
} catch(e) {
console.error(e);
throw new Error('Wrong file format');
}
}
function writeJsonDataSync(filename: string, data: Object) {
fs.writeFileSync(filename, JSON.stringify(data));
}
export {
getJsonData,
writeJsonData,
getJsonDataSync,
writeJsonDataSync
}

server/src/services/metrics.ts (+27)

@@ -0,0 +1,27 @@
import * as path from 'path';
import { getJsonDataSync, writeJsonDataSync } from './json';
import { METRICS_PATH } from '../config';
import * as crypto from 'crypto';
function saveTargets(targets) {
let metrics = [];
for (let target of targets) {
metrics.push(saveTarget(target));
}
return metrics;
}
function saveTarget(target) {
//const md5 = crypto.createHash('md5')
const targetId = crypto.createHash('md5').update(JSON.stringify(target)).digest('hex');
let filename = path.join(METRICS_PATH, `${targetId}.json`);
writeJsonDataSync(filename, target);
return targetId;
}
function getTarget(targetId) {
let filename = path.join(METRICS_PATH, `${targetId}.json`);
return getJsonDataSync(filename);
}
export { saveTargets, getTarget }

server/src/services/notification.ts (+140)

@@ -0,0 +1,140 @@
//import * as Telegraf from 'telegraf'
import * as path from 'path';
import { DATA_PATH } from '../config';
import { getJsonDataSync, writeJsonDataSync } from './json';
import { AnomalyId } from './anomalyType';
type SubscriberId = string;
type SubscribersMap = Map< AnomalyId, SubscriberId[] >;
type BotConfig = {
token: string,
subscriptions: SubscribersMap
};
function sendNotification(anomalyName, active) {
console.log('Notification ' + anomalyName);
if(anomalyName in botConfig.subscriptions) {
let notificationMessage;
if(active) {
notificationMessage = 'Alert! Anomaly type ' + anomalyName;
} else {
notificationMessage = 'Ok! Anomaly type ' + anomalyName;
}
for (let SubscriberId of botConfig.subscriptions[anomalyName]) {
bot.telegram.sendMessage(SubscriberId, notificationMessage);
}
}
}
function loadBotConfig() : BotConfig {
let filename = path.join(DATA_PATH, `bot_config.json`);
let jsonData;
try {
jsonData = getJsonDataSync(filename);
} catch(e) {
console.error(e.message);
jsonData = [];
}
return jsonData;
}
function saveBotConfig(botConfig: BotConfig) {
let filename = path.join(DATA_PATH, `bot_config.json`);
try {
writeJsonDataSync(filename, botConfig);
} catch(e) {
console.error(e.message);
}
}
const commandArgs = (ctx, next) => {
try {
if(ctx.updateType === 'message') {
const text = ctx.update.message.text;
if(text !== undefined && text.startsWith('/')) {
const match = text.match(/^\/([^\s]+)\s?(.+)?/);
let args = [];
let command;
if(match !== null) {
if(match[1]) {
command = match[1];
}
if(match[2]) {
args = match[2].split(' ');
}
}
ctx.state.command = {
raw: text,
command,
args,
};
}
}
return next(ctx);
} catch (e) {
}
};
function addNotification(ctx) {
console.log('addNotification')
let command = ctx.state.command;
let chatId = ctx.chat.id;
if(command.args.length > 0) {
for (let anomalyName of command.args) {
if(!(anomalyName in botConfig.subscriptions)) {
botConfig.subscriptions[anomalyName] = []
}
if(botConfig.subscriptions[anomalyName].includes(chatId)) {
return ctx.reply('You are already subscribed on alerts from anomaly ' + command.args)
} else {
botConfig.subscriptions[anomalyName].push(chatId);
saveBotConfig(botConfig);
}
}
return ctx.reply('You have been successfully subscribed on alerts from anomaly ' + command.args)
} else {
return ctx.reply('You should use syntax: \/addNotification <anomaly_name>')
}
}
function removeNotification(ctx) {
let command = ctx.state.command;
let chatId = ctx.chat.id;
if(command.args.length > 0) {
for (let anomalyName of command.args) {
if(anomalyName in botConfig.subscriptions) {
botConfig.subscriptions[anomalyName] = botConfig.subscriptions[anomalyName].filter(el => el !== chatId);
saveBotConfig(botConfig);
}
}
return ctx.reply('You have been successfully unsubscribed from alerts from ' + command.args);
} else {
return ctx.reply('You should use syntax: \/removeNotification <anomaly_name>');
}
}
const Telegraf = require('telegraf');
let botConfig: BotConfig;
let bot;
function tgBotInit() {
try {
botConfig = loadBotConfig();
bot = new Telegraf(botConfig.token);
bot.use(commandArgs);
bot.command('addNotification', addNotification);
bot.command('removeNotification', removeNotification);
bot.startPolling();
} catch(e) {
// TODO: handle exception
}
}
export { sendNotification, tgBotInit }

server/src/services/segments.ts (+75)

@@ -0,0 +1,75 @@
import * as path from 'path';
import { getJsonDataSync, writeJsonDataSync } from './json';
import { SEGMENTS_PATH } from '../config';
import { AnomalyId, loadAnomalyById, saveAnomaly } from './anomalyType';
function getLabeledSegments(anomalyId: AnomalyId) {
let filename = path.join(SEGMENTS_PATH, `${anomalyId}_labeled.json`);
let segments = [];
try {
segments = getJsonDataSync(filename);
for (let segment of segments) {
if (segment.labeled === undefined) {
segment.labeled = false;
}
}
} catch (e) {
console.error(e.message);
}
return segments;
}
function getPredictedSegments(anomalyId: AnomalyId) {
let filename = path.join(SEGMENTS_PATH, `${anomalyId}_segments.json`);
let jsonData;
try {
jsonData = getJsonDataSync(filename);
} catch(e) {
console.error(e.message);
jsonData = [];
}
return jsonData;
}
function saveSegments(anomalyId: AnomalyId, segments) {
let filename = path.join(SEGMENTS_PATH, `${anomalyId}_labeled.json`);
try {
return writeJsonDataSync(filename, segments);
} catch(e) {
console.error(e.message);
throw new Error('Can`t write to db');
}
}
function insertSegments(anomalyId: AnomalyId, addedSegments, labeled:boolean) {
// Set status
let info = loadAnomalyById(anomalyId);
let segments = getLabeledSegments(anomalyId);
let nextId = info.next_id;
let addedIds = []
for (let segment of addedSegments) {
segment.id = nextId;
segment.labeled = labeled;
addedIds.push(nextId);
nextId++;
segments.push(segment);
}
info.next_id = nextId;
saveSegments(anomalyId, segments);
saveAnomaly(anomalyId, info);
return addedIds;
}
function removeSegments(anomalyId: AnomalyId, removedSegments) {
let segments = getLabeledSegments(anomalyId);
for (let segmentId of removedSegments) {
segments = segments.filter(el => el.id !== segmentId);
}
saveSegments(anomalyId, segments);
}
export { getLabeledSegments, getPredictedSegments, saveSegments, insertSegments, removeSegments }

server/tsconfig.json (+10)

@@ -0,0 +1,10 @@
{
"compilerOptions": {
"outDir": "./dist/",
"sourceMap": true,
"noImplicitAny": false,
"module": "commonjs",
"target": "es2015",
"allowJs": true
}
}

src/.gitignore (vendored, +11)

@@ -0,0 +1,11 @@
anomalies/
segments/
datasets/
datasources/
models/
metrics/
__pycache__/
*.pyc
*.txt
*.log
tasks.csv

src/add_anomaly.py (+5)

@@ -0,0 +1,5 @@
from worker import worker
if __name__ == "__main__":
w = worker()
w.do_task({"type": "learn", "anomaly_name": "cpu_utilization_supervised", "segments": []})

src/anomaly_model.py (+157)

@@ -0,0 +1,157 @@
import os.path
from data_provider import DataProvider
from data_preprocessor import data_preprocessor
import json
import pandas as pd
import logging
datasource_folder = "datasources/"
dataset_folder = "datasets/"
anomalies_folder = "anomalies/"
models_folder = "models/"
metrics_folder = "metrics/"
logger = logging.getLogger('analytic_toolset')
def anomalies_to_timestamp(anomalies):
for anomaly in anomalies:
anomaly['start'] = int(anomaly['start'].timestamp() * 1000)
anomaly['finish'] = int(anomaly['finish'].timestamp() * 1000)
return anomalies
class AnomalyModel:
def __init__(self, anomaly_name):
self.anomaly_name = anomaly_name
self.load_anomaly_config()
datasource = self.anomaly_config['metric']['datasource']
metric_name = self.anomaly_config['metric']['targets'][0]
dbconfig_filename = os.path.join(datasource_folder, datasource + ".json")
target_filename = os.path.join(metrics_folder, metric_name + ".json")
dataset_filename = os.path.join(dataset_folder, metric_name + ".csv")
augmented_path = os.path.join(dataset_folder, metric_name + "_augmented.csv")
with open(dbconfig_filename, 'r') as config_file:
dbconfig = json.load(config_file)
with open(target_filename, 'r') as file:
target = json.load(file)
self.data_prov = DataProvider(dbconfig, target, dataset_filename)
self.preprocessor = data_preprocessor(self.data_prov, augmented_path)
self.model = None
self.__load_model()
def anomalies_box(self, anomalies):
max_time = 0
min_time = float("inf")
for anomaly in anomalies:
max_time = max(max_time, anomaly['finish'])
min_time = min(min_time, anomaly['start'])
min_time = pd.to_datetime(min_time, unit='ms')
max_time = pd.to_datetime(max_time, unit='ms')
return min_time, max_time
def learn(self, anomalies):
logger.info("Start to learn for anomaly_name='%s'" % self.anomaly_name)
confidence = 0.02
dataframe = self.data_prov.get_dataframe()
start_index, stop_index = 0, len(dataframe)
if len(anomalies) > 0:
confidence = 0.0
min_time, max_time = self.anomalies_box(anomalies)
start_index = dataframe[dataframe['timestamp'] >= min_time].index[0]
stop_index = dataframe[dataframe['timestamp'] > max_time].index[0]
start_index, stop_index = self.preprocessor.expand_indexes(start_index, stop_index)
dataframe = dataframe[start_index:stop_index]
train_augmented = self.preprocessor.get_augmented_data(
start_index,
stop_index,
anomalies
)
self.model = self.create_algorithm()
self.model.fit(train_augmented, confidence)
if len(anomalies) > 0:
last_dataframe_time = dataframe.iloc[- 1]['timestamp']
last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
else:
last_prediction_time = 0
self.__save_model()
logger.info("Learning is finished for anomaly_name='%s'" % self.anomaly_name)
return last_prediction_time
def predict(self, last_prediction_time):
logger.info("Start to predict for anomaly type='%s'" % self.anomaly_name)
last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')
start_index = self.data_prov.get_upper_bound(last_prediction_time)
stop_index = self.data_prov.size()
# last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')
# dataframe = dataframe[dataframe['timestamp'] > last_prediction_time]
last_prediction_time = int(last_prediction_time.timestamp() * 1000)
predicted_anomalies = []
if start_index < stop_index:
max_chunk_size = 50000
predicted = pd.Series()
for index in range(start_index, stop_index, max_chunk_size):
chunk_start = index
chunk_finish = min(index + max_chunk_size, stop_index)
predict_augmented = self.preprocessor.get_augmented_data(chunk_start, chunk_finish)
assert(len(predict_augmented) == chunk_finish - chunk_start)
predicted_current = self.model.predict(predict_augmented)
predicted = pd.concat([predicted, predicted_current])
predicted_anomalies = self.preprocessor.inverse_transform_anomalies(predicted)
last_row = self.data_prov.get_data_range(stop_index - 1, stop_index)
last_dataframe_time = last_row.iloc[0]['timestamp']
predicted_anomalies = anomalies_to_timestamp(predicted_anomalies)
last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
logger.info("Predicting is finished for anomaly type='%s'" % self.anomaly_name)
return predicted_anomalies, last_prediction_time
def synchronize_data(self):
self.data_prov.synchronize()
self.preprocessor.set_data_provider(self.data_prov)
self.preprocessor.synchronize()
def load_anomaly_config(self):
with open(os.path.join(anomalies_folder, self.anomaly_name + ".json"), 'r') as config_file:
self.anomaly_config = json.load(config_file)
def get_anomalies(self):
labeled_anomalies_file = os.path.join(anomalies_folder, self.anomaly_name + "_labeled.json")
if not os.path.exists(labeled_anomalies_file):
return []
with open(labeled_anomalies_file) as file:
return json.load(file)
def create_algorithm(self):
from supervised_algorithm import supervised_algorithm
return supervised_algorithm()
def __save_model(self):
logger.info("Save model '%s'" % self.anomaly_name)
model_filename = os.path.join(models_folder, self.anomaly_name + ".m")
self.model.save(model_filename)
def __load_model(self):
logger.info("Load model '%s'" % self.anomaly_name)
model_filename = os.path.join(models_folder, self.anomaly_name + ".m")
if os.path.exists(model_filename):
self.model = self.create_algorithm()
self.model.load(model_filename)

src/data_preprocessor.py (+255)

@@ -0,0 +1,255 @@
import os.path
import pandas as pd
import numpy as np
import math
import time
from tsfresh.transformers.feature_augmenter import FeatureAugmenter
from tsfresh.feature_extraction.settings import from_columns
from pytz import timezone
class data_preprocessor:
# augmented = None
frame_size = 16
calc_features = [
# "value__agg_linear_trend__f_agg_\"max\"__chunk_len_5__attr_\"intercept\"",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_10",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_20",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_8__w_20",
# "value__fft_coefficient__coeff_3__attr_\"abs\"",
"time_of_day_column_x",
"time_of_day_column_y",
"value__abs_energy",
"value__absolute_sum_of_changes",
"value__sum_of_reoccurring_data_points",
]
time_features = [
'time_of_day_column_x',
'time_of_day_column_y'
]
chunk_size = 50000
def __init__(self, data_provider, augmented_path):
self.data_provider = data_provider
self.augmented_path = augmented_path
self.last_chunk_index = 0
self.total_size = 0
self.__init_chunks()
self.synchronize()
def set_data_provider(self, data_provider):
self.data_provider = data_provider
def synchronize(self):
start_frame = self.total_size
stop_frame = self.data_provider.size()
max_chunk_size = 30000
for frame in range(start_frame, stop_frame, max_chunk_size):
data = self.__get_source_frames(frame, min(stop_frame, frame + max_chunk_size))
if len(data) == 0:
return
append_augmented = self.__extract_features(data, self.calc_features)
self.__append_data(append_augmented)
def expand_indexes(self, start_index, stop_index):
return start_index, stop_index
def get_augmented_data(self, start_index, stop_index, anomalies=[]):
start_frame = start_index
stop_frame = stop_index
augmented = self.__get_data(start_frame, stop_frame)
if len(anomalies) > 0:
anomalies_indexes = self.transform_anomalies(anomalies)
augmented = augmented.drop(anomalies_indexes)
return augmented
def transform_anomalies(self, anomalies):
anomaly_index = None
dataframe = self.data_provider.get_dataframe(None)
for anomaly in anomalies:
start_time = pd.to_datetime(anomaly['start'], unit='ms')
finish_time = pd.to_datetime(anomaly['finish'], unit='ms')
current_index = (dataframe['timestamp'] >= start_time) & (dataframe['timestamp'] <= finish_time)
if anomaly_index is not None:
anomaly_index = (anomaly_index | current_index)
else:
anomaly_index = current_index
rows = dataframe[anomaly_index]
# indexes = np.floor_divide(rows.index, self.frame_size)
indexes = np.unique(rows.index)
return indexes
def inverse_transform_anomalies(self, prediction):
anomalies = []
cur_anomaly = None
source_dataframe = self.data_provider.get_dataframe(None)
for i in prediction.index:
if prediction[i]:
start_frame_index = max(0, i - self.frame_size + 1)
finish_frame_index = i
start = source_dataframe['timestamp'][start_frame_index]
finish = source_dataframe['timestamp'][finish_frame_index]
if cur_anomaly is None:
if len(anomalies) > 0 and start <= anomalies[len(anomalies) - 1]['finish']:
cur_anomaly = anomalies[len(anomalies) - 1]
anomalies.pop()
else:
cur_anomaly = {'start': start, 'finish': finish}
cur_anomaly['finish'] = finish
elif cur_anomaly is not None:
anomalies.append(cur_anomaly)
cur_anomaly = None
if cur_anomaly:
anomalies.append(cur_anomaly)
return anomalies
def __get_data(self, start_index, stop_index):
result = pd.DataFrame()
start_chunk = start_index // self.chunk_size
finish_chunk = stop_index // self.chunk_size
for chunk_num in range(start_chunk, finish_chunk + 1):
chunk = self.__load_chunk(chunk_num)
if chunk_num == finish_chunk:
chunk = chunk[:stop_index % self.chunk_size]
if chunk_num == start_chunk:
chunk = chunk[start_index % self.chunk_size:]
result = pd.concat([result, chunk])
return result
def __init_chunks(self):
chunk_index = 0
self.last_chunk_index = 0
while True:
filename = self.augmented_path
if chunk_index > 0:
filename += "." + str(chunk_index)
if os.path.exists(filename):
self.last_chunk_index = chunk_index
else:
break
chunk_index += 1
self.total_size = self.last_chunk_index * self.chunk_size
last_chunk = self.__load_chunk(self.last_chunk_index)
self.total_size += len(last_chunk)
def __append_data(self, dataframe):
while len(dataframe) > 0:
chunk = self.__load_chunk(self.last_chunk_index)
rows_count = min(self.chunk_size - len(chunk), len(dataframe))
rows = dataframe.iloc[0:rows_count]
self.__save_chunk(self.last_chunk_index, rows)
self.total_size += rows_count
dataframe = dataframe[rows_count:]
if len(dataframe) > 0:
self.last_chunk_index += 1
def __load_chunk(self, index):
filename = self.augmented_path
if index > 0:
filename += "." + str(index)
if os.path.exists(filename):
chunk = pd.read_csv(filename)
frame_index = np.arange(index * self.chunk_size, index * self.chunk_size + len(chunk))
chunk = chunk.set_index(frame_index)
return chunk
return pd.DataFrame()
def __save_chunk(self, index, dataframe):
filename = self.augmented_path
if index > 0:
filename += "." + str(index)
if os.path.exists(filename):
dataframe.to_csv(filename, mode='a', index=False, header=False)
else:
dataframe.to_csv(filename, mode='w', index=False, header=True)
def __get_source_frames(self, start_frame, stop_frame):
start_index = start_frame
stop_index = stop_frame
# frame = self.source_dataframe[start_index:stop_index]
# mat = frame.as_matrix()
source_dataframe = self.data_provider.get_data_range(max(start_index - self.frame_size + 1, 0), stop_index)
dataframe = None
for i in range(start_index, stop_index):
mini = max(0, i - self.frame_size + 1)
frame = source_dataframe.loc[mini:i + 1].copy()
frame['id'] = i
if dataframe is None:
dataframe = frame
else:
dataframe = dataframe.append(frame, ignore_index=True)
#dataframe = self.source_dataframe[start_index:stop_index].copy()
#dataframe['id'] = np.floor_divide(dataframe.index, self.frame_size)
dataframe.reset_index(drop=True, inplace=True)
return dataframe
def __extract_features(self, data, features=None):
start_frame = data['id'][0]
stop_frame = data['id'][len(data)-1] + 1
augmented = pd.DataFrame(index=np.arange(start_frame, stop_frame))
# tsfresh features
tsfresh_features = None
if features is not None:
tsfresh_features = set(features) - set(self.time_features)
augmented = self.__extract_tfresh_features(data, augmented, tsfresh_features)
# time features
augmented = self.__extract_time_features(data, augmented, features)
return augmented
def __extract_tfresh_features(self, data, augmented, features):
relevant_extraction_settings = None
if features is not None:
augmented_features = set(features)
relevant_extraction_settings = from_columns(augmented_features)
#impute_function = partial(impute_dataframe_range, col_to_max=self.col_to_max,
# col_to_min=self.col_to_min, col_to_median=self.col_to_median)
feature_extractor = FeatureAugmenter(
kind_to_fc_parameters=relevant_extraction_settings,
column_id='id',
column_sort='timestamp')
feature_extractor.set_timeseries_container(data)
return feature_extractor.transform(augmented)
def __extract_time_features(self, data, augmented, features):
if features is None:
features = self.time_features
seconds = np.zeros(len(augmented))
first_id = data['id'][0]
for i in range(len(data)):
id = data['id'][i] - first_id
timeobj = data['timestamp'][i].time()
seconds[id] = timeobj.second + 60 * (timeobj.minute + 60 * timeobj.hour)
norm_seconds = 2 * math.pi * seconds / (24 * 3600)
if 'time_of_day_column_x' in features:
augmented['time_of_day_column_x'] = np.cos(norm_seconds)
if 'time_of_day_column_y' in features:
augmented['time_of_day_column_y'] = np.sin(norm_seconds)
return augmented

src/data_provider.py (+220)

@@ -0,0 +1,220 @@
from influxdb import InfluxDBClient
import pandas as pd
import os.path
import numpy as np
class DataProvider:
chunk_size = 50000
def __init__(self, dbconfig, target, data_filename):
self.dbconfig = dbconfig
self.target = target
self.data_filename = data_filename
self.last_time = None
self.total_size = 0
self.last_chunk_index = 0
self.chunk_last_times = {}
self.__init_chunks()
self.synchronize()
def get_dataframe(self, after_time=None):
result = pd.DataFrame()
for chunk_index, last_chunk_time in self.chunk_last_times.items():
if after_time is None or after_time <= last_chunk_time:
chunk = self.__load_chunk(chunk_index)
if after_time is not None:
chunk = chunk[chunk['timestamp'] > after_time]
result = pd.concat([result, chunk])
return result
def get_upper_bound(self, after_time):
for chunk_index, last_chunk_time in self.chunk_last_times.items():
if after_time < last_chunk_time:
chunk = self.__load_chunk(chunk_index)
chunk = chunk[chunk['timestamp'] > after_time]
return chunk.index[0]
return self.size()
def size(self):
return self.total_size
def get_data_range(self, start_index, stop_index=None):
return self.__get_data(start_index, stop_index)
def transform_anomalies(self, anomalies):
result = []
if len(anomalies) == 0:
return result
dataframe = self.get_dataframe(None)
for anomaly in anomalies:
start_time = pd.to_datetime(anomaly['start']-1, unit='ms')
finish_time = pd.to_datetime(anomaly['finish']+1, unit='ms')
current_index = (dataframe['timestamp'] >= start_time) & (dataframe['timestamp'] <= finish_time)
anomaly_frame = dataframe[current_index]
cur_anomaly = {
'start': anomaly_frame.index[0],
'finish': anomaly_frame.index[len(anomaly_frame) - 1],
'labeled': anomaly['labeled']
}
result.append(cur_anomaly)
return result
def inverse_transform_indexes(self, indexes):
if len(indexes) == 0:
return []
dataframe = self.get_data_range(indexes[0][0], indexes[-1][1] + 1)
return [(dataframe['timestamp'][i1], dataframe['timestamp'][i2]) for (i1, i2) in indexes]
def synchronize(self):
# last_time = None
# if len(self.dataframe/) > 0:
# last_time = self.dataframe['time'][len(self.dataframe)-1]
append_dataframe = self.load_from_db(self.last_time)
self.__append_data(append_dataframe)
# append_dataframe
# append_dataframe.to_csv(self.data_filename, mode='a', index=False, header=False)
# self.dataframe = pd.concat([self.dataframe, append_dataframe], ignore_index=True)
# def load(self):
# if os.path.exists(self.data_filename):
# self.dataframe = pd.read_csv(self.data_filename, parse_dates=[0])
# self.synchronize()
# else:
# append_dataframe = self.load_from_db()
# self.__append_data(append_dataframe)
# #self.dataframe.to_csv(self.data_filename, index=False, header=True)
def custom_query(self, after_time):
query = self.target["query"]
timeFilter = "TRUE"
if after_time is not None:
timeFilter = "time > '%s'" % (str(after_time))
query = query.replace("$timeFilter", timeFilter)
return query
def load_from_db(self, after_time=None):
"""Instantiate a connection to the InfluxDB."""
host = self.dbconfig['host']
port = self.dbconfig['port']
user = self.dbconfig['user']
password = self.dbconfig['password']
dbname = self.dbconfig['dbname']
client = InfluxDBClient(host, port, user, password, dbname)
# query = 'select k0, k1, k2 from vals;'
measurement = self.target['measurement']
select = self.target['select']
tags = self.target['tags']
if "query" in self.target:
query = self.custom_query(after_time)
else:
select_values = select[0][0]['params']
escaped_select_values = ["\"" + value + "\"" for value in select_values]
conditions_entries = []
if len(tags) > 0:
for tag in tags:
conditions_entries.append("(\"" + tag['key'] + "\"" + tag['operator'] + "'" + tag['value'] + "')")
if after_time:
conditions_entries.append("time > '%s'" % (str(after_time)))
condition = ""
if len(conditions_entries) > 0:
condition = " where " + " AND ".join(conditions_entries)
query = "select %s from \"%s\"%s;" % (",".join(escaped_select_values), measurement, condition)
result = client.query(query, chunked=True, chunk_size=10000)
dataframe = pd.DataFrame(result.get_points())
if len(dataframe) > 0:
cols = dataframe.columns.tolist()
cols.remove('time')
cols = ['time'] + cols
dataframe = dataframe[cols]
dataframe['time'] = pd.to_datetime(dataframe['time'])
dataframe = dataframe.dropna(axis=0, how='any')
return dataframe
def __init_chunks(self):
chunk_index = 0
self.last_chunk_index = 0
while True:
filename = self.data_filename
if chunk_index > 0:
filename += "." + str(chunk_index)
if os.path.exists(filename):
self.last_chunk_index = chunk_index
chunk = self.__load_chunk(chunk_index)
chunk_last_time = chunk.iloc[len(chunk) - 1]['timestamp']
self.chunk_last_times[chunk_index] = chunk_last_time
self.last_time = chunk_last_time
else:
break
chunk_index += 1
self.total_size = self.last_chunk_index * self.chunk_size
last_chunk = self.__load_chunk(self.last_chunk_index)
self.total_size += len(last_chunk)
def __load_chunk(self, index):
filename = self.data_filename
if index > 0:
filename += "." + str(index)
if os.path.exists(filename):
chunk = pd.read_csv(filename, parse_dates=[0])
frame_index = np.arange(index * self.chunk_size, index * self.chunk_size + len(chunk))
chunk = chunk.set_index(frame_index)
return chunk.rename(columns={chunk.columns[0]: "timestamp", chunk.columns[1]: "value"})
return pd.DataFrame()
def __save_chunk(self, index, dataframe):
filename = self.data_filename
if index > 0:
filename += "." + str(index)
chunk_last_time = dataframe.iloc[len(dataframe) - 1]['time']
self.chunk_last_times[index] = chunk_last_time
if os.path.exists(filename):
dataframe.to_csv(filename, mode='a', index=False, header=False)
else:
dataframe.to_csv(filename, mode='w', index=False, header=True)
def __append_data(self, dataframe):
while len(dataframe) > 0:
chunk = self.__load_chunk(self.last_chunk_index)
rows_count = min(self.chunk_size - len(chunk), len(dataframe))
rows = dataframe.iloc[0:rows_count]
if len(rows) > 0:
self.__save_chunk(self.last_chunk_index, rows)
self.total_size += rows_count
self.last_time = rows.iloc[-1]['time']
dataframe = dataframe[rows_count:]
if len(dataframe) > 0:
self.last_chunk_index += 1
def __get_data(self, start_index, stop_index):
result = pd.DataFrame()
start_chunk = start_index // self.chunk_size
finish_chunk = self.last_chunk_index
if stop_index is not None:
finish_chunk = stop_index // self.chunk_size
for chunk_num in range(start_chunk, finish_chunk + 1):
chunk = self.__load_chunk(chunk_num)
if stop_index is not None and chunk_num == finish_chunk:
chunk = chunk[:stop_index % self.chunk_size]
if chunk_num == start_chunk:
chunk = chunk[start_index % self.chunk_size:]
result = pd.concat([result, chunk])
return result

src/learn.py (+52)

@@ -0,0 +1,52 @@
#!/usr/bin/env python
import csv
import os
from worker import worker
def enqueue_task():
tasks_file = "tasks.csv"
tasks = []
with open(tasks_file) as csvfile:
rdr = csv.reader(csvfile, delimiter=',')
tasks = list(rdr)
if len(tasks) == 0:
return None
res = tasks[0][0]
tasks = tasks[1:]
with open(tasks_file, "w+") as csvfile:
writer = csv.writer(csvfile)
writer.writerows(tasks)
return res
def set_lock(value):
lock_file = "learn.lock"
exists = os.path.exists(lock_file)
if exists == value:
return False
if value:
open(lock_file, "w+")
else:
os.remove(lock_file)
return True
if __name__ == "__main__":
if not set_lock(True):
print("learn locked")
exit(0)
w = worker()
while True:
task = enqueue_task()
if task is None:
break
w.start()
w.add_task({"type": "learn", "anomaly_name": task})
w.add_task({"type": "predict", "anomaly_name": task})
w.stop()
set_lock(False)

src/pattern_detection_model.py (+127)

@@ -0,0 +1,127 @@
from data_provider import DataProvider
import logging
import os.path
import json
import pandas as pd
datasource_folder = "datasources/"
dataset_folder = "datasets/"
anomalies_folder = "anomalies/"
models_folder = "models/"
metrics_folder = "metrics/"
logger = logging.getLogger('analytic_toolset')
def segments_box(segments):
max_time = 0
min_time = float("inf")
for segment in segments:
min_time = min(min_time, segment['start'])
max_time = max(max_time, segment['finish'])
min_time = pd.to_datetime(min_time, unit='ms')
max_time = pd.to_datetime(max_time, unit='ms')
return min_time, max_time
class PatternDetectionModel:
def __init__(self, pattern_name, preset=None):
self.pattern_name = pattern_name
self.preset = preset
self.__load_anomaly_config()
datasource = self.anomaly_config['metric']['datasource']
metric_name = self.anomaly_config['metric']['targets'][0]
dbconfig_filename = os.path.join(datasource_folder, datasource + ".json")
target_filename = os.path.join(metrics_folder, metric_name + ".json")
dataset_filename = os.path.join(dataset_folder, metric_name + ".csv")
with open(dbconfig_filename, 'r') as config_file:
dbconfig = json.load(config_file)
with open(target_filename, 'r') as file:
target = json.load(file)
self.data_prov = DataProvider(dbconfig, target, dataset_filename)
self.model = None
self.__load_model(preset)
def learn(self, segments):
self.model = self.__create_model(self.preset)
window_size = 200
dataframe = self.data_prov.get_dataframe()
start_index, stop_index = 0, len(dataframe)
if len(segments) > 0:
min_time, max_time = segments_box(segments)
start_index = dataframe[dataframe['timestamp'] >= min_time].index[0]
stop_index = dataframe[dataframe['timestamp'] > max_time].index[0]
start_index = max(start_index - window_size, 0)
stop_index = min(stop_index + window_size, len(dataframe))
dataframe = dataframe[start_index:stop_index]
segments = self.data_prov.transform_anomalies(segments)
self.model.fit(dataframe, segments)
self.__save_model()
return 0
# return last_prediction_time
def predict(self, last_prediction_time):
if self.model is None:
return [], last_prediction_time
window_size = 100
last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')
start_index = self.data_prov.get_upper_bound(last_prediction_time)
start_index = max(0, start_index - window_size)
dataframe = self.data_prov.get_data_range(start_index)
predicted_indexes = self.model.predict(dataframe)
predicted_indexes = [(x, y) for (x, y) in predicted_indexes if x >= start_index and y >= start_index]
predicted_times = self.data_prov.inverse_transform_indexes(predicted_indexes)
segments = []
for time_value in predicted_times:
ts1 = int(time_value[0].timestamp() * 1000)
ts2 = int(time_value[1].timestamp() * 1000)
segments.append({
'start': ts1,
'finish': ts2
})
last_dataframe_time = dataframe.iloc[- 1]['timestamp']
last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
return segments, last_prediction_time
# return predicted_anomalies, last_prediction_time
def synchronize_data(self):
self.data_prov.synchronize()
def __create_model(self, preset):
if preset == "peaks":
from peaks_detector import PeaksDetector
return PeaksDetector()
if preset == "steps" or preset == "cliffs":
from step_detector import StepDetector
return StepDetector(preset)
def __load_anomaly_config(self):
with open(os.path.join(anomalies_folder, self.pattern_name + ".json"), 'r') as config_file:
self.anomaly_config = json.load(config_file)
def __save_model(self):
logger.info("Save model '%s'" % self.pattern_name)
model_filename = os.path.join(models_folder, self.pattern_name + ".m")
self.model.save(model_filename)
def __load_model(self, preset):
logger.info("Load model '%s'" % self.pattern_name)
model_filename = os.path.join(models_folder, self.pattern_name + ".m")
if os.path.exists(model_filename):
self.model = self.__create_model(preset)
self.model.load(model_filename)

src/peaks_detector.py (+71)

@@ -0,0 +1,71 @@
from scipy import signal
import numpy as np
import step_detect
class PeaksDetector:
def __init__(self):
pass
def fit(self, dataset, contamination=0.005):
pass
def predict(self, dataframe):
array = dataframe['value'].as_matrix()
window_size = 20
# window = np.ones(101)
# mean_filtered = signal.fftconvolve(
# np.concatenate([np.zeros(window_size), array, np.zeros(window_size)]),
# window,
# mode='valid'
# )
# filtered = np.divide(array, mean_filtered / 101)
window = signal.general_gaussian(2 * window_size + 1, p=0.5, sig=5)
#print(window)
filtered = signal.fftconvolve(array, window, mode='valid')
# filtered = np.concatenate([
# np.zeros(window_size),
# filtered,
# np.zeros(window_size)
# ])
filtered = filtered / np.sum(window)
array = array[window_size:-window_size]
filtered = np.subtract(array, filtered)
import matplotlib.pyplot as plt
# filtered = np.convolve(array, step, mode='valid')
# print(len(array))
# print(len(filtered))
# step = np.hstack((np.ones(window_size), 0, -1*np.ones(window_size)))
#
# conv = np.convolve(array, step, mode='valid')
#
# conv = np.concatenate([
# np.zeros(window_size),
# conv,
# np.zeros(window_size)])
#data = step_detect.t_scan(array, window=window_size)
data = filtered
data /= data.max()
#plt.plot(array[:1000])
plt.plot(data[:1000])
plt.show()
result = step_detect.find_steps(data, 0.1)
return [dataframe.index[x + window_size] for x in result]
def save(self, model_filename):
pass
# with open(model_filename, 'wb') as file:
# pickle.dump((self.clf, self.scaler), file)
def load(self, model_filename):
pass
# with open(model_filename, 'rb') as file:
# self.clf, self.scaler = pickle.load(file)

src/predict.py (+83)

@@ -0,0 +1,83 @@
import argparse
import csv
import time
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from influxdb import InfluxDBClient
from sklearn import svm
import numpy as np
import math
import pickle
host = "209.205.120.226"
port = 8086
datasetFile = "/tmp/dataset.csv"
anomaliesFile = "anomalies.csv"
predictedAnomaliesFile = "predicted_anomalies.csv"
modelFilename = 'finalized_model.sav'
def readAnomalies():
anomalies = []
with open(anomaliesFile) as csvfile:
rdr = csv.reader(csvfile, delimiter=',')
for row in rdr:
anomaly = (int(row[0]), int(row[1]))
anomalies.append(anomaly)
return anomalies
"""Instantiate a connection to the InfluxDB."""
user = ''
password = ''
dbname = 'accelerometer'
query = 'select k0, k1, k2 from vals limit 10000;'
client = InfluxDBClient(host, port, user, password, dbname)
def predict(host=host, port=port):
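# Query raw accelerometer values, apply a 200-point triangular rolling sum,
# run the pickled classifier on (k0, k1, k2), merge positive predictions that
# are less than one second apart into (start, finish) intervals, skip anything
# that precedes the last labeled anomaly, and dump the result to CSV.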
result = client.query(query)
df = pd.DataFrame(result['vals'], columns=['time', 'k0', 'k1', 'k2'])
basedAnomalies = readAnomalies()
df2 = df.rolling(200, win_type='triang').sum()
df2['time'] = pd.to_datetime(df2['time'])
df2 = df2[np.isfinite(df2['k0'])]
print(len(df2))
anomalies = []
last_anomaly = (-1, -1)
with open(modelFilename, 'rb') as fid:
clf = pickle.load(fid)
prediction = clf.predict(df2[['k0', 'k1', 'k2']])
print(len(prediction))
#print(prediction)
for i in range(len(prediction)):
if prediction[i] > 0.:
t = df2['time'][i + 199].timestamp()
t = ((t + 0 * 3600) * 1000)
if t < basedAnomalies[len(basedAnomalies) - 1][1]:
continue
if t < last_anomaly[1] + 1000:
last_anomaly = (last_anomaly[0], t)
else:
if last_anomaly[1] != -1:
anomalies.append(last_anomaly)
last_anomaly = (t, t)
with open(predictedAnomaliesFile, "w") as file:
for anomaly in anomalies:
file.write(str(int(anomaly[0])) + "," + str(int(anomaly[1])) + "\n")
predict()

46
src/prophet_algorithm.py

@ -0,0 +1,46 @@
from fbprophet import Prophet
import pandas as pd
class prophet_algorithm(object):
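# Fits a Prophet model (daily seasonality only) to the series inside predict()
# and flags points whose distance outside the forecast interval
# [yhat_lower, yhat_upper] exceeds a fixed threshold (17), merging consecutive
# flagged points into segments weighted by the accumulated excess.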
def __init__(self):
self.model = None
self.dataset = None
def fit(self, data, anomalies):
pass
def predict(self, data):
data = data.reset_index()
data = data.rename(columns={'timestamp': 'ds', 'value': 'y'})
self.dataset = data
self.model = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=True)
self.model.fit(self.dataset)
future = self.model.make_future_dataframe(freq='H', periods=0, include_history=True)
forecast = self.model.predict(future)
cmp_df = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(self.dataset.set_index('ds'))
cmp_df['e'] = [ max(row.y - row.yhat_upper, row.yhat_lower - row.y, 0) for index, row in cmp_df.iterrows() ]
return self.__calc_anomalies(cmp_df)
def __calc_anomalies(self, dataset):
anomalies = []
cur_anomaly = None
for i in range(len(dataset)):
if dataset['e'][i] > 17:
if cur_anomaly is None:
cur_anomaly = {'start': dataset.index[i], 'finish': dataset.index[i], 'weight': 0}
cur_anomaly['finish'] = dataset.index[i]
cur_anomaly['weight'] += dataset['e'][i]
elif cur_anomaly is not None:
anomalies.append(cur_anomaly)
cur_anomaly = None
return anomalies
if __name__ == "__main__":
dataset = pd.read_csv('art_daily_flatmiddle.csv', index_col=['timestamp'], parse_dates=['timestamp'])
# the constructor takes no arguments and fit() is a no-op; predict() does the work
algo = prophet_algorithm()
res = algo.predict(dataset)
print(res)

231
src/step_detect.py

@ -0,0 +1,231 @@
"""
Thomas Kahn
thomas.b.kahn@gmail.com
"""
from __future__ import absolute_import
from math import sqrt
import multiprocessing as mp
import numpy as np
from six.moves import range
from six.moves import zip
def t_scan(L, window = 1e3, num_workers = -1):
"""
Computes t statistic for i to i+window points versus i-window to i
points for each point i in input array. Uses multiple processes to
do this calculation asynchronously. Array is decomposed into window
number of frames, each consisting of points spaced at window
intervals. This optimizes the calculation, as the drone function
need only compute the mean and variance for each set once.
Parameters
----------
L : numpy array
1 dimensional array that represents time series of datapoints
window : int / float
Number of points that comprise the windows of data that are
compared
num_workers : int
Number of worker processes for parallel t_stat computation.
Default value (-1) uses num_cpu - 1 workers
Returns
-------
t_stat : numpy array
Array which holds t statistic values for each point. The first
and last (window) points are replaced with zero, since the t
statistic calculation cannot be performed in that case.
"""
size = L.size
window = int(window)
frames = list(range(window))
n_cols = (size // window) - 1
t_stat = np.zeros((window, n_cols))
if num_workers == 1:
results = [_t_scan_drone(L, n_cols, frame, window) for frame in frames]
else:
if num_workers == -1:
num_workers = mp.cpu_count() - 1
pool = mp.Pool(processes = num_workers)
results = [pool.apply_async(_t_scan_drone, args=(L, n_cols, frame, window)) for frame in frames]
results = [r.get() for r in results]
pool.close()
for index, row in results:
t_stat[index] = row
t_stat = np.concatenate((
np.zeros(window),
t_stat.transpose().ravel(order='C'),
np.zeros(size % window)
))
return t_stat
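# Editor's usage sketch (not part of the original module); the signal and the
# window value below are illustrative:
#
#   import numpy as np
#   sig = np.concatenate([np.zeros(1000), np.ones(1000)])
#   sig += np.random.normal(0, 0.05, sig.size)
#   t = t_scan(sig, window=200, num_workers=1)   # |t| peaks near the step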
def _t_scan_drone(L, n_cols, frame, window=1e3):
"""
Drone function for t_scan. Not intended to be called manually.
Computes t_scan for the designated frame and returns the result as an
array, along with an integer tag for proper placement in the
aggregate array.
"""
size = L.size
window = int(window)
root_n = sqrt(window)
output = np.zeros(n_cols)
b = L[frame:window+frame]
b_mean = b.mean()
b_var = b.var()
for i in range(window+frame, size-window, window):
a = L[i:i+window]
a_mean = a.mean()
a_var = a.var()
output[i // window - 1] = root_n * (a_mean - b_mean) / sqrt(a_var + b_var)
b_mean, b_var = a_mean, a_var
return frame, output
def mz_fwt(x, n=2):
"""
Computes the multiscale product of the Mallat-Zhong discrete forward
wavelet transform up to and including scale n for the input data x.
If n is even, the spikes in the signal will be positive. If n is odd
the spikes will match the polarity of the step (positive for steps
up, negative for steps down).
This function is essentially a direct translation of the MATLAB code
provided by Sadler and Swami in section A.4 of the following:
http://www.dtic.mil/dtic/tr/fulltext/u2/a351960.pdf
Parameters
----------
x : numpy array
1 dimensional array that represents time series of data points
n : int
Highest scale to multiply to
Returns
-------
prod : numpy array
The multiscale product for x
"""
N_pnts = x.size
lambda_j = [1.5, 1.12, 1.03, 1.01][0:n]
if n > 4:
lambda_j += [1.0]*(n-4)
H = np.array([0.125, 0.375, 0.375, 0.125])
G = np.array([2.0, -2.0])
Gn = [2]
Hn = [3]
for j in range(1,n):
q = 2**(j-1)
Gn.append(q+1)
Hn.append(3*q+1)
S = np.concatenate((x[::-1], x))
S = np.concatenate((S, x[::-1]))
prod = np.ones(N_pnts)
for j in range(n):
n_zeros = 2**j - 1
Gz = _insert_zeros(G, n_zeros)
Hz = _insert_zeros(H, n_zeros)
current = (1.0/lambda_j[j])*np.convolve(S,Gz)
current = current[N_pnts+Gn[j]:2*N_pnts+Gn[j]]
prod *= current
if j == n-1:
break
S_new = np.convolve(S, Hz)
S_new = S_new[N_pnts+Hn[j]:2*N_pnts+Hn[j]]
S = np.concatenate((S_new[::-1], S_new))
S = np.concatenate((S, S_new[::-1]))
return prod
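# Editor's usage sketch (not part of the original module), with sig as in the
# sketch after t_scan above: the multiscale product sharpens step edges, so
# thresholding its normalized absolute value with find_steps (defined below)
# yields candidate step indices. The threshold 0.1 is illustrative.
#
#   prod = mz_fwt(sig, n=2)
#   steps = find_steps(prod / np.abs(prod).max(), 0.1)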
def _insert_zeros(x, n):
"""
Helper function for mz_fwt. Splits input array and adds n zeros
between values.
"""
newlen = (n+1)*x.size
out = np.zeros(newlen)
indices = list(range(0, newlen-n, n+1))
out[indices] = x
return out
def find_steps(array, threshold):
"""
Finds local maxima by segmenting array based on positions at which
the threshold value is crossed. Note that this thresholding is
applied after the absolute value of the array is taken. Thus,
the distinction between upward and downward steps is lost. However,
get_step_sizes can be used to determine directionality after the
fact.
Parameters
----------
array : numpy array
1 dimensional array that represents time series of data points
threshold : int / float
Threshold value that defines a step
Returns
-------
steps : list
List of indices of the detected steps
"""
steps = []
array = np.abs(array)
above_points = np.where(array > threshold, 1, 0)
ap_dif = np.diff(above_points)
cross_ups = np.where(ap_dif == 1)[0]
cross_dns = np.where(ap_dif == -1)[0]
for upi, dni in zip(cross_ups,cross_dns):
steps.append(np.argmax(array[upi:dni]) + upi)
return steps
def get_step_sizes(array, indices, window=1000):
"""
Calculates step size for each index within the supplied list. Step
size is determined by averaging over a range of points (specified
by the window parameter) before and after the index of step
occurrence. The directionality of the step is reflected by the sign
of the step size (i.e. a positive value indicates an upward step,
and a negative value indicates a downward step). The combined
standard deviation of both measurements (as a measure of uncertainty
in step calculation) is also provided.
Parameters
----------
array : numpy array
1 dimensional array that represents time series of data points
indices : list
List of indices of the detected steps (as provided by
find_steps, for example)
window : int, optional
Number of points to average over to determine baseline levels
before and after step.
Returns
-------
step_sizes : list
List of the calculated sizes of each step
step_error : list
List of the combined standard deviations of the two averaging
windows for each step, as a measure of uncertainty in the step size
"""
step_sizes = []
step_error = []
indices = sorted(indices)
last = len(indices) - 1
for i, index in enumerate(indices):
if i == 0:
q = min(window, indices[i+1]-index)
elif i == last:
q = min(window, index - indices[i-1])
else:
q = min(window, index-indices[i-1], indices[i+1]-index)
a = array[index:index+q]
b = array[index-q:index]
step_sizes.append(a.mean() - b.mean())
step_error.append(sqrt(a.var()+b.var()))
return step_sizes, step_error
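# Editor's end-to-end sketch (not part of the original module); the signal,
# thresholds and window sizes are illustrative:
#
#   sig = np.concatenate([np.zeros(1000), np.ones(1000), 3 * np.ones(1000)])
#   sig += np.random.normal(0, 0.05, sig.size)
#   t = t_scan(sig, window=200, num_workers=1)
#   steps = find_steps(t / np.abs(t).max(), 0.3)           # two candidate steps expected
#   sizes, errors = get_step_sizes(sig, steps, window=200)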

188
src/step_detector.py

@ -0,0 +1,188 @@
import numpy as np
import pickle
def find_segments(array, threshold):
segments = []
above_points = np.where(array > threshold, 1, 0)
ap_dif = np.diff(above_points)
cross_ups = np.where(ap_dif == 1)[0]
cross_dns = np.where(ap_dif == -1)[0]
for upi, dni in zip(cross_ups,cross_dns):
segments.append((upi, dni))
return segments
def is_intersect(target_segment, segments):
for segment in segments:
start = max(segment['start'], target_segment[0])
finish = min(segment['finish'], target_segment[1])
if start <= finish:
return True
return False
def calc_intersections(segments, finded_segments):
intersections = 0
labeled = 0
for segment in segments:
if not segment['labeled']:
continue
labeled += 1
intersect = False
for finded_segment in finded_segments:
start = max(segment['start'], finded_segment[0])
finish = min(segment['finish'], finded_segment[1])
if start <= finish:
intersect = True
break
if intersect:
intersections += 1
return intersections, labeled
def cost_function(segments, finded_segments):
intersections, labeled = calc_intersections(segments, finded_segments)
return intersections == labeled
def compress_segments(segments):
result = []
for segment in segments:
if len(result) == 0 or result[len(result) - 1][1] < segment[0]:
result.append(segment)
else:
result[len(result) - 1] = (result[len(result) - 1][0], segment[1])
return result
class StepDetector:
def __init__(self, preset):
self.preset = preset
self.mean = None
self.window_size = None
self.corr_max = None
self.threshold = None
self.segments = []
def fit(self, dataframe, segments, contamination=0.01):
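# Center the series on its mean, then search window sizes (10, growing by 1.2x
# up to 100) and, for each, bisect the correlation threshold so that every
# labeled segment is matched; the window size with the lowest cost is kept and
# its threshold re-derived for predict().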
array = dataframe['value'].values  # .as_matrix() was removed in newer pandas
self.mean = array.mean()
self.segments = segments
norm_data = (array - self.mean)
self.__optimize(norm_data, segments, contamination)
# print(self.threshold)
# import matplotlib.pyplot as plt
# fig, ax = plt.subplots(figsize=[18, 16])
# ax = fig.add_subplot(2, 1, 1)
# ax.plot(array)
# ax = fig.add_subplot(2, 1, 2, sharex=ax)
# ax.plot(corr_res)
# plt.show()
# #print(R.size)
# # Nw = 20
# # result = R[Nw,Nw:-1]
# # result[0] = 0
# #ax.plot(result)
# #print(len(data))
# #print(len(R))
#
# print(self.window_size)
# print(self.threshold)
def predict(self, dataframe):
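# Cross-correlate the mean-centered series with a step-shaped pattern
# ([-1]*step_size + [1]*step_size), normalize by the correlation maximum seen
# during fit, threshold both polarities, merge overlapping segments and drop
# any that intersect the labeled training segments.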
array = dataframe['value'].values  # .as_matrix() was removed in newer pandas
norm_data = (array - self.mean)
step_size = self.window_size // 2
pattern = np.concatenate([[-1] * step_size, [1] * step_size])
corr_res = np.correlate(norm_data, pattern, mode='valid') / self.window_size
corr_res = np.concatenate((np.zeros(step_size), corr_res, np.zeros(step_size)))
corr_res /= self.corr_max
result = self.__predict(corr_res, self.threshold)
# import matplotlib.pyplot as plt
# fig, ax = plt.subplots(figsize=[18, 16])
# ax = fig.add_subplot(2, 1, 1)
# ax.plot(array[:70000])
# ax = fig.add_subplot(2, 1, 2, sharex=ax)
# ax.plot(corr_res[:70000])
# plt.show()
result.sort()
result = compress_segments(result)
if len(self.segments) > 0:
result = [segment for segment in result if not is_intersect(segment, self.segments)]
return result
def __optimize(self, data, segments, contamination):
window_size = 10
mincost = None
while window_size < 100:
# print(window_size)
cost = self.__optimize_threshold(data, window_size, segments, contamination)
if mincost is None or cost < mincost:
mincost = cost
self.window_size = window_size
window_size = int(window_size * 1.2)
self.__optimize_threshold(data, self.window_size, segments, contamination)
def __optimize_threshold(self, data, window_size, segments, contamination):
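# 20-step bisection on the threshold in (0, 1): with labeled segments, a
# threshold is "good" when every labeled segment is intersected and the cost is
# the number of detections; without labels, "good" means the detections cover
# more than `contamination` of the data. Good thresholds raise the lower bound,
# so the search converges on the highest threshold that is still good.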
step_size = window_size // 2
pattern = np.concatenate([[-1] * step_size, [1] * step_size])
corr_res = np.correlate(data, pattern, mode='same') / window_size
corr_res = np.concatenate((np.zeros(step_size), corr_res, np.zeros(step_size)))
self.corr_max = corr_res.max()
corr_res /= self.corr_max
N = 20
lower = 0.
upper = 1.
cost = 0
for i in range(0, N):
self.threshold = 0.5 * (lower + upper)
result = self.__predict(corr_res, self.threshold)
if len(segments) > 0:
intersections, labeled = calc_intersections(segments, result)
good = intersections == labeled
cost = len(result)
else:
total_sum = 0
for segment in result:
total_sum += (segment[1] - segment[0])
good = total_sum > len(data) * contamination
cost = -self.threshold
if good:
lower = self.threshold
else:
upper = self.threshold
return cost
def __predict(self, data, threshold):
segments = find_segments(data, threshold)
segments += find_segments(data * -1, threshold)
#segments -= 1
return [(x - 1, y - 1) for (x, y) in segments]
def save(self, model_filename):
with open(model_filename, 'wb') as file:
pickle.dump((self.mean, self.window_size, self.corr_max, self.threshold), file)
def load(self, model_filename):
try:
with open(model_filename, 'rb') as file:
self.mean, self.window_size, self.corr_max, self.threshold = pickle.load(file)
except Exception:
# a missing or unreadable model file leaves the detector untrained
pass

71
src/supervised_algorithm.py

@ -0,0 +1,71 @@
import pickle
from tsfresh.transformers.feature_selector import FeatureSelector
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
import pandas as pd
from sklearn import svm
class supervised_algorithm(object):
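# Scales the selected features to [-1, 1] and fits an IsolationForest on the
# most recent 100000 rows; predict() returns a boolean Series that is True for
# rows the forest labels as outliers (raw prediction < 0).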
frame_size = 16
good_features = [
#"value__agg_linear_trend__f_agg_\"max\"__chunk_len_5__attr_\"intercept\"",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_10",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_20",
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_8__w_20",
# "value__fft_coefficient__coeff_3__attr_\"abs\"",
"time_of_day_column_x",
"time_of_day_column_y",
"value__abs_energy",
# "value__absolute_sum_of_changes",
# "value__sum_of_reoccurring_data_points",
]
clf = None
scaler = None
def __init__(self):
self.features = []
self.col_to_max, self.col_to_min, self.col_to_median = None, None, None
self.augmented_path = None
def fit(self, dataset, contamination=0.005):
dataset = dataset[self.good_features]
dataset = dataset[-100000:]
self.scaler = MinMaxScaler(feature_range=(-1, 1))
# self.clf = svm.OneClassSVM(nu=contamination, kernel="rbf", gamma=0.1)
self.clf = IsolationForest(contamination=contamination)
self.scaler.fit(dataset)
dataset = self.scaler.transform(dataset)
self.clf.fit(dataset)
def predict(self, dataframe):
dataset = dataframe[self.good_features]
dataset = self.scaler.transform(dataset)
prediction = self.clf.predict(dataset)
# for i in range(len(dataset)):
# print(str(dataset[i]) + " " + str(prediction[i]))
prediction = [x < 0.0 for x in prediction]
return pd.Series(prediction, index=dataframe.index)
def save(self, model_filename):
with open(model_filename, 'wb') as file:
pickle.dump((self.clf, self.scaler), file)
def load(self, model_filename):
with open(model_filename, 'rb') as file:
self.clf, self.scaler = pickle.load(file)
def __select_features(self, x, y):
# feature_selector = FeatureSelector()
feature_selector = FeatureSelector()
feature_selector.fit(x, y)
return feature_selector.relevant_features

131
src/worker.py

@ -0,0 +1,131 @@
from anomaly_model import AnomalyModel
from pattern_detection_model import PatternDetectionModel
import queue
import threading
import json
import logging
import sys
import traceback
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
filename='analytic_toolset.log',
filemode='a')
logger = logging.getLogger('analytic_toolset')
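# The worker consumes "learn"/"predict" tasks either from its internal queue
# (start()/add_task()) or, when run as a script, one JSON task per line from
# stdin; results and progress updates are written to stdout as JSON lines.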
class worker(object):
models_cache = {}
thread = None
queue = queue.Queue()
def start(self):
self.thread = threading.Thread(target=self.run)
self.thread.start()
def stop(self):
if self.thread:
self.queue.put(None)
self.thread.join()
def run(self):
while True:
task = self.queue.get()
# stop() enqueues None as a sentinel; treat it like an explicit "stop" task
if task is None or task['type'] == "stop":
break
self.do_task(task)
self.queue.task_done()
def add_task(self, task):
self.queue.put(task)
def do_task(self, task):
try:
type = task['type']
anomaly_id = task['anomaly_id']
if type == "predict":
last_prediction_time = task['last_prediction_time']
analytics_type = task['analytics_type']
preset = None
if "preset" in task:
preset = task['preset']
result = self.do_predict(anomaly_id, last_prediction_time, analytics_type, preset)
elif type == "learn":
segments = task['segments']
analytics_type = task['analytics_type']
preset = None
if "preset" in task:
preset = task['preset']
result = self.do_learn(anomaly_id, segments, analytics_type, preset)
else:
result = {
'status': "failed",
'error': "unknown type " + str(type)
}
except Exception as e:
#traceback.extract_stack()
error_text = traceback.format_exc()
logger.error("Exception: '%s'" % error_text)
result = {
'task': type,
'status': "failed",
'anomaly_id': anomaly_id,
'error': str(e)
}
return result
def do_learn(self, anomaly_id, segments, analytics_type, preset=None):
model = self.get_model(anomaly_id, analytics_type, preset)
model.synchronize_data()
last_prediction_time = model.learn(segments)
result = self.do_predict(anomaly_id, last_prediction_time, analytics_type, preset)
result['task'] = 'learn'
return result
def do_predict(self, anomaly_id, last_prediction_time, analytics_type, preset=None):
model = self.get_model(anomaly_id, analytics_type, preset)
model.synchronize_data()
segments, last_prediction_time = model.predict(last_prediction_time)
return {
'task': "predict",
'status': "success",
'anomaly_id': anomaly_id,
'segments': segments,
'last_prediction_time': last_prediction_time
}
def get_model(self, anomaly_id, analytics_type, preset=None):
if anomaly_id not in self.models_cache:
if analytics_type == "anomalies":
model = AnomalyModel(anomaly_id)
elif analytics_type == "patterns":
model = PatternDetectionModel(anomaly_id, preset)
self.models_cache[anomaly_id] = model
return self.models_cache[anomaly_id]
if __name__ == "__main__":
w = worker()
logger.info("Worker was started")
while True:
try:
text = input("")
task = json.loads(text)
logger.info("Received command '%s'" % text)
if task['type'] == "stop":
logger.info("Stopping...")
break
print(json.dumps({
'task': task['type'],
'anomaly_id': task['anomaly_id'],
'__task_id': task['__task_id'],
'status': "in progress"
}))
sys.stdout.flush()
res = w.do_task(task)
res['__task_id'] = task['__task_id']
print(json.dumps(res))
sys.stdout.flush()
except Exception as e:
logger.error("Exception: '%s'" % str(e))