rozetko
7 years ago
37 changed files with 8040 additions and 0 deletions
@ -0,0 +1,29 @@

# Hastic server

Implementation of basic pattern recognition and unsupervised learning for anomaly detection.

Implementation of an analytic unit for Hastic.

See [REST API](REST.md).

## Build & run

### Analytic unit

Python3 project

```
pip3 install pandas
pip3 install influxdb
```

### Server

Node.js project

```
cd server
npm install
npm run build
npm start
```
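
Once the server is built and started, a quick smoke test could look like the sketch below. It assumes a fetch-capable runtime (Node 18+ or a browser) and a server on `localhost:8000`; the port and the root-route response come from the server code later in this commit, while `checkServer` is a hypothetical helper name.

```
// Hypothetical smoke test: the root route replies with a short plain-text message.
async function checkServer(baseUrl = 'http://localhost:8000') {
  const resp = await fetch(`${baseUrl}/`);
  console.log(await resp.text()); // expected: "Analytic unit works"
}

checkServer().catch(console.error);
```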
@ -0,0 +1,198 @@

# Hastic server REST API

## /anomalies

### Get anomalies

`GET /anomalies?id=<anomaly_id>[&name=<anomaly_name>]`

NOTE: the `name` param is deprecated, use `id` instead.

Return data format:

```
{
  "name": "<anomaly_name>",
  "metric": "<metric_id>",
  "status": "<str>"
}
```

The `status` field can be one of:

- `learning`
- `ready`
- `failed`
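
For illustration, a minimal client sketch of calling this endpoint and polling until learning finishes. It assumes a fetch-capable runtime (Node 18+ or a browser), a server on `localhost:8000`, and hypothetical helper names; none of this is part of the server itself.

```
// Hypothetical helpers: fetch an anomaly and wait until it leaves the 'learning' state.
const BASE_URL = 'http://localhost:8000';

async function getAnomaly(id: string) {
  const resp = await fetch(`${BASE_URL}/anomalies?id=${encodeURIComponent(id)}`);
  if (!resp.ok) {
    throw new Error(`GET /anomalies failed: ${resp.status}`);
  }
  // Response body: { name, metric, status }
  return resp.json() as Promise<{ name: string; metric: string; status: string }>;
}

async function waitUntilReady(id: string, intervalMs = 5000) {
  // Poll until the anomaly is no longer in the 'learning' state.
  for (;;) {
    const anomaly = await getAnomaly(id);
    if (anomaly.status !== 'learning') {
      return anomaly.status; // 'ready' or 'failed'
    }
    await new Promise(resolve => setTimeout(resolve, intervalMs));
  }
}
```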

### Get anomaly status

`GET /anomalies/status?id=<anomaly_id>[&name=<anomaly_name>]`

NOTE: the `name` param is deprecated, use `id` instead.

Return data format:

```
{
  "status": "<str>"
}
```

The `status` field can be one of:

- `learning`
- `ready`
- `failed`

### Add anomaly

`POST /anomalies`

Data format:

```
{
  "name": "cpu_utilization_supervised",
  "metric": {
    "datasource": "influx accelerometer",
    "targets": [
      <targets>
    ]
  },
  "panelUrl": "http://grafana.example.com/d/oNZ35bWiz/new-dashboard-copy?panelId=2&fullscreen"
}
```

`targets` example:

```
{
  "alias": "command",
  "groupBy": [],
  "measurement": "data",
  "orderByTime": "ASC",
  "policy": "default",
  "refId": "A",
  "resultFormat": "time_series",
  "select": [
    [
      {
        "params": [
          "command"
        ],
        "type": "field"
      }
    ]
  ],
  "tags": []
}
```

Return data format:

```
{
  "anomaly_id": "<anomaly_id>"
}
```
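
A hedged sketch of creating an anomaly from TypeScript follows. The field values mirror the example above and are placeholders, not required values; the base URL and helper name are assumptions.

```
// Hypothetical example of POST /anomalies; field values are placeholders.
const BASE_URL = 'http://localhost:8000';

async function createAnomaly(): Promise<string> {
  const body = {
    name: 'cpu_utilization_supervised',
    metric: {
      datasource: 'influx accelerometer',
      targets: [
        {
          alias: 'command',
          groupBy: [],
          measurement: 'data',
          orderByTime: 'ASC',
          policy: 'default',
          refId: 'A',
          resultFormat: 'time_series',
          select: [[{ params: ['command'], type: 'field' }]],
          tags: []
        }
      ]
    },
    panelUrl: 'http://grafana.example.com/d/oNZ35bWiz/new-dashboard-copy?panelId=2&fullscreen'
  };

  const resp = await fetch(`${BASE_URL}/anomalies`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body)
  });
  const { anomaly_id } = await resp.json();
  return anomaly_id; // the server starts learning for this id right away
}
```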

### Delete anomalies

`DELETE /anomalies`

Data format:

```
{
  "id": "<anomaly_id>",
  "name": "<anomaly_name>" // deprecated, use id instead
}
```

Return data format:

```
Success
```

## /segments

### Get segments

`GET /segments?anomaly_id=<anomaly_id>[&last_segment=<id>][&from=<time_from>][&to=<time_to>]`

Return data format:

```
{
  "segments": [
    {
      "id": 0,
      "start": 1392765184318,
      "finish": 1397243699000,
      "labeled": true
    },
    ...
  ]
}
```
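
A small sketch of fetching only segments newer than the last one a client has already seen, using the `last_segment` filter; the helper name and base URL are assumptions.

```
// Hypothetical incremental fetch of segments newer than the last one already seen.
const BASE_URL = 'http://localhost:8000';

type Segment = { id: number; start: number; finish: number; labeled: boolean };

async function fetchNewSegments(anomalyId: string, lastSegmentId?: number): Promise<Segment[]> {
  const params = new URLSearchParams({ anomaly_id: anomalyId });
  if (lastSegmentId !== undefined) {
    params.set('last_segment', String(lastSegmentId));
  }
  const resp = await fetch(`${BASE_URL}/segments?${params}`);
  const { segments } = (await resp.json()) as { segments: Segment[] };
  return segments;
}
```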

### Update segments

`PATCH /segments`

Data format:

```
{
  "anomaly_id": "<anomaly_id>",
  "name": "<anomaly_name>", // deprecated, use id instead
  "added_segments": [
    {
      "start": 1397164656000,
      "finish": 1397243699000
    },
    ...
  ],
  "removed_segments": [3, 9]
}
```

Return data format:

```
{
  "added_ids": [12, ...]
}
```
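
For illustration, a sketch that labels one new segment and removes two by id in a single request; timestamps are Unix milliseconds, as in the example above, and the helper name and base URL are assumptions.

```
// Hypothetical PATCH /segments call: add one labeled segment, remove segments 3 and 9.
const BASE_URL = 'http://localhost:8000';

async function updateSegments(anomalyId: string) {
  const resp = await fetch(`${BASE_URL}/segments`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      anomaly_id: anomalyId,
      added_segments: [
        { start: 1397164656000, finish: 1397243699000 } // ms timestamps
      ],
      removed_segments: [3, 9]
    })
  });
  const { added_ids } = await resp.json();
  return added_ids; // ids assigned by the server to the added segments
}
```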

## /alerts

### Check if alert is enabled for anomaly

`GET /alerts?anomaly_id=<anomaly_id>`

Return data format:

```
{
  "enable": true
}
```

### Enable / disable alert for anomaly

`POST /alerts`

Data format:

```
{
  "anomaly_id": "<anomaly_id>",
  "enable": true
}
```

Return data format:

```
{
  "status": "Ok"
}
```
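
And a short sketch that reads the current alert state and flips it; the helper name and base URL are assumptions, while the `enable` flag mirrors the GET /alerts response above.

```
// Hypothetical toggle of alerting for an anomaly via GET + POST /alerts.
const BASE_URL = 'http://localhost:8000';

async function toggleAlert(anomalyId: string): Promise<boolean> {
  const current = await fetch(`${BASE_URL}/alerts?anomaly_id=${encodeURIComponent(anomalyId)}`)
    .then(r => r.json() as Promise<{ enable: boolean }>);

  const next = !current.enable;
  await fetch(`${BASE_URL}/alerts`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ anomaly_id: anomalyId, enable: next })
  });
  return next;
}
```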
@ -0,0 +1,27 @@

# Hastic server

REST server for managing data for analytics.

Runs on port 8000.

# Build

```
npm install
npm run build
```

# Run

```
npm start
```

# Development

You should have the `nodemon` module installed to run the development server.

```
npm i -g nodemon
npm run dev
```
@ -0,0 +1,10 @@

const { spawn } = require('child_process');

const webpack = spawn('webpack', ['--config', 'build/webpack.dev.conf.js'], {
  stdio: 'inherit',
  shell: true
});
//webpack.stdout.pipe(process.stdout);

const nodemon = spawn('nodemon', ['../dist/server', '--watch', 'server.js']);
nodemon.stdout.pipe(process.stdout);
@ -0,0 +1,52 @@

const path = require('path');
const fs = require('fs');

const webpack = require('webpack');


function resolve(p) {
  return path.join(__dirname, '/../', p);
}

module.exports = {
  target: 'node',
  node: {
    __dirname: false,
    __filename: false,
  },
  context: resolve('./src'),
  entry: './index',
  devtool: 'inline-source-map',
  output: {
    filename: "server.js",
    path: resolve('dist')
  },
  externals: [
    function(context, request, callback) {
      if(request[0] == '.') {
        callback();
      } else {
        callback(null, "require('" + request + "')");
      }
    }
  ],
  plugins: [
    new webpack.optimize.OccurrenceOrderPlugin(),
    new webpack.HotModuleReplacementPlugin(),
    new webpack.DefinePlugin({
      'process.env.NODE_ENV': JSON.stringify('development')
    })
  ],
  resolve: {
    extensions: [".ts", ".js"]
  },
  module: {
    rules: [
      {
        test: /\.ts$/,
        loader: "ts-loader",
        exclude: /node_modules/
      }
    ]
  }
}
@ -0,0 +1,4 @@

var base = require('./webpack.base.conf');

base.watch = true;
module.exports = base;
@ -0,0 +1,3 @@

var base = require('./webpack.base.conf');

module.exports = base;
File diff suppressed because it is too large
@ -0,0 +1,32 @@

{
  "name": "hastic-server",
  "version": "1.0.0",
  "description": "REST server for managing data for analytics",
  "scripts": {
    "start": "node dist/server.js",
    "dev": "node build/dev-server.js",
    "build": "webpack --config build/webpack.prod.conf.js"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/hastic/hastic-server.git"
  },
  "author": "CorpGlory",
  "license": "ISC",
  "bugs": {
    "url": "https://github.com/hastic/hastic-server/issues"
  },
  "homepage": "https://github.com/hastic/hastic-server#readme",
  "dependencies": {
    "express": "^4.16.3",
    "fast-csv": "^2.4.1",
    "telegraf": "^3.21.0"
  },
  "devDependencies": {
    "@types/express": "^4.11.1",
    "nodemon": "^1.17.3",
    "ts-loader": "^3.5.0",
    "typescript": "^2.8.3",
    "webpack": "^3.5.6"
  }
}
@ -0,0 +1,9 @@

import * as path from 'path';

const DATA_PATH = path.join(__dirname, '../data');
const ANALYTICS_PATH = path.join(__dirname, '../../src');
const ANOMALIES_PATH = path.join(ANALYTICS_PATH, 'anomalies');
const SEGMENTS_PATH = path.join(ANALYTICS_PATH, 'segments');
const METRICS_PATH = path.join(ANALYTICS_PATH, 'metrics');

export { DATA_PATH, ANALYTICS_PATH, ANOMALIES_PATH, SEGMENTS_PATH, METRICS_PATH }
@ -0,0 +1,31 @@

import * as express from 'express';
import * as bodyParser from 'body-parser';

import { router as anomaliesRouter } from './routes/anomalies';
import { router as segmentsRouter } from './routes/segments';
import { router as alertsRouter } from './routes/alerts';
import { tgBotInit } from './services/notification';

const app = express();
const PORT = 8000;

app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: true }));

app.use(function (req, res, next) {
  res.header('Access-Control-Allow-Origin', '*');
  res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, PATCH, OPTIONS');
  res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
  next();
});

app.use('/anomalies', anomaliesRouter);
app.use('/segments', segmentsRouter);
app.use('/alerts', alertsRouter);
app.use('/', (req, res) => { res.send('Analytic unit works') });

app.listen(PORT, () => {
  console.log(`Server is running on :${PORT}`)
});

tgBotInit();
@ -0,0 +1,62 @@

import * as express from 'express';
import { AnomalyId, getAnomalyIdByName, loadAnomalyById } from '../services/anomalyType';
import { getAlertsAnomalies, saveAlertsAnomalies } from '../services/alerts';

function getAlert(req, res) {
  try {
    let anomalyId: AnomalyId = req.query.anomaly_id;
    let anomaly = loadAnomalyById(anomalyId)
    if (anomaly == null) {
      anomalyId = getAnomalyIdByName(anomalyId.toLowerCase());
    }

    let alertsAnomalies = getAlertsAnomalies();
    let pos = alertsAnomalies.indexOf(anomalyId);

    let enable: boolean = (pos !== -1);
    res.status(200).send({
      enable
    });
  } catch(e) {
    res.status(500).send({
      code: 500,
      message: 'Internal error'
    });
  }
}

function changeAlert(req, res) {
  try {
    let anomalyId: AnomalyId = req.body.anomaly_id;
    let enable: boolean = req.body.enable;

    let anomaly = loadAnomalyById(anomalyId)
    if (anomaly == null) {
      anomalyId = getAnomalyIdByName(anomalyId.toLowerCase());
    }

    let alertsAnomalies = getAlertsAnomalies();
    let pos: number = alertsAnomalies.indexOf(anomalyId);
    if(enable && pos == -1) {
      alertsAnomalies.push(anomalyId);
      saveAlertsAnomalies(alertsAnomalies);
    } else if(!enable && pos > -1) {
      alertsAnomalies.splice(pos, 1);
      saveAlertsAnomalies(alertsAnomalies);
    }
    res.status(200).send({
      status: 'Ok'
    });
  } catch(e) {
    res.status(500).send({
      code: 500,
      message: 'Internal error'
    });
  }
}

export const router = express.Router();

router.get('/', getAlert);
router.post('/', changeAlert);
@ -0,0 +1,136 @@

import * as express from 'express';

import {
  Metric,
  Anomaly,
  saveAnomaly,
  insertAnomaly, removeAnomaly, loadAnomalyByName, loadAnomalyById, getAnomalyIdByName
} from '../services/anomalyType';
import { runLearning } from '../services/analytics'
import { saveTargets } from '../services/metrics';

async function sendAnomalyTypeStatus(req, res) {
  let id = req.query.id;
  let name = req.query.name;
  try {
    let anomaly: Anomaly;
    if(id !== undefined) {
      anomaly = loadAnomalyById(id);
    } else {
      anomaly = loadAnomalyByName(name);
    }
    if(anomaly === null) {
      res.status(404).send({
        code: 404,
        message: 'Not found'
      });
      return;
    }
    if(anomaly.status === undefined) {
      throw new Error('No status for ' + name);
    }
    res.status(200).send({ status: anomaly.status });
  } catch(e) {
    console.error(e);
    // TODO: send 404 when we know that the anomaly wasn't found
    res.status(500).send({ error: 'Can`t return anything' });
  }

}

async function getAnomaly(req, res) {
  try {
    let id = req.query.id;
    let name = req.query.name;

    let anomaly:Anomaly;
    if(id !== undefined) {
      anomaly = loadAnomalyById(id);
    } else {
      anomaly = loadAnomalyByName(name.toLowerCase());
    }
    if(anomaly === null) {
      res.status(404).send({
        code: 404,
        message: 'Not found'
      });
      return;
    }

    let payload = JSON.stringify({
      name: anomaly.name,
      metric: anomaly.metric,
      status: anomaly.status
    });
    res.status(200).send(payload)
  } catch(e) {
    console.error(e);
    // TODO: send 404 when we know that the anomaly wasn't found
    res.status(500).send('Can`t get anything');
  }
}

async function createAnomaly(req, res) {
  try {
    const metric:Metric = {
      datasource: req.body.metric.datasource,
      targets: saveTargets(req.body.metric.targets)
    };

    const anomaly:Anomaly = {
      name: req.body.name,
      panelUrl: req.body.panelUrl,
      metric: metric,
      status: 'learning',
      last_prediction_time: 0,
      next_id: 0
    };
    let anomalyId = insertAnomaly(anomaly);
    if(anomalyId === null) {
      res.status(403).send({
        code: 403,
        message: 'Already exists'
      });
      return;
    }

    let payload = JSON.stringify({ anomaly_id: anomalyId })
    res.status(200).send(payload);

    runLearning(anomalyId);
  } catch(e) {
    res.status(500).send({
      code: 500,
      message: 'Internal error'
    });
  }
}

function deleteAnomaly(req, res) {
  try {
    let id = req.query.id;
    let name = req.query.name;

    if(id !== undefined) {
      removeAnomaly(id);
    } else {
      removeAnomaly(name.toLowerCase());
    }

    res.status(200).send({
      code: 200,
      message: 'Success'
    });
  } catch(e) {
    res.status(500).send({
      code: 500,
      message: 'Internal error'
    });
  }
}

export const router = express.Router();

router.get('/status', sendAnomalyTypeStatus);
router.get('/', getAnomaly);
router.post('/', createAnomaly);
router.delete('/', deleteAnomaly);
@ -0,0 +1,80 @@

import * as express from 'express';
import {
  getLabeledSegments,
  insertSegments,
  removeSegments,
} from '../services/segments';
import { runLearning } from '../services/analytics';
import { Anomaly, AnomalyId, getAnomalyIdByName, loadAnomalyById } from '../services/anomalyType';


async function sendSegments(req, res) {
  try {
    let anomalyId: AnomalyId = req.query.anomaly_id;
    let anomaly:Anomaly = loadAnomalyById(anomalyId);
    if(anomaly === null) {
      anomalyId = getAnomalyIdByName(anomalyId);
    }

    let lastSegmentId = req.query.last_segment;
    let timeFrom = req.query.from;
    let timeTo = req.query.to;

    let segments = getLabeledSegments(anomalyId);

    // Id filtering
    if(lastSegmentId !== undefined) {
      segments = segments.filter(el => el.id > lastSegmentId);
    }

    // Time filtering
    if(timeFrom !== undefined) {
      segments = segments.filter(el => el.finish > timeFrom);
    }

    if(timeTo !== undefined) {
      segments = segments.filter(el => el.start < timeTo);
    }

    let payload = JSON.stringify({
      segments
    });
    res.status(200).send(payload);
  } catch(e) {
    res.status(500).send({
      code: 500,
      message: 'Internal error'
    });
  }
}

async function updateSegments(req, res) {
  try {
    let segmentsUpdate = req.body;

    let anomalyId = segmentsUpdate.anomaly_id;
    let anomalyName = segmentsUpdate.name;

    if(anomalyId === undefined) {
      anomalyId = getAnomalyIdByName(anomalyName.toLowerCase());
    }

    let addedIds = insertSegments(anomalyId, segmentsUpdate.added_segments, true);
    removeSegments(anomalyId, segmentsUpdate.removed_segments);

    let payload = JSON.stringify({ added_ids: addedIds });
    res.status(200).send(payload);

    runLearning(anomalyId);
  } catch(e) {
    res.status(500).send({
      code: 500,
      message: 'Internal error'
    });
  }
}

export const router = express.Router();

router.get('/', sendSegments);
router.patch('/', updateSegments);
@ -0,0 +1,58 @@

import { getJsonDataSync, writeJsonDataSync } from './json';
import * as path from 'path';
import { AnomalyId } from './anomalyType';
import { ANOMALIES_PATH } from '../config';
import { runPredict } from './analytics';
import { sendNotification } from './notification';
import { getLabeledSegments } from './segments';

function getAlertsAnomalies() : AnomalyId[] {
  return getJsonDataSync(path.join(ANOMALIES_PATH, `alerts_anomalies.json`));
}

function saveAlertsAnomalies(anomalies: AnomalyId[]) {
  return writeJsonDataSync(path.join(ANOMALIES_PATH, `alerts_anomalies.json`), anomalies);
}

function processAlerts(anomalyId) {
  let segments = getLabeledSegments(anomalyId);

  const currentTime = new Date().getTime();
  const activeAlert = activeAlerts.has(anomalyId);
  let newActiveAlert = false;

  if(segments.length > 0) {
    let lastSegment = segments[segments.length - 1];
    if(lastSegment.finish >= currentTime - alertTimeout) {
      newActiveAlert = true;
    }
  }

  if(!activeAlert && newActiveAlert) {
    activeAlerts.add(anomalyId);
    sendNotification(anomalyId, true);
  } else if(activeAlert && !newActiveAlert) {
    activeAlerts.delete(anomalyId);
    sendNotification(anomalyId, false);
  }
}

async function alertsTick() {
  let alertsAnomalies = getAlertsAnomalies();
  for (let anomalyId of alertsAnomalies) {
    try {
      await runPredict(anomalyId);
      processAlerts(anomalyId);
    } catch (e) {
      console.error(e);
    }
  }
  setTimeout(alertsTick, 5000);
}

const alertTimeout = 60000; // ms
const activeAlerts = new Set<string>();
setTimeout(alertsTick, 5000);


export { getAlertsAnomalies, saveAlertsAnomalies }
@ -0,0 +1,141 @@

import { spawn } from 'child_process'
import { ANALYTICS_PATH } from '../config'
import {
  Anomaly,
  AnomalyId, getAnomalyTypeInfo,
  loadAnomalyById,
  setAnomalyPredictionTime,
  setAnomalyStatus
} from './anomalyType'
import { getTarget } from './metrics';
import { getLabeledSegments, insertSegments, removeSegments } from './segments';
import { split, map, mapSync } from 'event-stream'

const learnWorker = spawn('python3', ['worker.py'], { cwd: ANALYTICS_PATH })
learnWorker.stdout.pipe(split())
  .pipe(
    mapSync(function(line){
      console.log(line)
      onMessage(line)
    })
  );

learnWorker.stderr.on('data', data => console.error(`worker stderr: ${data}`));

const taskMap = {};
let nextTaskId = 0;

function onMessage(data) {
  let response = JSON.parse(data);
  let taskId = response.__task_id;
  // let anomalyName = response.anomaly_name;
  // let task = response.task;
  let status = response.status;

  if(status === 'success' || status === 'failed') {
    if(taskId in taskMap) {
      let resolver = taskMap[taskId];
      resolver(response);
      delete taskMap[taskId];
    }
  }
}

function runTask(task) : Promise<any> {
  let anomaly:Anomaly = loadAnomalyById(task.anomaly_id);
  task.metric = {
    datasource: anomaly.metric.datasource,
    targets: anomaly.metric.targets.map(t => getTarget(t))
  };

  task.__task_id = nextTaskId++;
  let command = JSON.stringify(task)
  learnWorker.stdin.write(`${command}\n`);
  return new Promise<Object>((resolve, reject) => {
    taskMap[task.__task_id] = resolve
  })
}

async function runLearning(anomalyId:AnomalyId) {
  let segments = getLabeledSegments(anomalyId);
  setAnomalyStatus(anomalyId, 'learning');
  let anomaly:Anomaly = loadAnomalyById(anomalyId);
  let analyticsType = "anomalies";
  let preset = undefined;
  if (anomaly.name.includes("jumps")) {
    analyticsType = "patterns";
    preset = "steps"
  }
  if (anomaly.name.includes("cliffs") || anomaly.name.includes("drops")) {
    analyticsType = "patterns";
    preset = "cliffs"
  }
  if (anomaly.name.includes("peaks")) {
    analyticsType = "patterns";
    preset = "peaks"
  }
  let task = {
    type: 'learn',
    anomaly_id: anomalyId,
    analytics_type: analyticsType,
    preset,
    segments: segments
  };

  let result = await runTask(task);

  if (result.status === 'success') {
    setAnomalyStatus(anomalyId, 'ready');
    insertSegments(anomalyId, result.segments, false);
    setAnomalyPredictionTime(anomalyId, result.last_prediction_time);
  } else {
    setAnomalyStatus(anomalyId, 'failed');
  }
}

async function runPredict(anomalyId:AnomalyId) {
  let anomaly:Anomaly = loadAnomalyById(anomalyId);
  let analyticsType = "anomalies";
  let preset = undefined;
  if (anomaly.name.includes("jump")) {
    analyticsType = "patterns";
    preset = "steps"
  }
  if (anomaly.name.includes("cliffs") || anomaly.name.includes("drops")) {
    analyticsType = "patterns";
    preset = "cliffs"
  }
  if (anomaly.name.includes("peaks")) {
    analyticsType = "patterns";
    preset = "peaks"
  }
  let task = {
    type: 'predict',
    anomaly_id: anomalyId,
    analytics_type: analyticsType,
    preset,
    last_prediction_time: anomaly.last_prediction_time
  };
  let result = await runTask(task);

  if(result.status === 'failed') {
    return [];
  }
  // Merging segments
  let segments = getLabeledSegments(anomalyId);
  if(segments.length > 0 && result.segments.length > 0) {
    let lastOldSegment = segments[segments.length - 1];
    let firstNewSegment = result.segments[0];

    if(firstNewSegment.start <= lastOldSegment.finish) {
      result.segments[0].start = lastOldSegment.start;
      removeSegments(anomalyId, [lastOldSegment.id]);
    }
  }

  insertSegments(anomalyId, result.segments, false);
  setAnomalyPredictionTime(anomalyId, result.last_prediction_time);
  return result.segments;
}

export { runLearning, runPredict }
@ -0,0 +1,117 @@

import * as path from 'path'
import { getJsonDataSync, writeJsonDataSync } from './json'
import { ANOMALIES_PATH } from '../config'
import * as fs from 'fs'
import * as crypto from 'crypto';

export type Metric = {
  datasource: string,
  targets: string[]
}

export type Anomaly = {
  name: string,

  panelUrl: string,

  metric: Metric,
  status: string,

  last_prediction_time: number,
  next_id: number
}

export type AnomalyId = string;

let anomaliesNameToIdMap = {};

function loadAnomaliesMap() {
  let filename = path.join(ANOMALIES_PATH, `all_anomalies.json`);
  anomaliesNameToIdMap = getJsonDataSync(filename);
}

function saveAnomaliesMap() {
  let filename = path.join(ANOMALIES_PATH, `all_anomalies.json`);
  writeJsonDataSync(filename, anomaliesNameToIdMap);
}

function getAnomalyIdByName(anomalyName:string) : AnomalyId {
  loadAnomaliesMap();
  anomalyName = anomalyName.toLowerCase();
  if(anomalyName in anomaliesNameToIdMap) {
    return anomaliesNameToIdMap[anomalyName];
  }
  return anomalyName;
}

function insertAnomaly(anomaly: Anomaly) : AnomalyId {
  const hashString = anomaly.name + (new Date()).toString();
  const anomalyId:AnomalyId = crypto.createHash('md5').update(hashString).digest('hex');
  anomaliesNameToIdMap[anomaly.name] = anomalyId;
  saveAnomaliesMap();
  // return anomalyId
  // const anomalyId:AnomalyId = anomaly.name;
  let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
  if(fs.existsSync(filename)) {
    return null;
  }
  saveAnomaly(anomalyId, anomaly);
  return anomalyId;
}

function removeAnomaly(anomalyId:AnomalyId) {
  let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
  fs.unlinkSync(filename);
}

function saveAnomaly(anomalyId: AnomalyId, anomaly: Anomaly) {
  let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
  return writeJsonDataSync(filename, anomaly);
}

function loadAnomalyById(anomalyId: AnomalyId) : Anomaly {
  let filename = path.join(ANOMALIES_PATH, `${anomalyId}.json`);
  if(!fs.existsSync(filename)) {
    return null;
  }
  return getJsonDataSync(filename);
}

function loadAnomalyByName(anomalyName: string) : Anomaly {
  let anomalyId = getAnomalyIdByName(anomalyName);
  return loadAnomalyById(anomalyId);
}

function saveAnomalyTypeInfo(info) {
  console.log('Saving');
  let filename = path.join(ANOMALIES_PATH, `${info.name}.json`);
  if(info.next_id === undefined) {
    info.next_id = 0;
  }
  if(info.last_prediction_time === undefined) {
    info.last_prediction_time = 0;
  }

  return writeJsonDataSync(filename, info);
}

function getAnomalyTypeInfo(name) {
  return getJsonDataSync(path.join(ANOMALIES_PATH, `${name}.json`));
}

function setAnomalyStatus(anomalyId:AnomalyId, status:string) {
  let info = loadAnomalyById(anomalyId);
  info.status = status;
  saveAnomaly(anomalyId, info);
}

function setAnomalyPredictionTime(anomalyId:AnomalyId, lastPredictionTime:number) {
  let info = loadAnomalyById(anomalyId);
  info.last_prediction_time = lastPredictionTime;
  saveAnomaly(anomalyId, info);
}

export {
  saveAnomaly, loadAnomalyById, loadAnomalyByName, insertAnomaly, removeAnomaly, saveAnomalyTypeInfo,
  getAnomalyTypeInfo, getAnomalyIdByName, setAnomalyStatus, setAnomalyPredictionTime
}
@ -0,0 +1,55 @@

import * as fs from 'fs';

async function getJsonData(filename: string): Promise<Object> {
  var data = await new Promise<string>((resolve, reject) => {
    fs.readFile(filename, 'utf8', (err, data) => {
      if(err) {
        console.error(err);
        reject('Can`t read file');
      } else {
        resolve(data);
      }
    });
  });

  try {
    return JSON.parse(data);
  } catch(e) {
    console.error(e);
    throw new Error('Wrong file format');
  }
}

function writeJsonData(filename: string, data: Object) {
  return new Promise((resolve, reject) => {
    fs.writeFile(filename, JSON.stringify(data), 'utf8', (err) => {
      if(err) {
        console.error(err);
        reject('Can`t write file');
      } else {
        resolve();
      }
    });
  })
}

function getJsonDataSync(filename: string) {
  let data = fs.readFileSync(filename, 'utf8');
  try {
    return JSON.parse(data);
  } catch(e) {
    console.error(e);
    throw new Error('Wrong file format');
  }
}

function writeJsonDataSync(filename: string, data: Object) {
  fs.writeFileSync(filename, JSON.stringify(data));
}

export {
  getJsonData,
  writeJsonData,
  getJsonDataSync,
  writeJsonDataSync
}
@ -0,0 +1,27 @@

import * as path from 'path';
import { getJsonDataSync, writeJsonDataSync } from './json';
import { METRICS_PATH } from '../config';
import * as crypto from 'crypto';

function saveTargets(targets) {
  let metrics = [];
  for (let target of targets) {
    metrics.push(saveTarget(target));
  }
  return metrics;
}

function saveTarget(target) {
  //const md5 = crypto.createHash('md5')
  const targetId = crypto.createHash('md5').update(JSON.stringify(target)).digest('hex');
  let filename = path.join(METRICS_PATH, `${targetId}.json`);
  writeJsonDataSync(filename, target);
  return targetId;
}

function getTarget(targetId) {
  let filename = path.join(METRICS_PATH, `${targetId}.json`);
  return getJsonDataSync(filename);
}

export { saveTargets, getTarget }
@ -0,0 +1,140 @@

//import * as Telegraf from 'telegraf'
import * as path from 'path';
import { DATA_PATH } from '../config';
import { getJsonDataSync, writeJsonDataSync } from './json';
import { AnomalyId } from './anomalyType';


type SubscriberId = string;
type SubscribersMap = Map<AnomalyId, SubscriberId[]>;

type BotConfig = {
  token: string,
  subscriptions: SubscribersMap
};

function sendNotification(anomalyName, active) {
  console.log('Notification ' + anomalyName);
  if(anomalyName in botConfig.subscriptions) {
    let notificationMessage;
    if(active) {
      notificationMessage = 'Alert! Anomaly type ' + anomalyName;
    } else {
      notificationMessage = 'Ok! Anomaly type ' + anomalyName;
    }

    for (let SubscriberId of botConfig.subscriptions[anomalyName]) {
      bot.telegram.sendMessage(SubscriberId, notificationMessage);
    }
  }
}

function loadBotConfig() : BotConfig {
  let filename = path.join(DATA_PATH, `bot_config.json`);
  let jsonData;
  try {
    jsonData = getJsonDataSync(filename);
  } catch(e) {
    console.error(e.message);
    jsonData = [];
  }
  return jsonData;
}

function saveBotConfig(botConfig: BotConfig) {
  let filename = path.join(DATA_PATH, `bot_config.json`);
  try {
    writeJsonDataSync(filename, botConfig);
  } catch(e) {
    console.error(e.message);
  }
}

const commandArgs = (ctx, next) => {
  try {
    if(ctx.updateType === 'message') {
      const text = ctx.update.message.text;
      if(text !== undefined && text.startsWith('/')) {
        const match = text.match(/^\/([^\s]+)\s?(.+)?/);
        let args = [];
        let command;
        if(match !== null) {
          if(match[1]) {
            command = match[1];
          }
          if(match[2]) {
            args = match[2].split(' ');
          }
        }
        ctx.state.command = {
          raw: text,
          command,
          args,
        };
      }
    }
    return next(ctx);
  } catch (e) {

  }
};

function addNotification(ctx) {
  console.log('addNotification')
  let command = ctx.state.command;
  let chatId = ctx.chat.id;
  if(command.args.length > 0) {
    for (let anomalyName of command.args) {
      if(!(anomalyName in botConfig.subscriptions)) {
        botConfig.subscriptions[anomalyName] = []
      }
      if(botConfig.subscriptions[anomalyName].includes(chatId)) {
        return ctx.reply('You are already subscribed to alerts from anomaly ' + command.args)
      } else {
        botConfig.subscriptions[anomalyName].push(chatId);
        saveBotConfig(botConfig);
      }
    }
    return ctx.reply('You have been successfully subscribed to alerts from anomaly ' + command.args)
  } else {
    return ctx.reply('You should use the syntax: /addNotification <anomaly_name>')
  }
}

function removeNotification(ctx) {
  let command = ctx.state.command;
  let chatId = ctx.chat.id;
  if(command.args.length > 0) {
    for (let anomalyName of command.args) {
      if(anomalyName in botConfig.subscriptions) {
        botConfig.subscriptions[anomalyName] = botConfig.subscriptions[anomalyName].filter(el => el !== chatId);
        saveBotConfig(botConfig);
      }
    }
    return ctx.reply('You have been successfully unsubscribed from alerts from ' + command.args);
  } else {
    return ctx.reply('You should use the syntax: /removeNotification <anomaly_name>');
  }
}

const Telegraf = require('telegraf');
let botConfig: BotConfig;
let bot;

function tgBotInit() {
  try {
    botConfig = loadBotConfig();
    bot = new Telegraf(botConfig.token);

    bot.use(commandArgs);

    bot.command('addNotification', addNotification);
    bot.command('removeNotification', removeNotification);

    bot.startPolling();
  } catch(e) {
    // TODO: handle exception
  }
}

export { sendNotification, tgBotInit }
@ -0,0 +1,75 @@

import * as path from 'path';
import { getJsonDataSync, writeJsonDataSync } from './json';
import { SEGMENTS_PATH } from '../config';
import { AnomalyId, loadAnomalyById, saveAnomaly } from './anomalyType';

function getLabeledSegments(anomalyId: AnomalyId) {
  let filename = path.join(SEGMENTS_PATH, `${anomalyId}_labeled.json`);

  let segments = [];
  try {
    segments = getJsonDataSync(filename);
    for (let segment of segments) {
      if (segment.labeled === undefined) {
        segment.labeled = false;
      }
    }
  } catch (e) {
    console.error(e.message);
  }
  return segments;
}

function getPredictedSegments(anomalyId: AnomalyId) {
  let filename = path.join(SEGMENTS_PATH, `${anomalyId}_segments.json`);

  let jsonData;
  try {
    jsonData = getJsonDataSync(filename);
  } catch(e) {
    console.error(e.message);
    jsonData = [];
  }
  return jsonData;
}

function saveSegments(anomalyId: AnomalyId, segments) {
  let filename = path.join(SEGMENTS_PATH, `${anomalyId}_labeled.json`);

  try {
    return writeJsonDataSync(filename, segments);
  } catch(e) {
    console.error(e.message);
    throw new Error('Can`t write to db');
  }
}

function insertSegments(anomalyId: AnomalyId, addedSegments, labeled:boolean) {
  // Set status
  let info = loadAnomalyById(anomalyId);
  let segments = getLabeledSegments(anomalyId);

  let nextId = info.next_id;
  let addedIds = []
  for (let segment of addedSegments) {
    segment.id = nextId;
    segment.labeled = labeled;
    addedIds.push(nextId);
    nextId++;
    segments.push(segment);
  }
  info.next_id = nextId;
  saveSegments(anomalyId, segments);
  saveAnomaly(anomalyId, info);
  return addedIds;
}

function removeSegments(anomalyId: AnomalyId, removedSegments) {
  let segments = getLabeledSegments(anomalyId);
  for (let segmentId of removedSegments) {
    segments = segments.filter(el => el.id !== segmentId);
  }
  saveSegments(anomalyId, segments);
}

export { getLabeledSegments, getPredictedSegments, saveSegments, insertSegments, removeSegments }
@ -0,0 +1,10 @@

{
  "compilerOptions": {
    "outDir": "./dist/",
    "sourceMap": true,
    "noImplicitAny": false,
    "module": "commonjs",
    "target": "es2015",
    "allowJs": true
  }
}
@ -0,0 +1,11 @@

anomalies/
segments/
datasets/
datasources/
models/
metrics/
__pycache__/
*.pyc
*.txt
*.log
tasks.csv
@ -0,0 +1,5 @@

from worker import worker

if __name__ == "__main__":
    w = worker()
    w.do_task({"type": "learn", "anomaly_name": "cpu_utilization_supervised", "segments": []})
@ -0,0 +1,157 @@

import os.path
from data_provider import DataProvider
from data_preprocessor import data_preprocessor
import json
import pandas as pd
import logging

datasource_folder = "datasources/"
dataset_folder = "datasets/"
anomalies_folder = "anomalies/"
models_folder = "models/"
metrics_folder = "metrics/"
logger = logging.getLogger('analytic_toolset')


def anomalies_to_timestamp(anomalies):
    for anomaly in anomalies:
        anomaly['start'] = int(anomaly['start'].timestamp() * 1000)
        anomaly['finish'] = int(anomaly['finish'].timestamp() * 1000)
    return anomalies


class AnomalyModel:

    def __init__(self, anomaly_name):
        self.anomaly_name = anomaly_name
        self.load_anomaly_config()

        datasource = self.anomaly_config['metric']['datasource']
        metric_name = self.anomaly_config['metric']['targets'][0]

        dbconfig_filename = os.path.join(datasource_folder, datasource + ".json")
        target_filename = os.path.join(metrics_folder, metric_name + ".json")

        dataset_filename = os.path.join(dataset_folder, metric_name + ".csv")
        augmented_path = os.path.join(dataset_folder, metric_name + "_augmented.csv")

        with open(dbconfig_filename, 'r') as config_file:
            dbconfig = json.load(config_file)

        with open(target_filename, 'r') as file:
            target = json.load(file)

        self.data_prov = DataProvider(dbconfig, target, dataset_filename)
        self.preprocessor = data_preprocessor(self.data_prov, augmented_path)
        self.model = None

        self.__load_model()

    def anomalies_box(self, anomalies):
        max_time = 0
        min_time = float("inf")
        for anomaly in anomalies:
            max_time = max(max_time, anomaly['finish'])
            min_time = min(min_time, anomaly['start'])
        min_time = pd.to_datetime(min_time, unit='ms')
        max_time = pd.to_datetime(max_time, unit='ms')
        return min_time, max_time

    def learn(self, anomalies):
        logger.info("Start to learn for anomaly_name='%s'" % self.anomaly_name)

        confidence = 0.02
        dataframe = self.data_prov.get_dataframe()
        start_index, stop_index = 0, len(dataframe)
        if len(anomalies) > 0:
            confidence = 0.0
            min_time, max_time = self.anomalies_box(anomalies)
            start_index = dataframe[dataframe['timestamp'] >= min_time].index[0]
            stop_index = dataframe[dataframe['timestamp'] > max_time].index[0]
            start_index, stop_index = self.preprocessor.expand_indexes(start_index, stop_index)
            dataframe = dataframe[start_index:stop_index]

        train_augmented = self.preprocessor.get_augmented_data(
            start_index,
            stop_index,
            anomalies
        )

        self.model = self.create_algorithm()
        self.model.fit(train_augmented, confidence)
        if len(anomalies) > 0:
            last_dataframe_time = dataframe.iloc[-1]['timestamp']
            last_prediction_time = int(last_dataframe_time.timestamp() * 1000)
        else:
            last_prediction_time = 0

        self.__save_model()
        logger.info("Learning is finished for anomaly_name='%s'" % self.anomaly_name)
        return last_prediction_time

    def predict(self, last_prediction_time):
        logger.info("Start to predict for anomaly type='%s'" % self.anomaly_name)
        last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')

        start_index = self.data_prov.get_upper_bound(last_prediction_time)
        stop_index = self.data_prov.size()

        # last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms')
        # dataframe = dataframe[dataframe['timestamp'] > last_prediction_time]
        last_prediction_time = int(last_prediction_time.timestamp() * 1000)

        predicted_anomalies = []
        if start_index < stop_index:
            max_chunk_size = 50000
            predicted = pd.Series()
            for index in range(start_index, stop_index, max_chunk_size):
                chunk_start = index
                chunk_finish = min(index + max_chunk_size, stop_index)
                predict_augmented = self.preprocessor.get_augmented_data(chunk_start, chunk_finish)

                assert(len(predict_augmented) == chunk_finish - chunk_start)

                predicted_current = self.model.predict(predict_augmented)
                predicted = pd.concat([predicted, predicted_current])
            predicted_anomalies = self.preprocessor.inverse_transform_anomalies(predicted)

            last_row = self.data_prov.get_data_range(stop_index - 1, stop_index)

            last_dataframe_time = last_row.iloc[0]['timestamp']
            predicted_anomalies = anomalies_to_timestamp(predicted_anomalies)
            last_prediction_time = int(last_dataframe_time.timestamp() * 1000)

        logger.info("Predicting is finished for anomaly type='%s'" % self.anomaly_name)
        return predicted_anomalies, last_prediction_time

    def synchronize_data(self):
        self.data_prov.synchronize()
        self.preprocessor.set_data_provider(self.data_prov)
        self.preprocessor.synchronize()

    def load_anomaly_config(self):
        with open(os.path.join(anomalies_folder, self.anomaly_name + ".json"), 'r') as config_file:
            self.anomaly_config = json.load(config_file)

    def get_anomalies(self):
        labeled_anomalies_file = os.path.join(anomalies_folder, self.anomaly_name + "_labeled.json")
        if not os.path.exists(labeled_anomalies_file):
            return []
        with open(labeled_anomalies_file) as file:
            return json.load(file)

    def create_algorithm(self):
        from supervised_algorithm import supervised_algorithm
        return supervised_algorithm()

    def __save_model(self):
        logger.info("Save model '%s'" % self.anomaly_name)
        model_filename = os.path.join(models_folder, self.anomaly_name + ".m")
        self.model.save(model_filename)

    def __load_model(self):
        logger.info("Load model '%s'" % self.anomaly_name)
        model_filename = os.path.join(models_folder, self.anomaly_name + ".m")
        if os.path.exists(model_filename):
            self.model = self.create_algorithm()
            self.model.load(model_filename)
@ -0,0 +1,255 @@ |
|||||||
|
import os.path |
||||||
|
import pandas as pd |
||||||
|
import numpy as np |
||||||
|
import math |
||||||
|
import time |
||||||
|
|
||||||
|
from tsfresh.transformers.feature_augmenter import FeatureAugmenter |
||||||
|
from tsfresh.feature_extraction.settings import from_columns |
||||||
|
from pytz import timezone |
||||||
|
|
||||||
|
|
||||||
|
class data_preprocessor: |
||||||
|
# augmented = None |
||||||
|
frame_size = 16 |
||||||
|
calc_features = [ |
||||||
|
# "value__agg_linear_trend__f_agg_\"max\"__chunk_len_5__attr_\"intercept\"", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_10", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_20", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_8__w_20", |
||||||
|
# "value__fft_coefficient__coeff_3__attr_\"abs\"", |
||||||
|
"time_of_day_column_x", |
||||||
|
"time_of_day_column_y", |
||||||
|
"value__abs_energy", |
||||||
|
"value__absolute_sum_of_changes", |
||||||
|
"value__sum_of_reoccurring_data_points", |
||||||
|
] |
||||||
|
time_features = [ |
||||||
|
'time_of_day_column_x', |
||||||
|
'time_of_day_column_y' |
||||||
|
] |
||||||
|
chunk_size = 50000 |
||||||
|
|
||||||
|
def __init__(self, data_provider, augmented_path): |
||||||
|
self.data_provider = data_provider |
||||||
|
self.augmented_path = augmented_path |
||||||
|
self.last_chunk_index = 0 |
||||||
|
self.total_size = 0 |
||||||
|
self.__init_chunks() |
||||||
|
self.synchronize() |
||||||
|
|
||||||
|
def set_data_provider(self, data_provider): |
||||||
|
self.data_provider = data_provider |
||||||
|
|
||||||
|
def synchronize(self): |
||||||
|
start_frame = self.total_size |
||||||
|
stop_frame = self.data_provider.size() |
||||||
|
|
||||||
|
max_chunk_size = 30000 |
||||||
|
for frame in range(start_frame, stop_frame, max_chunk_size): |
||||||
|
data = self.__get_source_frames(frame, min(stop_frame, frame + max_chunk_size)) |
||||||
|
|
||||||
|
if len(data) == 0: |
||||||
|
return |
||||||
|
|
||||||
|
append_augmented = self.__extract_features(data, self.calc_features) |
||||||
|
self.__append_data(append_augmented) |
||||||
|
|
||||||
|
def expand_indexes(self, start_index, stop_index): |
||||||
|
return start_index, stop_index |
||||||
|
|
||||||
|
def get_augmented_data(self, start_index, stop_index, anomalies=[]): |
||||||
|
start_frame = start_index |
||||||
|
stop_frame = stop_index |
||||||
|
augmented = self.__get_data(start_frame, stop_frame) |
||||||
|
|
||||||
|
if len(anomalies) > 0: |
||||||
|
anomalies_indexes = self.transform_anomalies(anomalies) |
||||||
|
augmented = augmented.drop(anomalies_indexes) |
||||||
|
|
||||||
|
return augmented |
||||||
|
|
||||||
|
def transform_anomalies(self, anomalies): |
||||||
|
anomaly_index = None |
||||||
|
dataframe = self.data_provider.get_dataframe(None) |
||||||
|
for anomaly in anomalies: |
||||||
|
start_time = pd.to_datetime(anomaly['start'], unit='ms') |
||||||
|
finish_time = pd.to_datetime(anomaly['finish'], unit='ms') |
||||||
|
current_index = (dataframe['timestamp'] >= start_time) & (dataframe['timestamp'] <= finish_time) |
||||||
|
if anomaly_index is not None: |
||||||
|
anomaly_index = (anomaly_index | current_index) |
||||||
|
else: |
||||||
|
anomaly_index = current_index |
||||||
|
|
||||||
|
rows = dataframe[anomaly_index] |
||||||
|
# indexes = np.floor_divide(rows.index, self.frame_size) |
||||||
|
indexes = np.unique(rows.index) |
||||||
|
return indexes |
||||||
|
|
||||||
|
def inverse_transform_anomalies(self, prediction): |
||||||
|
anomalies = [] |
||||||
|
cur_anomaly = None |
||||||
|
source_dataframe = self.data_provider.get_dataframe(None) |
||||||
|
for i in prediction.index: |
||||||
|
if prediction[i]: |
||||||
|
start_frame_index = max(0, i - self.frame_size + 1) |
||||||
|
finish_frame_index = i |
||||||
|
start = source_dataframe['timestamp'][start_frame_index] |
||||||
|
finish = source_dataframe['timestamp'][finish_frame_index] |
||||||
|
if cur_anomaly is None: |
||||||
|
if len(anomalies) > 0 and start <= anomalies[len(anomalies) - 1]['finish']: |
||||||
|
cur_anomaly = anomalies[len(anomalies) - 1] |
||||||
|
anomalies.pop() |
||||||
|
else: |
||||||
|
cur_anomaly = {'start': start, 'finish': finish} |
||||||
|
cur_anomaly['finish'] = finish |
||||||
|
elif cur_anomaly is not None: |
||||||
|
anomalies.append(cur_anomaly) |
||||||
|
cur_anomaly = None |
||||||
|
|
||||||
|
if cur_anomaly: |
||||||
|
anomalies.append(cur_anomaly) |
||||||
|
return anomalies |
||||||
|
|
||||||
|
def __get_data(self, start_index, stop_index): |
||||||
|
result = pd.DataFrame() |
||||||
|
start_chunk = start_index // self.chunk_size |
||||||
|
finish_chunk = stop_index // self.chunk_size |
||||||
|
for chunk_num in range(start_chunk, finish_chunk + 1): |
||||||
|
chunk = self.__load_chunk(chunk_num) |
||||||
|
if chunk_num == finish_chunk: |
||||||
|
chunk = chunk[:stop_index % self.chunk_size] |
||||||
|
if chunk_num == start_chunk: |
||||||
|
chunk = chunk[start_index % self.chunk_size:] |
||||||
|
result = pd.concat([result, chunk]) |
||||||
|
return result |
||||||
|
|
||||||
|
def __init_chunks(self): |
||||||
|
chunk_index = 0 |
||||||
|
self.last_chunk_index = 0 |
||||||
|
while True: |
||||||
|
filename = self.augmented_path |
||||||
|
if chunk_index > 0: |
||||||
|
filename += "." + str(chunk_index) |
||||||
|
if os.path.exists(filename): |
||||||
|
self.last_chunk_index = chunk_index |
||||||
|
else: |
||||||
|
break |
||||||
|
chunk_index += 1 |
||||||
|
self.total_size = self.last_chunk_index * self.chunk_size |
||||||
|
last_chunk = self.__load_chunk(self.last_chunk_index) |
||||||
|
self.total_size += len(last_chunk) |
||||||
|
|
||||||
|
def __append_data(self, dataframe): |
||||||
|
while len(dataframe) > 0: |
||||||
|
chunk = self.__load_chunk(self.last_chunk_index) |
||||||
|
rows_count = min(self.chunk_size - len(chunk), len(dataframe)) |
||||||
|
|
||||||
|
rows = dataframe.iloc[0:rows_count] |
||||||
|
self.__save_chunk(self.last_chunk_index, rows) |
||||||
|
self.total_size += rows_count |
||||||
|
|
||||||
|
dataframe = dataframe[rows_count:] |
||||||
|
if len(dataframe) > 0: |
||||||
|
self.last_chunk_index += 1 |
||||||
|
|
||||||
|
def __load_chunk(self, index): |
||||||
|
filename = self.augmented_path |
||||||
|
if index > 0: |
||||||
|
filename += "." + str(index) |
||||||
|
|
||||||
|
if os.path.exists(filename): |
||||||
|
chunk = pd.read_csv(filename) |
||||||
|
frame_index = np.arange(index * self.chunk_size, index * self.chunk_size + len(chunk)) |
||||||
|
chunk = chunk.set_index(frame_index) |
||||||
|
return chunk |
||||||
|
return pd.DataFrame() |
||||||
|
|
||||||
|
def __save_chunk(self, index, dataframe): |
||||||
|
filename = self.augmented_path |
||||||
|
if index > 0: |
||||||
|
filename += "." + str(index) |
||||||
|
|
||||||
|
if os.path.exists(filename): |
||||||
|
dataframe.to_csv(filename, mode='a', index=False, header=False) |
||||||
|
else: |
||||||
|
dataframe.to_csv(filename, mode='w', index=False, header=True) |
||||||
|
|
||||||
|
def __get_source_frames(self, start_frame, stop_frame): |
||||||
|
start_index = start_frame |
||||||
|
stop_index = stop_frame |
||||||
|
|
||||||
|
# frame = self.source_dataframe[start_index:stop_index] |
||||||
|
# mat = frame.as_matrix() |
||||||
|
|
||||||
|
source_dataframe = self.data_provider.get_data_range(max(start_index - self.frame_size + 1, 0), stop_index) |
||||||
|
|
||||||
|
dataframe = None |
||||||
|
for i in range(start_index, stop_index): |
||||||
|
mini = max(0, i - self.frame_size + 1) |
||||||
|
frame = source_dataframe.loc[mini:i + 1].copy() |
||||||
|
frame['id'] = i |
||||||
|
if dataframe is None: |
||||||
|
dataframe = frame |
||||||
|
else: |
||||||
|
dataframe = dataframe.append(frame, ignore_index=True) |
||||||
|
|
||||||
|
#dataframe = self.source_dataframe[start_index:stop_index].copy() |
||||||
|
#dataframe['id'] = np.floor_divide(dataframe.index, self.frame_size) |
||||||
|
dataframe.reset_index(drop=True, inplace=True) |
||||||
|
return dataframe |
||||||
|
|
||||||
|
def __extract_features(self, data, features=None): |
||||||
|
start_frame = data['id'][0] |
||||||
|
stop_frame = data['id'][len(data)-1] + 1 |
||||||
|
augmented = pd.DataFrame(index=np.arange(start_frame, stop_frame)) |
||||||
|
|
||||||
|
# tsfresh features |
||||||
|
tsfresh_features = None |
||||||
|
if features is not None: |
||||||
|
tsfresh_features = set(features) - set(self.time_features) |
||||||
|
|
||||||
|
augmented = self.__extract_tsfresh_features(data, augmented, tsfresh_features) |
||||||
|
|
||||||
|
# time features |
||||||
|
augmented = self.__extract_time_features(data, augmented, features) |
||||||
|
return augmented |
||||||
|
|
||||||
|
def __extract_tsfresh_features(self, data, augmented, features): |
||||||
|
relevant_extraction_settings = None |
||||||
|
if features is not None: |
||||||
|
augmented_features = set(features) |
||||||
|
relevant_extraction_settings = from_columns(augmented_features) |
||||||
|
|
||||||
|
#impute_function = partial(impute_dataframe_range, col_to_max=self.col_to_max, |
||||||
|
# col_to_min=self.col_to_min, col_to_median=self.col_to_median) |
||||||
|
|
||||||
|
feature_extractor = FeatureAugmenter( |
||||||
|
kind_to_fc_parameters=relevant_extraction_settings, |
||||||
|
column_id='id', |
||||||
|
column_sort='timestamp') |
||||||
|
feature_extractor.set_timeseries_container(data) |
||||||
|
|
||||||
|
return feature_extractor.transform(augmented) |
||||||
|
|
||||||
|
def __extract_time_features(self, data, augmented, features): |
||||||
|
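# (Added note.) Time of day is encoded cyclically: seconds since midnight are mapped onto the
# unit circle (angle = 2*pi*seconds / 86400), and the cos/sin components become the
# time_of_day_column_x / time_of_day_column_y features, so 23:59 and 00:01 end up close together.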
if features is None: |
||||||
|
features = self.time_features |
||||||
|
|
||||||
|
seconds = np.zeros(len(augmented)) |
||||||
|
first_id = data['id'][0] |
||||||
|
|
||||||
|
for i in range(len(data)): |
||||||
|
id = data['id'][i] - first_id |
||||||
|
timeobj = data['timestamp'][i].time() |
||||||
|
seconds[id] = timeobj.second + 60 * (timeobj.minute + 60 * timeobj.hour) |
||||||
|
|
||||||
|
norm_seconds = 2 * math.pi * seconds / (24 * 3600) |
||||||
|
|
||||||
|
if 'time_of_day_column_x' in features: |
||||||
|
augmented['time_of_day_column_x'] = np.cos(norm_seconds) |
||||||
|
if 'time_of_day_column_y' in features: |
||||||
|
augmented['time_of_day_column_y'] = np.sin(norm_seconds) |
||||||
|
return augmented |
@ -0,0 +1,220 @@ |
|||||||
|
from influxdb import InfluxDBClient |
||||||
|
import pandas as pd |
||||||
|
import os.path |
||||||
|
import numpy as np |
||||||
|
|
||||||
|
|
||||||
|
class DataProvider: |
||||||
|
chunk_size = 50000 |
||||||
|
|
||||||
|
def __init__(self, dbconfig, target, data_filename): |
||||||
|
self.dbconfig = dbconfig |
||||||
|
self.target = target |
||||||
|
self.data_filename = data_filename |
||||||
|
self.last_time = None |
||||||
|
self.total_size = 0 |
||||||
|
self.last_chunk_index = 0 |
||||||
|
self.chunk_last_times = {} |
||||||
|
self.__init_chunks() |
||||||
|
self.synchronize() |
||||||
|
|
||||||
|
def get_dataframe(self, after_time=None): |
||||||
|
result = pd.DataFrame() |
||||||
|
for chunk_index, last_chunk_time in self.chunk_last_times.items(): |
||||||
|
if after_time is None or after_time <= last_chunk_time: |
||||||
|
chunk = self.__load_chunk(chunk_index) |
||||||
|
if after_time is not None: |
||||||
|
chunk = chunk[chunk['timestamp'] > after_time] |
||||||
|
result = pd.concat([result, chunk]) |
||||||
|
return result |
||||||
|
|
||||||
|
def get_upper_bound(self, after_time): |
||||||
|
for chunk_index, last_chunk_time in self.chunk_last_times.items(): |
||||||
|
if after_time < last_chunk_time: |
||||||
|
chunk = self.__load_chunk(chunk_index) |
||||||
|
chunk = chunk[chunk['timestamp'] > after_time] |
||||||
|
return chunk.index[0] |
||||||
|
return self.size() |
||||||
|
|
||||||
|
def size(self): |
||||||
|
return self.total_size |
||||||
|
|
||||||
|
def get_data_range(self, start_index, stop_index=None): |
||||||
|
return self.__get_data(start_index, stop_index) |
||||||
|
|
||||||
|
def transform_anomalies(self, anomalies): |
||||||
|
result = [] |
||||||
|
if len(anomalies) == 0: |
||||||
|
return result |
||||||
|
dataframe = self.get_dataframe(None) |
||||||
|
for anomaly in anomalies: |
||||||
|
start_time = pd.to_datetime(anomaly['start']-1, unit='ms') |
||||||
|
finish_time = pd.to_datetime(anomaly['finish']+1, unit='ms') |
||||||
|
current_index = (dataframe['timestamp'] >= start_time) & (dataframe['timestamp'] <= finish_time) |
||||||
|
anomaly_frame = dataframe[current_index] |
||||||
|
cur_anomaly = { |
||||||
|
'start': anomaly_frame.index[0], |
||||||
|
'finish': anomaly_frame.index[len(anomaly_frame) - 1], |
||||||
|
'labeled': anomaly['labeled'] |
||||||
|
} |
||||||
|
result.append(cur_anomaly) |
||||||
|
|
||||||
|
return result |
||||||
|
|
||||||
|
def inverse_transform_indexes(self, indexes): |
||||||
|
if len(indexes) == 0: |
||||||
|
return [] |
||||||
|
dataframe = self.get_data_range(indexes[0][0], indexes[-1][1] + 1) |
||||||
|
|
||||||
|
return [(dataframe['timestamp'][i1], dataframe['timestamp'][i2]) for (i1, i2) in indexes] |
||||||
|
|
||||||
|
def synchronize(self): |
||||||
|
# last_time = None |
||||||
|
# if len(self.dataframe) > 0: |
||||||
|
# last_time = self.dataframe['time'][len(self.dataframe)-1] |
||||||
|
append_dataframe = self.load_from_db(self.last_time) |
||||||
|
self.__append_data(append_dataframe) |
||||||
|
# append_dataframe |
||||||
|
# append_dataframe.to_csv(self.data_filename, mode='a', index=False, header=False) |
||||||
|
# self.dataframe = pd.concat([self.dataframe, append_dataframe], ignore_index=True) |
||||||
|
|
||||||
|
# def load(self): |
||||||
|
# if os.path.exists(self.data_filename): |
||||||
|
# self.dataframe = pd.read_csv(self.data_filename, parse_dates=[0]) |
||||||
|
# self.synchronize() |
||||||
|
# else: |
||||||
|
# append_dataframe = self.load_from_db() |
||||||
|
# self.__append_data(append_dataframe) |
||||||
|
# #self.dataframe.to_csv(self.data_filename, index=False, header=True) |
||||||
|
|
||||||
|
def custom_query(self, after_time): |
||||||
|
query = self.target["query"] |
||||||
|
timeFilter = "TRUE" |
||||||
|
if after_time is not None: |
||||||
|
timeFilter = "time > '%s'" % (str(after_time)) |
||||||
|
query = query.replace("$timeFilter", timeFilter) |
||||||
|
return query |
||||||
|
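A sketch of what this substitution produces (the query text and timestamp below are invented for illustration, not taken from a real dashboard):

# target["query"] = 'SELECT mean("value") FROM "cpu" WHERE $timeFilter GROUP BY time(1m)'
# custom_query(None)
#   -> SELECT mean("value") FROM "cpu" WHERE TRUE GROUP BY time(1m)
# custom_query(pd.Timestamp('2018-01-01 00:00:00'))
#   -> SELECT mean("value") FROM "cpu" WHERE time > '2018-01-01 00:00:00' GROUP BY time(1m)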
|
||||||
|
def load_from_db(self, after_time=None): |
||||||
|
"""Instantiate a connection to the InfluxDB.""" |
||||||
|
host = self.dbconfig['host'] |
||||||
|
port = self.dbconfig['port'] |
||||||
|
user = self.dbconfig['user'] |
||||||
|
password = self.dbconfig['password'] |
||||||
|
dbname = self.dbconfig['dbname'] |
||||||
|
|
||||||
|
client = InfluxDBClient(host, port, user, password, dbname) |
||||||
|
# query = 'select k0, k1, k2 from vals;' |
||||||
|
|
||||||
|
measurement = self.target['measurement'] |
||||||
|
select = self.target['select'] |
||||||
|
tags = self.target['tags'] |
||||||
|
|
||||||
|
if "query" in self.target: |
||||||
|
query = self.custom_query(after_time) |
||||||
|
else: |
||||||
|
select_values = select[0][0]['params'] |
||||||
|
escaped_select_values = ["\"" + value + "\"" for value in select_values] |
||||||
|
|
||||||
|
conditions_entries = [] |
||||||
|
if len(tags) > 0: |
||||||
|
for tag in tags: |
||||||
|
conditions_entries.append("(\"" + tag['key'] + "\"" + tag['operator'] + "'" + tag['value'] + "')") |
||||||
|
if after_time: |
||||||
|
conditions_entries.append("time > '%s'" % (str(after_time))) |
||||||
|
|
||||||
|
condition = "" |
||||||
|
if len(conditions_entries) > 0: |
||||||
|
condition = " where " + " AND ".join(conditions_entries) |
||||||
|
|
||||||
|
query = "select %s from \"%s\"%s;" % (",".join(escaped_select_values), measurement, condition) |
||||||
|
|
||||||
|
result = client.query(query, chunked=True, chunk_size=10000) |
||||||
|
dataframe = pd.DataFrame(result.get_points()) |
||||||
|
if len(dataframe) > 0: |
||||||
|
cols = dataframe.columns.tolist() |
||||||
|
cols.remove('time') |
||||||
|
cols = ['time'] + cols |
||||||
|
dataframe = dataframe[cols] |
||||||
|
|
||||||
|
dataframe['time'] = pd.to_datetime(dataframe['time']) |
||||||
|
dataframe = dataframe.dropna(axis=0, how='any') |
||||||
|
|
||||||
|
return dataframe |
||||||
|
|
||||||
|
def __init_chunks(self): |
||||||
|
chunk_index = 0 |
||||||
|
self.last_chunk_index = 0 |
||||||
|
while True: |
||||||
|
filename = self.data_filename |
||||||
|
if chunk_index > 0: |
||||||
|
filename += "." + str(chunk_index) |
||||||
|
if os.path.exists(filename): |
||||||
|
self.last_chunk_index = chunk_index |
||||||
|
chunk = self.__load_chunk(chunk_index) |
||||||
|
chunk_last_time = chunk.iloc[len(chunk) - 1]['timestamp'] |
||||||
|
self.chunk_last_times[chunk_index] = chunk_last_time |
||||||
|
self.last_time = chunk_last_time |
||||||
|
else: |
||||||
|
break |
||||||
|
chunk_index += 1 |
||||||
|
self.total_size = self.last_chunk_index * self.chunk_size |
||||||
|
last_chunk = self.__load_chunk(self.last_chunk_index) |
||||||
|
self.total_size += len(last_chunk) |
||||||
|
|
||||||
|
def __load_chunk(self, index): |
||||||
|
filename = self.data_filename |
||||||
|
if index > 0: |
||||||
|
filename += "." + str(index) |
||||||
|
|
||||||
|
if os.path.exists(filename): |
||||||
|
chunk = pd.read_csv(filename, parse_dates=[0]) |
||||||
|
frame_index = np.arange(index * self.chunk_size, index * self.chunk_size + len(chunk)) |
||||||
|
chunk = chunk.set_index(frame_index) |
||||||
|
return chunk.rename(columns={chunk.columns[0]: "timestamp", chunk.columns[1]: "value"}) |
||||||
|
return pd.DataFrame() |
||||||
|
|
||||||
|
def __save_chunk(self, index, dataframe): |
||||||
|
filename = self.data_filename |
||||||
|
if index > 0: |
||||||
|
filename += "." + str(index) |
||||||
|
|
||||||
|
chunk_last_time = dataframe.iloc[len(dataframe) - 1]['time'] |
||||||
|
self.chunk_last_times[index] = chunk_last_time |
||||||
|
|
||||||
|
if os.path.exists(filename): |
||||||
|
dataframe.to_csv(filename, mode='a', index=False, header=False) |
||||||
|
else: |
||||||
|
dataframe.to_csv(filename, mode='w', index=False, header=True) |
||||||
|
|
||||||
|
def __append_data(self, dataframe): |
||||||
|
while len(dataframe) > 0: |
||||||
|
chunk = self.__load_chunk(self.last_chunk_index) |
||||||
|
rows_count = min(self.chunk_size - len(chunk), len(dataframe)) |
||||||
|
|
||||||
|
rows = dataframe.iloc[0:rows_count] |
||||||
|
|
||||||
|
if len(rows) > 0: |
||||||
|
self.__save_chunk(self.last_chunk_index, rows) |
||||||
|
self.total_size += rows_count |
||||||
|
|
||||||
|
self.last_time = rows.iloc[-1]['time'] |
||||||
|
dataframe = dataframe[rows_count:] |
||||||
|
|
||||||
|
if len(dataframe) > 0: |
||||||
|
self.last_chunk_index += 1 |
||||||
|
|
||||||
|
def __get_data(self, start_index, stop_index): |
||||||
|
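# (Comment added for clarity.) The series is persisted as fixed-size CSV chunks
# (self.data_filename, self.data_filename.1, self.data_filename.2, ...), each holding at most
# chunk_size rows. This method stitches the requested [start_index, stop_index) range back
# together by loading every chunk the range touches and trimming the first and last ones.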
result = pd.DataFrame() |
||||||
|
start_chunk = start_index // self.chunk_size |
||||||
|
finish_chunk = self.last_chunk_index |
||||||
|
if stop_index is not None: |
||||||
|
finish_chunk = stop_index // self.chunk_size |
||||||
|
for chunk_num in range(start_chunk, finish_chunk + 1): |
||||||
|
chunk = self.__load_chunk(chunk_num) |
||||||
|
if stop_index is not None and chunk_num == finish_chunk: |
||||||
|
chunk = chunk[:stop_index % self.chunk_size] |
||||||
|
if chunk_num == start_chunk: |
||||||
|
chunk = chunk[start_index % self.chunk_size:] |
||||||
|
result = pd.concat([result, chunk]) |
||||||
|
return result |
@ -0,0 +1,52 @@ |
|||||||
|
#!/usr/bin/env python |
||||||
|
import csv |
||||||
|
import os |
||||||
|
from worker import worker |
||||||
|
|
||||||
|
|
||||||
|
def enqueue_task(): |
||||||
|
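# (Comment added for clarity.) Despite the name, this pops the next task: it takes the first
# row of tasks.csv, rewrites the file without it, and returns that value
# (or None when the queue is empty).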
tasks_file = "tasks.csv" |
||||||
|
tasks = [] |
||||||
|
with open(tasks_file) as csvfile: |
||||||
|
rdr = csv.reader(csvfile, delimiter=',') |
||||||
|
tasks = list(rdr) |
||||||
|
if len(tasks) == 0: |
||||||
|
return None |
||||||
|
res = tasks[0][0] |
||||||
|
tasks = tasks[1:] |
||||||
|
with open(tasks_file, "w+") as csvfile: |
||||||
|
writer = csv.writer(csvfile) |
||||||
|
writer.writerows(tasks) |
||||||
|
return res |
||||||
|
|
||||||
|
|
||||||
|
def set_lock(value): |
||||||
|
lock_file = "learn.lock" |
||||||
|
exists = os.path.exists(lock_file) |
||||||
|
if exists == value: |
||||||
|
return False |
||||||
|
|
||||||
|
if value: |
||||||
|
open(lock_file, "w+") |
||||||
|
else: |
||||||
|
os.remove(lock_file) |
||||||
|
return True |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
if not set_lock(True): |
||||||
|
print("learn locked") |
||||||
|
exit(0) |
||||||
|
|
||||||
|
w = worker() |
||||||
|
while True: |
||||||
|
task = enqueue_task() |
||||||
|
if task is None: |
||||||
|
break |
||||||
|
|
||||||
|
w.start() |
||||||
|
w.add_task({"type": "learn", "anomaly_name": task}) |
||||||
|
w.add_task({"type": "predict", "anomaly_name": task}) |
||||||
|
w.stop() |
||||||
|
|
||||||
|
set_lock(False) |
@ -0,0 +1,127 @@ |
|||||||
|
from data_provider import DataProvider |
||||||
|
import logging |
||||||
|
import os.path |
||||||
|
import json |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
datasource_folder = "datasources/" |
||||||
|
dataset_folder = "datasets/" |
||||||
|
anomalies_folder = "anomalies/" |
||||||
|
models_folder = "models/" |
||||||
|
metrics_folder = "metrics/" |
||||||
|
logger = logging.getLogger('analytic_toolset') |
||||||
|
|
||||||
|
|
||||||
|
def segments_box(segments): |
||||||
|
max_time = 0 |
||||||
|
min_time = float("inf") |
||||||
|
for segment in segments: |
||||||
|
min_time = min(min_time, segment['start']) |
||||||
|
max_time = max(max_time, segment['finish']) |
||||||
|
min_time = pd.to_datetime(min_time, unit='ms') |
||||||
|
max_time = pd.to_datetime(max_time, unit='ms') |
||||||
|
return min_time, max_time |
||||||
|
|
||||||
|
|
||||||
|
class PatternDetectionModel: |
||||||
|
|
||||||
|
def __init__(self, pattern_name, preset=None): |
||||||
|
self.pattern_name = pattern_name |
||||||
|
self.preset = preset |
||||||
|
|
||||||
|
self.__load_anomaly_config() |
||||||
|
datasource = self.anomaly_config['metric']['datasource'] |
||||||
|
metric_name = self.anomaly_config['metric']['targets'][0] |
||||||
|
|
||||||
|
dbconfig_filename = os.path.join(datasource_folder, datasource + ".json") |
||||||
|
target_filename = os.path.join(metrics_folder, metric_name + ".json") |
||||||
|
|
||||||
|
dataset_filename = os.path.join(dataset_folder, metric_name + ".csv") |
||||||
|
|
||||||
|
with open(dbconfig_filename, 'r') as config_file: |
||||||
|
dbconfig = json.load(config_file) |
||||||
|
|
||||||
|
with open(target_filename, 'r') as file: |
||||||
|
target = json.load(file) |
||||||
|
|
||||||
|
self.data_prov = DataProvider(dbconfig, target, dataset_filename) |
||||||
|
|
||||||
|
self.model = None |
||||||
|
self.__load_model(preset) |
||||||
|
|
||||||
|
def learn(self, segments): |
||||||
|
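# (Comment added for clarity.) Training is restricted to the time range covered by the labeled
# segments: segments_box gives their bounding [min_time, max_time] interval, which is then
# widened by window_size points on each side before the model is fit. With no segments,
# the whole dataframe is used.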
self.model = self.__create_model(self.preset) |
||||||
|
window_size = 200 |
||||||
|
|
||||||
|
dataframe = self.data_prov.get_dataframe() |
||||||
|
start_index, stop_index = 0, len(dataframe) |
||||||
|
if len(segments) > 0: |
||||||
|
min_time, max_time = segments_box(segments) |
||||||
|
start_index = dataframe[dataframe['timestamp'] >= min_time].index[0] |
||||||
|
stop_index = dataframe[dataframe['timestamp'] > max_time].index[0] |
||||||
|
start_index = max(start_index - window_size, 0) |
||||||
|
stop_index = min(stop_index + window_size, len(dataframe)) |
||||||
|
|
||||||
|
dataframe = dataframe[start_index:stop_index] |
||||||
|
|
||||||
|
segments = self.data_prov.transform_anomalies(segments) |
||||||
|
self.model.fit(dataframe, segments) |
||||||
|
self.__save_model() |
||||||
|
return 0 |
||||||
|
# return last_prediction_time |
||||||
|
|
||||||
|
def predict(self, last_prediction_time): |
||||||
|
if self.model is None: |
||||||
|
return [], last_prediction_time |
||||||
|
|
||||||
|
window_size = 100 |
||||||
|
last_prediction_time = pd.to_datetime(last_prediction_time, unit='ms') |
||||||
|
|
||||||
|
start_index = self.data_prov.get_upper_bound(last_prediction_time) |
||||||
|
start_index = max(0, start_index - window_size) |
||||||
|
dataframe = self.data_prov.get_data_range(start_index) |
||||||
|
|
||||||
|
predicted_indexes = self.model.predict(dataframe) |
||||||
|
predicted_indexes = [(x, y) for (x, y) in predicted_indexes if x >= start_index and y >= start_index] |
||||||
|
|
||||||
|
predicted_times = self.data_prov.inverse_transform_indexes(predicted_indexes) |
||||||
|
segments = [] |
||||||
|
for time_value in predicted_times: |
||||||
|
ts1 = int(time_value[0].timestamp() * 1000) |
||||||
|
ts2 = int(time_value[1].timestamp() * 1000) |
||||||
|
segments.append({ |
||||||
|
'start': ts1, |
||||||
|
'finish': ts2 |
||||||
|
}) |
||||||
|
|
||||||
|
last_dataframe_time = dataframe.iloc[-1]['timestamp'] |
||||||
|
last_prediction_time = int(last_dataframe_time.timestamp() * 1000) |
||||||
|
return segments, last_prediction_time |
||||||
|
# return predicted_anomalies, last_prediction_time |
||||||
|
|
||||||
|
def synchronize_data(self): |
||||||
|
self.data_prov.synchronize() |
||||||
|
|
||||||
|
def __create_model(self, preset): |
||||||
|
if preset == "peaks": |
||||||
|
from peaks_detector import PeaksDetector |
||||||
|
return PeaksDetector() |
||||||
|
if preset == "steps" or preset == "cliffs": |
||||||
|
from step_detector import StepDetector |
||||||
|
return StepDetector(preset) |
||||||
|
|
||||||
|
def __load_anomaly_config(self): |
||||||
|
with open(os.path.join(anomalies_folder, self.pattern_name + ".json"), 'r') as config_file: |
||||||
|
self.anomaly_config = json.load(config_file) |
||||||
|
|
||||||
|
def __save_model(self): |
||||||
|
logger.info("Save model '%s'" % self.pattern_name) |
||||||
|
model_filename = os.path.join(models_folder, self.pattern_name + ".m") |
||||||
|
self.model.save(model_filename) |
||||||
|
|
||||||
|
def __load_model(self, preset): |
||||||
|
logger.info("Load model '%s'" % self.pattern_name) |
||||||
|
model_filename = os.path.join(models_folder, self.pattern_name + ".m") |
||||||
|
if os.path.exists(model_filename): |
||||||
|
self.model = self.__create_model(preset) |
||||||
|
self.model.load(model_filename) |
@ -0,0 +1,71 @@ |
|||||||
|
from scipy import signal |
||||||
|
import numpy as np |
||||||
|
import step_detect |
||||||
|
|
||||||
|
|
||||||
|
class PeaksDetector: |
||||||
|
def __init__(self): |
||||||
|
pass |
||||||
|
|
||||||
|
def fit(self, dataset, contamination=0.005): |
||||||
|
pass |
||||||
|
|
||||||
|
def predict(self, dataframe): |
||||||
|
array = dataframe['value'].as_matrix() |
||||||
|
window_size = 20 |
||||||
|
# window = np.ones(101) |
||||||
|
# mean_filtered = signal.fftconvolve( |
||||||
|
# np.concatenate([np.zeros(window_size), array, np.zeros(window_size)]), |
||||||
|
# window, |
||||||
|
# mode='valid' |
||||||
|
# ) |
||||||
|
# filtered = np.divide(array, mean_filtered / 101) |
||||||
|
|
||||||
|
window = signal.general_gaussian(2 * window_size + 1, p=0.5, sig=5) |
||||||
|
#print(window) |
||||||
|
filtered = signal.fftconvolve(array, window, mode='valid') |
||||||
|
|
||||||
|
# filtered = np.concatenate([ |
||||||
|
# np.zeros(window_size), |
||||||
|
# filtered, |
||||||
|
# np.zeros(window_size) |
||||||
|
# ]) |
||||||
|
filtered = filtered / np.sum(window) |
||||||
|
array = array[window_size:-window_size] |
||||||
|
filtered = np.subtract(array, filtered) |
||||||
|
|
||||||
|
import matplotlib.pyplot as plt |
||||||
|
|
||||||
|
# filtered = np.convolve(array, step, mode='valid') |
||||||
|
# print(len(array)) |
||||||
|
# print(len(filtered)) |
||||||
|
|
||||||
|
# step = np.hstack((np.ones(window_size), 0, -1*np.ones(window_size))) |
||||||
|
# |
||||||
|
# conv = np.convolve(array, step, mode='valid') |
||||||
|
# |
||||||
|
# conv = np.concatenate([ |
||||||
|
# np.zeros(window_size), |
||||||
|
# conv, |
||||||
|
# np.zeros(window_size)]) |
||||||
|
|
||||||
|
#data = step_detect.t_scan(array, window=window_size) |
||||||
|
data = filtered |
||||||
|
data /= data.max() |
||||||
|
|
||||||
|
#plt.plot(array[:1000]) |
||||||
|
plt.plot(data[:1000]) |
||||||
|
plt.show() |
||||||
|
|
||||||
|
result = step_detect.find_steps(data, 0.1) |
||||||
|
return [dataframe.index[x + window_size] for x in result] |
||||||
|
|
||||||
|
def save(self, model_filename): |
||||||
|
pass |
||||||
|
# with open(model_filename, 'wb') as file: |
||||||
|
# pickle.dump((self.clf, self.scaler), file) |
||||||
|
|
||||||
|
def load(self, model_filename): |
||||||
|
pass |
||||||
|
# with open(model_filename, 'rb') as file: |
||||||
|
# self.clf, self.scaler = pickle.load(file) |
@ -0,0 +1,83 @@ |
|||||||
|
import argparse |
||||||
|
import csv |
||||||
|
import time |
||||||
|
import datetime |
||||||
|
import pandas as pd |
||||||
|
import matplotlib.pyplot as plt |
||||||
|
|
||||||
|
from influxdb import InfluxDBClient |
||||||
|
from sklearn import svm |
||||||
|
import numpy as np |
||||||
|
import math |
||||||
|
import pickle |
||||||
|
|
||||||
|
|
||||||
|
host = "209.205.120.226" |
||||||
|
port = 8086 |
||||||
|
datasetFile = "/tmp/dataset.csv" |
||||||
|
anomaliesFile = "anomalies.csv" |
||||||
|
predictedAnomaliesFile = "predicted_anomalies.csv" |
||||||
|
modelFilename = 'finalized_model.sav' |
||||||
|
|
||||||
|
|
||||||
|
def readAnomalies(): |
||||||
|
anomalies = [] |
||||||
|
|
||||||
|
with open(anomaliesFile) as csvfile: |
||||||
|
rdr = csv.reader(csvfile, delimiter=',') |
||||||
|
for row in rdr: |
||||||
|
anomaly = (int(row[0]), int(row[1])) |
||||||
|
anomalies.append(anomaly) |
||||||
|
|
||||||
|
return anomalies |
||||||
|
|
||||||
|
|
||||||
|
"""Instantiate a connection to the InfluxDB.""" |
||||||
|
user = '' |
||||||
|
password = '' |
||||||
|
dbname = 'accelerometer' |
||||||
|
query = 'select k0, k1, k2 from vals limit 10000;' |
||||||
|
|
||||||
|
|
||||||
|
client = InfluxDBClient(host, port, user, password, dbname) |
||||||
|
|
||||||
|
def predict(host=host, port=port): |
||||||
|
|
||||||
|
result = client.query(query) |
||||||
|
df = pd.DataFrame(result['vals'], columns=['time', 'k0', 'k1', 'k2']) |
||||||
|
|
||||||
|
basedAnomalies = readAnomalies() |
||||||
|
|
||||||
|
df2 = df.rolling(200, win_type='triang').sum() |
||||||
|
df2['time'] = pd.to_datetime(df2['time']) |
||||||
|
df2 = df2[np.isfinite(df2['k0'])] |
||||||
|
|
||||||
|
print(len(df2)) |
||||||
|
|
||||||
|
|
||||||
|
anomalies = [] |
||||||
|
last_anomaly = (-1, -1) |
||||||
|
with open(modelFilename, 'rb') as fid: |
||||||
|
clf = pickle.load(fid) |
||||||
|
prediction = clf.predict(df2[['k0', 'k1', 'k2']]) |
||||||
|
print(len(prediction)) |
||||||
|
#print(prediction) |
||||||
|
for i in range(len(prediction)): |
||||||
|
if prediction[i] > 0.: |
||||||
|
t = df2['time'][i + 199].timestamp() |
||||||
|
t = ((t + 0 * 3600) * 1000) |
||||||
|
if t < basedAnomalies[len(basedAnomalies) - 1][1]: |
||||||
|
continue |
||||||
|
if t < last_anomaly[1] + 1000: |
||||||
|
last_anomaly = (last_anomaly[0], t) |
||||||
|
else: |
||||||
|
if last_anomaly[1] != -1: |
||||||
|
anomalies.append(last_anomaly) |
||||||
|
last_anomaly = (t, t) |
||||||
|
|
||||||
|
with open(predictedAnomaliesFile, "w") as file: |
||||||
|
for anomaly in anomalies: |
||||||
|
file.write(str(int(anomaly[0])) + "," + str(int(anomaly[1])) + "\n") |
||||||
|
|
||||||
|
predict() |
||||||
|
|
@ -0,0 +1,46 @@ |
|||||||
|
from fbprophet import Prophet |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
|
||||||
|
class prophet_algorithm(object): |
||||||
|
def __init__(self): |
||||||
|
self.model = None |
||||||
|
self.dataset = None |
||||||
|
|
||||||
|
def fit(self, data, anomalies): |
||||||
|
pass |
||||||
|
|
||||||
|
def predict(self, data): |
||||||
|
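# (Comment added for clarity.) Rough outline: fit Prophet (daily seasonality only) on the whole
# series, predict over the same range, and measure how far each observation falls outside the
# [yhat_lower, yhat_upper] interval (column 'e'). __calc_anomalies then merges consecutive
# points whose 'e' exceeds a fixed threshold into anomaly segments.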
data = data.reset_index() |
||||||
|
data = data.rename(columns={'timestamp': 'ds', 'value': 'y'}) |
||||||
|
self.dataset = data |
||||||
|
|
||||||
|
self.model = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=True) |
||||||
|
self.model.fit(self.dataset) |
||||||
|
|
||||||
|
future = self.model.make_future_dataframe(freq='H', periods=0, include_history=True) |
||||||
|
forecast = self.model.predict(future) |
||||||
|
cmp_df = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(self.dataset.set_index('ds')) |
||||||
|
cmp_df['e'] = [ max(row.y - row.yhat_upper, row.yhat_lower - row.y, 0) for index, row in cmp_df.iterrows() ] |
||||||
|
return self.__calc_anomalies(cmp_df) |
||||||
|
|
||||||
|
def __calc_anomalies(self, dataset): |
||||||
|
anomalies = [] |
||||||
|
cur_anomaly = None |
||||||
|
for i in range(len(dataset)): |
||||||
|
if dataset['e'][i] > 17: |
||||||
|
if cur_anomaly is None: |
||||||
|
cur_anomaly = {'start': dataset.index[i], 'finish': dataset.index[i], 'weight': 0} |
||||||
|
cur_anomaly['finish'] = dataset.index[i] |
||||||
|
cur_anomaly['weight'] += dataset['e'][i] |
||||||
|
elif cur_anomaly is not None: |
||||||
|
anomalies.append(cur_anomaly) |
||||||
|
cur_anomaly = None |
||||||
|
return anomalies |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
dataset = pd.read_csv('art_daily_flatmiddle.csv', index_col=['timestamp'], parse_dates=['timestamp']) |
||||||
|
algo = prophet_algorithm() |
||||||
|
res = algo.predict(dataset) |
||||||
|
print(res) |
@ -0,0 +1,231 @@ |
|||||||
|
|
||||||
|
""" |
||||||
|
Thomas Kahn |
||||||
|
thomas.b.kahn@gmail.com |
||||||
|
""" |
||||||
|
from __future__ import absolute_import |
||||||
|
from math import sqrt |
||||||
|
import multiprocessing as mp |
||||||
|
import numpy as np |
||||||
|
from six.moves import range |
||||||
|
from six.moves import zip |
||||||
|
|
||||||
|
|
||||||
|
def t_scan(L, window = 1e3, num_workers = -1): |
||||||
|
""" |
||||||
|
Computes t statistic for i to i+window points versus i-window to i |
||||||
|
points for each point i in input array. Uses multiple processes to |
||||||
|
do this calculation asynchronously. Array is decomposed into window |
||||||
|
number of frames, each consisting of points spaced at window |
||||||
|
intervals. This optimizes the calculation, as the drone function |
||||||
|
need only compute the mean and variance for each set once. |
||||||
|
Parameters |
||||||
|
---------- |
||||||
|
L : numpy array |
||||||
|
1 dimensional array that represents time series of datapoints |
||||||
|
window : int / float |
||||||
|
Number of points that comprise the windows of data that are |
||||||
|
compared |
||||||
|
num_workers : int |
||||||
|
Number of worker processes for multithreaded t_stat computation |
||||||
|
Default value uses num_cpu - 1 workers |
||||||
|
Returns |
||||||
|
------- |
||||||
|
t_stat : numpy array |
||||||
|
Array which holds t statistic values for each point. The first |
||||||
|
and last (window) points are replaced with zero, since the t |
||||||
|
statistic calculation cannot be performed in that case. |
||||||
|
""" |
||||||
|
size = L.size |
||||||
|
window = int(window) |
||||||
|
frames = list(range(window)) |
||||||
|
n_cols = (size // window) - 1 |
||||||
|
|
||||||
|
t_stat = np.zeros((window, n_cols)) |
||||||
|
|
||||||
|
if num_workers == 1: |
||||||
|
results = [_t_scan_drone(L, n_cols, frame, window) for frame in frames] |
||||||
|
else: |
||||||
|
if num_workers == -1: |
||||||
|
num_workers = mp.cpu_count() - 1 |
||||||
|
pool = mp.Pool(processes = num_workers) |
||||||
|
results = [pool.apply_async(_t_scan_drone, args=(L, n_cols, frame, window)) for frame in frames] |
||||||
|
results = [r.get() for r in results] |
||||||
|
pool.close() |
||||||
|
|
||||||
|
for index, row in results: |
||||||
|
t_stat[index] = row |
||||||
|
|
||||||
|
t_stat = np.concatenate(( |
||||||
|
np.zeros(window), |
||||||
|
t_stat.transpose().ravel(order='C'), |
||||||
|
np.zeros(size % window) |
||||||
|
)) |
||||||
|
|
||||||
|
return t_stat |
||||||
|
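A minimal usage sketch (added for illustration; the signal, window and threshold are arbitrary choices, not values from this project):

# import numpy as np
# import step_detect
#
# rng = np.random.RandomState(0)
# signal = np.concatenate([np.zeros(5000), 3 * np.ones(5000)]) + rng.normal(0, 0.5, 10000)
# t_stat = step_detect.t_scan(signal, window=100, num_workers=1)
# steps = step_detect.find_steps(t_stat / np.abs(t_stat).max(), threshold=0.5)
# print(steps)  # expect an index near 5000, where the mean shifts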
|
||||||
|
|
||||||
|
def _t_scan_drone(L, n_cols, frame, window=1e3): |
||||||
|
""" |
||||||
|
Drone function for t_scan. Not intended to be called manually. |
||||||
|
Computes t_scan for the designated frame, and returns result as |
||||||
|
array along with an integer tag for proper placement in the |
||||||
|
aggregate array |
||||||
|
""" |
||||||
|
size = L.size |
||||||
|
window = int(window) |
||||||
|
root_n = sqrt(window) |
||||||
|
|
||||||
|
output = np.zeros(n_cols) |
||||||
|
b = L[frame:window+frame] |
||||||
|
b_mean = b.mean() |
||||||
|
b_var = b.var() |
||||||
|
for i in range(window+frame, size-window, window): |
||||||
|
a = L[i:i+window] |
||||||
|
a_mean = a.mean() |
||||||
|
a_var = a.var() |
||||||
|
output[i // window - 1] = root_n * (a_mean - b_mean) / sqrt(a_var + b_var) |
||||||
|
b_mean, b_var = a_mean, a_var |
||||||
|
|
||||||
|
return frame, output |
||||||
|
|
||||||
|
|
||||||
|
def mz_fwt(x, n=2): |
||||||
|
""" |
||||||
|
Computes the multiscale product of the Mallat-Zhong discrete forward |
||||||
|
wavelet transform up to and including scale n for the input data x. |
||||||
|
If n is even, the spikes in the signal will be positive. If n is odd |
||||||
|
the spikes will match the polarity of the step (positive for steps |
||||||
|
up, negative for steps down). |
||||||
|
This function is essentially a direct translation of the MATLAB code |
||||||
|
provided by Sadler and Swami in section A.4 of the following: |
||||||
|
http://www.dtic.mil/dtic/tr/fulltext/u2/a351960.pdf |
||||||
|
Parameters |
||||||
|
---------- |
||||||
|
x : numpy array |
||||||
|
1 dimensional array that represents time series of data points |
||||||
|
n : int |
||||||
|
Highest scale to multiply to |
||||||
|
Returns |
||||||
|
------- |
||||||
|
prod : numpy array |
||||||
|
The multiscale product for x |
||||||
|
""" |
||||||
|
N_pnts = x.size |
||||||
|
lambda_j = [1.5, 1.12, 1.03, 1.01][0:n] |
||||||
|
if n > 4: |
||||||
|
lambda_j += [1.0]*(n-4) |
||||||
|
|
||||||
|
H = np.array([0.125, 0.375, 0.375, 0.125]) |
||||||
|
G = np.array([2.0, -2.0]) |
||||||
|
|
||||||
|
Gn = [2] |
||||||
|
Hn = [3] |
||||||
|
for j in range(1,n): |
||||||
|
q = 2**(j-1) |
||||||
|
Gn.append(q+1) |
||||||
|
Hn.append(3*q+1) |
||||||
|
|
||||||
|
S = np.concatenate((x[::-1], x)) |
||||||
|
S = np.concatenate((S, x[::-1])) |
||||||
|
prod = np.ones(N_pnts) |
||||||
|
for j in range(n): |
||||||
|
n_zeros = 2**j - 1 |
||||||
|
Gz = _insert_zeros(G, n_zeros) |
||||||
|
Hz = _insert_zeros(H, n_zeros) |
||||||
|
current = (1.0/lambda_j[j])*np.convolve(S,Gz) |
||||||
|
current = current[N_pnts+Gn[j]:2*N_pnts+Gn[j]] |
||||||
|
prod *= current |
||||||
|
if j == n-1: |
||||||
|
break |
||||||
|
S_new = np.convolve(S, Hz) |
||||||
|
S_new = S_new[N_pnts+Hn[j]:2*N_pnts+Hn[j]] |
||||||
|
S = np.concatenate((S_new[::-1], S_new)) |
||||||
|
S = np.concatenate((S, S_new[::-1])) |
||||||
|
return prod |
||||||
|
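An illustrative pairing with find_steps (added sketch, not part of the original module):

# import numpy as np
# import step_detect
#
# x = np.concatenate([np.zeros(500), np.ones(500)]) + np.random.normal(0, 0.1, 1000)
# prod = step_detect.mz_fwt(x, n=2)                      # n even -> positive spikes at steps
# steps = step_detect.find_steps(prod / np.abs(prod).max(), threshold=0.3)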
|
||||||
|
|
||||||
|
def _insert_zeros(x, n): |
||||||
|
""" |
||||||
|
Helper function for mz_fwt. Splits input array and adds n zeros |
||||||
|
between values. |
||||||
|
""" |
||||||
|
newlen = (n+1)*x.size |
||||||
|
out = np.zeros(newlen) |
||||||
|
indices = list(range(0, newlen-n, n+1)) |
||||||
|
out[indices] = x |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
def find_steps(array, threshold): |
||||||
|
""" |
||||||
|
Finds local maxima by segmenting array based on positions at which |
||||||
|
the threshold value is crossed. Note that this thresholding is |
||||||
|
applied after the absolute value of the array is taken. Thus, |
||||||
|
the distinction between upward and downward steps is lost. However, |
||||||
|
get_step_sizes can be used to determine directionality after the |
||||||
|
fact. |
||||||
|
Parameters |
||||||
|
---------- |
||||||
|
array : numpy array |
||||||
|
1 dimensional array that represents time series of data points |
||||||
|
threshold : int / float |
||||||
|
Threshold value that defines a step |
||||||
|
Returns |
||||||
|
------- |
||||||
|
steps : list |
||||||
|
List of indices of the detected steps |
||||||
|
""" |
||||||
|
steps = [] |
||||||
|
array = np.abs(array) |
||||||
|
above_points = np.where(array > threshold, 1, 0) |
||||||
|
ap_dif = np.diff(above_points) |
||||||
|
cross_ups = np.where(ap_dif == 1)[0] |
||||||
|
cross_dns = np.where(ap_dif == -1)[0] |
||||||
|
for upi, dni in zip(cross_ups,cross_dns): |
||||||
|
steps.append(np.argmax(array[upi:dni]) + upi) |
||||||
|
return steps |
||||||
|
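For example (illustrative numbers): with array = [0, 0.2, 0.8, 1.0, 0.6, 0.1, 0] and threshold = 0.5, the region above the threshold spans indices 2..4, and the reported step is index 3, the position of the local maximum inside that region.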
|
||||||
|
|
||||||
|
def get_step_sizes(array, indices, window=1000): |
||||||
|
""" |
||||||
|
Calculates step size for each index within the supplied list. Step |
||||||
|
size is determined by averaging over a range of points (specified |
||||||
|
by the window parameter) before and after the index of step |
||||||
|
occurrence. The directionality of the step is reflected by the sign |
||||||
|
of the step size (i.e. a positive value indicates an upward step, |
||||||
|
and a negative value indicates a downward step). The combined |
||||||
|
standard deviation of both measurements (as a measure of uncertainty |
||||||
|
in step calculation) is also provided. |
||||||
|
Parameters |
||||||
|
---------- |
||||||
|
array : numpy array |
||||||
|
1 dimensional array that represents time series of data points |
||||||
|
indices : list |
||||||
|
List of indices of the detected steps (as provided by |
||||||
|
find_steps, for example) |
||||||
|
window : int, optional |
||||||
|
Number of points to average over to determine baseline levels |
||||||
|
before and after step. |
||||||
|
Returns |
||||||
|
------- |
||||||
|
step_sizes : list |
||||||
|
List of the calculated sizes of each step |
||||||
|
step_error : list |
List of the combined standard deviations of the two averaged windows, as a measure of uncertainty in each step size |
||||||
|
""" |
||||||
|
step_sizes = [] |
||||||
|
step_error = [] |
||||||
|
indices = sorted(indices) |
||||||
|
last = len(indices) - 1 |
||||||
|
for i, index in enumerate(indices): |
||||||
|
if last == 0: |
q = window |
elif i == 0: |
q = min(window, indices[i+1]-index) |
||||||
|
elif i == last: |
||||||
|
q = min(window, index - indices[i-1]) |
||||||
|
else: |
||||||
|
q = min(window, index-indices[i-1], indices[i+1]-index) |
||||||
|
a = array[index:index+q] |
||||||
|
b = array[index-q:index] |
||||||
|
step_sizes.append(a.mean() - b.mean()) |
||||||
|
step_error.append(sqrt(a.var()+b.var())) |
||||||
|
return step_sizes, step_error |
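A short end-to-end sketch of how these helpers compose (added for illustration; the signal, window and threshold are made-up values):

# import numpy as np
# import step_detect
#
# rng = np.random.RandomState(1)
# x = np.concatenate([np.zeros(2000), 2 * np.ones(2000)]) + rng.normal(0, 0.2, 4000)
# prod = step_detect.mz_fwt(x, n=3)
# steps = step_detect.find_steps(np.abs(prod) / np.abs(prod).max(), threshold=0.2)
# sizes, errors = step_detect.get_step_sizes(x, steps, window=500)
# # expect a step near index 2000 with size close to +2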
@ -0,0 +1,188 @@ |
|||||||
|
import numpy as np |
||||||
|
import pickle |
||||||
|
|
||||||
|
|
||||||
|
def find_segments(array, threshold): |
||||||
|
segments = [] |
||||||
|
above_points = np.where(array > threshold, 1, 0) |
||||||
|
ap_dif = np.diff(above_points) |
||||||
|
cross_ups = np.where(ap_dif == 1)[0] |
||||||
|
cross_dns = np.where(ap_dif == -1)[0] |
||||||
|
for upi, dni in zip(cross_ups,cross_dns): |
||||||
|
segments.append((upi, dni)) |
||||||
|
return segments |
||||||
|
|
||||||
|
|
||||||
|
def is_intersect(target_segment, segments): |
||||||
|
for segment in segments: |
||||||
|
start = max(segment['start'], target_segment[0]) |
||||||
|
finish = min(segment['finish'], target_segment[1]) |
||||||
|
if start <= finish: |
||||||
|
return True |
||||||
|
return False |
||||||
|
|
||||||
|
|
||||||
|
def calc_intersections(segments, finded_segments): |
||||||
|
intersections = 0 |
||||||
|
labeled = 0 |
||||||
|
for segment in segments: |
||||||
|
if not segment['labeled']: |
||||||
|
continue |
||||||
|
|
||||||
|
labeled += 1 |
||||||
|
intersect = False |
||||||
|
for finded_segment in finded_segments: |
||||||
|
start = max(segment['start'], finded_segment[0]) |
||||||
|
finish = min(segment['finish'], finded_segment[1]) |
||||||
|
if start <= finish: |
||||||
|
intersect = True |
||||||
|
break |
||||||
|
if intersect: |
||||||
|
intersections += 1 |
||||||
|
return intersections, labeled |
||||||
|
|
||||||
|
|
||||||
|
def cost_function(segments, finded_segments): |
||||||
|
intersections, labeled = calc_intersections(segments, finded_segments) |
||||||
|
return intersections == labeled |
||||||
|
|
||||||
|
|
||||||
|
def compress_segments(segments): |
||||||
|
result = [] |
||||||
|
for segment in segments: |
||||||
|
if len(result) == 0 or result[len(result) - 1][1] < segment[0]: |
||||||
|
result.append(segment) |
||||||
|
else: |
||||||
|
result[len(result) - 1] = (result[len(result) - 1][0], segment[1]) |
||||||
|
return result |
||||||
|
|
||||||
|
|
||||||
|
class StepDetector: |
||||||
|
def __init__(self, preset): |
||||||
|
self.preset = preset |
||||||
|
self.mean = None |
||||||
|
self.window_size = None |
||||||
|
self.corr_max = None |
||||||
|
self.threshold = None |
||||||
|
self.segments = [] |
||||||
|
|
||||||
|
def fit(self, dataframe, segments, contamination=0.01): |
||||||
|
array = dataframe['value'].as_matrix() |
||||||
|
self.mean = array.mean() |
||||||
|
self.segments = segments |
||||||
|
|
||||||
|
norm_data = (array - self.mean) |
||||||
|
|
||||||
|
self.__optimize(norm_data, segments, contamination) |
||||||
|
|
||||||
|
# print(self.threshold) |
||||||
|
|
||||||
|
# import matplotlib.pyplot as plt |
||||||
|
# fig, ax = plt.subplots(figsize=[18, 16]) |
||||||
|
# ax = fig.add_subplot(2, 1, 1) |
||||||
|
# ax.plot(array) |
||||||
|
# ax = fig.add_subplot(2, 1, 2, sharex=ax) |
||||||
|
# ax.plot(corr_res) |
||||||
|
# plt.show() |
||||||
|
|
||||||
|
# #print(R.size) |
||||||
|
# # Nw = 20 |
||||||
|
# # result = R[Nw,Nw:-1] |
||||||
|
# # result[0] = 0 |
||||||
|
# #ax.plot(result) |
||||||
|
# #print(len(data)) |
||||||
|
# #print(len(R)) |
||||||
|
# |
||||||
|
# print(self.window_size) |
||||||
|
# print(self.threshold) |
||||||
|
|
||||||
|
def predict(self, dataframe): |
||||||
|
array = dataframe['value'].as_matrix() |
||||||
|
|
||||||
|
norm_data = (array - self.mean) |
||||||
|
|
||||||
|
step_size = self.window_size // 2 |
||||||
|
pattern = np.concatenate([[-1] * step_size, [1] * step_size]) |
||||||
|
corr_res = np.correlate(norm_data, pattern, mode='valid') / self.window_size |
||||||
|
corr_res = np.concatenate((np.zeros(step_size), corr_res, np.zeros(step_size))) |
||||||
|
|
||||||
|
corr_res /= self.corr_max |
||||||
|
|
||||||
|
result = self.__predict(corr_res, self.threshold) |
||||||
|
|
||||||
|
# import matplotlib.pyplot as plt |
||||||
|
# fig, ax = plt.subplots(figsize=[18, 16]) |
||||||
|
# ax = fig.add_subplot(2, 1, 1) |
||||||
|
# ax.plot(array[:70000]) |
||||||
|
# ax = fig.add_subplot(2, 1, 2, sharex=ax) |
||||||
|
# ax.plot(corr_res[:70000]) |
||||||
|
# plt.show() |
||||||
|
|
||||||
|
result.sort() |
||||||
|
result = compress_segments(result) |
||||||
|
|
||||||
|
if len(self.segments) > 0: |
||||||
|
result = [segment for segment in result if not is_intersect(segment, self.segments)] |
||||||
|
return result |
||||||
|
|
||||||
|
def __optimize(self, data, segments, contamination): |
||||||
|
window_size = 10 |
||||||
|
mincost = None |
||||||
|
while window_size < 100: |
||||||
|
# print(window_size) |
||||||
|
cost = self.__optimize_threshold(data, window_size, segments, contamination) |
||||||
|
if mincost is None or cost < mincost: |
||||||
|
mincost = cost |
||||||
|
self.window_size = window_size |
||||||
|
window_size = int(window_size * 1.2) |
||||||
|
self.__optimize_threshold(data, self.window_size, segments, contamination) |
||||||
|
|
||||||
|
def __optimize_threshold(self, data, window_size, segments, contamination): |
||||||
|
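# (Comment added for clarity.) The threshold is found by N = 20 bisection steps on [0, 1]:
# a candidate is "good" when every labeled segment is hit (supervised case) or when the total
# predicted length exceeds the contamination budget (unsupervised case). Good thresholds raise
# the lower bound, bad ones lower the upper bound, and the cost of the final candidate is
# returned so __optimize can compare window sizes.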
step_size = window_size // 2 |
||||||
|
pattern = np.concatenate([[-1] * step_size, [1] * step_size]) |
||||||
|
corr_res = np.correlate(data, pattern, mode='same') / window_size |
||||||
|
corr_res = np.concatenate((np.zeros(step_size), corr_res, np.zeros(step_size))) |
||||||
|
self.corr_max = corr_res.max() |
||||||
|
corr_res /= self.corr_max |
||||||
|
N = 20 |
||||||
|
lower = 0. |
||||||
|
upper = 1. |
||||||
|
cost = 0 |
||||||
|
for i in range(0, N): |
||||||
|
self.threshold = 0.5 * (lower + upper) |
||||||
|
result = self.__predict(corr_res, self.threshold) |
||||||
|
|
||||||
|
if len(segments) > 0: |
||||||
|
intersections, labeled = calc_intersections(segments, result) |
||||||
|
good = intersections == labeled |
||||||
|
cost = len(result) |
||||||
|
else: |
||||||
|
total_sum = 0 |
||||||
|
for segment in result: |
||||||
|
total_sum += (segment[1] - segment[0]) |
||||||
|
good = total_sum > len(data) * contamination |
||||||
|
cost = -self.threshold |
||||||
|
|
||||||
|
if good: |
||||||
|
lower = self.threshold |
||||||
|
else: |
||||||
|
upper = self.threshold |
||||||
|
|
||||||
|
return cost |
||||||
|
|
||||||
|
def __predict(self, data, threshold): |
||||||
|
segments = find_segments(data, threshold) |
||||||
|
segments += find_segments(data * -1, threshold) |
||||||
|
#segments -= 1 |
||||||
|
return [(x - 1, y - 1) for (x, y) in segments] |
||||||
|
|
||||||
|
def save(self, model_filename): |
||||||
|
with open(model_filename, 'wb') as file: |
||||||
|
pickle.dump((self.mean, self.window_size, self.corr_max, self.threshold), file) |
||||||
|
|
||||||
|
def load(self, model_filename): |
||||||
|
try: |
||||||
|
with open(model_filename, 'rb') as file: |
||||||
|
self.mean, self.window_size, self.corr_max, self.threshold = pickle.load(file) |
||||||
|
except: |
||||||
|
pass |
@ -0,0 +1,71 @@ |
|||||||
|
import pickle |
||||||
|
from tsfresh.transformers.feature_selector import FeatureSelector |
||||||
|
from sklearn.preprocessing import MinMaxScaler |
||||||
|
from sklearn.ensemble import IsolationForest |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
from sklearn import svm |
||||||
|
|
||||||
|
|
||||||
|
class supervised_algorithm(object): |
||||||
|
frame_size = 16 |
||||||
|
good_features = [ |
||||||
|
#"value__agg_linear_trend__f_agg_\"max\"__chunk_len_5__attr_\"intercept\"", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_20", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_10", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_2__w_20", |
||||||
|
# "value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_8__w_20", |
||||||
|
# "value__fft_coefficient__coeff_3__attr_\"abs\"", |
||||||
|
"time_of_day_column_x", |
||||||
|
"time_of_day_column_y", |
||||||
|
"value__abs_energy", |
||||||
|
# "value__absolute_sum_of_changes", |
||||||
|
# "value__sum_of_reoccurring_data_points", |
||||||
|
] |
||||||
|
clf = None |
||||||
|
scaler = None |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
self.features = [] |
||||||
|
self.col_to_max, self.col_to_min, self.col_to_median = None, None, None |
||||||
|
self.augmented_path = None |
||||||
|
|
||||||
|
def fit(self, dataset, contamination=0.005): |
||||||
|
dataset = dataset[self.good_features] |
||||||
|
dataset = dataset[-100000:] |
||||||
|
|
||||||
|
self.scaler = MinMaxScaler(feature_range=(-1, 1)) |
||||||
|
# self.clf = svm.OneClassSVM(nu=contamination, kernel="rbf", gamma=0.1) |
||||||
|
self.clf = IsolationForest(contamination=contamination) |
||||||
|
|
||||||
|
self.scaler.fit(dataset) |
||||||
|
|
||||||
|
dataset = self.scaler.transform(dataset) |
||||||
|
self.clf.fit(dataset) |
||||||
|
|
||||||
|
def predict(self, dataframe): |
||||||
|
dataset = dataframe[self.good_features] |
||||||
|
dataset = self.scaler.transform(dataset) |
||||||
|
prediction = self.clf.predict(dataset) |
||||||
|
|
||||||
|
# for i in range(len(dataset)): |
||||||
|
# print(str(dataset[i]) + " " + str(prediction[i])) |
||||||
|
|
||||||
|
prediction = [x < 0.0 for x in prediction] |
||||||
|
return pd.Series(prediction, index=dataframe.index) |
||||||
|
|
||||||
|
def save(self, model_filename): |
||||||
|
with open(model_filename, 'wb') as file: |
||||||
|
pickle.dump((self.clf, self.scaler), file) |
||||||
|
|
||||||
|
def load(self, model_filename): |
||||||
|
with open(model_filename, 'rb') as file: |
||||||
|
self.clf, self.scaler = pickle.load(file) |
||||||
|
|
||||||
|
def __select_features(self, x, y): |
||||||
|
feature_selector = FeatureSelector() |
||||||
|
|
||||||
|
feature_selector.fit(x, y) |
||||||
|
return feature_selector.relevant_features |
@ -0,0 +1,131 @@ |
|||||||
|
from anomaly_model import AnomalyModel |
||||||
|
from pattern_detection_model import PatternDetectionModel |
||||||
|
import queue |
||||||
|
import threading |
||||||
|
import json |
||||||
|
import logging |
||||||
|
import sys |
||||||
|
import traceback |
||||||
|
|
||||||
|
logging.basicConfig(level=logging.DEBUG, |
||||||
|
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', |
||||||
|
filename='analytic_toolset.log', |
||||||
|
filemode='a') |
||||||
|
logger = logging.getLogger('analytic_toolset') |
||||||
|
|
||||||
|
|
||||||
|
class worker(object): |
||||||
|
models_cache = {} |
||||||
|
thread = None |
||||||
|
queue = queue.Queue() |
||||||
|
|
||||||
|
def start(self): |
||||||
|
self.thread = threading.Thread(target=self.run) |
||||||
|
self.thread.start() |
||||||
|
|
||||||
|
def stop(self): |
||||||
|
if self.thread: |
||||||
|
self.queue.put(None) |
||||||
|
self.thread.join() |
||||||
|
|
||||||
|
def run(self): |
||||||
|
while True: |
||||||
|
task = self.queue.get() |
||||||
|
if task is None or task['type'] == "stop": |
||||||
|
break |
||||||
|
self.do_task(task) |
||||||
|
self.queue.task_done() |
||||||
|
|
||||||
|
def add_task(self, task): |
||||||
|
self.queue.put(task) |
||||||
|
|
||||||
|
def do_task(self, task): |
||||||
|
try: |
||||||
|
type = task['type'] |
||||||
|
anomaly_id = task['anomaly_id'] |
||||||
|
if type == "predict": |
||||||
|
last_prediction_time = task['last_prediction_time'] |
||||||
|
analytics_type = task['analytics_type'] |
||||||
|
preset = None |
||||||
|
if "preset" in task: |
||||||
|
preset = task['preset'] |
||||||
|
result = self.do_predict(anomaly_id, last_prediction_time, analytics_type, preset) |
||||||
|
elif type == "learn": |
||||||
|
segments = task['segments'] |
||||||
|
analytics_type = task['analytics_type'] |
||||||
|
preset = None |
||||||
|
if "preset" in task: |
||||||
|
preset = task['preset'] |
||||||
|
result = self.do_learn(anomaly_id, segments, analytics_type, preset) |
||||||
|
else: |
||||||
|
result = { |
||||||
|
'status': "failed", |
||||||
|
'error': "unknown type " + str(type) |
||||||
|
} |
||||||
|
except Exception as e: |
||||||
|
#traceback.extract_stack() |
||||||
|
error_text = traceback.format_exc() |
||||||
|
logger.error("Exception: '%s'" % error_text) |
||||||
|
result = { |
||||||
|
'task': type, |
||||||
|
'status': "failed", |
||||||
|
'anomaly_id': anomaly_id, |
||||||
|
'error': str(e) |
||||||
|
} |
||||||
|
return result |
||||||
|
|
||||||
|
def do_learn(self, anomaly_id, segments, analytics_type, preset=None): |
||||||
|
model = self.get_model(anomaly_id, analytics_type, preset) |
||||||
|
model.synchronize_data() |
||||||
|
last_prediction_time = model.learn(segments) |
||||||
|
result = self.do_predict(anomaly_id, last_prediction_time, analytics_type, preset) |
||||||
|
result['task'] = 'learn' |
||||||
|
return result |
||||||
|
|
||||||
|
def do_predict(self, anomaly_id, last_prediction_time, analytics_type, preset=None): |
||||||
|
model = self.get_model(anomaly_id, analytics_type, preset) |
||||||
|
model.synchronize_data() |
||||||
|
segments, last_prediction_time = model.predict(last_prediction_time) |
||||||
|
return { |
||||||
|
'task': "predict", |
||||||
|
'status': "success", |
||||||
|
'anomaly_id': anomaly_id, |
||||||
|
'segments': segments, |
||||||
|
'last_prediction_time': last_prediction_time |
||||||
|
} |
||||||
|
|
||||||
|
def get_model(self, anomaly_id, analytics_type, preset=None): |
||||||
|
if anomaly_id not in self.models_cache: |
||||||
|
if analytics_type == "anomalies": |
||||||
|
model = AnomalyModel(anomaly_id) |
||||||
|
elif analytics_type == "patterns": |
||||||
|
model = PatternDetectionModel(anomaly_id, preset) |
||||||
|
self.models_cache[anomaly_id] = model |
||||||
|
return self.models_cache[anomaly_id] |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
w = worker() |
||||||
|
logger.info("Worker was started") |
||||||
|
while True: |
||||||
|
try: |
||||||
|
text = input("") |
||||||
|
task = json.loads(text) |
||||||
|
logger.info("Received command '%s'" % text) |
||||||
|
if task['type'] == "stop": |
||||||
|
logger.info("Stopping...") |
||||||
|
break |
||||||
|
print(json.dumps({ |
||||||
|
'task': task['type'], |
||||||
|
'anomaly_id': task['anomaly_id'], |
||||||
|
'__task_id': task['__task_id'], |
||||||
|
'status': "in progress" |
||||||
|
})) |
||||||
|
sys.stdout.flush() |
||||||
|
res = w.do_task(task) |
||||||
|
res['__task_id'] = task['__task_id'] |
||||||
|
print(json.dumps(res)) |
||||||
|
sys.stdout.flush() |
||||||
|
except Exception as e: |
||||||
|
logger.error("Exception: '%s'" % str(e)) |
||||||
|
|
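# Example of the stdin protocol this main loop implements (field values are hypothetical;
# the key names come from the code above):
#
#   {"type": "learn", "analytics_type": "patterns", "preset": "steps",
#    "anomaly_id": "cpu_spikes", "segments": [], "__task_id": 1}
#
# The worker first echoes {"task": ..., "anomaly_id": ..., "__task_id": ..., "status": "in progress"}
# and then prints the JSON result of do_task with "__task_id" copied back in.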