Browse Source
* fit -> do_fit in all models && add self.segment_length
* Move converting indices to timestamps to Model class
* add flexible win size to all models
pull/1/head
7 changed files with 127 additions and 144 deletions
@ -1,28 +1,51 @@ |
|||||||
import utils |
import utils |
||||||
|
|
||||||
from abc import ABC, abstractmethod |
from abc import ABC, abstractmethod |
||||||
from pandas import DataFrame |
|
||||||
from typing import Optional |
from typing import Optional |
||||||
|
import pandas as pd |
||||||
|
import math |
||||||
|
|
||||||
# A unit's cache is a plain dict of model state that is handed back to the
# caller after fit/predict and passed in again on the next call.
AnalyticUnitCache = dict


class Model(ABC):
    """Base class for models that are fitted on segments and then predict.

    Subclasses implement ``do_fit`` and ``do_predict``; callers use ``fit``
    and ``predict``, which restore the cached state, delegate to the
    subclass hooks and post-process the result.

    ``self.state`` is only assigned here when a dict cache is passed in, so
    subclasses are presumably expected to initialise it themselves
    (NOTE(review): confirm against the concrete models).
    """

    @abstractmethod
    def do_fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache] = None) -> None:
        """Train the model on ``dataframe`` using ``segments``.

        ``fit`` invokes this hook as ``do_fit(dataframe, segments)``, so
        ``cache`` must stay optional; the restored cache is available as
        ``self.state``.
        """
        pass

    @abstractmethod
    def do_predict(self, dataframe: pd.DataFrame) -> list:
        """Return the indices of the detected points in ``dataframe``."""
        pass

    def fit(self, dataframe: pd.DataFrame, segments: list, cache: Optional[AnalyticUnitCache]) -> AnalyticUnitCache:
        """Fit the model and return its updated state.

        The window size stored in ``self.state['WINDOW_SIZE']`` is set to
        half of the longest labeled segment (rounded up, measured in
        dataframe rows). When no segment is labeled there is nothing to
        derive it from, so the previously stored value is left untouched
        instead of failing on ``max()`` of an empty list.

        Args:
            dataframe: data to train on.
            segments: dicts with ``'labeled'``, ``'from'`` and ``'to'``
                keys; ``'from'``/``'to'`` are timestamps in milliseconds.
            cache: state from a previous call, or ``None``.

        Returns:
            ``self.state`` after ``do_fit`` has run.
        """
        self._restore_state(cache)
        self.segments = segments

        labeled_lengths = [
            self._segment_length(dataframe, segment)
            for segment in segments
            if segment['labeled']
        ]
        if labeled_lengths:
            self.state['WINDOW_SIZE'] = math.ceil(max(labeled_lengths) / 2)

        self.do_fit(dataframe, segments)
        return self.state

    def predict(self, dataframe: pd.DataFrame, cache: Optional[AnalyticUnitCache]) -> dict:
        """Run detection and convert the detected indices to time ranges.

        Every index ``x`` returned by ``do_predict`` becomes a
        ``(from_ms, to_ms)`` tuple built from the timestamps of the rows
        ``x - 1`` and ``x + 1``. When a neighbour does not exist (detection
        on the first or last row) the timestamp of row ``x`` itself is used
        for that side.

        Args:
            dataframe: data to analyse; must have a ``'timestamp'`` column.
            cache: state from a previous call, or ``None``.

        Returns:
            ``{'segments': [(from_ms, to_ms), ...], 'cache': self.state}``.
        """
        self._restore_state(cache)

        detected = self.do_predict(dataframe)
        timestamps = dataframe['timestamp']
        segments = [
            (
                self._timestamp_ms(timestamps, x - 1, x),
                self._timestamp_ms(timestamps, x + 1, x),
            )
            for x in detected
        ]

        return {
            'segments': segments,
            'cache': self.state
        }

    def _restore_state(self, cache: Optional[AnalyticUnitCache]) -> None:
        """Adopt ``cache`` as the model state when it is a dict."""
        if isinstance(cache, AnalyticUnitCache):
            self.state = cache

    @staticmethod
    def _segment_length(dataframe: pd.DataFrame, segment: dict) -> int:
        """Return the length of ``segment`` measured in dataframe indices."""
        from_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['from'], unit='ms'))
        to_index = utils.timestamp_to_index(dataframe, pd.to_datetime(segment['to'], unit='ms'))
        return abs(to_index - from_index)

    @staticmethod
    def _timestamp_ms(timestamps: pd.Series, preferred, fallback) -> float:
        """Return the timestamp at ``preferred`` (or ``fallback``) in ms."""
        try:
            stamp = timestamps[preferred]
        except (KeyError, IndexError):
            # The neighbouring row does not exist; use the detected row.
            stamp = timestamps[fallback]
        # TODO: convert from ns to ms more proper way (not dividing by 10^6)
        return stamp.value / 1000000
Loading…
Reference in new issue