Browse Source
* Add `get_data_chunks` generator to `utils/dataframe.py` * Add chunks generator usage to `analytic_worker.py` * Add tests to `tests/test_detector_chunks.py` * Minor fixes (constants, etc.)
pull/1/head
8 changed files with 132 additions and 70 deletions
@ -1,3 +1,4 @@
|
||||
from utils.common import * |
||||
from utils.segments import * |
||||
from utils.time import * |
||||
from utils.dataframe import * |
||||
|
@ -0,0 +1,30 @@
|
||||
from typing import Generator |
||||
import pandas as pd |
||||
|
||||
def get_data_chunks(dataframe: pd.DataFrame, window_size: int, chunk_size: int) -> Generator[pd.DataFrame, None, None]:
    """
    Yield consecutive, overlapping chunks of ``dataframe``.

    Neighbouring chunks share ``window_size`` rows, so a pattern lying on the
    border between two chunks is fully contained in at least one of them.

    dataframe - data to split.
    window_size - number of rows shared by two neighbouring chunks.
    chunk_size - maximum number of rows in a chunk.

    If the dataframe has at most ``chunk_size`` rows it is yielded once, as is
    (its index is left untouched). Otherwise every chunk is a slice that went
    through ``reset_index()``: it is re-indexed from zero and the previous
    index is kept as a column.

    Raises ValueError if the dataframe has to be split and
    ``window_size >= chunk_size``: the offset could never move forward and
    the generator would never finish.
    """
    data_len = len(dataframe)

    # Short input: nothing to split, hand the frame back unchanged.
    if data_len <= chunk_size:
        yield dataframe
        return

    # Number of rows the offset advances between two consecutive chunks.
    step = chunk_size - window_size
    if step <= 0:
        raise ValueError(
            'window_size ({}) must be less than chunk_size ({})'.format(window_size, chunk_size)
        )

    offset = 0
    while offset < data_len:
        if data_len - offset <= chunk_size:
            # Tail: everything that is left fits into one final chunk.
            yield dataframe[offset:data_len].reset_index()
            return
        yield dataframe[offset:offset + chunk_size].reset_index()
        offset += step
Loading…
Reference in new issue