Files
flucto-heisskleber/heisskleber/influxdb/subscriber.py
2023-11-15 15:36:41 +01:00

76 lines
2.3 KiB
Python

import pandas as pd
from influxdb_client import InfluxDBClient
from heisskleber.core.types import Source
from .config import InfluxDBConf
def build_query(options: dict) -> str:
    """Assemble a Flux query string from a dictionary of options.

    Keys read from ``options``:
        bucket: Name inserted into ``from(bucket: ...)``.
        start, end: Objects providing ``isoformat("T")`` (e.g. ``datetime``)
            that bound the ``range`` stage.
            NOTE(review): Flux time literals presumably need a UTC offset, so
            timezone-aware datetimes are likely required -- confirm.
        measurement: Value compared against ``r._measurement``.
        filter: Optional mapping of attribute name to either a single value
            or a list of values. A list is turned into one filter stage whose
            comparisons are joined with ``or``. The key may be missing,
            ``None`` or empty, in which case no extra filter is added.
        resample: Window passed to ``aggregateWindow(every: ...)``.

    All values are interpolated verbatim (no quoting or escaping is applied),
    so they must come from a trusted source.

    Returns:
        The complete Flux query as a single string.

    Raises:
        KeyError: If a required key is missing from ``options``.
        IndexError: If a filter attribute maps to an empty list.
    """
    stages = [
        f'from(bucket:"{options["bucket"]}")',
        f'|> range(start: {options["start"].isoformat("T")}, stop: {options["end"].isoformat("T")})',
        f'|> filter(fn:(r) => r._measurement == "{options["measurement"]}")',
    ]

    # "filter" is optional: treat a missing key, None and {} identically.
    for attribute, value in (options.get("filter") or {}).items():
        candidates = value if isinstance(value, list) else [value]
        if not candidates:
            # Kept as IndexError so callers see the same exception type as
            # before, but with a message that names the offending attribute.
            raise IndexError(f"filter for attribute {attribute!r} is an empty list")
        condition = " or ".join(f'r.{attribute} == "{candidate}"' for candidate in candidates)
        stages.append(f"|> filter(fn:(r) => {condition})")

    stages.append(f'|> aggregateWindow(every: {options["resample"]}, fn: mean)')
    stages.append('|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")')
    return "".join(stages)
class Influx_Subscriber(Source):
    """Source that runs a Flux query once and hands out the result row by row.

    The query is executed in the constructor and the result is kept in
    ``self.df``. Rows can then be consumed in two ways:

    * ``receive()`` / ``next(obj)`` share one cursor (``self.index``) and
      advance through the rows one call at a time.
    * ``iter(obj)`` / ``for ... in obj`` yields every row from the start,
      independently of that cursor.

    Every row is returned as the tuple ``("influx", row_dict)``.
    """

    # Result columns that are removed before rows are handed out.
    _DROPPED_COLUMNS = (
        "result",
        "table",
        "_start",
        "_stop",
        "_measurement",
        "_time",
        "topic",
    )

    def __init__(self, config: InfluxDBConf, query: str):
        """Connect to InfluxDB and immediately run ``query``.

        Args:
            config: Connection settings; ``url``, ``org`` and either
                ``all_access_token`` or ``read_token`` are read from it.
            query: Flux query string to execute.
        """
        self.config = config
        self.query = query
        self.client: InfluxDBClient = InfluxDBClient(
            url=self.config.url,
            # Prefer the all-access token, fall back to the read-only one.
            token=self.config.all_access_token or self.config.read_token,
            org=self.config.org,
            # NOTE(review): influxdb-client documents this value as
            # milliseconds, i.e. 60 s -- confirm.
            timeout=60_000,
        )
        self.reader = self.client.query_api()
        self._run_query()
        self.index = 0

    def receive(self) -> tuple[str, dict]:
        """Return the row at the cursor as ``("influx", dict)`` and advance.

        Raises:
            IndexError: If all rows have already been consumed.
        """
        row = self.df.iloc[self.index].to_dict()
        self.index += 1
        return "influx", row

    def _run_query(self):
        """Execute ``self.query`` and store the cleaned result in ``self.df``.

        An ``epoch`` column is derived from ``_time`` (numeric value divided
        by 1e9 -- presumably nanoseconds to seconds), after which the
        bookkeeping columns listed in ``_DROPPED_COLUMNS`` are removed.
        """
        result = self.reader.query_data_frame(self.query, org=self.config.org)
        # query_data_frame may return a list of frames (one per table schema);
        # merge them so the rest of the class can rely on a single DataFrame.
        if isinstance(result, list):
            result = pd.concat(result, ignore_index=True)
        result["epoch"] = pd.to_numeric(result["_time"]) / 1e9
        # errors="ignore": not every query yields every bookkeeping column
        # (e.g. data without a "topic" tag), and a missing one is harmless.
        self.df: pd.DataFrame = result.drop(
            columns=list(self._DROPPED_COLUMNS),
            errors="ignore",
        )

    def __iter__(self):
        """Yield ``("influx", dict)`` for every row, starting from the first."""
        for _, row in self.df.iterrows():
            yield "influx", row.to_dict()

    def __next__(self) -> tuple[str, dict]:
        """Return the next unread row, sharing the cursor with ``receive()``.

        Raises:
            StopIteration: When every row has been consumed.
        """
        # Previously a fresh generator was created on each call, so the first
        # row was returned forever; use the persistent cursor instead.
        if self.index >= len(self.df):
            raise StopIteration
        return self.receive()