[InfluxDB] Improve database subsystem, and testing

daq-tools · Jun 9, 2023 · b2817b6 · b2817b6
1 parent 693ff48
commit b2817b6
Show file tree

Hide file tree

Showing 12 changed files with 214 additions and 177 deletions.
diff --git a/.coveragerc b/.coveragerc
diff --git a/Makefile b/Makefile
@@ -10,6 +10,7 @@ $(eval venv := .venv)
 $(eval pip := $(venv)/bin/pip)
 $(eval python := $(venv)/bin/python)
 $(eval pytest := $(venv)/bin/pytest)
+$(eval coverage := $(venv)/bin/coverage)
 $(eval bumpversion := $(venv)/bin/bumpversion)
 $(eval twine := $(venv)/bin/twine)
 $(eval sphinx-build := $(venv)/bin/sphinx-build)
@@ -94,8 +95,10 @@ test: virtualenv-dev
 
 .PHONY:
 test-coverage: virtualenv-dev
- $(pytest) --cov --cov-report=term-missing --cov-report=xml kotori test
-
+ $(coverage) run --concurrency=multiprocessing,thread --parallel-mode --timid $(pytest) kotori test
+ $(coverage) combine
+ $(coverage) report
+ $(coverage) xml
 
 
 # =============

diff --git a/etc/test/main.ini b/etc/test/main.ini
@@ -14,9 +14,14 @@ include =
 
 ; http server
 [kotori]
+
+; TODO: Refactor to [http] section.
 http_listen = localhost
 http_port = 24642
 
+; TODO: Implement backend database selection.
+; use_database = influxdb
+
 ; mqtt bus adapter
 [mqtt]
 host = localhost

diff --git a/kotori/daq/services/mig.py b/kotori/daq/services/mig.py
@@ -39,6 +39,7 @@ def __init__(self, channel=None, graphing=None, strategy=None):
  self.strategy = strategy
 
  self.name = u'service-mig-' + self.channel.get('realm', str(id(self)))
+ self.database = None
 
  def setupService(self):
 

diff --git a/kotori/daq/storage/influx.py b/kotori/daq/storage/influx.py
@@ -1,14 +1,13 @@
 # -*- coding: utf-8 -*-
-# (c) 2015-2021 Andreas Motl <[email protected]>
-import math
-
+# (c) 2015-2023 Andreas Motl <[email protected]>
 import requests
 from copy import deepcopy
 from funcy import project
 from collections import OrderedDict
 from twisted.logger import Logger
 from influxdb.client import InfluxDBClient, InfluxDBClientError
-from kotori.io.protocol.util import parse_timestamp, is_number, convert_floats
+
+from kotori.daq.storage.util import format_chunk
 
 log = Logger()
 
@@ -33,7 +32,7 @@ def __init__(self, settings=None, database=None):
 
  self.__dict__.update(**settings)
 
- # Bookeeping for all databases having been touched already
+ # Bookkeeping for all databases having been touched already
  self.databases_written_once = set()
 
  # Knowledge about all databases to be accessed using UDP
@@ -71,7 +70,7 @@ def write(self, meta, data):
  data_copy = deepcopy(data)
 
  try:
- chunk = self.format_chunk(meta, data)
+ chunk = format_chunk(meta, data)
 
  except Exception as ex:
  log.failure(u'Could not format chunk (ex={ex_name}: {ex}): data={data}, meta={meta}',
@@ -122,160 +121,6 @@ def write_chunk(self, meta, chunk):
  def get_tags(data):
  return project(data, ['gateway', 'node'])
 
- def format_chunk(self, meta, data):
- """
- Format for InfluxDB >= 0.9::
- {
- "measurement": "hiveeyes_100",
- "tags": {
- "host": "server01",
- "region": "europe"
- },
- "time": "2015-10-17T19:30:00Z",
- "fields": {
- "value": 0.42
- }
- }
- """
-
- assert isinstance(data, dict), 'Data payload is not a dictionary'
-
- chunk = {
- "measurement": meta['measurement'],
- "tags": {},
- }
-
- """
- if "gateway" in meta:
- chunk["tags"]["gateway"] = meta["gateway"]
-
- if "node" in meta:
- chunk["tags"]["node"] = meta["node"]
- """
-
- # TODO: Refactor to some knowledgebase component.
- time_field_candidates = [
- 'time', # Vanilla
- 'datetime', # Vanilla
- 'Time', # Tasmota
- 'dateTime', # WeeWX
- 'timestamp', # Contrib
- ]
-
- # Extract timestamp field from data
- chunk['time_precision'] = 'n'
- # FIXME: Unify with ``kotori.io.protocol.http.data_acquisition()``.
- for time_field in time_field_candidates:
- if time_field in data:
-
- # WeeWX. TODO: Move to specific vendor configuration.
- # Disabled in favor of precision detection heuristic.
- #if time_field == 'dateTime':
- # chunk['time_precision'] = 's'
-
- # Process timestamp field.
- if data[time_field]:
-
- # Decode timestamp.
- chunk['time'] = data[time_field]
- if is_number(chunk['time']):
- chunk['time'] = float(chunk['time'])
-
- # Remove timestamp from data payload.
- del data[time_field]
-
- # If we found a timestamp field already,
- # don't look out for more.
- break
-
- # Extract geohash from data. Finally, thanks Rich!
- # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
- if "geohash" in data:
- chunk["tags"]["geohash"] = data["geohash"]
- del data['geohash']
-
- # Extract more information specific to luftdaten.info
- for field in ['location', 'location_id', 'location_name', 'sensor_id', 'sensor_type']:
- if field in data:
- chunk["tags"][field] = data[field]
- del data[field]
-
- # TODO: Maybe do this at data acquisition / transformation time, not here.
- if 'time' in chunk:
- timestamp = chunk['time'] = parse_timestamp(chunk['time'])
-
- # Heuristically compute timestamp precision
- if isinstance(timestamp, (int, float)):
- if timestamp >= 1e17 or timestamp <= -1e17:
- time_precision = 'n'
- elif timestamp >= 1e14 or timestamp <= -1e14:
- time_precision = 'u'
- elif timestamp >= 1e11 or timestamp <= -1e11:
- time_precision = 'ms'
-
- # TODO: Is this a reasonable default?
- else:
- time_precision = 's'
-
- # Support fractional epoch timestamps like `1637431069.6585083`.
- if isinstance(timestamp, float):
- fractional, whole = math.modf(timestamp)
- fracdigits = len(str(fractional)) - 2
- if fracdigits > 0:
- if fracdigits <= 3:
- exponent = 3
- time_precision = "ms"
- elif fracdigits <= 6:
- exponent = 6
- time_precision = "u"
- else:
- exponent = 9
- time_precision = "n"
- timestamp = timestamp * (10 ** exponent)
-
- chunk['time'] = int(timestamp)
- chunk['time_precision'] = time_precision
-
- """
- # FIXME: Breaks CSV data acquisition. Why?
- if isinstance(chunk['time'], datetime.datetime):
- if chunk['time'].microsecond == 0:
- chunk['time_precision'] = 's'
- """
-
- """
- Prevent errors like
- ERROR: InfluxDBClientError: 400:
- write failed: field type conflict:
- input field "pitch" on measurement "01_position" is type float64, already exists as type integer
- """
- self.data_to_float(data)
-
- assert data, 'Data payload is empty'
-
- chunk["fields"] = data
-
- return chunk
-
- def data_to_float(self, data):
- return convert_floats(data)
-
- for key, value in data.items():
-
- # Sanity checks
- if isinstance(value, str):
- continue
-
- if value is None:
- data[key] = None
- continue
-
- # Convert to float
- try:
- data[key] = float(value)
- except (TypeError, ValueError) as ex:
- log.warn(u'Measurement "{key}: {value}" float conversion failed: {ex}', key=key, value=value, ex=ex)
-
 
 class BusInfluxForwarder(object):
  """

diff --git a/kotori/daq/storage/util.py b/kotori/daq/storage/util.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+# (c) 2015-2023 Andreas Motl <[email protected]>
+import math
+
+from kotori.io.protocol.util import convert_floats, is_number, parse_timestamp
+
+
+def format_chunk(meta, data):
+ """
+ Build an InfluxDB data point ("chunk") from `meta` and `data`.
+
+ Format for InfluxDB >= 0.9::
+ {
+ "measurement": "hiveeyes_100",
+ "tags": {
+ "host": "server01",
+ "region": "europe"
+ },
+ "time": "2015-10-17T19:30:00Z",
+ "fields": {
+ "value": 0.42
+ }
+ }
+
+ The returned dictionary additionally carries a `time_precision` item,
+ which is one of `n`, `u`, `ms`, or `s`.
+
+ :param meta: Mapping providing the `measurement` name.
+ :param data: Dictionary with the data payload. It is modified in place:
+ the timestamp field, `geohash`, and the luftdaten.info fields are
+ removed from it, and the very same object becomes `chunk["fields"]`.
+ :return: The chunk dictionary with `measurement`, `tags`,
+ `time_precision`, `fields`, and, when a timestamp was found, `time`.
+ :raises AssertionError: When `data` is not a dictionary, or when it is
+ empty after the timestamp and tag fields have been removed.
+ """
+
+ assert isinstance(data, dict), 'Data payload is not a dictionary'  # NOTE: assert statements are skipped under `python -O`
+
+ chunk = {
+ "measurement": meta['measurement'],
+ "tags": {},
+ }
+
+ """
+ if "gateway" in meta:
+ chunk["tags"]["gateway"] = meta["gateway"]
+
+ if "node" in meta:
+ chunk["tags"]["node"] = meta["node"]
+ """
+
+ # TODO: Refactor to some knowledgebase component.
+ time_field_candidates = [
+ 'time', # Vanilla
+ 'datetime', # Vanilla
+ 'Time', # Tasmota
+ 'dateTime', # WeeWX
+ 'timestamp', # Contrib
+ ]
+
+ # Extract timestamp field from data, probing the candidates in the order listed above.
+ chunk['time_precision'] = 'n'  # default; overwritten below once a numeric timestamp is processed
+ # FIXME: Unify with ``kotori.io.protocol.http.data_acquisition()``.
+ for time_field in time_field_candidates:
+ if time_field in data:
+
+ # WeeWX. TODO: Move to specific vendor configuration.
+ # Disabled in favor of precision detection heuristic.
+ # if time_field == 'dateTime':
+ # chunk['time_precision'] = 's'
+
+ # Process timestamp field.
+ if data[time_field]:  # falsy values (None, '', 0) are not used as timestamp
+
+ # Decode timestamp.
+ chunk['time'] = data[time_field]
+ if is_number(chunk['time']):
+ chunk['time'] = float(chunk['time'])
+
+ # Remove timestamp from data payload, so it does not end up in `fields`.
+ del data[time_field]
+
+ # If we found a timestamp field already,
+ # stop looking for further candidates.
+ break
+
+ # Extract geohash from data. Finally, thanks Rich!
+ # TODO: Also precompute geohash with 3-4 different zoomlevels and add them as tags
+ if "geohash" in data:
+ chunk["tags"]["geohash"] = data["geohash"]
+ del data['geohash']
+
+ # Extract more information specific to luftdaten.info, and store it as tags instead of fields.
+ for field in ['location', 'location_id', 'location_name', 'sensor_id', 'sensor_type']:
+ if field in data:
+ chunk["tags"][field] = data[field]
+ del data[field]
+
+ # TODO: Maybe do this at data acquisition / transformation time, not here.
+ if 'time' in chunk:
+ timestamp = chunk['time'] = parse_timestamp(chunk['time'])
+
+ # Heuristically compute timestamp precision from the magnitude of the epoch value.
+ if isinstance(timestamp, (int, float)):
+ if timestamp >= 1e17 or timestamp <= -1e17:
+ time_precision = 'n'
+ elif timestamp >= 1e14 or timestamp <= -1e14:
+ time_precision = 'u'
+ elif timestamp >= 1e11 or timestamp <= -1e11:
+ time_precision = 'ms'
+
+ # TODO: Is this a reasonable default?
+ else:
+ time_precision = 's'
+
+ # Support fractional epoch timestamps like `1637431069.6585083`, by scaling them to an integer of matching precision.
+ if isinstance(timestamp, float):  # NOTE(review): confirm whether this is meant to run only for the `s` default; a whole-number float yields str(0.0) == "0.0", i.e. one digit
+ fractional, whole = math.modf(timestamp)  # `whole` is not used afterwards
+ fracdigits = len(str(fractional)) - 2  # digit count without the leading "0."; NOTE(review): "-0.5" or "1e-05" skew this count
+ if fracdigits > 0:
+ if fracdigits <= 3:
+ exponent = 3
+ time_precision = "ms"
+ elif fracdigits <= 6:
+ exponent = 6
+ time_precision = "u"
+ else:
+ exponent = 9
+ time_precision = "n"
+ timestamp = timestamp * (10 ** exponent)
+
+ chunk['time'] = int(timestamp)  # int() truncates any remaining fraction
+ chunk['time_precision'] = time_precision
+
+ """
+ # FIXME: Breaks CSV data acquisition. Why?
+ if isinstance(chunk['time'], datetime.datetime):
+ if chunk['time'].microsecond == 0:
+ chunk['time_precision'] = 's'
+ """
+
+ # Make sure numeric data in `fields` is in float format.
+ """
+ Prevent errors like
+ ERROR: InfluxDBClientError: 400:
+ write failed: field type conflict:
+ input field "pitch" on measurement "01_position" is type float64, already exists as type integer
+ """
+ convert_floats(data)  # return value is ignored; presumably converts in place - TODO confirm
+
+ assert data, 'Data payload is empty'  # can trigger when the payload only had timestamp/tag fields
+
+ chunk["fields"] = data  # same dict object as the caller's `data`, no copy
+
+ return chunk