Prepare 3.0.8 release #39

Merged
merged 5 commits on Jan 7, 2024
15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -22,6 +22,21 @@ The current development release is 3.0.6. This is the recommended version for
anyone who wants the latest features. It should be suitable for production
deployment very soon.

## [3.0.8] - 2024-01-07

### Changed

- Ensure all telegraf metrics are timestamped in nanoseconds. Because output
  to telegraf is buffered, we can't rely on telegraf doing the timestamping
  (multiple samples would otherwise get the same or very similar timestamps),
  so we now add a timestamp to every metric line as soon as we see it.
- The `ntpmon_info` metric from telegraf mode is split into
  `ntpmon_resident_set_size_bytes`, `ntpmon_virtual_memory_size_bytes`, and
  `ntpmon_uptime_seconds` in prometheus mode, with labels only on
  `ntpmon_uptime_seconds`, to tidy up metric names and reduce label
  cardinality when used with prometheus. These metrics retain their existing
  names in telegraf mode.


## [3.0.7] - 2024-01-05

### Changed
6 changes: 6 additions & 0 deletions debian/changelog
@@ -1,3 +1,9 @@
ntpmon (3.0.8-1) focal; urgency=medium

* New upstream release.

-- Paul Gear <[email protected]> Sun, 07 Jan 2024 13:33:33 +1000

ntpmon (3.0.7-1) focal; urgency=medium

* New upstream release.
14 changes: 7 additions & 7 deletions src/line_protocol.py
@@ -10,9 +10,12 @@


import re
import time


exclude_fields = []
exclude_fields = [
"timestamp_ns",
]

exclude_tags = []

@@ -64,15 +67,12 @@ def timestamp_to_line_protocol(timestamp: float) -> (int, int):


def to_line_protocol(metrics: dict, which: str, additional_tags: dict = {}) -> str:
if "datetime" in metrics:
seconds, nanoseconds = timestamp_to_line_protocol(metrics["datetime"].timestamp())
timestamp = f" {seconds}{nanoseconds:09}"
else:
timestamp = ""
if "timestamp_ns" not in metrics:
metrics["timestamp_ns"] = time.time_ns()
tags = format_tags(metrics, additional_tags)
if len(tags):
tags = "," + tags
return f"{which}{tags} {format_fields(metrics)}{timestamp}"
return f"{which}{tags} {format_fields(metrics)} {metrics['timestamp_ns']}"


punctuation = re.compile(r'[-!@#$%^&()<>,./\?+=:;"\'\[\]\{\}\*\s]+')
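
To illustrate the new timestamping behaviour described in the changelog, here is a minimal sketch of the shape of a line-protocol record with a nanosecond timestamp appended as soon as the metric is generated. It is a stand-alone approximation, not the module itself: `format_tags_sketch` and `format_fields_sketch` are simplified stand-ins for the real helpers in `src/line_protocol.py`, and the tag/field values are made up.

```python
import time


def format_tags_sketch(tags: dict) -> str:
    # Simplified stand-in for line_protocol.format_tags (no escaping).
    return ",".join(f"{k}={v}" for k, v in sorted(tags.items()))


def format_fields_sketch(fields: dict) -> str:
    # Simplified stand-in for line_protocol.format_fields (no type suffixes).
    return ",".join(f"{k}={v}" for k, v in fields.items())


def to_line_protocol_sketch(which: str, tags: dict, fields: dict) -> str:
    # Timestamp the metric immediately, in nanoseconds, rather than relying
    # on the receiver to do it after the (buffered) output is flushed.
    timestamp_ns = time.time_ns()
    tag_str = format_tags_sketch(tags)
    prefix = f"{which},{tag_str}" if tag_str else which
    return f"{prefix} {format_fields_sketch(fields)} {timestamp_ns}"


# e.g. "ntpmon,hostname=ntp1 offset=1.45826e-05,stratum=2 1704595413123456789"
print(to_line_protocol_sketch("ntpmon", {"hostname": "ntp1"}, {"offset": 1.45826e-05, "stratum": 2}))
```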
30 changes: 21 additions & 9 deletions src/outputs.py
@@ -126,9 +126,18 @@ def __init__(self, args: argparse.Namespace) -> None:
]

infotypes: ClassVar[Dict[str, Tuple[str, str, str]]] = {
"ntpmon_rss": ("i", "_bytes", "The resident set size of the ntpmon process"),
"ntpmon_uptime": (None, "_seconds", "Time for which the ntpmon process has been running"),
"ntpmon_vms": ("i", "_bytes", "The virtual memory size of the ntpmon process"),
"resident_set_size": ("i", "_bytes", "The resident set size of the ntpmon process"),
"virtual_memory_size": ("i", "_bytes", "The virtual memory size of the ntpmon process"),
}

infotypes_labelled: ClassVar[Dict[str, Tuple[str, str, str]]] = {
"uptime": (None, "_seconds", "Time for which the ntpmon process has been running"),
}

info_rewrites: ClassVar[Dict[str, str]] = {
"ntpmon_rss": "resident_set_size",
"ntpmon_uptime": "uptime",
"ntpmon_vms": "virtual_memory_size",
}

peerstatslabels: ClassVar[List[str]] = [
@@ -184,10 +193,16 @@ def __init__(self, args: argparse.Namespace) -> None:
}

def send_info(self, metrics: dict, debug: bool = False) -> None:
# rewrite info metric names for prometheus
for i in self.info_rewrites:
if i in metrics:
metrics[self.info_rewrites[i]] = metrics[i]
del metrics[i]
self.send_stats("ntpmon", metrics, self.infotypes, [], [], debug=debug)
self.send_stats(
"ntpmon_info",
"ntpmon",
metrics,
self.infotypes,
self.infotypes_labelled,
[x for x in self.infolabels if x in metrics],
[metrics[x] for x in self.infolabels if x in metrics],
debug=debug,
@@ -301,10 +316,7 @@ def send(self, name: str, metrics: dict, tries: int = 0) -> None:
print(telegraf_line, file=self.file)
except BrokenPipeError as bpe:
# If we have lost our connection to telegraf, wait a little, then
# reopen the socket and try again. We add a timestamp to metrics
# without it, in case it takes a while to make the connection.
if "datetime" not in metrics:
metrics["datetime"] = datetime.datetime.now(tz=datetime.timezone.utc)
# reopen the socket and try again.
time.sleep(0.1)
self.set_file()
self.send(name, metrics, tries + 1)
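
As a concrete illustration of the info-metric renaming above, here is a small sketch of the rewrite step performed in `send_info()`: internal metric names are mapped to the prometheus-facing names before `send_stats()` is called. The rename map is copied from the diff; the metric values are hypothetical.

```python
# Rename map as in outputs.py above; the sample values are invented.
info_rewrites = {
    "ntpmon_rss": "resident_set_size",
    "ntpmon_uptime": "uptime",
    "ntpmon_vms": "virtual_memory_size",
}

metrics = {"ntpmon_rss": 31457280, "ntpmon_uptime": 86400.0, "ntpmon_vms": 262144000}

for old_name in info_rewrites:
    if old_name in metrics:
        metrics[info_rewrites[old_name]] = metrics.pop(old_name)

print(metrics)
# {'resident_set_size': 31457280, 'uptime': 86400.0, 'virtual_memory_size': 262144000}
# Per the changelog, these surface in prometheus mode as
# ntpmon_resident_set_size_bytes, ntpmon_uptime_seconds (the only labelled one),
# and ntpmon_virtual_memory_size_bytes; telegraf mode keeps the old names.
```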
22 changes: 14 additions & 8 deletions src/peer_stats.py
@@ -68,7 +68,7 @@ def checkfail(test: str) -> int:
def extract_chrony_measurements(f: List[str]) -> dict:
return {
# sorted by field position rather than name
"datetime": datetime.datetime.fromisoformat("+".join((f[0], f[1], "00:00"))),
"timestamp_ns": str_to_nanoseconds(f[0], f[1]),
"source": f[2],
"leap": leapcodes[f[3]],
"stratum": int(f[4]),
@@ -122,7 +122,7 @@ def extract_chrony_measurements(f: List[str]) -> dict:
def extract_chrony_statistics(f: List[str]) -> dict:
return {
# sort by field position rather than name
"datetime": datetime.datetime.fromisoformat("+".join((f[0], f[1], "00:00"))),
"timestamp_ns": str_to_nanoseconds(f[0], f[1]),
"source": f[2],
"stdev": float(f[3]),
"offset": float(f[4]),
@@ -159,7 +159,7 @@ def extract_chrony_statistics(f: List[str]) -> dict:
def extract_chrony_tracking(f: List[str]) -> dict:
return {
# sort by field position rather than name
"datetime": datetime.datetime.fromisoformat("+".join((f[0], f[1], "00:00"))),
"timestamp_ns": str_to_nanoseconds(f[0], f[1]),
"source": f[2],
"stratum": int(f[3]),
"freq": float(f[4]),
@@ -190,7 +190,7 @@ def extract_ntp_peerstats(f: List[str]) -> dict:
def extract_ntp_peerstats(f: List[str]) -> dict:
basefields = {
# sorted by field position rather than name
"datetime": datetime.datetime.fromtimestamp(mjd_to_timestamp(float(f[0]), float(f[1])), tz=datetime.timezone.utc),
"timestamp_ns": mjd_to_nanoseconds(float(f[0]), float(f[1])),
"source": f[2],
"offset": float(f[4]),
"delay": float(f[5]),
@@ -201,10 +201,6 @@
return basefields


def mjd_to_timestamp(day: float, time: float) -> float:
return (day - 40587) * 86400 + time


select_field = {
0: "invalid",
1: "false",
@@ -232,6 +228,11 @@ def extract_ntpd_status_word(status: str) -> dict:
}


def mjd_to_nanoseconds(day: float, time: float) -> int:
"""Convert mean julian day + time into nanoseconds since the epoch"""
return int(((day - 40587) * 86400 + time) * 1_000_000_000)


def parse_measurement(line: str) -> dict:
if regex.match(line):
return None
@@ -246,3 +247,8 @@
except Exception as e:
print(e, file=sys.stderr)
return None


def str_to_nanoseconds(date: str, time: str) -> int:
"""Convert date + time strings in UTC to nanoseconds since the epoch"""
return int(datetime.datetime.fromisoformat("+".join((date, time, "00:00"))).timestamp() * 1_000_000_000)
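
A quick sanity check on the two new conversion helpers: 40587 is the Modified Julian Day of the Unix epoch (1970-01-01), so `(day - 40587) * 86400 + time` is seconds since the epoch before scaling to nanoseconds. The sketch below re-uses the function bodies from the diff; the sample date matches the one used in `unit_tests/test_peer_stats.py`, with the fractional seconds dropped to keep the arithmetic exact.

```python
import datetime


def mjd_to_nanoseconds(day: float, time: float) -> int:
    # MJD 40587 == 1970-01-01, so this is Unix seconds scaled to nanoseconds.
    return int(((day - 40587) * 86400 + time) * 1_000_000_000)


def str_to_nanoseconds(date: str, time: str) -> int:
    # "YYYY-MM-DD" + "HH:MM:SS" in UTC -> nanoseconds since the epoch.
    return int(datetime.datetime.fromisoformat("+".join((date, time, "00:00"))).timestamp() * 1_000_000_000)


# 2023-12-25 is MJD 60303; 08:41:56 UTC is 31316 seconds into the day.
assert mjd_to_nanoseconds(60303, 31316) == 1_703_493_716_000_000_000
assert str_to_nanoseconds("2023-12-25", "08:41:56") == 1_703_493_716_000_000_000
```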
2 changes: 1 addition & 1 deletion src/version_data.py
@@ -2,4 +2,4 @@
# and bash.
MAJOR="3"
MINOR="0"
PATCH="7"
PATCH="8"
5 changes: 4 additions & 1 deletion unit_tests/test_line_protocol.py
@@ -2,6 +2,7 @@
# Copyright: (c) 2023 Paul D. Gear
# License: AGPLv3 <http://www.gnu.org/licenses/agpl.html>

import time
import pytest

import line_protocol
@@ -28,8 +29,10 @@ def test_timestamp_to_line_protocol() -> None:


def test_to_line_protocol() -> None:
now_ns = time.time_ns()
metrics = {
"associd": 0,
"timestamp_ns": now_ns,
"frequency": -11.673,
"leap": False,
"offset": +0.0000145826,
@@ -50,7 +53,7 @@ def test_to_line_protocol() -> None:
== "ntpmon,hostname=ntp1,processor=x86_64,refid=100.66.246.50,reftime=e93a0505.8336edfd,system=Linux/5.10.0-26-amd64,"
"version=ntpd\\ [email protected]\\ Wed\\ Sep\\ 23\\ 11:46:38\\ UTC\\ 2020\\ (1) "
"frequency=-11.673,offset=1.45826e-05,rootdelay=1.026,rootdisp=8.218,"
"sys_jitter=0.082849,associd=0i,precision=-23i,stratum=2i,leap=0i,test=1i"
f"sys_jitter=0.082849,associd=0i,precision=-23i,stratum=2i,leap=0i,test=1i {now_ns}"
)


9 changes: 6 additions & 3 deletions unit_tests/test_peer_stats.py
@@ -33,11 +33,12 @@ def test_parse_chrony_measurements() -> None:
measurements.append(peer_stats.parse_measurement(l))
assert len(measurements) == 6

timestamp = int(datetime.datetime(2021, 12, 30, 11, 28, 49, tzinfo=datetime.timezone.utc).timestamp() * 1_000_000_000)
assert measurements[0]["refid"] == "47505373" # 4th-last field extract
assert measurements[1]["mode"] == "server" # parse 3rd-last field
assert measurements[2]["source"] == "150.101.186.50" # 3rd field extract
assert measurements[3]["offset"] > measurements[4]["offset"]
assert measurements[4]["datetime"] == datetime.datetime(2021, 12, 30, 11, 28, 49, tzinfo=datetime.timezone.utc)
assert measurements[4]["timestamp_ns"] == timestamp
assert measurements[5]["score"] == 0.01
assert bool(measurements[5]["exceeded_max_delay_dev_ratio"])

@@ -64,12 +65,14 @@ def test_parse_chrony_measurements() -> None:
def test_parse_ntpd_peerstats() -> None:
lines = peerstats.strip().split("\n")
measurements = [peer_stats.parse_measurement(l) for l in lines]
print(measurements[0])
timestamp = int(
datetime.datetime(2023, 12, 25, 8, 41, 56, 612000, tzinfo=datetime.timezone.utc).timestamp() * 1_000_000_000
)
assert len(measurements) == len(lines)
assert all([m is not None for m in measurements])
assert measurements[0]["reachable"] == True
assert measurements[0]["peertype"] == "survivor"
assert measurements[1]["datetime"] == datetime.datetime(2023, 12, 25, 8, 41, 56, 612000, tzinfo=datetime.timezone.utc)
assert measurements[1]["timestamp_ns"] == timestamp
assert measurements[2]["offset"] > 0
assert measurements[3]["peertype"] == "outlier"
assert measurements[5]["peertype"] == "sync"