diff --git a/.github/dockerfiles/Dockerfile.build_test b/.github/dockerfiles/Dockerfile.build_test index c275d61d6fa376..80d97319da537d 100644 --- a/.github/dockerfiles/Dockerfile.build_test +++ b/.github/dockerfiles/Dockerfile.build_test @@ -1,4 +1,7 @@ -ARG BASE +# The default value is overridden in every Dockerfile usage, but adding it here helps avoid issues with +# CI checks that require a non-empty or valid base image name. See more details here: +# https://docs.docker.com/go/dockerfile/rule/invalid-default-arg-in-from/ +ARG BASE="netdata" FROM ${BASE} diff --git a/CHANGELOG.md b/CHANGELOG.md index eade63cbbb3381..6e9b37c9d583f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,14 +6,34 @@ **Merged pull requests:** +- Exit slabinfo.plugin on EPIPE [\#18448](https://github.com/netdata/netdata/pull/18448) ([teqwve](https://github.com/teqwve)) +- go.d remove vnode disable [\#18446](https://github.com/netdata/netdata/pull/18446) ([ilyam8](https://github.com/ilyam8)) +- go.d add support for symlinked vnode config files [\#18445](https://github.com/netdata/netdata/pull/18445) ([ilyam8](https://github.com/ilyam8)) +- Proper precedence when calculating time\_to\_evict [\#18444](https://github.com/netdata/netdata/pull/18444) ([stelfrag](https://github.com/stelfrag)) +- do not free the sender when the sender thread exits [\#18441](https://github.com/netdata/netdata/pull/18441) ([ktsaou](https://github.com/ktsaou)) +- fix receiver deadlock [\#18440](https://github.com/netdata/netdata/pull/18440) ([ktsaou](https://github.com/ktsaou)) +- fix charts.d/sensors leftovers [\#18439](https://github.com/netdata/netdata/pull/18439) ([ilyam8](https://github.com/ilyam8)) +- remove deadlock from sender [\#18438](https://github.com/netdata/netdata/pull/18438) ([ktsaou](https://github.com/ktsaou)) +- go.d remove duplicates in testing [\#18435](https://github.com/netdata/netdata/pull/18435) ([ilyam8](https://github.com/ilyam8)) +- Improve agent shutdown time 
[\#18434](https://github.com/netdata/netdata/pull/18434) ([stelfrag](https://github.com/stelfrag)) +- Regenerate integrations.js [\#18432](https://github.com/netdata/netdata/pull/18432) ([netdatabot](https://github.com/netdatabot)) +- go.d/sensors: add sysfs scan method to collect metrics [\#18431](https://github.com/netdata/netdata/pull/18431) ([ilyam8](https://github.com/ilyam8)) +- stream paths propagated to children and parents [\#18430](https://github.com/netdata/netdata/pull/18430) ([ktsaou](https://github.com/ktsaou)) +- go.d lmsensors improve performance [\#18429](https://github.com/netdata/netdata/pull/18429) ([ilyam8](https://github.com/ilyam8)) +- ci fix InvalidDefaultArgInFrom warn [\#18428](https://github.com/netdata/netdata/pull/18428) ([ilyam8](https://github.com/ilyam8)) +- vendor https://github.com/mdlayher/lmsensors [\#18427](https://github.com/netdata/netdata/pull/18427) ([ilyam8](https://github.com/ilyam8)) +- remove charts.d/sensors [\#18426](https://github.com/netdata/netdata/pull/18426) ([ilyam8](https://github.com/ilyam8)) +- Reset last connected when removing stale nodes with netdatacli [\#18425](https://github.com/netdata/netdata/pull/18425) ([stelfrag](https://github.com/stelfrag)) - remove checks.plugin dir [\#18424](https://github.com/netdata/netdata/pull/18424) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#18421](https://github.com/netdata/netdata/pull/18421) ([netdatabot](https://github.com/netdatabot)) - fix hyperlink in go.d samba meta [\#18420](https://github.com/netdata/netdata/pull/18420) ([ilyam8](https://github.com/ilyam8)) - add go.d samba [\#18418](https://github.com/netdata/netdata/pull/18418) ([ilyam8](https://github.com/ilyam8)) +- ACLK code cleanup [\#18417](https://github.com/netdata/netdata/pull/18417) ([stelfrag](https://github.com/stelfrag)) - restore /api/v1/badge.svg [\#18416](https://github.com/netdata/netdata/pull/18416) ([ktsaou](https://github.com/ktsaou)) - add "smbstatus -P" to ndsudo 
[\#18414](https://github.com/netdata/netdata/pull/18414) ([ilyam8](https://github.com/ilyam8)) - remove python.d/sambsa [\#18413](https://github.com/netdata/netdata/pull/18413) ([ilyam8](https://github.com/ilyam8)) - SPAWN-SERVER: re-evaluate signals even 500ms [\#18411](https://github.com/netdata/netdata/pull/18411) ([ktsaou](https://github.com/ktsaou)) +- Claim on Windows [\#18410](https://github.com/netdata/netdata/pull/18410) ([thiagoftsm](https://github.com/thiagoftsm)) - kickstart: fix write\_claim\_config when executed as a regular user [\#18406](https://github.com/netdata/netdata/pull/18406) ([ilyam8](https://github.com/ilyam8)) - Fix coverity issues [\#18405](https://github.com/netdata/netdata/pull/18405) ([stelfrag](https://github.com/stelfrag)) - remove pyyaml2 [\#18404](https://github.com/netdata/netdata/pull/18404) ([ilyam8](https://github.com/ilyam8)) @@ -329,9 +349,6 @@ - Sign DEB packages in the GHA runners that build them. [\#17949](https://github.com/netdata/netdata/pull/17949) ([Ferroin](https://github.com/Ferroin)) - Detect on startup if the netdata-meta.db file is not a valid database file [\#17924](https://github.com/netdata/netdata/pull/17924) ([stelfrag](https://github.com/stelfrag)) - eBPF cgroup and mutex [\#17915](https://github.com/netdata/netdata/pull/17915) ([thiagoftsm](https://github.com/thiagoftsm)) -- Fix small typo [\#17875](https://github.com/netdata/netdata/pull/17875) ([stelfrag](https://github.com/stelfrag)) -- spawn server \(Windows support for external plugins\) [\#17866](https://github.com/netdata/netdata/pull/17866) ([ktsaou](https://github.com/ktsaou)) -- sysinfo \(WinAPI\) [\#17857](https://github.com/netdata/netdata/pull/17857) ([thiagoftsm](https://github.com/thiagoftsm)) ## [v1.46.3](https://github.com/netdata/netdata/tree/v1.46.3) (2024-07-23) @@ -405,19 +422,6 @@ - fix detect\_libc in installer [\#17880](https://github.com/netdata/netdata/pull/17880) ([ilyam8](https://github.com/ilyam8)) - update bundled UI to 
v6.138.0 [\#17879](https://github.com/netdata/netdata/pull/17879) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#17878](https://github.com/netdata/netdata/pull/17878) ([netdatabot](https://github.com/netdatabot)) -- Regenerate integrations.js [\#17877](https://github.com/netdata/netdata/pull/17877) ([netdatabot](https://github.com/netdatabot)) -- Improve filecheck module metadata. [\#17874](https://github.com/netdata/netdata/pull/17874) ([Ferroin](https://github.com/Ferroin)) -- update Telegram Cloud notification docs to include new topic ID field [\#17873](https://github.com/netdata/netdata/pull/17873) ([papazach](https://github.com/papazach)) -- go.d phpfpm add config schema [\#17872](https://github.com/netdata/netdata/pull/17872) ([ilyam8](https://github.com/ilyam8)) -- Fix updating release info when publishing nightly releases. [\#17871](https://github.com/netdata/netdata/pull/17871) ([Ferroin](https://github.com/Ferroin)) -- go.d phpfpm: debug log the response on decoding error [\#17870](https://github.com/netdata/netdata/pull/17870) ([ilyam8](https://github.com/ilyam8)) -- Improve agent shutdown [\#17868](https://github.com/netdata/netdata/pull/17868) ([stelfrag](https://github.com/stelfrag)) -- Add openSUSE 15.6 to CI. [\#17865](https://github.com/netdata/netdata/pull/17865) ([Ferroin](https://github.com/Ferroin)) -- Update CI infrastructure to publish to secondary packaging host. [\#17863](https://github.com/netdata/netdata/pull/17863) ([Ferroin](https://github.com/Ferroin)) -- Improve anacron detection in updater. 
[\#17862](https://github.com/netdata/netdata/pull/17862) ([Ferroin](https://github.com/Ferroin)) -- RBAC for dynamic configuration documentation [\#17861](https://github.com/netdata/netdata/pull/17861) ([Ancairon](https://github.com/Ancairon)) -- DYNCFG: health, generate userconfig for incomplete alerts [\#17859](https://github.com/netdata/netdata/pull/17859) ([ktsaou](https://github.com/ktsaou)) -- Create retention charts for higher tiers [\#17855](https://github.com/netdata/netdata/pull/17855) ([stelfrag](https://github.com/stelfrag)) ## [v1.45.6](https://github.com/netdata/netdata/tree/v1.45.6) (2024-06-05) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c8d8bf203c88d..ff79c1d427a60b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,6 +118,10 @@ elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "CYGWIN" OR "${CMAKE_SYSTEM_NAME}" STREQU endif() set(BINDIR usr/bin) + set(CMAKE_RC_COMPILER_INIT windres) + ENABLE_LANGUAGE(RC) + + SET(CMAKE_RC_COMPILE_OBJECT "<CMAKE_RC_COMPILER> <FLAGS> -O coff <DEFINES> -i <SOURCE> -o <OBJECT>") add_definitions(-D_GNU_SOURCE) if($ENV{CLION_IDE}) @@ -615,6 +619,8 @@ set(LIBNETDATA_FILES src/libnetdata/avl/avl.h src/libnetdata/buffer/buffer.c src/libnetdata/buffer/buffer.h + src/libnetdata/ringbuffer/ringbuffer.c + src/libnetdata/ringbuffer/ringbuffer.h src/libnetdata/circular_buffer/circular_buffer.c src/libnetdata/circular_buffer/circular_buffer.h src/libnetdata/clocks/clocks.c @@ -1262,6 +1268,14 @@ set(STREAMING_PLUGIN_FILES src/streaming/protocol/commands.c src/streaming/protocol/commands.h src/streaming/protocol/command-claimed_id.c + src/streaming/stream_path.c + src/streaming/stream_path.h + src/streaming/stream_capabilities.c + src/streaming/stream_capabilities.h + src/streaming/sender_connect.c + src/streaming/sender_internals.h + src/streaming/sender_execute.c + src/streaming/sender_commit.c ) set(WEB_PLUGIN_FILES @@ -1286,6 +1300,13 @@ set(CLAIM_PLUGIN_FILES src/claim/cloud-status.h ) +set(CLAIM_WINDOWS_FILES + src/claim/netdata_claim.c + src/claim/netdata_claim.h +
src/claim/netdata_claim_window.c + src/claim/netdata_claim_window.h +) + set(ACLK_ALWAYS_BUILD src/aclk/aclk_rrdhost_state.h src/aclk/aclk_proxy.c @@ -1298,12 +1319,9 @@ set(ACLK_ALWAYS_BUILD src/aclk/aclk_util.h src/aclk/https_client.c src/aclk/https_client.h - src/aclk/mqtt_websockets/c-rbuf/cringbuffer.c - src/aclk/mqtt_websockets/c-rbuf/cringbuffer.h - src/aclk/mqtt_websockets/c-rbuf/cringbuffer_internal.h - src/aclk/mqtt_websockets/c_rhash/c_rhash.c - src/aclk/mqtt_websockets/c_rhash/c_rhash.h - src/aclk/mqtt_websockets/c_rhash/c_rhash_internal.h + src/libnetdata/c_rhash/c_rhash.c + src/libnetdata/c_rhash/c_rhash.h + src/libnetdata/c_rhash/c_rhash_internal.h ) set(TIMEX_PLUGIN_FILES @@ -1546,8 +1564,6 @@ set(ACLK_FILES src/aclk/schema-wrappers/schema_wrapper_utils.h src/aclk/schema-wrappers/agent_cmds.cc src/aclk/schema-wrappers/agent_cmds.h - src/aclk/helpers/mqtt_wss_pal.h - src/aclk/helpers/ringbuffer_pal.h ) @@ -2233,6 +2249,13 @@ add_executable(netdata "$<$:${PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES}>" ) +if(OS_WINDOWS) + set(NETDATA_CLAIM_RES_FILES "packaging/windows/resources/netdata_claim.rc") + + add_executable(netdata_claim ${CLAIM_WINDOWS_FILES} ${NETDATA_CLAIM_RES_FILES}) + target_link_libraries(netdata_claim shell32;gdi32;msftedit) +endif() + target_compile_definitions(netdata PRIVATE "$<$:DLIB_NO_GUI_SUPPORT>" ) @@ -2421,6 +2444,13 @@ install(PROGRAMS COMPONENT netdata DESTINATION "${BINDIR}") +if(OS_WINDOWS) + install(PROGRAMS + ${CMAKE_BINARY_DIR}/netdata_claim.exe + COMPONENT netdata + DESTINATION "${BINDIR}") +endif() + # # We don't check ENABLE_PLUGIN_CGROUP_NETWORK because rpm builds assume # the files exists unconditionally. 
@@ -2772,7 +2802,6 @@ install(PROGRAMS src/collectors/charts.d.plugin/example/example.chart.sh src/collectors/charts.d.plugin/libreswan/libreswan.chart.sh src/collectors/charts.d.plugin/opensips/opensips.chart.sh - src/collectors/charts.d.plugin/sensors/sensors.chart.sh COMPONENT plugin-chartsd DESTINATION usr/libexec/netdata/charts.d) @@ -2781,7 +2810,6 @@ install(FILES src/collectors/charts.d.plugin/example/example.conf src/collectors/charts.d.plugin/libreswan/libreswan.conf src/collectors/charts.d.plugin/opensips/opensips.conf - src/collectors/charts.d.plugin/sensors/sensors.conf COMPONENT plugin-chartsd DESTINATION usr/lib/netdata/conf.d/charts.d) diff --git a/README.md b/README.md index 0b282f729961c7..8e510a4e826e26 100644 --- a/README.md +++ b/README.md @@ -30,17 +30,17 @@ MENU: **[GETTING STARTED](#getting-started)** | **[HOW IT WORKS](#how-it-works)* > **Important** :bulb:
> People get addicted to Netdata. Once you use it on your systems, **there's no going back!**
-**Netdata** is a high-performance, cloud-native, and on-premises observability platform designed to monitor metrics and logs with unparalleled efficiency. It delivers a simpler, faster, and significantly easier approach to real-time, low-latency monitoring for systems, containers, and applications. +**Netdata** is a high-performance, cloud-native, and on-premises observability platform designed to monitor metrics and logs with unparalleled efficiency. It delivers a simpler, faster, and significantly easier approach to real-time, low-latency monitoring for systems, containers, and applications. Netdata requires **zero configuration** to get started, offering a powerful and comprehensive monitoring experience out of the box. -What sets Netdata apart is its **cost-efficient, distributed design**. Unlike traditional monitoring solutions that centralize data, **Netdata distributes the code**. Instead of funneling all data into a few central databases, Netdata processes data at the edge, keeping it close to the source. The smart open-source Netdata Agent acts as a distributed database, enabling the construction of complex observability pipelines with modular, Lego-like simplicity. +Netdata is also known for its **cost-efficient, distributed design**. Unlike traditional monitoring solutions that centralize data, **Netdata distributes the code**. Instead of funneling all data into a few central databases, Netdata processes data at the edge, keeping it close to the source. The smart open-source Netdata Agent acts as a distributed database, enabling the construction of complex observability pipelines with modular, Lego-like simplicity. -Netdata also incorporates **A.I. insights** for all monitored data, training machine learning models directly at the edge. This allows for fully automated and unsupervised anomaly detection, and with the provided APIs and UIs, users can quickly spot correlations and gain deeper insights. +Netdata provides **A.I. 
insights** for all monitored data, training machine learning models directly at the edge. This allows for fully automated and unsupervised anomaly detection, and with its intuitive APIs and UIs, users can quickly perform root cause analysis and troubleshoot issues, identifying correlations and gaining deeper insights into their infrastructure. ### The Netdata Ecosystem Netdata is built on three core components: -1. **Netdata Agent** (usually called just "Netdata"): This open-source component is the heart of the Netdata ecosystem, handling data collection, storage (embedded database), querying, machine learning, exporting, and alerting of observability data. All observability data and features a Netdata ecosystem offers, are managed by the Netdata Agent. It runs in physical and virtual servers, cloud environments, Kubernetes clusters, and edge/IoT devices and is carefully optimized to be a _**polite citizen**_ for production systems and applications. +1. **Netdata Agent** (usually called just "Netdata"): This open-source component is the heart of the Netdata ecosystem, handling data collection, storage (embedded database), querying, machine learning, exporting, and alerting of observability data. All observability data and features that the Netdata ecosystem offers are managed by the Netdata Agent. It runs in physical and virtual servers, cloud environments, Kubernetes clusters, and edge/IoT devices and is carefully optimized to have _**zero impact**_ on production systems and applications. Netdata Agent License: GPL v3+ CII Best Practices Coverity Scan diff --git a/REDISTRIBUTED.md b/REDISTRIBUTED.md index 5149127f650bb1..854b1ef2ab2f39 100644 --- a/REDISTRIBUTED.md +++ b/REDISTRIBUTED.md @@ -23,176 +23,38 @@ We have decided to redistribute all these, instead of using them through a CDN, to allow Netdata to work in cases where Internet connectivity is not available. 
-- [Dygraphs](http://dygraphs.com/) - - Copyright 2009, Dan Vanderkam - [MIT License](http://dygraphs.com/legal.html) - -- [Easy Pie Chart](https://rendro.github.io/easy-pie-chart/) - - Copyright 2013, Robert Fleischmann - [MIT License](https://github.com/rendro/easy-pie-chart/blob/master/LICENSE) - -- [Gauge.js](http://bernii.github.io/gauge.js/) - - Copyright, Bernard Kobos - [MIT License](https://github.com/getgauge/gauge-js/blob/master/LICENSE) - -- [d3pie](https://github.com/benkeen/d3pie) - - Copyright (c) 2014-2015 Benjamin Keen - [MIT License](https://github.com/benkeen/d3pie/blob/master/LICENSE) - -- [jQuery Sparklines](http://omnipotent.net/jquery.sparkline/) - - Copyright 2009-2012, Splunk Inc. - [New BSD License](http://opensource.org/licenses/BSD-3-Clause) - -- [Peity](http://benpickles.github.io/peity/) - - Copyright 2009-2015, Ben Pickles - [MIT License](https://github.com/benpickles/peity/blob/master/LICENCE) - -- [morris.js](http://morrisjs.github.io/morris.js/) - - Copyright 2013, Olly Smith - [Simplified BSD License](http://morrisjs.github.io/morris.js/) - -- [Raphaël](http://dmitrybaranovskiy.github.io/raphael/) - - Copyright 2008, Dmitry Baranovskiy - [MIT License](http://dmitrybaranovskiy.github.io/raphael/license.html) - -- [C3](http://c3js.org/) - - Copyright 2013, Masayuki Tanaka - [MIT License](https://github.com/masayuki0812/c3/blob/master/LICENSE) - -- [D3](http://d3js.org/) - - Copyright 2015, Mike Bostock - [BSD License](http://opensource.org/licenses/BSD-3-Clause) - -- [jQuery](https://jquery.org/) - - Copyright 2015, jQuery Foundation - [MIT License](https://jquery.org/license/) - -- [Bootstrap](http://getbootstrap.com/getting-started/) - - Copyright 2015, Twitter - [MIT License](https://github.com/twbs/bootstrap/blob/v4-dev/LICENSE) - -- [Bootstrap Toggle](http://www.bootstraptoggle.com/) - - Copyright (c) 2011-2014 Min Hur, The New York Times Company - [MIT License](https://github.com/minhur/bootstrap-toggle/blob/master/LICENSE) - 
-- [Bootstrap-slider](http://seiyria.com/bootstrap-slider/) - - Copyright 2017 Kyle Kemp, Rohit Kalkur, and contributors - [MIT License](https://github.com/seiyria/bootstrap-slider/blob/master/LICENSE.md) - -- [bootstrap-table](http://bootstrap-table.wenzhixin.net.cn/) - - Copyright (c) 2012-2016 Zhixin Wen [wenzhixin2010@gmail.com](mailto:wenzhixin2010@gmail.com) - [MIT License](https://github.com/wenzhixin/bootstrap-table/blob/master/LICENSE) - -- [tableExport.jquery.plugin](https://github.com/hhurz/tableExport.jquery.plugin) - - Copyright (c) 2015,2016 hhurz - [MIT License](https://github.com/hhurz/tableExport.jquery.plugin/blob/master/LICENSE) - -- [perfect-scrollbar](https://jamesflorentino.github.io/nanoScrollerJS/) - - Copyright 2016, Hyunje Alex Jun and other contributors - [MIT License](https://github.com/noraesae/perfect-scrollbar/blob/master/LICENSE) - -- [FontAwesome](https://github.com/FortAwesome/Font-Awesome) - - Created by Dave Gandy - Font license: [SIL OFL 1.1](http://scripts.sil.org/OFL) - Icon license [Creative Commons Attribution 4.0 (CC-BY 4.0)](https://creativecommons.org/licenses/by/4.0/) - Code license: [MIT License](http://opensource.org/licenses/mit-license.html) - -- [node-extend](https://github.com/justmoon/node-extend) - - Copyright 2014, Stefan Thomas - [MIT License](https://github.com/justmoon/node-extend/blob/master/LICENSE) - -- [node-net-snmp](https://github.com/stephenwvickers/node-net-snmp) - - Copyright 2013, Stephen Vickers - [MIT License](https://github.com/nospaceships/node-net-snmp#license) - -- [node-asn1-ber](https://github.com/stephenwvickers/node-asn1-ber) - - Copyright 2017, Stephen Vickers - Copyright 2011, Mark Cavage - [MIT License](https://github.com/nospaceships/node-asn1-ber#license) - -- [pixl-xml](https://github.com/jhuckaby/pixl-xml) - - Copyright 2015, Joseph Huckaby - [MIT License](https://github.com/jhuckaby/pixl-xml#license) - -- [sensors](https://github.com/paroj/sensors.py) - - Copyright 2014, Pavel 
Rojtberg - [LGPL 2.1 License](http://opensource.org/licenses/LGPL-2.1) - -- [PyYAML](https://pypi.org/project/PyYAML/) - - Copyright 2006, Kirill Simonov - [MIT License](https://github.com/yaml/pyyaml/blob/master/LICENSE) - -- [urllib3](https://github.com/shazow/urllib3) - - Copyright 2008-2016 Andrey Petrov and [contributors](https://github.com/shazow/urllib3/blob/master/CONTRIBUTORS.txt) - [MIT License](https://github.com/shazow/urllib3/blob/master/LICENSE.txt) - -- [lz-string](http://pieroxy.net/blog/pages/lz-string/index.html) - - Copyright 2013 Pieroxy - [WTFPL License](http://pieroxy.net/blog/pages/lz-string/index.html#inline_menu_10) - -- [pako](http://nodeca.github.io/pako/) - - Copyright 2014-2017 Vitaly Puzrin and Andrei Tuputcyn - [MIT License](https://github.com/nodeca/pako/blob/master/LICENSE) - -- [clipboard-polyfill](https://github.com/lgarron/clipboard-polyfill) - - Copyright (c) 2014 Lucas Garron - [MIT License](https://github.com/lgarron/clipboard-polyfill/blob/master/LICENSE.md) - -- [Utilities for writing code that runs on Python 2 and 3](https://raw.githubusercontent.com/netdata/netdata/master/src/collectors/python.d.plugin/python_modules/urllib3/packages/six.py) - - Copyright (c) 2010-2015 Benjamin Peterson - [MIT License](https://github.com/benjaminp/six/blob/master/LICENSE) - -- [mcrcon](https://github.com/barneygale/MCRcon) - - Copyright (C) 2015 Barnaby Gale - [MIT License](https://raw.githubusercontent.com/barneygale/MCRcon/master/COPYING.txt) - -- [monotonic](https://github.com/atdt/monotonic) - - Copyright 2014, 2015, 2016 Ori Livneh [ori@wikimedia.org](mailto:ori@wikimedia.org) - [Apache-2.0](http://www.apache.org/licenses/LICENSE-2.0) - -- [filelock](https://github.com/benediktschmitt/py-filelock) - - Copyright 2015, Benedikt Schmitt [Unlicense License](https://unlicense.org/) - -- [Kolmogorov-Smirnov distribution](http://simul.iro.umontreal.ca/ksdir/) - - Copyright March 2010 by Université de Montréal, Richard Simard and Pierre 
L'Ecuyer - [GPL 3.0](https://www.gnu.org/licenses/gpl-3.0.en.html) - -- [xxHash](https://github.com/Cyan4973/xxHash) - - Copyright (c) 2012-2021 Yann Collet - [BSD](https://github.com/Cyan4973/xxHash/blob/dev/LICENSE) - +| Name | Copyright | License | +|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Dygraphs](http://dygraphs.com/) | Copyright 2009, Dan Vanderkam | [MIT](http://dygraphs.com/legal.html) | +| [Easy Pie Chart](https://rendro.github.io/easy-pie-chart/) | Copyright 2013, Robert Fleischmann | [MIT](https://github.com/rendro/easy-pie-chart/blob/master/LICENSE) | +| [Gauge.js](http://bernii.github.io/gauge.js/) | Copyright, Bernard Kobos | [MIT](https://github.com/getgauge/gauge-js/blob/master/LICENSE) | +| [d3pie](https://github.com/benkeen/d3pie) | Copyright (c) 2014-2015 Benjamin Keen | [MIT](https://github.com/benkeen/d3pie/blob/master/LICENSE) | +| [jQuery Sparklines](http://omnipotent.net/jquery.sparkline/) | Copyright 2009-2012, Splunk Inc. 
| [New BSD](http://opensource.org/licenses/BSD-3-Clause) | +| [Peity](http://benpickles.github.io/peity/) | Copyright 2009-2015, Ben Pickles | [MIT](https://github.com/benpickles/peity/blob/master/LICENCE) | +| [morris.js](http://morrisjs.github.io/morris.js/) | Copyright 2013, Olly Smith | [Simplified BSD](http://morrisjs.github.io/morris.js/) | +| [Raphaël](http://dmitrybaranovskiy.github.io/raphael/) | Copyright 2008, Dmitry Baranovskiy | [MIT](http://dmitrybaranovskiy.github.io/raphael/license.html) | +| [C3](http://c3js.org/) | Copyright 2013, Masayuki Tanaka | [MIT](https://github.com/masayuki0812/c3/blob/master/LICENSE) | +| [D3](http://d3js.org/) | Copyright 2015, Mike Bostock | [BSD](http://opensource.org/licenses/BSD-3-Clause) | +| [jQuery](https://jquery.org/) | Copyright 2015, jQuery Foundation | [MIT](https://jquery.org/license/) | +| [Bootstrap](http://getbootstrap.com/getting-started/) | Copyright 2015, Twitter | [MIT](https://github.com/twbs/bootstrap/blob/v4-dev/LICENSE) | +| [Bootstrap Toggle](http://www.bootstraptoggle.com/) | Copyright (c) 2011-2014 Min Hur, The New York Times Company | [MIT](https://github.com/minhur/bootstrap-toggle/blob/master/LICENSE) | +| [Bootstrap-slider](http://seiyria.com/bootstrap-slider/) | Copyright 2017 Kyle Kemp, Rohit Kalkur, and contributors | [MIT](https://github.com/seiyria/bootstrap-slider/blob/master/LICENSE.md) | +| [bootstrap-table](http://bootstrap-table.wenzhixin.net.cn/) | Copyright (c) 2012-2016 Zhixin Wen | [MIT](https://github.com/wenzhixin/bootstrap-table/blob/master/LICENSE) | +| [tableExport.jquery.plugin](https://github.com/hhurz/tableExport.jquery.plugin) | Copyright (c) 2015,2016 hhurz | [MIT](https://github.com/hhurz/tableExport.jquery.plugin/blob/master/LICENSE) | +| [perfect-scrollbar](https://jamesflorentino.github.io/nanoScrollerJS/) | Copyright 2016, Hyunje Alex Jun and other contributors | [MIT](https://github.com/noraesae/perfect-scrollbar/blob/master/LICENSE) | +| 
[FontAwesome](https://github.com/FortAwesome/Font-Awesome) | Created by Dave Gandy | Font: [SIL OFL 1.1](http://scripts.sil.org/OFL), Icon: [Creative Commons Attribution 4.0 (CC-BY 4.0)](https://creativecommons.org/licenses/by/4.0/), Code: [MIT](http://opensource.org/licenses/mit-license.html) | +| [node-extend](https://github.com/justmoon/node-extend) | Copyright 2014, Stefan Thomas | [MIT](https://github.com/justmoon/node-extend/blob/master/LICENSE) | +| [pixl-xml](https://github.com/jhuckaby/pixl-xml) | Copyright 2015, Joseph Huckaby | [MIT](https://github.com/jhuckaby/pixl-xml#license) | +| [PyYAML](https://pypi.org/project/PyYAML/) | Copyright 2006, Kirill Simonov | [MIT](https://github.com/yaml/pyyaml/blob/master/LICENSE) | +| [urllib3](https://github.com/shazow/urllib3) | Copyright 2008-2016 Andrey Petrov and contributors | [MIT](https://github.com/shazow/urllib3/blob/master/LICENSE.txt) | +| [lz-string](http://pieroxy.net/blog/pages/lz-string/index.html) | Copyright 2013 Pieroxy | [WTFPL](http://pieroxy.net/blog/pages/lz-string/index.html#inline_menu_10) | +| [pako](http://nodeca.github.io/pako/) | Copyright 2014-2017 Vitaly Puzrin and Andrei Tuputcyn | [MIT](https://github.com/nodeca/pako/blob/master/LICENSE) | +| [clipboard-polyfill](https://github.com/lgarron/clipboard-polyfill) | Copyright (c) 2014 Lucas Garron | [MIT](https://github.com/lgarron/clipboard-polyfill/blob/master/LICENSE.md) | +| [Utilities for writing code that runs on Python 2 and 3](https://raw.githubusercontent.com/netdata/netdata/master/src/collectors/python.d.plugin/python_modules/urllib3/packages/six.py) | Copyright (c) 2010-2015 Benjamin Peterson | [MIT](https://github.com/benjaminp/six/blob/master/LICENSE) | +| [mcrcon](https://github.com/barneygale/MCRcon) | Copyright (C) 2015 Barnaby Gale | [MIT](https://raw.githubusercontent.com/barneygale/MCRcon/master/COPYING.txt) | +| [monotonic](https://github.com/atdt/monotonic) | Copyright 2014, 2015, 2016 Ori Livneh | 
[Apache-2.0](http://www.apache.org/licenses/LICENSE-2.0) | +| [filelock](https://github.com/benediktschmitt/py-filelock) | Copyright 2015, Benedikt Schmitt | [Unlicense](https://unlicense.org/) | +| [Kolmogorov-Smirnov distribution](http://simul.iro.umontreal.ca/ksdir/) | Copyright March 2010 by Université de Montréal, Richard Simard and Pierre L'Ecuyer | [GPL 3.0](https://www.gnu.org/licenses/gpl-3.0.en.html) | +| [xxHash](https://github.com/Cyan4973/xxHash) | Copyright (c) 2012-2021 Yann Collet | [BSD](https://github.com/Cyan4973/xxHash/blob/dev/LICENSE) | +| [lmsensors](https://github.com/mdlayher/lmsensors) | Copyright 2016, Matt Layher | [MIT](https://github.com/mdlayher/lmsensors/blob/master/LICENSE.md) | diff --git a/docs/developer-and-contributor-corner/pi-hole-raspberry-pi.md b/docs/developer-and-contributor-corner/pi-hole-raspberry-pi.md index df6bb080917349..e150cebdc0ede3 100644 --- a/docs/developer-and-contributor-corner/pi-hole-raspberry-pi.md +++ b/docs/developer-and-contributor-corner/pi-hole-raspberry-pi.md @@ -100,26 +100,6 @@ part of your system might affect another. ![The Netdata dashboard in action](https://user-images.githubusercontent.com/1153921/80827388-b9fee100-8b98-11ea-8f60-0d7824667cd3.gif) -### Enable temperature sensor monitoring - -You need to manually enable Netdata's built-in [temperature sensor -collector](/src/collectors/charts.d.plugin/sensors/README.md) to start collecting metrics. - -> Netdata uses a few plugins to manage its [collectors](/src/collectors/REFERENCE.md), each using a different language: Go, -> Python, Node.js, and Bash. While our Go collectors are undergoing the most active development, we still support the -> other languages. In this case, you need to enable a temperature sensor collector that's written in Bash. - -First, open the `charts.d.conf` file for editing. 
You should always use the `edit-config` script to edit Netdata's -configuration files, as it ensures your settings persist across updates to the Netdata Agent. - -```bash -cd /etc/netdata -sudo ./edit-config charts.d.conf -``` - -Uncomment the `sensors=force` line and save the file. Restart Netdata with `sudo systemctl restart netdata` to enable -Raspberry Pi temperature sensor monitoring. - ### Storing historical metrics on your Raspberry Pi By default, Netdata allocates 256 MiB in disk space to store historical metrics inside the [database diff --git a/integrations/integrations.js b/integrations/integrations.js index 3e295aab5ac36b..8b93f26a35feae 100644 --- a/integrations/integrations.js +++ b/integrations/integrations.js @@ -1130,45 +1130,6 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/opensips/metadata.yaml", "related_resources": "" }, - { - "meta": { - "plugin_name": "charts.d.plugin", - "module_name": "sensors", - "monitored_instance": { - "name": "Linux Sensors (sysfs)", - "link": "https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface", - "categories": [ - "data-collection.hardware-devices-and-sensors" - ], - "icon_filename": "microchip.svg" - }, - "related_resources": { - "integrations": { - "list": [] - } - }, - "info_provided_to_referring_integrations": { - "description": "" - }, - "keywords": [ - "sensors", - "sysfs", - "hwmon", - "rpi", - "raspberry pi" - ], - "most_popular": false - }, - "overview": "# Linux Sensors (sysfs)\n\nPlugin: charts.d.plugin\nModule: sensors\n\n## Overview\n\nUse this collector when `lm-sensors` doesn't work on your device (e.g. 
for RPi temperatures).\nFor all other cases use the [Go collector](/src/go/plugin/go.d/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values.\"\n\n\nIt will provide charts for all configured system sensors, by reading sensors directly from the kernel.\nThe values graphed are the raw hardware values of the sensors.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nBy default, the collector will try to read entries under `/sys/devices`\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Install charts.d plugin\n\nIf [using our official native DEB/RPM packages](/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.\n\n\n#### Enable the sensors collector\n\nThe `sensors` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `charts.d.conf` file.\n\n```bash\ncd /etc/netdata # Replace this path with your Netdata config directory, if different\nsudo ./edit-config charts.d.conf\n```\n\nChange the value of the `sensors` setting to `force` and uncomment the line. 
Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `charts.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config charts.d/sensors.conf\n```\n#### Options\n\nThe config file is sourced by the charts.d plugin. It's a standard bash file.\n\nThe following collapsed table contains all the options that can be configured for the sensors collector.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| sensors_sys_dir | The directory the kernel exposes sensor data. | /sys/devices | no |\n| sensors_sys_depth | How deep in the tree to check for sensor data. | 10 | no |\n| sensors_source_update | If set to 1, the script will overwrite internal script functions with code generated ones. | 1 | no |\n| sensors_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no |\n| sensors_priority | The charts priority on the dashboard. | 90000 | no |\n| sensors_retries | The number of retries to do in case of failure before disabling the collector. 
| 10 | no |\n\n{% /details %}\n#### Examples\n\n##### Set sensors path depth\n\nSet a different sensors path depth\n\n```yaml\n# the directory the kernel keeps sensor data\n#sensors_sys_dir=\"/sys/devices\"\n\n# how deep in the tree to check for sensor data\nsensors_sys_depth=5\n\n# if set to 1, the script will overwrite internal\n# script functions with code generated ones\n# leave to 1, is faster\n#sensors_source_update=1\n\n# the data collection frequency\n# if unset, will inherit the netdata update frequency\n#sensors_update_every=\n\n# the charts priority on the dashboard\n#sensors_priority=90000\n\n# the number of retries to do in case of failure\n# before disabling the module\n#sensors_retries=10\n\n```\n", - "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n\nTo troubleshoot issues with the `sensors` collector, run the `charts.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `charts.d.plugin` to debug the collector:\n\n ```bash\n ./charts.d.plugin debug 1 sensors\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `sensors` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. 
These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep sensors\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep sensors /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep sensors\n```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per sensor chip\n\nMetrics related to sensor chips. 
Each chip provides its own set of the following metrics.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| sensors.temp | {filename} | Celsius |\n| sensors.volt | {filename} | Volts |\n| sensors.curr | {filename} | Ampere |\n| sensors.power | {filename} | Watt |\n| sensors.fans | {filename} | Rotations / Minute |\n| sensors.energy | {filename} | Joule |\n| sensors.humidity | {filename} | Percent |\n\n", - "integration_type": "collector", - "id": "charts.d.plugin-sensors-Linux_Sensors_(sysfs)", - "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/sensors/metadata.yaml", - "related_resources": "" - }, { "meta": { "plugin_name": "cups.plugin", @@ -16517,8 +16478,8 @@ export const integrations = [ }, "most_popular": false }, - "overview": "# Linux Sensors (lm-sensors)\n\nPlugin: go.d.plugin\nModule: sensors\n\n## Overview\n\nThis collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary.\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe following type of sensors are auto-detected:\n\n- temperature\n- fan\n- voltage\n- current\n- power\n- energy\n- humidity\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Install lm-sensors\n\n- Install `lm-sensors` using your distribution's package manager.\n- Run `sensors-detect` to detect hardware monitoring chips.\n\n\n\n### Configuration\n\n#### File\n\nThe 
configuration file name for this integration is `go.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/sensors.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| binary_path | Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. | /usr/bin/sensors | yes |\n| timeout | Timeout for executing the binary, specified in seconds. | 2 | no |\n\n{% /details %}\n#### Examples\n\n##### Custom binary path\n\nThe executable is not in the directories specified in the PATH environment variable.\n\n{% details open=true summary=\"Config\" %}\n```yaml\njobs:\n - name: sensors\n binary_path: /usr/local/sbin/sensors\n\n```\n{% /details %}\n", + "overview": "# Linux Sensors (lm-sensors)\n\nPlugin: go.d.plugin\nModule: sensors\n\n## Overview\n\nThis collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary or [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface).\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe following type of sensors are auto-detected:\n\n- temperature\n- fan\n- voltage\n- current\n- power\n- energy\n- humidity\n\n\n#### 
Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\nNo action required.\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/sensors.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| binary_path | Path to the `sensors` binary. If left empty or if the binary is not found, [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface) will be used to collect sensor statistics. | /usr/bin/sensors | yes |\n| timeout | Timeout for executing the binary, specified in seconds. 
| 2 | no |\n\n{% /details %}\n#### Examples\n\n##### Custom binary path\n\nThe executable is not in the directories specified in the PATH environment variable.\n\n{% details open=true summary=\"Config\" %}\n```yaml\njobs:\n - name: sensors\n binary_path: /usr/local/sbin/sensors\n\n```\n{% /details %}\n##### Use sysfs instead of sensors\n\nSet `binary_path` to an empty string to use sysfs.\n\n{% details open=true summary=\"Config\" %}\n```yaml\njobs:\n - name: sensors\n binary_path: \"\"\n\n```\n{% /details %}\n", "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n**Important**: Debug mode is not supported for data collection jobs created via the UI using the Dyncfg feature.\n\nTo troubleshoot issues with the `sensors` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m sensors\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `sensors` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. 
These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep sensors\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep sensors /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep sensors\n```\n\n", "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per sensor\n\nThese metrics refer to the sensor.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| chip | The hardware component responsible for the sensor monitoring. |\n| feature | The specific sensor or monitoring point provided by the chip. 
|\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| sensors.sensor_temperature | temperature | Celsius |\n| sensors.sensor_voltage | voltage | Volts |\n| sensors.sensor_current | current | Amperes |\n| sensors.sensor_power | power | Watts |\n| sensors.sensor_fan_speed | fan | RPM |\n| sensors.sensor_energy | energy | Joules |\n| sensors.sensor_humidity | humidity | percent |\n\n", diff --git a/integrations/integrations.json b/integrations/integrations.json index 942c10f17c9dd2..21b18b4b0ddd60 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -1128,45 +1128,6 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/opensips/metadata.yaml", "related_resources": "" }, - { - "meta": { - "plugin_name": "charts.d.plugin", - "module_name": "sensors", - "monitored_instance": { - "name": "Linux Sensors (sysfs)", - "link": "https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface", - "categories": [ - "data-collection.hardware-devices-and-sensors" - ], - "icon_filename": "microchip.svg" - }, - "related_resources": { - "integrations": { - "list": [] - } - }, - "info_provided_to_referring_integrations": { - "description": "" - }, - "keywords": [ - "sensors", - "sysfs", - "hwmon", - "rpi", - "raspberry pi" - ], - "most_popular": false - }, - "overview": "# Linux Sensors (sysfs)\n\nPlugin: charts.d.plugin\nModule: sensors\n\n## Overview\n\nUse this collector when `lm-sensors` doesn't work on your device (e.g. 
for RPi temperatures).\nFor all other cases use the [Go collector](/src/go/plugin/go.d/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values.\"\n\n\nIt will provide charts for all configured system sensors, by reading sensors directly from the kernel.\nThe values graphed are the raw hardware values of the sensors.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nBy default, the collector will try to read entries under `/sys/devices`\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Install charts.d plugin\n\nIf [using our official native DEB/RPM packages](/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.\n\n\n#### Enable the sensors collector\n\nThe `sensors` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `charts.d.conf` file.\n\n```bash\ncd /etc/netdata # Replace this path with your Netdata config directory, if different\nsudo ./edit-config charts.d.conf\n```\n\nChange the value of the `sensors` setting to `force` and uncomment the line. 
Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `charts.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config charts.d/sensors.conf\n```\n#### Options\n\nThe config file is sourced by the charts.d plugin. It's a standard bash file.\n\nThe following collapsed table contains all the options that can be configured for the sensors collector.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| sensors_sys_dir | The directory the kernel exposes sensor data. | /sys/devices | no |\n| sensors_sys_depth | How deep in the tree to check for sensor data. | 10 | no |\n| sensors_source_update | If set to 1, the script will overwrite internal script functions with code generated ones. | 1 | no |\n| sensors_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no |\n| sensors_priority | The charts priority on the dashboard. | 90000 | no |\n| sensors_retries | The number of retries to do in case of failure before disabling the collector. 
| 10 | no |\n\n#### Examples\n\n##### Set sensors path depth\n\nSet a different sensors path depth\n\n```yaml\n# the directory the kernel keeps sensor data\n#sensors_sys_dir=\"/sys/devices\"\n\n# how deep in the tree to check for sensor data\nsensors_sys_depth=5\n\n# if set to 1, the script will overwrite internal\n# script functions with code generated ones\n# leave to 1, is faster\n#sensors_source_update=1\n\n# the data collection frequency\n# if unset, will inherit the netdata update frequency\n#sensors_update_every=\n\n# the charts priority on the dashboard\n#sensors_priority=90000\n\n# the number of retries to do in case of failure\n# before disabling the module\n#sensors_retries=10\n\n```\n", - "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n\nTo troubleshoot issues with the `sensors` collector, run the `charts.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `charts.d.plugin` to debug the collector:\n\n ```bash\n ./charts.d.plugin debug 1 sensors\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `sensors` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. 
These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep sensors\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep sensors /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep sensors\n```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per sensor chip\n\nMetrics related to sensor chips. 
Each chip provides its own set of the following metrics.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| sensors.temp | {filename} | Celsius |\n| sensors.volt | {filename} | Volts |\n| sensors.curr | {filename} | Ampere |\n| sensors.power | {filename} | Watt |\n| sensors.fans | {filename} | Rotations / Minute |\n| sensors.energy | {filename} | Joule |\n| sensors.humidity | {filename} | Percent |\n\n", - "integration_type": "collector", - "id": "charts.d.plugin-sensors-Linux_Sensors_(sysfs)", - "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/sensors/metadata.yaml", - "related_resources": "" - }, { "meta": { "plugin_name": "cups.plugin", @@ -16515,8 +16476,8 @@ }, "most_popular": false }, - "overview": "# Linux Sensors (lm-sensors)\n\nPlugin: go.d.plugin\nModule: sensors\n\n## Overview\n\nThis collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary.\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe following type of sensors are auto-detected:\n\n- temperature\n- fan\n- voltage\n- current\n- power\n- energy\n- humidity\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Install lm-sensors\n\n- Install `lm-sensors` using your distribution's package manager.\n- Run `sensors-detect` to detect hardware monitoring chips.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this 
integration is `go.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/sensors.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| binary_path | Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. | /usr/bin/sensors | yes |\n| timeout | Timeout for executing the binary, specified in seconds. | 2 | no |\n\n#### Examples\n\n##### Custom binary path\n\nThe executable is not in the directories specified in the PATH environment variable.\n\n```yaml\njobs:\n - name: sensors\n binary_path: /usr/local/sbin/sensors\n\n```\n", + "overview": "# Linux Sensors (lm-sensors)\n\nPlugin: go.d.plugin\nModule: sensors\n\n## Overview\n\nThis collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary or [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface).\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe following type of sensors are auto-detected:\n\n- temperature\n- fan\n- voltage\n- current\n- power\n- energy\n- humidity\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration 
for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\nNo action required.\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/sensors.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| binary_path | Path to the `sensors` binary. If left empty or if the binary is not found, [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface) will be used to collect sensor statistics. | /usr/bin/sensors | yes |\n| timeout | Timeout for executing the binary, specified in seconds. | 2 | no |\n\n#### Examples\n\n##### Custom binary path\n\nThe executable is not in the directories specified in the PATH environment variable.\n\n```yaml\njobs:\n - name: sensors\n binary_path: /usr/local/sbin/sensors\n\n```\n##### Use sysfs instead of sensors\n\nSet `binary_path` to an empty string to use sysfs.\n\n```yaml\njobs:\n - name: sensors\n binary_path: \"\"\n\n```\n", "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n**Important**: Debug mode is not supported for data collection jobs created via the UI using the Dyncfg feature.\n\nTo troubleshoot issues with the `sensors` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m sensors\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `sensors` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep sensors\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep sensors /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep sensors\n```\n\n", "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per sensor\n\nThese metrics refer to the sensor.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| chip | The hardware component responsible for the sensor monitoring. |\n| feature | The specific sensor or monitoring point provided by the chip. |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| sensors.sensor_temperature | temperature | Celsius |\n| sensors.sensor_voltage | voltage | Volts |\n| sensors.sensor_current | current | Amperes |\n| sensors.sensor_power | power | Watts |\n| sensors.sensor_fan_speed | fan | RPM |\n| sensors.sensor_energy | energy | Joules |\n| sensors.sensor_humidity | humidity | percent |\n\n", diff --git a/packaging/version b/packaging/version index 8628f5416ae9ab..928b4707aaf6c2 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.99.0-27-nightly +v1.99.0-53-nightly diff --git a/packaging/windows/installer.nsi b/packaging/windows/installer.nsi index 88d160a1d37192..3268d8a786a8de 100644 --- a/packaging/windows/installer.nsi +++ b/packaging/windows/installer.nsi @@ -34,8 +34,14 @@ var startMsys var hCloudToken var cloudToken -var hCloudRoom -var cloudRoom +var hCloudRooms +var cloudRooms +var hProxy +var proxy +var hInsecure +var insecure + +var avoidClaim Function .onInit nsExec::ExecToLog '$SYSDIR\sc.exe stop Netdata' @@ -46,6 +52,8 @@ Function .onInit ${EndIf} StrCpy $startMsys ${BST_UNCHECKED} + StrCpy $insecure ${BST_UNCHECKED} + StrCpy $avoidClaim ${BST_UNCHECKED} FunctionEnd Function NetdataConfigPage @@ -57,40 +65,76 @@ Function NetdataConfigPage Abort ${EndIf} - ${NSD_CreateLabel} 0 0 100% 12u "Enter your Token and Cloud Room." + IfFileExists "$INSTDIR\etc\netdata\claim.conf" NotNeeded + + ${NSD_CreateLabel} 0 0 100% 12u "Enter your Token and Cloud Room(s)." ${NSD_CreateLabel} 0 15% 100% 12u "Optionally, you can open a terminal to execute additional commands." 
- ${NSD_CreateLabel} 0 35% 20% 10% "Token" + ${NSD_CreateLabel} 0 30% 20% 10% "Token" Pop $0 - ${NSD_CreateText} 21% 35% 79% 10% "" + ${NSD_CreateText} 21% 30% 79% 10% "" Pop $hCloudToken - ${NSD_CreateLabel} 0 55% 20% 10% "Room" + ${NSD_CreateLabel} 0 45% 20% 10% "Room(s)" Pop $0 - ${NSD_CreateText} 21% 55% 79% 10% "" - Pop $hCloudRoom + ${NSD_CreateText} 21% 45% 79% 10% "" + Pop $hCloudRooms + + ${NSD_CreateLabel} 0 60% 20% 10% "Proxy" + Pop $0 + ${NSD_CreateText} 21% 60% 79% 10% "" + Pop $hProxy + + ${NSD_CreateCheckbox} 0 75% 100% 10u "Insecure connection" + Pop $hInsecure - ${NSD_CreateCheckbox} 0 70% 100% 10u "Open terminal" + ${NSD_CreateCheckbox} 0 90% 100% 10u "Open terminal" Pop $hStartMsys + Goto EndDialogDraw + + NotNeeded: + StrCpy $avoidClaim ${BST_CHECKED} + ${NSD_CreateLabel} 0 0 100% 12u "Your host has already been claimed. You can proceed with the update." + + EndDialogDraw: nsDialogs::Show FunctionEnd Function NetdataConfigLeave - ${NSD_GetText} $hCloudToken $cloudToken - ${NSD_GetText} $hCloudRoom $cloudRoom - ${NSD_GetState} $hStartMsys $startMsys - - StrLen $0 $cloudToken - StrLen $1 $cloudRoom - ${If} $0 == 125 - ${AndIf} $0 == 36 - # We should start our new claiming software here - MessageBox MB_OK "$cloudToken | $cloudRoom | $startMsys" + ${If} $avoidClaim == ${BST_UNCHECKED} + ${NSD_GetText} $hCloudToken $cloudToken + ${NSD_GetText} $hCloudRooms $cloudRooms + ${NSD_GetText} $hProxy $proxy + ${NSD_GetState} $hStartMsys $startMsys + ${NSD_GetState} $hInsecure $insecure + + StrLen $0 $cloudToken + StrLen $1 $cloudRooms + ${If} $0 == 0 + ${OrIf} $1 == 0 + Goto runMsys + ${EndIf} + + ${If} $0 == 135 + ${AndIf} $1 >= 36 + nsExec::ExecToLog '$INSTDIR\usr\bin\netdata_claim.exe /T $cloudToken /R $cloudRooms /P $proxy /I $insecure' + pop $0 + ${Else} + MessageBox MB_OK "The Cloud information does not have the expected length." 
+ ${EndIf} + + runMsys: + ${If} $startMsys == ${BST_CHECKED} + nsExec::ExecToLog '$INSTDIR\msys2.exe' + pop $0 + ${EndIf} ${EndIf} - ${If} $startMsys == 1 - nsExec::ExecToLog '$INSTDIR\msys2.exe' - pop $0 + ClearErrors + nsExec::ExecToLog '$SYSDIR\sc.exe start Netdata' + pop $0 + ${If} $0 != 0 + MessageBox MB_OK "Warning: Failed to start Netdata service." ${EndIf} FunctionEnd @@ -152,13 +196,6 @@ Section "Install Netdata" DetailPrint "Warning: Failed to add Netdata service description." ${EndIf} - ClearErrors - nsExec::ExecToLog '$SYSDIR\sc.exe start Netdata' - pop $0 - ${If} $0 != 0 - DetailPrint "Warning: Failed to start Netdata service." - ${EndIf} - WriteUninstaller "$INSTDIR\Uninstall.exe" Call NetdataUninstallRegistry diff --git a/packaging/windows/resources/netdata_claim.manifest b/packaging/windows/resources/netdata_claim.manifest new file mode 100644 index 00000000000000..f0092df426403f --- /dev/null +++ b/packaging/windows/resources/netdata_claim.manifest @@ -0,0 +1,16 @@ + + + + Netdata Claim! 
+ + + + + + + + + diff --git a/packaging/windows/resources/netdata_claim.rc b/packaging/windows/resources/netdata_claim.rc new file mode 100644 index 00000000000000..7ba02833acdc9b --- /dev/null +++ b/packaging/windows/resources/netdata_claim.rc @@ -0,0 +1,3 @@ +#include "winuser.h" +1 RT_MANIFEST "netdata_claim.manifest" +11 ICON "../NetdataWhite.ico" diff --git a/src/aclk/aclk.c b/src/aclk/aclk.c index 9eb1da89424155..10d366f8cd09d1 100644 --- a/src/aclk/aclk.c +++ b/src/aclk/aclk.c @@ -839,16 +839,16 @@ void *aclk_main(void *ptr) void aclk_host_state_update(RRDHOST *host, int cmd, int queryable) { - nd_uuid_t node_id; + ND_UUID node_id; if (!aclk_online()) return; - if (!uuid_is_null(host->node_id)) { - uuid_copy(node_id, host->node_id); + if (!UUIDiszero(host->node_id)) { + node_id = host->node_id; } else { - int ret = get_node_id(&host->host_uuid, &node_id); + int ret = get_node_id(&host->host_id.uuid, &node_id.uuid); if (ret > 0) { // this means we were not able to check if node_id already present netdata_log_error("Unable to check for node_id. 
Ignoring the host state update."); @@ -887,7 +887,7 @@ void aclk_host_state_update(RRDHOST *host, int cmd, int queryable) .session_id = aclk_session_newarch }; node_state_update.node_id = mallocz(UUID_STR_LEN); - uuid_unparse_lower(node_id, (char*)node_state_update.node_id); + uuid_unparse_lower(node_id.uuid, (char*)node_state_update.node_id); node_state_update.capabilities = aclk_get_agent_capas(); @@ -1059,12 +1059,12 @@ char *aclk_state(void) else buffer_strcat(wb, "null"); - if (uuid_is_null(host->node_id)) + if (UUIDiszero(host->node_id)) buffer_strcat(wb, "\n\tNode ID: null\n"); else { - char node_id[GUID_LEN + 1]; - uuid_unparse_lower(host->node_id, node_id); - buffer_sprintf(wb, "\n\tNode ID: %s\n", node_id); + char node_id_str[UUID_STR_LEN]; + uuid_unparse_lower(host->node_id.uuid, node_id_str); + buffer_sprintf(wb, "\n\tNode ID: %s\n", node_id_str); } buffer_sprintf(wb, "\tStreaming Hops: %d\n\tRelationship: %s", host->system_info->hops, host == localhost ? "self" : "child"); @@ -1192,12 +1192,12 @@ char *aclk_state_json(void) } else json_object_object_add(nodeinstance, "claimed_id", NULL); - if (uuid_is_null(host->node_id)) { + if (UUIDiszero(host->node_id)) { json_object_object_add(nodeinstance, "node-id", NULL); } else { - char node_id[GUID_LEN + 1]; - uuid_unparse_lower(host->node_id, node_id); - tmp = json_object_new_string(node_id); + char node_id_str[UUID_STR_LEN]; + uuid_unparse_lower(host->node_id.uuid, node_id_str); + tmp = json_object_new_string(node_id_str); json_object_object_add(nodeinstance, "node-id", tmp); } diff --git a/src/aclk/aclk_otp.c b/src/aclk/aclk_otp.c index e7530a63157225..49f6f5ff1914dd 100644 --- a/src/aclk/aclk_otp.c +++ b/src/aclk/aclk_otp.c @@ -1,12 +1,9 @@ - // SPDX-License-Identifier: GPL-3.0-or-later #include "aclk_otp.h" #include "aclk_util.h" #include "aclk.h" -#include "mqtt_websockets/c-rbuf/cringbuffer.h" - static int aclk_https_request(https_req_t *request, https_req_response_t *response) { int rc; // wrapper for 
ACLK only which loads ACLK specific proxy settings diff --git a/src/aclk/helpers/mqtt_wss_pal.h b/src/aclk/helpers/mqtt_wss_pal.h deleted file mode 100644 index fe1aacf49300ac..00000000000000 --- a/src/aclk/helpers/mqtt_wss_pal.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef MQTT_WSS_PAL_H -#define MQTT_WSS_PAL_H - -#include "libnetdata/libnetdata.h" - -#undef OPENSSL_VERSION_095 -#undef OPENSSL_VERSION_097 -#undef OPENSSL_VERSION_110 -#undef OPENSSL_VERSION_111 - -#endif /* MQTT_WSS_PAL_H */ diff --git a/src/aclk/helpers/ringbuffer_pal.h b/src/aclk/helpers/ringbuffer_pal.h deleted file mode 100644 index 2f7e1cb9357f13..00000000000000 --- a/src/aclk/helpers/ringbuffer_pal.h +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef RINGBUFFER_PAL_H -#define RINGBUFFER_PAL_H - -#include "libnetdata/libnetdata.h" - -#define crbuf_malloc(...) mallocz(__VA_ARGS__) -#define crbuf_free(...) freez(__VA_ARGS__) - -#endif /* RINGBUFFER_PAL_H */ diff --git a/src/aclk/https_client.h b/src/aclk/https_client.h index bc5ca30b8dd004..ec727a45590dd3 100644 --- a/src/aclk/https_client.h +++ b/src/aclk/https_client.h @@ -5,9 +5,6 @@ #include "libnetdata/libnetdata.h" -#include "mqtt_websockets/c-rbuf/cringbuffer.h" -#include "mqtt_websockets/c_rhash/c_rhash.h" - typedef enum http_req_type { HTTP_REQ_GET = 0, HTTP_REQ_POST, diff --git a/src/aclk/mqtt_websockets/.github/workflows/run-tests.yaml b/src/aclk/mqtt_websockets/.github/workflows/run-tests.yaml deleted file mode 100644 index da5dde821b13ff..00000000000000 --- a/src/aclk/mqtt_websockets/.github/workflows/run-tests.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: run-tests -on: - push: - schedule: - - cron: '5 3 * * 0' - pull_request: -jobs: - run-tests: - runs-on: ubuntu-latest - steps: - - name: Install ruby and deps - run: sudo apt-get install ruby ruby-dev mosquitto - - name: Checkout - uses: actions/checkout@v2 diff --git a/src/aclk/mqtt_websockets/.gitignore 
b/src/aclk/mqtt_websockets/.gitignore deleted file mode 100644 index 9f1a0d89a31f00..00000000000000 --- a/src/aclk/mqtt_websockets/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -build/* -!build/.keep -test -.vscode -mqtt/mqtt.c -mqtt/include/mqtt.h -libmqttwebsockets.* -*.o -.dirstamp -.deps diff --git a/src/aclk/mqtt_websockets/c-rbuf/cringbuffer_internal.h b/src/aclk/mqtt_websockets/c-rbuf/cringbuffer_internal.h deleted file mode 100644 index d32de187ce3bf5..00000000000000 --- a/src/aclk/mqtt_websockets/c-rbuf/cringbuffer_internal.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only - -#ifndef CRINGBUFFER_INTERNAL_H -#define CRINGBUFFER_INTERNAL_H - -struct rbuf_t { - char *data; - - // points to next byte where we can write - char *head; - // points to oldest (next to be poped) readable byte - char *tail; - - // to avoid calculating data + size - // all the time - char *end; - - size_t size; - size_t size_data; -}; - -/* this exists so that it can be tested by unit tests - * without optimization that resets head and tail to - * beginning if buffer empty - */ -inline static int rbuf_bump_tail_noopt(rbuf_t buffer, size_t bytes) -{ - if (bytes > buffer->size_data) - return 0; - int i = buffer->tail - buffer->data; - buffer->tail = &buffer->data[(i + bytes) % buffer->size]; - buffer->size_data -= bytes; - - return 1; -} - -#endif diff --git a/src/aclk/mqtt_websockets/c-rbuf/ringbuffer_test.c b/src/aclk/mqtt_websockets/c-rbuf/ringbuffer_test.c deleted file mode 100644 index 6a17c99567c098..00000000000000 --- a/src/aclk/mqtt_websockets/c-rbuf/ringbuffer_test.c +++ /dev/null @@ -1,485 +0,0 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only - -#include "ringbuffer.h" - -// to be able to access internals -// never do this from app -#include "../src/ringbuffer_internal.h" - -#include -#include - -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" -#define KGRN "\x1B[32m" -#define KYEL "\x1B[33m" -#define KBLU "\x1B[34m" -#define KMAG 
"\x1B[35m" -#define KCYN "\x1B[36m" -#define KWHT "\x1B[37m" - -#define UNUSED(x) (void)(x) - -int total_fails = 0; -int total_tests = 0; -int total_checks = 0; - -#define CHECK_EQ_RESULT(x, y) \ - while (s_len--) \ - putchar('.'); \ - printf("%s%s " KNRM "\n", (((x) == (y)) ? KGRN : KRED), (((x) == (y)) ? " PASS " : " FAIL ")); \ - if ((x) != (y)) \ - total_fails++; \ - total_checks++; - -#define CHECK_EQ_PREFIX(x, y, prefix, subtest_name, ...) \ - { \ - int s_len = \ - 100 - \ - printf(("Checking: " KWHT "%s %s%2d " subtest_name " " KNRM), __func__, prefix, subtest_no, ##__VA_ARGS__); \ - CHECK_EQ_RESULT(x, y) \ - } - -#define CHECK_EQ(x, y, subtest_name, ...) \ - { \ - int s_len = \ - 100 - printf(("Checking: " KWHT "%s %2d " subtest_name " " KNRM), __func__, subtest_no, ##__VA_ARGS__); \ - CHECK_EQ_RESULT(x, y) \ - } - -#define TEST_DECL() \ - int subtest_no = 0; \ - printf(KYEL "TEST SUITE: %s\n" KNRM, __func__); \ - total_tests++; - -static void test_rbuf_get_linear_insert_range() -{ - TEST_DECL(); - - // check empty buffer behaviour - rbuf_t buff = rbuf_create(5); - char *to_write; - size_t ret; - to_write = rbuf_get_linear_insert_range(buff, &ret); - CHECK_EQ(ret, 5, "empty size"); - CHECK_EQ(to_write, buff->head, "empty write ptr"); - rbuf_free(buff); - - // check full buffer behaviour - subtest_no++; - buff = rbuf_create(5); - ret = rbuf_bump_head(buff, 5); - CHECK_EQ(ret, 1, "ret"); - to_write = rbuf_get_linear_insert_range(buff, &ret); - CHECK_EQ(to_write, NULL, "writable NULL"); - CHECK_EQ(ret, 0, "writable count = 0"); - - // check buffer flush - subtest_no++; - rbuf_flush(buff); - CHECK_EQ(rbuf_bytes_free(buff), 5, "size_free"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check behaviour head > tail - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 3); - to_write = rbuf_get_linear_insert_range(buff, &ret); - CHECK_EQ(to_write, 
buff->head, "write location"); - CHECK_EQ(ret, 2, "availible to linear write"); - - // check behaviour tail > head - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 5); - rbuf_bump_tail(buff, 3); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 3, "tail_ptr"); - to_write = rbuf_get_linear_insert_range(buff, &ret); - CHECK_EQ(to_write, buff->head, "write location"); - CHECK_EQ(ret, 3, "availible to linear write"); - -/* // check behaviour tail and head at last element - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 4); - rbuf_bump_tail(buff, 4); - CHECK_EQ(buff->head, buff->end - 1, "head_ptr"); - CHECK_EQ(buff->tail, buff->end - 1, "tail_ptr"); - to_write = rbuf_get_linear_insert_range(buff, &ret); - CHECK_EQ(to_write, buff->head, "write location"); - CHECK_EQ(ret, 1, "availible to linear write");*/ - - // check behaviour tail and head at last element - // after rbuf_bump_tail optimisation that restarts buffer - // in case tail catches up with head - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 4); - rbuf_bump_tail(buff, 4); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - to_write = rbuf_get_linear_insert_range(buff, &ret); - CHECK_EQ(to_write, buff->head, "write location"); - CHECK_EQ(ret, 5, "availible to linear write"); -} - -#define _CHECK_EQ(x, y, subtest_name, ...) 
CHECK_EQ_PREFIX(x, y, prefix, subtest_name, ##__VA_ARGS__) -#define _PREFX "(size = %5zu) " -static void test_rbuf_bump_head_bsize(size_t size) -{ - char prefix[16]; - snprintf(prefix, 16, _PREFX, size); - int subtest_no = 0; - rbuf_t buff = rbuf_create(size); - _CHECK_EQ(rbuf_bytes_free(buff), size, "size_free"); - - subtest_no++; - int ret = rbuf_bump_head(buff, size); - _CHECK_EQ(buff->data, buff->head, "loc"); - _CHECK_EQ(ret, 1, "ret"); - _CHECK_EQ(buff->size_data, buff->size, "size"); - _CHECK_EQ(rbuf_bytes_free(buff), 0, "size_free"); - - subtest_no++; - ret = rbuf_bump_head(buff, 1); - _CHECK_EQ(buff->data, buff->head, "loc no move"); - _CHECK_EQ(ret, 0, "ret error"); - _CHECK_EQ(buff->size_data, buff->size, "size"); - _CHECK_EQ(rbuf_bytes_free(buff), 0, "size_free"); - rbuf_free(buff); - - subtest_no++; - buff = rbuf_create(size); - ret = rbuf_bump_head(buff, size - 1); - _CHECK_EQ(buff->head, buff->end-1, "loc end"); - rbuf_free(buff); -} -#undef _CHECK_EQ - -static void test_rbuf_bump_head() -{ - TEST_DECL(); - UNUSED(subtest_no); - - size_t test_sizes[] = { 1, 2, 3, 5, 6, 7, 8, 100, 99999, 0 }; - for (int i = 0; test_sizes[i]; i++) - test_rbuf_bump_head_bsize(test_sizes[i]); -} - -static void test_rbuf_bump_tail_noopt(int subtest_no) -{ - rbuf_t buff = rbuf_create(10); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - - subtest_no++; - int ret = rbuf_bump_head(buff, 5); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_free(buff), 5, "size_free"); - CHECK_EQ(rbuf_bytes_available(buff), 5, "size_avail"); - CHECK_EQ(buff->head, buff->data + 5, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 2); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 3, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 7, "size_free"); - CHECK_EQ(buff->head, buff->data + 5, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 2, "tail_ptr"); - 
- subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 3); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(buff->head, buff->data + 5, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 5, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 1); - CHECK_EQ(ret, 0, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(buff->head, buff->data + 5, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 5, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_head(buff, 7); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 7, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 3, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 5, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 5); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 2, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 8, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check tail can't overrun head - subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 3); - CHECK_EQ(ret, 0, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 2, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 8, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check head can't overrun tail - subtest_no++; - ret = rbuf_bump_head(buff, 9); - CHECK_EQ(ret, 0, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 2, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 8, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check head can fill the buffer - subtest_no++; - ret = rbuf_bump_head(buff, 8); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 10, "size_avail"); - 
CHECK_EQ(rbuf_bytes_free(buff), 0, "size_free"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check can empty the buffer - subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 10); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); -} - -static void test_rbuf_bump_tail_opt(int subtest_no) -{ - subtest_no++; - rbuf_t buff = rbuf_create(10); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - - subtest_no++; - int ret = rbuf_bump_head(buff, 5); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_free(buff), 5, "size_free"); - CHECK_EQ(rbuf_bytes_available(buff), 5, "size_avail"); - CHECK_EQ(buff->head, buff->data + 5, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail(buff, 2); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 3, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 7, "size_free"); - CHECK_EQ(buff->head, buff->data + 5, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 2, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail(buff, 3); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail_noopt(buff, 1); - CHECK_EQ(ret, 0, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_head(buff, 6); - ret = rbuf_bump_tail(buff, 5); - ret = rbuf_bump_head(buff, 6); - CHECK_EQ(ret, 1, "ret"); - 
CHECK_EQ(rbuf_bytes_available(buff), 7, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 3, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data + 5, "tail_ptr"); - - subtest_no++; - ret = rbuf_bump_tail(buff, 5); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 2, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 8, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check tail can't overrun head - subtest_no++; - ret = rbuf_bump_tail(buff, 3); - CHECK_EQ(ret, 0, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 2, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 8, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check head can't overrun tail - subtest_no++; - ret = rbuf_bump_head(buff, 9); - CHECK_EQ(ret, 0, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 2, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 8, "size_free"); - CHECK_EQ(buff->head, buff->data + 2, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check head can fill the buffer - subtest_no++; - ret = rbuf_bump_head(buff, 8); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 10, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 0, "size_free"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); - - // check can empty the buffer - subtest_no++; - ret = rbuf_bump_tail(buff, 10); - CHECK_EQ(ret, 1, "ret"); - CHECK_EQ(rbuf_bytes_available(buff), 0, "size_avail"); - CHECK_EQ(rbuf_bytes_free(buff), 10, "size_free"); - CHECK_EQ(buff->head, buff->data, "head_ptr"); - CHECK_EQ(buff->tail, buff->data, "tail_ptr"); -} - -static void test_rbuf_bump_tail() -{ - TEST_DECL(); - test_rbuf_bump_tail_noopt(subtest_no); - test_rbuf_bump_tail_opt(subtest_no); -} - -#define ASCII_A 0x61 -#define ASCII_Z 0x7A -#define TEST_DATA_SIZE 
ASCII_Z-ASCII_A+1 -static void test_rbuf_push() -{ - TEST_DECL(); - rbuf_t buff = rbuf_create(10); - int i; - char test_data[TEST_DATA_SIZE]; - - for (int i = 0; i <= TEST_DATA_SIZE; i++) - test_data[i] = i + ASCII_A; - - int ret = rbuf_push(buff, test_data, 10); - CHECK_EQ(ret, 10, "written 10 bytes"); - CHECK_EQ(rbuf_bytes_free(buff), 0, "empty size == 0"); - for (i = 0; i < 10; i++) - CHECK_EQ(buff->data[i], i + ASCII_A, "Check data"); - - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 5); - rbuf_bump_tail_noopt(buff, 5); //to not reset both pointers to beginning - ret = rbuf_push(buff, test_data, 10); - CHECK_EQ(ret, 10, "written 10 bytes"); - for (i = 0; i < 10; i++) - CHECK_EQ(buff->data[i], ((i+5)%10) + ASCII_A, "Check Data"); - - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 9); - rbuf_bump_tail_noopt(buff, 9); - ret = rbuf_push(buff, test_data, 10); - CHECK_EQ(ret, 10, "written 10 bytes"); - for (i = 0; i < 10; i++) - CHECK_EQ(buff->data[i], ((i + 1) % 10) + ASCII_A, "Check data"); - - // let tail > head - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 9); - rbuf_bump_tail_noopt(buff, 9); - rbuf_bump_head(buff, 1); - ret = rbuf_push(buff, test_data, 9); - CHECK_EQ(ret, 9, "written 9 bytes"); - CHECK_EQ(buff->head, buff->end - 1, "head_ptr"); - CHECK_EQ(buff->tail, buff->head, "tail_ptr"); - rbuf_bump_tail(buff, 1); - //TODO push byte can be usefull optimisation - ret = rbuf_push(buff, &test_data[9], 1); - CHECK_EQ(ret, 1, "written 1 byte"); - CHECK_EQ(rbuf_bytes_free(buff), 0, "empty size == 0"); - for (i = 0; i < 10; i++) - CHECK_EQ(buff->data[i], i + ASCII_A, "Check data"); - - subtest_no++; - rbuf_flush(buff); - rbuf_bump_head(buff, 9); - rbuf_bump_tail_noopt(buff, 7); - rbuf_bump_head(buff, 1); - ret = rbuf_push(buff, test_data, 7); - CHECK_EQ(ret, 7, "written 7 bytes"); - CHECK_EQ(buff->head, buff->data + 7, "head_ptr"); - CHECK_EQ(buff->tail, buff->head, "tail_ptr"); - rbuf_bump_tail(buff, 3); - CHECK_EQ(buff->tail, 
buff->data, "tail_ptr"); - //TODO push byte can be usefull optimisation - ret = rbuf_push(buff, &test_data[7], 3); - CHECK_EQ(ret, 3, "written 3 bytes"); - CHECK_EQ(rbuf_bytes_free(buff), 0, "empty size == 0"); - for (i = 0; i < 10; i++) - CHECK_EQ(buff->data[i], i + ASCII_A, "Check data"); - - // test can't overfill the buffer - subtest_no++; - rbuf_flush(buff); - rbuf_push(buff, test_data, TEST_DATA_SIZE); - CHECK_EQ(ret, 3, "written 10 bytes"); - for (i = 0; i < 10; i++) - CHECK_EQ(buff->data[i], i + ASCII_A, "Check data"); -} - -#define TEST_RBUF_FIND_BYTES_SIZE 10 -void test_rbuf_find_bytes() -{ - TEST_DECL(); - rbuf_t buff = rbuf_create(TEST_RBUF_FIND_BYTES_SIZE); - char *filler_3 = " "; - char *needle = "needle"; - int idx; - char *ptr; - - // make sure needle is wrapped aroung in the buffer - // to test we still can find it - // target "edle ne" - rbuf_bump_head(buff, TEST_RBUF_FIND_BYTES_SIZE / 2); - rbuf_push(buff, filler_3, strlen(filler_3)); - rbuf_bump_tail(buff, TEST_RBUF_FIND_BYTES_SIZE / 2); - rbuf_push(buff, needle, strlen(needle)); - ptr = rbuf_find_bytes(buff, needle, strlen(needle), &idx); - CHECK_EQ(ptr, buff->data + (TEST_RBUF_FIND_BYTES_SIZE / 2) + strlen(filler_3), "Pointer to needle correct"); - CHECK_EQ(idx, ptr - buff->tail, "Check needle index"); -} - -int main() -{ - test_rbuf_bump_head(); - test_rbuf_bump_tail(); - test_rbuf_get_linear_insert_range(); - test_rbuf_push(); - test_rbuf_find_bytes(); - - printf( - KNRM "Total Tests %d, Total Checks %d, Successful Checks %d, Failed Checks %d\n", - total_tests, total_checks, total_checks - total_fails, total_fails); - if (total_fails) - printf(KRED "!!!Some test(s) Failed!!!\n"); - else - printf(KGRN "ALL TESTS PASSED\n"); - - return total_fails; -} diff --git a/src/aclk/mqtt_websockets/common_internal.h b/src/aclk/mqtt_websockets/common_internal.h index 2be1c45b88b140..d79dbb3f38c84a 100644 --- a/src/aclk/mqtt_websockets/common_internal.h +++ b/src/aclk/mqtt_websockets/common_internal.h @@ 
-1,27 +1,12 @@ -// SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later #ifndef COMMON_INTERNAL_H #define COMMON_INTERNAL_H #include "endian_compat.h" -#ifdef MQTT_WSS_CUSTOM_ALLOC -#include "../helpers/mqtt_wss_pal.h" -#else -#define mw_malloc(...) malloc(__VA_ARGS__) -#define mw_calloc(...) calloc(__VA_ARGS__) -#define mw_free(...) free(__VA_ARGS__) -#define mw_strdup(...) strdup(__VA_ARGS__) -#define mw_realloc(...) realloc(__VA_ARGS__) -#endif - #ifndef MQTT_WSS_FRAG_MEMALIGN #define MQTT_WSS_FRAG_MEMALIGN (8) #endif -#define OPENSSL_VERSION_095 0x00905100L -#define OPENSSL_VERSION_097 0x00907000L -#define OPENSSL_VERSION_110 0x10100000L -#define OPENSSL_VERSION_111 0x10101000L - #endif /* COMMON_INTERNAL_H */ diff --git a/src/aclk/mqtt_websockets/common_public.h b/src/aclk/mqtt_websockets/common_public.h index a855737f9e1a1c..8f3b4f7d154b52 100644 --- a/src/aclk/mqtt_websockets/common_public.h +++ b/src/aclk/mqtt_websockets/common_public.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + #ifndef MQTT_WEBSOCKETS_COMMON_PUBLIC_H #define MQTT_WEBSOCKETS_COMMON_PUBLIC_H diff --git a/src/aclk/mqtt_websockets/mqtt_ng.c b/src/aclk/mqtt_websockets/mqtt_ng.c index 8ad6bd5c98bbf9..96099aa687cd64 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.c +++ b/src/aclk/mqtt_websockets/mqtt_ng.c @@ -1,16 +1,10 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif -#include -#include -#include -#include -#include - -#include "c_rhash/c_rhash.h" +#include "libnetdata/libnetdata.h" #include "common_internal.h" #include "mqtt_constants.h" @@ -26,10 +20,8 @@ #define SMALL_STRING_DONT_FRAGMENT_LIMIT 128 -#define MIN(a,b) (((a)<(b))?(a):(b)) - -#define LOCK_HDR_BUFFER(buffer) pthread_mutex_lock(&((buffer)->mutex)) -#define UNLOCK_HDR_BUFFER(buffer) pthread_mutex_unlock(&((buffer)->mutex)) +#define LOCK_HDR_BUFFER(buffer) 
spinlock_lock(&((buffer)->spinlock)) +#define UNLOCK_HDR_BUFFER(buffer) spinlock_unlock(&((buffer)->spinlock)) #define BUFFER_FRAG_GARBAGE_COLLECT 0x01 // some packets can be marked for garbage collection @@ -75,7 +67,7 @@ struct transaction_buffer { // to be able to revert state easily // in case of error mid processing struct header_buffer state_backup; - pthread_mutex_t mutex; + SPINLOCK spinlock; struct buffer_fragment *sending_frag; }; @@ -578,7 +570,7 @@ static int transaction_buffer_grow(struct transaction_buffer *buf, mqtt_wss_log_ inline static int transaction_buffer_init(struct transaction_buffer *to_init, size_t size) { - pthread_mutex_init(&to_init->mutex, NULL); + spinlock_init(&to_init->spinlock); to_init->hdr_buffer.size = size; to_init->hdr_buffer.data = mallocz(size); @@ -593,7 +585,6 @@ inline static int transaction_buffer_init(struct transaction_buffer *to_init, si static void transaction_buffer_destroy(struct transaction_buffer *to_init) { buffer_purge(&to_init->hdr_buffer); - pthread_mutex_destroy(&to_init->mutex); freez(to_init->hdr_buffer.data); } diff --git a/src/aclk/mqtt_websockets/mqtt_ng.h b/src/aclk/mqtt_websockets/mqtt_ng.h index 4b0584d58cfec9..8bd7434d58dd36 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.h +++ b/src/aclk/mqtt_websockets/mqtt_ng.h @@ -1,10 +1,5 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later -#include -#include -#include - -#include "c-rbuf/cringbuffer.h" #include "common_public.h" #define MQTT_NG_MSGGEN_OK 0 diff --git a/src/aclk/mqtt_websockets/mqtt_wss_client.c b/src/aclk/mqtt_websockets/mqtt_wss_client.c index 2d231ef44ec33d..3d3933118b3073 100644 --- a/src/aclk/mqtt_websockets/mqtt_wss_client.c +++ b/src/aclk/mqtt_websockets/mqtt_wss_client.c @@ -1,27 +1,15 @@ -// SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include "libnetdata/libnetdata.h" #include "mqtt_wss_client.h" 
#include "mqtt_ng.h" #include "ws_client.h" #include "common_internal.h" -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - #define PIPE_READ_END 0 #define PIPE_WRITE_END 1 #define POLLFD_SOCKET 0 diff --git a/src/aclk/mqtt_websockets/mqtt_wss_client.h b/src/aclk/mqtt_websockets/mqtt_wss_client.h index 4bdea4db9f158f..bafd136d1d5a2f 100644 --- a/src/aclk/mqtt_websockets/mqtt_wss_client.h +++ b/src/aclk/mqtt_websockets/mqtt_wss_client.h @@ -1,12 +1,8 @@ -// SPDX-License-Identifier: GPL-3.0-only -// Copyright (C) 2020 Timotej Šiškovič +// SPDX-License-Identifier: GPL-3.0-or-later #ifndef MQTT_WSS_CLIENT_H #define MQTT_WSS_CLIENT_H -#include -#include //size_t - #include "mqtt_wss_log.h" #include "common_public.h" diff --git a/src/aclk/mqtt_websockets/mqtt_wss_log.c b/src/aclk/mqtt_websockets/mqtt_wss_log.c index e5da76fcfb5504..def2236af63937 100644 --- a/src/aclk/mqtt_websockets/mqtt_wss_log.c +++ b/src/aclk/mqtt_websockets/mqtt_wss_log.c @@ -1,12 +1,8 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later -#include -#include -#include -#include +#include "libnetdata/libnetdata.h" #include "mqtt_wss_log.h" -#include "common_internal.h" struct mqtt_wss_log_ctx { mqtt_wss_log_callback_t extern_log_fnc; diff --git a/src/aclk/mqtt_websockets/ws_client.c b/src/aclk/mqtt_websockets/ws_client.c index a6b9b23f3db287..ed39967ce88a61 100644 --- a/src/aclk/mqtt_websockets/ws_client.c +++ b/src/aclk/mqtt_websockets/ws_client.c @@ -1,31 +1,10 @@ -// Copyright (C) 2020 Timotej Šiškovič -// SPDX-License-Identifier: GPL-3.0-only -// -// This program is free software: you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free Software Foundation, version 3. 
-// -// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// See the GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License along with this program. -// If not, see . - -#include -#include -#include -#include -#include - -#include +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" #include "ws_client.h" #include "common_internal.h" -#ifdef MQTT_WEBSOCKETS_DEBUG -#include "../c-rbuf/src/ringbuffer_internal.h" -#endif - #define UNIT_LOG_PREFIX "ws_client: " #define FATAL(fmt, ...) mws_fatal(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) #define ERROR(fmt, ...) mws_error(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) diff --git a/src/aclk/mqtt_websockets/ws_client.h b/src/aclk/mqtt_websockets/ws_client.h index 0ccbd29a80a05f..75e780697b802e 100644 --- a/src/aclk/mqtt_websockets/ws_client.h +++ b/src/aclk/mqtt_websockets/ws_client.h @@ -1,14 +1,10 @@ -// SPDX-License-Identifier: GPL-3.0-only -// Copyright (C) 2020 Timotej Šiškovič +// SPDX-License-Identifier: GPL-3.0-or-later #ifndef WS_CLIENT_H #define WS_CLIENT_H -#include "c-rbuf/cringbuffer.h" #include "mqtt_wss_log.h" -#include - #define WS_CLIENT_NEED_MORE_BYTES 0x10 #define WS_CLIENT_PARSING_DONE 0x11 #define WS_CLIENT_CONNECTION_CLOSED 0x12 diff --git a/src/claim/claim.c b/src/claim/claim.c index b3c4f9e7b3bf53..03fb18c388fefa 100644 --- a/src/claim/claim.c +++ b/src/claim/claim.c @@ -166,8 +166,8 @@ bool load_claiming_state(void) { have_claimed_id = true; } - invalidate_node_instances(&localhost->host_uuid, have_claimed_id ? &uuid.uuid : NULL); - metaqueue_store_claim_id(&localhost->host_uuid, have_claimed_id ? &uuid.uuid : NULL); + invalidate_node_instances(&localhost->host_id.uuid, have_claimed_id ? 
&uuid.uuid : NULL); + metaqueue_store_claim_id(&localhost->host_id.uuid, have_claimed_id ? &uuid.uuid : NULL); errno_clear(); @@ -197,7 +197,7 @@ CLOUD_STATUS claim_reload_and_wait_online(void) { int ms = 0; do { status = cloud_status(); - if ((status == CLOUD_STATUS_ONLINE || status == CLOUD_STATUS_INDIRECT) && !uuid_is_null(localhost->host_uuid)) + if ((status == CLOUD_STATUS_ONLINE || status == CLOUD_STATUS_INDIRECT) && !UUIDiszero(localhost->host_id)) break; sleep_usec(50 * USEC_PER_MS); diff --git a/src/claim/cloud-status.c b/src/claim/cloud-status.c index 426c59a68c8aed..45db177e916301 100644 --- a/src/claim/cloud-status.c +++ b/src/claim/cloud-status.c @@ -32,7 +32,7 @@ CLOUD_STATUS cloud_status(void) { if(localhost->sender && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS) && stream_has_capability(localhost->sender, STREAM_CAP_NODE_ID) && - !uuid_is_null(localhost->node_id) && + !UUIDiszero(localhost->node_id) && !UUIDiszero(localhost->aclk.claim_id_of_parent)) return CLOUD_STATUS_INDIRECT; diff --git a/src/claim/netdata_claim.c b/src/claim/netdata_claim.c new file mode 100644 index 00000000000000..6091ebe91023ad --- /dev/null +++ b/src/claim/netdata_claim.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define UNICODE +#define _UNICODE +#include +#include +#include +#include +#include +#include + +#include "netdata_claim.h" + +LPWSTR token = NULL; +LPWSTR room = NULL; +LPWSTR proxy = NULL; +LPWSTR *argv = NULL; + +char *aToken = NULL; +char *aRoom = NULL; +char *aProxy = NULL; +int insecure = 0; + +LPWSTR netdata_claim_get_formatted_message(LPWSTR pMessage, ...) 
+{
+    LPWSTR pBuffer = NULL;
+
+    va_list args = NULL;
+    va_start(args, pMessage);
+
+    // FORMAT_MESSAGE_ALLOCATE_BUFFER: the API allocates the result and stores its address in pBuffer,
+    // the caller releases it with LocalFree()
+    FormatMessage(FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER, pMessage, 0, 0, (LPWSTR)&pBuffer,
+                  0, &args);
+    va_end(args);
+
+    return pBuffer;
+}
+
+// Common Functions
+
+/**
+ * Error Exit
+ *
+ * Show a message box with the name of the function that failed and the value of GetLastError(),
+ * then terminate the process using that error as exit code.
+ *
+ * @param function name of the function that failed.
+ */
+void netdata_claim_error_exit(wchar_t *function)
+{
+    DWORD error = GetLastError();
+    // An insert without a format is read as a string (!s!), so the numeric error code needs an explicit one
+    LPWSTR pMessage = L"The function %1 failed with error %2!lu!.";
+    LPWSTR pBuffer = netdata_claim_get_formatted_message(pMessage, function, error);
+
+    if (pBuffer) {
+        MessageBoxW(NULL, pBuffer, L"Error", MB_OK|MB_ICONERROR);
+        LocalFree(pBuffer);
+    }
+
+    ExitProcess(error);
+}
+
+/**
+ * Parse Args
+ *
+ * Parse arguments identifying necessity to make a window.
+ * Recognized switches (case insensitive), each one followed by its value:
+ *   /T token, /R rooms, /P proxy, /I insecure (integer).
+ * A switch given as the last argument, without a value, is ignored.
+ *
+ * @param argc number of arguments
+ * @param argv A pointer for all arguments given
+ *
+ * @return argc when both a token and a room were given, 0 otherwise.
+ */
+int nd_claim_parse_args(int argc, LPWSTR *argv)
+{
+    for (int i = 1; i < argc; i++) {
+        // Switches are compared ignoring case, so /t and /T are the same option.
+        // The branches are exclusive: the value consumed by one switch is never re-read as another switch.
+        if (wcscasecmp(L"/T", argv[i]) == 0) {
+            if (argc <= i + 1)
+                continue;
+
+            token = argv[++i];
+        } else if (wcscasecmp(L"/R", argv[i]) == 0) {
+            if (argc <= i + 1)
+                continue;
+
+            room = argv[++i];
+        } else if (wcscasecmp(L"/P", argv[i]) == 0) {
+            if (argc <= i + 1)
+                continue;
+
+            i++;
+            // Minimum IPV4
+            if (wcslen(argv[i]) >= 8) {
+                proxy = argv[i];
+            }
+        } else if (wcscasecmp(L"/I", argv[i]) == 0) {
+            if (argc <= i + 1)
+                continue;
+
+            // Parse the whole value straight from the wide string; no temporary narrow copy is needed
+            insecure = (int)wcstol(argv[++i], NULL, 10);
+        }
+    }
+
+    if (!token || !room)
+        return 0;
+
+    return argc;
+}
+
+/**
+ * Duplicate as multibyte
+ *
+ * Convert a wide string into a newly allocated multibyte string.
+ *
+ * @param src the wide string to convert.
+ *
+ * @return a NUL terminated string the caller must free(), or NULL when the allocation or the
+ *         conversion fails.
+ */
+static char *netdata_claim_dup_as_mbs(const wchar_t *src)
+{
+    // Worst case every wide character expands to MB_CUR_MAX bytes; the terminator is counted separately
+    size_t capacity = wcslen(src) * MB_CUR_MAX;
+    char *dst = calloc(capacity + 1, sizeof(char));
+    if (!dst)
+        return NULL;
+
+    size_t copied = wcstombs(dst, src, capacity);
+    if (copied == (size_t)-1) {
+        free(dst);
+        return NULL;
+    }
+
+    dst[copied] = '\0';
+    return dst;
+}
+
+/**
+ * Prepare Strings
+ *
+ * Create the multibyte copies (aToken, aRoom and, when given, aProxy) of the wide strings
+ * taken from the command line.
+ *
+ * @return 0 on success, -1 when token or room is missing or a copy cannot be created.
+ */
+static int netdata_claim_prepare_strings()
+{
+    if (!token || !room)
+        return -1;
+
+    aToken = netdata_claim_dup_as_mbs(token);
+    if (!aToken)
+        return -1;
+
+    aRoom = netdata_claim_dup_as_mbs(room);
+    if (!aRoom)
+        return -1;
+
+    if (proxy) {
+        aProxy = netdata_claim_dup_as_mbs(proxy);
+        if (!aProxy)
+            return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * Exit Callback
+ *
+ * Release the memory owned by this program. It is registered as signal handler and it is also
+ * called at the end of WinMain(), so every pointer is reset to make a second call harmless.
+ *
+ * @param signal the signal number (not used).
+ */
+static void netdata_claim_exit_callback(int signal)
+{
+    (void)signal;
+
+    free(aToken);
+    aToken = NULL;
+
+    free(aRoom);
+    aRoom = NULL;
+
+    free(aProxy);
+    aProxy = NULL;
+
+    if (argv) {
+        LocalFree(argv);
+        argv = NULL;
+    }
+}
+
+/**
+ * Prepare Data
+ *
+ * Write the content of claim.conf to `out`.
+ *
+ * @param out    the destination buffer
+ * @param length the size of the destination buffer
+ *
+ * @return the value returned by snprintf(): the length of the complete text, which is equal or
+ *         greater than `length` when the text did not fit, or a negative value on error.
+ */
+static inline int netdata_claim_prepare_data(char *out, size_t length)
+{
+    // Without a proxy the option is still written, but commented out
+    char *proxyLabel = (aProxy) ? "proxy = " : "# proxy = ";
+    char *proxyValue = (aProxy) ? aProxy : "";
+    return snprintf(out,
+                    length,
+                    "[global]\n url = https://app.netdata.cloud\n token = %s\n rooms = %s\n %s%s\n insecure = %s",
+                    aToken,
+                    aRoom,
+                    proxyLabel,
+                    proxyValue,
+                    (insecure) ? "YES" : "NO"
+                    );
+}
+
+/**
+ * Get Path
+ *
+ * Store the current directory in `path`; when it contains "\usr\bin" the last seven characters
+ * ("usr\bin") are removed, leaving the path that precedes them with its trailing backslash.
+ *
+ * @param path destination buffer, at least WINDOWS_MAX_PATH bytes long.
+ *
+ * @return 0 on success, -1 when the current directory cannot be read or does not fit in `path`.
+ */
+static int netdata_claim_get_path(char *path)
+{
+    char *usrPath = { "\\usr\\bin" };
+    DWORD length = GetCurrentDirectoryA(WINDOWS_MAX_PATH, path);
+    // Zero is a failure; a value that does not fit is the size a larger buffer would need, `path` was not filled
+    if (!length || length >= WINDOWS_MAX_PATH) {
+        return -1;
+    }
+
+    if (strstr(path, usrPath)) {
+        length -= 7;
+        path[length] = '\0';
+    }
+
+    return 0;
+}
+
+/**
+ * Write Config
+ *
+ * Create (or replace) <path>\etc\netdata\claim.conf with the claiming options.
+ *
+ * @param path the base directory of the installation.
+ */
+static void netdata_claim_write_config(char *path)
+{
+    char configPath[WINDOWS_MAX_PATH + 1];
+    char data[WINDOWS_MAX_PATH + 1];
+    snprintf(configPath, WINDOWS_MAX_PATH - 1, "%s\\etc\\netdata\\claim.conf", path);
+
+    // Build the content before touching the file, a text that does not fit must not replace a valid file
+    int prepared = netdata_claim_prepare_data(data, WINDOWS_MAX_PATH);
+    if (prepared < 0 || prepared >= WINDOWS_MAX_PATH) {
+        MessageBoxW(NULL, L"Cannot write claim.conf.", L"Error", MB_OK|MB_ICONERROR);
+        return;
+    }
+
+    // CREATE_ALWAYS truncates an existing file, so nothing from a previous and longer claim.conf is left behind
+    HANDLE hf = CreateFileA(configPath, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (hf == INVALID_HANDLE_VALUE)
+        netdata_claim_error_exit(L"CreateFileA");
+
+    DWORD length = (DWORD)prepared;
+    DWORD written = 0;
+
+    BOOL ret = WriteFile(hf, data, length, &written, NULL);
+    if (!ret) {
+        CloseHandle(hf);
+        netdata_claim_error_exit(L"WriteFileA");
+    }
+
+    if (length != written)
+        MessageBoxW(NULL, L"Cannot write claim.conf.",
L"Error", MB_OK|MB_ICONERROR);
+
+    CloseHandle(hf);
+}
+
+/**
+ * WinMain
+ *
+ * Entry point. When the command line has a token and a room, claim.conf is written; otherwise
+ * the help window is shown.
+ *
+ * @param hInstance     handle of this instance
+ * @param hPrevInstance not used
+ * @param lpCmdLine     not used, the command line is read with GetCommandLineW()
+ * @param nCmdShow      how the window is shown
+ *
+ * @return the value returned by the window loop, 0 after claim.conf was handled, 1 when the
+ *         claiming data could not be prepared.
+ */
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow)
+{
+    (void)hPrevInstance;
+    (void)lpCmdLine;
+
+    signal(SIGABRT, netdata_claim_exit_callback);
+    signal(SIGINT, netdata_claim_exit_callback);
+    signal(SIGTERM, netdata_claim_exit_callback);
+
+    // Keep the vector in the file-level `argv`: it is the one netdata_claim_exit_callback() releases,
+    // and token, room and proxy point inside it
+    int argc = 0;
+    argv = CommandLineToArgvW(GetCommandLineW(), &argc);
+    if (!argv)
+        argc = 0;
+
+    if (argc)
+        argc = nd_claim_parse_args(argc, argv);
+
+    // When no data is given, user must to use graphic mode
+    int ret = 0;
+    if (!argc) {
+        ret = netdata_claim_window_loop(hInstance, nCmdShow);
+    } else {
+        if (netdata_claim_prepare_strings()) {
+            ret = 1;
+            goto exit_claim;
+        }
+
+        char basePath[WINDOWS_MAX_PATH];
+        if (!netdata_claim_get_path(basePath)) {
+            netdata_claim_write_config(basePath);
+        } else {
+            ret = 1;
+        }
+    }
+
+exit_claim:
+    netdata_claim_exit_callback(0);
+
+    return ret;
+}
diff --git a/src/claim/netdata_claim.h b/src/claim/netdata_claim.h
new file mode 100644
index 00000000000000..f05e05a5a7375c
--- /dev/null
+++ b/src/claim/netdata_claim.h
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_CLAIM_H_
+# define NETDATA_CLAIM_H_ 1
+
+#include <windows.h>
+#include "netdata_claim_window.h"
+
+extern LPWSTR token;
+extern LPWSTR room;
+extern LPWSTR proxy;
+
+void netdata_claim_error_exit(wchar_t *function);
+
+/**
+ * Convert Str
+ *
+ * Convert a wide string to a multibyte string and terminate it.
+ *
+ * @param dst destination, it must have room for `len` bytes plus the terminator
+ * @param src the wide string to convert
+ * @param len the maximum number of bytes to store, terminator not included
+ */
+static inline void netdata_claim_convert_str(char *dst, wchar_t *src, size_t len) {
+    size_t copied = wcstombs(dst, src, len);
+    // wcstombs() reports a character it cannot convert with (size_t)-1, which must not be used as index
+    if (copied == (size_t)-1)
+        copied = 0;
+
+    dst[copied] = '\0';
+}
+
+#endif //NETDATA_CLAIM_H_
diff --git a/src/claim/netdata_claim_window.c b/src/claim/netdata_claim_window.c
new file mode 100644
index 00000000000000..5b8e81335c080d
--- /dev/null
+++ b/src/claim/netdata_claim_window.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#define UNICODE
+#define _UNICODE
+#include <windows.h>
+#include "richedit.h"
+#include "tchar.h"
+#include "netdata_claim.h"
+
+static LPCTSTR szWindowClass = _T("DesktopApp");
+
+static HINSTANCE hInst;
+static HWND hToken;
+static HWND hRoom;
+
+LRESULT
CALLBACK WndProc(HWND hNetdatawnd, UINT message, WPARAM wParam, LPARAM lParam)
+{
+    // Window procedure: paints the help text and ends the message loop on WM_COMMAND or WM_DESTROY
+    PAINTSTRUCT ps;
+    HDC hdc;
+    LPCTSTR topMsg[] = { L" Help",
+                         L" ",
+                         L"In this initial version of the software, there are no fields for data",
+                         L" entry. To claim your agent, you must use the following options:",
+                         L" ",
+                         L"/T TOKEN: The cloud token;",
+                         L"/R ROOMS: A list of rooms to claim;",
+                         L"/P PROXY: The proxy information;",
+                         L"/I : Use insecure connection;"
+    };
+
+    switch (message)
+    {
+        case WM_PAINT: {
+            hdc = BeginPaint(hNetdatawnd, &ps);
+
+            // One message per row, rows are 15 pixels apart starting 5 pixels from the top left corner
+            int total = (int)(sizeof(topMsg) / sizeof(topMsg[0]));
+            for (int i = 0; i < total; i++) {
+                TextOut(hdc, 5, 5 + 15*i, topMsg[i], (int)wcslen(topMsg[i]));
+            }
+            EndPaint(hNetdatawnd, &ps);
+            break;
+        }
+        case WM_COMMAND:
+        case WM_DESTROY: {
+            PostQuitMessage(0);
+            break;
+        }
+        default: {
+            return DefWindowProc(hNetdatawnd, message, wParam, lParam);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Window Loop
+ *
+ * Register the window class, create the help window and dispatch its messages until it is closed.
+ *
+ * @param hInstance handle of the running instance
+ * @param nCmdShow  how the window is shown
+ *
+ * @return 1 when the class cannot be registered or the window cannot be created, otherwise the
+ *         wParam of the last message read by the loop.
+ */
+int netdata_claim_window_loop(HINSTANCE hInstance, int nCmdShow)
+{
+    WNDCLASSEX wcex;
+
+    wcex.cbSize = sizeof(WNDCLASSEX);
+    wcex.style = CS_HREDRAW | CS_VREDRAW;
+    wcex.lpfnWndProc = WndProc;
+    wcex.cbClsExtra = 0;
+    wcex.cbWndExtra = 0;
+    wcex.hInstance = hInstance;
+    wcex.hIcon = LoadIcon(wcex.hInstance, MAKEINTRESOURCEW(11));
+    wcex.hCursor = LoadCursor(NULL, IDC_ARROW);
+    wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
+    wcex.lpszMenuName = NULL;
+    wcex.lpszClassName = szWindowClass;
+    wcex.hIconSm = LoadIcon(wcex.hInstance, IDI_APPLICATION);
+
+    if (!RegisterClassEx(&wcex)) {
+        MessageBoxW(NULL, L"Call to RegisterClassEx failed!", L"Error", 0);
+        return 1;
+    }
+
+    hInst = hInstance;
+
+    HWND hNetdatawnd = CreateWindowExW(WS_EX_OVERLAPPEDWINDOW,
+                                       szWindowClass,
+                                       L"Netdata Claim",
+                                       WS_OVERLAPPEDWINDOW,
+                                       CW_USEDEFAULT, CW_USEDEFAULT,
+                                       460, 220,
+                                       NULL,
+                                       NULL,
+                                       hInstance,
+                                       NULL
+                                       );
+
+    if (!hNetdatawnd) {
+        MessageBoxW(NULL, L"Call to CreateWindow failed!", L"Error", 0);
+        return 1;
+    }
+
+    ShowWindow(hNetdatawnd, nCmdShow);
+    UpdateWindow(hNetdatawnd);
+
+    // GetMessage() returns 0 for WM_QUIT and -1 on error; testing for "> 0" stops on both
+    MSG msg;
+    while (GetMessage(&msg, NULL, 0, 0) > 0) {
+        TranslateMessage(&msg);
+        DispatchMessage(&msg);
+    }
+
+    return (int) msg.wParam;
+}
diff --git a/src/claim/netdata_claim_window.h b/src/claim/netdata_claim_window.h new file mode 100644 index 00000000000000..583ed1cda0fcfc --- /dev/null +++ b/src/claim/netdata_claim_window.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CLAIM_WINDOW_H_ +# define NETDATA_CLAIM_WINDOW_H_ 1 + +// https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation +// https://sourceforge.net/p/mingw/mailman/mingw-users/thread/4C8FD4EB.4050503@xs4all.nl/ +#define WINDOWS_MAX_PATH 8191 + +int netdata_claim_window_loop(HINSTANCE hInstance, int nCmdShow); + +#endif //NETDATA_CLAIM_WINDOW_H_ diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index 7b3208d077ccdb..a1dbc2defaf80c 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -487,8 +487,6 @@ If you don't see the app/service you'd like to monitor in this list: - [Linux Sensors (lm-sensors)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/sensors/integrations/linux_sensors_lm-sensors.md) -- [Linux Sensors (sysfs)](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md) - - [NVML](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nvml.md) - [Nvidia GPU](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nvidia_smi/integrations/nvidia_gpu.md) diff --git a/src/collectors/charts.d.plugin/sensors/README.md b/src/collectors/charts.d.plugin/sensors/README.md deleted file mode 120000 index 7e5a416c43abee..00000000000000 --- a/src/collectors/charts.d.plugin/sensors/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/linux_sensors_sysfs.md \ No newline at end of file diff --git 
a/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md b/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md deleted file mode 100644 index f9221caa191e89..00000000000000 --- a/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md +++ /dev/null @@ -1,235 +0,0 @@ - - -# Linux Sensors (sysfs) - - - - - -Plugin: charts.d.plugin -Module: sensors - - - -## Overview - -Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures). -For all other cases use the [Go collector](/src/go/plugin/go.d/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." - - -It will provide charts for all configured system sensors, by reading sensors directly from the kernel. -The values graphed are the raw hardware values of the sensors. - - -This collector is only supported on the following platforms: - -- Linux - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -By default, the collector will try to read entries under `/sys/devices` - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per sensor chip - -Metrics related to sensor chips. Each chip provides its own set of the following metrics. - -This scope has no labels. 
- -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| sensors.temp | {filename} | Celsius | -| sensors.volt | {filename} | Volts | -| sensors.curr | {filename} | Ampere | -| sensors.power | {filename} | Watt | -| sensors.fans | {filename} | Rotations / Minute | -| sensors.energy | {filename} | Joule | -| sensors.humidity | {filename} | Percent | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Install charts.d plugin - -If [using our official native DEB/RPM packages](/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed. - - -#### Enable the sensors collector - -The `sensors` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `charts.d.conf` file. - -```bash -cd /etc/netdata # Replace this path with your Netdata config directory, if different -sudo ./edit-config charts.d.conf -``` - -Change the value of the `sensors` setting to `force` and uncomment the line. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system. - - - -### Configuration - -#### File - -The configuration file name for this integration is `charts.d/sensors.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config charts.d/sensors.conf -``` -#### Options - -The config file is sourced by the charts.d plugin. It's a standard bash file. 
- -The following collapsed table contains all the options that can be configured for the sensors collector. - - -
Config options - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| sensors_sys_dir | The directory the kernel exposes sensor data. | /sys/devices | no | -| sensors_sys_depth | How deep in the tree to check for sensor data. | 10 | no | -| sensors_source_update | If set to 1, the script will overwrite internal script functions with code generated ones. | 1 | no | -| sensors_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no | -| sensors_priority | The charts priority on the dashboard. | 90000 | no | -| sensors_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | - -
- -#### Examples - -##### Set sensors path depth - -Set a different sensors path depth - -```yaml -# the directory the kernel keeps sensor data -#sensors_sys_dir="/sys/devices" - -# how deep in the tree to check for sensor data -sensors_sys_depth=5 - -# if set to 1, the script will overwrite internal -# script functions with code generated ones -# leave to 1, is faster -#sensors_source_update=1 - -# the data collection frequency -# if unset, will inherit the netdata update frequency -#sensors_update_every= - -# the charts priority on the dashboard -#sensors_priority=90000 - -# the number of retries to do in case of failure -# before disabling the module -#sensors_retries=10 - -``` - - -## Troubleshooting - -### Debug Mode - - -To troubleshoot issues with the `sensors` collector, run the `charts.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `charts.d.plugin` to debug the collector: - - ```bash - ./charts.d.plugin debug 1 sensors - ``` - -### Getting Logs - -If you're encountering problems with the `sensors` collector, follow these steps to retrieve logs and identify potential issues: - -- **Run the command** specific to your system (systemd, non-systemd, or Docker container). -- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. 
- -#### System with systemd - -Use the following command to view logs generated since the last Netdata service restart: - -```bash -journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep sensors -``` - -#### System without systemd - -Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: - -```bash -grep sensors /var/log/netdata/collector.log -``` - -**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. - -#### Docker Container - -If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: - -```bash -docker logs netdata 2>&1 | grep sensors -``` - - diff --git a/src/collectors/charts.d.plugin/sensors/metadata.yaml b/src/collectors/charts.d.plugin/sensors/metadata.yaml deleted file mode 100644 index 9aacdd353933e5..00000000000000 --- a/src/collectors/charts.d.plugin/sensors/metadata.yaml +++ /dev/null @@ -1,182 +0,0 @@ -plugin_name: charts.d.plugin -modules: - - meta: - plugin_name: charts.d.plugin - module_name: sensors - monitored_instance: - name: Linux Sensors (sysfs) - link: "https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface" - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: "microchip.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - sensors - - sysfs - - hwmon - - rpi - - raspberry pi - most_popular: false - overview: - data_collection: - metrics_description: | - Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures). - For all other cases use the [Go collector](/src/go/plugin/go.d/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." 
- method_description: | - It will provide charts for all configured system sensors, by reading sensors directly from the kernel. - The values graphed are the raw hardware values of the sensors. - supported_platforms: - include: [Linux] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "By default, the collector will try to read entries under `/sys/devices`" - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: "Install charts.d plugin" - description: | - If [using our official native DEB/RPM packages](/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed. - - title: "Enable the sensors collector" - description: | - The `sensors` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `charts.d.conf` file. - - ```bash - cd /etc/netdata # Replace this path with your Netdata config directory, if different - sudo ./edit-config charts.d.conf - ``` - - Change the value of the `sensors` setting to `force` and uncomment the line. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system. - configuration: - file: - name: charts.d/sensors.conf - options: - description: | - The config file is sourced by the charts.d plugin. It's a standard bash file. - - The following collapsed table contains all the options that can be configured for the sensors collector. - folding: - title: "Config options" - enabled: true - list: - - name: sensors_sys_dir - description: The directory the kernel exposes sensor data. 
- default_value: "/sys/devices" - required: false - - name: sensors_sys_depth - description: How deep in the tree to check for sensor data. - default_value: 10 - required: false - - name: sensors_source_update - description: If set to 1, the script will overwrite internal script functions with code generated ones. - default_value: 1 - required: false - - name: sensors_update_every - description: The data collection frequency. If unset, will inherit the netdata update frequency. - default_value: 1 - required: false - - name: sensors_priority - description: The charts priority on the dashboard. - default_value: 90000 - required: false - - name: sensors_retries - description: The number of retries to do in case of failure before disabling the collector. - default_value: 10 - required: false - examples: - folding: - enabled: false - title: "Config" - list: - - name: Set sensors path depth - description: Set a different sensors path depth - config: | - # the directory the kernel keeps sensor data - #sensors_sys_dir="/sys/devices" - - # how deep in the tree to check for sensor data - sensors_sys_depth=5 - - # if set to 1, the script will overwrite internal - # script functions with code generated ones - # leave to 1, is faster - #sensors_source_update=1 - - # the data collection frequency - # if unset, will inherit the netdata update frequency - #sensors_update_every= - - # the charts priority on the dashboard - #sensors_priority=90000 - - # the number of retries to do in case of failure - # before disabling the module - #sensors_retries=10 - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: sensor chip - description: "Metrics related to sensor chips. Each chip provides its own set of the following metrics." 
- labels: [] - metrics: - - name: sensors.temp - description: Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: "{filename}" - - name: sensors.volt - description: Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: "{filename}" - - name: sensors.curr - description: Current - unit: "Ampere" - chart_type: line - dimensions: - - name: "{filename}" - - name: sensors.power - description: Power - unit: "Watt" - chart_type: line - dimensions: - - name: "{filename}" - - name: sensors.fans - description: Fans Speed - unit: "Rotations / Minute" - chart_type: line - dimensions: - - name: "{filename}" - - name: sensors.energy - description: Energy - unit: "Joule" - chart_type: area - dimensions: - - name: "{filename}" - - name: sensors.humidity - description: Humidity - unit: "Percent" - chart_type: line - dimensions: - - name: "{filename}" diff --git a/src/collectors/charts.d.plugin/sensors/sensors.chart.sh b/src/collectors/charts.d.plugin/sensors/sensors.chart.sh deleted file mode 100644 index 9576e2ab20dd44..00000000000000 --- a/src/collectors/charts.d.plugin/sensors/sensors.chart.sh +++ /dev/null @@ -1,250 +0,0 @@ -# shellcheck shell=bash -# no need for shebang - this file is loaded from charts.d.plugin -# SPDX-License-Identifier: GPL-3.0-or-later - -# netdata -# real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis -# - -# sensors docs -# https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface - -# if this chart is called X.chart.sh, then all functions and global variables -# must start with X_ - -# the directory the kernel keeps sensor data -sensors_sys_dir="${NETDATA_HOST_PREFIX}/sys/devices" - -# how deep in the tree to check for sensor data -sensors_sys_depth=10 - -# if set to 1, the script will overwrite internal -# script functions with code generated ones -# leave to 1, is faster -sensors_source_update=1 - -# how frequently to collect sensor data -# the default is to collect it at every iteration of charts.d -sensors_update_every= - -sensors_priority=90000 - -declare -A sensors_excluded=() - -sensors_find_all_files() { - find "$1" -maxdepth $sensors_sys_depth -name \*_input -o -name temp 2>/dev/null -} - -sensors_find_all_dirs() { - # shellcheck disable=SC2162 - sensors_find_all_files "$1" | while read; do - dirname "$REPLY" - done | sort -u -} - -# _check is called once, to find out if this chart should be enabled or not -sensors_check() { - - # this should return: - # - 0 to enable the chart - # - 1 to disable the chart - - [ -z "$(sensors_find_all_files "$sensors_sys_dir")" ] && error "no sensors found in '$sensors_sys_dir'." && return 1 - return 0 -} - -sensors_check_files() { - # we only need sensors that report a non-zero value - # also remove not needed sensors - - local f v excluded - for f in "$@"; do - [ ! 
-f "$f" ] && continue - for ex in "${sensors_excluded[@]}"; do - [[ $f =~ .*$ex$ ]] && excluded='1' && break - done - - [ "$excluded" != "1" ] && v="$(cat "$f")" || v=0 - v=$((v + 1 - 1)) - [ $v -ne 0 ] && echo "$f" && continue - excluded= - - error "$f gives zero values" - done -} - -sensors_check_temp_type() { - # valid temp types are 1 to 6 - # disabled sensors have the value 0 - - local f t v - for f in "$@"; do - # shellcheck disable=SC2001 - t=$(echo "$f" | sed "s|_input$|_type|g") - [ "$f" = "$t" ] && echo "$f" && continue - [ ! -f "$t" ] && echo "$f" && continue - - v="$(cat "$t")" - v=$((v + 1 - 1)) - [ $v -ne 0 ] && echo "$f" && continue - - error "$f is disabled" - done -} - -# _create is called once, to create the charts -sensors_create() { - local path dir name x file lfile labelname device subsystem id type mode files multiplier divisor - - # we create a script with the source of the - # sensors_update() function - # - the highest speed we can achieve - - [ $sensors_source_update -eq 1 ] && echo >"$TMP_DIR/sensors.sh" "sensors_update() {" - - for path in $(sensors_find_all_dirs "$sensors_sys_dir" | sort -u); do - dir=$(basename "$path") - device= - subsystem= - id= - type= - name= - - [ -h "$path/device" ] && device=$(readlink -f "$path/device") - [ ! -z "$device" ] && device=$(basename "$device") - [ -z "$device" ] && device="$dir" - - [ -h "$path/subsystem" ] && subsystem=$(readlink -f "$path/subsystem") - [ ! 
-z "$subsystem" ] && subsystem=$(basename "$subsystem") - [ -z "$subsystem" ] && subsystem="$dir" - - [ -f "$path/name" ] && name=$(cat "$path/name") - [ -z "$name" ] && name="$dir" - - [ -f "$path/type" ] && type=$(cat "$path/type") - [ -z "$type" ] && type="$dir" - - id="$(fixid "$device.$subsystem.$dir")" - - debug "path='$path', dir='$dir', device='$device', subsystem='$subsystem', id='$id', name='$name'" - - for mode in temperature voltage fans power current energy humidity; do - files= - multiplier=1 - divisor=1 - algorithm="absolute" - - case $mode in - temperature) - files="$( - ls "$path"/temp*_input 2>/dev/null - ls "$path/temp" 2>/dev/null - )" - files="$(sensors_check_files "$files")" - files="$(sensors_check_temp_type "$files")" - [ -z "$files" ] && continue - echo "CHART 'sensors.temp_${id}_${name}' '' 'Temperature' 'Celsius' 'temperature' 'sensors.temp' line $((sensors_priority + 1)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.temp_${id}_${name}' \$1\"" - divisor=1000 - ;; - - voltage) - files="$(ls "$path"/in*_input 2>/dev/null)" - files="$(sensors_check_files "$files")" - [ -z "$files" ] && continue - echo "CHART 'sensors.volt_${id}_${name}' '' 'Voltage' 'Volts' 'voltage' 'sensors.volt' line $((sensors_priority + 2)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.volt_${id}_${name}' \$1\"" - divisor=1000 - ;; - - current) - files="$(ls "$path"/curr*_input 2>/dev/null)" - files="$(sensors_check_files "$files")" - [ -z "$files" ] && continue - echo "CHART 'sensors.curr_${id}_${name}' '' 'Current' 'Ampere' 'current' 'sensors.curr' line $((sensors_priority + 3)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.curr_${id}_${name}' \$1\"" - divisor=1000 - ;; - - power) - files="$(ls "$path"/power*_input 2>/dev/null)" - files="$(sensors_check_files "$files")" - [ -z "$files" ] && continue - echo "CHART 
'sensors.power_${id}_${name}' '' 'Power' 'Watt' 'power' 'sensors.power' line $((sensors_priority + 4)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.power_${id}_${name}' \$1\"" - divisor=1000000 - ;; - - fans) - files="$(ls "$path"/fan*_input 2>/dev/null)" - files="$(sensors_check_files "$files")" - [ -z "$files" ] && continue - echo "CHART 'sensors.fan_${id}_${name}' '' 'Fans Speed' 'Rotations / Minute' 'fans' 'sensors.fans' line $((sensors_priority + 5)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.fan_${id}_${name}' \$1\"" - ;; - - energy) - files="$(ls "$path"/energy*_input 2>/dev/null)" - files="$(sensors_check_files "$files")" - [ -z "$files" ] && continue - echo "CHART 'sensors.energy_${id}_${name}' '' 'Energy' 'Joule' 'energy' 'sensors.energy' area $((sensors_priority + 6)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.energy_${id}_${name}' \$1\"" - algorithm="incremental" - divisor=1000000 - ;; - - humidity) - files="$(ls "$path"/humidity*_input 2>/dev/null)" - files="$(sensors_check_files "$files")" - [ -z "$files" ] && continue - echo "CHART 'sensors.humidity_${id}_${name}' '' 'Humidity' 'Percent' 'humidity' 'sensors.humidity' line $((sensors_priority + 7)) $sensors_update_every '' '' 'sensors'" - echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN 'sensors.humidity_${id}_${name}' \$1\"" - divisor=1000 - ;; - - *) - continue - ;; - esac - - for x in $files; do - file="$x" - fid="$(fixid "$file")" - lfile="$(basename "$file" | sed "s|_input$|_label|g")" - labelname="$(basename "$file" | sed "s|_input$||g")" - - if [ ! 
"$path/$lfile" = "$file" ] && [ -f "$path/$lfile" ]; then - labelname="$(cat "$path/$lfile")" - fi - - echo "DIMENSION $fid '$labelname' $algorithm $multiplier $divisor" - echo >>"$TMP_DIR/sensors.sh" "echo \"SET $fid = \"\$(< $file )" - done - - echo >>"$TMP_DIR/sensors.sh" "echo END" - done - done - - [ $sensors_source_update -eq 1 ] && echo >>"$TMP_DIR/sensors.sh" "}" - - # ok, load the function sensors_update() we created - # shellcheck source=/dev/null - [ $sensors_source_update -eq 1 ] && . "$TMP_DIR/sensors.sh" - - return 0 -} - -# _update is called continuously, to collect the values -sensors_update() { - # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see below). - - # do all the work to collect / calculate the values - # for each dimension - # remember: KEEP IT SIMPLE AND SHORT - - # shellcheck source=/dev/null - [ $sensors_source_update -eq 0 ] && . "$TMP_DIR/sensors.sh" "$1" - - return 0 -} diff --git a/src/collectors/charts.d.plugin/sensors/sensors.conf b/src/collectors/charts.d.plugin/sensors/sensors.conf deleted file mode 100644 index bcb28807d6ae85..00000000000000 --- a/src/collectors/charts.d.plugin/sensors/sensors.conf +++ /dev/null @@ -1,32 +0,0 @@ -# no need for shebang - this file is loaded from charts.d.plugin - -# netdata -# real-time performance and health monitoring, done right! 
-# (C) 2018 Costa Tsaousis -# GPL v3+ - -# THIS PLUGIN IS DEPRECATED -# USE THE PYTHON.D ONE - -# the directory the kernel keeps sensor data -#sensors_sys_dir="/sys/devices" - -# how deep in the tree to check for sensor data -#sensors_sys_depth=10 - -# if set to 1, the script will overwrite internal -# script functions with code generated ones -# leave to 1, is faster -#sensors_source_update=1 - -# the data collection frequency -# if unset, will inherit the netdata update frequency -#sensors_update_every= - -# the charts priority on the dashboard -#sensors_priority=90000 - -# the number of retries to do in case of failure -# before disabling the module -#sensors_retries=10 - diff --git a/src/collectors/plugins.d/gperf-config.txt b/src/collectors/plugins.d/gperf-config.txt index 721b771b77a734..bfaa4eecc93beb 100644 --- a/src/collectors/plugins.d/gperf-config.txt +++ b/src/collectors/plugins.d/gperf-config.txt @@ -35,6 +35,8 @@ #define PLUGINSD_KEYWORD_ID_RSET 21 #define PLUGINSD_KEYWORD_ID_RSSTATE 24 +#define PLUGINSD_KEYWORD_ID_JSON 80 + #define PLUGINSD_KEYWORD_ID_DYNCFG_ENABLE 901 #define PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_MODULE 902 #define PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_JOB 903 @@ -102,11 +104,15 @@ REND, PLUGINSD_KEYWORD_ID_REND, PARSER_INIT_STRE RSET, PLUGINSD_KEYWORD_ID_RSET, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30 RSSTATE, PLUGINSD_KEYWORD_ID_RSSTATE, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31 # +# JSON +# +JSON, PLUGINSD_KEYWORD_ID_JSON, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 32 +# # obsolete - do nothing commands # -DYNCFG_ENABLE, PLUGINSD_KEYWORD_ID_DYNCFG_ENABLE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32 -DYNCFG_REGISTER_MODULE, PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_MODULE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33 -DYNCFG_REGISTER_JOB, PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, 
WORKER_PARSER_FIRST_JOB + 34 -DYNCFG_RESET, PLUGINSD_KEYWORD_ID_DYNCFG_RESET, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35 -REPORT_JOB_STATUS, PLUGINSD_KEYWORD_ID_REPORT_JOB_STATUS, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 36 -DELETE_JOB, PLUGINSD_KEYWORD_ID_DELETE_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 37 +DYNCFG_ENABLE, PLUGINSD_KEYWORD_ID_DYNCFG_ENABLE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33 +DYNCFG_REGISTER_MODULE, PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_MODULE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34 +DYNCFG_REGISTER_JOB, PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35 +DYNCFG_RESET, PLUGINSD_KEYWORD_ID_DYNCFG_RESET, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 36 +REPORT_JOB_STATUS, PLUGINSD_KEYWORD_ID_REPORT_JOB_STATUS, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 37 +DELETE_JOB, PLUGINSD_KEYWORD_ID_DELETE_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 38 diff --git a/src/collectors/plugins.d/gperf-hashtable.h b/src/collectors/plugins.d/gperf-hashtable.h index 315e2f7c76d816..5375de83213b58 100644 --- a/src/collectors/plugins.d/gperf-hashtable.h +++ b/src/collectors/plugins.d/gperf-hashtable.h @@ -67,6 +67,8 @@ #define PLUGINSD_KEYWORD_ID_RSET 21 #define PLUGINSD_KEYWORD_ID_RSSTATE 24 +#define PLUGINSD_KEYWORD_ID_JSON 80 + #define PLUGINSD_KEYWORD_ID_DYNCFG_ENABLE 901 #define PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_MODULE 902 #define PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_JOB 903 @@ -75,12 +77,12 @@ #define PLUGINSD_KEYWORD_ID_DELETE_JOB 906 -#define GPERF_PARSER_TOTAL_KEYWORDS 37 +#define GPERF_PARSER_TOTAL_KEYWORDS 38 #define GPERF_PARSER_MIN_WORD_LENGTH 3 #define GPERF_PARSER_MAX_WORD_LENGTH 22 -#define GPERF_PARSER_MIN_HASH_VALUE 7 -#define 
GPERF_PARSER_MAX_HASH_VALUE 52 -/* maximum key range = 46, duplicates = 0 */ +#define GPERF_PARSER_MIN_HASH_VALUE 4 +#define GPERF_PARSER_MAX_HASH_VALUE 53 +/* maximum key range = 50, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -94,32 +96,32 @@ gperf_keyword_hash_function (register const char *str, register size_t len) { static const unsigned char asso_values[] = { - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 6, 24, 3, 9, 6, - 0, 53, 3, 27, 53, 53, 33, 53, 42, 0, - 53, 53, 0, 30, 53, 12, 3, 53, 9, 0, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53 + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 31, 28, 2, 4, 0, + 5, 54, 0, 25, 20, 54, 17, 54, 27, 0, + 54, 54, 1, 16, 54, 15, 0, 54, 2, 0, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 
54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; } @@ -130,92 +132,94 @@ static const PARSER_KEYWORD gperf_keywords[] = {(char*)0,0,PARSER_INIT_PLUGINSD,0}, {(char*)0,0,PARSER_INIT_PLUGINSD,0}, {(char*)0,0,PARSER_INIT_PLUGINSD,0}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 67 "gperf-config.txt" +#line 69 "gperf-config.txt" {"HOST", PLUGINSD_KEYWORD_ID_HOST, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 4}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 87 "gperf-config.txt" - {"CONFIG", PLUGINSD_KEYWORD_ID_CONFIG, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 21}, -#line 101 "gperf-config.txt" +#line 103 "gperf-config.txt" {"REND", PLUGINSD_KEYWORD_ID_REND, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29}, -#line 75 "gperf-config.txt" +#line 68 "gperf-config.txt" + {"EXIT", PLUGINSD_KEYWORD_ID_EXIT, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3}, +#line 77 "gperf-config.txt" {"CHART", PLUGINSD_KEYWORD_ID_CHART, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 9}, -#line 84 "gperf-config.txt" +#line 89 "gperf-config.txt" + {"CONFIG", PLUGINSD_KEYWORD_ID_CONFIG, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 21}, +#line 86 "gperf-config.txt" {"OVERWRITE", PLUGINSD_KEYWORD_ID_OVERWRITE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, 
WORKER_PARSER_FIRST_JOB + 18}, -#line 70 "gperf-config.txt" +#line 72 "gperf-config.txt" {"HOST_LABEL", PLUGINSD_KEYWORD_ID_HOST_LABEL, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 7}, -#line 68 "gperf-config.txt" +#line 70 "gperf-config.txt" {"HOST_DEFINE", PLUGINSD_KEYWORD_ID_HOST_DEFINE, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 5}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 100 "gperf-config.txt" +#line 102 "gperf-config.txt" {"RDSTATE", PLUGINSD_KEYWORD_ID_RDSTATE, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28}, -#line 86 "gperf-config.txt" - {"VARIABLE", PLUGINSD_KEYWORD_ID_VARIABLE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 20}, -#line 69 "gperf-config.txt" - {"HOST_DEFINE_END", PLUGINSD_KEYWORD_ID_HOST_DEFINE_END, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 6}, -#line 66 "gperf-config.txt" - {"EXIT", PLUGINSD_KEYWORD_ID_EXIT, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3}, -#line 80 "gperf-config.txt" - {"FUNCTION", PLUGINSD_KEYWORD_ID_FUNCTION, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 14}, -#line 110 "gperf-config.txt" - {"DYNCFG_RESET", PLUGINSD_KEYWORD_ID_DYNCFG_RESET, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35}, -#line 107 "gperf-config.txt" - {"DYNCFG_ENABLE", PLUGINSD_KEYWORD_ID_DYNCFG_ENABLE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32}, -#line 111 "gperf-config.txt" - {"REPORT_JOB_STATUS", PLUGINSD_KEYWORD_ID_REPORT_JOB_STATUS, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 36}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 112 "gperf-config.txt" - {"DELETE_JOB", PLUGINSD_KEYWORD_ID_DELETE_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 37}, -#line 98 "gperf-config.txt" - {"CHART_DEFINITION_END", PLUGINSD_KEYWORD_ID_CHART_DEFINITION_END, 
PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 26}, {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 109 "gperf-config.txt" - {"DYNCFG_REGISTER_JOB", PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34}, -#line 82 "gperf-config.txt" - {"FUNCTION_PROGRESS", PLUGINSD_KEYWORD_ID_FUNCTION_PROGRESS, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16}, -#line 99 "gperf-config.txt" - {"RBEGIN", PLUGINSD_KEYWORD_ID_RBEGIN, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, -#line 108 "gperf-config.txt" - {"DYNCFG_REGISTER_MODULE", PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_MODULE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 81 "gperf-config.txt" - {"FUNCTION_RESULT_BEGIN", PLUGINSD_KEYWORD_ID_FUNCTION_RESULT_BEGIN, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}, -#line 102 "gperf-config.txt" +#line 118 "gperf-config.txt" + {"DELETE_JOB", PLUGINSD_KEYWORD_ID_DELETE_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 38}, +#line 71 "gperf-config.txt" + {"HOST_DEFINE_END", PLUGINSD_KEYWORD_ID_HOST_DEFINE_END, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 6}, +#line 116 "gperf-config.txt" + {"DYNCFG_RESET", PLUGINSD_KEYWORD_ID_DYNCFG_RESET, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 36}, +#line 113 "gperf-config.txt" + {"DYNCFG_ENABLE", PLUGINSD_KEYWORD_ID_DYNCFG_ENABLE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33}, +#line 117 "gperf-config.txt" + {"REPORT_JOB_STATUS", PLUGINSD_KEYWORD_ID_REPORT_JOB_STATUS, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 37}, +#line 87 "gperf-config.txt" + {"SET", PLUGINSD_KEYWORD_ID_SET, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19}, +#line 95 "gperf-config.txt" + {"SET2", 
PLUGINSD_KEYWORD_ID_SET2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24}, +#line 104 "gperf-config.txt" {"RSET", PLUGINSD_KEYWORD_ID_RSET, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30}, -#line 74 "gperf-config.txt" - {"BEGIN", PLUGINSD_KEYWORD_ID_BEGIN, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, -#line 92 "gperf-config.txt" - {"BEGIN2", PLUGINSD_KEYWORD_ID_BEGIN2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, -#line 103 "gperf-config.txt" +#line 100 "gperf-config.txt" + {"CHART_DEFINITION_END", PLUGINSD_KEYWORD_ID_CHART_DEFINITION_END, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 26}, +#line 115 "gperf-config.txt" + {"DYNCFG_REGISTER_JOB", PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_JOB, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35}, +#line 105 "gperf-config.txt" {"RSSTATE", PLUGINSD_KEYWORD_ID_RSSTATE, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31}, -#line 64 "gperf-config.txt" +#line 78 "gperf-config.txt" + {"CLABEL", PLUGINSD_KEYWORD_ID_CLABEL, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 10}, +#line 114 "gperf-config.txt" + {"DYNCFG_REGISTER_MODULE", PLUGINSD_KEYWORD_ID_DYNCFG_REGISTER_MODULE, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34}, +#line 66 "gperf-config.txt" {"FLUSH", PLUGINSD_KEYWORD_ID_FLUSH, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1}, -#line 85 "gperf-config.txt" - {"SET", PLUGINSD_KEYWORD_ID_SET, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19}, +#line 82 "gperf-config.txt" + {"FUNCTION", PLUGINSD_KEYWORD_ID_FUNCTION, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 14}, #line 93 "gperf-config.txt" - {"SET2", PLUGINSD_KEYWORD_ID_SET2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24}, - {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {"CLAIMED_ID", PLUGINSD_KEYWORD_ID_CLAIMED_ID, 
PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 22}, +#line 81 "gperf-config.txt" + {"END", PLUGINSD_KEYWORD_ID_END, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, +#line 96 "gperf-config.txt" + {"END2", PLUGINSD_KEYWORD_ID_END2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25}, +#line 79 "gperf-config.txt" + {"CLABEL_COMMIT", PLUGINSD_KEYWORD_ID_CLABEL_COMMIT, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 11}, #line 76 "gperf-config.txt" - {"CLABEL", PLUGINSD_KEYWORD_ID_CLABEL, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 10}, -#line 65 "gperf-config.txt" + {"BEGIN", PLUGINSD_KEYWORD_ID_BEGIN, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, +#line 94 "gperf-config.txt" + {"BEGIN2", PLUGINSD_KEYWORD_ID_BEGIN2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, +#line 101 "gperf-config.txt" + {"RBEGIN", PLUGINSD_KEYWORD_ID_RBEGIN, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, +#line 67 "gperf-config.txt" {"DISABLE", PLUGINSD_KEYWORD_ID_DISABLE, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2}, -#line 83 "gperf-config.txt" - {"LABEL", PLUGINSD_KEYWORD_ID_LABEL, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 17}, -#line 78 "gperf-config.txt" +#line 84 "gperf-config.txt" + {"FUNCTION_PROGRESS", PLUGINSD_KEYWORD_ID_FUNCTION_PROGRESS, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16}, +#line 80 "gperf-config.txt" {"DIMENSION", PLUGINSD_KEYWORD_ID_DIMENSION, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 12}, -#line 91 "gperf-config.txt" - {"CLAIMED_ID", PLUGINSD_KEYWORD_ID_CLAIMED_ID, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 22}, +#line 88 "gperf-config.txt" + {"VARIABLE", PLUGINSD_KEYWORD_ID_VARIABLE, 
PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 20}, +#line 109 "gperf-config.txt" + {"JSON", PLUGINSD_KEYWORD_ID_JSON, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 32}, +#line 83 "gperf-config.txt" + {"FUNCTION_RESULT_BEGIN", PLUGINSD_KEYWORD_ID_FUNCTION_RESULT_BEGIN, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}, {(char*)0,0,PARSER_INIT_PLUGINSD,0}, {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 77 "gperf-config.txt" - {"CLABEL_COMMIT", PLUGINSD_KEYWORD_ID_CLABEL_COMMIT, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 11}, {(char*)0,0,PARSER_INIT_PLUGINSD,0}, -#line 79 "gperf-config.txt" - {"END", PLUGINSD_KEYWORD_ID_END, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, -#line 94 "gperf-config.txt" - {"END2", PLUGINSD_KEYWORD_ID_END2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25} + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, + {(char*)0,0,PARSER_INIT_PLUGINSD,0}, +#line 85 "gperf-config.txt" + {"LABEL", PLUGINSD_KEYWORD_ID_LABEL, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 17} }; const PARSER_KEYWORD * diff --git a/src/collectors/plugins.d/pluginsd_parser.c b/src/collectors/plugins.d/pluginsd_parser.c index a142c6ccccd0a7..6625fb96da0b61 100644 --- a/src/collectors/plugins.d/pluginsd_parser.c +++ b/src/collectors/plugins.d/pluginsd_parser.c @@ -1081,6 +1081,35 @@ static inline PARSER_RC pluginsd_exit(char **words __maybe_unused, size_t num_wo return PARSER_RC_STOP; } +static void pluginsd_json_stream_paths(PARSER *parser, void *action_data __maybe_unused) { + stream_path_set_from_json(parser->user.host, 
buffer_tostring(parser->defer.response), false); + buffer_free(parser->defer.response); +} + +static void pluginsd_json_dev_null(PARSER *parser, void *action_data __maybe_unused) { + buffer_free(parser->defer.response); +} + +static PARSER_RC pluginsd_json(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser) { + RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_JSON); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); + + char *keyword = get_word(words, num_words, 1); + + parser->defer.response = buffer_create(0, NULL); + parser->defer.end_keyword = PLUGINSD_KEYWORD_JSON_END; + parser->defer.action = pluginsd_json_dev_null; + parser->defer.action_data = NULL; + parser->flags |= PARSER_DEFER_UNTIL_KEYWORD; + + if(strcmp(keyword, PLUGINSD_KEYWORD_STREAM_PATH) == 0) + parser->defer.action = pluginsd_json_stream_paths; + else + netdata_log_error("PLUGINSD: invalid JSON payload keyword '%s'", keyword); + + return PARSER_RC_OK; +} + PARSER_RC rrdpush_receiver_pluginsd_claimed_id(char **words, size_t num_words, PARSER *parser); // ---------------------------------------------------------------------------- @@ -1215,6 +1244,8 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, int fd_input, #include "gperf-hashtable.h" PARSER_RC parser_execute(PARSER *parser, const PARSER_KEYWORD *keyword, char **words, size_t num_words) { + // put all the keywords ordered by the frequency they are used + switch(keyword->id) { case PLUGINSD_KEYWORD_ID_SET2: return pluginsd_set_v2(words, num_words, parser); @@ -1254,6 +1285,8 @@ PARSER_RC parser_execute(PARSER *parser, const PARSER_KEYWORD *keyword, char **w return pluginsd_function_result_begin(words, num_words, parser); case PLUGINSD_KEYWORD_ID_FUNCTION_PROGRESS: return pluginsd_function_progress(words, num_words, parser); + case PLUGINSD_KEYWORD_ID_JSON: + return pluginsd_json(words, num_words, parser); case PLUGINSD_KEYWORD_ID_LABEL: return pluginsd_label(words, 
num_words, parser); case PLUGINSD_KEYWORD_ID_OVERWRITE: diff --git a/src/collectors/plugins.d/pluginsd_parser.h b/src/collectors/plugins.d/pluginsd_parser.h index baf66df291c4ba..983da7d13e8dc4 100644 --- a/src/collectors/plugins.d/pluginsd_parser.h +++ b/src/collectors/plugins.d/pluginsd_parser.h @@ -93,6 +93,8 @@ typedef struct parser_user_object { } v2; } PARSER_USER_OBJECT; +typedef void (*parser_deferred_action_t)(struct parser *parser, void *action_data); + struct parser { uint8_t version; // Parser version PARSER_REPERTOIRE repertoire; @@ -115,7 +117,7 @@ struct parser { struct { const char *end_keyword; BUFFER *response; - void (*action)(struct parser *parser, void *action_data); + parser_deferred_action_t action; void *action_data; } defer; diff --git a/src/collectors/proc.plugin/README.md b/src/collectors/proc.plugin/README.md index 79bfd8645e6d25..bb09226107ff7f 100644 --- a/src/collectors/proc.plugin/README.md +++ b/src/collectors/proc.plugin/README.md @@ -610,7 +610,7 @@ The `drm` path can be configured if it differs from the default: ``` > [!NOTE] -> Temperature, fan speed, voltage and power metrics for AMD GPUs can be monitored using the [Sensors](/src/collectors/charts.d.plugin/sensors/README.md) plugin. +> Temperature, fan speed, voltage and power metrics for AMD GPUs can be monitored using the [Sensors](/src/go/plugin/go.d/modules/sensors/README.md) plugin. ## IPC diff --git a/src/collectors/slabinfo.plugin/slabinfo.c b/src/collectors/slabinfo.plugin/slabinfo.c index 216f31ac6645ff..8446c214182591 100644 --- a/src/collectors/slabinfo.plugin/slabinfo.c +++ b/src/collectors/slabinfo.plugin/slabinfo.c @@ -318,6 +318,12 @@ unsigned int do_slab_stats(int update_every) { } printf("END\n"); + fprintf(stdout, "\n"); + fflush(stdout); + if (ferror(stdout) && errno == EPIPE) { + netdata_log_error("error writing to stdout: EPIPE. 
Exiting..."); + return loops; + } loops++; diff --git a/src/daemon/commands.c b/src/daemon/commands.c index 62d42858095b08..df083191f5891a 100644 --- a/src/daemon/commands.c +++ b/src/daemon/commands.c @@ -372,10 +372,10 @@ static int remove_ephemeral_host(BUFFER *wb, RRDHOST *host, bool report_error) if (!rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST)) { rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST); - sql_set_host_label(&host->host_uuid, "_is_ephemeral", "true"); + sql_set_host_label(&host->host_id.uuid, "_is_ephemeral", "true"); aclk_host_state_update(host, 0, 0); unregister_node(host->machine_guid); - uuid_clear(host->node_id); + host->node_id = UUID_ZERO; buffer_sprintf(wb, "Unregistering node with machine guid %s, hostname = %s", host->machine_guid, rrdhost_hostname(host)); rrd_wrlock(); rrdhost_free___while_having_rrd_wrlock(host, true); @@ -517,7 +517,7 @@ static void pipe_write_cb(uv_write_t* req, int status) static inline void add_char_to_command_reply(BUFFER *reply_string, unsigned *reply_string_size, char character) { - buffer_fast_charcat(reply_string, character); + buffer_putc(reply_string, character); *reply_string_size +=1; } diff --git a/src/daemon/config/dyncfg-tree.c b/src/daemon/config/dyncfg-tree.c index 77d031fa0ae461..b0ca32491ef2dd 100644 --- a/src/daemon/config/dyncfg-tree.c +++ b/src/daemon/config/dyncfg-tree.c @@ -71,12 +71,10 @@ static void dyncfg_tree_for_host(RRDHOST *host, BUFFER *wb, const char *path, co if(id && *id) template = string_strdupz(id); - ND_UUID host_uuid = uuid2UUID(host->host_uuid); - size_t path_len = strlen(path); DYNCFG *df; dfe_start_read(dyncfg_globals.nodes, df) { - if(!UUIDeq(df->host_uuid, host_uuid)) + if(!UUIDeq(df->host_uuid, host->host_id)) continue; if(strncmp(string2str(df->path), path, path_len) != 0) diff --git a/src/daemon/config/dyncfg.c b/src/daemon/config/dyncfg.c index 2f484d1ed6d5c7..68447866d3ab3b 100644 --- a/src/daemon/config/dyncfg.c +++ b/src/daemon/config/dyncfg.c @@ 
-192,7 +192,7 @@ const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const rrd_function_execute_cb_t execute_cb, void *execute_cb_data, bool overwrite_cb) { DYNCFG tmp = { - .host_uuid = uuid2UUID(host->host_uuid), + .host_uuid = host->host_id, .path = string_strdupz(path), .cmds = cmds, .type = type, diff --git a/src/daemon/main.c b/src/daemon/main.c index 7fda312619c7b7..7af3533afa8fdf 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -2006,9 +2006,7 @@ int netdata_main(int argc, char **argv) { nd_log_initialize(); netdata_log_info("Netdata agent version '%s' is starting", NETDATA_VERSION); - ieee754_doubles = is_system_ieee754_double(); - if(!ieee754_doubles) - globally_disabled_capabilities |= STREAM_CAP_IEEE754; + check_local_streaming_capabilities(); aral_judy_init(); diff --git a/src/database/contexts/api_v1_contexts.c b/src/database/contexts/api_v1_contexts.c index d9bb21db3dc4cd..1a1c83a00b8fc4 100644 --- a/src/database/contexts/api_v1_contexts.c +++ b/src/database/contexts/api_v1_contexts.c @@ -399,8 +399,8 @@ int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, char node_uuid[UUID_STR_LEN] = ""; - if(!uuid_is_null(host->node_id)) - uuid_unparse_lower(host->node_id, node_uuid); + if(!UUIDiszero(host->node_id)) + uuid_unparse_lower(host->node_id.uuid, node_uuid); if(after != 0 && before != 0) rrdr_relative_window_to_absolute_query(&after, &before, NULL, false); diff --git a/src/database/contexts/api_v2_contexts.c b/src/database/contexts/api_v2_contexts.c index 000f6b78496ae4..765d1c83dfdeb3 100644 --- a/src/database/contexts/api_v2_contexts.c +++ b/src/database/contexts/api_v2_contexts.c @@ -201,8 +201,8 @@ void buffer_json_agent_status_id(BUFFER *wb, size_t ai, usec_t duration_ut) { void buffer_json_node_add_v2(BUFFER *wb, RRDHOST *host, size_t ni, usec_t duration_ut, bool status) { buffer_json_member_add_string(wb, "mg", host->machine_guid); - if(!uuid_is_null(host->node_id)) - 
buffer_json_member_add_uuid(wb, "nd", host->node_id); + if(!UUIDiszero(host->node_id)) + buffer_json_member_add_uuid(wb, "nd", host->node_id.uuid); buffer_json_member_add_string(wb, "nm", rrdhost_hostname(host)); buffer_json_member_add_uint64(wb, "ni", ni); @@ -425,6 +425,7 @@ static void rrdcontext_to_json_v2_rrdhost(BUFFER *wb, RRDHOST *host, struct rrdc rrdhost_health_to_json_v2(wb, "health", &s); agent_capabilities_to_json(wb, host, "capabilities"); + rrdhost_stream_path_to_json(wb, host, STREAM_PATH_JSON_MEMBER, false); } if (ctl->mode & (CONTEXTS_V2_NODE_INSTANCES)) { diff --git a/src/database/contexts/api_v2_contexts_agents.c b/src/database/contexts/api_v2_contexts_agents.c index ba9cc450591b21..a8d4fbfda75843 100644 --- a/src/database/contexts/api_v2_contexts_agents.c +++ b/src/database/contexts/api_v2_contexts_agents.c @@ -41,7 +41,7 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now buffer_json_member_add_object(wb, "agent"); buffer_json_member_add_string(wb, "mg", localhost->machine_guid); - buffer_json_member_add_uuid(wb, "nd", localhost->node_id); + buffer_json_member_add_uuid(wb, "nd", localhost->node_id.uuid); buffer_json_member_add_string(wb, "nm", rrdhost_hostname(localhost)); buffer_json_member_add_time_t(wb, "now", now_s); @@ -62,16 +62,15 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now dfe_start_read(rrdhost_root_index, host) { total++; - if(host == localhost) - continue; - - if(rrdhost_state_cloud_emulation(host)) - receiving++; - else - archived++; - if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED)) sending++; + + if(host != localhost) { + if (rrdhost_state_cloud_emulation(host)) + receiving++; + else + archived++; + } } dfe_done(host); diff --git a/src/database/contexts/api_v2_contexts_alert_transitions.c b/src/database/contexts/api_v2_contexts_alert_transitions.c index 60ae81035f5d56..13061f60f07a99 100644 --- 
a/src/database/contexts/api_v2_contexts_alert_transitions.c +++ b/src/database/contexts/api_v2_contexts_alert_transitions.c @@ -373,8 +373,8 @@ void contexts_v2_alert_transitions_to_json(BUFFER *wb, struct rrdcontext_to_json if(host) { buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(host)); - if(!uuid_is_null(host->node_id)) - buffer_json_member_add_uuid(wb, "node_id", host->node_id); + if(!UUIDiszero(host->node_id)) + buffer_json_member_add_uuid(wb, "node_id", host->node_id.uuid); } buffer_json_member_add_string(wb, "alert", *t->alert_name ? t->alert_name : NULL); diff --git a/src/database/contexts/query_scope.c b/src/database/contexts/query_scope.c index f243c4a9f4e6ef..7485ef3e6a1ae2 100644 --- a/src/database/contexts/query_scope.c +++ b/src/database/contexts/query_scope.c @@ -18,8 +18,8 @@ ssize_t query_scope_foreach_host(SIMPLE_PATTERN *scope_hosts_sp, SIMPLE_PATTERN uint64_t t_hash = 0; dfe_start_read(rrdhost_root_index, host) { - if(!uuid_is_null(host->node_id)) - uuid_unparse_lower(host->node_id, host_node_id_str); + if(!UUIDiszero(host->node_id)) + uuid_unparse_lower(host->node_id.uuid, host_node_id_str); else host_node_id_str[0] = '\0'; diff --git a/src/database/contexts/query_target.c b/src/database/contexts/query_target.c index 5a3933d80886db..b25b8e427263ef 100644 --- a/src/database/contexts/query_target.c +++ b/src/database/contexts/query_target.c @@ -897,9 +897,9 @@ static ssize_t query_node_add(void *data, RRDHOST *host, bool queryable_host) { QUERY_TARGET *qt = qtl->qt; QUERY_NODE *qn = query_node_allocate(qt, host); - if(!uuid_is_null(host->node_id)) { + if(!UUIDiszero(host->node_id)) { if(!qtl->host_node_id_str[0]) - uuid_unparse_lower(host->node_id, qn->node_id); + uuid_unparse_lower(host->node_id.uuid, qn->node_id); else memcpy(qn->node_id, qtl->host_node_id_str, sizeof(qn->node_id)); } @@ -1120,8 +1120,8 @@ QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) { } if(host) { - if(!uuid_is_null(host->node_id)) - 
uuid_unparse_lower(host->node_id, qtl.host_node_id_str); + if(!UUIDiszero(host->node_id)) + uuid_unparse_lower(host->node_id.uuid, qtl.host_node_id_str); else qtl.host_node_id_str[0] = '\0'; diff --git a/src/database/contexts/rrdcontext.c b/src/database/contexts/rrdcontext.c index 7e299a16e65dcf..a98bc98efe3c18 100644 --- a/src/database/contexts/rrdcontext.c +++ b/src/database/contexts/rrdcontext.c @@ -241,9 +241,9 @@ void rrdcontext_hub_checkpoint_command(void *ptr) { cmd->version_hash, rrdhost_hostname(host), our_version_hash); // prepare the snapshot - char uuid[UUID_STR_LEN]; - uuid_unparse_lower(host->node_id, uuid); - contexts_snapshot_t bundle = contexts_snapshot_new(cmd->claim_id, uuid, our_version_hash); + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(host->node_id.uuid, uuid_str); + contexts_snapshot_t bundle = contexts_snapshot_new(cmd->claim_id, uuid_str, our_version_hash); // do a deep scan on every metric of the host to make sure all our data are updated rrdcontext_recalculate_host_retention(host, RRD_FLAG_NONE, false); @@ -264,7 +264,7 @@ void rrdcontext_hub_checkpoint_command(void *ptr) { rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); char node_str[UUID_STR_LEN]; - uuid_unparse_lower(host->node_id, node_str); + uuid_unparse_lower(host->node_id.uuid, node_str); nd_log(NDLS_ACCESS, NDLP_DEBUG, "ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED", node_str, rrdhost_hostname(host)); diff --git a/src/database/contexts/worker.c b/src/database/contexts/worker.c index 953e61e6637deb..53d17492e3d49a 100644 --- a/src/database/contexts/worker.c +++ b/src/database/contexts/worker.c @@ -99,8 +99,8 @@ void rrdhost_load_rrdcontext_data(RRDHOST *host) { if(host->rrdctx.contexts) return; rrdhost_create_rrdcontexts(host); - ctx_get_context_list(&host->host_uuid, rrdcontext_load_context_callback, host); - ctx_get_chart_list(&host->host_uuid, rrdinstance_load_chart_callback, host); + ctx_get_context_list(&host->host_id.uuid, rrdcontext_load_context_callback, 
host); + ctx_get_chart_list(&host->host_id.uuid, rrdinstance_load_chart_callback, host); RRDCONTEXT *rc; dfe_start_read(host->rrdctx.contexts, rc) { @@ -173,6 +173,8 @@ static void rrdhost_update_cached_retention(RRDHOST *host, time_t first_time_s, spinlock_lock(&host->retention.spinlock); + time_t old_first_time_s = host->retention.first_time_s; + if(global) { host->retention.first_time_s = first_time_s; host->retention.last_time_s = last_time_s; @@ -185,7 +187,12 @@ static void rrdhost_update_cached_retention(RRDHOST *host, time_t first_time_s, host->retention.last_time_s = last_time_s; } + bool stream_path_update_required = old_first_time_s != host->retention.first_time_s; + spinlock_unlock(&host->retention.spinlock); + + if(stream_path_update_required) + stream_path_retention_updated(host); } void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs) { @@ -349,7 +356,7 @@ void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc) { rc->hub.family = string2str(rc->family); // delete it from SQL - if(ctx_delete_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + if(ctx_delete_context(&rc->rrdhost->host_id.uuid, &rc->hub) != 0) netdata_log_error("RRDCONTEXT: failed to delete context '%s' version %"PRIu64" from SQL.", rc->hub.id, rc->hub.version); } @@ -845,7 +852,7 @@ void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused if(rrd_flag_is_deleted(rc)) rrdcontext_delete_from_sql_unsafe(rc); - else if (ctx_store_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + else if (ctx_store_context(&rc->rrdhost->host_id.uuid, &rc->hub) != 0) netdata_log_error("RRDCONTEXT: failed to save context '%s' version %"PRIu64" to SQL.", rc->hub.id, rc->hub.version); } @@ -954,7 +961,7 @@ static void rrdcontext_dequeue_from_hub_queue(RRDCONTEXT *rc) { static void rrdcontext_dispatch_queued_contexts_to_hub(RRDHOST *host, usec_t now_ut) { // check if we have received a streaming command for this host - 
if(uuid_is_null(host->node_id) || !rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS) || !aclk_online_for_contexts() || !host->rrdctx.hub_queue) + if(UUIDiszero(host->node_id) || !rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS) || !aclk_online_for_contexts() || !host->rrdctx.hub_queue) return; // check if there are queued items to send @@ -985,10 +992,10 @@ static void rrdcontext_dispatch_queued_contexts_to_hub(RRDHOST *host, usec_t now if(!bundle) { // prepare the bundle to send the messages - char uuid[UUID_STR_LEN]; - uuid_unparse_lower(host->node_id, uuid); + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(host->node_id.uuid, uuid_str); - bundle = contexts_updated_new(claim_id.str, uuid, 0, now_ut); + bundle = contexts_updated_new(claim_id.str, uuid_str, 0, now_ut); } // update the hub data of the context, give a new version, pack the message // and save an update to SQL diff --git a/src/database/engine/datafile.c b/src/database/engine/datafile.c index 35c047722c4ed1..7bf9487f2d9cb3 100644 --- a/src/database/engine/datafile.c +++ b/src/database/engine/datafile.c @@ -66,7 +66,8 @@ void datafile_release(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS re spinlock_unlock(&df->users.spinlock); } -bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) { +bool datafile_acquire_for_deletion(struct rrdengine_datafile *df, bool is_shutdown) +{ bool can_be_deleted = false; spinlock_lock(&df->users.spinlock); @@ -107,7 +108,7 @@ bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) { if(!df->users.time_to_evict) { // first time we did the above - df->users.time_to_evict = now_s + 120; + df->users.time_to_evict = now_s + (is_shutdown ? 
DATAFILE_DELETE_TIMEOUT_SHORT : DATAFILE_DELETE_TIMEOUT_LONG); internal_error(true, "DBENGINE: datafile %u of tier %d is not used by any open cache pages, " "but it has %u lockers (oc:%u, pd:%u), " "%zu clean and %zu hot open cache pages " @@ -572,8 +573,8 @@ void finalize_data_files(struct rrdengine_instance *ctx) struct rrdengine_journalfile *journalfile = datafile->journalfile; logged = false; - size_t iterations = 100; - while(!datafile_acquire_for_deletion(datafile) && datafile != ctx->datafiles.first->prev && --iterations > 0) { + size_t iterations = 10; + while(!datafile_acquire_for_deletion(datafile, true) && datafile != ctx->datafiles.first->prev && --iterations > 0) { if(!logged) { netdata_log_info("Waiting to acquire data file %u of tier %d to close it...", datafile->fileno, ctx->config.tier); logged = true; diff --git a/src/database/engine/datafile.h b/src/database/engine/datafile.h index 569f1b0a28f4c2..843cb8c1e65abe 100644 --- a/src/database/engine/datafile.h +++ b/src/database/engine/datafile.h @@ -24,6 +24,11 @@ struct rrdengine_instance; #define MAX_DATAFILES (65536 * 4) /* Supports up to 64TiB for now */ #define TARGET_DATAFILES (50) +// When trying to acquire a datafile for deletion and an attempt to evict pages is completed +// the acquire for deletion will return true after this timeout +#define DATAFILE_DELETE_TIMEOUT_SHORT (1) +#define DATAFILE_DELETE_TIMEOUT_LONG (120) + typedef enum __attribute__ ((__packed__)) { DATAFILE_ACQUIRE_OPEN_CACHE = 0, DATAFILE_ACQUIRE_PAGE_DETAILS, @@ -72,7 +77,7 @@ struct rrdengine_datafile { bool datafile_acquire(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS reason); void datafile_release(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS reason); -bool datafile_acquire_for_deletion(struct rrdengine_datafile *df); +bool datafile_acquire_for_deletion(struct rrdengine_datafile *df, bool is_shutdown); void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile, 
bool having_lock); void datafile_list_delete_unsafe(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); diff --git a/src/database/engine/rrdengine.c b/src/database/engine/rrdengine.c index a989877fcf42c7..25ebbcc2ac67ac 100644 --- a/src/database/engine/rrdengine.c +++ b/src/database/engine/rrdengine.c @@ -1218,7 +1218,7 @@ void datafile_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile * if(worker) worker_is_busy(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT); - bool datafile_got_for_deletion = datafile_acquire_for_deletion(datafile); + bool datafile_got_for_deletion = datafile_acquire_for_deletion(datafile, false); if (update_retention) update_metrics_first_time_s(ctx, datafile, datafile->next, worker); @@ -1227,7 +1227,7 @@ void datafile_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile * if(worker) worker_is_busy(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT); - datafile_got_for_deletion = datafile_acquire_for_deletion(datafile); + datafile_got_for_deletion = datafile_acquire_for_deletion(datafile, false); if (!datafile_got_for_deletion) { netdata_log_info("DBENGINE: waiting for data file '%s/" diff --git a/src/database/rrd.h b/src/database/rrd.h index 93298ec2fbaeb9..608a88ef6ee557 100644 --- a/src/database/rrd.h +++ b/src/database/rrd.h @@ -104,6 +104,8 @@ struct ml_metrics_statistics { #include "health/rrdvar.h" #include "health/rrdcalc.h" #include "rrdlabels.h" +#include "streaming/stream_capabilities.h" +#include "streaming/stream_path.h" #include "streaming/rrdpush.h" #include "aclk/aclk_rrdhost_state.h" #include "sqlite/sqlite_health.h" @@ -1217,6 +1219,8 @@ struct rrdhost { RRDSET **array; } pluginsd_chart_slots; } receive; + + RRDHOST_STREAM_PATH path; } rrdpush; struct rrdpush_destinations *destinations; // a linked list of possible destinations @@ -1307,8 +1311,8 @@ struct rrdhost { time_t last_time_s; } retention; - nd_uuid_t host_uuid; // Global GUID for this host - nd_uuid_t node_id; // Cloud node_id + ND_UUID 
host_id; // Global GUID for this host + ND_UUID node_id; // Cloud node_id struct { ND_UUID claim_id_of_origin; diff --git a/src/database/rrdhost.c b/src/database/rrdhost.c index ec9bc86e0f45ef..376db65bfef286 100644 --- a/src/database/rrdhost.c +++ b/src/database/rrdhost.c @@ -36,13 +36,13 @@ time_t rrdhost_free_ephemeral_time_s = 86400; RRDHOST *find_host_by_node_id(char *node_id) { - nd_uuid_t node_uuid; - if (unlikely(!node_id || uuid_parse(node_id, node_uuid))) + ND_UUID node_uuid; + if (unlikely(!node_id || uuid_parse(node_id, node_uuid.uuid))) return NULL; RRDHOST *host, *ret = NULL; dfe_start_read(rrdhost_root_index, host) { - if (uuid_eq(host->node_id, node_uuid)) { + if (UUIDeq(host->node_id, node_uuid)) { ret = host; break; } @@ -423,7 +423,7 @@ static RRDHOST *rrdhost_create( if(!host->rrdvars) host->rrdvars = rrdvariables_create(); - if (likely(!uuid_parse(host->machine_guid, host->host_uuid))) + if (likely(!uuid_parse(host->machine_guid, host->host_id.uuid))) sql_load_node_id(host); else error_report("Host machine GUID %s is not valid", host->machine_guid); @@ -1117,7 +1117,7 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt global_functions_add(); if (likely(system_info)) { - detect_machine_guid_change(&localhost->host_uuid); + detect_machine_guid_change(&localhost->host_id.uuid); sql_aclk_sync_init(); api_v1_management_init(); } @@ -1229,6 +1229,10 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) { DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(localhost, host, prev, next); } + // ------------------------------------------------------------------------ + + rrdhost_stream_path_clear(host, true); + // ------------------------------------------------------------------------ // clean up streaming chart slots diff --git a/src/database/sqlite/sqlite_aclk.c b/src/database/sqlite/sqlite_aclk.c index 1a6ceb99201946..f520339d066c46 100644 --- a/src/database/sqlite/sqlite_aclk.c +++ 
b/src/database/sqlite/sqlite_aclk.c @@ -167,8 +167,8 @@ static int create_host_callback(void *data, int argc, char **argv, char **column #ifdef NETDATA_INTERNAL_CHECKS char node_str[UUID_STR_LEN] = ""; - if (likely(!uuid_is_null(host->node_id))) - uuid_unparse_lower(host->node_id, node_str); + if (likely(!UUIDiszero(host->node_id))) + uuid_unparse_lower(host->node_id.uuid, node_str); internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\" ephemeral=%d", rrdhost_hostname(host), host->machine_guid, node_str, is_ephemeral); #endif @@ -201,6 +201,34 @@ static void sql_delete_aclk_table_list(void) buffer_free(sql); } +#define SQL_INVALIDATE_HOST_LAST_CONNECTED "UPDATE host SET last_connected = 1 WHERE host_id = @host_id" + +static void invalidate_host_last_connected(nd_uuid_t *host_uuid) +{ + sqlite3_stmt *res = NULL; + if (!host_uuid) + return; + + if (!PREPARE_STATEMENT(db_meta, SQL_INVALIDATE_HOST_LAST_CONNECTED, &res)) + return; + + int param = 0; + SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, host_uuid, sizeof(*host_uuid), SQLITE_STATIC)); + + param = 0; + int rc = sqlite3_step_monitored(res); + if (unlikely(rc != SQLITE_DONE)) { + char wstr[UUID_STR_LEN]; + uuid_unparse_lower(*host_uuid, wstr); + error_report("Failed invalidate last_connected time for host with GUID %s, rc = %d", wstr, rc); + } + +bind_fail: + REPORT_BIND_FAIL(res, param); + SQLITE_FINALIZE(res); +} + + // OPCODE: ACLK_DATABASE_NODE_UNREGISTER static void sql_unregister_node(char *machine_guid) { @@ -228,6 +256,7 @@ static void sql_unregister_node(char *machine_guid) error_report("Failed to execute command to remove host node id"); } else { // node: machine guid will be freed after processing + invalidate_host_last_connected(&host_uuid); metadata_delete_host_chart_labels(machine_guid); machine_guid = NULL; } @@ -388,12 +417,11 @@ static void aclk_synchronization(void *arg) int live = (host == localhost || host->receiver || !(rrdhost_flag_check(host, 
RRDHOST_FLAG_ORPHAN))) ? 1 : 0; struct aclk_sync_cfg_t *ahc = host->aclk_config; if (unlikely(!ahc)) - create_aclk_config(host, &host->host_uuid, &host->node_id); + create_aclk_config(host, &host->host_id.uuid, &host->node_id.uuid); aclk_host_state_update(host, live, 1); break; case ACLK_DATABASE_NODE_UNREGISTER: sql_unregister_node(cmd.param[0]); - break; // ALERTS case ACLK_DATABASE_PUSH_ALERT_CONFIG: @@ -465,8 +493,8 @@ void create_aclk_config(RRDHOST *host __maybe_unused, nd_uuid_t *host_uuid __may uuid_unparse_lower(*node_id, wc->node_id); host->aclk_config = wc; - if (node_id && uuid_is_null(host->node_id)) { - uuid_copy(host->node_id, *node_id); + if (node_id && UUIDiszero(host->node_id)) { + uuid_copy(host->node_id.uuid, *node_id); } wc->host = host; @@ -553,7 +581,6 @@ void aclk_query_init(mqtt_wss_client client) { queue_aclk_sync_cmd(ACLK_MQTT_WSS_CLIENT, client, NULL); } - void schedule_node_info_update(RRDHOST *host __maybe_unused) { if (unlikely(!host)) diff --git a/src/database/sqlite/sqlite_aclk_alert.c b/src/database/sqlite/sqlite_aclk_alert.c index 8d6754432ea2f1..605b411937bd77 100644 --- a/src/database/sqlite/sqlite_aclk_alert.c +++ b/src/database/sqlite/sqlite_aclk_alert.c @@ -115,14 +115,14 @@ static int insert_alert_to_submit_queue(RRDHOST *host, int64_t health_log_id, ui return 1; } - if (is_event_from_alert_variable_config(unique_id, &host->host_uuid)) + if (is_event_from_alert_variable_config(unique_id, &host->host_id.uuid)) return 2; if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_QUEUE_ALERT_TO_CLOUD, &res)) return -1; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, health_log_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (int64_t) unique_id)); @@ -151,7 +151,7 @@ 
static int delete_alert_from_submit_queue(RRDHOST *host, int64_t first_seq_id, i return -1; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, first_seq_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, last_seq_id)); @@ -265,7 +265,7 @@ static void commit_alert_events(RRDHOST *host) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); int64_t first_sequence_id = 0; int64_t last_sequence_id = 0; @@ -425,7 +425,7 @@ static void aclk_push_alert_event(RRDHOST *host __maybe_unused) { CLAIM_ID claim_id = claim_id_get(); - if (!claim_id_is_set(claim_id) || uuid_is_null(host->node_id)) + if (!claim_id_is_set(claim_id) || UUIDiszero(host->node_id)) return; sqlite3_stmt *res = NULL; @@ -434,10 +434,10 @@ static void aclk_push_alert_event(RRDHOST *host __maybe_unused) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); char node_id_str[UUID_STR_LEN]; - uuid_unparse_lower(host->node_id, node_id_str); + uuid_unparse_lower(host->node_id.uuid, node_id_str); struct alarm_log_entry alarm_log; alarm_log.node_id = node_id_str; @@ -494,7 +494,7 @@ static void delete_alert_from_pending_queue(RRDHOST *host, int64_t row1, int64_t return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, 
sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, row1)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, row2)); @@ -525,7 +525,7 @@ void rebuild_host_alert_version_table(RRDHOST *host) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; int rc = execute_insert(res); @@ -538,7 +538,7 @@ void rebuild_host_alert_version_table(RRDHOST *host) if (!PREPARE_STATEMENT(db_meta, SQL_REBUILD_HOST_ALERT_VERSION_TABLE, &res)) return; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; rc = execute_insert(res); @@ -563,7 +563,7 @@ bool process_alert_pending_queue(RRDHOST *host) int param = 0; int added =0, count = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; int64_t start_row = 0; @@ -781,7 +781,7 @@ static uint64_t calculate_node_alert_version(RRDHOST *host) uint64_t version = 0; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; while (sqlite3_step_monitored(res) == SQLITE_ROW) { @@ -908,7 +908,7 @@ void send_alert_snapshot_to_cloud(RRDHOST *host __maybe_unused) return; // Check the database for this node to see how many alerts we will 
need to put in the snapshot - int cnt = calculate_alert_snapshot_entries(&host->host_uuid); + int cnt = calculate_alert_snapshot_entries(&host->host_id.uuid); if (!cnt) return; @@ -917,7 +917,7 @@ void send_alert_snapshot_to_cloud(RRDHOST *host __maybe_unused) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); nd_uuid_t local_snapshot_uuid; char snapshot_uuid_str[UUID_STR_LEN]; diff --git a/src/database/sqlite/sqlite_health.c b/src/database/sqlite/sqlite_health.c index 15cc8f7e6f51b6..bde8ba1d52c356 100644 --- a/src/database/sqlite/sqlite_health.c +++ b/src/database/sqlite/sqlite_health.c @@ -161,7 +161,7 @@ static void insert_alert_queue( int submit_delay = calculate_delay(old_status, new_status); int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64)health_log_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, unique_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, alarm_id)); @@ -253,7 +253,7 @@ static void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64) ae->alarm_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &ae->config_hash_id, sizeof(ae->config_hash_id), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, 
SQLITE3_BIND_STRING_OR_NULL(res, ++param, ae->name)); @@ -310,7 +310,7 @@ void sql_health_alarm_log_cleanup(RRDHOST *host) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64)host->health_log.health_log_history)); param = 0; @@ -339,7 +339,7 @@ bool sql_update_transition_in_health_log(RRDHOST *host, uint32_t alarm_id, nd_uu SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, transition_id, sizeof(*transition_id), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64)alarm_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, last_transition, sizeof(*last_transition), SQLITE_STATIC)); - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; rc = execute_insert(res); @@ -454,7 +454,7 @@ uint32_t sql_get_max_unique_id (RRDHOST *host) return 0; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; while (sqlite3_step_monitored(res) == SQLITE_ROW) @@ -480,7 +480,7 @@ void sql_check_removed_alerts_state(RRDHOST *host) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; while (sqlite3_step_monitored(res) == SQLITE_ROW) { @@ -514,7 
+514,7 @@ static void sql_remove_alerts_from_deleted_charts(RRDHOST *host, nd_uuid_t *host sqlite3_stmt *res = NULL; int ret; - nd_uuid_t *actual_uuid = host ? &host->host_uuid : host_uuid; + nd_uuid_t *actual_uuid = host ? &host->host_id.uuid : host_uuid; if (!actual_uuid) return; @@ -595,7 +595,7 @@ void sql_health_alarm_log_load(RRDHOST *host) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); DICTIONARY *all_rrdcalcs = dictionary_create( DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); @@ -887,7 +887,7 @@ int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_S return ret; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, (int) ae->alarm_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, (int) ae->unique_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, (uint32_t) HEALTH_ENTRY_FLAG_EXEC_RUN)); @@ -950,7 +950,7 @@ void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, time_t after, const ch stmt_query = *active_stmt; int param = 0; - rc = sqlite3_bind_blob(stmt_query, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); + rc = sqlite3_bind_blob(stmt_query, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind host_id for SQL_SELECT_HEALTH_LOG."); goto finish; @@ -1233,7 +1233,7 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t * return alarm_id; int param = 0; - 
SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(done, SQLITE3_BIND_STRING_OR_NULL(res, ++param, chart)); SQLITE_BIND_FAIL(done, SQLITE3_BIND_STRING_OR_NULL(res, ++param, name)); diff --git a/src/database/sqlite/sqlite_metadata.c b/src/database/sqlite/sqlite_metadata.c index 62483f60e70ce5..1e2cc341cc1e7f 100644 --- a/src/database/sqlite/sqlite_metadata.c +++ b/src/database/sqlite/sqlite_metadata.c @@ -255,20 +255,21 @@ static inline void set_host_node_id(RRDHOST *host, nd_uuid_t *node_id) return; if (unlikely(!node_id)) { - uuid_clear(host->node_id); + host->node_id = UUID_ZERO; return; } struct aclk_sync_cfg_t *wc = host->aclk_config; - uuid_copy(host->node_id, *node_id); + uuid_copy(host->node_id.uuid, *node_id); if (unlikely(!wc)) - create_aclk_config(host, &host->host_uuid, node_id); + create_aclk_config(host, &host->host_id.uuid, node_id); else uuid_unparse_lower(*node_id, wc->node_id); rrdpush_receiver_send_node_and_claim_id_to_child(host); + stream_path_node_id_updated(host); } #define SQL_SET_HOST_LABEL \ @@ -451,7 +452,7 @@ struct node_instance_list *get_node_list(void) node_list[row].live = (host == localhost || host->receiver || !(rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN))) ? 1 : 0; node_list[row].hops = host->system_info ? host->system_info->hops : - uuid_eq(*host_id, localhost->host_uuid) ? 0 : 1; + uuid_eq(*host_id, localhost->host_id.uuid) ? 0 : 1; node_list[row].hostname = sqlite3_column_bytes(res, 2) ? 
strdupz((char *)sqlite3_column_text(res, 2)) : NULL; } @@ -480,7 +481,7 @@ void sql_load_node_id(RRDHOST *host) return; int param = 0; - SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); param = 0; int rc = sqlite3_step_monitored(res); @@ -940,7 +941,7 @@ static int store_host_metadata(RRDHOST *host) return false; int param = 0; - SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_hostname(host), 0)); SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_registry_hostname(host), 1)); SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int(res, ++param, host->rrd_update_every)); @@ -1010,30 +1011,30 @@ static bool store_host_systeminfo(RRDHOST *host) int ret = 0; - ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_NAME", system_info->container_os_name, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_ID", system_info->container_os_id, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_ID_LIKE", system_info->container_os_id_like, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_VERSION", system_info->container_os_version, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_VERSION_ID", system_info->container_os_version_id, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_DETECTION", system_info->host_os_detection, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_NAME", system_info->host_os_name, &host->host_uuid); - ret += 
add_host_sysinfo_key_value("NETDATA_HOST_OS_ID", system_info->host_os_id, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_ID_LIKE", system_info->host_os_id_like, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_VERSION", system_info->host_os_version, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_VERSION_ID", system_info->host_os_version_id, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_DETECTION", system_info->host_os_detection, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_KERNEL_NAME", system_info->kernel_name, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT", system_info->host_cores, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CPU_FREQ", system_info->host_cpu_freq, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_TOTAL_RAM", system_info->host_ram_total, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_TOTAL_DISK_SIZE", system_info->host_disk_space, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_KERNEL_VERSION", system_info->kernel_version, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_ARCHITECTURE", system_info->architecture, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_VIRTUALIZATION", system_info->virtualization, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_VIRT_DETECTION", system_info->virt_detection, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CONTAINER", system_info->container, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CONTAINER_DETECTION", system_info->container_detection, &host->host_uuid); - ret += add_host_sysinfo_key_value("NETDATA_HOST_IS_K8S_NODE", system_info->is_k8s_node, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_NAME", 
system_info->container_os_name, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_ID", system_info->container_os_id, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_ID_LIKE", system_info->container_os_id_like, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_VERSION", system_info->container_os_version, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_VERSION_ID", system_info->container_os_version_id, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_DETECTION", system_info->host_os_detection, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_NAME", system_info->host_os_name, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_ID", system_info->host_os_id, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_ID_LIKE", system_info->host_os_id_like, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_VERSION", system_info->host_os_version, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_VERSION_ID", system_info->host_os_version_id, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_DETECTION", system_info->host_os_detection, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_KERNEL_NAME", system_info->kernel_name, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT", system_info->host_cores, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CPU_FREQ", system_info->host_cpu_freq, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_TOTAL_RAM", system_info->host_ram_total, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_TOTAL_DISK_SIZE", system_info->host_disk_space, &host->host_id.uuid); + ret += 
add_host_sysinfo_key_value("NETDATA_SYSTEM_KERNEL_VERSION", system_info->kernel_version, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_ARCHITECTURE", system_info->architecture, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_VIRTUALIZATION", system_info->virtualization, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_VIRT_DETECTION", system_info->virt_detection, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CONTAINER", system_info->container, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CONTAINER_DETECTION", system_info->container_detection, &host->host_id.uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_IS_K8S_NODE", system_info->is_k8s_node, &host->host_id.uuid); return !(24 == ret); } @@ -1052,7 +1053,7 @@ static int store_chart_metadata(RRDSET *st) int param = 0; SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, &st->chart_uuid, sizeof(st->chart_uuid), SQLITE_STATIC)); - SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, &st->rrdhost->host_uuid, sizeof(st->rrdhost->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, &st->rrdhost->host_id.uuid, sizeof(st->rrdhost->host_id.uuid), SQLITE_STATIC)); SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_text(res, ++param, string2str(st->parts.type), -1, SQLITE_STATIC)); SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_text(res, ++param, string2str(st->parts.id), -1, SQLITE_STATIC)); @@ -1915,13 +1916,13 @@ static void start_metadata_hosts(uv_work_t *req __maybe_unused) if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_LABELS))) { rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_LABELS); - int rc = exec_statement_with_uuid(SQL_DELETE_HOST_LABELS, &host->host_uuid); + int rc = exec_statement_with_uuid(SQL_DELETE_HOST_LABELS, &host->host_id.uuid); if (likely(!rc)) { query_counter++; buffer_flush(work_buffer); struct query_build tmp = 
{.sql = work_buffer, .count = 0}; - uuid_unparse_lower(host->host_uuid, tmp.uuid_str); + uuid_unparse_lower(host->host_id.uuid, tmp.uuid_str); rrdlabels_walkthrough_read(host->rrdlabels, host_label_store_to_sql_callback, &tmp); buffer_strcat(work_buffer, " ON CONFLICT (host_id, label_key) DO UPDATE SET source_type = excluded.source_type, label_value=excluded.label_value, date_created=UNIXEPOCH()"); rc = db_execute(db_meta, buffer_tostring(work_buffer)); @@ -1943,9 +1944,9 @@ static void start_metadata_hosts(uv_work_t *req __maybe_unused) int rc; ND_UUID uuid = claim_id_get_uuid(); if(!UUIDiszero(uuid)) - rc = store_claim_id(&host->host_uuid, &uuid.uuid); + rc = store_claim_id(&host->host_id.uuid, &uuid.uuid); else - rc = store_claim_id(&host->host_uuid, NULL); + rc = store_claim_id(&host->host_id.uuid, NULL); if (unlikely(rc)) rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_CLAIMID | RRDHOST_FLAG_METADATA_UPDATE); diff --git a/src/go/plugin/go.d/agent/agent.go b/src/go/plugin/go.d/agent/agent.go index 2423e84e05e26a..b44021bb693f5d 100644 --- a/src/go/plugin/go.d/agent/agent.go +++ b/src/go/plugin/go.d/agent/agent.go @@ -22,7 +22,6 @@ import ( "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/netdataapi" "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/safewriter" - "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/vnodes" "github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/multipath" "github.com/mattn/go-isatty" @@ -187,9 +186,7 @@ func (a *Agent) run(ctx context.Context) { jobMgr.ConfigDefaults = discCfg.Registry jobMgr.FnReg = fnMgr - if reg := a.setupVnodeRegistry(); reg == nil || reg.Len() == 0 { - vnodes.Disabled = true - } else { + if reg := a.setupVnodeRegistry(); reg != nil && reg.Len() > 0 { jobMgr.Vnodes = reg } diff --git a/src/go/plugin/go.d/agent/module/job.go b/src/go/plugin/go.d/agent/module/job.go index 67fae8aa205007..2121dec0d6747e 100644 --- 
a/src/go/plugin/go.d/agent/module/job.go +++ b/src/go/plugin/go.d/agent/module/job.go @@ -17,7 +17,6 @@ import ( "github.com/netdata/netdata/go/plugins/logger" "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/netdataapi" - "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/vnodes" ) var obsoleteLock = &sync.Mutex{} @@ -299,13 +298,11 @@ func (j *Job) Cleanup() { return } - if !vnodes.Disabled { - if !j.vnodeCreated && j.vnodeGUID != "" { - _ = j.api.HOSTINFO(j.vnodeGUID, j.vnodeHostname, j.vnodeLabels) - j.vnodeCreated = true - } - _ = j.api.HOST(j.vnodeGUID) + if !j.vnodeCreated && j.vnodeGUID != "" { + _ = j.api.HOSTINFO(j.vnodeGUID, j.vnodeHostname, j.vnodeLabels) + j.vnodeCreated = true } + _ = j.api.HOST(j.vnodeGUID) if j.runChart.created { j.runChart.MarkRemove() @@ -397,15 +394,13 @@ func (j *Job) collect() (result map[string]int64) { } func (j *Job) processMetrics(metrics map[string]int64, startTime time.Time, sinceLastRun int) bool { - if !vnodes.Disabled { - if !j.vnodeCreated && j.vnodeGUID != "" { - _ = j.api.HOSTINFO(j.vnodeGUID, j.vnodeHostname, j.vnodeLabels) - j.vnodeCreated = true - } - - _ = j.api.HOST(j.vnodeGUID) + if !j.vnodeCreated && j.vnodeGUID != "" { + _ = j.api.HOSTINFO(j.vnodeGUID, j.vnodeHostname, j.vnodeLabels) + j.vnodeCreated = true } + _ = j.api.HOST(j.vnodeGUID) + if !ndInternalMonitoringDisabled && !j.runChart.created { j.runChart.ID = fmt.Sprintf("execution_time_of_%s", j.FullName()) j.createChart(j.runChart) diff --git a/src/go/plugin/go.d/agent/vnodes/vnodes.go b/src/go/plugin/go.d/agent/vnodes/vnodes.go index 9272f1514ec6c2..de16f79062be29 100644 --- a/src/go/plugin/go.d/agent/vnodes/vnodes.go +++ b/src/go/plugin/go.d/agent/vnodes/vnodes.go @@ -14,8 +14,6 @@ import ( "gopkg.in/yaml.v2" ) -var Disabled = false // TODO: remove after Netdata v1.39.0. Fix for "from source" stable-channel installations. 
- func New(confDir string) *Vnodes { vn := &Vnodes{ Logger: logger.New().With( @@ -61,11 +59,39 @@ func (vn *Vnodes) readConfDir() { return nil } - if !d.Type().IsRegular() || !isConfigFile(path) { + if d.Type()&os.ModeSymlink != 0 { + dst, err := os.Readlink(path) + if err != nil { + vn.Warningf("failed to resolve symlink '%s': %v", path, err) + return nil + } + + if !filepath.IsAbs(dst) { + dst = filepath.Join(filepath.Dir(path), filepath.Clean(dst)) + } + + fi, err := os.Stat(dst) + if err != nil { + vn.Warningf("failed to stat resolved path '%s': %v", dst, err) + return nil + } + if !fi.Mode().IsRegular() { + vn.Debugf("'%s' is not a regular file, skipping it", dst) + return nil + } + path = dst + } else if !d.Type().IsRegular() { + vn.Debugf("'%s' is not a regular file, skipping it", path) + return nil + } + + if !isConfigFile(path) { + vn.Debugf("'%s' is not a config file (wrong extension), skipping it", path) return nil } var cfg []VirtualNode + if err := loadConfigFile(&cfg, path); err != nil { vn.Warning(err) return nil diff --git a/src/go/plugin/go.d/modules/ap/ap_test.go b/src/go/plugin/go.d/modules/ap/ap_test.go index 237e00e9e1c765..2fe9c043a2c611 100644 --- a/src/go/plugin/go.d/modules/ap/ap_test.go +++ b/src/go/plugin/go.d/modules/ap/ap_test.go @@ -211,25 +211,11 @@ func TestAP_Collect(t *testing.T) { mx := ap.Collect() assert.Equal(t, test.wantMetrics, mx) - assert.Equal(t, test.wantCharts, len(*ap.Charts()), "Charts") - testMetricsHasAllChartsDims(t, ap, mx) - }) - } -} -func testMetricsHasAllChartsDims(t *testing.T, ap *AP, mx map[string]int64) { - for _, chart := range *ap.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } + assert.Equal(t, 
test.wantCharts, len(*ap.Charts()), "wantCharts") + + module.TestMetricsHasAllChartsDims(t, ap.Charts(), mx) + }) } } diff --git a/src/go/plugin/go.d/modules/clickhouse/clickhouse_test.go b/src/go/plugin/go.d/modules/clickhouse/clickhouse_test.go index c3defbda73ffce..e613c2aa3326fd 100644 --- a/src/go/plugin/go.d/modules/clickhouse/clickhouse_test.go +++ b/src/go/plugin/go.d/modules/clickhouse/clickhouse_test.go @@ -243,25 +243,14 @@ func TestClickHouse_Collect(t *testing.T) { mx := click.Collect() require.Equal(t, test.wantMetrics, mx) + if len(test.wantMetrics) > 0 { - testMetricsHasAllChartsDims(t, click, mx) + module.TestMetricsHasAllChartsDims(t, click.Charts(), mx) } }) } } -func testMetricsHasAllChartsDims(t *testing.T, click *ClickHouse, mx map[string]int64) { - for _, chart := range *click.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - } -} - func prepareCaseOk(t *testing.T) (*ClickHouse, func()) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/cockroachdb/cockroachdb_test.go b/src/go/plugin/go.d/modules/cockroachdb/cockroachdb_test.go index 886b65fab6c8b1..9d7e21dc1db3ee 100644 --- a/src/go/plugin/go.d/modules/cockroachdb/cockroachdb_test.go +++ b/src/go/plugin/go.d/modules/cockroachdb/cockroachdb_test.go @@ -221,9 +221,11 @@ func TestCockroachDB_Collect(t *testing.T) { "valcount": 124081, } - collected := cdb.Collect() - assert.Equal(t, expected, collected) - testCharts(t, cdb, collected) + mx := cdb.Collect() + + assert.Equal(t, expected, mx) + + module.TestMetricsHasAllChartsDims(t, cdb.Charts(), mx) } func TestCockroachDB_Collect_ReturnsNilIfNotCockroachDBMetrics(t *testing.T) { @@ -254,23 +256,6 @@ func TestCockroachDB_Collect_ReturnsNilIfReceiveResponse404(t *testing.T) { assert.Nil(t, cdb.Collect()) } -func testCharts(t *testing.T, cdb 
*CockroachDB, collected map[string]int64) { - ensureCollectedHasAllChartsDimsVarsIDs(t, cdb, collected) -} - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, c *CockroachDB, collected map[string]int64) { - for _, chart := range *c.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareCockroachDB() *CockroachDB { cdb := New() cdb.URL = "http://127.0.0.1:38001/metrics" diff --git a/src/go/plugin/go.d/modules/dmcache/dmcache_test.go b/src/go/plugin/go.d/modules/dmcache/dmcache_test.go index 218ae044ceadee..d4b730081a69a8 100644 --- a/src/go/plugin/go.d/modules/dmcache/dmcache_test.go +++ b/src/go/plugin/go.d/modules/dmcache/dmcache_test.go @@ -192,21 +192,11 @@ func TestLVM_Collect(t *testing.T) { mx := dmcache.Collect() assert.Equal(t, test.wantMetrics, mx) - assert.Len(t, *dmcache.Charts(), test.wantCharts) - testMetricsHasAllChartsDims(t, dmcache, mx) - }) - } -} -func testMetricsHasAllChartsDims(t *testing.T, dmcache *DmCache, mx map[string]int64) { - for _, chart := range *dmcache.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } + assert.Len(t, *dmcache.Charts(), test.wantCharts, "wantCharts") + + module.TestMetricsHasAllChartsDims(t, dmcache.Charts(), mx) + }) } } diff --git a/src/go/plugin/go.d/modules/fail2ban/fail2ban_test.go b/src/go/plugin/go.d/modules/fail2ban/fail2ban_test.go index ae84959bdd67d7..283a73421d8e62 100644 --- a/src/go/plugin/go.d/modules/fail2ban/fail2ban_test.go +++ b/src/go/plugin/go.d/modules/fail2ban/fail2ban_test.go @@ -170,30 +170,16 @@ func TestFail2Ban_Collect(t *testing.T) 
{ mx := f2b.Collect() assert.Equal(t, test.wantMetrics, mx) + if len(test.wantMetrics) > 0 { - assert.Len(t, *f2b.Charts(), len(jailChartsTmpl)*2) - testMetricsHasAllChartsDims(t, f2b, mx) + assert.Len(t, *f2b.Charts(), len(jailChartsTmpl)*2, "wantCharts") + + module.TestMetricsHasAllChartsDims(t, f2b.Charts(), mx) } }) } } -func testMetricsHasAllChartsDims(t *testing.T, f2b *Fail2Ban, mx map[string]int64) { - for _, chart := range *f2b.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareMockOk() *mockFail2BanClientCliExec { return &mockFail2BanClientCliExec{ statusData: dataStatus, diff --git a/src/go/plugin/go.d/modules/filecheck/filecheck_test.go b/src/go/plugin/go.d/modules/filecheck/filecheck_test.go index 43024b0bc7003d..76777c854ea395 100644 --- a/src/go/plugin/go.d/modules/filecheck/filecheck_test.go +++ b/src/go/plugin/go.d/modules/filecheck/filecheck_test.go @@ -244,21 +244,11 @@ func TestFilecheck_Collect(t *testing.T) { mx := fc.Collect() copyModTime(test.wantCollected, mx) + assert.Equal(t, test.wantCollected, mx) - testMetricsHasAllChartsDims(t, fc, mx) - }) - } -} -func testMetricsHasAllChartsDims(t *testing.T, fc *Filecheck, mx map[string]int64) { - for _, chart := range *fc.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "mx metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } + module.TestMetricsHasAllChartsDims(t, fc.Charts(), mx) + }) } } diff --git a/src/go/plugin/go.d/modules/hddtemp/hddtemp_test.go b/src/go/plugin/go.d/modules/hddtemp/hddtemp_test.go index d20d79edbf9506..97f32305773a2f 100644 --- 
a/src/go/plugin/go.d/modules/hddtemp/hddtemp_test.go +++ b/src/go/plugin/go.d/modules/hddtemp/hddtemp_test.go @@ -242,26 +242,13 @@ func TestHddTemp_Collect(t *testing.T) { mx := hdd.Collect() assert.Equal(t, test.wantMetrics, mx) - assert.Len(t, *hdd.Charts(), test.wantCharts) - assert.Equal(t, test.wantDisconnect, mock.disconnectCalled) - testMetricsHasAllChartsDims(t, hdd, mx) - }) - } -} -func testMetricsHasAllChartsDims(t *testing.T, hdd *HddTemp, mx map[string]int64) { - for _, chart := range *hdd.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } + assert.Len(t, *hdd.Charts(), test.wantCharts, "wantCharts") + + assert.Equal(t, test.wantDisconnect, mock.disconnectCalled, "disconnectCalled") + + module.TestMetricsHasAllChartsDims(t, hdd.Charts(), mx) + }) } } diff --git a/src/go/plugin/go.d/modules/hpssa/hpssa_test.go b/src/go/plugin/go.d/modules/hpssa/hpssa_test.go index a3e90d2a788869..001e62ca43463c 100644 --- a/src/go/plugin/go.d/modules/hpssa/hpssa_test.go +++ b/src/go/plugin/go.d/modules/hpssa/hpssa_test.go @@ -352,8 +352,10 @@ func TestHpssa_Collect(t *testing.T) { mx := hpe.Collect() assert.Equal(t, test.wantMetrics, mx) - assert.Len(t, *hpe.Charts(), test.wantCharts) - testMetricsHasAllChartsDims(t, hpe, mx) + + assert.Len(t, *hpe.Charts(), test.wantCharts, "wantCharts") + + module.TestMetricsHasAllChartsDims(t, hpe.Charts(), mx) }) } } @@ -412,19 +414,3 @@ func (m *mockSsacliExec) controllersInfo() ([]byte, error) { } return m.infoData, nil } - -func testMetricsHasAllChartsDims(t *testing.T, hpe *Hpssa, mx map[string]int64) { - for _, chart := range *hpe.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := 
mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} diff --git a/src/go/plugin/go.d/modules/ipfs/ipfs_test.go b/src/go/plugin/go.d/modules/ipfs/ipfs_test.go index 5e353a1bc22f32..aebdbb5e9fd382 100644 --- a/src/go/plugin/go.d/modules/ipfs/ipfs_test.go +++ b/src/go/plugin/go.d/modules/ipfs/ipfs_test.go @@ -165,29 +165,14 @@ func TestIPFS_Collect(t *testing.T) { mx := ipfs.Collect() require.Equal(t, test.wantMetrics, mx) + if len(test.wantMetrics) > 0 { - testMetricsHasAllChartsDims(t, ipfs, mx) + module.TestMetricsHasAllChartsDims(t, ipfs.Charts(), mx) } }) } } -func testMetricsHasAllChartsDims(t *testing.T, ipfs *IPFS, mx map[string]int64) { - for _, chart := range *ipfs.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareCaseOkDefault(t *testing.T) (*IPFS, func()) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/litespeed/litespeed_test.go b/src/go/plugin/go.d/modules/litespeed/litespeed_test.go index 576609dca69d48..50c91b25d29409 100644 --- a/src/go/plugin/go.d/modules/litespeed/litespeed_test.go +++ b/src/go/plugin/go.d/modules/litespeed/litespeed_test.go @@ -128,29 +128,14 @@ func TestLitespeed_Collect(t *testing.T) { mx := lite.Collect() assert.Equal(t, test.wantMetrics, mx) + if len(test.wantMetrics) > 0 { - testMetricsHasAllChartsDims(t, lite, mx) + module.TestMetricsHasAllChartsDims(t, lite.Charts(), mx) } }) } } -func testMetricsHasAllChartsDims(t *testing.T, lite 
*Litespeed, mx map[string]int64) { - for _, chart := range *lite.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareLitespeedOk() *Litespeed { lite := New() lite.ReportsDir = "testdata" diff --git a/src/go/plugin/go.d/modules/puppet/puppet_test.go b/src/go/plugin/go.d/modules/puppet/puppet_test.go index 7c80a638adc7a4..2e3ae2df9e1985 100644 --- a/src/go/plugin/go.d/modules/puppet/puppet_test.go +++ b/src/go/plugin/go.d/modules/puppet/puppet_test.go @@ -151,29 +151,14 @@ func TestPuppet_Collect(t *testing.T) { mx := puppet.Collect() require.Equal(t, test.wantMetrics, mx) + if len(test.wantMetrics) > 0 { - testMetricsHasAllChartsDims(t, puppet, mx) + module.TestMetricsHasAllChartsDims(t, puppet.Charts(), mx) } }) } } -func testMetricsHasAllChartsDims(t *testing.T, puppet *Puppet, mx map[string]int64) { - for _, chart := range *puppet.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareCaseOkDefault(t *testing.T) (*Puppet, func()) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/rspamd/rspamd_test.go b/src/go/plugin/go.d/modules/rspamd/rspamd_test.go index 0c8cc8e5ba9a65..9564c574e771b5 100644 --- a/src/go/plugin/go.d/modules/rspamd/rspamd_test.go +++ b/src/go/plugin/go.d/modules/rspamd/rspamd_test.go @@ -156,29 +156,14 @@ func TestRspamd_Collect(t *testing.T) { mx := 
rsp.Collect() require.Equal(t, test.wantMetrics, mx) + if len(test.wantMetrics) > 0 { - testMetricsHasAllChartsDims(t, rsp, mx) + module.TestMetricsHasAllChartsDims(t, rsp.Charts(), mx) } }) } } -func testMetricsHasAllChartsDims(t *testing.T, rsp *Rspamd, mx map[string]int64) { - for _, chart := range *rsp.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareCaseOk(t *testing.T) (*Rspamd, func()) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/scaleio/scaleio_test.go b/src/go/plugin/go.d/modules/scaleio/scaleio_test.go index bb906333e5a47d..4b4a5905e99414 100644 --- a/src/go/plugin/go.d/modules/scaleio/scaleio_test.go +++ b/src/go/plugin/go.d/modules/scaleio/scaleio_test.go @@ -298,9 +298,11 @@ func TestScaleIO_Collect(t *testing.T) { "system_total_iops_write": 617200, } - collected := scaleIO.Collect() - assert.Equal(t, expected, collected) - testCharts(t, scaleIO, collected) + mx := scaleIO.Collect() + + assert.Equal(t, expected, mx) + + testCharts(t, scaleIO, mx) } func TestScaleIO_Collect_ConnectionRefused(t *testing.T) { @@ -317,7 +319,7 @@ func testCharts(t *testing.T, scaleIO *ScaleIO, collected map[string]int64) { t.Helper() ensureStoragePoolChartsAreCreated(t, scaleIO) ensureSdcChartsAreCreated(t, scaleIO) - ensureCollectedHasAllChartsDimsVarsIDs(t, scaleIO, collected) + module.TestMetricsHasAllChartsDims(t, scaleIO.Charts(), collected) } func ensureStoragePoolChartsAreCreated(t *testing.T, scaleIO *ScaleIO) { @@ -336,19 +338,6 @@ func ensureSdcChartsAreCreated(t *testing.T, scaleIO *ScaleIO) { } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, scaleIO *ScaleIO, collected 
map[string]int64) { - for _, chart := range *scaleIO.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareSrvMockScaleIO(t *testing.T) (*httptest.Server, *client.MockScaleIOAPIServer, *ScaleIO) { t.Helper() const ( diff --git a/src/go/plugin/go.d/modules/sensors/charts.go b/src/go/plugin/go.d/modules/sensors/charts.go index 05081e1ad47094..b298e096c05300 100644 --- a/src/go/plugin/go.d/modules/sensors/charts.go +++ b/src/go/plugin/go.d/modules/sensors/charts.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" + "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/sensors/lmsensors" ) const ( @@ -17,6 +18,7 @@ const ( prioSensorFan prioSensorEnergy prioSensorHumidity + prioSensorIntrusion ) var sensorTemperatureChartTmpl = module.Chart{ @@ -110,10 +112,24 @@ var sensorHumidityChartTmpl = module.Chart{ }, } -func (s *Sensors) addSensorChart(sn sensorStats) { +var sensorIntrusionChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_intrusion", + Title: "Sensor intrusion", + Units: "status", + Fam: "intrusion", + Ctx: "sensors.sensor_intrusion", + Type: module.Line, + Priority: prioSensorIntrusion, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s_alarm_off", Name: "alarm_off"}, + {ID: "sensor_chip_%s_feature_%s_subfeature_%s_alarm_on", Name: "alarm_on"}, + }, +} + +func (s *Sensors) addExecSensorChart(sn execSensor) { var chart *module.Chart - switch sensorType(sn) { + switch sn.sensorType() { case sensorTypeTemp: chart = sensorTemperatureChartTmpl.Copy() case sensorTypeVoltage: @@ -148,6 +164,51 @@ func (s *Sensors) addSensorChart(sn sensorStats) { } } +func (s *Sensors) 
addSysfsSensorChart(devName string, sn lmsensors.Sensor) { + var chart *module.Chart + var feat, subfeat string + devName = snakeCase(devName) + + switch v := sn.(type) { + case *lmsensors.TemperatureSensor: + chart = sensorTemperatureChartTmpl.Copy() + feat, subfeat = firstNotEmpty(v.Label, v.Name), v.Name+"_input" + case *lmsensors.VoltageSensor: + chart = sensorVoltageChartTmpl.Copy() + feat, subfeat = firstNotEmpty(v.Label, v.Name), v.Name+"_input" + case *lmsensors.CurrentSensor: + chart = sensorCurrentChartTmpl.Copy() + feat, subfeat = firstNotEmpty(v.Label, v.Name), v.Name+"_input" + case *lmsensors.PowerSensor: + chart = sensorPowerChartTmpl.Copy() + feat, subfeat = firstNotEmpty(v.Label, v.Name), v.Name+"_average" + case *lmsensors.FanSensor: + chart = sensorFanChartTmpl.Copy() + feat, subfeat = firstNotEmpty(v.Label, v.Name), v.Name+"_input" + case *lmsensors.IntrusionSensor: + chart = sensorIntrusionChartTmpl.Copy() + feat, subfeat = firstNotEmpty(v.Label, v.Name), v.Name+"_alarm" + default: + return + } + + origFeat := feat + feat, subfeat = snakeCase(feat), snakeCase(subfeat) + + chart.ID = fmt.Sprintf(chart.ID, devName, feat, subfeat) + chart.Labels = []module.Label{ + {Key: "chip", Value: devName}, + {Key: "feature", Value: origFeat}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, devName, feat, subfeat) + } + + if err := s.Charts().Add(chart); err != nil { + s.Warning(err) + } +} + func (s *Sensors) removeSensorChart(px string) { for _, chart := range *s.Charts() { if strings.HasPrefix(chart.ID, px) { diff --git a/src/go/plugin/go.d/modules/sensors/collect.go b/src/go/plugin/go.d/modules/sensors/collect.go index 46e900ad0a31a6..e3dfc93b3db3a8 100644 --- a/src/go/plugin/go.d/modules/sensors/collect.go +++ b/src/go/plugin/go.d/modules/sensors/collect.go @@ -2,178 +2,11 @@ package sensors -import ( - "bufio" - "bytes" - "errors" - "fmt" - "strconv" - "strings" -) - -type sensorStats struct { - chip string - feature string - 
subfeature string - value string -} - -func (s *sensorStats) String() string { - return fmt.Sprintf("chip:%s feat:%s subfeat:%s value:%s", s.chip, s.feature, s.subfeature, s.value) -} - -const ( - sensorTypeTemp = "temperature" - sensorTypeVoltage = "voltage" - sensorTypePower = "power" - sensorTypeHumidity = "humidity" - sensorTypeFan = "fan" - sensorTypeCurrent = "current" - sensorTypeEnergy = "energy" -) - const precision = 1000 func (s *Sensors) collect() (map[string]int64, error) { - bs, err := s.exec.sensorsInfo() - if err != nil { - return nil, err - } - - if len(bs) == 0 { - return nil, errors.New("empty response from sensors") - } - - sensors, err := parseSensors(bs) - if err != nil { - return nil, err - } - if len(sensors) == 0 { - return nil, errors.New("no sensors found") - } - - mx := make(map[string]int64) - seen := make(map[string]bool) - - for _, sn := range sensors { - // TODO: Most likely we need different values depending on the type of sensor. - if !strings.HasSuffix(sn.subfeature, "_input") { - s.Debugf("skipping non input sensor: '%s'", sn) - continue - } - - v, err := strconv.ParseFloat(sn.value, 64) - if err != nil { - s.Debugf("parsing value for sensor '%s': %v", sn, err) - continue - } - - if sensorType(sn) == "" { - s.Debugf("can not find type for sensor '%s'", sn) - continue - } - - if minVal, maxVal, ok := sensorLimits(sn); ok && (v < minVal || v > maxVal) { - s.Debugf("value outside limits [%d/%d] for sensor '%s'", int64(minVal), int64(maxVal), sn) - continue - } - - key := fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s", sn.chip, sn.feature, sn.subfeature) - key = snakeCase(key) - if !s.sensors[key] { - s.sensors[key] = true - s.addSensorChart(sn) - } - - seen[key] = true - - mx[key] = int64(v * precision) + if s.exec != nil { + return s.collectExec() } - - for k := range s.sensors { - if !seen[k] { - delete(s.sensors, k) - s.removeSensorChart(k) - } - } - - return mx, nil -} - -func snakeCase(n string) string { - return 
strings.ToLower(strings.ReplaceAll(n, " ", "_")) -} - -func sensorLimits(sn sensorStats) (minVal float64, maxVal float64, ok bool) { - switch sensorType(sn) { - case sensorTypeTemp: - return -127, 1000, true - case sensorTypeVoltage: - return -400, 400, true - case sensorTypeCurrent: - return -127, 127, true - case sensorTypeFan: - return 0, 65535, true - default: - return 0, 0, false - } -} - -func sensorType(sn sensorStats) string { - switch { - case strings.HasPrefix(sn.subfeature, "temp"): - return sensorTypeTemp - case strings.HasPrefix(sn.subfeature, "in"): - return sensorTypeVoltage - case strings.HasPrefix(sn.subfeature, "power"): - return sensorTypePower - case strings.HasPrefix(sn.subfeature, "humidity"): - return sensorTypeHumidity - case strings.HasPrefix(sn.subfeature, "fan"): - return sensorTypeFan - case strings.HasPrefix(sn.subfeature, "curr"): - return sensorTypeCurrent - case strings.HasPrefix(sn.subfeature, "energy"): - return sensorTypeEnergy - default: - return "" - } -} - -func parseSensors(output []byte) ([]sensorStats, error) { - var sensors []sensorStats - - sc := bufio.NewScanner(bytes.NewReader(output)) - - var chip, feat string - - for sc.Scan() { - text := sc.Text() - if text == "" { - chip, feat = "", "" - continue - } - - switch { - case strings.HasPrefix(text, " ") && chip != "" && feat != "": - parts := strings.Split(text, ":") - if len(parts) != 2 { - continue - } - subfeat, value := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) - sensors = append(sensors, sensorStats{ - chip: chip, - feature: feat, - subfeature: subfeat, - value: value, - }) - case strings.HasSuffix(text, ":") && chip != "": - feat = strings.TrimSpace(strings.TrimSuffix(text, ":")) - default: - chip = text - feat = "" - } - } - - return sensors, nil + return s.collectSysfs() } diff --git a/src/go/plugin/go.d/modules/sensors/collect_exec.go b/src/go/plugin/go.d/modules/sensors/collect_exec.go new file mode 100644 index 00000000000000..4fbbb7faf83eb5 --- 
/dev/null +++ b/src/go/plugin/go.d/modules/sensors/collect_exec.go @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "strconv" + "strings" +) + +const ( + sensorTypeTemp = "temperature" + sensorTypeVoltage = "voltage" + sensorTypePower = "power" + sensorTypeHumidity = "humidity" + sensorTypeFan = "fan" + sensorTypeCurrent = "current" + sensorTypeEnergy = "energy" +) + +type execSensor struct { + chip string + feature string + subfeature string + value string +} + +func (s *execSensor) String() string { + return fmt.Sprintf("chip:%s feat:%s subfeat:%s value:%s", s.chip, s.feature, s.subfeature, s.value) +} + +func (s *execSensor) sensorType() string { + switch { + case strings.HasPrefix(s.subfeature, "temp"): + return sensorTypeTemp + case strings.HasPrefix(s.subfeature, "in"): + return sensorTypeVoltage + case strings.HasPrefix(s.subfeature, "power"): + return sensorTypePower + case strings.HasPrefix(s.subfeature, "humidity"): + return sensorTypeHumidity + case strings.HasPrefix(s.subfeature, "fan"): + return sensorTypeFan + case strings.HasPrefix(s.subfeature, "curr"): + return sensorTypeCurrent + case strings.HasPrefix(s.subfeature, "energy"): + return sensorTypeEnergy + default: + return "" + } +} + +func (s *execSensor) limits() (minVal float64, maxVal float64, ok bool) { + switch s.sensorType() { + case sensorTypeTemp: + return -127, 1000, true + case sensorTypeVoltage: + return -400, 400, true + case sensorTypeCurrent: + return -127, 127, true + case sensorTypeFan: + return 0, 65535, true + default: + return 0, 0, false + } +} + +func (s *Sensors) collectExec() (map[string]int64, error) { + if s.exec == nil { + return nil, errors.New("exec sensor is not initialized") + } + + s.Debugf("using sensors binary to collect metrics") + + bs, err := s.exec.sensorsInfo() + if err != nil { + return nil, err + } + + if len(bs) == 0 { + return nil, errors.New("empty response from 
sensors") + } + + sensors, err := parseExecSensors(bs) + if err != nil { + return nil, err + } + if len(sensors) == 0 { + return nil, errors.New("no sensors found") + } + + mx := make(map[string]int64) + seen := make(map[string]bool) + + for _, sn := range sensors { + sx := "_input" + if sn.sensorType() == sensorTypePower { + sx = "_average" + } + + if !strings.HasSuffix(sn.subfeature, sx) { + s.Debugf("skipping sensor: '%s'", sn) + continue + } + + v, err := strconv.ParseFloat(sn.value, 64) + if err != nil { + s.Debugf("parsing value for sensor '%s': %v", sn, err) + continue + } + + if sn.sensorType() == "" { + s.Debugf("can not find type for sensor '%s'", sn) + continue + } + + if minVal, maxVal, ok := sn.limits(); ok && (v < minVal || v > maxVal) { + s.Debugf("value outside limits [%d/%d] for sensor '%s'", int64(minVal), int64(maxVal), sn) + continue + } + + key := fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s", sn.chip, sn.feature, sn.subfeature) + key = snakeCase(key) + + if !s.sensors[key] { + s.sensors[key] = true + s.addExecSensorChart(sn) + } + + seen[key] = true + + mx[key] = int64(v * precision) + } + + for k := range s.sensors { + if !seen[k] { + delete(s.sensors, k) + s.removeSensorChart(k) + } + } + + return mx, nil +} + +func snakeCase(n string) string { + return strings.ToLower(strings.ReplaceAll(n, " ", "_")) +} + +func parseExecSensors(output []byte) ([]execSensor, error) { + var sensors []execSensor + + sc := bufio.NewScanner(bytes.NewReader(output)) + + var chip, feat string + + for sc.Scan() { + text := sc.Text() + if text == "" { + chip, feat = "", "" + continue + } + + switch { + case strings.HasPrefix(text, " ") && chip != "" && feat != "": + parts := strings.Split(text, ":") + if len(parts) != 2 { + continue + } + subfeat, value := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + sensors = append(sensors, execSensor{ + chip: chip, + feature: feat, + subfeature: subfeat, + value: value, + }) + case strings.HasSuffix(text, 
":") && chip != "": + feat = strings.TrimSpace(strings.TrimSuffix(text, ":")) + default: + chip = text + feat = "" + } + } + + return sensors, nil +} diff --git a/src/go/plugin/go.d/modules/sensors/collect_sysfs.go b/src/go/plugin/go.d/modules/sensors/collect_sysfs.go new file mode 100644 index 00000000000000..bb2921fd3be71f --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/collect_sysfs.go @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + "errors" + "fmt" + + "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/sensors/lmsensors" +) + +func (s *Sensors) collectSysfs() (map[string]int64, error) { + if s.sc == nil { + return nil, errors.New("sysfs scanner is not initialized") + } + + s.Debugf("using sysfs scan to collect metrics") + + devices, err := s.sc.Scan() + if err != nil { + return nil, err + } + + if len(devices) == 0 { + return nil, errors.New("sysfs scanner: devices found") + } + + seen := make(map[string]bool) + mx := make(map[string]int64) + + for _, dev := range devices { + for _, sn := range dev.Sensors { + var key string + + switch v := sn.(type) { + case *lmsensors.TemperatureSensor: + key = snakeCase(fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s_input", dev.Name, firstNotEmpty(v.Label, v.Name), v.Name)) + mx[key] = int64(v.Input * precision) + case *lmsensors.VoltageSensor: + key = snakeCase(fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s_input", dev.Name, firstNotEmpty(v.Label, v.Name), v.Name)) + mx[key] = int64(v.Input * precision) + case *lmsensors.CurrentSensor: + key = snakeCase(fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s_input", dev.Name, firstNotEmpty(v.Label, v.Name), v.Name)) + mx[key] = int64(v.Input * precision) + case *lmsensors.PowerSensor: + key = snakeCase(fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s_average", dev.Name, firstNotEmpty(v.Label, v.Name), v.Name)) + mx[key] = int64(v.Average * precision) + case *lmsensors.FanSensor: + key = 
snakeCase(fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s_input", dev.Name, firstNotEmpty(v.Label, v.Name), v.Name)) + mx[key] = int64(v.Input * precision) + case *lmsensors.IntrusionSensor: + key = snakeCase(fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s_alarm", dev.Name, firstNotEmpty(v.Label, v.Name), v.Name)) + mx[key+"_on"] = boolToInt(v.Alarm) + mx[key+"_off"] = boolToInt(!v.Alarm) + default: + s.Debugf("unexpected sensor type: %T", v) + continue + } + + seen[key] = true + + if !s.sensors[key] { + s.sensors[key] = true + s.addSysfsSensorChart(dev.Name, sn) + } + } + } + + if len(mx) == 0 { + return nil, errors.New("sysfs scanner: no metrics collected") + } + + for k := range s.sensors { + if !seen[k] { + delete(s.sensors, k) + s.removeSensorChart(k) + } + } + + return mx, nil +} + +func firstNotEmpty(s ...string) string { + for _, v := range s { + if v != "" { + return v + } + } + return "" +} + +func boolToInt(b bool) int64 { + if b { + return 1 + } + return 0 +} diff --git a/src/go/plugin/go.d/modules/sensors/config_schema.json b/src/go/plugin/go.d/modules/sensors/config_schema.json index 6c12ca9b8d6421..96f728f8fed9cc 100644 --- a/src/go/plugin/go.d/modules/sensors/config_schema.json +++ b/src/go/plugin/go.d/modules/sensors/config_schema.json @@ -13,7 +13,7 @@ }, "binary_path": { "title": "Binary path", - "description": "Path to the `sensors` binary.", + "description": "Path to the `sensors` binary. 
If left empty or if the binary is not found, [**sysfs**](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface) will be used to collect sensor statistics.", "type": "string", "default": "/usr/bin/sensors" }, @@ -26,7 +26,6 @@ } }, "required": [ - "binary_path" ], "additionalProperties": false, "patternProperties": { diff --git a/src/go/plugin/go.d/modules/sensors/init.go b/src/go/plugin/go.d/modules/sensors/init.go index 6753693da5ad43..f79b62d8cab1c3 100644 --- a/src/go/plugin/go.d/modules/sensors/init.go +++ b/src/go/plugin/go.d/modules/sensors/init.go @@ -3,20 +3,16 @@ package sensors import ( - "errors" "os" "os/exec" "strings" ) -func (s *Sensors) validateConfig() error { +func (s *Sensors) initSensorsBinary() (sensorsBinary, error) { if s.BinaryPath == "" { - return errors.New("no sensors binary path specified") + return nil, nil } - return nil -} -func (s *Sensors) initSensorsCliExec() (sensorsCLI, error) { binPath := s.BinaryPath if !strings.HasPrefix(binPath, "/") { diff --git a/src/go/plugin/go.d/modules/sensors/integrations/linux_sensors_lm-sensors.md b/src/go/plugin/go.d/modules/sensors/integrations/linux_sensors_lm-sensors.md index d5e948c4281bd4..a41c6bb4cf55b4 100644 --- a/src/go/plugin/go.d/modules/sensors/integrations/linux_sensors_lm-sensors.md +++ b/src/go/plugin/go.d/modules/sensors/integrations/linux_sensors_lm-sensors.md @@ -21,7 +21,7 @@ Module: sensors ## Overview -This collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary. +This collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary or [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface). 
@@ -97,12 +97,7 @@ There are no alerts configured by default for this integration. ### Prerequisites -#### Install lm-sensors - -- Install `lm-sensors` using your distribution's package manager. -- Run `sensors-detect` to detect hardware monitoring chips. - - +No action required. ### Configuration @@ -128,7 +123,7 @@ The following options can be defined globally: update_every. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| | update_every | Data collection frequency. | 10 | no | -| binary_path | Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. | /usr/bin/sensors | yes | +| binary_path | Path to the `sensors` binary. If left empty or if the binary is not found, [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface) will be used to collect sensor statistics. | /usr/bin/sensors | no | | timeout | Timeout for executing the binary, specified in seconds. | 2 | no | @@ -149,6 +144,20 @@ jobs: ``` +##### Use sysfs instead of sensors + +Set `binary_path` to an empty string to use sysfs. + +
Config + +```yaml +jobs: + - name: sensors + binary_path: "" + +``` +
+ ## Troubleshooting diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/LICENSE.md b/src/go/plugin/go.d/modules/sensors/lmsensors/LICENSE.md new file mode 100644 index 00000000000000..031350dc8025b6 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/LICENSE.md @@ -0,0 +1,10 @@ +MIT License +=========== + +Copyright (C) 2016 Matt Layher + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/README.md b/src/go/plugin/go.d/modules/sensors/lmsensors/README.md new file mode 100644 index 00000000000000..e1ed52bbb868c5 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/README.md @@ -0,0 +1,4 @@ +lmsensors +========= + +Modified version of [mdlayher/lmsensors](https://github.com/mdlayher/lmsensors). 
\ No newline at end of file diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/currentsensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/currentsensor.go new file mode 100644 index 00000000000000..31126555823c0b --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/currentsensor.go @@ -0,0 +1,62 @@ +package lmsensors + +import ( + "strconv" +) + +var _ Sensor = &CurrentSensor{} + +// A CurrentSensor is a Sensor that detects current in Amperes. +type CurrentSensor struct { + // The name of the sensor. + Name string + + // A label that describes what the sensor is monitoring. Label may be empty. + Label string + + // Whether the sensor has an alarm triggered. + Alarm bool + + // The input current, in Amperes, indicated by the sensor. + Input float64 + + // The maximum current threshold, in Amperes, indicated by the sensor. + Maximum float64 + + // The critical current threshold, in Amperes, indicated by the sensor. + Critical float64 +} + +func (s *CurrentSensor) Type() SensorType { return SensorTypeCurrent } + +func (s *CurrentSensor) parse(raw map[string]string) error { + for k, v := range raw { + switch k { + case "crit", "input", "max": + f, err := strconv.ParseFloat(v, 64) + if err != nil { + return err + } + + // Raw current values are scaled by 1000 + f /= 1000 + + switch k { + case "crit": + s.Critical = f + case "input": + s.Input = f + case "max": + s.Maximum = f + } + case "alarm": + s.Alarm = v != "0" + case "label": + s.Label = v + } + } + + return nil +} + +func (s *CurrentSensor) name() string { return s.Name } diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/doc.go b/src/go/plugin/go.d/modules/sensors/lmsensors/doc.go new file mode 100644 index 00000000000000..fdc84328431dbd --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/doc.go @@ -0,0 +1,3 @@ +// Package lmsensors provides access to Linux monitoring sensors data, such +// as temperatures, voltage, and fan speeds. 
+package lmsensors diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/fansensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/fansensor.go new file mode 100644 index 00000000000000..12de36bea5b851 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/fansensor.go @@ -0,0 +1,64 @@ +package lmsensors + +import ( + "strconv" +) + +var _ Sensor = &FanSensor{} + +// A FanSensor is a Sensor that detects fan speeds in rotations per minute. +type FanSensor struct { + // The name of the sensor. + Name string + + // A label that describes what the sensor is monitoring. Label may be empty. + Label string + + // Whether the fan speed is below the minimum threshold. + Alarm bool + + // Whether the fan will sound an audible alarm when fan speed is below the minimum threshold. + Beep bool + + // The input fan speed, in rotations per minute, indicated by the sensor. + Input float64 + + // The low threshold fan speed, in rotations per minute, indicated by the sensor. + Minimum float64 + + // The high threshold fan speed, in rotations per minute, indicated by the sensor. 
+ Maximum float64 +} + +func (s *FanSensor) Type() SensorType { return SensorTypeFan } + +func (s *FanSensor) parse(raw map[string]string) error { + for k, v := range raw { + switch k { + case "input", "min", "max": + f, err := strconv.ParseFloat(v, 64) + if err != nil { + return err + } + + switch k { + case "input": + s.Input = f + case "min": + s.Minimum = f + case "max": + s.Maximum = f + } + case "alarm": + s.Alarm = v != "0" + case "beep": + s.Beep = v != "0" + case "label": + s.Label = v + } + } + + return nil +} + +func (s *FanSensor) name() string { return s.Name } diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/intrusionsensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/intrusionsensor.go new file mode 100644 index 00000000000000..ffe4040ba7d348 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/intrusionsensor.go @@ -0,0 +1,32 @@ +package lmsensors + +var _ Sensor = &IntrusionSensor{} + +// An IntrusionSensor is a Sensor that detects when the machine's chassis has been opened. +type IntrusionSensor struct { + // The name of the sensor. + Name string + + // A label that describes what the sensor is monitoring. Label may be empty. + Label string + + // Whether the machine's chassis has been opened, and the alarm has been triggered. 
+ Alarm bool +} + +func (s *IntrusionSensor) Type() SensorType { return SensorTypeIntrusion } + +func (s *IntrusionSensor) parse(raw map[string]string) error { + for k, v := range raw { + switch k { + case "alarm": + s.Alarm = v != "0" + case "label": + s.Label = v + } + } + + return nil +} + +func (s *IntrusionSensor) name() string { return s.Name } diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/powersensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/powersensor.go new file mode 100644 index 00000000000000..545de37e4f8871 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/powersensor.go @@ -0,0 +1,75 @@ +package lmsensors + +import ( + "strconv" + "time" +) + +var _ Sensor = &PowerSensor{} + +// A PowerSensor is a Sensor that detects average electrical power consumption in watts. +type PowerSensor struct { + // The name of the sensor. + Name string + + // A label that describes what the sensor is monitoring. Label may be empty. + Label string + + // The average electrical power consumption, in watts, indicated by the sensor. + Average float64 + + // The interval of time over which the average electrical power consumption is collected. + AverageInterval time.Duration + + // Whether this sensor has a battery. + Battery bool + + // The model number of the sensor. + ModelNumber string + + // Miscellaneous OEM information about the sensor. + OEMInfo string + + // The serial number of the sensor. 
+ SerialNumber string +} + +func (s *PowerSensor) Type() SensorType { return SensorTypePower } + +func (s *PowerSensor) parse(raw map[string]string) error { + for k, v := range raw { + switch k { + case "average": + f, err := strconv.ParseFloat(v, 64) + if err != nil { + return err + } + + // Raw power values are scaled by one million + f /= 1000000 + s.Average = f + case "average_interval": + // Time values in milliseconds + d, err := time.ParseDuration(v + "ms") + if err != nil { + return err + } + + s.AverageInterval = d + case "is_battery": + s.Battery = v != "0" + case "model_number": + s.ModelNumber = v + case "oem_info": + s.OEMInfo = v + case "serial_number": + s.SerialNumber = v + case "label": + s.Label = v + } + } + + return nil +} + +func (s *PowerSensor) name() string { return s.Name } diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/scanner.go b/src/go/plugin/go.d/modules/sensors/lmsensors/scanner.go new file mode 100644 index 00000000000000..dc6af7d9ab5361 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/scanner.go @@ -0,0 +1,245 @@ +package lmsensors + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + "time" + + "github.com/netdata/netdata/go/plugins/logger" +) + +// A filesystem is an interface to a filesystem, used for testing. +type filesystem interface { + ReadFile(filename string) (string, error) + Readlink(name string) (string, error) + Stat(name string) (os.FileInfo, error) + WalkDir(root string, walkFn fs.WalkDirFunc) error +} + +// A Scanner scans for Devices, so data can be read from their Sensors. +type Scanner struct { + *logger.Logger + + fs filesystem +} + +// New creates a new Scanner. +func New() *Scanner { + return &Scanner{ + fs: &systemFilesystem{}, + } +} + +// Scan scans for Devices and their Sensors. 
+func (sc *Scanner) Scan() ([]*Device, error) { + paths, err := sc.detectDevicePaths() + if err != nil { + return nil, err + } + + sc.Debugf("sysfs scanner: found %d paths", len(paths)) + + var devices []*Device + + for _, rootPath := range paths { + sc.Debugf("sysfs scanner: scanning %s", rootPath) + + dev := &Device{} + raw := make(map[string]map[string]string) + + // Walk filesystem paths to fetch devices and sensors + err := sc.fs.WalkDir(rootPath, func(path string, de fs.DirEntry, err error) error { + if err != nil { + return err + } + + if de.IsDir() || !de.Type().IsRegular() { + if de.IsDir() && path != rootPath { + return fs.SkipDir + } + return nil + } + + // Skip some files that can't be read or don't provide useful sensor information + file := filepath.Base(path) + if shouldSkip(file) { + return nil + } + + now := time.Now() + s, err := sc.fs.ReadFile(path) + if err != nil { + return nil + } + sc.Debugf("sysfs scanner: reading file '%s' took %s", path, time.Since(now)) + + if file == "name" { + dev.Name = s + return nil + } + + // Sensor names in format "sensor#_foo", e.g. "temp1_input" + parts := strings.SplitN(file, "_", 2) + if len(parts) != 2 { + return nil + } + + if _, ok := raw[parts[0]]; !ok { + raw[parts[0]] = make(map[string]string) + } + + raw[parts[0]][parts[1]] = s + + return nil + }) + if err != nil { + return nil, err + } + + sensors, err := parseSensors(raw) + if err != nil { + return nil, err + } + + for _, sn := range sensors { + sc.Debugf("sysfs scanner: found sensor %+v", sn) + } + + dev.Sensors = sensors + devices = append(devices, dev) + } + + renameDevices(devices) + + return devices, nil +} + +// renameDevices renames devices in place to prevent duplicate device names, and to number each device. 
+func renameDevices(devices []*Device) { + nameCount := make(map[string]int) + + for i := range devices { + name := devices[i].Name + devices[i].Name = fmt.Sprintf("%s-%02d", + name, + nameCount[name], + ) + nameCount[name]++ + } +} + +// detectDevicePaths performs a filesystem walk to paths where devices may reside on Linux. +func (sc *Scanner) detectDevicePaths() ([]string, error) { + const lookPath = "/sys/class/hwmon" + + var paths []string + err := sc.fs.WalkDir(lookPath, func(path string, de os.DirEntry, err error) error { + if err != nil { + return err + } + + if de.Type()&os.ModeSymlink == 0 { + return nil + } + + dest, err := sc.fs.Readlink(path) + if err != nil { + return err + } + + dest = filepath.Join(lookPath, filepath.Clean(dest)) + + // Symlink destination has a file called name, meaning a sensor exists here and data can be retrieved + fi, err := sc.fs.Stat(filepath.Join(dest, "name")) + if err != nil && !os.IsNotExist(err) { + return err + } + if err == nil && fi.Mode().IsRegular() { + paths = append(paths, dest) + return nil + } + + // Symlink destination has another symlink called device, which can be read and used to retrieve data + device := filepath.Join(dest, "device") + fi, err = sc.fs.Stat(device) + if err != nil { + if !os.IsNotExist(err) { + return err + } + return nil + } + + if fi.Mode()&os.ModeSymlink != 0 { + return nil + } + + device, err = sc.fs.Readlink(device) + if err != nil { + return err + } + + dest = filepath.Join(dest, filepath.Clean(device)) + + // Symlink destination has a file called name, meaning a sensor exists here and data can be retrieved + if _, err := sc.fs.Stat(filepath.Join(dest, "name")); err != nil { + if !os.IsNotExist(err) { + return err + } + return nil + } + + paths = append(paths, dest) + + return nil + }) + + return paths, err +} + +// shouldSkip indicates if a given filename should be skipped during the filesystem walk operation. 
+func shouldSkip(file string) bool { + if strings.HasPrefix(file, "runtime_") { + return true + } + + switch file { + case "async": + case "autosuspend_delay_ms": + case "control": + case "driver_override": + case "modalias": + case "uevent": + default: + return false + } + + return true +} + +var _ filesystem = &systemFilesystem{} + +// A systemFilesystem is a filesystem which uses operations on the host filesystem. +type systemFilesystem struct{} + +func (fs *systemFilesystem) ReadFile(filename string) (string, error) { + b, err := os.ReadFile(filename) + if err != nil { + return "", err + } + return strings.TrimSpace(string(b)), nil +} + +func (fs *systemFilesystem) Readlink(name string) (string, error) { + return os.Readlink(name) +} + +func (fs *systemFilesystem) Stat(name string) (os.FileInfo, error) { + return os.Stat(name) +} + +func (fs *systemFilesystem) WalkDir(root string, walkFn fs.WalkDirFunc) error { + return filepath.WalkDir(root, walkFn) +} diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/scanner_test.go b/src/go/plugin/go.d/modules/sensors/lmsensors/scanner_test.go new file mode 100644 index 00000000000000..8f1fad9f9a0efe --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/scanner_test.go @@ -0,0 +1,810 @@ +package lmsensors + +import ( + "fmt" + "io/fs" + "os" + "reflect" + "strings" + "testing" + "time" +) + +// TODO(mdlayher): why does scanning work if device file isn't a symlink, +// even though it is in the actual filesystem (and the actual filesystem +// exhibits the same behavior)? 
+ +func TestScannerScan(t *testing.T) { + tests := []struct { + name string + fs filesystem + devices []*Device + }{ + { + name: "power_meter device", + fs: &memoryFilesystem{ + symlinks: map[string]string{ + "/sys/class/hwmon/hwmon0": "../../devices/LNXSYSTM:00/device:00/ACPI0000:00/hwmon/hwmon0", + "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/hwmon/hwmon0/device": "../../../ACPI0000:00", + }, + files: []memoryFile{ + { + name: "/sys/class/hwmon", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/class/hwmon/hwmon0", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/hwmon/hwmon0/name", + err: os.ErrNotExist, + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/hwmon/hwmon0/device", + dirEntry: &memoryDirEntry{ + // mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/name", + contents: "power_meter", + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/power1_average", + contents: "345000000", + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/power1_average_interval", + contents: "1000", + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/power1_is_battery", + contents: "0", + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/power1_model_number", + contents: "Intel(R) Node Manager", + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/power1_oem_info", + contents: "Meter measures total domain", + }, + { + name: "/sys/devices/LNXSYSTM:00/device:00/ACPI0000:00/power1_serial_number", + contents: "", + }, + }, + }, + devices: []*Device{{ + Name: "power_meter-00", + Sensors: []Sensor{ + &PowerSensor{ + Name: "power1", + Average: 345.0, + AverageInterval: 1 * time.Second, + Battery: false, + ModelNumber: "Intel(R) Node Manager", + OEMInfo: 
"Meter measures total domain", + SerialNumber: "", + }, + }, + }}, + }, + { + name: "acpitz device", + fs: &memoryFilesystem{ + symlinks: map[string]string{ + "/sys/class/hwmon/hwmon0": "../../devices/virtual/hwmon/hwmon0", + }, + files: []memoryFile{ + { + name: "/sys/class/hwmon", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/class/hwmon/hwmon0", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/virtual/hwmon/hwmon0", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/virtual/hwmon/hwmon0/name", + contents: "acpitz", + }, + { + name: "/sys/devices/virtual/hwmon/hwmon0/temp1_crit", + contents: "105000", + }, + { + name: "/sys/devices/virtual/hwmon/hwmon0/temp1_input", + contents: "27800", + }, + }, + }, + devices: []*Device{{ + Name: "acpitz-00", + Sensors: []Sensor{ + &TemperatureSensor{ + Name: "temp1", + Input: 27.8, + Critical: 105.0, + CriticalAlarm: false, + }, + }, + }}, + }, + { + name: "coretemp device", + fs: &memoryFilesystem{ + symlinks: map[string]string{ + "/sys/class/hwmon/hwmon1": "../../devices/platform/coretemp.0/hwmon/hwmon1", + "/sys/devices/platform/coretemp.0/hwmon/hwmon1/device": "../../../coretemp.0", + }, + files: []memoryFile{ + { + name: "/sys/class/hwmon", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/class/hwmon/hwmon1", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/coretemp.0", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/platform/coretemp.0/hwmon/hwmon1/name", + err: os.ErrNotExist, + }, + { + name: "/sys/devices/platform/coretemp.0/hwmon/hwmon1/device", + dirEntry: &memoryDirEntry{ + // mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/coretemp.0/name", + contents: "coretemp", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_crit", + contents: "100000", + }, + { + name: 
"/sys/devices/platform/coretemp.0/temp1_crit_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_input", + contents: "40000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_label", + contents: "Core 0", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_max", + contents: "80000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_crit", + contents: "100000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_crit_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_input", + contents: "42000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_label", + contents: "Core 1", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_max", + contents: "80000", + }, + }, + }, + devices: []*Device{{ + Name: "coretemp-00", + Sensors: []Sensor{ + &TemperatureSensor{ + Name: "temp1", + Label: "Core 0", + Input: 40.0, + Maximum: 80.0, + Critical: 100.0, + CriticalAlarm: false, + }, + &TemperatureSensor{ + Name: "temp2", + Label: "Core 1", + Input: 42.0, + Maximum: 80.0, + Critical: 100.0, + CriticalAlarm: false, + }, + }, + }}, + }, + { + name: "it8728 device", + fs: &memoryFilesystem{ + symlinks: map[string]string{ + "/sys/class/hwmon/hwmon2": "../../devices/platform/it87.2608/hwmon/hwmon2", + "/sys/devices/platform/it87.2608/hwmon/hwmon2/device": "../../../it87.2608", + }, + files: []memoryFile{ + { + name: "/sys/class/hwmon", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/class/hwmon/hwmon2", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/it87.2608", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/platform/it87.2608/hwmon/hwmon2/name", + err: os.ErrNotExist, + }, + { + name: "/sys/devices/platform/it87.2608/hwmon/hwmon2/device", + dirEntry: &memoryDirEntry{ + // mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/it87.2608/name", + contents: "it8728", + }, + { 
+ name: "/sys/devices/platform/it87.2608/fan1_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/it87.2608/fan1_beep", + contents: "1", + }, + { + name: "/sys/devices/platform/it87.2608/fan1_input", + contents: "1010", + }, + { + name: "/sys/devices/platform/it87.2608/fan1_min", + contents: "10", + }, + { + name: "/sys/devices/platform/it87.2608/in0_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/it87.2608/in0_beep", + contents: "0", + }, + { + name: "/sys/devices/platform/it87.2608/in0_input", + contents: "1056", + }, + { + name: "/sys/devices/platform/it87.2608/in0_max", + contents: "3060", + }, + { + name: "/sys/devices/platform/it87.2608/in1_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/it87.2608/in1_beep", + contents: "0", + }, + { + name: "/sys/devices/platform/it87.2608/in1_input", + contents: "3384", + }, + { + name: "/sys/devices/platform/it87.2608/in1_label", + contents: "3VSB", + }, + { + name: "/sys/devices/platform/it87.2608/in1_max", + contents: "6120", + }, + { + name: "/sys/devices/platform/it87.2608/intrusion0_alarm", + contents: "1", + }, + { + name: "/sys/devices/platform/it87.2608/temp1_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/it87.2608/temp1_beep", + contents: "1", + }, + { + name: "/sys/devices/platform/it87.2608/temp1_input", + contents: "43000", + }, + { + name: "/sys/devices/platform/it87.2608/temp1_max", + contents: "127000", + }, + { + name: "/sys/devices/platform/it87.2608/temp1_type", + contents: "4", + }, + }, + }, + devices: []*Device{{ + Name: "it8728-00", + Sensors: []Sensor{ + &FanSensor{ + Name: "fan1", + Alarm: false, + Beep: true, + Input: 1010, + Minimum: 10, + }, + &VoltageSensor{ + Name: "in0", + Alarm: false, + Beep: false, + Input: 1.056, + Maximum: 3.060, + }, + &VoltageSensor{ + Name: "in1", + Label: "3VSB", + Alarm: false, + Beep: false, + Input: 3.384, + Maximum: 6.120, + }, + &IntrusionSensor{ + Name: "intrusion0", + Alarm: true, + }, + 
&TemperatureSensor{ + Name: "temp1", + Alarm: false, + Beep: true, + TempType: TemperatureSensorTypeThermistor, + Input: 43.0, + Maximum: 127.0, + }, + }, + }}, + }, + { + name: "multiple coretemp devices", + fs: &memoryFilesystem{ + symlinks: map[string]string{ + "/sys/class/hwmon/hwmon1": "../../devices/platform/coretemp.0/hwmon/hwmon1", + "/sys/class/hwmon/hwmon2": "../../devices/platform/coretemp.1/hwmon/hwmon2", + "/sys/devices/platform/coretemp.0/hwmon/hwmon1/device": "../../../coretemp.0", + "/sys/devices/platform/coretemp.1/hwmon/hwmon2/device": "../../../coretemp.1", + }, + files: []memoryFile{ + { + name: "/sys/class/hwmon", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/class/hwmon/hwmon1", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/class/hwmon/hwmon2", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/coretemp.0", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/platform/coretemp.0/hwmon/hwmon1/name", + err: os.ErrNotExist, + }, + { + name: "/sys/devices/platform/coretemp.0/hwmon/hwmon1/device", + dirEntry: &memoryDirEntry{ + // mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/coretemp.0/name", + contents: "coretemp", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_crit", + contents: "100000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_crit_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_input", + contents: "40000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_label", + contents: "Core 0", + }, + { + name: "/sys/devices/platform/coretemp.0/temp1_max", + contents: "80000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_crit", + contents: "100000", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_crit_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_input", + contents: "42000", + 
}, + { + name: "/sys/devices/platform/coretemp.0/temp2_label", + contents: "Core 1", + }, + { + name: "/sys/devices/platform/coretemp.0/temp2_max", + contents: "80000", + }, + { + name: "/sys/devices/platform/coretemp.1", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/platform/coretemp.1/hwmon/hwmon2/name", + err: os.ErrNotExist, + }, + { + name: "/sys/devices/platform/coretemp.1/hwmon/hwmon2/device", + dirEntry: &memoryDirEntry{ + // mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/platform/coretemp.1/name", + contents: "coretemp", + }, + { + name: "/sys/devices/platform/coretemp.1/temp1_crit", + contents: "100000", + }, + { + name: "/sys/devices/platform/coretemp.1/temp1_crit_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/coretemp.1/temp1_input", + contents: "38000", + }, + { + name: "/sys/devices/platform/coretemp.1/temp1_label", + contents: "Core 0", + }, + { + name: "/sys/devices/platform/coretemp.1/temp1_max", + contents: "80000", + }, + { + name: "/sys/devices/platform/coretemp.1/temp2_crit", + contents: "100000", + }, + { + name: "/sys/devices/platform/coretemp.1/temp2_crit_alarm", + contents: "0", + }, + { + name: "/sys/devices/platform/coretemp.1/temp2_input", + contents: "37000", + }, + { + name: "/sys/devices/platform/coretemp.1/temp2_label", + contents: "Core 1", + }, + { + name: "/sys/devices/platform/coretemp.1/temp2_max", + contents: "80000", + }, + }, + }, + devices: []*Device{ + { + Name: "coretemp-00", + Sensors: []Sensor{ + &TemperatureSensor{ + Name: "temp1", + Label: "Core 0", + Input: 40.0, + Maximum: 80.0, + Critical: 100.0, + CriticalAlarm: false, + }, + &TemperatureSensor{ + Name: "temp2", + Label: "Core 1", + Input: 42.0, + Maximum: 80.0, + Critical: 100.0, + CriticalAlarm: false, + }, + }, + }, + { + Name: "coretemp-01", + Sensors: []Sensor{ + &TemperatureSensor{ + Name: "temp1", + Label: "Core 0", + Input: 38.0, + Maximum: 80.0, + Critical: 100.0, + CriticalAlarm: false, + }, + 
&TemperatureSensor{ + Name: "temp2", + Label: "Core 1", + Input: 37.0, + Maximum: 80.0, + Critical: 100.0, + CriticalAlarm: false, + }, + }, + }, + }, + }, + { + name: "sfc device", + fs: &memoryFilesystem{ + symlinks: map[string]string{ + "/sys/class/hwmon/hwmon0": "../../devices/pci0000:00/0000:00:02.0/0000:03:00.0/hwmon/hwmon0", + "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/hwmon/hwmon0/device": "../../../0000:03:00.0", + }, + files: []memoryFile{ + { + name: "/sys/class/hwmon", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/class/hwmon/hwmon0", + dirEntry: &memoryDirEntry{ + mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0", + dirEntry: &memoryDirEntry{ + isDir: true, + }, + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/hwmon/hwmon0/name", + err: os.ErrNotExist, + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/hwmon/hwmon0/device", + dirEntry: &memoryDirEntry{ + // mode: os.ModeSymlink, + }, + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/name", + contents: "sfc", + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/curr1_alarm", + contents: "0", + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/curr1_crit", + contents: "18000", + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/curr1_input", + contents: "7624", + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/curr1_label", + contents: "0.9V supply current", + }, + { + name: "/sys/devices/pci0000:00/0000:00:02.0/0000:03:00.0/curr1_max", + contents: "16000", + }, + }, + }, + devices: []*Device{{ + Name: "sfc-00", + Sensors: []Sensor{ + &CurrentSensor{ + Name: "curr1", + Label: "0.9V supply current", + Alarm: false, + Input: 7.624, + Maximum: 16.0, + Critical: 18.0, + }, + }, + }}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := &Scanner{fs: tt.fs} + + devices, err := 
s.Scan() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if want, got := tt.devices, devices; !reflect.DeepEqual(want, got) { + t.Fatalf("unexpected Devices:\n- want:\n%v\n- got:\n%v", + devicesStr(want), devicesStr(got)) + } + }) + } +} + +func devicesStr(ds []*Device) string { + var out string + for _, d := range ds { + out += fmt.Sprintf("device: %q [%d sensors]\n", d.Name, len(d.Sensors)) + + for _, s := range d.Sensors { + out += fmt.Sprintf(" - sensor: %#v\n", s) + } + } + + return out +} + +var _ filesystem = &memoryFilesystem{} + +// A memoryFilesystem is an in-memory implementation of filesystem, used for +// tests. +type memoryFilesystem struct { + symlinks map[string]string + files []memoryFile +} + +func (fs *memoryFilesystem) ReadFile(filename string) (string, error) { + for _, f := range fs.files { + if f.name == filename { + return f.contents, nil + } + } + + return "", fmt.Errorf("readfile: file %q not in memory", filename) +} + +func (fs *memoryFilesystem) Readlink(name string) (string, error) { + if l, ok := fs.symlinks[name]; ok { + return l, nil + } + + return "", fmt.Errorf("readlink: symlink %q not in memory", name) +} + +func (fs *memoryFilesystem) Stat(name string) (os.FileInfo, error) { + for _, f := range fs.files { + if f.name == name { + de := f.dirEntry + if de == nil { + de = &memoryDirEntry{} + } + info, _ := de.Info() + return info, f.err + } + } + + return nil, fmt.Errorf("stat: file %q not in memory", name) +} + +func (fs *memoryFilesystem) WalkDir(root string, walkFn fs.WalkDirFunc) error { + if _, err := fs.Stat(root); err != nil { + return err + } + + for _, f := range fs.files { + // Only walk paths under the specified root + if !strings.HasPrefix(f.name, root) { + continue + } + + de := f.dirEntry + if de == nil { + de = &memoryDirEntry{} + } + + if err := walkFn(f.name, de, nil); err != nil { + return err + } + } + + return nil +} + +// A memoryFile is an in-memory file used by memoryFilesystem. 
+type memoryFile struct { + name string + contents string + dirEntry fs.DirEntry + err error +} + +var _ fs.DirEntry = &memoryDirEntry{} + +// A memoryDirEntry is a fs.DirEntry used by memoryFiles. +type memoryDirEntry struct { + name string + mode os.FileMode + isDir bool +} + +func (fi *memoryDirEntry) Name() string { return fi.name } +func (fi *memoryDirEntry) Type() os.FileMode { return fi.mode } +func (fi *memoryDirEntry) IsDir() bool { return fi.isDir } +func (fi *memoryDirEntry) Info() (fs.FileInfo, error) { return fi, nil } +func (fi *memoryDirEntry) Sys() interface{} { return nil } +func (fi *memoryDirEntry) Size() int64 { return 0 } +func (fi *memoryDirEntry) Mode() os.FileMode { return fi.Type() } +func (fi *memoryDirEntry) ModTime() time.Time { return time.Now() } diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/sensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/sensor.go new file mode 100644 index 00000000000000..f1e9a2b7fa7830 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/sensor.go @@ -0,0 +1,92 @@ +package lmsensors + +import ( + "sort" + "strings" +) + +// A Device is a physical or virtual device which may have zero or +// more Sensors. +type Device struct { + // The name of the device. + Name string + + // Any Sensors that belong to this Device. Use type assertions to + // check for specific Sensor types and fetch their data. + Sensors []Sensor +} + +type SensorType string + +const ( + SensorTypeCurrent SensorType = "current" + SensorTypeFan SensorType = "fan" + SensorTypeIntrusion SensorType = "intrusion" + SensorTypePower SensorType = "power" + SensorTypeTemperature SensorType = "temperature" + SensorTypeVoltage SensorType = "voltage" +) + +// A Sensor is a hardware sensor, used to retrieve device temperatures, fan speeds, voltages, etc. +// Use type assertions to check for specific +// Sensor types and fetch their data. 
+type Sensor interface { + Type() SensorType +} + +// parseSensors parses all Sensors from an input raw data map, produced during a filesystem walk. +func parseSensors(raw map[string]map[string]string) ([]Sensor, error) { + sensors := make([]Sensor, 0, len(raw)) + for k, v := range raw { + var sn Sensor + var err error + + switch { + case strings.HasPrefix(k, "curr"): + s := &CurrentSensor{Name: k} + sn = s + err = s.parse(v) + case strings.HasPrefix(k, "intrusion"): + s := &IntrusionSensor{Name: k} + sn = s + err = s.parse(v) + case strings.HasPrefix(k, "in"): + s := &VoltageSensor{Name: k} + sn = s + err = s.parse(v) + case strings.HasPrefix(k, "fan"): + s := &FanSensor{Name: k} + sn = s + err = s.parse(v) + case strings.HasPrefix(k, "power"): + s := &PowerSensor{Name: k} + sn = s + err = s.parse(v) + case strings.HasPrefix(k, "temp"): + s := &TemperatureSensor{Name: k} + sn = s + err = s.parse(v) + default: + continue + } + if err != nil { + return nil, err + } + + if sn == nil { + continue + } + + sensors = append(sensors, sn) + } + + type namer interface{ name() string } + + sort.Slice(sensors, func(i, j int) bool { + v1, ok1 := sensors[i].(namer) + v2, ok2 := sensors[j].(namer) + return ok1 && ok2 && v1.name() < v2.name() + }) + + return sensors, nil +} diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/temperaturesensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/temperaturesensor.go new file mode 100644 index 00000000000000..13484498bb414c --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/temperaturesensor.go @@ -0,0 +1,108 @@ +package lmsensors + +import ( + "strconv" +) + +// A TemperatureSensorType is a value that indicates the type of TemperatureSensor. +type TemperatureSensorType int + +// All possible TemperatureSensorType constants.
+const ( + TemperatureSensorUnknown TemperatureSensorType = 0 + TemperatureSensorTypePIICeleronDiode TemperatureSensorType = 1 + TemperatureSensorType3904Transistor TemperatureSensorType = 2 + TemperatureSensorTypeThermalDiode TemperatureSensorType = 3 + TemperatureSensorTypeThermistor TemperatureSensorType = 4 + TemperatureSensorTypeAMDAMDSI TemperatureSensorType = 5 + TemperatureSensorTypeIntelPECI TemperatureSensorType = 6 +) + +var _ Sensor = &TemperatureSensor{} + +// A TemperatureSensor is a Sensor that detects temperatures in degrees +// Celsius. +type TemperatureSensor struct { + // The name of the sensor. + Name string + + // A label that describes what the sensor is monitoring. Label may be empty. + Label string + + // Whether the sensor has an alarm triggered. + Alarm bool + + // Whether the sensor will sound an audible alarm if an alarm + // is triggered. + Beep bool + + // The type of sensor used to report temperatures. + TempType TemperatureSensorType + + // The input temperature, in degrees Celsius, indicated by the sensor. + Input float64 + + // A low threshold temperature, in degrees Celsius, indicated by the sensor. + Minimum float64 + + // A high threshold temperature, in degrees Celsius, indicated by the sensor. + Maximum float64 + + // A critical threshold temperature, in degrees Celsius, indicated by the sensor. + Critical float64 + + // An emergency threshold temperature, in degrees Celsius, indicated by the sensor. + Emergency float64 + + // Whether the temperature is past the critical threshold. 
+ CriticalAlarm bool +} + +func (s *TemperatureSensor) Type() SensorType { return SensorTypeTemperature } + +func (s *TemperatureSensor) parse(raw map[string]string) error { + for k, v := range raw { + switch k { + case "input", "min", "max", "crit", "emergency": + f, err := strconv.ParseFloat(v, 64) + if err != nil { + return err + } + + // Raw temperature values are scaled by 1000 + f /= 1000 + + switch k { + case "input": + s.Input = f + case "min": + s.Minimum = f + case "max": + s.Maximum = f + case "crit": + s.Critical = f + case "emergency": + s.Emergency = f + } + case "alarm": + s.Alarm = v != "0" + case "beep": + s.Beep = v != "0" + case "type": + t, err := strconv.Atoi(v) + if err != nil { + return err + } + + s.TempType = TemperatureSensorType(t) + case "crit_alarm": + s.CriticalAlarm = v != "0" + case "label": + s.Label = v + } + } + + return nil +} + +func (s *TemperatureSensor) name() string { return s.Name } diff --git a/src/go/plugin/go.d/modules/sensors/lmsensors/voltagesensor.go b/src/go/plugin/go.d/modules/sensors/lmsensors/voltagesensor.go new file mode 100644 index 00000000000000..a5428117cefc05 --- /dev/null +++ b/src/go/plugin/go.d/modules/sensors/lmsensors/voltagesensor.go @@ -0,0 +1,68 @@ +package lmsensors + +import ( + "strconv" +) + +var _ Sensor = &VoltageSensor{} + +// A VoltageSensor is a Sensor that detects voltage. +type VoltageSensor struct { + // The name of the sensor. + Name string + + // A label that describes what the sensor is monitoring. Label may be empty. + Label string + + // Whether the sensor has an alarm triggered. + Alarm bool + + // Whether the sensor will sound an audible alarm when an alarm + // is triggered. + Beep bool + + // The input voltage indicated by the sensor. + Input float64 + + // The minimum voltage threshold indicated by the sensor. + Min float64 + + // The maximum voltage threshold indicated by the sensor. 
+ Maximum float64 +} + +func (s *VoltageSensor) Type() SensorType { return SensorTypeVoltage } + +func (s *VoltageSensor) parse(raw map[string]string) error { + for k, v := range raw { + switch k { + case "input", "min", "max": + f, err := strconv.ParseFloat(v, 64) + if err != nil { + return err + } + + // Raw voltage values are scaled by 1000 + f /= 1000 + + switch k { + case "input": + s.Input = f + case "min": + s.Min = f + case "max": + s.Maximum = f + } + case "alarm": + s.Alarm = v != "0" + case "beep": + s.Beep = v != "0" + case "label": + s.Label = v + } + } + + return nil +} + +func (s *VoltageSensor) name() string { return s.Name } diff --git a/src/go/plugin/go.d/modules/sensors/metadata.yaml b/src/go/plugin/go.d/modules/sensors/metadata.yaml index 5ea94f3982eacc..f104c3e8786ecc 100644 --- a/src/go/plugin/go.d/modules/sensors/metadata.yaml +++ b/src/go/plugin/go.d/modules/sensors/metadata.yaml @@ -30,7 +30,7 @@ modules: metrics_description: > This collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, - utilizing the [sensors](https://linux.die.net/man/1/sensors) binary. + utilizing the [sensors](https://linux.die.net/man/1/sensors) binary or [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface). method_description: "" supported_platforms: include: [] @@ -56,11 +56,7 @@ modules: description: "" setup: prerequisites: - list: - - title: Install lm-sensors - description: | - - Install `lm-sensors` using your distribution's package manager. - - Run `sensors-detect` to detect hardware monitoring chips. + list: [] configuration: file: name: go.d/sensors.conf @@ -76,7 +72,7 @@ modules: default_value: 10 required: false - name: binary_path - description: Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable.
+ description: Path to the `sensors` binary. If left empty or if the binary is not found, [sysfs](https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface) will be used to collect sensor statistics. default_value: /usr/bin/sensors required: true - name: timeout @@ -94,6 +90,12 @@ modules: jobs: - name: sensors binary_path: /usr/local/sbin/sensors + - name: Use sysfs instead of sensors + description: Set `binary_path` to an empty string to use sysfs. + config: | + jobs: + - name: sensors + binary_path: "" troubleshooting: problems: list: [] diff --git a/src/go/plugin/go.d/modules/sensors/sensors.go b/src/go/plugin/go.d/modules/sensors/sensors.go index 379d44debfb8ce..08913f40643a66 100644 --- a/src/go/plugin/go.d/modules/sensors/sensors.go +++ b/src/go/plugin/go.d/modules/sensors/sensors.go @@ -8,6 +8,7 @@ import ( "time" "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" + "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/sensors/lmsensors" "github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/web" ) @@ -49,13 +50,17 @@ type ( charts *module.Charts - exec sensorsCLI + exec sensorsBinary + sc sysfsScanner sensors map[string]bool } - sensorsCLI interface { + sensorsBinary interface { sensorsInfo() ([]byte, error) } + sysfsScanner interface { + Scan() ([]*lmsensors.Device, error) + } ) func (s *Sensors) Configuration() any { @@ -63,17 +68,15 @@ func (s *Sensors) Configuration() any { } func (s *Sensors) Init() error { - if err := s.validateConfig(); err != nil { - s.Errorf("config validation: %s", err) - return err + if sb, err := s.initSensorsBinary(); err != nil { + s.Infof("sensors exec initialization: %v", err) + } else if sb != nil { + s.exec = sb } - sensorsExec, err := s.initSensorsCliExec() - if err != nil { - s.Errorf("sensors exec initialization: %v", err) - return err - } - s.exec = sensorsExec + sc := lmsensors.New() + sc.Logger = s.Logger + s.sc = sc return nil } diff --git a/src/go/plugin/go.d/modules/sensors/sensors_test.go 
b/src/go/plugin/go.d/modules/sensors/sensors_test.go index a370d7500c446b..54437b0aa9c926 100644 --- a/src/go/plugin/go.d/modules/sensors/sensors_test.go +++ b/src/go/plugin/go.d/modules/sensors/sensors_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" + "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/sensors/lmsensors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -43,14 +44,14 @@ func TestSensors_Init(t *testing.T) { config Config wantFail bool }{ - "fails if 'binary_path' is not set": { - wantFail: true, + "success if 'binary_path' is not set": { + wantFail: false, config: Config{ BinaryPath: "", }, }, - "fails if failed to find binary": { - wantFail: true, + "success if failed to find binary": { + wantFail: false, config: Config{ BinaryPath: "sensors!!!", }, @@ -83,7 +84,7 @@ func TestSensors_Cleanup(t *testing.T) { "after check": { prepare: func() *Sensors { sensors := New() - sensors.exec = prepareMockOkOnlyTemp() + sensors.exec = prepareMockExecOkOnlyTemp() _ = sensors.Check() return sensors }, @@ -91,7 +92,7 @@ func TestSensors_Cleanup(t *testing.T) { "after collect": { prepare: func() *Sensors { sensors := New() - sensors.exec = prepareMockOkTempInCurrPowerFan() + sensors.exec = prepareMockExecOkTempInCurrPowerFan() _ = sensors.Collect() return sensors }, @@ -113,28 +114,28 @@ func TestSensors_Charts(t *testing.T) { func TestSensors_Check(t *testing.T) { tests := map[string]struct { - prepareMock func() *mockSensorsCLIExec + prepareMock func() *mockSensorsBinary wantFail bool }{ - "only temperature": { + "exec: only temperature": { wantFail: false, - prepareMock: prepareMockOkOnlyTemp, + prepareMock: prepareMockExecOkOnlyTemp, }, - "temperature and voltage": { + "exec: temperature and voltage": { wantFail: false, - prepareMock: prepareMockOkTempInCurrPowerFan, + prepareMock: prepareMockExecOkTempInCurrPowerFan, }, - "error on sensors info call": { + "exec: 
error on sensors info call": { wantFail: true, - prepareMock: prepareMockErr, + prepareMock: prepareMockExecErr, }, - "empty response": { + "exec: empty response": { wantFail: true, - prepareMock: prepareMockEmptyResponse, + prepareMock: prepareMockExecEmptyResponse, }, - "unexpected response": { + "exec: unexpected response": { wantFail: true, - prepareMock: prepareMockUnexpectedResponse, + prepareMock: prepareMockExecUnexpectedResponse, }, } @@ -155,13 +156,14 @@ func TestSensors_Check(t *testing.T) { func TestSensors_Collect(t *testing.T) { tests := map[string]struct { - prepareMock func() *mockSensorsCLIExec - wantMetrics map[string]int64 - wantCharts int + prepareExecMock func() *mockSensorsBinary + prepareSysfsMock func() *mockSysfsScanner + wantMetrics map[string]int64 + wantCharts int }{ - "only temperature": { - prepareMock: prepareMockOkOnlyTemp, - wantCharts: 24, + "exec: only temperature": { + prepareExecMock: prepareMockExecOkOnlyTemp, + wantCharts: 24, wantMetrics: map[string]int64{ "sensor_chip_bnxt_en-pci-6200_feature_temp1_subfeature_temp1_input": 80000, "sensor_chip_bnxt_en-pci-6201_feature_temp1_subfeature_temp1_input": 81000, @@ -189,18 +191,19 @@ func TestSensors_Collect(t *testing.T) { "sensor_chip_nvme-pci-8100_feature_composite_subfeature_temp1_input": 39850, }, }, - "multiple sensors": { - prepareMock: prepareMockOkTempInCurrPowerFan, - wantCharts: 19, + "exec: multiple sensors": { + prepareExecMock: prepareMockExecOkTempInCurrPowerFan, + wantCharts: 20, wantMetrics: map[string]int64{ "sensor_chip_acpitz-acpi-0_feature_temp1_subfeature_temp1_input": 88000, "sensor_chip_amdgpu-pci-0300_feature_edge_subfeature_temp1_input": 53000, "sensor_chip_amdgpu-pci-0300_feature_fan1_subfeature_fan1_input": 0, "sensor_chip_amdgpu-pci-0300_feature_junction_subfeature_temp2_input": 58000, "sensor_chip_amdgpu-pci-0300_feature_mem_subfeature_temp3_input": 57000, + "sensor_chip_amdgpu-pci-0300_feature_ppt_subfeature_power1_average": 29000, 
"sensor_chip_amdgpu-pci-0300_feature_vddgfx_subfeature_in0_input": 787, "sensor_chip_amdgpu-pci-6700_feature_edge_subfeature_temp1_input": 60000, - "sensor_chip_amdgpu-pci-6700_feature_ppt_subfeature_power1_input": 8144, + "sensor_chip_amdgpu-pci-6700_feature_ppt_subfeature_power1_average": 5088, "sensor_chip_amdgpu-pci-6700_feature_vddgfx_subfeature_in0_input": 1335, "sensor_chip_amdgpu-pci-6700_feature_vddnb_subfeature_in1_input": 973, "sensor_chip_asus-isa-0000_feature_cpu_fan_subfeature_fan1_input": 5700000, @@ -214,25 +217,61 @@ func TestSensors_Collect(t *testing.T) { "sensor_chip_ucsi_source_psy_usbc000:001-isa-0000_feature_in0_subfeature_in0_input": 0, }, }, - "error on sensors info call": { - prepareMock: prepareMockErr, - wantMetrics: nil, + "exec: error on sensors info call": { + prepareExecMock: prepareMockExecErr, + wantMetrics: nil, }, - "empty response": { - prepareMock: prepareMockEmptyResponse, - wantMetrics: nil, + "exec: empty response": { + prepareExecMock: prepareMockExecEmptyResponse, + wantMetrics: nil, }, - "unexpected response": { - prepareMock: prepareMockUnexpectedResponse, - wantMetrics: nil, + "exec: unexpected response": { + prepareExecMock: prepareMockExecUnexpectedResponse, + wantMetrics: nil, + }, + + "sysfs: multiple sensors": { + prepareSysfsMock: prepareMockSysfsScannerOk, + wantCharts: 20, + wantMetrics: map[string]int64{ + "sensor_chip_acpitz-acpi-0_feature_temp1_subfeature_temp1_input": 88000, + "sensor_chip_amdgpu-pci-0300_feature_edge_subfeature_temp1_input": 53000, + "sensor_chip_amdgpu-pci-0300_feature_fan1_subfeature_fan1_input": 0, + "sensor_chip_amdgpu-pci-0300_feature_junction_subfeature_temp2_input": 58000, + "sensor_chip_amdgpu-pci-0300_feature_mem_subfeature_temp3_input": 57000, + "sensor_chip_amdgpu-pci-0300_feature_ppt_subfeature_power1_average": 29000, + "sensor_chip_amdgpu-pci-0300_feature_vddgfx_subfeature_in0_input": 787, + "sensor_chip_amdgpu-pci-6700_feature_edge_subfeature_temp1_input": 60000, + 
"sensor_chip_amdgpu-pci-6700_feature_ppt_subfeature_power1_average": 5088, + "sensor_chip_amdgpu-pci-6700_feature_vddgfx_subfeature_in0_input": 1335, + "sensor_chip_amdgpu-pci-6700_feature_vddnb_subfeature_in1_input": 973, + "sensor_chip_asus-isa-0000_feature_cpu_fan_subfeature_fan1_input": 5700000, + "sensor_chip_asus-isa-0000_feature_gpu_fan_subfeature_fan2_input": 6600000, + "sensor_chip_bat0-acpi-0_feature_in0_subfeature_in0_input": 17365, + "sensor_chip_k10temp-pci-00c3_feature_tctl_subfeature_temp1_input": 90000, + "sensor_chip_nvme-pci-0600_feature_composite_subfeature_temp1_input": 33850, + "sensor_chip_nvme-pci-0600_feature_sensor_1_subfeature_temp2_input": 48850, + "sensor_chip_nvme-pci-0600_feature_sensor_2_subfeature_temp3_input": 33850, + "sensor_chip_ucsi_source_psy_usbc000:001-isa-0000_feature_curr1_subfeature_curr1_input": 0, + "sensor_chip_ucsi_source_psy_usbc000:001-isa-0000_feature_in0_subfeature_in0_input": 0, + }, + }, + "sysfs: error on scan": { + prepareSysfsMock: prepareMockSysfsScannerErr, + wantMetrics: nil, }, } for name, test := range tests { t.Run(name, func(t *testing.T) { sensors := New() - mock := test.prepareMock() - sensors.exec = mock + if test.prepareExecMock != nil { + sensors.exec = test.prepareExecMock() + } else if test.prepareSysfsMock != nil { + sensors.sc = test.prepareSysfsMock() + } else { + t.Fail() + } var mx map[string]int64 for i := 0; i < 10; i++ { @@ -240,48 +279,36 @@ func TestSensors_Collect(t *testing.T) { } assert.Equal(t, test.wantMetrics, mx) + assert.Len(t, *sensors.Charts(), test.wantCharts) - testMetricsHasAllChartsDims(t, sensors, mx) - }) - } -} -func testMetricsHasAllChartsDims(t *testing.T, sensors *Sensors, mx map[string]int64) { - for _, chart := range *sensors.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, 
ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } + if len(test.wantMetrics) > 0 { + module.TestMetricsHasAllChartsDims(t, sensors.Charts(), mx) + } + }) } } -func prepareMockOkOnlyTemp() *mockSensorsCLIExec { - return &mockSensorsCLIExec{ +func prepareMockExecOkOnlyTemp() *mockSensorsBinary { + return &mockSensorsBinary{ sensorsInfoData: dataSensorsTemp, } } -func prepareMockOkTempInCurrPowerFan() *mockSensorsCLIExec { - return &mockSensorsCLIExec{ +func prepareMockExecOkTempInCurrPowerFan() *mockSensorsBinary { + return &mockSensorsBinary{ sensorsInfoData: dataSensorsTempInCurrPowerFan, } } -func prepareMockErr() *mockSensorsCLIExec { - return &mockSensorsCLIExec{ +func prepareMockExecErr() *mockSensorsBinary { + return &mockSensorsBinary{ errOnSensorsInfo: true, } } -func prepareMockUnexpectedResponse() *mockSensorsCLIExec { - return &mockSensorsCLIExec{ +func prepareMockExecUnexpectedResponse() *mockSensorsBinary { + return &mockSensorsBinary{ sensorsInfoData: []byte(` Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla malesuada erat id magna mattis, eu viverra tellus rhoncus. @@ -290,19 +317,174 @@ Fusce et felis pulvinar, posuere sem non, porttitor eros. 
} } -func prepareMockEmptyResponse() *mockSensorsCLIExec { - return &mockSensorsCLIExec{} +func prepareMockExecEmptyResponse() *mockSensorsBinary { + return &mockSensorsBinary{} } -type mockSensorsCLIExec struct { +type mockSensorsBinary struct { errOnSensorsInfo bool sensorsInfoData []byte } -func (m *mockSensorsCLIExec) sensorsInfo() ([]byte, error) { +func (m *mockSensorsBinary) sensorsInfo() ([]byte, error) { if m.errOnSensorsInfo { return nil, errors.New("mock.sensorsInfo() error") } return m.sensorsInfoData, nil } + +func prepareMockSysfsScannerOk() *mockSysfsScanner { + return &mockSysfsScanner{ + scanData: []*lmsensors.Device{ + {Name: "asus-isa-0000", Sensors: []lmsensors.Sensor{ + &lmsensors.FanSensor{ + Name: "fan1", + Label: "cpu_fan", + Input: 5700, + }, + &lmsensors.FanSensor{ + Name: "fan2", + Label: "gpu_fan", + Input: 6600, + }, + }}, + {Name: "nvme-pci-0600", Sensors: []lmsensors.Sensor{ + &lmsensors.TemperatureSensor{ + Name: "temp1", + Label: "Composite", + Input: 33.85, + Maximum: 83.85, + Minimum: -40.15, + Critical: 87.85, + Alarm: false, + }, + &lmsensors.TemperatureSensor{ + Name: "temp2", + Label: "Sensor 1", + Input: 48.85, + Maximum: 65261.85, + Minimum: -273.15, + }, + &lmsensors.TemperatureSensor{ + Name: "temp3", + Label: "Sensor 2", + Input: 33.85, + Maximum: 65261.85, + Minimum: -273.15, + }, + }}, + {Name: "amdgpu-pci-6700", Sensors: []lmsensors.Sensor{ + &lmsensors.VoltageSensor{ + Name: "in0", + Label: "vddgfx", + Input: 1.335, + }, + &lmsensors.VoltageSensor{ + Name: "in1", + Label: "vddnb", + Input: 0.973, + }, + &lmsensors.TemperatureSensor{ + Name: "temp1", + Label: "edge", + Input: 60.000, + }, + &lmsensors.PowerSensor{ + Name: "power1", + Label: "PPT", + Average: 5.088, + }, + }}, + {Name: "BAT0-acpi-0", Sensors: []lmsensors.Sensor{ + &lmsensors.VoltageSensor{ + Name: "in0", + Label: "in0", + Input: 17.365, + }, + }}, + {Name: "ucsi_source_psy_USBC000:001-isa-0000", Sensors: []lmsensors.Sensor{ + &lmsensors.VoltageSensor{ + 
Name: "in0", + Label: "in0", + Input: 0.000, + }, + &lmsensors.CurrentSensor{ + Name: "curr1", + Label: "curr1", + Input: 0.000, + }, + }}, + {Name: "k10temp-pci-00c3", Sensors: []lmsensors.Sensor{ + &lmsensors.TemperatureSensor{ + Name: "temp1", + Label: "Tctl", + Input: 90, + }, + }}, + {Name: "amdgpu-pci-0300", Sensors: []lmsensors.Sensor{ + &lmsensors.VoltageSensor{ + Name: "in0", + Label: "vddgfx", + Input: 0.787, + }, + &lmsensors.FanSensor{ + Name: "fan1", + Label: "fan1", + Maximum: 4900, + }, + &lmsensors.TemperatureSensor{ + Name: "temp1", + Label: "edge", + Input: 53, + Critical: 100, + Emergency: 105, + }, + &lmsensors.TemperatureSensor{ + Name: "temp2", + Label: "junction", + Input: 58, + Critical: 100, + Emergency: 105, + }, + &lmsensors.TemperatureSensor{ + Name: "temp3", + Label: "mem", + Input: 57, + Critical: 106, + Emergency: 110, + }, + &lmsensors.PowerSensor{ + Name: "power1", + Label: "PPT", + Average: 29, + }, + }}, + {Name: "acpitz-acpi-0", Sensors: []lmsensors.Sensor{ + &lmsensors.FanSensor{ + Name: "temp1", + Label: "temp1", + Input: 88, + }, + }}, + }, + } +} + +func prepareMockSysfsScannerErr() *mockSysfsScanner { + return &mockSysfsScanner{ + errOnScan: true, + } +} + +type mockSysfsScanner struct { + errOnScan bool + scanData []*lmsensors.Device +} + +func (m *mockSysfsScanner) Scan() ([]*lmsensors.Device, error) { + if m.errOnScan { + return nil, errors.New("mock.scan() error") + } + return m.scanData, nil +} diff --git a/src/go/plugin/go.d/modules/smartctl/smartctl_test.go b/src/go/plugin/go.d/modules/smartctl/smartctl_test.go index 7c56605f613ec6..6645424602e741 100644 --- a/src/go/plugin/go.d/modules/smartctl/smartctl_test.go +++ b/src/go/plugin/go.d/modules/smartctl/smartctl_test.go @@ -366,25 +366,11 @@ func TestSmartctl_Collect(t *testing.T) { } assert.Equal(t, test.wantMetrics, mx) - assert.Len(t, *smart.Charts(), test.wantCharts) - testMetricsHasAllChartsDims(t, smart, mx) - }) - } -} -func testMetricsHasAllChartsDims(t 
*testing.T, smart *Smartctl, mx map[string]int64) { - for _, chart := range *smart.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } + assert.Len(t, *smart.Charts(), test.wantCharts, "wantCharts") + + module.TestMetricsHasAllChartsDims(t, smart.Charts(), mx) + }) } } diff --git a/src/go/plugin/go.d/modules/squidlog/squidlog_test.go b/src/go/plugin/go.d/modules/squidlog/squidlog_test.go index eb5ce635ffb844..cbaccd351925fa 100644 --- a/src/go/plugin/go.d/modules/squidlog/squidlog_test.go +++ b/src/go/plugin/go.d/modules/squidlog/squidlog_test.go @@ -238,16 +238,18 @@ func TestSquidLog_Collect_ReturnOldDataIfNothingRead(t *testing.T) { } _ = squid.Collect() - collected := squid.Collect() - assert.Equal(t, expected, collected) - testCharts(t, squid, collected) + mx := squid.Collect() + + assert.Equal(t, expected, mx) + + testCharts(t, squid, mx) } -func testCharts(t *testing.T, squidlog *SquidLog, collected map[string]int64) { +func testCharts(t *testing.T, squidlog *SquidLog, mx map[string]int64) { t.Helper() ensureChartsDynamicDimsCreated(t, squidlog) - ensureCollectedHasAllChartsDimsVarsIDs(t, squidlog, collected) + module.TestMetricsHasAllChartsDims(t, squidlog.Charts(), mx) } func ensureChartsDynamicDimsCreated(t *testing.T, squid *SquidLog) { @@ -276,19 +278,6 @@ func ensureDynamicDimsCreated(t *testing.T, squid *SquidLog, chartID, dimPrefix } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, s *SquidLog, collected map[string]int64) { - for _, chart := range *s.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := 
range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareSquidCollect(t *testing.T) *SquidLog { t.Helper() squid := New() diff --git a/src/go/plugin/go.d/modules/storcli/storcli_test.go b/src/go/plugin/go.d/modules/storcli/storcli_test.go index 63ee54b564f9dc..ba1a0fb161e631 100644 --- a/src/go/plugin/go.d/modules/storcli/storcli_test.go +++ b/src/go/plugin/go.d/modules/storcli/storcli_test.go @@ -230,8 +230,10 @@ func TestStorCli_Collect(t *testing.T) { mx := stor.Collect() assert.Equal(t, test.wantMetrics, mx) - assert.Len(t, *stor.Charts(), test.wantCharts) - testMetricsHasAllChartsDims(t, stor, mx) + + assert.Len(t, *stor.Charts(), test.wantCharts, "wantCharts") + + module.TestMetricsHasAllChartsDims(t, stor.Charts(), mx) }) } } @@ -291,19 +293,3 @@ func (m *mockStorCliExec) drivesInfo() ([]byte, error) { } return m.drivesInfoData, nil } - -func testMetricsHasAllChartsDims(t *testing.T, stor *StorCli, mx map[string]int64) { - for _, chart := range *stor.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} diff --git a/src/go/plugin/go.d/modules/vernemq/vernemq_test.go b/src/go/plugin/go.d/modules/vernemq/vernemq_test.go index 13eb3dceb25ecb..fc41343d1757ef 100644 --- a/src/go/plugin/go.d/modules/vernemq/vernemq_test.go +++ b/src/go/plugin/go.d/modules/vernemq/vernemq_test.go @@ -91,9 +91,11 @@ func TestVerneMQ_Collect(t *testing.T) { verneMQ, srv := prepareClientServerV1101(t) defer srv.Close() - collected := verneMQ.Collect() - assert.Equal(t, v1101ExpectedMetrics, collected) - testCharts(t, verneMQ, collected) + mx := verneMQ.Collect() + + 
assert.Equal(t, v1101ExpectedMetrics, mx) + + module.TestMetricsHasAllChartsDims(t, verneMQ.Charts(), mx) } func TestVerneMQ_Collect_ReturnsNilIfConnectionRefused(t *testing.T) { @@ -124,23 +126,6 @@ func TestVerneMQ_Collect_ReturnsNilIfReceiveResponse404(t *testing.T) { assert.Nil(t, verneMQ.Collect()) } -func testCharts(t *testing.T, verneMQ *VerneMQ, collected map[string]int64) { - ensureCollectedHasAllChartsDimsVarsIDs(t, verneMQ, collected) -} - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, verneMQ *VerneMQ, collected map[string]int64) { - for _, chart := range *verneMQ.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareVerneMQ() *VerneMQ { verneMQ := New() verneMQ.URL = "http://127.0.0.1:38001/metrics" diff --git a/src/go/plugin/go.d/modules/weblog/weblog_test.go b/src/go/plugin/go.d/modules/weblog/weblog_test.go index 1e36bbf68b0e67..028eca39fdf54d 100644 --- a/src/go/plugin/go.d/modules/weblog/weblog_test.go +++ b/src/go/plugin/go.d/modules/weblog/weblog_test.go @@ -737,16 +737,7 @@ func testCharts(t *testing.T, w *WebLog, mx map[string]int64) { testCustomTimeFieldCharts(t, w) testCustomNumericFieldCharts(t, w) - testChartsDimIDs(t, w, mx) -} - -func testChartsDimIDs(t *testing.T, w *WebLog, mx map[string]int64) { - for _, chart := range *w.Charts() { - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - } + module.TestMetricsHasAllChartsDims(t, w.Charts(), mx) } func testVhostChart(t *testing.T, w *WebLog) { diff --git a/src/go/plugin/go.d/modules/windows/windows_test.go b/src/go/plugin/go.d/modules/windows/windows_test.go index 
0529502489e54a..7cfbe3317ec7da 100644 --- a/src/go/plugin/go.d/modules/windows/windows_test.go +++ b/src/go/plugin/go.d/modules/windows/windows_test.go @@ -817,7 +817,7 @@ func TestWindows_Collect(t *testing.T) { func testCharts(t *testing.T, win *Windows, mx map[string]int64) { ensureChartsDimsCreated(t, win) - ensureCollectedHasAllChartsDimsVarsIDs(t, win, mx) + module.TestMetricsHasAllChartsDims(t, win.Charts(), mx) } func ensureChartsDimsCreated(t *testing.T, w *Windows) { @@ -1047,19 +1047,6 @@ func ensureChartsDimsCreated(t *testing.T, w *Windows) { } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, w *Windows, mx map[string]int64) { - for _, chart := range *w.Charts() { - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareWindowsV0200() (win *Windows, cleanup func()) { ts := httptest.NewServer(http.HandlerFunc( func(w http.ResponseWriter, r *http.Request) { diff --git a/src/libnetdata/buffer/buffer.h b/src/libnetdata/buffer/buffer.h index 28bbf36bfb6869..f226df18091a31 100644 --- a/src/libnetdata/buffer/buffer.h +++ b/src/libnetdata/buffer/buffer.h @@ -152,13 +152,10 @@ static inline void _buffer_json_depth_pop(BUFFER *wb) { wb->json.depth--; } -static inline void buffer_fast_charcat(BUFFER *wb, const char c) { - +static inline void buffer_putc(BUFFER *wb, char c) { buffer_need_bytes(wb, 2); - *(&wb->buffer[wb->len]) = c; - wb->len += 1; + wb->buffer[wb->len++] = c; wb->buffer[wb->len] = '\0'; - buffer_overflow_check(wb); } @@ -181,13 +178,6 @@ static inline void buffer_fast_rawcat(BUFFER *wb, const char *txt, size_t len) { buffer_overflow_check(wb); } -static inline void buffer_putc(BUFFER *wb, char c) { - buffer_need_bytes(wb, 2); - wb->buffer[wb->len++] = c; - 
wb->buffer[wb->len] = '\0'; - buffer_overflow_check(wb); -} - static inline void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len) { if(unlikely(!txt || !*txt || !len)) return; diff --git a/src/aclk/mqtt_websockets/c_rhash/c_rhash.c b/src/libnetdata/c_rhash/c_rhash.c similarity index 77% rename from src/aclk/mqtt_websockets/c_rhash/c_rhash.c rename to src/libnetdata/c_rhash/c_rhash.c index a71b500e2c2f32..0ab25d5d427321 100644 --- a/src/aclk/mqtt_websockets/c_rhash/c_rhash.c +++ b/src/libnetdata/c_rhash/c_rhash.c @@ -1,33 +1,13 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later +#include "../libnetdata.h" #include "c_rhash_internal.h" -#include -#include - -#ifdef DEBUG_VERBOSE -#include -#endif - -#define c_rmalloc(...) malloc(__VA_ARGS__) -#define c_rcalloc(...) calloc(__VA_ARGS__) -#define c_rfree(...) free(__VA_ARGS__) - -static inline uint32_t simple_hash(const char *name) { - unsigned char *s = (unsigned char *) name; - uint32_t hval = 0x811c9dc5; - while (*s) { - hval *= 16777619; - hval ^= (uint32_t) *s++; - } - return hval; -} - c_rhash c_rhash_new(size_t bin_count) { if (!bin_count) bin_count = 1000; - c_rhash hash = c_rcalloc(1, sizeof(struct c_rhash_s) + (bin_count * sizeof(struct bin_ll*)) ); + c_rhash hash = callocz(1, sizeof(struct c_rhash_s) + (bin_count * sizeof(struct bin_ll*)) ); if (hash == NULL) return NULL; @@ -75,16 +55,9 @@ static int insert_into_bin(c_rhash_bin *bin, uint8_t key_type, const void *key, struct bin_item *prev = NULL; while (*bin != NULL) { if (!compare_bin_item(*bin, key_type, key)) { -#ifdef DEBUG_VERBOSE - printf("Key already present! 
Updating value!\n"); -#endif -// TODO: optimize here if the new value is of different kind compared to the old one -// in case it is not crazily bigger we can reuse the memory and avoid malloc and free - c_rfree((*bin)->value); + freez((*bin)->value); (*bin)->value_type = value_type; - (*bin)->value = c_rmalloc(get_itemtype_len(value_type, value)); - if ((*bin)->value == NULL) - return 1; + (*bin)->value = mallocz(get_itemtype_len(value_type, value)); memcpy((*bin)->value, value, get_itemtype_len(value_type, value)); return 0; } @@ -93,18 +66,18 @@ static int insert_into_bin(c_rhash_bin *bin, uint8_t key_type, const void *key, } if (*bin == NULL) - *bin = c_rcalloc(1, sizeof(struct bin_item)); + *bin = callocz(1, sizeof(struct bin_item)); if (prev != NULL) prev->next = *bin; (*bin)->key_type = key_type; size_t len = get_itemtype_len(key_type, key); - (*bin)->key = c_rmalloc(len); + (*bin)->key = mallocz(len); memcpy((*bin)->key, key, len); (*bin)->value_type = value_type; len = get_itemtype_len(value_type, value); - (*bin)->value = c_rmalloc(len); + (*bin)->value = mallocz(len); memcpy((*bin)->value, value, len); return 0; } @@ -121,33 +94,18 @@ static inline c_rhash_bin *get_binptr_by_str(c_rhash hash, const char *key) { int c_rhash_insert_str_ptr(c_rhash hash, const char *key, void *value) { c_rhash_bin *bin = get_binptr_by_str(hash, key); -#ifdef DEBUG_VERBOSE - if (bin != NULL) - printf("COLLISION. There will be more than one item in bin idx=%d\n", nhash); -#endif - return insert_into_bin(bin, ITEMTYPE_STRING, key, ITEMTYPE_OPAQUE_PTR, &value); } int c_rhash_insert_str_uint8(c_rhash hash, const char *key, uint8_t value) { c_rhash_bin *bin = get_binptr_by_str(hash, key); -#ifdef DEBUG_VERBOSE - if (bin != NULL) - printf("COLLISION. 
There will be more than one item in bin idx=%d\n", nhash); -#endif - return insert_into_bin(bin, ITEMTYPE_STRING, key, ITEMTYPE_UINT8, &value); } int c_rhash_insert_uint64_ptr(c_rhash hash, uint64_t key, void *value) { c_rhash_bin *bin = &hash->bins[key % hash->bin_count]; -#ifdef DEBUG_VERBOSE - if (bin != NULL) - printf("COLLISION. There will be more than one item in bin idx=%d\n", nhash); -#endif - return insert_into_bin(bin, ITEMTYPE_UINT64, &key, ITEMTYPE_OPAQUE_PTR, &value); } @@ -208,9 +166,9 @@ static void c_rhash_destroy_bin(c_rhash_bin bin) { struct bin_item *next; do { next = bin->next; - c_rfree(bin->key); - c_rfree(bin->value); - c_rfree(bin); + freez(bin->key); + freez(bin->value); + freez(bin); bin = next; } while (bin != NULL); } @@ -260,5 +218,5 @@ void c_rhash_destroy(c_rhash hash) { if (hash->bins[i] != NULL) c_rhash_destroy_bin(hash->bins[i]); } - c_rfree(hash); + freez(hash); } diff --git a/src/aclk/mqtt_websockets/c_rhash/c_rhash.h b/src/libnetdata/c_rhash/c_rhash.h similarity index 94% rename from src/aclk/mqtt_websockets/c_rhash/c_rhash.h rename to src/libnetdata/c_rhash/c_rhash.h index 37addd1616859b..990ef5432c37df 100644 --- a/src/aclk/mqtt_websockets/c_rhash/c_rhash.h +++ b/src/libnetdata/c_rhash/c_rhash.h @@ -1,8 +1,8 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later -#include -#include -#include +#ifndef C_RHASH_H +#define C_RHASH_H +#include "../libnetdata.h" #ifndef DEFAULT_BIN_COUNT #define DEFAULT_BIN_COUNT 1000 @@ -59,3 +59,5 @@ typedef struct { int c_rhash_iter_uint64_keys(c_rhash hash, c_rhash_iter_t *iter, uint64_t *key); int c_rhash_iter_str_keys(c_rhash hash, c_rhash_iter_t *iter, const char **key); + +#endif diff --git a/src/aclk/mqtt_websockets/c_rhash/c_rhash_internal.h b/src/libnetdata/c_rhash/c_rhash_internal.h similarity index 83% rename from src/aclk/mqtt_websockets/c_rhash/c_rhash_internal.h rename to src/libnetdata/c_rhash/c_rhash_internal.h index 
20f741076bd634..c5800310c7cbff 100644 --- a/src/aclk/mqtt_websockets/c_rhash/c_rhash_internal.h +++ b/src/libnetdata/c_rhash/c_rhash_internal.h @@ -1,4 +1,4 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later #include "c_rhash.h" diff --git a/src/aclk/mqtt_websockets/c_rhash/tests.c b/src/libnetdata/c_rhash/tests.c similarity index 99% rename from src/aclk/mqtt_websockets/c_rhash/tests.c rename to src/libnetdata/c_rhash/tests.c index 909c5562dbda1b..3caa7d003662d6 100644 --- a/src/aclk/mqtt_websockets/c_rhash/tests.c +++ b/src/libnetdata/c_rhash/tests.c @@ -1,4 +1,4 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// SPDX-License-Identifier: GPL-3.0-or-later #include #include diff --git a/src/libnetdata/functions_evloop/functions_evloop.h b/src/libnetdata/functions_evloop/functions_evloop.h index 1519f2042d240e..fd874e837cb22d 100644 --- a/src/libnetdata/functions_evloop/functions_evloop.h +++ b/src/libnetdata/functions_evloop/functions_evloop.h @@ -75,6 +75,10 @@ #define PLUGINSD_KEYWORD_NODE_ID "NODE_ID" #define PLUGINSD_KEYWORD_CLAIMED_ID "CLAIMED_ID" +#define PLUGINSD_KEYWORD_JSON "JSON" +#define PLUGINSD_KEYWORD_JSON_END "JSON_PAYLOAD_END" +#define PLUGINSD_KEYWORD_STREAM_PATH "STREAM_PATH" + typedef void (*functions_evloop_worker_execute_t)(const char *transaction, char *function, usec_t *stop_monotonic_ut, bool *cancelled, BUFFER *payload, HTTP_ACCESS access, const char *source, void *data); diff --git a/src/libnetdata/json/json-c-parser-inline.h b/src/libnetdata/json/json-c-parser-inline.h index bb60a9538302d0..e51cb232e77260 100644 --- a/src/libnetdata/json/json-c-parser-inline.h +++ b/src/libnetdata/json/json-c-parser-inline.h @@ -39,13 +39,26 @@ #define JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, member, dst, error, required) do { \ json_object *_j; \ - if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ - 
if(uuid_parse(json_object_get_string(_j), dst) != 0 && required) { \ - buffer_sprintf(error, "invalid UUID '%s.%s'", path, member); \ + if (json_object_object_get_ex(jobj, member, &_j)) { \ + if (json_object_is_type(_j, json_type_string)) { \ + if (uuid_parse(json_object_get_string(_j), dst) != 0) { \ + if(required) { \ + buffer_sprintf(error, "invalid UUID '%s.%s'", path, member); \ + return false; \ + } \ + else \ + uuid_clear(dst); \ + } \ + } \ + else if (json_object_is_type(_j, json_type_null)) { \ + uuid_clear(dst); \ + } \ + else if (required) { \ + buffer_sprintf(error, "expected UUID or null '%s.%s'", path, member); \ return false; \ } \ } \ - else if(required) { \ + else if (required) { \ buffer_sprintf(error, "missing UUID '%s.%s'", path, member); \ return false; \ } \ @@ -137,7 +150,6 @@ } \ } while(0) - #define JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, member, converter, dst, error, required) do { \ json_object *_j; \ if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) \ diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h index 6ff72bae1c8c30..a7418d53401aa5 100644 --- a/src/libnetdata/libnetdata.h +++ b/src/libnetdata/libnetdata.h @@ -451,6 +451,8 @@ extern char *netdata_configured_host_prefix; #include "july/july.h" #include "threads/threads.h" #include "buffer/buffer.h" +#include "ringbuffer/ringbuffer.h" +#include "c_rhash/c_rhash.h" #include "locks/locks.h" #include "circular_buffer/circular_buffer.h" #include "avl/avl.h" diff --git a/src/aclk/mqtt_websockets/c-rbuf/cringbuffer.c b/src/libnetdata/ringbuffer/ringbuffer.c similarity index 86% rename from src/aclk/mqtt_websockets/c-rbuf/cringbuffer.c rename to src/libnetdata/ringbuffer/ringbuffer.c index 8950c690678151..5a3523962e1d8e 100644 --- a/src/aclk/mqtt_websockets/c-rbuf/cringbuffer.c +++ b/src/libnetdata/ringbuffer/ringbuffer.c @@ -1,33 +1,17 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// 
SPDX-License-Identifier: GPL-3.0-or-later -#include "cringbuffer.h" -#include "cringbuffer_internal.h" - -#include -#include -#include - -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) - -// this allows user to use their own -// custom memory allocation functions -#ifdef RBUF_CUSTOM_MALLOC -#include "../../helpers/ringbuffer_pal.h" -#else -#define crbuf_malloc(...) malloc(__VA_ARGS__) -#define crbuf_free(...) free(__VA_ARGS__) -#endif +#include "../libnetdata.h" +#include "ringbuffer_internal.h" rbuf_t rbuf_create(size_t size) { - rbuf_t buffer = crbuf_malloc(sizeof(struct rbuf_t) + size); + rbuf_t buffer = mallocz(sizeof(struct rbuf) + size); if (!buffer) return NULL; - memset(buffer, 0, sizeof(struct rbuf_t)); + memset(buffer, 0, sizeof(struct rbuf)); - buffer->data = ((char*)buffer) + sizeof(struct rbuf_t); + buffer->data = ((char*)buffer) + sizeof(struct rbuf); buffer->head = buffer->data; buffer->tail = buffer->data; @@ -39,7 +23,7 @@ rbuf_t rbuf_create(size_t size) void rbuf_free(rbuf_t buffer) { - crbuf_free(buffer); + freez(buffer); } void rbuf_flush(rbuf_t buffer) @@ -81,6 +65,17 @@ int rbuf_bump_head(rbuf_t buffer, size_t bytes) return 1; } +int rbuf_bump_tail_noopt(rbuf_t buffer, size_t bytes) +{ + if (bytes > buffer->size_data) + return 0; + int i = buffer->tail - buffer->data; + buffer->tail = &buffer->data[(i + bytes) % buffer->size]; + buffer->size_data -= bytes; + + return 1; +} + int rbuf_bump_tail(rbuf_t buffer, size_t bytes) { if(!rbuf_bump_tail_noopt(buffer, bytes)) diff --git a/src/aclk/mqtt_websockets/c-rbuf/cringbuffer.h b/src/libnetdata/ringbuffer/ringbuffer.h similarity index 90% rename from src/aclk/mqtt_websockets/c-rbuf/cringbuffer.h rename to src/libnetdata/ringbuffer/ringbuffer.h index eb98035a96dd75..340112a8f7c82d 100644 --- a/src/aclk/mqtt_websockets/c-rbuf/cringbuffer.h +++ b/src/libnetdata/ringbuffer/ringbuffer.h @@ -1,11 +1,10 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only +// 
SPDX-License-Identifier: GPL-3.0-or-later -#ifndef CRINGBUFFER_H -#define CRINGBUFFER_H +#ifndef RINGBUFFER_H +#define RINGBUFFER_H +#include "../libnetdata.h" -#include - -typedef struct rbuf_t *rbuf_t; +typedef struct rbuf *rbuf_t; rbuf_t rbuf_create(size_t size); void rbuf_free(rbuf_t buffer); diff --git a/src/libnetdata/ringbuffer/ringbuffer_internal.h b/src/libnetdata/ringbuffer/ringbuffer_internal.h new file mode 100644 index 00000000000000..0cc254aa851627 --- /dev/null +++ b/src/libnetdata/ringbuffer/ringbuffer_internal.h @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef RINGBUFFER_INTERNAL_H +#define RINGBUFFER_INTERNAL_H + +#include "ringbuffer.h" + +struct rbuf { + char *data; + + // points to next byte where we can write + char *head; + // points to oldest (next to be poped) readable byte + char *tail; + + // to avoid calculating data + size + // all the time + char *end; + + size_t size; + size_t size_data; +}; + +typedef struct rbuf *rbuf_t; + +#endif diff --git a/src/libnetdata/template-enum.h b/src/libnetdata/template-enum.h index 393a6a945e97c4..82487336ac7612 100644 --- a/src/libnetdata/template-enum.h +++ b/src/libnetdata/template-enum.h @@ -37,4 +37,36 @@ return def_str; \ } +// -------------------------------------------------------------------------------------------------------------------- + +#define BITMAP_STR_DEFINE_FUNCTIONS_EXTERN(type) \ + type type ## _2id_one(const char *str); \ + const char *type##_2json(BUFFER *wb, const char *key, type id); + +#define BITMAP_STR_DEFINE_FUNCTIONS(type, def, def_str) \ + type type##_2id_one(const char *str) \ + { \ + if (!str || !*str) \ + return def; \ + \ + for (size_t i = 0; type ## _names[i].name; i++) { \ + if (strcmp(type ## _names[i].name, str) == 0) \ + return type ## _names[i].id; \ + } \ + \ + return def; \ + } \ + \ + const char *type##_2json(BUFFER *wb, const char *key, type id) \ + { \ + buffer_json_member_add_array(wb, key); \ + for (size_t i = 0; type ## 
_names[i].name; i++) { \ + if ((id & type ## _names[i].id) == type ## _names[i].id) \ + buffer_json_add_array_item_string(wb, type ## _names[i].name); \ + } \ + buffer_json_array_close(wb); \ + \ + return def_str; \ + } + #endif //NETDATA_TEMPLATE_ENUM_H diff --git a/src/libnetdata/uuid/uuid.h b/src/libnetdata/uuid/uuid.h index 91d2ad56f5a925..5fb1bce68c6715 100644 --- a/src/libnetdata/uuid/uuid.h +++ b/src/libnetdata/uuid/uuid.h @@ -37,7 +37,7 @@ ND_UUID UUID_generate_from_hash(const void *payload, size_t payload_len); #define UUIDeq(a, b) ((a).parts.hig64 == (b).parts.hig64 && (a).parts.low64 == (b).parts.low64) -#define UUIDiszero(a) (UUIDeq(a, UUID_ZERO)) +#define UUIDiszero(a) ((a).parts.hig64 == 0 && (a).parts.low64 == 0) static inline ND_UUID uuid2UUID(const nd_uuid_t uu1) { // uu1 may not be aligned, so copy it to the output diff --git a/src/registry/registry.c b/src/registry/registry.c index bf303a69ef800a..aab81afc5f8f6e 100644 --- a/src/registry/registry.c +++ b/src/registry/registry.c @@ -164,15 +164,15 @@ void registry_update_cloud_base_url() { int registry_request_hello_json(RRDHOST *host, struct web_client *w, bool do_not_track) { registry_json_header(host, w, "hello", REGISTRY_STATUS_OK); - if(!uuid_is_null(host->node_id)) - buffer_json_member_add_uuid(w->response.data, "node_id", host->node_id); + if(!UUIDiszero(host->node_id)) + buffer_json_member_add_uuid(w->response.data, "node_id", host->node_id.uuid); buffer_json_member_add_object(w->response.data, "agent"); { buffer_json_member_add_string(w->response.data, "machine_guid", localhost->machine_guid); - if(!uuid_is_null(localhost->node_id)) - buffer_json_member_add_uuid(w->response.data, "node_id", localhost->node_id); + if(!UUIDiszero(localhost->node_id)) + buffer_json_member_add_uuid(w->response.data, "node_id", localhost->node_id.uuid); CLAIM_ID claim_id = claim_id_get(); if (claim_id_is_set(claim_id)) @@ -196,8 +196,8 @@ int registry_request_hello_json(RRDHOST *host, struct web_client *w, 
bool do_not buffer_json_add_array_item_object(w->response.data); buffer_json_member_add_string(w->response.data, "machine_guid", h->machine_guid); - if(!uuid_is_null(h->node_id)) - buffer_json_member_add_uuid(w->response.data, "node_id", h->node_id); + if(!UUIDiszero(h->node_id)) + buffer_json_member_add_uuid(w->response.data, "node_id", h->node_id.uuid); buffer_json_member_add_string(w->response.data, "hostname", rrdhost_registry_hostname(h)); buffer_json_object_close(w->response.data); diff --git a/src/streaming/protocol/command-nodeid.c b/src/streaming/protocol/command-nodeid.c index bf003b6746d761..88d0e15b3dcd63 100644 --- a/src/streaming/protocol/command-nodeid.c +++ b/src/streaming/protocol/command-nodeid.c @@ -4,18 +4,18 @@ #include "collectors/plugins.d/pluginsd_internals.h" // the child disconnected from the parent, and it has to clear the parent's claim id -void rrdpush_sender_clear_child_claim_id(RRDHOST *host) { +void rrdpush_sender_clear_parent_claim_id(RRDHOST *host) { host->aclk.claim_id_of_parent = UUID_ZERO; } // the parent sends to the child its claim id, node id and cloud url void rrdpush_receiver_send_node_and_claim_id_to_child(RRDHOST *host) { - if(host == localhost || uuid_is_null(host->node_id)) return; + if(host == localhost || UUIDiszero(host->node_id)) return; spinlock_lock(&host->receiver_lock); if(host->receiver && stream_has_capability(host->receiver, STREAM_CAP_NODE_ID)) { char node_id_str[UUID_STR_LEN] = ""; - uuid_unparse_lower(host->node_id, node_id_str); + uuid_unparse_lower(host->node_id.uuid, node_id_str); CLAIM_ID claim_id = claim_id_get(); @@ -39,46 +39,52 @@ void rrdpush_receiver_send_node_and_claim_id_to_child(RRDHOST *host) { // the sender of the child receives node id, claim id and cloud url from the receiver of the parent void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { - char *claim_id = get_word(s->line.words, s->line.num_words, 1); - char *node_id = get_word(s->line.words, 
s->line.num_words, 2); + char *claim_id_str = get_word(s->line.words, s->line.num_words, 1); + char *node_id_str = get_word(s->line.words, s->line.num_words, 2); char *url = get_word(s->line.words, s->line.num_words, 3); bool claimed = is_agent_claimed(); + bool update_node_id = false; - ND_UUID claim_uuid; - if (uuid_parse(claim_id ? claim_id : "", claim_uuid.uuid) != 0) { + ND_UUID claim_id; + if (uuid_parse(claim_id_str ? claim_id_str : "", claim_id.uuid) != 0) { nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM %s [send to %s] received invalid claim id '%s'", rrdhost_hostname(s->host), s->connected_to, - claim_id ? claim_id : "(unset)"); + claim_id_str ? claim_id_str : "(unset)"); return; } - ND_UUID node_uuid; - if(uuid_parse(node_id ? node_id : "", node_uuid.uuid) != 0) { + ND_UUID node_id; + if(uuid_parse(node_id_str ? node_id_str : "", node_id.uuid) != 0) { nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM %s [send to %s] received an invalid node id '%s'", rrdhost_hostname(s->host), s->connected_to, - node_id ? node_id : "(unset)"); + node_id_str ? node_id_str : "(unset)"); return; } - if (!UUIDiszero(s->host->aclk.claim_id_of_parent) && !UUIDeq(s->host->aclk.claim_id_of_parent, claim_uuid)) + if (!UUIDiszero(s->host->aclk.claim_id_of_parent) && !UUIDeq(s->host->aclk.claim_id_of_parent, claim_id)) nd_log(NDLS_DAEMON, NDLP_INFO, "STREAM %s [send to %s] changed parent's claim id to %s", - rrdhost_hostname(s->host), s->connected_to, claim_id ? claim_id : "(unset)"); + rrdhost_hostname(s->host), s->connected_to, + claim_id_str ? claim_id_str : "(unset)"); - if(!uuid_is_null(s->host->node_id) && uuid_compare(s->host->node_id, node_uuid.uuid) != 0) { + if(!UUIDiszero(s->host->node_id) && !UUIDeq(s->host->node_id, node_id)) { if(claimed) { nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM %s [send to %s] parent reports different node id '%s', but we are claimed. Ignoring it.", - rrdhost_hostname(s->host), s->connected_to, node_id ? 
node_id : "(unset)"); + rrdhost_hostname(s->host), s->connected_to, + node_id_str ? node_id_str : "(unset)"); return; } - else + else { + update_node_id = true; nd_log(NDLS_DAEMON, NDLP_WARNING, "STREAM %s [send to %s] changed node id to %s", - rrdhost_hostname(s->host), s->connected_to, node_id ? node_id : "(unset)"); + rrdhost_hostname(s->host), s->connected_to, + node_id_str ? node_id_str : "(unset)"); + } } if(!url || !*url) { @@ -89,7 +95,7 @@ void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { return; } - s->host->aclk.claim_id_of_parent = claim_uuid; + s->host->aclk.claim_id_of_parent = claim_id; // There are some very strange corner cases here: // @@ -105,12 +111,18 @@ void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { // we are directly claimed and connected, ignore node id and cloud url return; - if(uuid_is_null(s->host->node_id)) - uuid_copy(s->host->node_id, node_uuid.uuid); + bool node_id_updated = false; + if(UUIDiszero(s->host->node_id) || update_node_id) { + s->host->node_id = node_id; + node_id_updated = true; + } // we change the URL, to allow the agent dashboard to work with Netdata Cloud on-prem, if any. 
cloud_config_url_set(url); // send it down the line (to children) rrdpush_receiver_send_node_and_claim_id_to_child(s->host); + + if(node_id_updated) + stream_path_node_id_updated(s->host); } diff --git a/src/streaming/protocol/commands.h b/src/streaming/protocol/commands.h index a75713755f2231..4f5ca7875a4a87 100644 --- a/src/streaming/protocol/commands.h +++ b/src/streaming/protocol/commands.h @@ -7,7 +7,7 @@ void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s); void rrdpush_receiver_send_node_and_claim_id_to_child(RRDHOST *host); -void rrdpush_sender_clear_child_claim_id(RRDHOST *host); +void rrdpush_sender_clear_parent_claim_id(RRDHOST *host); void rrdpush_sender_send_claimed_id(RRDHOST *host); diff --git a/src/streaming/receiver.c b/src/streaming/receiver.c index ecb23af6310cb4..6a7a37d9feb0b9 100644 --- a/src/streaming/receiver.c +++ b/src/streaming/receiver.c @@ -460,44 +460,50 @@ static bool rrdhost_set_receiver(RRDHOST *host, struct receiver_state *rpt) { static void rrdhost_clear_receiver(struct receiver_state *rpt) { RRDHOST *host = rpt->host; - if(host) { - bool signal_rrdcontext = false; - spinlock_lock(&host->receiver_lock); + if(!host) return; + spinlock_lock(&host->receiver_lock); + { // Make sure that we detach this thread and don't kill a freshly arriving receiver - if(host->receiver == rpt) { + + if (host->receiver == rpt) { + spinlock_unlock(&host->receiver_lock); + { + // run all these without having the receiver lock + + stream_path_child_disconnected(host); + rrdpush_sender_thread_stop(host, STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, false); + rrdpush_receiver_replication_reset(host); + rrdcontext_host_child_disconnected(host); + + if (rpt->config.health_enabled) + rrdcalc_child_disconnected(host); + + rrdpush_reset_destinations_postpone_time(host); + } + spinlock_lock(&host->receiver_lock); + + // now we have the lock again + __atomic_sub_fetch(&localhost->connected_children_count, 1, __ATOMIC_RELAXED); 
rrdhost_flag_set(rpt->host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED); - pluginsd_process_cleanup(rpt->parser); - __atomic_store_n(&rpt->parser, NULL, __ATOMIC_RELAXED); - host->trigger_chart_obsoletion_check = 0; host->child_connect_time = 0; host->child_disconnected_time = now_realtime_sec(); - host->health.health_enabled = 0; - rrdpush_sender_thread_stop(host, STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, false); - - signal_rrdcontext = true; - rrdpush_receiver_replication_reset(host); - + host->rrdpush_last_receiver_exit_reason = rpt->exit.reason; rrdhost_flag_set(host, RRDHOST_FLAG_ORPHAN); host->receiver = NULL; - host->rrdpush_last_receiver_exit_reason = rpt->exit.reason; - - if(rpt->config.health_enabled) - rrdcalc_child_disconnected(host); } + } - spinlock_unlock(&host->receiver_lock); - - if(signal_rrdcontext) - rrdcontext_host_child_disconnected(host); + // this must be cleared with the receiver lock + pluginsd_process_cleanup(rpt->parser); + __atomic_store_n(&rpt->parser, NULL, __ATOMIC_RELAXED); - rrdpush_reset_destinations_postpone_time(host); - } + spinlock_unlock(&host->receiver_lock); } bool stop_streaming_receiver(RRDHOST *host, STREAM_HANDSHAKE reason) { @@ -857,21 +863,6 @@ static void rrdpush_receive(struct receiver_state *rpt) ; } -static void rrdpush_receiver_thread_cleanup(void *pptr) { - struct receiver_state *rpt = CLEANUP_FUNCTION_GET_PTR(pptr); - if(!rpt) return; - - netdata_log_info("STREAM '%s' [receive from [%s]:%s]: " - "receive thread ended (task id %d)" - , rpt->hostname ? rpt->hostname : "-" - , rpt->client_ip ? rpt->client_ip : "-", rpt->client_port ? 
rpt->client_port : "-", gettid_cached()); - - worker_unregister(); - rrdhost_clear_receiver(rpt); - receiver_state_free(rpt); - rrdhost_set_is_parent_label(); -} - static bool stream_receiver_log_capabilities(BUFFER *wb, void *ptr) { struct receiver_state *rpt = ptr; if(!rpt) @@ -891,7 +882,6 @@ static bool stream_receiver_log_transport(BUFFER *wb, void *ptr) { } void *rrdpush_receiver_thread(void *ptr) { - CLEANUP_FUNCTION_REGISTER(rrdpush_receiver_thread_cleanup) cleanup_ptr = ptr; worker_register("STREAMRCV"); worker_register_job_custom_metric(WORKER_RECEIVER_JOB_BYTES_READ, @@ -923,5 +913,15 @@ void *rrdpush_receiver_thread(void *ptr) { , rpt->client_port); rrdpush_receive(rpt); + + netdata_log_info("STREAM '%s' [receive from [%s]:%s]: " + "receive thread ended (task id %d)" + , rpt->hostname ? rpt->hostname : "-" + , rpt->client_ip ? rpt->client_ip : "-", rpt->client_port ? rpt->client_port : "-", gettid_cached()); + + worker_unregister(); + rrdhost_clear_receiver(rpt); + receiver_state_free(rpt); + rrdhost_set_is_parent_label(); return NULL; } diff --git a/src/streaming/rrdpush.c b/src/streaming/rrdpush.c index 6b0cc9ebe11845..7a5d6f73084aba 100644 --- a/src/streaming/rrdpush.c +++ b/src/streaming/rrdpush.c @@ -39,7 +39,6 @@ struct config stream_config = { }; unsigned int default_rrdpush_enabled = 0; -STREAM_CAPABILITIES globally_disabled_capabilities = STREAM_CAP_NONE; unsigned int default_rrdpush_compression_enabled = 1; char *default_rrdpush_destination = NULL; @@ -801,6 +800,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *decoded_query_stri return rrdpush_receiver_too_busy_now(w); struct receiver_state *rpt = callocz(1, sizeof(*rpt)); + rpt->connected_since_s = now_realtime_sec(); rpt->last_msg_t = now_monotonic_sec(); rpt->hops = 1; @@ -1265,147 +1265,3 @@ const char *stream_handshake_error_to_string(STREAM_HANDSHAKE handshake_error) { return "UNKNOWN"; } - -static struct { - STREAM_CAPABILITIES cap; - const char *str; -} 
capability_names[] = { - {STREAM_CAP_V1, "V1" }, - {STREAM_CAP_V2, "V2" }, - {STREAM_CAP_VN, "VN" }, - {STREAM_CAP_VCAPS, "VCAPS" }, - {STREAM_CAP_HLABELS, "HLABELS" }, - {STREAM_CAP_CLAIM, "CLAIM" }, - {STREAM_CAP_CLABELS, "CLABELS" }, - {STREAM_CAP_LZ4, "LZ4" }, - {STREAM_CAP_FUNCTIONS, "FUNCTIONS" }, - {STREAM_CAP_REPLICATION, "REPLICATION" }, - {STREAM_CAP_BINARY, "BINARY" }, - {STREAM_CAP_INTERPOLATED, "INTERPOLATED" }, - {STREAM_CAP_IEEE754, "IEEE754" }, - {STREAM_CAP_DATA_WITH_ML, "ML" }, - {STREAM_CAP_DYNCFG, "DYNCFG" }, - {STREAM_CAP_NODE_ID, "NODEID" }, - {STREAM_CAP_SLOTS, "SLOTS" }, - {STREAM_CAP_ZSTD, "ZSTD" }, - {STREAM_CAP_GZIP, "GZIP" }, - {STREAM_CAP_BROTLI, "BROTLI" }, - {STREAM_CAP_PROGRESS, "PROGRESS" }, - {0 , NULL }, -}; - -void stream_capabilities_to_string(BUFFER *wb, STREAM_CAPABILITIES caps) { - for(size_t i = 0; capability_names[i].str ; i++) { - if(caps & capability_names[i].cap) { - buffer_strcat(wb, capability_names[i].str); - buffer_strcat(wb, " "); - } - } -} - -void stream_capabilities_to_json_array(BUFFER *wb, STREAM_CAPABILITIES caps, const char *key) { - if(key) - buffer_json_member_add_array(wb, key); - else - buffer_json_add_array_item_array(wb); - - for(size_t i = 0; capability_names[i].str ; i++) { - if(caps & capability_names[i].cap) - buffer_json_add_array_item_string(wb, capability_names[i].str); - } - - buffer_json_array_close(wb); -} - -void log_receiver_capabilities(struct receiver_state *rpt) { - BUFFER *wb = buffer_create(100, NULL); - stream_capabilities_to_string(wb, rpt->capabilities); - - nd_log_daemon(NDLP_INFO, "STREAM %s [receive from [%s]:%s]: established link with negotiated capabilities: %s", - rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, buffer_tostring(wb)); - - buffer_free(wb); -} - -void log_sender_capabilities(struct sender_state *s) { - BUFFER *wb = buffer_create(100, NULL); - stream_capabilities_to_string(wb, s->capabilities); - - nd_log_daemon(NDLP_INFO, "STREAM %s [send to %s]: 
established link with negotiated capabilities: %s", - rrdhost_hostname(s->host), s->connected_to, buffer_tostring(wb)); - - buffer_free(wb); -} - -STREAM_CAPABILITIES stream_our_capabilities(RRDHOST *host, bool sender) { - STREAM_CAPABILITIES disabled_capabilities = globally_disabled_capabilities; - - if(host && sender) { - // we have DATA_WITH_ML capability - // we should remove the DATA_WITH_ML capability if our database does not have anomaly info - // this can happen under these conditions: 1. we don't run ML, and 2. we don't receive ML - spinlock_lock(&host->receiver_lock); - - if(!ml_host_running(host) && !stream_has_capability(host->receiver, STREAM_CAP_DATA_WITH_ML)) - disabled_capabilities |= STREAM_CAP_DATA_WITH_ML; - - spinlock_unlock(&host->receiver_lock); - - if(host->sender) - disabled_capabilities |= host->sender->disabled_capabilities; - } - - return (STREAM_CAP_V1 | - STREAM_CAP_V2 | - STREAM_CAP_VN | - STREAM_CAP_VCAPS | - STREAM_CAP_HLABELS | - STREAM_CAP_CLAIM | - STREAM_CAP_CLABELS | - STREAM_CAP_FUNCTIONS | - STREAM_CAP_REPLICATION | - STREAM_CAP_BINARY | - STREAM_CAP_INTERPOLATED | - STREAM_CAP_SLOTS | - STREAM_CAP_PROGRESS | - STREAM_CAP_COMPRESSIONS_AVAILABLE | - STREAM_CAP_DYNCFG | - STREAM_CAP_NODE_ID | - STREAM_CAP_IEEE754 | - STREAM_CAP_DATA_WITH_ML | - 0) & ~disabled_capabilities; -} - -STREAM_CAPABILITIES convert_stream_version_to_capabilities(int32_t version, RRDHOST *host, bool sender) { - STREAM_CAPABILITIES caps = 0; - - if(version <= 1) caps = STREAM_CAP_V1; - else if(version < STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_V2 | STREAM_CAP_HLABELS; - else if(version <= STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM; - else if(version <= STREAM_OLD_VERSION_CLABELS) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS; - else if(version <= STREAM_OLD_VERSION_LZ4) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS | 
STREAM_CAP_LZ4_AVAILABLE; - else caps = version; - - if(caps & STREAM_CAP_VCAPS) - caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2|STREAM_CAP_VN); - - if(caps & STREAM_CAP_VN) - caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2); - - if(caps & STREAM_CAP_V2) - caps &= ~(STREAM_CAP_V1); - - STREAM_CAPABILITIES common_caps = caps & stream_our_capabilities(host, sender); - - if(!(common_caps & STREAM_CAP_INTERPOLATED)) - // DATA WITH ML requires INTERPOLATED - common_caps &= ~STREAM_CAP_DATA_WITH_ML; - - return common_caps; -} - -int32_t stream_capabilities_to_vn(uint32_t caps) { - if(caps & STREAM_CAP_LZ4) return STREAM_OLD_VERSION_LZ4; - if(caps & STREAM_CAP_CLABELS) return STREAM_OLD_VERSION_CLABELS; - return STREAM_OLD_VERSION_CLAIM; // if(caps & STREAM_CAP_CLAIM) -} diff --git a/src/streaming/rrdpush.h b/src/streaming/rrdpush.h index e0d61dfcc99b91..17d3aadd9efb2c 100644 --- a/src/streaming/rrdpush.h +++ b/src/streaming/rrdpush.h @@ -8,90 +8,12 @@ #include "web/server/web_client.h" #include "database/rrdfunctions.h" #include "database/rrd.h" +#include "stream_capabilities.h" #define CONNECTED_TO_SIZE 100 #define CBUFFER_INITIAL_SIZE (16 * 1024) #define THREAD_BUFFER_INITIAL_SIZE (CBUFFER_INITIAL_SIZE / 2) -// ---------------------------------------------------------------------------- -// obsolete versions - do not use anymore - -#define STREAM_OLD_VERSION_CLAIM 3 -#define STREAM_OLD_VERSION_CLABELS 4 -#define STREAM_OLD_VERSION_LZ4 5 - -// ---------------------------------------------------------------------------- -// capabilities negotiation - -typedef enum { - STREAM_CAP_NONE = 0, - - // do not use the first 3 bits - // they used to be versions 1, 2 and 3 - // before we introduce capabilities - - STREAM_CAP_V1 = (1 << 3), // v1 = the oldest protocol - STREAM_CAP_V2 = (1 << 4), // v2 = the second version of the protocol (with host labels) - STREAM_CAP_VN = (1 << 5), // version negotiation supported (for versions 3, 4, 5 of the protocol) - // v3 = claiming supported - // v4 = chart 
labels supported - // v5 = lz4 compression supported - STREAM_CAP_VCAPS = (1 << 6), // capabilities negotiation supported - STREAM_CAP_HLABELS = (1 << 7), // host labels supported - STREAM_CAP_CLAIM = (1 << 8), // claiming supported - STREAM_CAP_CLABELS = (1 << 9), // chart labels supported - STREAM_CAP_LZ4 = (1 << 10), // lz4 compression supported - STREAM_CAP_FUNCTIONS = (1 << 11), // plugin functions supported - STREAM_CAP_REPLICATION = (1 << 12), // replication supported - STREAM_CAP_BINARY = (1 << 13), // streaming supports binary data - STREAM_CAP_INTERPOLATED = (1 << 14), // streaming supports interpolated streaming of values - STREAM_CAP_IEEE754 = (1 << 15), // streaming supports binary/hex transfer of double values - STREAM_CAP_DATA_WITH_ML = (1 << 16), // streaming supports transferring anomaly bit - // STREAM_CAP_DYNCFG = (1 << 17), // leave this unused for as long as possible - STREAM_CAP_SLOTS = (1 << 18), // the sender can appoint a unique slot for each chart - STREAM_CAP_ZSTD = (1 << 19), // ZSTD compression supported - STREAM_CAP_GZIP = (1 << 20), // GZIP compression supported - STREAM_CAP_BROTLI = (1 << 21), // BROTLI compression supported - STREAM_CAP_PROGRESS = (1 << 22), // Functions PROGRESS support - STREAM_CAP_DYNCFG = (1 << 23), // support for DYNCFG - STREAM_CAP_NODE_ID = (1 << 24), // support for sending NODE_ID back to the child - - STREAM_CAP_INVALID = (1 << 30), // used as an invalid value for capabilities when this is set - // this must be signed int, so don't use the last bit - // needed for negotiating errors between parent and child -} STREAM_CAPABILITIES; - -#ifdef ENABLE_LZ4 -#define STREAM_CAP_LZ4_AVAILABLE STREAM_CAP_LZ4 -#else -#define STREAM_CAP_LZ4_AVAILABLE 0 -#endif // ENABLE_LZ4 - -#ifdef ENABLE_ZSTD -#define STREAM_CAP_ZSTD_AVAILABLE STREAM_CAP_ZSTD -#else -#define STREAM_CAP_ZSTD_AVAILABLE 0 -#endif // ENABLE_ZSTD - -#ifdef ENABLE_BROTLI -#define STREAM_CAP_BROTLI_AVAILABLE STREAM_CAP_BROTLI -#else -#define 
STREAM_CAP_BROTLI_AVAILABLE 0 -#endif // ENABLE_BROTLI - -#define STREAM_CAP_COMPRESSIONS_AVAILABLE (STREAM_CAP_LZ4_AVAILABLE|STREAM_CAP_ZSTD_AVAILABLE|STREAM_CAP_BROTLI_AVAILABLE|STREAM_CAP_GZIP) - -extern STREAM_CAPABILITIES globally_disabled_capabilities; - -STREAM_CAPABILITIES stream_our_capabilities(RRDHOST *host, bool sender); - -#define stream_has_capability(rpt, capability) ((rpt) && ((rpt)->capabilities & (capability)) == (capability)) - -static inline bool stream_has_more_than_one_capability_of(STREAM_CAPABILITIES caps, STREAM_CAPABILITIES mask) { - STREAM_CAPABILITIES common = (STREAM_CAPABILITIES)(caps & mask); - return (common & (common - 1)) != 0 && common != 0; -} - // ---------------------------------------------------------------------------- // stream handshake @@ -200,6 +122,9 @@ typedef enum __attribute__((packed)) { SENDER_FLAG_OVERFLOW = (1 << 0), // The buffer has been overflown } SENDER_FLAGS; +typedef void (*rrdpush_defer_action_t)(struct sender_state *s, void *data); +typedef void (*rrdpush_defer_cleanup_t)(struct sender_state *s, void *data); + struct sender_state { RRDHOST *host; pid_t tid; // the thread id of the sender, from gettid_cached() @@ -267,14 +192,12 @@ struct sender_state { } atomic; struct { - bool intercept_input; - const char *transaction; - const char *timeout_s; - const char *function; - const char *access; - const char *source; + const char *end_keyword; BUFFER *payload; - } functions; + rrdpush_defer_action_t action; + rrdpush_defer_cleanup_t cleanup; + void *action_data; + } defer; int parent_using_h2o; }; @@ -355,6 +278,7 @@ struct receiver_state { struct rrdhost_system_info *system_info; STREAM_CAPABILITIES capabilities; time_t last_msg_t; + time_t connected_since_s; struct buffered_reader reader; @@ -479,13 +403,7 @@ void rrdpush_signal_sender_to_wake_up(struct sender_state *s); void rrdpush_reset_destinations_postpone_time(RRDHOST *host); const char *stream_handshake_error_to_string(STREAM_HANDSHAKE 
handshake_error); -void stream_capabilities_to_json_array(BUFFER *wb, STREAM_CAPABILITIES caps, const char *key); void rrdpush_receive_log_status(struct receiver_state *rpt, const char *msg, const char *status, ND_LOG_FIELD_PRIORITY priority); -void log_receiver_capabilities(struct receiver_state *rpt); -void log_sender_capabilities(struct sender_state *s); -STREAM_CAPABILITIES convert_stream_version_to_capabilities(int32_t version, RRDHOST *host, bool sender); -int32_t stream_capabilities_to_vn(uint32_t caps); -void stream_capabilities_to_string(BUFFER *wb, STREAM_CAPABILITIES caps); void receiver_state_free(struct receiver_state *rpt); bool stop_streaming_receiver(RRDHOST *host, STREAM_HANDSHAKE reason); @@ -763,5 +681,6 @@ void rrdpush_select_receiver_compression_algorithm(struct receiver_state *rpt); void rrdpush_compression_deactivate(struct sender_state *s); #include "protocol/commands.h" +#include "stream_path.h" #endif //NETDATA_RRDPUSH_H diff --git a/src/streaming/sender.c b/src/streaming/sender.c index e55c0f80f91738..31b2e8b863913a 100644 --- a/src/streaming/sender.c +++ b/src/streaming/sender.c @@ -1,209 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "rrdpush.h" -#include "common.h" -#include "aclk/https_client.h" - -#define WORKER_SENDER_JOB_CONNECT 0 -#define WORKER_SENDER_JOB_PIPE_READ 1 -#define WORKER_SENDER_JOB_SOCKET_RECEIVE 2 -#define WORKER_SENDER_JOB_EXECUTE 3 -#define WORKER_SENDER_JOB_SOCKET_SEND 4 -#define WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE 5 -#define WORKER_SENDER_JOB_DISCONNECT_OVERFLOW 6 -#define WORKER_SENDER_JOB_DISCONNECT_TIMEOUT 7 -#define WORKER_SENDER_JOB_DISCONNECT_POLL_ERROR 8 -#define WORKER_SENDER_JOB_DISCONNECT_SOCKET_ERROR 9 -#define WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR 10 -#define WORKER_SENDER_JOB_DISCONNECT_PARENT_CLOSED 11 -#define WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR 12 -#define WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR 13 -#define WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION 14 -#define 
WORKER_SENDER_JOB_BUFFER_RATIO 15 -#define WORKER_SENDER_JOB_BYTES_RECEIVED 16 -#define WORKER_SENDER_JOB_BYTES_SENT 17 -#define WORKER_SENDER_JOB_BYTES_COMPRESSED 18 -#define WORKER_SENDER_JOB_BYTES_UNCOMPRESSED 19 -#define WORKER_SENDER_JOB_BYTES_COMPRESSION_RATIO 20 -#define WORKER_SENDER_JOB_REPLAY_REQUEST 21 -#define WORKER_SENDER_JOB_FUNCTION_REQUEST 22 -#define WORKER_SENDER_JOB_REPLAY_DICT_SIZE 23 -#define WORKER_SENDER_JOB_DISCONNECT_CANT_UPGRADE_CONNECTION 24 - -#if WORKER_UTILIZATION_MAX_JOB_TYPES < 25 -#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 25 -#endif - -extern struct config stream_config; -extern char *netdata_ssl_ca_path; -extern char *netdata_ssl_ca_file; - -static __thread BUFFER *sender_thread_buffer = NULL; -static __thread bool sender_thread_buffer_used = false; -static __thread time_t sender_thread_buffer_last_reset_s = 0; - -void sender_thread_buffer_free(void) { - buffer_free(sender_thread_buffer); - sender_thread_buffer = NULL; - sender_thread_buffer_used = false; -} - -// Collector thread starting a transmission -BUFFER *sender_start(struct sender_state *s) { - if(unlikely(sender_thread_buffer_used)) - fatal("STREAMING: thread buffer is used multiple times concurrently."); - - if(unlikely(rrdpush_sender_last_buffer_recreate_get(s) > sender_thread_buffer_last_reset_s)) { - if(unlikely(sender_thread_buffer && sender_thread_buffer->size > THREAD_BUFFER_INITIAL_SIZE)) { - buffer_free(sender_thread_buffer); - sender_thread_buffer = NULL; - } - } - - if(unlikely(!sender_thread_buffer)) { - sender_thread_buffer = buffer_create(THREAD_BUFFER_INITIAL_SIZE, &netdata_buffers_statistics.buffers_streaming); - sender_thread_buffer_last_reset_s = rrdpush_sender_last_buffer_recreate_get(s); - } - - sender_thread_buffer_used = true; - buffer_flush(sender_thread_buffer); - return sender_thread_buffer; -} - -static inline void rrdpush_sender_thread_close_socket(RRDHOST *host); - -#define SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE 3 - -// 
Collector thread finishing a transmission -void sender_commit(struct sender_state *s, BUFFER *wb, STREAM_TRAFFIC_TYPE type) { - - if(unlikely(wb != sender_thread_buffer)) - fatal("STREAMING: sender is trying to commit a buffer that is not this thread's buffer."); - - if(unlikely(!sender_thread_buffer_used)) - fatal("STREAMING: sender is committing a buffer twice."); - - sender_thread_buffer_used = false; - - char *src = (char *)buffer_tostring(wb); - size_t src_len = buffer_strlen(wb); - - if(unlikely(!src || !src_len)) - return; - - sender_lock(s); - -#ifdef NETDATA_LOG_STREAM_SENDER - if(type == STREAM_TRAFFIC_TYPE_METADATA) { - if(!s->stream_log_fp) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "/tmp/stream-sender-%s.txt", s->host ? rrdhost_hostname(s->host) : "unknown"); - - s->stream_log_fp = fopen(filename, "w"); - } - - fprintf(s->stream_log_fp, "\n--- SEND MESSAGE START: %s ----\n" - "%s" - "--- SEND MESSAGE END ----------------------------------------\n" - , rrdhost_hostname(s->host), src - ); - } -#endif - - if(unlikely(s->buffer->max_size < (src_len + 1) * SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE)) { - netdata_log_info("STREAM %s [send to %s]: max buffer size of %zu is too small for a data message of size %zu. 
Increasing the max buffer size to %d times the max data message size.", - rrdhost_hostname(s->host), s->connected_to, s->buffer->max_size, buffer_strlen(wb) + 1, SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE); - - s->buffer->max_size = (src_len + 1) * SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE; - } - - if (s->compressor.initialized) { - while(src_len) { - size_t size_to_compress = src_len; - - if(unlikely(size_to_compress > COMPRESSION_MAX_MSG_SIZE)) { - if (stream_has_capability(s, STREAM_CAP_BINARY)) - size_to_compress = COMPRESSION_MAX_MSG_SIZE; - else { - if (size_to_compress > COMPRESSION_MAX_MSG_SIZE) { - // we need to find the last newline - // so that the decompressor will have a whole line to work with - - const char *t = &src[COMPRESSION_MAX_MSG_SIZE]; - while (--t >= src) - if (unlikely(*t == '\n')) - break; - - if (t <= src) { - size_to_compress = COMPRESSION_MAX_MSG_SIZE; - } else - size_to_compress = t - src + 1; - } - } - } - - const char *dst; - size_t dst_len = rrdpush_compress(&s->compressor, src, size_to_compress, &dst); - if (!dst_len) { - netdata_log_error("STREAM %s [send to %s]: COMPRESSION failed. Resetting compressor and re-trying", - rrdhost_hostname(s->host), s->connected_to); - - rrdpush_compression_initialize(s); - dst_len = rrdpush_compress(&s->compressor, src, size_to_compress, &dst); - if(!dst_len) { - netdata_log_error("STREAM %s [send to %s]: COMPRESSION failed again. 
Deactivating compression", - rrdhost_hostname(s->host), s->connected_to); - - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION); - rrdpush_compression_deactivate(s); - rrdpush_sender_thread_close_socket(s->host); - sender_unlock(s); - return; - } - } - - rrdpush_signature_t signature = rrdpush_compress_encode_signature(dst_len); - -#ifdef NETDATA_INTERNAL_CHECKS - // check if reversing the signature provides the same length - size_t decoded_dst_len = rrdpush_decompress_decode_signature((const char *)&signature, sizeof(signature)); - if(decoded_dst_len != dst_len) - fatal("RRDPUSH COMPRESSION: invalid signature, original payload %zu bytes, " - "compressed payload length %zu bytes, but signature says payload is %zu bytes", - size_to_compress, dst_len, decoded_dst_len); -#endif - - if(cbuffer_add_unsafe(s->buffer, (const char *)&signature, sizeof(signature))) - s->flags |= SENDER_FLAG_OVERFLOW; - else { - if(cbuffer_add_unsafe(s->buffer, dst, dst_len)) - s->flags |= SENDER_FLAG_OVERFLOW; - else - s->sent_bytes_on_this_connection_per_type[type] += dst_len + sizeof(signature); - } - - src = src + size_to_compress; - src_len -= size_to_compress; - } - } - else if(cbuffer_add_unsafe(s->buffer, src, src_len)) - s->flags |= SENDER_FLAG_OVERFLOW; - else - s->sent_bytes_on_this_connection_per_type[type] += src_len; - - replication_recalculate_buffer_used_ratio_unsafe(s); - - bool signal_sender = false; - if(!rrdpush_sender_pipe_has_pending_data(s)) { - rrdpush_sender_pipe_set_pending_data(s); - signal_sender = true; - } - - sender_unlock(s); - - if(signal_sender && (!stream_has_capability(s, STREAM_CAP_INTERPOLATED) || type != STREAM_TRAFFIC_TYPE_DATA)) - rrdpush_signal_sender_to_wake_up(s); -} +#include "sender_internals.h" static inline void rrdpush_sender_add_host_variable_to_buffer(BUFFER *wb, const RRDVAR_ACQUIRED *rva) { buffer_sprintf( @@ -324,692 +121,24 @@ static void rrdpush_sender_charts_and_replication_reset(RRDHOST *host) { 
rrdpush_sender_replicating_charts_zero(host->sender); } -static void rrdpush_sender_on_connect(RRDHOST *host) { +void rrdpush_sender_on_connect(RRDHOST *host) { rrdpush_sender_cbuffer_flush(host); rrdpush_sender_charts_and_replication_reset(host); } -static void rrdpush_sender_after_connect(RRDHOST *host) { +void rrdpush_sender_after_connect(RRDHOST *host) { rrdpush_sender_thread_send_custom_host_variables(host); } -static inline void rrdpush_sender_thread_close_socket(RRDHOST *host) { - netdata_ssl_close(&host->sender->ssl); +static void rrdpush_sender_on_disconnect(RRDHOST *host) { + // we have been connected to this parent - let's cleanup - if(host->sender->rrdpush_sender_socket != -1) { - close(host->sender->rrdpush_sender_socket); - host->sender->rrdpush_sender_socket = -1; - } - - rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); - rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED); - - // do not flush the circular buffer here - // this function is called sometimes with the mutex lock, sometimes without the lock rrdpush_sender_charts_and_replication_reset(host); // clear the parent's claim id - rrdpush_sender_clear_child_claim_id(host); + rrdpush_sender_clear_parent_claim_id(host); rrdpush_receiver_send_node_and_claim_id_to_child(host); -} - -void rrdpush_encode_variable(stream_encoded_t *se, RRDHOST *host) { - se->os_name = (host->system_info->host_os_name)?url_encode(host->system_info->host_os_name):strdupz(""); - se->os_id = (host->system_info->host_os_id)?url_encode(host->system_info->host_os_id):strdupz(""); - se->os_version = (host->system_info->host_os_version)?url_encode(host->system_info->host_os_version):strdupz(""); - se->kernel_name = (host->system_info->kernel_name)?url_encode(host->system_info->kernel_name):strdupz(""); - se->kernel_version = (host->system_info->kernel_version)?url_encode(host->system_info->kernel_version):strdupz(""); -} - -void rrdpush_clean_encoded(stream_encoded_t *se) { - if (se->os_name) { 
- freez(se->os_name); - se->os_name = NULL; - } - - if (se->os_id) { - freez(se->os_id); - se->os_id = NULL; - } - - if (se->os_version) { - freez(se->os_version); - se->os_version = NULL; - } - - if (se->kernel_name) { - freez(se->kernel_name); - se->kernel_name = NULL; - } - - if (se->kernel_version) { - freez(se->kernel_version); - se->kernel_version = NULL; - } -} - -struct { - const char *response; - const char *status; - size_t length; - int32_t version; - bool dynamic; - const char *error; - int worker_job_id; - int postpone_reconnect_seconds; - ND_LOG_FIELD_PRIORITY priority; -} stream_responses[] = { - { - .response = START_STREAMING_PROMPT_VN, - .length = sizeof(START_STREAMING_PROMPT_VN) - 1, - .status = RRDPUSH_STATUS_CONNECTED, - .version = STREAM_HANDSHAKE_OK_V3, // and above - .dynamic = true, // dynamic = we will parse the version / capabilities - .error = NULL, - .worker_job_id = 0, - .postpone_reconnect_seconds = 0, - .priority = NDLP_INFO, - }, - { - .response = START_STREAMING_PROMPT_V2, - .length = sizeof(START_STREAMING_PROMPT_V2) - 1, - .status = RRDPUSH_STATUS_CONNECTED, - .version = STREAM_HANDSHAKE_OK_V2, - .dynamic = false, - .error = NULL, - .worker_job_id = 0, - .postpone_reconnect_seconds = 0, - .priority = NDLP_INFO, - }, - { - .response = START_STREAMING_PROMPT_V1, - .length = sizeof(START_STREAMING_PROMPT_V1) - 1, - .status = RRDPUSH_STATUS_CONNECTED, - .version = STREAM_HANDSHAKE_OK_V1, - .dynamic = false, - .error = NULL, - .worker_job_id = 0, - .postpone_reconnect_seconds = 0, - .priority = NDLP_INFO, - }, - { - .response = START_STREAMING_ERROR_SAME_LOCALHOST, - .length = sizeof(START_STREAMING_ERROR_SAME_LOCALHOST) - 1, - .status = RRDPUSH_STATUS_LOCALHOST, - .version = STREAM_HANDSHAKE_ERROR_LOCALHOST, - .dynamic = false, - .error = "remote server rejected this stream, the host we are trying to stream is its localhost", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 60 * 60, // 
the IP may change, try it every hour - .priority = NDLP_DEBUG, - }, - { - .response = START_STREAMING_ERROR_ALREADY_STREAMING, - .length = sizeof(START_STREAMING_ERROR_ALREADY_STREAMING) - 1, - .status = RRDPUSH_STATUS_ALREADY_CONNECTED, - .version = STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED, - .dynamic = false, - .error = "remote server rejected this stream, the host we are trying to stream is already streamed to it", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 2 * 60, // 2 minutes - .priority = NDLP_DEBUG, - }, - { - .response = START_STREAMING_ERROR_NOT_PERMITTED, - .length = sizeof(START_STREAMING_ERROR_NOT_PERMITTED) - 1, - .status = RRDPUSH_STATUS_PERMISSION_DENIED, - .version = STREAM_HANDSHAKE_ERROR_DENIED, - .dynamic = false, - .error = "remote server denied access, probably we don't have the right API key?", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 1 * 60, // 1 minute - .priority = NDLP_ERR, - }, - { - .response = START_STREAMING_ERROR_BUSY_TRY_LATER, - .length = sizeof(START_STREAMING_ERROR_BUSY_TRY_LATER) - 1, - .status = RRDPUSH_STATUS_RATE_LIMIT, - .version = STREAM_HANDSHAKE_BUSY_TRY_LATER, - .dynamic = false, - .error = "remote server is currently busy, we should try later", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 2 * 60, // 2 minutes - .priority = NDLP_NOTICE, - }, - { - .response = START_STREAMING_ERROR_INTERNAL_ERROR, - .length = sizeof(START_STREAMING_ERROR_INTERNAL_ERROR) - 1, - .status = RRDPUSH_STATUS_INTERNAL_SERVER_ERROR, - .version = STREAM_HANDSHAKE_INTERNAL_ERROR, - .dynamic = false, - .error = "remote server is encountered an internal error, we should try later", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 5 * 60, // 5 minutes - .priority = NDLP_CRIT, - }, - { - .response = START_STREAMING_ERROR_INITIALIZATION, - .length = 
sizeof(START_STREAMING_ERROR_INITIALIZATION) - 1, - .status = RRDPUSH_STATUS_INITIALIZATION_IN_PROGRESS, - .version = STREAM_HANDSHAKE_INITIALIZATION, - .dynamic = false, - .error = "remote server is initializing, we should try later", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 2 * 60, // 2 minute - .priority = NDLP_NOTICE, - }, - - // terminator - { - .response = NULL, - .length = 0, - .status = RRDPUSH_STATUS_BAD_HANDSHAKE, - .version = STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE, - .dynamic = false, - .error = "remote node response is not understood, is it Netdata?", - .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 1 * 60, // 1 minute - .priority = NDLP_ERR, - } -}; - -static inline bool rrdpush_sender_validate_response(RRDHOST *host, struct sender_state *s, char *http, size_t http_length) { - int32_t version = STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE; - - int i; - for(i = 0; stream_responses[i].response ; i++) { - if(stream_responses[i].dynamic && - http_length > stream_responses[i].length && http_length < (stream_responses[i].length + 30) && - strncmp(http, stream_responses[i].response, stream_responses[i].length) == 0) { - - version = str2i(&http[stream_responses[i].length]); - break; - } - else if(http_length == stream_responses[i].length && strcmp(http, stream_responses[i].response) == 0) { - version = stream_responses[i].version; - - break; - } - } - - if(version >= STREAM_HANDSHAKE_OK_V1) { - host->destination->reason = version; - host->destination->postpone_reconnection_until = now_realtime_sec() + s->reconnect_delay; - s->capabilities = convert_stream_version_to_capabilities(version, host, true); - return true; - } - - ND_LOG_FIELD_PRIORITY priority = stream_responses[i].priority; - const char *error = stream_responses[i].error; - const char *status = stream_responses[i].status; - int worker_job_id = stream_responses[i].worker_job_id; - int delay = 
stream_responses[i].postpone_reconnect_seconds; - - worker_is_busy(worker_job_id); - rrdpush_sender_thread_close_socket(host); - host->destination->reason = version; - host->destination->postpone_reconnection_until = now_realtime_sec() + delay; - - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, status), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - char buf[RFC3339_MAX_LENGTH]; - rfc3339_datetime_ut(buf, sizeof(buf), host->destination->postpone_reconnection_until * USEC_PER_SEC, 0, false); - - nd_log(NDLS_DAEMON, priority, - "STREAM %s [send to %s]: %s - will retry in %d secs, at %s", - rrdhost_hostname(host), s->connected_to, error, delay, buf); - - return false; -} - -unsigned char alpn_proto_list[] = { - 18, 'n', 'e', 't', 'd', 'a', 't', 'a', '_', 's', 't', 'r', 'e', 'a', 'm', '/', '2', '.', '0', - 8, 'h', 't', 't', 'p', '/', '1', '.', '1' -}; - -#define CONN_UPGRADE_VAL "upgrade" - -static bool rrdpush_sender_connect_ssl(struct sender_state *s __maybe_unused) { - RRDHOST *host = s->host; - bool ssl_required = host->destination && host->destination->ssl; - - netdata_ssl_close(&host->sender->ssl); - - if(!ssl_required) - return true; - - if (netdata_ssl_open_ext(&host->sender->ssl, netdata_ssl_streaming_sender_ctx, s->rrdpush_sender_socket, alpn_proto_list, sizeof(alpn_proto_list))) { - if(!netdata_ssl_connect(&host->sender->ssl)) { - // couldn't connect - - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_SSL_ERROR), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR); - rrdpush_sender_thread_close_socket(host); - host->destination->reason = STREAM_HANDSHAKE_ERROR_SSL_ERROR; - host->destination->postpone_reconnection_until = now_realtime_sec() + 5 * 60; - return false; - } - - if (netdata_ssl_validate_certificate_sender && - security_test_certificate(host->sender->ssl.conn)) { - // certificate is not valid - - ND_LOG_STACK lgs[] = { - 
ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_INVALID_SSL_CERTIFICATE), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR); - netdata_log_error("SSL: closing the stream connection, because the server SSL certificate is not valid."); - rrdpush_sender_thread_close_socket(host); - host->destination->reason = STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE; - host->destination->postpone_reconnection_until = now_realtime_sec() + 5 * 60; - return false; - } - - return true; - } - - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_CANT_ESTABLISH_SSL_CONNECTION), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - netdata_log_error("SSL: failed to establish connection."); - return false; -} - -static int rrdpush_http_upgrade_prelude(RRDHOST *host, struct sender_state *s) { - - char http[HTTP_HEADER_SIZE + 1]; - snprintfz(http, HTTP_HEADER_SIZE, - "GET " NETDATA_STREAM_URL HTTP_1_1 HTTP_ENDL - "Upgrade: " NETDATA_STREAM_PROTO_NAME HTTP_ENDL - "Connection: Upgrade" - HTTP_HDR_END); - - ssize_t bytes = send_timeout( - &host->sender->ssl, - s->rrdpush_sender_socket, - http, - strlen(http), - 0, - 1000); - - bytes = recv_timeout( - &host->sender->ssl, - s->rrdpush_sender_socket, - http, - HTTP_HEADER_SIZE, - 0, - 1000); - - if (bytes <= 0) { - error_report("Error reading from remote"); - return 1; - } - - rbuf_t buf = rbuf_create(bytes); - rbuf_push(buf, http, bytes); - - http_parse_ctx ctx; - http_parse_ctx_create(&ctx, HTTP_PARSE_INITIAL); - ctx.flags |= HTTP_PARSE_FLAG_DONT_WAIT_FOR_CONTENT; - - int rc; -// while((rc = parse_http_response(buf, &ctx)) == HTTP_PARSE_NEED_MORE_DATA); - rc = parse_http_response(buf, &ctx); - - if (rc != HTTP_PARSE_SUCCESS) { - error_report("Failed to parse HTTP response sent. 
(%d)", rc); - goto err_cleanup; - } - if (ctx.http_code == HTTP_RESP_MOVED_PERM) { - const char *hdr = get_http_header_by_name(&ctx, "location"); - if (hdr) - error_report("HTTP response is %d Moved Permanently (location: \"%s\") instead of expected %d Switching Protocols.", ctx.http_code, hdr, HTTP_RESP_SWITCH_PROTO); - else - error_report("HTTP response is %d instead of expected %d Switching Protocols.", ctx.http_code, HTTP_RESP_SWITCH_PROTO); - goto err_cleanup; - } - if (ctx.http_code == HTTP_RESP_NOT_FOUND) { - error_report("HTTP response is %d instead of expected %d Switching Protocols. Parent version too old.", ctx.http_code, HTTP_RESP_SWITCH_PROTO); - // TODO set some flag here that will signify parent is older version - // and to try connection without rrdpush_http_upgrade_prelude next time - goto err_cleanup; - } - if (ctx.http_code != HTTP_RESP_SWITCH_PROTO) { - error_report("HTTP response is %d instead of expected %d Switching Protocols", ctx.http_code, HTTP_RESP_SWITCH_PROTO); - goto err_cleanup; - } - - const char *hdr = get_http_header_by_name(&ctx, "connection"); - if (!hdr) { - error_report("Missing \"connection\" header in reply"); - goto err_cleanup; - } - if (strncmp(hdr, CONN_UPGRADE_VAL, strlen(CONN_UPGRADE_VAL))) { - error_report("Expected \"connection: " CONN_UPGRADE_VAL "\""); - goto err_cleanup; - } - - hdr = get_http_header_by_name(&ctx, "upgrade"); - if (!hdr) { - error_report("Missing \"upgrade\" header in reply"); - goto err_cleanup; - } - if (strncmp(hdr, NETDATA_STREAM_PROTO_NAME, strlen(NETDATA_STREAM_PROTO_NAME))) { - error_report("Expected \"upgrade: " NETDATA_STREAM_PROTO_NAME "\""); - goto err_cleanup; - } - - netdata_log_debug(D_STREAM, "Stream sender upgrade to \"" NETDATA_STREAM_PROTO_NAME "\" successful"); - rbuf_free(buf); - http_parse_ctx_destroy(&ctx); - return 0; -err_cleanup: - rbuf_free(buf); - http_parse_ctx_destroy(&ctx); - return 1; -} - -static bool rrdpush_sender_thread_connect_to_parent(RRDHOST *host, int 
default_port, int timeout, struct sender_state *s) { - - struct timeval tv = { - .tv_sec = timeout, - .tv_usec = 0 - }; - - // make sure the socket is closed - rrdpush_sender_thread_close_socket(host); - - s->rrdpush_sender_socket = connect_to_one_of_destinations( - host - , default_port - , &tv - , &s->reconnects_counter - , s->connected_to - , sizeof(s->connected_to)-1 - , &host->destination - ); - - if(unlikely(s->rrdpush_sender_socket == -1)) { - // netdata_log_error("STREAM %s [send to %s]: could not connect to parent node at this time.", rrdhost_hostname(host), host->rrdpush_send_destination); - return false; - } - - // netdata_log_info("STREAM %s [send to %s]: initializing communication...", rrdhost_hostname(host), s->connected_to); - - // reset our capabilities to default - s->capabilities = stream_our_capabilities(host, true); - - /* TODO: During the implementation of #7265 switch the set of variables to HOST_* and CONTAINER_* if the - version negotiation resulted in a high enough version. 
- */ - stream_encoded_t se; - rrdpush_encode_variable(&se, host); - - host->sender->hops = host->system_info->hops + 1; - - char http[HTTP_HEADER_SIZE + 1]; - int eol = snprintfz(http, HTTP_HEADER_SIZE, - "STREAM " - "key=%s" - "&hostname=%s" - "&registry_hostname=%s" - "&machine_guid=%s" - "&update_every=%d" - "&os=%s" - "&timezone=%s" - "&abbrev_timezone=%s" - "&utc_offset=%d" - "&hops=%d" - "&ml_capable=%d" - "&ml_enabled=%d" - "&mc_version=%d" - "&ver=%u" - "&NETDATA_INSTANCE_CLOUD_TYPE=%s" - "&NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE=%s" - "&NETDATA_INSTANCE_CLOUD_INSTANCE_REGION=%s" - "&NETDATA_SYSTEM_OS_NAME=%s" - "&NETDATA_SYSTEM_OS_ID=%s" - "&NETDATA_SYSTEM_OS_ID_LIKE=%s" - "&NETDATA_SYSTEM_OS_VERSION=%s" - "&NETDATA_SYSTEM_OS_VERSION_ID=%s" - "&NETDATA_SYSTEM_OS_DETECTION=%s" - "&NETDATA_HOST_IS_K8S_NODE=%s" - "&NETDATA_SYSTEM_KERNEL_NAME=%s" - "&NETDATA_SYSTEM_KERNEL_VERSION=%s" - "&NETDATA_SYSTEM_ARCHITECTURE=%s" - "&NETDATA_SYSTEM_VIRTUALIZATION=%s" - "&NETDATA_SYSTEM_VIRT_DETECTION=%s" - "&NETDATA_SYSTEM_CONTAINER=%s" - "&NETDATA_SYSTEM_CONTAINER_DETECTION=%s" - "&NETDATA_CONTAINER_OS_NAME=%s" - "&NETDATA_CONTAINER_OS_ID=%s" - "&NETDATA_CONTAINER_OS_ID_LIKE=%s" - "&NETDATA_CONTAINER_OS_VERSION=%s" - "&NETDATA_CONTAINER_OS_VERSION_ID=%s" - "&NETDATA_CONTAINER_OS_DETECTION=%s" - "&NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT=%s" - "&NETDATA_SYSTEM_CPU_FREQ=%s" - "&NETDATA_SYSTEM_TOTAL_RAM=%s" - "&NETDATA_SYSTEM_TOTAL_DISK_SIZE=%s" - "&NETDATA_PROTOCOL_VERSION=%s" - HTTP_1_1 HTTP_ENDL - "User-Agent: %s/%s\r\n" - "Accept: */*\r\n\r\n" - , host->rrdpush.send.api_key - , rrdhost_hostname(host) - , rrdhost_registry_hostname(host) - , host->machine_guid - , default_rrd_update_every - , rrdhost_os(host) - , rrdhost_timezone(host) - , rrdhost_abbrev_timezone(host) - , host->utc_offset - , host->sender->hops - , host->system_info->ml_capable - , host->system_info->ml_enabled - , host->system_info->mc_version - , s->capabilities - , (host->system_info->cloud_provider_type) ? 
host->system_info->cloud_provider_type : "" - , (host->system_info->cloud_instance_type) ? host->system_info->cloud_instance_type : "" - , (host->system_info->cloud_instance_region) ? host->system_info->cloud_instance_region : "" - , se.os_name - , se.os_id - , (host->system_info->host_os_id_like) ? host->system_info->host_os_id_like : "" - , se.os_version - , (host->system_info->host_os_version_id) ? host->system_info->host_os_version_id : "" - , (host->system_info->host_os_detection) ? host->system_info->host_os_detection : "" - , (host->system_info->is_k8s_node) ? host->system_info->is_k8s_node : "" - , se.kernel_name - , se.kernel_version - , (host->system_info->architecture) ? host->system_info->architecture : "" - , (host->system_info->virtualization) ? host->system_info->virtualization : "" - , (host->system_info->virt_detection) ? host->system_info->virt_detection : "" - , (host->system_info->container) ? host->system_info->container : "" - , (host->system_info->container_detection) ? host->system_info->container_detection : "" - , (host->system_info->container_os_name) ? host->system_info->container_os_name : "" - , (host->system_info->container_os_id) ? host->system_info->container_os_id : "" - , (host->system_info->container_os_id_like) ? host->system_info->container_os_id_like : "" - , (host->system_info->container_os_version) ? host->system_info->container_os_version : "" - , (host->system_info->container_os_version_id) ? host->system_info->container_os_version_id : "" - , (host->system_info->container_os_detection) ? host->system_info->container_os_detection : "" - , (host->system_info->host_cores) ? host->system_info->host_cores : "" - , (host->system_info->host_cpu_freq) ? host->system_info->host_cpu_freq : "" - , (host->system_info->host_ram_total) ? host->system_info->host_ram_total : "" - , (host->system_info->host_disk_space) ? 
host->system_info->host_disk_space : "" - , STREAMING_PROTOCOL_VERSION - , rrdhost_program_name(host) - , rrdhost_program_version(host) - ); - http[eol] = 0x00; - rrdpush_clean_encoded(&se); - - if(!rrdpush_sender_connect_ssl(s)) - return false; - - if (s->parent_using_h2o && rrdpush_http_upgrade_prelude(host, s)) { - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_CANT_UPGRADE_CONNECTION), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_CANT_UPGRADE_CONNECTION); - rrdpush_sender_thread_close_socket(host); - host->destination->reason = STREAM_HANDSHAKE_ERROR_HTTP_UPGRADE; - host->destination->postpone_reconnection_until = now_realtime_sec() + 1 * 60; - return false; - } - - ssize_t len = (ssize_t)strlen(http); - ssize_t bytes = send_timeout( - &host->sender->ssl, - s->rrdpush_sender_socket, - http, - len, - 0, - timeout); - - if(bytes <= 0) { // timeout is 0 - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_TIMEOUT), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT); - rrdpush_sender_thread_close_socket(host); - - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: failed to send HTTP header to remote netdata.", - rrdhost_hostname(host), s->connected_to); - - host->destination->reason = STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT; - host->destination->postpone_reconnection_until = now_realtime_sec() + 1 * 60; - return false; - } - - bytes = recv_timeout( - &host->sender->ssl, - s->rrdpush_sender_socket, - http, - HTTP_HEADER_SIZE, - 0, - timeout); - - if(bytes <= 0) { // timeout is 0 - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_TIMEOUT), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT); - rrdpush_sender_thread_close_socket(host); - - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: remote netdata does 
not respond.", - rrdhost_hostname(host), s->connected_to); - - host->destination->reason = STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT; - host->destination->postpone_reconnection_until = now_realtime_sec() + 30; - return false; - } - - if(sock_setnonblock(s->rrdpush_sender_socket) < 0) - nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM %s [send to %s]: cannot set non-blocking mode for socket.", - rrdhost_hostname(host), s->connected_to); - sock_setcloexec(s->rrdpush_sender_socket); - - if(sock_enlarge_out(s->rrdpush_sender_socket) < 0) - nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM %s [send to %s]: cannot enlarge the socket buffer.", - rrdhost_hostname(host), s->connected_to); - - http[bytes] = '\0'; - if(!rrdpush_sender_validate_response(host, s, http, bytes)) - return false; - - rrdpush_compression_initialize(s); - - log_sender_capabilities(s); - - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_CONNECTED), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM %s: connected to %s...", - rrdhost_hostname(host), s->connected_to); - - return true; -} - -static bool attempt_to_connect(struct sender_state *state) { - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_UUID(NDF_MESSAGE_ID, &streaming_to_parent_msgid), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - state->send_attempts = 0; - - // reset the bytes we have sent for this session - state->sent_bytes_on_this_connection = 0; - memset(state->sent_bytes_on_this_connection_per_type, 0, sizeof(state->sent_bytes_on_this_connection_per_type)); - - if(rrdpush_sender_thread_connect_to_parent(state->host, state->default_port, state->timeout, state)) { - // reset the buffer, to properly send charts and metrics - rrdpush_sender_on_connect(state->host); - - // send from the beginning - state->begin = 0; - - // make sure the next reconnection will be immediate - state->not_connected_loops = 0; - - // let the data collection threads know we are ready - 
rrdhost_flag_set(state->host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED); - - rrdpush_sender_after_connect(state->host); - - return true; - } - - // we couldn't connect - - // increase the failed connections counter - state->not_connected_loops++; - - // slow re-connection on repeating errors - usec_t now_ut = now_monotonic_usec(); - usec_t end_ut = now_ut + USEC_PER_SEC * state->reconnect_delay; - while(now_ut < end_ut) { - if(nd_thread_signaled_to_cancel()) - return false; - - sleep_usec(100 * USEC_PER_MS); // seconds - now_ut = now_monotonic_usec(); - } - - return false; + stream_path_parent_disconnected(host); } // TCP window is open, and we have data to transmit. @@ -1042,7 +171,7 @@ static ssize_t attempt_to_send(struct sender_state *s) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR); netdata_log_debug(D_STREAM, "STREAM: Send failed - closing socket..."); netdata_log_error("STREAM %s [send to %s]: failed to send metrics - closing connection - we have sent %zu bytes on this connection.", rrdhost_hostname(s->host), s->connected_to, s->sent_bytes_on_this_connection); - rrdpush_sender_thread_close_socket(s->host); + rrdpush_sender_thread_close_socket(s); } else netdata_log_debug(D_STREAM, "STREAM: send() returned 0 -> no error but no transmission"); @@ -1080,262 +209,11 @@ static ssize_t attempt_read(struct sender_state *s) { netdata_log_error("STREAM %s [send to %s]: error during receive (%zd) - closing connection.", rrdhost_hostname(s->host), s->connected_to, ret); } - rrdpush_sender_thread_close_socket(s->host); + rrdpush_sender_thread_close_socket(s); return ret; } -struct inflight_stream_function { - struct sender_state *sender; - STRING *transaction; - usec_t received_ut; -}; - -static void stream_execute_function_callback(BUFFER *func_wb, int code, void *data) { - struct inflight_stream_function *tmp = data; - struct sender_state *s = tmp->sender; - - if(rrdhost_can_send_definitions_to_parent(s->host)) { - BUFFER *wb = sender_start(s); - - 
pluginsd_function_result_begin_to_buffer(wb - , string2str(tmp->transaction) - , code - , content_type_id2string(func_wb->content_type) - , func_wb->expires); - - buffer_fast_strcat(wb, buffer_tostring(func_wb), buffer_strlen(func_wb)); - pluginsd_function_result_end_to_buffer(wb); - - sender_commit(s, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); - sender_thread_buffer_free(); - - internal_error(true, "STREAM %s [send to %s] FUNCTION transaction %s sending back response (%zu bytes, %"PRIu64" usec).", - rrdhost_hostname(s->host), s->connected_to, - string2str(tmp->transaction), - buffer_strlen(func_wb), - now_realtime_usec() - tmp->received_ut); - } - - string_freez(tmp->transaction); - buffer_free(func_wb); - freez(tmp); -} - -static void stream_execute_function_progress_callback(void *data, size_t done, size_t all) { - struct inflight_stream_function *tmp = data; - struct sender_state *s = tmp->sender; - - if(rrdhost_can_send_definitions_to_parent(s->host)) { - BUFFER *wb = sender_start(s); - - buffer_sprintf(wb, PLUGINSD_KEYWORD_FUNCTION_PROGRESS " '%s' %zu %zu\n", - string2str(tmp->transaction), done, all); - - sender_commit(s, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); - } -} - -static void execute_commands_function(struct sender_state *s, const char *command, const char *transaction, const char *timeout_s, const char *function, BUFFER *payload, const char *access, const char *source) { - worker_is_busy(WORKER_SENDER_JOB_FUNCTION_REQUEST); - nd_log(NDLS_ACCESS, NDLP_INFO, NULL); - - if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { - netdata_log_error("STREAM %s [send to %s] %s execution command is incomplete (transaction = '%s', timeout = '%s', function = '%s'). 
Ignoring it.", - rrdhost_hostname(s->host), s->connected_to, - command, - transaction?transaction:"(unset)", - timeout_s?timeout_s:"(unset)", - function?function:"(unset)"); - } - else { - int timeout = str2i(timeout_s); - if(timeout <= 0) timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT; - - struct inflight_stream_function *tmp = callocz(1, sizeof(struct inflight_stream_function)); - tmp->received_ut = now_realtime_usec(); - tmp->sender = s; - tmp->transaction = string_strdupz(transaction); - BUFFER *wb = buffer_create(1024, &netdata_buffers_statistics.buffers_functions); - - int code = rrd_function_run(s->host, wb, timeout, - http_access_from_hex_mapping_old_roles(access), function, false, transaction, - stream_execute_function_callback, tmp, - stream_has_capability(s, STREAM_CAP_PROGRESS) ? stream_execute_function_progress_callback : NULL, - stream_has_capability(s, STREAM_CAP_PROGRESS) ? tmp : NULL, - NULL, NULL, payload, source, true); - - if(code != HTTP_RESP_OK) { - if (!buffer_strlen(wb)) - rrd_call_function_error(wb, "Failed to route request to collector", code); - } - } -} - -static void cleanup_intercepting_input(struct sender_state *s) { - freez((void *)s->functions.transaction); - freez((void *)s->functions.timeout_s); - freez((void *)s->functions.function); - freez((void *)s->functions.access); - freez((void *)s->functions.source); - buffer_free(s->functions.payload); - - s->functions.transaction = NULL; - s->functions.timeout_s = NULL; - s->functions.function = NULL; - s->functions.payload = NULL; - s->functions.access = NULL; - s->functions.source = NULL; - s->functions.intercept_input = false; -} - -static void execute_commands_cleanup(struct sender_state *s) { - cleanup_intercepting_input(s); -} - -// This is just a placeholder until the gap filling state machine is inserted -void execute_commands(struct sender_state *s) { - worker_is_busy(WORKER_SENDER_JOB_EXECUTE); - - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_CB(NDF_REQUEST, 
line_splitter_reconstruct_line, &s->line), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); - - char *start = s->read_buffer, *end = &s->read_buffer[s->read_len], *newline; - *end = '\0'; - for( ; start < end ; start = newline + 1) { - newline = strchr(start, '\n'); - - if(!newline) { - if(s->functions.intercept_input) { - buffer_strcat(s->functions.payload, start); - start = end; - } - break; - } - - *newline = '\0'; - s->line.count++; - - if(s->functions.intercept_input) { - if(strcmp(start, PLUGINSD_CALL_FUNCTION_PAYLOAD_END) == 0) { - execute_commands_function(s, - PLUGINSD_CALL_FUNCTION_PAYLOAD_END, - s->functions.transaction, s->functions.timeout_s, - s->functions.function, s->functions.payload, - s->functions.access, s->functions.source); - - cleanup_intercepting_input(s); - } - else { - buffer_strcat(s->functions.payload, start); - buffer_fast_charcat(s->functions.payload, '\n'); - } - - continue; - } - - s->line.num_words = quoted_strings_splitter_pluginsd(start, s->line.words, PLUGINSD_MAX_WORDS); - const char *command = get_word(s->line.words, s->line.num_words, 0); - - if(command && strcmp(command, PLUGINSD_CALL_FUNCTION) == 0) { - char *transaction = get_word(s->line.words, s->line.num_words, 1); - char *timeout_s = get_word(s->line.words, s->line.num_words, 2); - char *function = get_word(s->line.words, s->line.num_words, 3); - char *access = get_word(s->line.words, s->line.num_words, 4); - char *source = get_word(s->line.words, s->line.num_words, 5); - - execute_commands_function(s, command, transaction, timeout_s, function, NULL, access, source); - } - else if(command && strcmp(command, PLUGINSD_CALL_FUNCTION_PAYLOAD_BEGIN) == 0) { - char *transaction = get_word(s->line.words, s->line.num_words, 1); - char *timeout_s = get_word(s->line.words, s->line.num_words, 2); - char *function = get_word(s->line.words, s->line.num_words, 3); - char *access = get_word(s->line.words, s->line.num_words, 4); - char *source = get_word(s->line.words, 
s->line.num_words, 5); - char *content_type = get_word(s->line.words, s->line.num_words, 6); - - s->functions.transaction = strdupz(transaction ? transaction : ""); - s->functions.timeout_s = strdupz(timeout_s ? timeout_s : ""); - s->functions.function = strdupz(function ? function : ""); - s->functions.access = strdupz(access ? access : ""); - s->functions.source = strdupz(source ? source : ""); - s->functions.payload = buffer_create(0, NULL); - s->functions.payload->content_type = content_type_string2id(content_type); - s->functions.intercept_input = true; - } - else if(command && strcmp(command, PLUGINSD_CALL_FUNCTION_CANCEL) == 0) { - worker_is_busy(WORKER_SENDER_JOB_FUNCTION_REQUEST); - nd_log(NDLS_ACCESS, NDLP_DEBUG, NULL); - - char *transaction = get_word(s->line.words, s->line.num_words, 1); - if(transaction && *transaction) - rrd_function_cancel(transaction); - } - else if(command && strcmp(command, PLUGINSD_CALL_FUNCTION_PROGRESS) == 0) { - worker_is_busy(WORKER_SENDER_JOB_FUNCTION_REQUEST); - nd_log(NDLS_ACCESS, NDLP_DEBUG, NULL); - - char *transaction = get_word(s->line.words, s->line.num_words, 1); - if(transaction && *transaction) - rrd_function_progress(transaction); - } - else if (command && strcmp(command, PLUGINSD_KEYWORD_REPLAY_CHART) == 0) { - worker_is_busy(WORKER_SENDER_JOB_REPLAY_REQUEST); - nd_log(NDLS_ACCESS, NDLP_DEBUG, NULL); - - const char *chart_id = get_word(s->line.words, s->line.num_words, 1); - const char *start_streaming = get_word(s->line.words, s->line.num_words, 2); - const char *after = get_word(s->line.words, s->line.num_words, 3); - const char *before = get_word(s->line.words, s->line.num_words, 4); - - if (!chart_id || !start_streaming || !after || !before) { - netdata_log_error("STREAM %s [send to %s] %s command is incomplete" - " (chart=%s, start_streaming=%s, after=%s, before=%s)", - rrdhost_hostname(s->host), s->connected_to, - command, - chart_id ? chart_id : "(unset)", - start_streaming ? 
start_streaming : "(unset)", - after ? after : "(unset)", - before ? before : "(unset)"); - } - else { - replication_add_request(s, chart_id, - strtoll(after, NULL, 0), - strtoll(before, NULL, 0), - !strcmp(start_streaming, "true") - ); - } - } - else if(command && strcmp(command, PLUGINSD_KEYWORD_NODE_ID) == 0) { - rrdpush_sender_get_node_and_claim_id_from_parent(s); - } - else { - netdata_log_error("STREAM %s [send to %s] received unknown command over connection: %s", - rrdhost_hostname(s->host), s->connected_to, s->line.words[0]?s->line.words[0]:"(unset)"); - } - - line_splitter_reset(&s->line); - worker_is_busy(WORKER_SENDER_JOB_EXECUTE); - } - - if (start < end) { - memmove(s->read_buffer, start, end-start); - s->read_len = end - start; - } - else { - s->read_buffer[0] = '\0'; - s->read_len = 0; - } -} - -struct rrdpush_sender_thread_data { - RRDHOST *host; - char *pipe_buffer; -}; - static bool rrdpush_sender_pipe_close(RRDHOST *host, int *pipe_fds, bool reopen) { static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; @@ -1463,38 +341,6 @@ static bool rrdhost_sender_should_exit(struct sender_state *s) { return false; } -static void rrdpush_sender_thread_cleanup_callback(void *pptr) { - struct rrdpush_sender_thread_data *s = CLEANUP_FUNCTION_GET_PTR(pptr); - if(!s) return; - - worker_unregister(); - - RRDHOST *host = s->host; - - sender_lock(host->sender); - netdata_log_info("STREAM %s [send]: sending thread exits %s", - rrdhost_hostname(host), - host->sender->exit.reason != STREAM_HANDSHAKE_NEVER ? 
stream_handshake_error_to_string(host->sender->exit.reason) : ""); - - rrdpush_sender_thread_close_socket(host); - rrdpush_sender_pipe_close(host, host->sender->rrdpush_sender_pipe, false); - execute_commands_cleanup(host->sender); - - rrdhost_clear_sender___while_having_sender_mutex(host); - -#ifdef NETDATA_LOG_STREAM_SENDER - if(host->sender->stream_log_fp) { - fclose(host->sender->stream_log_fp); - host->sender->stream_log_fp = NULL; - } -#endif - - sender_unlock(host->sender); - - freez(s->pipe_buffer); - freez(s); -} - void rrdpush_initialize_ssl_ctx(RRDHOST *host __maybe_unused) { static SPINLOCK sp = NETDATA_SPINLOCK_INITIALIZER; spinlock_lock(&sp); @@ -1640,8 +486,7 @@ void *rrdpush_sender_thread(void *ptr) { &stream_config, CONFIG_SECTION_STREAM, "parent using h2o", false); // initialize rrdpush globals - rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); - rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED); + rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED | RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); int pipe_buffer_size = 10 * 1024; #ifdef F_GETPIPE_SZ @@ -1656,12 +501,9 @@ void *rrdpush_sender_thread(void *ptr) { return NULL; } - struct rrdpush_sender_thread_data *thread_data = callocz(1, sizeof(struct rrdpush_sender_thread_data)); - thread_data->pipe_buffer = mallocz(pipe_buffer_size); - thread_data->host = s->host; - - CLEANUP_FUNCTION_REGISTER(rrdpush_sender_thread_cleanup_callback) cleanup_ptr = thread_data; + char *pipe_buffer = mallocz(pipe_buffer_size); + bool was_connected = false; size_t iterations = 0; time_t now_s = now_monotonic_sec(); while(!rrdhost_sender_should_exit(s)) { @@ -1669,11 +511,16 @@ void *rrdpush_sender_thread(void *ptr) { // The connection attempt blocks (after which we use the socket in nonblocking) if(unlikely(s->rrdpush_sender_socket == -1)) { + if(was_connected) { + rrdpush_sender_on_disconnect(s->host); + was_connected = false; + } + 
worker_is_busy(WORKER_SENDER_JOB_CONNECT); now_s = now_monotonic_sec(); rrdpush_sender_cbuffer_recreate_timed(s, now_s, false, true); - execute_commands_cleanup(s); + rrdpush_sender_execute_commands_cleanup(s); rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); s->flags &= ~SENDER_FLAG_OVERFLOW; @@ -1688,10 +535,12 @@ void *rrdpush_sender_thread(void *ptr) { break; now_s = s->last_traffic_seen_t = now_monotonic_sec(); + stream_path_send_to_parent(s->host); rrdpush_sender_send_claimed_id(s->host); rrdpush_send_host_labels(s->host); rrdpush_send_global_functions(s->host); s->replication.oldest_request_after_t = 0; + was_connected = true; rrdhost_flag_set(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); @@ -1712,7 +561,7 @@ void *rrdpush_sender_thread(void *ptr) { )) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT); netdata_log_error("STREAM %s [send to %s]: could not send metrics for %d seconds - closing connection - we have sent %zu bytes on this connection via %zu send attempts.", rrdhost_hostname(s->host), s->connected_to, s->timeout, s->sent_bytes_on_this_connection, s->send_attempts); - rrdpush_sender_thread_close_socket(s->host); + rrdpush_sender_thread_close_socket(s); continue; } @@ -1741,9 +590,9 @@ void *rrdpush_sender_thread(void *ptr) { if(unlikely(s->rrdpush_sender_pipe[PIPE_READ] == -1)) { if(!rrdpush_sender_pipe_close(s->host, s->rrdpush_sender_pipe, true)) { - netdata_log_error("STREAM %s [send]: cannot create inter-thread communication pipe. Disabling streaming.", - rrdhost_hostname(s->host)); - rrdpush_sender_thread_close_socket(s->host); + netdata_log_error("STREAM %s [send]: cannot create inter-thread communication pipe. " + "Disabling streaming.", rrdhost_hostname(s->host)); + rrdpush_sender_thread_close_socket(s); break; } } @@ -1794,7 +643,7 @@ void *rrdpush_sender_thread(void *ptr) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_POLL_ERROR); netdata_log_error("STREAM %s [send to %s]: failed to poll(). 
Closing socket.", rrdhost_hostname(s->host), s->connected_to); rrdpush_sender_pipe_close(s->host, s->rrdpush_sender_pipe, true); - rrdpush_sender_thread_close_socket(s->host); + rrdpush_sender_thread_close_socket(s); continue; } @@ -1813,7 +662,7 @@ void *rrdpush_sender_thread(void *ptr) { worker_is_busy(WORKER_SENDER_JOB_PIPE_READ); netdata_log_debug(D_STREAM, "STREAM: Data added to send buffer (current buffer chunk %zu bytes)...", outstanding); - if (read(fds[Collector].fd, thread_data->pipe_buffer, pipe_buffer_size) == -1) + if (read(fds[Collector].fd, pipe_buffer, pipe_buffer_size) == -1) netdata_log_error("STREAM %s [send to %s]: cannot read from internal pipe.", rrdhost_hostname(s->host), s->connected_to); } @@ -1828,7 +677,7 @@ void *rrdpush_sender_thread(void *ptr) { } if(unlikely(s->read_len)) - execute_commands(s); + rrdpush_sender_execute_commands(s); if(unlikely(fds[Collector].revents & (POLLERR|POLLHUP|POLLNVAL))) { char *error = NULL; @@ -1843,7 +692,7 @@ void *rrdpush_sender_thread(void *ptr) { if(error) { rrdpush_sender_pipe_close(s->host, s->rrdpush_sender_pipe, true); netdata_log_error("STREAM %s [send to %s]: restarting internal pipe: %s.", - rrdhost_hostname(s->host), s->connected_to, error); + rrdhost_hostname(s->host), s->connected_to, error); } } @@ -1860,8 +709,8 @@ void *rrdpush_sender_thread(void *ptr) { if(unlikely(error)) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SOCKET_ERROR); netdata_log_error("STREAM %s [send to %s]: restarting connection: %s - %zu bytes transmitted.", - rrdhost_hostname(s->host), s->connected_to, error, s->sent_bytes_on_this_connection); - rrdpush_sender_thread_close_socket(s->host); + rrdhost_hostname(s->host), s->connected_to, error, s->sent_bytes_on_this_connection); + rrdpush_sender_thread_close_socket(s); } } @@ -1870,12 +719,39 @@ void *rrdpush_sender_thread(void *ptr) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_OVERFLOW); errno_clear(); netdata_log_error("STREAM %s [send to %s]: buffer full (allocated 
%zu bytes) after sending %zu bytes. Restarting connection", - rrdhost_hostname(s->host), s->connected_to, s->buffer->size, s->sent_bytes_on_this_connection); - rrdpush_sender_thread_close_socket(s->host); + rrdhost_hostname(s->host), s->connected_to, s->buffer->size, s->sent_bytes_on_this_connection); + rrdpush_sender_thread_close_socket(s); } worker_set_metric(WORKER_SENDER_JOB_REPLAY_DICT_SIZE, (NETDATA_DOUBLE) dictionary_entries(s->replication.requests)); } + if(was_connected) + rrdpush_sender_on_disconnect(s->host); + + netdata_log_info("STREAM %s [send]: sending thread exits %s", + rrdhost_hostname(s->host), + s->exit.reason != STREAM_HANDSHAKE_NEVER ? stream_handshake_error_to_string(s->exit.reason) : ""); + + sender_lock(s); + { + rrdpush_sender_thread_close_socket(s); + rrdpush_sender_pipe_close(s->host, s->rrdpush_sender_pipe, false); + rrdpush_sender_execute_commands_cleanup(s); + + rrdhost_clear_sender___while_having_sender_mutex(s->host); + +#ifdef NETDATA_LOG_STREAM_SENDER + if (s->stream_log_fp) { + fclose(s->stream_log_fp); + s->stream_log_fp = NULL; + } +#endif + } + sender_unlock(s); + + freez(pipe_buffer); + worker_unregister(); + return NULL; } diff --git a/src/streaming/sender_commit.c b/src/streaming/sender_commit.c new file mode 100644 index 00000000000000..3219569b03d57a --- /dev/null +++ b/src/streaming/sender_commit.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sender_internals.h" + +static __thread BUFFER *sender_thread_buffer = NULL; +static __thread bool sender_thread_buffer_used = false; +static __thread time_t sender_thread_buffer_last_reset_s = 0; + +void sender_thread_buffer_free(void) { + buffer_free(sender_thread_buffer); + sender_thread_buffer = NULL; + sender_thread_buffer_used = false; +} + +// Collector thread starting a transmission +BUFFER *sender_start(struct sender_state *s) { + if(unlikely(sender_thread_buffer_used)) + fatal("STREAMING: thread buffer is used multiple times concurrently."); 
+ + if(unlikely(rrdpush_sender_last_buffer_recreate_get(s) > sender_thread_buffer_last_reset_s)) { + if(unlikely(sender_thread_buffer && sender_thread_buffer->size > THREAD_BUFFER_INITIAL_SIZE)) { + buffer_free(sender_thread_buffer); + sender_thread_buffer = NULL; + } + } + + if(unlikely(!sender_thread_buffer)) { + sender_thread_buffer = buffer_create(THREAD_BUFFER_INITIAL_SIZE, &netdata_buffers_statistics.buffers_streaming); + sender_thread_buffer_last_reset_s = rrdpush_sender_last_buffer_recreate_get(s); + } + + sender_thread_buffer_used = true; + buffer_flush(sender_thread_buffer); + return sender_thread_buffer; +} + +#define SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE 3 + +// Collector thread finishing a transmission +void sender_commit(struct sender_state *s, BUFFER *wb, STREAM_TRAFFIC_TYPE type) { + + if(unlikely(wb != sender_thread_buffer)) + fatal("STREAMING: sender is trying to commit a buffer that is not this thread's buffer."); + + if(unlikely(!sender_thread_buffer_used)) + fatal("STREAMING: sender is committing a buffer twice."); + + sender_thread_buffer_used = false; + + char *src = (char *)buffer_tostring(wb); + size_t src_len = buffer_strlen(wb); + + if(unlikely(!src || !src_len)) + return; + + sender_lock(s); + +#ifdef NETDATA_LOG_STREAM_SENDER + if(type == STREAM_TRAFFIC_TYPE_METADATA) { + if(!s->stream_log_fp) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "/tmp/stream-sender-%s.txt", s->host ? rrdhost_hostname(s->host) : "unknown"); + + s->stream_log_fp = fopen(filename, "w"); + } + + fprintf(s->stream_log_fp, "\n--- SEND MESSAGE START: %s ----\n" + "%s" + "--- SEND MESSAGE END ----------------------------------------\n" + , rrdhost_hostname(s->host), src + ); + } +#endif + + if(unlikely(s->buffer->max_size < (src_len + 1) * SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE)) { + netdata_log_info("STREAM %s [send to %s]: max buffer size of %zu is too small for a data message of size %zu. 
Increasing the max buffer size to %d times the max data message size.", + rrdhost_hostname(s->host), s->connected_to, s->buffer->max_size, buffer_strlen(wb) + 1, SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE); + + s->buffer->max_size = (src_len + 1) * SENDER_BUFFER_ADAPT_TO_TIMES_MAX_SIZE; + } + + if (s->compressor.initialized) { + while(src_len) { + size_t size_to_compress = src_len; + + if(unlikely(size_to_compress > COMPRESSION_MAX_MSG_SIZE)) { + if (stream_has_capability(s, STREAM_CAP_BINARY)) + size_to_compress = COMPRESSION_MAX_MSG_SIZE; + else { + if (size_to_compress > COMPRESSION_MAX_MSG_SIZE) { + // we need to find the last newline + // so that the decompressor will have a whole line to work with + + const char *t = &src[COMPRESSION_MAX_MSG_SIZE]; + while (--t >= src) + if (unlikely(*t == '\n')) + break; + + if (t <= src) { + size_to_compress = COMPRESSION_MAX_MSG_SIZE; + } else + size_to_compress = t - src + 1; + } + } + } + + const char *dst; + size_t dst_len = rrdpush_compress(&s->compressor, src, size_to_compress, &dst); + if (!dst_len) { + netdata_log_error("STREAM %s [send to %s]: COMPRESSION failed. Resetting compressor and re-trying", + rrdhost_hostname(s->host), s->connected_to); + + rrdpush_compression_initialize(s); + dst_len = rrdpush_compress(&s->compressor, src, size_to_compress, &dst); + if(!dst_len) { + netdata_log_error("STREAM %s [send to %s]: COMPRESSION failed again. 
Deactivating compression", + rrdhost_hostname(s->host), s->connected_to); + + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION); + rrdpush_compression_deactivate(s); + rrdpush_sender_thread_close_socket(s); + sender_unlock(s); + return; + } + } + + rrdpush_signature_t signature = rrdpush_compress_encode_signature(dst_len); + +#ifdef NETDATA_INTERNAL_CHECKS + // check if reversing the signature provides the same length + size_t decoded_dst_len = rrdpush_decompress_decode_signature((const char *)&signature, sizeof(signature)); + if(decoded_dst_len != dst_len) + fatal("RRDPUSH COMPRESSION: invalid signature, original payload %zu bytes, " + "compressed payload length %zu bytes, but signature says payload is %zu bytes", + size_to_compress, dst_len, decoded_dst_len); +#endif + + if(cbuffer_add_unsafe(s->buffer, (const char *)&signature, sizeof(signature))) + s->flags |= SENDER_FLAG_OVERFLOW; + else { + if(cbuffer_add_unsafe(s->buffer, dst, dst_len)) + s->flags |= SENDER_FLAG_OVERFLOW; + else + s->sent_bytes_on_this_connection_per_type[type] += dst_len + sizeof(signature); + } + + src = src + size_to_compress; + src_len -= size_to_compress; + } + } + else if(cbuffer_add_unsafe(s->buffer, src, src_len)) + s->flags |= SENDER_FLAG_OVERFLOW; + else + s->sent_bytes_on_this_connection_per_type[type] += src_len; + + replication_recalculate_buffer_used_ratio_unsafe(s); + + bool signal_sender = false; + if(!rrdpush_sender_pipe_has_pending_data(s)) { + rrdpush_sender_pipe_set_pending_data(s); + signal_sender = true; + } + + sender_unlock(s); + + if(signal_sender && (!stream_has_capability(s, STREAM_CAP_INTERPOLATED) || type != STREAM_TRAFFIC_TYPE_DATA)) + rrdpush_signal_sender_to_wake_up(s); +} diff --git a/src/streaming/sender_connect.c b/src/streaming/sender_connect.c new file mode 100644 index 00000000000000..1085cd9013138e --- /dev/null +++ b/src/streaming/sender_connect.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sender_internals.h" 
+ +void rrdpush_sender_thread_close_socket(struct sender_state *s) { + rrdhost_flag_clear(s->host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED | RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS); + + netdata_ssl_close(&s->ssl); + + if(s->rrdpush_sender_socket != -1) { + close(s->rrdpush_sender_socket); + s->rrdpush_sender_socket = -1; + } + + // do not flush the circular buffer here + // this function is called sometimes with the sender lock, sometimes without the lock +} + +void rrdpush_encode_variable(stream_encoded_t *se, RRDHOST *host) { + se->os_name = (host->system_info->host_os_name)?url_encode(host->system_info->host_os_name):strdupz(""); + se->os_id = (host->system_info->host_os_id)?url_encode(host->system_info->host_os_id):strdupz(""); + se->os_version = (host->system_info->host_os_version)?url_encode(host->system_info->host_os_version):strdupz(""); + se->kernel_name = (host->system_info->kernel_name)?url_encode(host->system_info->kernel_name):strdupz(""); + se->kernel_version = (host->system_info->kernel_version)?url_encode(host->system_info->kernel_version):strdupz(""); +} + +void rrdpush_clean_encoded(stream_encoded_t *se) { + if (se->os_name) { + freez(se->os_name); + se->os_name = NULL; + } + + if (se->os_id) { + freez(se->os_id); + se->os_id = NULL; + } + + if (se->os_version) { + freez(se->os_version); + se->os_version = NULL; + } + + if (se->kernel_name) { + freez(se->kernel_name); + se->kernel_name = NULL; + } + + if (se->kernel_version) { + freez(se->kernel_version); + se->kernel_version = NULL; + } +} + +struct { + const char *response; + const char *status; + size_t length; + int32_t version; + bool dynamic; + const char *error; + int worker_job_id; + int postpone_reconnect_seconds; + ND_LOG_FIELD_PRIORITY priority; +} stream_responses[] = { + { + .response = START_STREAMING_PROMPT_VN, + .length = sizeof(START_STREAMING_PROMPT_VN) - 1, + .status = RRDPUSH_STATUS_CONNECTED, + .version = STREAM_HANDSHAKE_OK_V3, // and above + .dynamic = true, // dynamic = 
we will parse the version / capabilities + .error = NULL, + .worker_job_id = 0, + .postpone_reconnect_seconds = 0, + .priority = NDLP_INFO, + }, + { + .response = START_STREAMING_PROMPT_V2, + .length = sizeof(START_STREAMING_PROMPT_V2) - 1, + .status = RRDPUSH_STATUS_CONNECTED, + .version = STREAM_HANDSHAKE_OK_V2, + .dynamic = false, + .error = NULL, + .worker_job_id = 0, + .postpone_reconnect_seconds = 0, + .priority = NDLP_INFO, + }, + { + .response = START_STREAMING_PROMPT_V1, + .length = sizeof(START_STREAMING_PROMPT_V1) - 1, + .status = RRDPUSH_STATUS_CONNECTED, + .version = STREAM_HANDSHAKE_OK_V1, + .dynamic = false, + .error = NULL, + .worker_job_id = 0, + .postpone_reconnect_seconds = 0, + .priority = NDLP_INFO, + }, + { + .response = START_STREAMING_ERROR_SAME_LOCALHOST, + .length = sizeof(START_STREAMING_ERROR_SAME_LOCALHOST) - 1, + .status = RRDPUSH_STATUS_LOCALHOST, + .version = STREAM_HANDSHAKE_ERROR_LOCALHOST, + .dynamic = false, + .error = "remote server rejected this stream, the host we are trying to stream is its localhost", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 60 * 60, // the IP may change, try it every hour + .priority = NDLP_DEBUG, + }, + { + .response = START_STREAMING_ERROR_ALREADY_STREAMING, + .length = sizeof(START_STREAMING_ERROR_ALREADY_STREAMING) - 1, + .status = RRDPUSH_STATUS_ALREADY_CONNECTED, + .version = STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED, + .dynamic = false, + .error = "remote server rejected this stream, the host we are trying to stream is already streamed to it", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 2 * 60, // 2 minutes + .priority = NDLP_DEBUG, + }, + { + .response = START_STREAMING_ERROR_NOT_PERMITTED, + .length = sizeof(START_STREAMING_ERROR_NOT_PERMITTED) - 1, + .status = RRDPUSH_STATUS_PERMISSION_DENIED, + .version = STREAM_HANDSHAKE_ERROR_DENIED, + .dynamic = false, + .error = "remote server denied 
access, probably we don't have the right API key?", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 1 * 60, // 1 minute + .priority = NDLP_ERR, + }, + { + .response = START_STREAMING_ERROR_BUSY_TRY_LATER, + .length = sizeof(START_STREAMING_ERROR_BUSY_TRY_LATER) - 1, + .status = RRDPUSH_STATUS_RATE_LIMIT, + .version = STREAM_HANDSHAKE_BUSY_TRY_LATER, + .dynamic = false, + .error = "remote server is currently busy, we should try later", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 2 * 60, // 2 minutes + .priority = NDLP_NOTICE, + }, + { + .response = START_STREAMING_ERROR_INTERNAL_ERROR, + .length = sizeof(START_STREAMING_ERROR_INTERNAL_ERROR) - 1, + .status = RRDPUSH_STATUS_INTERNAL_SERVER_ERROR, + .version = STREAM_HANDSHAKE_INTERNAL_ERROR, + .dynamic = false, + .error = "remote server is encountered an internal error, we should try later", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 5 * 60, // 5 minutes + .priority = NDLP_CRIT, + }, + { + .response = START_STREAMING_ERROR_INITIALIZATION, + .length = sizeof(START_STREAMING_ERROR_INITIALIZATION) - 1, + .status = RRDPUSH_STATUS_INITIALIZATION_IN_PROGRESS, + .version = STREAM_HANDSHAKE_INITIALIZATION, + .dynamic = false, + .error = "remote server is initializing, we should try later", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 2 * 60, // 2 minute + .priority = NDLP_NOTICE, + }, + + // terminator + { + .response = NULL, + .length = 0, + .status = RRDPUSH_STATUS_BAD_HANDSHAKE, + .version = STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE, + .dynamic = false, + .error = "remote node response is not understood, is it Netdata?", + .worker_job_id = WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, + .postpone_reconnect_seconds = 1 * 60, // 1 minute + .priority = NDLP_ERR, + } +}; + +static inline bool rrdpush_sender_validate_response(RRDHOST 
*host, struct sender_state *s, char *http, size_t http_length) { + int32_t version = STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE; + + int i; + for(i = 0; stream_responses[i].response ; i++) { + if(stream_responses[i].dynamic && + http_length > stream_responses[i].length && http_length < (stream_responses[i].length + 30) && + strncmp(http, stream_responses[i].response, stream_responses[i].length) == 0) { + + version = str2i(&http[stream_responses[i].length]); + break; + } + else if(http_length == stream_responses[i].length && strcmp(http, stream_responses[i].response) == 0) { + version = stream_responses[i].version; + + break; + } + } + + if(version >= STREAM_HANDSHAKE_OK_V1) { + host->destination->reason = version; + host->destination->postpone_reconnection_until = now_realtime_sec() + s->reconnect_delay; + s->capabilities = convert_stream_version_to_capabilities(version, host, true); + return true; + } + + ND_LOG_FIELD_PRIORITY priority = stream_responses[i].priority; + const char *error = stream_responses[i].error; + const char *status = stream_responses[i].status; + int worker_job_id = stream_responses[i].worker_job_id; + int delay = stream_responses[i].postpone_reconnect_seconds; + + worker_is_busy(worker_job_id); + rrdpush_sender_thread_close_socket(s); + host->destination->reason = version; + host->destination->postpone_reconnection_until = now_realtime_sec() + delay; + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, status), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + char buf[RFC3339_MAX_LENGTH]; + rfc3339_datetime_ut(buf, sizeof(buf), host->destination->postpone_reconnection_until * USEC_PER_SEC, 0, false); + + nd_log(NDLS_DAEMON, priority, + "STREAM %s [send to %s]: %s - will retry in %d secs, at %s", + rrdhost_hostname(host), s->connected_to, error, delay, buf); + + return false; +} + +unsigned char alpn_proto_list[] = { + 18, 'n', 'e', 't', 'd', 'a', 't', 'a', '_', 's', 't', 'r', 'e', 'a', 'm', '/', '2', '.', '0', + 8, 'h', 't', 
't', 'p', '/', '1', '.', '1' +}; + +#define CONN_UPGRADE_VAL "upgrade" + +static bool rrdpush_sender_connect_ssl(struct sender_state *s) { + RRDHOST *host = s->host; + bool ssl_required = host && host->destination && host->destination->ssl; + + netdata_ssl_close(&host->sender->ssl); + + if(!ssl_required) + return true; + + if (netdata_ssl_open_ext(&host->sender->ssl, netdata_ssl_streaming_sender_ctx, s->rrdpush_sender_socket, alpn_proto_list, sizeof(alpn_proto_list))) { + if(!netdata_ssl_connect(&host->sender->ssl)) { + // couldn't connect + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_SSL_ERROR), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR); + rrdpush_sender_thread_close_socket(s); + host->destination->reason = STREAM_HANDSHAKE_ERROR_SSL_ERROR; + host->destination->postpone_reconnection_until = now_realtime_sec() + 5 * 60; + return false; + } + + if (netdata_ssl_validate_certificate_sender && + security_test_certificate(host->sender->ssl.conn)) { + // certificate is not valid + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_INVALID_SSL_CERTIFICATE), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR); + netdata_log_error("SSL: closing the stream connection, because the server SSL certificate is not valid."); + rrdpush_sender_thread_close_socket(s); + host->destination->reason = STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE; + host->destination->postpone_reconnection_until = now_realtime_sec() + 5 * 60; + return false; + } + + return true; + } + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_CANT_ESTABLISH_SSL_CONNECTION), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + netdata_log_error("SSL: failed to establish connection."); + return false; +} + +static int rrdpush_http_upgrade_prelude(RRDHOST *host, struct sender_state *s) { + + char 
http[HTTP_HEADER_SIZE + 1]; + snprintfz(http, HTTP_HEADER_SIZE, + "GET " NETDATA_STREAM_URL HTTP_1_1 HTTP_ENDL + "Upgrade: " NETDATA_STREAM_PROTO_NAME HTTP_ENDL + "Connection: Upgrade" + HTTP_HDR_END); + + ssize_t bytes = send_timeout( + &host->sender->ssl, + s->rrdpush_sender_socket, + http, + strlen(http), + 0, + 1000); + + bytes = recv_timeout( + &host->sender->ssl, + s->rrdpush_sender_socket, + http, + HTTP_HEADER_SIZE, + 0, + 1000); + + if (bytes <= 0) { + error_report("Error reading from remote"); + return 1; + } + + rbuf_t buf = rbuf_create(bytes); + rbuf_push(buf, http, bytes); + + http_parse_ctx ctx; + http_parse_ctx_create(&ctx, HTTP_PARSE_INITIAL); + ctx.flags |= HTTP_PARSE_FLAG_DONT_WAIT_FOR_CONTENT; + + int rc; + // while((rc = parse_http_response(buf, &ctx)) == HTTP_PARSE_NEED_MORE_DATA); + rc = parse_http_response(buf, &ctx); + + if (rc != HTTP_PARSE_SUCCESS) { + error_report("Failed to parse HTTP response sent. (%d)", rc); + goto err_cleanup; + } + if (ctx.http_code == HTTP_RESP_MOVED_PERM) { + const char *hdr = get_http_header_by_name(&ctx, "location"); + if (hdr) + error_report("HTTP response is %d Moved Permanently (location: \"%s\") instead of expected %d Switching Protocols.", ctx.http_code, hdr, HTTP_RESP_SWITCH_PROTO); + else + error_report("HTTP response is %d instead of expected %d Switching Protocols.", ctx.http_code, HTTP_RESP_SWITCH_PROTO); + goto err_cleanup; + } + if (ctx.http_code == HTTP_RESP_NOT_FOUND) { + error_report("HTTP response is %d instead of expected %d Switching Protocols. 
Parent version too old.", ctx.http_code, HTTP_RESP_SWITCH_PROTO); + // TODO set some flag here that will signify parent is older version + // and to try connection without rrdpush_http_upgrade_prelude next time + goto err_cleanup; + } + if (ctx.http_code != HTTP_RESP_SWITCH_PROTO) { + error_report("HTTP response is %d instead of expected %d Switching Protocols", ctx.http_code, HTTP_RESP_SWITCH_PROTO); + goto err_cleanup; + } + + const char *hdr = get_http_header_by_name(&ctx, "connection"); + if (!hdr) { + error_report("Missing \"connection\" header in reply"); + goto err_cleanup; + } + if (strncmp(hdr, CONN_UPGRADE_VAL, strlen(CONN_UPGRADE_VAL))) { + error_report("Expected \"connection: " CONN_UPGRADE_VAL "\""); + goto err_cleanup; + } + + hdr = get_http_header_by_name(&ctx, "upgrade"); + if (!hdr) { + error_report("Missing \"upgrade\" header in reply"); + goto err_cleanup; + } + if (strncmp(hdr, NETDATA_STREAM_PROTO_NAME, strlen(NETDATA_STREAM_PROTO_NAME))) { + error_report("Expected \"upgrade: " NETDATA_STREAM_PROTO_NAME "\""); + goto err_cleanup; + } + + netdata_log_debug(D_STREAM, "Stream sender upgrade to \"" NETDATA_STREAM_PROTO_NAME "\" successful"); + rbuf_free(buf); + http_parse_ctx_destroy(&ctx); + return 0; +err_cleanup: + rbuf_free(buf); + http_parse_ctx_destroy(&ctx); + return 1; +} + +static bool rrdpush_sender_thread_connect_to_parent(RRDHOST *host, int default_port, int timeout, struct sender_state *s) { + + struct timeval tv = { + .tv_sec = timeout, + .tv_usec = 0 + }; + + // make sure the socket is closed + rrdpush_sender_thread_close_socket(s); + + s->rrdpush_sender_socket = connect_to_one_of_destinations( + host + , default_port + , &tv + , &s->reconnects_counter + , s->connected_to + , sizeof(s->connected_to)-1 + , &host->destination + ); + + if(unlikely(s->rrdpush_sender_socket == -1)) { + // netdata_log_error("STREAM %s [send to %s]: could not connect to parent node at this time.", rrdhost_hostname(host), host->rrdpush_send_destination); + 
return false; + } + + // netdata_log_info("STREAM %s [send to %s]: initializing communication...", rrdhost_hostname(host), s->connected_to); + + // reset our capabilities to default + s->capabilities = stream_our_capabilities(host, true); + + /* TODO: During the implementation of #7265 switch the set of variables to HOST_* and CONTAINER_* if the + version negotiation resulted in a high enough version. + */ + stream_encoded_t se; + rrdpush_encode_variable(&se, host); + + host->sender->hops = host->system_info->hops + 1; + + char http[HTTP_HEADER_SIZE + 1]; + int eol = snprintfz(http, HTTP_HEADER_SIZE, + "STREAM " + "key=%s" + "&hostname=%s" + "®istry_hostname=%s" + "&machine_guid=%s" + "&update_every=%d" + "&os=%s" + "&timezone=%s" + "&abbrev_timezone=%s" + "&utc_offset=%d" + "&hops=%d" + "&ml_capable=%d" + "&ml_enabled=%d" + "&mc_version=%d" + "&ver=%u" + "&NETDATA_INSTANCE_CLOUD_TYPE=%s" + "&NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE=%s" + "&NETDATA_INSTANCE_CLOUD_INSTANCE_REGION=%s" + "&NETDATA_SYSTEM_OS_NAME=%s" + "&NETDATA_SYSTEM_OS_ID=%s" + "&NETDATA_SYSTEM_OS_ID_LIKE=%s" + "&NETDATA_SYSTEM_OS_VERSION=%s" + "&NETDATA_SYSTEM_OS_VERSION_ID=%s" + "&NETDATA_SYSTEM_OS_DETECTION=%s" + "&NETDATA_HOST_IS_K8S_NODE=%s" + "&NETDATA_SYSTEM_KERNEL_NAME=%s" + "&NETDATA_SYSTEM_KERNEL_VERSION=%s" + "&NETDATA_SYSTEM_ARCHITECTURE=%s" + "&NETDATA_SYSTEM_VIRTUALIZATION=%s" + "&NETDATA_SYSTEM_VIRT_DETECTION=%s" + "&NETDATA_SYSTEM_CONTAINER=%s" + "&NETDATA_SYSTEM_CONTAINER_DETECTION=%s" + "&NETDATA_CONTAINER_OS_NAME=%s" + "&NETDATA_CONTAINER_OS_ID=%s" + "&NETDATA_CONTAINER_OS_ID_LIKE=%s" + "&NETDATA_CONTAINER_OS_VERSION=%s" + "&NETDATA_CONTAINER_OS_VERSION_ID=%s" + "&NETDATA_CONTAINER_OS_DETECTION=%s" + "&NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT=%s" + "&NETDATA_SYSTEM_CPU_FREQ=%s" + "&NETDATA_SYSTEM_TOTAL_RAM=%s" + "&NETDATA_SYSTEM_TOTAL_DISK_SIZE=%s" + "&NETDATA_PROTOCOL_VERSION=%s" + HTTP_1_1 HTTP_ENDL + "User-Agent: %s/%s\r\n" + "Accept: */*\r\n\r\n" + , host->rrdpush.send.api_key + , 
rrdhost_hostname(host) + , rrdhost_registry_hostname(host) + , host->machine_guid + , default_rrd_update_every + , rrdhost_os(host) + , rrdhost_timezone(host) + , rrdhost_abbrev_timezone(host) + , host->utc_offset + , host->sender->hops + , host->system_info->ml_capable + , host->system_info->ml_enabled + , host->system_info->mc_version + , s->capabilities + , (host->system_info->cloud_provider_type) ? host->system_info->cloud_provider_type : "" + , (host->system_info->cloud_instance_type) ? host->system_info->cloud_instance_type : "" + , (host->system_info->cloud_instance_region) ? host->system_info->cloud_instance_region : "" + , se.os_name + , se.os_id + , (host->system_info->host_os_id_like) ? host->system_info->host_os_id_like : "" + , se.os_version + , (host->system_info->host_os_version_id) ? host->system_info->host_os_version_id : "" + , (host->system_info->host_os_detection) ? host->system_info->host_os_detection : "" + , (host->system_info->is_k8s_node) ? host->system_info->is_k8s_node : "" + , se.kernel_name + , se.kernel_version + , (host->system_info->architecture) ? host->system_info->architecture : "" + , (host->system_info->virtualization) ? host->system_info->virtualization : "" + , (host->system_info->virt_detection) ? host->system_info->virt_detection : "" + , (host->system_info->container) ? host->system_info->container : "" + , (host->system_info->container_detection) ? host->system_info->container_detection : "" + , (host->system_info->container_os_name) ? host->system_info->container_os_name : "" + , (host->system_info->container_os_id) ? host->system_info->container_os_id : "" + , (host->system_info->container_os_id_like) ? host->system_info->container_os_id_like : "" + , (host->system_info->container_os_version) ? host->system_info->container_os_version : "" + , (host->system_info->container_os_version_id) ? host->system_info->container_os_version_id : "" + , (host->system_info->container_os_detection) ? 
host->system_info->container_os_detection : "" + , (host->system_info->host_cores) ? host->system_info->host_cores : "" + , (host->system_info->host_cpu_freq) ? host->system_info->host_cpu_freq : "" + , (host->system_info->host_ram_total) ? host->system_info->host_ram_total : "" + , (host->system_info->host_disk_space) ? host->system_info->host_disk_space : "" + , STREAMING_PROTOCOL_VERSION + , rrdhost_program_name(host) + , rrdhost_program_version(host) + ); + http[eol] = 0x00; + rrdpush_clean_encoded(&se); + + if(!rrdpush_sender_connect_ssl(s)) + return false; + + if (s->parent_using_h2o && rrdpush_http_upgrade_prelude(host, s)) { + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_CANT_UPGRADE_CONNECTION), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_CANT_UPGRADE_CONNECTION); + rrdpush_sender_thread_close_socket(s); + host->destination->reason = STREAM_HANDSHAKE_ERROR_HTTP_UPGRADE; + host->destination->postpone_reconnection_until = now_realtime_sec() + 1 * 60; + return false; + } + + ssize_t len = (ssize_t)strlen(http); + ssize_t bytes = send_timeout( + &host->sender->ssl, + s->rrdpush_sender_socket, + http, + len, + 0, + timeout); + + if(bytes <= 0) { // timeout is 0 + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_TIMEOUT), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT); + rrdpush_sender_thread_close_socket(s); + + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM %s [send to %s]: failed to send HTTP header to remote netdata.", + rrdhost_hostname(host), s->connected_to); + + host->destination->reason = STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT; + host->destination->postpone_reconnection_until = now_realtime_sec() + 1 * 60; + return false; + } + + bytes = recv_timeout( + &host->sender->ssl, + s->rrdpush_sender_socket, + http, + HTTP_HEADER_SIZE, + 0, + timeout); + + if(bytes <= 0) { // timeout is 0 + 
ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_TIMEOUT), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT); + rrdpush_sender_thread_close_socket(s); + + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM %s [send to %s]: remote netdata does not respond.", + rrdhost_hostname(host), s->connected_to); + + host->destination->reason = STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT; + host->destination->postpone_reconnection_until = now_realtime_sec() + 30; + return false; + } + + if(sock_setnonblock(s->rrdpush_sender_socket) < 0) + nd_log(NDLS_DAEMON, NDLP_WARNING, + "STREAM %s [send to %s]: cannot set non-blocking mode for socket.", + rrdhost_hostname(host), s->connected_to); + sock_setcloexec(s->rrdpush_sender_socket); + + if(sock_enlarge_out(s->rrdpush_sender_socket) < 0) + nd_log(NDLS_DAEMON, NDLP_WARNING, + "STREAM %s [send to %s]: cannot enlarge the socket buffer.", + rrdhost_hostname(host), s->connected_to); + + http[bytes] = '\0'; + if(!rrdpush_sender_validate_response(host, s, http, bytes)) + return false; + + rrdpush_compression_initialize(s); + + log_sender_capabilities(s); + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, RRDPUSH_STATUS_CONNECTED), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "STREAM %s: connected to %s...", + rrdhost_hostname(host), s->connected_to); + + return true; +} + +bool attempt_to_connect(struct sender_state *state) { + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_UUID(NDF_MESSAGE_ID, &streaming_to_parent_msgid), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + state->send_attempts = 0; + + // reset the bytes we have sent for this session + state->sent_bytes_on_this_connection = 0; + memset(state->sent_bytes_on_this_connection_per_type, 0, sizeof(state->sent_bytes_on_this_connection_per_type)); + + if(rrdpush_sender_thread_connect_to_parent(state->host, state->default_port, state->timeout, state)) { + 
// reset the buffer, to properly send charts and metrics + rrdpush_sender_on_connect(state->host); + + // send from the beginning + state->begin = 0; + + // make sure the next reconnection will be immediate + state->not_connected_loops = 0; + + // let the data collection threads know we are ready + rrdhost_flag_set(state->host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED); + + rrdpush_sender_after_connect(state->host); + + return true; + } + + // we couldn't connect + + // increase the failed connections counter + state->not_connected_loops++; + + // slow re-connection on repeating errors + usec_t now_ut = now_monotonic_usec(); + usec_t end_ut = now_ut + USEC_PER_SEC * state->reconnect_delay; + while(now_ut < end_ut) { + if(nd_thread_signaled_to_cancel()) + return false; + + sleep_usec(100 * USEC_PER_MS); // seconds + now_ut = now_monotonic_usec(); + } + + return false; +} diff --git a/src/streaming/sender_execute.c b/src/streaming/sender_execute.c new file mode 100644 index 00000000000000..e1e49bf9dff48e --- /dev/null +++ b/src/streaming/sender_execute.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sender_internals.h" + +struct inflight_stream_function { + struct sender_state *sender; + STRING *transaction; + usec_t received_ut; +}; + +static void stream_execute_function_callback(BUFFER *func_wb, int code, void *data) { + struct inflight_stream_function *tmp = data; + struct sender_state *s = tmp->sender; + + if(rrdhost_can_send_definitions_to_parent(s->host)) { + BUFFER *wb = sender_start(s); + + pluginsd_function_result_begin_to_buffer(wb + , string2str(tmp->transaction) + , code + , content_type_id2string(func_wb->content_type) + , func_wb->expires); + + buffer_fast_strcat(wb, buffer_tostring(func_wb), buffer_strlen(func_wb)); + pluginsd_function_result_end_to_buffer(wb); + + sender_commit(s, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); + sender_thread_buffer_free(); + + internal_error(true, "STREAM %s [send to %s] FUNCTION transaction %s 
sending back response (%zu bytes, %"PRIu64" usec).", + rrdhost_hostname(s->host), s->connected_to, + string2str(tmp->transaction), + buffer_strlen(func_wb), + now_realtime_usec() - tmp->received_ut); + } + + string_freez(tmp->transaction); + buffer_free(func_wb); + freez(tmp); +} + +static void stream_execute_function_progress_callback(void *data, size_t done, size_t all) { + struct inflight_stream_function *tmp = data; + struct sender_state *s = tmp->sender; + + if(rrdhost_can_send_definitions_to_parent(s->host)) { + BUFFER *wb = sender_start(s); + + buffer_sprintf(wb, PLUGINSD_KEYWORD_FUNCTION_PROGRESS " '%s' %zu %zu\n", + string2str(tmp->transaction), done, all); + + sender_commit(s, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); + } +} + +static void execute_commands_function(struct sender_state *s, const char *command, const char *transaction, const char *timeout_s, const char *function, BUFFER *payload, const char *access, const char *source) { + worker_is_busy(WORKER_SENDER_JOB_FUNCTION_REQUEST); + nd_log(NDLS_ACCESS, NDLP_INFO, NULL); + + if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { + netdata_log_error("STREAM %s [send to %s] %s execution command is incomplete (transaction = '%s', timeout = '%s', function = '%s'). 
Ignoring it.", + rrdhost_hostname(s->host), s->connected_to, + command, + transaction?transaction:"(unset)", + timeout_s?timeout_s:"(unset)", + function?function:"(unset)"); + } + else { + int timeout = str2i(timeout_s); + if(timeout <= 0) timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT; + + struct inflight_stream_function *tmp = callocz(1, sizeof(struct inflight_stream_function)); + tmp->received_ut = now_realtime_usec(); + tmp->sender = s; + tmp->transaction = string_strdupz(transaction); + BUFFER *wb = buffer_create(1024, &netdata_buffers_statistics.buffers_functions); + + int code = rrd_function_run(s->host, wb, timeout, + http_access_from_hex_mapping_old_roles(access), function, false, transaction, + stream_execute_function_callback, tmp, + stream_has_capability(s, STREAM_CAP_PROGRESS) ? stream_execute_function_progress_callback : NULL, + stream_has_capability(s, STREAM_CAP_PROGRESS) ? tmp : NULL, + NULL, NULL, payload, source, true); + + if(code != HTTP_RESP_OK) { + if (!buffer_strlen(wb)) + rrd_call_function_error(wb, "Failed to route request to collector", code); + } + } +} + +struct deferred_function { + const char *transaction; + const char *timeout_s; + const char *function; + const char *access; + const char *source; +}; + +static void execute_deferred_function(struct sender_state *s, void *data) { + struct deferred_function *dfd = data; + execute_commands_function(s, s->defer.end_keyword, + dfd->transaction, dfd->timeout_s, + dfd->function, s->defer.payload, + dfd->access, dfd->source); +} + +static void execute_deferred_json(struct sender_state *s, void *data) { + const char *keyword = data; + + if(strcmp(keyword, PLUGINSD_KEYWORD_STREAM_PATH) == 0) + stream_path_set_from_json(s->host, buffer_tostring(s->defer.payload), true); + else + nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM: unknown JSON keyword '%s' with payload: %s", keyword, buffer_tostring(s->defer.payload)); +} + +static void cleanup_deferred_json(struct sender_state *s __maybe_unused, void *data) { + 
const char *keyword = data; + freez((void *)keyword); +} + +static void cleanup_deferred_function(struct sender_state *s __maybe_unused, void *data) { + struct deferred_function *dfd = data; + freez((void *)dfd->transaction); + freez((void *)dfd->timeout_s); + freez((void *)dfd->function); + freez((void *)dfd->access); + freez((void *)dfd->source); + freez(dfd); +} + +static void cleanup_deferred_data(struct sender_state *s) { + if(s->defer.cleanup) + s->defer.cleanup(s, s->defer.action_data); + + buffer_free(s->defer.payload); + s->defer.payload = NULL; + s->defer.end_keyword = NULL; + s->defer.action = NULL; + s->defer.cleanup = NULL; + s->defer.action_data = NULL; +} + +void rrdpush_sender_execute_commands_cleanup(struct sender_state *s) { + cleanup_deferred_data(s); +} + +// This is just a placeholder until the gap filling state machine is inserted +void rrdpush_sender_execute_commands(struct sender_state *s) { + worker_is_busy(WORKER_SENDER_JOB_EXECUTE); + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_CB(NDF_REQUEST, line_splitter_reconstruct_line, &s->line), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + char *start = s->read_buffer, *end = &s->read_buffer[s->read_len], *newline; + *end = '\0'; + for( ; start < end ; start = newline + 1) { + newline = strchr(start, '\n'); + + if(!newline) { + if(s->defer.end_keyword) { + buffer_strcat(s->defer.payload, start); + start = end; + } + break; + } + + *newline = '\0'; + s->line.count++; + + if(s->defer.end_keyword) { + if(strcmp(start, s->defer.end_keyword) == 0) { + s->defer.action(s, s->defer.action_data); + cleanup_deferred_data(s); + } + else { + buffer_strcat(s->defer.payload, start); + buffer_putc(s->defer.payload, '\n'); + } + + continue; + } + + s->line.num_words = quoted_strings_splitter_pluginsd(start, s->line.words, PLUGINSD_MAX_WORDS); + const char *command = get_word(s->line.words, s->line.num_words, 0); + + if(command && strcmp(command, PLUGINSD_CALL_FUNCTION) == 0) { + char *transaction = 
get_word(s->line.words, s->line.num_words, 1); + char *timeout_s = get_word(s->line.words, s->line.num_words, 2); + char *function = get_word(s->line.words, s->line.num_words, 3); + char *access = get_word(s->line.words, s->line.num_words, 4); + char *source = get_word(s->line.words, s->line.num_words, 5); + + execute_commands_function(s, command, transaction, timeout_s, function, NULL, access, source); + } + else if(command && strcmp(command, PLUGINSD_CALL_FUNCTION_PAYLOAD_BEGIN) == 0) { + char *transaction = get_word(s->line.words, s->line.num_words, 1); + char *timeout_s = get_word(s->line.words, s->line.num_words, 2); + char *function = get_word(s->line.words, s->line.num_words, 3); + char *access = get_word(s->line.words, s->line.num_words, 4); + char *source = get_word(s->line.words, s->line.num_words, 5); + char *content_type = get_word(s->line.words, s->line.num_words, 6); + + s->defer.end_keyword = PLUGINSD_CALL_FUNCTION_PAYLOAD_END; + s->defer.payload = buffer_create(0, NULL); + s->defer.payload->content_type = content_type_string2id(content_type); + s->defer.action = execute_deferred_function; + s->defer.cleanup = cleanup_deferred_function; + + struct deferred_function *dfd = callocz(1, sizeof(*dfd)); + dfd->transaction = strdupz(transaction ? transaction : ""); + dfd->timeout_s = strdupz(timeout_s ? timeout_s : ""); + dfd->function = strdupz(function ? function : ""); + dfd->access = strdupz(access ? access : ""); + dfd->source = strdupz(source ? 
source : ""); + + s->defer.action_data = dfd; + } + else if(command && strcmp(command, PLUGINSD_CALL_FUNCTION_CANCEL) == 0) { + worker_is_busy(WORKER_SENDER_JOB_FUNCTION_REQUEST); + nd_log(NDLS_ACCESS, NDLP_DEBUG, NULL); + + char *transaction = get_word(s->line.words, s->line.num_words, 1); + if(transaction && *transaction) + rrd_function_cancel(transaction); + } + else if(command && strcmp(command, PLUGINSD_CALL_FUNCTION_PROGRESS) == 0) { + worker_is_busy(WORKER_SENDER_JOB_FUNCTION_REQUEST); + nd_log(NDLS_ACCESS, NDLP_DEBUG, NULL); + + char *transaction = get_word(s->line.words, s->line.num_words, 1); + if(transaction && *transaction) + rrd_function_progress(transaction); + } + else if (command && strcmp(command, PLUGINSD_KEYWORD_REPLAY_CHART) == 0) { + worker_is_busy(WORKER_SENDER_JOB_REPLAY_REQUEST); + nd_log(NDLS_ACCESS, NDLP_DEBUG, NULL); + + const char *chart_id = get_word(s->line.words, s->line.num_words, 1); + const char *start_streaming = get_word(s->line.words, s->line.num_words, 2); + const char *after = get_word(s->line.words, s->line.num_words, 3); + const char *before = get_word(s->line.words, s->line.num_words, 4); + + if (!chart_id || !start_streaming || !after || !before) { + netdata_log_error("STREAM %s [send to %s] %s command is incomplete" + " (chart=%s, start_streaming=%s, after=%s, before=%s)", + rrdhost_hostname(s->host), s->connected_to, + command, + chart_id ? chart_id : "(unset)", + start_streaming ? start_streaming : "(unset)", + after ? after : "(unset)", + before ? 
before : "(unset)"); + } + else { + replication_add_request(s, chart_id, + strtoll(after, NULL, 0), + strtoll(before, NULL, 0), + !strcmp(start_streaming, "true") + ); + } + } + else if(command && strcmp(command, PLUGINSD_KEYWORD_NODE_ID) == 0) { + rrdpush_sender_get_node_and_claim_id_from_parent(s); + } + else if(command && strcmp(command, PLUGINSD_KEYWORD_JSON) == 0) { + char *keyword = get_word(s->line.words, s->line.num_words, 1); + + s->defer.end_keyword = PLUGINSD_KEYWORD_JSON_END; + s->defer.payload = buffer_create(0, NULL); + s->defer.action = execute_deferred_json; + s->defer.cleanup = cleanup_deferred_json; + s->defer.action_data = strdupz(keyword); + } + else { + netdata_log_error("STREAM %s [send to %s] received unknown command over connection: %s", + rrdhost_hostname(s->host), s->connected_to, s->line.words[0]?s->line.words[0]:"(unset)"); + } + + line_splitter_reset(&s->line); + worker_is_busy(WORKER_SENDER_JOB_EXECUTE); + } + + if (start < end) { + memmove(s->read_buffer, start, end-start); + s->read_len = end - start; + } + else { + s->read_buffer[0] = '\0'; + s->read_len = 0; + } +} diff --git a/src/streaming/sender_internals.h b/src/streaming/sender_internals.h new file mode 100644 index 00000000000000..237113a8c10b0a --- /dev/null +++ b/src/streaming/sender_internals.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SENDER_INTERNALS_H +#define NETDATA_SENDER_INTERNALS_H + +#include "rrdpush.h" +#include "common.h" +#include "aclk/https_client.h" + +#define WORKER_SENDER_JOB_CONNECT 0 +#define WORKER_SENDER_JOB_PIPE_READ 1 +#define WORKER_SENDER_JOB_SOCKET_RECEIVE 2 +#define WORKER_SENDER_JOB_EXECUTE 3 +#define WORKER_SENDER_JOB_SOCKET_SEND 4 +#define WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE 5 +#define WORKER_SENDER_JOB_DISCONNECT_OVERFLOW 6 +#define WORKER_SENDER_JOB_DISCONNECT_TIMEOUT 7 +#define WORKER_SENDER_JOB_DISCONNECT_POLL_ERROR 8 +#define WORKER_SENDER_JOB_DISCONNECT_SOCKET_ERROR 9 +#define 
WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR 10 +#define WORKER_SENDER_JOB_DISCONNECT_PARENT_CLOSED 11 +#define WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR 12 +#define WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR 13 +#define WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION 14 +#define WORKER_SENDER_JOB_BUFFER_RATIO 15 +#define WORKER_SENDER_JOB_BYTES_RECEIVED 16 +#define WORKER_SENDER_JOB_BYTES_SENT 17 +#define WORKER_SENDER_JOB_BYTES_COMPRESSED 18 +#define WORKER_SENDER_JOB_BYTES_UNCOMPRESSED 19 +#define WORKER_SENDER_JOB_BYTES_COMPRESSION_RATIO 20 +#define WORKER_SENDER_JOB_REPLAY_REQUEST 21 +#define WORKER_SENDER_JOB_FUNCTION_REQUEST 22 +#define WORKER_SENDER_JOB_REPLAY_DICT_SIZE 23 +#define WORKER_SENDER_JOB_DISCONNECT_CANT_UPGRADE_CONNECTION 24 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 25 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 25 +#endif + +extern struct config stream_config; +extern char *netdata_ssl_ca_path; +extern char *netdata_ssl_ca_file; + +bool attempt_to_connect(struct sender_state *state); +void rrdpush_sender_on_connect(RRDHOST *host); +void rrdpush_sender_after_connect(RRDHOST *host); +void rrdpush_sender_thread_close_socket(struct sender_state *s); + +void rrdpush_sender_execute_commands_cleanup(struct sender_state *s); +void rrdpush_sender_execute_commands(struct sender_state *s); + +#endif //NETDATA_SENDER_INTERNALS_H diff --git a/src/streaming/stream_capabilities.c b/src/streaming/stream_capabilities.c new file mode 100644 index 00000000000000..b089e8f9dbcdaa --- /dev/null +++ b/src/streaming/stream_capabilities.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdpush.h" + +static STREAM_CAPABILITIES globally_disabled_capabilities = STREAM_CAP_NONE; + +static struct { + STREAM_CAPABILITIES cap; + const char *str; +} capability_names[] = { + {STREAM_CAP_V1, "V1" }, + {STREAM_CAP_V2, "V2" }, + {STREAM_CAP_VN, "VN" }, + {STREAM_CAP_VCAPS, "VCAPS" }, + {STREAM_CAP_HLABELS, "HLABELS" }, + {STREAM_CAP_CLAIM, "CLAIM" 
}, + {STREAM_CAP_CLABELS, "CLABELS" }, + {STREAM_CAP_LZ4, "LZ4" }, + {STREAM_CAP_FUNCTIONS, "FUNCTIONS" }, + {STREAM_CAP_REPLICATION, "REPLICATION" }, + {STREAM_CAP_BINARY, "BINARY" }, + {STREAM_CAP_INTERPOLATED, "INTERPOLATED" }, + {STREAM_CAP_IEEE754, "IEEE754" }, + {STREAM_CAP_DATA_WITH_ML, "ML" }, + {STREAM_CAP_DYNCFG, "DYNCFG" }, + {STREAM_CAP_SLOTS, "SLOTS" }, + {STREAM_CAP_ZSTD, "ZSTD" }, + {STREAM_CAP_GZIP, "GZIP" }, + {STREAM_CAP_BROTLI, "BROTLI" }, + {STREAM_CAP_PROGRESS, "PROGRESS" }, + {STREAM_CAP_NODE_ID, "NODEID" }, + {STREAM_CAP_PATHS, "PATHS" }, + {0 , NULL }, +}; + +STREAM_CAPABILITIES stream_capabilities_parse_one(const char *str) { + if (!str || !*str) + return STREAM_CAP_NONE; + + for (size_t i = 0; capability_names[i].str; i++) { + if (strcmp(capability_names[i].str, str) == 0) + return capability_names[i].cap; + } + + return STREAM_CAP_NONE; +} + +void stream_capabilities_to_string(BUFFER *wb, STREAM_CAPABILITIES caps) { + for(size_t i = 0; capability_names[i].str ; i++) { + if(caps & capability_names[i].cap) { + buffer_strcat(wb, capability_names[i].str); + buffer_strcat(wb, " "); + } + } +} + +void stream_capabilities_to_json_array(BUFFER *wb, STREAM_CAPABILITIES caps, const char *key) { + if(key) + buffer_json_member_add_array(wb, key); + else + buffer_json_add_array_item_array(wb); + + for(size_t i = 0; capability_names[i].str ; i++) { + if(caps & capability_names[i].cap) + buffer_json_add_array_item_string(wb, capability_names[i].str); + } + + buffer_json_array_close(wb); +} + +void log_receiver_capabilities(struct receiver_state *rpt) { + BUFFER *wb = buffer_create(100, NULL); + stream_capabilities_to_string(wb, rpt->capabilities); + + nd_log_daemon(NDLP_INFO, "STREAM %s [receive from [%s]:%s]: established link with negotiated capabilities: %s", + rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, buffer_tostring(wb)); + + buffer_free(wb); +} + +void log_sender_capabilities(struct sender_state *s) { + BUFFER *wb = 
buffer_create(100, NULL); + stream_capabilities_to_string(wb, s->capabilities); + + nd_log_daemon(NDLP_INFO, "STREAM %s [send to %s]: established link with negotiated capabilities: %s", + rrdhost_hostname(s->host), s->connected_to, buffer_tostring(wb)); + + buffer_free(wb); +} + +STREAM_CAPABILITIES stream_our_capabilities(RRDHOST *host, bool sender) { + STREAM_CAPABILITIES disabled_capabilities = globally_disabled_capabilities; + + if(host && sender) { + // we have DATA_WITH_ML capability + // we should remove the DATA_WITH_ML capability if our database does not have anomaly info + // this can happen under these conditions: 1. we don't run ML, and 2. we don't receive ML + spinlock_lock(&host->receiver_lock); + + if(!ml_host_running(host) && !stream_has_capability(host->receiver, STREAM_CAP_DATA_WITH_ML)) + disabled_capabilities |= STREAM_CAP_DATA_WITH_ML; + + spinlock_unlock(&host->receiver_lock); + + if(host->sender) + disabled_capabilities |= host->sender->disabled_capabilities; + } + + return (STREAM_CAP_V1 | + STREAM_CAP_V2 | + STREAM_CAP_VN | + STREAM_CAP_VCAPS | + STREAM_CAP_HLABELS | + STREAM_CAP_CLAIM | + STREAM_CAP_CLABELS | + STREAM_CAP_FUNCTIONS | + STREAM_CAP_REPLICATION | + STREAM_CAP_BINARY | + STREAM_CAP_INTERPOLATED | + STREAM_CAP_SLOTS | + STREAM_CAP_PROGRESS | + STREAM_CAP_COMPRESSIONS_AVAILABLE | + STREAM_CAP_DYNCFG | + STREAM_CAP_NODE_ID | + STREAM_CAP_PATHS | + STREAM_CAP_IEEE754 | + STREAM_CAP_DATA_WITH_ML | + 0) & ~disabled_capabilities; +} + +STREAM_CAPABILITIES convert_stream_version_to_capabilities(int32_t version, RRDHOST *host, bool sender) { + STREAM_CAPABILITIES caps = 0; + + if(version <= 1) caps = STREAM_CAP_V1; + else if(version < STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_V2 | STREAM_CAP_HLABELS; + else if(version <= STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM; + else if(version <= STREAM_OLD_VERSION_CLABELS) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | 
STREAM_CAP_CLABELS; + else if(version <= STREAM_OLD_VERSION_LZ4) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS | STREAM_CAP_LZ4_AVAILABLE; + else caps = version; + + if(caps & STREAM_CAP_VCAPS) + caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2|STREAM_CAP_VN); + + if(caps & STREAM_CAP_VN) + caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2); + + if(caps & STREAM_CAP_V2) + caps &= ~(STREAM_CAP_V1); + + STREAM_CAPABILITIES common_caps = caps & stream_our_capabilities(host, sender); + + if(!(common_caps & STREAM_CAP_INTERPOLATED)) + // DATA WITH ML requires INTERPOLATED + common_caps &= ~STREAM_CAP_DATA_WITH_ML; + + return common_caps; +} + +int32_t stream_capabilities_to_vn(uint32_t caps) { + if(caps & STREAM_CAP_LZ4) return STREAM_OLD_VERSION_LZ4; + if(caps & STREAM_CAP_CLABELS) return STREAM_OLD_VERSION_CLABELS; + return STREAM_OLD_VERSION_CLAIM; // if(caps & STREAM_CAP_CLAIM) +} + +void check_local_streaming_capabilities(void) { + ieee754_doubles = is_system_ieee754_double(); + if(!ieee754_doubles) + globally_disabled_capabilities |= STREAM_CAP_IEEE754; +} diff --git a/src/streaming/stream_capabilities.h b/src/streaming/stream_capabilities.h new file mode 100644 index 00000000000000..90a0e2190cddea --- /dev/null +++ b/src/streaming/stream_capabilities.h @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STREAM_CAPABILITIES_H +#define NETDATA_STREAM_CAPABILITIES_H + +#include "libnetdata/libnetdata.h" + +// ---------------------------------------------------------------------------- +// obsolete versions - do not use anymore + +#define STREAM_OLD_VERSION_CLAIM 3 +#define STREAM_OLD_VERSION_CLABELS 4 +#define STREAM_OLD_VERSION_LZ4 5 + +// ---------------------------------------------------------------------------- +// capabilities negotiation + +typedef enum { + STREAM_CAP_NONE = 0, + + // do not use the first 3 bits + // they used to be versions 1, 2 and 3 + // before we introduce capabilities + + STREAM_CAP_V1 = (1 << 
3), // v1 = the oldest protocol + STREAM_CAP_V2 = (1 << 4), // v2 = the second version of the protocol (with host labels) + STREAM_CAP_VN = (1 << 5), // version negotiation supported (for versions 3, 4, 5 of the protocol) + // v3 = claiming supported + // v4 = chart labels supported + // v5 = lz4 compression supported + STREAM_CAP_VCAPS = (1 << 6), // capabilities negotiation supported + STREAM_CAP_HLABELS = (1 << 7), // host labels supported + STREAM_CAP_CLAIM = (1 << 8), // claiming supported + STREAM_CAP_CLABELS = (1 << 9), // chart labels supported + STREAM_CAP_LZ4 = (1 << 10), // lz4 compression supported + STREAM_CAP_FUNCTIONS = (1 << 11), // plugin functions supported + STREAM_CAP_REPLICATION = (1 << 12), // replication supported + STREAM_CAP_BINARY = (1 << 13), // streaming supports binary data + STREAM_CAP_INTERPOLATED = (1 << 14), // streaming supports interpolated streaming of values + STREAM_CAP_IEEE754 = (1 << 15), // streaming supports binary/hex transfer of double values + STREAM_CAP_DATA_WITH_ML = (1 << 16), // streaming supports transferring anomaly bit + // STREAM_CAP_DYNCFG = (1 << 17), // leave this unused for as long as possible + STREAM_CAP_SLOTS = (1 << 18), // the sender can appoint a unique slot for each chart + STREAM_CAP_ZSTD = (1 << 19), // ZSTD compression supported + STREAM_CAP_GZIP = (1 << 20), // GZIP compression supported + STREAM_CAP_BROTLI = (1 << 21), // BROTLI compression supported + STREAM_CAP_PROGRESS = (1 << 22), // Functions PROGRESS support + STREAM_CAP_DYNCFG = (1 << 23), // support for DYNCFG + STREAM_CAP_NODE_ID = (1 << 24), // support for sending NODE_ID back to the child + STREAM_CAP_PATHS = (1 << 25), // support for sending PATHS upstream and downstream + + STREAM_CAP_INVALID = (1 << 30), // used as an invalid value for capabilities when this is set + // this must be signed int, so don't use the last bit + // needed for negotiating errors between parent and child +} STREAM_CAPABILITIES; + +#ifdef ENABLE_LZ4 +#define 
STREAM_CAP_LZ4_AVAILABLE STREAM_CAP_LZ4 +#else +#define STREAM_CAP_LZ4_AVAILABLE 0 +#endif // ENABLE_LZ4 + +#ifdef ENABLE_ZSTD +#define STREAM_CAP_ZSTD_AVAILABLE STREAM_CAP_ZSTD +#else +#define STREAM_CAP_ZSTD_AVAILABLE 0 +#endif // ENABLE_ZSTD + +#ifdef ENABLE_BROTLI +#define STREAM_CAP_BROTLI_AVAILABLE STREAM_CAP_BROTLI +#else +#define STREAM_CAP_BROTLI_AVAILABLE 0 +#endif // ENABLE_BROTLI + +#define STREAM_CAP_COMPRESSIONS_AVAILABLE (STREAM_CAP_LZ4_AVAILABLE|STREAM_CAP_ZSTD_AVAILABLE|STREAM_CAP_BROTLI_AVAILABLE|STREAM_CAP_GZIP) + +#define stream_has_capability(rpt, capability) ((rpt) && ((rpt)->capabilities & (capability)) == (capability)) + +static inline bool stream_has_more_than_one_capability_of(STREAM_CAPABILITIES caps, STREAM_CAPABILITIES mask) { + STREAM_CAPABILITIES common = (STREAM_CAPABILITIES)(caps & mask); + return (common & (common - 1)) != 0 && common != 0; +} + +struct sender_state; +struct receiver_state; +struct rrdhost; + +STREAM_CAPABILITIES stream_capabilities_parse_one(const char *str); + +void stream_capabilities_to_string(BUFFER *wb, STREAM_CAPABILITIES caps); +void stream_capabilities_to_json_array(BUFFER *wb, STREAM_CAPABILITIES caps, const char *key); +void log_receiver_capabilities(struct receiver_state *rpt); +void log_sender_capabilities(struct sender_state *s); +STREAM_CAPABILITIES convert_stream_version_to_capabilities(int32_t version, struct rrdhost *host, bool sender); +int32_t stream_capabilities_to_vn(uint32_t caps); +STREAM_CAPABILITIES stream_our_capabilities(struct rrdhost *host, bool sender); + +void check_local_streaming_capabilities(void); + +#endif //NETDATA_STREAM_CAPABILITIES_H diff --git a/src/streaming/stream_path.c b/src/streaming/stream_path.c new file mode 100644 index 00000000000000..5173e67231b4b7 --- /dev/null +++ b/src/streaming/stream_path.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "stream_path.h" +#include "rrdpush.h" +#include "collectors/plugins.d/pluginsd_internals.h" + 
+ENUM_STR_MAP_DEFINE(STREAM_PATH_FLAGS) = { + { .id = STREAM_PATH_FLAG_ACLK, .name = "aclk" }, + + // terminator + { . id = 0, .name = NULL } +}; + +BITMAP_STR_DEFINE_FUNCTIONS(STREAM_PATH_FLAGS, STREAM_PATH_FLAG_NONE, ""); + +static void stream_path_clear(STREAM_PATH *p) { + string_freez(p->hostname); + p->hostname = NULL; + p->host_id = UUID_ZERO; + p->node_id = UUID_ZERO; + p->claim_id = UUID_ZERO; + p->hops = 0; + p->since = 0; + p->first_time_t = 0; + p->capabilities = 0; + p->flags = STREAM_PATH_FLAG_NONE; +} + +static void rrdhost_stream_path_clear_unsafe(RRDHOST *host, bool destroy) { + for(size_t i = 0; i < host->rrdpush.path.used ; i++) + stream_path_clear(&host->rrdpush.path.array[i]); + + host->rrdpush.path.used = 0; + + if(destroy) { + freez(host->rrdpush.path.array); + host->rrdpush.path.array = NULL; + host->rrdpush.path.size = 0; + } +} + +void rrdhost_stream_path_clear(RRDHOST *host, bool destroy) { + spinlock_lock(&host->rrdpush.path.spinlock); + rrdhost_stream_path_clear_unsafe(host, destroy); + spinlock_unlock(&host->rrdpush.path.spinlock); +} + +static void stream_path_to_json_object(BUFFER *wb, STREAM_PATH *p) { + buffer_json_add_array_item_object(wb); + buffer_json_member_add_string(wb, "hostname", string2str(p->hostname)); + buffer_json_member_add_uuid(wb, "host_id", p->host_id.uuid); + buffer_json_member_add_uuid(wb, "node_id", p->node_id.uuid); + buffer_json_member_add_uuid(wb, "claim_id", p->claim_id.uuid); + buffer_json_member_add_int64(wb, "hops", p->hops); + buffer_json_member_add_uint64(wb, "since", p->since); + buffer_json_member_add_uint64(wb, "first_time_t", p->first_time_t); + stream_capabilities_to_json_array(wb, p->capabilities, "capabilities"); + STREAM_PATH_FLAGS_2json(wb, "flags", p->flags); + buffer_json_object_close(wb); +} + +static STREAM_PATH rrdhost_stream_path_self(RRDHOST *host) { + STREAM_PATH p = { 0 }; + + bool is_localhost = host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST); + + 
p.hostname = string_dup(localhost->hostname); + p.host_id = localhost->host_id; + p.node_id = localhost->node_id; + p.claim_id = claim_id_get_uuid(); + + p.flags = STREAM_PATH_FLAG_NONE; + if(!UUIDiszero(p.claim_id)) + p.flags |= STREAM_PATH_FLAG_ACLK; + + bool has_receiver = false; + spinlock_lock(&host->receiver_lock); + if(host->receiver) { + has_receiver = true; + p.hops = (int16_t)host->receiver->hops; + p.since = host->receiver->connected_since_s; + } + spinlock_unlock(&host->receiver_lock); + + if(!has_receiver) { + p.hops = (is_localhost) ? 0 : -1; // -1 for stale nodes + p.since = netdata_start_time; + } + + // the following may get the receiver lock again! + p.capabilities = stream_our_capabilities(host, true); + + rrdhost_retention(host, 0, false, &p.first_time_t, NULL); + + return p; +} + +void rrdhost_stream_path_to_json(BUFFER *wb, struct rrdhost *host, const char *key, bool add_version) { + if(add_version) + buffer_json_member_add_uint64(wb, "version", 1); + + spinlock_lock(&host->rrdpush.path.spinlock); + buffer_json_member_add_array(wb, key); + { + { + STREAM_PATH tmp = rrdhost_stream_path_self(host); + + bool found_self = false; + for (size_t i = 0; i < host->rrdpush.path.used; i++) { + STREAM_PATH *p = &host->rrdpush.path.array[i]; + if(UUIDeq(localhost->host_id, p->host_id)) { + // this is us, use the current data + p = &tmp; + found_self = true; + } + stream_path_to_json_object(wb, p); + } + + if(!found_self) { + // we didn't find ourselves in the list. + // append us. 
+ stream_path_to_json_object(wb, &tmp); + } + + stream_path_clear(&tmp); + } + } + buffer_json_array_close(wb); // key + spinlock_unlock(&host->rrdpush.path.spinlock); +} + +static BUFFER *stream_path_payload(RRDHOST *host) { + BUFFER *wb = buffer_create(0, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + rrdhost_stream_path_to_json(wb, host, STREAM_PATH_JSON_MEMBER, true); + buffer_json_finalize(wb); + return wb; +} + +void stream_path_send_to_parent(RRDHOST *host) { + struct sender_state *s = host->sender; + if(!s || !stream_has_capability(s, STREAM_CAP_PATHS)) return; + + CLEAN_BUFFER *payload = stream_path_payload(host); + + BUFFER *wb = sender_start(s); + buffer_sprintf(wb, PLUGINSD_KEYWORD_JSON " " PLUGINSD_KEYWORD_STREAM_PATH "\n%s\n" PLUGINSD_KEYWORD_JSON_END "\n", buffer_tostring(payload)); + sender_commit(s, wb, STREAM_TRAFFIC_TYPE_METADATA); +} + +void stream_path_send_to_child(RRDHOST *host) { + if(host == localhost) + return; + + CLEAN_BUFFER *payload = stream_path_payload(host); + + spinlock_lock(&host->receiver_lock); + if(host->receiver && stream_has_capability(host->receiver, STREAM_CAP_PATHS)) { + + CLEAN_BUFFER *wb = buffer_create(0, NULL); + buffer_sprintf(wb, PLUGINSD_KEYWORD_JSON " " PLUGINSD_KEYWORD_STREAM_PATH "\n%s\n" PLUGINSD_KEYWORD_JSON_END "\n", buffer_tostring(payload)); + send_to_plugin(buffer_tostring(wb), __atomic_load_n(&host->receiver->parser, __ATOMIC_RELAXED)); + } + spinlock_unlock(&host->receiver_lock); +} + +void stream_path_child_disconnected(RRDHOST *host) { + rrdhost_stream_path_clear(host, true); +} + +void stream_path_parent_disconnected(RRDHOST *host) { + spinlock_lock(&host->rrdpush.path.spinlock); + + size_t cleared = 0; + size_t used = host->rrdpush.path.used; + for (size_t i = 0; i < used; i++) { + STREAM_PATH *p = &host->rrdpush.path.array[i]; + if(UUIDeq(localhost->host_id, p->host_id)) { + host->rrdpush.path.used = i + 1; + + for(size_t j = i + 1; j < used ;j++) { + 
stream_path_clear(&host->rrdpush.path.array[j]); + cleared++; + } + + break; + } + } + + spinlock_unlock(&host->rrdpush.path.spinlock); + + if(cleared) + stream_path_send_to_child(host); +} + +void stream_path_retention_updated(RRDHOST *host) { + if(!host || !localhost) return; + stream_path_send_to_parent(host); + stream_path_send_to_child(host); +} + +void stream_path_node_id_updated(RRDHOST *host) { + if(!host || !localhost) return; + stream_path_send_to_parent(host); + stream_path_send_to_child(host); +} + +// -------------------------------------------------------------------------------------------------------------------- + + +static bool parse_single_path(json_object *jobj, const char *path, STREAM_PATH *p, BUFFER *error) { + JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "hostname", p->hostname, error, true); + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "host_id", p->host_id.uuid, error, true); + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "node_id", p->node_id.uuid, error, true); + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "claim_id", p->claim_id.uuid, error, true); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "hops", p->hops, error, true); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "since", p->since, error, true); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "first_time_t", p->first_time_t, error, true); + JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, "flags", STREAM_PATH_FLAGS_2id_one, p->flags, error, true); + JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, "capabilities", stream_capabilities_parse_one, p->capabilities, error, true); + + if(!p->hostname) { + buffer_strcat(error, "hostname cannot be empty"); + return false; + } + + if(UUIDiszero(p->host_id)) { + buffer_strcat(error, "host_id cannot be zero"); + return false; + } + + if(p->hops < 0) { + buffer_strcat(error, "hops cannot be negative"); + return false; + } + + if(p->capabilities == 
STREAM_CAP_NONE) { + buffer_strcat(error, "capabilities cannot be empty"); + return false; + } + + if(p->since <= 0) { + buffer_strcat(error, "since cannot be <= 0"); + return false; + } + + return true; +} + +static XXH128_hash_t stream_path_hash_unsafe(RRDHOST *host) { + if(!host->rrdpush.path.used) + return (XXH128_hash_t){ 0 }; + + return XXH3_128bits(host->rrdpush.path.array, sizeof(*host->rrdpush.path.array) * host->rrdpush.path.used); +} + +static int compare_by_hops(const void *a, const void *b) { + const STREAM_PATH *path1 = a; + const STREAM_PATH *path2 = b; + + if (path1->hops < path2->hops) + return -1; + else if (path1->hops > path2->hops) + return 1; + + return 0; +} + +bool stream_path_set_from_json(RRDHOST *host, const char *json, bool from_parent) { + if(!json || !*json) + return false; + + CLEAN_JSON_OBJECT *jobj = json_tokener_parse(json); + if(!jobj) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM PATH: Cannot parse json: %s", json); + return false; + } + + spinlock_lock(&host->rrdpush.path.spinlock); + XXH128_hash_t old_hash = stream_path_hash_unsafe(host); + rrdhost_stream_path_clear_unsafe(host, true); + + CLEAN_BUFFER *error = buffer_create(0, NULL); + + json_object *_jarray; + if (json_object_object_get_ex(jobj, STREAM_PATH_JSON_MEMBER, &_jarray) && + json_object_is_type(_jarray, json_type_array)) { + size_t items = json_object_array_length(_jarray); + host->rrdpush.path.array = callocz(items, sizeof(*host->rrdpush.path.array)); + host->rrdpush.path.size = items; + + for (size_t i = 0; i < items; ++i) { + json_object *joption = json_object_array_get_idx(_jarray, i); + if (!json_object_is_type(joption, json_type_object)) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM PATH: Array item No %zu is not an object: %s", i, json); + continue; + } + + if(!parse_single_path(joption, "", &host->rrdpush.path.array[host->rrdpush.path.used], error)) { + stream_path_clear(&host->rrdpush.path.array[host->rrdpush.path.used]); + nd_log(NDLS_DAEMON, NDLP_ERR, + 
"STREAM PATH: Array item No %zu cannot be parsed: %s: %s", i, buffer_tostring(error), json); + } + else + host->rrdpush.path.used++; + } + } + + if(host->rrdpush.path.used > 1) { + // sorting is required in order to support stream_path_parent_disconnected() + qsort(host->rrdpush.path.array, host->rrdpush.path.used, + sizeof(*host->rrdpush.path.array), compare_by_hops); + } + + XXH128_hash_t new_hash = stream_path_hash_unsafe(host); + spinlock_unlock(&host->rrdpush.path.spinlock); + + if(!XXH128_isEqual(old_hash, new_hash)) { + if(!from_parent) + stream_path_send_to_parent(host); + + // when it comes from the child, we still need to send it back to the child + // including our own entry in it. + stream_path_send_to_child(host); + } + + return host->rrdpush.path.used > 0; +} diff --git a/src/streaming/stream_path.h b/src/streaming/stream_path.h new file mode 100644 index 00000000000000..e9d741580a0dcb --- /dev/null +++ b/src/streaming/stream_path.h @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STREAM_PATH_H +#define NETDATA_STREAM_PATH_H + +#include "stream_capabilities.h" + +#define STREAM_PATH_JSON_MEMBER "streaming_path" + +typedef enum __attribute__((packed)) { + STREAM_PATH_FLAG_NONE = 0, + STREAM_PATH_FLAG_ACLK = (1 << 0), +} STREAM_PATH_FLAGS; + +typedef struct stream_path { + STRING *hostname; // the hostname of the agent + ND_UUID host_id; // the machine guid of the agent + ND_UUID node_id; // the cloud node id of the agent + ND_UUID claim_id; // the cloud claim id of the agent + time_t since; // the timestamp of the last update + time_t first_time_t; // the oldest timestamp in the db + int16_t hops; // -1 = stale node, 0 = localhost, >0 the hops count + STREAM_PATH_FLAGS flags; // ACLK or NONE for the moment + STREAM_CAPABILITIES capabilities; // streaming connection capabilities +} STREAM_PATH; + +typedef struct rrdhost_stream_path { + SPINLOCK spinlock; + uint16_t size; + uint16_t used; + STREAM_PATH *array; +} 
RRDHOST_STREAM_PATH; + + +struct rrdhost; + +void stream_path_send_to_parent(struct rrdhost *host); +void stream_path_send_to_child(struct rrdhost *host); + +void rrdhost_stream_path_to_json(BUFFER *wb, struct rrdhost *host, const char *key, bool add_version); +void rrdhost_stream_path_clear(struct rrdhost *host, bool destroy); + +void stream_path_retention_updated(struct rrdhost *host); +void stream_path_node_id_updated(struct rrdhost *host); + +void stream_path_child_disconnected(struct rrdhost *host); +void stream_path_parent_disconnected(struct rrdhost *host); + +bool stream_path_set_from_json(struct rrdhost *host, const char *json, bool from_parent); + +#endif //NETDATA_STREAM_PATH_H diff --git a/src/web/api/http_auth.c b/src/web/api/http_auth.c index 9e45f5e166c694..5c4fffcaf7c61e 100644 --- a/src/web/api/http_auth.c +++ b/src/web/api/http_auth.c @@ -83,7 +83,7 @@ static uint64_t bearer_token_signature(nd_uuid_t token, struct bearer_token *bt) .created_s = bt->created_s, .expires_s = bt->expires_s, }; - uuid_copy(signature_payload.host_uuid, localhost->host_uuid); + uuid_copy(signature_payload.host_uuid, localhost->host_id.uuid); uuid_copy(signature_payload.token, token); uuid_copy(signature_payload.cloud_account_id, bt->cloud_account_id); memset(signature_payload.client_name, 0, sizeof(signature_payload.client_name)); @@ -96,7 +96,7 @@ static bool bearer_token_save_to_file(nd_uuid_t token, struct bearer_token *bt) CLEAN_BUFFER *wb = buffer_create(0, NULL); buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); buffer_json_member_add_uint64(wb, "version", 1); - buffer_json_member_add_uuid(wb, "host_uuid", localhost->host_uuid); + buffer_json_member_add_uuid(wb, "host_uuid", localhost->host_id.uuid); buffer_json_member_add_uuid(wb, "token", token); buffer_json_member_add_uuid(wb, "cloud_account_id", bt->cloud_account_id); buffer_json_member_add_string(wb, "client_name", bt->client_name); @@ -207,7 +207,7 @@ static bool 
bearer_token_parse_json(nd_uuid_t token, struct json_object *jobj, B return false; } - if(uuid_compare(host_uuid, localhost->host_uuid) != 0) { + if(uuid_compare(host_uuid, localhost->host_id.uuid) != 0) { buffer_flush(error); buffer_strcat(error, "Host UUID in JSON file does not match our host UUID"); return false; diff --git a/src/web/api/v1/api_v1_info.c b/src/web/api/v1/api_v1_info.c index b14d51190432c6..58a4a76f2cc2e0 100644 --- a/src/web/api/v1/api_v1_info.c +++ b/src/web/api/v1/api_v1_info.c @@ -42,7 +42,7 @@ static inline void web_client_api_request_v1_info_mirrored_hosts_status(BUFFER * buffer_json_member_add_boolean(wb, "reachable", (host == localhost || !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN))); buffer_json_member_add_string(wb, "guid", host->machine_guid); - buffer_json_member_add_uuid(wb, "node_id", host->node_id); + buffer_json_member_add_uuid(wb, "node_id", host->node_id.uuid); CLAIM_ID claim_id = rrdhost_claim_id_get(host); buffer_json_member_add_string(wb, "claim_id", claim_id_is_set(claim_id) ? 
claim_id.str : NULL); diff --git a/src/web/api/v2/api_v2_bearer.c b/src/web/api/v2/api_v2_bearer.c index 26cb3c93523eaf..b195abd404d35d 100644 --- a/src/web/api/v2/api_v2_bearer.c +++ b/src/web/api/v2/api_v2_bearer.c @@ -9,11 +9,11 @@ static bool verify_host_uuids(RRDHOST *host, const char *machine_guid, const cha if(strcmp(machine_guid, host->machine_guid) != 0) return false; - if(uuid_is_null(host->node_id)) + if(UUIDiszero(host->node_id)) return false; char buf[UUID_STR_LEN]; - uuid_unparse_lower(host->node_id, buf); + uuid_unparse_lower(host->node_id.uuid, buf); return strcmp(node_id, buf) == 0; } diff --git a/src/web/server/h2o/http_server.c b/src/web/server/h2o/http_server.c index 2bc6a6c1082414..dc83f40fcce684 100644 --- a/src/web/server/h2o/http_server.c +++ b/src/web/server/h2o/http_server.c @@ -290,7 +290,7 @@ static int netdata_uberhandler(h2o_handler_t *self, h2o_req_t *req) char host_uuid_str[UUID_STR_LEN]; if (host != NULL) - uuid_unparse_lower(host->host_uuid, host_uuid_str); + uuid_unparse_lower(host->host_id.uuid, host_uuid_str); nd_log(NDLS_ACCESS, NDLP_DEBUG, "HTTPD OK method: " PRINTF_H2O_IOVEC_FMT ", path: " PRINTF_H2O_IOVEC_FMT diff --git a/src/web/server/h2o/streaming.h b/src/web/server/h2o/streaming.h index dfc7b68fc8d03e..a30f4a8e223b51 100644 --- a/src/web/server/h2o/streaming.h +++ b/src/web/server/h2o/streaming.h @@ -3,8 +3,6 @@ #ifndef HTTPD_STREAMING_H #define HTTPD_STREAMING_H -#include "aclk/mqtt_websockets/c-rbuf/cringbuffer.h" - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-but-set-variable" diff --git a/src/web/server/web_client.c b/src/web/server/web_client.c index 8e5355eca4d21a..803bfe2d5f11c3 100644 --- a/src/web/server/web_client.c +++ b/src/web/server/web_client.c @@ -705,11 +705,12 @@ HTTP_VALIDATION http_request_validate(struct web_client *w) { if(last_pos > 4) last_pos -= 4; // allow searching for \r\n\r\n else last_pos = 0; - 
if(w->header_parse_last_size < last_pos) + if(w->header_parse_last_size <= last_pos) last_pos = 0; - is_it_valid = - url_is_request_complete_and_extract_payload(s, &s[last_pos], w->header_parse_last_size, &w->payload); + is_it_valid = url_is_request_complete_and_extract_payload(s, &s[last_pos], + w->header_parse_last_size, &w->payload); + if(!is_it_valid) { if(w->header_parse_tries > HTTP_REQ_MAX_HEADER_FETCH_TRIES) { netdata_log_info("Disabling slow client after %zu attempts to read the request (%zu bytes received)", w->header_parse_tries, buffer_strlen(w->response.data)); @@ -1824,11 +1825,12 @@ ssize_t web_client_receive(struct web_client *w) return web_client_read_file(w); ssize_t bytes; - ssize_t left = (ssize_t)(w->response.data->size - w->response.data->len); // do we have any space for more data? buffer_need_bytes(w->response.data, NETDATA_WEB_REQUEST_INITIAL_SIZE); + ssize_t left = (ssize_t)(w->response.data->size - w->response.data->len); + errno_clear(); if ( (web_client_check_conn_tcp(w)) && (netdata_ssl_web_server_ctx) ) {