diff --git a/.gitignore b/.gitignore index 91c020ab..1f2f2f81 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.swp exporter/postgres/queries.yml +hugo/.hugo_build.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 100a1ecf..d657110f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_install_hook_types: - pre-commit repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.6.0 hooks: - id: check-merge-conflict - id: check-symlinks @@ -15,25 +15,11 @@ repos: - id: trailing-whitespace args: ['--markdown-linebreak-ext=md'] - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook - rev: v9.0.0 + rev: v9.16.0 hooks: - id: commitlint stages: [commit-msg] additional_dependencies: ['@commitlint/config-conventional'] -- repo: https://github.com/ansible-community/ansible-lint.git - rev: v5.4.0 - # rev: v5.4.0 will not change until we drop ansible 2.9 - hooks: - - id: ansible-lint - always_run: true - pass_filenames: false - # do not add file filters here as ansible-lint does not give reliable - # results when called with individual files. - # https://github.com/ansible/ansible-lint/issues/611 - verbose: true - entry: ansible-lint --force-color -p -v . - additional_dependencies: - - 'ansible<2.10' - repo: https://github.com/codespell-project/codespell rev: v2.1.0 hooks: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b71174a8..69c57ed7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,41 @@ pgMonitor Release Notes .. contents:: Topics +v5.0.0 +====== + +Release Summary +--------------- + +Crunchy Data is pleased to announce the availability of pgMonitor 5.0.0. This release brings support for a new Prometheus exporter for PostgreSQL - sql_exporter. It also supports a new monitoring extension to make metric collection easier and more performant. This changelog contains all changes that have been added since the 4.11.0 release. + +Major Changes +------------- + +- grafana - Add new dashboards for sql_exporter support. New PostgreSQL Overview and PgBouncer direct metrics dashboards +- grafana - New Grafana minimum version is now 10.4. All dashboards have been updated to fix AngularJS deprecation warnings and re-exported from 10.4. +- grafana - Organize packages to allow better choice of available Grafana dashboards +- grafana - Remove top level general Overview dashboard +- pgmonitor-extension - Add more extensive support for materialized views and refreshed tables for expensive or custom metric queries +- pgmonitor-extension - Add support for using the PostgreSQL pgmonitor-extension to aid in metrics collection with sql_exporter +- postgres_exporter - Note that postgres_exporter is still supported but will be deprecated in a future version +- sql_exporter - Add support for directly connecting to PgBouncer to collect metrics +- sql_exporter - Add support for new PostgreSQL metrics collecting exporter (sql_exporter) + +Minor Changes +------------- + +- prometheus - Added OOMKiller Alert using node_exporter metrics + +Bugfixes +-------- + +- docs - add reference links to upstream configuration docs +- exporter - fix the pgbackrest-info.sh to force the necessary console output level that it expects +- grafana - fix some queries that were searching on the wrong label (datname vs. 
dbname) +- sql_exporter - add new metric for n_tup_newpage_upd +- sql_exporter - use the new views from pgmonitor-extension instead of full queries + v4.11.0 ======= diff --git a/blackbox_exporter/common/blackbox_exporter.sysconfig b/blackbox_exporter/common/blackbox_exporter.sysconfig index 4519a88c..faccbd71 100644 --- a/blackbox_exporter/common/blackbox_exporter.sysconfig +++ b/blackbox_exporter/common/blackbox_exporter.sysconfig @@ -7,4 +7,3 @@ # This file must be in a pathname that matches the EnvironmentFile entry in the service file (Default: /etc/sysconfig/blackbox_exporter) # OPT="--config.file=/etc/blackbox_exporter/crunchy-blackbox.yml" - diff --git a/build/packages.yml b/build/packages.yml index 1f3c3d7d..9f98b622 100644 --- a/build/packages.yml +++ b/build/packages.yml @@ -108,7 +108,7 @@ packages: - sql-exporter: files: - - { source: "https://github.com/CrunchyData/pgmonitor/sql_exporter/linux/crunchy-sql-exporter@.service" target: "/usr/lib/systemd/system/crunchy-sql-exporter@.service", mode: "0645", type: "file", owner: "root", group: "root" } + - { source: "https://github.com/CrunchyData/pgmonitor/sql_exporter/linux/crunchy-sql-exporter@.service", target: "/usr/lib/systemd/system/crunchy-sql-exporter@.service", mode: "0645", type: "file", owner: "root", group: "root" } - { target: "/usr/bin/sql_exporter", mode: "0755", type: "file", owner: "root", group: "root" } - { target: "/etc/sql_exporter", type: "folder", mode: "0755", owner: "root", group: "root" } upstream_repo: "https://github.com/burningalchemist/sql_exporter" @@ -133,7 +133,7 @@ packages: pkg_conflict: - { pkg_name: "pgmonitor-pg-common" } pkg_dependency: - - { pkg_name: "sql-exporter", gte: 0.13.0, lt: 0.15.0 } + - { pkg_name: "sql-exporter", gte: 0.13.0, lt: 0.16.0 } - grafana: @@ -160,9 +160,9 @@ packages: files: - { source: "grafana/linux/Filesystem_Details.json", target: "/etc/grafana/crunchy_dashboards/", mode: "0640", type: "file", rpm_new: false, owner: "grafana", group: "grafana" } - { source: "grafana/linux/Network_Details.json", target: "/etc/grafana/crunchy_dashboards/", mode: "0640", type: "file", rpm_new: false, owner: "grafana", group: "grafana" } - - { source: "grafana/linux/OS_Details.json", target: "/etc/grafana/crunchy_dashboards/", mode: "0640", type: "file", rpm_new: false, owner: "grafana", group: "grafana" } - - { source: "grafana/linux/OS_Overview.json", target: "/etc/grafana/crunchy_dashboards/", mode: "0640", type: "file", rpm_new: false, owner: "grafana", group: "grafana" } - pkg_dependency: + - { source: "grafana/linux/OS_Details.json", target: "/etc/grafana/crunchy_dashboards/", mode: "0640", type: "file", rpm_new: false, owner: "grafana", group: "grafana" } + - { source: "grafana/linux/OS_Overview.json", target: "/etc/grafana/crunchy_dashboards/", mode: "0640", type: "file", rpm_new: false, owner: "grafana", group: "grafana" } + pkg_dependency: - { pkg_name: "pgmonitor-grafana-extras-common" } upstream_repo: "https://github.com/CrunchyData/pgmonitor" diff --git a/changelogs/changelog.yaml b/changelogs/changelog.yaml index 007db342..32651c3e 100644 --- a/changelogs/changelog.yaml +++ b/changelogs/changelog.yaml @@ -109,3 +109,49 @@ releases: - 355.yml - release_summary.yml release_date: '2023-07-25' + 5.0.0: + changes: + bugfixes: + - docs - add reference links to upstream configuration docs + - exporter - fix the pgbackrest-info.sh to force the necessary console output + level that it expects + - grafana - fix some queries that were searching on the wrong label (datname + vs. 
dbname) + - sql_exporter - add new metric for n_tup_newpage_upd + - sql_exporter - use the new views from pgmonitor-extension instead of full + queries + major_changes: + - grafana - Add new dashboards for sql_exporter support. New PostgreSQL Overview + and PgBouncer direct metrics dashboards + - grafana - New Grafana minimum version is now 10.4. All dashboards have been + updated to fix AngularJS deprecation warnings and re-exported from 10.4. + - grafana - Organize packages to allow better choice of available Grafana dashboards + - grafana - Remove top level general Overview dashboard + - pgmonitor-extension - Add more extensive support for materialized views and + refreshed tables for expensive or custom metric queries + - pgmonitor-extension - Add support for using the PostgreSQL pgmonitor-extension + to aid in metrics collection with sql_exporter + - postgres_exporter - Note that postgres_exporter is still supported but will + be deprecated in a future version + - sql_exporter - Add support for directly connecting to PgBouncer to collect + metrics + - sql_exporter - Add support for new PostgreSQL metrics collecting exporter + (sql_exporter) + minor_changes: + - prometheus - Added OOMKiller Alert using node_exporter metrics + release_summary: Crunchy Data is pleased to announce the availability of pgMonitor + 5.0.0. This release brings support for a new Prometheus exporter for PostgreSQL + - sql_exporter. It also supports a new monitoring extension to make metric + collection easier and more performant. This changelog contains all changes + that have been added since the 4.11.0 release. + fragments: + - 361.yml + - 387.yml + - 397.yml + - 399.yml + - 401.yml + - 407.yml + - 411.yml + - 412.yml + - release_summary.yml + release_date: '2024-07-09' diff --git a/changelogs/fragments/361.yml b/changelogs/fragments/361.yml deleted file mode 100644 index e005e427..00000000 --- a/changelogs/fragments/361.yml +++ /dev/null @@ -1,10 +0,0 @@ -major_changes: - - sql_exporter - Add support for new PostgreSQL metrics collecting exporter: sql_exporter - - sql_exporter - Add support for directly connecting to PgBouncer to collect metrics - - pgmonitor-extension - Add support for using the PostgreSQL pgmonitor-extension to aid in metrics collection with sql_exporter - - pgmonitor-extension - Add more extensive support for materialized views and refreshed tables for expensive or custom metric queries - - postgres_exporter - Note that postgres_exporter is still supported but will be deprecated in a future version - - grafana - New Grafana minimum version is now 10.4. All dashboards have been updated to fix AngularJS deprecation warnings and re-exported from 10.4. - - grafana - Add new dashboards for sql_exporter support. 
New PostgreSQL Overview and PgBouncer direct metrics dashboards - - grafana - Organize packages to allow better choice of available Grafana dashboards - - grafana - Remove top level general Overview dashboard diff --git a/changelogs/fragments/387.yml b/changelogs/fragments/387.yml deleted file mode 100644 index eb23a550..00000000 --- a/changelogs/fragments/387.yml +++ /dev/null @@ -1,2 +0,0 @@ -minor_changes: - - prometheus - Added OOMKiller Alert using node_exporter metrics diff --git a/changelogs/fragments/397.yml b/changelogs/fragments/397.yml deleted file mode 100644 index a4052538..00000000 --- a/changelogs/fragments/397.yml +++ /dev/null @@ -1,2 +0,0 @@ -trivial: - - docs - Update OS and PgBouncer version support diff --git a/changelogs/fragments/399.yml b/changelogs/fragments/399.yml deleted file mode 100644 index 14d550ef..00000000 --- a/changelogs/fragments/399.yml +++ /dev/null @@ -1,6 +0,0 @@ -trivial: - - packaging - Update the build file to use the alertmanager storage folder that the packages have been setting up (/var/lib/ccp_monitoring/alertmanager) - - packaging - Update the build file to set blackbox-exporter-extras group to ccp_monitoring for consistency with other extras packages - - packaging - Take out remove_files lines since the files referenced have been gone for a long time now and packages aren't doing that anymore - - packaging - Take out package conflicts with containers packages. There are no containers packages being done at this time - diff --git a/changelogs/fragments/401.yml b/changelogs/fragments/401.yml deleted file mode 100644 index ad6c5542..00000000 --- a/changelogs/fragments/401.yml +++ /dev/null @@ -1,2 +0,0 @@ -bugfixes: - - exporter - fix the pgbackrest-info.sh to force the necessary console output level that it expects diff --git a/grafana/common/crunchy_grafana_datasource.yml b/grafana/common/crunchy_grafana_datasource.yml index 13c35436..4221f572 100644 --- a/grafana/common/crunchy_grafana_datasource.yml +++ b/grafana/common/crunchy_grafana_datasource.yml @@ -17,4 +17,3 @@ datasources: editable: False orgId: 1 version: 1 - diff --git a/grafana/containers/postgresql_details.json b/grafana/containers/postgresql_details.json index 548b973b..bb81b85b 100644 --- a/grafana/containers/postgresql_details.json +++ b/grafana/containers/postgresql_details.json @@ -701,7 +701,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(ccp_stat_database_xact_commit{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m])) + sum(irate(ccp_stat_database_xact_rollback{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_xact_commit{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m])) + sum(irate(ccp_stat_database_xact_rollback{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "interval": "", @@ -712,7 +712,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_pg_stat_statements_total_calls_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_pg_stat_statements_total_calls_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "interval": "", @@ -1172,7 +1172,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(ccp_stat_database_tup_fetched{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": 
"sum(irate(ccp_stat_database_tup_fetched{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "interval": "", @@ -1183,7 +1183,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_stat_database_tup_inserted{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_tup_inserted{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1193,7 +1193,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_stat_database_tup_updated{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_tup_updated{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1203,7 +1203,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_stat_database_tup_deleted{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_tup_deleted{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Deleted", @@ -1212,7 +1212,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_stat_database_tup_returned{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_tup_returned{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -1435,7 +1435,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(ccp_stat_database_deadlocks{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(rate(ccp_stat_database_deadlocks{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1445,7 +1445,7 @@ "step": 2 }, { - "expr": "sum(rate(ccp_stat_database_conflicts{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(rate(ccp_stat_database_conflicts{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "DeadLocks", @@ -1454,7 +1454,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_stat_database_xact_commit{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_xact_commit{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "interval": "", @@ -1465,7 +1465,7 @@ "step": 2 }, { - "expr": "sum(irate(ccp_stat_database_xact_rollback{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "expr": "sum(irate(ccp_stat_database_xact_rollback{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}[5m]))", "format": "time_series", "hide": false, "interval": "", @@ -1831,7 +1831,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"accessexclusivelock\"})", + "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\",mode=\"accessexclusivelock\"})", "format": "time_series", "hide": false, "interval": "", @@ -1841,7 +1841,7 @@ "step": 2 }, { - "expr": "sum by (mode) 
(ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"exclusivelock\"})", + "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\",mode=\"exclusivelock\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1850,7 +1850,7 @@ "step": 2 }, { - "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"rowexclusivelock\"})", + "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\",mode=\"rowexclusivelock\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1859,7 +1859,7 @@ "step": 2 }, { - "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"sharerowexclusivelock\"})", + "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\",mode=\"sharerowexclusivelock\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1868,7 +1868,7 @@ "step": 2 }, { - "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"shareupdateexclusivelock\"})", + "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\",mode=\"shareupdateexclusivelock\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1877,7 +1877,7 @@ "step": 2 }, { - "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"accesssharelock\"})", + "expr": "sum by (mode) (ccp_locks_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\",mode=\"accesssharelock\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{mode}}", diff --git a/hugo/archetypes/default.md b/hugo/archetypes/default.md index 00e77bd7..26f317f3 100644 --- a/hugo/archetypes/default.md +++ b/hugo/archetypes/default.md @@ -3,4 +3,3 @@ title: "{{ replace .Name "-" " " | title }}" date: {{ .Date }} draft: true --- - diff --git a/hugo/content/_index.md b/hugo/content/_index.md index 8e2b9d18..68606910 100644 --- a/hugo/content/_index.md +++ b/hugo/content/_index.md @@ -39,11 +39,12 @@ Running pgMonitor will give you confidence in understanding how well your Postgr pgMonitor is an open-source monitoring solution for PostgreSQL and the systems that it runs on. pgMonitor came from the need to provide a way to easily create a visual environment to monitor all the metrics a database administrator needs to proactively ensure the health of the system. -pgMonitor combines multiple open-source software packages and necessary configuration to create a robust PostgreSQL monitoring environment. These include: +pgMonitor combines multiple open-source software services to create a robust PostgreSQL monitoring environment. These include: - [Prometheus](https://prometheus.io/) - an open-source metrics collector that is highly customizable. - [Grafana](https://grafana.com/) - an open-source data visualizer that allows you to generate many different kinds of charts and graphs. -- [PostgreSQL Exporter](https://github.com/wrouesnel/postgres_exporter) - an open-source data export to Prometheus that supports collecting metrics from any PostgreSQL server version 9.1 and above. 
+- [SQL Exporter](https://github.com/burningalchemist/sql_exporter) - an open-source exporter for Prometheus that supports collecting metrics from multiple database systems including PostgreSQL. +- [pgMonitor extension](https://github.com/CrunchyData/pgmonitor-extension) - a PostgreSQL extension that provides a means to collect metrics that can be used by an external collection source. ![pgMonitor](/images/crunchy-monitoring-arch.png) @@ -51,8 +52,7 @@ pgMonitor combines multiple open-source software packages and necessary configur ### Operating Systems -- RHEL 7/8/9 (Build/Run Testing, Setup Instructions) -- CentOS 7 (Build/Run Testing, Setup Instructions) +- RHEL 8/9 (Build/Run Testing, Setup Instructions) - Ubuntu 20/22 (Build/Run Testing) ### PostgreSQL @@ -63,14 +63,10 @@ pgMonitor combines multiple open-source software packages and necessary configur - Version 3.5 and greater is supported for the Grafana dashboard -#### Known issues - -- PostgreSQL 10+ SCRAM-SHA-256 encrypted passwords are supported on the Linux version of pgMonitor 4.0 or later only. - ### PgBouncer -- PgBouncer 1.22 -- pgbouncer_fdw 1.1.0 +- PgBouncer 1.21+ +- pgbouncer_fdw 1.1.0 (optional with sql_exporter) ## Installation @@ -82,6 +78,8 @@ Installation instructions for each package are provided in that packages subfold ### 3. [Grafana](/grafana) +Notes on upgrading can be found in each relevant section. + ## Version History For the [full history](/changelog) of pgMonitor, please see the [CHANGELOG](/changelog). @@ -99,4 +97,3 @@ Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. CRUNCHY DATA SOLUTIONS, INC. PROVIDES THIS GUIDE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF NON INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Crunchy, Crunchy Data Solutions, Inc. and the Crunchy Hippo Logo are trademarks of Crunchy Data Solutions, Inc. All Rights Reserved. - diff --git a/hugo/content/changelog/_index.md b/hugo/content/changelog/_index.md index 1daea842..9369afb1 100644 --- a/hugo/content/changelog/_index.md +++ b/hugo/content/changelog/_index.md @@ -3,6 +3,39 @@ title: "Changelog" draft: false weight: 5 --- +## 5.0.0 + +### Release Summary + +Crunchy Data is pleased to announce the availability of pgMonitor 5.0.0. This release brings support for a new Prometheus exporter for PostgreSQL - sql_exporter. It also supports a new monitoring extension to make metric collection easier and more performant. This changelog contains all changes that have been added since the 4.11.0 release. + +Please see the [5.0.0 upgrade documentation](v5_upgrade) for more information on converting to the new sql_exporter. + +### Major Changes + +- grafana - Add new dashboards for sql_exporter support. New PostgreSQL Overview and PgBouncer direct metrics dashboards +- grafana - New Grafana minimum version is now 10.4. All dashboards have been updated to fix AngularJS deprecation warnings and re-exported from 10.4. 
+- grafana - Organize packages to allow better choice of available Grafana dashboards
+- grafana - Remove top level general Overview dashboard
+- pgmonitor-extension - Add more extensive support for materialized views and refreshed tables for expensive or custom metric queries
+- pgmonitor-extension - Add support for using the PostgreSQL pgmonitor-extension to aid in metrics collection with sql_exporter
+- postgres_exporter - Note that postgres_exporter is still supported but will be deprecated in a future version
+- sql_exporter - Add support for directly connecting to PgBouncer to collect metrics
+- sql_exporter - Add support for new PostgreSQL metrics collecting exporter (sql_exporter)
+
+### Minor Changes
+
+- prometheus - Added OOMKiller Alert using node_exporter metrics
+
+### Bugfixes
+
+- docs - add reference links to upstream configuration docs
+- exporter - fix the pgbackrest-info.sh to force the necessary console output level that it expects
+- grafana - fix some queries that were searching on the wrong label (datname vs. dbname)
+- sql_exporter - add new metric for n_tup_newpage_upd
+- sql_exporter - use the new views from pgmonitor-extension instead of full queries
+
+
 ## 4.11.0
 
 ### Release Summary
diff --git a/hugo/content/changelog/v5_upgrade/_index.md b/hugo/content/changelog/v5_upgrade/_index.md
new file mode 100644
index 00000000..19b02889
--- /dev/null
+++ b/hugo/content/changelog/v5_upgrade/_index.md
@@ -0,0 +1,80 @@
+---
+title: "Upgrading to pgMonitor v5.0.0"
+draft: false
+weight: 5
+---
+
+Version 5 of pgMonitor introduces a new exporter that will be used for collecting PostgreSQL metrics: [sql_exporter](https://github.com/burningalchemist/sql_exporter). Converting to this new exporter will involve cleaning up the old postgres_exporter, updating Prometheus targets, and installing new Grafana dashboards.
+
+## Cleanup
+
+### postgres_exporter
+
+This new exporter for PostgreSQL allows just a single exporter to connect to all databases within a PostgreSQL instance as well as connecting directly to PgBouncer to collect its metrics.
+There is no longer any need for the postgres_exporter to be running, so its services can be shut down. Some examples of those service names based on the old documentation are as follows:
+
+```
+sudo systemctl stop crunchy-postgres-exporter@postgres_exporter_pg##
+sudo systemctl disable crunchy-postgres-exporter@postgres_exporter_pg##
+
+sudo systemctl stop crunchy-postgres-exporter@postgres_exporter_pg##_per_db
+sudo systemctl disable crunchy-postgres-exporter@postgres_exporter_pg##_per_db
+```
+
+Note that the values after the @ symbol may be different depending on the sysconfig files that have been created for your exporters. There may also be exporters running for multiple clusters, and we recommend replacing all of the existing postgres_exporters with the new sql_exporter.
+
+If you've installed pgMonitor with the packages provided by Crunchy Data, those packages can now be uninstalled as well.
+
+| Package Name | Description |
+|--------------------------------|---------------------------------------------------------------------------|
+| pgbouncer_fdw | Package for the pgbouncer_fdw extension |
+| pgmonitor-pg-common | Package containing postgres_exporter items common for all versions of PostgreSQL |
+| pgmonitor-pg##-extras | Crunchy-optimized configurations for postgres_exporter. Note that each major version of PostgreSQL has its own extras package (pgmonitor-pg13-extras, pgmonitor-pg14-extras, etc) |
+| postgres_exporter | Base package for postgres_exporter |
+
+Note that the pgbouncer_fdw is no longer required to monitor PgBouncer but it can still be used with sql_exporter if desired. Per previous instructions, it was usually only installed on the global database. The extension can be removed as follows if it's not needed.
+```
+DROP EXTENSION pgbouncer_fdw;
+```
+
+If postgres_exporter was not set up with packages, you can now manually remove all the related files. Note that the ## is replaced with the major version of PG that was being monitored. It is possible that multiple versions of PG had been monitored and copies of these files could exist for all versions. Also, the sysconfig files listed below are the defaults used in examples; there may be additional postgres_exporter sysconfig files on your system(s).
+
+| System Location |
+|-----------------|
+| /etc/postgres_exporter/ |
+| /usr/lib/systemd/system/crunchy-postgres-exporter@.service |
+| /etc/sysconfig/postgres_exporter_pg## |
+| /etc/sysconfig/postgres_exporter_pg##_per_db |
+| /usr/bin/pgbackrest-info.sh |
+| /etc/pgmonitor.conf |
+
+
+### Prometheus
+All postgres_exporter Prometheus targets can now be removed. The default location for Prometheus targets is `/etc/prometheus/auto.d/`, but please check your Prometheus installation for possible additional target locations. In the identified location(s), remove any targets for the postgres_exporter. The default ports for postgres_exporter were 9187 and 9188, so any targets with these ports should be examined for removal. Note that if alerting had previously been enabled, the previous step likely caused multiple alerts to fire; once this step is done, you can simply reload Prometheus to clear these targets and any related alerts should resolve themselves.
+
+```bash
+sudo systemctl reload prometheus
+```
+Any alerts related to postgres_exporter can also be removed from the files contained in the default alert files location `/etc/prometheus/alert-rules.d/`. Note that the default example alert file was named `crunchy-alert-rules-pg.yml`.
+
+### Grafana
+
+Version 5.x of pgMonitor raises the minimum required version of Grafana to 10.4. It also removes dashboards related to postgres_exporter and adds new ones for sql_exporter. If you are simply using the dashboards provided by pgMonitor, the easiest method to update is to remove the old ones and install the new ones.
+
+If you are using Crunchy-provided packages, simply uninstall the old packages. It's recommended to follow the non-package removal process below as well to ensure things are cleaned up properly.
+
+| Package Name | Description |
+|---------------------------|-------------------------------------------------------------------|
+| pgmonitor-grafana-extras | Crunchy configurations for datasource & dashboard provisioning |
+
+If you didn't use the Crunchy-provided packages, ensure the files in the following folder are removed:
+
+| System Location |
+|-----------------|
+| /etc/grafana/crunchy_dashboards |
+
+## Set up sql_exporter
+
+At this point, you should just be able to follow the [standard setup instructions](https://access.crunchydata.com/documentation/pgmonitor/latest/) for the latest version of pgMonitor. This will set up the new exporter, Prometheus targets, and new Grafana dashboards.
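+
+As a rough reference for that last step, a new Prometheus scrape target for sql_exporter might look like the sketch below. The host, port, and label values here are illustrative assumptions (upstream sql_exporter listens on port 9399 by default); the example target files shipped with pgMonitor remain the authoritative format.
+
+```yaml
+# Hypothetical example file: /etc/prometheus/auto.d/pg-cluster1.yml
+- targets: ['192.0.2.10:9399']
+  labels:
+    exp_type: 'pg'
+    cluster_name: 'cluster1'
+```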
diff --git a/hugo/content/exporter/_index.md b/hugo/content/exporter/_index.md index 5b85fd5f..aa47ef3e 100644 --- a/hugo/content/exporter/_index.md +++ b/hugo/content/exporter/_index.md @@ -18,7 +18,7 @@ The Linux instructions below use RHEL, but any Linux-based system should work. [ - [System](#system) - [Legacy postgres_exporter Setup](#postgres-exporter) - + IMPORTANT NOTE: As of pgMonitor version 5.0.0, postgres_exporter has been deprecated in favor of sql_exporter. Support for postgres_exporter is still possible with 5.0, but only for bug fixes while custom queries are still supported. No new features will be added using postgres_exporter and it will be fully obsoleted in a future version of pgMonitor. We recommend migrating to sql_exporter as soon as possible. ## Installation {#installation} @@ -64,7 +64,7 @@ sudo useradd -m -d /var/lib/ccp_monitoring ccp_monitoring All executables installed via the above releases are expected to be in the {{< shell >}}/usr/bin{{< /shell >}} directory. A base node_exporter systemd file is expected to be in place already. An example one can be found here: -https://github.com/lest/prometheus-rpm/tree/master/node_exporter +https://github.com/prometheus/node_exporter/tree/master/examples/systemd A base blackbox_exporter systemd file is also expected to be in place. No examples are currently available. @@ -87,9 +87,9 @@ The following pgMonitor configuration files should be placed according to the fo ##### sql_exporter -sql_exporter takes advantage of the Crunchy Data pgmonitor-extension (https://github.com/CrunchyData/pgmonitor-extension) to provide a much easier configuration and setup. The extension takes care of creating all the necessary objects inside the database. +sql_exporter takes advantage of the Crunchy Data pgmonitor-extension (https://github.com/CrunchyData/pgmonitor-extension) to provide a much easier configuration and setup. The extension takes care of creating all the necessary objects inside the database. -The mimimum required version of pgmonitor-extension is currently 1.0.0. +The minimum required version of pgmonitor-extension is currently 1.0.0. 
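+
+As a minimal sketch of that step, assuming the extension is exposed under the name `pgmonitor` once installed (an assumption; check the pgmonitor-extension documentation for the exact extension name and any prerequisites), enabling it on a database to be monitored would look like:
+
+```sql
+-- Hypothetical extension name; confirm against the pgmonitor-extension docs
+CREATE EXTENSION pgmonitor;
+```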
The following pgMonitor configuration files should be placed according to the following mapping: @@ -97,12 +97,12 @@ The following pgMonitor configuration files should be placed according to the fo |------------------------------|-----------------| | sql_exporter/common/*.yml | /etc/sql_exporter/*.yml | | sql_exporter/common/*.sql | /etc/sql_exporter/*.sql | -| linux/crunchy-sql-exporter@.service | /usr/lib/systemd/system/crunchy-sql-exporter@.service | +| sql_exporter/linux/crunchy-sql-exporter@.service | /usr/lib/systemd/system/crunchy-sql-exporter@.service | | sql_exporter/linux/sql_exporter.sysconfig | /etc/sysconfig/sql_exporter | | sql_exporter/linux/crontab.txt | /etc/sysconfig/crontab.txt | | postgres_exporter/linux/pgbackrest-info.sh | /usr/bin/pgbackrest-info.sh | | postgres_exporter/linux/pgmonitor.conf | /etc/pgmonitor.conf (multi-backrest-repository/container environment only) | -| sql_exporter/common/sql_exporter.yml.example | /etc/sql_exporter/sql_exporter.yml.example | +| sql_exporter/common/sql_exporter.yml.example | /etc/sql_exporter/sql_exporter.yml | ##### blackbox_exporter @@ -117,6 +117,7 @@ The following pgMonitor configuration files should be placed according to the fo ## Upgrading {#upgrading} +* If you are upgrading to version 5.0 and transitioning to using the new sql_exporter, please see the documentation in [Upgrading to pgMonitor v5.0.0](/changelog/v5_upgrade/) * See the [CHANGELOG ](/changelog) for full details on both major & minor version upgrades. ## Setup {#setup} @@ -128,7 +129,7 @@ The following pgMonitor configuration files should be placed according to the fo The following files contain defaults that should enable the exporters to run effectively on your system for the purposes of using pgMonitor. Please take some time to review them. If you need to modify them, see the notes in the files for more details and recommendations: -- {{< shell >}}/etc/systemd/system/node_exporter.service.d/crunchy-node-exporter-service-rhel{{< /shell >}} +- {{< shell >}}/etc/systemd/system/node_exporter.service.d/crunchy-node-exporter-service-rhel.conf{{< /shell >}} - {{< shell >}}/etc/sysconfig/node_exporter{{< /shell >}} - {{< shell >}}/etc/sysconfig/sql_exporter{{< /shell >}} @@ -153,7 +154,7 @@ You will need to restart your PostgreSQL instance for the change to take effect. The pgmonitor-extension uses its own background worker to refresh metric data. -The following statement only needs to be run on the "global" database, typically the "postgres" database. If you want the pg_stat_statements view to be visible in other databases, this statement must be run there as well. +The following statement only needs to be run on the "global" database, typically the "postgres" database. If you want the pg_stat_statements view to be visible in other databases, this statement must be run there as well. ```sql CREATE EXTENSION pg_stat_statements; @@ -196,7 +197,7 @@ This should generate one or more statements similar to the following: ```sql GRANT CONNECT ON DATABASE "postgres" TO ccp_monitoring; ``` -Run these grant statements to then allow monitoring to connect. +Run these grant statements to then allow monitoring to connect. ##### Bloat setup @@ -254,7 +255,7 @@ sudo systemctl status crunchy-sql-exporter@sql_exporter_cluster2 ``` -### Monitoring multiple databases +### Monitoring multiple databases sql_exporter can connect to as many databases as you need. 
Simply add another connection configuration to the `job_name` in the sql_exporter configuration file for the other databases you wish to monitor. If making use of pgMonitor's metrics, ensure that the pgmonitor-extension is also installed on those target databases. @@ -280,11 +281,11 @@ jobs: ## Metrics Collected {#metrics-collected} -The metrics collected by our exporters are outlined below. +The metrics collected by our exporters are outlined below. ### PostgreSQL {#postgresql} -PostgreSQL metrics are collected by [sql_exporter](https://github.com/burningalchemist/sql_exporter). pgMonitor uses custom queries for its PG metrics. +PostgreSQL metrics are collected by [sql_exporter](https://github.com/burningalchemist/sql_exporter). pgMonitor uses custom queries for its PG metrics. #### Common Metrics @@ -307,10 +308,10 @@ Metrics contained in the `queries_global.yml` file. These metrics are common to * *ccp_connection_stats_max_connections* - Current value of max_connections for reference - * *ccp_connection_stats_max_idle_in_txn_time* - Runtime of longest idle in transaction (IIT) session. + * *ccp_connection_stats_max_idle_in_txn_time* - Runtime of longest idle in transaction (IIT) session. + + * *ccp_connection_stats_max_query_time* - Runtime of longest general query (inclusive of IIT). - * *ccp_connection_stats_max_query_time* - Runtime of longest general query (inclusive of IIT). - * *ccp_connection_stats_max_blocked_query_time* - Runtime of the longest running query that has been blocked by a heavyweight lock * *ccp_locks_count* - Count of active lock types per database @@ -341,7 +342,7 @@ Metrics contained in the `queries_global.yml` file. These metrics are common to * *ccp_wal_activity_last_5_min_size_bytes* - Current size in bytes of the last 5 minutes of WAL generation. Includes recycled WALs. -The meaning of the following `ccp_transaction_wraparound` metrics, and how to manage when they are triggered, is covered more extensively in this blog post: https://info.crunchydata.com/blog/managing-transaction-id-wraparound-in-postgresql +The meaning of the following `ccp_transaction_wraparound` metrics, and how to manage when they are triggered, is covered more extensively in this blog post: https://info.crunchydata.com/blog/managing-transaction-id-wraparound-in-postgresql * *ccp_transaction_wraparound_percent_towards_emergency_autovac* - Recommended thresholds set to 75%/95% when first evaluating vacuum settings on new systems. Once those have been reviewed and at least one instance-wide vacuum has been run, recommend thresholds of 110%/125%. Reaching 100% is not a cause for immediate concern, but alerting above 100% for extended periods of time means that autovacuum is not able to keep up with current transaction rate and needs further tuning. @@ -442,7 +443,7 @@ Bloat metrics are only available if the `pg_bloat_check` script has been setup t #### pgBouncer Metrics -The following metric prefixes correspond to the SHOW command views found in the [pgBouncer documentation](https://www.pgbouncer.org/usage.html). Each column found in the SHOW view is a separate metric under the respective prefix. Ex: `ccp_pgbouncer_pools_client_active` corresponds to the `SHOW POOLS` view's `client_active` column. +The following metric prefixes correspond to the SHOW command views found in the [pgBouncer documentation](https://www.pgbouncer.org/usage.html). Each column found in the SHOW view is a separate metric under the respective prefix. 
Ex: `ccp_pgbouncer_pools_client_active` corresponds to the `SHOW POOLS` view's `client_active` column. sql_exporter can connect directly to pgBouncer with some specific configuration options set. See the example `sql_exporter.yml` and the `crunchy_pgbouncer_collector_###.yml` file. @@ -535,7 +536,7 @@ Note that {{< shell >}}/etc/sysconfig/postgres_exporter_pg##{{< /shell >}} & {{< | queries_global.yml | postgres_exporter query file with minimal recommended queries that are common across all PG versions and only need to be run once per database instance. | | queries_global_dbsize.yml | postgres_exporter query file that contains metrics for monitoring database size. This is a separate file to allow the option to use a materialized view for very large databases | | queries_global_matview.yml | postgres_exporter query file that contains alternative metrics that use materialized views of common metrics across all PG versions | -| queries_per_db.yml | postgres_exporter query file with queries that gather per databse stats. WARNING: If your database has many tables this can greatly increase the storage requirements for your prometheus database. If necessary, edit the query to only gather tables you are interested in statistics for. The "PostgreSQL Details" and the "CRUD Details" Dashboards use these statistics. | +| queries_per_db.yml | postgres_exporter query file with queries that gather per database stats. WARNING: If your database has many tables this can greatly increase the storage requirements for your prometheus database. If necessary, edit the query to only gather tables you are interested in statistics for. The "PostgreSQL Details" and the "CRUD Details" Dashboards use these statistics. | | queries_per_db_matview.yml | postgres_exporter query files that contains alternative metrics that use materialized views of per database stats | | queries_general.yml | postgres_exporter query file for queries that are specific to the version of PostgreSQL that is being monitored. | | queries_backrest.yml | postgres_exporter query file for monitoring pgBackRest backup status. By default, new backrest data is only collected every 10 minutes to avoid excessive load when there are large backup lists. See sysconfig file for exporter service to adjust this throttling. | @@ -543,7 +544,7 @@ Note that {{< shell >}}/etc/sysconfig/postgres_exporter_pg##{{< /shell >}} & {{< | queries_pg_stat_statements.yml | postgres_exporter query file for specific pg_stat_statements metrics that are most useful for monitoring and trending. | -By default, there are two postgres_exporter services expected to be running. One connects to the default {{< shell >}}postgres{{< /shell >}} database that most PostgreSQL instances come with and is meant for collecting global metrics that are the same on all databases in the instance (connection/replication statistics, etc). This service uses the sysconfig file {{< shell >}}postgres_exporter_pg##{{< /shell >}}. Connect to this database and run the setup.sql script to install the required database objects for pgMonitor. +By default, there are two postgres_exporter services expected to be running. One connects to the default {{< shell >}}postgres{{< /shell >}} database that most PostgreSQL instances come with and is meant for collecting global metrics that are the same on all databases in the instance (connection/replication statistics, etc). This service uses the sysconfig file {{< shell >}}postgres_exporter_pg##{{< /shell >}}. 
Connect to this database and run the setup.sql script to install the required database objects for pgMonitor. The second postgres_exporter service is used to collect per-database metrics and uses the sysconfig file {{< shell >}}postgres_exporter_pg##_per_db{{< /shell >}}. By default it is set to also connect to the {{< shell >}}postgres{{< /shell >}} database, but you can add as many additional connection strings to this service for each individual database that you want metrics for. Per-db metrics include things like table/index statistics and bloat. See the section below for monitorig multiple databases for how to do this. @@ -569,7 +570,7 @@ For replica servers, the setup is the same except that the setup.sql file does n With large databases/tables and some other conditions, certain metrics can cause excessive load. For those cases, materialized views and alternative metric queries have been made available. The materialized views are refreshed on their own schedule independent of the Prometheus data scrape, so any load that may be associated with gathering the underlying data is mitigated. A configuration table, seen below, contains options for how often these materialized views should be refreshed. And a single procedure can be called to refresh all materialized views relevant to monitoring. -For every database that will be collecting materialized view metrics, you will have to run the {{< shell >}}setup_metric_views.sql{{< /shell >}} file against that database. This will likely need to be run as a superuser and must be run after running the base setup file mentioned above to create the necessary monitoring user first. +For every database that will be collecting materialized view metrics, you will have to run the {{< shell >}}setup_metric_views.sql{{< /shell >}} file against that database. This will likely need to be run as a superuser and must be run after running the base setup file mentioned above to create the necessary monitoring user first. ``` psql -U postgres -d alphadb -f setup_metric_views.sql psql -U postgres -d betadb -f setup_metric_views.sql @@ -598,6 +599,19 @@ https://github.com/CrunchyData/pgbouncer_fdw Once that is working, you should be able to add the {{< shell >}}queries_pgbouncer.yml{{< /shell >}} file to the {{< yaml >}}QUERY_FILE_LIST{{< /shell >}} for the exporter that is monitoring the database where the FDW was installed. +#### Enable Services + +To most easily allow the use of multiple postgres exporters, running multiple major versions of PostgreSQL, and to avoid maintaining many similar service files, a systemd template service file is used. The name of the sysconfig EnvironmentFile to be used by the service is passed as the value after the "@" and before ".service" in the service name. The default exporter's sysconfig file is named "postgres_exporter_pg##" and tied to the major version of postgres that it was installed for. A similar EnvironmentFile exists for the per-db service. Be sure to replace the ## in the below commands first! 
+ +```bash +sudo systemctl enable crunchy-postgres-exporter@postgres_exporter_pg## +sudo systemctl start crunchy-postgres-exporter@postgres_exporter_pg## +sudo systemctl status crunchy-postgres-exporter@postgres_exporter_pg## + +sudo systemctl enable crunchy-postgres-exporter@postgres_exporter_pg##_per_db +sudo systemctl start crunchy-postgres-exporter@postgres_exporter_pg##_per_db +sudo systemctl status crunchy-postgres-exporter@postgres_exporter_pg##_per_db +``` #### Monitoring multiple databases and/or running multiple postgres exporters (RHEL) Certain metrics are not cluster-wide, so multiple exporters must be run to avoid duplication when monitoring multiple databases in a single PostgreSQL instance. To collect these per-database metrics, an additional exporter service is required and pgMonitor provides this using the following query file: ({{< shell >}}queries_per_db.yml{{< /shell >}}). In Prometheus, you can then define the global and per-db exporter targets for a single job. This will place all the metrics that are collected for a single database instance together. @@ -626,4 +640,3 @@ Lastly, update the Prometheus auto.d target file to include the new exporter in #### General Metrics *pg_up* - Database is up and connectable by metric collector. This metric is only available with postgres_exporter - diff --git a/hugo/content/grafana/_index.md b/hugo/content/grafana/_index.md index 492136c5..9caad4db 100644 --- a/hugo/content/grafana/_index.md +++ b/hugo/content/grafana/_index.md @@ -13,7 +13,7 @@ weight: 3 ### Included Dashboards {#dashboards} -pgMonitor comes with several dashboards ready to be used with automatic provisioning. They provide examples of using the metrics from the postgres_exporter and node_exporter. Since provisioned dashboards cannot be edited directly in the web interface, if any custom changes are desired, it is recommmended to make a copy of them and make your changes there. +pgMonitor comes with several dashboards ready to be used with automatic provisioning. They provide examples of using the metrics from the postgres_exporter and node_exporter. Since provisioned dashboards cannot be edited directly in the web interface, if any custom changes are desired, it is recommended to make a copy of them and make your changes there. | Dashboard Name | Filename | Description | |-----------------------|-----------------------|---------------------------------------------------| @@ -27,7 +27,7 @@ pgMonitor comes with several dashboards ready to be used with automatic provisio | TableSize Details | TableSize_Details.json | Provides size details on a per-table basis for the given database. | ||| |Filesystem Details | Filesystem_Details.json | Provides details on the filesystem metrics (disk usage, IO, etc). | -|Network Details | Network_Details.json | Provides details on network usage (utilization, traffic in/out, netstat, etc). | +|Network Details | Network_Details.json | Provides details on network usage (utilization, traffic in/out, netstat, etc). | |Overview | Overview.json | The top level overview dashboard that provides links to the OS Overview, PostgreSQL Overview, ETCD, and Prometheus Alerts dashboards. | |OS Details | OS_Details.json | Provides details on operating system metrics (cpu, memory, swap, disk usage). Links to Filesystem Details dashboard. | |OS Overview | Overview.json| Provides an overview that shows the up status of each system monitored by pgMonitor. 
|
@@ -84,7 +84,9 @@ Please note that due to the change from postgres_exporter to sql_exporter, and i
 
 ## Upgrading {#upgrading}
 
-Please review the ChangeLog for pgMonitor and take note of any changes to metric names and/or dashboards. Note that if you are using the included dashboards that are managed via the provisioning system, they will automatically be updated. If you've made any changes to configuration files and kept their default names, the package will not overwrite them and will instead make a new file with an {{< shell >}}*.rpmnew{{< /shell >}} extension. You can compare your file and the new one and incorporate any changes as needed or desired.
+* If you are upgrading to version 5.0 and transitioning to using the new sql_exporter, please see the documentation in [Upgrading to pgMonitor v5.0.0](/changelog/v5_upgrade/)
+* See the [CHANGELOG ](/changelog) for full details on both major & minor version upgrades.
+* Note that if you are using the included dashboards that are managed via the provisioning system, they will automatically be updated. If you've made any changes to configuration files and kept their default names, the package will not overwrite them and will instead make a new file with an {{< shell >}}*.rpmnew{{< /shell >}} extension. You can compare your file and the new one and incorporate any changes as needed or desired.
 
 ## Setup {#setup}
 
@@ -131,11 +133,9 @@ Navigate to the web interface: https://<ip-address>:3000. Log in with admi
 
 Grafana provides the ability to automatically provision datasources and dashboards via configuration files instead of having to manually import them either through the web interface or the API. Note that provisioned dashboards can no longer be directly edited and saved via the web interface. See the Grafana documentation for how to edit/save provisioned dashboards: http://docs.grafana.org/administration/provisioning/#making-changes-to-a-provisioned-dashboard. If you'd like to customize these dashboards, we recommend first adding them via provisioning then saving them with a new name. You can then either manage them via the web interface or add them to the provisioning system.
 
-The extras package takes care of putting all these files in place. If you did not use the Crunchy package to install Grafana, see the additional instructions above. Once that is done, the only additional setup that needs to be done is to set the "provisioning" option in the `grafana.ini` to point to the top level directory if it hasn't been done already. 
+The extras package takes care of putting all these files in place. If you did not use the Crunchy package to install Grafana, see the additional instructions above. Once that is done, the only additional setup that needs to be done is to set the "provisioning" option in the `grafana.ini` to point to the top level directory if it hasn't been done already.
 
 ```ini
 [paths]
 provisioning = /etc/grafana/provisioning
 ```
-
-
diff --git a/hugo/content/prometheus/_index.md b/hugo/content/prometheus/_index.md
index ea3ce085..8114b336 100644
--- a/hugo/content/prometheus/_index.md
+++ b/hugo/content/prometheus/_index.md
@@ -39,7 +39,10 @@ Or you can also download [Prometheus](https://prometheus.io/) and [Alertmanager]
 
 ##### Minimum Versions
 
-pgMonitor assumes to be using at least Prometheus 2.9.x. We recommend to always use the latest minor version of Prometheus.
+pgMonitor has been tested with the following versions at a minimum. Later versions should generally work. If they do not, please open an issue on our GitHub.
+
+ * Prometheus 2.49.1
+ * Alertmanager 0.26.0
 
 ##### User and Configuration Directory Installation
 
@@ -92,9 +95,9 @@ The following pgMonitor configuration files should be placed according to the fo
 
 ### Upgrading {#upgrading}
 
-Please review the ChangeLog for any changes that may be relevant to your environment.
-
-Of note, items like the alert rules and configuration files often require user edits. The packages will install newer versions of these files, but if the user has changed their contents but kept the same file name, the package will not overwrite them. Instead it will make a file with an {{< shell >}}*.rpmnew{{< /shell >}} extension that contains the newer version of the file. These new files can be reviewed/compared to he user's file to incorporate any desired changes.
+* If you are upgrading to version 5.0 and transitioning to using the new sql_exporter, please see the documentation in [Upgrading to pgMonitor v5.0.0](/changelog/v5_upgrade/)
+* See the [CHANGELOG ](/changelog) for full details on both major & minor version upgrades.
+* Note that items like the alert rules and configuration files often require user edits. The packages will install newer versions of these files, but if the user has changed their contents but kept the same file name, the package will not overwrite them. Instead it will make a file with an {{< shell >}}*.rpmnew{{< /shell >}} extension that contains the newer version of the file. These new files can be reviewed/compared to the user's file to incorporate any desired changes.
 
 ## Setup {#setup}
 
@@ -118,10 +121,10 @@ The below files dictate how Prometheus and Alertmanager will behave at runtime f
 
 | File | Instructions |
 |------------------------------------------|--------------|
-| /etc/prometheus/crunchy-prometheus.yml | Modify to set scrape interval if different from the default of 30s. Activate alert rules and Alertmanager by uncommenting lines when set as needed. Activate blackbox_exporter monitoring if desired. Service file provided by pgMonitor expects config file to be named "crunchy-prometheus.yml" |
-| /etc/prometheus/crunchy-alertmanager.yml | Setup alert target (e.g., SMTP, SMS, etc.), receiver and route information. Service file provided by pgMonitor expects config file to be named "crunchy-alertmanager.yml" |
-| /etc/prometheus/alert-ruled.d/crunchy-alert-rules-\*.yml.example | Update rules as needed and remove ".example" suffix. Prometheus config provided by pgmonitor expects ".yml" files to be located in "/etc/prometheus/alert-rules.d/" |
-| /etc/prometheus/auto.d/*.yml | You will need at least one file with a final ".yml" extension. Copy the example files to create as many additional targets as needed. Ensure the configuration files you want to use do not end in ".yml.example" but only with ".yml". Note that in order to use the provided Grafana dashboards, the extra "exp_type" label must be applied to all targets and be set appropriately (pg or node). Also, PostgreSQL targets make use of the "cluster_name" variable and should be given a relevant value so all systems (primary & replicas) can be related to each other when needed (Grafana dashboards, etc). See the example target files provided for how to set the labels for postgres or node exporter targets. |
+| /etc/prometheus/crunchy-prometheus.yml | Main configuration file for Prometheus to set things like scrape intervals and alerting. blackbox_exporter monitoring can also be enabled if desired. Service file provided by pgMonitor expects config file to be named "crunchy-prometheus.yml". 
For full configuration options please see the [Prometheus upstream documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) | +| /etc/prometheus/crunchy-alertmanager.yml | Setup alert target (e.g., SMTP, SMS, etc.), receiver and route information. Service file provided by pgMonitor expects config file to be named "crunchy-alertmanager.yml". For full configuration options please see the [Alertmanager upstream documentation](https://prometheus.io/docs/alerting/latest/configuration/) | +| /etc/prometheus/alert-ruled.d/crunchy-alert-rules-\*.yml.example | Update rules as needed and remove ".example" suffix. Prometheus config provided by pgmonitor expects ".yml" files to be located in "/etc/prometheus/alert-rules.d/". Additional information on configuring alert rules can be found in the [alert rules upstream documentation](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/). | +| /etc/prometheus/auto.d/*.yml | You will need at least one file with a final ".yml" extension. Copy the example files to create as many additional targets as needed. Ensure the configuration files you want to use do not end in ".yml.example" but only with ".yml". Note that in order to use the provided Grafana dashboards, the extra "exp_type" label must be applied to all targets and be set appropriately (pg, node, etcd, pgbouncer, etc). Also, PostgreSQL targets make use of the "cluster_name" variable and should be given a relevant value so all systems (primary & replicas) can be related to each other when needed (Grafana dashboards, etc). See the example target files provided for how to set the labels for postgres or node exporter targets. | #### Blackbox Exporter diff --git a/hugo/static/images/pgmonitor_logo.svg b/hugo/static/images/pgmonitor_logo.svg index 9cf2d1f1..9d5ae448 100644 --- a/hugo/static/images/pgmonitor_logo.svg +++ b/hugo/static/images/pgmonitor_logo.svg @@ -137,7 +137,7 @@ - @@ -172,11 +172,11 @@ - - - diff --git a/pgbadger/run_pgbadger.py b/pgbadger/run_pgbadger.py index 888b5ca7..faf39092 100644 --- a/pgbadger/run_pgbadger.py +++ b/pgbadger/run_pgbadger.py @@ -4,11 +4,11 @@ from datetime import date, timedelta parser = argparse.ArgumentParser(description="This script runs the pg_badger log analysis tool for all databases in a given database. Runs on yesterdays logs.") -parser.add_argument('-l', '--logdir', required=True, help="Full path to directory where postgresql log files are stored. Required.") +parser.add_argument('-l', '--logdir', required=True, help="Full path to directory where postgresql log files are stored. Required.") parser.add_argument('-c', '--connection', default="host=", help="""Connection string for psycopg. Defaults to "host=" (local socket).""") parser.add_argument('-d', '--dbname', help="Only run for given database. Otherwise defaults to all databases in the cluster") parser.add_argument('-o', '--output', default=os.getcwd(), help="Base directory to create folders for pgbadger output. Each database gets its own subfolder. Default is current location where script is run from.") -parser.add_argument('-e', '--exclude', action="append", help="Exclude a database. Set multiple times to exclude more than one. By default it already excludes postgres, template0 and template1") +parser.add_argument('-e', '--exclude', action="append", help="Exclude a database. Set multiple times to exclude more than one. 
By default it already excludes postgres, template0 and template1") parser.add_argument('-j', '--jobs', type=int, default=1, help="Use the -j option in pgbadger to set number of jobs to run on parallel on each log file.") parser.add_argument('-J', '--Jobs', type=int, default=1, help="Use the -J option in pgbadger to set number of log file to parse in parallel.") parser.add_argument('--pgbadger', default="pgbadger", help="Location of pgbadger script file. Otherwise assumed in PATH.") @@ -33,7 +33,7 @@ def get_databases(): cur = conn.cursor() sql = "SELECT datname FROM pg_catalog.pg_database WHERE datallowconn = true AND datname NOT IN ('postgres', 'template0', 'template1')" if args.dbname != None: - sql += " AND datname = %s" + sql += " AND datname = %s" sql += " ORDER BY datname" if args.dbname != None: cur.execute(sql, [args.dbname]) @@ -71,7 +71,7 @@ def archive(): max_diff = timedelta(days=args.archive_time) dbdir_list = os.listdir(args.output) - for dbname in dbdir_list: + for dbname in dbdir_list: if dbname == args.archive_folder: continue if not os.path.exists(os.path.join(args.output, args.archive_folder, dbname)): @@ -115,7 +115,7 @@ def archive(): continue # check that folder for given database exists and if it doesn't create it if not os.path.exists(os.path.join(args.output, d[0])): - os.makedirs(os.path.join(args.output, d[0])) + os.makedirs(os.path.join(args.output, d[0])) call_pgbadger = args.perl + " " + args.pgbadger + " " + os.path.join(args.logdir, "postgresql-" + report_date + "*") if args.verbose != True: call_pgbadger += " -q" @@ -127,10 +127,10 @@ def archive(): if args.exclude_query is not None: call_pgbadger += " --exclude-query=\"" + args.exclude_query + "\"" if args.verbose == True: - print call_pgbadger + print call_pgbadger os.system(call_pgbadger) if args.archive: - archive() + archive() """ diff --git a/postgres_exporter/common/pg11/queries_general.yml b/postgres_exporter/common/pg11/queries_general.yml index afe84b20..fc6e3804 100644 --- a/postgres_exporter/common/pg11/queries_general.yml +++ b/postgres_exporter/common/pg11/queries_general.yml @@ -6,7 +6,7 @@ # ### -# There are currently no PG11 only queries that are not already in queries_global.yml +# There are currently no PG11 only queries that are not already in queries_global.yml # This file is kept to allow consistent setting of QUERY_FILE_LIST in sysconfig with future PG versions ### diff --git a/postgres_exporter/common/pg11/queries_pg_stat_statements.yml b/postgres_exporter/common/pg11/queries_pg_stat_statements.yml index bbf8260e..dca7328b 100644 --- a/postgres_exporter/common/pg11/queries_pg_stat_statements.yml +++ b/postgres_exporter/common/pg11/queries_pg_stat_statements.yml @@ -133,4 +133,3 @@ ccp_pg_stat_statements_top_max: # End File: PG11 queries_pg_stat_statements.yml # ### - diff --git a/postgres_exporter/common/pg11/setup.sql b/postgres_exporter/common/pg11/setup.sql index fdf22836..8f593bde 100644 --- a/postgres_exporter/common/pg11/setup.sql +++ b/postgres_exporter/common/pg11/setup.sql @@ -16,7 +16,7 @@ BEGIN END IF; END $$; - + GRANT pg_monitor to ccp_monitoring; GRANT pg_execute_server_program TO ccp_monitoring; @@ -41,7 +41,7 @@ DECLARE v_gather_timestamp timestamptz; v_throttle interval; v_system_identifier bigint; - + BEGIN -- Get pgBackRest info in JSON format @@ -52,7 +52,7 @@ SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_ IF pg_catalog.pg_is_in_recovery() = 'f' THEN IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN - -- Ensure table 
is empty + -- Ensure table is empty DELETE FROM monitor.pgbackrest_info; SELECT system_identifier into v_system_identifier FROM pg_control_system(); @@ -69,34 +69,34 @@ IF NOT FOUND THEN RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; END IF; -END +END $function$; DROP FUNCTION IF EXISTS monitor.sequence_status(); -CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) LANGUAGE sql SECURITY DEFINER STABLE SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Provide detailed status information of sequences in the current database */ WITH default_value_sequences AS ( -- Get sequences defined as default values with related table - -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out - SELECT s.seqrelid, c.oid + SELECT s.seqrelid, c.oid FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) JOIN pg_catalog.pg_class c on a.attrelid = c.oid JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' ), dep_sequences AS ( - -- Get sequences set as dependencies with related tables (identities) + -- Get sequences set as dependencies with related tables (identities) SELECT s.seqrelid, c.oid - FROM pg_catalog.pg_sequence s + FROM pg_catalog.pg_sequence s JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid JOIN pg_catalog.pg_class c ON d.refobjid = c.oid UNION @@ -125,7 +125,7 @@ FROM ( FROM pg_catalog.pg_sequences s JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid GROUP BY 1,2,3,4,5 -) x +) x ORDER BY ROUND(used/slots*100) DESC $function$; @@ -137,7 +137,7 @@ CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT co SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) */ @@ -146,7 +146,7 @@ FROM ( SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used FROM pg_catalog.pg_sequences s -) x +) x WHERE (ROUND(used/slots*100)::int) > p_percent; $function$; @@ -155,12 +155,12 @@ $function$; * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. 
* Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts * Tables allow recording of existing settings so they can be referred back to to see what changed - * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 0, then NO settings have changed * If either checksum function returns 1, then something has changed since last known valid state * For replicas, logging past settings is not possible to compare what may have changed * For replicas, by default, it is expected that its settings will match the primary * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's - settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. */ @@ -189,9 +189,9 @@ CREATE INDEX ON monitor.pg_hba_checksum (created_at); DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); -CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -221,7 +221,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_settings_hash IS NOT NULL THEN v_settings_hash_old := p_known_settings_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. v_valid := 0; END IF; @@ -231,7 +231,7 @@ IF (v_settings_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_settings_checksum ( settings_hash_generated , settings_hash_known_provided @@ -260,7 +260,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -269,9 +269,9 @@ $function$; DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); -CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -307,7 +307,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_hba_hash IS NOT NULL THEN v_hba_hash_old := p_known_hba_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. 
v_valid := 0; END IF; @@ -317,7 +317,7 @@ IF (v_hba_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_hba_checksum ( hba_hash_generated , hba_hash_known_provided @@ -346,7 +346,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -359,7 +359,7 @@ DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_settings_checksum; @@ -374,7 +374,7 @@ DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_hba_checksum; @@ -389,7 +389,7 @@ CREATE VIEW monitor.pg_settings_hash AS WITH settings_ordered_list AS ( SELECT name , COALESCE(setting, '<>') AS setting - FROM pg_catalog.pg_settings + FROM pg_catalog.pg_settings ORDER BY name, setting) SELECT md5(string_agg(name||setting, ',')) AS md5_hash , string_agg(name||setting, ',') AS settings_string diff --git a/postgres_exporter/common/pg12/queries_general.yml b/postgres_exporter/common/pg12/queries_general.yml index eb0430d9..fbebd80f 100644 --- a/postgres_exporter/common/pg12/queries_general.yml +++ b/postgres_exporter/common/pg12/queries_general.yml @@ -10,7 +10,7 @@ ccp_data_checksum_failure: query: "SELECT datname AS dbname , checksum_failures AS count - , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds + , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database;" metrics: - dbname: diff --git a/postgres_exporter/common/pg12/queries_pg_stat_statements.yml b/postgres_exporter/common/pg12/queries_pg_stat_statements.yml index 3b623b4f..224a9d57 100644 --- a/postgres_exporter/common/pg12/queries_pg_stat_statements.yml +++ b/postgres_exporter/common/pg12/queries_pg_stat_statements.yml @@ -133,4 +133,3 @@ ccp_pg_stat_statements_top_max: # End File: PG12 queries_pg_stat_statements.yml # ### - diff --git a/postgres_exporter/common/pg12/queries_pg_stat_statements_reset_info.yml b/postgres_exporter/common/pg12/queries_pg_stat_statements_reset_info.yml index f2fe14e3..ba78f5e4 100644 --- a/postgres_exporter/common/pg12/queries_pg_stat_statements_reset_info.yml +++ b/postgres_exporter/common/pg12/queries_pg_stat_statements_reset_info.yml @@ -17,4 +17,3 @@ ccp_pg_stat_statements_reset: # End File: pg_stat_statements_reset_info.yml # ### - diff --git a/postgres_exporter/common/pg12/setup.sql b/postgres_exporter/common/pg12/setup.sql index 7515d06a..325d0937 100644 --- a/postgres_exporter/common/pg12/setup.sql +++ b/postgres_exporter/common/pg12/setup.sql @@ -16,7 +16,7 @@ BEGIN END IF; END $$; - + GRANT pg_monitor to ccp_monitoring; GRANT pg_execute_server_program TO ccp_monitoring; @@ -41,7 +41,7 @@ DECLARE v_gather_timestamp timestamptz; v_throttle interval; v_system_identifier bigint; - + BEGIN -- Get 
pgBackRest info in JSON format @@ -52,7 +52,7 @@ SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_ IF pg_catalog.pg_is_in_recovery() = 'f' THEN IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pgbackrest_info; SELECT system_identifier into v_system_identifier FROM pg_control_system(); @@ -69,34 +69,34 @@ IF NOT FOUND THEN RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; END IF; -END +END $function$; DROP FUNCTION IF EXISTS monitor.sequence_status(); -CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) LANGUAGE sql SECURITY DEFINER STABLE SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Provide detailed status information of sequences in the current database */ WITH default_value_sequences AS ( -- Get sequences defined as default values with related table - -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out - SELECT s.seqrelid, c.oid + SELECT s.seqrelid, c.oid FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) JOIN pg_catalog.pg_class c on a.attrelid = c.oid JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' ), dep_sequences AS ( - -- Get sequences set as dependencies with related tables (identities) + -- Get sequences set as dependencies with related tables (identities) SELECT s.seqrelid, c.oid - FROM pg_catalog.pg_sequence s + FROM pg_catalog.pg_sequence s JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid JOIN pg_catalog.pg_class c ON d.refobjid = c.oid UNION @@ -125,7 +125,7 @@ FROM ( FROM pg_catalog.pg_sequences s JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid GROUP BY 1,2,3,4,5 -) x +) x ORDER BY ROUND(used/slots*100) DESC $function$; @@ -137,7 +137,7 @@ CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT co SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) */ @@ -146,7 +146,7 @@ FROM ( SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used FROM pg_catalog.pg_sequences s -) x +) x WHERE (ROUND(used/slots*100)::int) > p_percent; $function$; @@ -155,12 +155,12 @@ $function$; * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. 
* Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts * Tables allow recording of existing settings so they can be referred back to to see what changed - * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 0, then NO settings have changed * If either checksum function returns 1, then something has changed since last known valid state * For replicas, logging past settings is not possible to compare what may have changed * For replicas, by default, it is expected that its settings will match the primary * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's - settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. */ @@ -189,9 +189,9 @@ CREATE INDEX ON monitor.pg_hba_checksum (created_at); DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); -CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -221,7 +221,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_settings_hash IS NOT NULL THEN v_settings_hash_old := p_known_settings_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. v_valid := 0; END IF; @@ -231,7 +231,7 @@ IF (v_settings_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_settings_checksum ( settings_hash_generated , settings_hash_known_provided @@ -260,7 +260,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -269,9 +269,9 @@ $function$; DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); -CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -307,7 +307,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_hba_hash IS NOT NULL THEN v_hba_hash_old := p_known_hba_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. 
v_valid := 0; END IF; @@ -317,7 +317,7 @@ IF (v_hba_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_hba_checksum ( hba_hash_generated , hba_hash_known_provided @@ -346,7 +346,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -359,7 +359,7 @@ DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_settings_checksum; @@ -374,7 +374,7 @@ DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_hba_checksum; @@ -389,7 +389,7 @@ CREATE VIEW monitor.pg_settings_hash AS WITH settings_ordered_list AS ( SELECT name , COALESCE(setting, '<>') AS setting - FROM pg_catalog.pg_settings + FROM pg_catalog.pg_settings ORDER BY name, setting) SELECT md5(string_agg(name||setting, ',')) AS md5_hash , string_agg(name||setting, ',') AS settings_string @@ -418,7 +418,7 @@ CREATE VIEW monitor.pg_hba_hash AS DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; -- Table to store last reset time for pg_stat_statements CREATE TABLE monitor.pg_stat_statements_reset_info( - reset_time timestamptz + reset_time timestamptz ); DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); @@ -433,7 +433,7 @@ DECLARE v_reset_timestamp timestamptz; v_throttle interval; - + BEGIN IF p_throttle_minutes < 0 THEN @@ -445,7 +445,7 @@ BEGIN SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pg_stat_statements_reset_info; PERFORM pg_stat_statements_reset(); INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); @@ -453,10 +453,10 @@ BEGIN RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); -EXCEPTION - WHEN others then +EXCEPTION + WHEN others then RETURN 0; -END +END $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; diff --git a/postgres_exporter/common/pg13/queries_pg_stat_statements_reset_info.yml b/postgres_exporter/common/pg13/queries_pg_stat_statements_reset_info.yml index f2fe14e3..ba78f5e4 100644 --- a/postgres_exporter/common/pg13/queries_pg_stat_statements_reset_info.yml +++ b/postgres_exporter/common/pg13/queries_pg_stat_statements_reset_info.yml @@ -17,4 +17,3 @@ ccp_pg_stat_statements_reset: # End File: pg_stat_statements_reset_info.yml # ### - diff --git a/postgres_exporter/common/pg13/setup.sql b/postgres_exporter/common/pg13/setup.sql index 1890bca8..3f267f88 100644 --- a/postgres_exporter/common/pg13/setup.sql +++ b/postgres_exporter/common/pg13/setup.sql @@ -16,7 +16,7 @@ BEGIN END IF; END $$; - + GRANT pg_monitor to ccp_monitoring; GRANT pg_execute_server_program TO ccp_monitoring; @@ -41,7 
+41,7 @@ DECLARE v_gather_timestamp timestamptz; v_throttle interval; v_system_identifier bigint; - + BEGIN -- Get pgBackRest info in JSON format @@ -52,7 +52,7 @@ SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_ IF pg_catalog.pg_is_in_recovery() = 'f' THEN IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pgbackrest_info; SELECT system_identifier into v_system_identifier FROM pg_control_system(); @@ -69,34 +69,34 @@ IF NOT FOUND THEN RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; END IF; -END +END $function$; DROP FUNCTION IF EXISTS monitor.sequence_status(); -CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) LANGUAGE sql SECURITY DEFINER STABLE SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Provide detailed status information of sequences in the current database */ WITH default_value_sequences AS ( -- Get sequences defined as default values with related table - -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out - SELECT s.seqrelid, c.oid + SELECT s.seqrelid, c.oid FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) JOIN pg_catalog.pg_class c on a.attrelid = c.oid JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' ), dep_sequences AS ( - -- Get sequences set as dependencies with related tables (identities) + -- Get sequences set as dependencies with related tables (identities) SELECT s.seqrelid, c.oid - FROM pg_catalog.pg_sequence s + FROM pg_catalog.pg_sequence s JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid JOIN pg_catalog.pg_class c ON d.refobjid = c.oid UNION @@ -125,7 +125,7 @@ FROM ( FROM pg_catalog.pg_sequences s JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid GROUP BY 1,2,3,4,5 -) x +) x ORDER BY ROUND(used/slots*100) DESC $function$; @@ -137,7 +137,7 @@ CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT co SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) */ @@ -146,7 +146,7 @@ FROM ( SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used FROM pg_catalog.pg_sequences s -) x +) x WHERE (ROUND(used/slots*100)::int) > p_percent; $function$; @@ -155,12 +155,12 @@ $function$; * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. 
* Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts * Tables allow recording of existing settings so they can be referred back to to see what changed - * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 0, then NO settings have changed * If either checksum function returns 1, then something has changed since last known valid state * For replicas, logging past settings is not possible to compare what may have changed * For replicas, by default, it is expected that its settings will match the primary * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's - settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. */ @@ -189,9 +189,9 @@ CREATE INDEX ON monitor.pg_hba_checksum (created_at); DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); -CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -221,7 +221,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_settings_hash IS NOT NULL THEN v_settings_hash_old := p_known_settings_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. v_valid := 0; END IF; @@ -231,7 +231,7 @@ IF (v_settings_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_settings_checksum ( settings_hash_generated , settings_hash_known_provided @@ -260,7 +260,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -269,9 +269,9 @@ $function$; DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); -CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -307,7 +307,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_hba_hash IS NOT NULL THEN v_hba_hash_old := p_known_hba_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. 
v_valid := 0; END IF; @@ -317,7 +317,7 @@ IF (v_hba_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_hba_checksum ( hba_hash_generated , hba_hash_known_provided @@ -346,7 +346,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -359,7 +359,7 @@ DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_settings_checksum; @@ -374,7 +374,7 @@ DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_hba_checksum; @@ -389,7 +389,7 @@ CREATE VIEW monitor.pg_settings_hash AS WITH settings_ordered_list AS ( SELECT name , COALESCE(setting, '<>') AS setting - FROM pg_catalog.pg_settings + FROM pg_catalog.pg_settings ORDER BY name, setting) SELECT md5(string_agg(name||setting, ',')) AS md5_hash , string_agg(name||setting, ',') AS settings_string @@ -418,7 +418,7 @@ CREATE VIEW monitor.pg_hba_hash AS DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; -- Table to store last reset time for pg_stat_statements CREATE TABLE monitor.pg_stat_statements_reset_info( - reset_time timestamptz + reset_time timestamptz ); DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); @@ -433,7 +433,7 @@ DECLARE v_reset_timestamp timestamptz; v_throttle interval; - + BEGIN IF p_throttle_minutes < 0 THEN @@ -445,7 +445,7 @@ BEGIN SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pg_stat_statements_reset_info; PERFORM pg_stat_statements_reset(); INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); @@ -453,10 +453,10 @@ BEGIN RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); -EXCEPTION - WHEN others then +EXCEPTION + WHEN others then RETURN 0; -END +END $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; diff --git a/postgres_exporter/common/pg14/queries_pg_stat_statements_reset_info.yml b/postgres_exporter/common/pg14/queries_pg_stat_statements_reset_info.yml index f2fe14e3..ba78f5e4 100644 --- a/postgres_exporter/common/pg14/queries_pg_stat_statements_reset_info.yml +++ b/postgres_exporter/common/pg14/queries_pg_stat_statements_reset_info.yml @@ -17,4 +17,3 @@ ccp_pg_stat_statements_reset: # End File: pg_stat_statements_reset_info.yml # ### - diff --git a/postgres_exporter/common/pg14/setup.sql b/postgres_exporter/common/pg14/setup.sql index 6e05a875..313c0e0d 100644 --- a/postgres_exporter/common/pg14/setup.sql +++ b/postgres_exporter/common/pg14/setup.sql @@ -16,7 +16,7 @@ BEGIN END IF; END $$; - + GRANT pg_monitor to ccp_monitoring; GRANT pg_execute_server_program TO ccp_monitoring; @@ -41,7 
+41,7 @@ DECLARE v_gather_timestamp timestamptz; v_throttle interval; v_system_identifier bigint; - + BEGIN -- Get pgBackRest info in JSON format @@ -52,7 +52,7 @@ SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_ IF pg_catalog.pg_is_in_recovery() = 'f' THEN IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pgbackrest_info; SELECT system_identifier into v_system_identifier FROM pg_control_system(); @@ -69,34 +69,34 @@ IF NOT FOUND THEN RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; END IF; -END +END $function$; DROP FUNCTION IF EXISTS monitor.sequence_status(); -CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) LANGUAGE sql SECURITY DEFINER STABLE SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Provide detailed status information of sequences in the current database */ WITH default_value_sequences AS ( -- Get sequences defined as default values with related table - -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out - SELECT s.seqrelid, c.oid + SELECT s.seqrelid, c.oid FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) JOIN pg_catalog.pg_class c on a.attrelid = c.oid JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' ), dep_sequences AS ( - -- Get sequences set as dependencies with related tables (identities) + -- Get sequences set as dependencies with related tables (identities) SELECT s.seqrelid, c.oid - FROM pg_catalog.pg_sequence s + FROM pg_catalog.pg_sequence s JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid JOIN pg_catalog.pg_class c ON d.refobjid = c.oid UNION @@ -125,7 +125,7 @@ FROM ( FROM pg_catalog.pg_sequences s JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid GROUP BY 1,2,3,4,5 -) x +) x ORDER BY ROUND(used/slots*100) DESC $function$; @@ -137,7 +137,7 @@ CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT co SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) */ @@ -146,7 +146,7 @@ FROM ( SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used FROM pg_catalog.pg_sequences s -) x +) x WHERE (ROUND(used/slots*100)::int) > p_percent; $function$; @@ -155,12 +155,12 @@ $function$; * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. 
* Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts * Tables allow recording of existing settings so they can be referred back to to see what changed - * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 0, then NO settings have changed * If either checksum function returns 1, then something has changed since last known valid state * For replicas, logging past settings is not possible to compare what may have changed * For replicas, by default, it is expected that its settings will match the primary * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's - settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. */ @@ -189,9 +189,9 @@ CREATE INDEX ON monitor.pg_hba_checksum (created_at); DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); -CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -221,7 +221,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_settings_hash IS NOT NULL THEN v_settings_hash_old := p_known_settings_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. v_valid := 0; END IF; @@ -231,7 +231,7 @@ IF (v_settings_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_settings_checksum ( settings_hash_generated , settings_hash_known_provided @@ -260,7 +260,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -269,9 +269,9 @@ $function$; DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); -CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -307,7 +307,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_hba_hash IS NOT NULL THEN v_hba_hash_old := p_known_hba_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. 
v_valid := 0; END IF; @@ -317,7 +317,7 @@ IF (v_hba_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_hba_checksum ( hba_hash_generated , hba_hash_known_provided @@ -346,7 +346,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -359,7 +359,7 @@ DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_settings_checksum; @@ -374,7 +374,7 @@ DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_hba_checksum; @@ -389,7 +389,7 @@ CREATE VIEW monitor.pg_settings_hash AS WITH settings_ordered_list AS ( SELECT name , COALESCE(setting, '<>') AS setting - FROM pg_catalog.pg_settings + FROM pg_catalog.pg_settings ORDER BY name, setting) SELECT md5(string_agg(name||setting, ',')) AS md5_hash , string_agg(name||setting, ',') AS settings_string @@ -418,7 +418,7 @@ CREATE VIEW monitor.pg_hba_hash AS DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; -- Table to store last reset time for pg_stat_statements CREATE TABLE monitor.pg_stat_statements_reset_info( - reset_time timestamptz + reset_time timestamptz ); DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); @@ -433,7 +433,7 @@ DECLARE v_reset_timestamp timestamptz; v_throttle interval; - + BEGIN IF p_throttle_minutes < 0 THEN @@ -445,7 +445,7 @@ BEGIN SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pg_stat_statements_reset_info; PERFORM pg_stat_statements_reset(); INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); @@ -453,10 +453,10 @@ BEGIN RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); -EXCEPTION - WHEN others then +EXCEPTION + WHEN others then RETURN 0; -END +END $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; diff --git a/postgres_exporter/common/pg15/queries_pg_stat_statements_reset_info.yml b/postgres_exporter/common/pg15/queries_pg_stat_statements_reset_info.yml index f2fe14e3..ba78f5e4 100644 --- a/postgres_exporter/common/pg15/queries_pg_stat_statements_reset_info.yml +++ b/postgres_exporter/common/pg15/queries_pg_stat_statements_reset_info.yml @@ -17,4 +17,3 @@ ccp_pg_stat_statements_reset: # End File: pg_stat_statements_reset_info.yml # ### - diff --git a/postgres_exporter/common/pg15/setup.sql b/postgres_exporter/common/pg15/setup.sql index faa48d1d..2c98b294 100644 --- a/postgres_exporter/common/pg15/setup.sql +++ b/postgres_exporter/common/pg15/setup.sql @@ -16,7 +16,7 @@ BEGIN END IF; END $$; - + GRANT pg_monitor to ccp_monitoring; GRANT pg_execute_server_program TO ccp_monitoring; @@ -41,7 
+41,7 @@ DECLARE v_gather_timestamp timestamptz; v_throttle interval; v_system_identifier bigint; - + BEGIN -- Get pgBackRest info in JSON format @@ -52,7 +52,7 @@ SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_ IF pg_catalog.pg_is_in_recovery() = 'f' THEN IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pgbackrest_info; SELECT system_identifier into v_system_identifier FROM pg_control_system(); @@ -69,34 +69,34 @@ IF NOT FOUND THEN RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; END IF; -END +END $function$; DROP FUNCTION IF EXISTS monitor.sequence_status(); -CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) LANGUAGE sql SECURITY DEFINER STABLE SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Provide detailed status information of sequences in the current database */ WITH default_value_sequences AS ( -- Get sequences defined as default values with related table - -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out - SELECT s.seqrelid, c.oid + SELECT s.seqrelid, c.oid FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) JOIN pg_catalog.pg_class c on a.attrelid = c.oid JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' ), dep_sequences AS ( - -- Get sequences set as dependencies with related tables (identities) + -- Get sequences set as dependencies with related tables (identities) SELECT s.seqrelid, c.oid - FROM pg_catalog.pg_sequence s + FROM pg_catalog.pg_sequence s JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid JOIN pg_catalog.pg_class c ON d.refobjid = c.oid UNION @@ -125,7 +125,7 @@ FROM ( FROM pg_catalog.pg_sequences s JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid GROUP BY 1,2,3,4,5 -) x +) x ORDER BY ROUND(used/slots*100) DESC $function$; @@ -137,7 +137,7 @@ CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT co SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) */ @@ -146,7 +146,7 @@ FROM ( SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used FROM pg_catalog.pg_sequences s -) x +) x WHERE (ROUND(used/slots*100)::int) > p_percent; $function$; @@ -155,12 +155,12 @@ $function$; * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. 
* Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts * Tables allow recording of existing settings so they can be referred back to to see what changed - * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 0, then NO settings have changed * If either checksum function returns 1, then something has changed since last known valid state * For replicas, logging past settings is not possible to compare what may have changed * For replicas, by default, it is expected that its settings will match the primary * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's - settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. */ @@ -189,9 +189,9 @@ CREATE INDEX ON monitor.pg_hba_checksum (created_at); DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); -CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -221,7 +221,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_settings_hash IS NOT NULL THEN v_settings_hash_old := p_known_settings_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. v_valid := 0; END IF; @@ -231,7 +231,7 @@ IF (v_settings_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_settings_checksum ( settings_hash_generated , settings_hash_known_provided @@ -260,7 +260,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -269,9 +269,9 @@ $function$; DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); -CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -307,7 +307,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_hba_hash IS NOT NULL THEN v_hba_hash_old := p_known_hba_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. 
v_valid := 0; END IF; @@ -317,7 +317,7 @@ IF (v_hba_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_hba_checksum ( hba_hash_generated , hba_hash_known_provided @@ -346,7 +346,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -359,7 +359,7 @@ DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_settings_checksum; @@ -374,7 +374,7 @@ DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_hba_checksum; @@ -389,7 +389,7 @@ CREATE VIEW monitor.pg_settings_hash AS WITH settings_ordered_list AS ( SELECT name , COALESCE(setting, '<>') AS setting - FROM pg_catalog.pg_settings + FROM pg_catalog.pg_settings ORDER BY name, setting) SELECT md5(string_agg(name||setting, ',')) AS md5_hash , string_agg(name||setting, ',') AS settings_string @@ -418,7 +418,7 @@ CREATE VIEW monitor.pg_hba_hash AS DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; -- Table to store last reset time for pg_stat_statements CREATE TABLE monitor.pg_stat_statements_reset_info( - reset_time timestamptz + reset_time timestamptz ); DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); @@ -433,7 +433,7 @@ DECLARE v_reset_timestamp timestamptz; v_throttle interval; - + BEGIN IF p_throttle_minutes < 0 THEN @@ -445,7 +445,7 @@ BEGIN SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pg_stat_statements_reset_info; PERFORM pg_stat_statements_reset(); INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); @@ -453,10 +453,10 @@ BEGIN RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); -EXCEPTION - WHEN others then +EXCEPTION + WHEN others then RETURN 0; -END +END $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; diff --git a/postgres_exporter/common/pg16/queries_pg_stat_statements_reset_info.yml b/postgres_exporter/common/pg16/queries_pg_stat_statements_reset_info.yml index f2fe14e3..ba78f5e4 100644 --- a/postgres_exporter/common/pg16/queries_pg_stat_statements_reset_info.yml +++ b/postgres_exporter/common/pg16/queries_pg_stat_statements_reset_info.yml @@ -17,4 +17,3 @@ ccp_pg_stat_statements_reset: # End File: pg_stat_statements_reset_info.yml # ### - diff --git a/postgres_exporter/common/pg16/setup.sql b/postgres_exporter/common/pg16/setup.sql index 233d8434..2307a744 100644 --- a/postgres_exporter/common/pg16/setup.sql +++ b/postgres_exporter/common/pg16/setup.sql @@ -16,7 +16,7 @@ BEGIN END IF; END $$; - + GRANT pg_monitor to ccp_monitoring; GRANT pg_execute_server_program TO ccp_monitoring; @@ -41,7 
+41,7 @@ DECLARE v_gather_timestamp timestamptz; v_throttle interval; v_system_identifier bigint; - + BEGIN -- Get pgBackRest info in JSON format @@ -52,7 +52,7 @@ SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_ IF pg_catalog.pg_is_in_recovery() = 'f' THEN IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pgbackrest_info; SELECT system_identifier into v_system_identifier FROM pg_control_system(); @@ -69,34 +69,34 @@ IF NOT FOUND THEN RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; END IF; -END +END $function$; DROP FUNCTION IF EXISTS monitor.sequence_status(); -CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) LANGUAGE sql SECURITY DEFINER STABLE SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Provide detailed status information of sequences in the current database */ WITH default_value_sequences AS ( -- Get sequences defined as default values with related table - -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out - SELECT s.seqrelid, c.oid + SELECT s.seqrelid, c.oid FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) JOIN pg_catalog.pg_class c on a.attrelid = c.oid JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' ), dep_sequences AS ( - -- Get sequences set as dependencies with related tables (identities) + -- Get sequences set as dependencies with related tables (identities) SELECT s.seqrelid, c.oid - FROM pg_catalog.pg_sequence s + FROM pg_catalog.pg_sequence s JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid JOIN pg_catalog.pg_class c ON d.refobjid = c.oid UNION @@ -125,7 +125,7 @@ FROM ( FROM pg_catalog.pg_sequences s JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid GROUP BY 1,2,3,4,5 -) x +) x ORDER BY ROUND(used/slots*100) DESC $function$; @@ -137,7 +137,7 @@ CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT co SET search_path TO pg_catalog, pg_temp AS $function$ -/* +/* * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) */ @@ -146,7 +146,7 @@ FROM ( SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used FROM pg_catalog.pg_sequences s -) x +) x WHERE (ROUND(used/slots*100)::int) > p_percent; $function$; @@ -155,12 +155,12 @@ $function$; * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. 
* Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts * Tables allow recording of existing settings so they can be referred back to to see what changed - * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 0, then NO settings have changed * If either checksum function returns 1, then something has changed since last known valid state * For replicas, logging past settings is not possible to compare what may have changed * For replicas, by default, it is expected that its settings will match the primary * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's - settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. */ @@ -189,9 +189,9 @@ CREATE INDEX ON monitor.pg_hba_checksum (created_at); DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); -CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -221,7 +221,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_settings_hash IS NOT NULL THEN v_settings_hash_old := p_known_settings_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. v_valid := 0; END IF; @@ -231,7 +231,7 @@ IF (v_settings_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_settings_checksum ( settings_hash_generated , settings_hash_known_provided @@ -260,7 +260,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -269,9 +269,9 @@ $function$; DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); -CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) RETURNS smallint - LANGUAGE plpgsql SECURITY DEFINER + LANGUAGE plpgsql SECURITY DEFINER SET search_path TO pg_catalog, pg_temp AS $function$ DECLARE @@ -307,7 +307,7 @@ ORDER BY created_at DESC LIMIT 1; IF p_known_hba_hash IS NOT NULL THEN v_hba_hash_old := p_known_hba_hash; - -- Do not base validity on the stored value if manual hash is given. + -- Do not base validity on the stored value if manual hash is given. 
v_valid := 0; END IF; @@ -317,7 +317,7 @@ IF (v_hba_hash_old IS NOT NULL) THEN v_valid := 1; - IF v_is_in_recovery = false THEN + IF v_is_in_recovery = false THEN INSERT INTO monitor.pg_hba_checksum ( hba_hash_generated , hba_hash_known_provided @@ -346,7 +346,7 @@ ELSE , v_valid); END IF; -END IF; +END IF; RETURN v_valid; @@ -359,7 +359,7 @@ DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); * This function provides a quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_settings_checksum; @@ -374,7 +374,7 @@ DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); * This function provides a quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. */ CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint - LANGUAGE sql + LANGUAGE sql AS $function$ TRUNCATE monitor.pg_hba_checksum; @@ -389,7 +389,7 @@ CREATE VIEW monitor.pg_settings_hash AS WITH settings_ordered_list AS ( SELECT name , COALESCE(setting, '<>') AS setting - FROM pg_catalog.pg_settings + FROM pg_catalog.pg_settings ORDER BY name, setting) SELECT md5(string_agg(name||setting, ',')) AS md5_hash , string_agg(name||setting, ',') AS settings_string @@ -418,7 +418,7 @@ CREATE VIEW monitor.pg_hba_hash AS DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; -- Table to store last reset time for pg_stat_statements CREATE TABLE monitor.pg_stat_statements_reset_info( - reset_time timestamptz + reset_time timestamptz ); DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); @@ -433,7 +433,7 @@ DECLARE v_reset_timestamp timestamptz; v_throttle interval; - + BEGIN IF p_throttle_minutes < 0 THEN @@ -445,7 +445,7 @@ BEGIN SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN - -- Ensure table is empty + -- Ensure table is empty DELETE FROM monitor.pg_stat_statements_reset_info; PERFORM pg_stat_statements_reset(); INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); @@ -453,10 +453,10 @@ BEGIN RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); -EXCEPTION - WHEN others then +EXCEPTION + WHEN others then RETURN 0; -END +END $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; diff --git a/postgres_exporter/common/queries_bloat.yml b/postgres_exporter/common/queries_bloat.yml index 270c8373..da53a1d8 100644 --- a/postgres_exporter/common/queries_bloat.yml +++ b/postgres_exporter/common/queries_bloat.yml @@ -6,17 +6,17 @@ # ### -ccp_bloat_check: +ccp_bloat_check: query: "SELECT current_database() AS dbname , schemaname , objectname , size_bytes , (dead_tuple_size_bytes + (free_space_bytes - (relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::bigint ))::bigint AS total_wasted_space_bytes - FROM bloat_stats" - metrics: - - dbname: - usage: "LABEL" - description: "Database name" + FROM bloat_stats" + metrics: + - dbname: + usage: "LABEL" + description: "Database name" - schemaname: usage: "LABEL" description: "Schema name" @@ -26,9 +26,9 @@
ccp_bloat_check: - size_bytes: usage: "GAUGE" description: "Size of object in bytes" - - total_wasted_space_bytes: - usage: "GAUGE" - description: "Total wasted space in bytes of given object" + - total_wasted_space_bytes: + usage: "GAUGE" + description: "Total wasted space in bytes of given object" ### # diff --git a/postgres_exporter/common/queries_global.yml b/postgres_exporter/common/queries_global.yml index fb00ac87..2e1ca555 100644 --- a/postgres_exporter/common/queries_global.yml +++ b/postgres_exporter/common/queries_global.yml @@ -9,7 +9,7 @@ ccp_postgresql_version: query: "SELECT current_setting('server_version_num')::int AS current" metrics: - - current: + - current: usage: "GAUGE" description: "The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######)." @@ -119,10 +119,10 @@ ccp_stat_database: - dbname: usage: "LABEL" description: "Name of database" - - xact_commit: + - xact_commit: usage: "GAUGE" description: "Number of transactions in this database that have been committed" - - xact_rollback: + - xact_rollback: usage: "GAUGE" description: "Number of transactions in this database that have been rolled back" - blks_read: @@ -175,10 +175,10 @@ ccp_transaction_wraparound: ccp_archive_command_status: - query: "SELECT CASE + query: "SELECT CASE WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0 - ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) + ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) END AS seconds_since_last_fail , EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)) AS seconds_since_last_archive , archived_count @@ -216,7 +216,7 @@ ccp_postmaster_uptime: ccp_pg_settings_checksum: - query: "SELECT monitor.pg_settings_checksum() AS status" + query: "SELECT monitor.pg_settings_checksum() AS status" metrics: - status: usage: "GAUGE" @@ -246,7 +246,7 @@ ccp_replication_lag: metrics: - replay_time: usage: "GAUGE" - description: "Length of time since the last transaction was replayed on replica. Returns zero if last WAL recieved equals last WAL replayed. Avoids false positives when primary stops writing. Monitors for replicas that cannot keep up with primary WAL generation." + description: "Length of time since the last transaction was replayed on replica. Returns zero if last WAL received equals last WAL replayed. Avoids false positives when primary stops writing. Monitors for replicas that cannot keep up with primary WAL generation." - received_time: usage: "GAUGE" description: "Length of time since the last WAL file was received and replayed on replica. Always increases, possibly causing false positives if the primary stops writing. Monitors for replicas that stop receiving WAL all together." @@ -297,7 +297,7 @@ ccp_replication_lag_size: query: "SELECT client_addr as replica , client_hostname as replica_hostname , client_port as replica_port - , pg_wal_lsn_diff(sent_lsn, replay_lsn) as bytes + , pg_wal_lsn_diff(sent_lsn, replay_lsn) as bytes FROM pg_catalog.pg_stat_replication" metrics: - replica: @@ -320,8 +320,8 @@ ccp_replication_slots: - slot_name: usage: "LABEL" description: "Name of replication slot" - - active: - usage: "GAUGE" + - active: + usage: "GAUGE" description: "Active state of slot. 1 = true. 0 = false." 
- retained_bytes: usage: "GAUGE" @@ -342,7 +342,7 @@ ccp_wal_activity: ccp_pg_hba_checksum: - query: "SELECT monitor.pg_hba_checksum() AS status" + query: "SELECT monitor.pg_hba_checksum() AS status" metrics: - status: usage: "GAUGE" diff --git a/postgres_exporter/common/queries_nodemx.yml b/postgres_exporter/common/queries_nodemx.yml index 6bc52057..bdcc62d8 100644 --- a/postgres_exporter/common/queries_nodemx.yml +++ b/postgres_exporter/common/queries_nodemx.yml @@ -53,7 +53,7 @@ SELECT CASE WHEN monitor.cgroup_mode() = 'legacy' THEN (SELECT val FROM d WHERE key='mapped_file') - ELSE 0 + ELSE 0 END as mapped_file, CASE WHEN monitor.cgroup_mode() = 'legacy' @@ -71,7 +71,7 @@ SELECT END as usage_in_bytes, CASE WHEN monitor.cgroup_mode() = 'legacy' - THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes') + THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes') ELSE 0 END as kmem_usage_in_byte" metrics: @@ -133,7 +133,7 @@ ccp_nodemx_cpucfs: monitor.cgroup_scalar_bigint('cpu.cfs_period_us') ELSE (monitor.cgroup_array_bigint('cpu.max'))[2] - END AS period_us, + END AS period_us, CASE WHEN monitor.cgroup_mode() = 'legacy' THEN GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0) @@ -224,7 +224,7 @@ ccp_nodemx_disk_activity: description: "Total sectors read" - sectors_written: usage: "GAUGE" - description: "Total sectors writen" + description: "Total sectors written" ### # diff --git a/postgres_exporter/common/queries_pgbouncer.yml b/postgres_exporter/common/queries_pgbouncer.yml index ab3abf1b..f22787c4 100644 --- a/postgres_exporter/common/queries_pgbouncer.yml +++ b/postgres_exporter/common/queries_pgbouncer.yml @@ -22,7 +22,7 @@ ccp_pgbouncer_pools: description: "FDW server name for the target PgBouncer host" - conn_pool: usage: "LABEL" - description: "Conection Pool" + description: "Connection Pool" - client_active: usage: "GAUGE" description: "Client connections that are linked to server connection and can process queries." 
@@ -79,7 +79,7 @@ ccp_pgbouncer_clients: description: "FDW server name for the target PgBouncer host" - conn_pool: usage: "LABEL" - description: "Conection Pool" + description: "Connection Pool" - client_state: usage: "LABEL" description: "Client connection statuses per database" @@ -100,7 +100,7 @@ ccp_pgbouncer_servers: description: "FDW server name for the target PgBouncer host" - conn_pool: usage: "LABEL" - description: "Conection Pool" + description: "Connection Pool" - server_state: usage: "LABEL" description: "Server connection statuses per database" diff --git a/postgres_exporter/common/setup_metric_views.sql b/postgres_exporter/common/setup_metric_views.sql index ee0ac5f0..9f0c6004 100644 --- a/postgres_exporter/common/setup_metric_views.sql +++ b/postgres_exporter/common/setup_metric_views.sql @@ -106,7 +106,7 @@ AS SELECT current_database() as dbname , vacuum_count , autovacuum_count , analyze_count - , autoanalyze_count + , autoanalyze_count FROM pg_catalog.pg_stat_user_tables; CREATE UNIQUE INDEX ccp_user_tables_db_schema_relname_idx ON monitor.ccp_stat_user_tables (dbname, schemaname, relname); ALTER MATERIALIZED VIEW monitor.ccp_stat_user_tables OWNER TO ccp_monitoring; @@ -117,10 +117,10 @@ CREATE MATERIALIZED VIEW monitor.ccp_table_size AS SELECT current_database() as dbname , n.nspname as schemaname , c.relname - , pg_total_relation_size(c.oid) as size_bytes - FROM pg_catalog.pg_class c - JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid - WHERE NOT pg_is_other_temp_schema(n.oid) + , pg_total_relation_size(c.oid) as size_bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) AND relkind IN ('r', 'm', 'f'); CREATE UNIQUE INDEX ccp_table_size_idx ON monitor.ccp_table_size (dbname, schemaname, relname); ALTER MATERIALIZED VIEW monitor.ccp_table_size OWNER TO ccp_monitoring; @@ -129,8 +129,8 @@ ALTER MATERIALIZED VIEW monitor.ccp_table_size OWNER TO ccp_monitoring; DROP MATERIALIZED VIEW IF EXISTS monitor.ccp_database_size; CREATE MATERIALIZED VIEW monitor.ccp_database_size AS SELECT datname as dbname - , pg_database_size(datname) as bytes - FROM pg_catalog.pg_database + , pg_database_size(datname) as bytes + FROM pg_catalog.pg_database WHERE datistemplate = false; CREATE UNIQUE INDEX ccp_database_size_idx ON monitor.ccp_database_size (dbname); ALTER MATERIALIZED VIEW monitor.ccp_database_size OWNER TO ccp_monitoring; @@ -141,7 +141,7 @@ GRANT ALL ON ALL TABLES IN SCHEMA monitor TO ccp_monitoring; -- Don't alter any existing data that is already there for any given view INSERT INTO monitor.metric_views ( - view_name + view_name , run_interval , scope ) VALUES ( @@ -151,7 +151,7 @@ VALUES ( ON CONFLICT DO NOTHING; INSERT INTO monitor.metric_views ( - view_name + view_name , run_interval , scope ) VALUES ( @@ -161,7 +161,7 @@ VALUES ( ON CONFLICT DO NOTHING; INSERT INTO monitor.metric_views ( - view_name + view_name , run_interval , scope ) VALUES ( diff --git a/postgres_exporter/linux/crunchy-postgres-exporter@.service b/postgres_exporter/linux/crunchy-postgres-exporter@.service index f51fcc13..33f98f8e 100644 --- a/postgres_exporter/linux/crunchy-postgres-exporter@.service +++ b/postgres_exporter/linux/crunchy-postgres-exporter@.service @@ -3,7 +3,7 @@ # Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. 
# ### -# Template systemd service file to allow multiple postgres exporters to run with +# Template systemd service file to allow multiple postgres exporters to run with # a simple variable in service name to distinguish the environment file # Ex: systemctl enable postgres_exporter@postgres_exporter_mydb.service diff --git a/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11 b/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11 index ec911dcd..eba9e6ae 100644 --- a/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11 +++ b/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11 @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. PGBACKREST_INFO_THROTTLE_MINUTES=10 PG_STAT_STATEMENTS_LIMIT=20 @@ -21,4 +21,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni QUERY_FILE_LIST="/etc/postgres_exporter/11/queries_global.yml /etc/postgres_exporter/11/queries_general.yml /etc/postgres_exporter/11/queries_global_dbsize.yml" # Recommend running separate exporter service to collect per-db metrics, even if there is only one database in the instance. Allows easier expansion to support multiple databases at a later time. See postgres_exporter_pg##_per_db file. - diff --git a/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11_per_db b/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11_per_db index 40b1839b..9af7fc57 100644 --- a/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11_per_db +++ b/postgres_exporter/linux/pg11/sysconfig.postgres_exporter_pg11_per_db @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. # Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). 
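# As an illustrative sketch only (the 9189 port and the queries_per_db.yml path below are assumptions, not shipped defaults),
# an additional per-db exporter instance on this same host would need its own unique port and its own query file:
# OPT="--web.listen-address=0.0.0.0:9189 --extend.query-path=/etc/postgres_exporter/11/queries_per_db.yml --disable-default-metrics --disable-settings-metrics"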
OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/11/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" diff --git a/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12 b/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12 index e9cc08f3..07d68bb7 100644 --- a/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12 +++ b/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12 @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. PGBACKREST_INFO_THROTTLE_MINUTES=10 PG_STAT_STATEMENTS_LIMIT=20 @@ -21,4 +21,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni QUERY_FILE_LIST="/etc/postgres_exporter/12/queries_global.yml /etc/postgres_exporter/12/queries_general.yml /etc/postgres_exporter/12/queries_global_dbsize.yml" # Recommend running separate exporter service to collect per-db metrics, even if there is only one database in the instance. Allows easier expansion to support multiple databases at a later time. See postgres_exporter_pg##_per_db file. - diff --git a/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12_per_db b/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12_per_db index bfda167f..619193db 100644 --- a/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12_per_db +++ b/postgres_exporter/linux/pg12/sysconfig.postgres_exporter_pg12_per_db @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. # Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). 
OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/12/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" diff --git a/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13 b/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13 index e73ae132..fe1e0675 100644 --- a/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13 +++ b/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13 @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. PGBACKREST_INFO_THROTTLE_MINUTES=10 PG_STAT_STATEMENTS_LIMIT=20 @@ -21,4 +21,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni QUERY_FILE_LIST="/etc/postgres_exporter/13/queries_global.yml /etc/postgres_exporter/13/queries_general.yml /etc/postgres_exporter/13/queries_global_dbsize.yml" # Recommend running separate exporter service to collect per-db metrics, even if there is only one database in the instance. Allows easier expansion to support multiple databases at a later time. See postgres_exporter_pg##_per_db file. - diff --git a/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13_per_db b/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13_per_db index 1069ea42..e8dcccfb 100644 --- a/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13_per_db +++ b/postgres_exporter/linux/pg13/sysconfig.postgres_exporter_pg13_per_db @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. # Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). 
OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/13/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" diff --git a/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14 b/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14 index 71e1f37b..fd65aa7b 100644 --- a/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14 +++ b/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14 @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. PGBACKREST_INFO_THROTTLE_MINUTES=10 PG_STAT_STATEMENTS_LIMIT=20 diff --git a/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14_per_db b/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14_per_db index 6a4d8e46..88a71f0f 100644 --- a/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14_per_db +++ b/postgres_exporter/linux/pg14/sysconfig.postgres_exporter_pg14_per_db @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. # Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/14/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" diff --git a/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15 b/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15 index 95dc3368..f2264829 100644 --- a/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15 +++ b/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15 @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. 
Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. PGBACKREST_INFO_THROTTLE_MINUTES=10 PG_STAT_STATEMENTS_LIMIT=20 @@ -21,4 +21,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni QUERY_FILE_LIST="/etc/postgres_exporter/15/queries_global.yml /etc/postgres_exporter/15/queries_general.yml /etc/postgres_exporter/15/queries_global_dbsize.yml" # Recommend running separate exporter service to collect per-db metrics, even if there is only one database in the instance. Allows easier expansion to support multiple databases at a later time. See postgres_exporter_pg##_per_db file. - diff --git a/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15_per_db b/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15_per_db index 745e61ad..a1deee43 100644 --- a/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15_per_db +++ b/postgres_exporter/linux/pg15/sysconfig.postgres_exporter_pg15_per_db @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. # Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/15/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" @@ -19,4 +19,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni # Ex: For per db metric. DO NOT add queries that return the same labels/values on all databases otherwise the exporter will throw errors. The queries that pgmonitor provides for per-db metrics all include a "dbname" label to distinguish them. QUERY_FILE_LIST="/etc/postgres_exporter/15/queries_per_db.yml" - diff --git a/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16 b/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16 index d1152a1a..faa08691 100644 --- a/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16 +++ b/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16 @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. 
Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. PGBACKREST_INFO_THROTTLE_MINUTES=10 PG_STAT_STATEMENTS_LIMIT=20 @@ -21,4 +21,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni QUERY_FILE_LIST="/etc/postgres_exporter/16/queries_global.yml /etc/postgres_exporter/16/queries_general.yml /etc/postgres_exporter/16/queries_global_dbsize.yml" # Recommend running separate exporter service to collect per-db metrics, even if there is only one database in the instance. Allows easier expansion to support multiple databases at a later time. See postgres_exporter_pg##_per_db file. - diff --git a/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16_per_db b/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16_per_db index 43bab5b2..87e9f468 100644 --- a/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16_per_db +++ b/postgres_exporter/linux/pg16/sysconfig.postgres_exporter_pg16_per_db @@ -9,7 +9,7 @@ # --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. # --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. # DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. -# QUERY_FILE_LIST: space delimitted yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. # Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/16/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" @@ -19,4 +19,3 @@ DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_moni # Ex: For per db metric. DO NOT add queries that return the same labels/values on all databases otherwise the exporter will throw errors. The queries that pgmonitor provides for per-db metrics all include a "dbname" label to distinguish them. QUERY_FILE_LIST="/etc/postgres_exporter/16/queries_per_db.yml" - diff --git a/postgres_exporter/linux/pgbackrest-info.sh b/postgres_exporter/linux/pgbackrest-info.sh index 2538d718..2538583e 100755 --- a/postgres_exporter/linux/pgbackrest-info.sh +++ b/postgres_exporter/linux/pgbackrest-info.sh @@ -43,4 +43,3 @@ elif [ -z "$BACKREST_CONFIGS" ] && [ ! -z "$BACKREST_STANZAS" ]; then fi done fi - diff --git a/postgres_exporter/linux/pgmonitor.conf b/postgres_exporter/linux/pgmonitor.conf index 9506cd58..f8fcc07d 100644 --- a/postgres_exporter/linux/pgmonitor.conf +++ b/postgres_exporter/linux/pgmonitor.conf @@ -1,26 +1,25 @@ -### +### # -# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. 
# ### -# +# # If you only have a single pgbackrest repository on one system, the following settings (BACKREST_CONFIGS, BACKREST_STANZAS, BACKREST_AUTO_CONFIG_STANZAS) are generally not needed. These are mostly used in container environments where multiple repositories are running on a single system. If you're unsure if you need to set these, it is recommended to leave them unset. # # BACKREST_CONFIGS sets config files to process # ex: BACKREST_CONFIGS="/etc/pg10backrest.conf /etc/pg11backrest.conf" # -# BACKREST_STANZAS sets stanza names to pocess +# BACKREST_STANZAS sets stanza names to process # ex: BACKREST_STANZAS="mypg10 mypg11" # # BACKREST_AUTO_CONFIG_STANZAS will auto detect stanza names # 0 = Disabled # 1 = Enabled # -# Note: When BACKREST_AUTO_CONFIG_STANZAS is set it supercedes BACKREST_CONFIGS and BACKREST_STANZAS +# Note: When BACKREST_AUTO_CONFIG_STANZAS is set it supersedes BACKREST_CONFIGS and BACKREST_STANZAS # BACKREST_CONFIGS="" BACKREST_STANZAS="" BACKREST_AUTOCONFIG_STANZAS=0 - diff --git a/postgres_exporter/linux/queries_backrest.yml b/postgres_exporter/linux/queries_backrest.yml index a2f69d6c..640e0feb 100644 --- a/postgres_exporter/linux/queries_backrest.yml +++ b/postgres_exporter/linux/queries_backrest.yml @@ -90,14 +90,14 @@ ccp_backrest_oldest_full_backup: , stanza , backup_data->'database'->>'repo-key' AS repo , min((backup_data->'timestamp'->>'stop')::bigint) time_seconds - FROM per_stanza + FROM per_stanza WHERE backup_data->>'type' IN ('full') GROUP BY config_file, stanza, backup_data->'database'->>'repo-key'" metrics: - config_file: usage: "LABEL" description: "Configuration file for this backup" - - stanza: + - stanza: usage: "LABEL" description: "pgBackRest stanza name" - repo: @@ -114,7 +114,7 @@ ccp_backrest_last_full_backup: , jsonb_array_elements(data) AS stanza_data FROM monitor.pgbackrest_info(#PGBACKREST_INFO_THROTTLE_MINUTES#) ) - , per_stanza AS ( + , per_stanza AS ( SELECT config_file , stanza_data->>'name' AS stanza , jsonb_array_elements(stanza_data->'backup') AS backup_data @@ -131,7 +131,7 @@ ccp_backrest_last_full_backup: - config_file: usage: "LABEL" description: "Configuration file for this backup" - - stanza: + - stanza: usage: "LABEL" description: "pgBackRest stanza name" - repo: @@ -148,7 +148,7 @@ ccp_backrest_last_diff_backup: , jsonb_array_elements(data) AS stanza_data FROM monitor.pgbackrest_info(#PGBACKREST_INFO_THROTTLE_MINUTES#) ) - , per_stanza AS ( + , per_stanza AS ( SELECT config_file , stanza_data->>'name' AS stanza , jsonb_array_elements(stanza_data->'backup') AS backup_data @@ -165,7 +165,7 @@ ccp_backrest_last_diff_backup: - config_file: usage: "LABEL" description: "Configuration file for this backup" - - stanza: + - stanza: usage: "LABEL" description: "pgBackRest stanza name" - repo: @@ -182,7 +182,7 @@ ccp_backrest_last_incr_backup: , jsonb_array_elements(data) AS stanza_data FROM monitor.pgbackrest_info(#PGBACKREST_INFO_THROTTLE_MINUTES#) ) - , per_stanza AS ( + , per_stanza AS ( SELECT config_file , stanza_data->>'name' AS stanza , jsonb_array_elements(stanza_data->'backup') AS backup_data @@ -199,7 +199,7 @@ ccp_backrest_last_incr_backup: - config_file: usage: "LABEL" description: "Configuration file for this backup" - - stanza: + - stanza: usage: "LABEL" description: "pgBackRest stanza name" - repo: diff --git a/prometheus/common/alert-rules.d/crunchy-alert-rules-etcd.yml.example b/prometheus/common/alert-rules.d/crunchy-alert-rules-etcd.yml.example index 67358163..1655f589 100644 --- 
a/prometheus/common/alert-rules.d/crunchy-alert-rules-etcd.yml.example +++ b/prometheus/common/alert-rules.d/crunchy-alert-rules-etcd.yml.example @@ -89,4 +89,3 @@ groups: # severity_num: 300 # annotations: # description: 'Leader metric is absent from target {{ $labels.job }}. Check that etcd is running on target host.' - diff --git a/prometheus/common/alert-rules.d/crunchy-alert-rules-patroni.yml.example b/prometheus/common/alert-rules.d/crunchy-alert-rules-patroni.yml.example index c3d7f7b1..77bb0768 100644 --- a/prometheus/common/alert-rules.d/crunchy-alert-rules-patroni.yml.example +++ b/prometheus/common/alert-rules.d/crunchy-alert-rules-patroni.yml.example @@ -14,7 +14,7 @@ groups: expr: patroni_postgres_running != 1 for: 60s labels: - service: patroni + service: patroni severity: critical severity_num: 300 annotations: @@ -24,7 +24,7 @@ groups: expr: patroni_pending_restart != 0 for: 1800s labels: - service: patroni + service: patroni severity: warning severity_num: 300 annotations: @@ -34,7 +34,7 @@ groups: expr: patroni_is_paused != 0 for: 900s labels: - service: patroni + service: patroni severity: critical severity_num: 300 annotations: @@ -45,8 +45,8 @@ groups: expr: (time() - patroni_dcs_last_seen) > 300 for: 60s labels: - service: patroni - severity: warning + service: patroni + severity: warning severity_num: 200 annotations: summary: 'Patroni running on {{ $labels.job }} has reported that it has not communicated with its DCS for at least 5 minutes' @@ -56,8 +56,8 @@ groups: expr: (time() - patroni_dcs_last_seen) > 1800 for: 60s labels: - service: patroni - severity: critical + service: patroni + severity: critical severity_num: 300 annotations: summary: 'Patroni running on {{ $labels.job }} has reported that it has not communicated with its DCS for at least 30 minutes' @@ -76,10 +76,10 @@ groups: # Detects one or more timeline switches in a given time - alert: PatroniTimelineSwitch - expr: patroni_postgres_timeline != patroni_postgres_timeline offset 5m + expr: patroni_postgres_timeline != patroni_postgres_timeline offset 5m for: 60s labels: - service: patroni + service: patroni severity: warning severity_num: 200 annotations: @@ -96,4 +96,3 @@ groups: severity_num: 300 annotations: summary: 'A leader node (neither primary nor standby) cannot be found for cluster {{ $labels.cluster }}.' - diff --git a/prometheus/common/alert-rules.d/crunchy-alert-rules-pg.yml.example b/prometheus/common/alert-rules.d/crunchy-alert-rules-pg.yml.example index fa250d67..5e098c79 100644 --- a/prometheus/common/alert-rules.d/crunchy-alert-rules-pg.yml.example +++ b/prometheus/common/alert-rules.d/crunchy-alert-rules-pg.yml.example @@ -26,7 +26,7 @@ groups: for: 60s labels: service: postgresql - severity: warning + severity: warning severity_num: 200 annotations: summary: 'Materialized view refresh on {{ $labels.job }} (instance: {{ $labels.instance }}) is taking longer than expected to run. Longest time recorded: ( {{ $value }} )' @@ -36,7 +36,7 @@ groups: for: 60s labels: service: postgresql - severity: critical + severity: critical severity_num: 300 annotations: summary: 'Materialized view refresh on {{ $labels.job }} (instance: {{ $labels.instance }}) is taking longer than expected to run. Longest time recorded: ( {{ $value }} )' @@ -46,7 +46,7 @@ groups: for: 60s labels: service: postgresql - severity: warning + severity: warning severity_num: 200 annotations: summary: 'Table refresh on {{ $labels.job }} (instance: {{ $labels.instance }}) is taking longer than expected to run. 
Longest time recorded: ( {{ $value }} )' @@ -56,7 +56,7 @@ groups: for: 60s labels: service: postgresql - severity: critical + severity: critical severity_num: 300 annotations: summary: 'Table refresh on {{ $labels.job }} (instance: {{ $labels.instance }}) is taking longer than expected to run. Longest time recorded: ( {{ $value }} )' @@ -66,7 +66,7 @@ groups: for: 60s labels: service: postgresql - severity: critical + severity: critical severity_num: 300 annotations: summary: 'Materialized view refresh on {{ $labels.job }} (instance: {{ $labels.instance }}) has not run within the expected interval of time. Number of objects not refreshed: ( {{ $value }} )' @@ -76,7 +76,7 @@ groups: for: 60s labels: service: postgresql - severity: critical + severity: critical severity_num: 300 annotations: summary: 'Table refresh on {{ $labels.job }} (instance: {{ $labels.instance }}) has not run within the expected interval of time. Number of objects not refreshed: ( {{ $value }} )' @@ -118,7 +118,7 @@ groups: ## Monitor for a failover event by checking if the recovery status value has changed within the specified time period ## IMPORTANT NOTE: This alert will *automatically resolve* after the given offset time period has passed! If you desire to have an alert that must be manually resolved, see the commented out alert beneath this one - alert: PGRecoveryStatusSwitch - expr: ccp_is_in_recovery_status != ccp_is_in_recovery_status offset 5m + expr: ccp_is_in_recovery_status != ccp_is_in_recovery_status offset 5m for: 60s labels: service: postgresql diff --git a/prometheus/linux/crunchy-prometheus-service-rhel.conf b/prometheus/linux/crunchy-prometheus-service-rhel.conf index 694b64b1..ab802966 100644 --- a/prometheus/linux/crunchy-prometheus-service-rhel.conf +++ b/prometheus/linux/crunchy-prometheus-service-rhel.conf @@ -17,4 +17,3 @@ ExecStart= ExecStart=/usr/bin/prometheus $OPT ExecReload=/usr/bin/kill -HUP $MAINPID Restart=always - diff --git a/prometheus/linux/crunchy-prometheus.yml b/prometheus/linux/crunchy-prometheus.yml index e80eea76..5df82792 100644 --- a/prometheus/linux/crunchy-prometheus.yml +++ b/prometheus/linux/crunchy-prometheus.yml @@ -30,20 +30,20 @@ scrape_configs: # regex: '(pg_locks_count.*|pg_settings.*|pg_stat_activity.*|pg_stat_bgwriter.*|pg_stat_database.*)' # action: drop ## Version 0.5+ of postgres_exporter automatically adds a "server" label to all custom query metrics. This breaks upgrades from older versions - ## of pgmonitor and also breaks our PG Overview dashboard and several other single metric panels. - ## pgmonitor only runs two exporters max by default and our per-db exporter distinguishes each metric by a dbname label, + ## of pgmonitor and also breaks our PG Overview dashboard and several other single metric panels. + ## pgmonitor only runs two exporters max by default and our per-db exporter distinguishes each metric by a dbname label, ## so we do not need the "server" label. This relabel will only target metrics that start with "ccp_", so shouldn't break users' custom metrics. 
- #- source_labels: [__name__, server] - # regex: "ccp_.*;.+" - # action: replace - # target_label: server + #- source_labels: [__name__, server] + # regex: "ccp_.*;.+" + # action: replace + # target_label: server # replacement: "" ### END postgres_exporter section - ## Monitoring for tcp services that don't have an associated exporter can be accomplished using the tcp probe + ## Monitoring for tcp services that don't have an associated exporter can be accomplished using the tcp probe ## of the blackbox_exporter provided by the Prometheus developers. ## Note this only provides a simple up/down that the service is listening on the given IP/port. - ## Below is an example to monitor the services indicated by the comment. + ## Below is an example to monitor the services indicated by the comment. ## The "targets" list is all that should need to be edited to customize to your setup assuming blackbox_exporter runs ## on same system as Prometheus. # - job_name: 'blackbox_tcp_services' @@ -84,7 +84,7 @@ scrape_configs: # - 192.168.122.31:8009 # etcd -#### Uncomment below if using alertmanager #### +#### Uncomment below if using alertmanager #### # rule_files: - /etc/prometheus/alert-rules.d/*.yml @@ -95,4 +95,3 @@ rule_files: # static_configs: # - targets: # - "127.0.0.1:9093" - diff --git a/prometheus/linux/sysconfig.prometheus b/prometheus/linux/sysconfig.prometheus index a37224b3..11eddb65 100644 --- a/prometheus/linux/sysconfig.prometheus +++ b/prometheus/linux/sysconfig.prometheus @@ -10,6 +10,6 @@ # --storage.tsdb.path: location that prometheus will store its time series database files # --storage.tsdb.retention: how long prometheus will keep data before expiring it. This is how much data will be available to trend in grafana. # --log.level: how verbose to make system logging. Setting to "debug" can help with diagnosing issues, but should not be left that way. -# --web.enable-admin-api: set this to make the admin api available which allows database snaphot backups. Left off by default for security. +# --web.enable-admin-api: set this to make the admin api available which allows database snapshot backups. Left off by default for security. OPT="--config.file=/etc/prometheus/crunchy-prometheus.yml --storage.tsdb.path=/var/lib/ccp_monitoring/prometheus --storage.tsdb.retention.time=7d --log.level=info" diff --git a/release/RELEASE.md b/release/RELEASE.md index a664642b..79e561a4 100644 --- a/release/RELEASE.md +++ b/release/RELEASE.md @@ -15,7 +15,7 @@ Starting from the previous release being tagged and released to the access porta - Create new "Next" milestone - Review existing "Future" issues and assign to "This Release" as approved - During development, if issue NNN is bigger than we thought, or there isn't enough time, it can move from "[This Release]" to "Next" (basically backlog which should take priority for the next release cycle) -3. Determine the next release version number, i.e. whether this will be a major or minor release based on semantic versioning policies. Note this can change if a suddenly larger issue becomes prority or gets put off until later. But that should be discouraged and the focus for the next release should be determind up front. +3. Determine the next release version number, i.e. whether this will be a major or minor release based on semantic versioning policies. Note this can change if a suddenly larger issue becomes priority or gets put off until later. But that should be discouraged and the focus for the next release should be determined up front. 4.
Any holdover PRs with major changes from the previous release (for instance major refactors or pieces which cover the majority of the role which need maximal testing coverage during development) should be merged in. These should be marked with a "READY TO MERGE" tag so we can track. 5. Feature development process: - discussion about the feature if required @@ -78,7 +78,7 @@ gitGraph checkout development commit ``` -16. Build team builds final code and lock package versions +16. Build team builds final code and lock package versions 17. Preprod testing/validation 18. Prod rollout to access portal 19. Documentation release to access portal via the doc tag for this release. If needed, additional documentation tweaks can be done against the `v4_9-STABLE` branch and the doc tag can be moved (though see "Making changes to back branches", step 6 for guidance). @@ -100,5 +100,5 @@ If a bug fix in the code or documentation for a given release is required, you s 6. If the changes involve any documentation updates, you will need to: a. tag a new specific doc release revision; i.e., `d4.9.0-N`. This corresponds to an immutable "docs have been released" version and allows us to track. b. ensure that the docs changes are what are expected; `git diff d4.9.0-2 d4.9.0-3 -- hugo` (using the appropriate current release and previous release). If you notice something wrong at this point and you have not pushed this tag you can correct locally and re-tag with the same release version. If this tag has already been pushed, correct and increment the docs release number, repeating step 5. - c. push the new tag to upstream via `git push upstream d4.9.0-N` (substitute appropriate upstream remote name and tag here). + c. push the new tag to upstream via `git push upstream d4.9.0-N` (substitute appropriate upstream remote name and tag here). 7. Inform the build team of the availability of the new code and/or doc tags. diff --git a/sql_exporter/common/crunchy_bloat_check_collector.yml b/sql_exporter/common/crunchy_bloat_check_collector.yml index 08bdd94e..b2e9f5c5 100644 --- a/sql_exporter/common/crunchy_bloat_check_collector.yml +++ b/sql_exporter/common/crunchy_bloat_check_collector.yml @@ -1,12 +1,12 @@ collector_name: crunchy_bloat_check -metrics: +metrics: - metric_name: ccp_bloat_check_size_bytes type: gauge help: "Size of object in bytes" values: [size_bytes] key_labels: - - dbname + - dbname - schemaname - objectname query_ref: ccp_bloat_check @@ -16,7 +16,7 @@ metrics: help: "Total wasted space in bytes of given object" values: [total_wasted_space_bytes] key_labels: - - dbname + - dbname - schemaname - objectname query_ref: ccp_bloat_check @@ -32,5 +32,3 @@ queries: , size_bytes , (dead_tuple_size_bytes + (free_space_bytes - (relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::bigint ))::bigint AS total_wasted_space_bytes FROM bloat_stats - - diff --git a/sql_exporter/common/crunchy_global_collector.yml b/sql_exporter/common/crunchy_global_collector.yml index 47efc2bd..707f773d 100644 --- a/sql_exporter/common/crunchy_global_collector.yml +++ b/sql_exporter/common/crunchy_global_collector.yml @@ -133,7 +133,7 @@ metrics: query_ref: ccp_replication_lag - metric_name: ccp_replication_lag_replay_time type: gauge - help: "Length of time since the last transaction was replayed on replica. Returns zero if last WAL recieved equals last WAL replayed. Avoids false positives when primary stops writing. Monitors for replicas that cannot keep up with primary WAL generation." 
+ help: "Length of time since the last transaction was replayed on replica. Returns zero if last WAL received equals last WAL replayed. Avoids false positives when primary stops writing. Monitors for replicas that cannot keep up with primary WAL generation." values: [replay_time] query_ref: ccp_replication_lag @@ -183,52 +183,52 @@ metrics: type: gauge help: "Number of scheduled checkpoints that have been performed" values: [checkpoints_timed] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_checkpoints_req type: gauge help: "Number of requested checkpoints that have been performed" values: [checkpoints_req] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_checkpoint_write_time type: gauge help: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" values: [checkpoint_write_time] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_checkpoint_sync_time type: gauge help: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" values: [checkpoint_sync_time] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_buffers_checkpoint type: gauge help: "Number of buffers written during checkpoints" values: [buffers_checkpoint] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_buffers_clean type: gauge help: "Number of buffers written by the background writer" values: [buffers_clean] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_maxwritten_clean type: gauge help: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" values: [maxwritten_clean] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_buffers_backend type: gauge help: "Number of buffers written directly by a backend" values: [buffers_backend] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_buffers_backend_fsync type: gauge help: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" values: [buffers_backend_fsync] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_bgwriter_buffers_alloc type: gauge help: "Number of buffers allocated" values: [buffers_alloc] - query_ref: ccp_stat_bgwriter + query_ref: ccp_stat_bgwriter - metric_name: ccp_stat_database_xact_commit type: gauge @@ -351,43 +351,35 @@ metrics: values: [total_size_bytes] query_ref: ccp_wal_activity -# - metric_name: +# - metric_name: # type: gauge -# help: +# help: # values: [] -# query_ref: +# query_ref: ########## QUERIES ########## queries: - query_name: ccp_archive_command_status query: | - SELECT CASE - WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 - WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0 - ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) - END AS seconds_since_last_fail - , EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)) AS seconds_since_last_archive + SELECT seconds_since_last_fail + , seconds_since_last_archive , archived_count , failed_count - FROM pg_catalog.pg_stat_archiver + FROM 
pgmonitor_ext.ccp_archive_command_status - query_name: ccp_connection_stats query: | - SELECT ((total - idle) - idle_in_txn) AS active + SELECT active , total , idle , idle_in_txn - , (select coalesce(extract(epoch from (max(clock_timestamp() - state_change))),0) from pg_catalog.pg_stat_activity where state = 'idle in transaction') AS max_idle_in_txn_time - , (select coalesce(extract(epoch from (max(clock_timestamp() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' AND state NOT LIKE 'idle%' ) AS max_query_time - , (select coalesce(extract(epoch from (max(clock_timestamp() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) AS max_blocked_query_time + , max_idle_in_txn_time + , max_query_time + , max_blocked_query_time , max_connections - FROM ( - SELECT count(*) AS total - , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle - , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x - JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true) + FROM pgmonitor_ext.ccp_connection_stats - query_name: ccp_database_size @@ -399,8 +391,8 @@ queries: - query_name: ccp_is_in_recovery query: | - SELECT CASE WHEN pg_is_in_recovery = true THEN 1 ELSE 2 END AS status - FROM pg_is_in_recovery() + SELECT status + FROM pgmonitor_ext.ccp_pg_is_in_recovery - query_name: ccp_locks @@ -419,53 +411,48 @@ queries: - query_name: ccp_pg_settings_checksum query: | SELECT pgmonitor_ext.pg_settings_checksum() AS status - + - query_name: ccp_postgresql_version query: | - SELECT current_setting('server_version_num')::int AS current + SELECT current + FROM pgmonitor_ext.ccp_postgresql_version - query_name: ccp_postmaster_runtime query: | - SELECT extract('epoch' from pg_postmaster_start_time) as start_time_seconds from pg_catalog.pg_postmaster_start_time() + SELECT start_time_seconds + FROM pgmonitor_ext.ccp_postmaster_runtime - query_name: ccp_postmaster_uptime query: | - SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds + SELECT seconds + FROM pgmonitor_ext.ccp_postmaster_uptime - query_name: ccp_replication_lag query: | - SELECT - CASE - WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0 - ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER - END - AS replay_time - , CASE - WHEN pg_is_in_recovery() = false THEN 0 - ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER - END - AS received_time + SELECT replay_time + , received_time + FROM pgmonitor_ext.ccp_replication_lag - query_name: ccp_replication_lag_size query: | - SELECT client_addr AS replica - , client_hostname AS replica_hostname - , client_port AS replica_port - , pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes - FROM pg_catalog.pg_stat_replication + SELECT replica + , replica_hostname + , replica_port + , bytes + FROM pgmonitor_ext.ccp_replication_lag_size - query_name: ccp_replication_slots query: | SELECT slot_name - , active::int - , pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, restart_lsn) AS retained_bytes - FROM pg_catalog.pg_replication_slots + , active + , retained_bytes + FROM pgmonitor_ext.ccp_replication_slots - query_name: ccp_sequence_exhaustion @@ -475,7 +462,8 @@ queries: - 
query_name: ccp_settings_pending_restart query: | - SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true + SELECT count + FROM pgmonitor_ext.ccp_settings_pending_restart - query_name: ccp_stat_bgwriter @@ -495,50 +483,33 @@ queries: - query_name: ccp_stat_database query: | - SELECT d.datname AS dbname - , s.xact_commit - , s.xact_rollback - , s.blks_read - , s.blks_hit - , s.tup_returned - , s.tup_fetched - , s.tup_inserted - , s.tup_updated - , s.tup_deleted - , s.conflicts - , s.temp_files - , s.temp_bytes - , s.deadlocks - FROM pg_catalog.pg_stat_database s - JOIN pg_catalog.pg_database d ON d.datname = s.datname - WHERE d.datistemplate = false + SELECT dbname + , xact_commit + , xact_rollback + , blks_read + , blks_hit + , tup_returned + , tup_fetched + , tup_inserted + , tup_updated + , tup_deleted + , conflicts + , temp_files + , temp_bytes + , deadlocks + FROM pgmonitor_ext.ccp_stat_database - query_name: ccp_transaction_wraparound query: | - WITH max_age AS ( - SELECT 2000000000 as max_old_xid, setting AS autovacuum_freeze_max_age FROM pg_catalog.pg_settings WHERE name = 'autovacuum_freeze_max_age' - ) - , per_database_stats AS ( - SELECT datname - , m.max_old_xid::int - , m.autovacuum_freeze_max_age::int - , age(d.datfrozenxid) AS oldest_current_xid - FROM pg_catalog.pg_database d - JOIN max_age m ON (true) WHERE d.datallowconn - ) - SELECT max(oldest_current_xid) AS oldest_current_xid - , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound - , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac - FROM per_database_stats - + SELECT oldest_current_xid + , percent_towards_wraparound + , percent_towards_emergency_autovac + FROM pgmonitor_ext.ccp_transaction_wraparound + - query_name: ccp_wal_activity query: | SELECT last_5_min_size_bytes - , (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes - FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes - FROM pg_catalog.pg_ls_waldir() - WHERE modification > CURRENT_TIMESTAMP - '5 minutes'::interval) x - - + , total_size_bytes + FROM pgmonitor_ext.ccp_wal_activity diff --git a/sql_exporter/common/crunchy_per_db_collector.yml b/sql_exporter/common/crunchy_per_db_collector.yml index 17f79457..d4cc8568 100644 --- a/sql_exporter/common/crunchy_per_db_collector.yml +++ b/sql_exporter/common/crunchy_per_db_collector.yml @@ -92,6 +92,16 @@ metrics: - relname query_ref: ccp_stat_user_tables + - metric_name: ccp_stat_user_tables_n_tup_newpage_upd + type: gauge + help: "Number of rows updated where the successor version goes onto a new heap page, leaving behind an original version with a t_ctid field that points to a different heap page. These are always non-HOT updates." 
diff --git a/sql_exporter/common/crunchy_per_db_collector.yml b/sql_exporter/common/crunchy_per_db_collector.yml
index 17f79457..d4cc8568 100644
--- a/sql_exporter/common/crunchy_per_db_collector.yml
+++ b/sql_exporter/common/crunchy_per_db_collector.yml
@@ -92,6 +92,16 @@ metrics:
       - relname
     query_ref: ccp_stat_user_tables
 
+  - metric_name: ccp_stat_user_tables_n_tup_newpage_upd
+    type: gauge
+    help: "Number of rows updated where the successor version goes onto a new heap page, leaving behind an original version with a t_ctid field that points to a different heap page. These are always non-HOT updates."
+    values: [n_tup_newpage_upd]
+    key_labels:
+      - dbname
+      - schemaname
+      - relname
+    query_ref: ccp_stat_user_tables
+
   - metric_name: ccp_stat_user_tables_n_live_tup
     type: gauge
     help: "Estimated number of live rows"
@@ -206,6 +216,7 @@ queries:
       , n_tup_upd
       , n_tup_del
       , n_tup_hot_upd
+      , n_tup_newpage_upd
       , n_live_tup
       , n_dead_tup
       , vacuum_count
diff --git a/sql_exporter/common/crunchy_pg_stat_statements_collector.yml b/sql_exporter/common/crunchy_pg_stat_statements_collector.yml
index 4a5a4033..ca7e9099 100644
--- a/sql_exporter/common/crunchy_pg_stat_statements_collector.yml
+++ b/sql_exporter/common/crunchy_pg_stat_statements_collector.yml
@@ -141,7 +141,7 @@ queries:
       , sum(rows) AS row_count
       FROM pgmonitor_ext.pg_stat_statements_func()
       GROUP BY 1,2
-
+
 - query_name: ccp_pg_stat_statements_top_max
   query: |
     SELECT role
diff --git a/sql_exporter/common/crunchy_pg_stat_statements_reset_collector.yml b/sql_exporter/common/crunchy_pg_stat_statements_reset_collector.yml
index ec998cfe..03f7e852 100644
--- a/sql_exporter/common/crunchy_pg_stat_statements_reset_collector.yml
+++ b/sql_exporter/common/crunchy_pg_stat_statements_reset_collector.yml
@@ -1,11 +1,10 @@
-collector_name: crunchy_pg_stat_statements_reset 
+collector_name: crunchy_pg_stat_statements_reset
 metrics:
   - metric_name: crunchy_pg_stat_statements_reset_time
     type: gauge
-    help: "Epoch time since pg_stat_statements was last reset" 
+    help: "Epoch time since pg_stat_statements was last reset"
    values: [time]
     query: |
       SELECT time
       FROM pgmonitor_ext.ccp_pg_stat_statements_reset;
-
diff --git a/sql_exporter/common/crunchy_pgbouncer_121_collector.yml b/sql_exporter/common/crunchy_pgbouncer_121_collector.yml
index 351ebaed..fdf0bb6b 100644
--- a/sql_exporter/common/crunchy_pgbouncer_121_collector.yml
+++ b/sql_exporter/common/crunchy_pgbouncer_121_collector.yml
@@ -197,4 +197,3 @@ queries:
 #    no_prepared_statement: true
 #    query: |
 #      SHOW STATS;
-
diff --git a/sql_exporter/common/setup_db.sql b/sql_exporter/common/setup_db.sql
index e8ecb681..1ad7f59e 100644
--- a/sql_exporter/common/setup_db.sql
+++ b/sql_exporter/common/setup_db.sql
@@ -10,7 +10,7 @@ BEGIN
     END IF;
 END
 $$;
-
+
 GRANT pg_monitor to ccp_monitoring;
 GRANT pg_execute_server_program TO ccp_monitoring;
diff --git a/sql_exporter/common/sql_exporter.yml.example b/sql_exporter/common/sql_exporter.yml.example
index ffad2289..97330159 100644
--- a/sql_exporter/common/sql_exporter.yml.example
+++ b/sql_exporter/common/sql_exporter.yml.example
@@ -41,4 +41,3 @@ collector_files:
   - "/etc/sql_exporter/crunchy_global_collector.yml"
   - "/etc/sql_exporter/crunchy_per_db_collector.yml"
 #  - "/etc/sql_exporter/crunchy_pgbouncer_collector.yml"
-
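The new ccp_stat_user_tables_n_tup_newpage_upd metric added to crunchy_per_db_collector.yml above surfaces a counter that PostgreSQL only added to pg_stat_user_tables in version 16, so it will be absent on older servers. An ad-hoc way to inspect the same counter directly; this query is illustrative and not part of the shipped collectors:

    -- Tables whose UPDATEs most often have to place the new row version on a
    -- different heap page (always non-HOT updates); requires PostgreSQL 16+.
    SELECT schemaname
    , relname
    , n_tup_upd
    , n_tup_hot_upd
    , n_tup_newpage_upd
    FROM pg_catalog.pg_stat_user_tables
    ORDER BY n_tup_newpage_upd DESC
    LIMIT 10;

A high ratio of n_tup_newpage_upd to n_tup_upd can point at tables that might benefit from a lower fillfactor, since free space on the page enables same-page updates.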
diff --git a/sql_exporter/linux/crontab.txt b/sql_exporter/linux/crontab.txt
index 804b7413..41dfbedc 100644
--- a/sql_exporter/linux/crontab.txt
+++ b/sql_exporter/linux/crontab.txt
@@ -6,4 +6,3 @@
 # The below script must be run as a user with the pg_read_all_data role or a superuser. You may have to adjust your pg_hba.conf to allow this and/or use a .pgpass file
 # By default only scans and stores statistics for objects that are at least 1GB in size (--min_size). Setting --quiet twice, as shown, suppresses all output.
 # 08 03 * * 0 /usr/bin/pg_bloat_check.py --quiet --quiet -c "dbname=postgres user=ccp_monitoring" --min_size=1073741824
-
diff --git a/sql_exporter/linux/sql_exporter.sysconfig b/sql_exporter/linux/sql_exporter.sysconfig
index 9ec5a81a..05794f19 100644
--- a/sql_exporter/linux/sql_exporter.sysconfig
+++ b/sql_exporter/linux/sql_exporter.sysconfig
@@ -5,4 +5,3 @@
 ###
 OPT=-config.file=/etc/sql_exporter/sql_exporter.yml -web.listen-address=0.0.0.0:9399 -log.level=info -web.enable-reload=true
-
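To round out the setup shown in this diff: after running setup_db.sql, you can confirm that the ccp_monitoring role actually holds the memberships the collectors rely on. A hypothetical verification query, not part of the shipped files:

    -- List role memberships held by ccp_monitoring; per setup_db.sql the
    -- expected rows are pg_monitor and pg_execute_server_program.
    SELECT m.rolname AS member
    , r.rolname AS granted_role
    FROM pg_catalog.pg_auth_members am
    JOIN pg_catalog.pg_roles m ON m.oid = am.member
    JOIN pg_catalog.pg_roles r ON r.oid = am.roleid
    WHERE m.rolname = 'ccp_monitoring';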