From c3b6b239a6e4f8497657bc29f05e40b8995341a0 Mon Sep 17 00:00:00 2001 From: Dj Walker-Morgan Date: Mon, 23 Oct 2023 16:27:23 +0100 Subject: [PATCH] Added timeouts Signed-off-by: Dj Walker-Morgan --- product_docs/docs/pgd/5/reference/index.json | 4 +- product_docs/docs/pgd/5/reference/index.mdx | 8 +- .../docs/pgd/5/reference/pgd-settings.mdx | 109 ++++++++++++++---- 3 files changed, 94 insertions(+), 27 deletions(-) diff --git a/product_docs/docs/pgd/5/reference/index.json b/product_docs/docs/pgd/5/reference/index.json index b846fc85cdc..b7c8120a5fa 100644 --- a/product_docs/docs/pgd/5/reference/index.json +++ b/product_docs/docs/pgd/5/reference/index.json @@ -148,8 +148,10 @@ "bdrglobal_keepalives_interval": "/pgd/latest/reference/pgd-settings#bdrglobal_keepalives_interval", "bdrglobal_keepalives_count": "/pgd/latest/reference/pgd-settings#bdrglobal_keepalives_count", "bdrglobal_tcp_user_timeout": "/pgd/latest/reference/pgd-settings#bdrglobal_tcp_user_timeout", - "bdrraft_keep_min_entries": "/pgd/latest/reference/pgd-settings#bdrraft_keep_min_entries", + "bdrraft_global_election_timeout": "/pgd/latest/reference/pgd-settings#bdrraft_global_election_timeout", + "bdrraft_local_election_timeout": "/pgd/latest/reference/pgd-settings#bdrraft_local_election_timeout", "bdrraft_response_timeout": "/pgd/latest/reference/pgd-settings#bdrraft_response_timeout", + "bdrraft_keep_min_entries": "/pgd/latest/reference/pgd-settings#bdrraft_keep_min_entries", "bdrraft_log_min_apply_duration": "/pgd/latest/reference/pgd-settings#bdrraft_log_min_apply_duration", "bdrraft_log_min_message_duration": "/pgd/latest/reference/pgd-settings#bdrraft_log_min_message_duration", "bdrraft_group_max_connections": "/pgd/latest/reference/pgd-settings#bdrraft_group_max_connections", diff --git a/product_docs/docs/pgd/5/reference/index.mdx b/product_docs/docs/pgd/5/reference/index.mdx index 8504dba6028..1b3ceed0870 100644 --- a/product_docs/docs/pgd/5/reference/index.mdx +++ 
b/product_docs/docs/pgd/5/reference/index.mdx @@ -206,12 +206,16 @@ The reference section is a definitive listing of all functions, views and comman * [`bdr.global_keepalives_interval`](pgd-settings#bdrglobal_keepalives_interval) * [`bdr.global_keepalives_count`](pgd-settings#bdrglobal_keepalives_count) * [`bdr.global_tcp_user_timeout`](pgd-settings#bdrglobal_tcp_user_timeout) -### [Internal settings](pgd-settings#internal-settings) - * [`bdr.raft_keep_min_entries`](pgd-settings#bdrraft_keep_min_entries) +### [Internal settings - Raft timeouts](pgd-settings#internal-settings---raft-timeouts) + * [`bdr.raft_global_election_timeout`](pgd-settings#bdrraft_global_election_timeout) + * [`bdr.raft_local_election_timeout`](pgd-settings#bdrraft_local_election_timeout) * [`bdr.raft_response_timeout`](pgd-settings#bdrraft_response_timeout) +### [Internal settings - Other Raft values](pgd-settings#internal-settings---other-raft-values) + * [`bdr.raft_keep_min_entries`](pgd-settings#bdrraft_keep_min_entries) * [`bdr.raft_log_min_apply_duration`](pgd-settings#bdrraft_log_min_apply_duration) * [`bdr.raft_log_min_message_duration`](pgd-settings#bdrraft_log_min_message_duration) * [`bdr.raft_group_max_connections`](pgd-settings#bdrraft_group_max_connections) +### [Internal settings - Other values](pgd-settings#internal-settings---other-values) * [`bdr.backwards_compatibility`](pgd-settings#bdrbackwards_compatibility) * [`bdr.track_replication_estimates`](pgd-settings#bdrtrack_replication_estimates) * [`bdr.lag_tracker_apply_rate_weight`](pgd-settings#bdrlag_tracker_apply_rate_weight) diff --git a/product_docs/docs/pgd/5/reference/pgd-settings.mdx b/product_docs/docs/pgd/5/reference/pgd-settings.mdx index 9d6a47eebf9..4fce1e40821 100644 --- a/product_docs/docs/pgd/5/reference/pgd-settings.mdx +++ b/product_docs/docs/pgd/5/reference/pgd-settings.mdx @@ -461,10 +461,11 @@ Enabling tracing on all nodes of a EDB Postgres Distributed cluster might help EDB Support to diagnose issues. 
You can set this only at Postgres server start. !!! Warning - Setting `bdr.debug_level` or `bdr.trace_level` to a value >= - `log_min_messages` can produce a very large volume of log output, so don't - enabled it long term in production unless plans are in place for log filtering, - archival, and rotation to prevent disk space exhaustion. +Setting `bdr.debug_level` or `bdr.trace_level` to a value >= +`log_min_messages` can produce a very large volume of log output, so don't +enable it long term in production unless plans are in place for log filtering, +archival, and rotation to prevent disk space exhaustion. +!!! ### `bdr.track_subscription_apply` @@ -482,64 +483,128 @@ Track lock timing when tracking statistics for relations. ### `bdr.enable_wal_decoder` -Enables logical change record (LCR) sending on a single node with a [decoding worker](../nodes#decoding-worker). By default, this setting is false. When set to true, a decoding worker process starts, and WAL senders send the LCRs it produces. If set back to false, any WAL senders using LCR are restarted and use the WAL directly. +Enables logical change record (LCR) sending on a single node with a [decoding +worker](../nodes#decoding-worker). By default, this setting is false. When set +to true, a decoding worker process starts, and WAL senders send the LCRs it +produces. If set back to false, any WAL senders using LCR are restarted and use +the WAL directly. -!!! Note - You also need to enable this setting on all nodes in the PGD group and set the `enable_wal_decoder` option to true on the group. +!!! Note +You also need to enable this setting on all nodes in the PGD group and +set the `enable_wal_decoder` option to true on the group. +!!! ### `bdr.receive_lcr` -When subscribing to another node, this setting enables the node to request the use of logical change records (LCRs) for the subscription. By default, this setting is false.
When this setting is true on a downstream node, the node requests that upstream nodes use LCRs when sending to it. If you set `bdr.enable_wal_decoder` to true on a node, also set this setting to `true`. +When subscribing to another node, this setting enables the node to request the +use of logical change records (LCRs) for the subscription. By default, this +setting is false. When this setting is true on a downstream node, the node +requests that upstream nodes use LCRs when sending to it. If you set +`bdr.enable_wal_decoder` to true on a node, also set this setting to `true`. !!! Note - You also need to enable this setting on all nodes in the PGD group and set the `enable_wal_decoder` option to true on the group. +You also need to enable this setting on all nodes in the PGD group and +set the `enable_wal_decoder` option to true on the group. !!! ### `bdr.lcr_cleanup_interval` -Logical change record (LCR) file cleanup interval. When the [decoding worker](../nodes#decoding-worker) is enabled, the decoding worker stores LCR files as a buffer. These files are periodically cleaned, and this setting controls the interval between any two consecutive cleanups. The default is 3 minutes. Setting it to zero disables cleanup. +Logical change record (LCR) file cleanup interval. When the [decoding +worker](../nodes#decoding-worker) is enabled, the decoding worker stores LCR +files as a buffer. These files are periodically cleaned, and this setting +controls the interval between any two consecutive cleanups. The default is 3 +minutes. Setting it to zero disables cleanup. ## Connectivity settings -The following are a set of connectivity settings affecting all cross-node `libpq` connections. The defaults are set to fairly conservative values and cover most production needs. All variables have `SIGHUP` context, meaning changes are applied upon reload. +The following are a set of connectivity settings affecting all cross-node +`libpq` connections. 
The defaults are set to fairly conservative values and +cover most production needs. All variables have `SIGHUP` context, meaning +changes are applied upon reload. ### `bdr.global_connection_timeout` -Maximum time to wait while connecting, in seconds. Write as a decimal integer, for example, 10. Zero, negative, or not specified means wait indefinitely. The minimum allowed timeout is 2 seconds, therefore a value of 1 is interpreted as 2. +Maximum time to wait while connecting, in seconds. Write as a decimal integer, +for example, 10. Zero, negative, or not specified means wait indefinitely. The +minimum allowed timeout is 2 seconds, therefore a value of 1 is interpreted as +2. The default is 15 seconds. ### `bdr.global_keepalives` -Controls whether TCP keepalives are used. The default value is 1, meaning on. If you don't want keepalives, you can change this to 0, meaning off. This parameter is ignored for connections made by a Unix-domain socket. +Controls whether TCP keepalives are used. The default value is 1, meaning on. If +you don't want keepalives, you can change this to 0, meaning off. This parameter +is ignored for connections made by a Unix-domain socket. The default is 1 (on). ### `bdr.global_keepalives_idle` -Controls the number of seconds of inactivity after which TCP sends a keepalive message to the server. A value of zero uses the system default. This parameter is ignored for connections made by a Unix-domain socket or if keepalives are disabled. It's supported only on systems where `TCP_KEEPIDLE` or an equivalent socket option is available. On other systems, it has no effect. +Controls the number of seconds of inactivity after which TCP sends a keepalive +message to the server. A value of zero uses the system default. This parameter +is ignored for connections made by a Unix-domain socket or if keepalives are +disabled. It's supported only on systems where `TCP_KEEPIDLE` or an equivalent +socket option is available. On other systems, it has no effect. 
The default is 1 second. ### `bdr.global_keepalives_interval` -Controls the number of seconds after which to retransmit a TCP keepalive message that isn't acknowledged by the server. A value of zero uses the system default. This parameter is ignored for connections made by a Unix-domain socket or if keepalives are disabled. It's supported only on systems where `TCP_KEEPINTVL` or an equivalent socket option is available. On other systems, it has no effect. +Controls the number of seconds after which to retransmit a TCP keepalive message +that isn't acknowledged by the server. A value of zero uses the system default. +This parameter is ignored for connections made by a Unix-domain socket or if +keepalives are disabled. It's supported only on systems where `TCP_KEEPINTVL` or +an equivalent socket option is available. On other systems, it has no effect. The default is 2 seconds. ### `bdr.global_keepalives_count` -Controls the number of TCP keepalives that can be lost before the client's connection to the server is considered dead. A value of zero uses the system default. This parameter is ignored for connections made by a Unix-domain socket or if keepalives are disabled. It's supported only on systems where `TCP_KEEPCNT` or an equivalent socket option is available. On other systems, it has no effect. +Controls the number of TCP keepalives that can be lost before the client's +connection to the server is considered dead. A value of zero uses the system +default. This parameter is ignored for connections made by a Unix-domain socket +or if keepalives are disabled. It's supported only on systems where +`TCP_KEEPCNT` or an equivalent socket option is available. On other systems, it +has no effect. The default is 3. ### `bdr.global_tcp_user_timeout` -Controls the number of milliseconds that transmitted data can remain unacknowledged before a connection is forcibly closed. A value of zero uses the system default. 
This parameter is ignored for connections made by a Unix-domain socket. It's supported only on systems where `TCP_USER_TIMEOUT` is available. On other systems, it has no effect. +Controls the number of milliseconds that transmitted data can remain +unacknowledged before a connection is forcibly closed. A value of zero uses the +system default. This parameter is ignored for connections made by a Unix-domain +socket. It's supported only on systems where `TCP_USER_TIMEOUT` is available. On +other systems, it has no effect. The default is 5 seconds (expressed in milliseconds, 5000). -## Internal settings +## Internal settings - Raft timeouts + +### `bdr.raft_global_election_timeout` + +To account for network failures, the Raft consensus protocol implements timeouts +for elections and requests. This value is used when a request is +being sent to the global (top-level) group. It defaults to 6 seconds (6s). + +### `bdr.raft_local_election_timeout` + +To account for network failures, the Raft consensus protocol implements timeouts +for elections and requests. This value is used when a request is +being sent to the sub-group. It defaults to 3 seconds (3s). + +### `bdr.raft_response_timeout` + +For responses, the settings of +[`bdr.raft_global_election_timeout`](#bdrraft_global_election_timeout) and +[`bdr.raft_local_election_timeout`](#bdrraft_local_election_timeout) are used +as appropriate. This behavior can be overridden by setting +this variable. The setting of `bdr.raft_response_timeout` should be less than +either of the election timeout values. Set this variable to -1 and the override +is disabled. It defaults to -1. + +## Internal settings - Other Raft values ### `bdr.raft_keep_min_entries` @@ -551,12 +616,6 @@ Postgres server start. If log compaction is disabled, the log grows in size forever. !!! -### `bdr.raft_response_timeout` - -To account for network failures, the Raft consensus protocol implemented times -out requests after a certain amount of time.
This timeout defaults to 30 -seconds. - ### `bdr.raft_log_min_apply_duration` To move the state machine forward, Raft appends entries to its internal log. @@ -578,6 +637,8 @@ These connections carry bdr consensus requests between the groups' nodes. Default value of this parameter is 100 connections. You can set it only at Postgres server start. +## Internal settings - Other values + ### `bdr.backwards_compatibility` Specifies the version to be backward compatible to, in the same numerical format