diff --git a/.github/workflows/install_deps.py b/.github/workflows/install_deps.py index 6480f66a3..b089bde07 100644 --- a/.github/workflows/install_deps.py +++ b/.github/workflows/install_deps.py @@ -110,7 +110,7 @@ def install_etcd(): def install_postgres(): - version = os.environ.get('PGVERSION', '15.1-1') + version = os.environ.get('PGVERSION', '16.1-1') platform = {'darwin': 'osx', 'win32': 'windows-x64', 'cygwin': 'windows-x64'}[sys.platform] if platform == 'osx': return subprocess.call(['brew', 'install', 'expect', 'postgresql@{0}'.format(version.split('.')[0])]) diff --git a/.github/workflows/mapping.py b/.github/workflows/mapping.py index f75efec43..279438b04 100644 --- a/.github/workflows/mapping.py +++ b/.github/workflows/mapping.py @@ -1 +1 @@ -versions = {'etcd': '9.6', 'etcd3': '16', 'consul': '13', 'exhibitor': '12', 'raft': '11', 'kubernetes': '15'} +versions = {'etcd': '9.6', 'etcd3': '16', 'consul': '13', 'exhibitor': '12', 'raft': '14', 'kubernetes': '15'} diff --git a/.github/workflows/run_tests.py b/.github/workflows/run_tests.py index 9a078e4f7..cece186f4 100644 --- a/.github/workflows/run_tests.py +++ b/.github/workflows/run_tests.py @@ -30,7 +30,7 @@ def main(): unbuffer = ['timeout', '900', 'unbuffer'] else: if sys.platform == 'darwin': - version = os.environ.get('PGVERSION', '15.1-1') + version = os.environ.get('PGVERSION', '16.1-1') path = '/usr/local/opt/postgresql@{0}/bin:.'.format(version.split('.')[0]) unbuffer = ['unbuffer'] else: diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 81119f258..f1f55e2a5 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -85,7 +85,7 @@ jobs: env: DCS: ${{ matrix.dcs }} ETCDVERSION: 3.4.23 - PGVERSION: 15.1-1 # for windows and macos + PGVERSION: 16.1-1 # for windows and macos strategy: fail-fast: false matrix: @@ -174,7 +174,7 @@ jobs: - uses: jakebailey/pyright-action@v1 with: - version: 1.1.333 + version: 1.1.338 docs: runs-on: ubuntu-latest diff 
--git a/.gitignore b/.gitignore index c902c6eb3..07e227ee3 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ lib64 pip-log.txt # Unit test / coverage reports -.coverage +.coverage* .tox nosetests.xml coverage.xml @@ -35,6 +35,7 @@ htmlcov junit.xml features/output* dummy +result.json # Translations *.mo diff --git a/Dockerfile b/Dockerfile index c5b927ee6..d4fddfea5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ ## This Dockerfile is meant to aid in the building and debugging patroni whilst developing on your local machine ## It has all the necessary components to play/debug with a single node appliance, running etcd -ARG PG_MAJOR=15 +ARG PG_MAJOR=16 ARG COMPRESS=false ARG PGHOME=/home/postgres ARG PGDATA=$PGHOME/data @@ -94,9 +94,9 @@ RUN set -ex \ /usr/share/locale/??_?? \ /usr/share/postgresql/*/man \ /usr/share/postgresql-common/pg_wrapper \ - /usr/share/vim/vim80/doc \ - /usr/share/vim/vim80/lang \ - /usr/share/vim/vim80/tutor \ + /usr/share/vim/vim*/doc \ + /usr/share/vim/vim*/lang \ + /usr/share/vim/vim*/tutor \ # /var/lib/dpkg/info/* \ && find /usr/bin -xtype l -delete \ && find /var/log -type f -exec truncate --size 0 {} \; \ @@ -143,6 +143,7 @@ ARG PGBIN=/usr/lib/postgresql/$PG_MAJOR/bin ENV LC_ALL=$LC_ALL LANG=$LANG EDITOR=/usr/bin/editor ENV PGDATA=$PGDATA PATH=$PATH:$PGBIN +ENV ETCDCTL_API=3 COPY patroni /patroni/ COPY extras/confd/conf.d/haproxy.toml /etc/confd/conf.d/ diff --git a/Dockerfile.citus b/Dockerfile.citus index 7e6ec18c0..6f02215b3 100644 --- a/Dockerfile.citus +++ b/Dockerfile.citus @@ -1,6 +1,6 @@ ## This Dockerfile is meant to aid in the building and debugging patroni whilst developing on your local machine ## It has all the necessary components to play/debug with a single node appliance, running etcd -ARG PG_MAJOR=15 +ARG PG_MAJOR=16 ARG COMPRESS=false ARG PGHOME=/home/postgres ARG PGDATA=$PGHOME/data @@ -40,7 +40,7 @@ RUN set -ex \ echo "deb [signed-by=/etc/apt/trusted.gpg.d/citusdata_community.gpg] 
https://packagecloud.io/citusdata/community/debian/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/citusdata_community.list \ && curl -sL https://packagecloud.io/citusdata/community/gpgkey | gpg --dearmor > /etc/apt/trusted.gpg.d/citusdata_community.gpg \ && apt-get update -y \ - && apt-get -y install postgresql-$PG_MAJOR-citus-11.3; \ + && apt-get -y install postgresql-$PG_MAJOR-citus-12.1; \ fi \ \ # Cleanup all locales but en_US.UTF-8 @@ -113,9 +113,9 @@ RUN set -ex \ /usr/share/locale/??_?? \ /usr/share/postgresql/*/man \ /usr/share/postgresql-common/pg_wrapper \ - /usr/share/vim/vim80/doc \ - /usr/share/vim/vim80/lang \ - /usr/share/vim/vim80/tutor \ + /usr/share/vim/vim*/doc \ + /usr/share/vim/vim*/lang \ + /usr/share/vim/vim*/tutor \ # /var/lib/dpkg/info/* \ && find /usr/bin -xtype l -delete \ && find /var/log -type f -exec truncate --size 0 {} \; \ @@ -164,6 +164,7 @@ ARG PGBIN=/usr/lib/postgresql/$PG_MAJOR/bin ENV LC_ALL=$LC_ALL LANG=$LANG EDITOR=/usr/bin/editor ENV PGDATA=$PGDATA PATH=$PATH:$PGBIN +ENV ETCDCTL_API=3 COPY patroni /patroni/ COPY extras/confd/conf.d/haproxy.toml /etc/confd/conf.d/ diff --git a/README.rst b/README.rst index c2fc165ed..adade6b4a 100644 --- a/README.rst +++ b/README.rst @@ -151,7 +151,7 @@ run: YAML Configuration ================== -Go `here `__ for comprehensive information about settings for etcd, consul, and ZooKeeper. And for an example, see `postgres0.yml `__. +Go `here `__ for comprehensive information about settings for etcd, consul, and ZooKeeper. And for an example, see `postgres0.yml `__. 
========================= Environment Configuration diff --git a/docker-compose-citus.yml b/docker-compose-citus.yml index 7ff2a2c5c..da71c50a3 100644 --- a/docker-compose-citus.yml +++ b/docker-compose-citus.yml @@ -19,7 +19,6 @@ services: image: ${PATRONI_TEST_IMAGE:-patroni-citus} networks: [ demo ] environment: - ETCDCTL_API: 3 ETCD_LISTEN_PEER_URLS: http://0.0.0.0:2380 ETCD_LISTEN_CLIENT_URLS: http://0.0.0.0:2379 ETCD_INITIAL_CLUSTER: etcd1=http://etcd1:2380,etcd2=http://etcd2:2380,etcd3=http://etcd3:2380 @@ -28,19 +27,19 @@ services: ETCD_UNSUPPORTED_ARCH: arm64 container_name: demo-etcd1 hostname: etcd1 - command: etcd -name etcd1 -initial-advertise-peer-urls http://etcd1:2380 + command: etcd --name etcd1 --initial-advertise-peer-urls http://etcd1:2380 etcd2: <<: *etcd container_name: demo-etcd2 hostname: etcd2 - command: etcd -name etcd2 -initial-advertise-peer-urls http://etcd2:2380 + command: etcd --name etcd2 --initial-advertise-peer-urls http://etcd2:2380 etcd3: <<: *etcd container_name: demo-etcd3 hostname: etcd3 - command: etcd -name etcd3 -initial-advertise-peer-urls http://etcd3:2380 + command: etcd --name etcd3 --initial-advertise-peer-urls http://etcd3:2380 haproxy: image: ${PATRONI_TEST_IMAGE:-patroni-citus} @@ -53,7 +52,6 @@ services: - "5001:5001" # Load-balancing across workers primaries command: haproxy environment: &haproxy_env - ETCDCTL_API: 3 ETCDCTL_ENDPOINTS: http://etcd1:2379,http://etcd2:2379,http://etcd3:2379 PATRONI_ETCD3_HOSTS: "'etcd1:2379','etcd2:2379','etcd3:2379'" PATRONI_SCOPE: demo diff --git a/docker-compose.yml b/docker-compose.yml index 996c2c829..6b7d7a929 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,19 +25,19 @@ services: ETCD_UNSUPPORTED_ARCH: arm64 container_name: demo-etcd1 hostname: etcd1 - command: etcd -name etcd1 -initial-advertise-peer-urls http://etcd1:2380 + command: etcd --name etcd1 --initial-advertise-peer-urls http://etcd1:2380 etcd2: <<: *etcd container_name: demo-etcd2 hostname: etcd2 - 
command: etcd -name etcd2 -initial-advertise-peer-urls http://etcd2:2380 + command: etcd --name etcd2 --initial-advertise-peer-urls http://etcd2:2380 etcd3: <<: *etcd container_name: demo-etcd3 hostname: etcd3 - command: etcd -name etcd3 -initial-advertise-peer-urls http://etcd3:2380 + command: etcd --name etcd3 --initial-advertise-peer-urls http://etcd3:2380 haproxy: image: ${PATRONI_TEST_IMAGE:-patroni} diff --git a/docker/README.md b/docker/README.md index da87842f5..0b7f3d584 100644 --- a/docker/README.md +++ b/docker/README.md @@ -19,102 +19,97 @@ The haproxy listens on ports 5000 (connects to the primary) and 5001 (does load- Example session: - $ docker-compose up -d - Creating demo-haproxy ... - Creating demo-patroni2 ... - Creating demo-patroni1 ... - Creating demo-patroni3 ... - Creating demo-etcd2 ... - Creating demo-etcd1 ... - Creating demo-etcd3 ... - Creating demo-haproxy - Creating demo-patroni2 - Creating demo-patroni1 - Creating demo-patroni3 - Creating demo-etcd1 - Creating demo-etcd2 - Creating demo-etcd2 ... 
done + $ docker compose up -d + ✔ Network patroni_demo Created + ✔ Container demo-etcd1 Started + ✔ Container demo-haproxy Started + ✔ Container demo-patroni1 Started + ✔ Container demo-patroni2 Started + ✔ Container demo-patroni3 Started + ✔ Container demo-etcd2 Started + ✔ Container demo-etcd3 Started $ docker ps - CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES - 5b7a90b4cfbf patroni "/bin/sh /entrypoint…" 29 seconds ago Up 27 seconds demo-etcd2 - e30eea5222f2 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 27 seconds demo-etcd1 - 83bcf3cb208f patroni "/bin/sh /entrypoint…" 29 seconds ago Up 27 seconds demo-etcd3 - 922532c56e7d patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds demo-patroni3 - 14f875e445f3 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds demo-patroni2 - 110d1073b383 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds demo-patroni1 - 5af5e6e36028 patroni "/bin/sh /entrypoint…" 29 seconds ago Up 28 seconds 0.0.0.0:5000-5001->5000-5001/tcp demo-haproxy + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + a37bcec56726 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-etcd3 + 034ab73868a8 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-patroni2 + 03837736f710 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-patroni3 + 22815c3d85b3 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-etcd2 + 814b4304d132 patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes 0.0.0.0:5000-5001->5000-5001/tcp, :::5000-5001->5000-5001/tcp demo-haproxy + 6375b0ba2d0a patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-patroni1 + aef8bf3ee91f patroni "/bin/sh /entrypoint…" 15 minutes ago Up 15 minutes demo-etcd1 $ docker logs demo-patroni1 - 2019-02-20 08:19:32,714 INFO: Failed to import patroni.dcs.consul - 2019-02-20 08:19:32,737 INFO: Selected new etcd server http://etcd3:2379 - 2019-02-20 08:19:35,140 INFO: Lock owner: None; I am patroni1 - 2019-02-20 
08:19:35,174 INFO: trying to bootstrap a new cluster + 2023-11-21 09:04:33,547 INFO: Selected new etcd server http://172.29.0.3:2379 + 2023-11-21 09:04:33,605 INFO: Lock owner: None; I am patroni1 + 2023-11-21 09:04:33,693 INFO: trying to bootstrap a new cluster ... - 2019-02-20 08:19:39,310 INFO: postmaster pid=37 - 2019-02-20 08:19:39.314 UTC [37] LOG: listening on IPv4 address "0.0.0.0", port 5432 - 2019-02-20 08:19:39.321 UTC [37] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" - 2019-02-20 08:19:39.353 UTC [39] LOG: database system was shut down at 2019-02-20 08:19:36 UTC - 2019-02-20 08:19:39.354 UTC [40] FATAL: the database system is starting up - localhost:5432 - rejecting connections - 2019-02-20 08:19:39.369 UTC [37] LOG: database system is ready to accept connections + 2023-11-21 09:04:34.920 UTC [43] LOG: starting PostgreSQL 15.5 (Debian 15.5-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit + 2023-11-21 09:04:34.921 UTC [43] LOG: listening on IPv4 address "0.0.0.0", port 5432 + 2023-11-21 09:04:34,922 INFO: postmaster pid=43 + 2023-11-21 09:04:34.922 UTC [43] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" + 2023-11-21 09:04:34.925 UTC [47] LOG: database system was shut down at 2023-11-21 09:04:34 UTC + 2023-11-21 09:04:34.928 UTC [43] LOG: database system is ready to accept connections localhost:5432 - accepting connections - 2019-02-20 08:19:39,383 INFO: establishing a new patroni connection to the postgres cluster - 2019-02-20 08:19:39,408 INFO: running post_bootstrap - 2019-02-20 08:19:39,432 WARNING: Could not activate Linux watchdog device: "Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog'" - 2019-02-20 08:19:39,515 INFO: initialized a new cluster - 2019-02-20 08:19:49,424 INFO: Lock owner: patroni1; I am patroni1 - 2019-02-20 08:19:49,447 INFO: Lock owner: patroni1; I am patroni1 - 2019-02-20 08:19:49,480 INFO: no action. 
i am the leader with the lock - 2019-02-20 08:19:59,422 INFO: Lock owner: patroni1; I am patroni1 + localhost:5432 - accepting connections + 2023-11-21 09:04:34,938 INFO: establishing a new patroni heartbeat connection to postgres + 2023-11-21 09:04:34,992 INFO: running post_bootstrap + 2023-11-21 09:04:35,004 WARNING: User creation via "bootstrap.users" will be removed in v4.0.0 + 2023-11-21 09:04:35,009 WARNING: Could not activate Linux watchdog device: Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog' + 2023-11-21 09:04:35,189 INFO: initialized a new cluster + 2023-11-21 09:04:35,328 INFO: no action. I am (patroni1), the leader with the lock + 2023-11-21 09:04:43,824 INFO: establishing a new patroni restapi connection to postgres + 2023-11-21 09:04:45,322 INFO: no action. I am (patroni1), the leader with the lock + 2023-11-21 09:04:55,320 INFO: no action. I am (patroni1), the leader with the lock + ... $ docker exec -ti demo-patroni1 bash postgres@patroni1:~$ patronictl list - +---------+----------+------------+--------+---------+----+-----------+ - | Cluster | Member | Host | Role | State | TL | Lag in MB | - +---------+----------+------------+--------+---------+----+-----------+ - | demo | patroni1 | 172.22.0.3 | Leader | running | 1 | 0 | - | demo | patroni2 | 172.22.0.7 | | running | 1 | 0 | - | demo | patroni3 | 172.22.0.4 | | running | 1 | 0 | - +---------+----------+------------+--------+---------+----+-----------+ + + Cluster: demo (7303838734793224214) --------+----+-----------+ + | Member | Host | Role | State | TL | Lag in MB | + +----------+------------+---------+-----------+----+-----------+ + | patroni1 | 172.29.0.2 | Leader | running | 1 | | + | patroni2 | 172.29.0.6 | Replica | streaming | 1 | 0 | + | patroni3 | 172.29.0.5 | Replica | streaming | 1 | 0 | + +----------+------------+---------+-----------+----+-----------+ postgres@patroni1:~$ etcdctl get --keys-only --prefix /service/demo /service/demo/config 
/service/demo/initialize /service/demo/leader - /service/demo/members/ /service/demo/members/patroni1 /service/demo/members/patroni2 /service/demo/members/patroni3 - /service/demo/optime/ - /service/demo/optime/leader + /service/demo/status postgres@patroni1:~$ etcdctl member list - 1bab629f01fa9065: name=etcd3 peerURLs=http://etcd3:2380 clientURLs=http://etcd3:2379 isLeader=false - 8ecb6af518d241cc: name=etcd2 peerURLs=http://etcd2:2380 clientURLs=http://etcd2:2379 isLeader=true - b2e169fcb8a34028: name=etcd1 peerURLs=http://etcd1:2380 clientURLs=http://etcd1:2379 isLeader=false + 2bf3e2ceda5d5960, started, etcd2, http://etcd2:2380, http://172.29.0.3:2379 + 55b3264e129c7005, started, etcd3, http://etcd3:2380, http://172.29.0.7:2379 + acce7233f8ec127e, started, etcd1, http://etcd1:2380, http://172.29.0.8:2379 + + postgres@patroni1:~$ exit $ docker exec -ti demo-haproxy bash postgres@haproxy:~$ psql -h localhost -p 5000 -U postgres -W Password: postgres - psql (11.2 (Ubuntu 11.2-1.pgdg18.04+1), server 10.7 (Debian 10.7-1.pgdg90+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) Type "help" for help. - localhost/postgres=# select pg_is_in_recovery(); + postgres=# SELECT pg_is_in_recovery(); pg_is_in_recovery ─────────────────── f (1 row) - localhost/postgres=# \q + postgres=# \q - $postgres@haproxy:~ psql -h localhost -p 5001 -U postgres -W + postgres@haproxy:~$ psql -h localhost -p 5001 -U postgres -W Password: postgres - psql (11.2 (Ubuntu 11.2-1.pgdg18.04+1), server 10.7 (Debian 10.7-1.pgdg90+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) Type "help" for help. - localhost/postgres=# select pg_is_in_recovery(); + postgres=# SELECT pg_is_in_recovery(); pg_is_in_recovery ─────────────────── t @@ -127,81 +122,86 @@ The haproxy listens on ports 5000 (connects to the coordinator primary) and 5001 Example session: - $ docker-compose -f docker-compose-citus.yml up -d - Creating demo-work2-1 ... done - Creating demo-work1-1 ... done - Creating demo-etcd2 ... 
done - Creating demo-etcd1 ... done - Creating demo-coord3 ... done - Creating demo-etcd3 ... done - Creating demo-coord1 ... done - Creating demo-haproxy ... done - Creating demo-work2-2 ... done - Creating demo-coord2 ... done - Creating demo-work1-2 ... done + $ docker compose -f docker-compose-citus.yml up -d + ✔ Network patroni_demo Created + ✔ Container demo-coord2 Started + ✔ Container demo-work2-2 Started + ✔ Container demo-etcd1 Started + ✔ Container demo-haproxy Started + ✔ Container demo-work1-1 Started + ✔ Container demo-work2-1 Started + ✔ Container demo-work1-2 Started + ✔ Container demo-coord1 Started + ✔ Container demo-etcd3 Started + ✔ Container demo-coord3 Started + ✔ Container demo-etcd2 Started + $ docker ps - CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES - 852d8885a612 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-coord3 - cdd692f947ab patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-work1-2 - 9f4e340b36da patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-etcd3 - d69c129a960a patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-etcd1 - c5849689b8cd patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-coord1 - c9d72bd6217d patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-work2-1 - 24b1b43efa05 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-coord2 - cb0cc2b4ca0a patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 3 seconds demo-work2-2 - 9796c6b8aad5 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 5 seconds demo-work1-1 - 8baccd74dcae patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds demo-etcd2 - 353ec62a0187 patroni-citus "/bin/sh /entrypoint…" 6 seconds ago Up 4 seconds 0.0.0.0:5000-5001->5000-5001/tcp demo-haproxy + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + 79c95492fac9 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-etcd3 + 77eb82d0f0c1 
patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work2-1 + 03dacd7267ef patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-etcd1 + db9206c66f85 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-etcd2 + 9a0fef7b7dd4 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work1-2 + f06b031d99dc patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work2-2 + f7c58545f314 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-coord2 + 383f9e7e188a patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-work1-1 + f02e96dcc9d6 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-coord3 + 6945834b7056 patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes demo-coord1 + b96ca42f785d patroni-citus "/bin/sh /entrypoint…" 11 minutes ago Up 11 minutes 0.0.0.0:5000-5001->5000-5001/tcp, :::5000-5001->5000-5001/tcp demo-haproxy + $ docker logs demo-coord1 - 2023-01-05 15:09:31,295 INFO: Selected new etcd server http://172.27.0.4:2379 - 2023-01-05 15:09:31,388 INFO: Lock owner: None; I am coord1 - 2023-01-05 15:09:31,501 INFO: trying to bootstrap a new cluster + 2023-11-21 09:36:14,293 INFO: Selected new etcd server http://172.30.0.4:2379 + 2023-11-21 09:36:14,390 INFO: Lock owner: None; I am coord1 + 2023-11-21 09:36:14,478 INFO: trying to bootstrap a new cluster ... 
- 2023-01-05 15:09:45,096 INFO: postmaster pid=39 + 2023-11-21 09:36:16,475 INFO: postmaster pid=52 localhost:5432 - no response - 2023-01-05 15:09:45.137 UTC [39] LOG: starting PostgreSQL 15.1 (Debian 15.1-1.pgdg110+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit - 2023-01-05 15:09:45.137 UTC [39] LOG: listening on IPv4 address "0.0.0.0", port 5432 - 2023-01-05 15:09:45.152 UTC [39] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" - 2023-01-05 15:09:45.177 UTC [43] LOG: database system was shut down at 2023-01-05 15:09:32 UTC - 2023-01-05 15:09:45.193 UTC [39] LOG: database system is ready to accept connections + 2023-11-21 09:36:16.495 UTC [52] LOG: starting PostgreSQL 15.5 (Debian 15.5-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit + 2023-11-21 09:36:16.495 UTC [52] LOG: listening on IPv4 address "0.0.0.0", port 5432 + 2023-11-21 09:36:16.496 UTC [52] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" + 2023-11-21 09:36:16.498 UTC [56] LOG: database system was shut down at 2023-11-21 09:36:15 UTC + 2023-11-21 09:36:16.501 UTC [52] LOG: database system is ready to accept connections localhost:5432 - accepting connections localhost:5432 - accepting connections - 2023-01-05 15:09:46,139 INFO: establishing a new patroni connection to the postgres cluster - 2023-01-05 15:09:46,208 INFO: running post_bootstrap - 2023-01-05 15:09:47.209 UTC [55] LOG: starting maintenance daemon on database 16386 user 10 - 2023-01-05 15:09:47.209 UTC [55] CONTEXT: Citus maintenance daemon for database 16386 user 10 - 2023-01-05 15:09:47,215 WARNING: Could not activate Linux watchdog device: "Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog'" - 2023-01-05 15:09:47.446 UTC [41] LOG: checkpoint starting: immediate force wait - 2023-01-05 15:09:47,466 INFO: initialized a new cluster - 2023-01-05 15:09:47,594 DEBUG: query(SELECT nodeid, groupid, 
nodename, nodeport, noderole FROM pg_catalog.pg_dist_node WHERE noderole = 'primary', ()) - 2023-01-05 15:09:47,594 INFO: establishing a new patroni connection to the postgres cluster - 2023-01-05 15:09:47,467 INFO: Lock owner: coord1; I am coord1 - 2023-01-05 15:09:47,613 DEBUG: query(SELECT pg_catalog.citus_set_coordinator_host(%s, %s, 'primary', 'default'), ('172.27.0.6', 5432)) - 2023-01-05 15:09:47,924 INFO: no action. I am (coord1), the leader with the lock - 2023-01-05 15:09:51.282 UTC [41] LOG: checkpoint complete: wrote 1086 buffers (53.0%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.029 s, sync=3.746 s, total=3.837 s; sync files=280, longest=0.028 s, average=0.014 s; distance=8965 kB, estimate=8965 kB - 2023-01-05 15:09:51.283 UTC [41] LOG: checkpoint starting: immediate force wait - 2023-01-05 15:09:51.495 UTC [41] LOG: checkpoint complete: wrote 18 buffers (0.9%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.044 s, sync=0.091 s, total=0.212 s; sync files=15, longest=0.015 s, average=0.007 s; distance=67 kB, estimate=8076 kB - 2023-01-05 15:09:57,467 INFO: Lock owner: coord1; I am coord1 - 2023-01-05 15:09:57,569 INFO: Assigning synchronous standby status to ['coord3'] + 2023-11-21 09:36:17,509 INFO: establishing a new patroni heartbeat connection to postgres + 2023-11-21 09:36:17,569 INFO: running post_bootstrap + 2023-11-21 09:36:17,593 WARNING: User creation via "bootstrap.users" will be removed in v4.0.0 + 2023-11-21 09:36:17,783 INFO: establishing a new patroni restapi connection to postgres + 2023-11-21 09:36:17,969 WARNING: Could not activate Linux watchdog device: Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog' + 2023-11-21 09:36:17.969 UTC [70] LOG: starting maintenance daemon on database 16386 user 10 + 2023-11-21 09:36:17.969 UTC [70] CONTEXT: Citus maintenance daemon for database 16386 user 10 + 2023-11-21 09:36:18.159 UTC [54] LOG: checkpoint starting: immediate force wait + 2023-11-21 
09:36:18,162 INFO: initialized a new cluster + 2023-11-21 09:36:18,164 INFO: Lock owner: coord1; I am coord1 + 2023-11-21 09:36:18,297 INFO: Enabled synchronous replication + 2023-11-21 09:36:18,298 DEBUG: Adding the new task: PgDistNode(nodeid=None,group=0,host=172.30.0.3,port=5432,event=after_promote) + 2023-11-21 09:36:18,298 DEBUG: Adding the new task: PgDistNode(nodeid=None,group=1,host=172.30.0.7,port=5432,event=after_promote) + 2023-11-21 09:36:18,298 DEBUG: Adding the new task: PgDistNode(nodeid=None,group=2,host=172.30.0.8,port=5432,event=after_promote) + 2023-11-21 09:36:18,299 DEBUG: query(SELECT nodeid, groupid, nodename, nodeport, noderole FROM pg_catalog.pg_dist_node WHERE noderole = 'primary', ()) + 2023-11-21 09:36:18,299 INFO: establishing a new patroni citus connection to postgres + 2023-11-21 09:36:18,323 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.30.0.7', 5432, 1)) + 2023-11-21 09:36:18,361 INFO: no action. I am (coord1), the leader with the lock + 2023-11-21 09:36:18,393 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.30.0.8', 5432, 2)) + 2023-11-21 09:36:28,164 INFO: Lock owner: coord1; I am coord1 + 2023-11-21 09:36:28,251 INFO: Assigning synchronous standby status to ['coord3'] server signaled - 2023-01-05 15:09:57.574 UTC [39] LOG: received SIGHUP, reloading configuration files - 2023-01-05 15:09:57.580 UTC [39] LOG: parameter "synchronous_standby_names" changed to "coord3" - 2023-01-05 15:09:59,637 INFO: Synchronous standby status assigned to ['coord3'] - 2023-01-05 15:09:59,638 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.27.0.2', 5432, 1)) - 2023-01-05 15:09:59.690 UTC [67] LOG: standby "coord3" is now a synchronous standby with priority 1 - 2023-01-05 15:09:59.690 UTC [67] STATEMENT: START_REPLICATION SLOT "coord3" 0/3000000 TIMELINE 1 - 2023-01-05 15:09:59,694 INFO: no action. 
I am (coord1), the leader with the lock - 2023-01-05 15:09:59,704 DEBUG: query(SELECT pg_catalog.citus_add_node(%s, %s, %s, 'primary', 'default'), ('172.27.0.8', 5432, 2)) - 2023-01-05 15:10:07,625 INFO: no action. I am (coord1), the leader with the lock - 2023-01-05 15:10:17,579 INFO: no action. I am (coord1), the leader with the lock + 2023-11-21 09:36:28.435 UTC [52] LOG: received SIGHUP, reloading configuration files + 2023-11-21 09:36:28.436 UTC [52] LOG: parameter "synchronous_standby_names" changed to "coord3" + 2023-11-21 09:36:28.641 UTC [83] LOG: standby "coord3" is now a synchronous standby with priority 1 + 2023-11-21 09:36:28.641 UTC [83] STATEMENT: START_REPLICATION SLOT "coord3" 0/3000000 TIMELINE 1 + 2023-11-21 09:36:30,582 INFO: Synchronous standby status assigned to ['coord3'] + 2023-11-21 09:36:30,626 INFO: no action. I am (coord1), the leader with the lock + 2023-11-21 09:36:38,250 INFO: no action. I am (coord1), the leader with the lock + ... $ docker exec -ti demo-haproxy bash postgres@haproxy:~$ etcdctl member list - 1bab629f01fa9065, started, etcd3, http://etcd3:2380, http://172.27.0.10:2379 - 8ecb6af518d241cc, started, etcd2, http://etcd2:2380, http://172.27.0.4:2379 - b2e169fcb8a34028, started, etcd1, http://etcd1:2380, http://172.27.0.7:2379 + 2b28411e74c0c281, started, etcd3, http://etcd3:2380, http://172.30.0.4:2379 + 6c70137d27cfa6c1, started, etcd2, http://etcd2:2380, http://172.30.0.5:2379 + a28f9a70ebf21304, started, etcd1, http://etcd1:2380, http://172.30.0.6:2379 postgres@haproxy:~$ etcdctl get --keys-only --prefix /service/demo /service/demo/0/config @@ -229,7 +229,7 @@ Example session: postgres@haproxy:~$ psql -h localhost -p 5000 -U postgres -d citus Password for user postgres: postgres - psql (15.1 (Debian 15.1-1.pgdg110+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, compression: off) Type "help" for help. 
@@ -240,67 +240,67 @@ Example session: (1 row) citus=# table pg_dist_node; - nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------ - 1 | 0 | 172.27.0.6 | 5432 | default | t | t | primary | default | t | f - 2 | 1 | 172.27.0.2 | 5432 | default | t | t | primary | default | t | t - 3 | 2 | 172.27.0.8 | 5432 | default | t | t | primary | default | t | t + 1 | 0 | 172.30.0.3 | 5432 | default | t | t | primary | default | t | f + 2 | 1 | 172.30.0.7 | 5432 | default | t | t | primary | default | t | t + 3 | 2 | 172.30.0.8 | 5432 | default | t | t | primary | default | t | t (3 rows) citus=# \q postgres@haproxy:~$ patronictl list - + Citus cluster: demo ----------+--------------+---------+----+-----------+ - | Group | Member | Host | Role | State | TL | Lag in MB | - +-------+---------+-------------+--------------+---------+----+-----------+ - | 0 | coord1 | 172.27.0.6 | Leader | running | 1 | | - | 0 | coord2 | 172.27.0.5 | Replica | running | 1 | 0 | - | 0 | coord3 | 172.27.0.9 | Sync Standby | running | 1 | 0 | - | 1 | work1-1 | 172.27.0.2 | Leader | running | 1 | | - | 1 | work1-2 | 172.27.0.12 | Sync Standby | running | 1 | 0 | - | 2 | work2-1 | 172.27.0.11 | Sync Standby | running | 1 | 0 | - | 2 | work2-2 | 172.27.0.8 | Leader | running | 1 | | - +-------+---------+-------------+--------------+---------+----+-----------+ + + Citus cluster: demo ----------+--------------+-----------+----+-----------+ + | Group | Member | Host | Role | State | TL | Lag in MB | + +-------+---------+-------------+--------------+-----------+----+-----------+ + | 0 | coord1 | 172.30.0.3 | Leader | running | 1 | | + | 0 | coord2 | 
172.30.0.12 | Replica | streaming | 1 | 0 | + | 0 | coord3 | 172.30.0.2 | Sync Standby | streaming | 1 | 0 | + | 1 | work1-1 | 172.30.0.7 | Leader | running | 1 | | + | 1 | work1-2 | 172.30.0.10 | Sync Standby | streaming | 1 | 0 | + | 2 | work2-1 | 172.30.0.8 | Leader | running | 1 | | + | 2 | work2-2 | 172.30.0.11 | Sync Standby | streaming | 1 | 0 | + +-------+---------+-------------+--------------+-----------+----+-----------+ + postgres@haproxy:~$ patronictl switchover --group 2 --force Current cluster topology - + Citus cluster: demo (group: 2, 7185185529556963355) +-----------+ - | Member | Host | Role | State | TL | Lag in MB | - +---------+-------------+--------------+---------+----+-----------+ - | work2-1 | 172.27.0.11 | Sync Standby | running | 1 | 0 | - | work2-2 | 172.27.0.8 | Leader | running | 1 | | - +---------+-------------+--------------+---------+----+-----------+ - 2023-01-05 15:29:29.54204 Successfully switched over to "work2-1" - + Citus cluster: demo (group: 2, 7185185529556963355) -------+ + + Citus cluster: demo (group: 2, 7303846899271086103) --+-----------+ + | Member | Host | Role | State | TL | Lag in MB | + +---------+-------------+--------------+-----------+----+-----------+ + | work2-1 | 172.30.0.8 | Leader | running | 1 | | + | work2-2 | 172.30.0.11 | Sync Standby | streaming | 1 | 0 | + +---------+-------------+--------------+-----------+----+-----------+ + 2023-11-21 09:44:15.83849 Successfully switched over to "work2-2" + + Citus cluster: demo (group: 2, 7303846899271086103) -------+ | Member | Host | Role | State | TL | Lag in MB | +---------+-------------+---------+---------+----+-----------+ - | work2-1 | 172.27.0.11 | Leader | running | 1 | | - | work2-2 | 172.27.0.8 | Replica | stopped | | unknown | + | work2-1 | 172.30.0.8 | Replica | stopped | | unknown | + | work2-2 | 172.30.0.11 | Leader | running | 1 | | +---------+-------------+---------+---------+----+-----------+ postgres@haproxy:~$ patronictl list - + Citus 
cluster: demo ----------+--------------+---------+----+-----------+ - | Group | Member | Host | Role | State | TL | Lag in MB | - +-------+---------+-------------+--------------+---------+----+-----------+ - | 0 | coord1 | 172.27.0.6 | Leader | running | 1 | | - | 0 | coord2 | 172.27.0.5 | Replica | running | 1 | 0 | - | 0 | coord3 | 172.27.0.9 | Sync Standby | running | 1 | 0 | - | 1 | work1-1 | 172.27.0.2 | Leader | running | 1 | | - | 1 | work1-2 | 172.27.0.12 | Sync Standby | running | 1 | 0 | - | 2 | work2-1 | 172.27.0.11 | Leader | running | 2 | | - | 2 | work2-2 | 172.27.0.8 | Sync Standby | running | 2 | 0 | - +-------+---------+-------------+--------------+---------+----+-----------+ + + Citus cluster: demo ----------+--------------+-----------+----+-----------+ + | Group | Member | Host | Role | State | TL | Lag in MB | + +-------+---------+-------------+--------------+-----------+----+-----------+ + | 0 | coord1 | 172.30.0.3 | Leader | running | 1 | | + | 0 | coord2 | 172.30.0.12 | Replica | streaming | 1 | 0 | + | 0 | coord3 | 172.30.0.2 | Sync Standby | streaming | 1 | 0 | + | 1 | work1-1 | 172.30.0.7 | Leader | running | 1 | | + | 1 | work1-2 | 172.30.0.10 | Sync Standby | streaming | 1 | 0 | + | 2 | work2-1 | 172.30.0.8 | Sync Standby | streaming | 2 | 0 | + | 2 | work2-2 | 172.30.0.11 | Leader | running | 2 | | + +-------+---------+-------------+--------------+-----------+----+-----------+ postgres@haproxy:~$ psql -h localhost -p 5000 -U postgres -d citus - Password for user postgres: postgres - psql (15.1 (Debian 15.1-1.pgdg110+1)) + psql (15.5 (Debian 15.5-1.pgdg120+1)) SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, compression: off) Type "help" for help. 
citus=# table pg_dist_node; - nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------+---------+-------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------ - 1 | 0 | 172.27.0.6 | 5432 | default | t | t | primary | default | t | f - 3 | 2 | 172.27.0.11 | 5432 | default | t | t | primary | default | t | t - 2 | 1 | 172.27.0.2 | 5432 | default | t | t | primary | default | t | t + 1 | 0 | 172.30.0.3 | 5432 | default | t | t | primary | default | t | f + 3 | 2 | 172.30.0.11 | 5432 | default | t | t | primary | default | t | t + 2 | 1 | 172.30.0.7 | 5432 | default | t | t | primary | default | t | t (3 rows) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index fb30bee71..1e6e91b54 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -13,6 +13,8 @@ readonly PATRONI_NAMESPACE="${PATRONI_NAMESPACE%/}" DOCKER_IP=$(hostname --ip-address) readonly DOCKER_IP +export DUMB_INIT_SETSID=0 + case "$1" in haproxy) haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D @@ -72,4 +74,4 @@ export PATRONI_SUPERUSER_SSLKEY="${PATRONI_SUPERUSER_SSLKEY:-$PGSSLKEY}" export PATRONI_SUPERUSER_SSLCERT="${PATRONI_SUPERUSER_SSLCERT:-$PGSSLCERT}" export PATRONI_SUPERUSER_SSLROOTCERT="${PATRONI_SUPERUSER_SSLROOTCERT:-$PGSSLROOTCERT}" -exec python3 /patroni.py postgres0.yml +exec dumb-init python3 /patroni.py postgres0.yml diff --git a/docs/releases.rst b/docs/releases.rst index cd2714b69..c68977cd7 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -3,6 +3,44 @@ Release notes ============= +Version 3.2.1 +------------- + +**Bugfixes** + +- Limit accepted values for ``--format`` argument in ``patronictl`` (Alexander Kukushkin) + + It used to accept any arbitrary string and produce 
no output if the value wasn't recognized. + +- Verify that replica nodes received checkpoint LSN on shutdown before releasing the leader key (Alexander Kukushkin) + + Previously in some cases, we were using LSN of the SWITCH record that is followed by CHECKPOINT (if archiving mode is enabled). As a result the former primary sometimes had to do ``pg_rewind``, but there would be no data loss involved. + +- Do a real HTTP request when performing node name uniqueness check (Alexander Kukushkin) + + When running Patroni in containers it is possible that the traffic is routed using ``docker-proxy``, which listens on the port and accepts incoming connections. It was causing false positives. + +- Fixed Citus support with Etcd v2 (Alexander Kukushkin) + + Patroni was failing to deploy a new Citus cluster with Etcd v2. + +- Fixed ``pg_rewind`` behavior with Postgres v16+ (Alexander Kukushkin) + + The error message format of ``pg_waldump`` changed in v16 which caused ``pg_rewind`` to be called by Patroni even when it was not necessary. + +- Fixed bug with custom bootstrap (Alexander Kukushkin) + + Patroni was falsely applying ``--command`` argument, which is a bootstrap command itself. + +- Fixed the issue with REST API health check endpoints (Sophia Ruan) + + There were chances that after Postgres restart it could return ``unknown`` state for Postgres because connections were not properly closed. + +- Cache ``postgres --describe-config`` output results (Waynerv) + + They are used to figure out which GUCs are available to validate PostgreSQL configuration and we don't expect this list to change while Patroni is running. 
+ + Version 3.2.0 ------------- diff --git a/features/environment.py b/features/environment.py index a0367657c..db10c93d6 100644 --- a/features/environment.py +++ b/features/environment.py @@ -1073,6 +1073,8 @@ def before_all(context): context.keyfile = os.path.join(context.pctl.output_dir, 'patroni.key') context.certfile = os.path.join(context.pctl.output_dir, 'patroni.crt') try: + if sys.platform == 'darwin' and 'GITHUB_ACTIONS' in os.environ: + raise Exception with open(os.devnull, 'w') as null: ret = subprocess.call(['openssl', 'req', '-nodes', '-new', '-x509', '-subj', '/CN=batman.patroni', '-addext', 'subjectAltName=IP:127.0.0.1', '-keyout', context.keyfile, diff --git a/features/priority_failover.feature b/features/priority_failover.feature index b33dd0456..acb1cb5a4 100644 --- a/features/priority_failover.feature +++ b/features/priority_failover.feature @@ -21,3 +21,21 @@ Feature: priority replication And I sleep for 5 seconds Then postgres3 role is the primary after 10 seconds And there is one of ["postgres3 has equally tolerable WAL position and priority 2, while this node has priority 1","Wal position of postgres3 is ahead of my wal position"] INFO in the postgres2 patroni log after 5 seconds + + Scenario: check conflicting configuration handling + When I set nofailover tag in postgres2 config + And I issue an empty POST request to http://127.0.0.1:8010/reload + Then I receive a response code 202 + And there is one of ["Conflicting configuration between nofailover: True and failover_priority: 1. 
Defaulting to nofailover: True"] WARNING in the postgres2 patroni log after 5 seconds + And "members/postgres2" key in DCS has tags={'failover_priority': '1', 'nofailover': True} after 10 seconds + When I issue a POST request to http://127.0.0.1:8010/failover with {"candidate": "postgres2"} + Then I receive a response code 412 + And I receive a response text "failover is not possible: no good candidates have been found" + When I reset nofailover tag in postgres1 config + And I issue an empty POST request to http://127.0.0.1:8009/reload + Then I receive a response code 202 + And there is one of ["Conflicting configuration between nofailover: False and failover_priority: 0. Defaulting to nofailover: False"] WARNING in the postgres1 patroni log after 5 seconds + And "members/postgres1" key in DCS has tags={'failover_priority': '0', 'nofailover': False} after 10 seconds + And I issue a POST request to http://127.0.0.1:8010/failover with {"candidate": "postgres1"} + Then I receive a response code 200 + And postgres1 role is the primary after 10 seconds diff --git a/features/steps/basic_replication.py b/features/steps/basic_replication.py index d70c6d0ee..f2db7110f 100644 --- a/features/steps/basic_replication.py +++ b/features/steps/basic_replication.py @@ -123,6 +123,6 @@ def check_patroni_log(context, message_list, level, node, timeout): messsages_of_level = context.pctl.read_patroni_log(node, level) if any(any(message in line for line in messsages_of_level) for message in message_list): break - time.sleep(1) + sleep(1) else: assert False, f"There were none of {message_list} {level} in the {node} patroni log after {timeout} seconds" diff --git a/features/steps/citus.py b/features/steps/citus.py index 4dd2ffa66..7277cccce 100644 --- a/features/steps/citus.py +++ b/features/steps/citus.py @@ -131,5 +131,5 @@ def check_transaction(context, name, time_limit): @step("a transaction finishes in {timeout:d} seconds") def check_transaction_timeout(context, timeout): - assert 
(datetime.now(tzutc) - context.xact_start).seconds > timeout, \ + assert (datetime.now(tzutc) - context.xact_start).seconds >= timeout, \ "a transaction finished earlier than in {0} seconds".format(timeout) diff --git a/features/steps/patroni_api.py b/features/steps/patroni_api.py index 2c76d32df..74a7c0da8 100644 --- a/features/steps/patroni_api.py +++ b/features/steps/patroni_api.py @@ -128,6 +128,12 @@ def scheduled_restart(context, url, in_seconds, data): context.execute_steps(u"""Given I issue a POST request to {0}/restart with {1}""".format(url, json.dumps(data))) +@step('I {action:w} {tag:w} tag in {pg_name:w} config') +def add_bool_tag_to_config(context, action, tag, pg_name): + value = action == 'set' + context.pctl.add_tag_to_config(pg_name, tag, value) + + @step('I add tag {tag:w} {value:w} to {pg_name:w} config') def add_tag_to_config(context, tag, value, pg_name): context.pctl.add_tag_to_config(pg_name, tag, value) diff --git a/kubernetes/Dockerfile b/kubernetes/Dockerfile index 29a683bd5..e41bf1cd9 100644 --- a/kubernetes/Dockerfile +++ b/kubernetes/Dockerfile @@ -1,4 +1,4 @@ -FROM postgres:15 +FROM postgres:16 LABEL maintainer="Alexander Kukushkin " RUN export DEBIAN_FRONTEND=noninteractive \ diff --git a/kubernetes/Dockerfile.citus b/kubernetes/Dockerfile.citus index f9564521d..7af9e5ae9 100644 --- a/kubernetes/Dockerfile.citus +++ b/kubernetes/Dockerfile.citus @@ -1,4 +1,4 @@ -FROM postgres:15 +FROM postgres:16 LABEL maintainer="Alexander Kukushkin " RUN export DEBIAN_FRONTEND=noninteractive \ @@ -11,7 +11,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ ## Make sure we have a en_US.UTF-8 locale available && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \ && if [ $(dpkg --print-architecture) = 'arm64' ]; then \ - apt-get install -y postgresql-server-dev-15 \ + apt-get install -y postgresql-server-dev-16 \ gcc make autoconf \ libc6-dev flex libcurl4-gnutls-dev \ libicu-dev libkrb5-dev liblz4-dev \ @@ -24,7 +24,7 @@ RUN 
export DEBIAN_FRONTEND=noninteractive \ echo "deb [signed-by=/etc/apt/trusted.gpg.d/citusdata_community.gpg] https://packagecloud.io/citusdata/community/debian/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/citusdata_community.list \ && curl -sL https://packagecloud.io/citusdata/community/gpgkey | gpg --dearmor > /etc/apt/trusted.gpg.d/citusdata_community.gpg \ && apt-get update -y \ - && apt-get -y install postgresql-15-citus-12.0; \ + && apt-get -y install postgresql-16-citus-12.1; \ fi \ && pip3 install --break-system-packages setuptools \ && pip3 install --break-system-packages 'git+https://github.com/zalando/patroni.git#egg=patroni[kubernetes]' \ @@ -38,7 +38,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ && chmod 664 /etc/passwd \ # Clean up && apt-get remove -y git python3-pip python3-wheel \ - postgresql-server-dev-15 gcc make autoconf \ + postgresql-server-dev-16 gcc make autoconf \ libc6-dev flex libicu-dev libkrb5-dev liblz4-dev \ libpam0g-dev libreadline-dev libselinux1-dev libssl-dev libxslt1-dev libzstd-dev uuid-dev \ && apt-get autoremove -y \ diff --git a/patroni/__main__.py b/patroni/__main__.py index 02ba56da9..2c253da44 100644 --- a/patroni/__main__.py +++ b/patroni/__main__.py @@ -68,7 +68,7 @@ def __init__(self, config: 'Config') -> None: self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() - self.postgresql = Postgresql(self.config['postgresql']) + self.postgresql = Postgresql(self.config['postgresql'], self.dcs.mpp) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) @@ -107,8 +107,6 @@ def load_dynamic_configuration(self) -> None: def ensure_unique_name(self) -> None: """A helper method to prevent splitbrain from operator naming error.""" - from urllib.parse import urlparse - from urllib3.connection import HTTPConnection from patroni.dcs import Member cluster = self.dcs.get_cluster() @@ -118,14 +116,14 @@ def ensure_unique_name(self) -> None: if not isinstance(member, Member): return try: - 
parts = urlparse(member.api_url) - if isinstance(parts.hostname, str): - connection = HTTPConnection(parts.hostname, port=parts.port or 80, timeout=3) - connection.connect() - logger.fatal("Can't start; there is already a node named '%s' running", self.config['name']) - sys.exit(1) + # Silence annoying WARNING: Retrying (...) messages when Patroni is quickly restarted. + # At this moment we don't have custom log levels configured and hence shouldn't lose anything useful. + self.logger.update_loggers({'urllib3.connectionpool': 'ERROR'}) + _ = self.request(member, endpoint="/liveness", timeout=3) + logger.fatal("Can't start; there is already a node named '%s' running", self.config['name']) + sys.exit(1) except Exception: - return + self.logger.update_loggers({}) def _get_tags(self) -> Dict[str, Any]: """Get tags configured for this node, if any. diff --git a/patroni/api.py b/patroni/api.py index 6a89b1bae..5c9d66030 100644 --- a/patroni/api.py +++ b/patroni/api.py @@ -26,7 +26,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TYPE_CHECKING, Union -from . import psycopg +from . import global_config, psycopg from .__main__ import Patroni from .dcs import Cluster from .exceptions import PostgresConnectionException, PostgresException @@ -37,7 +37,7 @@ logger = logging.getLogger(__name__) -def check_access(func: Callable[['RestApiHandler'], None]) -> Callable[..., None]: +def check_access(func: Callable[..., None]) -> Callable[..., None]: """Check the source ip, authorization header, or client certificates. .. 
note:: @@ -290,7 +290,7 @@ def do_GET(self, write_status_code_only: bool = False) -> None: patroni = self.server.patroni cluster = patroni.dcs.cluster - global_config = patroni.config.get_global_config(cluster) + config = global_config.from_cluster(cluster) leader_optime = cluster and cluster.last_lsn or 0 replayed_location = response.get('xlog', {}).get('replayed_location', 0) @@ -308,7 +308,7 @@ def do_GET(self, write_status_code_only: bool = False) -> None: standby_leader_status_code = 200 if response.get('role') == 'standby_leader' else 503 elif patroni.ha.is_leader(): leader_status_code = 200 - if global_config.is_standby_cluster: + if config.is_standby_cluster: primary_status_code = replica_status_code = 503 standby_leader_status_code = 200 if response.get('role') in ('replica', 'standby_leader') else 503 else: @@ -452,9 +452,8 @@ def do_GET_cluster(self) -> None: HTTP status ``200`` and the JSON representation of the cluster topology. """ cluster = self.server.patroni.dcs.get_cluster() - global_config = self.server.patroni.config.get_global_config(cluster) - response = cluster_as_json(cluster, global_config) + response = cluster_as_json(cluster) response['scope'] = self.server.patroni.postgresql.scope self._write_json_response(200, response) @@ -864,7 +863,7 @@ def do_POST_restart(self) -> None: if request: logger.debug("received restart request: {0}".format(request)) - if self.server.patroni.config.get_global_config(cluster).is_paused and 'schedule' in request: + if global_config.from_cluster(cluster).is_paused and 'schedule' in request: self.write_response(status_code, "Can't schedule restart in the paused state") return @@ -1033,7 +1032,7 @@ def is_failover_possible(self, cluster: Cluster, leader: Optional[str], candidat :returns: a string with the error message or ``None`` if good nodes are found. 
""" - is_synchronous_mode = self.server.patroni.config.get_global_config(cluster).is_synchronous_mode + is_synchronous_mode = global_config.from_cluster(cluster).is_synchronous_mode if leader and (not cluster.leader or cluster.leader.name != leader): return 'leader name does not match' if candidate: @@ -1091,7 +1090,7 @@ def do_POST_failover(self, action: str = 'failover') -> None: candidate = request.get('candidate') or request.get('member') scheduled_at = request.get('scheduled_at') cluster = self.server.patroni.dcs.get_cluster() - global_config = self.server.patroni.config.get_global_config(cluster) + config = global_config.from_cluster(cluster) logger.info("received %s request with leader=%s candidate=%s scheduled_at=%s", action, leader, candidate, scheduled_at) @@ -1104,12 +1103,12 @@ def do_POST_failover(self, action: str = 'failover') -> None: if not data and scheduled_at: if action == 'failover': data = "Failover can't be scheduled" - elif global_config.is_paused: + elif config.is_paused: data = "Can't schedule switchover in the paused state" else: (status_code, data, scheduled_at) = self.parse_schedule(scheduled_at, action) - if not data and global_config.is_paused and not candidate: + if not data and config.is_paused and not candidate: data = 'Switchover is possible only to a specific candidate in a paused state' if action == 'failover' and leader: @@ -1154,8 +1153,8 @@ def do_POST_switchover(self) -> None: def do_POST_citus(self) -> None: """Handle a ``POST`` request to ``/citus`` path. - Call :func:`~patroni.postgresql.CitusHandler.handle_event` to handle the request, then write a response with - HTTP status code ``200``. + Call :func:`~patroni.postgresql.mpp.AbstractMPPHandler.handle_event` to handle the request, + then write a response with HTTP status code ``200``. .. note:: If unable to parse the request body, then the request is silently discarded. 
@@ -1260,7 +1259,7 @@ def get_postgresql_status(self, retry: bool = False) -> Dict[str, Any]: """ postgresql = self.server.patroni.postgresql cluster = self.server.patroni.dcs.cluster - global_config = self.server.patroni.config.get_global_config(cluster) + config = global_config.from_cluster(cluster) try: if postgresql.state not in ('running', 'restarting', 'starting'): @@ -1291,10 +1290,10 @@ def get_postgresql_status(self, retry: bool = False) -> Dict[str, Any]: }) } - if result['role'] == 'replica' and global_config.is_standby_cluster: + if result['role'] == 'replica' and config.is_standby_cluster: result['role'] = postgresql.role - if result['role'] == 'replica' and global_config.is_synchronous_mode\ + if result['role'] == 'replica' and config.is_synchronous_mode\ and cluster and cluster.sync.matches(postgresql.name): result['sync_standby'] = True @@ -1319,7 +1318,7 @@ def get_postgresql_status(self, retry: bool = False) -> Dict[str, Any]: state = 'unknown' result: Dict[str, Any] = {'state': state, 'role': postgresql.role} - if global_config.is_paused: + if config.is_paused: result['pause'] = True if not cluster or cluster.is_unlocked(): result['cluster_unlocked'] = True diff --git a/patroni/config.py b/patroni/config.py index 00dba9d90..f7d648d5f 100644 --- a/patroni/config.py +++ b/patroni/config.py @@ -12,7 +12,7 @@ from . import PATRONI_ENV_PREFIX from .collections import CaseInsensitiveDict -from .dcs import ClusterConfig, Cluster +from .dcs import ClusterConfig from .exceptions import ConfigParseError from .file_perm import pg_perm from .postgresql.config import ConfigHandler @@ -54,154 +54,6 @@ def default_validator(conf: Dict[str, Any]) -> List[str]: return [] -class GlobalConfig(object): - """A class that wraps global configuration and provides convenient methods to access/check values. 
- - It is instantiated either by calling :func:`get_global_config` or :meth:`Config.get_global_config`, which picks - either a configuration from provided :class:`Cluster` object (the most up-to-date) or from the - local cache if :class:`ClusterConfig` is not initialized or doesn't have a valid config. - """ - - def __init__(self, config: Dict[str, Any]) -> None: - """Initialize :class:`GlobalConfig` object with given *config*. - - :param config: current configuration either from - :class:`ClusterConfig` or from :func:`Config.dynamic_configuration`. - """ - self.__config = config - - def get(self, name: str) -> Any: - """Gets global configuration value by *name*. - - :param name: parameter name. - - :returns: configuration value or ``None`` if it is missing. - """ - return self.__config.get(name) - - def check_mode(self, mode: str) -> bool: - """Checks whether the certain parameter is enabled. - - :param mode: parameter name, e.g. ``synchronous_mode``, ``failsafe_mode``, ``pause``, ``check_timeline``, and - so on. - - :returns: ``True`` if parameter *mode* is enabled in the global configuration. - """ - return bool(parse_bool(self.__config.get(mode))) - - @property - def is_paused(self) -> bool: - """``True`` if cluster is in maintenance mode.""" - return self.check_mode('pause') - - @property - def is_synchronous_mode(self) -> bool: - """``True`` if synchronous replication is requested and it is not a standby cluster config.""" - return self.check_mode('synchronous_mode') and not self.is_standby_cluster - - @property - def is_synchronous_mode_strict(self) -> bool: - """``True`` if at least one synchronous node is required.""" - return self.check_mode('synchronous_mode_strict') - - def get_standby_cluster_config(self) -> Union[Dict[str, Any], Any]: - """Get ``standby_cluster`` configuration. - - :returns: a copy of ``standby_cluster`` configuration. 
- """ - return deepcopy(self.get('standby_cluster')) - - @property - def is_standby_cluster(self) -> bool: - """``True`` if global configuration has a valid ``standby_cluster`` section.""" - config = self.get_standby_cluster_config() - return isinstance(config, dict) and\ - bool(config.get('host') or config.get('port') or config.get('restore_command')) - - def get_int(self, name: str, default: int = 0) -> int: - """Gets current value of *name* from the global configuration and try to return it as :class:`int`. - - :param name: name of the parameter. - :param default: default value if *name* is not in the configuration or invalid. - - :returns: currently configured value of *name* from the global configuration or *default* if it is not set or - invalid. - """ - ret = parse_int(self.get(name)) - return default if ret is None else ret - - @property - def min_synchronous_nodes(self) -> int: - """The minimal number of synchronous nodes based on whether ``synchronous_mode_strict`` is enabled or not.""" - return 1 if self.is_synchronous_mode_strict else 0 - - @property - def synchronous_node_count(self) -> int: - """Currently configured value of ``synchronous_node_count`` from the global configuration. - - Assume ``1`` if it is not set or invalid. - """ - return max(self.get_int('synchronous_node_count', 1), self.min_synchronous_nodes) - - @property - def maximum_lag_on_failover(self) -> int: - """Currently configured value of ``maximum_lag_on_failover`` from the global configuration. - - Assume ``1048576`` if it is not set or invalid. - """ - return self.get_int('maximum_lag_on_failover', 1048576) - - @property - def maximum_lag_on_syncnode(self) -> int: - """Currently configured value of ``maximum_lag_on_syncnode`` from the global configuration. - - Assume ``-1`` if it is not set or invalid. 
- """ - return self.get_int('maximum_lag_on_syncnode', -1) - - @property - def primary_start_timeout(self) -> int: - """Currently configured value of ``primary_start_timeout`` from the global configuration. - - Assume ``300`` if it is not set or invalid. - - .. note:: - ``master_start_timeout`` is still supported to keep backward compatibility. - """ - default = 300 - return self.get_int('primary_start_timeout', default)\ - if 'primary_start_timeout' in self.__config else self.get_int('master_start_timeout', default) - - @property - def primary_stop_timeout(self) -> int: - """Currently configured value of ``primary_stop_timeout`` from the global configuration. - - Assume ``0`` if it is not set or invalid. - - .. note:: - ``master_stop_timeout`` is still supported to keep backward compatibility. - """ - default = 0 - return self.get_int('primary_stop_timeout', default)\ - if 'primary_stop_timeout' in self.__config else self.get_int('master_stop_timeout', default) - - -def get_global_config(cluster: Optional[Cluster], default: Optional[Dict[str, Any]] = None) -> GlobalConfig: - """Instantiates :class:`GlobalConfig` based on the input. - - :param cluster: the currently known cluster state from DCS. - :param default: default configuration, which will be used if there is no valid *cluster.config*. - - :returns: :class:`GlobalConfig` object. - """ - # Try to protect from the case when DCS was wiped out - if cluster and cluster.config and cluster.config.modify_version: - config = cluster.config.data - else: - config = default or {} - return GlobalConfig(deepcopy(config)) - - class Config(object): """Handle Patroni configuration. 
@@ -290,10 +142,10 @@ def __init__(self, configfile: str, self.__effective_configuration = self._build_effective_configuration({}, self._local_configuration) self._data_dir = self.__effective_configuration.get('postgresql', {}).get('data_dir', "") self._cache_file = os.path.join(self._data_dir, self.__CACHE_FILENAME) - if validator: # patronictl uses validator=None and we don't want to load anything from local cache in this case - self._load_cache() + if validator: # patronictl uses validator=None + self._load_cache() # we don't want to load anything from local cache for ctl + self._validate_failover_tags() # irrelevant for ctl self._cache_needs_saving = False - self._validate_failover_tags() @property def config_file(self) -> Optional[str]: @@ -504,6 +356,7 @@ def reload_local_configuration(self) -> Optional[bool]: new_configuration = self._build_effective_configuration(self._dynamic_configuration, configuration) self._local_configuration = configuration self.__effective_configuration = new_configuration + self._validate_failover_tags() return True else: logger.info('No local configuration items changed.') @@ -949,18 +802,6 @@ def copy(self) -> Dict[str, Any]: """ return deepcopy(self.__effective_configuration) - def get_global_config(self, cluster: Optional[Cluster]) -> GlobalConfig: - """Instantiate :class:`GlobalConfig` based on input. - - Use the configuration from provided *cluster* (the most up-to-date) or from the - local cache if *cluster.config* is not initialized or doesn't have a valid config. - - :param cluster: the currently known cluster state from DCS. - - :returns: :class:`GlobalConfig` object. - """ - return get_global_config(cluster, self._dynamic_configuration) - def _validate_failover_tags(self) -> None: """Check ``nofailover``/``failover_priority`` config and warn user if it's contradictory. 
@@ -974,10 +815,12 @@ def _validate_failover_tags(self) -> None: bedrock source of truth) """ tags = self.get('tags', {}) + if 'nofailover' not in tags: + return nofailover_tag = tags.get('nofailover') failover_priority_tag = parse_int(tags.get('failover_priority')) if failover_priority_tag is not None \ - and (nofailover_tag is True and failover_priority_tag > 0 - or nofailover_tag is False and failover_priority_tag <= 0): + and (bool(nofailover_tag) is True and failover_priority_tag > 0 + or bool(nofailover_tag) is False and failover_priority_tag <= 0): logger.warning('Conflicting configuration between nofailover: %s and failover_priority: %s. ' 'Defaulting to nofailover: %s', nofailover_tag, failover_priority_tag, nofailover_tag) diff --git a/patroni/ctl.py b/patroni/ctl.py index b4c57873d..6a5fe0e66 100644 --- a/patroni/ctl.py +++ b/patroni/ctl.py @@ -46,10 +46,12 @@ except ImportError: # pragma: no cover from cdiff import markup_to_pager, PatchStream # pyright: ignore [reportMissingModuleSource] -from .config import Config, get_global_config +from . import global_config +from .config import Config from .dcs import get_dcs as _get_dcs, AbstractDCS, Cluster, Member from .exceptions import PatroniException from .postgresql.misc import postgres_version_to_int +from .postgresql.mpp import get_mpp from .utils import cluster_as_json, patch_config, polling_loop from .request import PatroniRequest from .version import __version__ @@ -255,15 +257,23 @@ def load_config(path: str, dcs_url: Optional[str]) -> Dict[str, Any]: return config +def _get_configuration() -> Dict[str, Any]: + """Get configuration object. + + :returns: configuration object from the current context. 
+ """ + return click.get_current_context().obj['__config'] + + option_format = click.option('--format', '-f', 'fmt', help='Output format', default='pretty', type=click.Choice(['pretty', 'tsv', 'json', 'yaml', 'yml'])) option_watchrefresh = click.option('-w', '--watch', type=float, help='Auto update the screen every X seconds') option_watch = click.option('-W', is_flag=True, help='Auto update the screen every 2 seconds') option_force = click.option('--force', is_flag=True, help='Do not ask for confirmation at any point') arg_cluster_name = click.argument('cluster_name', required=False, - default=lambda: click.get_current_context().obj.get('scope')) + default=lambda: _get_configuration().get('scope')) option_default_citus_group = click.option('--group', required=False, type=int, help='Citus group', - default=lambda: click.get_current_context().obj.get('citus', {}).get('group')) + default=lambda: _get_configuration().get('citus', {}).get('group')) option_citus_group = click.option('--group', required=False, type=int, help='Citus group') role_choice = click.Choice(['leader', 'primary', 'standby-leader', 'replica', 'standby', 'any', 'master']) @@ -301,15 +311,23 @@ def ctl(ctx: click.Context, config_file: str, dcs_url: Optional[str], insecure: level = os.environ.get(name, level) logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=level) logging.captureWarnings(True) # Capture eventual SSL warning - ctx.obj = load_config(config_file, dcs_url) + config = load_config(config_file, dcs_url) # backward compatibility for configuration file where ctl section is not defined - ctx.obj.setdefault('ctl', {})['insecure'] = ctx.obj.get('ctl', {}).get('insecure') or insecure + config.setdefault('ctl', {})['insecure'] = config.get('ctl', {}).get('insecure') or insecure + ctx.obj = {'__config': config, '__mpp': get_mpp(config)} + + +def is_citus_cluster() -> bool: + """Check if we are working with Citus cluster. 
+ + :returns: ``True`` if configuration has ``citus`` section, otherwise ``False``. + """ + return click.get_current_context().obj['__mpp'].is_enabled() -def get_dcs(config: Dict[str, Any], scope: str, group: Optional[int]) -> AbstractDCS: +def get_dcs(scope: str, group: Optional[int]) -> AbstractDCS: """Get the DCS object. - :param config: Patroni configuration. :param scope: cluster name. :param group: if *group* is defined, use it to select which alternative Citus group this DCS refers to. If *group* is ``None`` and a Citus configuration exists, assume this is the coordinator. Coordinator has the group ``0``. @@ -320,14 +338,16 @@ def get_dcs(config: Dict[str, Any], scope: str, group: Optional[int]) -> Abstrac :raises: :class:`PatroniCtlException`: if not suitable DCS configuration could be found. """ + config = _get_configuration() config.update({'scope': scope, 'patronictl': True}) if group is not None: - config['citus'] = {'group': group} + config['citus'] = {'group': group, 'database': 'postgres'} config.setdefault('name', scope) try: dcs = _get_dcs(config) - if config.get('citus') and group is None: + if is_citus_cluster() and group is None: dcs.is_citus_coordinator = lambda: True + click.get_current_context().obj['__mpp'] = dcs.mpp return dcs except PatroniException as e: raise PatroniCtlException(str(e)) @@ -347,7 +367,7 @@ def request_patroni(member: Member, method: str = 'GET', ctx = click.get_current_context() # the current click context request_executor = ctx.obj.get('__request_patroni') if not request_executor: - request_executor = ctx.obj['__request_patroni'] = PatroniRequest(ctx.obj) + request_executor = ctx.obj['__request_patroni'] = PatroniRequest(_get_configuration()) return request_executor(member, method, endpoint, data) @@ -414,9 +434,9 @@ def print_output(columns: Optional[List[str]], rows: List[List[Any]], alignment: def watching(w: bool, watch: Optional[int], max_count: Optional[int] = None, clear: bool = True) -> Iterator[int]: - """Yield 
a value every ``x`` seconds. + """Yield a value every ``watch`` seconds. - Used to run a command with a watch-based aproach. + Used to run a command with a watch-based approach. :param w: if ``True`` and *watch* is ``None``, then *watch* assumes the value ``2``. :param watch: amount of seconds to wait before yielding another value. @@ -452,11 +472,9 @@ def watching(w: bool, watch: Optional[int], max_count: Optional[int] = None, cle yield 0 -def get_all_members(obj: Dict[str, Any], cluster: Cluster, - group: Optional[int], role: str = 'leader') -> Iterator[Member]: +def get_all_members(cluster: Cluster, group: Optional[int], role: str = 'leader') -> Iterator[Member]: """Get all cluster members that have the given *role*. - :param obj: the Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members from. If ``None`` get from all groups. :param role: role to filter members. Can be one among: @@ -470,7 +488,7 @@ def get_all_members(obj: Dict[str, Any], cluster: Cluster, :yields: members that have the given *role*. """ clusters = {0: cluster} - if obj.get('citus') and group is None: + if is_citus_cluster() and group is None: clusters.update(cluster.workers) if role in ('leader', 'master', 'primary', 'standby-leader'): # In the DCS the members' role can be one among: ``primary``, ``master``, ``replica`` or ``standby_leader``. @@ -492,11 +510,10 @@ def get_all_members(obj: Dict[str, Any], cluster: Cluster, yield m -def get_any_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], +def get_any_member(cluster: Cluster, group: Optional[int], role: Optional[str] = None, member: Optional[str] = None) -> Optional[Member]: """Get the first found cluster member that has the given *role*. - :param obj: the Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members from. If ``None`` get from all groups. :param role: role to filter members. 
See :func:`get_all_members` for available options. @@ -514,7 +531,7 @@ def get_any_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], elif role is None: role = 'leader' - for m in get_all_members(obj, cluster, group, role): + for m in get_all_members(cluster, group, role): if member is None or m.name == member: return m @@ -535,7 +552,7 @@ def get_all_members_leader_first(cluster: Cluster) -> Iterator[Member]: yield member -def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], connect_parameters: Dict[str, Any], +def get_cursor(cluster: Cluster, group: Optional[int], connect_parameters: Dict[str, Any], role: Optional[str] = None, member_name: Optional[str] = None) -> Union['cursor', 'Cursor[Any]', None]: """Get a cursor object to execute queries against a member that has the given *role* or *member_name*. @@ -544,7 +561,6 @@ def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], conn * ``fallback_application_name``: as ``Patroni ctl``; * ``connect_timeout``: as ``5``. - :param obj: the Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members to create a cursor against. If ``None`` consider members from all groups. @@ -559,7 +575,7 @@ def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], conn * A :class:`psycopg2.extensions.cursor` if using :mod:`psycopg2`; * ``None`` if not able to get a cursor that attendees *role* and *member_name*. 
""" - member = get_any_member(obj, cluster, group, role=role, member=member_name) + member = get_any_member(cluster, group, role=role, member=member_name) if member is None: return None @@ -594,7 +610,7 @@ def get_cursor(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], conn return None -def get_members(obj: Dict[str, Any], cluster: Cluster, cluster_name: str, member_names: List[str], role: str, +def get_members(cluster: Cluster, cluster_name: str, member_names: List[str], role: str, force: bool, action: str, ask_confirmation: bool = True, group: Optional[int] = None) -> List[Member]: """Get the list of members based on the given filters. @@ -618,7 +634,6 @@ def get_members(obj: Dict[str, Any], cluster: Cluster, cluster_name: str, member ``ask_confirmation=False``, and later call :func:`confirm_members_action` manually in the caller method. That way the workflow won't look broken to the user that is interacting with ``patronictl``. - :param obj: Patroni configuration. :param cluster: Patroni cluster. :param cluster_name: name of the Patroni cluster. :param member_names: used to filter which members should take the *action* based on their names. Each item is the @@ -647,13 +662,13 @@ def get_members(obj: Dict[str, Any], cluster: Cluster, cluster_name: str, member * Cluster does not have members that match the given *member_names*; or * No member with given *role* is found among the specified *member_names*. 
""" - members = list(get_all_members(obj, cluster, group, role)) + members = list(get_all_members(cluster, group, role)) candidates = {m.name for m in members} if not force or role: if not member_names and not candidates: raise PatroniCtlException('{0} cluster doesn\'t have any members'.format(cluster_name)) - output_members(obj, cluster, cluster_name, group=group) + output_members(cluster, cluster_name, group=group) if member_names: member_names = list(set(member_names) & candidates) @@ -713,9 +728,7 @@ def confirm_members_action(members: List[Member], force: bool, action: str, @click.option('--member', '-m', help='Generate a dsn for this member', type=str) @arg_cluster_name @option_citus_group -@click.pass_obj -def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - role: Optional[str], member: Optional[str]) -> None: +def dsn(cluster_name: str, group: Optional[int], role: Optional[str], member: Optional[str]) -> None: """Process ``dsn`` command of ``patronictl`` utility. Get DSN to connect to *member*. @@ -723,7 +736,6 @@ def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. note:: If no *role* nor *member* is given assume *role* as ``leader``. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get members to get DSN from. Refer to the module note for more details. @@ -736,8 +748,8 @@ def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], * both *role* and *member* are provided; or * No member matches requested *member* or *role*. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() - m = get_any_member(obj, cluster, group, role=role, member=member) + cluster = get_dcs(cluster_name, group).get_cluster() + m = get_any_member(cluster, group, role=role, member=member) if m is None: raise PatroniCtlException('Can not find a suitable member') @@ -759,9 +771,7 @@ def dsn(obj: Dict[str, Any], cluster_name: str, group: Optional[int], @click.option('--delimiter', help='The column delimiter', default='\t') @click.option('--command', '-c', help='The SQL commands to execute') @click.option('-d', '--dbname', help='database name to connect to', type=str) -@click.pass_obj def query( - obj: Dict[str, Any], cluster_name: str, group: Optional[int], role: Optional[str], @@ -780,7 +790,6 @@ def query( Perform a Postgres query in a Patroni node. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get members from to perform the query. Refer to the module note for more details. 
@@ -812,7 +821,7 @@ def query( raise PatroniCtlException('You need to specify either --command or --file') sql = command - connect_parameters = {} + connect_parameters: Dict[str, str] = {} if username: connect_parameters['username'] = username if password: @@ -820,24 +829,22 @@ def query( if dbname: connect_parameters['dbname'] = dbname - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = cursor = None for _ in watching(w, watch, clear=False): if cluster is None: cluster = dcs.get_cluster() -# cursor = get_cursor(obj, cluster, group, connect_parameters, role=role, member=member) - output, header = query_member(obj, cluster, group, cursor, member, role, sql, connect_parameters) + output, header = query_member(cluster, group, cursor, member, role, sql, connect_parameters) print_output(header, output, fmt=fmt, delimiter=delimiter) -def query_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], - cursor: Union['cursor', 'Cursor[Any]', None], member: Optional[str], role: Optional[str], - command: str, connect_parameters: Dict[str, Any]) -> Tuple[List[List[Any]], Optional[List[Any]]]: +def query_member(cluster: Cluster, group: Optional[int], cursor: Union['cursor', 'Cursor[Any]', None], + member: Optional[str], role: Optional[str], command: str, + connect_parameters: Dict[str, Any]) -> Tuple[List[List[Any]], Optional[List[Any]]]: """Execute SQL *command* against a member. - :param obj: Patroni configuration. :param cluster: the Patroni cluster. :param group: filter which Citus group we should get members from to perform the query. Refer to the module note for more details. @@ -866,7 +873,7 @@ def query_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], from . 
import psycopg try: if cursor is None: - cursor = get_cursor(obj, cluster, group, connect_parameters, role=role, member_name=member) + cursor = get_cursor(cluster, group, connect_parameters, role=role, member_name=member) if cursor is None: if member is not None: @@ -893,13 +900,11 @@ def query_member(obj: Dict[str, Any], cluster: Cluster, group: Optional[int], @click.argument('cluster_name') @option_citus_group @option_format -@click.pass_obj -def remove(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: str) -> None: +def remove(cluster_name: str, group: Optional[int], fmt: str) -> None: """Process ``remove`` command of ``patronictl`` utility. Remove cluster *cluster_name* from the DCS. - :param obj: Patroni configuration. :param cluster_name: name of the cluster which information will be wiped out of the DCS. :param group: which Citus group should have its information wiped out of the DCS. Refer to the module note for more details. @@ -913,12 +918,12 @@ def remove(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: st * use did not type the correct leader name when requesting removal of a healthy cluster. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - if obj.get('citus') and group is None: + if is_citus_cluster() and group is None: raise PatroniCtlException('For Citus clusters the --group must me specified') - output_members(obj, cluster, cluster_name, fmt=fmt) + output_members(cluster, cluster_name, fmt=fmt) confirm = click.prompt('Please confirm the cluster name to remove', type=str) if confirm != cluster_name: @@ -1003,31 +1008,28 @@ def parse_scheduled(scheduled: Optional[str]) -> Optional[datetime.datetime]: @option_citus_group @click.option('--role', '-r', help='Reload only members with this role', type=role_choice, default='any') @option_force -@click.pass_obj -def reload(obj: Dict[str, Any], cluster_name: str, member_names: List[str], - group: Optional[int], force: bool, role: str) -> None: +def reload(cluster_name: str, member_names: List[str], group: Optional[int], force: bool, role: str) -> None: """Process ``reload`` command of ``patronictl`` utility. Reload configuration of cluster members based on given filters. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param member_names: name of the members which configuration should be reloaded. :param group: filter which Citus group we should reload members. Refer to the module note for more details. :param force: perform the reload without asking for confirmations. :param role: role to filter members. See :func:`get_all_members` for available options. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - members = get_members(obj, cluster, cluster_name, member_names, role, force, 'reload', group=group) + members = get_members(cluster, cluster_name, member_names, role, force, 'reload', group=group) for member in members: r = request_patroni(member, 'post', 'reload') if r.status == 200: click.echo('No changes to apply on member {0}'.format(member.name)) elif r.status == 202: - config = get_global_config(cluster) + config = global_config.from_cluster(cluster) click.echo('Reload request received for member {0} and will be processed within {1} seconds'.format( member.name, config.get('loop_wait') or dcs.loop_wait) ) @@ -1050,15 +1052,13 @@ def reload(obj: Dict[str, Any], cluster_name: str, member_names: List[str], @click.option('--pending', help='Restart if pending', is_flag=True) @click.option('--timeout', help='Return error and fail over if necessary when restarting takes longer than this.') @option_force -@click.pass_obj -def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member_names: List[str], +def restart(cluster_name: str, group: Optional[int], member_names: List[str], force: bool, role: str, p_any: bool, scheduled: Optional[str], version: Optional[str], pending: bool, timeout: Optional[str]) -> None: """Process ``restart`` command of ``patronictl`` utility. Restart Postgres on cluster members based on given filters. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should restart members. Refer to the module note for more details. :param member_names: name of the members that should be restarted. @@ -1076,9 +1076,9 @@ def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member * *version* could not be parsed; or * a restart is attempted against a cluster that is in maintenance mode. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() + cluster = get_dcs(cluster_name, group).get_cluster() - members = get_members(obj, cluster, cluster_name, member_names, role, force, 'restart', False, group=group) + members = get_members(cluster, cluster_name, member_names, role, force, 'restart', False, group=group) if scheduled is None and not force: next_hour = (datetime.datetime.now() + datetime.timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M') scheduled = click.prompt('When should the restart take place (e.g. ' + next_hour + ') ', @@ -1095,7 +1095,7 @@ def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member version = click.prompt('Restart if the PostgreSQL version is less than provided (e.g. 9.5.2) ', type=str, default='') - content = {} + content: Dict[str, Any] = {} if pending: content['restart_pending'] = True @@ -1108,7 +1108,7 @@ def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member content['postgres_version'] = version if scheduled_at: - if get_global_config(cluster).is_paused: + if global_config.from_cluster(cluster).is_paused: raise PatroniCtlException("Can't schedule restart in the paused state") content['schedule'] = scheduled_at.isoformat() @@ -1140,9 +1140,7 @@ def restart(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member @click.argument('member_names', nargs=-1) @option_force @click.option('--wait', help='Wait until reinitialization completes', is_flag=True) -@click.pass_obj -def reinit(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - member_names: List[str], force: bool, wait: bool) -> None: +def reinit(cluster_name: str, group: Optional[int], member_names: List[str], force: bool, wait: bool) -> None: """Process ``reinit`` command of ``patronictl`` utility. Reinitialize cluster members based on given filters. @@ -1150,15 +1148,14 @@ def reinit(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. 
note:: Only reinitialize replica members, not a leader. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should reinit members. Refer to the module note for more details. :param member_names: name of the members that should be reinitialized. :param force: perform the restart without asking for confirmations. :param wait: wait for the operation to complete. """ - cluster = get_dcs(obj, cluster_name, group).get_cluster() - members = get_members(obj, cluster, cluster_name, member_names, 'replica', force, 'reinitialize', group=group) + cluster = get_dcs(cluster_name, group).get_cluster() + members = get_members(cluster, cluster_name, member_names, 'replica', force, 'reinitialize', group=group) wait_on_members: List[Member] = [] for member in members: @@ -1189,8 +1186,8 @@ def reinit(obj: Dict[str, Any], cluster_name: str, group: Optional[int], wait_on_members.remove(member) -def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: str, - group: Optional[int], leader: Optional[str], candidate: Optional[str], +def _do_failover_or_switchover(action: str, cluster_name: str, group: Optional[int], + switchover_leader: Optional[str], candidate: Optional[str], force: bool, scheduled: Optional[str] = None) -> None: """Perform a failover or a switchover operation in the cluster. @@ -1200,12 +1197,11 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s .. note:: If not able to perform the operation through the REST API, write directly to the DCS as a fall back. - :param obj: Patroni configuration. :param action: action to be taken -- ``failover`` or ``switchover``. :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a failover or switchover. If ``None``, user will be prompted for filling it -- unless *force* is ``True``, in which case an exception is raised. 
- :param leader: name of the current leader member. + :param switchover_leader: name of the leader member passed as switchover option. :param candidate: name of a standby member to be promoted. Nodes that are tagged with ``nofailover`` cannot be used. :param force: perform the failover or switchover without asking for confirmations. :param scheduled: timestamp when the switchover should be scheduled to occur. If ``now`` perform immediately. @@ -1214,46 +1210,48 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s :class:`PatroniCtlException`: if: * Patroni is running on a Citus cluster, but no *group* was specified; or * a switchover was requested by the cluster has no leader; or - * *leader* does not match the current leader of the cluster; or + * *switchover_leader* does not match the current leader of the cluster; or * cluster has no candidates available for the operation; or * no *candidate* is given for a failover operation; or - * *leader* and *candidate* are the same; or + * current leader and *candidate* are the same; or + * *candidate* is tagged as nofailover; or * *candidate* is not a member of the cluster; or * trying to schedule a switchover in a cluster that is in maintenance mode; or * user aborts the operation. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() click.echo('Current cluster topology') - output_members(obj, cluster, cluster_name, group=group) + output_members(cluster, cluster_name, group=group) - if obj.get('citus') and group is None: + if is_citus_cluster() and group is None: if force: raise PatroniCtlException('For Citus clusters the --group must me specified') else: group = click.prompt('Citus group', type=int) - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - global_config = get_global_config(cluster) + config = global_config.from_cluster(cluster) + cluster_leader = cluster.leader and cluster.leader.name # leader has to be be defined for switchover only if action == 'switchover': - if cluster.leader is None or not cluster.leader.name: + if not cluster_leader: raise PatroniCtlException('This cluster has no leader') - if leader is None: + if switchover_leader is None: if force: - leader = cluster.leader.name + switchover_leader = cluster_leader else: - prompt = 'Standby Leader' if global_config.is_standby_cluster else 'Primary' - leader = click.prompt(prompt, type=str, default=(cluster.leader and cluster.leader.name)) + prompt = 'Standby Leader' if config.is_standby_cluster else 'Primary' + switchover_leader = click.prompt(prompt, type=str, default=cluster_leader) - if cluster.leader.name != leader: - raise PatroniCtlException(f'Member {leader} is not the leader of cluster {cluster_name}') + if cluster_leader != switchover_leader: + raise PatroniCtlException(f'Member {switchover_leader} is not the leader of cluster {cluster_name}') # excluding members with nofailover tag - candidate_names = [str(m.name) for m in cluster.members if m.name != leader and not m.nofailover] + candidate_names = [str(m.name) for m in cluster.members if m.name != cluster_leader and not m.nofailover] # We sort the names for consistent output to the client 
candidate_names.sort()
@@ -1266,19 +1264,19 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s
         if action == 'failover' and not candidate:
             raise PatroniCtlException('Failover could be performed only to a specific candidate')

-        if candidate == leader:
-            raise PatroniCtlException(action.title() + ' target and source are the same.')
-
-        if candidate and candidate not in candidate_names:
+        if candidate == cluster_leader:
+            raise PatroniCtlException(
+                f'Member {candidate} is already the leader of cluster {cluster_name}')
+        if candidate and candidate not in candidate_names:
             raise PatroniCtlException(
                 f'Member {candidate} does not exist in cluster {cluster_name} or is tagged as nofailover')

     if all((not force,
             action == 'failover',
-            global_config.is_synchronous_mode,
+            config.is_synchronous_mode,
             not cluster.sync.is_empty,
             not cluster.sync.matches(candidate, True))):
-        if click.confirm(f'Are you sure you want to failover to the asynchronous node {candidate}'):
+        if not click.confirm(f'Are you sure you want to failover to the asynchronous node {candidate}?'):
             raise PatroniCtlException('Aborting ' + action)

     scheduled_at_str = None
@@ -1292,13 +1290,13 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s
         scheduled_at = parse_scheduled(scheduled)

         if scheduled_at:
-            if global_config.is_paused:
+            if config.is_paused:
                 raise PatroniCtlException("Can't schedule switchover in the paused state")
             scheduled_at_str = scheduled_at.isoformat()

     failover_value = {'candidate': candidate}
     if action == 'switchover':
-        failover_value['leader'] = leader
+        failover_value['leader'] = switchover_leader
     if scheduled_at_str:
         failover_value['scheduled_at'] = scheduled_at_str
@@ -1306,7 +1304,7 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s

     # By now we have established that the leader exists and the candidate exists
     if not force:
-        demote_msg = f', demoting current
leader {cluster_leader}' if cluster_leader else '' if scheduled_at_str: # only switchover can be scheduled if not click.confirm(f'Are you sure you want to schedule switchover of cluster ' @@ -1340,9 +1338,9 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s logging.exception(r) logging.warning('Failing over to DCS') click.echo('{0} Could not {1} using Patroni api, falling back to DCS'.format(timestamp(), action)) - dcs.manual_failover(leader, candidate, scheduled_at=scheduled_at) + dcs.manual_failover(switchover_leader, candidate, scheduled_at=scheduled_at) - output_members(obj, cluster, cluster_name, group=group) + output_members(cluster, cluster_name, group=group) @ctl.command('failover', help='Failover to a replica') @@ -1351,8 +1349,7 @@ def _do_failover_or_switchover(obj: Dict[str, Any], action: str, cluster_name: s @click.option('--leader', '--primary', '--master', 'leader', help='The name of the current leader', default=None) @click.option('--candidate', help='The name of the candidate', default=None) @option_force -@click.pass_obj -def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], +def failover(cluster_name: str, group: Optional[int], leader: Optional[str], candidate: Optional[str], force: bool) -> None: """Process ``failover`` command of ``patronictl`` utility. @@ -1366,7 +1363,6 @@ def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. seealso:: Refer to :func:`_do_failover_or_switchover` for details. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a failover or switchover. 
If ``None``, user will be prompted for filling it -- unless *force* is ``True``, in which case an exception is raised by @@ -1381,7 +1377,7 @@ def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], click.echo(click.style( 'Supplying a leader name using this command is deprecated and will be removed in a future version of' ' Patroni, change your scripts to use `switchover` instead.\nExecuting switchover!', fg='red')) - _do_failover_or_switchover(obj, action, cluster_name, group, leader, candidate, force) + _do_failover_or_switchover(action, cluster_name, group, leader, candidate, force) @ctl.command('switchover', help='Switchover to a replica') @@ -1392,9 +1388,8 @@ def failover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], @click.option('--scheduled', help='Timestamp of a scheduled switchover in unambiguous format (e.g. ISO 8601)', default=None) @option_force -@click.pass_obj -def switchover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - leader: Optional[str], candidate: Optional[str], force: bool, scheduled: Optional[str]) -> None: +def switchover(cluster_name: str, group: Optional[int], leader: Optional[str], + candidate: Optional[str], force: bool, scheduled: Optional[str]) -> None: """Process ``switchover`` command of ``patronictl`` utility. Perform a switchover operation in the cluster. @@ -1402,7 +1397,6 @@ def switchover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], .. seealso:: Refer to :func:`_do_failover_or_switchover` for details. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter Citus group within we should perform a switchover. If ``None``, user will be prompted for filling it -- unless *force* is ``True``, in which case an exception is raised by @@ -1412,7 +1406,7 @@ def switchover(obj: Dict[str, Any], cluster_name: str, group: Optional[int], :param force: perform the switchover without asking for confirmations. 
:param scheduled: timestamp when the switchover should be scheduled to occur. If ``now`` perform immediately. """ - _do_failover_or_switchover(obj, 'switchover', cluster_name, group, leader, candidate, force, scheduled) + _do_failover_or_switchover('switchover', cluster_name, group, leader, candidate, force, scheduled) def generate_topology(level: int, member: Dict[str, Any], @@ -1514,8 +1508,8 @@ def get_cluster_service_info(cluster: Dict[str, Any]) -> List[str]: return service_info -def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, - extended: bool = False, fmt: str = 'pretty', group: Optional[int] = None) -> None: +def output_members(cluster: Cluster, name: str, extended: bool = False, + fmt: str = 'pretty', group: Optional[int] = None) -> None: """Print information about the Patroni cluster and its members. Information is printed to console through :func:`print_output`, and contains: @@ -1540,7 +1534,6 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, The 3 extended columns are always included if *extended*, even if the member has no value for a given column. If not *extended*, these columns may still be shown if any of the members has any information for them. - :param obj: Patroni configuration. :param cluster: Patroni cluster. :param name: name of the Patroni cluster. 
:param extended: if extended information (pending restarts, scheduled restarts, node tags) should be printed, if @@ -1558,8 +1551,7 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, clusters = {group or 0: cluster_as_json(cluster)} - is_citus_cluster = obj.get('citus') - if is_citus_cluster: + if is_citus_cluster(): columns.insert(1, 'Group') if group is None: clusters.update({g: cluster_as_json(c) for g, c in cluster.workers.items()}) @@ -1597,10 +1589,12 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, rows.append([member.get(n.lower().replace(' ', '_'), '') for n in columns]) - title = 'Citus cluster' if is_citus_cluster else 'Cluster' - title_details = f' ({initialize})' - if is_citus_cluster: + if is_citus_cluster(): + title = 'Citus cluster' title_details = '' if group is None else f' (group: {group}, {initialize})' + else: + title = 'Cluster' + title_details = f' ({initialize})' title = f' {title}: {name}{title_details} ' print_output(columns, rows, {'Group': 'r', 'Lag in MB': 'r', 'TL': 'r'}, fmt, title) @@ -1611,7 +1605,7 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, for g, c in sorted(clusters.items()): service_info = get_cluster_service_info(c) if service_info: - if is_citus_cluster and group is None: + if is_citus_cluster() and group is None: click.echo('Citus group: {0}'.format(g)) click.echo(' ' + '\n '.join(service_info)) @@ -1624,16 +1618,14 @@ def output_members(obj: Dict[str, Any], cluster: Cluster, name: str, @option_format @option_watch @option_watchrefresh -@click.pass_obj -def members(obj: Dict[str, Any], cluster_names: List[str], group: Optional[int], - fmt: str, watch: Optional[int], w: bool, extended: bool, ts: bool) -> None: +def members(cluster_names: List[str], group: Optional[int], fmt: str, + watch: Optional[int], w: bool, extended: bool, ts: bool) -> None: """Process ``list`` command of ``patronictl`` utility. 
Print information about the Patroni cluster through :func:`output_members`. - :param obj: Patroni configuration. :param cluster_names: name of clusters that should be printed. If ``None`` consider only the cluster present in - ``scope`` key of *obj*. + ``scope`` key of the configuration. :param group: filter which Citus group we should get members from. Refer to the module note for more details. :param fmt: the output table printing format. See :func:`print_output` for available options. :param watch: if given print output every *watch* seconds. @@ -1642,9 +1634,10 @@ def members(obj: Dict[str, Any], cluster_names: List[str], group: Optional[int], more details. :param ts: if timestamp should be included in the output. """ + config = _get_configuration() if not cluster_names: - if 'scope' in obj: - cluster_names = [obj['scope']] + if 'scope' in config: + cluster_names = [config['scope']] if not cluster_names: return logging.warning('Listing members: No cluster names were provided') @@ -1653,10 +1646,10 @@ def members(obj: Dict[str, Any], cluster_names: List[str], group: Optional[int], click.echo(timestamp(0)) for cluster_name in cluster_names: - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - output_members(obj, cluster, cluster_name, extended, fmt, group) + output_members(cluster, cluster_name, extended, fmt, group) @ctl.command('topology', help='Prints ASCII topology for given cluster') @@ -1698,14 +1691,12 @@ def timestamp(precision: int = 6) -> str: @click.argument('target', type=click.Choice(['restart', 'switchover'])) @click.option('--role', '-r', help='Flush only members with this role', type=role_choice, default='any') @option_force -@click.pass_obj -def flush(obj: Dict[str, Any], cluster_name: str, group: Optional[int], +def flush(cluster_name: str, group: Optional[int], member_names: List[str], force: bool, role: str, target: str) -> None: """Process ``flush`` command of ``patronictl`` utility. 
Discard scheduled restart or switchover events. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should flush an event. Refer to the module note for more details. :param member_names: name of the members which events should be flushed. @@ -1713,11 +1704,11 @@ def flush(obj: Dict[str, Any], cluster_name: str, group: Optional[int], :param role: role to filter members. See :func:`get_all_members` for available options. :param target: the event that should be flushed -- ``restart`` or ``switchover``. """ - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() if target == 'restart': - for member in get_members(obj, cluster, cluster_name, member_names, role, force, 'flush', group=group): + for member in get_members(cluster, cluster_name, member_names, role, force, 'flush', group=group): if member.data.get('scheduled_restart'): r = request_patroni(member, 'delete', 'restart') check_response(r, member.name, 'flush scheduled restart') @@ -1753,7 +1744,7 @@ def wait_until_pause_is_applied(dcs: AbstractDCS, paused: bool, old_cluster: Clu :param old_cluster: original cluster information before pause or unpause has been requested. Used to report which nodes are still pending to have ``pause`` equal *paused* at a given point in time. 
""" - config = get_global_config(old_cluster) + config = global_config.from_cluster(old_cluster) click.echo("'{0}' request sent, waiting until it is recognized by all nodes".format(paused and 'pause' or 'resume')) old = {m.name: m.version for m in old_cluster.members if m.api_url} @@ -1775,10 +1766,9 @@ def wait_until_pause_is_applied(dcs: AbstractDCS, paused: bool, old_cluster: Clu return click.echo('Success: cluster management is {0}'.format(paused and 'paused' or 'resumed')) -def toggle_pause(config: Dict[str, Any], cluster_name: str, group: Optional[int], paused: bool, wait: bool) -> None: +def toggle_pause(cluster_name: str, group: Optional[int], paused: bool, wait: bool) -> None: """Toggle the ``pause`` state in the cluster members. - :param config: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should toggle the pause state of. Refer to the module note for more details. @@ -1790,9 +1780,9 @@ def toggle_pause(config: Dict[str, Any], cluster_name: str, group: Optional[int] * ``pause`` state is already *paused*; or * cluster contains no accessible members. 
""" - dcs = get_dcs(config, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() - if get_global_config(cluster).is_paused == paused: + if global_config.from_cluster(cluster).is_paused == paused: raise PatroniCtlException('Cluster is {0} paused'.format(paused and 'already' or 'not')) for member in get_all_members_leader_first(cluster): @@ -1819,37 +1809,33 @@ def toggle_pause(config: Dict[str, Any], cluster_name: str, group: Optional[int] @ctl.command('pause', help='Disable auto failover') @arg_cluster_name @option_default_citus_group -@click.pass_obj @click.option('--wait', help='Wait until pause is applied on all nodes', is_flag=True) -def pause(obj: Dict[str, Any], cluster_name: str, group: Optional[int], wait: bool) -> None: +def pause(cluster_name: str, group: Optional[int], wait: bool) -> None: """Process ``pause`` command of ``patronictl`` utility. Put the cluster in maintenance mode. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should pause. Refer to the module note for more details. :param wait: ``True`` if it should block until the operation is finished or ``false`` for returning immediately. """ - return toggle_pause(obj, cluster_name, group, True, wait) + return toggle_pause(cluster_name, group, True, wait) @ctl.command('resume', help='Resume auto failover') @arg_cluster_name @option_default_citus_group @click.option('--wait', help='Wait until pause is cleared on all nodes', is_flag=True) -@click.pass_obj -def resume(obj: Dict[str, Any], cluster_name: str, group: Optional[int], wait: bool) -> None: +def resume(cluster_name: str, group: Optional[int], wait: bool) -> None: """Process ``unpause`` command of ``patronictl`` utility. Put the cluster out of maintenance mode. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should unpause. 
Refer to the module note for more details. :param wait: ``True`` if it should block until the operation is finished or ``false`` for returning immediately. """ - return toggle_pause(obj, cluster_name, group, False, wait) + return toggle_pause(cluster_name, group, False, wait) @contextmanager @@ -2081,15 +2067,12 @@ def invoke_editor(before_editing: str, cluster_name: str) -> Tuple[str, Dict[str @click.option('--replace', 'replace_filename', help='Apply configuration from file, replacing existing configuration.' ' Use - for stdin.') @option_force -@click.pass_obj -def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], - force: bool, quiet: bool, kvpairs: List[str], pgkvpairs: List[str], - apply_filename: Optional[str], replace_filename: Optional[str]) -> None: +def edit_config(cluster_name: str, group: Optional[int], force: bool, quiet: bool, kvpairs: List[str], + pgkvpairs: List[str], apply_filename: Optional[str], replace_filename: Optional[str]) -> None: """Process ``edit-config`` command of ``patronictl`` utility. Update or replace Patroni configuration in the DCS. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group configuration we should edit. Refer to the module note for more details. :param force: if ``True`` apply config changes without asking for confirmations. @@ -2106,7 +2089,7 @@ def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], * Configuration is absent from DCS; or * Detected a concurrent modification of the configuration in the DCS. 
""" - dcs = get_dcs(obj, cluster_name, group) + dcs = get_dcs(cluster_name, group) cluster = dcs.get_cluster() if not cluster.config: @@ -2144,7 +2127,7 @@ def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], return if force or click.confirm('Apply these changes?'): - if not dcs.set_config_value(json.dumps(changed_data), cluster.config.version): + if not dcs.set_config_value(json.dumps(changed_data, separators=(',', ':')), cluster.config.version): raise PatroniCtlException("Config modification aborted due to concurrent changes") click.echo("Configuration changed") @@ -2152,17 +2135,15 @@ def edit_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int], @ctl.command('show-config', help="Show cluster configuration") @arg_cluster_name @option_default_citus_group -@click.pass_obj -def show_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int]) -> None: +def show_config(cluster_name: str, group: Optional[int]) -> None: """Process ``show-config`` command of ``patronictl`` utility. Show Patroni configuration stored in the DCS. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group configuration we should show. Refer to the module note for more details. """ - cluster = get_dcs(obj, cluster_name, group).get_cluster() + cluster = get_dcs(cluster_name, group).get_cluster() if cluster.config: click.echo(format_config_for_editing(cluster.config.data)) @@ -2171,8 +2152,7 @@ def show_config(obj: Dict[str, Any], cluster_name: str, group: Optional[int]) -> @click.argument('cluster_name', required=False) @click.argument('member_names', nargs=-1) @option_citus_group -@click.pass_obj -def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member_names: List[str]) -> None: +def version(cluster_name: str, group: Optional[int], member_names: List[str]) -> None: """Process ``version`` command of ``patronictl`` utility. 
Show version of: @@ -2180,7 +2160,6 @@ def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member * ``patroni`` on all members of the cluster; * ``PostgreSQL`` on all members of the cluster. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get members from. Refer to the module note for more details. :param member_names: filter which members we should get version information from. @@ -2191,8 +2170,8 @@ def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member return click.echo("") - cluster = get_dcs(obj, cluster_name, group).get_cluster() - for m in get_all_members(obj, cluster, group, 'any'): + cluster = get_dcs(cluster_name, group).get_cluster() + for m in get_all_members(cluster, group, 'any'): if m.api_url: if not member_names or m.name in member_names: try: @@ -2210,8 +2189,7 @@ def version(obj: Dict[str, Any], cluster_name: str, group: Optional[int], member @arg_cluster_name @option_default_citus_group @option_format -@click.pass_obj -def history(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: str) -> None: +def history(cluster_name: str, group: Optional[int], fmt: str) -> None: """Process ``history`` command of ``patronictl`` utility. Show the history of failover/switchover events in the cluster. @@ -2223,12 +2201,11 @@ def history(obj: Dict[str, Any], cluster_name: str, group: Optional[int], fmt: s * ``Timestamp``: timestamp when the event occurred; * ``New Leader``: the Postgres node that was promoted during the event. - :param obj: Patroni configuration. :param cluster_name: name of the Patroni cluster. :param group: filter which Citus group we should get events from. Refer to the module note for more details. :param fmt: the output table printing format. See :func:`print_output` for available options. 
""" - cluster = get_dcs(obj, cluster_name, group).get_cluster() + cluster = get_dcs(cluster_name, group).get_cluster() cluster_history = cluster.history.lines if cluster.history else [] history: List[List[Any]] = list(map(list, cluster_history)) table_header_row = ['TL', 'LSN', 'Reason', 'Timestamp', 'New Leader'] diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py index 389cae868..a210615bf 100644 --- a/patroni/dcs/__init__.py +++ b/patroni/dcs/__init__.py @@ -1,26 +1,22 @@ """Abstract classes for Distributed Configuration Store.""" import abc import datetime -import importlib -import inspect import json import logging -import os -import pkgutil import re -import sys import time from collections import defaultdict from copy import deepcopy from random import randint from threading import Event, Lock -from types import ModuleType -from typing import Any, Callable, Collection, Dict, List, NamedTuple, Optional, Set, Tuple, Union, TYPE_CHECKING, \ - Type, Iterator +from typing import Any, Callable, Collection, Dict, Iterator, List, \ + NamedTuple, Optional, Tuple, Type, TYPE_CHECKING, Union from urllib.parse import urlparse, urlunparse, parse_qsl import dateutil.parser +from .. import global_config +from ..dynamic_loader import iter_classes, iter_modules from ..exceptions import PatroniFatalException from ..utils import deep_compare, uri from ..tags import Tags @@ -28,10 +24,10 @@ if TYPE_CHECKING: # pragma: no cover from ..config import Config + from ..postgresql import Postgresql + from ..postgresql.mpp import AbstractMPP SLOT_ADVANCE_AVAILABLE_VERSION = 110000 -CITUS_COORDINATOR_GROUP_ID = 0 -citus_group_re = re.compile('^(0|[1-9][0-9]*)$') slot_name_re = re.compile('^[a-z0-9_]{1,63}$') logger = logging.getLogger(__name__) @@ -87,28 +83,9 @@ def parse_connection_string(value: str) -> Tuple[str, Union[str, None]]: def dcs_modules() -> List[str]: """Get names of DCS modules, depending on execution environment. - .. 
note:: - If being packaged with PyInstaller, modules aren't discoverable dynamically by scanning source directory because - :class:`importlib.machinery.FrozenImporter` doesn't implement :func:`iter_modules`. But it is still possible to - find all potential DCS modules by iterating through ``toc``, which contains list of all "frozen" resources. - :returns: list of known module names with absolute python module path namespace, e.g. ``patroni.dcs.etcd``. """ - dcs_dirname = os.path.dirname(__file__) - module_prefix = __package__ + '.' - - if getattr(sys, 'frozen', False): - toc: Set[str] = set() - # dcs_dirname may contain a dot, which causes pkgutil.iter_importers() - # to misinterpret the path as a package name. This can be avoided - # altogether by not passing a path at all, because PyInstaller's - # FrozenImporter is a singleton and registered as top-level finder. - for importer in pkgutil.iter_importers(): - if hasattr(importer, 'toc'): - toc |= getattr(importer, 'toc') - return [module for module in toc if module.startswith(module_prefix) and module.count('.') == 2] - - return [module_prefix + name for _, name, is_pkg in pkgutil.iter_modules([dcs_dirname]) if not is_pkg] + return iter_modules(__package__) def iter_dcs_classes( @@ -122,44 +99,16 @@ def iter_dcs_classes( :param config: configuration information with possible DCS names as keys. If given, only attempt to import DCS modules defined in the configuration. Else, if ``None``, attempt to import any supported DCS module. - :yields: a tuple containing the module ``name`` and the imported DCS class object. 
- """ - for mod_name in dcs_modules(): - name = mod_name.rpartition('.')[2] - if config is None or name in config: - - try: - module = importlib.import_module(mod_name) - dcs_module = find_dcs_class_in_module(module) - if dcs_module: - yield name, dcs_module - - except ImportError: - logger.log(logging.DEBUG if config is not None else logging.INFO, - 'Failed to import %s', mod_name) - - -def find_dcs_class_in_module(module: ModuleType) -> Optional[Type['AbstractDCS']]: - """Try to find the implementation of :class:`AbstractDCS` interface in *module* matching the *module* name. - - :param module: Imported DCS module. - - :returns: class with a name matching the name of *module* that implements :class:`AbstractDCS` or ``None`` if not - found. + :returns: an iterator of tuples, each containing the module ``name`` and the imported DCS class object. """ - module_name = module.__name__.rpartition('.')[2] - return next( - (obj for obj_name, obj in module.__dict__.items() - if (obj_name.lower() == module_name - and inspect.isclass(obj) and issubclass(obj, AbstractDCS))), - None) + return iter_classes(__package__, AbstractDCS, config) def get_dcs(config: Union['Config', Dict[str, Any]]) -> 'AbstractDCS': """Attempt to load a Distributed Configuration Store from known available implementations. .. note:: - Using the list of available DCS classes returned by :func:`iter_dcs_classes` attempt to dynamically + Using the list of available DCS classes returned by :func:`iter_classes` attempt to dynamically instantiate the class that implements a DCS using the abstract class :class:`AbstractDCS`. 
Basic top-level configuration parameters retrieved from *config* are propagated to the DCS specific config @@ -180,14 +129,13 @@ def get_dcs(config: Union['Config', Dict[str, Any]]) -> 'AbstractDCS': p: config[p] for p in ('namespace', 'name', 'scope', 'loop_wait', 'patronictl', 'ttl', 'retry_timeout') if p in config}) - # From citus section we only need "group" parameter, but will propagate everything just in case. - if isinstance(config.get('citus'), dict): - config[name].update(config['citus']) - return dcs_class(config[name]) - raise PatroniFatalException( - f"Can not find suitable configuration of distributed configuration store\n" - f"Available implementations: {', '.join(sorted([n for n, _ in iter_dcs_classes()]))}") + from patroni.postgresql.mpp import get_mpp + return dcs_class(config[name], get_mpp(config)) + + available_implementations = ', '.join(sorted([n for n, _ in iter_dcs_classes()])) + raise PatroniFatalException("Can not find suitable configuration of distributed configuration store\n" + f"Available implementations: {available_implementations}") _Version = Union[int, str] @@ -590,24 +538,6 @@ def from_node(version: _Version, value: str, modify_version: Optional[_Version] modify_version = 0 return ClusterConfig(version, data, version if modify_version is None else modify_version) - @property - def permanent_slots(self) -> Dict[str, Any]: - """Dictionary of permanent slots information looked up from :attr:`~ClusterConfig.data`.""" - return (self.data.get('permanent_replication_slots') - or self.data.get('permanent_slots') - or self.data.get('slots') - or {}) - - @property - def ignore_slots_matchers(self) -> List[Dict[str, Any]]: - """The value for ``ignore_slots`` from :attr:`~ClusterConfig.data` if defined or an empty list.""" - return self.data.get('ignore_slots') or [] - - @property - def max_timelines_history(self) -> int: - """The value for ``max_timelines_history`` from :attr:`~ClusterConfig.data` if defined or ``0``.""" - return 
self.data.get('max_timelines_history', 0) - class SyncState(NamedTuple): """Immutable object (namedtuple) which represents last observed synchronous replication state. @@ -626,7 +556,7 @@ def from_node(version: Optional[_Version], value: Union[str, Dict[str, Any], Non """Factory method to parse *value* as synchronisation state information. :param version: optional *version* number for the object. - :param value: (optionally JSON serialised) sychronisation state information + :param value: (optionally JSON serialised) synchronisation state information :returns: constructed :class:`SyncState` object. @@ -996,7 +926,7 @@ def is_logical_slot(value: Union[Any, Dict[str, Any]]) -> bool: @property def __permanent_slots(self) -> Dict[str, Union[Dict[str, Any], Any]]: """Dictionary of permanent replication slots with their known LSN.""" - ret: Dict[str, Union[Dict[str, Any], Any]] = deepcopy(self.config.permanent_slots if self.config else {}) + ret: Dict[str, Union[Dict[str, Any], Any]] = global_config.permanent_slots members: Dict[str, int] = {slot_name_from_member_name(m.name): m.lsn or 0 for m in self.members} slots: Dict[str, int] = {k: parse_int(v) or 0 for k, v in (self.slots or {}).items()} @@ -1025,36 +955,29 @@ def __permanent_logical_slots(self) -> Dict[str, Any]: """Dictionary of permanent ``logical`` replication slots.""" return {name: value for name, value in self.__permanent_slots.items() if self.is_logical_slot(value)} - @property - def use_slots(self) -> bool: - """``True`` if cluster is configured to use replication slots.""" - return bool(self.config and (self.config.data.get('postgresql') or {}).get('use_slots', True)) - - def get_replication_slots(self, my_name: str, role: str, nofailover: bool, major_version: int, *, - is_standby_cluster: bool = False, show_error: bool = False) -> Dict[str, Dict[str, Any]]: + def get_replication_slots(self, postgresql: 'Postgresql', member: Tags, *, + role: Optional[str] = None, show_error: bool = False) -> Dict[str, 
Dict[str, Any]]: """Lookup configured slot names in the DCS, report issues found and merge with permanent slots. Will log an error if: * Any logical slots are disabled, due to version compatibility, and *show_error* is ``True``. - :param my_name: name of this node. - :param role: role of this node. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. - :param major_version: postgresql major version. - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. + :param postgresql: reference to :class:`Postgresql` object. + :param member: reference to an object implementing :class:`Tags` interface. + :param role: role of the node, if not set will be taken from *postgresql*. :param show_error: if ``True`` report error if any disabled logical slots or conflicting slot names are found. :returns: final dictionary of slot names, after merging with permanent slots and performing sanity checks. """ - slots: Dict[str, Dict[str, str]] = self._get_members_slots(my_name, role) - permanent_slots: Dict[str, Any] = self._get_permanent_slots(is_standby_cluster=is_standby_cluster, - role=role, nofailover=nofailover, - major_version=major_version) + name = member.name if isinstance(member, Member) else postgresql.name + role = role or postgresql.role + + slots: Dict[str, Dict[str, str]] = self._get_members_slots(name, role) + permanent_slots: Dict[str, Any] = self._get_permanent_slots(postgresql, member, role) disabled_permanent_logical_slots: List[str] = self._merge_permanent_slots( - slots, permanent_slots, my_name, major_version) + slots, permanent_slots, name, postgresql.major_version) if disabled_permanent_logical_slots and show_error: logger.error("Permanent logical replication slots supported by Patroni only starting from PostgreSQL 11. 
" @@ -1062,7 +985,7 @@ def get_replication_slots(self, my_name: str, role: str, nofailover: bool, major return slots - def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slots: Dict[str, Any], my_name: str, + def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slots: Dict[str, Any], name: str, major_version: int) -> List[str]: """Merge replication *slots* for members with *permanent_slots*. @@ -1072,7 +995,7 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo Type is assumed to be ``physical`` if there are no attributes stored as the slot value. :param slots: Slot names with existing attributes if known. - :param my_name: name of this node. + :param name: name of this node. :param permanent_slots: dictionary containing slot name key and slot information values. :param major_version: postgresql major version. @@ -1080,9 +1003,9 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo """ disabled_permanent_logical_slots: List[str] = [] - for name, value in permanent_slots.items(): - if not slot_name_re.match(name): - logger.error("Invalid permanent replication slot name '%s'", name) + for slot_name, value in permanent_slots.items(): + if not slot_name_re.match(slot_name): + logger.error("Invalid permanent replication slot name '%s'", slot_name) logger.error("Slot name may only contain lower case letters, numbers, and the underscore chars") continue @@ -1093,25 +1016,24 @@ def _merge_permanent_slots(self, slots: Dict[str, Dict[str, str]], permanent_slo if value['type'] == 'physical': # Don't try to create permanent physical replication slot for yourself - if name != slot_name_from_member_name(my_name): - slots[name] = value + if slot_name != slot_name_from_member_name(name): + slots[slot_name] = value continue if self.is_logical_slot(value): if major_version < SLOT_ADVANCE_AVAILABLE_VERSION: - disabled_permanent_logical_slots.append(name) - elif name in 
slots: + disabled_permanent_logical_slots.append(slot_name) + elif slot_name in slots: logger.error("Permanent logical replication slot {'%s': %s} is conflicting with" - " physical replication slot for cluster member", name, value) + " physical replication slot for cluster member", slot_name, value) else: - slots[name] = value + slots[slot_name] = value continue - logger.error("Bad value for slot '%s' in permanent_slots: %s", name, permanent_slots[name]) + logger.error("Bad value for slot '%s' in permanent_slots: %s", slot_name, permanent_slots[slot_name]) return disabled_permanent_logical_slots - def _get_permanent_slots(self, *, is_standby_cluster: bool, role: str, - nofailover: bool, major_version: int) -> Dict[str, Any]: + def _get_permanent_slots(self, postgresql: 'Postgresql', tags: Tags, role: str) -> Dict[str, Any]: """Get configured permanent replication slots. .. note:: @@ -1123,25 +1045,23 @@ def _get_permanent_slots(self, *, is_standby_cluster: bool, role: str, The returned dictionary for a non-standby cluster always contains permanent logical replication slots in order to show a warning if they are not supported by PostgreSQL before v11. - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. - :param role: role of this node -- ``primary``, ``standby_leader`` or ``replica``. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. + :param tags: reference to an object implementing :class:`Tags` interface. + :param role: role of the node -- ``primary``, ``standby_leader`` or ``replica``. :returns: dictionary of permanent slot names mapped to attributes. 
""" - if not self.use_slots or nofailover: + if not global_config.use_slots or tags.nofailover: return {} - if is_standby_cluster: + if global_config.is_standby_cluster: return self.__permanent_physical_slots \ - if major_version >= SLOT_ADVANCE_AVAILABLE_VERSION or role == 'standby_leader' else {} + if postgresql.major_version >= SLOT_ADVANCE_AVAILABLE_VERSION or role == 'standby_leader' else {} - return self.__permanent_slots if major_version >= SLOT_ADVANCE_AVAILABLE_VERSION\ + return self.__permanent_slots if postgresql.major_version >= SLOT_ADVANCE_AVAILABLE_VERSION\ or role in ('master', 'primary') else self.__permanent_logical_slots - def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str]]: + def _get_members_slots(self, name: str, role: str) -> Dict[str, Dict[str, str]]: """Get physical replication slots configuration for members that sourcing from this node. If the ``replicatefrom`` tag is set on the member - we should not create the replication slot for it on @@ -1153,25 +1073,25 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str * Conflicting slot names between members are found - :param my_name: name of this node. + :param name: name of this node. :param role: role of this node, if this is a ``primary`` or ``standby_leader`` return list of members replicating from this node. If not then return a list of members replicating as cascaded replicas from this node. :returns: dictionary of physical replication slots that should exist on a given node. 
""" - if not self.use_slots: + if not global_config.use_slots: return {} # we always want to exclude the member with our name from the list - members = filter(lambda m: m.name != my_name, self.members) + members = filter(lambda m: m.name != name, self.members) if role in ('master', 'primary', 'standby_leader'): members = [m for m in members if m.replicatefrom is None - or m.replicatefrom == my_name or not self.has_member(m.replicatefrom)] + or m.replicatefrom == name or not self.has_member(m.replicatefrom)] else: # only manage slots for replicas that replicate from this one, except for the leader among them - members = [m for m in members if m.replicatefrom == my_name and m.name != self.leader_name] + members = [m for m in members if m.replicatefrom == name and m.name != self.leader_name] slots = {slot_name_from_member_name(m.name): {'type': 'physical'} for m in members} if len(slots) < len(members): @@ -1184,84 +1104,76 @@ def _get_members_slots(self, my_name: str, role: str) -> Dict[str, Dict[str, str for k, v in slot_conflicts.items() if len(v) > 1)) return slots - def has_permanent_slots(self, my_name: str, *, is_standby_cluster: bool = False, nofailover: bool = False, - major_version: int = SLOT_ADVANCE_AVAILABLE_VERSION) -> bool: - """Check if the given member node has permanent replication slots configured. + def has_permanent_slots(self, postgresql: 'Postgresql', member: Tags) -> bool: + """Check if our node has permanent replication slots configured. - :param my_name: name of the member node to check. - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. 
+ :param member: reference to an object implementing :class:`Tags` interface for + the node that we are checking permanent logical replication slots for. :returns: ``True`` if there are permanent replication slots configured, otherwise ``False``. """ role = 'replica' - members_slots: Dict[str, Dict[str, str]] = self._get_members_slots(my_name, role) - permanent_slots: Dict[str, Any] = self._get_permanent_slots(is_standby_cluster=is_standby_cluster, - role=role, nofailover=nofailover, - major_version=major_version) + members_slots: Dict[str, Dict[str, str]] = self._get_members_slots(postgresql.name, role) + permanent_slots: Dict[str, Any] = self._get_permanent_slots(postgresql, member, role) slots = deepcopy(members_slots) - self._merge_permanent_slots(slots, permanent_slots, my_name, major_version) + self._merge_permanent_slots(slots, permanent_slots, postgresql.name, postgresql.major_version) return len(slots) > len(members_slots) or any(self.is_physical_slot(v) for v in permanent_slots.values()) - def filter_permanent_slots(self, slots: Dict[str, int], is_standby_cluster: bool, - major_version: int) -> Dict[str, int]: + def filter_permanent_slots(self, postgresql: 'Postgresql', slots: Dict[str, int]) -> Dict[str, int]: """Filter out all non-permanent slots from provided *slots* dict. - :param slots: slot names with LSN values - :param is_standby_cluster: ``True`` if it is known that this is a standby cluster. We pass the value from - the outside because we want to protect from the ``/config`` key removal. - :param major_version: postgresql major version. + :param postgresql: reference to :class:`Postgresql` object. + :param slots: slot names with LSN values. :returns: a :class:`dict` object that contains only slots that are known to be permanent. 
""" - if major_version < SLOT_ADVANCE_AVAILABLE_VERSION: + if postgresql.major_version < SLOT_ADVANCE_AVAILABLE_VERSION: return {} # for legacy PostgreSQL we don't support permanent slots on standby nodes - permanent_slots: Dict[str, Any] = self._get_permanent_slots(is_standby_cluster=is_standby_cluster, - role='replica', - nofailover=False, - major_version=major_version) + permanent_slots: Dict[str, Any] = self._get_permanent_slots(postgresql, RemoteMember('', {}), 'replica') members_slots = {slot_name_from_member_name(m.name) for m in self.members} return {name: value for name, value in slots.items() if name in permanent_slots and (self.is_physical_slot(permanent_slots[name]) or self.is_logical_slot(permanent_slots[name]) and name not in members_slots)} - def _has_permanent_logical_slots(self, my_name: str, nofailover: bool) -> bool: + def _has_permanent_logical_slots(self, postgresql: 'Postgresql', member: Tags) -> bool: """Check if the given member node has permanent ``logical`` replication slots configured. - :param my_name: name of the member node to check. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. + :param postgresql: reference to a :class:`Postgresql` object. + :param member: reference to an object implementing :class:`Tags` interface for + the node that we are checking permanent logical replication slots for. :returns: ``True`` if any detected replications slots are ``logical``, otherwise ``False``. 
""" - slots = self.get_replication_slots(my_name, 'replica', nofailover, SLOT_ADVANCE_AVAILABLE_VERSION).values() + slots = self.get_replication_slots(postgresql, member, role='replica').values() return any(v for v in slots if v.get("type") == "logical") - def should_enforce_hot_standby_feedback(self, my_name: str, nofailover: bool) -> bool: + def should_enforce_hot_standby_feedback(self, postgresql: 'Postgresql', member: Tags) -> bool: """Determine whether ``hot_standby_feedback`` should be enabled for the given member. The ``hot_standby_feedback`` must be enabled if the current replica has ``logical`` slots, or it is working as a cascading replica for the other node that has ``logical`` slots. - :param my_name: name of the member node to check. - :param nofailover: ``True`` if this node is tagged to not be a failover candidate. + :param postgresql: reference to a :class:`Postgresql` object. + :param member: reference to an object implementing :class:`Tags` interface for + the node that we are checking permanent logical replication slots for. :returns: ``True`` if this node or any member replicating from this node has permanent logical slots, otherwise ``False``. """ - if self._has_permanent_logical_slots(my_name, nofailover): + if self._has_permanent_logical_slots(postgresql, member): return True - if self.use_slots: - members = [m for m in self.members if m.replicatefrom == my_name and m.name != self.leader_name] - return any(self.should_enforce_hot_standby_feedback(m.name, m.nofailover) for m in members) + if global_config.use_slots: + name = member.name if isinstance(member, Member) else postgresql.name + members = [m for m in self.members if m.replicatefrom == name and m.name != self.leader_name] + return any(self.should_enforce_hot_standby_feedback(postgresql, m) for m in members) return False - def get_my_slot_name_on_primary(self, my_name: str, replicatefrom: Optional[str]) -> str: - """Canonical slot name for physical replication. 
+ def get_slot_name_on_primary(self, name: str, tags: Tags) -> str: + """Get the name of physical replication slot for this node on the primary. .. note:: P <-- I <-- L @@ -1269,14 +1181,14 @@ def get_my_slot_name_on_primary(self, my_name: str, replicatefrom: Optional[str] In case of cascading replication we have to check not our physical slot, but slot of the replica that connects us to the primary. - :param my_name: the member node name that is replicating. - :param replicatefrom: the Intermediate member name that is configured to replicate for cascading replication. + :param name: name of the member node to check. + :param tags: reference to an object implementing :class:`Tags` interface. - :returns: The slot name that is in use for physical replication on this no`de. + :returns: the slot name on the primary that is in use for physical replication on this node. """ - m = self.get_member(replicatefrom, False) if replicatefrom else None - return self.get_my_slot_name_on_primary(m.name, m.replicatefrom) \ - if isinstance(m, Member) else slot_name_from_member_name(my_name) + replicatefrom = self.get_member(tags.replicatefrom, False) if tags.replicatefrom else None + return self.get_slot_name_on_primary(replicatefrom.name, replicatefrom) \ + if isinstance(replicatefrom, Member) else slot_name_from_member_name(name) @property def timeline(self) -> int: @@ -1424,15 +1336,15 @@ class AbstractDCS(abc.ABC): _SYNC = 'sync' _FAILSAFE = 'failsafe' - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: 'AbstractMPP') -> None: """Prepare DCS paths, Citus group ID, initial values for state information and processing dependencies. :ivar config: :class:`dict`, reference to config section of selected DCS. i.e.: ``zookeeper`` for zookeeper, ``etcd`` for etcd, etc... 
""" + self._mpp = mpp self._name = config['name'] self._base_path = re.sub('/+', '/', '/'.join(['', config.get('namespace', 'service'), config['scope']])) - self._citus_group = str(config['group']) if isinstance(config.get('group'), int) else None self._set_loop_wait(config.get('loop_wait', 10)) self._ctl = bool(config.get('patronictl', False)) @@ -1445,6 +1357,11 @@ def __init__(self, config: Dict[str, Any]) -> None: self._last_failsafe: Optional[Dict[str, str]] = {} self.event = Event() + @property + def mpp(self) -> 'AbstractMPP': + """Get the effective underlying MPP, if any has been configured.""" + return self._mpp + def client_path(self, path: str) -> str: """Construct the absolute key name from appropriate parts for the DCS type. @@ -1453,8 +1370,8 @@ def client_path(self, path: str) -> str: :returns: absolute key name for the current Patroni cluster. """ components = [self._base_path] - if self._citus_group: - components.append(self._citus_group) + if self._mpp.is_enabled(): + components.append(str(self._mpp.group)) components.append(path.lstrip('/')) return '/'.join(components) @@ -1564,7 +1481,7 @@ def _cluster_loader(self, path: Any) -> Cluster: """ @abc.abstractmethod - def _citus_cluster_loader(self, path: Any) -> Union[Cluster, Dict[int, Cluster]]: + def _citus_cluster_loader(self, path: Any) -> Dict[int, Cluster]: """Load and build all Patroni clusters from a single Citus cluster. :param path: the path in DCS where to load Cluster(s) from. @@ -1608,9 +1525,9 @@ def __get_patroni_cluster(self, path: Optional[str] = None) -> Cluster: def is_citus_coordinator(self) -> bool: """:class:`Cluster` instance has a Citus Coordinator group ID. - :returns: ``True`` if the given node is running as Citus Coordinator (``group=0``). + :returns: ``True`` if the given node is running as the MPP Coordinator. 
""" - return self._citus_group == str(CITUS_COORDINATOR_GROUP_ID) + return self._mpp.is_coordinator() def get_citus_coordinator(self) -> Optional[Cluster]: """Load the Patroni cluster for the Citus Coordinator. @@ -1618,10 +1535,10 @@ def get_citus_coordinator(self) -> Optional[Cluster]: .. note:: This method is only executed on the worker nodes (``group!=0``) to find the coordinator. - :returns: Select :class:`Cluster` instance associated with the Citus Coordinator group ID. + :returns: Select :class:`Cluster` instance associated with the MPP Coordinator group ID. """ try: - return self.__get_patroni_cluster(f'{self._base_path}/{CITUS_COORDINATOR_GROUP_ID}/') + return self.__get_patroni_cluster(f'{self._base_path}/{self._mpp.coordinator_group_id}/') except Exception as e: logger.error('Failed to load Citus coordinator cluster from %s: %r', self.__class__.__name__, e) return None @@ -1635,7 +1552,7 @@ def _get_citus_cluster(self) -> Cluster: groups = self._load_cluster(self._base_path + '/', self._citus_cluster_loader) if TYPE_CHECKING: # pragma: no cover assert isinstance(groups, dict) - cluster = groups.pop(CITUS_COORDINATOR_GROUP_ID, Cluster.empty()) + cluster = groups.pop(self._mpp.coordinator_group_id, Cluster.empty()) cluster.workers.update(groups) return cluster diff --git a/patroni/dcs/consul.py b/patroni/dcs/consul.py index 58e200301..19d65306a 100644 --- a/patroni/dcs/consul.py +++ b/patroni/dcs/consul.py @@ -16,8 +16,9 @@ from typing import Any, Callable, Dict, List, Mapping, NamedTuple, Optional, Union, Tuple, TYPE_CHECKING from . 
import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, ReturnFalseException, catch_return_false_exception, citus_group_re + TimelineHistory, ReturnFalseException, catch_return_false_exception from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare, parse_bool, Retry, RetryFailedError, split_host_port, uri, USER_AGENT if TYPE_CHECKING: # pragma: no cover from ..config import Config @@ -232,8 +233,8 @@ def replace_char(match: Any) -> str: class Consul(AbstractDCS): - def __init__(self, config: Dict[str, Any]) -> None: - super(Consul, self).__init__(config) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Consul, self).__init__(config, mpp) self._base_path = self._base_path[1:] self._scope = config['scope'] self._session = None @@ -422,8 +423,8 @@ def _consistency(self) -> str: def _cluster_loader(self, path: str) -> Cluster: _, results = self.retry(self._client.kv.get, path, recurse=True, consistency=self._consistency) if results is None: - raise NotFound - nodes = {} + return Cluster.empty() + nodes: Dict[str, Dict[str, Any]] = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') nodes[node['Key'][len(path):]] = node @@ -435,7 +436,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: clusters: Dict[int, Dict[str, Cluster]] = defaultdict(dict) for node in results or []: key = node['Key'][len(path):].split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): node['Value'] = (node['Value'] or b'').decode('utf-8') clusters[int(key[0])][key[1]] = node return {group: self._cluster_from_nodes(nodes) for group, nodes in clusters.items()} @@ -445,8 +446,6 @@ def _load_cluster( ) -> Union[Cluster, Dict[int, Cluster]]: try: return loader(path) - except NotFound: - return Cluster.empty() except Exception: 
logger.exception('get_cluster') raise ConsulError('Consul is not responding properly') @@ -668,7 +667,7 @@ def set_sync_state_value(self, value: str, version: Optional[int] = None) -> Uni if ret: # We have no other choise, only read after write :( if not retry.ensure_deadline(0.5): return False - _, ret = self.retry(self._client.kv.get, self.sync_path) + _, ret = self.retry(self._client.kv.get, self.sync_path, consistency='consistent') if ret and (ret.get('Value') or b'').decode('utf-8') == value: return ret['ModifyIndex'] return False diff --git a/patroni/dcs/etcd.py b/patroni/dcs/etcd.py index f242a6b25..b9d3e0abc 100644 --- a/patroni/dcs/etcd.py +++ b/patroni/dcs/etcd.py @@ -22,8 +22,9 @@ from urllib3.exceptions import HTTPError, ReadTimeoutError, ProtocolError from . import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, ReturnFalseException, catch_return_false_exception, citus_group_re + TimelineHistory, ReturnFalseException, catch_return_false_exception from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..request import get as requests_get from ..utils import Retry, RetryFailedError, split_host_port, uri, USER_AGENT if TYPE_CHECKING: # pragma: no cover @@ -470,9 +471,9 @@ def _prepare_request(self, kwargs: Dict[str, Any], params: Optional[Dict[str, An class AbstractEtcd(AbstractDCS): - def __init__(self, config: Dict[str, Any], client_cls: Type[AbstractEtcdClientWithFailover], + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP, client_cls: Type[AbstractEtcdClientWithFailover], retry_errors_cls: Union[Type[Exception], Tuple[Type[Exception], ...]]) -> None: - super(AbstractEtcd, self).__init__(config) + super(AbstractEtcd, self).__init__(config, mpp) self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1, retry_exceptions=retry_errors_cls) self._ttl = int(config.get('ttl') or 30) @@ -645,8 +646,8 @@ def wrapper(self: AbstractEtcd, *args: Any, 
**kwargs: Any) -> Any: class Etcd(AbstractEtcd): - def __init__(self, config: Dict[str, Any]) -> None: - super(Etcd, self).__init__(config, EtcdClient, (etcd.EtcdLeaderElectionInProgress, EtcdRaftInternal)) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Etcd, self).__init__(config, mpp, EtcdClient, (etcd.EtcdLeaderElectionInProgress, EtcdRaftInternal)) self.__do_not_watch = False @property @@ -710,16 +711,23 @@ def _cluster_from_nodes(self, etcd_index: int, nodes: Dict[str, etcd.EtcdResult] return Cluster(initialize, config, leader, status, members, failover, sync, history, failsafe) def _cluster_loader(self, path: str) -> Cluster: - result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) + try: + result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) + except etcd.EtcdKeyNotFound: + return Cluster.empty() nodes = {node.key[len(result.key):].lstrip('/'): node for node in result.leaves} return self._cluster_from_nodes(result.etcd_index, nodes) def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: + try: + result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) + except etcd.EtcdKeyNotFound: + return {} + clusters: Dict[int, Dict[str, etcd.EtcdResult]] = defaultdict(dict) - result = self.retry(self._client.read, path, recursive=True, quorum=self._ctl) for node in result.leaves: key = node.key[len(result.key):].lstrip('/').split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): clusters[int(key[0])][key[1]] = node return {group: self._cluster_from_nodes(result.etcd_index, nodes) for group, nodes in clusters.items()} @@ -729,8 +737,6 @@ def _load_cluster( cluster = None try: cluster = loader(path) - except etcd.EtcdKeyNotFound: - cluster = Cluster.empty() except Exception as e: self._handle_exception(e, 'get_cluster', raise_ex=EtcdError('Etcd is not responding properly')) self._has_failed 
= False diff --git a/patroni/dcs/etcd3.py b/patroni/dcs/etcd3.py index ea7e52f24..7cc2a1155 100644 --- a/patroni/dcs/etcd3.py +++ b/patroni/dcs/etcd3.py @@ -16,9 +16,10 @@ from typing import Any, Callable, Collection, Dict, Iterator, List, Optional, Tuple, Type, TYPE_CHECKING, Union from . import ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, catch_return_false_exception, citus_group_re + TimelineHistory, catch_return_false_exception from .etcd import AbstractEtcdClientWithFailover, AbstractEtcd, catch_etcd_errors, DnsCachingResolver, Retry from ..exceptions import DCSError, PatroniException +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare, enable_keepalive, iter_response_objects, RetryFailedError, USER_AGENT logger = logging.getLogger(__name__) @@ -671,8 +672,9 @@ def txn(self, compare: Dict[str, Any], success: Dict[str, Any], class Etcd3(AbstractEtcd): - def __init__(self, config: Dict[str, Any]) -> None: - super(Etcd3, self).__init__(config, PatroniEtcd3Client, (DeadlineExceeded, Unavailable, FailedPrecondition)) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Etcd3, self).__init__(config, mpp, PatroniEtcd3Client, + (DeadlineExceeded, Unavailable, FailedPrecondition)) self.__do_not_watch = False self._lease = None self._last_lease_refresh = 0 @@ -796,7 +798,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: path = self._base_path + '/' for node in self._client.get_cluster(path): key = node['key'][len(path):].split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): clusters[int(key[0])][key[1]] = node return {group: self._cluster_from_nodes(nodes) for group, nodes in clusters.items()} diff --git a/patroni/dcs/exhibitor.py b/patroni/dcs/exhibitor.py index 2b06073be..03d235758 100644 --- a/patroni/dcs/exhibitor.py +++ b/patroni/dcs/exhibitor.py @@ -7,6 +7,7 @@ from . 
import Cluster from .zookeeper import ZooKeeper +from ..postgresql.mpp import AbstractMPP from ..request import get as requests_get from ..utils import uri @@ -66,10 +67,10 @@ def zookeeper_hosts(self) -> str: class Exhibitor(ZooKeeper): - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: interval = config.get('poll_interval', 300) self._ensemble_provider = ExhibitorEnsembleProvider(config['hosts'], config['port'], poll_interval=interval) - super(Exhibitor, self).__init__({**config, 'hosts': self._ensemble_provider.zookeeper_hosts}) + super(Exhibitor, self).__init__({**config, 'hosts': self._ensemble_provider.zookeeper_hosts}, mpp) def _load_cluster( self, path: str, loader: Callable[[str], Union[Cluster, Dict[int, Cluster]]] diff --git a/patroni/dcs/kubernetes.py b/patroni/dcs/kubernetes.py index aee87bd3d..343b496cb 100644 --- a/patroni/dcs/kubernetes.py +++ b/patroni/dcs/kubernetes.py @@ -19,9 +19,9 @@ from threading import Condition, Lock, Thread from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Type, Union, TYPE_CHECKING -from . import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, CITUS_COORDINATOR_GROUP_ID, citus_group_re +from . 
import AbstractDCS, Cluster, ClusterConfig, Failover, Leader, Member, Status, SyncState, TimelineHistory from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare, iter_response_objects, keepalive_socket_options, \ Retry, RetryFailedError, tzutc, uri, USER_AGENT if TYPE_CHECKING: # pragma: no cover @@ -748,7 +748,7 @@ class Kubernetes(AbstractDCS): _CITUS_LABEL = 'citus-group' - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: self._labels = deepcopy(config['labels']) self._labels[config.get('scope_label', 'cluster-name')] = config['scope'] self._label_selector = ','.join('{0}={1}'.format(k, v) for k, v in self._labels.items()) @@ -759,9 +759,9 @@ def __init__(self, config: Dict[str, Any]) -> None: self._standby_leader_label_value = config.get('standby_leader_label_value', 'master') self._tmp_role_label = config.get('tmp_role_label') self._ca_certs = os.environ.get('PATRONI_KUBERNETES_CACERT', config.get('cacert')) or SERVICE_CERT_FILENAME - super(Kubernetes, self).__init__({**config, 'namespace': ''}) - if self._citus_group: - self._labels[self._CITUS_LABEL] = self._citus_group + super(Kubernetes, self).__init__({**config, 'namespace': ''}, mpp) + if self._mpp.is_enabled(): + self._labels[self._CITUS_LABEL] = str(self._mpp.group) self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1, retry_exceptions=KubernetesRetriableException) @@ -944,12 +944,12 @@ def _citus_cluster_loader(self, path: Dict[str, Any]) -> Dict[int, Cluster]: for name, pod in path['pods'].items(): group = pod.metadata.labels.get(self._CITUS_LABEL) - if group and citus_group_re.match(group): + if group and self._mpp.group_re.match(group): clusters[group]['pods'][name] = pod for name, kind in path['nodes'].items(): group = kind.metadata.labels.get(self._CITUS_LABEL) - if group and citus_group_re.match(group): + if group and 
self._mpp.group_re.match(group): clusters[group]['nodes'][name] = kind return {int(group): self._cluster_from_nodes(group, value['nodes'], value['pods'].values()) for group, value in clusters.items()} @@ -976,12 +976,12 @@ def __load_cluster( def _load_cluster( self, path: str, loader: Callable[[Any], Union[Cluster, Dict[int, Cluster]]] ) -> Union[Cluster, Dict[int, Cluster]]: - group = self._citus_group if path == self.client_path('') else None + group = str(self._mpp.group) if self._mpp.is_enabled() and path == self.client_path('') else None return self.__load_cluster(group, loader) def get_citus_coordinator(self) -> Optional[Cluster]: try: - ret = self.__load_cluster(str(CITUS_COORDINATOR_GROUP_ID), self._cluster_loader) + ret = self.__load_cluster(str(self._mpp.coordinator_group_id), self._cluster_loader) if TYPE_CHECKING: # pragma: no cover assert isinstance(ret, Cluster) return ret diff --git a/patroni/dcs/raft.py b/patroni/dcs/raft.py index 98c48f44e..0528cfb0a 100644 --- a/patroni/dcs/raft.py +++ b/patroni/dcs/raft.py @@ -12,9 +12,9 @@ from pysyncobj.utility import TcpUtility from typing import Any, Callable, Collection, Dict, List, Optional, Set, Union, TYPE_CHECKING -from . import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, citus_group_re +from . 
import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, TimelineHistory from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import validate_directory if TYPE_CHECKING: # pragma: no cover from ..config import Config @@ -285,8 +285,8 @@ def destroy(self) -> None: class Raft(AbstractDCS): - def __init__(self, config: Dict[str, Any]) -> None: - super(Raft, self).__init__(config) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(Raft, self).__init__(config, mpp) self._ttl = int(config.get('ttl') or 30) ready_event = threading.Event() @@ -387,7 +387,7 @@ def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: response = self._sync_obj.get(path, recursive=True) for key, value in (response or {}).items(): key = key[len(path):].split('/', 1) - if len(key) == 2 and citus_group_re.match(key[0]): + if len(key) == 2 and self._mpp.group_re.match(key[0]): clusters[int(key[0])][key[1]] = value return {group: self._cluster_from_nodes(nodes) for group, nodes in clusters.items()} diff --git a/patroni/dcs/zookeeper.py b/patroni/dcs/zookeeper.py index 3704b579d..6bf77ae45 100644 --- a/patroni/dcs/zookeeper.py +++ b/patroni/dcs/zookeeper.py @@ -12,9 +12,9 @@ from kazoo.security import ACL, make_acl from typing import Any, Callable, Dict, List, Optional, Union, Tuple, TYPE_CHECKING -from . import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, \ - TimelineHistory, citus_group_re +from . 
import AbstractDCS, ClusterConfig, Cluster, Failover, Leader, Member, Status, SyncState, TimelineHistory from ..exceptions import DCSError +from ..postgresql.mpp import AbstractMPP from ..utils import deep_compare if TYPE_CHECKING: # pragma: no cover from ..config import Config @@ -87,8 +87,8 @@ def _call(self, request: Tuple[Any], async_object: AsyncResult) -> Optional[bool class ZooKeeper(AbstractDCS): - def __init__(self, config: Dict[str, Any]) -> None: - super(ZooKeeper, self).__init__(config) + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: + super(ZooKeeper, self).__init__(config, mpp) hosts: Union[str, List[str]] = config.get('hosts', []) if isinstance(hosts, list): @@ -261,7 +261,7 @@ def _cluster_loader(self, path: str) -> Cluster: def _citus_cluster_loader(self, path: str) -> Dict[int, Cluster]: ret: Dict[int, Cluster] = {} for node in self.get_children(path): - if citus_group_re.match(node): + if self._mpp.group_re.match(node): ret[int(node)] = self._cluster_loader(path + node + '/') return ret diff --git a/patroni/dynamic_loader.py b/patroni/dynamic_loader.py new file mode 100644 index 000000000..6c207349e --- /dev/null +++ b/patroni/dynamic_loader.py @@ -0,0 +1,96 @@ +"""Helper functions to search for implementations of specific abstract interface in a package.""" +import importlib +import inspect +import logging +import os +import pkgutil +import sys +from types import ModuleType + +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, TYPE_CHECKING, Type, TypeVar, Union + +if TYPE_CHECKING: # pragma: no cover + from .config import Config + +logger = logging.getLogger(__name__) + + +def iter_modules(package: str) -> List[str]: + """Get names of modules from *package*, depending on execution environment. + + .. note:: + If being packaged with PyInstaller, modules aren't discoverable dynamically by scanning source directory because + :class:`importlib.machinery.FrozenImporter` doesn't implement :func:`iter_modules`. 
But it is still possible to + find all potential modules by iterating through ``toc``, which contains list of all "frozen" resources. + + :param package: a package name to search modules in, e.g. ``patroni.dcs``. + + :returns: list of known module names with absolute python module path namespace, e.g. ``patroni.dcs.etcd``. + """ + module_prefix = package + '.' + + if getattr(sys, 'frozen', False): + toc: Set[str] = set() + # dirname may contain a few dots, which causes pkgutil.iter_importers() + # to misinterpret the path as a package name. This can be avoided + # altogether by not passing a path at all, because PyInstaller's + # FrozenImporter is a singleton and registered as top-level finder. + for importer in pkgutil.iter_importers(): + if hasattr(importer, 'toc'): + toc |= getattr(importer, 'toc') + dots = module_prefix.count('.') # search for modules only on the same level + return [module for module in toc if module.startswith(module_prefix) and module.count('.') == dots] + + # here we are making an assumption that the package which is calling this function is already imported + pkg_file = sys.modules[package].__file__ + if TYPE_CHECKING: # pragma: no cover + assert isinstance(pkg_file, str) + return [name for _, name, is_pkg in pkgutil.iter_modules([os.path.dirname(pkg_file)], module_prefix) if not is_pkg] + + +ClassType = TypeVar("ClassType") + + +def find_class_in_module(module: ModuleType, cls_type: Type[ClassType]) -> Optional[Type[ClassType]]: + """Try to find the implementation of *cls_type* class interface in *module* matching the *module* name. + + :param module: imported module. + :param cls_type: a class type we are looking for. + + :returns: class with a name matching the name of *module* that implements *cls_type* or ``None`` if not found. 
+ """ + module_name = module.__name__.rpartition('.')[2] + return next( + (obj for obj_name, obj in module.__dict__.items() + if (obj_name.lower() == module_name + and inspect.isclass(obj) and issubclass(obj, cls_type))), + None) + + +def iter_classes( + package: str, cls_type: Type[ClassType], + config: Optional[Union['Config', Dict[str, Any]]] = None +) -> Iterator[Tuple[str, Type[ClassType]]]: + """Attempt to import modules and find implementations of *cls_type* that are present in the given configuration. + + .. note:: + If a module successfully imports we can assume that all its requirements are installed. + + :param package: a package name to search modules in, e.g. ``patroni.dcs``. + :param cls_type: a class type we are looking for. + :param config: configuration information with possible module names as keys. If given, only attempt to import + modules defined in the configuration. Else, if ``None``, attempt to import any supported module. + + :yields: a tuple containing the module ``name`` and the imported class object. + """ + for mod_name in iter_modules(package): + name = mod_name.rpartition('.')[2] + if config is None or name in config: + try: + module = importlib.import_module(mod_name) + module_cls = find_class_in_module(module, cls_type) + if module_cls: + yield name, module_cls + except ImportError: + logger.log(logging.DEBUG if config is not None else logging.INFO, + 'Failed to import %s', mod_name) diff --git a/patroni/global_config.py b/patroni/global_config.py new file mode 100644 index 000000000..7731cf59f --- /dev/null +++ b/patroni/global_config.py @@ -0,0 +1,227 @@ +"""Implements *global_config* facilities. + +The :class:`GlobalConfig` object is instantiated on import and replaces +``patroni.global_config`` module in :data:`sys.modules`, what allows to use +its properties and methods like they were module variables and functions. 
+""" +import sys +import types + +from copy import deepcopy +from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING + +from .utils import parse_bool, parse_int + +if TYPE_CHECKING: # pragma: no cover + from .dcs import Cluster + + +def __getattr__(mod: types.ModuleType, name: str) -> Any: + """This function exists just to make pyright happy. + + Without it pyright complains about access to unknown members of global_config module. + """ + return getattr(sys.modules[__name__], name) # pragma: no cover + + +class GlobalConfig(types.ModuleType): + """A class that wraps global configuration and provides convenient methods to access/check values.""" + + __file__ = __file__ # just to make unittest and pytest happy + + def __init__(self) -> None: + """Initialize :class:`GlobalConfig` object.""" + super().__init__(__name__) + self.__config = {} + + @staticmethod + def _cluster_has_valid_config(cluster: Optional['Cluster']) -> bool: + """Check if provided *cluster* object has a valid global configuration. + + :param cluster: the currently known cluster state from DCS. + + :returns: ``True`` if provided *cluster* object has a valid global configuration, otherwise ``False``. + """ + return bool(cluster and cluster.config and cluster.config.modify_version) + + def update(self, cluster: Optional['Cluster']) -> None: + """Update with the new global configuration from the :class:`Cluster` object view. + + .. note:: + Global configuration is updated only when configuration in the *cluster* view is valid. + + Update happens in-place and is executed only from the main heartbeat thread. + + :param cluster: the currently known cluster state from DCS. 
+ """ + # Try to protect from the case when DCS was wiped out + if self._cluster_has_valid_config(cluster): + self.__config = cluster.config.data # pyright: ignore [reportOptionalMemberAccess] + + def from_cluster(self, cluster: Optional['Cluster']) -> 'GlobalConfig': + """Return :class:`GlobalConfig` instance from the provided :class:`Cluster` object view. + + .. note:: + If the provided *cluster* object doesn't have a valid global configuration we return + the last known valid state of the :class:`GlobalConfig` object. + + This method is used when we need to have the most up-to-date values in the global configuration, + but we don't want to update the global object. + + :param cluster: the currently known cluster state from DCS. + + :returns: :class:`GlobalConfig` object. + """ + if not self._cluster_has_valid_config(cluster): + return self + + ret = GlobalConfig() + ret.update(cluster) + return ret + + def get(self, name: str) -> Any: + """Gets global configuration value by *name*. + + :param name: parameter name. + + :returns: configuration value or ``None`` if it is missing. + """ + return self.__config.get(name) + + def check_mode(self, mode: str) -> bool: + """Checks whether the certain parameter is enabled. + + :param mode: parameter name, e.g. ``synchronous_mode``, ``failsafe_mode``, ``pause``, ``check_timeline``, and + so on. + + :returns: ``True`` if parameter *mode* is enabled in the global configuration. 
+ """ + return bool(parse_bool(self.__config.get(mode))) + + @property + def is_paused(self) -> bool: + """``True`` if cluster is in maintenance mode.""" + return self.check_mode('pause') + + @property + def is_synchronous_mode(self) -> bool: + """``True`` if synchronous replication is requested and it is not a standby cluster config.""" + return self.check_mode('synchronous_mode') and not self.is_standby_cluster + + @property + def is_synchronous_mode_strict(self) -> bool: + """``True`` if at least one synchronous node is required.""" + return self.check_mode('synchronous_mode_strict') + + def get_standby_cluster_config(self) -> Union[Dict[str, Any], Any]: + """Get ``standby_cluster`` configuration. + + :returns: a copy of ``standby_cluster`` configuration. + """ + return deepcopy(self.get('standby_cluster')) + + @property + def is_standby_cluster(self) -> bool: + """``True`` if global configuration has a valid ``standby_cluster`` section.""" + config = self.get_standby_cluster_config() + return isinstance(config, dict) and\ + bool(config.get('host') or config.get('port') or config.get('restore_command')) + + def get_int(self, name: str, default: int = 0) -> int: + """Gets current value of *name* from the global configuration and try to return it as :class:`int`. + + :param name: name of the parameter. + :param default: default value if *name* is not in the configuration or invalid. + + :returns: currently configured value of *name* from the global configuration or *default* if it is not set or + invalid. + """ + ret = parse_int(self.get(name)) + return default if ret is None else ret + + @property + def min_synchronous_nodes(self) -> int: + """The minimum number of synchronous nodes based on whether ``synchronous_mode_strict`` is enabled or not.""" + return 1 if self.is_synchronous_mode_strict else 0 + + @property + def synchronous_node_count(self) -> int: + """Currently configured value of ``synchronous_node_count`` from the global configuration. 
+ + Assume ``1`` if it is not set or invalid. + """ + return max(self.get_int('synchronous_node_count', 1), self.min_synchronous_nodes) + + @property + def maximum_lag_on_failover(self) -> int: + """Currently configured value of ``maximum_lag_on_failover`` from the global configuration. + + Assume ``1048576`` if it is not set or invalid. + """ + return self.get_int('maximum_lag_on_failover', 1048576) + + @property + def maximum_lag_on_syncnode(self) -> int: + """Currently configured value of ``maximum_lag_on_syncnode`` from the global configuration. + + Assume ``-1`` if it is not set or invalid. + """ + return self.get_int('maximum_lag_on_syncnode', -1) + + @property + def primary_start_timeout(self) -> int: + """Currently configured value of ``primary_start_timeout`` from the global configuration. + + Assume ``300`` if it is not set or invalid. + + .. note:: + ``master_start_timeout`` is still supported to keep backward compatibility. + """ + default = 300 + return self.get_int('primary_start_timeout', default)\ + if 'primary_start_timeout' in self.__config else self.get_int('master_start_timeout', default) + + @property + def primary_stop_timeout(self) -> int: + """Currently configured value of ``primary_stop_timeout`` from the global configuration. + + Assume ``0`` if it is not set or invalid. + + .. note:: + ``master_stop_timeout`` is still supported to keep backward compatibility. + """ + default = 0 + return self.get_int('primary_stop_timeout', default)\ + if 'primary_stop_timeout' in self.__config else self.get_int('master_stop_timeout', default) + + @property + def ignore_slots_matchers(self) -> List[Dict[str, Any]]: + """Currently configured value of ``ignore_slots`` from the global configuration. + + Assume an empty :class:`list` if not set. + """ + return self.get('ignore_slots') or [] + + @property + def max_timelines_history(self) -> int: + """Currently configured value of ``max_timelines_history`` from the global configuration. 
+ + Assume ``0`` if not set or invalid. + """ + return self.get_int('max_timelines_history', 0) + + @property + def use_slots(self) -> bool: + """``True`` if cluster is configured to use replication slots.""" + return bool(parse_bool((self.get('postgresql') or {}).get('use_slots', True))) + + @property + def permanent_slots(self) -> Dict[str, Any]: + """Dictionary of permanent slots information from the global configuration.""" + return deepcopy(self.get('permanent_replication_slots') + or self.get('permanent_slots') + or self.get('slots') + or {}) + + +sys.modules[__name__] = GlobalConfig() diff --git a/patroni/ha.py b/patroni/ha.py index 4a21972aa..dea78163a 100644 --- a/patroni/ha.py +++ b/patroni/ha.py @@ -10,7 +10,7 @@ from threading import RLock from typing import Any, Callable, Collection, Dict, List, NamedTuple, Optional, Union, Tuple, TYPE_CHECKING -from . import psycopg +from . import global_config, psycopg from .__main__ import Patroni from .async_executor import AsyncExecutor, CriticalTask from .collections import CaseInsensitiveSet @@ -156,7 +156,6 @@ def __init__(self, patroni: Patroni): self._rewind = Rewind(self.state_handler) self.dcs = patroni.dcs self.cluster = Cluster.empty() - self.global_config = self.patroni.config.get_global_config(None) self.old_cluster = Cluster.empty() self._leader_expiry = 0 self._leader_expiry_lock = RLock() @@ -188,20 +187,20 @@ def __init__(self, patroni: Patroni): def primary_stop_timeout(self) -> Union[int, None]: """:returns: "primary_stop_timeout" from the global configuration or `None` when not in synchronous mode.""" - ret = self.global_config.primary_stop_timeout + ret = global_config.primary_stop_timeout return ret if ret > 0 and self.is_synchronous_mode() else None def is_paused(self) -> bool: """:returns: `True` if in maintenance mode.""" - return self.global_config.is_paused + return global_config.is_paused def check_timeline(self) -> bool: """:returns: `True` if should check whether the timeline is latest 
during the leader race.""" - return self.global_config.check_mode('check_timeline') + return global_config.check_mode('check_timeline') def is_standby_cluster(self) -> bool: """:returns: `True` if global configuration has a valid "standby_cluster" section.""" - return self.global_config.is_standby_cluster + return global_config.is_standby_cluster def is_leader(self) -> bool: """:returns: `True` if the current node is the leader, based on expiration set when it last held the key.""" @@ -295,9 +294,8 @@ def update_lock(self, update_status: bool = False) -> bool: try: last_lsn = self.state_handler.last_operation() slots = self.cluster.filter_permanent_slots( - {**self.state_handler.slots(), slot_name_from_member_name(self.state_handler.name): last_lsn}, - self.is_standby_cluster(), - self.state_handler.major_version) + self.state_handler, + {**self.state_handler.slots(), slot_name_from_member_name(self.state_handler.name): last_lsn}) except Exception: logger.exception('Exception when called state_handler.last_operation()') if TYPE_CHECKING: # pragma: no cover @@ -334,7 +332,7 @@ def notify_citus_coordinator(self, event: str) -> None: if coordinator and coordinator.leader and coordinator.leader.conn_url: try: data = {'type': event, - 'group': self.state_handler.citus_handler.group(), + 'group': self.state_handler.citus_handler.group, 'leader': self.state_handler.name, 'timeout': self.dcs.ttl, 'cooldown': self.patroni.config['retry_timeout']} @@ -450,7 +448,7 @@ def bootstrap(self) -> str: return ret or 'trying to bootstrap {0}'.format(msg) # no leader, but configuration may allowed replica creation using backup tools - create_replica_methods = self.global_config.get_standby_cluster_config().get('create_replica_methods', []) \ + create_replica_methods = global_config.get_standby_cluster_config().get('create_replica_methods', []) \ if self.is_standby_cluster() else None can_bootstrap = 
self.state_handler.can_create_replica_without_replication_connection(create_replica_methods) concurrent_bootstrap = self.cluster.initialize == "" @@ -525,7 +523,7 @@ def recover(self) -> str: :returns: action message, describing what was performed. """ if self.has_lock() and self.update_lock(): - timeout = self.global_config.primary_start_timeout + timeout = global_config.primary_start_timeout if timeout == 0: # We are requested to prefer failing over to restarting primary. But see first if there # is anyone to fail over to. @@ -622,7 +620,7 @@ def _get_node_to_follow(self, cluster: Cluster) -> Union[Leader, Member, None]: for param in params: # It is highly unlikely to happen, but we want to protect from the case node_to_follow.data.pop(param, None) # when above-mentioned params came from outside. if self.is_standby_cluster(): - standby_config = self.global_config.get_standby_cluster_config() + standby_config = global_config.get_standby_cluster_config() node_to_follow.data.update({p: standby_config[p] for p in params if standby_config.get(p)}) return node_to_follow @@ -684,11 +682,11 @@ def follow(self, demote_reason: str, follow_reason: str, refresh: bool = True) - def is_synchronous_mode(self) -> bool: """:returns: `True` if synchronous replication is requested.""" - return self.global_config.is_synchronous_mode + return global_config.is_synchronous_mode def is_failsafe_mode(self) -> bool: """:returns: `True` if failsafe_mode is enabled in global configuration.""" - return self.global_config.check_mode('failsafe_mode') + return global_config.check_mode('failsafe_mode') def process_sync_replication(self) -> None: """Process synchronous standby beahvior. 
@@ -732,7 +730,7 @@ def process_sync_replication(self) -> None: return logger.info('Synchronous replication key updated by someone else.') # When strict mode and no suitable replication connections put "*" to synchronous_standby_names - if self.global_config.is_synchronous_mode_strict and not picked: + if global_config.is_synchronous_mode_strict and not picked: picked = CaseInsensitiveSet('*') logger.warning("No standbys available!") @@ -805,7 +803,7 @@ def update_cluster_history(self) -> None: cluster_history_dict: Dict[int, List[Any]] = {line[0]: list(line) for line in cluster_history} history: List[List[Any]] = list(map(list, self.state_handler.get_history(primary_timeline))) if self.cluster.config: - history = history[-self.cluster.config.max_timelines_history:] + history = history[-global_config.max_timelines_history:] for line in history: # enrich current history with promotion timestamps stored in DCS cluster_history_line = cluster_history_dict.get(line[0], []) @@ -849,7 +847,7 @@ def enforce_primary_role(self, message: str, promote_message: str) -> str: self.state_handler.set_role('master') self.process_sync_replication() self.update_cluster_history() - self.state_handler.citus_handler.sync_pg_dist_node(self.cluster) + self.state_handler.citus_handler.sync_meta_data(self.cluster) return message elif self.state_handler.role in ('master', 'promoted', 'primary'): self.process_sync_replication() @@ -863,7 +861,7 @@ def enforce_primary_role(self, message: str, promote_message: str) -> str: # promotion until next cycle. 
TODO: trigger immediate retry of run_cycle return 'Postponing promotion because synchronous replication state was updated by somebody else' self.state_handler.sync_handler.set_synchronous_standby_names( - CaseInsensitiveSet('*') if self.global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) + CaseInsensitiveSet('*') if global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) if self.state_handler.role not in ('master', 'promoted', 'primary'): # reset failsafe state when promote self._failsafe.set_is_active(0) @@ -974,7 +972,7 @@ def is_lagging(self, wal_position: int) -> bool: :returns True when node is lagging """ lag = (self.cluster.last_lsn or 0) - wal_position - return lag > self.global_config.maximum_lag_on_failover + return lag > global_config.maximum_lag_on_failover def _is_healthiest_node(self, members: Collection[Member], check_replication_lag: bool = True) -> bool: """This method tries to determine whether I am healthy enough to became a new leader candidate or not.""" @@ -1541,7 +1539,7 @@ def restart(self, restart_data: Dict[str, Any], run_async: bool = False) -> Tupl # Now that restart is scheduled we can set timeout for startup, it will get reset # once async executor runs and main loop notices PostgreSQL as up. 
- timeout = restart_data.get('timeout', self.global_config.primary_start_timeout) + timeout = restart_data.get('timeout', global_config.primary_start_timeout) self.set_start_timeout(timeout) def before_shutdown() -> None: @@ -1605,7 +1603,7 @@ def handle_long_action_in_progress(self) -> str: """Figure out what to do with the task AsyncExecutor is performing.""" if self.has_lock() and self.update_lock(): if self._async_executor.scheduled_action == 'doing crash recovery in a single user mode': - time_left = self.global_config.primary_start_timeout - (time.time() - self._crash_recovery_started) + time_left = global_config.primary_start_timeout - (time.time() - self._crash_recovery_started) if time_left <= 0 and self.is_failover_possible(): logger.info("Demoting self because crash recovery is taking too long") self.state_handler.cancellable.cancel(True) @@ -1690,7 +1688,7 @@ def post_bootstrap(self) -> str: self.set_is_leader(True) if self.is_synchronous_mode(): self.state_handler.sync_handler.set_synchronous_standby_names( - CaseInsensitiveSet('*') if self.global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) + CaseInsensitiveSet('*') if global_config.is_synchronous_mode_strict else CaseInsensitiveSet()) self.state_handler.call_nowait(CallbackAction.ON_START) self.load_cluster_from_dcs() @@ -1713,7 +1711,7 @@ def handle_starting_instance(self) -> Optional[str]: self.demote('immediate-nolock') return 'stopped PostgreSQL while starting up because leader key was lost' - timeout = self._start_timeout or self.global_config.primary_start_timeout + timeout = self._start_timeout or global_config.primary_start_timeout time_left = timeout - self.state_handler.time_in_state() if time_left <= 0: @@ -1746,8 +1744,8 @@ def _run_cycle(self) -> str: try: try: self.load_cluster_from_dcs() - self.global_config = self.patroni.config.get_global_config(self.cluster) - self.state_handler.reset_cluster_info_state(self.cluster, self.patroni.nofailover, self.global_config) + 
global_config.update(self.cluster) + self.state_handler.reset_cluster_info_state(self.cluster, self.patroni) except Exception: self.state_handler.reset_cluster_info_state(None) raise @@ -1767,10 +1765,10 @@ def _run_cycle(self) -> str: self.touch_member() # cluster has leader key but not initialize key - if not (self.cluster.is_unlocked() or self.sysid_valid(self.cluster.initialize)) and self.has_lock(): + if self.has_lock(False) and not self.sysid_valid(self.cluster.initialize): self.dcs.initialize(create_new=(self.cluster.initialize is None), sysid=self.state_handler.sysid) - if not (self.cluster.is_unlocked() or self.cluster.config and self.cluster.config.data) and self.has_lock(): + if self.has_lock(False) and not (self.cluster.config and self.cluster.config.data): self.dcs.set_config_value(json.dumps(self.patroni.config.dynamic_configuration, separators=(',', ':'))) self.cluster = self.dcs.get_cluster() @@ -1851,10 +1849,9 @@ def _run_cycle(self) -> str: logger.fatal('system ID mismatch, node %s belongs to a different cluster: %s != %s', self.state_handler.name, self.cluster.initialize, data_sysid) sys.exit(1) - elif self.cluster.is_unlocked() and not self.is_paused(): + elif self.cluster.is_unlocked() and not self.is_paused() and not self.state_handler.cb_called: # "bootstrap", but data directory is not empty - if not self.state_handler.cb_called and self.state_handler.is_running() \ - and not self.state_handler.is_primary(): + if self.state_handler.is_running() and not self.state_handler.is_primary(): self._join_aborted = True logger.error('No initialize key in DCS and PostgreSQL is running as replica, aborting start') logger.error('Please first start Patroni on the node running as primary') @@ -1904,7 +1901,7 @@ def _run_cycle(self) -> str: if not is_promoting and create_slots and self.cluster.leader: err = self._async_executor.try_run_async('copy_logical_slots', self.state_handler.slots_handler.copy_logical_slots, - args=(self.cluster, create_slots)) + 
args=(self.cluster, self.patroni, create_slots)) if not err: ret = 'Copying logical slots {0} from the primary'.format(create_slots) return ret @@ -1960,10 +1957,7 @@ def _sync_replication_slots(self, dcs_failed: bool) -> List[str]: cluster = self._failsafe.update_cluster(self.cluster)\ if self.is_failsafe_mode() and not self.is_leader() else self.cluster if cluster: - slots = self.state_handler.slots_handler.sync_replication_slots(cluster, - self.patroni.nofailover, - self.patroni.replicatefrom, - self.is_paused()) + slots = self.state_handler.slots_handler.sync_replication_slots(cluster, self.patroni) # Don't copy replication slots if failsafe_mode is active return [] if self.failsafe_is_active() else slots @@ -2047,7 +2041,7 @@ def get_remote_member(self, member: Union[Leader, Member, None] = None) -> Remot config or cluster.config.data. """ data: Dict[str, Any] = {} - cluster_params = self.global_config.get_standby_cluster_config() + cluster_params = global_config.get_standby_cluster_config() if cluster_params: data.update({k: v for k, v in cluster_params.items() if k in RemoteMember.ALLOWED_KEYS}) diff --git a/patroni/log.py b/patroni/log.py index 09d738830..6ac67a17d 100644 --- a/patroni/log.py +++ b/patroni/log.py @@ -202,24 +202,37 @@ def __init__(self) -> None: self._proxy_handler = ProxyHandler(self) self._root_logger.addHandler(self._proxy_handler) - def update_loggers(self) -> None: - """Configure loggers' log level as defined in ``log.loggers`` section of Patroni configuration. + def update_loggers(self, config: Dict[str, Any]) -> None: + """Configure custom loggers' log levels. .. note:: It creates logger objects that are not defined yet in the log manager. + + :param config: :class:`dict` object with custom loggers configuration, is set either from: + + * ``log.loggers`` section of Patroni configuration; or + + * from the method that is trying to make sure that the node name + isn't duplicated (to silence annoying ``urllib3`` WARNING's). 
+ + :Example: + + .. code-block:: python + + update_loggers({'urllib3.connectionpool': 'WARNING'}) """ - loggers = deepcopy((self._config or {}).get('loggers') or {}) + loggers = deepcopy(config) for name, logger in self._root_logger.manager.loggerDict.items(): # ``Placeholder`` is a node in the log manager for which no logger has been defined. We are interested only # in the ones that were defined if not isinstance(logger, logging.PlaceHolder): - # if this logger is present in ``log.loggers`` Patroni configuration, use the configured level, - # otherwise use ``logging.NOTSET``, which means it will inherit the level from any parent node up to - # the root for which log level is defined. + # if this logger is present in *config*, use the configured level, otherwise + # use ``logging.NOTSET``, which means it will inherit the level + # from any parent node up to the root for which log level is defined. level = loggers.pop(name, logging.NOTSET) logger.setLevel(level) - # define loggers that do not exist yet and set level as configured in ``log.loggers`` section of configuration. + # define loggers that do not exist yet and set level as configured in the *config* for name, level in loggers.items(): logger = self._root_logger.manager.getLogger(name) logger.setLevel(level) @@ -274,7 +287,7 @@ def reload_config(self, config: Dict[str, Any]) -> None: self.log_handler = new_handler self._config = config.copy() - self.update_loggers() + self.update_loggers(config.get('loggers') or {}) def _close_old_handlers(self) -> None: """Close old log handlers. 
diff --git a/patroni/postgresql/__init__.py b/patroni/postgresql/__init__.py index fdfc26c1c..f6e3ab545 100644 --- a/patroni/postgresql/__init__.py +++ b/patroni/postgresql/__init__.py @@ -19,22 +19,22 @@ from .cancellable import CancellableSubprocess from .config import ConfigHandler, mtime from .connection import ConnectionPool, get_connection_cursor -from .citus import CitusHandler from .misc import parse_history, parse_lsn, postgres_major_version_to_int +from .mpp import AbstractMPP from .postmaster import PostmasterProcess from .slots import SlotsHandler from .sync import SyncHandler -from .. import psycopg +from .. import global_config, psycopg from ..async_executor import CriticalTask from ..collections import CaseInsensitiveSet from ..dcs import Cluster, Leader, Member, SLOT_ADVANCE_AVAILABLE_VERSION from ..exceptions import PostgresConnectionException from ..utils import Retry, RetryFailedError, polling_loop, data_directory_is_empty, parse_int +from ..tags import Tags if TYPE_CHECKING: # pragma: no cover from psycopg import Connection as Connection3, Cursor from psycopg2 import connection as connection3, cursor - from ..config import GlobalConfig logger = logging.getLogger(__name__) @@ -63,7 +63,7 @@ class Postgresql(object): "pg_catalog.pg_{0}_{1}_diff(COALESCE(pg_catalog.pg_last_{0}_receive_{1}(), '0/0'), '0/0')::bigint, " "pg_catalog.pg_is_in_recovery() AND pg_catalog.pg_is_{0}_replay_paused()") - def __init__(self, config: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any], mpp: AbstractMPP) -> None: self.name: str = config['name'] self.scope: str = config['scope'] self._data_dir: str = config['data_dir'] @@ -73,7 +73,6 @@ def __init__(self, config: Dict[str, Any]) -> None: self.connection_string: str self.proxy_url: Optional[str] self._major_version = self.get_major_version() - self._global_config = None self._state_lock = Lock() self.set_state('stopped') @@ -81,7 +80,7 @@ def __init__(self, config: Dict[str, Any]) -> None: 
self._pending_restart = False self.connection_pool = ConnectionPool() self._connection = self.connection_pool.get('heartbeat') - self.citus_handler = CitusHandler(self, config.get('citus')) + self.citus_handler = mpp.get_handler_impl(self) self.config = ConfigHandler(self, config) self.config.check_directories() @@ -119,6 +118,8 @@ def __init__(self, config: Dict[str, Any]) -> None: # Last known running process self._postmaster_proc = None + self._available_gucs = None + if self.is_running(): # If we found postmaster process we need to figure out whether postgres is accepting connections self.set_state('starting') @@ -217,7 +218,7 @@ def cluster_info_query(self) -> str: "FROM pg_catalog.pg_stat_get_wal_senders() w," " pg_catalog.pg_stat_get_activity(w.pid)" " WHERE w.state = 'streaming') r)").format(self.wal_name, self.lsn_name) - if (not self.global_config or self.global_config.is_synchronous_mode) + if global_config.is_synchronous_mode and self.role in ('master', 'primary', 'promoted') else "'on', '', NULL") if self._major_version >= 90600: @@ -241,7 +242,9 @@ def cluster_info_query(self) -> str: @property def available_gucs(self) -> CaseInsensitiveSet: """GUCs available in this Postgres server.""" - return self._get_gucs() + if not self._available_gucs: + self._available_gucs = self._get_gucs() + return self._available_gucs def _version_file_exists(self) -> bool: return not self.data_directory_empty() and os.path.isfile(self._version_file) @@ -426,46 +429,30 @@ def set_enforce_hot_standby_feedback(self, value: bool) -> None: self.config.write_postgresql_conf() self.reload() - @property - def global_config(self) -> Optional['GlobalConfig']: - return self._global_config - - def reset_cluster_info_state(self, cluster: Union[Cluster, None], nofailover: bool = False, - global_config: Optional['GlobalConfig'] = None) -> None: + def reset_cluster_info_state(self, cluster: Optional[Cluster], tags: Optional[Tags] = None) -> None: """Reset monitoring query cache. 
- It happens in the beginning of heart-beat loop and on change of `synchronous_standby_names`. + .. note:: + It happens in the beginning of heart-beat loop and on change of `synchronous_standby_names`. :param cluster: currently known cluster state from DCS - :param nofailover: whether this node could become a new primary. - Important when there are logical permanent replication slots because "nofailover" - node could do cascading replication and should enable `hot_standby_feedback` - :param global_config: last known :class:`GlobalConfig` object + :param tags: reference to an object implementing :class:`Tags` interface. """ self._cluster_info_state = {} - if global_config: - self._global_config = global_config - - if not self._global_config: + if not tags: return - if self._global_config.is_standby_cluster: + if global_config.is_standby_cluster: # Standby cluster can't have logical replication slots, and we don't need to enforce hot_standby_feedback self.set_enforce_hot_standby_feedback(False) if cluster and cluster.config and cluster.config.modify_version: # We want to enable hot_standby_feedback if the replica is supposed # to have a logical slot or in case if it is the cascading replica. 
- self.set_enforce_hot_standby_feedback(not self._global_config.is_standby_cluster and self.can_advance_slots - and cluster.should_enforce_hot_standby_feedback(self.name, - nofailover)) - - self._has_permanent_slots = cluster.has_permanent_slots( - my_name=self.name, - is_standby_cluster=self._global_config.is_standby_cluster, - nofailover=nofailover, - major_version=self.major_version) + self.set_enforce_hot_standby_feedback(not global_config.is_standby_cluster and self.can_advance_slots + and cluster.should_enforce_hot_standby_feedback(self, tags)) + self._has_permanent_slots = cluster.has_permanent_slots(self, tags) def _cluster_info_state_get(self, name: str) -> Optional[Any]: if not self._cluster_info_state: diff --git a/patroni/postgresql/bootstrap.py b/patroni/postgresql/bootstrap.py index 751c0797f..a544bd735 100644 --- a/patroni/postgresql/bootstrap.py +++ b/patroni/postgresql/bootstrap.py @@ -188,10 +188,9 @@ def _custom_bootstrap(self, config: Any) -> bool: params = [] if config.get('no_params') else ['--scope=' + self._postgresql.scope, '--datadir=' + self._postgresql.data_dir] # Add custom parameters specified by the user - reserved_args = {'no_params', 'keep_existing_recovery_conf', 'recovery_conf', 'scope', 'datadir'} - for arg, val in config.items(): - if arg not in reserved_args: - params.append(f"--{arg}={val}") + reserved_args = {'command', 'no_params', 'keep_existing_recovery_conf', 'recovery_conf', 'scope', 'datadir'} + params += [f"--{arg}={val}" for arg, val in config.items() if arg not in reserved_args] + try: logger.info('Running custom bootstrap script: %s', config['command']) if self._postgresql.cancellable.call(shlex.split(config['command']) + params) != 0: diff --git a/patroni/postgresql/config.py b/patroni/postgresql/config.py index b15f7f234..7dfa89344 100644 --- a/patroni/postgresql/config.py +++ b/patroni/postgresql/config.py @@ -12,6 +12,7 @@ from typing import Any, Collection, Dict, Iterator, List, Optional, Union, Tuple, Type, 
TYPE_CHECKING from .validator import recovery_parameters, transform_postgresql_parameter_value, transform_recovery_parameter_value +from .. import global_config from ..collections import CaseInsensitiveDict, CaseInsensitiveSet from ..dcs import Leader, Member, RemoteMember, slot_name_from_member_name from ..exceptions import PatroniFatalException, PostgresConnectionException @@ -595,7 +596,7 @@ def build_recovery_params(self, member: Union[Leader, Member, None]) -> CaseInse is_remote_member = isinstance(member, RemoteMember) primary_conninfo = self.primary_conninfo_params(member) if primary_conninfo: - use_slots = self.get('use_slots', True) and self._postgresql.major_version >= 90400 + use_slots = global_config.use_slots and self._postgresql.major_version >= 90400 if use_slots and not (is_remote_member and member.no_replication_slot): primary_slot_name = member.primary_slot_name if is_remote_member else self._postgresql.name recovery_params['primary_slot_name'] = slot_name_from_member_name(primary_slot_name) @@ -930,10 +931,10 @@ def get_server_parameters(self, config: Dict[str, Any]) -> CaseInsensitiveDict: parameters = config['parameters'].copy() listen_addresses, port = split_host_port(config['listen'], 5432) parameters.update(cluster_name=self._postgresql.scope, listen_addresses=listen_addresses, port=str(port)) - if not self._postgresql.global_config or self._postgresql.global_config.is_synchronous_mode: + if global_config.is_synchronous_mode: synchronous_standby_names = self._server_parameters.get('synchronous_standby_names') if synchronous_standby_names is None: - if self._postgresql.global_config and self._postgresql.global_config.is_synchronous_mode_strict\ + if global_config.is_synchronous_mode_strict\ and self._postgresql.role in ('master', 'primary', 'promoted'): parameters['synchronous_standby_names'] = '*' else: @@ -1097,6 +1098,12 @@ def reload_config(self, config: Dict[str, Any], sighup: bool = False) -> None: local_connection_address_changed = 
True else: logger.info('Changed %s from %s to %s', r[0], r[1], new_value) + elif r[0] in self._server_parameters \ + and not compare_values(r[3], r[2], r[1], self._server_parameters[r[0]]): + # Check if any parameter was set back to the current pg_settings value + # We can use pg_settings value here, as it is proved to be equal to new_value + logger.info('Changed %s from %s to %s', r[0], self._server_parameters[r[0]], r[1]) + conf_changed = True for param, value in changes.items(): if '.' in param: # Check that user-defined-paramters have changed (parameters with period in name) diff --git a/patroni/postgresql/connection.py b/patroni/postgresql/connection.py index 2a50dbb5b..040dcf78d 100644 --- a/patroni/postgresql/connection.py +++ b/patroni/postgresql/connection.py @@ -147,7 +147,8 @@ def get(self, name: str, kwargs_override: Optional[Dict[str, Any]] = None) -> Na def close(self) -> None: """Close all named connections from Patroni to PostgreSQL registered in the pool.""" with self._lock: - if any(conn.close(True) for conn in self._connections.values()): + closed_connections = [conn.close(True) for conn in self._connections.values()] + if any(closed_connections): logger.info("closed patroni connections to postgres") diff --git a/patroni/postgresql/mpp/__init__.py b/patroni/postgresql/mpp/__init__.py new file mode 100644 index 000000000..3494793b5 --- /dev/null +++ b/patroni/postgresql/mpp/__init__.py @@ -0,0 +1,296 @@ +"""Abstract classes for MPP handler. + +MPP stands for Massively Parallel Processing, and Citus belongs to this architecture. Currently, Citus is the only +supported MPP cluster. However, we may consider adapting other databases such as TimescaleDB, GPDB, etc. into Patroni. +""" +import abc + +from typing import Any, Dict, Iterator, Optional, Union, Tuple, Type, TYPE_CHECKING + +from ...dcs import Cluster +from ...dynamic_loader import iter_classes +from ...exceptions import PatroniException + +if TYPE_CHECKING: # pragma: no cover + from .. 
import Postgresql + from ...config import Config + + +class AbstractMPP(abc.ABC): + """An abstract class which should be passed to :class:`AbstractDCS`. + + .. note:: + We create :class:`AbstractMPP` and :class:`AbstractMPPHandler` to solve the chicken-egg initialization problem. + When initializing DCS, we dynamically create an object implementing :class:`AbstractMPP`, later this object is + used to instantiate an object implementing :class:`AbstractMPPHandler`. + """ + + group_re: Any # re.Pattern[str] + + def __init__(self, config: Dict[str, Union[str, int]]) -> None: + """Init method for :class:`AbstractMPP`. + + :param config: configuration of MPP section. + """ + self._config = config + + def is_enabled(self) -> bool: + """Check if MPP is enabled for a given MPP. + + .. note:: + We just check that the :attr:`_config` object isn't empty and expect + it to be empty only in case of :class:`Null`. + + :returns: ``True`` if MPP is enabled, otherwise ``False``. + """ + return bool(self._config) + + @staticmethod + @abc.abstractmethod + def validate_config(config: Any) -> bool: + """Check whether provided config is good for a given MPP. + + :param config: configuration of MPP section. + + :returns: ``True`` is config passes validation, otherwise ``False``. + """ + + @property + @abc.abstractmethod + def group(self) -> Any: + """The group for a given MPP implementation.""" + + @property + @abc.abstractmethod + def coordinator_group_id(self) -> Any: + """The group id of the coordinator PostgreSQL cluster.""" + + def is_coordinator(self) -> bool: + """Check whether this node is running in the coordinator PostgreSQL cluster. + + :returns: ``True`` if MPP is enabled and the group id of this node + matches with the :attr:`coordinator_group_id`, otherwise ``False``. + """ + return self.is_enabled() and self.group == self.coordinator_group_id + + def is_worker(self) -> bool: + """Check whether this node is running as a MPP worker PostgreSQL cluster. 
+ + :returns: ``True`` if MPP is enabled and this node is known to be not running + as the coordinator PostgreSQL cluster, otherwise ``False``. + """ + return self.is_enabled() and not self.is_coordinator() + + def _get_handler_cls(self) -> Iterator[Type['AbstractMPPHandler']]: + """Find Handler classes inherited from a class type of this object. + + :yields: handler classes for this object. + """ + for cls in self.__class__.__subclasses__(): + if issubclass(cls, AbstractMPPHandler) and cls.__name__.startswith(self.__class__.__name__): + yield cls + + def get_handler_impl(self, postgresql: 'Postgresql') -> 'AbstractMPPHandler': + """Find and instantiate Handler implementation of this object. + + :param postgresql: a reference to :class:`Postgresql` object. + + :raises: + :exc:`PatroniException`: if the Handler class haven't been found. + + :returns: an instantiated class that implements Handler for this object. + """ + for cls in self._get_handler_cls(): + return cls(postgresql, self._config) + raise PatroniException(f'Failed to initialize {self.__class__.__name__}Handler object') + + +class AbstractMPPHandler(AbstractMPP): + """An abstract class which defines interfaces that should be implemented by real handlers.""" + + def __init__(self, postgresql: 'Postgresql', config: Dict[str, Union[str, int]]) -> None: + """Init method for :class:`AbstractMPPHandler`. + + :param postgresql: a reference to :class:`Postgresql` object. + :param config: configuration of MPP section. + """ + super().__init__(config) + self._postgresql = postgresql + + @abc.abstractmethod + def handle_event(self, cluster: Cluster, event: Dict[str, Any]) -> None: + """Handle an event sent from a worker node. + + :param cluster: the currently known cluster state from DCS. + :param event: the event to be handled. + """ + + @abc.abstractmethod + def sync_meta_data(self, cluster: Cluster) -> None: + """Sync meta data on the coordinator. + + :param cluster: the currently known cluster state from DCS. 
+ """ + + @abc.abstractmethod + def on_demote(self) -> None: + """On demote handler. + + Is called when the primary was demoted. + """ + + @abc.abstractmethod + def schedule_cache_rebuild(self) -> None: + """Cache rebuild handler. + + Is called to notify handler that it has to refresh its metadata cache from the database. + """ + + @abc.abstractmethod + def bootstrap(self) -> None: + """Bootstrap handler. + + Is called when the new cluster is initialized (through ``initdb`` or a custom bootstrap method). + """ + + @abc.abstractmethod + def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: + """Adjust GUCs in the current PostgreSQL configuration. + + :param parameters: dictionary of GUCs, with key as GUC name and the corresponding value as current GUC value. + """ + + @abc.abstractmethod + def ignore_replication_slot(self, slot: Dict[str, str]) -> bool: + """Check whether provided replication *slot* existing in the database should not be removed. + + .. note:: + MPP database may create replication slots for its own use, for example to migrate data between workers + using logical replication, and we don't want to suddenly drop them. + + :param slot: dictionary containing the replication slot settings, like ``name``, ``database``, ``type``, and + ``plugin``. + + :returns: ``True`` if the replication slots should not be removed, otherwise ``False``. + """ + + +class Null(AbstractMPP): + """Dummy implementation of :class:`AbstractMPP`.""" + + def __init__(self) -> None: + """Init method for :class:`Null`.""" + super().__init__({}) + + @staticmethod + def validate_config(config: Any) -> bool: + """Check whether provided config is good for :class:`Null`. + + :returns: always ``True``. + """ + return True + + @property + def group(self) -> None: + """The group for :class:`Null`. + + :returns: always ``None``. + """ + return None + + @property + def coordinator_group_id(self) -> None: + """The group id of the coordinator PostgreSQL cluster. 
+ + :returns: always ``None``. + """ + return None + + +class NullHandler(Null, AbstractMPPHandler): + """Dummy implementation of :class:`AbstractMPPHandler`.""" + + def __init__(self, postgresql: 'Postgresql', config: Dict[str, Union[str, int]]) -> None: + """Init method for :class:`NullHandler`. + + :param postgresql: a reference to :class:`Postgresql` object. + :param config: configuration of MPP section. + """ + AbstractMPPHandler.__init__(self, postgresql, config) + + def handle_event(self, cluster: Cluster, event: Dict[str, Any]) -> None: + """Handle an event sent from a worker node. + + :param cluster: the currently known cluster state from DCS. + :param event: the event to be handled. + """ + + def sync_meta_data(self, cluster: Cluster) -> None: + """Sync meta data on the coordinator. + + :param cluster: the currently known cluster state from DCS. + """ + + def on_demote(self) -> None: + """On demote handler. + + Is called when the primary was demoted. + """ + + def schedule_cache_rebuild(self) -> None: + """Cache rebuild handler. + + Is called to notify handler that it has to refresh its metadata cache from the database. + """ + + def bootstrap(self) -> None: + """Bootstrap handler. + + Is called when the new cluster is initialized (through ``initdb`` or a custom bootstrap method). + """ + + def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: + """Adjust GUCs in the current PostgreSQL configuration. + + :param parameters: dictionary of GUCs, with key as GUC name and corresponding value as current GUC value. + """ + + def ignore_replication_slot(self, slot: Dict[str, str]) -> bool: + """Check whether provided replication *slot* existing in the database should not be removed. + + .. note:: + MPP database may create replication slots for its own use, for example to migrate data between workers + using logical replication, and we don't want to suddenly drop them. 
+ + :param slot: dictionary containing the replication slot settings, like ``name``, ``database``, ``type``, and + ``plugin``. + + :returns: always ``False``. + """ + return False + + +def iter_mpp_classes( + config: Optional[Union['Config', Dict[str, Any]]] = None +) -> Iterator[Tuple[str, Type[AbstractMPP]]]: + """Attempt to import MPP modules that are present in the given configuration. + + :param config: configuration information with possible MPP names as keys. If given, only attempt to import MPP + modules defined in the configuration. Else, if ``None``, attempt to import any supported MPP module. + + :yields: tuples, each containing the module ``name`` and the imported MPP class object. + """ + yield from iter_classes(__package__, AbstractMPP, config) + + +def get_mpp(config: Union['Config', Dict[str, Any]]) -> AbstractMPP: + """Attempt to load and instantiate a MPP module from known available implementations. + + :param config: object or dictionary with Patroni configuration. + + :returns: The successfully loaded MPP or fallback to :class:`Null`. + """ + for name, mpp_class in iter_mpp_classes(config): + if mpp_class.validate_config(config[name]): + return mpp_class(config[name]) + return Null() diff --git a/patroni/postgresql/citus.py b/patroni/postgresql/mpp/citus.py similarity index 83% rename from patroni/postgresql/citus.py rename to patroni/postgresql/mpp/citus.py index 26923f374..f3d6394c3 100644 --- a/patroni/postgresql/citus.py +++ b/patroni/postgresql/mpp/citus.py @@ -6,12 +6,15 @@ from urllib.parse import urlparse from typing import Any, Dict, List, Optional, Union, Tuple, TYPE_CHECKING -from ..dcs import CITUS_COORDINATOR_GROUP_ID, Cluster -from ..psycopg import connect, quote_ident +from . import AbstractMPP, AbstractMPPHandler +from ...dcs import Cluster +from ...psycopg import connect, quote_ident, DuplicateDatabase +from ...utils import parse_int if TYPE_CHECKING: # pragma: no cover - from . import Postgresql + from .. 
import Postgresql +CITUS_COORDINATOR_GROUP_ID = 0 CITUS_SLOT_NAME_RE = re.compile(r'^citus_shard_(move|split)_slot(_[1-9][0-9]*){2,3}$') logger = logging.getLogger(__name__) @@ -63,13 +66,45 @@ def __repr__(self) -> str: return str(self) -class CitusHandler(Thread): +class Citus(AbstractMPP): - def __init__(self, postgresql: 'Postgresql', config: Optional[Dict[str, Union[str, int]]]) -> None: - super(CitusHandler, self).__init__() + group_re = re.compile('^(0|[1-9][0-9]*)$') + + @staticmethod + def validate_config(config: Union[Any, Dict[str, Union[str, int]]]) -> bool: + """Check whether provided config is good for a given MPP. + + :param config: configuration of ``citus`` MPP section. + + :returns: ``True`` is config passes validation, otherwise ``False``. + """ + return isinstance(config, dict) \ + and isinstance(config.get('database'), str) \ + and parse_int(config.get('group')) is not None + + @property + def group(self) -> int: + """The group of this Citus node.""" + return int(self._config['group']) + + @property + def coordinator_group_id(self) -> int: + """The group id of the Citus coordinator PostgreSQL cluster.""" + return CITUS_COORDINATOR_GROUP_ID + + +class CitusHandler(Citus, AbstractMPPHandler, Thread): + """Define the interfaces for handling an underlying Citus cluster.""" + + def __init__(self, postgresql: 'Postgresql', config: Dict[str, Union[str, int]]) -> None: + """"Initialize a new instance of :class:`CitusHandler`. + + :param postgresql: the Postgres node. + :param config: the ``citus`` MPP config section. 
+ """ + Thread.__init__(self) + AbstractMPPHandler.__init__(self, postgresql, config) self.daemon = True - self._postgresql = postgresql - self._config = config if config: self._connection = postgresql.connection_pool.get( 'citus', {'dbname': config['database'], @@ -81,26 +116,19 @@ def __init__(self, postgresql: 'Postgresql', config: Optional[Dict[str, Union[st self._condition = Condition() # protects _pg_dist_node, _tasks, _in_flight, and _schedule_load_pg_dist_node self.schedule_cache_rebuild() - def is_enabled(self) -> bool: - return isinstance(self._config, dict) - - def group(self) -> Optional[int]: - return int(self._config['group']) if isinstance(self._config, dict) else None - - def is_coordinator(self) -> bool: - return self.is_enabled() and self.group() == CITUS_COORDINATOR_GROUP_ID - - def is_worker(self) -> bool: - return self.is_enabled() and not self.is_coordinator() - def schedule_cache_rebuild(self) -> None: + """Cache rebuild handler. + + Is called to notify handler that it has to refresh its metadata cache from the database. + """ with self._condition: self._schedule_load_pg_dist_node = True def on_demote(self) -> None: with self._condition: self._pg_dist_node.clear() - self._tasks[:] = [] + empty_tasks: List[PgDistNode] = [] + self._tasks[:] = empty_tasks self._in_flight = None def query(self, sql: str, *params: Any) -> List[Tuple[Any, ...]]: @@ -133,8 +161,8 @@ def load_pg_dist_node(self) -> bool: self._pg_dist_node = {r[1]: PgDistNode(r[1], r[2], r[3], 'after_promote', r[0]) for r in rows} return True - def sync_pg_dist_node(self, cluster: Cluster) -> None: - """Maintain the `pg_dist_node` from the coordinator leader every heartbeat loop. + def sync_meta_data(self, cluster: Cluster) -> None: + """Maintain the ``pg_dist_node`` from the coordinator leader every heartbeat loop. 
We can't always rely on REST API calls from worker nodes in order to maintain `pg_dist_node`, therefore at least once per heartbeat @@ -295,16 +323,16 @@ def _add_task(self, task: PgDistNode) -> bool: with self._condition: i = self.find_task_by_group(task.group) - # The `PgDistNode.timeout` == None is an indicator that it was scheduled from the sync_pg_dist_node(). + # The `PgDistNode.timeout` == None is an indicator that it was scheduled from the sync_meta_data(). if task.timeout is None: # We don't want to override the already existing task created from REST API. if i is not None and self._tasks[i].timeout is not None: return False # There is a little race condition with tasks created from REST API - the call made "before" the member - # key is updated in DCS. Therefore it is possible that :func:`sync_pg_dist_node` will try to create a - # task based on the outdated values of "state"/"role". To solve it we introduce an artificial timeout. - # Only when the timeout is reached new tasks could be scheduled from sync_pg_dist_node() + # key is updated in DCS. Therefore it is possible that :func:`sync_meta_data` will try to create a task + # based on the outdated values of "state"/"role". To solve it we introduce an artificial timeout. + # Only when the timeout is reached new tasks could be scheduled from sync_meta_data() if self._in_flight and self._in_flight.group == task.group and self._in_flight.timeout is not None\ and self._in_flight.deadline > time.time(): return False @@ -352,9 +380,10 @@ def handle_event(self, cluster: Cluster, event: Dict[str, Any]) -> None: task.wait() def bootstrap(self) -> None: - if not isinstance(self._config, dict): # self.is_enabled() - return + """Bootstrap handler. + Is called when the new cluster is initialized (through ``initdb`` or a custom bootstrap method). 
+ """ conn_kwargs = {**self._postgresql.connection_pool.conn_kwargs, 'options': '-c synchronous_commit=local -c statement_timeout=0'} if self._config['database'] != self._postgresql.database: @@ -363,6 +392,8 @@ def bootstrap(self) -> None: with conn.cursor() as cur: cur.execute('CREATE DATABASE {0}'.format( quote_ident(self._config['database'], conn)).encode('utf-8')) + except DuplicateDatabase as e: + logger.debug('Exception when creating database: %r', e) finally: conn.close() @@ -370,7 +401,7 @@ def bootstrap(self) -> None: conn = connect(**conn_kwargs) try: with conn.cursor() as cur: - cur.execute('CREATE EXTENSION citus') + cur.execute('CREATE EXTENSION IF NOT EXISTS citus') superuser = self._postgresql.config.superuser params = {k: superuser[k] for k in ('password', 'sslcert', 'sslkey') if k in superuser} @@ -387,9 +418,10 @@ def bootstrap(self) -> None: conn.close() def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: - if not self.is_enabled(): - return + """Adjust GUCs in the current PostgreSQL configuration. + :param parameters: dictionary of GUCs, with key as GUC name and the corresponding value as current GUC value. + """ # citus extension must be on the first place in shared_preload_libraries shared_preload_libraries = list(filter( lambda el: el and el != 'citus', @@ -407,8 +439,18 @@ def adjust_postgres_gucs(self, parameters: Dict[str, Any]) -> None: parameters['citus.local_hostname'] = self._postgresql.connection_pool.conn_kwargs.get('host', 'localhost') def ignore_replication_slot(self, slot: Dict[str, str]) -> bool: - if isinstance(self._config, dict) and self._postgresql.is_primary() and\ - slot['type'] == 'logical' and slot['database'] == self._config['database']: + """Check whether provided replication *slot* existing in the database should not be removed. + + .. 
note:: + MPP database may create replication slots for its own use, for example to migrate data between workers + using logical replication, and we don't want to suddenly drop them. + + :param slot: dictionary containing the replication slot settings, like ``name``, ``database``, ``type``, and + ``plugin``. + + :returns: ``True`` if the replication slots should not be removed, otherwise ``False``. + """ + if self._postgresql.is_primary() and slot['type'] == 'logical' and slot['database'] == self._config['database']: m = CITUS_SLOT_NAME_RE.match(slot['name']) return bool(m and {'move': 'pgoutput', 'split': 'citus'}.get(m.group(1)) == slot['plugin']) return False diff --git a/patroni/postgresql/postmaster.py b/patroni/postgresql/postmaster.py index 4505e7f7d..97eb10e4d 100644 --- a/patroni/postgresql/postmaster.py +++ b/patroni/postgresql/postmaster.py @@ -176,7 +176,7 @@ def pg_ctl_kill(self, mode: str, pg_ctl: str) -> Optional[bool]: return not self.is_running() def wait_for_user_backends_to_close(self, stop_timeout: Optional[float]) -> None: - # These regexps are cross checked against versions PostgreSQL 9.1 .. 15 + # These regexps are cross checked against versions PostgreSQL 9.1 .. 16 aux_proc_re = re.compile("(?:postgres:)( .*:)? (?:(?:archiver|startup|autovacuum launcher|autovacuum worker|" "checkpointer|logger|stats collector|wal receiver|wal writer|writer)(?: process )?|" "walreceiver|wal sender process|walsender|walwriter|background writer|" diff --git a/patroni/postgresql/rewind.py b/patroni/postgresql/rewind.py index 6e1aab884..4a5283f7c 100644 --- a/patroni/postgresql/rewind.py +++ b/patroni/postgresql/rewind.py @@ -101,12 +101,26 @@ def check_leader_has_run_checkpoint(conn_kwargs: Dict[str, Any]) -> Optional[str return 'not accessible or not healty' def _get_checkpoint_end(self, timeline: int, lsn: int) -> int: - """The checkpoint record size in WAL depends on postgres major version and platform (memory alignment). 
- Hence, the only reliable way to figure out where it ends, read the record from file with the help of pg_waldump - and parse the output. We are trying to read two records, and expect that it will fail to read the second one: - `pg_waldump: fatal: error in WAL record at 0/182E220: invalid record length at 0/182E298: wanted 24, got 0` - The error message contains information about LSN of the next record, which is exactly where checkpoint ends.""" + """Get the end of checkpoint record from WAL. + .. note:: + The checkpoint record size in WAL depends on postgres major version and platform (memory alignment). + Hence, the only reliable way to figure out where it ends, is to read the record from file with the + help of ``pg_waldump`` and parse the output. + + We are trying to read two records, and expect that it will fail to read the second record with message: + + fatal: error in WAL record at 0/182E220: invalid record length at 0/182E298: wanted 24, got 0; or + + fatal: error in WAL record at 0/182E220: invalid record length at 0/182E298: expected at least 24, got 0 + + The error message contains information about LSN of the next record, which is exactly where checkpoint ends. + + :param timeline: the checkpoint *timeline* from ``pg_controldata``. + :param lsn: the checkpoint *location* as :class:`int` from ``pg_controldata``. + + :returns: the end of checkpoint record as :class:`int` or ``0`` if failed to parse ``pg_waldump`` output. 
+ """ lsn8 = format_lsn(lsn, True) lsn_str = format_lsn(lsn) out, err = self._postgresql.waldump(timeline, lsn_str, 2) @@ -117,12 +131,17 @@ def _get_checkpoint_end(self, timeline: int, lsn: int) -> int: if len(out) == 1 and len(err) == 1 and ', lsn: {0}, prev '.format(lsn8) in out[0] and pattern in err[0]: i = err[0].find(pattern) + len(pattern) - j = err[0].find(": wanted ", i) - if j > -1: - try: - return parse_lsn(err[0][i:j]) - except Exception as e: - logger.error('Failed to parse lsn %s: %r', err[0][i:j], e) + # Message format depends on the major version: + # * expected at least -- starting from v16 + # * wanted -- before v16 + # We will simply check all possible combinations. + for pattern in (': expected at least ', ': wanted '): + j = err[0].find(pattern, i) + if j > -1: + try: + return parse_lsn(err[0][i:j]) + except Exception as e: + logger.error('Failed to parse lsn %s: %r', err[0][i:j], e) logger.error('Failed to parse pg_%sdump output', self._postgresql.wal_name) logger.error(' stdout=%s', '\n'.join(out)) logger.error(' stderr=%s', '\n'.join(err)) diff --git a/patroni/postgresql/slots.py b/patroni/postgresql/slots.py index 48b275e49..7f7fd294a 100644 --- a/patroni/postgresql/slots.py +++ b/patroni/postgresql/slots.py @@ -13,9 +13,11 @@ from .connection import get_connection_cursor from .misc import format_lsn, fsync_dir +from .. import global_config from ..dcs import Cluster, Leader from ..file_perm import pg_perm from ..psycopg import OperationalError +from ..tags import Tags if TYPE_CHECKING: # pragma: no cover from psycopg import Cursor @@ -289,11 +291,11 @@ def ignore_replication_slot(self, cluster: Cluster, name: str) -> bool: :param name: name of the slot to ignore :returns: ``True`` if slot *name* matches any slot specified in ``ignore_slots`` configuration, - otherwise will pass through and return result of :meth:`CitusHandler.ignore_replication_slot`. 
+ otherwise will pass through and return result of :meth:`AbstractMPPHandler.ignore_replication_slot`. """ slot = self._replication_slots[name] if cluster.config: - for matcher in cluster.config.ignore_slots_matchers: + for matcher in global_config.ignore_slots_matchers: if ( (matcher.get("name") is None or matcher["name"] == name) and all(not matcher.get(a) or matcher[a] == slot.get(a) @@ -319,7 +321,7 @@ def drop_replication_slot(self, name: str) -> Tuple[bool, bool]: ' FULL OUTER JOIN dropped ON true'), name) return (rows[0][0], rows[0][1]) if rows else (False, False) - def _drop_incorrect_slots(self, cluster: Cluster, slots: Dict[str, Any], paused: bool) -> None: + def _drop_incorrect_slots(self, cluster: Cluster, slots: Dict[str, Any]) -> None: """Compare required slots and configured as permanent slots with those found, dropping extraneous ones. .. note:: @@ -330,11 +332,10 @@ def _drop_incorrect_slots(self, cluster: Cluster, slots: Dict[str, Any], paused: :param cluster: cluster state information object. :param slots: dictionary of desired slot names as keys with slot attributes as a dictionary value, if known. - :param paused: ``True`` if the patroni cluster is currently in a paused state. """ # drop old replication slots which are not presented in desired slots. for name in set(self._replication_slots) - set(slots): - if not paused and not self.ignore_replication_slot(cluster, name): + if not global_config.is_paused and not self.ignore_replication_slot(cluster, name): active, dropped = self.drop_replication_slot(name) if dropped: logger.info("Dropped unknown replication slot '%s'", name) @@ -492,8 +493,7 @@ class instance. 
Slots that exist are also advanced if their ``confirmed_flush_ls self._schedule_load_slots = True return create_slots + copy_slots - def sync_replication_slots(self, cluster: Cluster, nofailover: bool, - replicatefrom: Optional[str] = None, paused: bool = False) -> List[str]: + def sync_replication_slots(self, cluster: Cluster, tags: Tags) -> List[str]: """During the HA loop read, check and alter replication slots found in the cluster. Read physical and logical slots from ``pg_replication_slots``, then compare to those configured in the DCS. @@ -503,22 +503,18 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, them on replica nodes by copying slot files from the primary. :param cluster: object containing stateful information for the cluster. - :param nofailover: ``True`` if this node has been tagged to not be a failover candidate. - :param replicatefrom: the tag containing the node to replicate from. - :param paused: ``True`` if the cluster is in maintenance mode. + :param tags: reference to an object implementing :class:`Tags` interface. :returns: list of logical replication slots names that should be copied from the primary. 
""" ret = [] - if self._postgresql.major_version >= 90400 and self._postgresql.global_config and cluster.config: + if self._postgresql.major_version >= 90400 and cluster.config: try: self.load_replication_slots() - slots = cluster.get_replication_slots( - self._postgresql.name, self._postgresql.role, nofailover, self._postgresql.major_version, - is_standby_cluster=self._postgresql.global_config.is_standby_cluster, show_error=True) + slots = cluster.get_replication_slots(self._postgresql, tags, show_error=True) - self._drop_incorrect_slots(cluster, slots, paused) + self._drop_incorrect_slots(cluster, slots) self._ensure_physical_slots(slots) @@ -526,7 +522,7 @@ def sync_replication_slots(self, cluster: Cluster, nofailover: bool, self._logical_slots_processing_queue.clear() self._ensure_logical_slots_primary(slots) else: - self.check_logical_slots_readiness(cluster, replicatefrom) + self.check_logical_slots_readiness(cluster, tags) ret = self._ensure_logical_slots_replica(slots) self._replication_slots = slots @@ -552,7 +548,7 @@ def _get_leader_connection_cursor(self, leader: Leader) -> Iterator[Union['curso with get_connection_cursor(connect_timeout=3, options="-c statement_timeout=2000", **conn_kwargs) as cur: yield cur - def check_logical_slots_readiness(self, cluster: Cluster, replicatefrom: Optional[str]) -> bool: + def check_logical_slots_readiness(self, cluster: Cluster, tags: Tags) -> bool: """Determine whether all known logical slots are synchronised from the leader. 1) Retrieve the current ``catalog_xmin`` value for the physical slot from the cluster leader, and @@ -561,13 +557,13 @@ def check_logical_slots_readiness(self, cluster: Cluster, replicatefrom: Optiona 3) store logical slot ``catalog_xmin`` when the physical slot ``catalog_xmin`` becomes valid. :param cluster: object containing stateful information for the cluster. - :param replicatefrom: name of the member that should be used to replicate from. 
+ :param tags: reference to an object implementing :class:`Tags` interface. :returns: ``False`` if any issue while checking logical slots readiness, ``True`` otherwise. """ catalog_xmin = None if self._logical_slots_processing_queue and cluster.leader: - slot_name = cluster.get_my_slot_name_on_primary(self._postgresql.name, replicatefrom) + slot_name = cluster.get_slot_name_on_primary(self._postgresql.name, tags) try: with self._get_leader_connection_cursor(cluster.leader) as cur: cur.execute("SELECT slot_name, catalog_xmin FROM pg_catalog.pg_get_replication_slots()" @@ -645,16 +641,17 @@ def _ready_logical_slots(self, primary_physical_catalog_xmin: Optional[int] = No if standby_logical_slot: logger.info('Logical slot %s is safe to be used after a failover', name) - def copy_logical_slots(self, cluster: Cluster, create_slots: List[str]) -> None: + def copy_logical_slots(self, cluster: Cluster, tags: Tags, create_slots: List[str]) -> None: """Create logical replication slots on standby nodes. :param cluster: object containing stateful information for the cluster. + :param tags: reference to an object implementing :class:`Tags` interface. :param create_slots: list of slot names to copy from the primary. """ leader = cluster.leader if not leader: return - slots = cluster.get_replication_slots(self._postgresql.name, 'replica', False, self._postgresql.major_version) + slots = cluster.get_replication_slots(self._postgresql, tags, role='replica') copy_slots: Dict[str, Dict[str, Any]] = {} with self._get_leader_connection_cursor(leader) as cur: try: diff --git a/patroni/postgresql/sync.py b/patroni/postgresql/sync.py index 9cff04e02..577422b5d 100644 --- a/patroni/postgresql/sync.py +++ b/patroni/postgresql/sync.py @@ -5,6 +5,7 @@ from copy import deepcopy from typing import Collection, List, NamedTuple, Tuple, TYPE_CHECKING +from .. 
import global_config from ..collections import CaseInsensitiveDict, CaseInsensitiveSet from ..dcs import Cluster from ..psycopg import quote_ident as _quote_ident @@ -303,11 +304,8 @@ def current_state(self, cluster: Cluster) -> Tuple[CaseInsensitiveSet, CaseInsen replica_list = _ReplicaList(self._postgresql, cluster) self._process_replica_readiness(cluster, replica_list) - if TYPE_CHECKING: # pragma: no cover - assert self._postgresql.global_config is not None - sync_node_count = self._postgresql.global_config.synchronous_node_count\ - if self._postgresql.supports_multiple_sync else 1 - sync_node_maxlag = self._postgresql.global_config.maximum_lag_on_syncnode + sync_node_count = global_config.synchronous_node_count if self._postgresql.supports_multiple_sync else 1 + sync_node_maxlag = global_config.maximum_lag_on_syncnode candidates = CaseInsensitiveSet() sync_nodes = CaseInsensitiveSet() diff --git a/patroni/psycopg.py b/patroni/psycopg.py index 4a92047ca..5d47ad5c5 100644 --- a/patroni/psycopg.py +++ b/patroni/psycopg.py @@ -9,7 +9,8 @@ from psycopg import Connection from psycopg2 import connection, cursor -__all__ = ['connect', 'quote_ident', 'quote_literal', 'DatabaseError', 'Error', 'OperationalError', 'ProgrammingError'] +__all__ = ['connect', 'quote_ident', 'quote_literal', 'DatabaseError', 'Error', 'OperationalError', 'ProgrammingError', + 'DuplicateDatabase'] _legacy = False try: @@ -18,6 +19,7 @@ if parse_version(__version__) < MIN_PSYCOPG2: raise ImportError from psycopg2 import connect as _connect, Error, DatabaseError, OperationalError, ProgrammingError + from psycopg2.errors import DuplicateDatabase from psycopg2.extensions import adapt try: @@ -43,6 +45,7 @@ def quote_literal(value: Any, conn: Optional[Any] = None) -> str: return value.getquoted().decode('utf-8') except ImportError: from psycopg import connect as __connect, sql, Error, DatabaseError, OperationalError, ProgrammingError + from psycopg.errors import DuplicateDatabase def _connect(dsn: 
Optional[str] = None, **kwargs: Any) -> 'Connection[Any]': """Call :func:`psycopg.connect` with *dsn* and ``**kwargs``. diff --git a/patroni/tags.py b/patroni/tags.py index 998ff6934..eedc96742 100644 --- a/patroni/tags.py +++ b/patroni/tags.py @@ -22,14 +22,18 @@ def _filter_tags(tags: Dict[str, Any]) -> Dict[str, Any]: A custom tag is any tag added to the configuration ``tags`` section that is not one of ``clonefrom``, ``nofailover``, ``noloadbalance`` or ``nosync``. - For the Patroni predefined tags, the returning object will only contain them if they are enabled as they - all are boolean values that default to disabled. + For most of the Patroni predefined tags, the returning object will only contain them if they are enabled as + they all are boolean values that default to disabled. + However ``nofailover`` tag is always returned if ``failover_priority`` tag is defined. In this case, we need + both values to see if they are contradictory and the ``nofailover`` value should be used. :returns: a dictionary of tags set for this node. The key is the tag name, and the value is the corresponding tag value. 
""" return {tag: value for tag, value in tags.items() - if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value} + if any((tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync'), + value, + tag == 'nofailover' and 'failover_priority' in tags))} @property @abc.abstractmethod diff --git a/patroni/utils.py b/patroni/utils.py index 6957369fd..2b1ded5b8 100644 --- a/patroni/utils.py +++ b/patroni/utils.py @@ -33,7 +33,6 @@ if TYPE_CHECKING: # pragma: no cover from .dcs import Cluster - from .config import GlobalConfig tzutc = tz.tzutc() @@ -401,22 +400,23 @@ def parse_real(value: Any, base_unit: Optional[str] = None) -> Optional[float]: return convert_to_base_unit(val, unit, base_unit) -def compare_values(vartype: str, unit: Optional[str], old_value: Any, new_value: Any) -> bool: - """Check if *old_value* and *new_value* are equivalent after parsing them as *vartype*. +def compare_values(vartype: str, unit: Optional[str], settings_value: Any, config_value: Any) -> bool: + """Check if the value from ``pg_settings`` and from Patroni config are equivalent after parsing them as *vartype*. - :param vartpe: the target type to parse *old_value* and *new_value* before comparing them. Accepts any among of the - following (case sensitive): + :param vartype: the target type to parse *settings_value* and *config_value* before comparing them. + Accepts any among of the following (case sensitive): * ``bool``: parse values using :func:`parse_bool`; or * ``integer``: parse values using :func:`parse_int`; or * ``real``: parse values using :func:`parse_real`; or * ``enum``: parse values as lowercase strings; or * ``string``: parse values as strings. This one is used by default if no valid value is passed as *vartype*. - :param unit: base unit to be used as argument when calling :func:`parse_int` or :func:`parse_real` for *new_value*. - :param old_value: value to be compared with *new_value*. - :param new_value: value to be compared with *old_value*. 
+ :param unit: base unit to be used as argument when calling :func:`parse_int` or :func:`parse_real` + for *config_value*. + :param settings_value: value to be compared with *config_value*. + :param config_value: value to be compared with *settings_value*. - :returns: ``True`` if *old_value* is equivalent to *new_value* when both are parsed as *vartype*. + :returns: ``True`` if *settings_value* is equivalent to *config_value* when both are parsed as *vartype*. :Example: @@ -456,8 +456,8 @@ def compare_values(vartype: str, unit: Optional[str], old_value: Any, new_value: } converter = converters.get(vartype) or converters['string'] - old_converted = converter(old_value, None) - new_converted = converter(new_value, unit) + old_converted = converter(settings_value, None) + new_converted = converter(config_value, unit) return old_converted is not None and new_converted is not None and old_converted == new_converted @@ -759,12 +759,10 @@ def iter_response_objects(response: HTTPResponse) -> Iterator[Dict[str, Any]]: prev = chunk[idx:] -def cluster_as_json(cluster: 'Cluster', global_config: Optional['GlobalConfig'] = None) -> Dict[str, Any]: +def cluster_as_json(cluster: 'Cluster') -> Dict[str, Any]: """Get a JSON representation of *cluster*. :param cluster: the :class:`~patroni.dcs.Cluster` object to be parsed as JSON. - :param global_config: optional :class:`~patroni.config.GlobalConfig` object to check the cluster state. - if not provided will be instantiated from the `Cluster.config`. :returns: JSON representation of *cluster*. @@ -793,16 +791,16 @@ def cluster_as_json(cluster: 'Cluster', global_config: Optional['GlobalConfig'] * ``from``: name of the member to be demoted; * ``to``: name of the member to be promoted. """ - if not global_config: - from patroni.config import get_global_config - global_config = get_global_config(cluster) + from . 
import global_config + + config = global_config.from_cluster(cluster) leader_name = cluster.leader.name if cluster.leader else None cluster_lsn = cluster.last_lsn or 0 ret: Dict[str, Any] = {'members': []} for m in cluster.members: if m.name == leader_name: - role = 'standby_leader' if global_config.is_standby_cluster else 'leader' + role = 'standby_leader' if config.is_standby_cluster else 'leader' elif cluster.sync.matches(m.name): role = 'sync_standby' else: @@ -832,7 +830,7 @@ def cluster_as_json(cluster: 'Cluster', global_config: Optional['GlobalConfig'] # sort members by name for consistency cmp: Callable[[Dict[str, Any]], bool] = lambda m: m['name'] ret['members'].sort(key=cmp) - if global_config.is_paused: + if config.is_paused: ret['pause'] = True if cluster.failover and cluster.failover.scheduled_at: ret['scheduled_switchover'] = {'at': cluster.failover.scheduled_at.isoformat()} diff --git a/patroni/validator.py b/patroni/validator.py index 016f4b2e7..bd69cc0e7 100644 --- a/patroni/validator.py +++ b/patroni/validator.py @@ -9,7 +9,7 @@ import shutil import socket -from typing import Any, Dict, Union, Iterator, List, Optional as OptionalType, Tuple +from typing import Any, Dict, Union, Iterator, List, Optional as OptionalType, Tuple, TYPE_CHECKING from .collections import CaseInsensitiveSet @@ -200,6 +200,8 @@ def get_bin_name(bin_name: str) -> str: :returns: value of ``postgresql.bin_name[*bin_name*]``, if present, otherwise *bin_name*. 
""" + if TYPE_CHECKING: # pragma: no cover + assert isinstance(schema.data, dict) return (schema.data.get('postgresql', {}).get('bin_name', {}) or {}).get(bin_name, bin_name) @@ -239,6 +241,8 @@ def validate_data_dir(data_dir: str) -> bool: if not os.path.isdir(os.path.join(data_dir, waldir)): raise ConfigParseError("data dir for the cluster is not empty, but doesn't contain" " \"{}\" directory".format(waldir)) + if TYPE_CHECKING: # pragma: no cover + assert isinstance(schema.data, dict) bin_dir = schema.data.get("postgresql", {}).get("bin_dir", None) major_version = get_major_version(bin_dir, get_bin_name('postgres')) if pgversion != major_version: @@ -274,6 +278,8 @@ def validate_binary_name(bin_name: str) -> bool: """ if not bin_name: raise ConfigParseError("is an empty string") + if TYPE_CHECKING: # pragma: no cover + assert isinstance(schema.data, dict) bin_dir = schema.data.get('postgresql', {}).get('bin_dir', None) if not shutil.which(bin_name, path=bin_dir): raise ConfigParseError(f"does not contain '{bin_name}' in '{bin_dir or '$PATH'}'") @@ -523,7 +529,7 @@ class Schema(object): * :class:`dict`: dictionary representing the YAML configuration tree. """ - def __init__(self, validator: Any) -> None: + def __init__(self, validator: Union[Dict[Any, Any], List[Any], Any]) -> None: """Create a :class:`Schema` object. .. note:: @@ -614,7 +620,7 @@ def __call__(self, data: Any) -> List[str]: errors.append(str(i)) return errors - def validate(self, data: Any) -> Iterator[Result]: + def validate(self, data: Union[Dict[Any, Any], Any]) -> Iterator[Result]: """Perform all validations from the schema against the given configuration. It first checks that *data* argument type is compliant with the type of ``validator`` attribute. @@ -638,11 +644,8 @@ def validate(self, data: Any) -> Iterator[Result]: # iterable objects in the structure, until we eventually reach a leaf node to validate its value. 
if isinstance(self.validator, str): yield Result(isinstance(self.data, str), "is not a string", level=1, data=self.data) - elif issubclass(type(self.validator), type): - validator = self.validator - if self.validator == str: - validator = str - yield Result(isinstance(self.data, validator), + elif isinstance(self.validator, type): + yield Result(isinstance(self.data, self.validator), "is not {}".format(_get_type_name(self.validator)), level=1, data=self.data) elif callable(self.validator): if hasattr(self.validator, "expected_type"): @@ -689,7 +692,7 @@ def iter(self) -> Iterator[Result]: for v in Schema(self.validator[0]).validate(value): yield Result(v.status, v.error, path=(str(key) + ("." + v.path if v.path else "")), level=v.level, data=value) - elif isinstance(self.validator, Directory): + elif isinstance(self.validator, Directory) and isinstance(self.data, str): yield from self.validator.validate(self.data) elif isinstance(self.validator, Or): yield from self.iter_or() @@ -701,6 +704,9 @@ def iter_dict(self) -> Iterator[Result]: """ # One key in `validator` attribute (`key` variable) can be mapped to one or more keys in `data` attribute (`d` # variable), depending on the `key` type. + if TYPE_CHECKING: # pragma: no cover + assert isinstance(self.validator, dict) + assert isinstance(self.data, dict) for key in self.validator.keys(): if isinstance(key, AtMostOne) and len(list(self._data_key(key))) > 1: yield Result(False, f"Multiple of {key.args} provided") @@ -730,6 +736,8 @@ def iter_or(self) -> Iterator[Result]: :yields: objects with the error message related to the failure, if any check fails. 
""" + if TYPE_CHECKING: # pragma: no cover + assert isinstance(self.validator, Or) results: List[Result] = [] for a in self.validator.args: r: List[Result] = [] @@ -766,7 +774,7 @@ def _data_key(self, key: Union[str, Optional, Or, AtMostOne]) -> Iterator[str]: yield key.name # If the key was defined as an `Or` object in `validator` attribute, then each of its values are the keys to # access the `data` dictionary. - elif isinstance(key, Or): + elif isinstance(key, Or) and isinstance(self.data, dict): # At least one of the `Or` entries should be available in the `data` dictionary. If we find at least one of # them in `data`, then we return all found entries so the caller method can validate them all. if any([item in self.data for item in key.args]): @@ -780,7 +788,7 @@ def _data_key(self, key: Union[str, Optional, Or, AtMostOne]) -> Iterator[str]: yield item # If the key was defined as a `AtMostOne` object in `validator` attribute, then each of its values # are the keys to access the `data` dictionary. 
- elif isinstance(key, AtMostOne): + elif isinstance(key, AtMostOne) and isinstance(self.data, dict): # Yield back all of the entries from the `data` dictionary, each will be validated and then counted # to inform us if we've provided too many for item in key.args: @@ -929,6 +937,18 @@ def validate_watchdog_mode(value: Any) -> None: schema = Schema({ "name": str, "scope": str, + Optional("log"): { + Optional("level"): EnumValidator(('DEBUG', 'INFO', 'WARN', 'WARNING', 'ERROR', 'FATAL', 'CRITICAL'), + case_sensitive=True, raise_assert=True), + Optional("traceback_level"): EnumValidator(('DEBUG', 'ERROR'), raise_assert=True), + Optional("format"): str, + Optional("dateformat"): str, + Optional("max_queue_size"): int, + Optional("dir"): str, + Optional("file_num"): int, + Optional("file_size"): int, + Optional("loggers"): dict + }, Optional("ctl"): { Optional("insecure"): bool, Optional("cacert"): str, diff --git a/patroni/version.py b/patroni/version.py index e5bcac2dd..96592c7da 100644 --- a/patroni/version.py +++ b/patroni/version.py @@ -2,4 +2,4 @@ :var __version__: the current Patroni version. """ -__version__ = '3.2.0' +__version__ = '3.2.1' diff --git a/postgres0.yml b/postgres0.yml index 8a975156c..84796a469 100644 --- a/postgres0.yml +++ b/postgres0.yml @@ -132,7 +132,7 @@ postgresql: # safety_margin: 5 tags: - nofailover: false + # failover_priority: 1 noloadbalance: false clonefrom: false nosync: false diff --git a/postgres1.yml b/postgres1.yml index 6ca2aa646..c86e8790d 100644 --- a/postgres1.yml +++ b/postgres1.yml @@ -124,6 +124,6 @@ postgresql: #pre_promote: /path/to/pre_promote.sh tags: - nofailover: false + # failover_priority: 1 noloadbalance: false clonefrom: false diff --git a/postgres2.yml b/postgres2.yml index ee61a0232..7384568ec 100644 --- a/postgres2.yml +++ b/postgres2.yml @@ -114,7 +114,7 @@ postgresql: # krb_server_keyfile: /var/spool/keytabs/postgres unix_socket_directories: '..' 
# parent directory of data_dir tags: - nofailover: false + # failover_priority: 1 noloadbalance: false clonefrom: false # replicatefrom: postgresql1 diff --git a/tests/__init__.py b/tests/__init__.py index bd70ba3d8..2f3730f69 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,6 +12,7 @@ from patroni.dcs import Leader, Member from patroni.postgresql import Postgresql from patroni.postgresql.config import ConfigHandler +from patroni.postgresql.mpp import get_mpp from patroni.utils import RetryFailedError, tzutc @@ -25,8 +26,41 @@ class SleepException(Exception): 'max_wal_senders', 'max_worker_processes', 'port', 'search_path', 'shared_preload_libraries', 'stats_temp_directory', 'synchronous_standby_names', 'track_commit_timestamp', 'unix_socket_directories', 'vacuum_cost_delay', 'vacuum_cost_limit', 'wal_keep_size', 'wal_level', 'wal_log_hints', 'zero_damaged_pages', + 'autovacuum', 'wal_segment_size', 'wal_block_size', 'shared_buffers', 'wal_buffers', }) +GET_PG_SETTINGS_RESULT = [ + ('wal_segment_size', '2048', '8kB', 'integer', 'internal'), + ('wal_block_size', '8192', None, 'integer', 'internal'), + ('shared_buffers', '16384', '8kB', 'integer', 'postmaster'), + ('wal_buffers', '-1', '8kB', 'integer', 'postmaster'), + ('max_connections', '100', None, 'integer', 'postmaster'), + ('max_prepared_transactions', '200', None, 'integer', 'postmaster'), + ('max_worker_processes', '8', None, 'integer', 'postmaster'), + ('max_locks_per_transaction', '64', None, 'integer', 'postmaster'), + ('max_wal_senders', '5', None, 'integer', 'postmaster'), + ('search_path', 'public', None, 'string', 'user'), + ('port', '5432', None, 'integer', 'postmaster'), + ('listen_addresses', '127.0.0.2, 127.0.0.3', None, 'string', 'postmaster'), + ('autovacuum', 'on', None, 'bool', 'sighup'), + ('unix_socket_directories', '/tmp', None, 'string', 'postmaster'), + ('shared_preload_libraries', 'citus', None, 'string', 'postmaster'), + ('wal_keep_size', '128', 'MB', 'integer', 'sighup'), 
+ ('cluster_name', 'batman', None, 'string', 'postmaster'), + ('vacuum_cost_delay', '200', 'ms', 'real', 'user'), + ('vacuum_cost_limit', '-1', None, 'integer', 'user'), + ('max_stack_depth', '2048', 'kB', 'integer', 'superuser'), + ('constraint_exclusion', '', None, 'enum', 'user'), + ('force_parallel_mode', '1', None, 'enum', 'user'), + ('zero_damaged_pages', 'off', None, 'bool', 'superuser'), + ('stats_temp_directory', '/tmp', None, 'string', 'sighup'), + ('track_commit_timestamp', 'off', None, 'bool', 'postmaster'), + ('wal_log_hints', 'on', None, 'bool', 'superuser'), + ('hot_standby', 'on', None, 'bool', 'superuser'), + ('max_replication_slots', '5', None, 'integer', 'superuser'), + ('wal_level', 'logical', None, 'enum', 'superuser'), +] + class MockResponse(object): @@ -95,6 +129,8 @@ def execute(self, sql, *params): sql = sql.decode('utf-8') if sql.startswith('blabla'): raise psycopg.ProgrammingError() + if sql.startswith('CREATE DATABASE'): + raise psycopg.DuplicateDatabase() elif sql == 'CHECKPOINT' or sql.startswith('SELECT pg_catalog.pg_create_'): raise psycopg.OperationalError() elif sql.startswith('RetryFailedError'): @@ -133,22 +169,9 @@ def execute(self, sql, *params): ('archive_command', 'my archive command'), ('cluster_name', 'my_cluster')] elif sql.startswith('SELECT name, setting'): - self.results = [('wal_segment_size', '2048', '8kB', 'integer', 'internal'), - ('wal_block_size', '8192', None, 'integer', 'internal'), - ('shared_buffers', '16384', '8kB', 'integer', 'postmaster'), - ('wal_buffers', '-1', '8kB', 'integer', 'postmaster'), - ('max_connections', '100', None, 'integer', 'postmaster'), - ('max_prepared_transactions', '0', None, 'integer', 'postmaster'), - ('max_worker_processes', '8', None, 'integer', 'postmaster'), - ('max_locks_per_transaction', '64', None, 'integer', 'postmaster'), - ('max_wal_senders', '5', None, 'integer', 'postmaster'), - ('search_path', 'public', None, 'string', 'user'), - ('port', '5433', None, 'integer', 
'postmaster'), - ('listen_addresses', '*', None, 'string', 'postmaster'), - ('autovacuum', 'on', None, 'bool', 'sighup'), - ('unix_socket_directories', '/tmp', None, 'string', 'postmaster')] + self.results = GET_PG_SETTINGS_RESULT elif sql.startswith('SELECT COUNT(*) FROM pg_catalog.pg_settings'): - self.results = [(1,)] + self.results = [(0,)] elif sql.startswith('IDENTIFY_SYSTEM'): self.results = [('1', 3, '0/402EEC0', '')] elif sql.startswith('TIMELINE_HISTORY '): @@ -218,11 +241,11 @@ class PostgresInit(unittest.TestCase): _PARAMETERS = {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'f.oo': 'bar', 'search_path': 'public', 'hot_standby': 'on', 'max_wal_senders': 5, 'wal_keep_segments': 8, 'wal_log_hints': 'on', 'max_locks_per_transaction': 64, - 'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 0, + 'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 200, 'track_commit_timestamp': 'off', 'unix_socket_directories': '/tmp', - 'trigger_file': 'bla', 'stats_temp_directory': '/tmp', 'zero_damaged_pages': '', + 'trigger_file': 'bla', 'stats_temp_directory': '/tmp', 'zero_damaged_pages': 'off', 'force_parallel_mode': '1', 'constraint_exclusion': '', - 'max_stack_depth': 'Z', 'vacuum_cost_limit': -1, 'vacuum_cost_delay': 200} + 'max_stack_depth': 2048, 'vacuum_cost_limit': -1, 'vacuum_cost_delay': 200} @patch('patroni.psycopg._connect', psycopg_connect) @patch('patroni.postgresql.CallbackExecutor', Mock()) @@ -232,23 +255,24 @@ class PostgresInit(unittest.TestCase): @patch.object(Postgresql, 'get_postgres_role_from_data_directory', Mock(return_value='primary')) def setUp(self): data_dir = os.path.join('data', 'test0') - self.p = Postgresql({'name': 'postgresql0', 'scope': 'batman', 'data_dir': data_dir, - 'config_dir': data_dir, 'retry_timeout': 10, - 'krbsrvname': 'postgres', 'pgpass': os.path.join(data_dir, 'pgpass0'), - 'listen': '127.0.0.2, 127.0.0.3:5432', - 'connect_address': '127.0.0.2:5432', 
'proxy_address': '127.0.0.2:5433', - 'authentication': {'superuser': {'username': 'foo', 'password': 'test'}, - 'replication': {'username': '', 'password': 'rep-pass'}, - 'rewind': {'username': 'rewind', 'password': 'test'}}, - 'remove_data_directory_on_rewind_failure': True, - 'use_pg_rewind': True, 'pg_ctl_timeout': 'bla', 'use_unix_socket': True, - 'parameters': self._PARAMETERS, - 'recovery_conf': {'foo': 'bar'}, - 'pg_hba': ['host all all 0.0.0.0/0 md5'], - 'pg_ident': ['krb realm postgres'], - 'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_reload': 'true', - 'on_restart': 'true', 'on_role_change': 'true'}, - 'citus': {'group': 0, 'database': 'citus'}}) + config = {'name': 'postgresql0', 'scope': 'batman', 'data_dir': data_dir, + 'config_dir': data_dir, 'retry_timeout': 10, + 'krbsrvname': 'postgres', 'pgpass': os.path.join(data_dir, 'pgpass0'), + 'listen': '127.0.0.2, 127.0.0.3:5432', + 'connect_address': '127.0.0.2:5432', 'proxy_address': '127.0.0.2:5433', + 'authentication': {'superuser': {'username': 'foo', 'password': 'test'}, + 'replication': {'username': '', 'password': 'rep-pass'}, + 'rewind': {'username': 'rewind', 'password': 'test'}}, + 'remove_data_directory_on_rewind_failure': True, + 'use_pg_rewind': True, 'pg_ctl_timeout': 'bla', 'use_unix_socket': True, + 'parameters': self._PARAMETERS, + 'recovery_conf': {'foo': 'bar'}, + 'pg_hba': ['host all all 0.0.0.0/0 md5'], + 'pg_ident': ['krb realm postgres'], + 'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_reload': 'true', + 'on_restart': 'true', 'on_role_change': 'true'}, + 'citus': {'group': 0, 'database': 'citus'}} + self.p = Postgresql(config, get_mpp(config)) class BaseTestPostgresql(PostgresInit): diff --git a/tests/test_api.py b/tests/test_api.py index 71c566cd4..234a6824f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -8,8 +8,8 @@ from mock import Mock, PropertyMock, patch from socketserver import ThreadingMixIn +from patroni import global_config from patroni.api 
import RestApiHandler, RestApiServer -from patroni.config import GlobalConfig from patroni.dcs import ClusterConfig, Member from patroni.exceptions import PostgresConnectionException from patroni.ha import _MemberStatus @@ -148,16 +148,9 @@ class MockLogger(object): records_lost = 1 -class MockConfig(object): - - def get_global_config(self, _): - return GlobalConfig({}) - - class MockPatroni(object): ha = MockHa() - config = MockConfig() postgresql = ha.state_handler dcs = Mock() logger = MockLogger() @@ -211,7 +204,7 @@ class TestRestApiHandler(unittest.TestCase): def test_do_GET(self): MockPatroni.dcs.cluster.last_lsn = 20 MockPatroni.dcs.cluster.sync.members = [MockPostgresql.name] - with patch.object(GlobalConfig, 'is_synchronous_mode', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_synchronous_mode', PropertyMock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /replica') MockRestApiServer(RestApiHandler, 'GET /replica?lag=1M') MockRestApiServer(RestApiHandler, 'GET /replica?lag=10MB') @@ -234,7 +227,7 @@ def test_do_GET(self): with patch.object(MockHa, 'is_leader', Mock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /replica') MockRestApiServer(RestApiHandler, 'GET /read-only-sync') - with patch.object(GlobalConfig, 'is_standby_cluster', Mock(return_value=True)): + with patch.object(global_config.__class__, 'is_standby_cluster', Mock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /standby_leader') MockPatroni.dcs.cluster = None with patch.object(RestApiHandler, 'get_postgresql_status', Mock(return_value={'role': 'primary'})): @@ -244,8 +237,8 @@ def test_do_GET(self): self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /primary')) with patch.object(RestApiServer, 'query', Mock(return_value=[('', 1, '', '', '', '', False, None, None, '')])): self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /patroni')) - with patch.object(GlobalConfig, 'is_standby_cluster', 
Mock(return_value=True)), \ - patch.object(GlobalConfig, 'is_paused', Mock(return_value=True)): + with patch.object(global_config.__class__, 'is_standby_cluster', Mock(return_value=True)), \ + patch.object(global_config.__class__, 'is_paused', Mock(return_value=True)): MockRestApiServer(RestApiHandler, 'GET /standby_leader') # test tags @@ -475,7 +468,7 @@ def make_request(request=None, **kwargs): request = make_request(role='primary', postgres_version='9.5.2') MockRestApiServer(RestApiHandler, request) - with patch.object(GlobalConfig, 'is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): MockRestApiServer(RestApiHandler, make_request(schedule='2016-08-42 12:45TZ+1', role='primary')) # Valid timeout MockRestApiServer(RestApiHandler, make_request(timeout='60s')) @@ -537,7 +530,7 @@ def test_do_POST_switchover(self, dcs): # Switchover in pause mode with patch.object(RestApiHandler, 'write_response') as response_mock, \ - patch.object(GlobalConfig, 'is_paused', PropertyMock(return_value=True)): + patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with( 400, 'Switchover is possible only to a specific candidate in a paused state') @@ -546,7 +539,8 @@ def test_do_POST_switchover(self, dcs): for is_synchronous_mode, response in ( (True, 'switchover is not possible: can not find sync_standby'), (False, 'switchover is not possible: cluster does not have members except leader')): - with patch.object(GlobalConfig, 'is_synchronous_mode', PropertyMock(return_value=is_synchronous_mode)), \ + with patch.object(global_config.__class__, 'is_synchronous_mode', + PropertyMock(return_value=is_synchronous_mode)), \ patch.object(RestApiHandler, 'write_response') as response_mock: MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with(412, response) @@ -571,7 +565,8 @@ def 
test_do_POST_switchover(self, dcs): cluster.sync.matches.return_value = False for is_synchronous_mode, response in ( (True, 'candidate name does not match with sync_standby'), (False, 'candidate does not exists')): - with patch.object(GlobalConfig, 'is_synchronous_mode', PropertyMock(return_value=is_synchronous_mode)), \ + with patch.object(global_config.__class__, 'is_synchronous_mode', + PropertyMock(return_value=is_synchronous_mode)), \ patch.object(RestApiHandler, 'write_response') as response_mock: MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with(412, response) @@ -632,7 +627,7 @@ def test_do_POST_switchover(self, dcs): # Schedule in paused mode with patch.object(RestApiHandler, 'write_response') as response_mock, \ - patch.object(GlobalConfig, 'is_paused', PropertyMock(return_value=True)): + patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): dcs.manual_failover.return_value = False MockRestApiServer(RestApiHandler, request) response_mock.assert_called_with(400, "Can't schedule switchover in the paused state") diff --git a/tests/test_barman_recover.py b/tests/test_barman_recover.py index 4334d6ae9..c0efda838 100644 --- a/tests/test_barman_recover.py +++ b/tests/test_barman_recover.py @@ -1,5 +1,6 @@ import logging -from mock import MagicMock, Mock, call, patch +import mock +from mock import MagicMock, Mock, patch import unittest from urllib3.exceptions import MaxRetryError @@ -156,18 +157,19 @@ def test__create_recovery_operation(self, mock_post_request, mock_sleep, mock_lo "Maximum number of retries exceeded for method BarmanRecover._create_recovery_operation.") self.assertEqual(mock_sleep.call_count, self.br.max_retries) - mock_sleep.assert_has_calls([call(self.br.retry_wait)] * self.br.max_retries) + + mock_sleep.assert_has_calls([mock.call(self.br.retry_wait)] * self.br.max_retries) self.assertEqual(mock_logging.call_count, self.br.max_retries) for i in range(mock_logging.call_count): - call_args 
= mock_logging.mock_calls[i].args + call_args = mock_logging.call_args_list[i][0] self.assertEqual(len(call_args), 5) self.assertEqual(call_args[0], "Attempt %d of %d on method %s failed with %r.") self.assertEqual(call_args[1], i + 1) self.assertEqual(call_args[2], self.br.max_retries) self.assertEqual(call_args[3], "BarmanRecover._create_recovery_operation") self.assertIsInstance(call_args[4], KeyError) - self.assertEqual(repr(call_args[4]), "KeyError('operation_id')") + self.assertEqual(call_args[4].args, ('operation_id',)) @patch("logging.warning") @patch("time.sleep") @@ -190,18 +192,18 @@ def test__get_recovery_operation_status(self, mock_get_request, mock_sleep, mock "Maximum number of retries exceeded for method BarmanRecover._get_recovery_operation_status.") self.assertEqual(mock_sleep.call_count, self.br.max_retries) - mock_sleep.assert_has_calls([call(self.br.retry_wait)] * self.br.max_retries) + mock_sleep.assert_has_calls([mock.call(self.br.retry_wait)] * self.br.max_retries) self.assertEqual(mock_logging.call_count, self.br.max_retries) for i in range(mock_logging.call_count): - call_args = mock_logging.mock_calls[i].args + call_args = mock_logging.call_args_list[i][0] self.assertEqual(len(call_args), 5) self.assertEqual(call_args[0], "Attempt %d of %d on method %s failed with %r.") self.assertEqual(call_args[1], i + 1) self.assertEqual(call_args[2], self.br.max_retries) self.assertEqual(call_args[3], "BarmanRecover._get_recovery_operation_status") self.assertIsInstance(call_args[4], KeyError) - self.assertEqual(repr(call_args[4]), "KeyError('status')") + self.assertEqual(call_args[4].args, ('status',)) @patch.object(BarmanRecover, "_get_recovery_operation_status") @patch("time.sleep") @@ -232,16 +234,16 @@ def test_restore_backup(self, mock_create_op, mock_log_critical, mock_log_info, mock_create_op.assert_called_once() self.assertEqual(mock_get_status.call_count, 21) - mock_get_status.assert_has_calls([call("some_id")] * 21) + 
mock_get_status.assert_has_calls([mock.call("some_id")] * 21) self.assertEqual(mock_log_info.call_count, 21) - mock_log_info.assert_has_calls([call("Created the recovery operation with ID %s", "some_id")] - + [call("Recovery operation %s is still in progress", "some_id")] * 20) + mock_log_info.assert_has_calls([mock.call("Created the recovery operation with ID %s", "some_id")] + + [mock.call("Recovery operation %s is still in progress", "some_id")] * 20) mock_log_critical.assert_not_called() self.assertEqual(mock_sleep.call_count, 20) - mock_sleep.assert_has_calls([call(LOOP_WAIT)] * 20) + mock_sleep.assert_has_calls([mock.call(LOOP_WAIT)] * 20) # failed fast restore mock_create_op.reset_mock() @@ -271,16 +273,16 @@ def test_restore_backup(self, mock_create_op, mock_log_critical, mock_log_info, mock_create_op.assert_called_once() self.assertEqual(mock_get_status.call_count, 21) - mock_get_status.assert_has_calls([call("some_id")] * 21) + mock_get_status.assert_has_calls([mock.call("some_id")] * 21) self.assertEqual(mock_log_info.call_count, 21) - mock_log_info.assert_has_calls([call("Created the recovery operation with ID %s", "some_id")] - + [call("Recovery operation %s is still in progress", "some_id")] * 20) + mock_log_info.assert_has_calls([mock.call("Created the recovery operation with ID %s", "some_id")] + + [mock.call("Recovery operation %s is still in progress", "some_id")] * 20) mock_log_critical.assert_not_called() self.assertEqual(mock_sleep.call_count, 20) - mock_sleep.assert_has_calls([call(LOOP_WAIT)] * 20) + mock_sleep.assert_has_calls([mock.call(LOOP_WAIT)] * 20) # create retries exceeded mock_log_info.reset_mock() diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index 4c2d1c982..8724b03cb 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -179,10 +179,17 @@ def test_bootstrap(self): @patch.object(Postgresql, 'controldata', Mock(return_value={'Database cluster state': 'in production'})) def 
test_custom_bootstrap(self, mock_cancellable_subprocess_call): self.p.config._config.pop('pg_hba') - config = {'method': 'foo', 'foo': {'command': 'bar'}} + config = {'method': 'foo', 'foo': {'command': 'bar --arg1=val1'}} mock_cancellable_subprocess_call.return_value = 1 self.assertFalse(self.b.bootstrap(config)) + self.assertEqual(mock_cancellable_subprocess_call.call_args_list[0][0][0], + ['bar', '--arg1=val1', '--scope=batman', '--datadir=' + os.path.join('data', 'test0')]) + + mock_cancellable_subprocess_call.reset_mock() + config['foo']['no_params'] = 1 + self.assertFalse(self.b.bootstrap(config)) + self.assertEqual(mock_cancellable_subprocess_call.call_args_list[0][0][0], ['bar', '--arg1=val1']) mock_cancellable_subprocess_call.return_value = 0 with patch('multiprocessing.Process', Mock(side_effect=Exception("42"))), \ diff --git a/tests/test_citus.py b/tests/test_citus.py index 7279893e0..f1d8a020f 100644 --- a/tests/test_citus.py +++ b/tests/test_citus.py @@ -1,12 +1,12 @@ import time from mock import Mock, patch -from patroni.postgresql.citus import CitusHandler +from patroni.postgresql.mpp.citus import CitusHandler from . 
import BaseTestPostgresql, MockCursor, psycopg_connect, SleepException from .test_ha import get_cluster_initialized_with_leader -@patch('patroni.postgresql.citus.Thread', Mock()) +@patch('patroni.postgresql.mpp.citus.Thread', Mock()) @patch('patroni.psycopg.connect', psycopg_connect) class TestCitus(BaseTestPostgresql): @@ -17,9 +17,9 @@ def setUp(self): self.cluster.workers[1] = self.cluster @patch('time.time', Mock(side_effect=[100, 130, 160, 190, 220, 250, 280, 310, 340, 370])) - @patch('patroni.postgresql.citus.logger.exception', Mock(side_effect=SleepException)) - @patch('patroni.postgresql.citus.logger.warning') - @patch('patroni.postgresql.citus.PgDistNode.wait', Mock()) + @patch('patroni.postgresql.mpp.citus.logger.exception', Mock(side_effect=SleepException)) + @patch('patroni.postgresql.mpp.citus.logger.warning') + @patch('patroni.postgresql.mpp.citus.PgDistNode.wait', Mock()) @patch.object(CitusHandler, 'is_alive', Mock(return_value=True)) def test_run(self, mock_logger_warning): # `before_demote` or `before_promote` REST API calls starting a @@ -39,10 +39,10 @@ def test_run(self, mock_logger_warning): @patch.object(CitusHandler, 'is_alive', Mock(return_value=False)) @patch.object(CitusHandler, 'start', Mock()) - def test_sync_pg_dist_node(self): + def test_sync_meta_data(self): with patch.object(CitusHandler, 'is_enabled', Mock(return_value=False)): - self.c.sync_pg_dist_node(self.cluster) - self.c.sync_pg_dist_node(self.cluster) + self.c.sync_meta_data(self.cluster) + self.c.sync_meta_data(self.cluster) def test_handle_event(self): self.c.handle_event(self.cluster, {}) @@ -51,22 +51,22 @@ def test_handle_event(self): 'leader': 'leader', 'timeout': 30, 'cooldown': 10}) def test_add_task(self): - with patch('patroni.postgresql.citus.logger.error') as mock_logger, \ - patch('patroni.postgresql.citus.urlparse', Mock(side_effect=Exception)): + with patch('patroni.postgresql.mpp.citus.logger.error') as mock_logger, \ + 
patch('patroni.postgresql.mpp.citus.urlparse', Mock(side_effect=Exception)): self.c.add_task('', 1, None) mock_logger.assert_called_once() - with patch('patroni.postgresql.citus.logger.debug') as mock_logger: + with patch('patroni.postgresql.mpp.citus.logger.debug') as mock_logger: self.c.add_task('before_demote', 1, 'postgres://host:5432/postgres', 30) mock_logger.assert_called_once() self.assertTrue(mock_logger.call_args[0][0].startswith('Adding the new task:')) - with patch('patroni.postgresql.citus.logger.debug') as mock_logger: + with patch('patroni.postgresql.mpp.citus.logger.debug') as mock_logger: self.c.add_task('before_promote', 1, 'postgres://host:5432/postgres', 30) mock_logger.assert_called_once() self.assertTrue(mock_logger.call_args[0][0].startswith('Overriding existing task:')) - # add_task called from sync_pg_dist_node should not override already scheduled or in flight task until deadline + # add_task called from sync_meta_data should not override already scheduled or in flight task until deadline self.assertIsNotNone(self.c.add_task('after_promote', 1, 'postgres://host:5432/postgres', 30)) self.assertIsNone(self.c.add_task('after_promote', 1, 'postgres://host:5432/postgres')) self.c._in_flight = self.c._tasks.pop() @@ -106,7 +106,7 @@ def test_process_tasks(self): self.c.process_tasks() self.c.add_task('after_promote', 0, 'postgres://host3:5432/postgres') - with patch('patroni.postgresql.citus.logger.error') as mock_logger, \ + with patch('patroni.postgresql.mpp.citus.logger.error') as mock_logger, \ patch.object(CitusHandler, 'query', Mock(side_effect=Exception)): self.c.process_tasks() mock_logger.assert_called_once() @@ -115,7 +115,7 @@ def test_process_tasks(self): def test_on_demote(self): self.c.on_demote() - @patch('patroni.postgresql.citus.logger.error') + @patch('patroni.postgresql.mpp.citus.logger.error') @patch.object(MockCursor, 'execute', Mock(side_effect=Exception)) def test_load_pg_dist_node(self, mock_logger): # load_pg_dist_node() 
triggers, query fails and exception is property handled @@ -140,10 +140,6 @@ def test_adjust_postgres_gucs(self): self.assertEqual(parameters['wal_level'], 'logical') self.assertEqual(parameters['citus.local_hostname'], '/tmp') - def test_bootstrap(self): - self.c._config = None - self.c.bootstrap() - def test_ignore_replication_slot(self): self.assertFalse(self.c.ignore_replication_slot({'name': 'foo', 'type': 'physical', 'database': 'bar', 'plugin': 'wal2json'})) @@ -161,3 +157,11 @@ def test_ignore_replication_slot(self): 'type': 'logical', 'database': 'citus', 'plugin': 'pgoutput'})) self.assertTrue(self.c.ignore_replication_slot({'name': 'citus_shard_split_slot_1_2_3', 'type': 'logical', 'database': 'citus', 'plugin': 'citus'})) + + @patch('patroni.postgresql.mpp.citus.logger.debug') + @patch('patroni.postgresql.mpp.citus.connect', psycopg_connect) + @patch('patroni.postgresql.mpp.citus.quote_ident', Mock()) + def test_bootstrap_duplicate_database(self, mock_logger): + self.c.bootstrap() + mock_logger.assert_called_once() + self.assertTrue(mock_logger.call_args[0][0].startswith('Exception when creating database')) diff --git a/tests/test_config.py b/tests/test_config.py index dfb3b6e34..a02a33fda 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -5,7 +5,11 @@ from copy import deepcopy from mock import MagicMock, Mock, patch -from patroni.config import Config, ConfigParseError, GlobalConfig + +from patroni import global_config +from patroni.config import ClusterConfig, Config, ConfigParseError + +from .test_ha import get_cluster_initialized_with_only_leader class TestConfig(unittest.TestCase): @@ -155,48 +159,40 @@ def test_invalid_path(self): @patch('patroni.config.logger') def test__validate_failover_tags(self, mock_logger, mock_get): """Ensures that only one of `nofailover` or `failover_priority` can be provided""" - mock_logger.warning.reset_mock() config = Config("postgres0.yml") + # Providing one of `nofailover` or `failover_priority` is 
fine - just_nofailover = {"nofailover": True} - mock_get.side_effect = [just_nofailover] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() - just_failover_priority = {"failover_priority": 1} - mock_get.side_effect = [just_failover_priority] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() + for single_param in ({"nofailover": True}, {"failover_priority": 1}, {"failover_priority": 0}): + mock_get.side_effect = [single_param] * 2 + self.assertIsNone(config._validate_failover_tags()) + mock_logger.warning.assert_not_called() + # Providing both `nofailover` and `failover_priority` is fine if consistent - consistent_false = {"nofailover": False, "failover_priority": 1} - mock_get.side_effect = [consistent_false] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() - consistent_true = {"nofailover": True, "failover_priority": 0} - mock_get.side_effect = [consistent_true] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_not_called() + for consistent_state in ( + {"nofailover": False, "failover_priority": 1}, + {"nofailover": True, "failover_priority": 0}, + {"nofailover": "False", "failover_priority": 0} + ): + mock_get.side_effect = [consistent_state] * 2 + self.assertIsNone(config._validate_failover_tags()) + mock_logger.warning.assert_not_called() + # Providing both inconsistently should log a warning - inconsistent_false = {"nofailover": False, "failover_priority": 0} - mock_get.side_effect = [inconsistent_false] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_called_once_with( - 'Conflicting configuration between nofailover: %s and failover_priority: %s.' 
- + ' Defaulting to nofailover: %s', - False, - 0, - False - ) - mock_logger.warning.reset_mock() - inconsistent_true = {"nofailover": True, "failover_priority": 1} - mock_get.side_effect = [inconsistent_true] * 2 - self.assertIsNone(config._validate_failover_tags()) - mock_logger.warning.assert_called_once_with( - 'Conflicting configuration between nofailover: %s and failover_priority: %s.' - + ' Defaulting to nofailover: %s', - True, - 1, - True - ) + for inconsistent_state in ( + {"nofailover": False, "failover_priority": 0}, + {"nofailover": True, "failover_priority": 1}, + {"nofailover": "False", "failover_priority": 1}, + {"nofailover": "", "failover_priority": 0} + ): + mock_get.side_effect = [inconsistent_state] * 2 + self.assertIsNone(config._validate_failover_tags()) + mock_logger.warning.assert_called_once_with( + 'Conflicting configuration between nofailover: %s and failover_priority: %s.' + + ' Defaulting to nofailover: %s', + inconsistent_state['nofailover'], + inconsistent_state['failover_priority'], + inconsistent_state['nofailover']) + mock_logger.warning.reset_mock() def test__process_postgresql_parameters(self): expected_params = { @@ -248,4 +244,6 @@ def test__validate_and_adjust_timeouts(self): def test_global_config_is_synchronous_mode(self): # we should ignore synchronous_mode setting in a standby cluster config = {'standby_cluster': {'host': 'some_host'}, 'synchronous_mode': True} - self.assertFalse(GlobalConfig(config).is_synchronous_mode) + cluster = get_cluster_initialized_with_only_leader(cluster_config=ClusterConfig(1, config, 1)) + test_config = global_config.from_cluster(cluster) + self.assertFalse(test_config.is_synchronous_mode) diff --git a/tests/test_consul.py b/tests/test_consul.py index 83ee67d83..494d11266 100644 --- a/tests/test_consul.py +++ b/tests/test_consul.py @@ -3,8 +3,10 @@ from consul import ConsulException, NotFound from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.consul 
import AbstractDCS, Cluster, Consul, ConsulInternalError, \ ConsulError, ConsulClient, HTTPClient, InvalidSessionTTL, InvalidSession, RetryFailedError +from patroni.postgresql.mpp import get_mpp from . import SleepException @@ -91,13 +93,17 @@ class TestConsul(unittest.TestCase): @patch.object(consul.Consul.KV, 'get', kv_get) @patch.object(consul.Consul.KV, 'delete', Mock()) def setUp(self): - Consul({'ttl': 30, 'scope': 't', 'name': 'p', 'url': 'https://l:1', 'retry_timeout': 10, - 'verify': 'on', 'key': 'foo', 'cert': 'bar', 'cacert': 'buz', 'token': 'asd', 'dc': 'dc1', - 'register_service': True}) - Consul({'ttl': 30, 'scope': 't_', 'name': 'p', 'url': 'https://l:1', 'retry_timeout': 10, - 'verify': 'on', 'cert': 'bar', 'cacert': 'buz', 'register_service': True}) - self.c = Consul({'ttl': 30, 'scope': 'test', 'name': 'postgresql1', 'host': 'localhost:1', 'retry_timeout': 10, - 'register_service': True, 'service_check_tls_server_name': True}) + self.assertIsInstance(get_dcs({'ttl': 30, 'scope': 't', 'name': 'p', 'retry_timeout': 10, + 'consul': {'url': 'https://l:1', 'verify': 'on', + 'key': 'foo', 'cert': 'bar', 'cacert': 'buz', + 'token': 'asd', 'dc': 'dc1', 'register_service': True}}), Consul) + self.assertIsInstance(get_dcs({'ttl': 30, 'scope': 't_', 'name': 'p', 'retry_timeout': 10, + 'consul': {'url': 'https://l:1', 'verify': 'on', + 'cert': 'bar', 'cacert': 'buz', 'register_service': True}}), Consul) + self.c = get_dcs({'ttl': 30, 'scope': 'test', 'name': 'postgresql1', 'retry_timeout': 10, + 'consul': {'host': 'localhost:1', 'register_service': True, + 'service_check_tls_server_name': True}}) + self.assertIsInstance(self.c, Consul) self.c._base_path = 'service/good' self.c.get_cluster() @@ -130,7 +136,7 @@ def test_get_cluster(self): self.assertIsInstance(self.c.get_cluster(), Cluster) def test__get_citus_cluster(self): - self.c._citus_group = '0' + self.c._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.c.get_cluster() 
self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) diff --git a/tests/test_ctl.py b/tests/test_ctl.py index 856918123..f5341f88a 100644 --- a/tests/test_ctl.py +++ b/tests/test_ctl.py @@ -1,3 +1,4 @@ +import click import etcd import mock import os @@ -6,10 +7,12 @@ from click.testing import CliRunner from datetime import datetime, timedelta from mock import patch, Mock, PropertyMock +from patroni import global_config from patroni.ctl import ctl, load_config, output_members, get_dcs, parse_dcs, \ get_all_members, get_any_member, get_cursor, query_member, PatroniCtlException, apply_config_changes, \ format_config_for_editing, show_diff, invoke_editor, format_pg_version, CONFIG_FILE_PATH, PatronictlPrettyTable -from patroni.dcs.etcd import AbstractEtcdClientWithFailover, Cluster, Failover +from patroni.dcs import Cluster, Failover +from patroni.postgresql.mpp import get_mpp from patroni.psycopg import OperationalError from patroni.utils import tzutc from prettytable import PrettyTable, ALL @@ -21,26 +24,26 @@ get_cluster_initialized_with_only_leader, get_cluster_not_initialized_without_leader, get_cluster, Member -DEFAULT_CONFIG = { - 'scope': 'alpha', - 'restapi': {'listen': '::', 'certfile': 'a'}, - 'ctl': {'certfile': 'a'}, - 'etcd': {'host': 'localhost:2379'}, - 'citus': {'database': 'citus', 'group': 0}, - 'postgresql': {'data_dir': '.', 'pgpass': './pgpass', 'parameters': {}, 'retry_timeout': 5} -} +def get_default_config(*args): + return { + 'scope': 'alpha', + 'restapi': {'listen': '::', 'certfile': 'a'}, + 'ctl': {'certfile': 'a'}, + 'etcd': {'host': 'localhost:2379', 'retry_timeout': 10, 'ttl': 30}, + 'citus': {'database': 'citus', 'group': 0}, + 'postgresql': {'data_dir': '.', 'pgpass': './pgpass', 'parameters': {}, 'retry_timeout': 5} + } -@patch('patroni.ctl.load_config', Mock(return_value=DEFAULT_CONFIG)) +@patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) +@patch('patroni.ctl.load_config', 
get_default_config) +@patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) class TestCtl(unittest.TestCase): TEST_ROLES = ('master', 'primary', 'leader') @patch('socket.getaddrinfo', socket_getaddrinfo) - @patch.object(AbstractEtcdClientWithFailover, '_get_machines_list', Mock(return_value=['http://remotehost:2379'])) def setUp(self): self.runner = CliRunner() - self.e = get_dcs({'etcd': {'ttl': 30, 'host': 'ok:2379', 'retry_timeout': 10}, - 'citus': {'group': 0}}, 'foo', None) @patch('patroni.ctl.logging.debug') def test_load_config(self, mock_logger_debug): @@ -66,29 +69,31 @@ def test_load_config(self, mock_logger_debug): @patch('patroni.psycopg.connect', psycopg_connect) def test_get_cursor(self): - for role in self.TEST_ROLES: - self.assertIsNone(get_cursor({}, get_cluster_initialized_without_leader(), None, {}, role=role)) - self.assertIsNotNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {}, role=role)) + with click.Context(click.Command('query')) as ctx: + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} + for role in self.TEST_ROLES: + self.assertIsNone(get_cursor(get_cluster_initialized_without_leader(), None, {}, role=role)) + self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {}, role=role)) - # MockCursor returns pg_is_in_recovery as false - self.assertIsNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {}, role='replica')) + # MockCursor returns pg_is_in_recovery as false + self.assertIsNone(get_cursor(get_cluster_initialized_with_leader(), None, {}, role='replica')) - self.assertIsNotNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, role='any')) + self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, role='any')) - # Mutually exclusive options - with self.assertRaises(PatroniCtlException) as e: - get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 
'foo'}, member_name='other', - role='replica') + # Mutually exclusive options + with self.assertRaises(PatroniCtlException) as e: + get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, member_name='other', + role='replica') - self.assertEqual(str(e.exception), '--role and --member are mutually exclusive options') + self.assertEqual(str(e.exception), '--role and --member are mutually exclusive options') - # Invalid member provided - self.assertIsNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, - member_name='invalid')) + # Invalid member provided + self.assertIsNone(get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, + member_name='invalid')) - # Valid member provided - self.assertIsNotNone(get_cursor({}, get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, - member_name='other')) + # Valid member provided + self.assertIsNotNone(get_cursor(get_cluster_initialized_with_leader(), None, {'dbname': 'foo'}, + member_name='other')) def test_parse_dcs(self): assert parse_dcs(None) is None @@ -102,23 +107,20 @@ def test_parse_dcs(self): self.assertRaises(PatroniCtlException, parse_dcs, 'invalid://test') def test_output_members(self): - scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) - cluster = get_cluster_initialized_with_leader(Failover(1, 'foo', 'bar', scheduled_at)) - del cluster.members[1].data['conn_url'] - for fmt in ('pretty', 'json', 'yaml', 'topology'): - self.assertIsNone(output_members({}, cluster, name='abc', fmt=fmt)) - - with patch('click.echo') as mock_echo: - self.assertIsNone(output_members({}, cluster, name='abc', fmt='tsv')) - self.assertEqual(mock_echo.call_args[0][0], 'abc\tother\t\tReplica\trunning\t\tunknown') - - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - def test_switchover(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = 
get_cluster_initialized_with_leader - mock_get_dcs.return_value.set_failover_value = Mock() - + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} + scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) + cluster = get_cluster_initialized_with_leader(Failover(1, 'foo', 'bar', scheduled_at)) + del cluster.members[1].data['conn_url'] + for fmt in ('pretty', 'json', 'yaml', 'topology'): + self.assertIsNone(output_members(cluster, name='abc', fmt=fmt)) + + with patch('click.echo') as mock_echo: + self.assertIsNone(output_members(cluster, name='abc', fmt='tsv')) + self.assertEqual(mock_echo.call_args[0][0], 'abc\tother\t\tReplica\trunning\t\tunknown') + + @patch('patroni.dcs.AbstractDCS.set_failover_value', Mock()) + def test_switchover(self): # Confirm result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') self.assertEqual(result.exit_code, 0) @@ -147,7 +149,7 @@ def test_switchover(self, mock_get_dcs): self.assertEqual(result.exit_code, 0) # Scheduled in pause mode - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0', '--force', '--scheduled', '2015-01-01T12:00:00']) self.assertEqual(result.exit_code, 1) @@ -156,7 +158,8 @@ def test_switchover(self, mock_get_dcs): # Target and source are equal result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nleader\n\ny') self.assertEqual(result.exit_code, 1) - self.assertIn('Switchover target and source are the same', result.output) + self.assertIn("Candidate ['other']", result.output) + self.assertIn('Member leader is already the leader of cluster dummy', result.output) # Candidate is not a member of the cluster result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], 
input='leader\nReality\n\ny') @@ -180,12 +183,12 @@ def test_switchover(self, mock_get_dcs): self.assertIn('Member dummy is not the leader of cluster dummy', result.output) # Errors while sending Patroni REST API request - with patch.object(PoolManager, 'request', Mock(side_effect=Exception)): + with patch('patroni.ctl.request_patroni', Mock(side_effect=Exception)): result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n2300-01-01T12:23:00\ny') self.assertIn('falling back to DCS', result.output) - with patch.object(PoolManager, 'request') as mock_api_request: + with patch('patroni.ctl.request_patroni') as mock_api_request: mock_api_request.return_value.status = 500 result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') self.assertIn('Switchover failed', result.output) @@ -196,64 +199,65 @@ def test_switchover(self, mock_get_dcs): self.assertIn('Switchover failed', result.output) # No members available - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_only_leader - result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') - self.assertEqual(result.exit_code, 1) - self.assertIn('No candidates found to switchover to', result.output) + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(return_value=get_cluster_initialized_with_only_leader())): + result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') + self.assertEqual(result.exit_code, 1) + self.assertIn('No candidates found to switchover to', result.output) # No leader available - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_without_leader - result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') - self.assertEqual(result.exit_code, 1) - self.assertIn('This cluster has no leader', result.output) + with patch('patroni.dcs.AbstractDCS.get_cluster', 
Mock(return_value=get_cluster_initialized_without_leader())): + result = self.runner.invoke(ctl, ['switchover', 'dummy', '--group', '0'], input='leader\nother\n\ny') + self.assertEqual(result.exit_code, 1) + self.assertIn('This cluster has no leader', result.output) # Citus cluster, no group number specified result = self.runner.invoke(ctl, ['switchover', 'dummy', '--force'], input='\n') self.assertEqual(result.exit_code, 1) self.assertIn('For Citus clusters the --group must me specified', result.output) - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - @patch('patroni.ctl.request_patroni', Mock(return_value=MockResponse())) - def test_failover(self, mock_get_dcs): - mock_get_dcs.return_value.set_failover_value = Mock() - + @patch('patroni.dcs.AbstractDCS.set_failover_value', Mock()) + def test_failover(self): # No candidate specified - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader result = self.runner.invoke(ctl, ['failover', 'dummy'], input='0\n') self.assertIn('Failover could be performed only to a specific candidate', result.output) - cluster = get_cluster_initialized_with_leader(sync=('leader', 'other')) + # Candidate is the same as the leader + result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0'], input='leader\n') + self.assertIn("Candidate ['other']", result.output) + self.assertIn('Member leader is already the leader of cluster dummy', result.output) # Temp test to check a fallback to switchover if leader is specified with patch('patroni.ctl._do_failover_or_switchover') as failover_func_mock: result = self.runner.invoke(ctl, ['failover', '--leader', 'leader', 'dummy'], input='0\n') self.assertIn('Supplying a leader name using this command is deprecated', result.output) - failover_func_mock.assert_called_once_with( - DEFAULT_CONFIG, 'switchover', 'dummy', None, 'leader', None, False) + failover_func_mock.assert_called_once_with('switchover', 'dummy', 
None, 'leader', None, False) - # Failover to an async member in sync mode (confirm) + cluster = get_cluster_initialized_with_leader(sync=('leader', 'other')) cluster.members.append(Member(0, 'async', 28, {'api_url': 'http://127.0.0.1:8012/patroni'})) cluster.config.data['synchronous_mode'] = True - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='y\ny') - self.assertIn('Are you sure you want to failover to the asynchronous node async', result.output) + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=cluster)): + # Failover to an async member in sync mode (confirm) + result = self.runner.invoke(ctl, + ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='y\ny') + self.assertIn('Are you sure you want to failover to the asynchronous node async', result.output) + self.assertEqual(result.exit_code, 0) - # Failover to an async member in sync mode (abort) - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='N') - self.assertEqual(result.exit_code, 1) + # Failover to an async member in sync mode (abort) + result = self.runner.invoke(ctl, ['failover', 'dummy', '--group', '0', '--candidate', 'async'], input='N') + self.assertEqual(result.exit_code, 1) + self.assertIn('Aborting failover', result.output) - @patch('patroni.dcs.dcs_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) + @patch('patroni.dynamic_loader.iter_modules', Mock(return_value=['patroni.dcs.dummy', 'patroni.dcs.etcd'])) def test_get_dcs(self): - self.assertRaises(PatroniCtlException, get_dcs, {'dummy': {}}, 'dummy', 0) + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {'dummy': {}}, '__mpp': get_mpp({})} + self.assertRaises(PatroniCtlException, get_dcs, 'dummy', 0) 
@patch('patroni.psycopg.connect', psycopg_connect) @patch('patroni.ctl.query_member', Mock(return_value=([['mock column']], None))) - @patch('patroni.ctl.get_dcs') @patch.object(etcd.Client, 'read', etcd_read) - def test_query(self, mock_get_dcs): - mock_get_dcs.return_value = self.e + def test_query(self): # Mutually exclusive for role in self.TEST_ROLES: result = self.runner.invoke(ctl, ['query', 'alpha', '--member', 'abc', '--role', role]) @@ -286,31 +290,29 @@ def test_query(self, mock_get_dcs): def test_query_member(self): with patch('patroni.ctl.get_cursor', Mock(return_value=MockConnect().cursor())): for role in self.TEST_ROLES: - rows = query_member({}, None, None, None, None, role, 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, role, 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('False' in str(rows)) with patch.object(MockCursor, 'execute', Mock(side_effect=OperationalError('bla'))): - rows = query_member({}, None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) with patch('patroni.ctl.get_cursor', Mock(return_value=None)): # No role nor member given -- generic message - rows = query_member({}, None, None, None, None, None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('No connection is available' in str(rows)) # Member given -- message pointing to member - rows = query_member({}, None, None, None, 'foo', None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, 'foo', None, 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('No connection to member foo' in str(rows)) # Role given -- message pointing to role - rows = query_member({}, None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = 
query_member(None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) self.assertTrue('No connection to role replica' in str(rows)) with patch('patroni.ctl.get_cursor', Mock(side_effect=OperationalError('bla'))): - rows = query_member({}, None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) + rows = query_member(None, None, None, None, 'replica', 'SELECT pg_catalog.pg_is_in_recovery()', {}) - @patch('patroni.ctl.get_dcs') - def test_dsn(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_dsn(self): result = self.runner.invoke(ctl, ['dsn', 'alpha']) assert 'host=127.0.0.1 port=5435' in result.output @@ -323,11 +325,8 @@ def test_dsn(self, mock_get_dcs): result = self.runner.invoke(ctl, ['dsn', 'alpha', '--member', 'dummy']) assert result.exit_code == 1 - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') - def test_reload(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - + @patch('patroni.ctl.request_patroni') + def test_reload(self, mock_post): result = self.runner.invoke(ctl, ['reload', 'alpha'], input='y') assert 'Failed: reload for member' in result.output @@ -339,10 +338,8 @@ def test_reload(self, mock_get_dcs, mock_post): result = self.runner.invoke(ctl, ['reload', 'alpha'], input='y') assert 'Reload request received for member' in result.output - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') - def test_restart_reinit(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + @patch('patroni.ctl.request_patroni') + def test_restart_reinit(self, mock_post): mock_post.return_value.status = 503 result = self.runner.invoke(ctl, ['restart', 'alpha'], input='now\ny\n') assert 'Failed: restart for' in result.output @@ -382,7 +379,7 @@ def test_restart_reinit(self, mock_get_dcs, mock_post): result = 
self.runner.invoke(ctl, ['restart', 'alpha', 'other', '--force', '--scheduled', '2300-10-01T14:30']) assert 'Failed: flush scheduled restart' in result.output - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['restart', 'alpha', 'other', '--force', '--scheduled', '2300-10-01T14:30']) assert result.exit_code == 1 @@ -417,12 +414,10 @@ def test_restart_reinit(self, mock_get_dcs, mock_post): assert 'Failed: another restart is already' in result.output assert result.exit_code == 0 - @patch('patroni.ctl.get_dcs') - def test_remove(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_remove(self): result = self.runner.invoke(ctl, ['remove', 'dummy'], input='\n') assert 'For Citus clusters the --group must me specified' in result.output - result = self.runner.invoke(ctl, ['-k', 'remove', 'alpha', '--group', '0'], input='alpha\nstandby') + result = self.runner.invoke(ctl, ['remove', 'alpha', '--group', '0'], input='alpha\nstandby') assert 'Please confirm' in result.output assert 'You are about to remove all' in result.output # Not typing an exact confirmation @@ -440,37 +435,36 @@ def test_remove(self, mock_get_dcs): assert result.exit_code == 0 def test_ctl(self): - self.runner.invoke(ctl, ['list']) - result = self.runner.invoke(ctl, ['--help']) assert 'Usage:' in result.output def test_get_any_member(self): - for role in self.TEST_ROLES: - self.assertIsNone(get_any_member({}, get_cluster_initialized_without_leader(), None, role=role)) + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} + for role in self.TEST_ROLES: + self.assertIsNone(get_any_member(get_cluster_initialized_without_leader(), None, role=role)) - m = get_any_member({}, get_cluster_initialized_with_leader(), None, role=role) - 
self.assertEqual(m.name, 'leader') + m = get_any_member(get_cluster_initialized_with_leader(), None, role=role) + self.assertEqual(m.name, 'leader') def test_get_all_members(self): - for role in self.TEST_ROLES: - self.assertEqual(list(get_all_members({}, get_cluster_initialized_without_leader(), None, role=role)), []) - - r = list(get_all_members({}, get_cluster_initialized_with_leader(), None, role=role)) - self.assertEqual(len(r), 1) - self.assertEqual(r[0].name, 'leader') + with click.Context(click.Command('list')) as ctx: + ctx.obj = {'__config': {}, '__mpp': get_mpp({})} + for role in self.TEST_ROLES: + self.assertEqual(list(get_all_members(get_cluster_initialized_without_leader(), None, role=role)), []) - r = list(get_all_members({}, get_cluster_initialized_with_leader(), None, role='replica')) - self.assertEqual(len(r), 1) - self.assertEqual(r[0].name, 'other') + r = list(get_all_members(get_cluster_initialized_with_leader(), None, role=role)) + self.assertEqual(len(r), 1) + self.assertEqual(r[0].name, 'leader') - self.assertEqual(len(list(get_all_members({}, get_cluster_initialized_without_leader(), - None, role='replica'))), 2) + r = list(get_all_members(get_cluster_initialized_with_leader(), None, role='replica')) + self.assertEqual(len(r), 1) + self.assertEqual(r[0].name, 'other') - @patch('patroni.ctl.get_dcs') - def test_members(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + self.assertEqual(len(list(get_all_members(get_cluster_initialized_without_leader(), + None, role='replica'))), 2) + def test_members(self): result = self.runner.invoke(ctl, ['list']) assert '127.0.0.1' in result.output assert result.exit_code == 0 @@ -479,127 +473,100 @@ def test_members(self, mock_get_dcs): result = self.runner.invoke(ctl, ['list', '--group', '0']) assert 'Citus cluster: alpha (group: 0, 12345678901) -' in result.output - with patch('patroni.ctl.load_config', Mock(return_value={'scope': 'alpha'})): + config = 
get_default_config() + del config['citus'] + with patch('patroni.ctl.load_config', Mock(return_value=config)): result = self.runner.invoke(ctl, ['list']) assert 'Cluster: alpha (12345678901) -' in result.output with patch('patroni.ctl.load_config', Mock(return_value={})): self.runner.invoke(ctl, ['list']) - @patch('patroni.ctl.get_dcs') - def test_list_extended(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - cluster = get_cluster_initialized_with_leader(sync=('leader', 'other')) - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - + def test_list_extended(self): result = self.runner.invoke(ctl, ['list', 'dummy', '--extended', '--timestamp']) assert '2100' in result.output assert 'Scheduled restart' in result.output - @patch('patroni.ctl.get_dcs') - def test_topology(self, mock_get_dcs): - mock_get_dcs.return_value = self.e + def test_topology(self): cluster = get_cluster_initialized_with_leader() - cascade_member = Member(0, 'cascade', 28, {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5437/postgres', - 'api_url': 'http://127.0.0.1:8012/patroni', - 'state': 'running', - 'tags': {'replicatefrom': 'other'}, - }) - cascade_member_wrong_tags = Member(0, 'wrong_cascade', 28, - {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5438/postgres', - 'api_url': 'http://127.0.0.1:8013/patroni', - 'state': 'running', - 'tags': {'replicatefrom': 'nonexistinghost'}, - }) - cluster.members.append(cascade_member) - cluster.members.append(cascade_member_wrong_tags) - mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['topology', 'dummy']) - assert '+\n| 0 | leader | 127.0.0.1:5435 | Leader |' in result.output - assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output - assert '|\n| 0 | + cascade | 127.0.0.1:5437 | Replica |' in result.output - assert '|\n| 0 | + wrong_cascade | 127.0.0.1:5438 | Replica |' in result.output - - cluster = get_cluster_initialized_without_leader() 
- mock_get_dcs.return_value.get_cluster = Mock(return_value=cluster) - result = self.runner.invoke(ctl, ['topology', 'dummy']) - assert '+\n| 0 | + leader | 127.0.0.1:5435 | Replica |' in result.output - assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output - - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - def test_flush_restart(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - + cluster.members.append(Member(0, 'cascade', 28, + {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5437/postgres', + 'api_url': 'http://127.0.0.1:8012/patroni', 'state': 'running', + 'tags': {'replicatefrom': 'other'}})) + cluster.members.append(Member(0, 'wrong_cascade', 28, + {'conn_url': 'postgres://replicator:rep-pass@127.0.0.1:5438/postgres', + 'api_url': 'http://127.0.0.1:8013/patroni', 'state': 'running', + 'tags': {'replicatefrom': 'nonexistinghost'}})) + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=cluster)): + result = self.runner.invoke(ctl, ['topology', 'dummy']) + assert '+\n| 0 | leader | 127.0.0.1:5435 | Leader |' in result.output + assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output + assert '|\n| 0 | + cascade | 127.0.0.1:5437 | Replica |' in result.output + assert '|\n| 0 | + wrong_cascade | 127.0.0.1:5438 | Replica |' in result.output + + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_without_leader())): + result = self.runner.invoke(ctl, ['topology', 'dummy']) + assert '+\n| 0 | + leader | 127.0.0.1:5435 | Replica |' in result.output + assert '|\n| 0 | + other | 127.0.0.1:5436 | Replica |' in result.output + + @patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) + def test_flush_restart(self): for role in self.TEST_ROLES: - result = self.runner.invoke(ctl, ['-k', 
'flush', 'dummy', 'restart', '-r', role], input='y') + result = self.runner.invoke(ctl, ['flush', 'dummy', 'restart', '-r', role], input='y') assert 'No scheduled restart' in result.output result = self.runner.invoke(ctl, ['flush', 'dummy', 'restart', '--force']) assert 'Success: flush scheduled restart' in result.output - with patch.object(PoolManager, 'request', return_value=MockResponse(404)): + with patch('patroni.ctl.request_patroni', Mock(return_value=MockResponse(404))): result = self.runner.invoke(ctl, ['flush', 'dummy', 'restart', '--force']) assert 'Failed: flush scheduled restart' in result.output - @patch('patroni.ctl.get_dcs') - @patch.object(PoolManager, 'request', Mock(return_value=MockResponse())) - def test_flush_switchover(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) - assert 'No pending scheduled switchover' in result.output + def test_flush_switchover(self): + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())): + result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) + assert 'No pending scheduled switchover' in result.output scheduled_at = datetime.now(tzutc) + timedelta(seconds=600) - mock_get_dcs.return_value.get_cluster = Mock( - return_value=get_cluster_initialized_with_leader(Failover(1, 'a', 'b', scheduled_at))) - result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) - assert result.output.startswith('Success: ') + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(return_value=get_cluster_initialized_with_leader(Failover(1, 'a', 'b', scheduled_at)))): + result = self.runner.invoke(ctl, ['-k', 'flush', 'dummy', 'switchover']) + assert result.output.startswith('Success: ') - mock_get_dcs.return_value.manual_failover = Mock() - with patch.object(PoolManager, 'request', 
side_effect=[MockResponse(409), Exception]): - result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) - assert 'Could not find any accessible member of cluster' in result.output + with patch('patroni.ctl.request_patroni', side_effect=[MockResponse(409), Exception]), \ + patch('patroni.dcs.AbstractDCS.manual_failover', Mock()): + result = self.runner.invoke(ctl, ['flush', 'dummy', 'switchover']) + assert 'Could not find any accessible member of cluster' in result.output - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') @patch('patroni.ctl.polling_loop', Mock(return_value=[1])) - def test_pause_cluster(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - - mock_post.return_value.status = 500 - result = self.runner.invoke(ctl, ['pause', 'dummy']) - assert 'Failed' in result.output + def test_pause_cluster(self): + with patch('patroni.ctl.request_patroni', Mock(return_value=MockResponse(500))): + result = self.runner.invoke(ctl, ['pause', 'dummy']) + assert 'Failed' in result.output - mock_post.return_value.status = 200 - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['pause', 'dummy']) assert 'Cluster is already paused' in result.output result = self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) assert "'pause' request sent" in result.output - mock_get_dcs.return_value.get_cluster = Mock(side_effect=[get_cluster_initialized_with_leader(), - get_cluster(None, None, [], None, None)]) - self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) - member = Member(1, 'other', 28, {}) - mock_get_dcs.return_value.get_cluster = Mock(side_effect=[get_cluster_initialized_with_leader(), - get_cluster(None, None, [member], None, None)]) - self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) 
- - @patch.object(PoolManager, 'request') - @patch('patroni.ctl.get_dcs') - def test_resume_cluster(self, mock_get_dcs, mock_post): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(side_effect=[get_cluster_initialized_with_leader(), get_cluster(None, None, [], None, None)])): + self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) + with patch('patroni.dcs.AbstractDCS.get_cluster', + Mock(side_effect=[get_cluster_initialized_with_leader(), + get_cluster(None, None, [Member(1, 'other', 28, {})], None, None)])): + self.runner.invoke(ctl, ['pause', 'dummy', '--wait']) + + @patch('patroni.ctl.request_patroni') + @patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_with_leader())) + def test_resume_cluster(self, mock_post): mock_post.return_value.status = 200 - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=False)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=False)): result = self.runner.invoke(ctl, ['resume', 'dummy']) assert 'Cluster is not paused' in result.output - with patch('patroni.config.GlobalConfig.is_paused', PropertyMock(return_value=True)): + with patch.object(global_config.__class__, 'is_paused', PropertyMock(return_value=True)): result = self.runner.invoke(ctl, ['resume', 'dummy']) assert 'Success' in result.output @@ -701,67 +668,53 @@ def test_invoke_editor(self, mock_subprocess_call): with patch('shutil.which', Mock(return_value=e)): self.assertRaises(PatroniCtlException, invoke_editor, 'foo: bar\n', 'test') - @patch('patroni.ctl.get_dcs') - def test_show_config(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_show_config(self): self.runner.invoke(ctl, ['show-config', 'dummy']) - @patch('patroni.ctl.get_dcs') 
@patch('subprocess.call', Mock(return_value=0)) - def test_edit_config(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - mock_get_dcs.return_value.set_config_value = Mock(return_value=False) + def test_edit_config(self): os.environ['EDITOR'] = 'true' self.runner.invoke(ctl, ['edit-config', 'dummy']) self.runner.invoke(ctl, ['edit-config', 'dummy', '-s', 'foo=bar']) self.runner.invoke(ctl, ['edit-config', 'dummy', '--replace', 'postgres0.yml']) self.runner.invoke(ctl, ['edit-config', 'dummy', '--apply', '-'], input='foo: bar') self.runner.invoke(ctl, ['edit-config', 'dummy', '--force', '--apply', '-'], input='foo: bar') - mock_get_dcs.return_value.set_config_value.return_value = True - self.runner.invoke(ctl, ['edit-config', 'dummy', '--force', '--apply', '-'], input='foo: bar') - mock_get_dcs.return_value.get_cluster = Mock(return_value=Cluster.empty()) - result = self.runner.invoke(ctl, ['edit-config', 'dummy']) - assert result.exit_code == 1 - assert 'The config key does not exist in the cluster dummy' in result.output - - @patch('patroni.ctl.get_dcs') - def test_version(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader - with patch.object(PoolManager, 'request') as mocked: - result = self.runner.invoke(ctl, ['version']) - assert 'patronictl version' in result.output - mocked.return_value.data = b'{"patroni":{"version":"1.2.3"},"server_version": 100001}' - result = self.runner.invoke(ctl, ['version', 'dummy']) - assert '1.2.3' in result.output - with patch.object(PoolManager, 'request', Mock(side_effect=Exception)): - result = self.runner.invoke(ctl, ['version', 'dummy']) - assert 'failed to get version' in result.output - - @patch('patroni.ctl.get_dcs') - def test_history(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = Mock() - 
mock_get_dcs.return_value.get_cluster.return_value.history.lines = [[1, 67176, 'no recovery target specified']] - result = self.runner.invoke(ctl, ['history']) - assert 'Reason' in result.output + with patch('patroni.dcs.etcd.Etcd.set_config_value', Mock(return_value=True)): + self.runner.invoke(ctl, ['edit-config', 'dummy', '--force', '--apply', '-'], input='foo: bar') + with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=Cluster.empty())): + result = self.runner.invoke(ctl, ['edit-config', 'dummy']) + assert result.exit_code == 1 + assert 'The config key does not exist in the cluster dummy' in result.output + + @patch('patroni.ctl.request_patroni') + def test_version(self, mock_request): + result = self.runner.invoke(ctl, ['version']) + assert 'patronictl version' in result.output + mock_request.return_value.data = b'{"patroni":{"version":"1.2.3"},"server_version": 100001}' + result = self.runner.invoke(ctl, ['version', 'dummy']) + assert '1.2.3' in result.output + mock_request.side_effect = Exception + result = self.runner.invoke(ctl, ['version', 'dummy']) + assert 'failed to get version' in result.output + + def test_history(self): + with patch('patroni.dcs.AbstractDCS.get_cluster') as mock_get_cluster: + mock_get_cluster.return_value.history.lines = [[1, 67176, 'no recovery target specified']] + result = self.runner.invoke(ctl, ['history']) + assert 'Reason' in result.output def test_format_pg_version(self): self.assertEqual(format_pg_version(100001), '10.1') self.assertEqual(format_pg_version(90605), '9.6.5') - @patch('patroni.ctl.get_dcs') - def test_get_members(self, mock_get_dcs): - mock_get_dcs.return_value = self.e - mock_get_dcs.return_value.get_cluster = get_cluster_not_initialized_without_leader - result = self.runner.invoke(ctl, ['reinit', 'dummy']) - assert "cluster doesn\'t have any members" in result.output + def test_get_members(self): + with patch('patroni.dcs.AbstractDCS.get_cluster', + 
Mock(return_value=get_cluster_not_initialized_without_leader())): + result = self.runner.invoke(ctl, ['reinit', 'dummy']) + assert "cluster doesn\'t have any members" in result.output @patch('time.sleep', Mock()) - @patch('patroni.ctl.get_dcs') - def test_reinit_wait(self, mock_get_dcs): - mock_get_dcs.return_value.get_cluster = get_cluster_initialized_with_leader + def test_reinit_wait(self): with patch.object(PoolManager, 'request') as mocked: mocked.side_effect = [Mock(data=s, status=200) for s in [b"reinitialize", b'{"state":"creating replica"}', b'{"state":"running"}']] diff --git a/tests/test_etcd.py b/tests/test_etcd.py index 90402b5f9..d7a423bec 100644 --- a/tests/test_etcd.py +++ b/tests/test_etcd.py @@ -5,8 +5,10 @@ from dns.exception import DNSException from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.etcd import AbstractDCS, EtcdClient, Cluster, Etcd, EtcdError, DnsCachingResolver from patroni.exceptions import DCSError +from patroni.postgresql.mpp import get_mpp from patroni.utils import Retry from urllib3.exceptions import ReadTimeoutError @@ -138,8 +140,9 @@ class TestClient(unittest.TestCase): @patch.object(EtcdClient, '_get_machines_list', Mock(return_value=['http://localhost:2379', 'http://localhost:4001'])) def setUp(self): - self.etcd = Etcd({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 3, - 'srv': 'test', 'scope': 'test', 'name': 'foo'}) + self.etcd = get_dcs({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 3, + 'etcd': {'srv': 'test'}, 'scope': 'test', 'name': 'foo'}) + self.assertIsInstance(self.etcd, Etcd) self.client = self.etcd._client self.client.http.request = http_request self.client.http.request_encode_body = http_request @@ -235,7 +238,7 @@ class TestEtcd(unittest.TestCase): Mock(return_value=['http://localhost:2379', 'http://localhost:4001'])) def setUp(self): self.etcd = Etcd({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 10, - 'host': 'localhost:2379', 'scope': 
'test', 'name': 'foo'}) + 'host': 'localhost:2379', 'scope': 'test', 'name': 'foo'}, get_mpp({})) def test_base_path(self): self.assertEqual(self.etcd._base_path, '/patroni/test') @@ -270,10 +273,12 @@ def test_get_cluster(self): self.assertRaises(EtcdError, self.etcd.get_cluster) def test__get_citus_cluster(self): - self.etcd._citus_group = '0' + self.etcd._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.etcd.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) + self.etcd._base_path = '/service/nocluster' + self.assertTrue(self.etcd.get_cluster().is_empty()) def test_touch_member(self): self.assertFalse(self.etcd.touch_member('')) diff --git a/tests/test_etcd3.py b/tests/test_etcd3.py index 10ab1ea50..fcfd4e4b6 100644 --- a/tests/test_etcd3.py +++ b/tests/test_etcd3.py @@ -4,10 +4,12 @@ import urllib3 from mock import Mock, PropertyMock, patch +from patroni.dcs import get_dcs from patroni.dcs.etcd import DnsCachingResolver from patroni.dcs.etcd3 import PatroniEtcd3Client, Cluster, Etcd3, Etcd3Client, \ Etcd3Error, Etcd3ClientError, ReAuthenticateMode, RetryFailedError, InvalidAuthToken, Unavailable, \ Unknown, UnsupportedEtcdVersion, UserEmpty, AuthFailed, AuthOldRevision, base64_encode +from patroni.postgresql.mpp import get_mpp from threading import Thread from . 
import SleepException, MockResponse @@ -80,9 +82,9 @@ class BaseTestEtcd3(unittest.TestCase): @patch.object(Thread, 'start', Mock()) @patch.object(urllib3.PoolManager, 'urlopen', mock_urlopen) def setUp(self): - self.etcd3 = Etcd3({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 10, - 'host': 'localhost:2378', 'scope': 'test', 'name': 'foo', - 'username': 'etcduser', 'password': 'etcdpassword'}) + self.etcd3 = get_dcs({'namespace': '/patroni/', 'ttl': 30, 'retry_timeout': 10, 'name': 'foo', 'scope': 'test', + 'etcd3': {'host': 'localhost:2378', 'username': 'etcduser', 'password': 'etcdpassword'}}) + self.assertIsInstance(self.etcd3, Etcd3) self.client = self.etcd3._client self.kv_cache = self.client._kv_cache @@ -236,7 +238,7 @@ def test_get_cluster(self): self.assertRaises(Etcd3Error, self.etcd3.get_cluster) def test__get_citus_cluster(self): - self.etcd3._citus_group = '0' + self.etcd3._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.etcd3.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) diff --git a/tests/test_exhibitor.py b/tests/test_exhibitor.py index a908e1fc2..5a72eb21b 100644 --- a/tests/test_exhibitor.py +++ b/tests/test_exhibitor.py @@ -2,6 +2,7 @@ import urllib3 from mock import Mock, patch +from patroni.dcs import get_dcs from patroni.dcs.exhibitor import ExhibitorEnsembleProvider, Exhibitor from patroni.dcs.zookeeper import ZooKeeperError @@ -26,8 +27,9 @@ class TestExhibitor(unittest.TestCase): status=200, body=b'{"servers":["127.0.0.1","127.0.0.2","127.0.0.3"],"port":2181}'))) @patch('patroni.dcs.zookeeper.PatroniKazooClient', MockKazooClient) def setUp(self): - self.e = Exhibitor({'hosts': ['localhost', 'exhibitor'], 'port': 8181, 'scope': 'test', - 'name': 'foo', 'ttl': 30, 'retry_timeout': 10}) + self.e = get_dcs({'exhibitor': {'hosts': ['localhost', 'exhibitor'], 'port': 8181}, + 'scope': 'test', 'name': 'foo', 'ttl': 30, 'retry_timeout': 10}) + 
self.assertIsInstance(self.e, Exhibitor) @patch.object(ExhibitorEnsembleProvider, 'poll', Mock(return_value=True)) @patch.object(MockKazooClient, 'get_children', Mock(side_effect=Exception)) diff --git a/tests/test_ha.py b/tests/test_ha.py index e197d28bc..45f641647 100644 --- a/tests/test_ha.py +++ b/tests/test_ha.py @@ -4,6 +4,7 @@ import sys from mock import Mock, MagicMock, PropertyMock, patch, mock_open +from patroni import global_config from patroni.collections import CaseInsensitiveSet from patroni.config import Config from patroni.dcs import Cluster, ClusterConfig, Failover, Leader, Member, get_dcs, Status, SyncState, TimelineHistory @@ -196,7 +197,7 @@ def run_async(self, func, args=()): @patch('patroni.async_executor.AsyncExecutor.busy', PropertyMock(return_value=False)) @patch('patroni.async_executor.AsyncExecutor.run_async', run_async) @patch('patroni.postgresql.rewind.Thread', Mock()) -@patch('patroni.postgresql.citus.CitusHandler.start', Mock()) +@patch('patroni.postgresql.mpp.citus.CitusHandler.start', Mock()) @patch('subprocess.call', Mock(return_value=0)) @patch('time.sleep', Mock()) class TestHa(PostgresInit): @@ -217,6 +218,7 @@ def setUp(self): self.ha = Ha(MockPatroni(self.p, self.e)) self.ha.old_cluster = self.e.get_cluster() self.ha.cluster = get_cluster_initialized_without_leader() + global_config.update(self.ha.cluster) self.ha.load_cluster_from_dcs = Mock() def test_update_lock(self): @@ -251,8 +253,10 @@ def test_start_as_replica(self): @patch('patroni.dcs.etcd.Etcd.initialize', return_value=True) def test_bootstrap_as_standby_leader(self, initialize): self.p.data_directory_empty = true + self.ha.cluster = get_cluster_not_initialized_without_leader( + cluster_config=ClusterConfig(1, {"standby_cluster": {"port": 5432}}, 1)) + global_config.update(self.ha.cluster) self.ha.cluster = get_cluster_not_initialized_without_leader(cluster_config=ClusterConfig(0, {}, 0)) - self.ha.patroni.config._dynamic_configuration = {"standby_cluster": {"port": 
5432}} self.assertEqual(self.ha.run_cycle(), 'trying to bootstrap a new standby leader') def test_bootstrap_waiting_for_standby_leader(self): @@ -318,7 +322,7 @@ def test_crash_recovery(self): self.ha.state_handler.cancellable._process = Mock() self.ha._crash_recovery_started -= 600 self.ha.cluster.config.data.update({'maximum_lag_on_failover': 10}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertEqual(self.ha.run_cycle(), 'terminated crash recovery because of startup timeout') @patch.object(Rewind, 'ensure_clean_shutdown', Mock()) @@ -509,7 +513,7 @@ def test_no_dcs_connection_primary_demote(self): def test_check_failsafe_topology(self): self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly')) self.ha.cluster = get_cluster_initialized_with_leader_and_failsafe() - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.dcs._last_failsafe = self.ha.cluster.failsafe self.assertEqual(self.ha.run_cycle(), 'demoting self because DCS is not accessible and I was a leader') self.ha.state_handler.name = self.ha.cluster.leader.name @@ -529,7 +533,7 @@ def test_check_failsafe_topology(self): def test_no_dcs_connection_primary_failsafe(self): self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly')) self.ha.cluster = get_cluster_initialized_with_leader_and_failsafe() - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.dcs._last_failsafe = self.ha.cluster.failsafe self.ha.state_handler.name = self.ha.cluster.leader.name self.assertEqual(self.ha.run_cycle(), @@ -546,7 +550,7 @@ def test_readonly_dcs_primary_failsafe(self): def test_no_dcs_connection_replica_failsafe(self): self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly')) 
self.ha.cluster = get_cluster_initialized_with_leader_and_failsafe() - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.ha.update_failsafe({'name': 'leader', 'api_url': 'http://127.0.0.1:8008/patroni', 'conn_url': 'postgres://127.0.0.1:5432/postgres', 'slots': {'foo': 1000}}) self.p.is_primary = false @@ -589,8 +593,8 @@ def test_bootstrap_initialize_lock_failed(self): self.assertEqual(self.ha.bootstrap(), 'failed to acquire initialize lock') @patch('patroni.psycopg.connect', psycopg_connect) - @patch('patroni.postgresql.citus.connect', psycopg_connect) - @patch('patroni.postgresql.citus.quote_ident', Mock()) + @patch('patroni.postgresql.mpp.citus.connect', psycopg_connect) + @patch('patroni.postgresql.mpp.citus.quote_ident', Mock()) @patch.object(Postgresql, 'connection', Mock(return_value=None)) def test_bootstrap_initialized_new_cluster(self): self.ha.cluster = get_cluster_not_initialized_without_leader() @@ -611,8 +615,8 @@ def test_bootstrap_release_initialize_key_on_failure(self): self.assertRaises(PatroniFatalException, self.ha.post_bootstrap) @patch('patroni.psycopg.connect', psycopg_connect) - @patch('patroni.postgresql.citus.connect', psycopg_connect) - @patch('patroni.postgresql.citus.quote_ident', Mock()) + @patch('patroni.postgresql.mpp.citus.connect', psycopg_connect) + @patch('patroni.postgresql.mpp.citus.quote_ident', Mock()) @patch.object(Postgresql, 'connection', Mock(return_value=None)) def test_bootstrap_release_initialize_key_on_watchdog_failure(self): self.ha.cluster = get_cluster_not_initialized_without_leader() @@ -655,7 +659,7 @@ def test_restart(self): @patch.object(ConfigHandler, 'replace_pg_hba', Mock()) @patch.object(ConfigHandler, 'replace_pg_ident', Mock()) @patch.object(PostmasterProcess, 'start', Mock(return_value=MockPostmaster())) - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + 
@patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_worker_restart(self): self.ha.has_lock = true self.ha.patroni.request = Mock() @@ -690,7 +694,7 @@ def test_restart_in_progress(self): self.ha.is_paused = true self.assertEqual(self.ha.run_cycle(), 'PAUSE: restart in progress') - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_manual_failover_from_leader(self): self.ha.has_lock = true # I am the leader @@ -729,7 +733,7 @@ def test_manual_failover_from_leader(self): ('Member %s exceeds maximum replication lag', 'b')) self.ha.cluster.members.pop() - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_manual_switchover_from_leader(self): self.ha.has_lock = true # I am the leader @@ -766,11 +770,11 @@ def test_manual_switchover_from_leader(self): with patch('patroni.ha.logger.info') as mock_info: self.ha.fetch_node_status = get_node_status(wal_position=1) self.ha.cluster.config.data.update({'maximum_lag_on_failover': 5}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertEqual(self.ha.run_cycle(), 'no action. 
I am (postgresql0), the leader with the lock') self.assertEqual(mock_info.call_args_list[0][0], ('Member %s exceeds maximum replication lag', 'leader')) - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_scheduled_switchover_from_leader(self): self.ha.has_lock = true # I am the leader @@ -1032,7 +1036,7 @@ def test_is_healthiest_node(self): def test__is_healthiest_node(self): self.p.is_primary = false self.ha.cluster = get_cluster_initialized_without_leader(sync=('postgresql1', self.p.name)) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members)) self.ha.fetch_node_status = get_node_status() # accessible, in_recovery self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members)) @@ -1049,7 +1053,7 @@ def test__is_healthiest_node(self): with patch.object(Ha, 'is_synchronous_mode', Mock(return_value=True)): self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members)) self.ha.cluster.config.data.update({'maximum_lag_on_failover': 5}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) with patch('patroni.postgresql.Postgresql.last_operation', return_value=1): self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members)) with patch('patroni.postgresql.Postgresql.replica_cached_timeline', return_value=None): @@ -1272,7 +1276,7 @@ def test_failover_immediately_on_zero_primary_start_timeout(self, demote): self.p.is_running = false self.ha.cluster = get_cluster_initialized_with_leader(sync=(self.p.name, 'other')) self.ha.cluster.config.data.update({'synchronous_mode': True, 'primary_start_timeout': 0}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) 
+ global_config.update(self.ha.cluster) self.ha.has_lock = true self.ha.update_lock = true self.ha.fetch_node_status = get_node_status() # accessible, in_recovery @@ -1282,13 +1286,13 @@ def test_failover_immediately_on_zero_primary_start_timeout(self, demote): def test_primary_stop_timeout(self): self.assertEqual(self.ha.primary_stop_timeout(), None) self.ha.cluster.config.data.update({'primary_stop_timeout': 30}) - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) with patch.object(Ha, 'is_synchronous_mode', Mock(return_value=True)): self.assertEqual(self.ha.primary_stop_timeout(), 30) with patch.object(Ha, 'is_synchronous_mode', Mock(return_value=False)): self.assertEqual(self.ha.primary_stop_timeout(), None) self.ha.cluster.config.data['primary_stop_timeout'] = None - self.ha.global_config = self.ha.patroni.config.get_global_config(self.ha.cluster) + global_config.update(self.ha.cluster) self.assertEqual(self.ha.primary_stop_timeout(), None) @patch('patroni.postgresql.Postgresql.follow') @@ -1380,8 +1384,9 @@ def test_process_sync_replication(self): # Test sync set to '*' when synchronous_mode_strict is enabled mock_set_sync.reset_mock() self.p.sync_handler.current_state = Mock(return_value=(CaseInsensitiveSet(), CaseInsensitiveSet())) - with patch('patroni.config.GlobalConfig.is_synchronous_mode_strict', PropertyMock(return_value=True)): - self.ha.run_cycle() + self.ha.cluster.config.data['synchronous_mode_strict'] = True + global_config.update(self.ha.cluster) + self.ha.run_cycle() mock_set_sync.assert_called_once_with(CaseInsensitiveSet('*')) def test_sync_replication_become_primary(self): @@ -1514,7 +1519,6 @@ def test_effective_tags(self): @patch('patroni.postgresql.mtime', Mock(return_value=1588316884)) @patch('builtins.open', Mock(side_effect=Exception)) - @patch.object(Cluster, 'is_unlocked', Mock(return_value=False)) def test_restore_cluster_config(self): 
self.ha.cluster.config.data.clear() self.ha.has_lock = true @@ -1540,7 +1544,7 @@ def stop(*args, **kwargs): self.ha.is_failover_possible = true self.ha.shutdown() - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_shutdown_citus_worker(self): self.ha.is_leader = true self.p.is_running = Mock(side_effect=[Mock(), False]) @@ -1581,6 +1585,11 @@ def test_abort_join(self, exit_mock): self.p.is_primary = false self.ha.run_cycle() exit_mock.assert_called_once_with(1) + self.p.set_role('replica') + self.ha.dcs.initialize = Mock() + with patch.object(Postgresql, 'cb_called', PropertyMock(return_value=True)): + self.assertEqual(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock') + self.ha.dcs.initialize.assert_not_called() @patch.object(Cluster, 'is_unlocked', Mock(return_value=False)) def test_after_pause(self): @@ -1647,7 +1656,7 @@ def test_acquire_lock(self): self.assertRaises(DCSError, self.ha.acquire_lock) self.assertFalse(self.ha.acquire_lock()) - @patch('patroni.postgresql.citus.CitusHandler.is_coordinator', Mock(return_value=False)) + @patch('patroni.postgresql.mpp.AbstractMPPHandler.is_coordinator', Mock(return_value=False)) def test_notify_citus_coordinator(self): self.ha.patroni.request = Mock() self.ha.notify_citus_coordinator('before_demote') diff --git a/tests/test_kubernetes.py b/tests/test_kubernetes.py index b6db7fb31..c493d7999 100644 --- a/tests/test_kubernetes.py +++ b/tests/test_kubernetes.py @@ -8,9 +8,11 @@ import urllib3 from mock import Mock, PropertyMock, mock_open, patch +from patroni.dcs import get_dcs from patroni.dcs.kubernetes import Cluster, k8s_client, k8s_config, K8sConfig, K8sConnectionFailed, \ K8sException, K8sObject, Kubernetes, KubernetesError, KubernetesRetriableException, \ Retry, RetryFailedError, SERVICE_HOST_ENV_NAME, SERVICE_PORT_ENV_NAME +from 
patroni.postgresql.mpp import get_mpp from threading import Thread from . import MockResponse, SleepException @@ -225,11 +227,12 @@ class BaseTestKubernetes(unittest.TestCase): @patch.object(k8s_client.CoreV1Api, 'list_namespaced_pod', mock_list_namespaced_pod, create=True) @patch.object(k8s_client.CoreV1Api, 'list_namespaced_config_map', mock_list_namespaced_config_map, create=True) def setUp(self, config=None): - config = config or {} - config.update(ttl=30, scope='test', name='p-0', loop_wait=10, group=0, - retry_timeout=10, labels={'f': 'b'}, bypass_api_service=True) - self.k = Kubernetes(config) - self.k._citus_group = None + config = {'ttl': 30, 'scope': 'test', 'name': 'p-0', 'loop_wait': 10, 'retry_timeout': 10, + 'kubernetes': {'labels': {'f': 'b'}, 'bypass_api_service': True, **(config or {})}, + 'citus': {'group': 0, 'database': 'postgres'}} + self.k = get_dcs(config) + self.assertIsInstance(self.k, Kubernetes) + self.k._mpp = get_mpp({}) self.assertRaises(AttributeError, self.k._pods._build_cache) self.k._pods._is_ready = True self.assertRaises(TypeError, self.k._kinds._build_cache) @@ -254,7 +257,7 @@ def test_get_cluster(self): self.assertRaises(KubernetesError, self.k.get_cluster) def test__get_citus_cluster(self): - self.k._citus_group = '0' + self.k._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) cluster = self.k.get_cluster() self.assertIsInstance(cluster, Cluster) self.assertIsInstance(cluster.workers[1], Cluster) @@ -466,7 +469,7 @@ class TestCacheBuilder(BaseTestKubernetes): @patch('patroni.dcs.kubernetes.ObjectCache._watch', mock_watch) @patch.object(urllib3.HTTPResponse, 'read_chunked') def test__build_cache(self, mock_read_chunked): - self.k._citus_group = '0' + self.k._mpp = get_mpp({'citus': {'group': 0, 'database': 'postgres'}}) mock_read_chunked.return_value = [json.dumps( {'type': 'MODIFIED', 'object': {'metadata': { 'name': self.k.config_path, 'resourceVersion': '2', 'annotations': {self.k._CONFIG: 'foo'}}}} diff --git 
a/tests/test_mpp.py b/tests/test_mpp.py new file mode 100644 index 000000000..9eb876334 --- /dev/null +++ b/tests/test_mpp.py @@ -0,0 +1,52 @@ +from typing import Any +from patroni.exceptions import PatroniException +from patroni.postgresql.mpp import AbstractMPP, get_mpp, Null + +from . import BaseTestPostgresql +from .test_ha import get_cluster_initialized_with_leader + + +class TestMPP(BaseTestPostgresql): + + def setUp(self): + super(TestMPP, self).setUp() + self.cluster = get_cluster_initialized_with_leader() + + def test_get_handler_impl_exception(self): + class DummyMPP(AbstractMPP): + def __init__(self) -> None: + super().__init__({}) + + @staticmethod + def validate_config(config: Any) -> bool: + return True + + @property + def group(self) -> None: + return None + + @property + def coordinator_group_id(self) -> None: + return None + + @property + def type(self) -> str: + return "dummy" + + mpp = DummyMPP() + self.assertRaises(PatroniException, mpp.get_handler_impl, self.p) + + def test_null_handler(self): + config = {} + mpp = get_mpp(config) + self.assertIsInstance(mpp, Null) + self.assertIsNone(mpp.group) + self.assertTrue(mpp.validate_config(config)) + nullHandler = mpp.get_handler_impl(self.p) + self.assertIsNone(nullHandler.handle_event(self.cluster, {})) + self.assertIsNone(nullHandler.sync_meta_data(self.cluster)) + self.assertIsNone(nullHandler.on_demote()) + self.assertIsNone(nullHandler.schedule_cache_rebuild()) + self.assertIsNone(nullHandler.bootstrap()) + self.assertIsNone(nullHandler.adjust_postgres_gucs({})) + self.assertFalse(nullHandler.ignore_replication_slot({})) diff --git a/tests/test_patroni.py b/tests/test_patroni.py index 19497ab54..2f8428b1c 100644 --- a/tests/test_patroni.py +++ b/tests/test_patroni.py @@ -45,7 +45,7 @@ class MockFrozenImporter(object): @patch('time.sleep', Mock()) @patch('subprocess.call', Mock(return_value=0)) @patch('patroni.psycopg.connect', psycopg_connect) -@patch('urllib3.connection.HTTPConnection.connect', 
Mock(side_effect=Exception)) +@patch('urllib3.PoolManager.request', Mock(side_effect=Exception)) @patch.object(ConfigHandler, 'append_pg_hba', Mock()) @patch.object(ConfigHandler, 'write_postgresql_conf', Mock()) @patch.object(ConfigHandler, 'write_recovery_conf', Mock()) @@ -69,7 +69,7 @@ def test_validate_config(self): self.assertRaises(SystemExit, _main) @patch('pkgutil.iter_importers', Mock(return_value=[MockFrozenImporter()])) - @patch('urllib3.connection.HTTPConnection.connect', Mock(side_effect=Exception)) + @patch('urllib3.PoolManager.request', Mock(side_effect=Exception)) @patch('sys.frozen', Mock(return_value=True), create=True) @patch.object(HTTPServer, '__init__', Mock()) @patch.object(etcd.Client, 'read', etcd_read) @@ -154,6 +154,7 @@ def test_run(self): self.p.api.start = Mock() self.p.logger.start = Mock() self.p.config._dynamic_configuration = {} + self.assertRaises(SleepException, self.p.run) with patch('patroni.dcs.Cluster.is_unlocked', Mock(return_value=True)): self.assertRaises(SleepException, self.p.run) with patch('patroni.config.Config.reload_local_configuration', Mock(return_value=False)): @@ -174,6 +175,20 @@ def test_schedule_next_run(self): self.p.next_run = time.time() - self.p.dcs.loop_wait - 1 self.p.schedule_next_run() + def test__filter_tags(self): + tags = {'noloadbalance': False, 'clonefrom': False, 'nosync': False, 'smth': 'random'} + self.assertEqual(self.p._filter_tags(tags), {'smth': 'random'}) + + tags['clonefrom'] = True + tags['smth'] = False + self.assertEqual(self.p._filter_tags(tags), {'clonefrom': True, 'smth': False}) + + tags = {'nofailover': False, 'failover_priority': 0} + self.assertEqual(self.p._filter_tags(tags), tags) + + tags = {'nofailover': True, 'failover_priority': 1} + self.assertEqual(self.p._filter_tags(tags), tags) + def test_noloadbalance(self): self.p.tags['noloadbalance'] = True self.assertTrue(self.p.noloadbalance) @@ -185,9 +200,11 @@ def test_nofailover(self): # Setting `nofailover: True` has 
precedence (True, 0, True), (True, 1, True), + ('False', 1, True), # because we use bool() for the value # Similarly, setting `nofailover: False` has precedence (False, 0, False), (False, 1, False), + ('', 0, False), # Only when we have `nofailover: None` should we got based on priority (None, 0, True), (None, 1, False), @@ -273,8 +290,8 @@ def test_ensure_unique_name(self): ) with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=bad_cluster)): # If the api of the running node cannot be reached, this implies unique name - with patch('urllib3.connection.HTTPConnection.connect', Mock(side_effect=ConnectionError)): + with patch('urllib3.PoolManager.request', Mock(side_effect=ConnectionError)): self.assertIsNone(self.p.ensure_unique_name()) # Only if the api of the running node is reachable do we throw an error - with patch('urllib3.connection.HTTPConnection.connect', Mock()): + with patch('urllib3.PoolManager.request', Mock()): self.assertRaises(SystemExit, self.p.ensure_unique_name) diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py index cdd0c160b..8a5c491b8 100644 --- a/tests/test_postgresql.py +++ b/tests/test_postgresql.py @@ -5,13 +5,14 @@ import subprocess import time +from copy import deepcopy from mock import Mock, MagicMock, PropertyMock, patch, mock_open import patroni.psycopg as psycopg +from patroni import global_config from patroni.async_executor import CriticalTask from patroni.collections import CaseInsensitiveSet -from patroni.config import GlobalConfig from patroni.dcs import RemoteMember from patroni.exceptions import PostgresConnectionException, PatroniException from patroni.postgresql import Postgresql, STATE_REJECT, STATE_NO_RESPONSE @@ -25,7 +26,8 @@ from patroni.utils import RetryFailedError from threading import Thread, current_thread -from . import BaseTestPostgresql, MockCursor, MockPostmaster, psycopg_connect, mock_available_gucs +from . 
import (BaseTestPostgresql, MockCursor, MockPostmaster, psycopg_connect, mock_available_gucs, + GET_PG_SETTINGS_RESULT) mtime_ret = {} @@ -559,31 +561,103 @@ def test_replica_method_can_work_without_replication_connection(self): @patch('time.sleep', Mock()) @patch.object(Postgresql, 'is_running', Mock(return_value=True)) - def test_reload_config(self): - parameters = self._PARAMETERS.copy() - parameters.pop('f.oo') - parameters['wal_buffers'] = '512' - config = {'pg_hba': [''], 'pg_ident': [''], 'use_unix_socket': True, 'use_unix_socket_repl': True, - 'authentication': {}, - 'retry_timeout': 10, 'listen': '*', 'krbsrvname': 'postgres', 'parameters': parameters} + @patch('patroni.postgresql.config.logger.info') + @patch('patroni.postgresql.config.logger.warning') + def test_reload_config(self, mock_warning, mock_info): + config = deepcopy(self.p.config._config) + + # Nothing changed + self.p.reload_config(config) + mock_info.assert_called_once_with('No PostgreSQL configuration items changed, nothing to reload.') + mock_warning.assert_not_called() + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # Handle wal_buffers + self.p.config._config['parameters']['wal_buffers'] = '512' self.p.reload_config(config) - parameters['b.ar'] = 'bar' - with patch.object(MockCursor, 'fetchall', - Mock(side_effect=[[('wal_block_size', '8191', None, 'integer', 'internal'), - ('wal_segment_size', '2048', '8kB', 'integer', 'internal'), - ('shared_buffers', '16384', '8kB', 'integer', 'postmaster'), - ('wal_buffers', '-1', '8kB', 'integer', 'postmaster'), - ('port', '5433', None, 'integer', 'postmaster')], Exception])): + mock_info.assert_called_once_with('No PostgreSQL configuration items changed, nothing to reload.') + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + config = deepcopy(self.p.config._config) + + # hba/ident_changed + config['pg_hba'] = [''] + config['pg_ident'] = [''] + self.p.reload_config(config) + 
mock_info.assert_called_once_with('Reloading PostgreSQL configuration.') + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # Postmaster parameter change (pending_restart) + init_max_worker_processes = config['parameters']['max_worker_processes'] + config['parameters']['max_worker_processes'] *= 2 + with patch('patroni.postgresql.Postgresql._query', Mock(side_effect=[GET_PG_SETTINGS_RESULT, [(1,)]])): self.p.reload_config(config) - parameters['autovacuum'] = 'on' + self.assertEqual(mock_info.call_args_list[0][0], ('Changed %s from %s to %s (restart might be required)', + 'max_worker_processes', str(init_max_worker_processes), + config['parameters']['max_worker_processes'])) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, True) + + mock_info.reset_mock() + + # Reset to the initial value without restart + config['parameters']['max_worker_processes'] = init_max_worker_processes + self.p.reload_config(config) + self.assertEqual(mock_info.call_args_list[0][0], ('Changed %s from %s to %s', 'max_worker_processes', + init_max_worker_processes * 2, + str(config['parameters']['max_worker_processes']))) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # User-defined parameter changed (removed) + config['parameters'].pop('f.oo') self.p.reload_config(config) - parameters['autovacuum'] = 'off' - parameters.pop('search_path') - config['listen'] = '*:5433' + self.assertEqual(mock_info.call_args_list[0][0], ('Changed %s from %s to %s', 'f.oo', 'bar', None)) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, False) + + mock_info.reset_mock() + + # Non-postmaster parameter change + config['parameters']['autovacuum'] = 'off' self.p.reload_config(config) - 
parameters['unix_socket_directories'] = '.' + self.assertEqual(mock_info.call_args_list[0][0], ("Changed %s from %s to %s", 'autovacuum', 'on', 'off')) + self.assertEqual(mock_info.call_args_list[1][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, False) + + config['parameters']['autovacuum'] = 'on' + mock_info.reset_mock() + + # Remove invalid parameter + config['parameters']['invalid'] = 'value' self.p.reload_config(config) - self.p.config.resolve_connection_addresses() + self.assertEqual(mock_warning.call_args_list[0][0], + ('Removing invalid parameter `%s` from postgresql.parameters', 'invalid')) + config['parameters'].pop('invalid') + + mock_warning.reset_mock() + mock_info.reset_mock() + + # Non-empty result (outside changes) and exception while querying pending_restart parameters + with patch('patroni.postgresql.Postgresql._query', + Mock(side_effect=[GET_PG_SETTINGS_RESULT, [(1,)], GET_PG_SETTINGS_RESULT, Exception])): + self.p.reload_config(config, True) + self.assertEqual(mock_info.call_args_list[0][0], ('Reloading PostgreSQL configuration.',)) + self.assertEqual(self.p.pending_restart, True) + + # Invalid values, just to increase silly coverage in postgresql.validator. + # One day we will have proper tests there. 
+ config['parameters']['autovacuum'] = 'of' # Bool.transform() + config['parameters']['vacuum_cost_limit'] = 'smth' # Number.transform() + self.p.reload_config(config, True) + self.assertEqual(mock_warning.call_args_list[-1][0][0], 'Exception %r when running query') def test_resolve_connection_addresses(self): self.p.config._config['use_unix_socket'] = self.p.config._config['use_unix_socket_repl'] = True @@ -692,12 +766,12 @@ def time_in_state(*args): def test_get_server_parameters(self): config = {'parameters': {'wal_level': 'hot_standby', 'max_prepared_transactions': 100}, 'listen': '0'} - self.p._global_config = GlobalConfig({'synchronous_mode': True}) - self.p.config.get_server_parameters(config) - self.p._global_config = GlobalConfig({'synchronous_mode': True, 'synchronous_mode_strict': True}) - self.p.config.get_server_parameters(config) - self.p.config.set_synchronous_standby_names('foo') - self.assertTrue(str(self.p.config.get_server_parameters(config)).startswith(' None: ... def __setitem__(self, key, val) -> None: ...