From 23e36b54c07e4d632e7891a7a3a4cbeb40712816 Mon Sep 17 00:00:00 2001 From: pert5432 <63555269+pert5432@users.noreply.github.com> Date: Thu, 12 Sep 2024 23:32:51 +0200 Subject: [PATCH] feat: Postgis support in ParadeDB (#1636) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: pert5432 <63555269+pert5432@users.noreply.github.com> Co-authored-by: Philippe Noël <21990816+philippemnoel@users.noreply.github.com> --- README.md | 2 +- docker/Dockerfile | 13 +++++++++++-- docker/bootstrap.sh | 8 ++++++++ docs/deploy/extensions.mdx | 26 +++++++++++++++++--------- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7f3bd63bf5..8389a9a350 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ ParadeDB is currently in Public Beta. Star and watch this repository to get noti - [x] Docker image based on [Postgres](https://hub.docker.com/_/postgres) ([see deployment instructions](https://docs.paradedb.com/deploy/aws)) - [x] Kubernetes Helm chart based on [CloudNativePG](https://artifacthub.io/packages/helm/cloudnative-pg/cloudnative-pg) ([see deployment instructions](https://docs.paradedb.com/deploy/helm)) - [x] Specialized Workloads - - [ ] Support for geospatial data with [PostGIS](https://github.com/postgis/postgis) + - [x] Support for geospatial data with [PostGIS](https://github.com/postgis/postgis) - [x] Support for cron jobs with [pg_cron](https://github.com/citusdata/pg_cron) - [x] Support for basic incremental view maintenance (IVM) via [pg_ivm](https://github.com/sraoss/pg_ivm) diff --git a/docker/Dockerfile b/docker/Dockerfile index ffa80e4c72..4c00231451 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -175,6 +175,7 @@ ENV PG_VERSION_MAJOR=${PG_VERSION_MAJOR} SHELL ["/bin/bash", "-o", "pipefail", "-c", "-e"] + # Copy third-party extensions from their builder stages COPY --from=builder-pgvector /tmp/pgvector/*.so /usr/lib/postgresql/${PG_VERSION_MAJOR}/lib/ COPY 
--from=builder-pgvector /tmp/pgvector/*.control /usr/share/postgresql/${PG_VERSION_MAJOR}/extension/ @@ -220,8 +221,16 @@ RUN apt-get update && \ find /var/cache -type f -exec truncate --size 0 {} \; && \ find /var/log -type f -exec truncate --size 0 {} \; -# This is required for TLS connection to PostHog, which is used for telemetry. -RUN apt-get update && apt-get install -y --no-install-recommends \ +# Install Postgis and ca-certificates +# ca-certificates required for TLS connection to PostHog (which is used for telemetry) and PostGIS +ENV POSTGIS_MAJOR 3 +ENV POSTGIS_VERSION 3.4.2+dfsg-1.pgdg120+1 + +RUN apt-get update \ + && apt-cache showpkg postgresql-$PG_VERSION_MAJOR-postgis-$POSTGIS_MAJOR \ + && apt-get install -y --no-install-recommends \ + postgresql-$PG_VERSION_MAJOR-postgis-$POSTGIS_MAJOR=$POSTGIS_VERSION \ + postgresql-$PG_VERSION_MAJOR-postgis-$POSTGIS_MAJOR-scripts \ ca-certificates \ && rm -rf /var/lib/apt/lists/* && \ update-ca-certificates diff --git a/docker/bootstrap.sh b/docker/bootstrap.sh index 2047447df9..adff20039b 100755 --- a/docker/bootstrap.sh +++ b/docker/bootstrap.sh @@ -25,6 +25,14 @@ for DB in template_paradedb "$POSTGRES_DB"; do CREATE EXTENSION IF NOT EXISTS pg_ivm; CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS vectorscale; + + CREATE EXTENSION IF NOT EXISTS postgis; + CREATE EXTENSION IF NOT EXISTS postgis_topology; + -- Reconnect to update pg_setting.resetval + -- See https://github.com/postgis/docker-postgis/issues/288 + \c + CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; + CREATE EXTENSION IF NOT EXISTS postgis_tiger_geocoder; EOSQL done diff --git a/docs/deploy/extensions.mdx b/docs/deploy/extensions.mdx index b9ad16c49a..a697f8e998 100644 --- a/docs/deploy/extensions.mdx +++ b/docs/deploy/extensions.mdx @@ -7,6 +7,7 @@ Postgres has a rich ecosystem of extensions. 
To keep the size of the ParadeDB Do - `pg_search` for full text search - `pg_analytics` for fast queries over data lakes - `pgvector` for vector search +- `postgis` for geospatial search - `pg_ivm` for incremental materialized views - `pg_cron` for cron jobs @@ -27,7 +28,7 @@ The process for installing an extension varies by extension. Generally speaking, - Run `CREATE EXTENSION <extension_name>` We recommend installing third party extensions from prebuilt binaries to keep the image size small. As an -example, let's install [PostGIS](https://github.com/postgis/postgis), an extension for geospatial data. +example, let's install [pg_partman](https://github.com/pgpartman/pg_partman), an extension for managing table partition sets. ### Install Prebuilt Binaries @@ -41,12 +42,12 @@ docker exec -it --user root paradedb bash This command assumes that your ParadeDB container name is `paradedb`. -Next, install the [prebuilt binaries](https://postgis.net/documentation/getting_started/install_ubuntu/). +Next, install the [prebuilt binaries](https://pkgs.org/search/?q=partman). Most popular Postgres extensions can be installed with `apt-get install`. ```bash apt-get update -apt-get install -y --no-install-recommends postgis postgresql-16-postgis-3 +apt-get install -y --no-install-recommends postgresql-16-partman ``` @@ -57,14 +58,19 @@ apt-get install -y --no-install-recommends postgis postgresql-16-postgis-3 ### Add to `shared_preload_libraries` -PostGIS does not need to be added to `shared_preload_libraries` in `postgresql.conf`, so we skip this step. 
- + If you are installing an extension which requires this step, you can do so -via the following command, replacing `<extension_name>` by your extension's name: +via the following command, replacing `<extension_name>` with your extension's name: ```bash -sed -i "/^shared_preload_libraries/s/'\([^']*\)'/'\1,<extension_name>'/" /var/lib/postgresql/data/postgresql.conf` +sed -i "/^shared_preload_libraries/s/'\([^']*\)'/'\1,<extension_name>'/" /var/lib/postgresql/data/postgresql.conf +``` + +For `pg_partman`, the command is: + +```bash +sed -i "/^shared_preload_libraries/s/'\([^']*\)'/'\1,pg_partman_bgw'/" /var/lib/postgresql/data/postgresql.conf ``` @@ -76,7 +82,9 @@ Postgres must be restarted afterwards. We recommend simply restarting the Docker Connect to ParadeDB via `psql` and create the extension. ```sql -CREATE EXTENSION postgis; +CREATE EXTENSION pg_partman; ``` -PostGIS is now ready to use! +pg_partman is now ready to use! + +Note that this is a simple example of installing `pg_partman`. The full list of settings and optional dependencies can be found in the [official installation instructions](https://github.com/pgpartman/pg_partman?tab=readme-ov-file#installation).