Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase version requirements for PostgreSQL and PostGIS #3591

Merged
merged 4 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/build-nominatim/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
shell: bash
- name: Install${{ matrix.flavour }} prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite-dev libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 psycopg==3.1.7 datrie asyncpg aiosqlite
else
Expand Down
37 changes: 9 additions & 28 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,16 @@ jobs:
needs: create-archive
strategy:
matrix:
flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
flavour: ["ubuntu-20", "ubuntu-24"]
include:
- flavour: oldstuff
ubuntu: 20
postgresql: '9.6'
postgis: '2.5'
lua: '5.1'
- flavour: ubuntu-20
ubuntu: 20
postgresql: 13
postgresql: 12
postgis: 3
lua: '5.3'
- flavour: ubuntu-22
ubuntu: 22
postgresql: 15
lua: '5.1'
- flavour: ubuntu-24
ubuntu: 24
postgresql: 17
postgis: 3
lua: '5.3'

Expand Down Expand Up @@ -80,37 +75,25 @@ jobs:
flavour: ${{ matrix.flavour }}
lua: ${{ matrix.lua }}

- name: Install test prerequisites (behave from apt)
run: sudo apt-get install -y -qq python3-behave
if: matrix.flavour == 'ubuntu-20'

- name: Install test prerequisites (behave from pip)
- name: Install test prerequisites
run: pip3 install behave==1.2.6
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')

- name: Install test prerequisites (from apt for Ununtu 2x)
- name: Install test prerequisites
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
if: matrix.flavour != 'oldstuff'

- name: Install newer pytest-asyncio
run: pip3 install -U pytest-asyncio
if: matrix.flavour == 'ubuntu-20'

- name: Install test prerequisites (from pip for Ubuntu 18)
run: pip3 install pytest pytest-asyncio uvicorn
if: matrix.flavour == 'oldstuff'

- name: Install Python webservers
run: pip3 install falcon starlette asgi_lifespan

- name: Install latest flake8
run: pip3 install -U flake8
if: matrix.flavour == 'ubuntu-22'

- name: Python linting
run: python3 -m flake8 src
working-directory: Nominatim
if: matrix.flavour == 'ubuntu-22'

- name: Python unit tests
run: python3 -m pytest test/python
Expand All @@ -124,12 +107,10 @@ jobs:

- name: Install mypy and typechecking info
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
if: matrix.flavour != 'oldstuff'

- name: Python static typechecking
run: python3 -m mypy --strict src
run: python3 -m mypy --strict --python-version 3.8 src
working-directory: Nominatim
if: matrix.flavour != 'oldstuff'

install:
runs-on: ubuntu-latest
Expand Down
9 changes: 2 additions & 7 deletions docs/admin/Installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,10 @@ and can't offer support.

### Software

!!! Warning
For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
otherwise import and queries will be slow to the point of being unusable.
Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.

For running Nominatim:

* [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
* [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
* [PostgreSQL](https://www.postgresql.org) (12+ will work, 13+ strongly recommended)
* [PostGIS](https://postgis.net) (3.0+ will work, 3.2+ strongly recommended)
* [osm2pgsql](https://osm2pgsql.org) (1.8+, optional when building with CMake)
* [Python 3](https://www.python.org/) (3.7+)

Expand Down
24 changes: 10 additions & 14 deletions lib-sql/indices.sql
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,14 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode
---
CREATE INDEX IF NOT EXISTS idx_search_name_centroid
ON search_name USING GIST (centroid) {{db.tablespace.search_index}};

{% if postgres.has_index_non_key_column %}
---
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
ON placex USING btree (parent_place_id)
INCLUDE (housenumber) {{db.tablespace.search_index}}
WHERE housenumber is not null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
ON location_property_osmline USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
WHERE startnumber is not null;
{% endif %}

---
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
ON placex USING btree (parent_place_id)
INCLUDE (housenumber) {{db.tablespace.search_index}}
WHERE housenumber is not null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
ON location_property_osmline USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
WHERE startnumber is not null;
{% endif %}
6 changes: 3 additions & 3 deletions lib-sql/tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -184,21 +184,21 @@ CREATE INDEX idx_placex_geometry_address_area_candidates ON placex

-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');

-- Usage: - linking of similar named places to boundaries
-- - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26
and class = 'place' and type != 'postcode';

-- Usage: - is node part of a way?
-- - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and rank_search >= 26;

-- Usage: - linking place nodes by wikidata tag to boundaries
Expand Down
2 changes: 0 additions & 2 deletions lib-sql/tiger_import_finish.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
--index only on parent_place_id
CREATE INDEX IF NOT EXISTS idx_location_property_tiger_parent_place_id_imp
ON location_property_tiger_import (parent_place_id)
{% if postgres.has_index_non_key_column %}
INCLUDE (startnumber, endnumber, step)
{% endif %}
{{db.tablespace.aux_index}};
CREATE UNIQUE INDEX IF NOT EXISTS idx_location_property_tiger_place_id_imp
ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}};
Expand Down
18 changes: 8 additions & 10 deletions src/nominatim_api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,19 +138,17 @@ def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
if server_version >= 110000:
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0

if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")

self._property_cache['DB:server_version'] = server_version

Expand Down
11 changes: 2 additions & 9 deletions src/nominatim_db/db/sql_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import jinja2

from .connection import Connection, server_version_tuple, postgis_version_tuple
from .connection import Connection
from ..config import Configuration
from ..db.query_pool import QueryPool

Expand Down Expand Up @@ -69,14 +69,7 @@ def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
""" Set up a dictionary with various optional Postgresql/Postgis features that
depend on the database version.
"""
pg_version = server_version_tuple(conn)
postgis_version = postgis_version_tuple(conn)
pg11plus = pg_version >= (11, 0, 0)
ps3 = postgis_version >= (3, 0)
return {
'has_index_non_key_column': pg11plus,
'spgist_geom': 'SPGIST' if pg11plus and ps3 else 'GIST'
}
return {}


class SQLPreprocessor:
Expand Down
107 changes: 32 additions & 75 deletions src/nominatim_db/tokenizer/icu_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from psycopg.types.json import Jsonb
from psycopg import sql as pysql

from ..db.connection import connect, Connection, Cursor, server_version_tuple, \
from ..db.connection import connect, Connection, Cursor, \
drop_tables, table_exists, execute_scalar
from ..config import Configuration
from ..db.sql_preprocessor import SQLPreprocessor
Expand Down Expand Up @@ -110,80 +110,37 @@ def update_statistics(self, config: Configuration, threads: int = 2) -> None:
cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}')
.format(pysql.Literal(min(threads, 6),)))

if server_version_tuple(conn) < (12, 0):
LOG.info('Computing word frequencies')
drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON word_frequencies(id)')
cur.execute("""CREATE TEMP TABLE addressword_frequencies AS
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON addressword_frequencies(id)')
cur.execute("""
CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
INOUT info JSONB)
AS $$
DECLARE rec RECORD;
BEGIN
IF info is null THEN
info = '{}'::jsonb;
END IF;
FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('count', rec.count);
END LOOP;
FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('addr_count', rec.count);
END LOOP;
IF info = '{}'::jsonb THEN
info = null;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
""")
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
word_freq_update(word_id, info) as info
FROM word
""")
drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
else:
LOG.info('Computing word frequencies')
drop_tables(conn, 'word_frequencies')
cur.execute("""
CREATE TEMP TABLE word_frequencies AS
WITH word_freq AS MATERIALIZED (
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id),
addr_freq AS MATERIALIZED (
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id)
SELECT coalesce(a.id, w.id) as id,
(CASE WHEN w.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('count', w.count) END
||
CASE WHEN a.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('addr_count', a.count) END) as info
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
""")
cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
cur.execute('ANALYSE word_frequencies')
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
(CASE WHEN wf.info is null THEN word.info
ELSE coalesce(word.info, '{}'::jsonb) || wf.info
END) as info
FROM word LEFT JOIN word_frequencies wf
ON word.word_id = wf.id
""")
drop_tables(conn, 'word_frequencies')
LOG.info('Computing word frequencies')
drop_tables(conn, 'word_frequencies')
cur.execute("""
CREATE TEMP TABLE word_frequencies AS
WITH word_freq AS MATERIALIZED (
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id),
addr_freq AS MATERIALIZED (
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id)
SELECT coalesce(a.id, w.id) as id,
(CASE WHEN w.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('count', w.count) END
||
CASE WHEN a.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('addr_count', a.count) END) as info
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
""")
cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
cur.execute('ANALYSE word_frequencies')
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
(CASE WHEN wf.info is null THEN word.info
ELSE coalesce(word.info, '{}'::jsonb) || wf.info
END) as info
FROM word LEFT JOIN word_frequencies wf
ON word.word_id = wf.id
""")
drop_tables(conn, 'word_frequencies')

with conn.cursor() as cur:
cur.execute('SET max_parallel_workers_per_gather TO 0')
Expand Down
9 changes: 4 additions & 5 deletions src/nominatim_db/tools/check_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from textwrap import dedent

from ..config import Configuration
from ..db.connection import connect, Connection, server_version_tuple, \
from ..db.connection import connect, Connection, \
index_exists, table_exists, execute_scalar
from ..db import properties
from ..errors import UsageError
Expand Down Expand Up @@ -121,10 +121,9 @@ def _get_indexes(conn: Connection) -> List[str]:
if table_exists(conn, 'search_name'):
indexes.extend(('idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'))
if server_version_tuple(conn) >= (11, 0, 0):
indexes.extend(('idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))
'idx_search_name_centroid',
'idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))

# These won't exist if --no-updates import was used
if table_exists(conn, 'place'):
Expand Down
5 changes: 1 addition & 4 deletions src/nominatim_db/tools/database_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,7 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')

postgis_version = postgis_version_tuple(conn)
if postgis_version[0] >= 3:
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')

conn.commit()

Expand Down
Loading