Skip to content

Commit

Permalink
Merge pull request #3487 from lonvia/port-to-psycopg3
Browse files Browse the repository at this point in the history
Move importer code to psycopg3
  • Loading branch information
lonvia authored Jul 29, 2024
2 parents e3353de + 253dc7d commit 0add25e
Show file tree
Hide file tree
Showing 66 changed files with 1,136 additions and 1,686 deletions.
4 changes: 2 additions & 2 deletions .github/actions/build-nominatim/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ runs:
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
pip3 install MarkupSafe==2.0.1 python-dotenv jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 psycopg==3.1.7 datrie asyncpg aiosqlite
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-dotenv python3-yaml
pip3 install sqlalchemy psycopg aiosqlite
fi
shell: bash
Expand Down
10 changes: 4 additions & 6 deletions docs/admin/Installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,15 @@ For running Nominatim:

Furthermore the following Python libraries are required:

* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Psycopg3](https://www.psycopg.org)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
* one of
* [psycopg3](https://www.psycopg.org)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)

These will be installed automatically, when using pip installation.
These will be installed automatically when using pip installation.

When using legacy CMake-based installation:

Expand All @@ -69,6 +65,8 @@ For running continuous updates:

For running the Python frontend:

* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+, only when using SQLAlchemy < 2.0)
* one of the following web frameworks:
* [falcon](https://falconframework.org/) (3.0+)
* [starlette](https://www.starlette.io/)
Expand Down
2 changes: 1 addition & 1 deletion lib-sql/indices.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
-- Index is needed during import but can be dropped as soon as a full
-- geometry index is in place. The partial index is almost as big as the full
-- index.
---
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
Expand Down Expand Up @@ -60,7 +61,6 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode
---
DROP INDEX IF EXISTS idx_placex_geometry_address_area_candidates;
DROP INDEX IF EXISTS idx_placex_geometry_buildings;
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
DROP INDEX IF EXISTS idx_placex_wikidata;
DROP INDEX IF EXISTS idx_placex_rank_address_sector;
DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector;
Expand Down
2 changes: 1 addition & 1 deletion packaging/nominatim-db/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ classifiers = [
"Operating System :: OS Independent",
]
dependencies = [
"psycopg2-binary",
"psycopg",
"python-dotenv",
"jinja2",
"pyYAML>=5.1",
Expand Down
14 changes: 7 additions & 7 deletions src/nominatim_api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"""
Implementation of classes for API access via libraries.
"""
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple, cast
import asyncio
import sys
import contextlib
Expand Down Expand Up @@ -107,16 +107,16 @@ async def setup_database(self) -> None:
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
else:
dsn = self.config.get_database_params()
query = {k: v for k, v in dsn.items()
query = {k: str(v) for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}

dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'),
password=dsn.get('password'),
host=dsn.get('host'),
port=int(dsn['port']) if 'port' in dsn else None,
database=cast(str, dsn.get('dbname')),
username=cast(str, dsn.get('user')),
password=cast(str, dsn.get('password')),
host=cast(str, dsn.get('host')),
port=int(cast(str, dsn['port'])) if 'port' in dsn else None,
query=query)

engine = sa_asyncio.create_async_engine(dburl, **extra_args)
Expand Down
35 changes: 22 additions & 13 deletions src/nominatim_db/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import sys
import argparse
import asyncio
from pathlib import Path

from .config import Configuration
Expand Down Expand Up @@ -170,22 +171,30 @@ def run(self, args: NominatimArgs) -> int:
raise UsageError("PHP frontend not configured.")
run_php_server(args.server, args.project_dir / 'website')
else:
import uvicorn # pylint: disable=import-outside-toplevel
server_info = args.server.split(':', 1)
host = server_info[0]
if len(server_info) > 1:
if not server_info[1].isdigit():
raise UsageError('Invalid format for --server parameter. Use <host>:<port>')
port = int(server_info[1])
else:
port = 8088
asyncio.run(self.run_uvicorn(args))

server_module = importlib.import_module(f'nominatim_api.server.{args.engine}.server')
return 0

app = server_module.get_application(args.project_dir)
uvicorn.run(app, host=host, port=port)

return 0
async def run_uvicorn(self, args: NominatimArgs) -> None:
    """Serve the Python frontend with uvicorn until the server stops.

    ``args.server`` is read as ``<host>[:<port>]``, split at the first
    colon. When no colon is present, port 8088 is used. The application
    is obtained from ``get_application()`` of the module
    ``nominatim_api.server.<args.engine>.server``.

    Raises:
        UsageError: when the text after the colon is not a decimal number.
    """
    import uvicorn # pylint: disable=import-outside-toplevel

    host, colon, port_text = args.server.partition(':')
    if not colon:
        port = 8088
    elif port_text.isdecimal():
        # isdecimal() only accepts characters that int() can convert.
        # isdigit() also lets through characters such as superscript
        # digits, which would surface as a ValueError rather than as
        # the usage error below.
        port = int(port_text)
    else:
        raise UsageError('Invalid format for --server parameter. Use <host>:<port>')

    server_module = importlib.import_module(f'nominatim_api.server.{args.engine}.server')
    app = server_module.get_application(args.project_dir)

    # Drive the server through its coroutine API because this method is
    # itself awaited from a running event loop.
    server = uvicorn.Server(uvicorn.Config(app, host=host, port=port))
    await server.serve()


def get_set_parser() -> CommandlineParser:
Expand Down
23 changes: 16 additions & 7 deletions src/nominatim_db/clicmd/add_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import cast
import argparse
import logging
import asyncio

import psutil

Expand Down Expand Up @@ -64,15 +65,10 @@ def add_args(self, parser: argparse.ArgumentParser) -> None:


def run(self, args: NominatimArgs) -> int:
from ..tokenizer import factory as tokenizer_factory
from ..tools import tiger_data, add_osm_data
from ..tools import add_osm_data

if args.tiger_data:
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
return tiger_data.add_tiger_data(args.tiger_data,
args.config,
args.threads or psutil.cpu_count() or 1,
tokenizer)
return asyncio.run(self._add_tiger_data(args))

osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
if args.file or args.diff:
Expand All @@ -99,3 +95,16 @@ def run(self, args: NominatimArgs) -> int:
osm2pgsql_params)

return 0


async def _add_tiger_data(self, args: NominatimArgs) -> int:
    """Import the TIGER data named by ``args.tiger_data``.

    Loads the tokenizer for the configured database and hands the work
    to ``tiger_data.add_tiger_data()``, whose result is returned as is.
    The thread count is ``args.threads`` when set, otherwise the CPU
    count reported by psutil, otherwise 1.
    """
    from ..tokenizer import factory as tokenizer_factory
    from ..tools import tiger_data

    assert args.tiger_data

    tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
    num_threads = args.threads or psutil.cpu_count() or 1

    result = await tiger_data.add_tiger_data(args.tiger_data, args.config,
                                             num_threads, tokenizer)
    return result
4 changes: 2 additions & 2 deletions src/nominatim_db/clicmd/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import random

from ..errors import UsageError
from ..db.connection import connect
from ..db.connection import connect, table_exists
from .args import NominatimArgs

# Do not repeat documentation of subcommand classes.
Expand Down Expand Up @@ -115,7 +115,7 @@ def _warm(self, args: NominatimArgs) -> int:

tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
with connect(args.config.get_libpq_dsn()) as conn:
if conn.table_exists('search_name'):
if table_exists(conn, 'search_name'):
words = tokenizer.most_frequent_words(conn, 1000)
else:
words = []
Expand Down
34 changes: 21 additions & 13 deletions src/nominatim_db/clicmd/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Implementation of the 'index' subcommand.
"""
import argparse
import asyncio

import psutil

Expand Down Expand Up @@ -44,23 +45,30 @@ def add_args(self, parser: argparse.ArgumentParser) -> None:


def run(self, args: NominatimArgs) -> int:
from ..indexer.indexer import Indexer
from ..tokenizer import factory as tokenizer_factory

tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)

indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)

if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)
if not args.boundaries_only:
indexer.index_by_rank(args.minrank, args.maxrank)
indexer.index_postcodes()
asyncio.run(self._do_index(args))

if not args.no_boundaries and not args.boundaries_only \
and args.minrank == 0 and args.maxrank == 30:
with connect(args.config.get_libpq_dsn()) as conn:
status.set_indexed(conn, True)

return 0


async def _do_index(self, args: NominatimArgs) -> None:
    """Run the indexing passes selected by ``args``.

    The passes are repeated until ``indexer.has_pending()`` reports
    nothing left; they always run at least once.
    """
    from ..tokenizer import factory as tokenizer_factory
    from ..indexer.indexer import Indexer

    tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
    dsn = args.config.get_libpq_dsn()
    num_threads = args.threads or psutil.cpu_count() or 1
    indexer = Indexer(dsn, tokenizer, num_threads)

    while True:
        if not args.no_boundaries:
            await indexer.index_boundaries(args.minrank, args.maxrank)
        # NOTE(review): nesting was reconstructed from text without
        # indentation; confirm that postcode indexing belongs under
        # the boundaries_only check.
        if not args.boundaries_only:
            await indexer.index_by_rank(args.minrank, args.maxrank)
            await indexer.index_postcodes()

        if not indexer.has_pending():
            break
7 changes: 4 additions & 3 deletions src/nominatim_db/clicmd/refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
import argparse
import logging
from pathlib import Path
import asyncio

from ..config import Configuration
from ..db.connection import connect
from ..db.connection import connect, table_exists
from ..tokenizer.base import AbstractTokenizer
from .args import NominatimArgs

Expand Down Expand Up @@ -99,7 +100,7 @@ def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, t
args.project_dir, tokenizer)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_postcodes()
asyncio.run(indexer.index_postcodes())
else:
LOG.error("The place table doesn't exist. "
"Postcode updates on a frozen database is not possible.")
Expand All @@ -124,7 +125,7 @@ def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, t
with connect(args.config.get_libpq_dsn()) as conn:
# If the table did not exist before, then the importance code
# needs to be enabled.
if not conn.table_exists('secondary_importance'):
if not table_exists(conn, 'secondary_importance'):
args.functions = True

LOG.warning('Import secondary importance raster data from %s', args.project_dir)
Expand Down
10 changes: 5 additions & 5 deletions src/nominatim_db/clicmd/replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import logging
import socket
import time
import asyncio

from ..db import status
from ..db.connection import connect
Expand Down Expand Up @@ -123,7 +124,7 @@ def _compute_update_interval(self, args: NominatimArgs) -> int:
return update_interval


def _update(self, args: NominatimArgs) -> None:
async def _update(self, args: NominatimArgs) -> None:
# pylint: disable=too-many-locals
from ..tools import replication
from ..indexer.indexer import Indexer
Expand Down Expand Up @@ -161,7 +162,7 @@ def _update(self, args: NominatimArgs) -> None:

if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
indexer.index_full(analyse=False)
await indexer.index_full(analyse=False)

with connect(dsn) as conn:
status.set_indexed(conn, True)
Expand All @@ -172,8 +173,7 @@ def _update(self, args: NominatimArgs) -> None:

if state is replication.UpdateState.NO_CHANGES and \
args.catch_up or update_interval > 40*60:
while indexer.has_pending():
indexer.index_full(analyse=False)
await indexer.index_full(analyse=False)

if LOG.isEnabledFor(logging.WARNING):
assert batchdate is not None
Expand All @@ -196,5 +196,5 @@ def run(self, args: NominatimArgs) -> int:
if args.check_for_updates:
return self._check_for_updates(args)

self._update(args)
asyncio.run(self._update(args))
return 0
Loading

0 comments on commit 0add25e

Please sign in to comment.