diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index eaab5ae017..724de3dec6 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -27,10 +27,10 @@ runs: run: | sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson if [ "$FLAVOUR" == "oldstuff" ]; then - pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 GeoAlchemy2==0.10.0 datrie asyncpg + pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 datrie asyncpg else sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml - pip3 install sqlalchemy GeoAlchemy2 psycopg + pip3 install sqlalchemy psycopg fi shell: bash env: diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index 61fc1bcee7..11442eed46 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -49,7 +49,6 @@ For running Nominatim: * [psutil](https://github.com/giampaolo/psutil) * [Jinja2](https://palletsprojects.com/p/jinja/) * [SQLAlchemy](https://www.sqlalchemy.org/) (1.4+ with greenlet support) - * [GeoAlchemy2](https://geoalchemy-2.readthedocs.io/) (0.10+) * [asyncpg](https://magicstack.github.io/asyncpg) (0.8+) * [PyICU](https://pypi.org/project/PyICU/) * [PyYaml](https://pyyaml.org/) (5.1+) diff --git a/lib-sql/functions/ranking.sql b/lib-sql/functions/ranking.sql index af23335a86..0b18954ced 100644 --- a/lib-sql/functions/ranking.sql +++ b/lib-sql/functions/ranking.sql @@ -284,3 +284,26 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; + + +CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[], + term_vectors TEXT[], + weight_vectors FLOAT[], + def_weight FLOAT) + RETURNS FLOAT + AS $$ +DECLARE + pos INT := 1; + terms TEXT; +BEGIN + FOREACH terms IN ARRAY term_vectors + LOOP + IF search_vector @> terms::INTEGER[] THEN + RETURN weight_vectors[pos]; + END IF; + pos := pos + 1; + END LOOP; + RETURN def_weight; +END; +$$ +LANGUAGE plpgsql IMMUTABLE; diff --git a/nominatim/api/connection.py b/nominatim/api/connection.py index e157d06208..72cabf7814 100644 --- a/nominatim/api/connection.py +++ b/nominatim/api/connection.py @@ -10,11 +10,11 @@ from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set import sqlalchemy as sa -from geoalchemy2 import Geometry from sqlalchemy.ext.asyncio import AsyncConnection from nominatim.typing import SaFromClause from nominatim.db.sqlalchemy_schema import SearchTables +from nominatim.db.sqlalchemy_types import Geometry from nominatim.api.logging import log class SearchConnection: @@ -38,7 +38,7 @@ async def scalar(self, sql: sa.sql.base.Executable, ) -> Any: """ Execute a 'scalar()' query on the connection. """ - log().sql(self.connection, sql) + log().sql(self.connection, sql, params) return await self.connection.scalar(sql, params) @@ -47,7 +47,7 @@ async def execute(self, sql: 'sa.Executable', ) -> 'sa.Result[Any]': """ Execute a 'execute()' query on the connection. 
""" - log().sql(self.connection, sql) + log().sql(self.connection, sql, params) return await self.connection.execute(sql, params) @@ -112,4 +112,4 @@ async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]: return sa.Table(tablename, self.t.meta, sa.Column('place_id', sa.BigInteger), - sa.Column('centroid', Geometry(srid=4326, spatial_index=False))) + sa.Column('centroid', Geometry)) diff --git a/nominatim/api/core.py b/nominatim/api/core.py index a9fc124393..8d503fa5e8 100644 --- a/nominatim/api/core.py +++ b/nominatim/api/core.py @@ -66,7 +66,8 @@ async def setup_database(self) -> None: username=dsn.get('user'), password=dsn.get('password'), host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None, query=query) - engine = sa_asyncio.create_async_engine(dburl, future=True) + engine = sa_asyncio.create_async_engine(dburl, future=True, + echo=self.config.get_bool('DEBUG_SQL')) try: async with engine.begin() as conn: diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py index 6c8b1b3882..3785579073 100644 --- a/nominatim/api/logging.py +++ b/nominatim/api/logging.py @@ -7,11 +7,12 @@ """ Functions for specialised logging with HTML output. """ -from typing import Any, Iterator, Optional, List, Tuple, cast +from typing import Any, Iterator, Optional, List, Tuple, cast, Union, Mapping, Sequence from contextvars import ContextVar import datetime as dt import textwrap import io +import re import sqlalchemy as sa from sqlalchemy.ext.asyncio import AsyncConnection @@ -74,22 +75,39 @@ def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None: """ - def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None: + def sql(self, conn: AsyncConnection, statement: 'sa.Executable', + params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None: """ Print the SQL for the given statement. """ - def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> str: + def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable', + extra_params: Union[Mapping[str, Any], + Sequence[Mapping[str, Any]], None]) -> str: """ Return the comiled version of the statement. """ - try: - return str(cast('sa.ClauseElement', statement) - .compile(conn.sync_engine, compile_kwargs={"literal_binds": True})) - except sa.exc.CompileError: - pass - except NotImplementedError: - pass + compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine) - return str(cast('sa.ClauseElement', statement).compile(conn.sync_engine)) + params = dict(compiled.params) + if isinstance(extra_params, Mapping): + for k, v in extra_params.items(): + params[k] = str(v) + elif isinstance(extra_params, Sequence) and extra_params: + for k in extra_params[0]: + params[k] = f':{k}' + + sqlstr = str(compiled) + + if sa.__version__.startswith('1'): + try: + return sqlstr % tuple((repr(params.get(name, None)) + for name in compiled.positiontup)) # type: ignore + except TypeError: + return sqlstr + + # Fixes an odd issue with Python 3.7 where percentages are not + # quoted correctly. + sqlstr = re.sub(r'%(?!\()', '%%', sqlstr) + return sqlstr % params class HTMLLogger(BaseLogger): @@ -183,9 +201,10 @@ def format_osm(osm_object: Optional[Tuple[str, int]]) -> str: self._write(f'TOTAL: {total}
') - def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None: + def sql(self, conn: AsyncConnection, statement: 'sa.Executable', + params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None: self._timestamp() - sqlstr = self.format_sql(conn, statement) + sqlstr = self.format_sql(conn, statement, params) if CODE_HIGHLIGHT: sqlstr = highlight(sqlstr, PostgresLexer(), HtmlFormatter(nowrap=True, lineseparator='<br>
')) @@ -276,8 +295,9 @@ def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None: self._write(f'TOTAL: {total}\n\n') - def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None: - sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement), width=78)) + def sql(self, conn: AsyncConnection, statement: 'sa.Executable', + params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None: + sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78)) self._write(f"| {sqlstr}\n\n") diff --git a/nominatim/api/results.py b/nominatim/api/results.py index c661b508bd..3416fc7a0d 100644 --- a/nominatim/api/results.py +++ b/nominatim/api/results.py @@ -263,7 +263,7 @@ def create_from_placex_row(row: Optional[SaRow], rank_search=row.rank_search, importance=row.importance, country_code=row.country_code, - centroid=Point.from_wkb(row.centroid.data), + centroid=Point.from_wkb(row.centroid), geometry=_filter_geometries(row)) @@ -288,7 +288,7 @@ def create_from_osmline_row(row: Optional[SaRow], address=row.address, postcode=row.postcode, country_code=row.country_code, - centroid=Point.from_wkb(row.centroid.data), + centroid=Point.from_wkb(row.centroid), geometry=_filter_geometries(row)) if hnr is None: @@ -321,7 +321,7 @@ def create_from_tiger_row(row: Optional[SaRow], category=('place', 'houses' if hnr is None else 'house'), postcode=row.postcode, country_code='us', - centroid=Point.from_wkb(row.centroid.data), + centroid=Point.from_wkb(row.centroid), geometry=_filter_geometries(row)) if hnr is None: @@ -350,7 +350,7 @@ def create_from_postcode_row(row: Optional[SaRow], rank_search=row.rank_search, rank_address=row.rank_address, country_code=row.country_code, - centroid=Point.from_wkb(row.centroid.data), + centroid=Point.from_wkb(row.centroid), geometry=_filter_geometries(row)) @@ -365,7 +365,7 @@ def create_from_country_row(row: Optional[SaRow], return class_type(source_table=SourceTable.COUNTRY, category=('place', 'country'), - centroid=Point.from_wkb(row.centroid.data), + centroid=Point.from_wkb(row.centroid), names=row.name, rank_address=4, rank_search=4, country_code=row.country_code) diff --git a/nominatim/api/reverse.py b/nominatim/api/reverse.py index 10c97cad22..00605d45b3 100644 --- a/nominatim/api/reverse.py +++ b/nominatim/api/reverse.py @@ -7,16 +7,16 @@ """ Implementation of reverse geocoding. """ -from typing import Optional, List, Callable, Type, Tuple +from typing import Optional, List, Callable, Type, Tuple, Dict, Any import sqlalchemy as sa -from geoalchemy2 import WKTElement -from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow +from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, SaBind from nominatim.api.connection import SearchConnection import nominatim.api.results as nres from nominatim.api.logging import log from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox +from nominatim.db.sqlalchemy_types import Geometry # In SQLAlchemy expression which compare with NULL need to be expressed with # the equal sign. 
@@ -24,20 +24,20 @@ RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]] -def _select_from_placex(t: SaFromClause, wkt: Optional[str] = None) -> SaSelect: +WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry) +MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') + +def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect: """ Create a select statement with the columns relevant for reverse results. """ - if wkt is None: + if not use_wkt: distance = t.c.distance centroid = t.c.centroid else: - distance = t.c.geometry.ST_Distance(wkt) - centroid = sa.case( - (t.c.geometry.ST_GeometryType().in_(('ST_LineString', - 'ST_MultiLineString')), - t.c.geometry.ST_ClosestPoint(wkt)), - else_=t.c.centroid).label('centroid') + distance = t.c.geometry.ST_Distance(WKT_PARAM) + centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)), + else_=t.c.centroid).label('centroid') return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, @@ -66,11 +66,11 @@ def _interpolated_position(table: SaFromClause) -> SaLabel: else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid') -def _locate_interpolation(table: SaFromClause, wkt: WKTElement) -> SaLabel: +def _locate_interpolation(table: SaFromClause) -> SaLabel: """ Given a position, locate the closest point on the line. """ - return sa.case((table.c.linegeo.ST_GeometryType() == 'ST_LineString', - sa.func.ST_LineLocatePoint(table.c.linegeo, wkt)), + return sa.case((table.c.linegeo.is_line_like(), + table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)), else_=0).label('position') @@ -79,9 +79,11 @@ def _is_address_point(table: SaFromClause) -> SaColumn: sa.or_(table.c.housenumber != None, table.c.name.has_key('housename'))) + def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]: return min(rows, key=lambda row: 1000 if row is None else row.distance) + class ReverseGeocoder: """ Class implementing the logic for looking up a place from a coordinate. 
@@ -91,6 +93,8 @@ def __init__(self, conn: SearchConnection, params: ReverseDetails) -> None: self.conn = conn self.params = params + self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank} + @property def max_rank(self) -> int: @@ -122,6 +126,7 @@ def has_feature_layers(self) -> bool: """ return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL) + def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect: if not self.has_geometries(): return sql @@ -129,16 +134,16 @@ def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect: out = [] if self.params.geometry_simplification > 0.0: - col = col.ST_SimplifyPreserveTopology(self.params.geometry_simplification) + col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification) if self.params.geometry_output & GeometryFormat.GEOJSON: - out.append(col.ST_AsGeoJSON().label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson')) if self.params.geometry_output & GeometryFormat.TEXT: - out.append(col.ST_AsText().label('geometry_text')) + out.append(sa.func.ST_AsText(col).label('geometry_text')) if self.params.geometry_output & GeometryFormat.KML: - out.append(col.ST_AsKML().label('geometry_kml')) + out.append(sa.func.ST_AsKML(col).label('geometry_kml')) if self.params.geometry_output & GeometryFormat.SVG: - out.append(col.ST_AsSVG().label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col).label('geometry_svg')) return sql.add_columns(*out) @@ -160,20 +165,18 @@ def _filter_by_layer(self, table: SaFromClause) -> SaColumn: return table.c.class_.in_(tuple(include)) - async def _find_closest_street_or_poi(self, wkt: WKTElement, - distance: float) -> Optional[SaRow]: + async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]: """ Look up the closest rank 26+ place in the database, which is closer than the given distance. 
""" t = self.conn.t.placex - sql = _select_from_placex(t, wkt)\ - .where(t.c.geometry.ST_DWithin(wkt, distance))\ + sql = _select_from_placex(t)\ + .where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\ .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ - .where(sa.or_(t.c.geometry.ST_GeometryType() - .not_in(('ST_Polygon', 'ST_MultiPolygon')), - t.c.centroid.ST_Distance(wkt) < distance))\ + .where(sa.or_(sa.not_(t.c.geometry.is_area()), + t.c.centroid.ST_Distance(WKT_PARAM) < distance))\ .order_by('distance')\ .limit(1) @@ -189,24 +192,25 @@ async def _find_closest_street_or_poi(self, wkt: WKTElement, if self.layer_enabled(DataLayer.POI) and self.max_rank == 30: restrict.append(sa.and_(t.c.rank_search == 30, t.c.class_.not_in(('place', 'building')), - t.c.geometry.ST_GeometryType() != 'ST_LineString')) + sa.not_(t.c.geometry.is_line_like()))) if self.has_feature_layers(): - restrict.append(sa.and_(t.c.rank_search.between(26, self.max_rank), + restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM), t.c.rank_address == 0, self._filter_by_layer(t))) if not restrict: return None - return (await self.conn.execute(sql.where(sa.or_(*restrict)))).one_or_none() + sql = sql.where(sa.or_(*restrict)) + + return (await self.conn.execute(sql, self.bind_params)).one_or_none() - async def _find_housenumber_for_street(self, parent_place_id: int, - wkt: WKTElement) -> Optional[SaRow]: + async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]: t = self.conn.t.placex - sql = _select_from_placex(t, wkt)\ - .where(t.c.geometry.ST_DWithin(wkt, 0.001))\ + sql = _select_from_placex(t)\ + .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\ .where(t.c.parent_place_id == parent_place_id)\ .where(_is_address_point(t))\ .where(t.c.indexed_status == 0)\ @@ -216,18 +220,17 @@ async def _find_housenumber_for_street(self, parent_place_id: int, sql = self._add_geometry_columns(sql, t.c.geometry) - return (await self.conn.execute(sql)).one_or_none() + return (await self.conn.execute(sql, self.bind_params)).one_or_none() async def _find_interpolation_for_street(self, parent_place_id: Optional[int], - wkt: WKTElement, distance: float) -> Optional[SaRow]: t = self.conn.t.osmline sql = sa.select(t, - t.c.linegeo.ST_Distance(wkt).label('distance'), - _locate_interpolation(t, wkt))\ - .where(t.c.linegeo.ST_DWithin(wkt, distance))\ + t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), + _locate_interpolation(t))\ + .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\ .where(t.c.startnumber != None)\ .order_by('distance')\ .limit(1) @@ -248,18 +251,18 @@ async def _find_interpolation_for_street(self, parent_place_id: Optional[int], sub = sql.subquery('geom') sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid) - return (await self.conn.execute(sql)).one_or_none() + return (await self.conn.execute(sql, self.bind_params)).one_or_none() async def _find_tiger_number_for_street(self, parent_place_id: int, - parent_type: str, parent_id: int, - wkt: WKTElement) -> Optional[SaRow]: + parent_type: str, + parent_id: int) -> Optional[SaRow]: t = self.conn.t.tiger inner = sa.select(t, - t.c.linegeo.ST_Distance(wkt).label('distance'), - _locate_interpolation(t, wkt))\ - .where(t.c.linegeo.ST_DWithin(wkt, 0.001))\ + t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), + _locate_interpolation(t))\ + .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\ .where(t.c.parent_place_id == parent_place_id)\ .order_by('distance')\ .limit(1)\ @@ -278,18 +281,17 @@ async def 
_find_tiger_number_for_street(self, parent_place_id: int, sub = sql.subquery('geom') sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid) - return (await self.conn.execute(sql)).one_or_none() + return (await self.conn.execute(sql, self.bind_params)).one_or_none() - async def lookup_street_poi(self, - wkt: WKTElement) -> Tuple[Optional[SaRow], RowFunc]: + async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]: """ Find a street or POI/address for the given WKT point. """ log().section('Reverse lookup on street/address level') distance = 0.006 parent_place_id = None - row = await self._find_closest_street_or_poi(wkt, distance) + row = await self._find_closest_street_or_poi(distance) row_func: RowFunc = nres.create_from_placex_row log().var_dump('Result (street/building)', row) @@ -302,7 +304,7 @@ async def lookup_street_poi(self, distance = 0.001 parent_place_id = row.place_id log().comment('Find housenumber for street') - addr_row = await self._find_housenumber_for_street(parent_place_id, wkt) + addr_row = await self._find_housenumber_for_street(parent_place_id) log().var_dump('Result (street housenumber)', addr_row) if addr_row is not None: @@ -313,8 +315,7 @@ async def lookup_street_poi(self, log().comment('Find TIGER housenumber for street') addr_row = await self._find_tiger_number_for_street(parent_place_id, row.osm_type, - row.osm_id, - wkt) + row.osm_id) log().var_dump('Result (street Tiger housenumber)', addr_row) if addr_row is not None: @@ -328,7 +329,7 @@ async def lookup_street_poi(self, if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS): log().comment('Find interpolation for street') addr_row = await self._find_interpolation_for_street(parent_place_id, - wkt, distance) + distance) log().var_dump('Result (street interpolation)', addr_row) if addr_row is not None: row = addr_row @@ -337,7 +338,7 @@ async def lookup_street_poi(self, return row, row_func - async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: + async def _lookup_area_address(self) -> Optional[SaRow]: """ Lookup large addressable areas for the given WKT point. """ log().comment('Reverse lookup by larger address area features') @@ -346,10 +347,10 @@ async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: # The inner SQL brings results in the right order, so that # later only a minimum of results needs to be checked with ST_Contains. 
inner = sa.select(t, sa.literal(0.0).label('distance'))\ - .where(t.c.rank_search.between(5, self.max_rank))\ + .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\ .where(t.c.rank_address.between(5, 25))\ - .where(t.c.geometry.ST_GeometryType().in_(('ST_Polygon', 'ST_MultiPolygon')))\ - .where(t.c.geometry.intersects(wkt))\ + .where(t.c.geometry.is_area())\ + .where(t.c.geometry.intersects(WKT_PARAM))\ .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ @@ -358,23 +359,23 @@ async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: .limit(50)\ .subquery('area') - sql = _select_from_placex(inner)\ - .where(inner.c.geometry.ST_Contains(wkt))\ + sql = _select_from_placex(inner, False)\ + .where(inner.c.geometry.ST_Contains(WKT_PARAM))\ .order_by(sa.desc(inner.c.rank_search))\ .limit(1) sql = self._add_geometry_columns(sql, inner.c.geometry) - address_row = (await self.conn.execute(sql)).one_or_none() + address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (area)', address_row) if address_row is not None and address_row.rank_search < self.max_rank: log().comment('Search for better matching place nodes inside the area') inner = sa.select(t, - t.c.geometry.ST_Distance(wkt).label('distance'))\ + t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ .where(t.c.osm_type == 'N')\ .where(t.c.rank_search > address_row.rank_search)\ - .where(t.c.rank_search <= self.max_rank)\ + .where(t.c.rank_search <= MAX_RANK_PARAM)\ .where(t.c.rank_address.between(5, 25))\ .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ @@ -382,13 +383,13 @@ async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: .where(t.c.type != 'postcode')\ .where(t.c.geometry .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) - .intersects(wkt))\ + .intersects(WKT_PARAM))\ .order_by(sa.desc(t.c.rank_search))\ .limit(50)\ .subquery('places') touter = self.conn.t.placex.alias('outer') - sql = _select_from_placex(inner)\ + sql = _select_from_placex(inner, False)\ .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\ .where(touter.c.place_id == address_row.place_id)\ .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\ @@ -397,7 +398,7 @@ async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: sql = self._add_geometry_columns(sql, inner.c.geometry) - place_address_row = (await self.conn.execute(sql)).one_or_none() + place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (place node)', place_address_row) if place_address_row is not None: @@ -406,65 +407,64 @@ async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: return address_row - async def _lookup_area_others(self, wkt: WKTElement) -> Optional[SaRow]: + async def _lookup_area_others(self) -> Optional[SaRow]: t = self.conn.t.placex - inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\ + inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ .where(t.c.rank_address == 0)\ - .where(t.c.rank_search.between(5, self.max_rank))\ + .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\ .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ .where(self._filter_by_layer(t))\ .where(t.c.geometry .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) - .intersects(wkt))\ + .intersects(WKT_PARAM))\ .order_by(sa.desc(t.c.rank_search))\ .limit(50)\ .subquery() 
- sql = _select_from_placex(inner)\ - .where(sa.or_(inner.c.geometry.ST_GeometryType() - .not_in(('ST_Polygon', 'ST_MultiPolygon')), - inner.c.geometry.ST_Contains(wkt)))\ + sql = _select_from_placex(inner, False)\ + .where(sa.or_(sa.not_(inner.c.geometry.is_area()), + inner.c.geometry.ST_Contains(WKT_PARAM)))\ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .limit(1) sql = self._add_geometry_columns(sql, inner.c.geometry) - row = (await self.conn.execute(sql)).one_or_none() + row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (non-address feature)', row) return row - async def lookup_area(self, wkt: WKTElement) -> Optional[SaRow]: - """ Lookup large areas for the given WKT point. + async def lookup_area(self) -> Optional[SaRow]: + """ Lookup large areas for the current search. """ log().section('Reverse lookup by larger area features') if self.layer_enabled(DataLayer.ADDRESS): - address_row = await self._lookup_area_address(wkt) + address_row = await self._lookup_area_address() else: address_row = None if self.has_feature_layers(): - other_row = await self._lookup_area_others(wkt) + other_row = await self._lookup_area_others() else: other_row = None return _get_closest(address_row, other_row) - async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]: - """ Lookup the country for the given WKT point. + async def lookup_country(self) -> Optional[SaRow]: + """ Lookup the country for the current search. """ log().section('Reverse lookup by country code') t = self.conn.t.country_grid sql = sa.select(t.c.country_code).distinct()\ - .where(t.c.geometry.ST_Contains(wkt)) + .where(t.c.geometry.ST_Contains(WKT_PARAM)) - ccodes = tuple((r[0] for r in await self.conn.execute(sql))) + ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params))) log().var_dump('Country codes', ccodes) if not ccodes: @@ -475,10 +475,10 @@ async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]: log().comment('Search for place nodes in country') inner = sa.select(t, - t.c.geometry.ST_Distance(wkt).label('distance'))\ + t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ .where(t.c.osm_type == 'N')\ .where(t.c.rank_search > 4)\ - .where(t.c.rank_search <= self.max_rank)\ + .where(t.c.rank_search <= MAX_RANK_PARAM)\ .where(t.c.rank_address.between(5, 25))\ .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ @@ -487,26 +487,26 @@ async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]: .where(t.c.country_code.in_(ccodes))\ .where(t.c.geometry .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) - .intersects(wkt))\ + .intersects(WKT_PARAM))\ .order_by(sa.desc(t.c.rank_search))\ .limit(50)\ .subquery() - sql = _select_from_placex(inner)\ + sql = _select_from_placex(inner, False)\ .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .limit(1) sql = self._add_geometry_columns(sql, inner.c.geometry) - address_row = (await self.conn.execute(sql)).one_or_none() + address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (addressable place node)', address_row) else: address_row = None if address_row is None: # Still nothing, then return a country with the appropriate country code. 
- sql = _select_from_placex(t, wkt)\ + sql = _select_from_placex(t)\ .where(t.c.country_code.in_(ccodes))\ .where(t.c.rank_address == 4)\ .where(t.c.rank_search == 4)\ @@ -516,7 +516,7 @@ async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]: sql = self._add_geometry_columns(sql, t.c.geometry) - address_row = (await self.conn.execute(sql)).one_or_none() + address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() return address_row @@ -528,26 +528,26 @@ async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]: log().function('reverse_lookup', coord=coord, params=self.params) - wkt = WKTElement(f'POINT({coord[0]} {coord[1]})', srid=4326) + self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})' row: Optional[SaRow] = None row_func: RowFunc = nres.create_from_placex_row if self.max_rank >= 26: - row, tmp_row_func = await self.lookup_street_poi(wkt) + row, tmp_row_func = await self.lookup_street_poi() if row is not None: row_func = tmp_row_func if row is None and self.max_rank > 4: - row = await self.lookup_area(wkt) + row = await self.lookup_area() if row is None and self.layer_enabled(DataLayer.ADDRESS): - row = await self.lookup_country(wkt) + row = await self.lookup_country() result = row_func(row, nres.ReverseResult) if result is not None: assert row is not None result.distance = row.distance if hasattr(row, 'bbox'): - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) await nres.add_result_details(self.conn, [result], self.params) return result diff --git a/nominatim/api/search/db_search_fields.py b/nominatim/api/search/db_search_fields.py index 325e08df35..13f1c56eb0 100644 --- a/nominatim/api/search/db_search_fields.py +++ b/nominatim/api/search/db_search_fields.py @@ -129,10 +129,11 @@ def sql_penalty(self, table: SaFromClause) -> SaColumn: """ assert self.rankings - col = table.c[self.column] - - return sa.case(*((col.contains(r.tokens),r.penalty) for r in self.rankings), - else_=self.default) + return sa.func.weigh_search(table.c[self.column], + [f"{{{','.join((str(s) for s in r.tokens))}}}" + for r in self.rankings], + [r.penalty for r in self.rankings], + self.default) @dataclasses.dataclass diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 76ff368f85..cea19c8528 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -7,22 +7,48 @@ """ Implementation of the acutal database accesses for forward search. """ -from typing import List, Tuple, AsyncIterator +from typing import List, Tuple, AsyncIterator, Dict, Any import abc import sqlalchemy as sa from sqlalchemy.dialects.postgresql import ARRAY, array_agg from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \ - SaExpression, SaSelect, SaRow + SaExpression, SaSelect, SaRow, SaBind from nominatim.api.connection import SearchConnection from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox import nominatim.api.results as nres from nominatim.api.search.db_search_fields import SearchData, WeightedCategories +from nominatim.db.sqlalchemy_types import Geometry #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements +def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]: + """ Create a dictionary from search parameters that can be used + as bind parameter for SQL execute. 
+ """ + return {'limit': details.max_results, + 'min_rank': details.min_rank, + 'max_rank': details.max_rank, + 'viewbox': details.viewbox, + 'viewbox2': details.viewbox_x2, + 'near': details.near, + 'near_radius': details.near_radius, + 'excluded': details.excluded, + 'countries': details.countries} + + +LIMIT_PARAM: SaBind = sa.bindparam('limit') +MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank') +MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') +VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry) +VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry) +NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry) +NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius') +EXCLUDED_PARAM: SaBind = sa.bindparam('excluded') +COUNTRIES_PARAM: SaBind = sa.bindparam('countries') + def _select_placex(t: SaFromClause) -> SaSelect: return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, @@ -41,16 +67,16 @@ def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) out = [] if details.geometry_simplification > 0.0: - col = col.ST_SimplifyPreserveTopology(details.geometry_simplification) + col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification) if details.geometry_output & GeometryFormat.GEOJSON: - out.append(col.ST_AsGeoJSON().label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson')) if details.geometry_output & GeometryFormat.TEXT: - out.append(col.ST_AsText().label('geometry_text')) + out.append(sa.func.ST_AsText(col).label('geometry_text')) if details.geometry_output & GeometryFormat.KML: - out.append(col.ST_AsKML().label('geometry_kml')) + out.append(sa.func.ST_AsKML(col).label('geometry_kml')) if details.geometry_output & GeometryFormat.SVG: - out.append(col.ST_AsSVG().label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col).label('geometry_svg')) return sql.add_columns(*out) @@ -70,7 +96,7 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, for n in numerals))) if details.excluded: - sql = sql.where(table.c.place_id.not_in(details.excluded)) + sql = sql.where(table.c.place_id.not_in(EXCLUDED_PARAM)) return sql.scalar_subquery() @@ -129,7 +155,7 @@ async def _get_placex_housenumbers(conn: SearchConnection, for row in await conn.execute(sql): result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) yield result @@ -259,28 +285,25 @@ async def lookup_category(self, results: nres.SearchResults, sql = sql.join(table, t.c.place_id == table.c.place_id)\ .join(tgeom, sa.case((sa.and_(tgeom.c.rank_address < 9, - tgeom.c.geometry.ST_GeometryType().in_( - ('ST_Polygon', 'ST_MultiPolygon'))), + tgeom.c.geometry.is_area()), tgeom.c.geometry.ST_Contains(table.c.centroid)), else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\ .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) + sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM)) if details.countries: - sql = sql.where(t.c.country_code.in_(details.countries)) - if details.min_rank > 0: - sql = sql.where(t.c.rank_address >= details.min_rank) - if details.max_rank < 30: - sql = sql.where(t.c.rank_address <= details.max_rank) + sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM)) if details.layers is not None: sql = 
sql.where(_filter_by_layer(t, details.layers)) - for row in await conn.execute(sql.limit(details.max_results)): + sql = sql.limit(LIMIT_PARAM) + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + penalty - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) @@ -298,6 +321,7 @@ async def lookup(self, conn: SearchConnection, details: SearchDetails) -> nres.SearchResults: """ Find results for the search in the database. """ + bind_params = _details_to_bind_params(details) t = conn.t.placex rows: List[SaRow] = [] @@ -306,15 +330,14 @@ async def lookup(self, conn: SearchConnection, # simply search in placex table sql = _select_placex(t) \ .where(t.c.linked_place_id == None) \ - .where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) \ - .order_by(t.c.centroid.ST_Distance(details.near.sql_value())) + .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \ + .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) classtype = self.categories.values if len(classtype) == 1: @@ -324,7 +347,8 @@ async def lookup(self, conn: SearchConnection, sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ) for cls, typ in classtype))) - rows.extend(await conn.execute(sql.limit(details.max_results))) + sql = sql.limit(LIMIT_PARAM) + rows.extend(await conn.execute(sql, bind_params)) else: # use the class type tables for category in self.categories.values: @@ -336,24 +360,25 @@ async def lookup(self, conn: SearchConnection, .where(t.c.type == category[1]) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(table.c.centroid.intersects(details.viewbox.sql_value())) + sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM)) - if details.near: - sql = sql.order_by(table.c.centroid.ST_Distance(details.near.sql_value()))\ - .where(table.c.centroid.ST_DWithin(details.near.sql_value(), - details.near_radius or 0.5)) + if details.near and details.near_radius is not None: + sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\ + .where(table.c.centroid.ST_DWithin(NEAR_PARAM, + NEAR_RADIUS_PARAM)) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) - rows.extend(await conn.execute(sql.limit(details.max_results))) + sql = sql.limit(LIMIT_PARAM) + rows.extend(await conn.execute(sql, bind_params)) results = nres.SearchResults() for row in rows: result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type)) - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) return results @@ -380,17 +405,16 @@ async def lookup(self, conn: SearchConnection, sql = _add_geometry_columns(sql, t.c.geometry, details) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM)) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) if details.near is not 
None and details.near_radius is not None: - sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) + sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) results = nres.SearchResults() - for row in await conn.execute(sql): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) @@ -419,10 +443,9 @@ async def lookup_in_country_table(self, conn: SearchConnection, .group_by(tgrid.c.country_code) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(tgrid.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM)) if details.near is not None and details.near_radius is not None: - sql = sql.where(tgrid.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) + sql = sql.where(tgrid.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) sub = sql.subquery('grid') @@ -435,7 +458,7 @@ async def lookup_in_country_table(self, conn: SearchConnection, .join(sub, t.c.country_code == sub.c.country_code) results = nres.SearchResults() - for row in await conn.execute(sql): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_country_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) @@ -474,23 +497,22 @@ async def lookup(self, conn: SearchConnection, if details.viewbox is not None: if details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) else: - penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0), - (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0), + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), else_=2.0) if details.near is not None: if details.near_radius is not None: - sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) - sql = sql.order_by(t.c.geometry.ST_Distance(details.near.sql_value())) + sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) + sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM)) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM)) if self.lookups: assert len(self.lookups) == 1 @@ -509,10 +531,10 @@ async def lookup(self, conn: SearchConnection, sql = sql.add_columns(penalty.label('accuracy')) - sql = sql.order_by('accuracy') + sql = sql.order_by('accuracy').limit(LIMIT_PARAM) results = nres.SearchResults() - for row in await conn.execute(sql.limit(details.max_results)): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_postcode_row(row, nres.SearchResult) assert result result.accuracy = row.accuracy @@ -542,7 +564,6 @@ async def lookup(self, conn: SearchConnection, """ t = conn.t.placex.alias('p') tsearch = conn.t.search_name.alias('s') - limit = details.max_results sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, @@ -587,17 +608,16 @@ async def lookup(self, conn: SearchConnection, if details.viewbox is not None: if details.bounded_viewbox: 
- sql = sql.where(tsearch.c.centroid.intersects(details.viewbox.sql_value())) + sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM)) else: - penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0), - (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0), + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), else_=2.0) if details.near is not None: if details.near_radius is not None: - sql = sql.where(tsearch.c.centroid.ST_DWithin(details.near.sql_value(), - details.near_radius)) - sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(details.near.sql_value()) + sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) + sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM) .label('importance')) sql = sql.order_by(sa.desc(sa.text('importance'))) else: @@ -613,7 +633,7 @@ async def lookup(self, conn: SearchConnection, hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M" sql = sql.where(tsearch.c.address_rank.between(16, 30))\ .where(sa.or_(tsearch.c.address_rank < 30, - t.c.housenumber.regexp_match(hnr_regexp, flags='i'))) + t.c.housenumber.op('~*')(hnr_regexp))) # Cross check for housenumbers, need to do that on a rather large # set. Worst case there are 40.000 main streets in OSM. @@ -624,12 +644,12 @@ async def lookup(self, conn: SearchConnection, pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call] place_sql = sa.select(pid_list)\ .where(thnr.c.parent_place_id == inner.c.place_id)\ - .where(thnr.c.housenumber.regexp_match(hnr_regexp, flags='i'))\ + .where(thnr.c.housenumber.op('~*')(hnr_regexp))\ .where(thnr.c.linked_place_id == None)\ .where(thnr.c.indexed_status == 0) if details.excluded: - place_sql = place_sql.where(thnr.c.place_id.not_in(details.excluded)) + place_sql = place_sql.where(thnr.c.place_id.not_in(EXCLUDED_PARAM)) if self.qualifiers: place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr)) @@ -665,22 +685,23 @@ async def lookup(self, conn: SearchConnection, if self.qualifiers: sql = sql.where(self.qualifiers.sql_restrict(t)) if details.excluded: - sql = sql.where(tsearch.c.place_id.not_in(details.excluded)) + sql = sql.where(tsearch.c.place_id.not_in(EXCLUDED_PARAM)) if details.min_rank > 0: - sql = sql.where(sa.or_(tsearch.c.address_rank >= details.min_rank, - tsearch.c.search_rank >= details.min_rank)) + sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM, + tsearch.c.search_rank >= MIN_RANK_PARAM)) if details.max_rank < 30: - sql = sql.where(sa.or_(tsearch.c.address_rank <= details.max_rank, - tsearch.c.search_rank <= details.max_rank)) + sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM, + tsearch.c.search_rank <= MAX_RANK_PARAM)) if details.layers is not None: sql = sql.where(_filter_by_layer(t, details.layers)) + sql = sql.limit(LIMIT_PARAM) results = nres.SearchResults() - for row in await conn.execute(sql.limit(limit)): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) result.accuracy = row.accuracy if not details.excluded or not result.place_id in details.excluded: results.append(result) diff --git a/nominatim/api/types.py b/nominatim/api/types.py index 87568a09ac..43e83c1f68 100644 --- a/nominatim/api/types.py +++ b/nominatim/api/types.py @@ -14,9 +14,7 @@ import enum import math from struct 
import unpack - -from geoalchemy2 import WKTElement -import geoalchemy2.functions +from binascii import unhexlify from nominatim.errors import UsageError @@ -73,11 +71,13 @@ def to_geojson(self) -> str: @staticmethod - def from_wkb(wkb: bytes) -> 'Point': + def from_wkb(wkb: Union[str, bytes]) -> 'Point': """ Create a point from EWKB as returned from the database. """ + if isinstance(wkb, str): + wkb = unhexlify(wkb) if len(wkb) != 25: - raise ValueError("Point wkb has unexpected length") + raise ValueError(f"Point wkb has unexpected length {len(wkb)}") if wkb[0] == 0: gtype, srid, x, y = unpack('>iidd', wkb[1:]) elif wkb[0] == 1: @@ -122,10 +122,10 @@ def from_param(inp: Any) -> 'Point': return Point(x, y) - def sql_value(self) -> WKTElement: - """ Create an SQL expression for the point. + def to_wkt(self) -> str: + """ Return the WKT representation of the point. """ - return WKTElement(f'POINT({self.x} {self.y})', srid=4326) + return f'POINT({self.x} {self.y})' @@ -179,12 +179,6 @@ def area(self) -> float: return (self.coords[2] - self.coords[0]) * (self.coords[3] - self.coords[1]) - def sql_value(self) -> Any: - """ Create an SQL expression for the box. - """ - return geoalchemy2.functions.ST_MakeEnvelope(*self.coords, 4326) - - def contains(self, pt: Point) -> bool: """ Check if the point is inside or on the boundary of the box. """ @@ -192,14 +186,25 @@ def contains(self, pt: Point) -> bool: and self.coords[2] >= pt[0] and self.coords[3] >= pt[1] + def to_wkt(self) -> str: + """ Return the WKT representation of the Bbox. This + is a simple polygon with four points. + """ + return 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'\ + .format(*self.coords) # pylint: disable=consider-using-f-string + + @staticmethod - def from_wkb(wkb: Optional[bytes]) -> 'Optional[Bbox]': + def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]': """ Create a Bbox from a bounding box polygon as returned by the database. Return s None if the input value is None. """ if wkb is None: return None + if isinstance(wkb, str): + wkb = unhexlify(wkb) + if len(wkb) != 97: raise ValueError("WKB must be a bounding box polygon") if wkb.startswith(WKB_BBOX_HEADER_LE): @@ -439,6 +444,7 @@ class SearchDetails(LookupDetails): """ Restrict search to places with one of the given class/type categories. An empty list (the default) will disable this filter. """ + viewbox_x2: Optional[Bbox] = None def __post_init__(self) -> None: if self.viewbox is not None: diff --git a/nominatim/db/sqlalchemy_schema.py b/nominatim/db/sqlalchemy_schema.py index 550f1f12af..7af3d44cd6 100644 --- a/nominatim/db/sqlalchemy_schema.py +++ b/nominatim/db/sqlalchemy_schema.py @@ -10,10 +10,11 @@ from typing import Any import sqlalchemy as sa -from geoalchemy2 import Geometry from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB from sqlalchemy.dialects.sqlite import JSON as sqlite_json +from nominatim.db.sqlalchemy_types import Geometry + class PostgresTypes: """ Type definitions for complex types as used in Postgres variants. 
""" @@ -72,12 +73,12 @@ def __init__(self, meta: sa.MetaData, engine_name: str) -> None: sa.Column('name', self.types.Composite), sa.Column('address', self.types.Composite), sa.Column('extratags', self.types.Composite), - sa.Column('geometry', Geometry(srid=4326), nullable=False), + sa.Column('geometry', Geometry, nullable=False), sa.Column('wikipedia', sa.Text), sa.Column('country_code', sa.String(2)), sa.Column('housenumber', sa.Text), sa.Column('postcode', sa.Text), - sa.Column('centroid', Geometry(srid=4326, spatial_index=False))) + sa.Column('centroid', Geometry)) self.addressline = sa.Table('place_addressline', meta, sa.Column('place_id', sa.BigInteger, index=True), @@ -96,7 +97,7 @@ def __init__(self, meta: sa.MetaData, engine_name: str) -> None: sa.Column('indexed_date', sa.DateTime), sa.Column('country_code', sa.String(2)), sa.Column('postcode', sa.Text, index=True), - sa.Column('geometry', Geometry(srid=4326))) + sa.Column('geometry', Geometry)) self.osmline = sa.Table('location_property_osmline', meta, sa.Column('place_id', sa.BigInteger, nullable=False, unique=True), @@ -108,7 +109,7 @@ def __init__(self, meta: sa.MetaData, engine_name: str) -> None: sa.Column('step', sa.SmallInteger), sa.Column('partition', sa.SmallInteger), sa.Column('indexed_status', sa.SmallInteger), - sa.Column('linegeo', Geometry(srid=4326)), + sa.Column('linegeo', Geometry), sa.Column('address', self.types.Composite), sa.Column('postcode', sa.Text), sa.Column('country_code', sa.String(2))) @@ -123,7 +124,7 @@ def __init__(self, meta: sa.MetaData, engine_name: str) -> None: self.country_grid = sa.Table('country_osm_grid', meta, sa.Column('country_code', sa.String(2)), sa.Column('area', sa.Float), - sa.Column('geometry', Geometry(srid=4326))) + sa.Column('geometry', Geometry)) # The following tables are not necessarily present. self.search_name = sa.Table('search_name', meta, @@ -134,7 +135,7 @@ def __init__(self, meta: sa.MetaData, engine_name: str) -> None: sa.Column('name_vector', self.types.IntArray, index=True), sa.Column('nameaddress_vector', self.types.IntArray, index=True), sa.Column('country_code', sa.String(2)), - sa.Column('centroid', Geometry(srid=4326))) + sa.Column('centroid', Geometry)) self.tiger = sa.Table('location_property_tiger', meta, sa.Column('place_id', sa.BigInteger), @@ -143,5 +144,5 @@ def __init__(self, meta: sa.MetaData, engine_name: str) -> None: sa.Column('endnumber', sa.Integer), sa.Column('step', sa.SmallInteger), sa.Column('partition', sa.SmallInteger), - sa.Column('linegeo', Geometry(srid=4326, spatial_index=False)), + sa.Column('linegeo', Geometry), sa.Column('postcode', sa.Text)) diff --git a/nominatim/db/sqlalchemy_types.py b/nominatim/db/sqlalchemy_types.py new file mode 100644 index 0000000000..ed4aef1f15 --- /dev/null +++ b/nominatim/db/sqlalchemy_types.py @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Custom types for SQLAlchemy. +""" +from typing import Callable, Any, cast +import sys + +import sqlalchemy as sa +from sqlalchemy import types + +from nominatim.typing import SaColumn, SaBind + +#pylint: disable=all + +class Geometry(types.UserDefinedType): # type: ignore[type-arg] + """ Simplified type decorator for PostGIS geometry. This type + only supports geometries in 4326 projection. 
+ """ + cache_ok = True + + def __init__(self, subtype: str = 'Geometry'): + self.subtype = subtype + + + def get_col_spec(self) -> str: + return f'GEOMETRY({self.subtype}, 4326)' + + + def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]: + def process(value: Any) -> str: + if isinstance(value, str): + return 'SRID=4326;' + value + + return 'SRID=4326;' + cast(str, value.to_wkt()) + return process + + + def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]: + def process(value: Any) -> str: + assert isinstance(value, str) + return value + return process + + + def bind_expression(self, bindvalue: SaBind) -> SaColumn: + return sa.func.ST_GeomFromText(bindvalue, type_=self) + + + class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg] + + def intersects(self, other: SaColumn) -> 'sa.Operators': + return self.op('&&')(other) + + def is_line_like(self) -> SaColumn: + return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString', + 'ST_MultiLineString')) + + def is_area(self) -> SaColumn: + return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon', + 'ST_MultiPolygon')) + + + def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn: + return sa.func.ST_DWithin(self, other, distance, type_=sa.Float) + + + def ST_Distance(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Distance(self, other, type_=sa.Float) + + + def ST_Contains(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Contains(self, other, type_=sa.Float) + + + def ST_ClosestPoint(self, other: SaColumn) -> SaColumn: + return sa.func.ST_ClosestPoint(self, other, type_=Geometry) + + + def ST_Buffer(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Buffer(self, other, type_=Geometry) + + + def ST_Expand(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Expand(self, other, type_=Geometry) + + + def ST_Collect(self) -> SaColumn: + return sa.func.ST_Collect(self, type_=Geometry) + + + def ST_Centroid(self) -> SaColumn: + return sa.func.ST_Centroid(self, type_=Geometry) + + + def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn: + return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry) + + + def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn: + return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float) diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 0c88493bfb..ffeb4958f4 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -46,7 +46,6 @@ def migrate(config: Configuration, paths: Any) -> int: db_version = _guess_version(conn) - has_run_migration = False for version, func in _MIGRATION_FUNCTIONS: if db_version < version or \ (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)): @@ -55,13 +54,11 @@ def migrate(config: Configuration, paths: Any) -> int: kwargs = dict(conn=conn, config=config, paths=paths) func(**kwargs) conn.commit() - has_run_migration = True - if has_run_migration: - LOG.warning('Updating SQL functions.') - refresh.create_functions(conn, config) - tokenizer = tokenizer_factory.get_tokenizer_for_db(config) - tokenizer.update_sql_functions(config) + LOG.warning('Updating SQL functions.') + refresh.create_functions(conn, config) + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + tokenizer.update_sql_functions(config) properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION)) diff --git a/nominatim/typing.py b/nominatim/typing.py index d988fe04a3..ebb5e1e9d5 
100644 --- a/nominatim/typing.py +++ b/nominatim/typing.py @@ -70,3 +70,4 @@ def __getitem__(self, x: Union[int, str]) -> Any: ... SaLabel: TypeAlias = 'sa.Label[Any]' SaFromClause: TypeAlias = 'sa.FromClause' SaSelectable: TypeAlias = 'sa.Selectable' +SaBind: TypeAlias = 'sa.BindParameter[Any]' diff --git a/nominatim/version.py b/nominatim/version.py index 346af5eb65..beec32a53c 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -34,7 +34,7 @@ def __str__(self) -> str: return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}" -NOMINATIM_VERSION = NominatimVersion(4, 2, 99, 1) +NOMINATIM_VERSION = NominatimVersion(4, 2, 99, 2) POSTGRESQL_REQUIRED_VERSION = (9, 6) POSTGIS_REQUIRED_VERSION = (2, 2) diff --git a/settings/env.defaults b/settings/env.defaults index 84cd24f16c..9c2f7cac18 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -222,3 +222,7 @@ NOMINATIM_LOG_DB=no # Enable logging of requests into a file. # To enable logging set this setting to the file to log to. NOMINATIM_LOG_FILE= + +# Echo raw SQL from SQLAlchemy statements. +# Works only in command line/library use. +NOMINATIM_DEBUG_SQL=no diff --git a/test/python/api/conftest.py b/test/python/api/conftest.py index cfe14e1edd..1b5b88ed5f 100644 --- a/test/python/api/conftest.py +++ b/test/python/api/conftest.py @@ -66,11 +66,11 @@ def add_placex(self, **kw): 'rank_search': kw.get('rank_search', 30), 'rank_address': kw.get('rank_address', 30), 'importance': kw.get('importance'), - 'centroid': 'SRID=4326;POINT(%f %f)' % centroid, + 'centroid': 'POINT(%f %f)' % centroid, 'indexed_status': kw.get('indexed_status', 0), 'indexed_date': kw.get('indexed_date', dt.datetime(2022, 12, 7, 14, 14, 46, 0)), - 'geometry': 'SRID=4326;' + geometry}) + 'geometry': geometry}) def add_address_placex(self, object_id, **kw): @@ -97,7 +97,7 @@ def add_osmline(self, **kw): 'address': kw.get('address'), 'postcode': kw.get('postcode'), 'country_code': kw.get('country_code'), - 'linegeo': 'SRID=4326;' + kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')}) + 'linegeo': kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')}) def add_tiger(self, **kw): @@ -108,7 +108,7 @@ def add_tiger(self, **kw): 'endnumber': kw.get('endnumber', 6), 'step': kw.get('step', 2), 'postcode': kw.get('postcode'), - 'linegeo': 'SRID=4326;' + kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')}) + 'linegeo': kw.get('geometry', 'LINESTRING(1.1 -0.2, 1.09 -0.22)')}) def add_postcode(self, **kw): @@ -121,14 +121,14 @@ def add_postcode(self, **kw): 'rank_address': kw.get('rank_address', 22), 'indexed_date': kw.get('indexed_date', dt.datetime(2022, 12, 7, 14, 14, 46, 0)), - 'geometry': 'SRID=4326;' + kw.get('geometry', 'POINT(23 34)')}) + 'geometry': kw.get('geometry', 'POINT(23 34)')}) def add_country(self, country_code, geometry): self.add_data('country_grid', {'country_code': country_code, 'area': 0.1, - 'geometry': 'SRID=4326;' + geometry}) + 'geometry': geometry}) def add_country_name(self, country_code, names, partition=0): @@ -148,7 +148,7 @@ def add_search_name(self, place_id, **kw): 'name_vector': kw.get('names', []), 'nameaddress_vector': kw.get('address', []), 'country_code': kw.get('country_code', 'xx'), - 'centroid': 'SRID=4326;POINT(%f %f)' % centroid}) + 'centroid': 'POINT(%f %f)' % centroid}) def add_class_type_table(self, cls, typ): diff --git a/test/python/api/test_results.py b/test/python/api/test_results.py index 97d95ac0a6..232740b417 100644 --- a/test/python/api/test_results.py +++ 
b/test/python/api/test_results.py @@ -8,6 +8,7 @@ Tests for result datatype helper functions. """ import struct +from binascii import hexlify import pytest import pytest_asyncio @@ -17,10 +18,8 @@ from nominatim.api import SourceTable, DetailedResult, Point import nominatim.api.results as nresults -class FakeCentroid: - def __init__(self, x, y): - self.data = struct.pack("=biidd", 1, 0x20000001, 4326, - x, y) +def mkpoint(x, y): + return hexlify(struct.pack("=biidd", 1, 0x20000001, 4326, x, y)).decode('utf-8') class FakeRow: def __init__(self, **kwargs): @@ -60,7 +59,7 @@ def test_create_row_none(func): def test_create_row_with_housenumber(func): row = FakeRow(place_id=2345, osm_type='W', osm_id=111, housenumber=4, address=None, postcode='99900', country_code='xd', - centroid=FakeCentroid(0, 0)) + centroid=mkpoint(0, 0)) res = func(row, DetailedResult) @@ -75,7 +74,7 @@ def test_create_row_without_housenumber(func): row = FakeRow(place_id=2345, osm_type='W', osm_id=111, startnumber=1, endnumber=11, step=2, address=None, postcode='99900', country_code='xd', - centroid=FakeCentroid(0, 0)) + centroid=mkpoint(0, 0)) res = func(row, DetailedResult) diff --git a/test/python/tools/test_migration.py b/test/python/tools/test_migration.py index d102b97da9..88c7a4dd78 100644 --- a/test/python/tools/test_migration.py +++ b/test/python/tools/test_migration.py @@ -71,20 +71,6 @@ def test_already_at_version(def_config, property_table): assert migration.migrate(def_config, {}) == 0 -def test_no_migrations_necessary(def_config, temp_db_cursor, property_table, - monkeypatch): - oldversion = [x for x in nominatim.version.NOMINATIM_VERSION] - oldversion[0] -= 1 - property_table.set('database_version', - '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(oldversion)) - - oldversion[0] = 0 - monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS', - [(tuple(oldversion), lambda **attr: True)]) - - assert migration.migrate(def_config, {}) == 0 - - def test_run_single_migration(def_config, temp_db_cursor, property_table, monkeypatch, postprocess_mock): oldversion = [x for x in nominatim.version.NOMINATIM_VERSION] diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index ef77657983..e52bdee764 100755 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: # Some of the Python packages that come with Ubuntu 20.04 are too old, so # install the latest version from pip: - pip3 install --user sqlalchemy GeoAlchemy2 asyncpg + pip3 install --user sqlalchemy asyncpg # diff --git a/vagrant/Install-on-Ubuntu-22.sh b/vagrant/Install-on-Ubuntu-22.sh index c44cf87d51..fdb38203cc 100755 --- a/vagrant/Install-on-Ubuntu-22.sh +++ b/vagrant/Install-on-Ubuntu-22.sh @@ -29,7 +29,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: php-cli php-pgsql php-intl libicu-dev python3-dotenv \ python3-psycopg2 python3-psutil python3-jinja2 \ python3-icu python3-datrie python3-sqlalchemy \ - python3-geoalchemy2 python3-asyncpg git + python3-asyncpg git # # System Configuration