Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC for hyperscan usage in UrlDispatcher #9907

Draft
wants to merge 39 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
2e7c5a7
hyperscan
asvetlov Nov 15, 2024
1a5a10d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 15, 2024
def5f07
Fix mypy
asvetlov Nov 15, 2024
bea95da
Reforman
asvetlov Nov 15, 2024
bd1f61f
Update setup.cfg
asvetlov Nov 15, 2024
934f755
step
asvetlov Nov 16, 2024
2940323
step
asvetlov Nov 16, 2024
b41363b
merge
asvetlov Nov 16, 2024
87056a2
fix
asvetlov Nov 16, 2024
ac2f8e3
fix
asvetlov Nov 16, 2024
91e890a
Drop debug info
asvetlov Nov 16, 2024
e0eb966
Tune deps
asvetlov Nov 16, 2024
d6c019b
fix
asvetlov Nov 16, 2024
5db0bfd
fix
asvetlov Nov 16, 2024
7ce5180
fix
asvetlov Nov 16, 2024
d8c5da1
fix
asvetlov Nov 16, 2024
d27eea9
tune
asvetlov Nov 16, 2024
03719e9
tune
asvetlov Nov 16, 2024
45f947f
Merge branch 'master' into hyperscan
asvetlov Nov 16, 2024
89bac7a
inline
asvetlov Nov 16, 2024
505e7f4
Merge branch 'master' into hyperscan
asvetlov Nov 16, 2024
4a29903
Use dict lookup for plain resources
asvetlov Nov 16, 2024
ed931e2
Merge branch 'master' into hyperscan
asvetlov Nov 16, 2024
51c26c0
Add separate dict for prefix resources
asvetlov Nov 16, 2024
e6e9e48
Tune
asvetlov Nov 16, 2024
f452675
Refactor prefixed resources routing
asvetlov Nov 17, 2024
f56a12a
address review
asvetlov Nov 17, 2024
58a605e
Merge branch 'master' into hyperscan
asvetlov Nov 17, 2024
e14bad9
tune
asvetlov Nov 17, 2024
a622920
Merge branch 'master' into hyperscan
asvetlov Nov 17, 2024
3563173
comment
asvetlov Nov 17, 2024
00e6331
Fix prefix resource lookup
asvetlov Nov 18, 2024
55a0c9d
Alternative strategy for prefix resource matching
asvetlov Nov 18, 2024
a8bed53
relax deps
asvetlov Nov 18, 2024
2ac1eca
fix
asvetlov Nov 18, 2024
a718f6e
fix
asvetlov Nov 18, 2024
c82a824
Merge branch 'master' into hyperscan
asvetlov Nov 18, 2024
e1cbdf0
Merge branch 'master' into hyperscan
asvetlov Nov 19, 2024
a87dc32
Merge branch 'master' into hyperscan
asvetlov Nov 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ ignore_missing_imports = True

[mypy-gunicorn.*]
ignore_missing_imports = True

[mypy-hyperscan]
ignore_missing_imports = True
asvetlov marked this conversation as resolved.
Show resolved Hide resolved
145 changes: 126 additions & 19 deletions aiohttp/web_urldispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from .abc import AbstractMatchInfo, AbstractRouter, AbstractView
from .helpers import DEBUG
from .http import HttpVersion11
from .log import web_logger
from .typedefs import Handler, PathLike
from .web_exceptions import (
HTTPException,
Expand Down Expand Up @@ -68,6 +69,14 @@
)


try:
import hyperscan

HAS_HYPERSCAN = True
except ImportError:
HAS_HYPERSCAN = False


if TYPE_CHECKING:
from .web_app import Application

Expand Down Expand Up @@ -735,6 +744,7 @@
def add_prefix(self, prefix: str) -> None:
    """Apply *prefix* to this resource and refresh the sub-app router.

    The prefix is first handed to the parent class, then pushed down via
    ``_add_prefix_to_resources``.  Finally the router owned by
    ``self._app`` is asked to rebuild its lookup tables.
    """
    super().add_prefix(prefix)
    self._add_prefix_to_resources(prefix)
    # Rebuild last, once both prefix updates above have been applied.
    router = self._app.router
    router._rebuild()

def _add_prefix_to_resources(self, prefix: str) -> None:
router = self._app.router
Expand Down Expand Up @@ -998,27 +1008,75 @@
self._named_resources: Dict[str, AbstractResource] = {}
self._resource_index: dict[str, list[AbstractResource]] = {}
self._matched_sub_app_resources: List[MatchedSubAppResource] = []
self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported]
self._plain_resources: dict[str, PlainResource] = {}
self._prefix_resources: list[tuple[str, PrefixResource]] = []

def _on_match(
self, id_: int, from_: int, to: int, flags: int, found: list[int]
) -> Optional[bool]:
found.append(id_)
return None

async def resolve(self, request: Request) -> UrlMappingMatchInfo:
resource_index = self._resource_index
allowed_methods: Set[str] = set()

# Walk the url parts looking for candidates. We walk the url backwards
# to ensure the most explicit match is found first. If there are multiple
# candidates for a given url part because there are multiple resources
# registered for the same canonical path, we resolve them in a linear
# fashion to ensure registration order is respected.
url_part = request.rel_url.path_safe
while url_part:
for candidate in resource_index.get(url_part, ()):
match_dict, allowed = await candidate.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed
if url_part == "/":
break
url_part = url_part.rpartition("/")[0] or "/"
allowed_methods: set[str] = set()
path = request.rel_url.path_safe

if (plain_resource := self._plain_resources.get(path)) is not None:
match_dict, allowed = await plain_resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed

for prefix, prefix_resource in self._prefix_resources:
Dreamsorcerer marked this conversation as resolved.
Show resolved Hide resolved
match_dict, allowed = await prefix_resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed

if self._hyperdb is not None:
found: list[int] = []
resources = self._resources

self._hyperdb.scan(
path.encode("utf8"), match_event_handler=self._on_match, context=found
)
if found:
asvetlov marked this conversation as resolved.
Show resolved Hide resolved
if len(found) > 1:
# Multiple matches are found,
# use the FIRST match.
# Match ids are basically indexes in self._resources
# with an offset for variable resources
found.sort()

Check warning on line 1052 in aiohttp/web_urldispatcher.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/web_urldispatcher.py#L1052

Added line #L1052 was not covered by tests

for idx in found:
resource = resources[idx]
match_dict, allowed = await resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed

Check warning on line 1060 in aiohttp/web_urldispatcher.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/web_urldispatcher.py#L1060

Added line #L1060 was not covered by tests
else:
url_part = path
resource_index = self._resource_index

# Walk the url parts looking for candidates. We walk the url backwards
# to ensure the most explicit match is found first. If there are multiple
# candidates for a given url part because there are multiple resources
# registered for the same canonical path, we resolve them in a linear
# fashion to ensure registration order is respected.
while url_part:
for candidate in resource_index.get(url_part, ()):
match_dict, allowed = await candidate.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed
if url_part == "/":
break
url_part = url_part.rpartition("/")[0] or "/"

#
# We didn't find any candidates, so we'll try the matched sub-app
Expand Down Expand Up @@ -1239,6 +1297,55 @@
super().freeze()
for resource in self._resources:
resource.freeze()
self._rebuild()

def _rebuild(self) -> None:
    """Rebuild the lookup tables derived from ``self._resources``.

    Every registered resource is visited in registration order and sorted
    into one of three structures:

    * ``self._plain_resources`` -- ``PlainResource`` objects keyed by the
      ``"path"`` entry of ``get_info()``;
    * ``self._prefix_resources`` -- ``(prefix, resource)`` pairs for
      ``PrefixResource`` objects other than ``MatchedSubAppResource``;
    * ``self._hyperdb`` -- a hyperscan database compiled from the patterns
      of ``DynamicResource`` objects, each anchored as ``^pattern$`` and
      identified by the resource's index in ``self._resources``.

    ``self._hyperdb`` stays ``None`` when hyperscan is not installed, when
    there are no dynamic patterns to compile, or when compilation fails
    (a warning is logged in the last case).

    Raises:
        RuntimeError: if a resource is of an unsupported type.
    """
    self._hyperdb = None
    self._plain_resources.clear()
    del self._prefix_resources[:]
    patterns: list[bytes] = []
    ids: list[int] = []
    for id_, resource in enumerate(self._resources):
        if isinstance(resource, PlainResource):
            # NOTE(review): a later PlainResource with the same path
            # replaces an earlier one here -- confirm this is intended.
            self._plain_resources[resource.get_info()["path"]] = resource
        elif isinstance(resource, DynamicResource):
            pattern = resource.get_info()["pattern"].pattern
            patterns.append(f"^{pattern}$".encode())
            # The match id is the index of the resource in self._resources.
            ids.append(id_)
        elif isinstance(resource, PrefixResource):
            if isinstance(resource, MatchedSubAppResource):
                # Wildcard resources don't fit the hyperscan table.
                continue
            self._prefix_resources.append((resource.get_info()["prefix"], resource))
        else:
            raise RuntimeError(f"Unsupported resource type {type(resource)}")

    # Nothing to compile: without hyperscan, or without any dynamic
    # pattern, keep ``self._hyperdb`` as None instead of attempting (and
    # warning about) a compilation of an empty expression list.
    if not HAS_HYPERSCAN or not patterns:
        return

    count = len(patterns)
    flag = (
        hyperscan.HS_FLAG_UTF8
        | hyperscan.HS_FLAG_UCP
        | hyperscan.HS_FLAG_SINGLEMATCH
    )
    database = hyperscan.Database()
    try:
        database.compile(
            expressions=patterns,
            ids=ids,
            elements=count,
            flags=[flag] * count,
        )
    except hyperscan.error as exc:
        web_logger.warning(
            "Cannot compile hyperscan database: %s, switching to fallback url resolver",
            repr(exc),
        )
    else:
        # Publish the database only once it has compiled successfully.
        self._hyperdb = database

def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]:
"""Append routes to route table.
Expand Down
1 change: 1 addition & 0 deletions requirements/runtime-deps.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ async-timeout >= 4.0, < 6.0 ; python_version < "3.11"
Brotli; platform_python_implementation == 'CPython'
brotlicffi; platform_python_implementation != 'CPython'
frozenlist >= 1.1.1
hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14")
multidict >=4.5, < 7.0
propcache >= 0.2.0
yarl >= 1.17.0, < 2.0
2 changes: 2 additions & 0 deletions requirements/runtime-deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ frozenlist==1.5.0
# via
# -r requirements/runtime-deps.in
# aiosignal
hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14")
# via -r requirements/runtime-deps.in
idna==3.6
# via yarl
multidict==6.1.0
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ speedups =
aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin"
Brotli; platform_python_implementation == 'CPython'
brotlicffi; platform_python_implementation != 'CPython'
hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14")
asvetlov marked this conversation as resolved.
Show resolved Hide resolved

[options.packages.find]
exclude =
Expand Down
Loading