From 2f35bc0e4a12f79911f2a55b209459c45471a207 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 19 Dec 2020 16:55:00 -0500 Subject: [PATCH 01/24] Initial WIP country blacklist structure. --- web/security/waf.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/web/security/waf.py b/web/security/waf.py index c3134ab..af8fc64 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -222,3 +222,33 @@ def __init__(self) -> None: 'admin', 'mysql', 'phpMyAdmin', 'pma', 'dbadmin', 'MyAdmin', 'phppgadmin', # Common administrative access. 'crossdomain.xml', 'README', 'LICENSE', 'webdav', re(r'w00tw00t'), # Generic probes. ) + + +class GeoCountryHeuristic(WAFHeuristic): + """A rule which preemptively blocks attempted access from specific countries of origin. + + Example usage: + + GeoCountryHeuristic( + 'cn', 'kp', # China, take that, "Great Firewall", and North Korea. + 'ae', 'ir', 'iq', 'sa', # Middle-eastern states. + 'by', 'ru', 'ua', # Russia and nearby former states. + 'am', 'az', 'ee', 'ge', 'kg', 'kz', 'lt', 'lv', 'md', 'tj', 'tm', 'uz', # Additional former states. + ) + """ + + countries: Set[str] # The set of blocked ISO 3166 country codes. + + def __init__(self, *countries:str) -> None: + assert check_argument_types() + + self.countries = set(countries) + + def __repr__(self, *extra:str) -> str: + countries = "'" + "', '".join(sorted(self.countries)) + "'" + return super().__repr__(countries, *extra) + + def __call__(self, environ:dict, uri:URI) -> None: + assert check_argument_types() + + ... From f231f041fb18146b4ccf941dd05d385f6f33fd0b Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 19 Dec 2020 18:07:24 -0500 Subject: [PATCH 02/24] Add request de-serialization protections. 
--- web/ext/waf.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index 650cb63..90c2d90 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -105,6 +105,14 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): return inner + def prepare(self, context: Context) -> None: + """Armor the base extension against maliciously formed requests.""" + + try: + Request(context.environ) # Requests are singletons, so BaseExtension later won't do more work. + except Exception as e: + raise HTTPClose(f"Encountered error de-serializing the request: {e!r}") + def start(self, context: Context) -> None: """Executed during application startup just after binding the server. @@ -113,7 +121,7 @@ def start(self, context: Context) -> None: Any of the actions you wanted to perform during `__init__` you should do here. """ ... - + def stop(self, context: Context) -> None: """Executed during application shutdown after the last request has been served. From 1647e14d1ce470c35b366028685eac6769dd8e39 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 19 Dec 2020 18:36:40 -0500 Subject: [PATCH 03/24] Improved hinting, now storing packed IPs. 
--- web/ext/waf.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index 90c2d90..dcda3af 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -12,11 +12,14 @@ from html import escape from re import compile as re +from socket import inet_aton from typeguard import check_argument_types from uri import URI +from webob import Request -from web.core.typing import Any, Dict, Union, Callable, ClassVar, Path, Set, Pattern, Iterable, MutableSet, Optional +from web.core.typing import Any, Union, Callable, ClassVar, Iterable, Optional +from web.core.typing import Dict, Path, Set, Pattern, MutableSet from web.core.typing import Context, WSGI, WSGIEnvironment, WSGIStartResponse, Request, Response, Tags from web.core.context import Context from web.security.waf import WAFHeuristic @@ -56,8 +59,12 @@ def __init__(self, *heuristics, blacklist:Optional[ClientSet]=None, exempt:Optio super().__init__() self.heuristics = heuristics - self.blacklist = set() if blacklist is None else blacklist # Permit custom backing stores to be passed in. - self.exempt = set() if exempt is None else exempt # Permit custom backing stores to be passed in. + + # Permit custom backing stores to be passed in; we optimize by storing packed binary values, not strings. + self.blacklist = set() if blacklist is None else set(inet_aton(i) for i in blacklist) + + # Permit custom backing stores to be passed in. + self.exempt = set() if exempt is None else exempt def __call__(self, context:Context, app:WSGI) -> WSGI: """Wrap the WSGI application callable in our 'web application firewall'.""" @@ -67,7 +74,11 @@ def __call__(self, context:Context, app:WSGI) -> WSGI: def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): # Identify the remote user. - request: Request = Request(environ) + try: + request: Request = Request(environ) + except Exception as e: # Protect against de-serialization errors. 
+ return HTTPClose(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) + uri: URI = URI(request.url) # https://docs.pylonsproject.org/projects/webob/en/stable/api/request.html#webob.request.BaseRequest.client_addr @@ -76,7 +87,7 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): try: # Immediately reject known bad actors. - if request.client_addr in self.blacklist: + if inet_aton(request.client_addr) in self.blacklist: return HTTPClose()(environ, start_response) # No need to re-blacklist. # Validate the heuristic rules. @@ -88,13 +99,13 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): raise # Invoke the wrapped application if everything seems OK. Note that this pattern of wrapping permits - # your application to raise HTTPClose if wishing to blacklist the active connection. + # your application to raise HTTPClose if wishing to blacklist the active connection for any reason. return app(environ, start_response) except HTTPClose as e: if request.client_addr not in self.exempt: log.warning(f"Blacklisting: {request.client_addr}") - self.blacklist.add(request.client_addr) + self.blacklist.add(inet_aton(request.client_addr)) if not __debug__: e = HTTPClose() # Do not disclose the reason in production environments. elif ': ' in e.args[0]: # XXX: Not currently effective. @@ -105,14 +116,6 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): return inner - def prepare(self, context: Context) -> None: - """Armor the base extension against maliciously formed requests.""" - - try: - Request(context.environ) # Requests are singletons, so BaseExtension later won't do more work. - except Exception as e: - raise HTTPClose(f"Encountered error de-serializing the request: {e!r}") - def start(self, context: Context) -> None: """Executed during application startup just after binding the server. @@ -138,14 +141,14 @@ def graceful(self, context: Context, **config) -> None: """ ... 
- def status(self, context: Context) -> None: + def status(self, context: Context) -> Generator[str, None, None]: """Report on the current status of the Web Application Firewall.""" def plural(quantity, single, plural): return single if quantity == 1 else plural c = len(self.heuristics) - yield f"**Rules:** {c} {plural(c, 'entry', 'entries')}" + yield f"Rules: {c} {plural(c, 'entry', 'entries')}" c = len(self.blacklist) - yield f"**Blacklist:** {c} {plural(c, 'entry', 'entries')}" + yield f"Blacklist: {c} {plural(c, 'entry', 'entries')}" From 643b9b7feb0a0ab1e5fa79012b17c807eebf0b6a Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 19 Dec 2020 19:07:31 -0500 Subject: [PATCH 04/24] Correct missing import, pre-trigger cached de-serialization operations which may fail. --- web/ext/waf.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index dcda3af..5f06767 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -18,7 +18,7 @@ from uri import URI from webob import Request -from web.core.typing import Any, Union, Callable, ClassVar, Iterable, Optional +from web.core.typing import Any, Union, Callable, ClassVar, Generator, Iterable, Optional from web.core.typing import Dict, Path, Set, Pattern, MutableSet from web.core.typing import Context, WSGI, WSGIEnvironment, WSGIStartResponse, Request, Response, Tags from web.core.context import Context @@ -42,6 +42,8 @@ class WebApplicationFirewallExtension: first:ClassVar[bool] = True # Always try to be first: if truthy, become a dependency for all non-first extensions. extensions:ClassVar[Tags] = {'waf.rule'} # A set of entry_point namespaces to search for related plugin registrations. + uses:ClassVar[Tags] = {'timing.prefix'} # We want our execution time to be counted. + heuristics:Iterable[WAFHeuristic] # The prepared heuristic instances. blacklist:ClientSet # The current blacklist. Can theoretically be swapped for any mutable set-like object. 
exempt:ClientSet # IP addresses exempt from blacklisting. @@ -75,7 +77,11 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): # Identify the remote user. try: - request: Request = Request(environ) + # While these operations "front-load" the processing of these aspects of the request, they are cached, + # with the request itself acting as a singleton by storing itself within the WSGI environment. + request: Request = Request(environ) # This will be remembered and re-used as a singleton later. + request.GET # "Force" de-serialization of query string parameters. + request.POST # "Force" de-serialization of form-encoded request bodies, when applicable. except Exception as e: # Protect against de-serialization errors. return HTTPClose(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) From b17797899d1b2841eecc3ee81315436f19930bb2 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:10:26 -0500 Subject: [PATCH 05/24] Additional optional installation USE flags. --- README.rst | 21 +++++++++++++++++++++ setup.py | 7 ++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index d115197..d18a2c5 100644 --- a/README.rst +++ b/README.rst @@ -68,6 +68,27 @@ and submit a pull request. This process is beyond the scope of this documentati `GitHub's documentation `_. +Installation "Use" Flags +------------------------ + +Several `extras_require` dependencies are declared, for bundled installation of tools required for additional features +that are not required for basic usage. To utilize these flags, on any reference to the project or on-disk project +location when executing `pip install`, add the flags comma-separated within square brackets after the name or path: + + pip install -U -e '.[development,geo]' + +Quoting will be required in most shells, as square brackets would ordinarily be "expanded". 
+ +* `development` — Install a standard suite of development-time support packages, testing framework, and testing components. + +* `ecdsa` — Require an efficient ECDSA implementation for use of Elliptic Curve signing operations. + +* `geo` — This project utilizes IP2Location LITE data available from http://www.ip2location.com to blacklist users by + country of origin. Enabling this flag will install the official `IP2Location` library, however the actual dataset + will need to be downloaded separately. + + + Version History =============== diff --git a/setup.py b/setup.py index a259685..cd38905 100755 --- a/setup.py +++ b/setup.py @@ -80,9 +80,10 @@ ], extras_require = dict( - development = tests_require + ['pre-commit'], - ecdsa = ['ecdsa'], - fastecdsa = ['fastecdsa>=1.0.3'], + development = tests_require + ['pre-commit', 'bandit', 'e', 'pudb', 'ptipython'], + ecdsa = ['fastecdsa>=1.0.3'], + fastecdsa = ['fastecdsa>=1.0.3'], # Deprecated reference. + geo = ['IP2Location'], ), tests_require = tests_require, From 48d6dcab6f1a26ad0a4042af2d6acd8f97079fff Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:10:50 -0500 Subject: [PATCH 06/24] Document addition of WAF extension. --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index d18a2c5..3248940 100644 --- a/README.rst +++ b/README.rst @@ -99,6 +99,9 @@ Version 3.0 * **Removed Python 2 support and version specific code.** The project has been updated to modern Python packaging standards, including modern namespace use. Modern namespaces are wholly incompatible with the previous namespacing mechanism; this project can not be simultaneously installed with any Marrow project that is Python 2 compatible. +* **Added Web Application Firewall extension.** To protect your application against passive scanning attempts, access of tools for a programming language that are absolutely not present (i.e. 
PHP, ColdFusion, Adobe Flex, …), malicious probes, and even to restrict access by geographic location. + + Version 2.0 ----------- From dfc006904ac79aabc235f7aa398715ca4617c548 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:11:29 -0500 Subject: [PATCH 07/24] Add API definition for persistent blacklists. --- web/ext/waf.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index 5f06767..f9d7537 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -29,7 +29,18 @@ log = __import__('logging').getLogger(__name__) # A standard logger object. -ClientSet = MutableSet[str] +ClientSet = MutableSet[bytes] + +class PersistentClientSet(ClientSet, metaclass=ABCMeta): + """A mutable set exposing two methods for persisting and restoring its contents.""" + + @abstractmethod + def persist(self, context:Context) -> None: + ... + + @abstractmethod + def restore(self, context:Context) -> None: + ... class WebApplicationFirewallExtension: From 5644df9426d71480c821e1ad0f497f73d6a7f012 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:12:30 -0500 Subject: [PATCH 08/24] Permit persistence of the blacklist and exemptions. --- web/ext/waf.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index f9d7537..a4a34a0 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -76,7 +76,7 @@ def __init__(self, *heuristics, blacklist:Optional[ClientSet]=None, exempt:Optio # Permit custom backing stores to be passed in; we optimize by storing packed binary values, not strings. self.blacklist = set() if blacklist is None else set(inet_aton(i) for i in blacklist) - # Permit custom backing stores to be passed in. + # Permit custom backing stores to be passed in for the exemptions, as well. 
self.exempt = set() if exempt is None else exempt def __call__(self, context:Context, app:WSGI) -> WSGI: @@ -96,7 +96,8 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): except Exception as e: # Protect against de-serialization errors. return HTTPClose(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) - uri: URI = URI(request.url) + except Exception as e: # Protect against de-serialization errors. + return HTTPBadRequest(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) # https://docs.pylonsproject.org/projects/webob/en/stable/api/request.html#webob.request.BaseRequest.client_addr # Ref: https://www.nginx.com/resources/wiki/start/topics/examples/forwarded/ @@ -140,23 +141,32 @@ def start(self, context: Context) -> None: Any of the actions you wanted to perform during `__init__` you should do here. """ - ... + + # Permit the storage objects to resume from a saved state. + if hasattr(self.blacklist, 'restore'): self.blacklist.restore(context) + if hasattr(self.exempt, 'restore'): self.exempt.restore(context) def stop(self, context: Context) -> None: """Executed during application shutdown after the last request has been served. The first argument is the global context class, not request-local context instance. """ - ... + + # As per startup, permit the storage objects to persist their state. + if hasattr(self.blacklist, 'persist'): self.blacklist.persist(context) + if hasattr(self.exempt, 'persist'): self.exempt.persist(context) - def graceful(self, context: Context, **config) -> None: + def graceful(self, context: Context) -> None: """Called when a SIGHUP is sent to the application. The first argument is the global context class, not request-local context instance. Allows your code to re-load configuration and your code should close then re-open sockets and files. """ - ... + + # Ask the storage object to persist its state, if able. 
+ if hasattr(self.blacklist, 'persist'): self.blacklist.persist(context) + if hasattr(self.exempt, 'persist'): self.exempt.persist(context) def status(self, context: Context) -> Generator[str, None, None]: """Report on the current status of the Web Application Firewall.""" From c5f3a993e39ef203bcdf55b7dcd7a14551910454 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:15:56 -0500 Subject: [PATCH 09/24] Imports, docstring. --- web/ext/waf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index a4a34a0..cd78e11 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -10,6 +10,7 @@ * https://www.cloudflare.com/en-ca/waf/ """ +from abc import ABCMeta, abstractmethod from html import escape from re import compile as re from socket import inet_aton @@ -17,6 +18,7 @@ from typeguard import check_argument_types from uri import URI from webob import Request +from webob.exc import HTTPBadRequest from web.core.typing import Any, Union, Callable, ClassVar, Generator, Iterable, Optional from web.core.typing import Dict, Path, Set, Pattern, MutableSet @@ -44,10 +46,7 @@ def restore(self, context:Context) -> None: class WebApplicationFirewallExtension: - """A basic rules-based Web Application Firewall implementation. - - WIP. - """ + """A basic rules-based Web Application Firewall implementation.""" provides:ClassVar[Tags] = {'waf'} # A set of keywords usable in `uses` and `needs` declarations. first:ClassVar[bool] = True # Always try to be first: if truthy, become a dependency for all non-first extensions. From 5c1ffcca6c65105dbe9f94bb4a98044833592fb3 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:16:34 -0500 Subject: [PATCH 10/24] Deserialization errors are better handled in core at time of collect callback execution. 
--- web/ext/waf.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index cd78e11..c3471c3 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -84,16 +84,9 @@ def __call__(self, context:Context, app:WSGI) -> WSGI: assert check_argument_types() def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): - # Identify the remote user. - try: - # While these operations "front-load" the processing of these aspects of the request, they are cached, - # with the request itself acting as a singleton by storing itself within the WSGI environment. request: Request = Request(environ) # This will be remembered and re-used as a singleton later. - request.GET # "Force" de-serialization of query string parameters. - request.POST # "Force" de-serialization of form-encoded request bodies, when applicable. - except Exception as e: # Protect against de-serialization errors. - return HTTPClose(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) + uri: URI = URI(request.url) except Exception as e: # Protect against de-serialization errors. return HTTPBadRequest(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) From 309e4501b61d59e0730336a3dc4c4bf62654020d Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 19 Dec 2020 16:55:00 -0500 Subject: [PATCH 11/24] Initial WIP country blacklist structure. --- web/security/waf.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/web/security/waf.py b/web/security/waf.py index c3134ab..af8fc64 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -222,3 +222,33 @@ def __init__(self) -> None: 'admin', 'mysql', 'phpMyAdmin', 'pma', 'dbadmin', 'MyAdmin', 'phppgadmin', # Common administrative access. 'crossdomain.xml', 'README', 'LICENSE', 'webdav', re(r'w00tw00t'), # Generic probes. 
) + + +class GeoCountryHeuristic(WAFHeuristic): + """A rule which preemptively blocks attempted access from specific countries of origin. + + Example usage: + + GeoCountryHeuristic( + 'cn', 'kp', # China, take that, "Great Firewall", and North Korea. + 'ae', 'ir', 'iq', 'sa', # Middle-eastern states. + 'by', 'ru', 'ua', # Russia and nearby former states. + 'am', 'az', 'ee', 'ge', 'kg', 'kz', 'lt', 'lv', 'md', 'tj', 'tm', 'uz', # Additional former states. + ) + """ + + countries: Set[str] # The set of blocked ISO 3166 country codes. + + def __init__(self, *countries:str) -> None: + assert check_argument_types() + + self.countries = set(countries) + + def __repr__(self, *extra:str) -> str: + countries = "'" + "', '".join(sorted(self.countries)) + "'" + return super().__repr__(countries, *extra) + + def __call__(self, environ:dict, uri:URI) -> None: + assert check_argument_types() + + ... From 5ffe0f442a6f00375e11940ff967aabe8f37308d Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 19 Dec 2020 18:07:37 -0500 Subject: [PATCH 12/24] Sorting of example countries. --- web/security/waf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/security/waf.py b/web/security/waf.py index af8fc64..ad31bd8 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -231,8 +231,8 @@ class GeoCountryHeuristic(WAFHeuristic): GeoCountryHeuristic( 'cn', 'kp', # China, take that, "Great Firewall", and North Korea. - 'ae', 'ir', 'iq', 'sa', # Middle-eastern states. - 'by', 'ru', 'ua', # Russia and nearby former states. + 'ae', 'ir', 'iq', 'sa', # Middle-eastern nations. + 'by', 'ru', 'ua', # Russia and nearby former bloc states. 'am', 'az', 'ee', 'ge', 'kg', 'kz', 'lt', 'lv', 'md', 'tj', 'tm', 'uz', # Additional former states. ) """ From 7b9e49f1c2bb2cd32a6ae2a74057be0a5095f32f Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:33:10 -0500 Subject: [PATCH 13/24] Register the WAF heuristics as plugins. 
--- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index cd38905..615798c 100755 --- a/setup.py +++ b/setup.py @@ -102,5 +102,13 @@ 'matches = web.security.predicate:ContextMatch', 'contains = web.security.predicate:ContextContains', ], + 'web.security.heuristic': [ + 'dns = web.security.waf:ClientDNSHeuristic', + 'path = web.security.waf:PathHeuristic', + 'php = web.security.waf:PHPHeuristic', + 'wordpress = web.security.waf:WordpressHeuristic', + 'hosting = web.security.waf:HostingCombinedHeuristic', + 'country = web.security.waf:GeoCountryHeuristic', + ] }, ) From 6dd43d5eac2aa8d169c755476552634abd8cf74e Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 09:45:13 -0500 Subject: [PATCH 14/24] Use correct code, we are not a client ourselves. --- web/security/exc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/security/exc.py b/web/security/exc.py index 1606ab0..7611349 100644 --- a/web/security/exc.py +++ b/web/security/exc.py @@ -4,6 +4,6 @@ class HTTPClose(HTTPClientError): """Indicate to the front-end load balancer (FELB) that it should hang up on the client.""" - code = 499 - title = "Client Closed Request" + code = 444 + title = "Connection Closed Without Response" explanation = "The server did not accept your request." From 71c27b88f69c530da7f922dbeb35a9068f6162d2 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 11:29:27 -0500 Subject: [PATCH 15/24] Pass client IP down to heuristics, prime query string arguments. 
--- web/ext/waf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index c3471c3..2fb95cd 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -48,12 +48,11 @@ def restore(self, context:Context) -> None: class WebApplicationFirewallExtension: """A basic rules-based Web Application Firewall implementation.""" + uses:ClassVar[Tags] = {'timing.prefix'} # We want our execution time to be counted. provides:ClassVar[Tags] = {'waf'} # A set of keywords usable in `uses` and `needs` declarations. first:ClassVar[bool] = True # Always try to be first: if truthy, become a dependency for all non-first extensions. extensions:ClassVar[Tags] = {'waf.rule'} # A set of entry_point namespaces to search for related plugin registrations. - uses:ClassVar[Tags] = {'timing.prefix'} # We want our execution time to be counted. - heuristics:Iterable[WAFHeuristic] # The prepared heuristic instances. blacklist:ClientSet # The current blacklist. Can theoretically be swapped for any mutable set-like object. exempt:ClientSet # IP addresses exempt from blacklisting. @@ -87,6 +86,7 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): try: request: Request = Request(environ) # This will be remembered and re-used as a singleton later. uri: URI = URI(request.url) + request.GET # As will this "attempt to access query string parameters", malformation detection. except Exception as e: # Protect against de-serialization errors. return HTTPBadRequest(f"Encountered error de-serializing the request: {e!r}")(environ, start_response) @@ -103,7 +103,7 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse): # Validate the heuristic rules. 
for heuristic in self.heuristics: try: - heuristic(environ, uri) + heuristic(environ, uri, client) except HTTPClose as e: log.error(f"{heuristic} {e.args[0].lower()}") raise From 86592c0f1bdb88d5d78272eec533dc3d8af92cc5 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 11:38:24 -0500 Subject: [PATCH 16/24] Heuristics are now passed the client IP. --- web/security/waf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/security/waf.py b/web/security/waf.py index ad31bd8..6fee90a 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -13,7 +13,7 @@ class WAFHeuristic: - def __call__(self, environ:WSGIEnvironment, uri:URI) -> Optional[bool]: + def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]: """Perform the heuristic check. May return True to indicate processing should stop, raise an HTTPException to propagate to the client, or may @@ -78,7 +78,7 @@ def __repr__(self, *extra:str) -> str: *extra ) - def __call__(self, environ:WSGIEnvironment, uri:URI) -> Optional[bool]: + def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]: assert check_argument_types() addr:str = environ.get(self.origin, '') # Attempt to retrieve the client IP from the WSGI environment. @@ -170,7 +170,7 @@ def __repr__(self, *extra:str) -> str: *extra ) - def __call__(self, environ:dict, uri:URI) -> None: + def __call__(self, environ:dict, uri:URI, client:str) -> None: assert check_argument_types() if self.forbidden & set(uri.path.parts): # This is ~a third faster than the simplest regex use. @@ -248,7 +248,7 @@ def __repr__(self, *extra:str) -> str: countries = "'" + "', '".join(sorted(self.countries)) + "'" return super().__repr__(countries, *extra) - def __call__(self, environ:dict, uri:URI) -> None: + def __call__(self, environ:dict, uri:URI, client:str) -> None: assert check_argument_types() ... 
From 7b5c734cafb621f9b3497c4c24cf5226feb533b6 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 11:39:18 -0500 Subject: [PATCH 17/24] IP2Location utilization. --- web/security/waf.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/web/security/waf.py b/web/security/waf.py index 6fee90a..70f35cd 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -11,6 +11,11 @@ from .util import DNS from .exc import HTTPClose +try: + from IP2Location import IP2Location +except ImportError: + IP2Location = None + class WAFHeuristic: def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]: @@ -238,11 +243,18 @@ class GeoCountryHeuristic(WAFHeuristic): """ countries: Set[str] # The set of blocked ISO 3166 country codes. + resolver: IP2Location - def __init__(self, *countries:str) -> None: + def __init__(self, *countries:str, db:str='IP2LOCATION-LITE-DB1.IPV6.BIN') -> None: + """Initialize the country heuristic's geographic database and blacklist.""" + assert check_argument_types() - self.countries = set(countries) + if IP2Location is None: + raise ImportError("You must have the IP2Location library installed.") + + self.countries = {i.upper() for i in countries} + self.resolver = IP2Location(db) def __repr__(self, *extra:str) -> str: countries = "'" + "', '".join(sorted(self.countries)) + "'" From 72d19e1c96e395cbb903b61731eb404dfa9089d1 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 11:39:41 -0500 Subject: [PATCH 18/24] Ban-by-country implementation. --- web/security/waf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/web/security/waf.py b/web/security/waf.py index 70f35cd..43105af 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -263,4 +263,7 @@ def __repr__(self, *extra:str) -> str: def __call__(self, environ:dict, uri:URI, client:str) -> None: assert check_argument_types() - ... 
+ rec = self.resolver.get_country_short(client) + + if rec in self.countries: + raise HTTPClose(f"Access from {self.resolver.get_country_long(client)} forbidden.") From 7460ee3d313f73a3fb6358f8987208e5827f2cd5 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 11:40:46 -0500 Subject: [PATCH 19/24] Hosting combined heuristic default extensions. --- web/security/waf.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/web/security/waf.py b/web/security/waf.py index 43105af..b3a487b 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -214,12 +214,30 @@ def __init__(self) -> None: class HostingCombinedHeuristic(PathHeuristic): """A combined set of suspicious URI fragments and general patterns matching commonly exploited tools. - This is the result of casually browsing through around ten years of error logs on an active hosting service. + This is the result of casually browsing through around ten years of error logs on an active hosting service and + combines a number of the other PathHeuristic rules into one for convenience. (The WAF already optimizes these down + into a single regex for runtime checking; this is an import optimization.) + + Several filename extensions which ought to be delivered by a front-end load balancer are included in this list; + DO NOT INCLUDE THIS HEURISTIC AT DEVELOPMENT TIME if you are delivering static content via an endpoint within your + application. A critical message will be emitted if used at development time. """ - def __init__(self) -> None: + def __init__(self, *extensions:str) -> None: + """Prepare a 'combined hosting experience' heuristic. + + You can pass in additional extensions to block beyond the basic set included as stringy regular expression + fragments via positional arguments. 
+ """ + + if __debug__: + log.critical("Use of this heuristic if delivering statics from the application at development time will" \ + "likely blacklist you.") + + extensions = set(extensions) | {'html?', 'phps?', 'py', 'js', 'css', 'swf', 'txt', 'md'} + super().__init__( - re(r'\.(html?|swf|phps?)($|/)'), # Bare HTML files, Adobe Flash, or PHP. + re(r'\.(' + '|'.join(sorted(extensions)) + r')($|/)'), # Forbidden filename extensions. re(r'((web)?mail)|(round|cube|roundcube)((web)?mail)?2?(-[0-9\.]+)?'), # Webmail service, in general. 'wm', 'rc', 'rms', 'mss', 'mss2', # More common webmail containers. 'FlexDataServices', 'amfphp', 'soapCaller.bs', # Adobe Flex AMF and RPC services. From 5a8ba9733d81e168d5f08ad0a691f1068b5c8242 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 20 Dec 2020 11:52:36 -0500 Subject: [PATCH 20/24] Who needs a temporary variable? --- web/security/waf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/web/security/waf.py b/web/security/waf.py index b3a487b..2ccc7c7 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -281,7 +281,5 @@ def __repr__(self, *extra:str) -> str: def __call__(self, environ:dict, uri:URI, client:str) -> None: assert check_argument_types() - rec = self.resolver.get_country_short(client) - - if rec in self.countries: + if self.resolver.get_country_short(client) in self.countries: raise HTTPClose(f"Access from {self.resolver.get_country_long(client)} forbidden.") From 182dbe637bcaa70a0538921bf6727cad80d5d68a Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 26 Dec 2020 23:21:35 -0500 Subject: [PATCH 21/24] Additional example country, short and long name for logs. 
--- web/security/waf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/security/waf.py b/web/security/waf.py index 2ccc7c7..807b576 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -254,7 +254,7 @@ class GeoCountryHeuristic(WAFHeuristic): GeoCountryHeuristic( 'cn', 'kp', # China, take that, "Great Firewall", and North Korea. - 'ae', 'ir', 'iq', 'sa', # Middle-eastern nations. + 'ae', 'ir', 'iq', 'sa', 'tr', # Middle-eastern nations. 'by', 'ru', 'ua', # Russia and nearby former bloc states. 'am', 'az', 'ee', 'ge', 'kg', 'kz', 'lt', 'lv', 'md', 'tj', 'tm', 'uz', # Additional former states. ) @@ -281,5 +281,5 @@ def __repr__(self, *extra:str) -> str: def __call__(self, environ:dict, uri:URI, client:str) -> None: assert check_argument_types() - if self.resolver.get_country_short(client) in self.countries: - raise HTTPClose(f"Access from {self.resolver.get_country_long(client)} forbidden.") + if (short := self.resolver.get_country_short(client)) in self.countries: + raise HTTPClose(f"Access from {short} ({self.resolver.get_country_long(client)}) forbidden.") From 011cbe3b36a6cb755baef03910096c1cb74f1307 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sat, 26 Dec 2020 23:21:51 -0500 Subject: [PATCH 22/24] Adjustments to logging levels and extras. 
--- web/ext/acl.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/web/ext/acl.py b/web/ext/acl.py index ab2ef46..b09886b 100644 --- a/web/ext/acl.py +++ b/web/ext/acl.py @@ -249,7 +249,7 @@ def __init__(self, *_policy, default=None, policy=None): def prepare(self, context): """Called to prepare the request context by adding an `acl` attribute.""" - if __debug__: log.debug("Populating request context with ACL.", extra=dict(request=id(context))) + if __debug__: log.trace("Populating request context with ACL.", extra=context.extra) context.acl = ACL(context=context, policy=self.policy) @@ -262,24 +262,24 @@ def dispatch(self, context, crumb): acl = getattr(crumb.handler, '__acl__', ()) inherit = getattr(crumb.handler, '__acl_inherit__', True) - if __debug__: log.debug(f"Handling dispatch event: {crumb.handler!r} {acl!r}", extra=dict( - request = id(context), - consumed = crumb.path, - handler = safe_name(crumb.handler), - endpoint = crumb.endpoint, - acl = [repr(i) for i in acl], - inherit = inherit, - )) + if __debug__: log.trace(f"Handling dispatch event: {crumb.handler!r} {acl!r}", extra={ + 'consumed': crumb.path, + 'handler': safe_name(crumb.handler), + 'endpoint': crumb.endpoint, + 'acl': [repr(i) for i in acl], + 'inherit': inherit, + **context.extra + }) if not inherit: - if __debug__: log.info("Clearing collected access control list.") + if __debug__: log.warn("Clearing collected access control list.") del context.acl[:] context.acl.extend((Path(context.request.path), i, handler) for i in acl) def collect(self, context, handler, args, kw): if not context.acl: - if __debug__: log.debug("Skipping validation of empty ACL.", extra=dict(request=id(context))) + if __debug__: log.debug("Skipping validation of empty ACL.", extra=context.extra) return grant = context.acl.is_authorized From a2807c05d14e01c11174691e19d0fba5e8b3a622 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 18 Feb 2021 14:01:13 -0500 
Subject: [PATCH 23/24] Blacklist serialization and deserialization. --- web/ext/waf.py | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/web/ext/waf.py b/web/ext/waf.py index 2fb95cd..e6081c8 100644 --- a/web/ext/waf.py +++ b/web/ext/waf.py @@ -12,6 +12,7 @@ from abc import ABCMeta, abstractmethod from html import escape +from pathlib import Path from re import compile as re from socket import inet_aton @@ -34,15 +35,46 @@ ClientSet = MutableSet[bytes] class PersistentClientSet(ClientSet, metaclass=ABCMeta): - """A mutable set exposing two methods for persisting and restoring its contents.""" + """An ABC describing a mutable set that exposes methods for persisting and restoring its contents.""" @abstractmethod def persist(self, context:Context) -> None: - ... + """Persist the state of the set. + + It is up to the individual implementation to decide how to do this. Typically this would involve serialization + on-disk or the use of some form of data store, such as SQLite, PostgreSQL, or MongoDB. + """ + + raise NotImplementedError() @abstractmethod def restore(self, context:Context) -> None: - ... + """Restore the state of the set. + + It is up to the individual implementation to decide how to do this. Typically this involves deserialization + from disk or the use of some form of data store, such as SQLite, PostgreSQL, or MongoDB. + """ + + raise NotImplementedError() + + +class LineSerializedSet(set, PersistentClientSet): + location:Path # The target path to read and write data from/to. 
+ + def __init__(self, *args, location:Union[str,Path]): + self.location = Path(location) + + def persist(self, context:Context) -> None: + with self.location.open('w') as fh: + for element in sorted(self): + fh.write(str(element) + "\n") + + def restore(self, context:Context) -> None: + self.clear() + + with self.location.open('r') as fh: + for line in fh.readlines(): + self.add(int(line.strip())) class WebApplicationFirewallExtension: @@ -72,7 +104,7 @@ def __init__(self, *heuristics, blacklist:Optional[ClientSet]=None, exempt:Optio self.heuristics = heuristics # Permit custom backing stores to be passed in; we optimize by storing packed binary values, not strings. - self.blacklist = set() if blacklist is None else set(inet_aton(i) for i in blacklist) + self.blacklist = set() if blacklist is None else blacklist.__class__(inet_aton(i) for i in blacklist) # Permit custom backing stores to be passed in for the exemptions, as well. self.exempt = set() if exempt is None else exempt From 19e498d263c040528cf1778b7f543267e494f8ea Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 18 Feb 2021 14:01:34 -0500 Subject: [PATCH 24/24] Correction for escaping within a quoted string, additional example geographic exclusions. --- web/security/waf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/security/waf.py b/web/security/waf.py index 807b576..88ddf00 100644 --- a/web/security/waf.py +++ b/web/security/waf.py @@ -144,7 +144,7 @@ class PathHeuristic(WAFHeuristic): One can also deny any request targeting a PHP script: - PathHeuristic(re.compile(r'\.phps?($|/)')) + PathHeuristic(re.compile(r'\\.phps?($|/)')) It's important to note that regular expression flags (such as case insensitivity) will be ignored; the search is always case sensitive. (phpMyAdmin != phpmyadmin; these are legitimately separate resources.) @@ -257,6 +257,7 @@ class GeoCountryHeuristic(WAFHeuristic): 'ae', 'ir', 'iq', 'sa', 'tr', # Middle-eastern nations. 
'by', 'ru', 'ua', # Russia and nearby former bloc states. 'am', 'az', 'ee', 'ge', 'kg', 'kz', 'lt', 'lv', 'md', 'tj', 'tm', 'uz', # Additional former states. + 'af', 'mr', 'ng', 'ph', 'pl', 'sd', 'ye', # LGBTQ and human rights violators, others included above. ) """