From 731ee4bcd60398b8ae2c7fbd013296db31821a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kr=C3=BCger?= Date: Mon, 20 Jan 2025 19:15:40 +0100 Subject: [PATCH] Let nikola serve work together with non-root BASE_URL or SITE_URL. Fixes #3726 (#3804) * Let nikola serve work together with non-root BASE_URL or SITE_URL. * Backporting a type hint to Python 3.8. * pydocstyle improvement. * Comment cosmetics. * Workaround for Windows \r\n newlines. * Moving the base-path extraction to utils.py. * Fixing imports that were wrongly done by my dev ide. --- CHANGES.txt | 1 + nikola/plugins/command/auto/__init__.py | 14 +-- nikola/plugins/command/serve.py | 100 +++++++++++++---- nikola/utils.py | 15 ++- tests/integration/dev_server_test_helper.py | 43 +++++++ ..._dev_server.py => test_dev_server_auto.py} | 75 ++----------- .../test_dev_server_basepath_helper.py | 29 +++++ tests/integration/test_dev_server_serve.py | 105 ++++++++++++++++++ 8 files changed, 279 insertions(+), 103 deletions(-) create mode 100644 tests/integration/dev_server_test_helper.py rename tests/integration/{test_dev_server.py => test_dev_server_auto.py} (68%) create mode 100644 tests/integration/test_dev_server_basepath_helper.py create mode 100644 tests/integration/test_dev_server_serve.py diff --git a/CHANGES.txt b/CHANGES.txt index 0f04b5644..91a21b7a8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -12,6 +12,7 @@ Bugfixes * Restore `annotation_helper.tmpl` with dummy content - fix themes still mentioning it (Issue #3764, #3773) * Fix compatibility with watchdog 4 (Issue #3766) +* `nikola serve` now works with non-root SITE_URL. New in v8.3.1 ============= diff --git a/nikola/plugins/command/auto/__init__.py b/nikola/plugins/command/auto/__init__.py index a700f9e31..7c702214c 100644 --- a/nikola/plugins/command/auto/__init__.py +++ b/nikola/plugins/command/auto/__init__.py @@ -35,14 +35,13 @@ import subprocess import sys import typing -import urllib.parse import webbrowser from pathlib import Path import blinker from nikola.plugin_categories import Command -from nikola.utils import dns_sd, req_missing, get_theme_path, makedirs, pkg_resources_path +from nikola.utils import base_path_from_siteuri, dns_sd, get_theme_path, makedirs, pkg_resources_path, req_missing try: import aiohttp @@ -67,17 +66,6 @@ IDLE_REFRESH_DELAY = 0.05 -def base_path_from_siteuri(siteuri: str) -> str: - """Extract the path part from a URI such as site['SITE_URL']. - - The path never ends with a "/". (If only "/" is intended, it is empty.) - """ - path = urllib.parse.urlsplit(siteuri).path - if path.endswith("/"): - path = path[:-1] - return path - - class CommandAuto(Command): """Automatic rebuilds for Nikola.""" diff --git a/nikola/plugins/command/serve.py b/nikola/plugins/command/serve.py index 49f86fc6a..0b2cbac56 100644 --- a/nikola/plugins/command/serve.py +++ b/nikola/plugins/command/serve.py @@ -25,19 +25,22 @@ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """Start test server.""" - +import atexit import os import sys import re import signal import socket +import threading import webbrowser from http.server import HTTPServer from http.server import SimpleHTTPRequestHandler from io import BytesIO as StringIO +from threading import Thread, current_thread +from typing import Callable, Optional from nikola.plugin_categories import Command -from nikola.utils import dns_sd +from nikola.utils import base_path_from_siteuri, dns_sd class IPv6Server(HTTPServer): @@ -52,7 +55,8 @@ class CommandServe(Command): name = "serve" doc_usage = "[options]" doc_purpose = "start the test webserver" - dns_sd = None + httpd: Optional[HTTPServer] = None + httpd_serving_thread: Optional[Thread] = None cmd_options = ( { @@ -98,13 +102,21 @@ class CommandServe(Command): ) def shutdown(self, signum=None, _frame=None): - """Shut down the server that is running detached.""" - if self.dns_sd: - self.dns_sd.Reset() + """Shut down the server.""" if os.path.exists(self.serve_pidfile): os.remove(self.serve_pidfile) - if not self.detached: - self.logger.info("Server is shutting down.") + + # Deal with the non-detached state: + if self.httpd is not None and self.httpd_serving_thread is not None and self.httpd_serving_thread != current_thread(): + shut_me_down = self.httpd + self.httpd = None + self.httpd_serving_thread = None + self.logger.info("Web server is shutting down.") + shut_me_down.shutdown() + else: + self.logger.debug("No need to shut down the web server.") + + # If this was called as a signal handler, shut down the entire application: if signum: sys.exit(0) @@ -127,29 +139,33 @@ def _execute(self, options, args): ipv6 = False OurHTTP = HTTPServer - httpd = OurHTTP((options['address'], options['port']), - OurHTTPRequestHandler) - sa = httpd.socket.getsockname() + base_path = base_path_from_siteuri(self.site.config['BASE_URL']) + if base_path == "": + handler_factory = OurHTTPRequestHandler + else: + handler_factory = _create_RequestHandler_removing_basepath(base_path) + self.httpd = OurHTTP((options['address'], options['port']), handler_factory) + + sa = self.httpd.socket.getsockname() if ipv6: - server_url = "http://[{0}]:{1}/".format(*sa) + server_url = "http://[{0}]:{1}/".format(*sa) + base_path else: - server_url = "http://{0}:{1}/".format(*sa) + server_url = "http://{0}:{1}/".format(*sa) + base_path self.logger.info("Serving on {0} ...".format(server_url)) if options['browser']: # Some browsers fail to load 0.0.0.0 (Issue #2755) if sa[0] == '0.0.0.0': - server_url = "http://127.0.0.1:{1}/".format(*sa) + server_url = "http://127.0.0.1:{1}/".format(*sa) + base_path self.logger.info("Opening {0} in the default web browser...".format(server_url)) webbrowser.open(server_url) if options['detach']: - self.detached = True OurHTTPRequestHandler.quiet = True try: pid = os.fork() if pid == 0: signal.signal(signal.SIGTERM, self.shutdown) - httpd.serve_forever() + self.httpd.serve_forever() else: with open(self.serve_pidfile, 'w') as fh: fh.write('{0}\n'.format(pid)) @@ -160,11 +176,26 @@ def _execute(self, options, args): else: raise else: - self.detached = False try: - self.dns_sd = dns_sd(options['port'], (options['ipv6'] or '::' in options['address'])) - signal.signal(signal.SIGTERM, self.shutdown) - httpd.serve_forever() + dns_socket_publication = dns_sd(options['port'], (options['ipv6'] or '::' in options['address'])) + try: + self.httpd_serving_thread = threading.current_thread() + if threading.main_thread() == self.httpd_serving_thread: + # If we are running as the main thread, + # likely no other threads are running and nothing else will run after us. + # In this special case, we take some responsibility for the application whole + # (not really the job of any single plugin). + # Clean up the socket publication on exit (if we actually had a socket publication): + if dns_socket_publication is not None: + atexit.register(dns_socket_publication.Reset) + # Enable application shutdown via SIGTERM: + signal.signal(signal.SIGTERM, self.shutdown) + self.logger.info("Starting web server.") + self.httpd.serve_forever() + self.logger.info("Web server has shut down.") + finally: + if dns_socket_publication is not None: + dns_socket_publication.Reset() except KeyboardInterrupt: self.shutdown() return 130 @@ -186,7 +217,7 @@ def log_message(self, *args): # NOTICE: this is a patched version of send_head() to disable all sorts of # caching. `nikola serve` is a development server, hence caching should - # not happen to have access to the newest resources. + # not happen, instead, we should give access to the newest resources. # # The original code was copy-pasted from Python 2.7. Python 3.3 contains # the same code, missing the binary mode comment. @@ -205,6 +236,7 @@ def send_head(self): """ path = self.translate_path(self.path) + f = None if os.path.isdir(path): path_parts = list(self.path.partition('?')) @@ -277,3 +309,29 @@ def send_head(self): # end no-cache patch self.end_headers() return f + + +def _omit_basepath_component(base_path_with_slash: str, path: str) -> str: + if path.startswith(base_path_with_slash): + return path[len(base_path_with_slash) - 1:] + elif path == base_path_with_slash[:-1]: + return "/" + else: + # Somewhat dubious. We should not really get asked this, normally. + return path + + +def _create_RequestHandler_removing_basepath(base_path: str) -> Callable: + """Create a new subclass of OurHTTPRequestHandler that removes a trailing base path from the path. + + Returns that class (used as a factory for objects). + Better return type should be Callable[[...], OurHTTPRequestHandler], but Python 3.8 doesn't understand that. + """ + base_path_with_slash = base_path if base_path.endswith("/") else f"{base_path}/" + + class OmitBasepathRequestHandler(OurHTTPRequestHandler): + + def translate_path(self, path: str) -> str: + return super().translate_path(_omit_basepath_component(base_path_with_slash, path)) + + return OmitBasepathRequestHandler diff --git a/nikola/utils.py b/nikola/utils.py index 5f3ed7758..e6d8aa2eb 100644 --- a/nikola/utils.py +++ b/nikola/utils.py @@ -30,6 +30,8 @@ import datetime import hashlib import io +import urllib + import lxml.html import operator import os @@ -1862,7 +1864,7 @@ def color_hsl_adjust_hex(hexstr, adjust_h=None, adjust_s=None, adjust_l=None): def dns_sd(port, inet6): - """Optimistically publish a HTTP service to the local network over DNS-SD. + """Optimistically publish an HTTP service to the local network over DNS-SD. Works only on Linux/FreeBSD. Requires the `avahi` and `dbus` modules (symlinks in virtualenvs) """ @@ -2168,3 +2170,14 @@ def read_from_config(self, site, basename, posts_per_classification_per_language args = {'translation_manager': self, 'site': site, 'posts_per_classification_per_language': posts_per_classification_per_language} signal('{}_translations_config'.format(basename.lower())).send(args) + + +def base_path_from_siteuri(siteuri: str) -> str: + """Extract the path part from a URI such as site['SITE_URL']. + + The path returned doesn't end with a "/". (If only "/" is intended, it is empty.) + """ + path = urllib.parse.urlsplit(siteuri).path + if path.endswith("/"): + path = path[:-1] + return path diff --git a/tests/integration/dev_server_test_helper.py b/tests/integration/dev_server_test_helper.py new file mode 100644 index 000000000..78479981a --- /dev/null +++ b/tests/integration/dev_server_test_helper.py @@ -0,0 +1,43 @@ +import pathlib +import socket +from typing import Dict, Any + +from ..helper import FakeSite +from nikola.utils import get_logger + +SERVER_ADDRESS = "localhost" +TEST_MAX_DURATION = 10 # Watchdog: Give up the test if it did not succeed during this time span. + +# Folder that has the fixture file we expect the server to serve: +OUTPUT_FOLDER = pathlib.Path(__file__).parent.parent / "data" / "dev_server_sample_output_folder" + +LOGGER = get_logger("test_dev_server") + + +def find_unused_port() -> int: + """Ask the OS for a currently unused port number. + + (More precisely, a port that can be used for a TCP server servicing SERVER_ADDRESS.) + We use a method here rather than a fixture to minimize side effects of failing tests. + """ + s = socket.socket() + try: + ANY_PORT = 0 + s.bind((SERVER_ADDRESS, ANY_PORT)) + address, port = s.getsockname() + LOGGER.info("Trying to set up dev server on http://%s:%i/", address, port) + return port + finally: + s.close() + + +class MyFakeSite(FakeSite): + def __init__(self, config: Dict[str, Any], configuration_filename="conf.py"): + super(MyFakeSite, self).__init__() + self.configured = True + self.debug = True + self.THEMES = [] + self._plugin_places = [] + self.registered_auto_watched_folders = set() + self.config = config + self.configuration_filename = configuration_filename diff --git a/tests/integration/test_dev_server.py b/tests/integration/test_dev_server_auto.py similarity index 68% rename from tests/integration/test_dev_server.py rename to tests/integration/test_dev_server_auto.py index 53f2af9e2..872b4af3b 100644 --- a/tests/integration/test_dev_server.py +++ b/tests/integration/test_dev_server_auto.py @@ -1,50 +1,13 @@ import asyncio -import nikola.plugins.command.auto as auto -from nikola.utils import get_logger +from typing import Optional, Tuple + import pytest -import pathlib import requests -import socket -from typing import Optional, Tuple, Any, Dict - -from ..helper import FakeSite - -SERVER_ADDRESS = "localhost" -TEST_MAX_DURATION = 10 # Watchdog: Give up the test if it did not succeed during this time span. - -# Folder that has the fixture file we expect the server to serve: -OUTPUT_FOLDER = pathlib.Path(__file__).parent.parent / "data" / "dev_server_sample_output_folder" - -LOGGER = get_logger("test_dev_server") - -def find_unused_port() -> int: - """Ask the OS for a currently unused port number. - - (More precisely, a port that can be used for a TCP server servicing SERVER_ADDRESS.) - We use a method here rather than a fixture to minimize side effects of failing tests. - """ - s = socket.socket() - try: - ANY_PORT = 0 - s.bind((SERVER_ADDRESS, ANY_PORT)) - address, port = s.getsockname() - LOGGER.info("Trying to set up dev server on http://%s:%i/", address, port) - return port - finally: - s.close() - - -class MyFakeSite(FakeSite): - def __init__(self, config: Dict[str, Any], configuration_filename="conf.py"): - super(MyFakeSite, self).__init__() - self.configured = True - self.debug = True - self.THEMES = [] - self._plugin_places = [] - self.registered_auto_watched_folders = set() - self.config = config - self.configuration_filename = configuration_filename +import nikola.plugins.command.auto as auto +from nikola.utils import base_path_from_siteuri +from .dev_server_test_helper import MyFakeSite, SERVER_ADDRESS, find_unused_port, TEST_MAX_DURATION, LOGGER, \ + OUTPUT_FOLDER def test_serves_root_dir( @@ -157,7 +120,7 @@ def site_and_base_path(request) -> Tuple[MyFakeSite, str]: "SITE_URL": request.param, "OUTPUT_FOLDER": OUTPUT_FOLDER.as_posix(), } - return MyFakeSite(config), auto.base_path_from_siteuri(request.param) + return MyFakeSite(config), base_path_from_siteuri(request.param) @pytest.fixture(scope="module") @@ -170,27 +133,3 @@ def expected_text(): with open(OUTPUT_FOLDER / "index.html", encoding="utf-8") as html_file: all_html = html_file.read() return all_html[all_html.find(""):] - - -@pytest.mark.parametrize(("uri", "expected_basepath"), [ - ("http://localhost", ""), - ("http://local.host", ""), - ("http://localhost/", ""), - ("http://local.host/", ""), - ("http://localhost:123/", ""), - ("http://local.host:456/", ""), - ("https://localhost", ""), - ("https://local.host", ""), - ("https://localhost/", ""), - ("https://local.host/", ""), - ("https://localhost:123/", ""), - ("https://local.host:456/", ""), - ("http://example.org/blog", "/blog"), - ("https://lorem.ipsum/dolet/", "/dolet"), - ("http://example.org:124/blog", "/blog"), - ("http://example.org:124/Deep/Rab_bit/hol.e/", "/Deep/Rab_bit/hol.e"), - # Would anybody in a sane mind actually do this? - ("http://example.org:124/blog?lorem=ipsum&dol=et", "/blog"), -]) -def test_basepath(uri: str, expected_basepath: Optional[str]) -> None: - assert expected_basepath == auto.base_path_from_siteuri(uri) diff --git a/tests/integration/test_dev_server_basepath_helper.py b/tests/integration/test_dev_server_basepath_helper.py new file mode 100644 index 000000000..680f055f0 --- /dev/null +++ b/tests/integration/test_dev_server_basepath_helper.py @@ -0,0 +1,29 @@ +from typing import Optional + +import pytest + +from nikola.utils import base_path_from_siteuri + + +@pytest.mark.parametrize(("uri", "expected_basepath"), [ + ("http://localhost", ""), + ("http://local.host", ""), + ("http://localhost/", ""), + ("http://local.host/", ""), + ("http://localhost:123/", ""), + ("http://local.host:456/", ""), + ("https://localhost", ""), + ("https://local.host", ""), + ("https://localhost/", ""), + ("https://local.host/", ""), + ("https://localhost:123/", ""), + ("https://local.host:456/", ""), + ("http://example.org/blog", "/blog"), + ("https://lorem.ipsum/dolet/", "/dolet"), + ("http://example.org:124/blog", "/blog"), + ("http://example.org:124/Deep/Rab_bit/hol.e/", "/Deep/Rab_bit/hol.e"), + # Would anybody in a sane mind actually do this? + ("http://example.org:124/blog?lorem=ipsum&dol=et", "/blog"), +]) +def test_basepath(uri: str, expected_basepath: Optional[str]) -> None: + assert expected_basepath == base_path_from_siteuri(uri) diff --git a/tests/integration/test_dev_server_serve.py b/tests/integration/test_dev_server_serve.py new file mode 100644 index 000000000..fad2362e6 --- /dev/null +++ b/tests/integration/test_dev_server_serve.py @@ -0,0 +1,105 @@ +from time import sleep +from typing import Tuple +import requests +import pytest +from concurrent.futures import ThreadPoolExecutor + +import nikola.plugins.command.serve as serve +from nikola.utils import base_path_from_siteuri +from .dev_server_test_helper import MyFakeSite, SERVER_ADDRESS, find_unused_port, LOGGER, OUTPUT_FOLDER + + +def test_serves_root_dir( + site_and_base_path: Tuple[MyFakeSite, str], expected_text: str +) -> None: + site, base_path = site_and_base_path + command_serve = serve.CommandServe() + command_serve.set_site(site) + options = { + "address": SERVER_ADDRESS, + "port": find_unused_port(), + "browser": False, + "detach": False, + "ipv6": False, + } + + with ThreadPoolExecutor(max_workers=2) as executor: + future_to_run_web_server = executor.submit(lambda: command_serve.execute(options=options)) + try: + sleep(0.05) # Wait for the web server to start up. + with requests.Session() as session: + server_root_uri = f"http://{options['address']}:{options['port']}" + + # Grab the document root index.html file: + server_base_uri = f"{server_root_uri}{base_path}" + LOGGER.info("Attempting to fetch HTML from %s", server_base_uri) + res = session.get(server_base_uri) + res.raise_for_status() + assert "text/html; charset=UTF-8" == res.headers['content-type'] + text_found = res.text.replace("\r\n", "\n") # On windows, the server provides spurious \r + assert expected_text == text_found + + assert not base_path.endswith("/") + res2 = session.get(f"{server_root_uri}{base_path}/") + res2.raise_for_status() + assert "text/html; charset=UTF-8" == res2.headers['content-type'] + text_found_2 = res2.text.replace("\r\n", "\n") + assert expected_text == text_found_2 + + res3 = session.get(f"{server_root_uri}{base_path}/index.html") + res3.raise_for_status() + assert "text/html; charset=UTF-8" == res3.headers['content-type'] + text_found_3 = res3.text.replace("\r\n", "\n") + assert expected_text in text_found_3 + + LOGGER.info("Web server access successful with intended result.") + finally: + LOGGER.info("Asking the webserver to shut down") + command_serve.shutdown() + future_to_run_web_server.result() + LOGGER.info("Webserver shut down successfully.") + LOGGER.info("Threadpool closed.") + + +@pytest.fixture(scope="module", + params=["https://example.org", + "https://example.org:1234/blog", + "https://example.org:3456/blog/", + "http://example.org/deep/down/a/rabbit/hole" + ]) +def site_and_base_path(request) -> Tuple[MyFakeSite, str]: + """Return a fake site and the base_path (root) the dev server should be serving.""" + assert OUTPUT_FOLDER.is_dir(), \ + f"Could not find dev server test fixture {OUTPUT_FOLDER.as_posix()}" + + config = { + "post_pages": [], + "FILES_FOLDERS": [], + "GALLERY_FOLDERS": [], + "LISTINGS_FOLDERS": [], + "IMAGE_FOLDERS": [], + "OUTPUT_FOLDER": OUTPUT_FOLDER.as_posix(), + # See https://github.com/getnikola/nikola/issues/3802 + # "SITE_URL": request.param, + "BASE_URL": request.param + } + return MyFakeSite(config), base_path_from_siteuri(request.param) + + +@pytest.fixture(scope="module") +def expected_text(): + """Read the index.html file from the fixture folder and return it. + """ + with open(OUTPUT_FOLDER / "index.html", encoding="utf-8") as html_file: + return html_file.read() + + +@pytest.mark.parametrize("basepath,path,expected_result", [ + ("/huba/", "/huba/buba", "/buba"), + ("/huba/", "/huba/", "/"), + ("/ping/pong/", "/ping/pong", "/"), + ("/huba/", "/huba/lorem/ipsum.txt", "/lorem/ipsum.txt"), + ("/", "/huba/buba", "/huba/buba") +]) +def test_path_omitted(basepath, path, expected_result) -> None: + assert expected_result == serve._omit_basepath_component(basepath, path)