From 3f417144e70fa4377bc8b13948b041a6879527c2 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sat, 6 Apr 2024 14:41:21 +0200 Subject: [PATCH] Anansi: Discover `objects.inv` also from RTD and PyPI --- CHANGES.md | 1 + docs/usage.md | 9 +++- linksmith/sphinx/community/anansi.py | 45 +++++++++++------ linksmith/sphinx/util.py | 75 ++++++++++++++++++++++++++++ tests/test_anansi.py | 20 ++++++++ 5 files changed, 134 insertions(+), 16 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 61817d4..fc37848 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -14,3 +14,4 @@ - Anansi: Provide `anansi list-projects` subcommand, to list curated projects managed in accompanying `curated.yaml` file. - Anansi: Accept `--threshold` option, forwarding to `sphobjinv`. +- Anansi: Discover `objects.inv` also from RTD and PyPI. diff --git a/docs/usage.md b/docs/usage.md index 7257887..31110a1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -59,7 +59,8 @@ linksmith inventory (anansi)= ## Anansi -Suggest references from curated intersphinx inventories. +Suggest references from intersphinx inventories, derived from curated projects, +RTD, or PyPI. :::{rubric} Synopsis ::: @@ -71,6 +72,12 @@ anansi suggest sarge capture ```shell anansi suggest matplotlib draw ``` +```shell +anansi suggest requests patch +``` +```shell +anansi suggest beradio json +``` Display list of curated projects. 
```shell diff --git a/linksmith/sphinx/community/anansi.py b/linksmith/sphinx/community/anansi.py index a70a4eb..31fe6bf 100644 --- a/linksmith/sphinx/community/anansi.py +++ b/linksmith/sphinx/community/anansi.py @@ -25,6 +25,7 @@ from linksmith.settings import help_config from linksmith.sphinx.inventory import InventoryManager +from linksmith.sphinx.util import RemoteObjectsInv from linksmith.util.data import multikeysort logger = logging.getLogger(__name__) @@ -82,7 +83,24 @@ def to_list(self): data.append(item.to_dict()) return data - def suggest(self, project: str, term: str, threshold: int = 50): + def get_project_documentation_url(self, project: str) -> str: + """ + Given a project name, attempt to resolve it via curated list, RTD, or PyPI. + """ + logger.info(f"Attempting to resolve project from curated list: {project}") + for item in self.items: + if item.name == project: + return item.url + + logger.info(f"Attempting to resolve project from Internet: {project}") + try: + return RemoteObjectsInv(project).discover() + except FileNotFoundError as ex: + logger.warning(ex) + + raise KeyError(f"Project not found: {project}") + + def suggest(self, project: str, term: str, threshold: int = 50) -> t.List[str]: """ Find occurrences for "term" in Sphinx inventory. A wrapper around sphobjinv's `suggest`. 
@@ -95,20 +113,17 @@ def suggest(self, project: str, term: str, threshold: int = 50): https://sphobjinv.readthedocs.io/en/stable/cli/suggest.html https://sphobjinv.readthedocs.io/en/stable/api/inventory.html#sphobjinv.inventory.Inventory.suggest """ - for item in self.items: - if item.name == project: - url = f"{item.url.rstrip('/')}/objects.inv" - inv = InventoryManager(url).soi_factory() - results = inv.suggest(term, thresh=threshold) - if results: - hits = len(results) - logger.info(f"{hits} hits for project/term: {project}/{term}") - return results - else: - logger.warning(f"No hits for project/term: {project}/{term}") - return [] + documentation_url = self.get_project_documentation_url(project) + url = f"{documentation_url.rstrip('/')}/objects.inv" + inv = InventoryManager(url).soi_factory() + results = inv.suggest(term, thresh=threshold) + if results: + hits = len(results) + logger.info(f"{hits} hits for project/term: {project}/{term}") + return results else: - raise KeyError(f"Project not found: {project}") + logger.warning(f"No hits for project/term: {project}/{term}") + return [] @click.group() @@ -155,7 +170,7 @@ def cli_suggest(ctx: click.Context, project: str, term: str, threshold: int = 50 try: results = library.suggest(project, term, threshold=threshold) print("\n".join(results)) # noqa: T201 - except Exception as ex: + except (KeyError, FileNotFoundError) as ex: logger.error(str(ex).strip("'")) sys.exit(1) diff --git a/linksmith/sphinx/util.py b/linksmith/sphinx/util.py index 4e78ef7..37a28ae 100644 --- a/linksmith/sphinx/util.py +++ b/linksmith/sphinx/util.py @@ -1,5 +1,11 @@ +import logging +import re from pathlib import Path +import requests + +logger = logging.getLogger(__name__) + class LocalObjectsInv: """ @@ -27,3 +33,72 @@ def discover(cls, project_root: Path) -> Path: if path.exists(): return path raise FileNotFoundError("No objects.inv found in working directory") + + +class RemoteObjectsInv: + """ + Support discovering an `objects.inv` on 
Read the Docs or PyPI. + """ + + HTTP_TIMEOUT = 5 + + def __init__(self, project: str): + self.project = project + + def discover(self) -> str: + try: + return self.discover_rtd() + except FileNotFoundError: + return self.discover_pypi() + + def discover_rtd(self) -> str: + logger.info(f"Attempting to resolve project through RTD: {self.project}") + try: + result = requests.get( + "https://readthedocs.org/api/v3/search/", + params={"q": f"project:{self.project} *"}, + timeout=self.HTTP_TIMEOUT, + ).json()["results"][0] + except IndexError: + raise FileNotFoundError(f"Project not found at Read the Docs: {self.project}") + domain = result["domain"] + path = result["path"] + + # No way to discover the language slot via API? + # Derive `/en/latest/` into `/en/latest/objects.inv`. (requests) + # Derive `/en/stable/examples.html` into `/en/stable/objects.inv`. (requests-cache) + # Derive `/genindex.html` into `/objects.inv`. (cratedb-guide) + # TODO: Also handle nested URLs like `/en/latest/snippets/myst/dropdown-group.html`. 
+ path = re.sub(r"(.*)/.*\.html?$", "\\1", path) + + rtd_url = f"{domain}/{path}" + rtd_exists = requests.get(rtd_url, allow_redirects=True, timeout=self.HTTP_TIMEOUT).status_code == 200 + + if rtd_exists: + return rtd_url + + raise FileNotFoundError("No objects.inv discovered through Read the Docs") + + def discover_pypi(self) -> str: + logger.info(f"Attempting to resolve project through PyPI: {self.project}") + pypi_url = f"https://pypi.org/pypi/{self.project}/json" + metadata = requests.get(pypi_url, timeout=self.HTTP_TIMEOUT).json() + docs_url = metadata["info"]["docs_url"] + home_page = metadata["info"]["home_page"] + home_page2 = metadata["info"]["project_urls"]["Homepage"] + for candidate in docs_url, home_page, home_page2: + if candidate is None: + continue + objects_inv_candidate = f"{candidate.rstrip('/')}/objects.inv" + try: + objects_inv_status = requests.get( + objects_inv_candidate, + allow_redirects=True, + timeout=self.HTTP_TIMEOUT, + ).status_code + if objects_inv_status < 400: + return candidate + except Exception: # noqa: S110 + pass + + raise FileNotFoundError("No objects.inv discovered through PyPI") diff --git a/tests/test_anansi.py b/tests/test_anansi.py index 3283978..880b1a1 100644 --- a/tests/test_anansi.py +++ b/tests/test_anansi.py @@ -69,3 +69,23 @@ def test_anansi_suggest_miss(cli_runner, caplog): ) assert result.exit_code == 0 assert "No hits for project/term: sarge/foo" in caplog.messages + + +def test_anansi_suggest_via_rtd(cli_runner): + result = cli_runner.invoke( + cli, + args="anansi suggest requests-cache patch --threshold=75", + catch_exceptions=False, + ) + assert result.exit_code == 0 + assert ":std:label:`patching`" in result.output + + +def test_anansi_suggest_via_pypi(cli_runner): + result = cli_runner.invoke( + cli, + args="anansi suggest beradio json", + catch_exceptions=False, + ) + assert result.exit_code == 0 + assert ":py:method:`beradio.message.BERadioMessage.json`" in result.output