Skip to content

Commit

Permalink
Anansi: Discover objects.inv also from RTD and PyPI
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Apr 6, 2024
1 parent 75fdf6d commit 3f41714
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
- Anansi: Provide `anansi list-projects` subcommand, to list curated
projects managed in accompanying `curated.yaml` file.
- Anansi: Accept `--threshold` option, forwarding to `sphobjinv`.
- Anansi: Discover `objects.inv` also from RTD and PyPI.
9 changes: 8 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ linksmith inventory
(anansi)=
## Anansi

Suggest references from curated intersphinx inventories.
Suggest references from intersphinx inventories, derived from curated projects,
RTD, or PyPI.

:::{rubric} Synopsis
:::
Expand All @@ -71,6 +72,12 @@ anansi suggest sarge capture
```shell
anansi suggest matplotlib draw
```
```shell
anansi suggest requests patch
```
```shell
anansi suggest beradio json
```

Display list of curated projects.
```shell
Expand Down
45 changes: 30 additions & 15 deletions linksmith/sphinx/community/anansi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from linksmith.settings import help_config
from linksmith.sphinx.inventory import InventoryManager
from linksmith.sphinx.util import RemoteObjectsInv
from linksmith.util.data import multikeysort

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -82,7 +83,24 @@ def to_list(self):
data.append(item.to_dict())
return data

def suggest(self, project: str, term: str, threshold: int = 50):
def get_project_documentation_url(self, project: str) -> str:
    """
    Given a project name, attempt to resolve it via curated list, RTD, or PyPI.

    The curated items in `self.items` are consulted first; only when no item
    carries a matching `name`, the lookup is delegated to `RemoteObjectsInv`.

    :param project: Name of the project to resolve.
    :return: Base URL of the project documentation.
    :raises KeyError: When the project can be resolved neither from the
        curated list, nor through remote discovery. The originating
        `FileNotFoundError` is attached as `__cause__`.
    """
    logger.info(f"Attempting to resolve project from curated list: {project}")
    for item in self.items:
        if item.name == project:
            return item.url

    logger.info(f"Attempting to resolve project from Internet: {project}")
    try:
        return RemoteObjectsInv(project).discover()
    except FileNotFoundError as ex:
        logger.warning(ex)
        # Keep the exception type callers already handle, but preserve the
        # reason why remote discovery failed for debugging purposes.
        raise KeyError(f"Project not found: {project}") from ex

def suggest(self, project: str, term: str, threshold: int = 50) -> t.List[str]:
"""
Find occurrences for "term" in Sphinx inventory.
A wrapper around sphobjinv's `suggest`.
Expand All @@ -95,20 +113,17 @@ def suggest(self, project: str, term: str, threshold: int = 50):
https://sphobjinv.readthedocs.io/en/stable/cli/suggest.html
https://sphobjinv.readthedocs.io/en/stable/api/inventory.html#sphobjinv.inventory.Inventory.suggest
"""
for item in self.items:
if item.name == project:
url = f"{item.url.rstrip('/')}/objects.inv"
inv = InventoryManager(url).soi_factory()
results = inv.suggest(term, thresh=threshold)
if results:
hits = len(results)
logger.info(f"{hits} hits for project/term: {project}/{term}")
return results
else:
logger.warning(f"No hits for project/term: {project}/{term}")
return []
documentation_url = self.get_project_documentation_url(project)
url = f"{documentation_url.rstrip('/')}/objects.inv"
inv = InventoryManager(url).soi_factory()
results = inv.suggest(term, thresh=threshold)
if results:
hits = len(results)
logger.info(f"{hits} hits for project/term: {project}/{term}")
return results
else:
raise KeyError(f"Project not found: {project}")
logger.warning(f"No hits for project/term: {project}/{term}")
return []


@click.group()
Expand Down Expand Up @@ -155,7 +170,7 @@ def cli_suggest(ctx: click.Context, project: str, term: str, threshold: int = 50
try:
results = library.suggest(project, term, threshold=threshold)
print("\n".join(results)) # noqa: T201
except Exception as ex:
except (KeyError, FileNotFoundError) as ex:
logger.error(str(ex).strip("'"))
sys.exit(1)

Expand Down
75 changes: 75 additions & 0 deletions linksmith/sphinx/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import logging
import re
from pathlib import Path

import requests

logger = logging.getLogger(__name__)


class LocalObjectsInv:
"""
Expand Down Expand Up @@ -27,3 +33,72 @@ def discover(cls, project_root: Path) -> Path:
if path.exists():
return path
raise FileNotFoundError("No objects.inv found in working directory")


class RemoteObjectsInv:
    """
    Support discovering the documentation root of a project on the Internet.

    Read the Docs is consulted first, with project metadata on PyPI as a
    fallback. Both strategies return a base URL; the `objects.inv` file name
    is not part of the returned value.
    """

    # Timeout passed to every outbound `requests.get` call.
    HTTP_TIMEOUT = 5

    def __init__(self, project: str):
        self.project = project

    def discover(self) -> str:
        """
        Resolve the documentation URL, trying Read the Docs before PyPI.

        :return: Base URL of the project documentation.
        :raises FileNotFoundError: When neither strategy yields a URL.
        """
        try:
            return self.discover_rtd()
        except FileNotFoundError:
            return self.discover_pypi()

    @staticmethod
    def _compose_rtd_url(domain: str, path: str) -> str:
        """
        Join `domain` and `path` of an RTD search hit into a documentation root URL.
        """
        # No way to discover the language slot via API?
        # Derive `/en/latest/` into `/en/latest`. (requests)
        # Derive `/en/stable/examples.html` into `/en/stable`. (requests-cache)
        # Derive `/genindex.html` into the bare domain. (cratedb-guide)
        # TODO: Also handle nested URLs like `/en/latest/snippets/myst/dropdown-group.html`.
        root = re.sub(r"(.*)/.*\.html?$", r"\1", path)

        # The path starts with a slash already, so strip both sides of the
        # seam, in order not to produce URLs like `https://host//en/stable`.
        return f"{domain.rstrip('/')}/{root.strip('/')}"

    def discover_rtd(self) -> str:
        """
        Resolve the documentation URL through the Read the Docs search API.

        :return: Documentation root URL derived from the first search hit.
        :raises FileNotFoundError: When the search yields no usable hit, or
            the derived URL does not respond with HTTP 200.
        """
        logger.info(f"Attempting to resolve project through RTD: {self.project}")
        response = requests.get(
            "https://readthedocs.org/api/v3/search/",
            params={"q": f"project:{self.project} *"},
            timeout=self.HTTP_TIMEOUT,
        )
        try:
            result = response.json()["results"][0]
            domain = result["domain"]
            path = result["path"]
        except (IndexError, KeyError) as ex:
            # Both an empty result list and a response lacking the expected
            # keys mean that nothing usable has been found.
            raise FileNotFoundError(f"Project not found at Read the Docs: {self.project}") from ex

        rtd_url = self._compose_rtd_url(domain, path)
        rtd_exists = requests.get(rtd_url, allow_redirects=True, timeout=self.HTTP_TIMEOUT).status_code == 200

        if rtd_exists:
            return rtd_url

        raise FileNotFoundError("No objects.inv discovered through Read the Docs")

    def discover_pypi(self) -> str:
        """
        Resolve the documentation URL through project metadata on PyPI.

        The `docs_url`, `home_page`, and `project_urls.Homepage` fields are
        probed in that order; the first one serving an `objects.inv` wins.

        :return: The winning candidate URL, exactly as listed in the metadata.
        :raises FileNotFoundError: When no candidate serves an `objects.inv`.
        """
        logger.info(f"Attempting to resolve project through PyPI: {self.project}")
        pypi_url = f"https://pypi.org/pypi/{self.project}/json"
        metadata = requests.get(pypi_url, timeout=self.HTTP_TIMEOUT).json()

        # Any of those fields may be absent or `null`, so access them leniently.
        info = metadata.get("info") or {}
        project_urls = info.get("project_urls") or {}
        candidates = (
            info.get("docs_url"),
            info.get("home_page"),
            project_urls.get("Homepage"),
        )

        for candidate in candidates:
            if not candidate:
                continue
            objects_inv_candidate = f"{candidate.rstrip('/')}/objects.inv"
            try:
                objects_inv_status = requests.get(
                    objects_inv_candidate,
                    allow_redirects=True,
                    timeout=self.HTTP_TIMEOUT,
                ).status_code
            except Exception as ex:  # noqa: BLE001
                # Best effort: Candidates are arbitrary strings from package
                # metadata, so a failing probe just moves on to the next one.
                logger.debug(f"Probing {objects_inv_candidate} failed: {ex}")
                continue
            if objects_inv_status < 400:
                return candidate

        raise FileNotFoundError("No objects.inv discovered through PyPI")
20 changes: 20 additions & 0 deletions tests/test_anansi.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,23 @@ def test_anansi_suggest_miss(cli_runner, caplog):
)
assert result.exit_code == 0
assert "No hits for project/term: sarge/foo" in caplog.messages


def test_anansi_suggest_via_rtd(cli_runner):
    """
    Invoke `anansi suggest` for `requests-cache`, and expect the `patching`
    label to be among the suggestions printed.
    """
    command = "anansi suggest requests-cache patch --threshold=75"
    outcome = cli_runner.invoke(cli, args=command, catch_exceptions=False)

    # The command must succeed before its output is worth inspecting.
    assert outcome.exit_code == 0
    assert ":std:label:`patching`" in outcome.output


def test_anansi_suggest_via_pypi(cli_runner):
    """
    Invoke `anansi suggest` for `beradio`, and expect the `json` method of
    `BERadioMessage` to be among the suggestions printed.
    """
    command = "anansi suggest beradio json"
    outcome = cli_runner.invoke(cli, args=command, catch_exceptions=False)

    # The command must succeed before its output is worth inspecting.
    assert outcome.exit_code == 0
    assert ":py:method:`beradio.message.BERadioMessage.json`" in outcome.output

0 comments on commit 3f41714

Please sign in to comment.