Skip to content

Commit

Permalink
chore: more script rework
Browse files Browse the repository at this point in the history
  • Loading branch information
phil65 committed Oct 18, 2024
1 parent 67ecc71 commit a5affd1
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 125 deletions.
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ extend-exclude = ['docs', '__init__.py', "prettyqt/qt/"]
target-version = "py311"

[tool.ruff.lint]
preview = true
select = [
# "A", # Flake8-builtins
# "ANN", # Flake8-Annotations
Expand All @@ -130,7 +131,7 @@ select = [
"C4", # flake8-comprehensions
# "C90", # MCCabe
# "COM", # Flake8-commas
"CPY", # Copyright-related rules
# "CPY", # Copyright-related rules
"D", # PyDocStyle
# "DTZ", # Flake8- Datetimez
"E", # PyCodeStyle Error
Expand Down Expand Up @@ -222,7 +223,7 @@ docstring-code-format = true

[tool.ruff.lint.isort]
lines-after-imports = 2
lines-between-types = 1
# lines-between-types = 1
# atomic = true
force-sort-within-sections = true
combine-as-imports = true
Expand Down
229 changes: 133 additions & 96 deletions scripts/create_inv_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,112 +19,149 @@
from sphinx.util.inventory import InventoryFileReader


pyside_uri = "https://doc.qt.io/qtforpython/"
package_name = "PySide6"

alias_modules = PySide6._find_all_qt_modules()

# the filename to use to save the original objects.inv file
original_inv = pathlib.Path("qt6-original.inv")
original_txt = pathlib.Path("qt6-original.txt")
# Base URL of the Qt for Python documentation; "objects.inv" is appended to it
# to build the download URL of the upstream inventory.
PYSIDE_URI = "https://doc.qt.io/qtforpython/"
# Top-level package name used when matching and aliasing inventory entries.
PACKAGE_NAME = "PySide6"

# File paths (relative, so they resolve against the current working directory).
# ORIGINAL_* hold the downloaded inventory and its text dump, MODIFIED_* the
# inventory with the additional alias entries and its text dump.
ORIGINAL_INV = pathlib.Path("qt6-original.inv")
ORIGINAL_TXT = pathlib.Path("qt6-original.txt")
MODIFIED_INV = pathlib.Path("qt6-with-aliases.inv")
MODIFIED_TXT = pathlib.Path("qt6-with-aliases.txt")


def get_alias_modules() -> list[str]:
    """Return the names of the Qt modules that aliases are generated for.

    Returns:
        The module names listed in ``PySide6.__all__``.
    """
    qt_modules = PySide6.__all__
    return qt_modules


def download_original_inv(
    url: str,
    output_path: pathlib.Path,
    *,
    timeout: float = 30.0,
) -> None:
    """Download the original objects.inv file.

    Args:
        url: The URL to download the inventory file from.
        output_path: The path to save the downloaded file.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail before writing so that an error page is never saved as the inventory.
    response.raise_for_status()
    output_path.write_bytes(response.content)


# Compiled once at import time because the parser runs for every inventory
# line. The expression mirrors sphinx.util.inventory.InventoryFile.load_v2.
_INVENTORY_LINE_RE = re.compile(r"(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+(\S+)\s+(.*)")


def parse_inventory_line(line: str) -> tuple[str, str, str, str, str]:
    """Parse a line from the inventory file.

    Args:
        line: A line from the inventory file. Trailing whitespace is ignored.

    Returns:
        A tuple containing (name, type, priority, location, display_name).

    Raises:
        ValueError: If the line does not consist of the five expected fields.
    """
    match = _INVENTORY_LINE_RE.match(line.rstrip())
    if match is None:
        raise ValueError(f"Invalid inventory line: {line}")
    name, typ, prio, location, dispname = match.groups()
    return name, typ, prio, location, dispname


def generate_aliases(module: str, classname: str, method: str) -> list[str]:
    """Generate aliases for a given class and method.

    Args:
        module: The Qt module name (e.g. ``"QtCore"``).
        classname: The class name.
        method: The method name including its leading dot (can be empty).

    Returns:
        A list of generated aliases, from the most to the least qualified name.
    """
    member = f"{classname}{method}"
    qualified = f"{module}.{member}"
    # The prettyqt alias uses the lower-cased module name without its "Qt"
    # prefix (QtCore -> core). removeprefix leaves names that lack the prefix
    # intact instead of blindly cutting off their first two characters.
    short_module = module.removeprefix("Qt").lower()
    return [
        f"PyQt6.{qualified}",
        f"prettyqt.qt.{qualified}",
        f"prettyqt.{short_module}.{member}",
        f"qtpy.{qualified}",
        f"PySide6.{qualified}",
        qualified,
        member,
    ]


def create_modified_inv(
    original_path: pathlib.Path,
    modified_path: pathlib.Path,
    alias_modules: list[str],
) -> None:
    """Create a modified inventory file with aliases.

    Every entry of the original inventory is copied with its location written
    out in full. Entries that belong to one of ``alias_modules`` are repeated
    under each of the names returned by ``generate_aliases``.

    Args:
        original_path: Path to the original inventory file.
        modified_path: Path to save the modified inventory file.
        alias_modules: List of Qt modules to generate aliases for.

    Raises:
        ValueError: If a line of the original inventory cannot be parsed.
    """
    package = re.escape(PACKAGE_NAME)
    # Built once up front instead of once per inventory line and module; the
    # names are escaped so that they are always matched literally.
    alias_patterns = [
        (
            module,
            re.compile(
                rf"{package}\.{re.escape(module)}\.{package}\.{re.escape(module)}"
                r"\.(\w+)(\.\w+)?"
            ),
        )
        for module in alias_modules
    ]

    with original_path.open("rb") as fin, modified_path.open("wb") as fout:
        # Use the same compression for the output file as
        # sphinx.util.inventory.InventoryFile.dump
        compressor = zlib.compressobj(9)
        reader = InventoryFileReader(fin)

        def write(*fields: str) -> None:
            """Compress one space-separated record and append it to the output."""
            record = " ".join(fields) + "\n"
            fout.write(compressor.compress(record.encode("utf-8")))

        # Copy the four header lines; they are written without compression.
        for _ in range(4):
            fout.write((reader.readline() + "\n").encode("utf-8"))

        for line in reader.read_compressed_lines():
            name, typ, prio, location, dispname = parse_inventory_line(line)
            # Strip any trailing "$" and append the entry name to the location.
            location = location.rstrip("$") + name

            write(name, typ, prio, location, dispname)
            if name.endswith("QtCore.Signal"):
                # QtCore.SignalInstance maps to QtCore.Signal
                write(
                    f"{PACKAGE_NAME}.QtCore.SignalInstance",
                    typ,
                    prio,
                    location,
                    dispname,
                )

            # Apply the aliases
            for module, pattern in alias_patterns:
                match = pattern.match(name)
                if match is None:
                    continue
                classname, method = match.groups()
                for alias in generate_aliases(module, classname, method or ""):
                    write(alias, typ, prio, location, dispname)

        # Emit whatever the compressor still buffers before the file is closed.
        fout.write(compressor.flush())

def create_modified_inv():
def write(*args):
fout.write(compressor.compress((" ".join(args) + "\n").encode("utf-8")))

# download the original objects.inv file
with original_inv.open(mode="wb") as f:
f.write(requests.get(f"{pyside_uri}objects.inv").content)
def inspect_inventory(inv_path: pathlib.Path, output_path: pathlib.Path) -> None:
    """Inspect an inventory file and save the output to a text file.

    Whatever ``inspect_main`` prints to standard output while reading
    ``inv_path`` is written to ``output_path`` as UTF-8.

    Args:
        inv_path: Path to the inventory file to inspect.
        output_path: Path to save the inspection output.
    """
    # Imported locally so that the function brings its own dependency into scope.
    import contextlib

    # newline="\n" keeps line endings untranslated, as the binary-mode codecs
    # stream used before did. redirect_stdout puts the previous sys.stdout back
    # even when inspect_main raises, and does not assume it was sys.__stdout__.
    with (
        open(output_path, "w", encoding="utf-8", newline="\n") as f,
        contextlib.redirect_stdout(f),
    ):
        inspect_main([str(inv_path)])

reader = InventoryFileReader(fin)

# copy the header
for _i in range(4):
fout.write((reader.readline() + "\n").encode("utf-8"))
def main() -> None:
"""Main function to orchestrate the inventory file creation and inspection."""
alias_modules = get_alias_modules()
download_original_inv(f"{PYSIDE_URI}objects.inv", ORIGINAL_INV)
create_modified_inv(ORIGINAL_INV, MODIFIED_INV, alias_modules)

for line in reader.read_compressed_lines():
# the re.match code is copied from
# sphinx.util.inventory.InventoryFile.load_v2
m = re.match(
r"(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+(\S+)\s+(.*)", line.rstrip()
)
if not m:
continue

name, typ, prio, location, dispname = m.groups()
location = location.rstrip("$") + name

write(name, typ, prio, location, dispname)
if name.endswith("QtCore.Signal"):
# QtCore.SignalInstance maps to QtCore.Signal
write(
f"{package_name}.QtCore.SignalInstance",
typ,
prio,
location,
dispname,
)

# apply the aliases
for module in alias_modules:
m = re.match(
rf"{package_name}\.{module}\.{package_name}\.{module}\.(\w+)(\.\w+)?",
name,
)
if m:
classname, method = m.groups()
if method is None:
method = ""

aliases = [
f"PyQt6.{module}.{classname}{method}",
f"prettyqt.qt.{module}.{classname}{method}",
f"prettyqt.{module[2:].lower()}.{classname}{method}",
f"qtpy.{module}.{classname}{method}",
f"PySide6.{module}.{classname}{method}",
f"{module}.{classname}{method}",
classname + method,
]

for alias in aliases:
write(alias, typ, prio, location, dispname)
# print(location)

fout.write(compressor.flush())


def main():
create_modified_inv()
inspect_inventory(ORIGINAL_INV, ORIGINAL_TXT)
inspect_inventory(MODIFIED_INV, MODIFIED_TXT)

print("Created:")
print(f" {original_inv}")
print(f" {original_txt}")
print(f" {modified_inv}")
print(f" {modified_txt}")

# redirect the print() statements in the inspect_main() function to a file
sys.stdout = codecs.open(original_txt, "wb", encoding="utf-8")
inspect_main([original_inv])
sys.stdout.close()

# if the following succeeds without raising an exception then Sphinx is
# able to read the pyqt#-modified-objects.inv file that was just created
sys.stdout = codecs.open(modified_txt, "wb", encoding="utf-8")
inspect_main([modified_inv])
sys.stdout.close()

sys.exit(0)
print(f" {ORIGINAL_INV}")
print(f" {ORIGINAL_TXT}")
print(f" {MODIFIED_INV}")
print(f" {MODIFIED_TXT}")


if __name__ == "__main__":
Expand Down
86 changes: 59 additions & 27 deletions scripts/download_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,76 @@
import inspect
import logging
import pathlib
from typing import Any

from bs4 import BeautifulSoup
import requests

from prettyqt.qt import QtCore, QtGui, QtWidgets


module_dict = dict(QtWidgets=QtWidgets, QtGui=QtGui, QtCore=QtCore)
# Setup logging: configure the root logger when the module is loaded and create
# the module-level logger used by the functions in this script.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Define modules to scrape: maps the module name used in the documentation URL
# to the module object whose classes are looked up.
MODULE_DICT: dict[str, Any] = {"QtWidgets": QtWidgets, "QtGui": QtGui, "QtCore": QtCore}

def scrape(module_name, klass_name):
url = f"https://doc.qt.io/qtforpython-6/PySide6/{module_name}/{klass_name}.html"
website = requests.get(url)
results = BeautifulSoup(website.content, "html.parser")
match = results.find(id="detailed-description")
# logger.warning(match)
if match is None:
return
match = match.find(**{"class": "reference internal"})
# logger.warning(match)
if match is None:
return
text = match.parent.get_text()
text = text.encode("cp1252", errors="ignore")
text = text.decode(errors="ignore")
logger.warning(text)
pathlib.Path() / module_name
# path.mkdir(parents=True, exist_ok=True)
# filepath = path / f"{klass_name}.txt"
# filepath.write_text(text)

def get_class_description(
    module_name: str,
    class_name: str,
    *,
    timeout: float = 30.0,
) -> str | None:
    """Scrape and return the detailed description of a PySide6 class.

    Args:
        module_name: The name of the module (e.g., 'QtWidgets').
        class_name: The name of the class (e.g., 'QAbstractItemView').
        timeout: Seconds to wait for the documentation server before giving up.

    Returns:
        The detailed description as a string, or None if the page could not be
        fetched or does not contain the expected elements.
    """
    url = f"https://doc.qt.io/qtforpython-6/PySide6/{module_name}/{class_name}.html"
    # Only the network calls sit inside the try block, so that unrelated errors
    # are not mistaken for request failures.
    try:
        # Without a timeout a single stalled request would block the whole run.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error("Error fetching %s: %s", url, e)
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    description_section = soup.find(id="detailed-description")
    if description_section is None:
        return None

    reference = description_section.find(class_="reference internal")
    if reference is None:
        return None

    text = reference.parent.get_text()
    # NOTE(review): the text is encoded as cp1252 but decoded with the default
    # codec (UTF-8), which appears to drop or mangle non-ASCII characters.
    # Confirm that this is the intended clean-up.
    return text.encode("cp1252", errors="ignore").decode(errors="ignore")


def process_module(module_name: str, module: Any, save: bool = False) -> None:
    """Process all classes in a given module.

    Each class description that could be fetched is logged and, if requested,
    written to ``<module_name>/<class_name>.txt`` relative to the current
    working directory.

    Args:
        module_name: The name of the module.
        module: The module object.
        save: Whether to save the descriptions to disk.
    """
    save_path = pathlib.Path(module_name)
    for class_name, _ in inspect.getmembers(module, inspect.isclass):
        description = get_class_description(module_name, class_name)
        if not description:
            # Nothing was found for this class, so there is nothing to report.
            continue
        logger.warning("%s.%s: %s", module_name, class_name, description)
        if save:
            # The directory is only created once there is something to store.
            save_path.mkdir(parents=True, exist_ok=True)
            # Explicit encoding so the result does not depend on the platform
            # default.
            (save_path / f"{class_name}.txt").write_text(
                description, encoding="utf-8"
            )


def main() -> None:
    """Process every module in ``MODULE_DICT`` and save the class descriptions."""
    for name in MODULE_DICT:
        process_module(name, MODULE_DICT[name], save=True)


if __name__ == "__main__":
scrape("QtWidgets", "QAbstractItemView")
main()

0 comments on commit a5affd1

Please sign in to comment.