Activate pyright strict mode and fix many typing issues

openzim · Nov 18, 2024 · 29dd2e7
1 parent 86fb7dc
commit 29dd2e7
Show file tree

Hide file tree

Showing 64 changed files with 1,214 additions and 818 deletions.
diff --git a/contrib/encode_video.py b/contrib/encode_video.py
@@ -22,6 +22,8 @@ def encode_video(src_path: Path, dst_path: Path, preset: str):
         ffmpeg_args=preset_cls().to_ffmpeg_args(),
         with_process=True,
     )  # pyright: ignore[reportGeneralTypeIssues] (returned type is variable, depending on `with_process` value)
+    if not process:  # pragma: no branch
+        raise ValueError("process should have been returned")
     if not success:
         logger.error(f"conversion failed:\n{process.stdout}")
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -264,12 +264,9 @@ include = ["contrib", "src", "tests", "tasks.py"]
 exclude = [".env/**", ".venv/**"]
 extraPaths = ["src"]
 pythonVersion = "3.12"
-typeCheckingMode="basic"
+typeCheckingMode="strict"
 disableBytesTypePromotions = true
 
-[tool.pyright.overrides]
-strict = true  # Enable strict mode for specific files
-
 [[tool.pyright.overrides.files]]
 files = [
   "src/zimscraperlib/rewriting**/*.py",

diff --git a/rules/generate_rules.py b/rules/generate_rules.py
@@ -156,11 +156,11 @@
 {% endfor %}
     ]
 )
-def {{ rule['name'] }}_case(request):
+def {{ rule['name'] }}_case(request: pytest.FixtureRequest):
     yield request.param
 
 
-def test_fuzzyrules_{{ rule['name'] }}({{ rule['name'] }}_case):
+def test_fuzzyrules_{{ rule['name'] }}({{ rule['name'] }}_case: ContentForTests):
     assert (
         ArticleUrlRewriter.apply_additional_rules({{ rule['name'] }}_case.input_str)
         == {{ rule['name'] }}_case.expected_str

diff --git a/src/zimscraperlib/__init__.py b/src/zimscraperlib/__init__.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 import logging as stdlogging
 import os
 

diff --git a/src/zimscraperlib/constants.py b/src/zimscraperlib/constants.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python3
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 import pathlib
 
 from zimscraperlib.__about__ import __version__

diff --git a/src/zimscraperlib/download.py b/src/zimscraperlib/download.py
@@ -1,18 +1,15 @@
-#!/usr/bin/env python3
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 from __future__ import annotations
 
 import pathlib
 import subprocess
 from concurrent.futures import Future, ThreadPoolExecutor
-from typing import IO, ClassVar
+from typing import IO, Any, ClassVar
 
 import requests
 import requests.adapters
 import requests.structures
 import urllib3.util
-import yt_dlp as youtube_dl
+import yt_dlp as youtube_dl  # pyright: ignore[reportMissingTypeStubs]
 
 from zimscraperlib import logger
 
@@ -29,24 +26,24 @@ def __init__(self, threads: int | None = 1) -> None:
     def __enter__(self):
         return self
 
-    def __exit__(self, *args):
+    def __exit__(self, *_: Any):
         self.shutdown()
 
     def shutdown(self) -> None:
         """shuts down the executor, awaiting completion"""
         self.executor.shutdown(wait=True)
 
-    def _run_youtube_dl(self, url: str, options: dict) -> None:
+    def _run_youtube_dl(self, url: str, options: dict[str, Any]) -> None:
         with youtube_dl.YoutubeDL(options) as ydl:
-            ydl.download([url])
+            ydl.download([url])  # pyright: ignore[reportUnknownMemberType]
 
     def download(
         self,
         url: str,
-        options: dict | None,
+        options: dict[str, Any] | None,
         *,
         wait: bool | None = True,
-    ) -> bool | Future:
+    ) -> bool | Future[Any]:
         """Downloads video using initialized executor.
 
         url: URL or Video ID
@@ -65,7 +62,7 @@ def download(
         raise future.exception()  # pyright: ignore
 
 
-class YoutubeConfig(dict):
+class YoutubeConfig(dict[str, str | bool | int | None]):
     options: ClassVar[dict[str, str | bool | int | None]] = {}
     defaults: ClassVar[dict[str, str | bool | int | None]] = {
         "writethumbnail": True,
@@ -81,7 +78,7 @@ class YoutubeConfig(dict):
         "outtmpl": "video.%(ext)s",
     }
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: str | bool | int | None):
         super().__init__(self, **type(self).defaults)
         self.update(self.options)
         self.update(kwargs)
@@ -91,7 +88,7 @@ def get_options(
         cls,
         target_dir: pathlib.Path | None = None,
         filepath: pathlib.Path | None = None,
-        **options,
+        **options: str | bool | int | None,
     ):
         if "outtmpl" not in options:
             outtmpl = cls.options.get("outtmpl", cls.defaults["outtmpl"])
@@ -142,9 +139,10 @@ def save_large_file(url: str, fpath: pathlib.Path) -> None:
     )
 
 
-def _get_retry_adapter(
+def get_retry_adapter(
     max_retries: int | None = 5,
 ) -> requests.adapters.BaseAdapter:
+    """A requests adapter that automatically retries on retryable HTTP statuses"""
     retries = urllib3.util.retry.Retry(
         total=max_retries,  # total number of retries
         connect=max_retries,  # connection errors
@@ -168,7 +166,7 @@ def _get_retry_adapter(
 def get_session(max_retries: int | None = 5) -> requests.Session:
     """Session to hold cookies and connection pool together"""
     session = requests.Session()
-    session.mount("http", _get_retry_adapter(max_retries))  # tied to http and https
+    session.mount("http", get_retry_adapter(max_retries))  # tied to http and https
     return session
 
 
@@ -198,7 +196,11 @@ def stream_file(
     Returns the total number of bytes downloaded and the response headers"""
 
     # if no output option is supplied
-    if fpath is None and byte_stream is None:
+    if fpath is not None:
+        fp = open(fpath, "wb")
+    elif byte_stream is not None:
+        fp = byte_stream
+    else:
         raise ValueError("Either file path or a bytesIO object is needed")
 
     if not session:
@@ -212,12 +214,6 @@ def stream_file(
     resp.raise_for_status()
 
     total_downloaded = 0
-    if fpath is not None:
-        fp = open(fpath, "wb")
-    elif (
-        byte_stream is not None
-    ):  # pragma: no branch (we use a precise condition to help type checker)
-        fp = byte_stream
 
     for data in resp.iter_content(block_size):
         total_downloaded += len(data)

diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 """ Files manipulation tools
 
     Shortcuts to retrieve mime type using magic"""
@@ -46,7 +43,7 @@ def get_content_mimetype(content: bytes | str) -> str:
 
 def delete_callback(
     fpath: str | pathlib.Path,
-    callback: Callable | None = None,
+    callback: Callable[..., Any] | None = None,
     *callback_args: Any,
 ):
     """helper deleting passed filepath, optionnaly calling an additional callback"""
@@ -55,4 +52,4 @@ def delete_callback(
 
     # call the callback if requested
     if callback and callable(callback):
-        callback.__call__(*callback_args)
+        callback(*callback_args)
diff --git a/src/zimscraperlib/fix_ogvjs_dist.py b/src/zimscraperlib/fix_ogvjs_dist.py
@@ -1,7 +1,3 @@
-#!/usr/bin/env python3
-# vim: ai ts=4 sts=4 et sw=4 nu
-
-
 """ quick script to fix videojs-ogvjs so that it triggers on webm mimetype """
 
 from __future__ import annotations

diff --git a/src/zimscraperlib/html.py b/src/zimscraperlib/html.py
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 """ Tools to work with HTML contents """
+
 from __future__ import annotations
 
 import pathlib
@@ -43,9 +41,7 @@ def find_language_in(content: str | BinaryIO | TextIO, mime_type: str) -> str:
         for key in keylist:
             node = soup.find(nodename)
             if node:
-                if not isinstance(node, element.Tag) or (
-                    isinstance(node, element.Tag) and not node.has_attr(key)
-                ):
+                if not isinstance(node, element.Tag) or not node.has_attr(key):
                     continue
                 if (
                     nodename == "meta"

diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py
@@ -47,35 +47,6 @@ def __init__(
         self.querytype = querytype
         self.query = query
 
-    def __eq__(self, value: object) -> bool:
-        if not isinstance(value, LangAndDetails):
-            return False
-
-        return (
-            self.iso_639_1 == value.iso_639_1
-            and self.iso_639_2b == value.iso_639_2b
-            and self.iso_639_2t == value.iso_639_2t
-            and self.iso_639_3 == value.iso_639_3
-            and self.iso_639_5 == value.iso_639_5
-            and self.english == value.english
-            and self.native == value.native
-        )
-
-    def __hash__(self) -> int:
-        return hash(
-            (
-                self.iso_639_1,
-                self.iso_639_2b,
-                self.iso_639_2t,
-                self.iso_639_3,
-                self.iso_639_5,
-                self.english,
-                self.native,
-                self.query,
-                self.querytype,
-            )
-        )
-
 
 def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
     """ISO-639-x languages details for lang. Raises NotFoundError

diff --git a/src/zimscraperlib/image/__init__.py b/src/zimscraperlib/image/__init__.py
@@ -1,7 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 # flake8: noqa
 from .conversion import convert_image
 from .optimization import optimize_image

diff --git a/src/zimscraperlib/image/conversion.py b/src/zimscraperlib/image/conversion.py
@@ -1,13 +1,10 @@
-#!/usr/bin/env python3
-# vim: ai ts=4 sts=4 et sw=4 nu
-
 from __future__ import annotations
 
 import io
 import pathlib
-from typing import IO
+from typing import IO, Any
 
-import cairosvg.svg
+import cairosvg.svg  # pyright: ignore[reportMissingTypeStubs]
 from PIL.Image import open as pilopen
 
 from zimscraperlib.constants import ALPHA_NOT_SUPPORTED
@@ -31,7 +28,9 @@ def convert_image(
      to RGB. ex: RGB, ARGB, CMYK (and other PIL colorspaces)"""
 
     colorspace = params.get("colorspace")  # requested colorspace
-    fmt = params.pop("fmt").upper() if "fmt" in params else None  # requested format
+    fmt = (  # requested format
+        (params.pop("fmt") or "").upper() if "fmt" in params else None
+    )
     if not fmt:
         fmt = format_for(dst)
     if not fmt:
@@ -53,7 +52,7 @@ def convert_svg2png(
     Output width and height might be specified if resize is needed.
     PNG background is transparent.
     """
-    kwargs = {}
+    kwargs: dict[str, Any] = {}
     if isinstance(src, pathlib.Path):
         src = str(src)
     if isinstance(src, str):
@@ -65,9 +64,13 @@ def convert_svg2png(
     if height:
         kwargs["output_height"] = height
     if isinstance(dst, pathlib.Path):
-        cairosvg.svg2png(write_to=str(dst), **kwargs)
+        cairosvg.svg2png(  # pyright: ignore[reportUnknownMemberType]
+            write_to=str(dst), **kwargs
+        )
     else:
-        result = cairosvg.svg2png(**kwargs)
+        result = cairosvg.svg2png(  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+            **kwargs
+        )
         if not isinstance(result, bytes):
             raise Exception(
                 "Unexpected type returned by cairosvg.svg2png"