From 14cb8a76371f17b51cda2026f2558e0958229bfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= Date: Tue, 5 Mar 2024 18:37:55 +0100 Subject: [PATCH] fix: allow recursive pdf file searching --- edspdf/data/files.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/edspdf/data/files.py b/edspdf/data/files.py index 94cda77..d67c8df 100644 --- a/edspdf/data/files.py +++ b/edspdf/data/files.py @@ -1,6 +1,7 @@ # ruff: noqa: F401 import json import os +import sys from collections import Counter from pathlib import Path from typing import ( @@ -39,6 +40,7 @@ def __init__( keep_ipynb_checkpoints: bool = False, load_annotations: bool = False, filesystem: Optional[Any] = None, + recursive: bool = False, ): super().__init__() @@ -66,9 +68,14 @@ def __init__( if not self.filesystem.exists(path): raise FileNotFoundError(f"Path {path} does not exist") + assert sys.version_info >= (3, 8) or not recursive, ( + "Recursive reading is only supported with Python 3.8 or higher. " + "Please upgrade your Python version or set `recursive=False`." + ) + glob_str = "**/*.pdf" if recursive else "*.pdf" self.files: List[str] = [ file - for file in self.filesystem.glob(os.path.join(str(self.path), "*.pdf")) + for file in self.filesystem.glob(os.path.join(str(self.path), glob_str)) if (keep_ipynb_checkpoints or ".ipynb_checkpoints" not in str(file)) and ( not load_annotations