Skip to content

Commit

Permalink
fix: allow recursive pdf file searching
Browse files — browse the repository at this point in the history
  • Loading branch information
percevalw committed Mar 5, 2024
1 parent 22f20f2 commit 14cb8a7
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion edspdf/data/files.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
# ruff: noqa: F401
import json
import os
import sys
from collections import Counter
from pathlib import Path
from typing import (
Expand Down Expand Up @@ -39,6 +40,7 @@ def __init__(
keep_ipynb_checkpoints: bool = False,
load_annotations: bool = False,
filesystem: Optional[Any] = None,
recursive: bool = False,
):
super().__init__()

Expand Down Expand Up @@ -66,9 +68,14 @@ def __init__(
if not self.filesystem.exists(path):
raise FileNotFoundError(f"Path {path} does not exist")

assert sys.version_info >= (3, 8) or not recursive, (
"Recursive reading is only supported with Python 3.8 or higher. "
"Please upgrade your Python version or set `recursive=False`."
)
glob_str = "**/*.pdf" if recursive else "*.pdf"
self.files: List[str] = [
file
-for file in self.filesystem.glob(os.path.join(str(self.path), "*.pdf"))
+for file in self.filesystem.glob(os.path.join(str(self.path), glob_str))
if (keep_ipynb_checkpoints or ".ipynb_checkpoints" not in str(file))
and (
not load_annotations
Expand Down

0 comments on commit 14cb8a7

Please sign in to comment.