diff --git a/tagstudio/src/core/constants.py b/tagstudio/src/core/constants.py index 48c087fd..291c69c2 100644 --- a/tagstudio/src/core/constants.py +++ b/tagstudio/src/core/constants.py @@ -13,3 +13,5 @@ TAG_FAVORITE = 1 TAG_ARCHIVED = 0 + +TS_FOLDER_NOINDEX: str = ".ts_noindex" diff --git a/tagstudio/src/core/library/alchemy/library.py b/tagstudio/src/core/library/alchemy/library.py index 34267eab..210f58e2 100644 --- a/tagstudio/src/core/library/alchemy/library.py +++ b/tagstudio/src/core/library/alchemy/library.py @@ -15,7 +15,6 @@ and_, create_engine, delete, - exists, func, or_, select, @@ -34,6 +33,7 @@ TAG_ARCHIVED, TAG_FAVORITE, TS_FOLDER_NAME, + TS_FOLDER_NOINDEX, ) from ...enums import LibraryPrefs from .db import make_tables @@ -362,6 +362,8 @@ def verify_ts_folders(self, library_dir: Path) -> None: if not full_ts_path.exists(): logger.info("creating library directory", dir=full_ts_path) full_ts_path.mkdir(parents=True, exist_ok=True) + # create the noindex marker file so that refresh_dir does not index files in this folder + (full_ts_path / TS_FOLDER_NOINDEX).touch() def add_entries(self, items: list[Entry]) -> list[int]: """Add multiple Entry records to the Library.""" @@ -389,10 +391,10 @@ def remove_entries(self, entry_ids: list[int]) -> None: session.query(Entry).where(Entry.id.in_(entry_ids)).delete() session.commit() - def has_path_entry(self, path: Path) -> bool: + def get_path_entry(self, path: Path) -> Entry | None: """Check if item with given path is in library already.""" with Session(self.engine) as session: - return session.query(exists().where(Entry.path == path)).scalar() + return session.scalar(select(Entry).where(Entry.path == path)) def search_library( self, diff --git a/tagstudio/src/core/utils/refresh_dir.py b/tagstudio/src/core/utils/refresh_dir.py index 87b734ea..b2c164cd 100644 --- a/tagstudio/src/core/utils/refresh_dir.py +++ b/tagstudio/src/core/utils/refresh_dir.py @@ -4,7 +4,7 @@ from time import time import structlog -from src.core.constants import TS_FOLDER_NAME +from 
src.core import constants from src.core.library import Entry, Library logger = structlog.get_logger(__name__) @@ -39,7 +39,11 @@ def save_new_files(self) -> Iterator[int]: self.files_not_in_library = [] def refresh_dir(self, lib_path: Path) -> Iterator[int]: - """Scan a directory for files, and add those relative filenames to internal variables.""" + """Scan a directory for changes. + + - Keep track of files which are not in library. + - Remove entries from library whose files are in ignored dirs. + """ if self.library.library_dir is None: raise ValueError("No library directory set.") @@ -49,24 +53,43 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]: self.files_not_in_library = [] dir_file_count = 0 - for path in lib_path.glob("**/*"): - str_path = str(path) - if path.is_dir(): + for root, _, files in lib_path.walk(): + if "$RECYCLE.BIN" in str(root).upper(): continue - if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path: + # if the directory contains the `.ts_noindex` marker file, skip indexing its files + if constants.TS_FOLDER_NOINDEX in files: + logger.info("TS Ignore File found, skipping", directory=root) + # however, if any of the ignored files already have library entries, remove them + entries_to_remove = [] + for file in files: + file_path = root / file + entry_path = file_path.relative_to(lib_path) + if entry := self.library.get_path_entry(entry_path): + entries_to_remove.append(entry.id) + + # Yield output every 1/30 of a second + if (time() - start_time_loop) > 0.034: + # yield, but don't increase the count + yield dir_file_count + start_time_loop = time() + + self.library.remove_entries(entries_to_remove) continue - dir_file_count += 1 - relative_path = path.relative_to(lib_path) - # TODO - load these in batch somehow - if not self.library.has_path_entry(relative_path): - self.files_not_in_library.append(relative_path) + for file in files: + path = root / file + dir_file_count += 1 + + relative_path = path.relative_to(lib_path) + # TODO - load these in batch 
somehow + if not self.library.get_path_entry(relative_path): + self.files_not_in_library.append(relative_path) - # Yield output every 1/30 of a second - if (time() - start_time_loop) > 0.034: - yield dir_file_count - start_time_loop = time() + # Yield output every 1/30 of a second + if (time() - start_time_loop) > 0.034: + yield dir_file_count + start_time_loop = time() end_time_total = time() logger.info( diff --git a/tagstudio/tests/macros/test_refresh_dir.py b/tagstudio/tests/macros/test_refresh_dir.py index 4655d399..a1c206ef 100644 --- a/tagstudio/tests/macros/test_refresh_dir.py +++ b/tagstudio/tests/macros/test_refresh_dir.py @@ -3,6 +3,7 @@ import pytest from src.core.enums import LibraryPrefs +from src.core.library import Entry from src.core.utils.refresh_dir import RefreshDirTracker CWD = pathlib.Path(__file__).parent @@ -22,3 +23,37 @@ def test_refresh_new_files(library, exclude_mode): # Then assert registry.files_not_in_library == [pathlib.Path("FOO.MD")] + + +@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True) +def test_refresh_removes_noindex_content(library): + # Given + registry = RefreshDirTracker(library=library) + + # create subdirectory with .ts_noindex file in it + (library.library_dir / "subdir").mkdir() + (library.library_dir / "subdir" / ".ts_noindex").touch() + # add entry into library + entry = Entry( + path=pathlib.Path("subdir/FOO.MD"), + folder=library.folder, + fields=library.default_fields, + ) + library.add_entries([entry]) + + # create its file in noindex directory + (library.library_dir / entry.path).touch() + # create another file in the same directory + (library.library_dir / "subdir" / "test.txt").touch() + + # create a non-ignored file in the library root (no entry is added for it) + (library.library_dir / "root.txt").touch() + + # When + list(registry.refresh_dir(library.library_dir)) + + # Then + # entry for the file in the noindex folder should be removed from the library + assert not library.get_path_entry(entry.path) + # file in the indexed folder should be reported as not in the library + assert 
registry.files_not_in_library == [pathlib.Path("root.txt")] diff --git a/tagstudio/tests/test_library.py b/tagstudio/tests/test_library.py index 81f26690..d7f4cf8a 100644 --- a/tagstudio/tests/test_library.py +++ b/tagstudio/tests/test_library.py @@ -18,11 +18,11 @@ def test_library_add_file(library): fields=library.default_fields, ) - assert not library.has_path_entry(entry.path) + assert not library.get_path_entry(entry.path) assert library.add_entries([entry]) - assert library.has_path_entry(entry.path) + assert library.get_path_entry(entry.path) is not None def test_create_tag(library, generate_tag):