Skip to content

Commit

Permalink
feat: add option to ignore dir content via .ts_noindex file
Browse files Browse the repository at this point in the history
  • Loading branch information
yedpodtrzitko committed Sep 19, 2024
1 parent 073d517 commit bbd6551
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 21 deletions.
2 changes: 2 additions & 0 deletions tagstudio/src/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
COLLAGE_FOLDER_NAME: str = "collages"
LIBRARY_FILENAME: str = "ts_library.json"

# Name of the marker file: a directory containing a file with this name is
# skipped when the library directory is refreshed.
TS_FOLDER_NOINDEX: str = ".ts_noindex"

# TODO: Turn this whitelist into a user-configurable blacklist.
IMAGE_TYPES: list[str] = [
".png",
Expand Down
8 changes: 5 additions & 3 deletions tagstudio/src/core/library/alchemy/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
and_,
create_engine,
delete,
exists,
func,
or_,
select,
Expand All @@ -34,6 +33,7 @@
TAG_ARCHIVED,
TAG_FAVORITE,
TS_FOLDER_NAME,
TS_FOLDER_NOINDEX,
LibraryPrefs,
)
from .db import make_tables
Expand Down Expand Up @@ -317,6 +317,8 @@ def verify_ts_folders(self, library_dir: Path) -> None:
if not full_ts_path.exists():
logger.info("creating library directory", dir=full_ts_path)
full_ts_path.mkdir(parents=True, exist_ok=True)
# create noindex file to ignore the folder
(full_ts_path / TS_FOLDER_NOINDEX).touch()

def add_entries(self, items: list[Entry]) -> list[int]:
"""Add multiple Entry records to the Library."""
Expand All @@ -341,10 +343,10 @@ def remove_entries(self, entry_ids: list[int]) -> None:
session.query(Entry).where(Entry.id.in_(entry_ids)).delete()
session.commit()

def has_path_entry(self, path: Path) -> bool:
def get_path_entry(self, path: Path) -> Entry | None:
"""Return the entry stored under the given path, or None if there is none."""
with Session(self.engine) as session:
return session.query(exists().where(Entry.path == path)).scalar()
return session.scalar(select(Entry).where(Entry.path == path))

def search_library(
self,
Expand Down
53 changes: 38 additions & 15 deletions tagstudio/src/core/utils/refresh_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from time import time

import structlog
from src.core.constants import TS_FOLDER_NAME
from src.core import constants
from src.core.library import Entry, Library

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -39,7 +39,11 @@ def save_new_files(self) -> Iterator[int]:
self.files_not_in_library = []

def refresh_dir(self, lib_path: Path) -> Iterator[int]:
"""Scan a directory for files, and add those relative filenames to internal variables."""
"""Scan a directory for changes.
- Keep track of files which are not in library.
- Remove files from library which are in ignored dirs.
"""
if self.library.library_dir is None:
raise ValueError("No library directory set.")

Expand All @@ -49,24 +53,43 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
self.files_not_in_library = []
dir_file_count = 0

for path in lib_path.glob("**/*"):
str_path = str(path)
if path.is_dir():
for root, _, files in lib_path.walk():
if "$RECYCLE.BIN" in str(root).upper():
continue

if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path:
# - if directory contains file `.ts_noindex` then skip the directory
if constants.TS_FOLDER_NOINDEX in files:
logger.info("TS Ignore File found, skipping", directory=root)
# however check if the ignored files aren't in the library; if so, remove them
entries_to_remove = []
for file in files:
file_path = root / file
entry_path = file_path.relative_to(lib_path)
if entry := self.library.get_path_entry(entry_path):
entries_to_remove.append(entry.id)

# Yield output every 1/30 of a second
if (time() - start_time_loop) > 0.034:
# yield, but don't increase the count
yield dir_file_count
start_time_loop = time()

self.library.remove_entries(entries_to_remove)
continue

dir_file_count += 1
relative_path = path.relative_to(lib_path)
# TODO - load these in batch somehow
if not self.library.has_path_entry(relative_path):
self.files_not_in_library.append(relative_path)
for file in files:
path = root / file
dir_file_count += 1

relative_path = path.relative_to(lib_path)
# TODO - load these in batch somehow
if not self.library.get_path_entry(relative_path):
self.files_not_in_library.append(relative_path)

# Yield output every 1/30 of a second
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()
# Yield output every 1/30 of a second
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()

end_time_total = time()
logger.info(
Expand Down
37 changes: 36 additions & 1 deletion tagstudio/tests/macros/test_refresh_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest
from src.core.constants import LibraryPrefs
from src.core.library import Entry
from src.core.utils.refresh_dir import RefreshDirTracker

CWD = pathlib.Path(__file__).parent
Expand All @@ -18,7 +19,41 @@ def test_refresh_new_files(library, exclude_mode):
(library.library_dir / "FOO.MD").touch()

# When
assert not list(registry.refresh_dir(library.library_dir))
list(registry.refresh_dir(library.library_dir))

# Then
assert registry.files_not_in_library == [pathlib.Path("FOO.MD")]


@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_refresh_removes_noindex_content(library):
    """Refreshing drops library entries located in a `.ts_noindex` directory.

    Files outside of the ignored directory must still be reported as not
    being in the library.
    """
    # Given
    tracker = RefreshDirTracker(library=library)

    # a subdirectory flagged as ignored by the `.ts_noindex` marker file
    ignored_dir = library.library_dir / "subdir"
    ignored_dir.mkdir()
    (ignored_dir / ".ts_noindex").touch()

    # a library entry pointing inside of the ignored subdirectory
    entry = Entry(
        path=pathlib.Path("subdir/FOO.MD"),
        folder=library.folder,
        fields=library.default_fields,
    )
    library.add_entries([entry])

    # the entry's file on disk, plus a sibling file unknown to the library
    (library.library_dir / entry.path).touch()
    (ignored_dir / "test.txt").touch()

    # a file outside of the ignored subdirectory (not added to the library)
    (library.library_dir / "root.txt").touch()

    # When
    for _ in tracker.refresh_dir(library.library_dir):
        pass

    # Then
    # the entry from the ignored directory is gone from the library
    assert not library.get_path_entry(entry.path)
    # only the file outside of the ignored directory is reported as new
    assert tracker.files_not_in_library == [pathlib.Path("root.txt")]
4 changes: 2 additions & 2 deletions tagstudio/tests/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ def test_library_add_file():
fields=lib.default_fields,
)

assert not lib.has_path_entry(entry.path)
assert not lib.get_path_entry(entry.path)

assert lib.add_entries([entry])

assert lib.has_path_entry(entry.path) is True
assert lib.get_path_entry(entry.path) is not None


def test_create_tag(library, generate_tag):
Expand Down

0 comments on commit bbd6551

Please sign in to comment.