Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add option to ignore dir content via .ts_noindex file #516

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tagstudio/src/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@

TAG_FAVORITE = 1
TAG_ARCHIVED = 0

TS_FOLDER_NOINDEX: str = ".ts_noindex"
8 changes: 5 additions & 3 deletions tagstudio/src/core/library/alchemy/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
and_,
create_engine,
delete,
exists,
func,
or_,
select,
Expand All @@ -34,6 +33,7 @@
TAG_ARCHIVED,
TAG_FAVORITE,
TS_FOLDER_NAME,
TS_FOLDER_NOINDEX,
)
from ...enums import LibraryPrefs
from .db import make_tables
Expand Down Expand Up @@ -362,6 +362,8 @@ def verify_ts_folders(self, library_dir: Path) -> None:
if not full_ts_path.exists():
logger.info("creating library directory", dir=full_ts_path)
full_ts_path.mkdir(parents=True, exist_ok=True)
# create noindex file to ignore the folder
(full_ts_path / TS_FOLDER_NOINDEX).touch()

def add_entries(self, items: list[Entry]) -> list[int]:
"""Add multiple Entry records to the Library."""
Expand Down Expand Up @@ -389,10 +391,10 @@ def remove_entries(self, entry_ids: list[int]) -> None:
session.query(Entry).where(Entry.id.in_(entry_ids)).delete()
session.commit()

def has_path_entry(self, path: Path) -> bool:
def get_path_entry(self, path: Path) -> Entry | None:
"""Return the entry stored under the given path, or None if it is not in the library."""
with Session(self.engine) as session:
return session.query(exists().where(Entry.path == path)).scalar()
return session.scalar(select(Entry).where(Entry.path == path))

def search_library(
self,
Expand Down
53 changes: 38 additions & 15 deletions tagstudio/src/core/utils/refresh_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from time import time

import structlog
from src.core.constants import TS_FOLDER_NAME
from src.core import constants
from src.core.library import Entry, Library

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -39,7 +39,11 @@ def save_new_files(self) -> Iterator[int]:
self.files_not_in_library = []

def refresh_dir(self, lib_path: Path) -> Iterator[int]:
"""Scan a directory for files, and add those relative filenames to internal variables."""
"""Scan a directory for changes.
- Keep track of files which are not in library.
- Remove files from library which are in ignored dirs.
"""
if self.library.library_dir is None:
raise ValueError("No library directory set.")

Expand All @@ -49,24 +53,43 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
self.files_not_in_library = []
dir_file_count = 0

for path in lib_path.glob("**/*"):
str_path = str(path)
if path.is_dir():
for root, _, files in lib_path.walk():
if "$RECYCLE.BIN" in str(root).upper():
continue

if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path:
# - if directory contains file `.ts_noindex` then skip the directory
if constants.TS_FOLDER_NOINDEX in files:
logger.info("TS Ignore File found, skipping", directory=root)
# however check if the ignored files aren't in the library; if so, remove them
entries_to_remove = []
for file in files:
file_path = root / file
entry_path = file_path.relative_to(lib_path)
if entry := self.library.get_path_entry(entry_path):
entries_to_remove.append(entry.id)

# Yield output every 1/30 of a second
if (time() - start_time_loop) > 0.034:
# yield so the caller stays responsive, but don't increase the count
yield dir_file_count
start_time_loop = time()

self.library.remove_entries(entries_to_remove)
continue

dir_file_count += 1
relative_path = path.relative_to(lib_path)
# TODO - load these in batch somehow
if not self.library.has_path_entry(relative_path):
self.files_not_in_library.append(relative_path)
for file in files:
path = root / file
dir_file_count += 1

relative_path = path.relative_to(lib_path)
# TODO - load these in batch somehow
if not self.library.get_path_entry(relative_path):
self.files_not_in_library.append(relative_path)

# Yield output every 1/30 of a second
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()
# Yield output every 1/30 of a second
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()

end_time_total = time()
logger.info(
Expand Down
35 changes: 35 additions & 0 deletions tagstudio/tests/macros/test_refresh_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest
from src.core.enums import LibraryPrefs
from src.core.library import Entry
from src.core.utils.refresh_dir import RefreshDirTracker

CWD = pathlib.Path(__file__).parent
Expand All @@ -22,3 +23,37 @@ def test_refresh_new_files(library, exclude_mode):

# Then
assert registry.files_not_in_library == [pathlib.Path("FOO.MD")]


@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_refresh_removes_noindex_content(library):
    """Refreshing drops library entries that live in a `.ts_noindex` directory.

    Files inside the ignored directory must neither stay in the library nor be
    reported as new, while files outside of it are still reported as new.
    """
    # Given
    lib_root = library.library_dir
    ignored_dir = lib_root / "subdir"
    tracker = RefreshDirTracker(library=library)

    # a subdirectory marked as ignored through a .ts_noindex file
    ignored_dir.mkdir()
    (ignored_dir / ".ts_noindex").touch()

    # a library entry whose path points into the ignored subdirectory
    ignored_entry = Entry(
        path=pathlib.Path("subdir/FOO.MD"),
        folder=library.folder,
        fields=library.default_fields,
    )
    library.add_entries([ignored_entry])

    # the entry's backing file plus a second, untracked file in the ignored dir
    (lib_root / ignored_entry.path).touch()
    (ignored_dir / "test.txt").touch()

    # an untracked file outside the ignored dir (on disk only, not in the library)
    (lib_root / "root.txt").touch()

    # When
    # refresh_dir is a generator; drain it so the whole scan actually runs
    for _ in tracker.refresh_dir(lib_root):
        pass

    # Then
    # the entry located in the noindex folder is gone from the library
    assert not library.get_path_entry(ignored_entry.path)
    # only the file from the indexed location is reported as new
    assert tracker.files_not_in_library == [pathlib.Path("root.txt")]
4 changes: 2 additions & 2 deletions tagstudio/tests/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ def test_library_add_file(library):
fields=library.default_fields,
)

assert not library.has_path_entry(entry.path)
assert not library.get_path_entry(entry.path)

assert library.add_entries([entry])

assert library.has_path_entry(entry.path)
assert library.get_path_entry(entry.path) is not None


def test_create_tag(library, generate_tag):
Expand Down