From 2e3871cf2e91995fc9271aa9605bf283c08765ee Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Wed, 18 Oct 2023 13:27:15 +0200 Subject: [PATCH 1/9] remove conflict warning if unmanaged file is empty or contents equal to what we want to write --- entangled/filedb.py | 7 ++++++- entangled/transaction.py | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/entangled/filedb.py b/entangled/filedb.py index cc91b2c..5673ce8 100644 --- a/entangled/filedb.py +++ b/entangled/filedb.py @@ -30,13 +30,16 @@ class FileStat: deps: Optional[list[Path]] modified: datetime hexdigest: str + size: int @staticmethod def from_path(path: Path, deps: Optional[list[Path]]): stat = os.stat(path) + size = stat.st_size with open(path, "r") as f: digest = hexdigest(f.read()) - return FileStat(path, deps, datetime.fromtimestamp(stat.st_mtime), digest) + + return FileStat(path, deps, datetime.fromtimestamp(stat.st_mtime), digest, size) def __lt__(self, other: FileStat) -> bool: return self.modified < other.modified @@ -51,6 +54,7 @@ def from_json(data) -> FileStat: None if data["deps"] is None else [Path(d) for d in data["deps"]], datetime.fromisoformat(data["modified"]), data["hexdigest"], + data["size"] ) def to_json(self): @@ -59,6 +63,7 @@ def to_json(self): "deps": None if self.deps is None else [str(p) for p in self.deps], "modified": self.modified.isoformat(), "hexdigest": self.hexdigest, + "size": self.size } diff --git a/entangled/transaction.py b/entangled/transaction.py index b8c651e..a65ed84 100644 --- a/entangled/transaction.py +++ b/entangled/transaction.py @@ -14,7 +14,7 @@ WITH_RICH = False from .utility import cat_maybes -from .filedb import FileDB, stat, file_db +from .filedb import FileDB, stat, file_db, hexdigest from .errors.internal import InternalError @@ -41,6 +41,12 @@ class Create(Action): def conflict(self, _) -> Optional[str]: if self.target.exists(): + # Check if file contents are the same as what we want to write or is empty + md_stat = 
stat(self.target) + fileHexdigest = md_stat.hexdigest + contentHexdigest = hexdigest(self.content) + if (contentHexdigest == fileHexdigest) or (md_stat.size == 0): + return None return f"{self.target} already exists and is not managed by Entangled" return None From ac0e81c0a3d052c4aa8231c632d963d91766310e Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Wed, 18 Oct 2023 13:41:38 +0200 Subject: [PATCH 2/9] switch to atomicwrites --- entangled/transaction.py | 7 ++++--- pyproject.toml | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/entangled/transaction.py b/entangled/transaction.py index a65ed84..942f366 100644 --- a/entangled/transaction.py +++ b/entangled/transaction.py @@ -3,6 +3,7 @@ from pathlib import Path from contextlib import contextmanager from enum import Enum +from atomicwrites import atomic_write import logging @@ -47,12 +48,12 @@ def conflict(self, _) -> Optional[str]: contentHexdigest = hexdigest(self.content) if (contentHexdigest == fileHexdigest) or (md_stat.size == 0): return None - return f"{self.target} already exists and is not managed by Entangled" + return f"{self.target} is not managed by Entangled" return None def run(self, db: FileDB): self.target.parent.mkdir(parents=True, exist_ok=True) - with open(self.target, "w") as f: + with atomic_write(self.target, overwrite=True) as f: f.write(self.content) db.update(self.target, self.sources) if self.sources != []: @@ -78,7 +79,7 @@ def conflict(self, db: FileDB) -> Optional[str]: return None def run(self, db: FileDB): - with open(self.target, "w") as f: + with atomic_write(self.target, overwrite=True) as f: f.write(self.content) db.update(self.target, self.sources) diff --git a/pyproject.toml b/pyproject.toml index c2871d1..6f0be35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ filelock = "^3.12.0" # file lock for json db argh = "^0.28.1" rich = "^13.3.5" tomlkit = "^0.12.1" +atomicwrites = "^1.4.1" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" 
From 028662516a7d258df68ccb03ca224d596feed893 Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Wed, 18 Oct 2023 14:06:28 +0200 Subject: [PATCH 3/9] if file has been modified outside Entangled but content remains the same do not raise conflict --- entangled/transaction.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/entangled/transaction.py b/entangled/transaction.py index 942f366..64089b7 100644 --- a/entangled/transaction.py +++ b/entangled/transaction.py @@ -43,6 +43,7 @@ class Create(Action): def conflict(self, _) -> Optional[str]: if self.target.exists(): # Check if file contents are the same as what we want to write or is empty + # then it is safe to take ownership. md_stat = stat(self.target) fileHexdigest = md_stat.hexdigest contentHexdigest = hexdigest(self.content) @@ -70,6 +71,9 @@ class Write(Action): def conflict(self, db: FileDB) -> Optional[str]: st = stat(self.target) + # If content remained the same then we resolve the conflict + if st.hexdigest == db[self.target].hexdigest: + return None if st != db[self.target]: return f"`{self.target}` seems to have changed outside the control of Entangled" if self.sources: From 90b480021454aadf9a7d5831c6271e5cdd845800 Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Wed, 18 Oct 2023 14:19:46 +0200 Subject: [PATCH 4/9] delete return msg conform to others --- entangled/transaction.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/entangled/transaction.py b/entangled/transaction.py index 64089b7..e3c94ef 100644 --- a/entangled/transaction.py +++ b/entangled/transaction.py @@ -71,9 +71,6 @@ class Write(Action): def conflict(self, db: FileDB) -> Optional[str]: st = stat(self.target) - # If content remained the same then we resolve the conflict - if st.hexdigest == db[self.target].hexdigest: - return None if st != db[self.target]: return f"`{self.target}` seems to have changed outside the control of Entangled" if self.sources: @@ -96,9 +93,7 @@ class Delete(Action): def 
conflict(self, db: FileDB) -> Optional[str]: st = stat(self.target) if st != db[self.target]: - return ( - f"{self.target} seems to have changed outside the control of Entangled" - ) + return f"{self.target} seems to have changed outside the control of Entangled" return None def run(self, db: FileDB): From cf35733da5411e6834853e456d8d44497b973ec5 Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Wed, 18 Oct 2023 16:17:19 +0200 Subject: [PATCH 5/9] adding ignore_list --- entangled/commands/stitch.py | 4 +++- entangled/commands/sync.py | 4 +++- entangled/commands/tangle.py | 5 ++++- entangled/config/__init__.py | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/entangled/commands/stitch.py b/entangled/commands/stitch.py index 2347faf..530b0d1 100644 --- a/entangled/commands/stitch.py +++ b/entangled/commands/stitch.py @@ -28,7 +28,9 @@ def get(item: Content): @argh.arg("-s", "--show", help="only show, don't act") def stitch(*, force: bool = False, show: bool = False): """Stitch code changes back into the Markdown""" - input_file_list = list(chain.from_iterable(map(Path(".").glob, config.watch_list))) + include_file_list = chain.from_iterable(map(Path(".").glob, config.watch_list)) + exclude_file_list = list(chain.from_iterable(map(Path(".").glob, config.ignore_list))) + input_file_list = [path for path in include_file_list if not path in exclude_file_list] if show: mode = TransactionMode.SHOW diff --git a/entangled/commands/sync.py b/entangled/commands/sync.py index dc5f173..98e3369 100644 --- a/entangled/commands/sync.py +++ b/entangled/commands/sync.py @@ -16,7 +16,9 @@ def _stitch_then_tangle(): def sync_action() -> Optional[Callable[[], None]]: - input_file_list = list(chain.from_iterable(map(Path(".").glob, config.watch_list))) + include_file_list = chain.from_iterable(map(Path(".").glob, config.watch_list)) + exclude_file_list = list(chain.from_iterable(map(Path(".").glob, config.ignore_list))) + input_file_list = [path for path in 
include_file_list if not path in exclude_file_list] with file_db(readonly=True) as db: changed = set(db.changed()) diff --git a/entangled/commands/tangle.py b/entangled/commands/tangle.py index 3186100..255c022 100644 --- a/entangled/commands/tangle.py +++ b/entangled/commands/tangle.py @@ -29,7 +29,10 @@ def tangle(*, annotate: Optional[str] = None, force: bool = False, show: bool = else: annotation_method = AnnotationMethod[annotate.upper()] - input_file_list = chain.from_iterable(map(Path(".").glob, config.watch_list)) + include_file_list = chain.from_iterable(map(Path(".").glob, config.watch_list)) + exclude_file_list = list(chain.from_iterable(map(Path(".").glob, config.ignore_list))) + input_file_list = [path for path in include_file_list if not path in exclude_file_list] + refs = ReferenceMap() hooks = get_hooks() diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index 0392a0b..3a9cca9 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -63,6 +63,7 @@ class Config(threading.local): languages: list[Language] = field(default_factory=list) markers: Markers = field(default_factory=lambda: copy(markers)) watch_list: list[str] = field(default_factory=lambda: ["**/*.md"]) + ignore_list: list[str] = field(default_factory=lambda: ["**/README.md"]) annotation_format: Optional[str] = None annotation: AnnotationMethod = AnnotationMethod.STANDARD use_line_directives: bool = False From 08c169a3c633e6dfc93d59e1b477e2afaa302256 Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Wed, 18 Oct 2023 17:30:17 +0200 Subject: [PATCH 6/9] remove atomicwrites since it is not maintained anymore and use tempfile and os.replace instead --- entangled/transaction.py | 17 ++++++++++++++--- pyproject.toml | 1 - 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/entangled/transaction.py b/entangled/transaction.py index e3c94ef..22e5741 100644 --- a/entangled/transaction.py +++ b/entangled/transaction.py @@ -3,8 +3,9 @@ from 
pathlib import Path from contextlib import contextmanager from enum import Enum -from atomicwrites import atomic_write +import os +import tempfile import logging try: @@ -54,8 +55,13 @@ def conflict(self, _) -> Optional[str]: def run(self, db: FileDB): self.target.parent.mkdir(parents=True, exist_ok=True) - with atomic_write(self.target, overwrite=True) as f: + # Write to tmp file then replace with file name + with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: f.write(self.content) + # Flush and sync contents to disk + f.flush() + os.fsync(f.fileno()) + os.replace(f.name, self.target) db.update(self.target, self.sources) if self.sources != []: db.managed.add(self.target) @@ -80,8 +86,13 @@ def conflict(self, db: FileDB) -> Optional[str]: return None def run(self, db: FileDB): - with atomic_write(self.target, overwrite=True) as f: + # Write to tmp file then replace with file name + with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: f.write(self.content) + # Flush and sync contents to disk + f.flush() + os.fsync(f.fileno()) + os.replace(f.name, self.target) db.update(self.target, self.sources) def __str__(self): diff --git a/pyproject.toml b/pyproject.toml index 6f0be35..c2871d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ filelock = "^3.12.0" # file lock for json db argh = "^0.28.1" rich = "^13.3.5" tomlkit = "^0.12.1" -atomicwrites = "^1.4.1" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" From 5a124d3268bc8907a21bc9b0f0ef1246a6f9fad0 Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Thu, 19 Oct 2023 11:07:00 +0200 Subject: [PATCH 7/9] write cleaned db when file is deleted --- entangled/filedb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/entangled/filedb.py b/entangled/filedb.py index 5673ce8..c8ee97c 100644 --- a/entangled/filedb.py +++ b/entangled/filedb.py @@ -159,6 +159,8 @@ def initialize() -> FileDB: "File `%s` in DB doesn't exist. 
Removing entry from DB.", path ) del db[path] + if len(undead) > 0: + db.write() return db FileDB.path().parent.mkdir(parents=True, exist_ok=True) From c67c24b6c17ddef8e08145f57c6afa7a168f687c Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Thu, 19 Oct 2023 11:54:41 +0200 Subject: [PATCH 8/9] adding ignore test --- entangled/status.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/entangled/status.py b/entangled/status.py index 24df35b..562a06b 100644 --- a/entangled/status.py +++ b/entangled/status.py @@ -16,7 +16,9 @@ def find_watch_dirs(): def list_input_files(): """List all input files.""" - return chain.from_iterable(map(Path(".").glob, config.watch_list)) + include_file_list = chain.from_iterable(map(Path(".").glob, config.watch_list)) + exclude_file_list = list(chain.from_iterable(map(Path(".").glob, config.ignore_list))) + return [path for path in include_file_list if not path in exclude_file_list] def list_dependent_files(): From ce7bedeb19f0e6a792257e774090fe971543e8ac Mon Sep 17 00:00:00 2001 From: Reggie Cushing Date: Thu, 19 Oct 2023 11:54:53 +0200 Subject: [PATCH 9/9] adding ignore test --- test/test_ignore_list.py | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 test/test_ignore_list.py diff --git a/test/test_ignore_list.py b/test/test_ignore_list.py new file mode 100644 index 0000000..9d9adc0 --- /dev/null +++ b/test/test_ignore_list.py @@ -0,0 +1,63 @@ +from entangled.config import config +from entangled.status import find_watch_dirs, list_input_files +from contextlib import chdir +from entangled.commands.tangle import tangle +from pathlib import Path + +readme_md = """ +# README +```{.python file=src/test.py} +print("test") +``` +""" + +index_md_1 = """ +# Test + +``` {.c file=src/test.c} +#include +int main() { printf("Hello, World!\\n"); return 0; } +``` +""" + +index_md_2 = """ +``` {.makefile file=Makefile} +.RECIPEPREFIX = > + +%.o: %.c +> gcc -c $< -o $@ + +hello: 
test.o +> gcc $^ -o $@ +``` +""" + +data_md = """ +Don't tangle me! +```{.python file=src/test2.py} +print("test2") +``` +""" + +def list_files_recursive(directory): + for root, _, files in os.walk(directory): + for file in files: + file_path = os.path.join(root, file) + print(file_path) + +def test_watch_dirs(tmp_path): + with chdir(tmp_path): + Path("./docs").mkdir() + Path("./docs/data").mkdir() + Path("./docs/index.md").write_text(index_md_1) + Path("./docs/data/data.md").write_text(data_md) + Path("./docs/README.md").write_text(readme_md) + with config(watch_list=["**/*.md"], ignore_list=["**/data/*", "**/README.md"]): + tangle() + # data.md and README.md should not be entangled cause they are part + # of the ignore list while index.md should be so test2.py, test.py + # should not be created while test.c should be created. + assert not Path.exists(Path("./src/test2.py")) + assert not Path.exists(Path("./src/test.py")) + assert Path.exists(Path("./src/test.c")) +