diff --git a/.gitignore b/.gitignore index af4dfc8..3fd49ff 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,3 @@ coverage.* dist poetry.lock site - diff --git a/Makefile b/Makefile index dc3d059..5e6b1cb 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ test: poetry run coverage run --source=entangled -m pytest poetry run coverage xml poetry run coverage report + poetry run mypy docs: poetry run mkdocs build diff --git a/README.md b/README.md index 762cb22..6ceca43 100644 --- a/README.md +++ b/README.md @@ -224,9 +224,64 @@ In principle, you could do a lot of things with the `build` hook, supposing that That being said, candidates for hooks could be: - *Code metadata*. Some code could use more meta data than just a name and language. One way to include metadata is by having a header that is separated with a three hyphen line `---` from the actual code content. A hook could change the way the code is tangled, possibly injecting the metadata as a docstring, or leaving it out of the tangled code and have the document generator use it for other purposes. -- *She-bang lines*. When you're coding scripts, it may be desirable to have `#!/bin/bash` equivalent line at the top. This is currently not supported in the Python port of Entangled. - *Integration with package managers* like `cargo`, `cabal`, `poetry` etc. These are usually configured in a separate file. A hook could be used to specify dependencies. That way you could also document why a certain dependency is needed, or why it needs to have the version you specify. +## Loom +Entangled has a small build engine embedded, called Loom, similar to GNU Make. You give it a list of tasks (specified in TOML) that may depend on one another. Loom runs a task when its dependencies are newer than its targets. Execution is lazy and parallel. Loom supports: + +- Running tasks by passing a script to any configured interpreter, e.g. Bash or Python. +- Redirecting `stdout` or `stdin` to or from files. +- Defining so-called "phony" targets. +- Defining `pattern`s for programmable reuse. +- Including other Loom files with `include`, even ones that first need to be generated by another `task`. +- Treating a task's `stdin` and `stdout` automatically as a dependency and a target. + +### Examples +To write a ROT13-encoded "Hello, World!" to a file `secret.txt`, we may do the following: + +```toml +[[task]] +stdout = "secret.txt" +language = "Python" +script = """ +print("Uryyb, Jbeyq!") +""" +``` + +To decode this message into `msg.txt`, define a pattern and call it: + +```toml +[pattern.rot13] +stdout = "{stdout}" +stdin = "{stdin}" +language = "Bash" +script = """ +tr a-zA-Z n-za-mN-ZA-M +""" + +[[call]] +pattern = "rot13" + [call.args] + stdin = "secret.txt" + stdout = "msg.txt" +``` + +To define a phony target "all" that depends on the decoded message: + +```toml +[[task]] +targets = ["phony(all)"] +dependencies = ["msg.txt"] +``` + +Features on the roadmap: +- Defining "tmpfile" targets. +- Enabling Jinja in patterns. +- Specifying that certain tasks should not run in parallel, using a named set of semaphores. +- Enabling versioned output directories (maybe Jinja solves this). + +We may yet decide to put Loom into an external Python package. + ## Support for Document Generators Entangled has been used successfully with the following document generators. Note that some of these examples were built using older versions of Entangled, but they should work just the same.
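The Loom examples in the README diff above can also be driven programmatically through the API this changeset introduces (`entangled/loom/program.py`). The sketch below mirrors `test/loom/test_program.py` further down and assumes the two TOML snippets are saved together as `loom.toml` (an illustrative file name, not part of the diff).

```python
# Sketch only: drive the README's Loom example through the new API,
# following the pattern used in test/loom/test_program.py.
import asyncio
from pathlib import Path

from entangled.loom.program import Program, resolve_tasks
from entangled.loom.target import Phony, Target


async def main() -> None:
    program = Program.read(Path("loom.toml"))  # parse the TOML task description
    db = await resolve_tasks(program)          # expand patterns and includes into a TaskDB
    await db.run(Target(Phony("all")))         # build the phony "all" target

asyncio.run(main())
```

From the shell, the new `loom` subcommand registered in `entangled/commands/loom.py` should accept the same target syntax, e.g. `entangled loom "phony(all)"`.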
diff --git a/entangled/commands/__init__.py b/entangled/commands/__init__.py index 33a5e02..8a75570 100644 --- a/entangled/commands/__init__.py +++ b/entangled/commands/__init__.py @@ -4,11 +4,13 @@ from .sync import sync from .tangle import tangle from .watch import watch +from .loom import loom __all__ = [ "new", - "status" + "loom", + "status", "stitch", "sync", "tangle", diff --git a/entangled/commands/loom.py b/entangled/commands/loom.py new file mode 100644 index 0000000..ba57c7c --- /dev/null +++ b/entangled/commands/loom.py @@ -0,0 +1,27 @@ +from typing import Optional +import argh # type: ignore +import asyncio + +from ..config import config +from ..loom import resolve_tasks, Target +from ..logging import logger + +log = logger() + +async def main(target_strs: list[str], force_run: bool, throttle: Optional[int]): + db = await resolve_tasks(config.loom) + for t in db.tasks: + log.debug(str(t)) + if throttle: + db.throttle = asyncio.Semaphore(throttle) + db.force_run = force_run + jobs = [db.run(Target.from_str(t)) for t in target_strs] + await asyncio.gather(*jobs) + + +@argh.arg("targets", nargs="+", help="name of target to run") +@argh.arg("-B", "--force-run", help="rebuild all dependencies") +@argh.arg("-j", "--throttle", help="limit number of concurrent jobs") +def loom(targets: list[str], force_run: bool = False, throttle: Optional[int] = None): + """Build one of the configured targets.""" + asyncio.run(main(targets, force_run, throttle)) diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index 3a9cca9..daa7141 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -4,7 +4,6 @@ from __future__ import annotations -import logging import threading from contextlib import contextmanager from copy import copy @@ -15,9 +14,16 @@ import tomlkit +from entangled.errors.user import UserError + +from ..loom import Program from ..construct import construct from .language import Language, languages from .version import Version +from ..logging import logger + + +log = logger() class AnnotationMethod(Enum): @@ -42,22 +48,35 @@ class Markers: open: str close: str - begin_ignore: str - end_ignore: str + begin_ignore: str = r"^\s*\~\~\~markdown\s*$" + end_ignore: str = r"^\s*\~\~\~\s*$" markers = Markers( - r"^(?P<indent>\s*)```\s*{(?P<properties>[^{}]*)}\s*$", - r"^(?P<indent>\s*)```\s*$", - r"^\s*\~\~\~markdown\s*$", - r"^\s*\~\~\~\s*$", + r"^(?P<indent>\s*)```\s*{(?P<properties>[^{}]*)}\s*$", r"^(?P<indent>\s*)```\s*$" ) @dataclass class Config(threading.local): - """Main config class. This class is made thread-local to make - it possible to test in parallel.""" + """Main config class. + + Attributes: + version: Version of Entangled for which this config was created. + Entangled should read all versions lower than its own. + languages: List of programming languages and their comment styles. + markers: Regexes for detecting open and close of code blocks. + watch_list: List of glob-expressions indicating files to include + for tangling. + annotation: Style of annotation. + annotation_format: Extra annotation. + use_line_directives: Whether to print pragmas in source code for + indicating markdown source locations. + hooks: List of enabled hooks. + hook: Sub-config of hooks. + loom: Sub-config of loom.
+ + This class is made thread-local to make it possible to test in parallel.""" version: Version languages: list[Language] = field(default_factory=list) @@ -69,6 +88,7 @@ class Config(threading.local): use_line_directives: bool = False hooks: list[str] = field(default_factory=list) hook: dict = field(default_factory=dict) + loom: Program = field(default_factory=Program) def __post_init__(self): self.languages = languages + self.languages @@ -109,11 +129,11 @@ def read_config_from_toml( json = json[s] return construct(Config, json) except ValueError as e: - logging.error("Could not read config: %s", e) + log.error("Could not read config: %s", e) return None except KeyError as e: - logging.debug("%s", e) - logging.debug("The config file %s should contain a section %s", path, section) + log.debug("%s", str(e)) + log.debug("The config file %s should contain a section %s", path, section) return None @@ -147,6 +167,7 @@ def __call__(self, **kwargs): setattr(self.config, k, backup[k]) def get_language(self, lang_name: str) -> Optional[Language]: + assert self.config return self.config.language_index.get(lang_name, None) diff --git a/entangled/construct.py b/entangled/construct.py index a430991..c512ee8 100644 --- a/entangled/construct.py +++ b/entangled/construct.py @@ -1,10 +1,13 @@ -from typing import Union +from pathlib import Path +from typing import Any, Union from dataclasses import is_dataclass from enum import Enum import typing import types +from entangled.errors.user import ConfigError + from .parsing import Parser @@ -12,7 +15,14 @@ def isgeneric(annot): return hasattr(annot, "__origin__") and hasattr(annot, "__args__") -def construct(annot, json): +def construct(annot: Any, json: Any) -> Any: + try: + return _construct(annot, json) + except (AssertionError, ValueError): + raise ConfigError(annot, json) + + +def _construct(annot: Any, json: Any) -> Any: """Construct an object from a given type from a JSON stream. 
The `annot` type should be one of: str, int, list[T], Optional[T], @@ -28,9 +38,20 @@ def construct(annot, json): if isinstance(json, str) and isinstance(annot, Parser): result, _ = annot.read(json) return result + if ( + isgeneric(annot) + and typing.get_origin(annot) is dict + and typing.get_args(annot)[0] is str + ): + assert isinstance(json, dict) + return {k: construct(typing.get_args(annot)[1], v) for k, v in json.items()} + if annot is Any: + return json if annot is dict or isgeneric(annot) and typing.get_origin(annot) is dict: assert isinstance(json, dict) return json + if annot is Path and isinstance(json, str): + return Path(json) if isgeneric(annot) and typing.get_origin(annot) is list: assert isinstance(json, list) return [construct(typing.get_args(annot)[0], item) for item in json] @@ -49,7 +70,7 @@ def construct(annot, json): # assert all(k in json for k in arg_annot) args = {k: construct(arg_annot[k], json[k]) for k in json} return annot(**args) - if isinstance(json, str) and issubclass(annot, Enum): + if isinstance(json, str) and isinstance(annot, type) and issubclass(annot, Enum): options = {opt.name.lower(): opt for opt in annot} assert json.lower() in options return options[json.lower()] diff --git a/entangled/document.py b/entangled/document.py index 997adfd..089e4b0 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -7,6 +7,7 @@ from .config import Language, AnnotationMethod, config from .properties import Property, get_attribute from .errors.internal import InternalError +from .text_location import TextLocation def length(iter: Iterable[Any]) -> int: @@ -31,15 +32,6 @@ class PlainText: Content = Union[PlainText, ReferenceId] -@dataclass -class TextLocation: - filename: str - line_number: int = 0 - - def __str__(self): - return f"{self.filename}:{self.line_number}" - - @dataclass class CodeBlock: language: Language diff --git a/entangled/errors/internal.py b/entangled/errors/internal.py index 57cfdd8..b8f7339 100644 --- a/entangled/errors/internal.py +++ b/entangled/errors/internal.py @@ -13,7 +13,7 @@ def __str__(self): return f"Internal error: {self.msg}" -def bug_contact(): +def bug_contact(e: Exception): logging.error( "This error is due to an internal bug in Entangled. Please file an " "issue including the above stack trace " diff --git a/entangled/errors/user.py b/entangled/errors/user.py index 243369e..ed19f47 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -1,8 +1,7 @@ from dataclasses import dataclass -from textwrap import wrap -from typing import Callable +from typing import Any, Callable -from ..document import TextLocation +from ..text_location import TextLocation class UserError(Exception): @@ -10,6 +9,15 @@ def __str__(self): return "Unknown user error." +@dataclass +class ConfigError(UserError): + expected: str + got: Any + + def __str__(self): + return f"Expected {self.expected}, got: {self.got}" + + @dataclass class HelpfulUserError(UserError): """Raise a user error and supply an optional function `func` for context. 
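The `construct` changes above are what turn parsed TOML into typed objects, now surfacing mismatches as a `ConfigError` (a `UserError`) rather than a bare assertion. A minimal sketch of the intended behaviour, using the `Runner` dataclass this diff adds in `entangled/loom/task.py`; the literal values are illustrative only.

```python
# Sketch: entangled.construct.construct maps a parsed TOML table onto a dataclass.
from entangled.construct import construct
from entangled.errors.user import ConfigError
from entangled.loom.task import Runner

# A well-formed table becomes the dataclass, recursing into list[str] for `args`.
runner = construct(Runner, {"command": "python", "args": ["{script}"]})
assert runner == Runner("python", ["{script}"])

# A malformed value is reported as a ConfigError instead of an AssertionError.
try:
    construct(Runner, {"command": "python", "args": "oops"})
except ConfigError as err:
    print(err)  # names the expected type and the offending value
```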
diff --git a/entangled/logging.py b/entangled/logging.py new file mode 100644 index 0000000..d989351 --- /dev/null +++ b/entangled/logging.py @@ -0,0 +1,42 @@ +import logging +from rich.logging import RichHandler +from rich.highlighter import RegexHighlighter + +from .version import __version__ + +LOGGING_SETUP = False + + +class BackTickHighlighter(RegexHighlighter): + highlights = [r"`(?P<bold>[^`]*)`"] + + +def logger(): + return logging.getLogger("entangled") + + +def configure(debug=False): + global LOGGING_SETUP + if LOGGING_SETUP: + return + + if debug: + level = logging.DEBUG + else: + level = logging.INFO + + FORMAT = "%(message)s" + logging.basicConfig( + level=level, + format=FORMAT, + datefmt="[%X]", + handlers= + [RichHandler(show_path=debug, highlighter=BackTickHighlighter())], + ) + log = logging.getLogger("entangled") + log.setLevel(level) + # log.addHandler(RichHandler(show_path=debug, highlighter=BackTickHighlighter())) + # log.propagate = False + log.info(f"Entangled {__version__} (https://entangled.github.io/)") + + LOGGING_SETUP = True diff --git a/entangled/loom/__init__.py b/entangled/loom/__init__.py new file mode 100644 index 0000000..5f67108 --- /dev/null +++ b/entangled/loom/__init__.py @@ -0,0 +1,4 @@ +from .program import Program, resolve_tasks +from .task import Task, TaskDB, Target + +__all__ = ["Program", "resolve_tasks", "Task", "TaskDB", "Target"] diff --git a/entangled/loom/lazy.py b/entangled/loom/lazy.py new file mode 100644 index 0000000..0d32884 --- /dev/null +++ b/entangled/loom/lazy.py @@ -0,0 +1,157 @@ +""" +Module `entangled.loom.lazy` provides `Lazy` tasks that have +targets and a set of dependencies. A `Lazy` task has an abstract +method `run`. The purpose is to run those tasks in the correct +order, possibly in parallel. + +This is achieved by memoizing results and keeping a lock on each `Lazy` +task while it is still evaluating. +""" + +from __future__ import annotations +from dataclasses import dataclass, field +from typing import Generic, Optional, TypeVar, Union +import asyncio + +from ..errors.user import UserError + + +T = TypeVar("T") +R = TypeVar("R") + + +@dataclass +class Failure(Generic[T]): + task: T + + def __bool__(self): + return False + + +class MissingFailure(Failure[T]): + pass + + +@dataclass +class TaskFailure(Failure[T], Exception): + message: str + + def __post_init__(self): + Exception.__init__(self, self.message) + + +@dataclass +class DependencyFailure(Failure[T], Generic[T]): + dependent: list[Failure[T]] + + +@dataclass +class Ok(Generic[T, R]): + task: Lazy[T, R] + value: R + + def __bool__(self): + return True + + +Result = Union[Failure, Ok[T, R]] + + +@dataclass +class Lazy(Generic[T, R]): + """Base class for tasks that are tagged with type `T` (usually `str` or + `Path`) and represent values of type `R`. + + To implement a specific task, you need to implement the asynchronous + `run` method, which should return a value of `R` or throw `TaskFailure`. + + Attributes: + targets: list of target identifiers, for instance paths that are + generated by running a particular task. + dependencies: list of dependency identifiers. All of these need to + be realized before the task can run. + result (property): value of the result, once the task was run. This + throws an exception if accessed before the task is complete.
+ """ + + targets: list[T] + dependencies: list[T] + _lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False) + _result: Optional[Result[T, R]] = field(default=None, init=False) + + def __bool__(self): + return self._result is not None and bool(self._result) + + @property + def result(self) -> R: + if self._result is None: + raise ValueError("Task has not run yet.") + if not self._result: + raise ValueError("Task has failed.") + assert isinstance(self._result, Ok) + return self._result.value + + async def run(self) -> R: + raise NotImplementedError() + + async def run_after_deps(self, recurse, *args) -> Result[T, R]: + dep_res = await asyncio.gather(*(recurse(dep) for dep in self.dependencies)) + if not all(dep_res): + return DependencyFailure(self, [f for f in dep_res if not f]) + try: + result = await self.run(*args) + return Ok(self, result) + except TaskFailure as f: + return f + + async def run_cached(self, recurse, *args) -> Result[T, R]: + async with self._lock: + if self._result is not None: + return self._result + self._result = await self.run_after_deps(recurse, *args) + return self._result + + def reset(self): + self._result = None + + +TaskT = TypeVar("TaskT", bound=Lazy) + + +class MissingDependency(Exception): + pass + + +@dataclass +class LazyDB(Generic[T, TaskT]): + """Collect tasks and coordinate running a task from a task identifier.""" + + tasks: list[TaskT] = field(default_factory=list) + index: dict[T, TaskT] = field(default_factory=dict) + + async def run(self, t: T, *args) -> Result[T, R]: + if t not in self.index: + try: + task = self.on_missing(t) + except MissingDependency: + return MissingFailure(t) + else: + task = self.index[t] + return await task.run_cached(self.run, *args) + + def on_missing(self, _: T) -> TaskT: + raise MissingDependency() + + def add(self, task: TaskT): + """Add a task to the DB.""" + self.tasks.append(task) + for target in task.targets: + self.index[target] = task + + def clean(self): + self.tasks = [] + self.index = {} + + def reset(self): + for t in self.tasks: + t.reset() diff --git a/entangled/loom/program.py b/entangled/loom/program.py new file mode 100644 index 0000000..d80ffd2 --- /dev/null +++ b/entangled/loom/program.py @@ -0,0 +1,114 @@ +from __future__ import annotations +from copy import copy +import logging +from typing import Any, Generic, Optional +from dataclasses import dataclass, field, asdict, fields +from pathlib import Path + +import tomlkit + +from entangled.errors.user import UserError + +from ..construct import construct +from .task import DEFAULT_RUNNERS, Task, TaskDB, Pattern, Runner, Target + + +@dataclass +class MissingInclude(UserError): + path: Path + + def __str__(self): + return f"Include `{self.path}` not found." + + +@dataclass +class MissingPattern(UserError): + name: str + + def __str__(self): + return f"Pattern `{self.name}` not found." 
+ + +@dataclass +class PatternCall: + pattern: str + args: dict[str, Any] + + +@dataclass +class TaskProxy: + targets: list[Target] = field(default_factory=list) + dependencies: list[Target] = field(default_factory=list) + language: Optional[str] = None + path: Optional[Path] = None + script: Optional[str] = None + stdin: Optional[Path] = None + stdout: Optional[Path] = None + + +@dataclass +class Program: + task: list[TaskProxy] = field(default_factory=list) + pattern: dict[str, Pattern] = field(default_factory=dict) + call: list[PatternCall] = field(default_factory=list) + include: list[Path] = field(default_factory=list) + runner: dict[str, Runner] = field(default_factory=dict) + + def write(self, path: Path): + with open(path, "w") as f_out: + tomlkit.dump(self.__dict__, f_out) + + @staticmethod + def read(path: Path) -> Program: + with open(path, "r") as f_in: + data = tomlkit.load(f_in) + return construct(Program, data) + + +async def resolve_tasks(program: Program) -> TaskDB: + db = TaskDB() + pattern_index = dict() + + async def go(program: Program): + tasks = [Task(**t.__dict__) for t in program.task] + pattern_index.update(program.pattern) + delayed_calls: list[PatternCall] = [] + + db.runners.update(program.runner) + + for t in tasks: + db.add(t) + + for c in program.call: + if c.pattern not in pattern_index: + logging.debug( + "pattern `%s` not available, waiting for includes to resolve", + c.pattern, + ) + delayed_calls.append(c) + continue + p = pattern_index[c.pattern] + task = p.call(c.args) + db.add(task) + + for inc in program.include: + if Target(inc) in db.index: + await db.run(Target(inc)) + if not inc.exists(): + raise MissingInclude(inc) + + prg = Program.read(inc) + await go(prg) + + for c in delayed_calls: + if c.pattern not in pattern_index: + logging.debug( + "pattern `%s` still not available, now this is an error", c.pattern + ) + raise MissingPattern(c.pattern) + p = pattern_index[c.pattern] + db.add(p.call(c.args)) + + return db + + return await go(program) diff --git a/entangled/loom/target.py b/entangled/loom/target.py new file mode 100644 index 0000000..52877b1 --- /dev/null +++ b/entangled/loom/target.py @@ -0,0 +1,55 @@ +from __future__ import annotations +from dataclasses import dataclass, field +from typing import ClassVar +from pathlib import Path +from ..parsing import Parsable, Parser, matching, starmap, fmap, fullmatch, choice + + +@dataclass +class Phony(Parsable): + _pattern: ClassVar[Parser] = matching(r"phony\(([^()\s]+)\)") + name: str + + @staticmethod + def __parser__() -> Parser[Phony]: + return Phony._pattern >> starmap(lambda n: Phony(n)) + + def __str__(self): + return f"phony({self.name})" + + def __hash__(self): + return hash(f"#{self.name}#") + + +@dataclass +class Target(Parsable): + phony_or_path: Phony | Path + + @staticmethod + def __parser__() -> Parser[Target]: + return choice(Phony, fullmatch(".*") >> fmap(Path)) >> fmap(Target) + + @staticmethod + def from_str(s: str) -> Target: + result: Target = Target.__parser__().read(s)[0] + return result + + def __str__(self): + return f"Target({self.phony_or_path})" + + def __hash__(self): + return hash(self.phony_or_path) + + def is_phony(self) -> bool: + return isinstance(self.phony_or_path, Phony) + + def is_path(self) -> bool: + return isinstance(self.phony_or_path, Path) + + @property + def path(self) -> Path: + if not isinstance(self.phony_or_path, Path): + raise ValueError("Not a path") + return self.phony_or_path + + diff --git a/entangled/loom/task.py b/entangled/loom/task.py 
new file mode 100644 index 0000000..6784972 --- /dev/null +++ b/entangled/loom/task.py @@ -0,0 +1,199 @@ +"""The `entangled.loom.task` module defines the final form of a `Task` in +Loom. +""" + +from __future__ import annotations +import asyncio +from contextlib import nullcontext +from copy import copy +from dataclasses import dataclass, field +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import Any, Optional, Union +from asyncio import create_subprocess_exec +from textwrap import indent + +from .lazy import MissingDependency, Lazy, LazyDB +from .target import Phony, Target +from ..filedb import stat +from ..logging import logger +from ..errors.user import UserError + + +@dataclass +class FailedTaskError(UserError): + error_code: int + stderr: str + + def __str__(self): + return f"process returned code {self.error_code}\n" \ + f"standard error output: {self.stderr}" + + +log = logger() + +@dataclass +class Runner: + command: str + args: list[str] + + +DEFAULT_RUNNERS: dict[str, Runner] = { + "Python": Runner("python", ["{script}"]), + "Bash": Runner("bash", ["{script}"]), +} + + +@dataclass +class Task(Lazy[Target, None]): + language: Optional[str] = None + path: Optional[Path] = None + script: Optional[str] = None + stdin: Optional[Path] = None + stdout: Optional[Path] = None + + def __str__(self): + tgts = ", ".join(str(t) for t in self.targets) + deps = ", ".join(str(t) for t in self.dependencies) + if self.script is not None: + src = indent(self.script, prefix = " ▎ ", predicate = lambda _: True) + elif self.path is not None: + src = str(self.path) + else: + src = " - " + return f"[{tgts}] <- [{deps}]\n" + src + + def __post_init__(self): + if self.stdin and Target(self.stdin) not in self.dependencies: + self.dependencies.append(Target(self.stdin)) + if self.path and Target(self.path) not in self.dependencies: + self.dependencies.append(Target(self.path)) + if self.stdout and Target(self.stdout) not in self.targets: + self.targets.append(Target(self.stdout)) + + def validate(self): + assert (self.path is None) or (self.script is None) + if self.stdin is not None: + assert Target(self.stdin) in self.dependencies + if self.stdout is not None: + assert Target(self.stdout) in self.targets + + def always_run(self) -> bool: + dep_paths = [p.path for p in self.dependencies if p.is_path()] + if not dep_paths: + return True + return False + + def needs_run(self) -> bool: + target_paths = [t.path for t in self.targets if t.is_path()] + dep_paths = [p.path for p in self.dependencies if p.is_path()] + if any(not path.exists() for path in target_paths): + return True + target_stats = [stat(p) for p in target_paths] + dep_stats = [stat(p) for p in dep_paths] + if any(t < d for t in target_stats for d in dep_stats): + return True + return False + + async def run(self, cfg): + log.debug(f"targets: {self.targets}") + if not self.always_run() and not self.needs_run() and not cfg.force_run: + return + + if self.language is None or (self.path is None and self.script is None): + return + + runner = cfg.runners[self.language] + if self.path is not None: + tmpfile = None + path = self.path + elif self.script is not None: + tmpfile = NamedTemporaryFile("w") + tmpfile.write(self.script) + tmpfile.flush() + path = Path(tmpfile.name) + else: + raise ValueError("A `Rule` can have either `path` or `script` defined.") + + args = [arg.format(script=path) for arg in runner.args] + stdin = open(self.stdin, "r") if self.stdin is not None else None + stdout = open(self.stdout, "w") if 
self.stdout is not None else None + + tgt_str = "(" + " ".join(str(t) for t in self.targets) + ")" + log.info(f"{tgt_str} -> {runner.command} " + " ".join(args)) + async with cfg.throttle or nullcontext(): + proc = await create_subprocess_exec( + runner.command, *args, stdin=stdin, stdout=stdout, stderr=asyncio.subprocess.PIPE + ) + stderr = await proc.stderr.read() if proc.stderr else b"" + await proc.wait() + log.debug(f"return-code {proc.returncode}") + + if tmpfile is not None: + tmpfile.close() + + if self.needs_run(): + raise FailedTaskError(proc.returncode or 0, stderr.decode()) + + +@dataclass +class TaskDB(LazyDB[Target, Task]): + runners: dict[str, Runner] = field(default_factory=lambda: copy(DEFAULT_RUNNERS)) + throttle: Optional[asyncio.Semaphore] = None + force_run: bool = False + + async def run(self, t: Target, *args): + log.debug(str(t)) + return await super().run(t, self, *args) + + def on_missing(self, t: Target): + if not t.is_path() or not t.path.exists(): + raise MissingDependency() + return Task([t], []) + + def target(self, target_path: Union[str, Path], deps: list[Target], **kwargs): + target_path = Path(target_path) + task = Task([Target(target_path)], deps, **kwargs) + self.add(task) + + def phony(self, target_name: str, deps: list[Target], **kwargs): + task = Task([Target(Phony(target_name))], deps, **kwargs) + self.add(task) + + +@dataclass +class Pattern: + """A `Pattern` can be used to generate a `Task` from a template. + Currently the template is expanded using `str.format`; Jinja support is planned.""" + + targets: list[str] = field(default_factory=list) + dependencies: list[str] = field(default_factory=list) + language: Optional[str] = None + path: Optional[Path] = None + script: Optional[str] = None + stdout: Optional[str] = None + stdin: Optional[str] = None + + def validate(self): + assert (self.path is None) ^ (self.script is None) + + def call(self, args: dict[str, Any]) -> Task: + targets: list[Target] = [ + Target.from_str(t.format(**args)) for t in self.targets + ] + deps: list[Target] = [ + Target.from_str(d.format(**args)) for d in self.dependencies + ] + lang = self.language + if self.path is not None: + script = self.path.read_text().format(**args) + elif self.script is not None: + script = self.script.format(**args) + else: + raise ValueError( + "A `Pattern` needs to have either a `path` or `script` defined."
+ ) + + stdout = Path(self.stdout.format(**args)) if self.stdout is not None else None + stdin = Path(self.stdin.format(**args)) if self.stdin is not None else None + return Task(targets, deps, lang, script=script, stdout=stdout, stdin=stdin) diff --git a/entangled/main.py b/entangled/main.py index 41898ad..6ad1641 100644 --- a/entangled/main.py +++ b/entangled/main.py @@ -1,54 +1,22 @@ -import argh # type: ignore -import logging -import sys - - -try: - from rich.logging import RichHandler - from rich.highlighter import RegexHighlighter +from .logging import configure, logger - WITH_RICH = True -except ImportError: - WITH_RICH = False +configure(debug=False) +import argh # type: ignore +import sys +import traceback +import logging -from .commands import new, status, stitch, sync, tangle, watch +from .commands import new, status, stitch, sync, tangle, watch, loom from .errors.internal import bug_contact from .errors.user import HelpfulUserError, UserError from .version import __version__ - -if WITH_RICH: - - class BackTickHighlighter(RegexHighlighter): - highlights = [r"`(?P[^`]*)`"] - - -def configure(debug=False): - if debug: - level = logging.DEBUG - else: - level = logging.INFO - - if WITH_RICH: - FORMAT = "%(message)s" - logging.basicConfig( - level=level, - format=FORMAT, - datefmt="[%X]", - handlers=[RichHandler(show_path=debug, highlighter=BackTickHighlighter())], - ) - logging.debug("Rich logging enabled") - else: - logging.basicConfig(level=level) - logging.debug("Plain logging enabled") - - logging.info(f"Entangled {__version__} (https://entangled.github.io/)") - - def cli(): import argparse + log = logger() + try: parser = argparse.ArgumentParser() parser.add_argument( @@ -57,29 +25,34 @@ def cli(): parser.add_argument( "-v", "--version", action="store_true", help="show version number" ) - argh.add_commands(parser, [new, status, stitch, sync, tangle, watch]) + argh.add_commands(parser, [new, loom, status, stitch, sync, tangle, watch]) args = parser.parse_args() if args.version: print(f"Entangled {__version__}") sys.exit(0) - configure(args.debug) + if args.debug: + log.level = logging.DEBUG + else: + log.level = logging.INFO + argh.dispatch(parser) except KeyboardInterrupt: - logging.info("Goodbye") + log.info("Goodbye") sys.exit(0) except HelpfulUserError as e: - logging.error(e, exc_info=False) + log.error(e, exc_info=False) e.func() sys.exit(0) except UserError as e: - logging.error(e, exc_info=False) + log.error(e, exc_info=False) sys.exit(0) except Exception as e: - logging.error(str(e)) + log.error(str(e)) bug_contact(e) - sys.exit(1) + traceback.print_exc() + raise e if __name__ == "__main__": diff --git a/entangled/parsing.py b/entangled/parsing.py index 94f45ff..ad22dbd 100644 --- a/entangled/parsing.py +++ b/entangled/parsing.py @@ -53,8 +53,8 @@ def expected(self): return self.msg def __str__(self): - if len(inp) > 20: - inp = f"{inp[:20]} ..." + if len(self.inp) > 20: + inp = f"{self.inp[:20]} ..." 
return f'expected: {self.expected}, got: "{self.inp}"' @@ -72,7 +72,7 @@ def expected(self): class Parser(Generic[T]): """Base class for parsers.""" - def read(self, inp: str) -> tuple[T, str]: + def read(self, _: str) -> tuple[T, str]: """Read a string and return an object the remainder of the string.""" raise NotImplementedError() @@ -168,7 +168,7 @@ def bound(inp: str): def choice(*options: Parser[Any]) -> Parser[Any]: @parser - def _choice(inp: str) -> tuple[T, str]: + def _choice(inp: str) -> tuple[Any, str]: failures = [] for o in options: @@ -202,11 +202,11 @@ def _many(inp: str) -> tuple[list[T], str]: return _many -def matching(regex: str) -> Parser[re.Match]: +def matching(regex: str) -> Parser[tuple[str | Any, ...]]: pattern = re.compile(f"^{regex}") @parser - def _matching(inp: str): + def _matching(inp: str) -> tuple[tuple[str | Any, ...], str]: if m := pattern.match(inp): return m.groups(), inp[m.end() :] raise Expected(f"/^{regex}/", inp) diff --git a/entangled/text_location.py b/entangled/text_location.py new file mode 100644 index 0000000..21b597c --- /dev/null +++ b/entangled/text_location.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass + + +@dataclass +class TextLocation: + filename: str + line_number: int = 0 + + def __str__(self): + return f"{self.filename}:{self.line_number}" diff --git a/pyproject.toml b/pyproject.toml index 23a8844..465855e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ mkdocs = "^1.4.3" mkdocs-material = "^9.1.13" mkdocstrings = {extras = ["python"], version = "^0.21.2"} twine = "^4.0.2" +pytest-asyncio = "^0.21.1" [build-system] requires = ["poetry-core"] diff --git a/test/loom/test_loom.py b/test/loom/test_loom.py new file mode 100644 index 0000000..7dcf6f5 --- /dev/null +++ b/test/loom/test_loom.py @@ -0,0 +1,128 @@ +from dataclasses import dataclass +import os +from typing import Optional, Union +import pytest +from contextlib import asynccontextmanager, chdir +from pathlib import Path +import time +from entangled.filedb import stat +from entangled.loom.task import TaskDB, Phony, Target + + +@dataclass +class Elapsed: + time: Optional[float] = None + + +@asynccontextmanager +async def timer(): + e = Elapsed() + t = time.perf_counter() + yield e + e.time = time.perf_counter() - t + + +@pytest.mark.asyncio +async def test_hello(tmp_path: Path): + with chdir(tmp_path): + db = TaskDB() + tgt = Path("hello.txt") + db.target( + tgt, + [], + language="Python", + script=f"with open('{tgt}', 'w') as f:\n" + f' print("Hello, World!", file=f)\n', + ) + db.phony("all", [Target(tgt)]) + + await db.run(Target(Phony("all"))) + os.sync() + assert tgt.exists() + assert tgt.read_text() == "Hello, World!\n" + + +@pytest.mark.asyncio +async def test_hello_stdout(tmp_path: Path): + with chdir(tmp_path): + db = TaskDB() + tgt = Path("hello.txt") + db.target( + tgt, [], language="Python", stdout=tgt, script='print("Hello, World!")\n' + ) + db.phony("all", [Target(tgt)]) + + await db.run(Target(Phony("all"))) + os.sync() + assert tgt.exists() + assert tgt.read_text() == "Hello, World!\n" + + +@pytest.mark.asyncio +async def test_runtime(tmp_path: Path): + with chdir(tmp_path): + db = TaskDB() + for a in range(4): + db.phony(f"sleep{a}", [], language="Bash", script=f"sleep 0.2\n") + db.phony("all", [Target(Phony(f"sleep{a}")) for a in range(4)]) + async with timer() as t: + await db.run(Target(Phony("all"))) + + assert t.time is not None + assert t.time > 0.1 and t.time < 0.4 + + +@pytest.mark.asyncio +async def test_rebuild(tmp_path: Path): 
+ with chdir(tmp_path): + db = TaskDB() + + # Set input + i1, i2 = (Path(f"i{n}") for n in [1, 2]) + i1.write_text("1\n") + i2.write_text("3\n") + + # Make tasks + a, b, c = (Path(x) for x in "abc") + # a = i1 + 1 + db.target( + a, + [Target(i1)], + language="Python", + stdout=a, + script="print(int(open('i1','r').read()) + 1)", + ) + # b = a * i2 + db.target( + b, + [Target(a), Target(i2)], + language="Python", + stdout=b, + script="print(int(open('a','r').read()) * int(open('i2','r').read()))", + ) + # c = a + b + db.target( + c, + [Target(a), Target(b)], + language="Python", + stdout=c, + script="print(int(open('b','r').read()) * int(open('a','r').read()))", + ) + await db.run(Target(c)) + assert all(x.exists() for x in (a, b, c)) + assert c.read_text() == "12\n" + + i2.write_text("4\n") + os.sync() + + assert not db.index[Target(a)].needs_run() + assert db.index[Target(b)].needs_run() + + db.reset() + await db.run(Target(c)) + os.sync() + + assert stat(a) < stat(i2) + assert a.read_text() == "2\n" + assert b.read_text() == "8\n" + assert c.read_text() == "16\n" diff --git a/test/loom/test_phony.py b/test/loom/test_phony.py new file mode 100644 index 0000000..a357cc3 --- /dev/null +++ b/test/loom/test_phony.py @@ -0,0 +1,9 @@ +from entangled.loom.task import Phony + +# from entangled.parsing import + + +def test_phony_parsing(): + x, _ = Phony.read("phony(all)") + assert x == Phony("all") + assert str(x) == "phony(all)" diff --git a/test/loom/test_program.py b/test/loom/test_program.py new file mode 100644 index 0000000..84162dc --- /dev/null +++ b/test/loom/test_program.py @@ -0,0 +1,149 @@ +from contextlib import chdir +from pathlib import Path +import sys + +import pytest +from entangled.loom.task import Phony, Target +from entangled.loom.program import Program, resolve_tasks + + +hello_world_loom = """ +[[task]] +targets = ["phony(all)"] +dependencies = ["hello.txt"] + +[[task]] +targets = ["hello.txt"] +stdout = "hello.txt" +language = "Bash" +script = "echo 'Hello, World'" +""" + + +@pytest.mark.asyncio +async def test_loom(tmp_path): + with chdir(tmp_path): + src = Path("hello.toml") + tgt = Path("hello.txt") + src.write_text(hello_world_loom) + prg = Program.read(src) + db = await resolve_tasks(prg) + assert db.index[Target(tgt)].stdout == tgt + await db.run(Target(Phony("all"))) + assert tgt.exists() + assert tgt.read_text() == "Hello, World\n" + + +include_loom = """ +include = [ + "generated_wf.toml" +] + +[[task]] +targets = ["generated_wf.toml"] +stdout = "generated_wf.toml" +language = "Python" +script = ''' +print(\"\"\" +[[task]] +targets = ["hello.txt"] +stdout = "hello.txt" +language = "Bash" +script = "echo 'Hello, World'" +\"\"\") +''' + +[[task]] +targets = ["phony(all)"] +dependencies = ["hello.txt"] +""" + + +@pytest.mark.asyncio +async def test_include(tmp_path): + with chdir(tmp_path): + src = Path("hello.toml") + tgt = Path("hello.txt") + src.write_text(include_loom) + prg = Program.read(src) + db = await resolve_tasks(prg) + assert db.index[Target(tgt)].stdout == tgt + await db.run(Target(Phony("all"))) + assert tgt.exists() + assert tgt.read_text() == "Hello, World\n" + + +pattern_loom = """ +[pattern.echo] +targets = ["{stdout}"] +stdout = "{stdout}" +language = "Python" +script = ''' +print("{text}") +''' + +[[task]] +targets = ["phony(all)"] +dependencies = ["hello.txt"] + +[[call]] +pattern = "echo" +args = { stdout = "hello.txt", text = "Hello, World" } +""" + + +@pytest.mark.asyncio +async def test_pattern(tmp_path): + with chdir(tmp_path): + src = 
Path("hello.toml") + tgt = Path("hello.txt") + src.write_text(pattern_loom) + prg = Program.read(src) + db = await resolve_tasks(prg) + assert db.index[Target(tgt)].stdout == tgt + await db.run(Target(Phony("all"))) + assert tgt.exists() + assert tgt.read_text() == "Hello, World\n" + + +rot_13_loom = """ +[[task]] +stdout = "secret.txt" +language = "Python" +script = \"\"\" +print("Uryyb, Jbeyq!") +\"\"\" + +[pattern.rot13] +stdout = "{stdout}" +stdin = "{stdin}" +language = "Bash" +script = \"\"\" +tr a-zA-Z n-za-mN-ZA-M +\"\"\" + +[[call]] +pattern = "rot13" + [call.args] + stdin = "secret.txt" + stdout = "hello.txt" + +[[task]] +targets = ["phony(all)"] +dependencies = ["hello.txt"] +""" + + +@pytest.mark.skipif(sys.platform == "win32", reason="no `tr` on windows") +@pytest.mark.asyncio +async def test_rot13(tmp_path): + with chdir(tmp_path): + src = Path("hello.toml") + tgt = Path("hello.txt") + src.write_text(rot_13_loom) + prg = Program.read(src) + db = await resolve_tasks(prg) + assert db.index[Target(tgt)].stdout == tgt + await db.run(Target(Phony("all"))) + assert tgt.exists() + assert tgt.read_text() == "Hello, World!\n" diff --git a/test/loom/test_target.py b/test/loom/test_target.py new file mode 100644 index 0000000..eeb27f0 --- /dev/null +++ b/test/loom/test_target.py @@ -0,0 +1,11 @@ +from pathlib import Path +from entangled.loom.target import Target + + +def test_target_parsing(): + t1 = Target.from_str("phony(all)") + assert t1.is_phony() + t2 = Target.from_str("blah") + assert t2.is_path() + assert t2.path == Path("blah") + diff --git a/test/loom/test_task.py b/test/loom/test_task.py new file mode 100644 index 0000000..e55912c --- /dev/null +++ b/test/loom/test_task.py @@ -0,0 +1,83 @@ +from __future__ import annotations +import pytest +from dataclasses import dataclass +from typing import Any +from entangled.loom.lazy import Lazy, LazyDB +import uuid + + +@dataclass +class PyFunc(Lazy[str, Any]): + foo: Any + db: LazyDB + + async def run(self): + args = [self.db.index[t].result for t in self.dependencies] + return self.foo(*args) + + async def eval(self): + return await self.db.run(self.targets[0]) + + +@dataclass +class PyLiteral(Lazy[str, Any]): + value: Any + + async def run(self): + return self.value + + +class PyTaskDB(LazyDB[str, Any]): + def lazy(self, f): + def delayed(*args): + target = uuid.uuid4().hex + deps = [] + for arg in args: + if isinstance(arg, Lazy): + deps.append(arg.targets[0]) + else: + dep = uuid.uuid4().hex + self.add(PyLiteral([dep], [], arg)) + deps.append(dep) + + task = PyFunc([target], deps, f, self) + self.add(task) + return task + + return delayed + + +@pytest.mark.asyncio +async def test_noodles(): + db = PyTaskDB() + + @db.lazy + def add1(x, y): + return x + y + + @db.lazy + def pure(v): + return v + + z = add1(pure(3), pure(5)) + await z.eval() + assert z and z.result == 8 + + db.clean() + + exec_order = [] + + @db.lazy + def add2(label, x, y): + exec_order.append(label) + return x + y + + x = add2("x", 1, 2) + y = add2("y", x, 3) + z = add2("z", x, 4) + w = add2("w", y, z) + assert len(exec_order) == 0 + w_result = await w.eval() + assert w_result.value == 13 + assert exec_order[-1] == "w" + assert exec_order[0] == "x" diff --git a/test/test_daemon.py b/test/test_daemon.py index 9e9fcaa..c542711 100644 --- a/test/test_daemon.py +++ b/test/test_daemon.py @@ -9,27 +9,30 @@ from contextlib import chdir + def wait_for_file(filename, timeout=5): start_time = time.time() while time.time() - start_time < timeout: if os.path.exists(filename): 
return True - time.sleep(0.1) + time.sleep(0.1) return False + def wait_for_stat_diff(md_stat, filename, timeout=5): start_time = time.time() while time.time() - start_time < timeout: md_stat2 = stat(Path(filename)) - if(md_stat != md_stat2): + if md_stat != md_stat2: return True - time.sleep(0.1) + time.sleep(0.1) return False + def test_daemon(tmp_path: Path): config.read() with chdir(tmp_path): diff --git a/test/test_filedb.py b/test/test_filedb.py index cd30694..091518b 100644 --- a/test/test_filedb.py +++ b/test/test_filedb.py @@ -4,11 +4,13 @@ import pytest from contextlib import chdir + @pytest.fixture(scope="session") def example_files(tmp_path_factory: pytest.TempPathFactory): tmp_path = tmp_path_factory.mktemp("test-filedb") with open(tmp_path / "a", "w") as f: f.write("hello") + # modification times of b, c, and d need to be later than a sleep(0.01) with open(tmp_path / "b", "w") as f: f.write("hello") diff --git a/test/test_shebang.py b/test/test_shebang.py index 2ecc75d..7d0c347 100644 --- a/test/test_shebang.py +++ b/test/test_shebang.py @@ -37,6 +37,7 @@ ``` """ + def test_shebang(): md = MarkdownReader("-") md.run(input_md) @@ -44,12 +45,8 @@ def test_shebang(): assert next(md.reference_map["test.sh"]).header == "#!/bin/bash" content, _ = tangle_ref(md.reference_map, "test.sh", AnnotationMethod.STANDARD) assert content.strip() == output_test_sh.strip() - + cr = CodeReader("test.sh", md.reference_map) cr.run(output_test_sh_modified) md_content = stitch_markdown(md.reference_map, md.content) assert md_content.strip() == input_md_modified.strip() - - - - diff --git a/test/test_watch_dir.py b/test/test_watch_dir.py index f32ef4d..ee6a415 100644 --- a/test/test_watch_dir.py +++ b/test/test_watch_dir.py @@ -29,6 +29,7 @@ Don't tangle me! """ + def test_watch_dirs(tmp_path): with chdir(tmp_path): Path("./docs").mkdir() @@ -41,7 +42,8 @@ def test_watch_dirs(tmp_path): assert set(find_watch_dirs()) == set([Path("./docs"), Path("./src")]) Path("./docs/index.md").write_text(index_md_1 + "\n" + index_md_2) tangle() - assert set(find_watch_dirs()) == set([Path("."), Path("./docs"), Path("./src")]) + assert set(find_watch_dirs()) == set( + [Path("."), Path("./docs"), Path("./src")] + ) assert sorted(list_input_files()) == [Path("./docs/index.md")] -
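Beyond the tests above, the `Lazy`/`LazyDB` layer can be exercised on its own, which may help when reviewing `entangled/loom/lazy.py`. Below is a minimal sketch modelled on `PyFunc`/`PyLiteral` in `test/loom/test_task.py`; the `Shout` class and its string targets are illustrative only, real Loom tasks use `entangled.loom.task.Task` keyed by `Target`.

```python
# Sketch: a custom Lazy task with string identifiers, memoized by LazyDB.
import asyncio
from dataclasses import dataclass

from entangled.loom.lazy import Lazy, LazyDB


@dataclass
class Shout(Lazy[str, str]):
    text: str

    async def run(self) -> str:
        # Dependencies (none here) would be awaited and memoized before this runs.
        return self.text.upper()


async def main() -> None:
    db: LazyDB[str, Shout] = LazyDB()
    db.add(Shout(["greeting"], [], "hello"))  # targets, dependencies, payload
    result = await db.run("greeting")         # returns an Ok(...) on success
    assert result and result.value == "HELLO"

asyncio.run(main())
```

`TaskDB` builds on this same mechanism, adding `Target` keys, file modification-time checks, and the runner and throttle configuration shown in `entangled/loom/task.py`.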