From f90078717eb3cb314e2a949ecc7a58d52755b49a Mon Sep 17 00:00:00 2001
From: Austin Appleby
Date: Sat, 16 Mar 2024 21:34:27 -0700
Subject: [PATCH] Make it possible to load and run sub-Hancho-tasks from their
 own root dir. Needed to build stuff in submodules cleanly, etc.

---
 hancho.py     | 270 +++++++++++++++++++++++++++++++++++++-------------
 tests/test.py |   2 +-
 2 files changed, 203 insertions(+), 69 deletions(-)

diff --git a/hancho.py b/hancho.py
index cdf8bad..06fabe0 100755
--- a/hancho.py
+++ b/hancho.py
@@ -144,7 +144,7 @@ def flatten(elements):
 # suffice.
 
 
-async def flatten_variant(rule, variant, depth=0):
+async def flatten_async(rule, variant, depth=0):
     """Turns 'variant' into a flat array of non-templated strings, paths, and callbacks."""
     # pylint: disable=too-many-return-statements
 
@@ -161,24 +161,24 @@ async def flatten_variant(rule, variant, depth=0):
         return []
 
     if inspect.isawaitable(variant):
-        return await flatten_variant(rule, await variant, depth + 1)
+        return await flatten_async(rule, await variant, depth + 1)
 
     if isinstance(variant, Task):
-        return await flatten_variant(rule, variant.promise, depth + 1)
+        return await flatten_async(rule, variant.promise, depth + 1)
 
     if isinstance(variant, Path):
-        return [Path(await stringize_variant(rule, str(variant), depth + 1))]
+        return [Path(await stringize_async(rule, str(variant), depth + 1))]
 
     if isinstance(variant, list):
         result = []
         for element in variant:
-            result.extend(await flatten_variant(rule, element, depth + 1))
+            result.extend(await flatten_async(rule, element, depth + 1))
         return result
 
-    return [await stringize_variant(rule, variant, depth + 1)]
+    return [await stringize_async(rule, variant, depth + 1)]
 
 
-async def stringize_variant(rule, variant, depth=0):
+async def stringize_async(rule, variant, depth=0):
     """Turns 'variant' into a non-templated string."""
     # pylint: disable=too-many-return-statements
 
@@ -190,23 +190,23 @@ async def stringize_variant(rule, variant, depth=0):
 
     if isinstance(variant, str):
         if template_regex.search(variant):
-            return await expand_template(rule, variant, depth + 1)
+            return await expand_async(rule, variant, depth + 1)
         return variant
 
     if variant is None:
         return ""
 
     if inspect.isawaitable(variant):
-        return await stringize_variant(rule, await variant, depth + 1)
+        return await stringize_async(rule, await variant, depth + 1)
 
     if isinstance(variant, Task):
-        return await stringize_variant(rule, variant.promise, depth + 1)
+        return await stringize_async(rule, variant.promise, depth + 1)
 
     if isinstance(variant, Path):
-        return await stringize_variant(rule, str(variant), depth + 1)
+        return await stringize_async(rule, str(variant), depth + 1)
 
     if isinstance(variant, list):
-        variant = await flatten_variant(rule, variant, depth + 1)
+        variant = await flatten_async(rule, variant, depth + 1)
         variant = [str(s) for s in variant if s is not None]
         variant = " ".join(variant)
         return variant
@@ -214,7 +214,7 @@ async def stringize_variant(rule, variant, depth=0):
     return str(variant)
 
 
-async def expand_template(rule, template, depth=0):
+async def expand_async(rule, template, depth=0):
     """Expands all templates to produce a non-templated string."""
 
     if not isinstance(template, str):
@@ -236,24 +236,127 @@ async def expand_template(rule, template, depth=0):
         except Exception as exc:  # pylint: disable=broad-except
             raise ValueError(f"Template '{exp}' failed to eval") from exc
 
-        result += await stringize_variant(rule, replacement, depth + 1)
+        result += await stringize_async(rule, replacement, depth + 1)
         template = template[span.end() :]
 
     result += template
     return result
 
+
+####################################################################################################
+# Due to Python async rules, we _can't_ use the same flatten/stringize/expand code in a sync context
+# and an async context. I'll refactor these to be less redundant later.
 
-def load(mod_path):
+def flatten_sync(rule, variant, depth=0):
+    """Turns 'variant' into a flat array of non-templated strings, paths, and callbacks."""
+    # pylint: disable=too-many-return-statements
+
+    if depth > MAX_EXPAND_DEPTH:
+        raise ValueError(f"Flattening '{variant}' failed to terminate")
+
+    if isinstance(variant, asyncio.CancelledError):
+        raise variant
+
+    if inspect.isfunction(variant):
+        return [variant]
+
+    if variant is None:
+        return []
+
+    if isinstance(variant, Task):
+        return flatten_sync(rule, variant.promise, depth + 1)
+
+    if isinstance(variant, Path):
+        return [Path(stringize_sync(rule, str(variant), depth + 1))]
+
+    if isinstance(variant, list):
+        result = []
+        for element in variant:
+            result.extend(flatten_sync(rule, element, depth + 1))
+        return result
+
+    return [stringize_sync(rule, variant, depth + 1)]
+
+
+def stringize_sync(rule, variant, depth=0):
+    """Turns 'variant' into a non-templated string."""
+    # pylint: disable=too-many-return-statements
+
+    if depth > MAX_EXPAND_DEPTH:
+        raise ValueError(f"Stringizing '{variant}' failed to terminate")
+
+    if isinstance(variant, asyncio.CancelledError):
+        raise variant
+
+    if isinstance(variant, str):
+        if template_regex.search(variant):
+            return expand_sync(rule, variant, depth + 1)
+        return variant
+
+    if variant is None:
+        return ""
+
+    if isinstance(variant, Task):
+        return stringize_sync(rule, variant.promise, depth + 1)
+
+    if isinstance(variant, Path):
+        return Path(stringize_sync(rule, str(variant), depth + 1))
+
+    if isinstance(variant, list):
+        variant = flatten_sync(rule, variant, depth + 1)
+        variant = [str(s) for s in variant if s is not None]
+        variant = " ".join(variant)
+        return variant
+
+    return str(variant)
+
+
+def expand_sync(rule, template, depth=0):
+    """Expands all templates to produce a non-templated string."""
+
+    if not isinstance(template, str):
+        raise ValueError(f"Don't know how to expand {type(template)}")
+
+    if depth > MAX_EXPAND_DEPTH:
+        raise ValueError(f"Expanding '{template}' failed to terminate")
+
+    result = ""
+    while span := template_regex.search(template):
+        result += template[0 : span.start()]
+        exp = template[span.start() : span.end()]
+
+        # Evaluate the template contents.
+        replacement = ""
+        try:
+            # pylint: disable=eval-used
+            replacement = eval(exp[1:-1], globals(), rule)
+        except Exception as exc:  # pylint: disable=broad-except
+            raise ValueError(f"Template '{exp}' failed to eval") from exc
+
+        result += stringize_sync(rule, replacement, depth + 1)
+        template = template[span.end() :]
+
+    result += template
+    return result
+
+
+####################################################################################################
+
+
+def load(file=None, root=None):
     """Module loader entry point for .hancho files. Searches the loaded Hancho module stack for a
     module whose directory contains 'mod_path', then loads the module relative to that path.
""" - test_path = abspath(Path(app.mod_stack[-1].__file__).parent / mod_path) + if file is None: + raise FileNotFoundError(f"No .hancho filename given") + + if root is not None: + file = Path(root) / Path(file) + + test_path = abspath(Path(app.mod_stack[-1].__file__).parent / file) if test_path.exists(): # print(f"load_module({test_path})") - result = app.load_module(test_path) + result = app.load_module(test_path, root) return result - raise FileNotFoundError(f"Could not load module {mod_path}") + raise FileNotFoundError(f"Could not load module {file}") class Chdir: @@ -332,20 +435,16 @@ def __call__(self, files_in, files_out=None, **kwargs): if files_out is not None: task.files_out = files_out - task.call_dir = relpath( - Path(inspect.stack(context=0)[1].filename).parent, self.root_dir - ) - task.work_dir = relpath(Path.cwd(), self.root_dir) + task.root_dir = app.root_stack[-1] + task.call_dir = Path(inspect.stack(context=0)[1].filename).parent # A task that's created during task execution instead of module loading will have no mod - # stack entry to pull load_dir from, so it runs from '.' (root_dir) instead. + # stack entry to pull load_dir from, so it just inherits its parent's cwd. if "load_dir" not in kwargs: if app.mod_stack: - task.load_dir = relpath( - Path(app.mod_stack[-1].__file__).parent, self.root_dir - ) + task.load_dir = Path(app.mod_stack[-1].__file__).parent else: - task.load_dir = Path(".") + task.load_dir = Path.cwd() if task.job_count > config.jobs: raise ValueError("Task requires too many cores!") @@ -410,36 +509,44 @@ async def task_main(self): raise ValueError("Task missing files_out") # Stringize our directories - self.in_dir = Path(await stringize_variant(self, self.in_dir)) - self.deps_dir = Path(await stringize_variant(self, self.deps_dir)) - self.out_dir = Path(await stringize_variant(self, self.out_dir)) - self.task_dir = Path(await stringize_variant(self, self.task_dir)) + self.work_dir = Path(await stringize_async(self, self.work_dir)) + self.in_dir = Path(await stringize_async(self, self.in_dir)) + self.deps_dir = Path(await stringize_async(self, self.deps_dir)) + self.out_dir = Path(await stringize_async(self, self.out_dir)) + + assert self.work_dir.is_absolute() and self.work_dir.exists() + assert self.in_dir.is_absolute() and self.in_dir.exists() + assert self.deps_dir.is_absolute() and self.deps_dir.exists() + assert self.out_dir.is_absolute() # may not exist yet and that's OK, we will create it. # Flatten our file lists - self.files_in = await flatten_variant(self, self.files_in) - self.files_out = await flatten_variant(self, self.files_out) - self.deps = await flatten_variant(self, self.deps) + self.files_in = await flatten_async(self, self.files_in) + self.deps = await flatten_async(self, self.deps) + self.files_out = await flatten_async(self, self.files_out) # Prepend directories to filenames and then normalize + absolute them. # If they're already absolute, this does nothing. 
-        self.abs_files_in = [abspath(self.in_dir / f) for f in self.files_in]
-        self.abs_files_out = [abspath(self.out_dir / f) for f in self.files_out]
-        self.abs_deps = [abspath(self.deps_dir / f) for f in self.deps]
-
-        # Strip task_dir off the absolute paths to produce task_dir-relative
-        # paths
-        self.files_in = [relpath(f, self.task_dir) for f in self.abs_files_in]
-        self.files_out = [relpath(f, self.task_dir) for f in self.abs_files_out]
-        self.deps = [relpath(f, self.task_dir) for f in self.abs_deps]
+        self.abs_files_in = [abspath(self.in_dir / f) for f in self.files_in]
+        self.abs_deps = [abspath(self.deps_dir / f) for f in self.deps]
+        self.abs_files_out = [abspath(self.out_dir / f) for f in self.files_out]
+
+        # Strip the working directory off all our file paths to make our command lines less bulky.
+        # Note that we _don't_ want relpath() here as it could add "../../.." that would go up
+        # through a symlink to the wrong directory.
+        work_dir_prefix = str(self.work_dir) + "/"
+        self.files_in = [Path(str(f).removeprefix(work_dir_prefix)) for f in self.abs_files_in]
+        self.deps = [Path(str(f).removeprefix(work_dir_prefix)) for f in self.abs_deps]
+        self.files_out = [Path(str(f).removeprefix(work_dir_prefix)) for f in self.abs_files_out]
 
         # Now that files_in/files_out/deps are flat, we can expand our
         # description and command list
-        self.command = await flatten_variant(self, self.command)
+        self.command = await flatten_async(self, self.command)
+
+        # pylint: disable=access-member-before-definition
         if self.desc:
-            self.desc = await stringize_variant(self, self.desc)
+            self.desc = await stringize_async(self, self.desc)
         if self.depfile:
-            self.depfile = await stringize_variant(self, self.depfile)
+            self.depfile = await stringize_async(self, self.depfile)
 
         # Check for missing inputs
         if not self.dryrun:
@@ -494,19 +601,23 @@ async def run_commands(self):
                 sameline=not self.verbose,
             )
 
+            if self.work_dir == self.start_dir:
+                work_dir = "."
+            else:
+                work_dir = str(self.work_dir).removeprefix(str(self.start_dir) + '/')
+            dryrun = "(DRY RUN) " if self.dryrun else ""
+
             if self.verbose or self.debug:
                 log(f"{color(128,128,128)}Reason: {self.reason}{color()}")
-                for command in self.command:
-                    task_dir = Path("root") / relpath(self.task_dir, self.root_dir)
-                    dryrun = "(DRY RUN) " if self.dryrun else ""
-                    log(f"{color(128,128,255)}{task_dir}$ {color()}{dryrun}{command}")
-                if self.debug:
-                    log(self)
+
+            if self.debug:
+                log(self)
 
             result = []
-            with Chdir(self.task_dir):
-                for command in self.command:
-                    result = await self.run_command(command)
+            for command in self.command:
+                if self.verbose or self.debug:
+                    log(f"{color(128,128,255)}{work_dir}$ {color()}{dryrun}{command}")
+                result = await self.run_command(command)
 
         finally:
             await app.release_jobs(self.job_count)
@@ -532,11 +643,15 @@ async def run_command(self, command):
             raise ValueError(f"Don't know what to do with {command}")
 
         # Create the subprocess via asyncio and then await the result.
-        proc = await asyncio.create_subprocess_shell(
-            command,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+        # Note - We do _not_ want this 'with Chdir()' statement higher up the call stack. If we hit
+        # an await while inside the block, we could switch to another task and it would then be
+        # running in the wrong directory.
+        with Chdir(self.work_dir):
+            proc = await asyncio.create_subprocess_shell(
+                command,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
 
         (stdout_data, stderr_data) = await proc.communicate()
         self.stdout = stdout_data.decode()
@@ -614,6 +729,8 @@ def needs_rerun(self, force=False):
             else:
                 raise ValueError(f"Invalid depformat {self.depformat}")
 
+            # The contents of the depfile are RELATIVE TO THE WORKING DIRECTORY
+            deplines = [self.work_dir / Path(d) for d in deplines]
             if deplines and max(mtime(f) for f in deplines) >= min_out:
                 return (
                     f"Rebuilding {self.files_out} because a dependency in "
@@ -646,6 +763,7 @@ def __init__(self):
         self.line_dirty = False
         self.jobs_available = os.cpu_count()
         self.jobs_lock = asyncio.Condition()
+        self.root_stack = [Path.cwd()]
 
     def main(self):
         """Our main() just handles command line args and delegates to async_main()"""
@@ -697,7 +815,7 @@ async def async_main(self):
         root_filename = abspath(config.filename)
         if not root_filename.exists():
             raise FileNotFoundError(f"Could not find {root_filename}")
-        self.load_module(root_filename)
+        self.load_module(root_filename, Path.cwd())
 
         # Root module(s) loaded. Run all tasks in the queue until we run out.
         while True:
@@ -729,7 +847,7 @@ async def async_main(self):
 
         return -1 if self.tasks_fail else 0
 
-    def load_module(self, abs_path):
+    def load_module(self, abs_path, root=None):
        """Loads a Hancho module ***while chdir'd into its directory***"""
 
         phys_path = Path(abs_path).resolve()
@@ -750,7 +868,10 @@ def load_module(self, abs_path):
         # be necessary.
         sys.path.insert(0, str(abs_path.parent))
 
+        root = self.root_stack[-1] if root is None else abspath(root)
+
         self.mod_stack.append(module)
+        self.root_stack.append(root)
 
         # We must chdir()s into the .hancho file directory before running it so that
         # glob() can resolve files relative to the .hancho file itself.
@@ -760,6 +881,7 @@ def load_module(self, abs_path):
             types.FunctionType(code, module.__dict__)()
 
         self.mod_stack.pop()
+        self.root_stack.pop()
 
         return module
 
@@ -805,12 +927,24 @@ def __init__(self):
         self.force = False
         self.depformat = "gcc"
 
-        self.root_dir = Path.cwd()
-        self.task_dir = Path("{root_dir}")
-        self.in_dir = Path("{root_dir / load_dir}")
-        self.deps_dir = Path("{root_dir / load_dir}")
-        self.out_dir = Path("{root_dir / build_dir / load_dir}")
-        self.build_dir = Path("build")
+        # The directory we started hancho.py from.
+        self.start_dir = Path.cwd()
+
+        # The working directory that we run commands in. For single projects it's the same as
+        # start_dir; for stuff we're building from submodules it's the submodule's root directory.
+        self.work_dir = Path("{root_dir}")
+
+        # Input filenames are resolved relative to in_dir.
+        self.in_dir = Path("{load_dir}")
+
+        # Dependency filenames are resolved relative to deps_dir.
+        self.deps_dir = Path("{load_dir}")
+
+        # All output files from all tasks go under build_dir.
+        self.build_dir = Path("build")
+
+        # Each .hancho file gets a separate directory under build_dir for its output files.
+        self.out_dir = Path("{start_dir / build_dir / relpath(load_dir, start_dir)}")
 
         self.files_out = []
         self.deps = []
diff --git a/tests/test.py b/tests/test.py
index 645f00b..8f3944c 100755
--- a/tests/test.py
+++ b/tests/test.py
@@ -18,7 +18,7 @@
 # loading a module directly and then via "../foo.hancho" should not load two
 # copies
 # all the predefined directories need test cases
-# overriding in_dir/out_dir/task_dir need test cases
+# overriding in_dir/out_dir/work_dir need test cases
 # min delta seems to be 4 msec on linux, 1 msec on windows?
 
 # os.system("touch blahblah.txt")
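
Usage sketch (illustrative only, not part of the patch): how a top-level .hancho script might use
the new 'root' parameter to build a submodule from the submodule's own root directory. The
"third_party/somelib" paths and the "import hancho" line are assumptions made for this example;
only the load(file, root) signature and the root_stack behavior come from the patch itself.

    # top-level build.hancho (hypothetical)
    import hancho

    # Load the submodule's own build script. Passing root= joins it onto the file path and pushes
    # that directory onto root_stack, so tasks created while the submodule loads get root_dir (and
    # therefore the default work_dir) set to the submodule's directory rather than the top-level
    # start_dir.
    somelib = hancho.load(file="build.hancho", root="third_party/somelib")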