From 6bafdb29188531090a122211a76da5d98683a2d7 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 10 Oct 2023 22:48:11 +0200 Subject: [PATCH] Abstract away SCM integration The goal is to support different backends, like sapling. --- alibuild_helpers/build.py | 100 +++++++++++++++++++++++++++-------- alibuild_helpers/workarea.py | 39 +++++++------- tests/test_build.py | 6 +-- tests/test_hashing.py | 2 +- 4 files changed, 101 insertions(+), 46 deletions(-) diff --git a/alibuild_helpers/build.py b/alibuild_helpers/build.py index 0bf205b9..54d5ac99 100644 --- a/alibuild_helpers/build.py +++ b/alibuild_helpers/build.py @@ -17,7 +17,7 @@ from alibuild_helpers.sync import (NoRemoteSync, HttpRemoteSync, S3RemoteSync, Boto3RemoteSync, RsyncRemoteSync) import yaml -from alibuild_helpers.workarea import cleanup_git_log, logged_git, updateReferenceRepoSpec +from alibuild_helpers.workarea import cleanup_git_log, logged_scm, updateReferenceRepoSpec from alibuild_helpers.log import logger_handler, LogFormatter, ProgressPrint from datetime import datetime from glob import glob @@ -61,13 +61,15 @@ def update_git_repos(args, specs, buildOrder, develPkgs): """ def update_repo(package, git_prompt): + specs[package]["scm"] = Git() updateReferenceRepoSpec(args.referenceSources, package, specs[package], fetch=args.fetchRepos, usePartialClone=not args.docker, allowGitPrompt=git_prompt) # Retrieve git heads - cmd = ["ls-remote", "--heads", "--tags"] + scm = specs[package]["scm"] + cmd = scm.prefecthCmd() if package in develPkgs: specs[package]["source"] = \ os.path.join(os.getcwd(), specs[package]["package"]) @@ -75,12 +77,9 @@ def update_repo(package, git_prompt): else: cmd.append(specs[package].get("reference", specs[package]["source"])) - output = logged_git(package, args.referenceSources, + output = logged_scm(scm, package, args.referenceSources, cmd, ".", prompt=git_prompt, logOutput=False) - specs[package]["git_refs"] = { - git_ref: git_hash 
for git_hash, sep, git_ref - in (line.partition("\t") for line in output.splitlines()) if sep - } + specs[package]["scm_refs"] = scm.parseRefs(output) requires_auth = set() with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: @@ -102,7 +101,7 @@ def update_repo(package, git_prompt): (futurePackage, exc)) else: debug("%r package updated: %d refs found", futurePackage, - len(specs[futurePackage]["git_refs"])) + len(specs[futurePackage]["scm_refs"])) # Now execute git commands for private packages one-by-one, so the user can # type their username and password without multiple prompts interfering. @@ -114,7 +113,7 @@ def update_repo(package, git_prompt): specs[package]["source"]) update_repo(package, git_prompt=True) debug("%r package updated: %d refs found", package, - len(specs[package]["git_refs"])) + len(specs[package]["scm_refs"])) # Creates a directory in the store which contains symlinks to the package @@ -191,7 +190,7 @@ def hash_data_for_key(key): h_default(spec["commit_hash"]) try: # If spec["commit_hash"] is a tag, get the actual git commit hash. - real_commit_hash = spec["git_refs"]["refs/tags/" + spec["commit_hash"]] + real_commit_hash = spec["scm_refs"]["refs/tags/" + spec["commit_hash"]] except KeyError: # If it's not a tag, assume it's an actual commit hash. 
real_commit_hash = spec["commit_hash"] @@ -202,7 +201,7 @@ def hash_data_for_key(key): h_real_commit(real_commit_hash) h_alternatives = [(spec.get("tag", "0"), spec["commit_hash"], h_default), (spec.get("tag", "0"), real_commit_hash, h_real_commit)] - for ref, git_hash in spec.get("git_refs", {}).items(): + for ref, git_hash in spec.get("scm_refs", {}).items(): if ref.startswith("refs/tags/") and git_hash == real_commit_hash: tag_name = ref[len("refs/tags/"):] debug("Tag %s also points to %s, storing alternative", @@ -269,12 +268,14 @@ def h_all(data): # pylint: disable=function-redefined list({h.hexdigest() for _, _, h, in h_alternatives} - {spec["local_revision_hash"]}) -def hash_local_changes(directory): +def hash_local_changes(spec): """Produce a hash of all local changes in the given git repo. If there are untracked files, this function returns a unique hash to force a rebuild, and logs a warning, as we cannot detect changes to those files. """ + directory = spec["source"] + scm = spec["scm"] untrackedFilesDirectories = [] class UntrackedChangesError(Exception): """Signal that we cannot detect code changes due to untracked files.""" @@ -283,10 +284,10 @@ def hash_output(msg, args): lines = msg % args # `git status --porcelain` indicates untracked files using "??". # Lines from `git diff` never start with "??". - if any(line.startswith("?? 
") for line in lines.split("\n")): + if any(scm.checkUntracked(line) for line in lines.split("\n")): raise UntrackedChangesError() h(lines) - cmd = "cd %s && git diff -r HEAD && git status --porcelain" % directory + cmd = scm.diffCmd(directory) try: err = execute(cmd, hash_output) debug("Command %s returned %d", cmd, err) @@ -318,6 +319,55 @@ def better_tarball(spec, old, new): hashes = spec["local_hashes" if old_is_local else "remote_hashes"] return old if hashes.index(old_hash) < hashes.index(new_hash) else new +class SCM(object): + def whereAmI(self, directory): + raise NotImplementedError + def branchOrRef(self, directory): + raise NotImplementedError + def lsRemote(self, remote): + raise NotImplementedError + def prefecthCmd(self): + raise NotImplementedError + def parseRefs(self, output): + raise NotImplementedError + def exec(self, *args, **kwargs): + raise NotImplementedError + def cloneCmd(self, spec, referenceRepo, usePartialClone): + raise NotImplementedError + def diffCmd(self, directory): + raise NotImplementedError + def checkUntracked(self, line): + raise NotImplementedError + +class Git(SCM): + name = "Git" + def whereAmI(self, directory): + return git(("rev-parse", "HEAD"), directory) + def branchOrRef(self, directory): + out = git(("rev-parse", "--abbrev-ref", "HEAD"), directory=directory) + if out == "HEAD": + out = git(("rev-parse", "HEAD"), directory)[:10] + return out + def exec(self, *args, **kwargs): + return git(*args, **kwargs) + def parseRefs(self, output): + return { + git_ref: git_hash for git_hash, sep, git_ref + in (line.partition("\t") for line in output.splitlines()) if sep + } + def prefecthCmd(self): + return ["ls-remote", "--heads", "--tags"] + def cloneCmd(self, source, referenceRepo, usePartialClone): + cmd = ["clone", "--bare", source, referenceRepo] + speedup = clone_speedup_options() if usePartialClone else [] + return cmd + list(speedup) # the command must be returned: updateReferenceRepo hands it to logged_scm + def fetchCmd(self, source, referenceRepo=None): # referenceRepo is accepted (and unused) because updateReferenceRepo passes it + return ["fetch", "-f", "--tags", source, "+refs/heads/*:refs/heads/*"] + def 
diffCmd(self, directory): + return "cd %s && git diff -r HEAD && git status --porcelain" % directory + def checkUntracked(self, line): + return line.startswith("?? ") + def doBuild(args, parser): if args.remoteStore.startswith("http"): @@ -363,7 +413,15 @@ def doBuild(args, parser): if not exists(specDir): makedirs(specDir) - os.environ["ALIBUILD_ALIDIST_HASH"] = git(("rev-parse", "HEAD"), directory=args.configDir) + # If the alidist workdir contains a .git directory, we use Git as SCM. + # Other backends (e.g. Sapling) are not supported yet, so we bail out. + if exists("%s/.git" % args.configDir): + scm = Git() + else: + error("Cannot find .git directory in %s.", args.configDir) + return 1 + + os.environ["ALIBUILD_ALIDIST_HASH"] = scm.whereAmI(directory=args.configDir) debug("Building for architecture %s", args.architecture) debug("Number of parallel builds: %d", args.jobs) @@ -504,23 +562,21 @@ def doBuild(args, parser): # the commit_hash. If it's not a branch, it must be a tag or a raw commit # hash, so we use it directly. Finally if the package is a development # one, we use the name of the branch as commit_hash. - assert "git_refs" in spec + assert "scm_refs" in spec try: - spec["commit_hash"] = spec["git_refs"]["refs/heads/" + spec["tag"]] + spec["commit_hash"] = spec["scm_refs"]["refs/heads/" + spec["tag"]] except KeyError: spec["commit_hash"] = spec["tag"] # We are in development mode, we need to rebuild if the commit hash is # different or if there are extra changes on top. if spec["package"] in develPkgs: # Devel package: we get the commit hash from the checked source, not from remote. 
- out = git(("rev-parse", "HEAD"), directory=spec["source"]) + out = spec["scm"].whereAmI(directory=spec["source"]) spec["commit_hash"] = out.strip() - local_hash, untracked = hash_local_changes(spec["source"]) + local_hash, untracked = hash_local_changes(spec) untrackedFilesDirectories.extend(untracked) spec["devel_hash"] = spec["commit_hash"] + local_hash - out = git(("rev-parse", "--abbrev-ref", "HEAD"), directory=spec["source"]) - if out == "HEAD": - out = git(("rev-parse", "HEAD"), directory=spec["source"])[:10] + out = spec["scm"].branchOrRef(directory=spec["source"]) develPackageBranch = out.replace("/", "-") spec["tag"] = args.develPrefix if "develPrefix" in args else develPackageBranch spec["commit_hash"] = "0" diff --git a/alibuild_helpers/workarea.py b/alibuild_helpers/workarea.py index 4fc49ae1..4c866118 100644 --- a/alibuild_helpers/workarea.py +++ b/alibuild_helpers/workarea.py @@ -29,32 +29,32 @@ def cleanup_git_log(referenceSources): "Could not delete stale git log: %s" % exc) -def logged_git(package, referenceSources, +def logged_scm(scm, package, referenceSources, command, directory, prompt, logOutput=True): - """Run a git command, but produce an output file if it fails. + """Run an SCM command, but produce an output file if it fails. - This is useful in CI, so that we can pick up git failures and show them in + This is useful in CI, so that we can pick up SCM failures and show them in the final produced log. For this reason, the file we write in this function - must not contain any secrets. We only output the git command we ran, its exit + must not contain any secrets. We only output the SCM command we ran, its exit code, and the package name, so this should be safe. """ # This might take a long time, so show the user what's going on. 
- info("Git %s for repository for %s...", command[0], package) - err, output = git(command, directory=directory, check=False, prompt=prompt) + info("%s %s for repository for %s...", scm.name, command[0], package) + err, output = scm.exec(command, directory=directory, check=False, prompt=prompt) if logOutput: debug(output) if err: try: with codecs.open(os.path.join(referenceSources, FETCH_LOG_NAME), "a", encoding="utf-8", errors="replace") as logf: - logf.write("Git command for package %r failed.\n" - "Command: git %s\nIn directory: %s\nExit code: %d\n" % - (package, " ".join(command), directory, err)) + logf.write("%s command for package %r failed.\n" + "Command: %s %s\nIn directory: %s\nExit code: %d\n" % + (scm.name, package, scm.name.lower(), " ".join(command), directory, err)) except OSError as exc: - error("Could not write error log from git command:", exc_info=exc) - dieOnError(err, "Error during git %s for reference repo for %s." % - (command[0], package)) - info("Done git %s for repository for %s", command[0], package) + error("Could not write error log from SCM command:", exc_info=exc) + dieOnError(err, "Error during %s %s for reference repo for %s." 
% + (scm.name.lower(), command[0], package)) + info("Done %s %s for repository for %s", scm.name.lower(), command[0], package) return output @@ -97,6 +97,8 @@ def updateReferenceRepo(referenceSources, p, spec, if "source" not in spec: return + scm = spec["scm"] + debug("Updating references.") referenceRepo = os.path.join(os.path.abspath(referenceSources), p.lower()) @@ -114,14 +116,11 @@ def updateReferenceRepo(referenceSources, p, spec, return None # no reference can be found and created (not fatal) if not os.path.exists(referenceRepo): - cmd = ["clone", "--bare", spec["source"], referenceRepo] - if usePartialClone: - cmd.extend(clone_speedup_options()) - logged_git(p, referenceSources, cmd, ".", allowGitPrompt) + cmd = scm.cloneCmd(spec["source"], referenceRepo, usePartialClone) + logged_scm(scm, p, referenceSources, cmd, ".", allowGitPrompt) elif fetch: - logged_git(p, referenceSources, ( - "fetch", "-f", "--tags", spec["source"], "+refs/heads/*:refs/heads/*", - ), referenceRepo, allowGitPrompt) + cmd = scm.fetchCmd(spec["source"], referenceRepo) + logged_scm(scm, p, referenceSources, cmd, referenceRepo, allowGitPrompt) return referenceRepo # reference is read-write diff --git a/tests/test_build.py b/tests/test_build.py index b1ddf85e..4b3bd00d 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -323,13 +323,13 @@ def setup_spec(script): (root, TEST_ROOT_GIT_REFS), (extra, TEST_EXTRA_GIT_REFS)): spec.setdefault("requires", []).append(default["package"]) - spec["git_refs"] = {ref: hash for hash, _, ref in ( + spec["scm_refs"] = {ref: hash for hash, _, ref in ( line.partition("\t") for line in refs.splitlines() )} try: - spec["commit_hash"] = spec["git_refs"]["refs/tags/" + spec["tag"]] + spec["commit_hash"] = spec["scm_refs"]["refs/tags/" + spec["tag"]] except KeyError: - spec["commit_hash"] = spec["git_refs"]["refs/heads/" + spec["tag"]] + spec["commit_hash"] = spec["scm_refs"]["refs/heads/" + spec["tag"]] specs = {pkg["package"]: pkg for pkg in 
(default, zlib, root, extra)} storeHashes("defaults-release", specs, isDevelPkg=False, considerRelocation=False) diff --git a/tests/test_hashing.py b/tests/test_hashing.py index f9f32b1d..4882910c 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -58,7 +58,7 @@ def test_hashes_match_build_log(self): self.assertEqual(spec["remote_revision_hash"], remote) self.assertEqual(spec["local_revision_hash"], local) # For logs produced by old hash implementations (which didn't - # consider spec["git_refs"]), alt_{remote,local} will only + # consider spec["scm_refs"]), alt_{remote,local} will only # contain the primary hash anyway, so this works nicely. self.assertEqual(spec["remote_hashes"], alt_remote.split(", ")) self.assertEqual(spec["local_hashes"], alt_local.split(", "))