From 0e77d12ebafb47a64ea2996db4eec23f05ee800a Mon Sep 17 00:00:00 2001 From: Grzegorz Banasiak Date: Wed, 2 Oct 2024 10:19:22 +0200 Subject: [PATCH] Pull non-default branch commits selectively (#1879) After switching Elasticsearch nightly benchmarks to non-default branch it was observed `branch@timestamp` notation in `--revision` command line flag does not guarantee constant results due to ongoing merges from default branch (`main`) into non-default branch. Specifically, if a merge from main brought in a commit that happened before `timestamp`, such commit might be selected despite not being part of non-default branch. This change mitigates this problem by filtering commit IDs of default branch (`main`) using `main..branch` notation of `git rev-list` command. --- esrally/mechanic/supplier.py | 2 +- esrally/utils/git.py | 8 ++++++-- tests/mechanic/supplier_test.py | 2 +- tests/utils/git_test.py | 21 ++++++++++++++++++--- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/esrally/mechanic/supplier.py b/esrally/mechanic/supplier.py index 0fa2c486a..b5b4e923a 100644 --- a/esrally/mechanic/supplier.py +++ b/esrally/mechanic/supplier.py @@ -750,7 +750,7 @@ def _update(self, revision): branch, self.name, ) - git.pull_ts(self.src_dir, git_ts_revision, remote="origin", branch=branch) + git.pull_ts(self.src_dir, git_ts_revision, remote="origin", branch=branch, default_branch=DEFAULT_ELASTICSEARCH_BRANCH) elif self.has_remote(): # we can have either a commit hash, branch name, or tag git.fetch(self.src_dir, remote="origin") if git.is_branch(self.src_dir, identifier=revision): diff --git a/esrally/utils/git.py b/esrally/utils/git.py index c55e3aee3..7101189fd 100644 --- a/esrally/utils/git.py +++ b/esrally/utils/git.py @@ -105,10 +105,14 @@ def pull(src_dir, *, remote, branch): @probed -def pull_ts(src_dir, ts, *, remote, branch): +def pull_ts(src_dir, ts, *, remote, branch, default_branch): fetch(src_dir, remote=remote) clean_src = io.escape_path(src_dir) - rev_list_command = f'git -C {clean_src} rev-list -n 1 --before="{ts}" --date=iso8601 {remote}/{branch}' + # non-default ES branches might receive merges from default ES branch which we want to filter out + if branch != default_branch: + rev_list_command = f'git -C {clean_src} rev-list -n 1 --before="{ts}" --date=iso8601 {remote}/{default_branch}..{remote}/{branch}' + else: + rev_list_command = f'git -C {clean_src} rev-list -n 1 --before="{ts}" --date=iso8601 {remote}/{branch}' revision = process.run_subprocess_with_output(rev_list_command)[0].strip() if process.run_subprocess_with_logging(f"git -C {clean_src} checkout {revision}"): raise exceptions.SupplyError("Could not checkout source tree for timestamped revision [%s]" % ts) diff --git a/tests/mechanic/supplier_test.py b/tests/mechanic/supplier_test.py index 9d47a644a..96ca4e071 100644 --- a/tests/mechanic/supplier_test.py +++ b/tests/mechanic/supplier_test.py @@ -147,7 +147,7 @@ def test_checkout_ts(self, mock_is_working_copy, mock_pull_ts, mock_head_revisio s.fetch("@2023-04-20T11:09:12Z") mock_is_working_copy.assert_called_with("/src") - mock_pull_ts.assert_called_with("/src", "2023-04-20T11:09:12Z", remote="origin", branch="main") + mock_pull_ts.assert_called_with("/src", "2023-04-20T11:09:12Z", remote="origin", branch="main", default_branch="main") mock_head_revision.assert_called_with("/src") @mock.patch("esrally.utils.git.fetch", autospec=True) diff --git a/tests/utils/git_test.py b/tests/utils/git_test.py index 57bc152b2..e8be18399 100644 --- a/tests/utils/git_test.py +++ b/tests/utils/git_test.py @@ -42,6 +42,7 @@ def setup(request, tmp_path_factory): cls.local_remote_name = "remote_repo" cls.local_branch = "rally-unit-test-local-only-branch" cls.remote_branch = "rally-unit-test-remote-only-branch" + cls.remote_branch_non_master = "rally-unit-test-remote-only-branch-non-master" cls.rebase_branch = "rally-unit-test-rebase-branch" cls.local_tmp_src_dir = str(tmp_path_factory.mktemp("rally-unit-test-local-dir")) @@ -70,8 +71,16 @@ def setup(request, tmp_path_factory): cls.old_revision = cls.remote_repo.heads["master"].commit.hexsha commit(cls.remote_repo) cls.remote_branch_hash = cls.remote_repo.heads["master"].commit.hexsha - cls.remote_repo.create_head(cls.remote_branch, "HEAD") + + # create branch not aligned with master + non_master_branch = cls.remote_repo.create_head(cls.remote_branch_non_master, cls.old_revision) + cls.remote_repo.head.reference = non_master_branch + cls.remote_repo.head.reset(index=True, working_tree=True) + commit(cls.remote_repo, date="2016-01-01 01:00:00+0000") + cls.remote_branch_non_master_hash = cls.remote_repo.heads[cls.remote_branch_non_master].commit.hexsha + commit(cls.remote_repo) + cls.local_repo.create_remote(cls.local_remote_name, cls.remote_tmp_src_dir) cls.local_repo.remotes[cls.local_remote_name].fetch() @@ -185,8 +194,14 @@ def test_head_revision(self): def test_pull_ts(self): # minimum 'core.abbrev' is to return 7 char prefixes - git.pull_ts(self.local_tmp_src_dir, "2016-01-01T110000Z", remote=self.local_remote_name, branch=self.remote_branch) - assert git.head_revision(self.local_tmp_src_dir).startswith(self.old_revision) + git.pull_ts( + self.local_tmp_src_dir, + "2016-01-01T110000Z", + remote=self.local_remote_name, + branch=self.remote_branch_non_master, + default_branch="master", + ) + assert git.head_revision(self.local_tmp_src_dir).startswith(self.remote_branch_non_master_hash) def test_rebase(self, setup_teardown_rebase): # fetch required first to get remote branch