Skip to content

Commit

Permalink
Pull non-default branch commits selectively (elastic#1879)
Browse files Browse the repository at this point in the history
After switching Elasticsearch nightly benchmarks to non-default branch
it was observed `branch@timestamp` notation in `--revision` command line
flag does not guarantee constant results due to ongoing merges from
default branch (`main`) into non-default branch. Specifically, if a
merge from main brought in a commit that happened before `timestamp`,
such commit might be selected despite not being part of non-default
branch.

This change mitigates this problem by filtering commit IDs of default
branch (`main`) using `main..branch` notation of `git rev-list` command.
  • Loading branch information
gbanasiak authored Oct 2, 2024
1 parent 75c8070 commit 0e77d12
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
2 changes: 1 addition & 1 deletion esrally/mechanic/supplier.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ def _update(self, revision):
branch,
self.name,
)
git.pull_ts(self.src_dir, git_ts_revision, remote="origin", branch=branch)
git.pull_ts(self.src_dir, git_ts_revision, remote="origin", branch=branch, default_branch=DEFAULT_ELASTICSEARCH_BRANCH)
elif self.has_remote(): # we can have either a commit hash, branch name, or tag
git.fetch(self.src_dir, remote="origin")
if git.is_branch(self.src_dir, identifier=revision):
Expand Down
8 changes: 6 additions & 2 deletions esrally/utils/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,14 @@ def pull(src_dir, *, remote, branch):


@probed
def pull_ts(src_dir, ts, *, remote, branch):
def pull_ts(src_dir, ts, *, remote, branch, default_branch):
fetch(src_dir, remote=remote)
clean_src = io.escape_path(src_dir)
rev_list_command = f'git -C {clean_src} rev-list -n 1 --before="{ts}" --date=iso8601 {remote}/{branch}'
# non-default ES branches might receive merges from default ES branch which we want to filter out
if branch != default_branch:
rev_list_command = f'git -C {clean_src} rev-list -n 1 --before="{ts}" --date=iso8601 {remote}/{default_branch}..{remote}/{branch}'
else:
rev_list_command = f'git -C {clean_src} rev-list -n 1 --before="{ts}" --date=iso8601 {remote}/{branch}'
revision = process.run_subprocess_with_output(rev_list_command)[0].strip()
if process.run_subprocess_with_logging(f"git -C {clean_src} checkout {revision}"):
raise exceptions.SupplyError("Could not checkout source tree for timestamped revision [%s]" % ts)
Expand Down
2 changes: 1 addition & 1 deletion tests/mechanic/supplier_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def test_checkout_ts(self, mock_is_working_copy, mock_pull_ts, mock_head_revisio
s.fetch("@2023-04-20T11:09:12Z")

mock_is_working_copy.assert_called_with("/src")
mock_pull_ts.assert_called_with("/src", "2023-04-20T11:09:12Z", remote="origin", branch="main")
mock_pull_ts.assert_called_with("/src", "2023-04-20T11:09:12Z", remote="origin", branch="main", default_branch="main")
mock_head_revision.assert_called_with("/src")

@mock.patch("esrally.utils.git.fetch", autospec=True)
Expand Down
21 changes: 18 additions & 3 deletions tests/utils/git_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def setup(request, tmp_path_factory):
cls.local_remote_name = "remote_repo"
cls.local_branch = "rally-unit-test-local-only-branch"
cls.remote_branch = "rally-unit-test-remote-only-branch"
cls.remote_branch_non_master = "rally-unit-test-remote-only-branch-non-master"
cls.rebase_branch = "rally-unit-test-rebase-branch"

cls.local_tmp_src_dir = str(tmp_path_factory.mktemp("rally-unit-test-local-dir"))
Expand Down Expand Up @@ -70,8 +71,16 @@ def setup(request, tmp_path_factory):
cls.old_revision = cls.remote_repo.heads["master"].commit.hexsha
commit(cls.remote_repo)
cls.remote_branch_hash = cls.remote_repo.heads["master"].commit.hexsha

cls.remote_repo.create_head(cls.remote_branch, "HEAD")

# create branch not aligned with master
non_master_branch = cls.remote_repo.create_head(cls.remote_branch_non_master, cls.old_revision)
cls.remote_repo.head.reference = non_master_branch
cls.remote_repo.head.reset(index=True, working_tree=True)
commit(cls.remote_repo, date="2016-01-01 01:00:00+0000")
cls.remote_branch_non_master_hash = cls.remote_repo.heads[cls.remote_branch_non_master].commit.hexsha
commit(cls.remote_repo)

cls.local_repo.create_remote(cls.local_remote_name, cls.remote_tmp_src_dir)
cls.local_repo.remotes[cls.local_remote_name].fetch()

Expand Down Expand Up @@ -185,8 +194,14 @@ def test_head_revision(self):

def test_pull_ts(self):
# minimum 'core.abbrev' is to return 7 char prefixes
git.pull_ts(self.local_tmp_src_dir, "2016-01-01T110000Z", remote=self.local_remote_name, branch=self.remote_branch)
assert git.head_revision(self.local_tmp_src_dir).startswith(self.old_revision)
git.pull_ts(
self.local_tmp_src_dir,
"2016-01-01T110000Z",
remote=self.local_remote_name,
branch=self.remote_branch_non_master,
default_branch="master",
)
assert git.head_revision(self.local_tmp_src_dir).startswith(self.remote_branch_non_master_hash)

def test_rebase(self, setup_teardown_rebase):
# fetch required first to get remote branch
Expand Down

0 comments on commit 0e77d12

Please sign in to comment.