class SourceRepositoryProvider:
    """Initializes and updates a git checkout of a source repository on a host.

    All filesystem checks go through ``PathManager`` and all git operations go
    through ``GitManager``; both are constructed around the supplied executor.
    """

    def __init__(self, executor, repository_name):
        """
        :param executor: Executor handed to the path and git managers.
        :param repository_name: Human-readable repository name used in log and error messages.
        """
        self.logger = logging.getLogger(__name__)
        self.executor = executor
        self.path_manager = PathManager(executor)
        self.git_manager = GitManager(executor)
        self.repository_name = repository_name

        self.update_scenarios = self._generate_update_repository_scenarios()

    def _generate_update_repository_scenarios(self):
        """Return the ordered ``predicate -> update function`` table.

        Each predicate takes ``(revision, is_remote_defined)``. Order matters:
        ``_update_repository`` runs the function of the first predicate that
        matches, and the last entry is a catch-all.
        """
        return OrderedDict([
            (
                lambda revision, is_remote_defined: revision == "latest" and is_remote_defined,
                self._update_repository_to_latest
            ),
            (
                lambda revision, is_remote_defined: revision == "current",
                self._update_repository_to_current
            ),
            (
                lambda revision, is_remote_defined: revision.startswith("@") and is_remote_defined,
                self._update_repository_to_timestamp
            ),
            (
                lambda revision, is_remote_defined: is_remote_defined,
                self._update_repository_to_commit_hash
            ),
            (
                lambda revision, is_remote_defined: True,
                self._update_repository_to_local_revision
            ),
        ])

    def fetch_repository(self, host, remote_url, revision, target_dir):
        """Make sure ``target_dir`` holds the requested revision and report it.

        :param host: Host the commands are executed on.
        :param remote_url: URL of the remote repository, or ``None`` if there is none.
        :param revision: ``"latest"``, ``"current"``, ``"@<timestamp>"`` or any other
            revision understood by ``git checkout``.
        :param target_dir: Directory of the checkout.
        :return: The git revision of the checkout, or ``None`` if ``target_dir`` is not a git repository.
        :raises SystemSetupError: If the repository has to be cloned but no remote URL is defined.
        """
        if not self.path_manager.is_path_present(host, os.path.join(target_dir, ".git")):
            self._initialize_repository(host, remote_url, revision, target_dir)

        self._update_repository(host, remote_url, revision, target_dir)
        return self._get_revision(host, revision, target_dir)

    def _initialize_repository(self, host, remote_url, revision, target_dir):
        """Clone the repository, or skip cloning when an existing directory is used as is."""
        if self._is_remote_defined(remote_url):
            self.logger.info("Downloading sources for %s from %s to %s.", self.repository_name, remote_url, target_dir)
            self.path_manager.create_path(host, target_dir, create_locally=False)
            self.git_manager.clone(host, remote_url, target_dir)
        elif self.path_manager.is_path_present(host, target_dir) and self._is_repository_initialization_skippable(revision):
            self.logger.info("Skipping repository initialization for %s.", self.repository_name)
        else:
            raise SystemSetupError(f"A remote repository URL is mandatory for {self.repository_name}")

    def _is_remote_defined(self, remote_url):
        """Return ``True`` unless ``remote_url`` is ``None``."""
        return remote_url is not None

    def _is_repository_initialization_skippable(self, revision):
        """Return ``True`` only for the ``"current"`` revision."""
        return revision == "current"

    def _update_repository(self, host, remote_url, revision, target_dir):
        """Run the first update scenario whose predicate matches and return its result."""
        is_remote_defined = self._is_remote_defined(remote_url)

        for condition, update_function in self.update_scenarios.items():
            if condition(revision, is_remote_defined):
                return update_function(host, revision, target_dir)

        # Not reachable with the default scenario table because its last predicate always matches.
        return None

    def _update_repository_to_latest(self, host, revision, target_dir):
        """Fetch, check out the git manager's default branch and rebase onto the remote."""
        self.logger.info("Getting latest sources for %s from origin/main.", self.repository_name)
        self.git_manager.fetch(host, target_dir)
        self.git_manager.checkout(host, target_dir)
        self.git_manager.rebase(host, target_dir)

    def _update_repository_to_current(self, host, revision, target_dir):
        """Leave the checkout untouched; only log that fetching is skipped."""
        self.logger.info("Skip fetching sources for %s.", self.repository_name)

    def _update_repository_to_timestamp(self, host, revision, target_dir):
        """Fetch and check out the revision that git resolves for the ``@<timestamp>`` revision."""
        # Convert the Benchmark timestamp annotation into something git understands:
        # only the leading "@" marker is dropped, the remainder is passed on unchanged.
        git_timestamp_revision = revision[1:]
        self.logger.info("Fetching from remote and checking out revision with timestamp [%s] for "
                         "%s.", git_timestamp_revision, self.repository_name)
        self.git_manager.fetch(host, target_dir)
        revision_from_timestamp = self.git_manager.get_revision_from_timestamp(host, target_dir, git_timestamp_revision)
        self.git_manager.checkout(host, target_dir, revision_from_timestamp)

    def _update_repository_to_commit_hash(self, host, revision, target_dir):
        """Fetch from the remote and check out ``revision``."""
        self.logger.info("Fetching from remote and checking out revision [%s] for %s.", revision, self.repository_name)
        self.git_manager.fetch(host, target_dir)
        self.git_manager.checkout(host, target_dir, revision)

    def _update_repository_to_local_revision(self, host, revision, target_dir):
        """Check out ``revision`` without fetching (used when no remote is defined)."""
        self.logger.info("Checking out local revision [%s] for %s.", revision, self.repository_name)
        self.git_manager.checkout(host, target_dir, revision)

    def _get_revision(self, host, revision, target_dir):
        """Return the checkout's git revision, or ``None`` if ``target_dir`` has no ``.git`` entry."""
        if self.path_manager.is_path_present(host, os.path.join(target_dir, ".git")):
            git_revision = self.git_manager.get_revision_from_local_repository(host, target_dir)
            self.logger.info("User-specified revision [%s] for [%s] results in git revision [%s]",
                             revision, self.repository_name, git_revision)

            return git_revision

        self.logger.info("Skipping git revision resolution for %s (%s is not a git repository).", self.repository_name, target_dir)
        # Explicit so that every exit path of this method returns a value.
        return None
class GitManager:
    """Builds git command lines and runs them on a host through an executor.

    Every caller-supplied value is shell-escaped before it is interpolated into
    the command string, so paths, URLs or revisions containing spaces or shell
    metacharacters are passed to git as single literal arguments.

    NOTE(review): this assumes the executor hands the command string to a POSIX
    shell or a shlex-style splitter; the escaped double quotes the original code
    put around the timestamp suggest so, but confirm against the executor.
    """

    def __init__(self, executor):
        """
        :param executor: Object exposing ``execute(host, command, output=...)``.
        """
        self.executor = executor

    @staticmethod
    def _quote(value):
        """Return ``value`` escaped so that a shell treats it as one literal word."""
        # Function-scope import keeps this class self-contained.
        import shlex

        return shlex.quote(str(value))

    def clone(self, host, remote_url, target_dir):
        """Clone ``remote_url`` into ``target_dir`` on ``host``."""
        self.executor.execute(host, f"git clone {self._quote(remote_url)} {self._quote(target_dir)}")

    def fetch(self, host, target_dir, remote="origin"):
        """Fetch ``remote`` (with ``--prune --tags``) in the repository at ``target_dir``."""
        self.executor.execute(host, f"git -C {self._quote(target_dir)} fetch --prune --tags {self._quote(remote)}")

    def checkout(self, host, target_dir, branch="main"):
        """Check out ``branch`` (a branch name or any other revision) in ``target_dir``."""
        self.executor.execute(host, f"git -C {self._quote(target_dir)} checkout {self._quote(branch)}")

    def rebase(self, host, target_dir, remote="origin", branch="main"):
        """Rebase the current checkout in ``target_dir`` onto ``remote/branch``."""
        upstream = self._quote(f"{remote}/{branch}")
        self.executor.execute(host, f"git -C {self._quote(target_dir)} rebase {upstream}")

    def get_revision_from_timestamp(self, host, target_dir, timestamp, remote="origin", branch="main"):
        """Return the newest revision on ``remote/branch`` committed before ``timestamp``.

        :param timestamp: Date expression passed to ``git rev-list --before``.
        :param remote: Remote to inspect; defaults to ``origin`` as before.
        :param branch: Branch to inspect; defaults to ``main`` as before.
        :return: First element of the executor output, stripped of surrounding whitespace.
        """
        upstream = self._quote(f"{remote}/{branch}")
        get_revision_from_timestamp_command = (
            f"git -C {self._quote(target_dir)} rev-list -n 1 "
            f"--before={self._quote(timestamp)} --date=iso8601 {upstream}"
        )

        return self.executor.execute(host, get_revision_from_timestamp_command, output=True)[0].strip()

    def get_revision_from_local_repository(self, host, target_dir):
        """Return the abbreviated ``HEAD`` revision of the repository at ``target_dir``."""
        return self.executor.execute(host, f"git -C {self._quote(target_dir)} rev-parse --short HEAD", output=True)[0].strip()
class SourceRepositoryProviderTest(TestCase):
    """Unit tests for SourceRepositoryProvider with the path and git managers mocked out."""

    def setUp(self):
        self.host = None
        self.remote_url = "https://git.myrepo.com/repo"
        self.revision = "current"
        self.target_dir = "/fake/path"

        self.executor = Mock()

        self.source_repo_provider = SourceRepositoryProvider(self.executor, "my repo")
        self.source_repo_provider.path_manager = Mock()
        self.source_repo_provider.git_manager = Mock()

        # Default: every probed path exists, i.e. the repository is already cloned.
        self.source_repo_provider.path_manager.is_path_present.return_value = True

    def test_initialize_repo_with_remote(self):
        self.source_repo_provider.path_manager.is_path_present.return_value = False

        self.source_repo_provider.fetch_repository(self.host, self.remote_url, self.revision, self.target_dir)

        self.source_repo_provider.path_manager.create_path.assert_has_calls([
            mock.call(self.host, self.target_dir, create_locally=False)
        ])
        self.source_repo_provider.git_manager.clone.assert_has_calls([
            mock.call(self.host, self.remote_url, self.target_dir)
        ])

    def test_initialize_repo_skippable(self):
        # Check repo/.git, check repo, check repo/.git
        self.source_repo_provider.path_manager.is_path_present.side_effect = [False, True, False]

        self.source_repo_provider.fetch_repository(self.host, None, self.revision, self.target_dir)

        # assert_has_calls([]) passes for any call history, so it cannot prove
        # that nothing was created or cloned; assert_not_called() can.
        self.source_repo_provider.path_manager.create_path.assert_not_called()
        self.source_repo_provider.git_manager.clone.assert_not_called()

    def test_initialize_repo_no_remote_not_skippable(self):
        self.source_repo_provider.path_manager.is_path_present.return_value = False

        with self.assertRaises(SystemSetupError):
            self.source_repo_provider.fetch_repository(self.host, None, "latest", self.target_dir)

    def test_update_repo_to_latest(self):
        self.source_repo_provider.fetch_repository(self.host, self.remote_url, "latest", self.target_dir)

        self.source_repo_provider.git_manager.assert_has_calls([
            mock.call.fetch(self.host, self.target_dir),
            mock.call.checkout(self.host, self.target_dir),
            mock.call.rebase(self.host, self.target_dir),
            mock.call.get_revision_from_local_repository(self.host, self.target_dir)
        ])

    def test_update_repo_to_current(self):
        self.source_repo_provider.fetch_repository(self.host, self.remote_url, self.revision, self.target_dir)

        self.source_repo_provider.git_manager.assert_has_calls([
            mock.call.get_revision_from_local_repository(self.host, self.target_dir)
        ])
        # "current" must leave the checkout untouched.
        self.source_repo_provider.git_manager.fetch.assert_not_called()
        self.source_repo_provider.git_manager.checkout.assert_not_called()

    def test_update_repo_to_timestamp(self):
        self.source_repo_provider.git_manager.get_revision_from_timestamp.return_value = "fake rev"

        self.source_repo_provider.fetch_repository(self.host, self.remote_url, "@fake-timestamp", self.target_dir)

        self.source_repo_provider.git_manager.assert_has_calls([
            mock.call.fetch(self.host, self.target_dir),
            mock.call.get_revision_from_timestamp(self.host, self.target_dir, "fake-timestamp"),
            mock.call.checkout(self.host, self.target_dir, "fake rev"),
            mock.call.get_revision_from_local_repository(self.host, self.target_dir)
        ])

    def test_update_repo_to_commit_hash(self):
        self.source_repo_provider.fetch_repository(self.host, self.remote_url, "uuid", self.target_dir)

        self.source_repo_provider.git_manager.assert_has_calls([
            mock.call.fetch(self.host, self.target_dir),
            mock.call.checkout(self.host, self.target_dir, "uuid"),
            mock.call.get_revision_from_local_repository(self.host, self.target_dir)
        ])

    def test_update_repo_to_local_revision(self):
        self.source_repo_provider.fetch_repository(self.host, None, "fake rev", self.target_dir)

        self.source_repo_provider.git_manager.assert_has_calls([
            mock.call.checkout(self.host, self.target_dir, "fake rev"),
            mock.call.get_revision_from_local_repository(self.host, self.target_dir)
        ])

    def test_get_revision_repo_exists(self):
        self.source_repo_provider.git_manager.get_revision_from_local_repository.return_value = "my rev"

        revision = self.source_repo_provider.fetch_repository(self.host, self.remote_url, self.revision, self.target_dir)
        self.assertEqual(revision, "my rev")

    def test_get_revision_repo_does_not_exist(self):
        self.source_repo_provider.path_manager.is_path_present.return_value = False

        revision = self.source_repo_provider.fetch_repository(self.host, self.remote_url, self.revision, self.target_dir)
        self.assertIsNone(revision)