From e6fe98f2b4be4d49e2a1350695d3ad38d6711dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Roz=C3=A9?= Date: Fri, 16 Feb 2024 09:34:56 +0100 Subject: [PATCH] Switch to getRevision code based on git ls-remote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Repo versions are computed from manifests with updated revisions for each project. The repo tool can only generate a manifest with accurate sha1 hashes once all projects have been downloaded locally (repo manifest -r -o new-manifest.xml). This uses a lot of network bandwith, memory, cpu and disk space and is a huge waste of time. git ls-remote is used instead to fetch revisions much quicker before being injected into the original manifest. A new variable check_jobs is used to spawn concurrent processes to make it close to X times faster Adjust jobs and check_jobs variables based on the git servers capabilities/limitations Signed-off-by: David Rozé --- README.md | 3 +++ repo_resource/check.py | 15 ++++++++++----- repo_resource/common.py | 27 ++++++++++----------------- repo_resource/in_.py | 14 +++++++++----- repo_resource/test_check.py | 29 +++++++++++++++++++---------- repo_resource/test_in.py | 32 ++++++++++++++++++++++++++++++-- 6 files changed, 81 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 23b1d07..17214ef 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,9 @@ Track changes in a [repo](https://gerrit.googlesource.com/git-repo/+/master/#rep * `jobs`: *Optional.* number of jobs to run in parallel (default: 0; based on number of CPU cores) Reduce this if you observe network errors. +* `check_jobs`: for check step only: number of jobs to run in parallel (default: jobs\*2, + 2 if jobs is undefined). + ### Example Resource configuration for a public project using repo (Android) diff --git a/repo_resource/check.py b/repo_resource/check.py index 37699c5..65f2bfd 100644 --- a/repo_resource/check.py +++ b/repo_resource/check.py @@ -33,16 +33,23 @@ def check(instream) -> list: config = common.source_config_from_payload(payload) + standard_versions = [] + for v in payload.get('versions', []): + standard_versions.append(common.Version(v['version']).standard()) + if config.private_key != '_invalid': common.add_private_key_to_agent(config.private_key) + jobs = config.jobs + check_jobs = config.check_jobs or jobs*2 or common.DEFAULT_CHECK_JOBS + try: repo = common.Repo(config.url, config.revision, config.name, config.depth) repo.init() - repo.sync(jobs=config.jobs) + repo.update_manifest(jobs=check_jobs) version = repo.currentVersion() except Exception as e: raise e @@ -51,11 +58,9 @@ def check(instream) -> list: if config.private_key != '_invalid': common.remove_private_key_from_agent() - new_version = {'version': str(version)} - versions = payload.get('versions', []) - if versions.count(new_version) == 0: - versions.append(new_version) + if version.standard() not in standard_versions: + versions.append({'version': str(version)}) return versions diff --git a/repo_resource/common.py b/repo_resource/common.py index 8513339..034a1ae 100644 --- a/repo_resource/common.py +++ b/repo_resource/common.py @@ -275,7 +275,7 @@ def init(self): finally: self.__restore_oldpwd() - def sync(self, version: Version = None, jobs: int = 0): + def sync(self, version: Version, jobs: int = 0): self.__change_to_workdir() try: with redirect_stdout(sys.stderr): @@ -288,15 +288,14 @@ def sync(self, version: Version = None, jobs: int = 0): if jobs > 0: repo_cmd.append('--jobs={}'.format(jobs)) - if version is None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_manifest = os.path.join(tmpdir, 'manifest_tmp') + version.to_file(tmp_manifest) + repo_cmd.append( + '--manifest-name={}'.format(tmp_manifest)) repo._Main(repo_cmd) - else: - with tempfile.TemporaryDirectory() as tmpdir: - tmp_manifest = os.path.join(tmpdir, 'manifest_tmp') - version.to_file(tmp_manifest) - repo_cmd.append( - '--manifest-name={}'.format(tmp_manifest)) - repo._Main(repo_cmd) + if os.listdir(self.__workdir) == []: + raise Exception('Sync failed. Is manifest correct?') except Exception as e: raise (e) finally: @@ -312,17 +311,11 @@ def update_version(self): def save_manifest(self, filename): with redirect_stdout(sys.stderr): full_path = self.__workdir / filename - current_version = self.currentVersion() print('Saving manifest to {}'.format(full_path)) - current_version.to_file(full_path) + self.__version.to_file(full_path) def currentVersion(self) -> Version: - with tempfile.TemporaryDirectory() as tmpdir: - tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot') - self.__manifest_out(tmp_manifest) - version = Version.from_file(tmp_manifest) - - return version + return self.__version def metadata(self): metadata = [] diff --git a/repo_resource/in_.py b/repo_resource/in_.py index 9afbebe..8aafe21 100644 --- a/repo_resource/in_.py +++ b/repo_resource/in_.py @@ -33,6 +33,13 @@ def in_(instream, dest_dir='.'): config = common.source_config_from_payload(payload) requested_version = common.Version(payload['version']['version']) + # Check version is valid xml + common.Version(payload['version']['version']).standard() + + standard_versions = [] + for v in payload.get('versions', []): + standard_versions.append(common.Version(v['version']).standard()) + if config.private_key != '_invalid': common.add_private_key_to_agent(config.private_key) @@ -41,7 +48,7 @@ def in_(instream, dest_dir='.'): config.name, config.depth, workdir=Path(dest_dir)) repo.init() repo.sync(requested_version, config.jobs) - fetched_version = repo.currentVersion() + repo.update_version() except Exception as e: raise e finally: @@ -49,15 +56,12 @@ def in_(instream, dest_dir='.'): if config.private_key != '_invalid': common.remove_private_key_from_agent() - if fetched_version != requested_version: - raise RuntimeError('Could not fetch requested version') - # save a copy of the manifest alongside the sources repo.save_manifest('.repo_manifest.xml') metadata = repo.metadata() - return {"version": {"version": str(fetched_version)}, + return {"version": {"version": str(requested_version)}, "metadata": metadata} diff --git a/repo_resource/test_check.py b/repo_resource/test_check.py index 5025d99..51e50d8 100644 --- a/repo_resource/test_check.py +++ b/repo_resource/test_check.py @@ -11,6 +11,7 @@ from timeit import default_timer as timer import shutil import repo +import xml.etree.ElementTree as ET from . import check from . import common @@ -113,10 +114,8 @@ def test_manifest_name_defaults(self): } instream = StringIO(json.dumps(d)) check.check(instream) - # no assert/assumption to call. repo init and sync should - # just be called. maybe we can check for a file as well - readme = common.CACHEDIR / 'fetch_artifact' / 'README.md' - self.assertTrue(readme.exists()) + manifests = common.CACHEDIR / '.repo' / 'manifests' + self.assertTrue(manifests.exists()) # so here, we init from a public manifest # init is completely working fine @@ -127,7 +126,7 @@ def test_unreachable_projects_in_manifest(self): unreachable_projects_data['source']['name'] = 'unreachable_project.xml' instream = StringIO(json.dumps(unreachable_projects_data)) - with self.assertRaises(SystemExit): + with self.assertRaises(TypeError): check.check(instream) def test_first_revision(self): @@ -141,7 +140,7 @@ def test_same_revision(self): data = self.demo_manifests_source data['versions'] = [{ 'version': - '\n\n \n \n \n \n \n\n' # noqa: E501 + '\n\n \n \n \n \n \n\n' # noqa: E501 }] instream = StringIO(json.dumps(data)) versions = check.check(instream) @@ -158,7 +157,7 @@ def test_known_version(self): # we passed no version as input, so we should just get current version self.assertEqual(len(versions), 1) # and we know that version - expected_version = '\n\n \n \n \n \n \n\n' # noqa: E501 + expected_version = '' # noqa: E501 version = versions[0]['version'] self.assertEqual(version, expected_version) @@ -166,14 +165,24 @@ def test_known_version(self): # but we use a newer version (using a different git branch) def test_new_revision(self): data = self.demo_manifests_source - data['versions'] = [{'version': 'older-shasum'}] + data['versions'] = [{ + 'version': + '' # noqa: E501 + }] instream = StringIO(json.dumps(data)) versions = check.check(instream) self.assertEqual(len(versions), 2) - expected_version = '\n\n \n \n \n \n \n\n' # noqa: E501 + expected_version = '' # noqa: E501 newest_version = versions[-1]['version'] self.assertEqual(newest_version, expected_version) + def test_invalid_revision(self): + data = self.demo_manifests_source + data['versions'] = [{'version': 'invalid-version'}] + instream = StringIO(json.dumps(data)) + with self.assertRaises(ET.ParseError): + check.check(instream) + @unittest.skipUnless( Path('development/ssh/test_key').exists(), "requires ssh test key") def test_ssh_private_key(self): @@ -253,7 +262,7 @@ def test_ssh_private_key_without_project_access(self): instream = StringIO(json.dumps(data)) versions = [] - with self.assertRaises(SystemExit): + with self.assertRaises(TypeError): versions = check.check(instream) self.assertEqual(len(versions), 0) diff --git a/repo_resource/test_in.py b/repo_resource/test_in.py index e4f8667..0df7fba 100644 --- a/repo_resource/test_in.py +++ b/repo_resource/test_in.py @@ -9,6 +9,7 @@ import shutil import unittest from pathlib import Path +import xml.etree.ElementTree as ET from . import check from . import common @@ -41,7 +42,7 @@ def test_fails_on_invalid_version(self): data = self.demo_manifests_source data['version'] = {'version': 'invalid-version'} instream = StringIO(json.dumps(data)) - with self.assertRaises(repo.error.GitError): + with self.assertRaises(ET.ParseError): in_.in_(instream, str(common.CACHEDIR)) def test_dest_dir_is_created(self): @@ -56,6 +57,30 @@ def test_dest_dir_is_created(self): self.assertTrue(common.CACHEDIR.exists()) + def test_sync_ok(self): + data = { + 'source': { + 'url': 'https://android.googlesource.com/tools/manifest', + 'revision': 'fetch_artifact-dev' + }, + } + data['version'] = { + 'version': + '\n\n\n\n\n\n' # noqa: E501 + } + instream = StringIO(json.dumps(data)) + in_.in_(instream, str(common.CACHEDIR)) + # no assert/assumption to call. repo init and sync should + # just be called. maybe we can check for a file as well + readme = common.CACHEDIR / 'fetch_artifact' / 'README.md' + self.assertTrue(readme.exists()) + + def test_no_manifest_version(self): + data = self.demo_manifests_source + instream = StringIO(json.dumps(data)) + with self.assertRaises(KeyError): + in_.in_(instream, str(common.CACHEDIR)) + def test_valid_in(self): data = self.demo_manifests_source data['version'] = { @@ -66,7 +91,10 @@ def test_valid_in(self): instream = StringIO(json.dumps(data)) fetched_version = in_.in_(instream, str(common.CACHEDIR)) - self.assertEqual(fetched_version['version'], data['version']) + self.assertEqual( + common.Version(fetched_version['version']['version']).standard(), + common.Version(data['version']['version']).standard() + ) def test_get_metadata(self): data = self.demo_manifests_source