diff --git a/README.md b/README.md index 23b1d07..17214ef 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,9 @@ Track changes in a [repo](https://gerrit.googlesource.com/git-repo/+/master/#rep * `jobs`: *Optional.* number of jobs to run in parallel (default: 0; based on number of CPU cores) Reduce this if you observe network errors. +* `check_jobs`: for check step only: number of jobs to run in parallel (default: jobs\*2, + 2 if jobs is undefined). + ### Example Resource configuration for a public project using repo (Android) diff --git a/repo_resource/check.py b/repo_resource/check.py index 68504b6..65f2bfd 100644 --- a/repo_resource/check.py +++ b/repo_resource/check.py @@ -33,13 +33,23 @@ def check(instream) -> list: config = common.source_config_from_payload(payload) + standard_versions = [] + for v in payload.get('versions', []): + standard_versions.append(common.Version(v['version']).standard()) + if config.private_key != '_invalid': common.add_private_key_to_agent(config.private_key) + jobs = config.jobs + check_jobs = config.check_jobs or jobs*2 or common.DEFAULT_CHECK_JOBS + try: - repo = common.Repo() - repo.init(config.url, config.revision, config.name, config.depth) - repo.sync(jobs=config.jobs) + repo = common.Repo(config.url, + config.revision, + config.name, + config.depth) + repo.init() + repo.update_manifest(jobs=check_jobs) version = repo.currentVersion() except Exception as e: raise e @@ -48,11 +58,9 @@ def check(instream) -> list: if config.private_key != '_invalid': common.remove_private_key_from_agent() - new_version = {'version': str(version)} - versions = payload.get('versions', []) - if versions.count(new_version) == 0: - versions.append(new_version) + if version.standard() not in standard_versions: + versions.append({'version': str(version)}) return versions diff --git a/repo_resource/common.py b/repo_resource/common.py index 074755f..0b20c87 100644 --- a/repo_resource/common.py +++ b/repo_resource/common.py @@ -12,17 +12,41 @@ import sys import tempfile import warnings +import git +import re +import xml.etree.ElementTree as ET from contextlib import redirect_stdout from pathlib import Path from typing import NamedTuple from urllib.parse import urlparse +from multiprocessing import Pool import ssh_agent_setup from repo import manifest_xml from repo import main as repo + +DEFAULT_CHECK_JOBS = 2 CACHEDIR = Path('/tmp/repo-resource-cache') +SHA1_PATTERN = re.compile(r'^[0-9a-f]{40}$') +EXCLUDE_ATTRS = {'dest-branch', 'upstream'} +# Elements available at +# https://gerrit.googlesource.com/git-repo/+/master/docs/manifest-format.md +TAGS = [ + 'remote', + 'default', + 'manifest-server', + 'project', + 'extend-project', + 'annotation', + 'copyfile', + 'linkfile', + 'remove-project', + 'include', + 'superproject', + 'contactinfo' +] def add_private_key_to_agent(private_key: str): @@ -56,6 +80,43 @@ def remove_private_key_from_agent(): atexit.unregister(ssh_agent_setup._kill_agent) +def is_sha1(s): + return re.match(SHA1_PATTERN, s) + + +def multi_run_wrapper(args): + return getRevision(*args) + + +def getRevision(remote, remoteUrl, project, branch): + """ + Get latest commit sha1 for revision + with git ls-remote command for each project + without downloading the whole repo + """ + # v1.0^{} is the commit referring to tag v1.0 + # git ls-remote returns the tag sha1 if left as is + if branch.startswith('refs/tags'): + branch += '^{}' + try: + with redirect_stdout(sys.stderr): + # return tuple (remote/project, revision) + print('Fetching revision for {}/{}...'.format(remote, project)) + if is_sha1(branch): + return (remote + '/' + project, branch) + g = git.cmd.Git() + url, revision = ( + remote + '/' + project, + g.ls_remote(remoteUrl+'/'+project, branch).split()[0] + ) + print('{}: {}'.format(url, revision)) + return (url, revision) + except Exception as e: + with redirect_stdout(sys.stderr): + print('Cannot fetch project {}/{}'.format(remoteUrl, project)) + print(e) + + class SourceConfiguration(NamedTuple): """ Supported source configuration items when configuring @@ -67,6 +128,7 @@ class SourceConfiguration(NamedTuple): private_key: str = '_invalid' depth: int = -1 jobs: int = 0 + check_jobs: int = DEFAULT_CHECK_JOBS def source_config_from_payload(payload): @@ -76,8 +138,10 @@ def source_config_from_payload(payload): p = SourceConfiguration(**payload['source']) source_url = urlparse(p.url) - if source_url.netloc == 'gitlab.com' and \ - (source_url.scheme == 'http' or source_url.scheme == 'https'): + if ( + source_url.netloc == 'gitlab.com' and + re.fullmatch('https?', source_url.scheme) + ): if not source_url.path.endswith('.git'): raise RuntimeError('gitlab http(s) urls must end with .git') @@ -105,6 +169,30 @@ def to_file(self, filename): def metadata(self) -> str: return '' + def standard(self) -> str: + try: + root = ET.fromstring(self.__version) + for element in root: + if element.tag not in TAGS: + root.remove(element) + # Sort entries in manifest by element position in TAGS + # Default 999 if element not found in TAGS table (comes last) + # and name alphabetically + sorted_xml = sorted(root, key=lambda x: ( + TAGS.index(x.tag) if x.tag in TAGS else 999, + x.get('name') or "")) + manifest = ET.Element('manifest') + manifest.extend(sorted_xml) + return ET.canonicalize( + ET.tostring(manifest), + strip_text=True, + exclude_attrs=EXCLUDE_ATTRS + ) + except ET.ParseError as e: + with redirect_stdout(sys.stderr): + print('Version is not valid xml') + raise e + def __repr__(self) -> str: return self.__version @@ -121,9 +209,16 @@ class Repo: such as init/sync and manifest """ - def __init__(self, workdir=CACHEDIR): + def __init__(self, url, revision='HEAD', name='default.xml', + depth=-1, workdir=CACHEDIR): self.__workdir = workdir self.__oldpwd = None + self.__url = url + self.__revision = revision + self.__name = name + self.__depth = depth + self.__version: Version = None + self.__remote = {} workdir.mkdir(parents=True, exist_ok=True) # gitrepo from https://github.com/grouperenault/gitrepo @@ -145,25 +240,33 @@ def __change_to_workdir(self): def __restore_oldpwd(self): os.chdir(self.__oldpwd) - def init(self, url, revision='HEAD', name='default.xml', depth=-1): + def __add_remote(self, remote, url): + self.__remote[remote] = url + + def __remote_url(self, remote): + return self.__remote[remote] + + def init(self): self.__change_to_workdir() try: # Google's repo prints a lot of information to stdout. # Concourse expects every logs to be emitted to stderr: - # https://concourse-ci.org/implementing-resource-types.html#implementing-resource-types + # https://concourse-ci.org/implementing-resource-types.html#implementing-resource-types # noqa: E501 with redirect_stdout(sys.stderr): repo_cmd = [ - '--no-pager', 'init', '--quiet', '--manifest-url', url, - '--manifest-name', name, - '--no-tags', + '--no-pager', 'init', '--quiet', '--manifest-url', + self.__url, '--manifest-name', + self.__name, '--no-tags', ] - if depth > 0: - repo_cmd.append('--depth={}'.format(depth)) + if self.__depth > 0: + repo_cmd.append('--depth={}'.format(self.__depth)) - if revision is not None: - repo_cmd.append('--manifest-branch={}'.format(revision)) + if self.__revision is not None: + repo_cmd.append( + '--manifest-branch={}'.format(self.__revision) + ) - print('Downloading manifest from {}'.format(url)) + print('Downloading manifest from {}'.format(self.__url)) repo._Main(repo_cmd) print('repo has been initialized in {}'.format(self.__workdir)) @@ -172,7 +275,7 @@ def init(self, url, revision='HEAD', name='default.xml', depth=-1): finally: self.__restore_oldpwd() - def sync(self, version: Version = None, jobs: int = 0): + def sync(self, version: Version, jobs: int = 0): self.__change_to_workdir() try: with redirect_stdout(sys.stderr): @@ -185,34 +288,34 @@ def sync(self, version: Version = None, jobs: int = 0): if jobs > 0: repo_cmd.append('--jobs={}'.format(jobs)) - if version is None: + with tempfile.TemporaryDirectory() as tmpdir: + tmp_manifest = os.path.join(tmpdir, 'manifest_tmp') + version.to_file(tmp_manifest) + repo_cmd.append( + '--manifest-name={}'.format(tmp_manifest)) repo._Main(repo_cmd) - else: - with tempfile.TemporaryDirectory() as tmpdir: - tmp_manifest = os.path.join(tmpdir, 'manifest_tmp') - version.to_file(tmp_manifest) - repo_cmd.append( - '--manifest-name={}'.format(tmp_manifest)) - repo._Main(repo_cmd) + if os.listdir(self.__workdir) == []: + raise Exception('Sync failed. Is manifest correct?') except Exception as e: raise (e) finally: self.__restore_oldpwd() + # Update self.__version after repo sync + def update_version(self): + with tempfile.TemporaryDirectory() as tmpdir: + tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot') + self.__manifest_out(tmp_manifest) + self.__version = Version.from_file(tmp_manifest) + def save_manifest(self, filename): with redirect_stdout(sys.stderr): full_path = self.__workdir / filename - current_version = self.currentVersion() print('Saving manifest to {}'.format(full_path)) - current_version.to_file(full_path) + self.__version.to_file(full_path) def currentVersion(self) -> Version: - with tempfile.TemporaryDirectory() as tmpdir: - tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot') - self.__manifest_out(tmp_manifest) - version = Version.from_file(tmp_manifest) - - return version + return self.__version def metadata(self): metadata = [] @@ -242,3 +345,66 @@ def __manifest_out(self, filename): raise (e) finally: self.__restore_oldpwd() + + def update_manifest(self, jobs): + projects = [] + + jobs = jobs or DEFAULT_CHECK_JOBS + self.__change_to_workdir() + try: + with redirect_stdout(sys.stderr): + print('Updating project revisions in manifest') + xml = ET.parse('.repo/manifests/'+self.__name) + manifest = xml.getroot() + + # Get default values from manifest + defaults = manifest.find('default') + if defaults is not None: + defaultRemote = defaults.get('remote') + defaultBranch = defaults.get('revision') + + for r in manifest.findall('remote'): + url = r.get('fetch').rstrip('/') + if not re.match("[a-zA-Z]+://", url): + url = re.sub('/[a-z-.]*$', '/', self.__url) + url + self.__add_remote(r.get('name'), url) + + for p in manifest.findall('project'): + project = p.get('name') + projectBranch = p.get('revision') or defaultBranch + projectRemote = p.get('remote') or defaultRemote + projectRemoteUrl = self.__remote_url(projectRemote) + projects.append((projectRemote, projectRemoteUrl, + project, projectBranch)) + + with Pool(jobs) as pool: + revisionList = pool.map(multi_run_wrapper, projects) + # Convert (remote/project, revision) tuple list + # to hash table dict[remote/project]=revision + revisionTable = dict((proj, rev) for proj, rev in revisionList) + + # Update revisions + for p in manifest.findall('project'): + project = p.get('name') + projectRemote = p.get('remote') or defaultRemote + p.set('revision', revisionTable[projectRemote+'/'+project]) + + self.__version = Version( + ET.canonicalize( + ET.tostring(manifest, encoding='unicode'), + strip_text=True + ) + ) + + except FileNotFoundError as e: + with redirect_stdout(sys.stderr): + print('cannot open', '.repo/manifests/'+self.__name) + raise e + except TypeError as e: + with redirect_stdout(sys.stderr): + print('Error fetching some project repo') + raise e + except Exception as e: + raise e + finally: + self.__restore_oldpwd() diff --git a/repo_resource/in_.py b/repo_resource/in_.py index 11dc7b5..8aafe21 100644 --- a/repo_resource/in_.py +++ b/repo_resource/in_.py @@ -33,15 +33,22 @@ def in_(instream, dest_dir='.'): config = common.source_config_from_payload(payload) requested_version = common.Version(payload['version']['version']) + # Check version is valid xml + common.Version(payload['version']['version']).standard() + + standard_versions = [] + for v in payload.get('versions', []): + standard_versions.append(common.Version(v['version']).standard()) + if config.private_key != '_invalid': common.add_private_key_to_agent(config.private_key) try: - repo = common.Repo(workdir=Path(dest_dir)) - - repo.init(config.url, config.revision, config.name, config.depth) + repo = common.Repo(config.url, config.revision, + config.name, config.depth, workdir=Path(dest_dir)) + repo.init() repo.sync(requested_version, config.jobs) - fetched_version = repo.currentVersion() + repo.update_version() except Exception as e: raise e finally: @@ -49,15 +56,12 @@ def in_(instream, dest_dir='.'): if config.private_key != '_invalid': common.remove_private_key_from_agent() - if fetched_version != requested_version: - raise RuntimeError('Could not fetch requested version') - # save a copy of the manifest alongside the sources repo.save_manifest('.repo_manifest.xml') metadata = repo.metadata() - return {"version": {"version": str(fetched_version)}, + return {"version": {"version": str(requested_version)}, "metadata": metadata} diff --git a/repo_resource/requirements.txt b/repo_resource/requirements.txt index 5f65a01..54349a8 100644 --- a/repo_resource/requirements.txt +++ b/repo_resource/requirements.txt @@ -1,2 +1,3 @@ -gitrepo==2.31.1 +gitrepo==2.32.2 ssh-agent-setup==2.0.1 +GitPython==3.1.31 diff --git a/repo_resource/test_check.py b/repo_resource/test_check.py index 039a966..51e50d8 100644 --- a/repo_resource/test_check.py +++ b/repo_resource/test_check.py @@ -11,6 +11,7 @@ from timeit import default_timer as timer import shutil import repo +import xml.etree.ElementTree as ET from . import check from . import common @@ -31,6 +32,12 @@ def setUp(self): 'name': 'aosp_device_fixed.xml' } } + self.demo_manifests_source_norev = { + 'source': { + 'url': 'https://github.com/makohoek/demo-manifests.git', + 'name': 'aosp_device_fixed.xml' + } + } self.demo_ssh_manifests_source = { 'source': { 'url': 'https://github.com/makohoek/demo-manifests.git', @@ -94,8 +101,7 @@ def test_unknown_manifest_name(self): check.check(instream) def test_branch_defaults_to_HEAD(self): - no_revision_data = self.demo_manifests_source - no_revision_data['source']['revision'] = None + no_revision_data = self.demo_manifests_source_norev instream = StringIO(json.dumps(no_revision_data)) check.check(instream) @@ -108,10 +114,8 @@ def test_manifest_name_defaults(self): } instream = StringIO(json.dumps(d)) check.check(instream) - # no assert/assumption to call. repo init and sync should - # just be called. maybe we can check for a file as well - readme = common.CACHEDIR / 'fetch_artifact' / 'README.md' - self.assertTrue(readme.exists()) + manifests = common.CACHEDIR / '.repo' / 'manifests' + self.assertTrue(manifests.exists()) # so here, we init from a public manifest # init is completely working fine @@ -122,7 +126,7 @@ def test_unreachable_projects_in_manifest(self): unreachable_projects_data['source']['name'] = 'unreachable_project.xml' instream = StringIO(json.dumps(unreachable_projects_data)) - with self.assertRaises(SystemExit): + with self.assertRaises(TypeError): check.check(instream) def test_first_revision(self): @@ -136,7 +140,7 @@ def test_same_revision(self): data = self.demo_manifests_source data['versions'] = [{ 'version': - '\n\n \n \n \n \n \n\n' # noqa: E501 + '\n\n \n \n \n \n \n\n' # noqa: E501 }] instream = StringIO(json.dumps(data)) versions = check.check(instream) @@ -153,7 +157,7 @@ def test_known_version(self): # we passed no version as input, so we should just get current version self.assertEqual(len(versions), 1) # and we know that version - expected_version = '\n\n \n \n \n \n \n\n' # noqa: E501 + expected_version = '' # noqa: E501 version = versions[0]['version'] self.assertEqual(version, expected_version) @@ -161,14 +165,24 @@ def test_known_version(self): # but we use a newer version (using a different git branch) def test_new_revision(self): data = self.demo_manifests_source - data['versions'] = [{'version': 'older-shasum'}] + data['versions'] = [{ + 'version': + '' # noqa: E501 + }] instream = StringIO(json.dumps(data)) versions = check.check(instream) self.assertEqual(len(versions), 2) - expected_version = '\n\n \n \n \n \n \n\n' # noqa: E501 + expected_version = '' # noqa: E501 newest_version = versions[-1]['version'] self.assertEqual(newest_version, expected_version) + def test_invalid_revision(self): + data = self.demo_manifests_source + data['versions'] = [{'version': 'invalid-version'}] + instream = StringIO(json.dumps(data)) + with self.assertRaises(ET.ParseError): + check.check(instream) + @unittest.skipUnless( Path('development/ssh/test_key').exists(), "requires ssh test key") def test_ssh_private_key(self): @@ -248,7 +262,7 @@ def test_ssh_private_key_without_project_access(self): instream = StringIO(json.dumps(data)) versions = [] - with self.assertRaises(SystemExit): + with self.assertRaises(TypeError): versions = check.check(instream) self.assertEqual(len(versions), 0) diff --git a/repo_resource/test_in.py b/repo_resource/test_in.py index 00626c7..0df7fba 100644 --- a/repo_resource/test_in.py +++ b/repo_resource/test_in.py @@ -9,8 +9,7 @@ import shutil import unittest from pathlib import Path - -import repo +import xml.etree.ElementTree as ET from . import check from . import common @@ -18,7 +17,6 @@ class TestIn(unittest.TestCase): - def setUp(self): self.demo_manifests_source = { 'source': { @@ -44,7 +42,7 @@ def test_fails_on_invalid_version(self): data = self.demo_manifests_source data['version'] = {'version': 'invalid-version'} instream = StringIO(json.dumps(data)) - with self.assertRaises(repo.error.GitError): + with self.assertRaises(ET.ParseError): in_.in_(instream, str(common.CACHEDIR)) def test_dest_dir_is_created(self): @@ -59,6 +57,30 @@ def test_dest_dir_is_created(self): self.assertTrue(common.CACHEDIR.exists()) + def test_sync_ok(self): + data = { + 'source': { + 'url': 'https://android.googlesource.com/tools/manifest', + 'revision': 'fetch_artifact-dev' + }, + } + data['version'] = { + 'version': + '\n\n\n\n\n\n' # noqa: E501 + } + instream = StringIO(json.dumps(data)) + in_.in_(instream, str(common.CACHEDIR)) + # no assert/assumption to call. repo init and sync should + # just be called. maybe we can check for a file as well + readme = common.CACHEDIR / 'fetch_artifact' / 'README.md' + self.assertTrue(readme.exists()) + + def test_no_manifest_version(self): + data = self.demo_manifests_source + instream = StringIO(json.dumps(data)) + with self.assertRaises(KeyError): + in_.in_(instream, str(common.CACHEDIR)) + def test_valid_in(self): data = self.demo_manifests_source data['version'] = { @@ -69,7 +91,10 @@ def test_valid_in(self): instream = StringIO(json.dumps(data)) fetched_version = in_.in_(instream, str(common.CACHEDIR)) - self.assertEqual(fetched_version['version'], data['version']) + self.assertEqual( + common.Version(fetched_version['version']['version']).standard(), + common.Version(data['version']['version']).standard() + ) def test_get_metadata(self): data = self.demo_manifests_source