diff --git a/README.md b/README.md
index 23b1d07..17214ef 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,9 @@ Track changes in a [repo](https://gerrit.googlesource.com/git-repo/+/master/#rep
* `jobs`: *Optional.* number of jobs to run in parallel (default: 0; based on number of CPU cores)
Reduce this if you observe network errors.
+* `check_jobs`: *Optional.* number of jobs to run in parallel during the check step only
+  (default: `jobs`\*2, or 2 if `jobs` is undefined).
+
### Example
Resource configuration for a public project using repo (Android)
diff --git a/repo_resource/check.py b/repo_resource/check.py
index 68504b6..65f2bfd 100644
--- a/repo_resource/check.py
+++ b/repo_resource/check.py
@@ -33,13 +33,23 @@ def check(instream) -> list:
config = common.source_config_from_payload(payload)
+ standard_versions = []
+ for v in payload.get('versions', []):
+ standard_versions.append(common.Version(v['version']).standard())
+
if config.private_key != '_invalid':
common.add_private_key_to_agent(config.private_key)
+ jobs = config.jobs
+ check_jobs = config.check_jobs or jobs*2 or common.DEFAULT_CHECK_JOBS
+
try:
- repo = common.Repo()
- repo.init(config.url, config.revision, config.name, config.depth)
- repo.sync(jobs=config.jobs)
+ repo = common.Repo(config.url,
+ config.revision,
+ config.name,
+ config.depth)
+ repo.init()
+ repo.update_manifest(jobs=check_jobs)
version = repo.currentVersion()
except Exception as e:
raise e
@@ -48,11 +58,9 @@ def check(instream) -> list:
if config.private_key != '_invalid':
common.remove_private_key_from_agent()
- new_version = {'version': str(version)}
-
versions = payload.get('versions', [])
- if versions.count(new_version) == 0:
- versions.append(new_version)
+ if version.standard() not in standard_versions:
+ versions.append({'version': str(version)})
return versions
diff --git a/repo_resource/common.py b/repo_resource/common.py
index 074755f..0b20c87 100644
--- a/repo_resource/common.py
+++ b/repo_resource/common.py
@@ -12,17 +12,41 @@
import sys
import tempfile
import warnings
+import git
+import re
+import xml.etree.ElementTree as ET
from contextlib import redirect_stdout
from pathlib import Path
from typing import NamedTuple
from urllib.parse import urlparse
+from multiprocessing import Pool
import ssh_agent_setup
from repo import manifest_xml
from repo import main as repo
+
+DEFAULT_CHECK_JOBS = 2
CACHEDIR = Path('/tmp/repo-resource-cache')
+SHA1_PATTERN = re.compile(r'^[0-9a-f]{40}$')
+EXCLUDE_ATTRS = {'dest-branch', 'upstream'}
+# Elements available at
+# https://gerrit.googlesource.com/git-repo/+/master/docs/manifest-format.md
+TAGS = [
+ 'remote',
+ 'default',
+ 'manifest-server',
+ 'project',
+ 'extend-project',
+ 'annotation',
+ 'copyfile',
+ 'linkfile',
+ 'remove-project',
+ 'include',
+ 'superproject',
+ 'contactinfo'
+]
def add_private_key_to_agent(private_key: str):
@@ -56,6 +80,43 @@ def remove_private_key_from_agent():
atexit.unregister(ssh_agent_setup._kill_agent)
+def is_sha1(s):  # match object if s is a 40-hex-digit sha1, else None
+    return SHA1_PATTERN.match(s)
+
+
+def multi_run_wrapper(args):  # Pool.map gives one tuple per call: unpack it
+ return getRevision(*args)
+
+
+def getRevision(remote, remoteUrl, project, branch):
+    """
+    Resolve the commit sha1 that a project revision points to.
+
+    Returns a (remote/project, sha1) tuple obtained with git ls-remote,
+    i.e. without downloading the repo. On failure the error is printed
+    to stderr and None is returned implicitly.
+    """
+    # git ls-remote gives the sha1 of the tag itself for refs/tags/v1.0;
+    # the '^{}' suffix selects the commit the tag refers to
+    if branch.startswith('refs/tags'):
+        branch = branch + '^{}'
+    try:
+        with redirect_stdout(sys.stderr):
+            print('Fetching revision for {}/{}...'.format(remote, project))
+            key = remote + '/' + project
+            if is_sha1(branch):
+                return (key, branch)
+            listing = git.cmd.Git().ls_remote(remoteUrl+'/'+project, branch)
+            # the first whitespace-separated field of the output is the sha1
+            revision = listing.split()[0]
+            print('{}: {}'.format(key, revision))
+            return (key, revision)
+    except Exception as e:
+        with redirect_stdout(sys.stderr):
+            print('Cannot fetch project {}/{}'.format(remoteUrl, project))
+            print(e)
+
+
class SourceConfiguration(NamedTuple):
"""
Supported source configuration items when configuring
@@ -67,6 +128,7 @@ class SourceConfiguration(NamedTuple):
private_key: str = '_invalid'
depth: int = -1
jobs: int = 0
+    check_jobs: int = 0  # 0 = unset: check() then derives it from jobs
def source_config_from_payload(payload):
@@ -76,8 +138,10 @@ def source_config_from_payload(payload):
p = SourceConfiguration(**payload['source'])
source_url = urlparse(p.url)
- if source_url.netloc == 'gitlab.com' and \
- (source_url.scheme == 'http' or source_url.scheme == 'https'):
+ if (
+ source_url.netloc == 'gitlab.com' and
+ re.fullmatch('https?', source_url.scheme)
+ ):
if not source_url.path.endswith('.git'):
raise RuntimeError('gitlab http(s) urls must end with .git')
@@ -105,6 +169,30 @@ def to_file(self, filename):
def metadata(self) -> str:
return ''
+    def standard(self) -> str:
+        """Return a canonical form of this manifest, for comparisons.
+
+        Drops elements not in TAGS and the EXCLUDE_ATTRS attributes,
+        sorts by TAGS order then name. Raises ET.ParseError on bad XML.
+        """
+        try:
+            root = ET.fromstring(self.__version)
+        except ET.ParseError:
+            with redirect_stdout(sys.stderr):
+                print('Version is not valid xml')
+            raise
+        # Build a filtered list instead of calling root.remove() while
+        # iterating over root, which skips the element after each removal
+        known = [e for e in root if e.tag in TAGS]
+        known.sort(key=lambda x: (TAGS.index(x.tag), x.get('name') or ""))
+        manifest = ET.Element('manifest')
+        manifest.extend(known)
+        return ET.canonicalize(
+            ET.tostring(manifest),
+            strip_text=True,
+            exclude_attrs=EXCLUDE_ATTRS
+        )
+
def __repr__(self) -> str:
return self.__version
@@ -121,9 +209,16 @@ class Repo:
such as init/sync and manifest
"""
- def __init__(self, workdir=CACHEDIR):
+ def __init__(self, url, revision='HEAD', name='default.xml',
+ depth=-1, workdir=CACHEDIR):
self.__workdir = workdir
self.__oldpwd = None
+ self.__url = url
+ self.__revision = revision
+ self.__name = name
+ self.__depth = depth
+ self.__version: Version = None
+ self.__remote = {}
workdir.mkdir(parents=True, exist_ok=True)
# gitrepo from https://github.com/grouperenault/gitrepo
@@ -145,25 +240,33 @@ def __change_to_workdir(self):
def __restore_oldpwd(self):
os.chdir(self.__oldpwd)
- def init(self, url, revision='HEAD', name='default.xml', depth=-1):
+ def __add_remote(self, remote, url):  # remember a remote's URL by name
+ self.__remote[remote] = url
+
+ def __remote_url(self, remote):  # URL stored by __add_remote, else KeyError
+ return self.__remote[remote]
+
+ def init(self):
self.__change_to_workdir()
try:
# Google's repo prints a lot of information to stdout.
# Concourse expects every logs to be emitted to stderr:
- # https://concourse-ci.org/implementing-resource-types.html#implementing-resource-types
+ # https://concourse-ci.org/implementing-resource-types.html#implementing-resource-types # noqa: E501
with redirect_stdout(sys.stderr):
repo_cmd = [
- '--no-pager', 'init', '--quiet', '--manifest-url', url,
- '--manifest-name', name,
- '--no-tags',
+ '--no-pager', 'init', '--quiet', '--manifest-url',
+ self.__url, '--manifest-name',
+ self.__name, '--no-tags',
]
- if depth > 0:
- repo_cmd.append('--depth={}'.format(depth))
+ if self.__depth > 0:
+ repo_cmd.append('--depth={}'.format(self.__depth))
- if revision is not None:
- repo_cmd.append('--manifest-branch={}'.format(revision))
+ if self.__revision is not None:
+ repo_cmd.append(
+ '--manifest-branch={}'.format(self.__revision)
+ )
- print('Downloading manifest from {}'.format(url))
+ print('Downloading manifest from {}'.format(self.__url))
repo._Main(repo_cmd)
print('repo has been initialized in {}'.format(self.__workdir))
@@ -172,7 +275,7 @@ def init(self, url, revision='HEAD', name='default.xml', depth=-1):
finally:
self.__restore_oldpwd()
- def sync(self, version: Version = None, jobs: int = 0):
+ def sync(self, version: Version, jobs: int = 0):
self.__change_to_workdir()
try:
with redirect_stdout(sys.stderr):
@@ -185,34 +288,34 @@ def sync(self, version: Version = None, jobs: int = 0):
if jobs > 0:
repo_cmd.append('--jobs={}'.format(jobs))
- if version is None:
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmp_manifest = os.path.join(tmpdir, 'manifest_tmp')
+ version.to_file(tmp_manifest)
+ repo_cmd.append(
+ '--manifest-name={}'.format(tmp_manifest))
repo._Main(repo_cmd)
- else:
- with tempfile.TemporaryDirectory() as tmpdir:
- tmp_manifest = os.path.join(tmpdir, 'manifest_tmp')
- version.to_file(tmp_manifest)
- repo_cmd.append(
- '--manifest-name={}'.format(tmp_manifest))
- repo._Main(repo_cmd)
+ if os.listdir(self.__workdir) == []:
+ raise Exception('Sync failed. Is manifest correct?')
except Exception as e:
raise (e)
finally:
self.__restore_oldpwd()
+ # Call after repo sync: reloads self.__version from a __manifest_out() file
+ def update_version(self):
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot')
+ self.__manifest_out(tmp_manifest)
+ self.__version = Version.from_file(tmp_manifest)
+
def save_manifest(self, filename):
with redirect_stdout(sys.stderr):
full_path = self.__workdir / filename
- current_version = self.currentVersion()
print('Saving manifest to {}'.format(full_path))
- current_version.to_file(full_path)
+ self.__version.to_file(full_path)
def currentVersion(self) -> Version:
- with tempfile.TemporaryDirectory() as tmpdir:
- tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot')
- self.__manifest_out(tmp_manifest)
- version = Version.from_file(tmp_manifest)
-
- return version
+ return self.__version
def metadata(self):
metadata = []
@@ -242,3 +345,66 @@ def __manifest_out(self, filename):
raise (e)
finally:
self.__restore_oldpwd()
+
+    def update_manifest(self, jobs):
+        """Store a Version with every project pinned to its remote sha1.
+
+        Revisions come from git ls-remote run in a pool of `jobs`
+        processes; raises TypeError when one cannot be fetched.
+        """
+        jobs = jobs or DEFAULT_CHECK_JOBS
+        self.__change_to_workdir()
+        try:
+            with redirect_stdout(sys.stderr):
+                print('Updating project revisions in manifest')
+                xml = ET.parse('.repo/manifests/'+self.__name)
+                manifest = xml.getroot()
+
+                # <default> is optional: never leave these names unbound
+                defaults = manifest.find('default')
+                attrs = {} if defaults is None else defaults.attrib
+                defaultRemote = attrs.get('remote')
+                defaultBranch = attrs.get('revision')
+
+                for r in manifest.findall('remote'):
+                    url = r.get('fetch').rstrip('/')
+                    if not re.match("[a-zA-Z]+://", url):
+                        url = re.sub('/[a-z-.]*$', '/', self.__url) + url
+                    self.__add_remote(r.get('name'), url)
+
+                projects = []
+                for p in manifest.findall('project'):
+                    project = p.get('name')
+                    projectBranch = p.get('revision') or defaultBranch
+                    projectRemote = p.get('remote') or defaultRemote
+                    projectRemoteUrl = self.__remote_url(projectRemote)
+                    projects.append((projectRemote, projectRemoteUrl,
+                                     project, projectBranch))
+
+                with Pool(jobs) as pool:
+                    revisionList = pool.map(multi_run_wrapper, projects)
+                # getRevision yields None on failure, which makes dict()
+                # raise the TypeError handled below
+                revisionTable = dict(revisionList)
+
+                for p in manifest.findall('project'):
+                    project = p.get('name')
+                    projectRemote = p.get('remote') or defaultRemote
+                    p.set('revision', revisionTable[projectRemote+'/'+project])
+
+                self.__version = Version(
+                    ET.canonicalize(
+                        ET.tostring(manifest, encoding='unicode'),
+                        strip_text=True
+                    )
+                )
+        except FileNotFoundError as e:
+            with redirect_stdout(sys.stderr):
+                print('cannot open', '.repo/manifests/'+self.__name)
+            raise e
+        except TypeError as e:
+            with redirect_stdout(sys.stderr):
+                print('Error fetching some project repo')
+            raise e
+        finally:
+            self.__restore_oldpwd()
diff --git a/repo_resource/in_.py b/repo_resource/in_.py
index 11dc7b5..8aafe21 100644
--- a/repo_resource/in_.py
+++ b/repo_resource/in_.py
@@ -33,15 +33,22 @@ def in_(instream, dest_dir='.'):
config = common.source_config_from_payload(payload)
requested_version = common.Version(payload['version']['version'])
+ # Check version is valid xml
+ common.Version(payload['version']['version']).standard()
+
+ standard_versions = []
+ for v in payload.get('versions', []):
+ standard_versions.append(common.Version(v['version']).standard())
+
if config.private_key != '_invalid':
common.add_private_key_to_agent(config.private_key)
try:
- repo = common.Repo(workdir=Path(dest_dir))
-
- repo.init(config.url, config.revision, config.name, config.depth)
+ repo = common.Repo(config.url, config.revision,
+ config.name, config.depth, workdir=Path(dest_dir))
+ repo.init()
repo.sync(requested_version, config.jobs)
- fetched_version = repo.currentVersion()
+ repo.update_version()
except Exception as e:
raise e
finally:
@@ -49,15 +56,12 @@ def in_(instream, dest_dir='.'):
if config.private_key != '_invalid':
common.remove_private_key_from_agent()
- if fetched_version != requested_version:
- raise RuntimeError('Could not fetch requested version')
-
# save a copy of the manifest alongside the sources
repo.save_manifest('.repo_manifest.xml')
metadata = repo.metadata()
- return {"version": {"version": str(fetched_version)},
+ return {"version": {"version": str(requested_version)},
"metadata": metadata}
diff --git a/repo_resource/requirements.txt b/repo_resource/requirements.txt
index 5f65a01..54349a8 100644
--- a/repo_resource/requirements.txt
+++ b/repo_resource/requirements.txt
@@ -1,2 +1,3 @@
-gitrepo==2.31.1
+gitrepo==2.32.2
ssh-agent-setup==2.0.1
+GitPython==3.1.31
diff --git a/repo_resource/test_check.py b/repo_resource/test_check.py
index 039a966..51e50d8 100644
--- a/repo_resource/test_check.py
+++ b/repo_resource/test_check.py
@@ -11,6 +11,7 @@
from timeit import default_timer as timer
import shutil
import repo
+import xml.etree.ElementTree as ET
from . import check
from . import common
@@ -31,6 +32,12 @@ def setUp(self):
'name': 'aosp_device_fixed.xml'
}
}
+ self.demo_manifests_source_norev = {
+ 'source': {
+ 'url': 'https://github.com/makohoek/demo-manifests.git',
+ 'name': 'aosp_device_fixed.xml'
+ }
+ }
self.demo_ssh_manifests_source = {
'source': {
'url': 'https://github.com/makohoek/demo-manifests.git',
@@ -94,8 +101,7 @@ def test_unknown_manifest_name(self):
check.check(instream)
def test_branch_defaults_to_HEAD(self):
- no_revision_data = self.demo_manifests_source
- no_revision_data['source']['revision'] = None
+ no_revision_data = self.demo_manifests_source_norev
instream = StringIO(json.dumps(no_revision_data))
check.check(instream)
@@ -108,10 +114,8 @@ def test_manifest_name_defaults(self):
}
instream = StringIO(json.dumps(d))
check.check(instream)
- # no assert/assumption to call. repo init and sync should
- # just be called. maybe we can check for a file as well
- readme = common.CACHEDIR / 'fetch_artifact' / 'README.md'
- self.assertTrue(readme.exists())
+ manifests = common.CACHEDIR / '.repo' / 'manifests'
+ self.assertTrue(manifests.exists())
# so here, we init from a public manifest
# init is completely working fine
@@ -122,7 +126,7 @@ def test_unreachable_projects_in_manifest(self):
unreachable_projects_data['source']['name'] = 'unreachable_project.xml'
instream = StringIO(json.dumps(unreachable_projects_data))
- with self.assertRaises(SystemExit):
+ with self.assertRaises(TypeError):
check.check(instream)
def test_first_revision(self):
@@ -136,7 +140,7 @@ def test_same_revision(self):
data = self.demo_manifests_source
data['versions'] = [{
'version':
- '\n\n \n \n \n \n \n\n' # noqa: E501
+ '\n\n \n \n \n \n \n\n' # noqa: E501
}]
instream = StringIO(json.dumps(data))
versions = check.check(instream)
@@ -153,7 +157,7 @@ def test_known_version(self):
# we passed no version as input, so we should just get current version
self.assertEqual(len(versions), 1)
# and we know that version
- expected_version = '\n\n \n \n \n \n \n\n' # noqa: E501
+ expected_version = '' # noqa: E501
version = versions[0]['version']
self.assertEqual(version, expected_version)
@@ -161,14 +165,24 @@ def test_known_version(self):
# but we use a newer version (using a different git branch)
def test_new_revision(self):
data = self.demo_manifests_source
- data['versions'] = [{'version': 'older-shasum'}]
+ data['versions'] = [{
+ 'version':
+ '' # noqa: E501
+ }]
instream = StringIO(json.dumps(data))
versions = check.check(instream)
self.assertEqual(len(versions), 2)
- expected_version = '\n\n \n \n \n \n \n\n' # noqa: E501
+ expected_version = '' # noqa: E501
newest_version = versions[-1]['version']
self.assertEqual(newest_version, expected_version)
+ def test_invalid_revision(self):  # a known version that is not XML
+ data = self.demo_manifests_source
+ data['versions'] = [{'version': 'invalid-version'}]
+ instream = StringIO(json.dumps(data))
+ with self.assertRaises(ET.ParseError):
+ check.check(instream)
+
@unittest.skipUnless(
Path('development/ssh/test_key').exists(), "requires ssh test key")
def test_ssh_private_key(self):
@@ -248,7 +262,7 @@ def test_ssh_private_key_without_project_access(self):
instream = StringIO(json.dumps(data))
versions = []
- with self.assertRaises(SystemExit):
+ with self.assertRaises(TypeError):
versions = check.check(instream)
self.assertEqual(len(versions), 0)
diff --git a/repo_resource/test_in.py b/repo_resource/test_in.py
index 00626c7..0df7fba 100644
--- a/repo_resource/test_in.py
+++ b/repo_resource/test_in.py
@@ -9,8 +9,7 @@
import shutil
import unittest
from pathlib import Path
-
-import repo
+import xml.etree.ElementTree as ET
from . import check
from . import common
@@ -18,7 +17,6 @@
class TestIn(unittest.TestCase):
-
def setUp(self):
self.demo_manifests_source = {
'source': {
@@ -44,7 +42,7 @@ def test_fails_on_invalid_version(self):
data = self.demo_manifests_source
data['version'] = {'version': 'invalid-version'}
instream = StringIO(json.dumps(data))
- with self.assertRaises(repo.error.GitError):
+ with self.assertRaises(ET.ParseError):
in_.in_(instream, str(common.CACHEDIR))
def test_dest_dir_is_created(self):
@@ -59,6 +57,30 @@ def test_dest_dir_is_created(self):
self.assertTrue(common.CACHEDIR.exists())
+ def test_sync_ok(self):
+ data = {
+ 'source': {
+ 'url': 'https://android.googlesource.com/tools/manifest',
+ 'revision': 'fetch_artifact-dev'
+ },
+ }
+ data['version'] = {
+ 'version':
+ '\n\n\n\n\n\n' # noqa: E501
+ }
+ instream = StringIO(json.dumps(data))
+ in_.in_(instream, str(common.CACHEDIR))
+ # in_ was given CACHEDIR as destination: after init and sync the
+ # README.md of the fetch_artifact project is expected to exist there
+ readme = common.CACHEDIR / 'fetch_artifact' / 'README.md'
+ self.assertTrue(readme.exists())
+
+ def test_no_manifest_version(self):  # data['version'] is never set here
+ data = self.demo_manifests_source
+ instream = StringIO(json.dumps(data))
+ with self.assertRaises(KeyError):
+ in_.in_(instream, str(common.CACHEDIR))
+
def test_valid_in(self):
data = self.demo_manifests_source
data['version'] = {
@@ -69,7 +91,10 @@ def test_valid_in(self):
instream = StringIO(json.dumps(data))
fetched_version = in_.in_(instream, str(common.CACHEDIR))
- self.assertEqual(fetched_version['version'], data['version'])
+ self.assertEqual(
+ common.Version(fetched_version['version']['version']).standard(),
+ common.Version(data['version']['version']).standard()
+ )
def test_get_metadata(self):
data = self.demo_manifests_source