Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent checks from downloading whole repos #28

Merged
merged 7 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ Track changes in a [repo](https://gerrit.googlesource.com/git-repo/+/master/#rep
* `jobs`: *Optional.* number of jobs to run in parallel (default: 0; based on number of CPU cores)
Reduce this if you observe network errors.

* `check_jobs`: *Optional.* For the check step only: number of jobs to run in parallel
(default: `jobs`\*2, or 2 if `jobs` is undefined).

### Example

Resource configuration for a public project using repo (Android)
Expand Down
22 changes: 15 additions & 7 deletions repo_resource/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,23 @@ def check(instream) -> list:

config = common.source_config_from_payload(payload)

standard_versions = []
for v in payload.get('versions', []):
standard_versions.append(common.Version(v['version']).standard())

if config.private_key != '_invalid':
common.add_private_key_to_agent(config.private_key)

jobs = config.jobs
check_jobs = config.check_jobs or jobs*2 or common.DEFAULT_CHECK_JOBS

try:
repo = common.Repo()
repo.init(config.url, config.revision, config.name, config.depth)
repo.sync(jobs=config.jobs)
repo = common.Repo(config.url,
makohoek marked this conversation as resolved.
Show resolved Hide resolved
config.revision,
config.name,
config.depth)
repo.init()
repo.update_manifest(jobs=check_jobs)
version = repo.currentVersion()
except Exception as e:
raise e
Expand All @@ -48,11 +58,9 @@ def check(instream) -> list:
if config.private_key != '_invalid':
common.remove_private_key_from_agent()

new_version = {'version': str(version)}

versions = payload.get('versions', [])
if versions.count(new_version) == 0:
versions.append(new_version)
if version.standard() not in standard_versions:
versions.append({'version': str(version)})

return versions

Expand Down
226 changes: 196 additions & 30 deletions repo_resource/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,41 @@
import sys
import tempfile
import warnings
import git
import re
import xml.etree.ElementTree as ET

from contextlib import redirect_stdout
from pathlib import Path
from typing import NamedTuple
from urllib.parse import urlparse
from multiprocessing import Pool

import ssh_agent_setup
from repo import manifest_xml
from repo import main as repo


# Fallback number of parallel jobs for the check step.
DEFAULT_CHECK_JOBS = 2

# Default working directory in which the repo client is initialised.
CACHEDIR = Path('/tmp/repo-resource-cache')

# A full, lowercase, 40-hex-digit git object name.
SHA1_PATTERN = re.compile(r'^[0-9a-f]{40}$')

# Attributes left out when a manifest is canonicalised for comparison.
EXCLUDE_ATTRS = {'dest-branch', 'upstream'}

# Manifest elements kept when normalising a version; the position in this
# list is also the sort order of the elements in the normalised manifest.
# Elements available at
# https://gerrit.googlesource.com/git-repo/+/master/docs/manifest-format.md
TAGS = [
    'remote', 'default', 'manifest-server',
    'project', 'extend-project', 'annotation',
    'copyfile', 'linkfile', 'remove-project',
    'include', 'superproject', 'contactinfo',
]


def add_private_key_to_agent(private_key: str):
Expand Down Expand Up @@ -56,6 +80,43 @@ def remove_private_key_from_agent():
atexit.unregister(ssh_agent_setup._kill_agent)


def is_sha1(s):
    """
    Tell whether s is a complete git sha1 (40 lowercase hex digits).

    Returns a real bool. fullmatch is used so that a string with a
    trailing newline is rejected ('$' alone would accept it).
    """
    return SHA1_PATTERN.fullmatch(s) is not None


def multi_run_wrapper(args):
    """
    Adapter for Pool.map, which passes a single argument per task:
    unpack the (remote, remoteUrl, project, branch) tuple and forward
    it to getRevision, returning whatever getRevision returns.
    """
    return getRevision(*args)


def getRevision(remote, remoteUrl, project, branch):
    """
    Get latest commit sha1 for revision
    with git ls-remote command for each project
    without downloading the whole repo

    remote:    name of the manifest remote the project belongs to
    remoteUrl: fetch url of that remote (without trailing '/')
    project:   project name, appended to remoteUrl to build the git url
    branch:    revision to resolve (branch, refs/tags/..., or a sha1)

    Returns a tuple ('<remote>/<project>', sha1).
    Returns None when the revision cannot be resolved; the reason is
    printed to stderr and the caller is expected to check for None.
    """
    # v1.0^{} is the commit referring to tag v1.0
    # git ls-remote returns the tag sha1 if left as is
    if branch.startswith('refs/tags'):
        branch += '^{}'
    try:
        with redirect_stdout(sys.stderr):
            # return tuple (remote/project, revision)
            print('Fetching revision for {}/{}...'.format(remote, project))
            url = remote + '/' + project
            if is_sha1(branch):
                # Already pinned to a commit: nothing to ask the server
                return (url, branch)
            g = git.cmd.Git()
            fields = g.ls_remote(remoteUrl+'/'+project, branch).split()
            if not fields:
                # Empty output: no ref matched. Say so instead of
                # failing later with an opaque IndexError.
                raise LookupError(
                    'No ref matching {} found'.format(branch))
            # First field of the first output line is the sha1
            revision = fields[0]
            print('{}: {}'.format(url, revision))
            return (url, revision)
    except Exception as e:
        # Best effort: report on stderr and let the caller decide
        with redirect_stdout(sys.stderr):
            print('Cannot fetch project {}/{}'.format(remoteUrl, project))
            print(e)
    return None


class SourceConfiguration(NamedTuple):
"""
Supported source configuration items when configuring
Expand All @@ -67,6 +128,7 @@ class SourceConfiguration(NamedTuple):
private_key: str = '_invalid'
depth: int = -1
jobs: int = 0
check_jobs: int = DEFAULT_CHECK_JOBS


def source_config_from_payload(payload):
Expand All @@ -76,8 +138,10 @@ def source_config_from_payload(payload):
p = SourceConfiguration(**payload['source'])
source_url = urlparse(p.url)

if source_url.netloc == 'gitlab.com' and \
(source_url.scheme == 'http' or source_url.scheme == 'https'):
if (
source_url.netloc == 'gitlab.com' and
re.fullmatch('https?', source_url.scheme)
david-baylibre marked this conversation as resolved.
Show resolved Hide resolved
):
if not source_url.path.endswith('.git'):
raise RuntimeError('gitlab http(s) urls must end with .git')

Expand Down Expand Up @@ -105,6 +169,30 @@ def to_file(self, filename):
def metadata(self) -> str:
return ''

def standard(self) -> str:
    """
    Return a normalised form of this version's manifest so that two
    manifests that differ only in element order, insignificant
    whitespace, unknown elements or the attributes listed in
    EXCLUDE_ATTRS compare equal.

    Raises xml.etree.ElementTree.ParseError (after printing a message
    to stderr) when the version is not valid xml.
    """
    try:
        root = ET.fromstring(self.__version)
    except ET.ParseError:
        with redirect_stdout(sys.stderr):
            print('Version is not valid xml')
        raise

    # Position of each known tag in TAGS, used as primary sort key
    rank = {tag: position for position, tag in enumerate(TAGS)}

    # Build a new list rather than calling root.remove() while iterating
    # over root: removing during iteration skips the element that
    # follows each removed one, so unknown elements could survive.
    known = [element for element in root if element.tag in rank]

    # Sort entries in manifest by element position in TAGS
    # and name alphabetically
    known.sort(key=lambda x: (rank[x.tag], x.get('name') or ""))

    manifest = ET.Element('manifest')
    manifest.extend(known)
    return ET.canonicalize(
        ET.tostring(manifest, encoding='unicode'),
        strip_text=True,
        exclude_attrs=EXCLUDE_ATTRS
    )

def __repr__(self) -> str:
return self.__version

Expand All @@ -121,9 +209,16 @@ class Repo:
such as init/sync and manifest
"""

def __init__(self, workdir=CACHEDIR):
def __init__(self, url, revision='HEAD', name='default.xml',
depth=-1, workdir=CACHEDIR):
self.__workdir = workdir
self.__oldpwd = None
self.__url = url
david-baylibre marked this conversation as resolved.
Show resolved Hide resolved
self.__revision = revision
self.__name = name
self.__depth = depth
self.__version: Version = None
self.__remote = {}
workdir.mkdir(parents=True, exist_ok=True)

# gitrepo from https://github.com/grouperenault/gitrepo
Expand All @@ -145,25 +240,33 @@ def __change_to_workdir(self):
def __restore_oldpwd(self):
os.chdir(self.__oldpwd)

def init(self, url, revision='HEAD', name='default.xml', depth=-1):
def __add_remote(self, remote, url):
self.__remote[remote] = url

def __remote_url(self, remote):
return self.__remote[remote]

def init(self):
self.__change_to_workdir()
try:
# Google's repo prints a lot of information to stdout.
# Concourse expects every logs to be emitted to stderr:
# https://concourse-ci.org/implementing-resource-types.html#implementing-resource-types
# https://concourse-ci.org/implementing-resource-types.html#implementing-resource-types # noqa: E501
with redirect_stdout(sys.stderr):
repo_cmd = [
'--no-pager', 'init', '--quiet', '--manifest-url', url,
'--manifest-name', name,
'--no-tags',
'--no-pager', 'init', '--quiet', '--manifest-url',
self.__url, '--manifest-name',
self.__name, '--no-tags',
]
if depth > 0:
repo_cmd.append('--depth={}'.format(depth))
if self.__depth > 0:
repo_cmd.append('--depth={}'.format(self.__depth))

if revision is not None:
repo_cmd.append('--manifest-branch={}'.format(revision))
if self.__revision is not None:
repo_cmd.append(
'--manifest-branch={}'.format(self.__revision)
)

print('Downloading manifest from {}'.format(url))
print('Downloading manifest from {}'.format(self.__url))
repo._Main(repo_cmd)
print('repo has been initialized in {}'.format(self.__workdir))

Expand All @@ -172,7 +275,7 @@ def init(self, url, revision='HEAD', name='default.xml', depth=-1):
finally:
self.__restore_oldpwd()

def sync(self, version: Version = None, jobs: int = 0):
def sync(self, version: Version, jobs: int = 0):
self.__change_to_workdir()
try:
with redirect_stdout(sys.stderr):
Expand All @@ -185,34 +288,34 @@ def sync(self, version: Version = None, jobs: int = 0):
if jobs > 0:
repo_cmd.append('--jobs={}'.format(jobs))

if version is None:
with tempfile.TemporaryDirectory() as tmpdir:
tmp_manifest = os.path.join(tmpdir, 'manifest_tmp')
version.to_file(tmp_manifest)
repo_cmd.append(
'--manifest-name={}'.format(tmp_manifest))
repo._Main(repo_cmd)
else:
with tempfile.TemporaryDirectory() as tmpdir:
tmp_manifest = os.path.join(tmpdir, 'manifest_tmp')
version.to_file(tmp_manifest)
repo_cmd.append(
'--manifest-name={}'.format(tmp_manifest))
repo._Main(repo_cmd)
if os.listdir(self.__workdir) == []:
raise Exception('Sync failed. Is manifest correct?')
except Exception as e:
raise (e)
finally:
self.__restore_oldpwd()

# Update self.__version after repo sync
def update_version(self):
    """
    Refresh the stored version from a manifest snapshot.

    The snapshot is produced by __manifest_out into a temporary
    directory, loaded back with Version.from_file and stored in
    self.__version; the temporary directory is removed afterwards.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot')
        self.__manifest_out(tmp_manifest)
        self.__version = Version.from_file(tmp_manifest)

def save_manifest(self, filename):
with redirect_stdout(sys.stderr):
full_path = self.__workdir / filename
current_version = self.currentVersion()
print('Saving manifest to {}'.format(full_path))
current_version.to_file(full_path)
self.__version.to_file(full_path)

def currentVersion(self) -> Version:
with tempfile.TemporaryDirectory() as tmpdir:
tmp_manifest = os.path.join(tmpdir, 'manifest_snapshot')
self.__manifest_out(tmp_manifest)
version = Version.from_file(tmp_manifest)

return version
return self.__version

def metadata(self):
metadata = []
Expand Down Expand Up @@ -242,3 +345,66 @@ def __manifest_out(self, filename):
raise (e)
finally:
self.__restore_oldpwd()

def update_manifest(self, jobs):
    """
    Pin every project of the manifest to a commit sha1 without
    downloading the projects, and store the result as current version.

    The manifest '.repo/manifests/<name>' is read from the workdir,
    each project's revision is resolved in parallel by getRevision
    (through multi_run_wrapper) and written back into the 'revision'
    attribute; the canonicalised manifest becomes self.__version.

    jobs: number of worker processes; a falsy value falls back to
          DEFAULT_CHECK_JOBS.

    Raises FileNotFoundError when the manifest file is missing,
    TypeError when the revision of some project could not be fetched,
    and KeyError when a project refers to a remote that is not
    declared in the manifest.
    """
    jobs = jobs or DEFAULT_CHECK_JOBS
    self.__change_to_workdir()
    try:
        with redirect_stdout(sys.stderr):
            print('Updating project revisions in manifest')
            xml = ET.parse('.repo/manifests/'+self.__name)
            manifest = xml.getroot()

            # Get default values from manifest.
            # Both stay None when there is no <default> element, so the
            # lookups below never hit an unbound name.
            defaultRemote = None
            defaultBranch = None
            defaults = manifest.find('default')
            if defaults is not None:
                defaultRemote = defaults.get('remote')
                defaultBranch = defaults.get('revision')

            for r in manifest.findall('remote'):
                url = r.get('fetch').rstrip('/')
                if not re.match("[a-zA-Z]+://", url):
                    # Relative fetch: resolve against the manifest url
                    # with its last path component removed. Any
                    # character but '/' is accepted in that component
                    # so names with digits, capitals or '_' are
                    # stripped too.
                    url = re.sub('/[^/]*$', '/', self.__url) + url
                self.__add_remote(r.get('name'), url)

            projectElements = manifest.findall('project')
            projects = []
            for p in projectElements:
                project = p.get('name')
                projectBranch = p.get('revision') or defaultBranch
                projectRemote = p.get('remote') or defaultRemote
                projectRemoteUrl = self.__remote_url(projectRemote)
                projects.append((projectRemote, projectRemoteUrl,
                                 project, projectBranch))

            with Pool(jobs) as pool:
                revisionList = pool.map(multi_run_wrapper, projects)

            # Update revisions.
            # pool.map keeps the input order, so results are paired
            # with their element by position; this stays correct when
            # the same remote/project is listed twice with different
            # revisions.
            for p, result in zip(projectElements, revisionList):
                if result is None:
                    # getRevision already reported the cause on stderr
                    raise TypeError(
                        'no revision could be fetched for project '
                        '{}'.format(p.get('name')))
                p.set('revision', result[1])

            self.__version = Version(
                ET.canonicalize(
                    ET.tostring(manifest, encoding='unicode'),
                    strip_text=True
                )
            )

    except FileNotFoundError:
        with redirect_stdout(sys.stderr):
            print('cannot open', '.repo/manifests/'+self.__name)
        raise
    except TypeError:
        with redirect_stdout(sys.stderr):
            print('Error fetching some project repo')
        raise
    finally:
        self.__restore_oldpwd()
Loading
Loading