diff --git a/config-bot/Dockerfile b/config-bot/Dockerfile new file mode 100644 index 0000000..3be0258 --- /dev/null +++ b/config-bot/Dockerfile @@ -0,0 +1,8 @@ +FROM registry.fedoraproject.org/fedora:30 + +RUN dnf -y install git python3-toml python3-aiohttp && dnf clean all + +COPY main /usr/lib/config-bot/main +COPY config.toml /etc/config-bot.toml + +CMD /usr/lib/config-bot/main diff --git a/config-bot/README.md b/config-bot/README.md new file mode 100644 index 0000000..4eafc91 --- /dev/null +++ b/config-bot/README.md @@ -0,0 +1,41 @@ +# config-bot + +config-bot performs automated management tasks on the main +[fedora-coreos-config](https://github.com/coreos/fedora-coreos-config) +repo. + +It performs three closely related, but mostly independent +functions. The names below reflect each of those functions. +One can find their corresponding configuration section in +`config.toml` and function names in `main`. + +1. `sync-build-lockfiles`: watches for new builds on a set + of streams, and pushes the generated lockfile to their + corresponding branches +2. `promote-lockfiles`: on some configurable interval, + pushes lockfiles from one config branch to another +3. `propagate-files`: watches for pushes to a config branch + and propagates changes to a subset of files to a set of + target branches. + +All these are currently timer-based. In the future, they +will use fedora-messaging for triggering. + +Similarly, for now all changes are done using `git push`. In +the future, config-bot PR functionality can be added. + +## Testing locally + +Tweak settings as appropriate in `config.toml`, e.g.: +- point to your fork of `fedora-coreos-config` +- use your own GitHub token +- you can comment out whole sections if you'd like to test a + specific function only; e.g. 
if you only want to test + `promote-lockfiles`, you can comment out + `sync-build-lockfiles` and `propagate-files` + +Then: + +``` +./main --config myconfig.toml +``` diff --git a/config-bot/config.toml b/config-bot/config.toml new file mode 100644 index 0000000..b3da3ab --- /dev/null +++ b/config-bot/config.toml @@ -0,0 +1,36 @@ +[git] +author.name = 'CoreOS Bot' +author.email = 'coreosbot@fedoraproject.org' +github.repo.owner = 'coreos' +github.repo.name = 'fedora-coreos-config' +github.token.username = 'coreosbot' +github.token.path = '/var/run/secrets/coreos.fedoraproject.org/github-token/token' + +[sync-build-lockfiles] +builds-base-url = 'https://builds.coreos.fedoraproject.org/prod/streams' +streams = [ + 'bodhi-updates', +] +trigger.mode = 'periodic' +trigger.period = '15m' +method = 'push' + +#[promote-lockfiles] +#source-ref = 'bodhi-updates' +#target-ref = 'testing-devel' +#trigger.mode = 'periodic' +#trigger.period = '24h' +#method = 'push' + +[propagate-files] +source-ref = 'testing-devel' +target-refs = [ + 'bodhi-updates', +] +skip-files = [ + 'manifest.yaml', + 'manifest-lock.*', +] +trigger.mode = 'periodic' +trigger.period = '15m' +method = 'push' diff --git a/config-bot/main b/config-bot/main new file mode 100755 index 0000000..5d960b2 --- /dev/null +++ b/config-bot/main @@ -0,0 +1,367 @@ +#!/usr/bin/python3 -u + +import os +import re +import sys +import json +import toml +import fnmatch +import aiohttp +import asyncio +import argparse +import tempfile +import subprocess + + +DEFAULT_CONFIG_FILE_PATH = "/etc/config-bot.toml" + +git = None + + +def main(): + args = parse_args() + cfg = load_config(args.config) + + global git + git = Git(cfg['git']) + + loop = asyncio.get_event_loop() + + o = cfg.get('sync-build-lockfiles') + if o is not None: + loop.create_task(sync_build_lockfiles(o)) + + o = cfg.get('promote-lockfiles') + if o is not None: + loop.create_task(promote_lockfiles(o)) + + o = cfg.get('propagate-files') + if o is not None: + 
async def sync_build_lockfiles(cfg):
    '''
    Poll the `builds.json` of every configured stream forever and sync the
    lockfile of the first listed build whenever it differs from the last
    build that was synced successfully.

    `cfg` is the `sync-build-lockfiles` config table; the keys read here are
    `trigger.mode`, `trigger.period`, `method`, `builds-base-url` and
    `streams`. This coroutine never returns.

    Failures to fetch or decode `builds.json` are not fatal: the stream is
    skipped and retried on the next period.
    '''

    # this is the only mode we support right now
    assert cfg['trigger']['mode'] == 'periodic'
    period = period_to_seconds(cfg['trigger']['period'])
    method = cfg['method']

    base_url = cfg['builds-base-url']

    # {stream -> buildid}
    last_build_ids = {}

    first = True
    while True:

        # for easier hacking; just act immediately on first iteration
        # this allows us to safely use `continue` afterwards
        if not first:
            await asyncio.sleep(period)
        first = False

        for stream in cfg['streams']:
            builds_json = f'{base_url}/{stream}/builds/builds.json'
            data = await http_fetch(builds_json)
            if data is None:
                # problem fetching the json; just ignore and we'll retry.
                # This must be checked *before* decoding: json.loads(None)
                # raises TypeError, which would kill this task for good.
                continue

            try:
                builds = json.loads(data)
            except ValueError as e:
                # truncated/garbled payload; treat like a failed fetch
                eprint(f"Error decoding {builds_json}: {e}")
                continue

            if not builds['builds']:
                eprint(f"Stream {stream} has no builds!")
                continue

            latest_build_id = builds['builds'][0]

            # is this a new build?
            if latest_build_id == last_build_ids.get(stream):
                continue

            synced = await sync_one_build_lockfile(base_url, method, stream,
                                                   latest_build_id)
            # only remember the build once it is synced, so that a failed
            # sync is attempted again on the next iteration
            if synced:
                last_build_ids[stream] = latest_build_id
+ source_ref_checksum = git.rev_parse(source_ref) + if last_source_ref_checksum == source_ref_checksum: + continue + + git.checkout(target_ref) + + # get the list of lockfiles from the source ref + all_files = git.cmd_output('ls-tree', source_ref, + '--name-only').splitlines() + locks = [f for f in all_files if + matches_patterns(f, ['manifest-lock.generated.*.json'])] + + if len(locks) == 0: + eprint(f"No lockfiles found in {source_ref}") + last_source_ref_checksum = source_ref_checksum + continue + + # bring it into the index + git.cmd('checkout', source_ref, '--', *locks) + + # and rename it to the non-generated version + for lock in locks: + git.cmd('mv', '--force', lock, lock.replace('.generated', '')) + + if git.has_diff(): + git.commit(f"lockfiles: import from {source_ref}") + try: + git.push(target_ref) + except Exception as e: + print(f"Got exception during push: {e}") + continue + + last_source_ref_checksum = source_ref_checksum + + +async def propagate_files(cfg): + # this is the only mode we support right now + assert cfg['trigger']['mode'] == 'periodic' + period = period_to_seconds(cfg['trigger']['period']) + skip_files = cfg['skip-files'] + + # we only support direct git pushes for now + assert cfg['method'] == 'push' + + source_ref, target_refs = (cfg['source-ref'], cfg['target-refs']) + + last_source_ref_checksum = None + + first = True + while True: + + if not first: + await asyncio.sleep(period) + first = False + + async with git: + # is there a new commit? 
class Git:

    '''
    Convenience wrapper around shared git repo. To categorically rule out
    leftovers/workdirs left in funky states from various operations, and
    ensuring we always push what we mean, we use one main bare repo to
    share objects, but do all the work in transient worktrees.

    Use as `async with git:`; entering fetches all branches from origin and
    creates a fresh detached worktree, leaving removes it again. While a
    worktree exists, every git command runs inside it; otherwise commands
    run in the bare repo.
    '''

    def __init__(self, cfg):
        '''
        Clone the configured GitHub repo into a temporary bare repository.

        `cfg` is the `git` config table; the keys read here are
        `author.name`, `author.email`, `github.repo.owner`,
        `github.repo.name`, `github.token.username` and `github.token.path`.
        '''
        self._git_bare = tempfile.TemporaryDirectory(prefix="config-bot.bare.")
        self._git_work = None

        self._git_env = dict(os.environ)
        self._git_env.update({
            "GIT_AUTHOR_NAME": cfg['author']['name'],
            "GIT_AUTHOR_EMAIL": cfg['author']['email'],
            "GIT_COMMITTER_NAME": cfg['author']['name'],
            "GIT_COMMITTER_EMAIL": cfg['author']['email'],
        })

        gh_owner = cfg['github']['repo']['owner']
        gh_name = cfg['github']['repo']['name']
        token_un = cfg['github']['token']['username']
        with open(cfg['github']['token']['path']) as f:
            token_pw = f.read().strip()

        # NOTE(review): the token is embedded in the remote URL, so it shows
        # up in the clone command line and in any CalledProcessError raised
        # for it -- be careful about where such errors get logged
        url = f'https://{token_un}:{token_pw}@github.com/{gh_owner}/{gh_name}'
        self.cmd('clone', '--bare', url, '.')

        # we don't technically need a lockfile if we make sure that we never
        # `await` operations when using `with git`, though that's something I
        # can easily imagine regressing on
        self._lock = asyncio.Lock()

    def __del__(self):
        # __init__ may have failed before the attribute was ever assigned
        bare = getattr(self, '_git_bare', None)
        if bare is not None:
            bare.cleanup()

    async def __aenter__(self):
        '''
        Take the repo lock, refresh all branches from origin and create a
        new detached worktree. If any step fails the lock is released again
        so that other tasks sharing this object are not blocked forever.
        '''
        await self._lock.acquire()
        try:
            self.cmd('fetch', 'origin', '--prune',
                     '+refs/heads/*:refs/heads/*')
            assert self._git_work is None
            d = tempfile.TemporaryDirectory(prefix="config-bot.work.")
            try:
                # still runs in the bare repo: _git_work is not set yet
                self.cmd('worktree', 'add', '--detach', d.name, 'HEAD')
            except BaseException:
                d.cleanup()
                raise
            self._git_work = d
        except BaseException:
            self._lock.release()
            raise

    async def __aexit__(self, exc_type, exc, tb):
        '''
        Delete the worktree, prune its registration from the bare repo and
        release the lock (even if the cleanup itself fails).
        '''
        # reset first so `worktree prune` runs in the bare repo and the
        # object is reusable whatever happens below
        work, self._git_work = self._git_work, None
        try:
            work.cleanup()
            self.cmd('worktree', 'prune')
        finally:
            self._lock.release()

    def cmd(self, *args):
        '''Run `git <args>`; raises CalledProcessError on non-zero exit.'''
        wd = self._git_work or self._git_bare
        subprocess.check_call(['git', *args], cwd=wd.name, env=self._git_env)

    def cmd_output(self, *args):
        '''Run `git <args>` and return its stripped, UTF-8 decoded stdout.'''
        wd = self._git_work or self._git_bare
        out = subprocess.check_output(['git', *args], cwd=wd.name,
                                      env=self._git_env)
        return out.strip().decode('utf-8')

    def rev_parse(self, ref):
        '''Return the output of `git rev-parse <ref>`.'''
        return self.cmd_output('rev-parse', ref)

    def checkout(self, ref):
        '''Check out `ref` with a detached HEAD.'''
        self.cmd('checkout', '--detach', ref)

    def commit(self, message, files=None):
        '''
        Commit what is staged with `message`; if `files` is given and
        non-empty, `git add` those paths first.
        '''
        if files:
            self.cmd('add', *files)
        self.cmd('commit', '-m', message)

    def push(self, ref):
        '''Push the current HEAD to branch `ref` on origin.'''
        self.cmd('push', 'origin', f'HEAD:{ref}')

    def path(self, file=None):
        '''
        Return the current working directory (worktree if one is active,
        bare repo otherwise), or the path of `file` inside it.
        '''
        wd = self._git_work or self._git_bare
        if file is None:
            return wd.name
        return os.path.join(wd.name, file)

    def has_diff(self):
        '''
        Return True if there are modified or untracked files, or anything
        staged in the index.
        '''
        # use ls-files instead of `diff --exit-code` so new untracked files
        # also count as a "diff" (cmd_output already strips whitespace)
        if self.cmd_output('ls-files', '--modified', '--others'):
            return True

        # but also check whether we have things staged
        return bool(self.cmd_output('diff', '--staged', '--name-only'))
DeploymentConfig + apiVersion: v1 + metadata: + name: config-bot + spec: + replicas: 1 + template: + metadata: + labels: + name: config-bot + spec: + containers: + - name: config-bot + image: config-bot + volumeMounts: + - name: github-token-mount + mountPath: /var/run/secrets/coreos.fedoraproject.org/github-token + readOnly: true + volumes: + - name: github-token-mount + secret: + secretName: config-bot-github-token + triggers: + - type: ConfigChange + - type: ImageChange + imageChangeParams: + automatic: true + containerNames: + - config-bot + from: + kind: ImageStreamTag + name: config-bot:latest