From 9708ec52aacc4e0fe37ef91cb90fea43756fd234 Mon Sep 17 00:00:00 2001 From: gursewak1997 Date: Wed, 13 Nov 2024 00:10:11 -0800 Subject: [PATCH] gc: merge container-prune into cloud-prune Merge the code of the container GC into the cloud one, and update builds.json. Go through the tags in the base-oscontainer data in meta.json and prune every tag except the stream name itself, which is a moving tag. --- cmd/coreos-assembler.go | 2 +- src/cmd-cloud-prune | 92 ++++++++++++++++++++++++++++- src/cmd-container-prune | 125 ---------------------------------------- 3 files changed, 91 insertions(+), 128 deletions(-) delete mode 100755 src/cmd-container-prune diff --git a/cmd/coreos-assembler.go b/cmd/coreos-assembler.go index 4cf4ba8fc9..3804d37c05 100644 --- a/cmd/coreos-assembler.go +++ b/cmd/coreos-assembler.go @@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "osbuild", "run", "prune" var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"} var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"} -var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "container-prune", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"} +var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"} var otherCommands = []string{"shell", "meta"} func init() { diff --git a/src/cmd-cloud-prune b/src/cmd-cloud-prune index 5c0563ec13..1c0d78187b 100755 --- a/src/cmd-cloud-prune +++ b/src/cmd-cloud-prune @@ -34,8 +34,10 @@ import argparse import json 
+import subprocess from urllib.parse import urlparse import pytz +import requests import yaml import collections import datetime @@ -59,6 +61,8 @@ CACHE_MAX_AGE_METADATA = 60 * 5 # is up to date. SUPPORTED = ["amis", "gcp"] UNSUPPORTED = ["aliyun", "azure", "ibmcloud", "powervs"] +# list of known streams with containers +STREAMS = {"next", "testing", "stable", "next-devel", "testing-devel", "rawhide", "branched"} def parse_args(): @@ -70,6 +74,8 @@ def parse_args(): parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth", default=os.environ.get("GCP_JSON_AUTH")) parser.add_argument("--acl", help="ACL for objects", action='store', default='private') parser.add_argument("--aws-config-file", default=os.environ.get("AWS_CONFIG_FILE"), help="Path to AWS config file") + parser.add_argument("--registry-auth-file", default=os.environ.get("REGISTRY_AUTH_FILE"), + help="Path to docker registry auth file. Directly passed to skopeo.") return parser.parse_args() @@ -110,6 +116,12 @@ def main(): builds = builds_json_data["builds"] pruned_build_ids = [] images_to_keep = policy.get(stream, {}).get("images-keep", []) + barrier_releases = set() + # Get the update graph for stable streams + if stream in ['stable', 'testing', 'next']: + update_graph = get_update_graph(stream)['releases'] + # Keep only the barrier releases + barrier_releases = set([release["version"] for release in update_graph if "barrier" in release]) # Iterate through builds from oldest to newest for build in reversed(builds): @@ -125,7 +137,7 @@ def main(): current_build = Build(id=build_id, images=images, arch=arch, meta_json=meta_json) # Iterate over actions (policy types) to apply pruning - for action in ['cloud-uploads', 'images', 'build']: + for action in ['cloud-uploads', 'images', 'build', 'containers']: if action not in policy[stream]: continue action_duration = convert_duration_to_days(policy[stream][action]) @@ -162,7 +174,22 @@ def main(): case "build": prune_build(s3_client, bucket, 
prefix, build_id, args.dry_run) pruned_build_ids.append(build_id) - + case "containers": + # Our containers are manifest listed, which means deleting the container tag + # for one architecture deletes it for all of them. We'll choose to only prune + # for x86_64 since it is the one architecture that exists for all builds. + if arch == "x86_64": + if build_id in barrier_releases: + # Since containers are used for updates we need to keep around containers for barrier releases. + print(f"Release {build_id} is a barrier release. Skipping container prune.") + continue + # Retrieve container tags excluding the stream name since it updates with each release. + container_tags, container_repo = get_container_tags(meta_json, exclude=[stream]) + if container_tags: + for tag in container_tags: + prune_container(tag, args.dry_run, container_repo, args.registry_auth_file) + else: + print(f"No container tags to prune for build {build_id}.") # Update policy-cleanup after pruning actions for the architecture policy_cleanup = build.setdefault("policy-cleanup", {}) for action in policy[stream].keys(): # Only update actions specified in policy[stream] @@ -174,6 +201,9 @@ def main(): if "images" not in policy_cleanup: policy_cleanup["images"] = True policy_cleanup["images-kept"] = images_to_keep + case "containers": + if "containers" not in policy_cleanup: + policy_cleanup["containers"] = True if pruned_build_ids: if "tombstone-builds" not in builds_json_data: @@ -414,5 +444,63 @@ def prune_build(s3_client, bucket, prefix, build_id, dry_run): raise Exception(f"Error pruning {build_id}: {e.response['Error']['Message']}") +def get_container_tags(meta_json, exclude): + base_oscontainer = meta_json.get("base-oscontainer") + if base_oscontainer: + tags = base_oscontainer.get("tags", []) + filtered_tags = [tag for tag in tags if tag not in exclude] + container_repo = base_oscontainer.get("image", "") + return filtered_tags, container_repo + return [], "" + + +def prune_container(tag, 
dry_run, container_repo, registry_auth_file): + if dry_run: + print(f"Would prune image {container_repo}:{tag}") + else: + skopeo_delete(container_repo, tag, registry_auth_file) + + +def get_update_graph(stream): + url = f"https://builds.coreos.fedoraproject.org/updates/{stream}.json" + r = requests.get(url, timeout=5) + if r.status_code != 200: + raise Exception(f"Could not download update graph for {stream}. HTTP {r.status_code}") + return r.json() + + +def skopeo_inspect(repo, tag, auth): + skopeo_args = ["skopeo", "inspect", "--no-tags", "--retry-times=10", f"docker://{repo}:{tag}"] + if auth: + skopeo_args.extend(["--authfile", auth]) + try: + subprocess.check_output(skopeo_args, stderr=subprocess.STDOUT) + return True # Inspection succeeded + except subprocess.CalledProcessError as e: + exit_code = e.returncode + error_message = e.output.decode("utf-8") + + # Exit code 2 indicates the image tag does not exist. We will consider it as pruned. + if exit_code == 2: + print(f"Skipping deletion for {repo}:{tag} since the tag does not exist.") + return False + else: + # Handle other types of errors + raise Exception(f"Inspection failed for {repo}:{tag} with exit code {exit_code}: {error_message}") + + +def skopeo_delete(repo, image, auth): + if skopeo_inspect(repo, image, auth): # Only proceed if inspection succeeds + skopeo_args = ["skopeo", "delete", f"docker://{repo}:{image}"] + if auth: + skopeo_args.extend(["--authfile", auth]) + try: + subprocess.check_output(skopeo_args, stderr=subprocess.STDOUT) + print(f"Image {repo}:{image} deleted successfully.") + except subprocess.CalledProcessError as e: + # Throw an exception in case the delete command fail despite the image existing + raise Exception("An error occurred during deletion:", e.output.decode("utf-8")) + + if __name__ == "__main__": main() diff --git a/src/cmd-container-prune b/src/cmd-container-prune deleted file mode 100755 index 4d2e3d4acf..0000000000 --- a/src/cmd-container-prune +++ /dev/null @@ 
-1,125 +0,0 @@ -#!/usr/bin/python3 -u - -""" -Prune containers from a remote registry -according to the images age -See cmd-cloud-prune for a policy file example -""" - -import argparse -import datetime -import json -import os -import subprocess -from dateutil.relativedelta import relativedelta -import requests -import yaml -from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream -from cosalib.cmdlib import convert_duration_to_days - -# Dict of known streams -STREAMS = {"next": 1, "testing": 2, "stable": 3, - "next-devel": 10, "testing-devel": 20, - "rawhide": 91, "branched": 92} - - -def parse_args(): - parser = argparse.ArgumentParser(prog="coreos-assembler container-prune") - parser.add_argument("--policy", required=True, type=str, help="Path to policy YAML file") - parser.add_argument("--dry-run", help="Don't actually delete anything", action='store_true') - parser.add_argument("-v", help="Increase verbosity", action='store_true') - parser.add_argument("--registry-auth-file", default=os.environ.get("REGISTRY_AUTH_FILE"), - help="Path to docker registry auth file. Directly passed to skopeo.") - parser.add_argument("--stream", type=str, help="CoreOS stream", required=True, choices=STREAMS.keys()) - parser.add_argument("repository_url", help="container images URL") - return parser.parse_args() - - -def skopeo_delete(repo, image, auth): - - skopeo_args = ["skopeo", "delete", f"docker://{repo}:{image}"] - if auth is not None: - skopeo_args.append(f"--authfile {auth}") - - subprocess.check_output(skopeo_args) - - -def get_update_graph(stream): - - url = f"https://builds.coreos.fedoraproject.org/updates/{stream}.json" - r = requests.get(url, timeout=5) - if r.status_code != 200: - raise Exception(f"Could not download update graph for {stream}. 
HTTP {r.status_code}") - return r.json() - - -def main(): - - args = parse_args() - - # Load the policy file - with open(args.policy, "r") as f: - policy = yaml.safe_load(f) - if args.stream not in policy: - print(f"Stream {args.stream} is not defined in policy file; exiting...") - return - if 'containers' not in policy[args.stream]: - print(f"No containers section for {args.stream} stream in policy; exiting...") - return - policy = policy[args.stream]["containers"] - - print(f"Pulling tags from {args.repository_url}") - # This is a JSON object: - # {"Repository": "quay.io/jbtrystramtestimages/fcos", - # "Tags": [ - # "40.20"40.20240301.1.0",.....]} - tags_data = subprocess.check_output(["skopeo", "list-tags", - f"docker://{args.repository_url}"]) - - tags_json = json.loads(tags_data) - tags = tags_json['Tags'] - # Compute the date before we should prune images - # today - prune-policy - today = datetime.datetime.now() - date_limit = today - relativedelta(days=convert_duration_to_days(policy)) - print(f"This will delete any images older than {date_limit} from the stream {args.stream}") - - stream_id = STREAMS[args.stream] - barrier_releases = set() - # Get the update graph for stable streams - if args.stream in ['stable', 'testing', 'next']: - update_graph = get_update_graph(args.stream)['releases'] - # Keep only the barrier releases - barrier_releases = set([release["version"] for release in update_graph if "barrier" in release]) - - for tag in tags: - # silently skip known moving tags (next, stable...) 
- if tag in STREAMS: - continue - - try: - (build_date, tag_stream) = parse_fcos_version_to_timestamp_and_stream(tag) - except Exception: - print(f"WARNING: Ignoring unexpected tag: {tag}") - continue - if stream_id != int(tag_stream): - if args.v: - print(f"Skipping tag {tag} not in {args.stream} stream") - continue - # Make sure this is not a barrier release (for stable streams) - # For non-production streams barrier_releases will be empty so - # this will be no-op - if tag in barrier_releases: - print(f"Release {tag} is a barrier release, keeping.") - continue - - if build_date < date_limit: - if args.dry_run: - print(f"Dry-run: would prune image {args.repository_url}:{tag}") - else: - print(f"Production tag {tag} is older than {date_limit.strftime("%Y%m%d")}, pruning.") - skopeo_delete(args.repository_url, tag, args.registry_auth_file) - - -if __name__ == "__main__": - main()