Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

convert SIGINT and SIGTERM to Fatal_Error #1727

Merged
merged 9 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions lib/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def __default__(self, tree):
return
except ch.Fatal_Error:
inst.announce_maybe()
inst.prepare_rollback()
raise
if (inst.miss):
if (self.miss_ct == 1):
Expand Down Expand Up @@ -466,10 +467,10 @@ def prepare(self, miss_ct):
time in prepare() should call announce_maybe() as soon as they
know hit/miss status.

2. Errors: Calling ch.FATAL() normally exits immediately, but here
this often happens before the instruction has been announced
(see issue #1486). Therefore, the caller catches Fatal_Error,
announces, and then re-raises.
2. Errors: The caller catches Fatal_Error, announces, calls
prepare_rollback(), and then re-raises. This is to ensure the
instruction is announced (see #1486) and any
possibly-inconsistent state is fixed before exiting.

3. Modifying image metadata: Instructions like ARG, ENV, FROM,
LABEL, SHELL, and WORKDIR must modify metadata here, not in
Expand All @@ -479,6 +480,9 @@ def prepare(self, miss_ct):
self.git_hash = bu.cache.find_sid(self.sid, self.image.ref.for_path)
return miss_ct + int(self.miss)

def prepare_rollback(self):
    """Restore any possibly-inconsistent state left behind by a failed
       prepare(). Called by the driver after catching Fatal_Error and
       before re-raising. The base implementation is a no-op; subclasses
       that can leave the storage directory inconsistent (e.g. FROM)
       override it."""
    pass # typically a no-op

def ready(self):
bu.cache.ready(self.image)

Expand Down Expand Up @@ -1146,6 +1150,25 @@ def prepare(self, miss_ct):
# Done.
return int(self.miss) # will still miss in disabled mode

def prepare_rollback(self):
    """Roll back after a failed prepare().

       AFAICT the only state that can plausibly be inconsistent is the
       unpack directories of the base image and/or the image itself.
       Rather than trying to be clever, just delete whichever of them
       exist."""
    # Either attribute may be unset if prepare() bailed out early, so use
    # getattr() with a default rather than touching them directly.
    base_image = getattr(self, "base_image", None)
    image = getattr(self, "image", None)
    if (base_image is not None or image is not None):
        ch.INFO("something went wrong, rolling back ...")
    if (base_image is not None):
        bu.cache.unpack_delete(base_image, missing_ok=True)
    if (image is not None):
        bu.cache.unpack_delete(image, missing_ok=True)


class Run(Instruction):

Expand Down
7 changes: 4 additions & 3 deletions lib/build_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,10 +1066,10 @@ def pull_lazy(self, img, src_ref, last_layer=None, pullet=None):
# a young hen, especially one less than one year old
pullet = pull.Image_Puller(img, src_ref)
pullet.download()
self.worktree_add(img, "root")
pullet.unpack(last_layer)
sid = self.sid_from_parent(self.root_id, pullet.sid_input)
pullet.done()
self.worktree_adopt(img, "root")
commit = self.commit(img.unpack_path, sid, "PULL %s" % src_ref, [])
self.ready(img)
if (img.ref != src_ref):
Expand Down Expand Up @@ -1220,12 +1220,14 @@ def tree_print(self):
self.git(args + ["--format=%s" % fmt], quiet=False)
print() # blank line to separate from summary

def unpack_delete(self, image):
def unpack_delete(self, image, missing_ok=False):
"""Wrapper for Image.unpack_delete() that first detaches the work tree's
head. If we delete an image's unpack path without first detaching HEAD,
the corresponding work tree must also be deleted before the bucache
branch. This involves multiple calls to worktrees_fix(), which is
clunky, so we use this method instead."""
if (not image.unpack_exist_p and missing_ok):
return
(_, commit) = self.find_commit(image.ref.for_path)
if (commit is not None):
# Off with her head!
Expand Down Expand Up @@ -1381,7 +1383,6 @@ def permissions_fix(self, path):
for i in itertools.chain(subdirs, files):
(dir_ // i).chmod_min()


def pull_lazy(self, img, src_ref, last_layer=None, pullet=None):
if (pullet is None and os.path.exists(img.unpack_path)):
ch.VERBOSE("base image already exists, skipping pull")
Expand Down
40 changes: 36 additions & 4 deletions lib/charliecloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,26 +397,45 @@ def start(self):

class Progress_Writer:
"""Wrapper around a binary file object to maintain a progress meter while
data are written."""
data are written. Overwrite the file if it already exists.

This downloads to a temporary file to ease recovery if the download is
interrupted. This uses a predictable name to support restarts in the
future, which would probably require verification after download. For
now, we just delete any leftover temporary files in Storage.init().

An interesting alternative is to download to an anonymous temporary file
that vanishes if not linked into the filesystem. Recent Linux provides a
very cool procedure to do this -- open(2) with O_TMPFILE followed by
linkat(2) [1] -- but it’s not always supported and the workaround
(create, then immediately unlink(2)) does not support re-linking [2].
This would also not support restarting the download.

[1]: https://man7.org/linux/man-pages/man2/open.2.html
[2]: https://stackoverflow.com/questions/4171713"""

__slots__ = ("fp",
"msg",
"path",
"path_tmp",
"progress")

def __init__(self, path, msg):
    """Remember the final destination *path* and the progress-meter message
       *msg*. The temporary download path is derived from *path*; nothing
       is opened until start()."""
    self.progress = None  # created in start()
    self.msg = msg
    self.path = path
    # Data land here first; close() moves the file into place.
    self.path_tmp = path.with_name("part_" + path.name)

def close(self):
    """Finish the write: shut down the progress meter (if one was started),
       close the file object, and move the temporary file into place.

       NOTE(review): assumes start() was called, i.e. self.fp exists —
       confirm callers guarantee this."""
    if (self.progress is not None):
        self.progress.done()
    close_(self.fp)
    # Remove any pre-existing destination first, then rename the temporary
    # file onto it ("overwrite the file if it already exists", per the
    # class docstring).
    self.path.unlink_(missing_ok=True)
    self.path_tmp.rename_(self.path)

def start(self, length):
    """Begin the download: set up a progress meter expecting *length* bytes
       and open the temporary file for binary writing."""
    meter = Progress(self.msg, "MiB", 2**20, length)
    self.progress = meter
    self.fp = self.path_tmp.open_("wb")

def write(self, data):
self.progress.update(len(data))
Expand Down Expand Up @@ -655,8 +674,10 @@ def init(cli):
log_fp = file_.open_("at")
atexit.register(color_reset, log_fp)
VERBOSE("version: %s" % version.VERSION)
VERBOSE("verbose level: %d (%s))" % (log_level.value,
log_level.name))
VERBOSE("verbose level: %d (%s))" % (log_level.value, log_level.name))
# signal handling
signal.signal(signal.SIGINT, sigterm)
signal.signal(signal.SIGTERM, sigterm)
# storage directory
global storage
storage = fs.Storage(cli.storage)
Expand Down Expand Up @@ -821,6 +842,17 @@ def si_decimal(ct):
ct /= 1000
assert False, "unreachable"

def sigterm(signum, frame):
    """Handler for SIGTERM and friends (installed for both SIGINT and
       SIGTERM): report the signal and exit via Fatal_Error."""
    # We are already cleaning up, so don't let a second signal interrupt
    # that — ignore both from here on.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, signal.SIG_IGN)
    # Don’t stomp on progress meter if one is being printed.
    print()
    signame = signal.Signals(signum).name
    ERROR("received %s, exiting" % signame)
    FATAL("received %s" % signame)

def user():
"Return the current username; exit with error if it can’t be obtained."
try:
Expand Down
20 changes: 20 additions & 0 deletions lib/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,17 @@ def root_env():
def build_large_path(self, name):
return self.build_large // name

def cleanup(self):
    """Tidy the storage directory. Called during initialization, after we
       know the storage directory is valid. Currently this just deletes
       partially-downloaded files left over from an interrupted run."""
    deleted = 0
    for leftover in self.download_cache.glob("part_*"):
        ch.VERBOSE("deleting: %s" % leftover)
        leftover.unlink_()
        deleted += 1
    if (deleted != 0):
        ch.WARNING("deleted %d partially downloaded files" % deleted)

def fatman_for_download(self, image_ref):
    """Return the path where the fat manifest for *image_ref* is cached in
       the download cache."""
    filename = "%s.fat.json" % image_ref.for_path
    return self.download_cache // filename

Expand Down Expand Up @@ -593,6 +604,7 @@ def init(self):
% (v_found, self.root),
'you can delete and re-initialize with "ch-image reset"')
self.validate_strict()
self.cleanup()

def lock(self):
"""Lock the storage directory. Charliecloud does not at present support
Expand Down Expand Up @@ -655,7 +667,15 @@ def validate_strict(self):
entries.remove(entry)
except KeyError:
ch.FATAL("%s: missing file or directory: %s" % (msg_prefix, entry))
# Ignore some files that may or may not exist.
entries -= { i.name for i in (self.lockfile, self.mount_point) }
# Delete some files that exist only if we crashed.
for i in (self.image_tmp, ):
if (i.name in entries):
ch.WARNING("deleting leftover temporary file/dir: %s" % i.name)
i.rmtree()
entries.remove(i.name)
# If anything is left, yell about it.
if (len(entries) > 0):
ch.FATAL("%s: extraneous file(s): %s"
% (msg_prefix, " ".join(sorted(entries))))
Expand Down
4 changes: 2 additions & 2 deletions lib/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,8 @@ def unpack(self, layer_tars, last_layer=None):
"""Unpack config_json (path to JSON config file) and layer_tars
(sequence of paths to tarballs, with lowest layer first) into the
unpack directory, validating layer contents and dealing with
whiteouts. Empty layers are ignored. The unpack directory must exist,
and either be empty or contain only a single entry called “.git”."""
whiteouts. Empty layers are ignored. The unpack directory must not
exist."""
if (last_layer is None):
last_layer = sys.maxsize
ch.INFO("flattening image")
Expand Down
1 change: 1 addition & 0 deletions lib/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ def bad_key(key):

def unpack(self, last_layer=None):
layer_paths = [self.layer_path(h) for h in self.layer_hashes]
bu.cache.unpack_delete(self.image, missing_ok=True)
self.image.unpack(layer_paths, last_layer)
self.image.metadata_replace(self.config_path)
# Check architecture we got. This is limited because image metadata does
Expand Down