Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

Commit

Permalink
add fast file copy method
Browse files Browse the repository at this point in the history
  • Loading branch information
reidpr committed Oct 19, 2023
1 parent 5000f03 commit ddfbb9d
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 9 deletions.
7 changes: 5 additions & 2 deletions lib/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ def onerror(x):
dst_path.rmtree()
else:
dst_path.unlink_()
ch.copy2(src_path, dst_path, follow_symlinks=False)
src_path.copy(dst_path)

def copy_src_file(self, src, dst):
"""Copy file src to dst. src might be a symlink, but dst is a canonical
Expand All @@ -789,8 +789,11 @@ def copy_src_file(self, src, dst):
assert (not dst.is_symlink())
assert ( (dst.exists() and (dst.is_dir() or dst.is_file()))
or (not dst.exists() and dst.parent.is_dir()))
if (dst.is_dir()):
dst = dst.parent // src.name
src = src.resolve()
ch.DEBUG("copying named file: %s -> %s" % (src, dst))
ch.copy2(src, dst, follow_symlinks=True)
src.copy(dst)

def dest_realpath(self, unpack_path, dst):
"""Return the canonicalized version of path dst within (canonical) image
Expand Down
4 changes: 0 additions & 4 deletions lib/charliecloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,10 +609,6 @@ def color_set(color, fp):
if (fp.isatty()):
print("\033[" + color, end="", flush=True, file=fp)

def copy2(src, dst, **kwargs):
"Wrapper for shutil.copy2() with error checking."
ossafe(shutil.copy2, "can’t copy: %s -> %s" % (src, dst), src, dst, **kwargs)

def dependencies_check():
"""Check more dependencies. If any dependency problems found, here or above
(e.g., lark module checked at import time), then complain and exit."""
Expand Down
72 changes: 70 additions & 2 deletions lib/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@
storage_lock = True


### Functions ###

def copy(src, dst, follow_symlinks=False):
"""Copy file src to dst. Wrapper function providing same signature as
shutil.copy2(). See Path.copy() for lots of gory details. Accepts
follow_symlinks, but the only valid value is False."""
assert (not follow_symlinks)
src.copy(dst)


## Classes ##

class Path(pathlib.PosixPath):
Expand Down Expand Up @@ -187,9 +197,67 @@ def chmod_min(self, st=None):
ch.ossafe(os.chmod, "can’t chmod: %s" % self, self, perms_new)
return (st.st_mode | perms_new)

def copy(self, dst):
"""Copy file myself to dst, including metadata, overwriting dst if it
exists. dst must be the actual destination path, i.e., it may not be
a directory. Does not follow symlinks.
If (a) src is a regular file, (b) src and dst are on the same
filesystem, and (c) Python is version ≥3.8, then use
os.copy_file_range() [1,2], which at a minimum does an in-kernel data
transfer. If that filesystem also (d) supports copy-on-write [3],
then this is a very fast lazy reflink copy.
[1]: https://docs.python.org/3/library/os.html#os.copy_file_range
[2]: https://man7.org/linux/man-pages/man2/copy_file_range.2.html
[3]: https://elixir.bootlin.com/linux/latest/A/ident/remap_file_range
"""
assert (not follow_symlinks)
src_st = self.stat_(follow_symlinks=False)
# dst is not a directory, so parent must be on the same filesystem. We
# *do* want to follow symlinks on the parent.
dst_dev = dst.parent.stat_(follow_symlinks=True).st_dev
if ( stat.S_ISREG(src_st.st_mode)
and src_st.st_dev == dst_dev
and hasattr(os, "copy_file_range")):
# Fast path. The same-filesystem restriction is because reliable
# copy_file_range(2) between filesystems seems quite new (maybe
# kernel 5.18?).
try:
if (dst.exists()):
# If dst is a symlink, we get OLOOP from os.open(). Delete it
# unconditionally though, for simplicity.
dst.unlink()
src_fd = os.open(self, os.O_RDONLY|os.O_NOFOLLOW)
dst_fd = os.open(dst, os.O_WRONLY|os.O_NOFOLLOW|os.O_CREAT)
# I’m not sure why we need to loop this -- there’s no explanation
# of *when* fewer bytes than requested would be copied -- but the
# man page example does.
remaining = src_st.st_size
while (remaining > 0):
copied = os.copy_file_range(src_fd, dst_fd, sys.maxsize)
if (copied == 0):
ch.FATAL("zero bytes copied: %s -> %s" % (self, dst))
remaining -= copied
os.close(src_fd)
os.close(dst_fd)
except OSError as x:
FATAL("can’t copy (fast): %s -> %s: %s" % (self, dst, x.strerror))
else:
# Slow path.
try:
shutil.copyfile(self, dst, follow_symlinks=False)
except OSError as x:
FATAL("can’t copy (slow): %s -> %s: %s" % (self, dst, x.strerror))
try:
# Metadata.
shutil.copystat(self, dst, follow_symlinks=False)
except OSError as x:
FATAL("can’t copy metadata: %s -> %s" % (self, dst, x.strerror))

def copytree(self, *args, **kwargs):
"Wrapper for shutil.copytree() that exits on the first error."
shutil.copytree(str(self), copy_function=ch.copy2, *args, **kwargs)
shutil.copytree(str(self), copy_function=copy, *args, **kwargs)

def disk_bytes(self):
"""Return the number of disk bytes consumed by path. Note this is
Expand Down Expand Up @@ -435,7 +503,7 @@ def stat_(self, links):
follow_symlinks kwarg is absent in pathlib for Python 3.6, which we
want to retain compatibility with."""
return ch.ossafe(os.stat, "can’t stat: %s" % self, self,
follow_symlinks=links)
follow_symlinks=links)

def strip(self, left=0, right=0):
"""Return a copy of myself with n leading components removed. E.g.:
Expand Down
2 changes: 1 addition & 1 deletion lib/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def metadata_replace(self, config_json):
else:
# Copy pulled config file into the image so we still have it.
path = self.metadata_path // "config.pulled.json"
ch.copy2(config_json, path)
config_json.copy(path)
ch.VERBOSE("pulled config path: %s" % path)
self.metadata_merge_from_config(path.json_from_file("config"))
self.metadata_save()
Expand Down

0 comments on commit ddfbb9d

Please sign in to comment.