From cd2448c13ef637b29a378bbcefd0ac3a10bf4b08 Mon Sep 17 00:00:00 2001 From: Andrew Walker Date: Fri, 30 Aug 2024 14:09:13 -0600 Subject: [PATCH] Fix issues with user.update endpoint User may have nologin shell causing the attempt to run cp in a subprocess to fail due to failure to su to the user in question (due to error in pam_open_session()). After review of history for why we were subprocessing out to perform this task, I failed to find a compelling reason why we could not simply copy them without switching the user context. This commit adds a new `copytree` filesystem utility that provides the following features that are relevant to copying home directories. 1. ability to control behavior regarding whether the copy operation will walk into child datasets. Generally this is not desired because we have no design or provisions for creating child datasets within a user home directory. This situation is most likely to arise when users mistakenly set an incorrect path for a home directory, e.g. /mnt/tank/share rather than /mnt/tank/share/wilbur 2. awareness of various ACL types on TrueNAS. We need to rigidly preserve the permissions from the source when writing to a new destination in case the administrator made a configuration mistake like in (1) in order to avoid unintentional data disclosure. 3. awareness of the special .zfs directory. Some users enable snapdir visibility. This means that conventional copy methods can end up attempting to copy the entirety of the .zfs directory to a new path resulting in excessive time spent copying and space usage. 4. ability to use block cloning. This speeds up the actual data copy and reduces the amount of space used on the filesystem when the sysadmin changes a user home directory path. --- .../middlewared/plugins/account.py | 34 +- .../middlewared/plugins/filesystem.py | 10 +- .../pytest/unit/utils/test_copytree.py | 553 ++++++++++++++++ .../pytest/unit/utils/test_statx.py | 8 +- .../middlewared/utils/filesystem/acl.py | 3 + .../middlewared/utils/filesystem/copy.py | 617 ++++++++++++++++++ .../middlewared/utils/filesystem/directory.py | 24 +- .../middlewared/utils/filesystem/stat_x.py | 17 +- .../middlewared/utils/filesystem/utils.py | 13 +- tests/api2/test_user_create_dir.py | 117 ++++ 10 files changed, 1355 insertions(+), 41 deletions(-) create mode 100644 src/middlewared/middlewared/pytest/unit/utils/test_copytree.py create mode 100644 src/middlewared/middlewared/utils/filesystem/copy.py create mode 100644 tests/api2/test_user_create_dir.py diff --git a/src/middlewared/middlewared/plugins/account.py b/src/middlewared/middlewared/plugins/account.py index bc5c96738a748..4a6258506ea38 100644 --- a/src/middlewared/middlewared/plugins/account.py +++ b/src/middlewared/middlewared/plugins/account.py @@ -10,6 +10,7 @@ from pathlib import Path from contextlib import suppress +from dataclasses import asdict from middlewared.api import api_method from middlewared.api.current import * from middlewared.schema import accepts, Bool, Dict, Int, List, Password, Patch, returns, SID, Str @@ -21,6 +22,7 @@ from middlewared.utils import run, filter_list from middlewared.utils.crypto import generate_nt_hash, sha512_crypt from middlewared.utils.directoryservices.constants import DSType, DSStatus +from middlewared.utils.filesystem.copy import copytree, CopyTreeConfig from middlewared.utils.nss import pwd, grp from middlewared.utils.nss.nss_common import NssModule from middlewared.utils.privilege import credential_has_full_admin, privileges_group_mapping @@ -753,18 +755,14 @@ def do_update(self, app, audit_callback, pk, data): # Copy the home directory if it changed home_copy = False home_old = None - if ( - has_home and - 'home' in data and - data['home'] != user['home'] and - not data['home'].startswith(f'{user["home"]}/') - ): - if had_home: - home_copy = True - home_old = user['home'] + if has_home and 'home' in data: if data.get('home_create', False): data['home'] = os.path.join(data['home'], data.get('username') or user['username']) + if had_home and user['home'] != data['home']: + home_copy = True + home_old = user['home'] + # After this point user dict has values from data user.update(data) @@ -1282,32 +1280,30 @@ async def setup_local_administrator(self, app, username, password, options): @private @job(lock=lambda args: f'copy_home_to_{args[1]}') - async def do_home_copy(self, job, home_old, home_new, username, new_mode, uid): - if home_old in DEFAULT_HOME_PATHS: + def do_home_copy(self, job, home_old, home_new, username, new_mode, uid): + if home_old in DEFAULT_HOME_PATH: return + # We need to set permission and strip ACL first before copying files if new_mode is not None: - perm_job = await self.middleware.call('filesystem.setperm', { + perm_job = self.middleware.call_sync('filesystem.setperm', { 'uid': uid, 'path': home_new, 'mode': new_mode, 'options': {'stripacl': True}, }) else: - current_mode = stat.S_IMODE((await self.middleware.call('filesystem.stat', home_old))['mode']) - perm_job = await self.middleware.call('filesystem.setperm', { + current_mode = stat.S_IMODE(self.middleware.call_sync('filesystem.stat', home_old)['mode']) + perm_job = self.middleware.call_sync('filesystem.setperm', { 'uid': uid, 'path': home_new, 'mode': f'{current_mode:03o}', 'options': {'stripacl': True}, }) - await perm_job.wait() + perm_job.wait_sync() - command = f"/bin/cp -a {shlex.quote(home_old) + '/' + '.'} {shlex.quote(home_new + '/')}" - do_copy = await run(["/usr/bin/su", "-", username, "-c", command], check=False) - if do_copy.returncode != 0: - raise CallError(f"Failed to copy homedir [{home_old}] to [{home_new}]: {do_copy.stderr.decode()}") + return asdict(copytree(home_old, home_new, CopyTreeConfig(exist_ok=True, job=job))) @private async def common_validation(self, verrors, data, schema, group_ids, old=None): diff --git a/src/middlewared/middlewared/plugins/filesystem.py b/src/middlewared/middlewared/plugins/filesystem.py index 5c987fbccba00..7795b2dab9632 100644 --- a/src/middlewared/middlewared/plugins/filesystem.py +++ b/src/middlewared/middlewared/plugins/filesystem.py @@ -21,7 +21,7 @@ from middlewared.utils.filesystem.acl import acl_is_present from middlewared.utils.filesystem.constants import FileType from middlewared.utils.filesystem.directory import DirectoryIterator, DirectoryRequestMask -from middlewared.utils.filesystem.utils import timespec_convert +from middlewared.utils.filesystem.utils import timespec_convert_float from middlewared.utils.mount import getmntinfo from middlewared.utils.nss import pwd, grp from middlewared.utils.path import FSLocation, path_location, is_child_realpath @@ -454,10 +454,10 @@ def stat(self, _path): 'mode': st['st'].stx_mode, 'uid': st['st'].stx_uid, 'gid': st['st'].stx_gid, - 'atime': timespec_convert(st['st'].stx_atime), - 'mtime': timespec_convert(st['st'].stx_mtime), - 'ctime': timespec_convert(st['st'].stx_ctime), - 'btime': timespec_convert(st['st'].stx_btime), + 'atime': timespec_convert_float(st['st'].stx_atime), + 'mtime': timespec_convert_float(st['st'].stx_mtime), + 'ctime': timespec_convert_float(st['st'].stx_ctime), + 'btime': timespec_convert_float(st['st'].stx_btime), 'mount_id': st['st'].stx_mnt_id, 'dev': os.makedev(st['st'].stx_dev_major, st['st'].stx_dev_minor), 'inode': st['st'].stx_ino, diff --git a/src/middlewared/middlewared/pytest/unit/utils/test_copytree.py b/src/middlewared/middlewared/pytest/unit/utils/test_copytree.py new file mode 100644 index 0000000000000..433038a3a9411 --- /dev/null +++ b/src/middlewared/middlewared/pytest/unit/utils/test_copytree.py @@ -0,0 +1,553 @@ +import errno +import gc +import os +import pytest +import random +import stat +import subprocess + +from middlewared.utils.filesystem import copy +from operator import eq, ne +from unittest.mock import Mock, patch + +TEST_FILE_DATASZ = 128 * 1024 +TEST_XATTR_DATASZ = 1024 + +TEST_FILES = [ + ('testfile1', random.randbytes(TEST_FILE_DATASZ)), + ('testfile2', random.randbytes(TEST_FILE_DATASZ)), + ('canary', random.randbytes(TEST_FILE_DATASZ)), + ('1234_bob', random.randbytes(TEST_FILE_DATASZ)) +] + +TEST_FILE_XATTRS = [ + ('user.filexat1', random.randbytes(TEST_XATTR_DATASZ)), + ('user.filexat2', random.randbytes(TEST_XATTR_DATASZ)), + ('user.filexat3', random.randbytes(TEST_XATTR_DATASZ)), +] + +TEST_DIRS = [ + 'testdir1', + 'testdir2', + '1234_larry' +] + +TEST_DIR_XATTRS = [ + ('user.dirxat1', random.randbytes(TEST_XATTR_DATASZ)), + ('user.dirxat2', random.randbytes(TEST_XATTR_DATASZ)), + ('user.dirxat3', random.randbytes(TEST_XATTR_DATASZ)), +] + +JENNY = 8675309 + + +class Job: + log = [] + progress = 0 + + def set_progress(self, progress: int, msg: str): + self.progress = progress + self.log.append(msg) + + +def create_test_files(target: str, symlink_target_path: str) -> None: + for filename, data in TEST_FILES: + path = os.path.join(target, filename) + with open(path, 'wb') as f: + f.write(data) + os.fchmod(f.fileno(), 0o666) + os.fchown(f.fileno(), JENNY, JENNY + 1) + f.flush() + + for xat_name, xat_data in TEST_FILE_XATTRS: + os.setxattr(path, xat_name, xat_data) + + # symlink target outside of dirs to be copied around + sl = f'{filename}_sl' + os.symlink(symlink_target_path, os.path.join(target, sl)) + + # this needs to be last op on file to avoid having other + # changes affect atime / mtime + os.utime(path, ns=(JENNY + 1, JENNY + 2)) + + +def create_test_data(target: str, symlink_target_path) -> None: + """ generate test data in randomized temporary directory + + Basic tree of files and directories including some symlinks + """ + os.mkdir(os.path.join(target, 'SOURCE')) + create_test_files(os.path.join(target, 'SOURCE'), symlink_target_path) + + for dirname in TEST_DIRS: + path = os.path.join(target, 'SOURCE', dirname) + os.mkdir(path) + os.chmod(path, 0o777) + os.chown(path, JENNY, JENNY) + + for xat_name, xat_data in TEST_DIR_XATTRS: + os.setxattr(path, xat_name, xat_data) + + # force atime and mtime to some value other than + # current timestamp + os.utime(path, ns=(JENNY + 3, JENNY + 4)) + + # symlink target outside of dirs to be copied around + sl = f'{dirname}_sl' + os.symlink(symlink_target_path, os.path.join(path, sl)) + + # create separate symlink dir for our test files + # _outside_ SOURCE + os.mkdir(os.path.join(target, dirname)) + create_test_files(path, os.path.join(target, dirname)) + os.utime(path, ns=(JENNY + 3, JENNY + 4)) + + +@pytest.fixture(scope="function") +def directory_for_test(tmpdir): + """ generate test data in randomized temporary directory + + Basic tree of files and directories including some symlinks + """ + create_test_data(tmpdir, tmpdir) + return tmpdir + + +def get_fd_count() -> int: + # Make sure we free up any dangling files waiting for garbage + # collection before we get authoritative count for this module + gc.collect() + return len(os.listdir('/proc/self/fd')) + + +@pytest.fixture(scope="module") +def fd_count() -> int: + return get_fd_count() + + +def validate_attributes( + src: str, + dst: str, + flags: copy.CopyFlags +) -> None: + st_src = os.lstat(src) + st_dst = os.lstat(dst) + + assert st_src.st_size == st_dst.st_size + + match (file_type := stat.S_IFMT(st_src.st_mode)): + case stat.S_IFREG | stat.S_IFDIR: + pass + # validate we set owner / group when requested + op = eq if flags & copy.CopyFlags.OWNER else ne + assert op(st_src.st_uid, st_dst.st_uid) + assert op(st_src.st_gid, st_dst.st_gid) + + # validate we preserve file mode when requested + op = eq if flags & copy.CopyFlags.PERMISSIONS else ne + assert op(st_src.st_mode, st_dst.st_mode) + + # validate we preserve timestamps when requested + op = eq if flags & copy.CopyFlags.TIMESTAMPS else ne + + # checking mtime is sufficient. Atime in test runner + # is enabled and so it will get reset on source when + # we're copying data around. + assert op(st_src.st_mtime_ns, st_dst.st_mtime_ns) + case stat.S_IFLNK: + src_tgt = os.readlink(src) + dst_tgt = os.readlink(dst) + assert eq(src_tgt, dst_tgt) + return + case _: + raise ValueError(f'{src}: unexpected file type: {file_type}') + + # validate we set owner / group when requested + op = eq if flags & copy.CopyFlags.OWNER else ne + assert op(st_src.st_uid, st_dst.st_uid) + assert op(st_src.st_gid, st_dst.st_gid) + + # validate we preserve file mode when requested + op = eq if flags & copy.CopyFlags.PERMISSIONS else ne + assert op(st_src.st_mode, st_dst.st_mode) + + # validate we preserve timestamps when requested + # NOTE: futimens on linux only allows setting atime + mtime + op = eq if flags & copy.CopyFlags.TIMESTAMPS else ne + assert op(st_src.st_mtime_ns, st_dst.st_mtime_ns) + + +def validate_xattrs( + src: str, + dst: str, + flags: copy.CopyFlags +) -> None: + if stat.S_ISLNK(os.lstat(src).st_mode): + # Nothing to do since we don't follow symlinks + return + + xat_src = os.listxattr(src) + xat_dst = os.listxattr(dst) + + if flags & copy.CopyFlags.XATTRS: + assert len(xat_src) > 0 + assert len(xat_dst) > 0 + assert xat_src == xat_dst + + for xat_name in xat_src: + xat_data_src = os.getxattr(src, xat_name) + xat_data_dst = os.getxattr(dst, xat_name) + + assert len(xat_data_src) > 0 + + assert xat_data_src == xat_data_dst + + else: + assert len(xat_src) > 0 + assert len(xat_dst) == 0 + + +def validate_data( + src: str, + dst: str, + flags: copy.CopyFlags +) -> None: + match (file_type := stat.S_IFMT(os.lstat(src).st_mode)): + case stat.S_IFLNK: + # readlink performed in validate_attributes + return + + case stat.S_IFDIR: + assert os.listdir(src) == os.listdir(dst) + return + + case stat.S_IFREG: + # validation performed below + pass + + case _: + raise ValueError(f'{src}: unexpected file type: {file_type}') + + with open(src, 'rb') as f: + src_data = f.read() + + with open(dst, 'rb') as f: + dst_data = f.read() + + assert src_data == dst_data + + +def validate_the_things( + src: str, + dst: str, + flags: copy.CopyFlags +) -> None: + for fn in (validate_data, validate_xattrs, validate_attributes): + fn(src, dst, flags) + + +def validate_copy_tree( + src: str, + dst: str, + + flags: copy.CopyFlags +): + with os.scandir(src) as it: + for f in it: + if f.name == 'CHILD': + # skip validation of bind mountpoint + continue + + new_src = os.path.join(src, f.name) + new_dst = os.path.join(dst, f.name) + validate_the_things(new_src, new_dst, flags) + if f.is_dir() and not f.is_symlink(): + validate_copy_tree(new_src, new_dst, flags) + + +def test__copytree_default(directory_for_test, fd_count): + """ test basic behavior of copytree """ + + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + config = copy.CopyTreeConfig() + + assert config.flags == copy.DEF_CP_FLAGS + + stats = copy.copytree(src, dst, config) + + validate_copy_tree(src, dst, config.flags) + assert stats.files != 0 + assert stats.dirs != 0 + assert stats.symlinks != 0 + + assert get_fd_count() == fd_count + + +@pytest.mark.parametrize('is_ctldir', [True, False]) +def test__copytree_exclude_ctldir(directory_for_test, fd_count, is_ctldir): + """ test that we do not recurse into ZFS ctldir """ + + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + + snapdir = os.path.join(src, '.zfs', 'snapshot', 'now') + os.makedirs(snapdir) + with open(os.path.join(snapdir, 'canary'), 'w') as f: + pass + + if is_ctldir: + # Mock over method to determine whether path is in actual .zfs + with patch( + 'middlewared.utils.filesystem.copy.path_in_ctldir', Mock( + return_value=True + ) + ): + copy.copytree(src, dst, copy.CopyTreeConfig()) + + # We should automatically exclude a real .zfs directory + assert not os.path.exists(os.path.join(dst, '.zfs')) + else: + # This .zfs directory does not have special inode number + # and so we know we can copy it. + copy.copytree(src, dst, copy.CopyTreeConfig()) + assert os.path.exists(os.path.join(dst, '.zfs')) + + +@pytest.mark.parametrize('existok', [True, False]) +def test__copytree_existok(directory_for_test, fd_count, existok): + """ test behavior of `exist_ok` configuration option """ + + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + config = copy.CopyTreeConfig(exist_ok=existok) + os.mkdir(dst) + + if existok: + copy.copytree(src, dst, config) + validate_copy_tree(src, dst, config.flags) + + else: + with pytest.raises(FileExistsError): + copy.copytree(src, dst, config) + + assert get_fd_count() == fd_count + + +@pytest.mark.parametrize('flag', [ + copy.CopyFlags.XATTRS, + copy.CopyFlags.PERMISSIONS, + copy.CopyFlags.TIMESTAMPS, + copy.CopyFlags.OWNER +]) +def test__copytree_flags(directory_for_test, fd_count, flag): + """ + copytree allows user to specify what types of metadata to + preserve on copy similar to robocopy on Windows. This tests + that setting individual flags results in copy of _only_ + the specified metadata. + """ + + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + copy.copytree(src, dst, copy.CopyTreeConfig(flags=flag)) + + validate_copy_tree(src, dst, flag) + + assert get_fd_count() == fd_count + + +def test__force_userspace_copy(directory_for_test, fd_count): + """ force use of shutil.copyfileobj wrapper instead of copy_file_range """ + + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + flags = copy.DEF_CP_FLAGS + + copy.copytree(src, dst, copy.CopyTreeConfig(flags=flags, op=copy.CopyTreeOp.USERSPACE)) + + validate_copy_tree(src, dst, flags) + + assert get_fd_count() == fd_count + + +def test__copytree_into_itself_simple(directory_for_test, fd_count): + """ perform a basic copy of a tree into a subdirectory of itself. + This simulates case where user has mistakenly set homedir to FOO + and performs an update of homedir to switch it to FOO/username. + + If logic breaks then we'll end up with this test failing due to + infinite recursion. + """ + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'SOURCE', 'DEST') + + copy.copytree(src, dst, copy.CopyTreeConfig()) + + assert not os.path.exists(os.path.join(directory_for_test, 'SOURCE', 'DEST', 'DEST')) + + assert get_fd_count() == fd_count + + +def test__copytree_into_itself_complex(directory_for_test, fd_count): + """ check recursion guard against deeper nested target """ + + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'SOURCE', 'FOO', 'BAR', 'DEST') + + os.makedirs(os.path.join(directory_for_test, 'SOURCE', 'FOO', 'BAR')) + + copy.copytree(src, dst, copy.CopyTreeConfig()) + + # we expect to copy everything up to the point where we'd start infinite + # recursion + assert os.path.exists(os.path.join(dst, 'FOO', 'BAR')) + + # but not quite get there + assert not os.path.exists(os.path.join(dst, 'FOO', 'BAR', 'DEST')) + + assert get_fd_count() == fd_count + + +def test__copytree_job_log(directory_for_test, fd_count): + """ check that providing job object causes progress to be written properly """ + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + job = Job() + + config = copy.CopyTreeConfig(job=job, job_msg_inc=1) + copy.copytree(src, dst, config) + + assert job.progress == 100 + assert len(job.log) > 0 + last = job.log[-1] + + assert last.startswith('Successfully copied') + + +def test__copytree_job_log_prefix(directory_for_test, fd_count): + """ check that log message prefix gets written as expected """ + src = os.path.join(directory_for_test, 'SOURCE') + dst = os.path.join(directory_for_test, 'DEST') + job = Job() + + config = copy.CopyTreeConfig(job=job, job_msg_inc=1, job_msg_prefix='Canary: ') + copy.copytree(src, dst, config) + + assert job.progress == 100 + assert len(job.log) > 0 + last = job.log[-1] + + assert last.startswith('Canary: Successfully copied') + + +def test__clone_file_somewhat_large(tmpdir): + + src_fd = os.open(os.path.join(tmpdir, 'test_large_clone_src'), os.O_CREAT | os.O_RDWR) + dst_fd = os.open(os.path.join(tmpdir, 'test_large_clone_dst'), os.O_CREAT | os.O_RDWR) + chunk_sz = 1024 ** 2 + + try: + for i in range(0, 128): + payload = random.randbytes(chunk_sz) + os.pwrite(src_fd, payload, i * chunk_sz) + + copy.clone_file(src_fd, dst_fd) + + for i in range(0, 128): + src = os.pread(src_fd, chunk_sz, i * chunk_sz) + dst = os.pread(dst_fd, chunk_sz, i * chunk_sz) + assert src == dst + + finally: + os.close(src_fd) + os.close(dst_fd) + os.unlink(os.path.join(tmpdir, 'test_large_clone_src')) + os.unlink(os.path.join(tmpdir, 'test_large_clone_dst')) + + +def test__copy_default_fallthrough(tmpdir): + """ verify we can fallthrough from CLONE to USERSPACE """ + src_fd = os.open(os.path.join(tmpdir, 'test_default_fallthrough_src'), os.O_CREAT | os.O_RDWR) + dst_fd = os.open(os.path.join(tmpdir, 'test_default_fallthrough_dst'), os.O_CREAT | os.O_RDWR) + chunk_sz = 1024 ** 2 + + try: + for i in range(0, 128): + payload = random.randbytes(chunk_sz) + os.pwrite(src_fd, payload, i * chunk_sz) + + # return value of 0 triggers fallthrough code + with patch('os.sendfile', Mock(return_value=0)): + + # raising EXDEV triggers clone fallthrough + with patch('middlewared.utils.filesystem.copy.clone_file', Mock(side_effect=OSError(errno.EXDEV, 'MOCK'))): + copy.clone_or_copy_file(src_fd, dst_fd) + + for i in range(0, 128): + src = os.pread(src_fd, chunk_sz, i * chunk_sz) + dst = os.pread(dst_fd, chunk_sz, i * chunk_sz) + assert src == dst + + finally: + os.close(src_fd) + os.close(dst_fd) + os.unlink(os.path.join(tmpdir, 'test_default_fallthrough_src')) + os.unlink(os.path.join(tmpdir, 'test_default_fallthrough_dst')) + + +def test__copy_sendfile_fallthrough(tmpdir): + """ verify that fallthrough to userspace copy from copy_sendfile works """ + src_fd = os.open(os.path.join(tmpdir, 'test_sendfile_fallthrough_src'), os.O_CREAT | os.O_RDWR) + dst_fd = os.open(os.path.join(tmpdir, 'test_sendfile_fallthrough_dst'), os.O_CREAT | os.O_RDWR) + chunk_sz = 1024 ** 2 + + try: + for i in range(0, 128): + payload = random.randbytes(chunk_sz) + os.pwrite(src_fd, payload, i * chunk_sz) + + # return value of 0 triggers fallthrough code + with patch('os.sendfile', Mock(return_value=0)): + copy.copy_sendfile(src_fd, dst_fd) + + for i in range(0, 128): + src = os.pread(src_fd, chunk_sz, i * chunk_sz) + dst = os.pread(dst_fd, chunk_sz, i * chunk_sz) + assert src == dst + + finally: + os.close(src_fd) + os.close(dst_fd) + os.unlink(os.path.join(tmpdir, 'test_sendfile_fallthrough_src')) + os.unlink(os.path.join(tmpdir, 'test_sendfile_fallthrough_dst')) + + +def test__copy_sendfile(tmpdir): + """ verify that copy.sendfile preserves file data and does not by default fallthrogh to userspace """ + src_fd = os.open(os.path.join(tmpdir, 'test_large_sendfile_src'), os.O_CREAT | os.O_RDWR) + dst_fd = os.open(os.path.join(tmpdir, 'test_large_sendfile_dst'), os.O_CREAT | os.O_RDWR) + chunk_sz = 1024 ** 2 + + try: + for i in range(0, 128): + payload = random.randbytes(chunk_sz) + os.pwrite(src_fd, payload, i * chunk_sz) + + with patch( + 'middlewared.utils.filesystem.copy.copy_file_userspace', Mock( + side_effect=Exception('Unexpected fallthrough to copy_userspace') + ) + ): + copy.copy_sendfile(src_fd, dst_fd) + + for i in range(0, 128): + src = os.pread(src_fd, chunk_sz, i * chunk_sz) + dst = os.pread(dst_fd, chunk_sz, i * chunk_sz) + assert src == dst + + finally: + os.close(src_fd) + os.close(dst_fd) + os.unlink(os.path.join(tmpdir, 'test_large_sendfile_src')) + os.unlink(os.path.join(tmpdir, 'test_large_sendfile_dst')) diff --git a/src/middlewared/middlewared/pytest/unit/utils/test_statx.py b/src/middlewared/middlewared/pytest/unit/utils/test_statx.py index b8d1ca2788682..d191caba86972 100644 --- a/src/middlewared/middlewared/pytest/unit/utils/test_statx.py +++ b/src/middlewared/middlewared/pytest/unit/utils/test_statx.py @@ -3,7 +3,7 @@ import stat from middlewared.utils.filesystem import stat_x as sx -from middlewared.utils.filesystem.utils import timespec_convert +from middlewared.utils.filesystem.utils import timespec_convert_float BASIC_STAT_ATTRS = [ @@ -39,11 +39,11 @@ def validate_stat(stat_prop, st1, st2): case 'GID': assert st1.st_gid == st2.stx_gid case 'ATIME': - assert st1.st_atime == timespec_convert(st2.stx_atime) + assert st1.st_atime == timespec_convert_float(st2.stx_atime) case 'MTIME': - assert st1.st_mtime == timespec_convert(st2.stx_mtime) + assert st1.st_mtime == timespec_convert_float(st2.stx_mtime) case 'CTIME': - assert st1.st_ctime == timespec_convert(st2.stx_ctime) + assert st1.st_ctime == timespec_convert_float(st2.stx_ctime) case 'INO': assert st1.st_ino == st2.stx_ino case 'DEV': diff --git a/src/middlewared/middlewared/utils/filesystem/acl.py b/src/middlewared/middlewared/utils/filesystem/acl.py index a9b5dbc2c9dac..f43def7561fa7 100644 --- a/src/middlewared/middlewared/utils/filesystem/acl.py +++ b/src/middlewared/middlewared/utils/filesystem/acl.py @@ -9,6 +9,9 @@ class ACLXattr(enum.Enum): ACL_XATTRS = set([xat.value for xat in ACLXattr]) +# ACCESS_ACL_XATTRS is set of ACLs that control access to the file itself. +ACCESS_ACL_XATTRS = set([ACLXattr.POSIX_ACCESS.value, ACLXattr.ZFS_NATIVE.value]) + def acl_is_present(xat_list: list) -> bool: """ diff --git a/src/middlewared/middlewared/utils/filesystem/copy.py b/src/middlewared/middlewared/utils/filesystem/copy.py new file mode 100644 index 0000000000000..d0b201dca93a4 --- /dev/null +++ b/src/middlewared/middlewared/utils/filesystem/copy.py @@ -0,0 +1,617 @@ +# Various utilities related to copying / cloning files and file tree +# test coverage provided by pytest/unit/utils/test_copytree.py + +import enum +import os + +from dataclasses import dataclass +from errno import EXDEV +from middlewared.job import Job +from os import open as posix_open +from os import ( + close, + copy_file_range, + fchmod, + fchown, + fstat, + getxattr, + lseek, + makedev, + mkdir, + path, + readlink, + sendfile, + setxattr, + stat_result, + symlink, + utime, + O_CREAT, + O_DIRECTORY, + O_EXCL, + O_NOFOLLOW, + O_RDONLY, + O_RDWR, + O_TRUNC, + SEEK_CUR, +) +from shutil import copyfileobj +from stat import S_IMODE +from .acl import ACCESS_ACL_XATTRS, ACL_XATTRS +from .directory import ( + dirent_struct, + DirectoryIterator, + DirectoryRequestMask, +) +from .stat_x import StatxEtype +from .utils import path_in_ctldir, timespec_convert_int + +CLONETREE_ROOT_DEPTH = 0 +MAX_RW_SZ = 2147483647 & ~4096 # maximum size of read/write in kernel + + +class CopyFlags(enum.IntFlag): + """ Flags specifying which metadata to copy from source to destination """ + XATTRS = 0x0001 # copy user, trusted, security namespace xattrs + PERMISSIONS = 0x0002 # copy ACL xattrs + TIMESTAMPS = 0x0004 # copy ACL timestamps + OWNER = 0x0008 + + +class CopyTreeOp(enum.Enum): + """ + Available options for customizing the method by which files are copied. DEFAULT + is generally the best option (prefer to do a block clone and use zero-copy method + otherwise). + + USERSPACE should be used for certain types of special filesystems such as procfs + or sysfs that may not properly support copy_file_range or sendfile. + """ + DEFAULT = enum.auto() # try clone and fallthrough eventually to userspace + CLONE = enum.auto() # attempt to block clone and if that fails, fail operation + SENDFILE = enum.auto() # attempt sendfile (with fallthrough to copyfileobj) + USERSPACE = enum.auto() # same as shutil.copyfileobj + + +DEF_CP_FLAGS = CopyFlags.XATTRS | CopyFlags.PERMISSIONS | CopyFlags.OWNER | CopyFlags.TIMESTAMPS + + +@dataclass(frozen=True, slots=True) +class CopyTreeConfig: + """ + Configuration for copytree() operation. + + job: middleware Job object. This is optional and may be passed if the API user + wants to report via job.set_progress + + job_msg_prefix: prefix for progress messages + + job_msg_inc: call set_progress every N files + dirs copied + + raise_error: raise exceptions on metadata copy failures + + exist_ok: do not raise an exception if a file or directory already exists + + traverse: recurse into child datasets + + op: copy tree operation that will be performed (see CopyTreeOp class) + + flags: bitmask of metadata to preserve as part of copy + """ + job: Job | None = None + job_msg_prefix: str = '' + job_msg_inc: int = 1000 + raise_error: bool = True + exist_ok: bool = True + traverse: bool = False + op: CopyTreeOp = CopyTreeOp.DEFAULT + flags: CopyFlags = DEF_CP_FLAGS # flags specifying which metadata to copy + + +@dataclass(slots=True) +class CopyTreeStats: + dirs: int = 0 + files: int = 0 + symlinks: int = 0 + bytes: int = 0 + + +def _copytree_conf_to_dir_request_mask(config: CopyTreeConfig) -> DirectoryRequestMask: + """ internal method to convert CopyTreeConfig to a DirectoryRequestMask """ + mask_out = 0 + if config.flags.value & CopyFlags.XATTRS.value: + mask_out |= DirectoryRequestMask.XATTRS + + if config.flags.value & CopyFlags.PERMISSIONS.value: + # XATTR list is required for getting preserving ACLs + mask_out |= DirectoryRequestMask.ACL | DirectoryRequestMask.XATTRS + + return mask_out + + +def copy_permissions(src_fd: int, dst_fd: int, xattr_list: list[str], mode: int) -> None: + """ Copy permissions from one file to another. + + Params: + src_fd: source file + dst_fd: destination file + xattr_list: list of all xattrs on src_fd + mode: POSIX mode of src_fd + + Returns: + None + + Raises: + PermissionError: was forced to try to fchmod to set permissions, but destination already + inherited an ACL and has a RESTRICTED ZFS aclmode. + + OSError - EOPNOTSUPP: ACL type mismatch between src_fd and dst_fd + OSError: various errnos for reasons specified in syscall manpages for fgetxattr, + fsetxattr, and fchmod + + NOTE: If source file has an ACL containing permissions then fchmod will not be attempted. + """ + if not (access_xattrs := set(xattr_list) & ACCESS_ACL_XATTRS): + # There are no ACLs that encode permissions for _this_ file and so we must use mode + + # NOTE: fchmod will raise PermissionError if ZFS dataset aclmode is RESTRICTED + # and if the dst_fd inherited an ACL from parent. + fchmod(dst_fd, S_IMODE(mode)) + return + + for xat_name in access_xattrs: + xat_buf = getxattr(src_fd, xat_name) + setxattr(dst_fd, xat_name, xat_buf) + + +def copy_xattrs(src_fd: int, dst_fd: int, xattr_list: list[str]) -> None: + """ copy xattrs that aren't for ACLs + + Params: + src_fd: source file + dst_fd: destination file + xattr_list: list of all xattrs on src_fd + + Returns: + None + + Raises: + OSError - EOPNOTSUPP: xattr support disabled on the destination filesystem. + OSError: various errnos for reasons specified in xattr syscall manpages + """ + for xat_name in set(xattr_list) - ACL_XATTRS: + if xat_name.startswith('system'): + # system xattrs typically denote filesystem-specific xattr handlers that + # may not be applicable to file copies. For now we will skip them silently. + continue + + xat_buf = getxattr(src_fd, xat_name) + setxattr(dst_fd, xat_name, xat_buf) + + +def copy_file_userspace(src_fd: int, dst_fd: int) -> None: + """ wrapper around copyfilobj that uses file descriptors + + params: + src_fd: source file + dst_fd: destination file + + Returns: + int: bytes written + + Raises: + Same exceptions as shutil.copyfileobj + OSError: errno will be set to one of the values specified in + the manpage for ile_range() + """ + src = open(src_fd, 'rb', closefd=False) + dst = open(dst_fd, 'wb', closefd=False) + copyfileobj(src, dst) + + # TODO: have better method of getting bytes written than fstat on destination. + return fstat(dst_fd).st_size + + +def copy_sendfile(src_fd: int, dst_fd: int) -> None: + """ Optimized copy of file. First try sendfile and if that fails + perform userspace copy of file. + + params: + src_fd: source file + dst_fd: destination file + + Returns: + int: bytes written + + Raises: + OSError: errno will be set to one of the values specified in + the manpage for sendfile() + """ + offset = 0 + + while (sent := sendfile(dst_fd, src_fd, offset, MAX_RW_SZ)) > 0: + offset += sent + + if offset == 0 and lseek(dst_fd, 0, SEEK_CUR) == 0: + # maintain fallback code from _fastcopy_sendfile + return copy_file_userspace(src_fd, dst_fd) + + return offset + + +def clone_file(src_fd: int, dst_fd: int) -> None: + """ block cloning is implemented via copy_file_range + + params: + src_fd: source file + dst_fd: destination file + + Returns: + int: bytes written + + Raises: + OSError: EXDEV (zfs) source and destination are on different pools. + OSError: EXDEV (non-zfs) source and destination are on filesystems. + OSError: errno will be set to one of the values specified in + the manpage for copy_file_range() + """ + offset = 0 + + # loop until copy_file_range returns 0 catch any possible TOCTOU issues + # that may arrive if data added after initial statx call. + while (copied := copy_file_range( + src_fd, dst_fd, + MAX_RW_SZ, + offset_src=offset, + offset_dst=offset + )) > 0: + offset += copied + + return offset + + +def clone_or_copy_file(src_fd: int, dst_fd: int) -> None: + """ try to clone file via copy_file_range and if fails fall back to + shutil.copyfileobj + + params: + src_fd: source file + dst_fd: destination file + + Returns: + int: bytes written + + Raises: + OSError + """ + try: + return clone_file(src_fd, dst_fd) + except OSError as err: + if err.errno == EXDEV: + # different pool / non-zfs + return copy_sendfile(src_fd, dst_fd) + + # Other error + raise + + +def _do_mkfile( + src: dirent_struct, + src_fd: int, + dst_fd: int, + config: CopyTreeConfig, + stats: CopyTreeStats, + c_fn: callable +) -> None: + """ Perform copy / clone of file, possibly preserving metadata. + + Params: + src: direct_struct of parent directory of the src_fd + src_fd: handle of file being copied + dst_fd: handle of target file + config: configuration of the copy operation + stats: counters to be update with bytes written + c_fn: the copy / clone function to use for writing data to the destination + + Returns: + None + + Raises: + OSError + PermissionError + + NOTE: this is an internal method that should only be called from within copytree. + """ + if config.flags.value & CopyFlags.PERMISSIONS.value: + try: + copy_permissions(src_fd, dst_fd, src.xattrs, src.stat.stx_mode) + except Exception: + if config.raise_error: + raise + + if config.flags.value & CopyFlags.XATTRS.value: + try: + copy_xattrs(src_fd, dst_fd, src.xattrs) + except Exception: + if config.raise_error: + raise + + if config.flags.value & CopyFlags.OWNER.value: + fchown(dst_fd, src.stat.stx_uid, src.stat.stx_gid) + + stats.bytes += c_fn(src_fd, dst_fd) + + # We need to write timestamps after file data to ensure reset atime / mtime + if config.flags.value & CopyFlags.TIMESTAMPS.value: + ns_ts = ( + timespec_convert_int(src.stat.stx_atime), + timespec_convert_int(src.stat.stx_mtime) + ) + try: + utime(dst_fd, ns=ns_ts) + except Exception: + if config.raise_error: + raise + + +def _do_mkdir( + src: dirent_struct, + src_fd: int, + dst_dir_fd: int, + config: CopyTreeConfig +) -> int: + """ Internal method to mkdir and set its permissions and xattrs + + Params: + src: direct_struct of parent directory of the src_fd + src_fd: handle of file being copied + dst_fd: handle of target file + config: configuration of the copy operation + c_fn: the copy / clone function to use for writing data to the destination + + Returns: + file descriptor + + Raises: + OSError + + NOTE: this is an internal method that should only be called from within copytree. + """ + try: + mkdir(src.name, dir_fd=dst_dir_fd) + except FileExistsError: + if not config.exist_ok: + raise + + new_dir_hdl = posix_open(src.name, O_DIRECTORY, dir_fd=dst_dir_fd) + try: + if config.flags.value & CopyFlags.PERMISSIONS.value: + copy_permissions(src_fd, new_dir_hdl, src.xattrs, src.stat.stx_mode) + + if config.flags.value & CopyFlags.XATTRS.value: + copy_xattrs(src_fd, new_dir_hdl, src.xattrs) + + if config.flags.value & CopyFlags.OWNER.value: + fchown(new_dir_hdl, src.stat.stx_uid, src.stat.stx_gid) + + except Exception: + if config.raise_error: + close(new_dir_hdl) + raise + + return new_dir_hdl + + +def _copytree_impl( + d_iter: DirectoryIterator, + dst_str: str, + dst_fd: int, + depth: int, + config: CopyTreeConfig, + target_st: stat_result, + stats: CopyTreeStats +): + """ internal implementation of our copytree method + + NOTE: this method is called recursively for each directory to walk down tree. + This means additional O_DIRECTORY open for duration of life of each DirectoryIterator + object (closed when DirectoryIterator context manager exits). + + Params: + d_iter: directory iterator for current directory + dst_str: target directory of copy + dst_fd: open file handle for target directory + depth: current depth in src directory tree + config: CopyTreeConfig - used to determine what to copy + target_st: stat_result of target directory for initial copy. This is used + to provide device + inode number so that we can avoid copying destination into + itself. + + Returns: + None + + Raises: + OSError + PermissionError + """ + + match config.op: + case CopyTreeOp.DEFAULT: + c_fn = clone_or_copy_file + case CopyTreeOp.CLONE: + c_fn = clone_file + case CopyTreeOp.SENDFILE: + c_fn = copy_sendfile + case CopyTreeOp.USERSPACE: + c_fn = copy_file_userspace + case _: + raise ValueError(f'{config.op}: unexpected copy operation') + + for entry in d_iter: + # We match on `etype` key because our statx wrapper will initially lstat a file + # and if it's a symlink, perform a stat call to get information from symlink target + # This means that S_ISLNK on mode will fail to detect whether it's a symlink. + match entry.etype: + case StatxEtype.DIRECTORY.name: + if not config.traverse: + if entry.stat.stx_mnt_id != d_iter.stat.stx_mnt_id: + # traversal is disabled and entry is in different filesystem + # continue here prevents entering the directory / filesystem + continue + + if entry.name == '.zfs': + # User may have visible snapdir. We definitely don't want to try to copy this + # path_in_ctldir checks inode number to verify it's not reserved number for + # these special paths (definitive indication it's ctldir as opposed to random + # dir user named '.zfs') + if path_in_ctldir(entry.path): + continue + + if entry.stat.stx_ino == target_st.st_ino: + # We use makedev / dev_t in this case to catch potential edge cases where bind mount + # in path (since bind mounts of same filesystem will have same st_dev, but different + # stx_mnt_id. + if makedev(entry.stat.stx_dev_major, entry.stat.stx_dev_minor) == target_st.st_dev: + continue + + # This can fail with OSError and errno set to ELOOP if target was maliciously + # replaced with symlink between our first stat and the open call + entry_fd = posix_open(entry.name, O_DIRECTORY | O_NOFOLLOW, dir_fd=d_iter.dir_fd) + try: + new_dst_fd = _do_mkdir(entry, entry_fd, dst_fd, config) + except Exception: + close(entry_fd) + raise + + # We made directory on destination and copied metadata for it, and so we're safe + # to recurse into it in source and continue our operation. + try: + with DirectoryIterator( + entry.name, + request_mask=d_iter.request_mask, + dir_fd=d_iter.dir_fd, + as_dict=False + ) as c_iter: + _copytree_impl( + c_iter, + path.join(dst_str, entry.name), + new_dst_fd, + depth + 1, + config, + target_st, + stats + ) + + if config.flags.value & CopyFlags.TIMESTAMPS.value: + ns_ts = ( + timespec_convert_int(entry.stat.stx_atime), + timespec_convert_int(entry.stat.stx_mtime) + ) + try: + utime(new_dst_fd, ns=ns_ts) + except Exception: + if config.raise_error: + raise + + finally: + close(new_dst_fd) + close(entry_fd) + + stats.dirs += 1 + + case StatxEtype.FILE.name: + entry_fd = posix_open(entry.name, O_RDONLY | O_NOFOLLOW, dir_fd=d_iter.dir_fd) + try: + flags = O_RDWR | O_NOFOLLOW | O_CREAT | O_TRUNC + if not config.exist_ok: + flags |= O_EXCL + + dst = posix_open(entry.name, flags, dir_fd=dst_fd) + try: + _do_mkfile(entry, entry_fd, dst, config, stats, c_fn) + finally: + close(dst) + finally: + close(entry_fd) + + stats.files += 1 + + case StatxEtype.SYMLINK.name: + stats.symlinks += 1 + dst = readlink(entry.name, dir_fd=d_iter.dir_fd) + try: + symlink(dst, entry.name, dir_fd=dst_fd) + except FileExistsError: + if not config.exist_ok: + raise + + continue + + case _: + continue + + if config.job and ((stats.dirs + stats.files) % config.job_msg_inc) == 0: + config.job.set_progress(100, ( + f'{config.job_msg_prefix}' + f'Copied {entry.path} -> {os.path.join(dst_str, entry.name)}.' + )) + + +def copytree( + src: str, + dst: str, + config: CopyTreeConfig +) -> CopyTreeStats: + """ + Copy all files, directories, and symlinks from src to dst. CopyTreeConfig allows + controlling whether we recurse into child datasets on src side as well as specific + metadata to preserve in the copy. This method also has protection against copying + the zfs snapshot directory if for some reason the user has set it to visible. + + Params: + src: the source directory + dst: the destination directory + config: configuration parameters for the copy + + Returns: + CopyStats + + Raises: + OSError: ELOOP: path was replaced with symbolic link while recursing + this should never happen during normal operations and may indicate + an attempted symlink attack + OSError: EOPNOTSUPP: ACL type mismatch between src and dst + OSError: EOPNOTSUPP: xattrs are disabled on dst + OSError: : various reasons listed in syscall manpages + PermissionError: + Attempt to chmod on destination failed due to RESTRICTED aclmode on dataset. + + """ + for p in (src, dst): + if not path.isabs(p): + raise ValueError(f'{p}: absolute path is required') + + dir_request_mask = _copytree_conf_to_dir_request_mask(config) + try: + os.mkdir(dst) + except FileExistsError: + if not config.exist_ok: + raise + + dst_fd = posix_open(dst, O_DIRECTORY) + + stats = CopyTreeStats() + + try: + with DirectoryIterator(src, request_mask=int(dir_request_mask), as_dict=False) as d_iter: + _copytree_impl(d_iter, dst, dst_fd, CLONETREE_ROOT_DEPTH, config, fstat(dst_fd), stats) + finally: + close(dst_fd) + + if config.job: + config.job.set_progress(100, ( + f'{config.job_msg_prefix}' + f'Successfully copied {stats.dirs} directories, {stats.files} files, ' + f'{stats.symlinks} symlinks for a total of {stats.bytes} bytes of data.' + )) + + return stats diff --git a/src/middlewared/middlewared/utils/filesystem/directory.py b/src/middlewared/middlewared/utils/filesystem/directory.py index f8cd5a3404c70..b22906537ee6c 100644 --- a/src/middlewared/middlewared/utils/filesystem/directory.py +++ b/src/middlewared/middlewared/utils/filesystem/directory.py @@ -16,7 +16,7 @@ from .acl import acl_is_present from .attrs import fget_zfs_file_attributes, zfs_attributes_dump from .constants import FileType -from .stat_x import statx_entry_impl +from .stat_x import statx, statx_entry_impl, ATFlags, StructStatx from .utils import path_in_ctldir @@ -56,7 +56,7 @@ class DirectoryRequestMask(enum.IntFlag): ) dirent_struct = namedtuple('struct_dirent', [ - 'name', 'path', 'realpath', 'stat', 'acl', 'xattrs', 'zfs_attrs', 'is_in_ctldir' + 'name', 'path', 'realpath', 'stat', 'etype', 'acl', 'xattrs', 'zfs_attrs', 'is_in_ctldir' ]) @@ -84,7 +84,7 @@ def close(self): self.__dir_fd = None @property - def fileno(self): + def fileno(self) -> int: return self.__dir_fd @@ -132,6 +132,7 @@ def __init__(self, path, file_type=None, request_mask=None, dir_fd=None, as_dict self.__dir_fd = DirectoryFd(path, dir_fd) self.__file_type = FileType(file_type).name if file_type else None self.__path_iter = os.scandir(self.__dir_fd.fileno) + self.__stat = statx('', dir_fd=self.__dir_fd.fileno, flags=ATFlags.EMPTY_PATH.value) # Explicitly allow zero for request_mask self.__request_mask = request_mask if request_mask is not None else ALL_ATTRS @@ -182,6 +183,7 @@ def __return_dirent(self, dirent, st, realpath, xattrs, acl, zfs_attrs, is_in_ct os.path.join(self.__path, dirent.name), realpath, st['st'], + st['etype'], acl, xattrs, zfs_attrs, @@ -225,6 +227,12 @@ def __next__(self): # `dirent` was most likely deleted while we were generating listing # There's not point in logging an error. Just keep moving on. return self.__next__() + except OSError as err: + if err.errno in (errno.ENXIO, errno.ENODEV): + # this can happen for broken symlinks + return self.__next__() + + raise try: if self.__request_mask & int(DirectoryRequestMask.REALPATH): @@ -268,7 +276,7 @@ def __next__(self): return self.__return_fn(dirent, st, realpath, xattrs, acl, zfs_attrs, is_in_ctldir) @property - def dir_fd(self): + def dir_fd(self) -> int: """ File descriptor for O_DIRECTORY open for target directory. """ @@ -278,10 +286,14 @@ def dir_fd(self): return self.__dir_fd.fileno @property - def request_mask(self): + def request_mask(self) -> DirectoryRequestMask: return self.__request_mask - def close(self, force=False): + @property + def stat(self) -> StructStatx: + return self.__stat + + def close(self, force=False) -> None: try: if self.__path_iter is not None: self.__path_iter.close() diff --git a/src/middlewared/middlewared/utils/filesystem/stat_x.py b/src/middlewared/middlewared/utils/filesystem/stat_x.py index c969098ac523e..abb2c27da1fdc 100644 --- a/src/middlewared/middlewared/utils/filesystem/stat_x.py +++ b/src/middlewared/middlewared/utils/filesystem/stat_x.py @@ -8,11 +8,18 @@ import os import ctypes import stat as statlib -from enum import IntFlag +from enum import auto, Enum, IntFlag from .constants import AT_FDCWD from .utils import path_in_ctldir +class StatxEtype(Enum): + DIRECTORY = auto() + FILE = auto() + SYMLINK = auto() + OTHER = auto() + + class ATFlags(IntFlag): # fcntl.h STATX_SYNC_AS_STAT = 0x0000 @@ -181,20 +188,20 @@ def statx_entry_impl(entry, dir_fd=None, get_ctldir=True): out['attributes'].append(attr.name) if statlib.S_ISDIR(out['st'].stx_mode): - out['etype'] = 'DIRECTORY' + out['etype'] = StatxEtype.DIRECTORY.name elif statlib.S_ISLNK(out['st'].stx_mode): - out['etype'] = 'SYMLINK' + out['etype'] = StatxEtype.SYMLINK.name try: out['st'] = statx(path, dir_fd=dir_fd) except FileNotFoundError: return None elif statlib.S_ISREG(out['st'].stx_mode): - out['etype'] = 'FILE' + out['etype'] = StatxEtype.FILE.name else: - out['etype'] = 'OTHER' + out['etype'] = StatxEtype.OTHER.name if entry.is_absolute(): out['is_ctldir'] = path_in_ctldir(entry) diff --git a/src/middlewared/middlewared/utils/filesystem/utils.py b/src/middlewared/middlewared/utils/filesystem/utils.py index 4beb5e3074718..02193e5763582 100644 --- a/src/middlewared/middlewared/utils/filesystem/utils.py +++ b/src/middlewared/middlewared/utils/filesystem/utils.py @@ -1,7 +1,8 @@ # This file provides various utilities that don't fit cleanly # into specific categories of filesystem areas. # -# timespec_convert() has test coverage via stat_x util tests +# timespec_convert_float() has test coverage via stat_x util tests +# timespec_convert_int() has test coverage via copytree util tests # path_in_ctldir() has test coverage via api tests for filesystem.stat # and filesystem.listdir methods since it requires access to zpool. @@ -36,9 +37,17 @@ def path_in_ctldir(path_in): return is_in_ctldir -def timespec_convert(timespec): +def timespec_convert_float(timespec): """ Convert a timespec struct into float. This is for use where ctype function returns timespec (for example statx()) """ return timespec.tv_sec + timespec.tv_nsec / 1000000000 + + +def timespec_convert_int(timespec): + """ + Convert a timespec struct into int. This is suitable for + when a timespec needs to be passed to os.utime() + """ + return timespec.tv_sec * 1000000000 + timespec.tv_nsec diff --git a/tests/api2/test_user_create_dir.py b/tests/api2/test_user_create_dir.py new file mode 100644 index 0000000000000..50e87035802fb --- /dev/null +++ b/tests/api2/test_user_create_dir.py @@ -0,0 +1,117 @@ +import errno +import os +import pytest + +from middlewared.service_exception import CallError +from middlewared.test.integration.assets.account import user +from middlewared.test.integration.assets.pool import dataset +from middlewared.test.integration.utils import call + +DS_NAME = 'user-create-homedir' + + +@pytest.fixture(scope='function') +def setup_user(): + with dataset(DS_NAME, data={'share_type': 'SMB'}) as ds: + with user({ + 'username': 'usercreate', + 'full_name': 'usercreate', + 'group_create': True, + 'home': os.path.join('/mnt', ds), + 'home_create': False, + 'password': 'ABCD1234' + }) as u: + yield u | {'dataset': ds} + + +def test_create_homedir(setup_user): + """ This test validates we can set create a new homedir within the currently set homedir """ + + call('user.update', setup_user['id'], { + 'home': setup_user['home'], + 'home_create': True + }) + + new = call('user.query', [['id', '=', setup_user['id']]], {'get': True}) + assert new['home'] == os.path.join(setup_user['home'], setup_user['username']) + + # verify that we won't endlessly create new homedirs within existing one if a user + # is not very API / design savvy + call('user.update', setup_user['id'], { + 'home': setup_user['home'], + 'home_create': True + }) + + new2 = call('user.query', [['id', '=', setup_user['id']]], {'get': True}) + assert new2['home'] == new['home'] + + + +def test_user_change_homedir_no_traverse(setup_user): + """ we should not recurse into child datasets """ + with dataset(f'{DS_NAME}/subds') as subds: + + # Verify that new dataset exists in source + call('filesystem.listdir', setup_user['home'], [['name', '=', 'subds']], {'get': True}) + + with dataset('new-path', data={'share_type': 'SMB'}) as ds: + call('user.update', setup_user['id'], { + 'home': os.path.join('/mnt', ds), + 'home_create': True + }) + + new = call('user.query', [['id', '=', setup_user['id']]], {'get': True}) + + # Verify that we did not try to copy over the dataset + with pytest.raises(CallError) as ce: + call('filesystem.stat', os.path.join(new['home'], 'subds')) + + assert ce.value.errno == errno.ENOENT + + +def test_user_change_homedir_no_zfs_ctldir(setup_user): + """ we should not recurse into / try to copy .zfs if snapdir visible """ + call('pool.dataset.update', setup_user['dataset'], {'snapdir': 'VISIBLE'}) + + call('user.update', setup_user['id'], { + 'home': setup_user['home'], + 'home_create': True + }) + + new = call('user.query', [['id', '=', setup_user['id']]], {'get': True}) + assert new['home'] == os.path.join(setup_user['home'], setup_user['username']) + + + with pytest.raises(CallError) as ce: + call('filesystem.stat', os.path.join(new['home'], '.zfs')) + + assert ce.value.errno == errno.ENOENT + + +def test_user_change_homedir_acl_preserve(setup_user): + """ If for some reason files within homedir have ACL, it should be preserved on copy """ + ACL = [{ + 'tag': 'owner@', + 'id': -1, + 'perms': {'BASIC': 'FULL_CONTROL'}, + 'flags': {'BASIC': 'INHERIT'}, + 'type': 'ALLOW' + }] + call('filesystem.mkdir', {'path': os.path.join(setup_user['home'], 'canary')}) + + call('filesystem.setacl', { + 'path': os.path.join(setup_user['home'], 'canary'), + 'dacl': ACL + }, job=True) + + + call('user.update', setup_user['id'], { + 'home': setup_user['home'], + 'home_create': True + }) + + new = call('user.query', [['id', '=', setup_user['id']]], {'get': True}) + + acl = call('filesystem.getacl', os.path.join(new['home'], 'canary'))['acl'] + + assert acl == ACL