From 67442cf3d866057af071d3d19456858c9b299418 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Sat, 6 Jan 2024 12:05:51 -0300 Subject: [PATCH] feat!: add VirtIO 9P device --- src/Makefile | 4 + src/dtb.cpp | 10 + src/machine.cpp | 8 + src/virtio-p9fs.cpp | 1828 +++++++++++++++++++++++++++++++++++++++ src/virtio-p9fs.h | 453 ++++++++++ src/virtio-serializer.h | 178 ++++ 6 files changed, 2481 insertions(+) create mode 100644 src/virtio-p9fs.cpp create mode 100644 src/virtio-p9fs.h create mode 100644 src/virtio-serializer.h diff --git a/src/Makefile b/src/Makefile index 0571dc635..92b4fc1bf 100644 --- a/src/Makefile +++ b/src/Makefile @@ -151,6 +151,9 @@ INCS+= \ -I../third-party/mongoose-7.12 \ $(BOOST_INC) +# Use 64-bit offsets for file operations in POSIX APIs +DEFS+=-D_FILE_OFFSET_BITS=64 + ifeq ($(dump),yes) #DEFS+=-DDUMP_ILLEGAL_INSN_EXCEPTIONS #DEFS+=-DDUMP_EXCEPTIONS @@ -319,6 +322,7 @@ LIBCARTESI_OBJS:= \ virtio-factory.o \ virtio-device.o \ virtio-console.o \ + virtio-p9fs.o \ dtb.o \ os.o \ htif.o \ diff --git a/src/dtb.cpp b/src/dtb.cpp index a59a5b98b..b7f99d33c 100644 --- a/src/dtb.cpp +++ b/src/dtb.cpp @@ -151,6 +151,16 @@ void dtb_init(const machine_config &c, unsigned char *dtb_start, uint64_t dtb_le fdt.prop_u32_list<2>("interrupts-extended", {PLIC_PHANDLE, plic_irq_id}); fdt.end_node(); } + if (c.htif.console_getchar) { // virtio 9p + const uint32_t virtio_idx = 1; + const uint64_t virtio_paddr = PMA_FIRST_VIRTIO_START + virtio_idx * PMA_VIRTIO_LENGTH; + const uint32_t plic_irq_id = virtio_idx + 1; + fdt.begin_node_num("virtio", virtio_paddr); + fdt.prop_string("compatible", "virtio,mmio"); + fdt.prop_u64_list<2>("reg", {virtio_paddr, PMA_VIRTIO_LENGTH}); + fdt.prop_u32_list<2>("interrupts-extended", {PLIC_PHANDLE, plic_irq_id}); + fdt.end_node(); + } fdt.end_node(); } diff --git a/src/machine.cpp b/src/machine.cpp index ab7760036..1ccf53b22 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -47,6 +47,7 @@ #include "unique-c-ptr.h" #include 
"virtio-console.h" #include "virtio-factory.h" +#include "virtio-p9fs.h" /// \file /// \brief Cartesi machine implementation @@ -448,6 +449,13 @@ machine::machine(const machine_config &c, const machine_runtime_config &r) : make_virtio_pma_entry(PMA_FIRST_VIRTIO_START + vdev_console->get_virtio_index() * PMA_VIRTIO_LENGTH, PMA_VIRTIO_LENGTH, "VirtIO console device", &virtio_driver, vdev_console.get())); m_vdevs.push_back(std::move(vdev_console)); + + // Register VirtIO Plan 9 filesystem device + auto vdev_p9fs = std::make_unique(m_vdevs.size(), "vfs0", "/tmp"); + register_pma_entry( + make_virtio_pma_entry(PMA_FIRST_VIRTIO_START + vdev_p9fs->get_virtio_index() * PMA_VIRTIO_LENGTH, + PMA_VIRTIO_LENGTH, "VirtIO p9fs device", &virtio_driver, vdev_p9fs.get())); + m_vdevs.push_back(std::move(vdev_p9fs)); } // Initialize DTB diff --git a/src/virtio-p9fs.cpp b/src/virtio-p9fs.cpp new file mode 100644 index 000000000..61b562ed1 --- /dev/null +++ b/src/virtio-p9fs.cpp @@ -0,0 +1,1828 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +/// \file +/// \brief VirtIO Plan 9 filesystem. +/// \details \{ +/// +/// The Plan 9 filesystem allows to share host directories +/// with the guest. 
+/// +/// To mount a filesystem in the guest, execute the following command: +/// +/// mount -t 9p vfs0 /mnt +/// +/// Where "vfs0" is the mount tag chosen on device creation. +/// +/// \} + +// Enable this define to debug VirtIO Plan 9 filesystem operations +// #define DEBUG_VIRTIO_P9FS + +#include "virtio-p9fs.h" + +#include +#include + +#include +#include +#include +#include +#ifdef __APPLE__ +#include +#include +#else +#include +#include +#endif +#include + +namespace cartesi { + +// Aliases for struct names that conflicts with function names +using stat_t = struct stat; +using statfs_t = struct statfs; +using flock_t = struct flock; + +static p9_error host_errno_to_p9(int err) { + switch (err) { + case 0: + return P9_EOK; + case EPERM: + return P9_EPERM; + case ENOENT: + return P9_ENOENT; + case ESRCH: + return P9_ESRCH; + case EINTR: + return P9_EINTR; + case EIO: + return P9_EIO; + case ENXIO: + return P9_ENXIO; + case E2BIG: + return P9_E2BIG; + case ENOEXEC: + return P9_ENOEXEC; + case EBADF: + return P9_EBADF; + case ECHILD: + return P9_ECHILD; + case EAGAIN: + return P9_EAGAIN; + case ENOMEM: + return P9_ENOMEM; + case EACCES: + return P9_EACCES; + case EFAULT: + return P9_EFAULT; + case ENOTBLK: + return P9_ENOTBLK; + case EBUSY: + return P9_EBUSY; + case EEXIST: + return P9_EEXIST; + case EXDEV: + return P9_EXDEV; + case ENODEV: + return P9_ENODEV; + case ENOTDIR: + return P9_ENOTDIR; + case EISDIR: + return P9_EISDIR; + case EINVAL: + return P9_EINVAL; + case ENFILE: + return P9_ENFILE; + case EMFILE: + return P9_EMFILE; + case ENOTTY: + return P9_ENOTTY; + case ETXTBSY: + return P9_ETXTBSY; + case EFBIG: + return P9_EFBIG; + case ENOSPC: + return P9_ENOSPC; + case ESPIPE: + return P9_ESPIPE; + case EROFS: + return P9_EROFS; + case EMLINK: + return P9_EMLINK; + case EPIPE: + return P9_EPIPE; + case EDOM: + return P9_EDOM; + case ERANGE: + return P9_ERANGE; + case EDEADLK: + return P9_EDEADLK; + case ENAMETOOLONG: + return P9_ENAMETOOLONG; + case 
ENOLCK: + return P9_ENOLCK; + case ENOSYS: + return P9_ENOSYS; + case ENOTEMPTY: + return P9_ENOTEMPTY; + case ELOOP: + return P9_ELOOP; + case ENOMSG: + return P9_ENOMSG; + case EIDRM: + return P9_EIDRM; + case ENOSTR: + return P9_ENOSTR; + case ENODATA: + return P9_ENODATA; + case ETIME: + return P9_ETIME; + case ENOSR: + return P9_ENOSR; + case EREMOTE: + return P9_EREMOTE; + case ENOLINK: + return P9_ENOLINK; + case EPROTO: + return P9_EPROTO; + case EMULTIHOP: + return P9_EMULTIHOP; + case EBADMSG: + return P9_EBADMSG; + case EOVERFLOW: + return P9_EOVERFLOW; + case EILSEQ: + return P9_EILSEQ; + case EUSERS: + return P9_EUSERS; + case ENOTSOCK: + return P9_ENOTSOCK; + case EDESTADDRREQ: + return P9_EDESTADDRREQ; + case EMSGSIZE: + return P9_EMSGSIZE; + case EPROTOTYPE: + return P9_EPROTOTYPE; + case ENOPROTOOPT: + return P9_ENOPROTOOPT; + case EPROTONOSUPPORT: + return P9_EPROTONOSUPPORT; + case ESOCKTNOSUPPORT: + return P9_ESOCKTNOSUPPORT; + case EOPNOTSUPP: + return P9_EOPNOTSUPP; + case EPFNOSUPPORT: + return P9_EPFNOSUPPORT; + case EAFNOSUPPORT: + return P9_EAFNOSUPPORT; + case EADDRINUSE: + return P9_EADDRINUSE; + case EADDRNOTAVAIL: + return P9_EADDRNOTAVAIL; + case ENETDOWN: + return P9_ENETDOWN; + case ENETUNREACH: + return P9_ENETUNREACH; + case ENETRESET: + return P9_ENETRESET; + case ECONNABORTED: + return P9_ECONNABORTED; + case ECONNRESET: + return P9_ECONNRESET; + case ENOBUFS: + return P9_ENOBUFS; + case EISCONN: + return P9_EISCONN; + case ENOTCONN: + return P9_ENOTCONN; + case ESHUTDOWN: + return P9_ESHUTDOWN; + case ETOOMANYREFS: + return P9_ETOOMANYREFS; + case ETIMEDOUT: + return P9_ETIMEDOUT; + case ECONNREFUSED: + return P9_ECONNREFUSED; + case EHOSTDOWN: + return P9_EHOSTDOWN; + case EHOSTUNREACH: + return P9_EHOSTUNREACH; + case EALREADY: + return P9_EALREADY; + case EINPROGRESS: + return P9_EINPROGRESS; + case ESTALE: + return P9_ESTALE; + case EDQUOT: + return P9_EDQUOT; + case ECANCELED: + return P9_ECANCELED; + case EOWNERDEAD: + 
return P9_EOWNERDEAD; + case ENOTRECOVERABLE: + return P9_ENOTRECOVERABLE; +#ifdef __APPLE__ + case ENOATTR: + return P9_ENODATA; + case ENOTSUP: + return P9_EOPNOTSUPP; +#else + case ECHRNG: + return P9_ECHRNG; + case EL2NSYNC: + return P9_EL2NSYNC; + case EL3HLT: + return P9_EL3HLT; + case EL3RST: + return P9_EL3RST; + case ELNRNG: + return P9_ELNRNG; + case EUNATCH: + return P9_EUNATCH; + case ENOCSI: + return P9_ENOCSI; + case EL2HLT: + return P9_EL2HLT; + case EBADE: + return P9_EBADE; + case EBADR: + return P9_EBADR; + case EXFULL: + return P9_EXFULL; + case ENOANO: + return P9_ENOANO; + case EBADRQC: + return P9_EBADRQC; + case EBADSLT: + return P9_EBADSLT; + case EBFONT: + return P9_EBFONT; + case ENONET: + return P9_ENONET; + case ENOPKG: + return P9_ENOPKG; + case EADV: + return P9_EADV; + case ESRMNT: + return P9_ESRMNT; + case ECOMM: + return P9_ECOMM; + case EDOTDOT: + return P9_EDOTDOT; + case ENOTUNIQ: + return P9_ENOTUNIQ; + case EBADFD: + return P9_EBADFD; + case EREMCHG: + return P9_EREMCHG; + case ELIBACC: + return P9_ELIBACC; + case ELIBBAD: + return P9_ELIBBAD; + case ELIBSCN: + return P9_ELIBSCN; + case ELIBMAX: + return P9_ELIBMAX; + case ELIBEXEC: + return P9_ELIBEXEC; + case ERESTART: + return P9_ERESTART; + case ESTRPIPE: + return P9_ESTRPIPE; + case EUCLEAN: + return P9_EUCLEAN; + case ENOTNAM: + return P9_ENOTNAM; + case ENAVAIL: + return P9_ENAVAIL; + case EISNAM: + return P9_EISNAM; + case EREMOTEIO: + return P9_EREMOTEIO; + case ENOMEDIUM: + return P9_ENOMEDIUM; + case EMEDIUMTYPE: + return P9_EMEDIUMTYPE; + case ENOKEY: + return P9_ENOKEY; + case EKEYEXPIRED: + return P9_EKEYEXPIRED; + case EKEYREVOKED: + return P9_EKEYREVOKED; + case EKEYREJECTED: + return P9_EKEYREJECTED; + case ERFKILL: + return P9_ERFKILL; + case EHWPOISON: + return P9_EHWPOISON; +#endif + default: + return P9_EINVAL; + } +} + +static int p9_open_flags_to_host(uint32_t flags) { + int oflags = 0; + for (uint32_t i = 1; i <= P9_O_SYNC; i = i << 1) { + if (flags & 
i) { + switch (i) { + case P9_O_RDONLY: + oflags |= O_RDONLY; + break; + case P9_O_WRONLY: + oflags |= O_WRONLY; + break; + case P9_O_RDWR: + oflags |= O_RDWR; + break; + case P9_O_CREAT: + oflags |= O_CREAT; + break; + case P9_O_EXCL: + oflags |= O_EXCL; + break; + case P9_O_NOCTTY: + oflags |= O_NOCTTY; + break; + case P9_O_TRUNC: + oflags |= O_TRUNC; + break; + case P9_O_APPEND: + oflags |= O_APPEND; + break; + case P9_O_NONBLOCK: + oflags |= O_NONBLOCK; + break; + case P9_O_DSYNC: + oflags |= O_DSYNC; + break; + case P9_O_FASYNC: + oflags |= FASYNC; + break; + case P9_O_DIRECTORY: + oflags |= O_DIRECTORY; + break; + case P9_O_NOFOLLOW: + oflags |= O_NOFOLLOW; + break; + case P9_O_CLOEXEC: + oflags |= O_CLOEXEC; + break; + case P9_O_SYNC: + oflags |= O_SYNC; + break; +#ifndef __APPLE__ + case P9_O_DIRECT: + oflags |= O_DIRECT; + break; + case P9_O_LARGEFILE: + oflags |= O_LARGEFILE; + break; + case P9_O_NOATIME: + oflags |= O_NOATIME; + break; +#endif + default: + break; + } + } + } + // Filter non-supported flags + oflags &= ~(O_NOCTTY | O_ASYNC | O_CREAT); + return oflags; +} + +static short p9_lock_type_to_host(uint8_t type) { + switch (type) { + case P9_LOCK_TYPE_RDLCK: + return F_RDLCK; + case P9_LOCK_TYPE_WRLCK: + return F_WRLCK; + case P9_LOCK_TYPE_UNLCK: + return F_UNLCK; + default: + return -1; + } +} + +static uint8_t host_lock_type_to_p9(short type) { + switch (type) { + case F_RDLCK: + return P9_LOCK_TYPE_RDLCK; + case F_WRLCK: + return P9_LOCK_TYPE_WRLCK; + case F_UNLCK: + return P9_LOCK_TYPE_UNLCK; + default: + return 0xff; + } +} + +static p9_qid stat_to_qid(const stat_t &st) { + p9_qid qid{}; + qid.type = P9_QID_FILE; + if (S_ISDIR(st.st_mode)) { + qid.type |= P9_QID_DIR; + } + if (S_ISLNK(st.st_mode)) { + qid.type |= P9_QID_SYMLINK; + } + qid.version = 0; // No caching + qid.path = st.st_ino; + return qid; +} + +static bool is_same_stat_ino(const stat_t *a, const stat_t *b) { + return a->st_dev == b->st_dev && a->st_ino == b->st_ino; +} + 
+static int close_fid_state(p9_fid_state *fidp) { + if (!fidp) { + return 0; + } + int err = 0; + if (fidp->dirp) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + DIR *dirp = reinterpret_cast(fidp->dirp); + if (closedir(dirp) != 0) { + err = errno; + } + fidp->dirp = nullptr; + } + if (fidp->fd >= 0) { + if (close(fidp->fd) != 0) { + err = errno; + } + fidp->fd = -1; + } + return err; +} + +static std::string join_path_name(const std::string &path, const std::string &name) { + if (path.empty()) { + return name; + } + if (path[path.length() - 1] == '/') { + return path + name; + } else { + std::string s; + s.append(path); + s.append("/"); + s.append(name); + return s; + } +} + +static std::string remove_path_name(const std::string &path) { + const size_t pos = path.rfind('/'); + if (pos != std::string::npos && pos > 0) { + return path.substr(0, pos); + } + return path; +} + +static bool is_name_legal(const std::string &name) { + if (name.empty()) { + return false; + } + if (name.find('/') != std::string::npos) { + return false; + } + if (name == "." 
|| name == "..") { + return false; + } + return true; +} + +virtio_p9fs_device::virtio_p9fs_device(uint32_t virtio_idx, const std::string &mount_tag, + const std::string &root_path) : + virtio_device(virtio_idx, VIRTIO_DEVICE_9P, VIRTIO_9P_F_MOUNT_TAG, VIRTIO_MAX_CONFIG_SPACE_SIZE), + m_msize(P9_MAX_MSIZE), + m_root_path(root_path) { + if (root_path.length() + 1 >= P9_ROOT_PATH_MAX) { + throw std::runtime_error{"host directory length is too large"}; + } + if (mount_tag.length() >= P9_MOUNT_TAG_MAX) { + throw std::runtime_error{"host directory mount tag length is too large"}; + } + stat_t st{}; + if (stat(root_path.c_str(), &st) < 0 || !S_ISDIR(st.st_mode)) { + throw std::runtime_error{"host directory '" + root_path + "' is not a valid directory"}; + } + // Initialize config space + virtio_p9fs_config_space *config = get_config(); + strncpy(config->mount_tag.data(), mount_tag.c_str(), mount_tag.length()); + config->mount_tag_len = mount_tag.length(); + config_space_size = mount_tag.length() + sizeof(uint16_t); +} + +virtio_p9fs_device::~virtio_p9fs_device() { + // Close all file descriptors + for (auto &it : m_fids) { + p9_fid_state *fidp = &it.second; + close_fid_state(fidp); + } + m_fids.clear(); +} + +void virtio_p9fs_device::on_device_reset() { + m_msize = P9_MAX_MSIZE; + // Close all file descriptors + for (auto &it : m_fids) { + p9_fid_state *fidp = &it.second; + close_fid_state(fidp); + } + m_fids.clear(); +} + +void virtio_p9fs_device::on_device_ok(i_device_state_access *a) { + (void) a; + // Nothing to do. 
+} + +bool virtio_p9fs_device::on_device_queue_available(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len, uint32_t write_avail_len) { + (void) read_avail_len; + (void) write_avail_len; + // We are only interested in queue 0 notifications + if (queue_idx != 0) { + return false; + } + virtq_unserializer msg(a, queue[queue_idx], queue_idx, desc_idx); + uint32_t size{}; + uint8_t opcode{}; + uint16_t tag{}; + if (!msg.unpack(&size, &opcode, &tag)) { + notify_device_needs_reset(a); + return false; + } + // Some operations may allocate temporary strings or grow the fids unordered map, + // which can theoretically throw std::bad_alloc exceptions (although very unlikely). + // We don't want any exception to leak outside this function, so we try to catch any exception here. + try { + switch (opcode) { + case P9_TSTATFS: + return op_statfs(std::move(msg), tag); + case P9_TLOPEN: + return op_lopen(std::move(msg), tag); + case P9_TLCREATE: + return op_lcreate(std::move(msg), tag); + case P9_TSYMLINK: + return op_symlink(std::move(msg), tag); + case P9_TMKNOD: + return op_mknod(std::move(msg), tag); + case P9_TREADLINK: + return op_readlink(std::move(msg), tag); + case P9_TGETATTR: + return op_getattr(std::move(msg), tag); + case P9_TSETATTR: + return op_setattr(std::move(msg), tag); + case P9_TREADDIR: + return op_readdir(std::move(msg), tag); + case P9_TFSYNC: + return op_fsync(std::move(msg), tag); + case P9_TLOCK: + return op_lock(std::move(msg), tag); + case P9_TGETLOCK: + return op_getlock(std::move(msg), tag); + case P9_TLINK: + return op_link(std::move(msg), tag); + case P9_TMKDIR: + return op_mkdir(std::move(msg), tag); + case P9_TRENAMEAT: + return op_renameat(std::move(msg), tag); + case P9_TUNLINKAT: + return op_unlinkat(std::move(msg), tag); + case P9_TVERSION: + return op_version(std::move(msg), tag); + case P9_TATTACH: + return op_attach(std::move(msg), tag); + case P9_TWALK: + return op_walk(std::move(msg), tag); + case 
P9_TREAD:
+                return op_read(std::move(msg), tag);
+            case P9_TWRITE:
+                return op_write(std::move(msg), tag);
+            case P9_TCLUNK:
+                return op_clunk(std::move(msg), tag);
+            // The following opcodes are either not needed or not supported.
+            // They are all answered with P9_EOPNOTSUPP.
+            case P9_TERROR:       // The driver will never send errors
+            case P9_TXATTRWALK:   // Extended file attributes are not supported yet
+            case P9_TXATTRCREATE: // Extended file attributes are not supported yet
+            case P9_TAUTH:        // Authentication is not supported
+            case P9_TFLUSH:       // Asynchronous requests are not supported
+            case P9_TSTAT:        // Replaced by P9_TGETATTR in 9P2000.L
+            case P9_TWSTAT:       // Replaced by P9_TSETATTR in 9P2000.L
+            case P9_TOPEN:        // Replaced by P9_TLOPEN in 9P2000.L
+            case P9_TRENAME:      // Replaced by P9_TRENAMEAT in 9P2000.L
+            case P9_TCREATE:      // Replaced by P9_TLCREATE in 9P2000.L
+            case P9_TREMOVE:      // Replaced by P9_TUNLINKAT in 9P2000.L
+#ifdef DEBUG_VIRTIO_P9FS
+                (void) fprintf(stderr, "p9fs unsupported: tag=%d opcode=%d size=%d\n", tag, opcode, size);
+#endif
+                return send_error(msg, tag, P9_EOPNOTSUPP);
+            default:
+                // Any opcode not listed above is treated as a protocol violation.
+#ifdef DEBUG_VIRTIO_P9FS
+                (void) fprintf(stderr, "p9fs UNEXPECTED: tag=%d opcode=%d size=%d\n", tag, opcode, size);
+#endif
+                return send_error(msg, tag, P9_EPROTO);
+        }
+    } catch (const std::bad_alloc &) {
+        // Both std::string and std::unordered_map may throw std::bad_alloc when out of memory.
+        // Report it to the guest as an out-of-memory condition, not as an unsupported operation.
+        return send_error(msg, tag, P9_ENOMEM);
+    } catch (...)
{ + // Some other unexpected exception + return send_error(msg, tag, P9_EPROTO); + } +} + +bool virtio_p9fs_device::op_statfs(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + if (!msg.unpack(&fid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs statfs: tag=%d fid=%d\n", tag, fid); +#endif + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + statfs_t stfs{}; + // Use fd when available, because its path might have been removed while fd still open + if (fidp->fd >= 0) { + // Get the filesystem statistics + if (fstatfs(fidp->fd, &stfs) < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + // Get the filesystem statistics + if (statfs(fidp->path.c_str(), &stfs) < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + uint32_t type = static_cast(stfs.f_type); + uint32_t bsize = static_cast(stfs.f_bsize); + uint64_t blocks = static_cast(stfs.f_blocks); + uint64_t bfree = static_cast(stfs.f_bfree); + uint64_t bavail = static_cast(stfs.f_bavail); + uint64_t files = static_cast(stfs.f_files); + uint64_t ffree = static_cast(stfs.f_ffree); +#ifdef __APPLE__ + uint64_t fsid = static_cast(stfs.f_fsid.val[0]) | (static_cast(stfs.f_fsid.val[1]) << 32); + uint32_t namelen = + std::min(static_cast(NAME_MAX), P9_NAME_MAX); // f_namelen does not exist on Darwin +#else + uint64_t fsid = static_cast(stfs.f_fsid.__val[0]) | (static_cast(stfs.f_fsid.__val[1]) << 32); + uint32_t namelen = std::min(static_cast(stfs.f_namelen), P9_NAME_MAX); +#endif + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&type, &bsize, &blocks, &bfree, &bavail, &files, &ffree, &fsid, &namelen)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RSTATFS); +} + +bool virtio_p9fs_device::op_lopen(virtq_unserializer 
&&msg, uint16_t tag) { + uint32_t fid{}; + uint32_t flags{}; + if (!msg.unpack(&fid, &flags)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs lopen: tag=%d fid=%d flags=%d\n", tag, fid, flags); +#endif + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + // It's an error if the fid is already open + if (fidp->fd >= 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Open the file + const int oflags = p9_open_flags_to_host(flags); + const int fd = open(fidp->path.c_str(), oflags); + if (fd < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // Get the path qid + stat_t st{}; + if (fstat(fd, &st) != 0) { + (void) close(fd); + return send_error(msg, tag, host_errno_to_p9(errno)); + } + p9_qid qid = stat_to_qid(st); + // Reply + uint32_t iounit = get_iounit(); + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&qid, &iounit)) { + (void) close(fd); + return send_error(msg, tag, P9_EPROTO); + } + if (!send_reply(std::move(out_msg), tag, P9_RLOPEN)) { + (void) close(fd); + return false; + } + // Update fid + fidp->fd = fd; + return true; +} + +bool virtio_p9fs_device::op_lcreate(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint32_t flags{}; + uint32_t mode{}; + uint32_t gid{}; + char name[P9_NAME_MAX]{}; + if (!msg.unpack(&fid, &name, &flags, &mode, &gid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs lcreate: tag=%d fid=%d name=%s flags=%d mode=%d gid=%d\n", tag, fid, name, flags, mode, + gid); +#endif + // Check if name is valid + if (!is_name_legal(name)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + // It's an error if the fid is already open + if 
(fidp->fd >= 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Create the file + const std::string path = join_path_name(fidp->path, name); + const int oflags = p9_open_flags_to_host(flags) | O_CREAT; + const mode_t omode = static_cast(mode); + const int fd = open(path.c_str(), oflags, omode); + if (fd < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // If we fail to change ownership, we silent ignore the error + if (fchown(fd, static_cast(fidp->uid), static_cast(gid)) != 0) { + errno = 0; + } + // Get the path qid + stat_t st{}; + if (fstat(fd, &st) != 0) { + (void) close(fd); + (void) unlink(path.c_str()); + return send_error(msg, tag, host_errno_to_p9(errno)); + } + p9_qid qid = stat_to_qid(st); + // Reply + uint32_t iounit = get_iounit(); + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&qid, &iounit)) { + (void) close(fd); + (void) unlink(path.c_str()); + return send_error(msg, tag, P9_EPROTO); + } + if (!send_reply(std::move(out_msg), tag, P9_RLCREATE)) { + (void) close(fd); + (void) unlink(path.c_str()); + return false; + } + // Update fid to represent the newly opened file + fidp->path = path; + fidp->fd = fd; + return true; +} + +bool virtio_p9fs_device::op_symlink(virtq_unserializer &&msg, uint16_t tag) { + uint32_t dfid{}; + uint32_t gid{}; + char name[P9_NAME_MAX]{}; + char symtgt[P9_PATH_MAX]{}; + if (!msg.unpack(&dfid, &name, &symtgt, &gid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs symlink: tag=%d dfid=%d name=%s symtgt=%s gid=%d\n", tag, dfid, name, symtgt, gid); +#endif + // Check if name is valid + if (!is_name_legal(name)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + p9_fid_state *dfidp = get_fid_state(dfid); + if (!dfidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Create the symlink + const std::string path = join_path_name(dfidp->path, name); + if 
(symlink(symtgt, path.c_str()) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // If we fail to change ownership, we silent ignore the error + if (lchown(path.c_str(), static_cast(dfidp->uid), static_cast(gid)) != 0) { + errno = 0; + } + // Get the path qid + stat_t st{}; + if (lstat(path.c_str(), &st) != 0) { + (void) unlink(path.c_str()); + return send_error(msg, tag, host_errno_to_p9(errno)); + } + p9_qid qid = stat_to_qid(st); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&qid)) { + (void) unlink(path.c_str()); + return send_error(msg, tag, P9_EPROTO); + } + if (!send_reply(std::move(out_msg), tag, P9_RSYMLINK)) { + (void) unlink(path.c_str()); + return false; + } + return true; +} + +bool virtio_p9fs_device::op_mknod(virtq_unserializer &&msg, uint16_t tag) { + uint32_t dfid{}; + uint32_t mode{}; + uint32_t major{}; + uint32_t minor{}; + uint32_t gid{}; + char name[P9_NAME_MAX]{}; + if (!msg.unpack(&dfid, &name, &mode, &major, &minor, &gid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs mknod: tag=%d dfid=%d name=%s mode=%d major=%d minor=%d gid=%d\n", tag, dfid, name, + mode, major, minor, gid); +#endif + // Check if name is valid + if (!is_name_legal(name)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + p9_fid_state *dfidp = get_fid_state(dfid); + if (!dfidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Create the special or ordinary file + const std::string path = join_path_name(dfidp->path, name); + const dev_t dev = makedev(major, minor); + if (mknod(path.c_str(), static_cast(mode), dev) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // If we fail to change ownership, we silent ignore the error + if (lchown(path.c_str(), static_cast(dfidp->uid), static_cast(gid)) != 0) { + errno = 0; + } + // Get the path qid + stat_t st{}; + if 
(lstat(path.c_str(), &st) != 0) { + (void) unlink(path.c_str()); + return send_error(msg, tag, host_errno_to_p9(errno)); + } + p9_qid qid = stat_to_qid(st); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&qid)) { + (void) unlink(path.c_str()); + return send_error(msg, tag, P9_EPROTO); + } + if (!send_reply(std::move(out_msg), tag, P9_RMKNOD)) { + (void) unlink(path.c_str()); + return false; + } + return true; +} + +bool virtio_p9fs_device::op_setattr(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint32_t mask{}; + uint32_t mode{}; + uint32_t uid{}; + uint32_t gid{}; + uint64_t size{}; + uint64_t atime_sec{}; + uint64_t atime_nsec{}; + uint64_t mtime_sec{}; + uint64_t mtime_nsec{}; + if (!msg.unpack(&fid, &mask, &mode, &uid, &gid, &size, &atime_sec, &atime_nsec, &mtime_sec, &mtime_nsec)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, + "p9fs setattr: tag=%d fid=%d mask=%d uid=%d gid=%d size=%ld atime_sec=%ld atime_nsec=%ld mtime_sec=%ld " + "mtime_nsec=%ld\n", + tag, fid, mask, uid, gid, size, atime_sec, atime_nsec, mtime_sec, mtime_nsec); +#endif + // Get the fid state + const p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + bool ctime_updated = false; + // Modify ownership + if (mask & (P9_SETATTR_UID | P9_SETATTR_GID)) { + const uid_t newuid = (mask & P9_SETATTR_UID) ? static_cast(uid) : -1; + const gid_t newgid = (mask & P9_SETATTR_GID) ? 
static_cast(gid) : -1; + // Use fd when available, because its path might have been removed while fd still open + if (fidp->fd >= 0) { + if (fchown(fidp->fd, newuid, newgid) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + if (lchown(fidp->path.c_str(), newuid, newgid) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + ctime_updated = true; + } + // Modify mode + if (mask & P9_SETATTR_MODE) { + // Use fd when available, because its path might have been removed while fd still open + if (fidp->fd >= 0) { + if (fchmod(fidp->fd, static_cast(mode)) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + if (chmod(fidp->path.c_str(), static_cast(mode)) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + ctime_updated = true; + } + // Modify size + if (mask & P9_SETATTR_SIZE) { + // Use fd when available, because its path might have been removed while fd still open + if (fidp->fd >= 0) { + if (ftruncate(fidp->fd, static_cast(size)) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + if (truncate(fidp->path.c_str(), static_cast(size)) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + ctime_updated = true; + } + // Modify times + if (mask & (P9_SETATTR_ATIME | P9_SETATTR_MTIME)) { + timespec ts[2]{}; + if (mask & P9_SETATTR_ATIME) { + if (mask & P9_SETATTR_ATIME_SET) { + ts[0].tv_sec = static_cast(atime_sec); + ts[0].tv_nsec = static_cast(atime_nsec); + } else { + ts[0].tv_sec = 0; + ts[0].tv_nsec = UTIME_NOW; + } + } else { + ts[0].tv_sec = 0; + ts[0].tv_nsec = UTIME_OMIT; + } + if (mask & P9_SETATTR_MTIME) { + if (mask & P9_SETATTR_MTIME_SET) { + ts[1].tv_sec = static_cast(mtime_sec); + ts[1].tv_nsec = static_cast(mtime_nsec); + } else { + ts[1].tv_sec = 0; + ts[1].tv_nsec = UTIME_NOW; + } + } else { + ts[1].tv_sec = 0; + ts[1].tv_nsec = UTIME_OMIT; + } + // Use fd when available, because its path might have been 
removed while fd still open + if (fidp->fd >= 0) { + if (futimens(fidp->fd, ts) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + if (utimensat(AT_FDCWD, fidp->path.c_str(), ts, AT_SYMLINK_NOFOLLOW) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + ctime_updated = true; + } + // Modify change time + if ((mask & P9_SETATTR_CTIME) && !ctime_updated) { + // Use fd when available, because its path might have been removed while fd still open + if (fidp->fd >= 0) { + if (fchown(fidp->fd, -1, -1) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + if (lchown(fidp->path.c_str(), -1, -1) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + } + // Reply + return send_ok(msg, tag, P9_RSETATTR); +} + +bool virtio_p9fs_device::op_readlink(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + if (!msg.unpack(&fid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs readlink: tag=%d fid=%d\n", tag, fid); +#endif + // Get the fid state + const p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Read the link + char target[P9_PATH_MAX]{}; + const ssize_t ret = readlink(fidp->path.c_str(), target, sizeof(target) - 1); + if (ret < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + target[ret] = 0; + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(target)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RREADLINK); +} + +bool virtio_p9fs_device::op_getattr(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint64_t mask{}; + if (!msg.unpack(&fid, &mask)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs getattr: tag=%d fid=%d mask=%lx\n", tag, fid, mask); +#endif + // 
Get the fid state + const p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + stat_t st{}; + // Use fd when available, because its path might have been removed while fd still open + if (fidp->fd >= 0) { + // Get the attributes + if (fstat(fidp->fd, &st) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + // Get the attributes + if (lstat(fidp->path.c_str(), &st) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + // Get the qid + p9_qid qid = stat_to_qid(st); + // Fill stat attributes + p9_stat rstat{}; + if (mask & P9_GETATTR_MODE) { + rstat.mode = st.st_mode; + } + if (mask & P9_GETATTR_UID) { + rstat.uid = st.st_uid; + } + if (mask & P9_GETATTR_GID) { + rstat.gid = st.st_gid; + } + if (mask & P9_GETATTR_NLINK) { + rstat.nlink = st.st_nlink; + } + if (mask & P9_GETATTR_RDEV) { + rstat.rdev = st.st_rdev; + } + if (mask & P9_GETATTR_SIZE) { + rstat.size = st.st_size; + } + if (mask & P9_GETATTR_BLOCKS) { + rstat.blksize = st.st_blksize; + rstat.blocks = st.st_blocks; + } +#ifdef __APPLE__ + if (mask & P9_GETATTR_ATIME) { + rstat.atime_sec = st.st_atimespec.tv_sec; + rstat.atime_nsec = st.st_atimespec.tv_nsec; + } + if (mask & P9_GETATTR_MTIME) { + rstat.mtime_sec = st.st_mtimespec.tv_sec; + rstat.mtime_nsec = st.st_mtimespec.tv_nsec; + } + if (mask & P9_GETATTR_CTIME) { + rstat.ctime_sec = st.st_ctimespec.tv_sec; + rstat.ctime_nsec = st.st_ctimespec.tv_nsec; + } +#else + if (mask & P9_GETATTR_ATIME) { + rstat.atime_sec = st.st_atim.tv_sec; + rstat.atime_nsec = st.st_atim.tv_nsec; + } + if (mask & P9_GETATTR_MTIME) { + rstat.mtime_sec = st.st_mtim.tv_sec; + rstat.mtime_nsec = st.st_mtim.tv_nsec; + } + if (mask & P9_GETATTR_CTIME) { + rstat.ctime_sec = st.st_ctim.tv_sec; + rstat.ctime_nsec = st.st_ctim.tv_nsec; + } +#endif + // P9_GETATTR_BTIME, P9_GETATTR_GEN, P9_GETATTR_DATA_VERSION are not supported, they are hardwired to zero. 
+ // P9_GETATTR_INO is contained in qid. + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&mask, &qid, &rstat)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RGETATTR); +} + +bool virtio_p9fs_device::op_lock(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint8_t type{}; + uint32_t flags{}; + uint64_t start{}; + uint64_t length{}; + uint32_t proc_id{}; + char client_id[P9_NAME_MAX]{}; + if (!msg.unpack(&fid, &type, &flags, &start, &length, &proc_id, &client_id)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs lock: tag=%d fid=%d type=%d flags=%d start=%ld length=%ld proc_id=%d client_id=%s\n", + tag, fid, type, flags, start, length, proc_id, client_id); +#endif + // Only block flag is supported + if (flags > P9_LOCK_FLAGS_BLOCK) { + return send_error(msg, tag, P9_EINVAL); + } + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp || fidp->fd < 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Lock the file + flock_t fl{}; + fl.l_type = p9_lock_type_to_host(type); + fl.l_whence = SEEK_SET; + fl.l_start = static_cast(start); + fl.l_len = static_cast(length); + uint8_t status = P9_LOCK_SUCCESS; + if (flags & P9_LOCK_FLAGS_BLOCK) { + // Blocking lock + if (fcntl(fidp->fd, F_SETLKW, &fl) == -1) { + status = P9_LOCK_ERROR; + } + } else { + // Non-blocking lock + if (fcntl(fidp->fd, F_SETLK, &fl) == -1) { + status = P9_LOCK_SUCCESS; + } else if (errno == EAGAIN || errno == EACCES) { + status = P9_LOCK_BLOCKED; + } + } + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&status)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RLOCK); +} + +bool virtio_p9fs_device::op_getlock(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + 
uint8_t type{}; + uint64_t start{}; + uint64_t length{}; + uint32_t proc_id{}; + char client_id[P9_NAME_MAX]{}; + if (!msg.unpack(&fid, &type, &start, &length, &proc_id, &client_id)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs getlock: tag=%d fid=%d type=%d start=%ld length=%ld proc_id=%d client_id=%s\n", tag, + fid, type, start, length, proc_id, client_id); +#endif + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp || fidp->fd < 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Lock the file + flock_t fl{}; + fl.l_type = p9_lock_type_to_host(type); + fl.l_whence = SEEK_SET; + fl.l_start = static_cast(start); + fl.l_len = static_cast(length); + if (fcntl(fidp->fd, F_GETLK, &fl) == -1) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + uint8_t lock_type = host_lock_type_to_p9(fl.l_type); + uint64_t lock_start = static_cast(fl.l_start); + uint64_t lock_length = static_cast(fl.l_len); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&lock_type, &lock_start, &lock_length, &proc_id, &client_id)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RGETLOCK); +} + +bool virtio_p9fs_device::op_readdir(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint64_t offset{}; + uint32_t count{}; + if (!msg.unpack(&fid, &offset, &count)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs readdir: tag=%d fid=%d offset=%ld count=%d\n", tag, fid, offset, count); +#endif + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + DIR *dirp = reinterpret_cast(fidp->dirp); + // Open directory in case it's not yet + if (!dirp) { + dirp = opendir(fidp->path.c_str()); + if 
(!dirp) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + fidp->dirp = dirp; + } + constexpr uint32_t start_offset = P9_OUT_MSG_OFFSET + sizeof(uint32_t); + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, start_offset); + // Seek directory + if (offset == 0) { + rewinddir(dirp); + } else { + seekdir(dirp, static_cast(offset)); + } + // Traverse directory entries + while (true) { + const bool first_entry = (msg.offset == start_offset); + // Get the next directory entry + errno = 0; + dirent *dir_entry = readdir(dirp); + if (!dir_entry) { + if (errno != 0 && first_entry) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + break; + } + const char *name = dir_entry->d_name; + // Check if there is enough space to add this entry + const uint32_t data_len = out_msg.offset - start_offset; + const uint32_t entry_len = + sizeof(p9_qid) + sizeof(uint64_t) + sizeof(uint8_t) + +sizeof(uint16_t) + strlen(name); + if (data_len + entry_len > count) { + break; + } + // Get entry offset + const long entry_off = telldir(dirp); + if (entry_off < 0) { + if (first_entry) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + break; + } + // Get entry qid and type + p9_qid qid{}; + uint8_t type = dir_entry->d_type; + // In some filesystems dtype may be DT_UNKNOWN as an optimization to save lstat() calls + if (type == DT_UNKNOWN) { + stat_t st{}; + const std::string path = join_path_name(fidp->path, dir_entry->d_name); + if (lstat(path.c_str(), &st) < 0) { + if (errno != 0 && first_entry) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + break; + } + type = st.st_mode >> 12; + qid = stat_to_qid(st); + } else { + if (type == DT_DIR) { + qid.type = P9_QID_DIR; + } else if (type == DT_LNK) { + qid.type = P9_QID_SYMLINK; + } else { + qid.type = P9_QID_FILE; + } + qid.path = dir_entry->d_ino; + } + // Add the entry to our reply + uint64_t off = static_cast(entry_off); + if (!out_msg.pack(&qid, &off, &type, name)) { + return 
send_error(msg, tag, P9_EPROTO); + } + } + // Reply + uint32_t data_len = out_msg.length - start_offset; + out_msg.offset = P9_OUT_MSG_OFFSET; + if (!out_msg.pack(&data_len)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RREADDIR); +} + +bool virtio_p9fs_device::op_fsync(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + if (!msg.unpack(&fid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs fsync: tag=%d fid=%d\n", tag, fid); +#endif + // Get the fid state + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp || fidp->fd < 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Sync the file + if (fsync(fidp->fd) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // Reply + return send_ok(msg, tag, P9_RFSYNC); +} + +bool virtio_p9fs_device::op_link(virtq_unserializer &&msg, uint16_t tag) { + uint32_t dfid{}; + uint32_t fid{}; + char name[P9_NAME_MAX]{}; + if (!msg.unpack(&dfid, &fid, &name)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs link: tag=%d dfid=%d fid=%d name=%s\n", tag, dfid, fid, name); +#endif + // Check if name is valid + if (!is_name_legal(name)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + p9_fid_state *dfidp = get_fid_state(dfid); + p9_fid_state *fidp = get_fid_state(fid); + if (!dfidp || !fidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Create the hard link + const std::string path = join_path_name(dfidp->path, name); + if (link(fidp->path.c_str(), path.c_str()) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // Reply + if (!send_ok(msg, tag, P9_RLINK)) { + (void) unlink(path.c_str()); + return false; + } + return true; +} + +bool virtio_p9fs_device::op_mkdir(virtq_unserializer &&msg, uint16_t tag) { + uint32_t dfid{}; + uint32_t mode{}; + uint32_t gid{}; + char name[P9_NAME_MAX]{}; + if 
(!msg.unpack(&dfid, &name, &mode, &gid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs mkdir: tag=%d dfid=%d name=%s mode=%d gid=%d\n", tag, dfid, name, mode, gid); +#endif + // Check if name is valid + if (!is_name_legal(name)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + p9_fid_state *dfidp = get_fid_state(dfid); + if (!dfidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Create the directory + const std::string path = join_path_name(dfidp->path, name); + if (mkdir(path.c_str(), static_cast(mode)) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // If we fail to change ownership, we silent ignore the error + if (lchown(path.c_str(), static_cast(dfidp->uid), static_cast(gid)) != 0) { + errno = 0; + } + // Get the path qid + stat_t st{}; + if (lstat(path.c_str(), &st) != 0) { + (void) rmdir(path.c_str()); + return send_error(msg, tag, host_errno_to_p9(errno)); + } + p9_qid qid = stat_to_qid(st); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&qid)) { + (void) rmdir(path.c_str()); + return send_error(msg, tag, P9_EPROTO); + } + if (!send_reply(std::move(out_msg), tag, P9_RMKDIR)) { + (void) rmdir(path.c_str()); + return false; + } + return true; +} + +bool virtio_p9fs_device::op_renameat(virtq_unserializer &&msg, uint16_t tag) { + uint32_t oldfid{}; + uint32_t newfid{}; + char oldname[P9_NAME_MAX]{}; + char newname[P9_NAME_MAX]{}; + if (!msg.unpack(&oldfid, &oldname, &newfid, &newname)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs renameat: tag=%d oldfid=%d oldname=%s newfid=%d newname=%s\n", tag, oldfid, oldname, + newfid, newname); +#endif + // Check if names are valid + if (!is_name_legal(oldname) || !is_name_legal(newname)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + const p9_fid_state 
*oldfidp = get_fid_state(oldfid); + const p9_fid_state *newfidp = get_fid_state(newfid); + if (!newfidp || !oldfidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Rename the file + const std::string oldpath = join_path_name(oldfidp->path, oldname); + const std::string newpath = join_path_name(newfidp->path, newname); + const int ret = rename(oldpath.c_str(), newpath.c_str()); + if (ret != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // Reply + if (!send_ok(msg, tag, P9_RRENAMEAT)) { + (void) rename(newpath.c_str(), oldpath.c_str()); + return false; + } + // Fix path for all fids starting with the old path + for (auto &pair : m_fids) { + p9_fid_state *fidp = &pair.second; + // Change fid path to the new path if it starts with old path + if (fidp->path.rfind(oldpath) == 0) { + fidp->path = newpath + fidp->path.substr(oldpath.length(), std::string::npos); + } + } + return true; +} + +bool virtio_p9fs_device::op_unlinkat(virtq_unserializer &&msg, uint16_t tag) { + uint32_t dfid{}; + uint32_t flags{}; + char name[P9_NAME_MAX]{}; + if (!msg.unpack(&dfid, &name, &flags)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs unlinkat: tag=%d dfid=%d name=%s flags=%d\n", tag, dfid, name, flags); +#endif + // Check if name is valid + if (!is_name_legal(name)) { + return send_error(msg, tag, P9_ENOENT); + } + // Get the fid state + const p9_fid_state *dfidp = get_fid_state(dfid); + if (!dfidp) { + return send_error(msg, tag, P9_EPROTO); + } + // Remove the path + const std::string path = join_path_name(dfidp->path, name); + if (flags & P9_AT_REMOVEDIR) { + if (rmdir(path.c_str()) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } else { + if (unlink(path.c_str()) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + } + return send_ok(msg, tag, P9_RUNLINKAT); +} + +bool virtio_p9fs_device::op_version(virtq_unserializer &&msg, uint16_t tag) { + char 
version[32]{}; + uint32_t msize{}; + if (!msg.unpack(&msize, &version)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs version: tag=%d msize=%d version=%s\n", tag, m_msize, version); +#endif + // Set msize + m_msize = std::min(m_msize, P9_MAX_MSIZE); + // Reply with the protocol version we support + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + const char P9_PROTO_VERSION[] = "9P2000.L"; + if (!out_msg.pack(&m_msize, P9_PROTO_VERSION)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RVERSION); +} + +bool virtio_p9fs_device::op_attach(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint32_t afid{}; + uint32_t uid{}; + char uname[128]{}; + char aname[128]{}; + if (!msg.unpack(&fid, &afid, &uname, &aname, &uid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs attach: tag=%d fid=%d afid=%d uid=%d uname=%s aname=%s\n", tag, fid, afid, uid, uname, + aname); +#endif + // It's an error if the fid already exists + if (get_fid_state(fid)) { + return send_error(msg, tag, P9_EPROTO); + } + // Check if root path exists + stat_t st{}; + if (lstat(m_root_path.c_str(), &st) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // Create the new fid state + p9_fid_state *newfidp = &m_fids[fid]; + // Get the qid + p9_qid qid = stat_to_qid(st); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&qid)) { + (void) m_fids.erase(fid); + return send_error(msg, tag, host_errno_to_p9(errno)); + } + if (!send_reply(std::move(out_msg), tag, P9_RATTACH)) { + (void) m_fids.erase(fid); + return false; + } + // Update new fid state + *newfidp = p9_fid_state{uid, m_root_path, -1}; + return true; +} + +bool virtio_p9fs_device::op_walk(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + 
uint32_t newfid{}; + uint16_t nwname{}; + if (!msg.unpack(&fid, &newfid, &nwname)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs walk: tag=%d fid=%d newfid=%d nwname=%d\n", tag, fid, newfid, nwname); +#endif + // A maximum of sixteen name elements or qids may be packed in a single message + if (nwname > P9_MAXWELEM) { + return send_error(msg, tag, P9_EINVAL); + } + // Get the fid state, it must not have been opened for I/O by an open or create message + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + return send_error(msg, tag, P9_EPROTO); + } + // The newfid must not be in use unless it is the same as fid + if (newfid != fid && get_fid_state(newfid)) { + return send_error(msg, tag, P9_EPROTO); + } + // Get the start for the starting path and root path + stat_t st{}; + stat_t root_st{}; + if (lstat(fidp->path.c_str(), &st) != 0 || lstat(m_root_path.c_str(), &root_st) != 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + // Walk path retrieving qid for each name + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET + sizeof(uint16_t)); + std::string path = fidp->path; + uint16_t nwalked = 0; + for (; nwalked < nwname; ++nwalked) { + char namebuf[P9_NAME_MAX]{}; + if (!msg.unpack(&namebuf)) { + return send_error(msg, tag, P9_EPROTO); + } + const std::string &name = namebuf; + // Check if name is valid + if (name.empty() || name.find('/') != std::string::npos) { + return send_error(msg, tag, P9_ENOENT); + } + // A walk of the name ".." in the root directory is equivalent to a walk with no name elements + if (!(name == ".." 
&& is_same_stat_ino(&root_st, &st)) && name != ".") { + std::string next_path; + if (name == "..") { + next_path = remove_path_name(path); + } else { + next_path = join_path_name(path, name); + } + // Get next path qid + if (lstat(next_path.c_str(), &st) != 0) { + // Return an error only for the first walk + if (nwalked == 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } else { + // Otherwise, stop walk on error + break; + } + } + path = std::move(next_path); + } + p9_qid wqid = stat_to_qid(st); + // Store the composed path qid in the reply message + if (!out_msg.pack(&wqid)) { + return send_error(msg, tag, P9_EPROTO); + } + } + // Create the new fid state + const uint32_t uid = fidp->uid; + p9_fid_state *newfidp = nullptr; + if (fid != newfid) { + fidp = nullptr; + newfidp = &m_fids[newfid]; + } else { + newfidp = fidp; + } + // Write amount of wqids in the reply message + out_msg.offset = P9_OUT_MSG_OFFSET; + if (!out_msg.pack(&nwalked)) { + if (fid != newfid) { + (void) m_fids.erase(newfid); + } + return send_error(msg, tag, P9_EPROTO); + } + // Reply + if (!send_reply(std::move(out_msg), tag, P9_RWALK)) { + if (fid != newfid) { + (void) m_fids.erase(newfid); + } + return false; + } + // Update the new fid state + *newfidp = p9_fid_state{uid, path, -1}; + return true; +} + +bool virtio_p9fs_device::op_read(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint64_t offset{}; + uint32_t count{}; + if (!msg.unpack(&fid, &offset, &count)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs read: tag=%d fid=%d offset=%ld count=%d\n", tag, fid, offset, count); +#endif + // Get the fid state, only file fids are accepted + const p9_fid_state *fidp = get_fid_state(fid); + if (!fidp || fidp->fd < 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Prepare temporary output buffer + std::array buf{}; + if (count > buf.size()) { + return send_error(msg, tag, P9_EPROTO); + } + // Read from fd 
+ const ssize_t ret = pread(fidp->fd, buf.data(), static_cast(count), static_cast(offset)); + if (ret < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + uint32_t ret_count = static_cast(ret); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&ret_count) || !out_msg.write_bytes(buf.data(), count)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RREAD); +} + +bool virtio_p9fs_device::op_write(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + uint64_t offset{}; + uint32_t count{}; + if (!msg.unpack(&fid, &offset, &count)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs write: tag=%d fid=%d offset=%ld count=%d\n", tag, fid, offset, count); +#endif + // Get the fid state, only file fids are accepted + const p9_fid_state *fidp = get_fid_state(fid); + if (!fidp || fidp->fd < 0) { + return send_error(msg, tag, P9_EPROTO); + } + // Read from input buffer + std::array buf{}; + if (count > buf.size() || !msg.read_bytes(buf.data(), count)) { + return send_error(msg, tag, P9_EPROTO); + } + // Write to fd + const ssize_t ret = pwrite(fidp->fd, buf.data(), static_cast(count), static_cast(offset)); + if (ret < 0) { + return send_error(msg, tag, host_errno_to_p9(errno)); + } + uint32_t ret_count = static_cast(ret); + // Reply + virtq_serializer out_msg(msg.a, msg.vq, msg.queue_idx, msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&ret_count)) { + return send_error(msg, tag, P9_EPROTO); + } + return send_reply(std::move(out_msg), tag, P9_RREAD); +} + +bool virtio_p9fs_device::op_clunk(virtq_unserializer &&msg, uint16_t tag) { + uint32_t fid{}; + if (!msg.unpack(&fid)) { + return send_error(msg, tag, P9_EPROTO); + } +#ifdef DEBUG_VIRTIO_P9FS + (void) fprintf(stderr, "p9fs clunk: tag=%d fid=%d\n", tag, fid); +#endif + p9_fid_state *fidp = get_fid_state(fid); + if (!fidp) { + 
return send_error(msg, tag, P9_EPROTO); + } + // Close file descriptors + const int close_errno = close_fid_state(fidp); + // Remove from fid state list even on error + fidp = nullptr; + (void) m_fids.erase(fid); + // Propagate close error if any + if (close_errno != 0) { + return send_error(msg, tag, host_errno_to_p9(close_errno)); + } + // Reply + return send_ok(msg, tag, P9_RCLUNK); +} + +bool virtio_p9fs_device::send_reply(virtq_serializer &&out_msg, uint16_t tag, p9_opcode opcode) { +#ifdef DEBUG_VIRTIO_P9FS + if (opcode != P9_RLERROR) { + (void) fprintf(stderr, "p9fs send_reply: tag=%d opcode=%d\n", tag, opcode); + } +#endif + // Rewind message write offset to its start + out_msg.offset = 0; + // Write message header + uint32_t size = out_msg.length; + if (!out_msg.pack(&size, &opcode, &tag)) { + notify_device_needs_reset(out_msg.a); + return false; + } + // Consume the queue and notify the driver + if (!consume_and_notify_queue(out_msg.a, out_msg.queue_idx, out_msg.desc_idx, out_msg.length, 0)) { + notify_device_needs_reset(out_msg.a); + return false; + } + return true; +} + +bool virtio_p9fs_device::send_ok(const virtq_unserializer &in_msg, uint16_t tag, p9_opcode opcode) { + virtq_serializer out_msg(in_msg.a, in_msg.vq, in_msg.queue_idx, in_msg.desc_idx, P9_OUT_MSG_OFFSET); + return send_reply(std::move(out_msg), tag, opcode); +} + +bool virtio_p9fs_device::send_error(const virtq_unserializer &in_msg, uint16_t tag, p9_error error) { +#ifdef DEBUG_VIRTIO_P9FS + if (error == P9_EPROTO) { + (void) fprintf(stderr, "p9fs PROTOCOL ERROR: tag=%d\n", tag); + } else { + (void) fprintf(stderr, "p9fs send_error: tag=%d error=%d\n", tag, error); + } +#endif + virtq_serializer out_msg(in_msg.a, in_msg.vq, in_msg.queue_idx, in_msg.desc_idx, P9_OUT_MSG_OFFSET); + if (!out_msg.pack(&error)) { + notify_device_needs_reset(in_msg.a); + return false; + } + return send_reply(std::move(out_msg), tag, P9_RLERROR); +} + +} // namespace cartesi diff --git a/src/virtio-p9fs.h 
b/src/virtio-p9fs.h new file mode 100644 index 000000000..408cd7158 --- /dev/null +++ b/src/virtio-p9fs.h @@ -0,0 +1,453 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef VIRTIO_P9FS_H +#define VIRTIO_P9FS_H + +#include "virtio-device.h" +#include "virtio-serializer.h" + +#include + +namespace cartesi { + +/// \brief VirtIO Plan 9 filesystem features +enum virtio_p9fs_features : uint64_t { + VIRTIO_9P_F_MOUNT_TAG = (UINT64_C(1) << 0), ///< Mount tag supported. 
+}; + +/// \brief VirtIO Plan 9 filesystem constants +enum virtio_p9fs_constants : uint32_t { + P9_MAXWELEM = 16, ///< Maximum number of elements in a walk operation + P9_NAME_MAX = 256, ///< Maximum file name length + P9_PATH_MAX = 4096, ///< Maximum filesystem path length + P9_ROOT_PATH_MAX = 1024, ///< Maximum root path size + P9_MOUNT_TAG_MAX = VIRTIO_MAX_CONFIG_SPACE_SIZE - sizeof(uint16_t), ///< Maximum mount tag size + P9_IOUNIT_MAX = 8192, ///< Maximum buffer size for IO operations (read/write) + P9_IOUNIT_HEADER_SIZE = 24, ///< Message header size of IO operations (read/write) + P9_MAX_MSIZE = P9_IOUNIT_MAX + P9_IOUNIT_HEADER_SIZE, ///< Maximum message size, including message headers + P9_OUT_MSG_OFFSET = 7, ///< Offset for 9P reply messages +}; + +/// \brief 9P2000.L opcodes +enum p9_opcode : uint8_t { + P9_TLERROR = 6, + P9_RLERROR, + P9_TSTATFS = 8, + P9_RSTATFS, + P9_TLOPEN = 12, + P9_RLOPEN, + P9_TLCREATE = 14, + P9_RLCREATE, + P9_TSYMLINK = 16, + P9_RSYMLINK, + P9_TMKNOD = 18, + P9_RMKNOD, + P9_TRENAME = 20, + P9_RRENAME, + P9_TREADLINK = 22, + P9_RREADLINK, + P9_TGETATTR = 24, + P9_RGETATTR, + P9_TSETATTR = 26, + P9_RSETATTR, + P9_TXATTRWALK = 30, + P9_RXATTRWALK, + P9_TXATTRCREATE = 32, + P9_RXATTRCREATE, + P9_TREADDIR = 40, + P9_RREADDIR, + P9_TFSYNC = 50, + P9_RFSYNC, + P9_TLOCK = 52, + P9_RLOCK, + P9_TGETLOCK = 54, + P9_RGETLOCK, + P9_TLINK = 70, + P9_RLINK, + P9_TMKDIR = 72, + P9_RMKDIR, + P9_TRENAMEAT = 74, + P9_RRENAMEAT, + P9_TUNLINKAT = 76, + P9_RUNLINKAT, + P9_TVERSION = 100, + P9_RVERSION, + P9_TAUTH = 102, + P9_RAUTH, + P9_TATTACH = 104, + P9_RATTACH, + P9_TERROR = 106, + P9_RERROR, + P9_TFLUSH = 108, + P9_RFLUSH, + P9_TWALK = 110, + P9_RWALK, + P9_TOPEN = 112, + P9_ROPEN, + P9_TCREATE = 114, + P9_RCREATE, + P9_TREAD = 116, + P9_RREAD, + P9_TWRITE = 118, + P9_RWRITE, + P9_TCLUNK = 120, + P9_RCLUNK, + P9_TREMOVE = 122, + P9_RREMOVE, + P9_TSTAT = 124, + P9_RSTAT, + P9_TWSTAT = 126, + P9_RWSTAT, +}; + +/// \brief 9P2000.L errors +enum 
p9_error : uint32_t {
    // These values are sent to the guest, so aliases must be defined in terms of the
    // P9_* enumerators of this list and never in terms of the host's <errno.h> macros,
    // whose numeric values differ between operating systems.
    P9_EOK = 0,                 ///< No error
    P9_EPERM = 1,               ///< Operation not permitted
    P9_ENOENT = 2,              ///< No such file or directory
    P9_ESRCH = 3,               ///< No such process
    P9_EINTR = 4,               ///< Interrupted system call
    P9_EIO = 5,                 ///< I/O error
    P9_ENXIO = 6,               ///< No such device or address
    P9_E2BIG = 7,               ///< Argument list too long
    P9_ENOEXEC = 8,             ///< Exec format error
    P9_EBADF = 9,               ///< Bad file number
    P9_ECHILD = 10,             ///< No child processes
    P9_EAGAIN = 11,             ///< Try again
    P9_ENOMEM = 12,             ///< Out of memory
    P9_EACCES = 13,             ///< Permission denied
    P9_EFAULT = 14,             ///< Bad address
    P9_ENOTBLK = 15,            ///< Block device required
    P9_EBUSY = 16,              ///< Device or resource busy
    P9_EEXIST = 17,             ///< File exists
    P9_EXDEV = 18,              ///< Cross-device link
    P9_ENODEV = 19,             ///< No such device
    P9_ENOTDIR = 20,            ///< Not a directory
    P9_EISDIR = 21,             ///< Is a directory
    P9_EINVAL = 22,             ///< Invalid argument
    P9_ENFILE = 23,             ///< File table overflow
    P9_EMFILE = 24,             ///< Too many open files
    P9_ENOTTY = 25,             ///< Not a typewriter
    P9_ETXTBSY = 26,            ///< Text file busy
    P9_EFBIG = 27,              ///< File too large
    P9_ENOSPC = 28,             ///< No space left on device
    P9_ESPIPE = 29,             ///< Illegal seek
    P9_EROFS = 30,              ///< Read-only file system
    P9_EMLINK = 31,             ///< Too many links
    P9_EPIPE = 32,              ///< Broken pipe
    P9_EDOM = 33,               ///< Math argument out of domain of func
    P9_ERANGE = 34,             ///< Math result not representable
    P9_EDEADLK = 35,            ///< Resource deadlock would occur
    P9_ENAMETOOLONG = 36,       ///< File name too long
    P9_ENOLCK = 37,             ///< No record locks available
    P9_ENOSYS = 38,             ///< Invalid system call number
    P9_ENOTEMPTY = 39,          ///< Directory not empty
    P9_ELOOP = 40,              ///< Too many symbolic links encountered
    P9_EWOULDBLOCK = P9_EAGAIN, ///< Operation would block
    P9_ENOMSG = 42,             ///< No message of desired type
    P9_EIDRM = 43,              ///< Identifier removed
    P9_ECHRNG = 44,             ///< Channel number out of range
    P9_EL2NSYNC = 45,           ///< Level 2 not synchronized
    P9_EL3HLT = 46,             ///< Level 3 halted
    P9_EL3RST = 47,             ///< Level 3 reset
    P9_ELNRNG = 48,             ///< Link number out of range
    P9_EUNATCH = 49,            ///< Protocol driver not attached
    P9_ENOCSI = 50,             ///< No CSI structure available
    P9_EL2HLT = 51,             ///< Level 2 halted
    P9_EBADE = 52,              ///< Invalid exchange
    P9_EBADR = 53,              ///< Invalid request descriptor
    P9_EXFULL = 54,             ///< Exchange full
    P9_ENOANO = 55,             ///< No anode
    P9_EBADRQC = 56,            ///< Invalid request code
    P9_EBADSLT = 57,            ///< Invalid slot
    P9_EDEADLOCK = P9_EDEADLK,  ///< Alias of P9_EDEADLK
    P9_EBFONT = 59,             ///< Bad font file format
    P9_ENOSTR = 60,             ///< Device not a stream
    P9_ENODATA = 61,            ///< No data available
    P9_ETIME = 62,              ///< Timer expired
    P9_ENOSR = 63,              ///< Out of streams resources
    P9_ENONET = 64,             ///< Machine is not on the network
    P9_ENOPKG = 65,             ///< Package not installed
    P9_EREMOTE = 66,            ///< Object is remote
    P9_ENOLINK = 67,            ///< Link has been severed
    P9_EADV = 68,               ///< Advertise error
    P9_ESRMNT = 69,             ///< Srmount error
    P9_ECOMM = 70,              ///< Communication error on send
    P9_EPROTO = 71,             ///< Protocol error
    P9_EMULTIHOP = 72,          ///< Multihop attempted
    P9_EDOTDOT = 73,            ///< RFS specific error
    P9_EBADMSG = 74,            ///< Not a data message
    P9_EOVERFLOW = 75,          ///< Value too large for defined data type
    P9_ENOTUNIQ = 76,           ///< Name not unique on network
    P9_EBADFD = 77,             ///< File descriptor in bad state
    P9_EREMCHG = 78,            ///< Remote address changed
    P9_ELIBACC = 79,            ///< Can not access a needed shared library
    P9_ELIBBAD = 80,            ///< Accessing a corrupted shared library
    P9_ELIBSCN = 81,            ///< .lib section in a.out corrupted
    P9_ELIBMAX = 82,            ///< Attempting to link in too many shared libraries
    P9_ELIBEXEC = 83,           ///< Cannot exec a shared library directly
    P9_EILSEQ = 84,             ///< Illegal byte sequence
    P9_ERESTART = 85,           ///< Interrupted system call should be restarted
    P9_ESTRPIPE = 86,           ///< Streams pipe error
    P9_EUSERS = 87,             ///< Too many users
    P9_ENOTSOCK = 88,           ///< Socket operation on non-socket
+ P9_EDESTADDRREQ = 89, ///< Destination address required + P9_EMSGSIZE = 90, ///< Message too long + P9_EPROTOTYPE = 91, ///< Protocol wrong type for socket + P9_ENOPROTOOPT = 92, ///< Protocol not available + P9_EPROTONOSUPPORT = 93, ///< Protocol not supported + P9_ESOCKTNOSUPPORT = 94, ///< Socket type not supported + P9_EOPNOTSUPP = 95, ///< Operation not supported on transport endpoint + P9_EPFNOSUPPORT = 96, ///< Protocol family not supported + P9_EAFNOSUPPORT = 97, ///< Address family not supported by protocol + P9_EADDRINUSE = 98, ///< Address already in use + P9_EADDRNOTAVAIL = 99, ///< Cannot assign requested address + P9_ENETDOWN = 100, ///< Network is down + P9_ENETUNREACH = 101, ///< Network is unreachable + P9_ENETRESET = 102, ///< Network dropped connection because of reset + P9_ECONNABORTED = 103, ///< Software caused connection abort + P9_ECONNRESET = 104, ///< Connection reset by peer + P9_ENOBUFS = 105, ///< No buffer space available + P9_EISCONN = 106, ///< Transport endpoint is already connected + P9_ENOTCONN = 107, ///< Transport endpoint is not connected + P9_ESHUTDOWN = 108, ///< Cannot send after transport endpoint shutdown + P9_ETOOMANYREFS = 109, ///< Too many references: cannot splice + P9_ETIMEDOUT = 110, ///< Connection timed out + P9_ECONNREFUSED = 111, ///< Connection refused + P9_EHOSTDOWN = 112, ///< Host is down + P9_EHOSTUNREACH = 113, ///< No route to host + P9_EALREADY = 114, ///< Operation already in progress + P9_EINPROGRESS = 115, ///< Operation now in progress + P9_ESTALE = 116, ///< Stale file handle + P9_EUCLEAN = 117, ///< Structure needs cleaning + P9_ENOTNAM = 118, ///< Not a XENIX named type file + P9_ENAVAIL = 119, ///< No XENIX semaphores available + P9_EISNAM = 120, ///< Is a named type file + P9_EREMOTEIO = 121, ///< Remote I/O error + P9_EDQUOT = 122, ///< Quota exceeded + P9_ENOMEDIUM = 123, ///< No medium found + P9_EMEDIUMTYPE = 124, ///< Wrong medium type + P9_ECANCELED = 125, ///< Operation Canceled + 
P9_ENOKEY = 126,          ///< Required key not available
    P9_EKEYEXPIRED = 127,     ///< Key has expired
    P9_EKEYREVOKED = 128,     ///< Key has been revoked
    P9_EKEYREJECTED = 129,    ///< Key was rejected by service
    P9_EOWNERDEAD = 130,      ///< Owner died
    P9_ENOTRECOVERABLE = 131, ///< State not recoverable
    P9_ERFKILL = 132,         ///< Operation not possible due to RF-kill
    P9_EHWPOISON = 133        ///< Memory page has hardware error
};

/// \brief 9P2000.L qid type
/// \details Values are distinct bits (plus zero for P9_QID_FILE); stored in p9_qid::type.
enum p9_qid_type : uint8_t {
    P9_QID_FILE = 0x00,
    P9_QID_LINK = 0x01,
    P9_QID_SYMLINK = 0x02,
    P9_QID_TMP = 0x04,
    P9_QID_AUTH = 0x08,
    P9_QID_MOUNT = 0x10,
    P9_QID_EXCL = 0x20,
    P9_QID_APPEND = 0x40,
    P9_QID_DIR = 0x80,
};

/// \brief 9P2000.L open flags
/// \details The low two bits select the access mode (values 0 to 3), the remaining entries are single-bit flags.
/// NOTE(review): the values look like the Linux O_* octal constants written in hexadecimal - confirm
/// before relying on a 1:1 mapping to host flags.
enum p9_open_flags : uint32_t {
    P9_O_RDONLY = 0x000000,
    P9_O_WRONLY = 0x000001,
    P9_O_RDWR = 0x000002,
    P9_O_NOACCESS = 0x000003,
    P9_O_CREAT = 0x000040,
    P9_O_EXCL = 0x000080,
    P9_O_NOCTTY = 0x000100,
    P9_O_TRUNC = 0x000200,
    P9_O_APPEND = 0x000400,
    P9_O_NONBLOCK = 0x000800,
    P9_O_DSYNC = 0x001000,
    P9_O_FASYNC = 0x002000,
    P9_O_DIRECT = 0x004000,
    P9_O_LARGEFILE = 0x008000,
    P9_O_DIRECTORY = 0x010000,
    P9_O_NOFOLLOW = 0x020000,
    P9_O_NOATIME = 0x040000,
    P9_O_CLOEXEC = 0x080000,
    P9_O_SYNC = 0x100000,
};

/// \brief 9P2000.L getattr flags
/// \details One bit per attribute; each name matches a field (or field pair) of p9_stat.
enum p9_getattr_flags : uint32_t {
    P9_GETATTR_MODE = 0x0001,
    P9_GETATTR_NLINK = 0x0002,
    P9_GETATTR_UID = 0x0004,
    P9_GETATTR_GID = 0x0008,
    P9_GETATTR_RDEV = 0x0010,
    P9_GETATTR_ATIME = 0x0020,
    P9_GETATTR_MTIME = 0x0040,
    P9_GETATTR_CTIME = 0x0080,
    P9_GETATTR_INO = 0x0100,
    P9_GETATTR_SIZE = 0x0200,
    P9_GETATTR_BLOCKS = 0x0400,
    P9_GETATTR_BTIME = 0x0800,
    P9_GETATTR_GEN = 0x1000,
    P9_GETATTR_DATA_VERSION = 0x2000,
};

/// \brief 9P2000.L setattr flags
/// \details One bit per attribute.
enum p9_setattr_flags : uint32_t {
    P9_SETATTR_MODE = 0x001,
    P9_SETATTR_UID = 0x002,
    P9_SETATTR_GID = 0x004,
    P9_SETATTR_SIZE = 0x008,
    P9_SETATTR_ATIME = 0x010,
    P9_SETATTR_MTIME = 0x020,
    P9_SETATTR_CTIME = 0x040,
    P9_SETATTR_ATIME_SET = 0x080,
    P9_SETATTR_MTIME_SET = 0x100,
};

/// \brief 9P2000.L at flags
enum p9_at_flags : uint32_t { P9_AT_REMOVEDIR = 0x200 };

/// \brief 9P2000.L lock flags
enum p9_lock_flags : uint8_t { P9_LOCK_FLAGS_BLOCK = 1, P9_LOCK_FLAGS_RECLAIM = 2 };

/// \brief 9P2000.L lock status
enum p9_lock_status : uint8_t {
    P9_LOCK_SUCCESS = 0,
    P9_LOCK_BLOCKED = 1,
    P9_LOCK_ERROR = 2,
    P9_LOCK_GRACE = 3,
};

/// \brief 9P2000.L lock type
enum p9_lock_type : uint8_t {
    P9_LOCK_TYPE_RDLCK = 0,
    P9_LOCK_TYPE_WRLCK = 1,
    P9_LOCK_TYPE_UNLCK = 2,
};

// The structures below are declared with 1-byte packing, so no padding is inserted between fields.
#pragma pack(push, 1)
/// \brief 9P2000.L qid
/// \details A qid is a 13 byte value representing a unique file system object.
struct p9_qid {
    uint8_t type;     ///< File type (directory/symlink/file)
    uint32_t version; ///< Cache version
    uint64_t path;    ///< The inode representing the path
};
// Fail the build if the packing pragma is ignored and the documented 13-byte size is lost.
static_assert(sizeof(p9_qid) == 13, "p9_qid must be exactly 13 bytes");

/// \brief 9P2000.L file stat
struct p9_stat {
    uint32_t mode;
    uint32_t uid;
    uint32_t gid;
    uint64_t nlink;
    uint64_t rdev;
    uint64_t size;
    uint64_t blksize;
    uint64_t blocks;
    uint64_t atime_sec;    ///< Access time (seconds)
    uint64_t atime_nsec;   ///< Access time (nanoseconds)
    uint64_t mtime_sec;    ///< Modification time (seconds)
    uint64_t mtime_nsec;   ///< Modification time (nanoseconds)
    uint64_t ctime_sec;    ///< Status change time (seconds)
    uint64_t ctime_nsec;   ///< Status change time (nanoseconds)
    uint64_t btime_sec;    ///< Reserved for future use
    uint64_t btime_nsec;   ///< Reserved for future use
    uint64_t gen;          ///< Reserved for future use
    uint64_t data_version; ///< Reserved for future use
};
// Three 32-bit fields followed by fifteen 64-bit fields, with no padding.
static_assert(sizeof(p9_stat) == 3 * sizeof(uint32_t) + 15 * sizeof(uint64_t), "p9_stat must be exactly 132 bytes");
#pragma pack(pop)

/// \brief 9P2000.L fid state
/// \details A fid is a file system object identifier, each one has its own state.
+struct p9_fid_state { + uint32_t uid = 0; ///< Guest user id + std::string path; ///< File system path + int fd = -1; ///< Host file descriptor (valid only for opened files) + void *dirp = nullptr; ///< Host directory (valid only for opened directories) +}; + +/// \brief VirtIO Plan 9 filesystem configuration space +struct virtio_p9fs_config_space { + uint16_t mount_tag_len; ///< Length of mount tag + std::array mount_tag; ///< Mount tag, an arbitrary name used in mount command +}; + +/// \brief VirtIO Plan 9 filesystem device +class virtio_p9fs_device final : public virtio_device { + uint32_t m_msize = 0; + std::string m_root_path; + std::unordered_map m_fids; + +public: + virtio_p9fs_device(uint32_t virtio_idx, const std::string &mount_tag, const std::string &root_path); + ~virtio_p9fs_device() override; + virtio_p9fs_device(const virtio_p9fs_device &other) = delete; + virtio_p9fs_device(virtio_p9fs_device &&other) = delete; + virtio_p9fs_device &operator=(const virtio_p9fs_device &other) = delete; + virtio_p9fs_device &operator=(virtio_p9fs_device &&other) = delete; + + void on_device_reset() override; + void on_device_ok(i_device_state_access *a) override; + bool on_device_queue_available(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len, uint32_t write_avail_len) override; + + bool op_statfs(virtq_unserializer &&msg, uint16_t tag); + bool op_lopen(virtq_unserializer &&msg, uint16_t tag); + bool op_lcreate(virtq_unserializer &&msg, uint16_t tag); + bool op_symlink(virtq_unserializer &&msg, uint16_t tag); + bool op_mknod(virtq_unserializer &&msg, uint16_t tag); + bool op_readlink(virtq_unserializer &&msg, uint16_t tag); + bool op_getattr(virtq_unserializer &&msg, uint16_t tag); + bool op_setattr(virtq_unserializer &&msg, uint16_t tag); + bool op_readdir(virtq_unserializer &&msg, uint16_t tag); + bool op_fsync(virtq_unserializer &&msg, uint16_t tag); + bool op_lock(virtq_unserializer &&msg, uint16_t tag); + bool 
op_getlock(virtq_unserializer &&msg, uint16_t tag); + bool op_link(virtq_unserializer &&msg, uint16_t tag); + bool op_mkdir(virtq_unserializer &&msg, uint16_t tag); + bool op_renameat(virtq_unserializer &&msg, uint16_t tag); + bool op_unlinkat(virtq_unserializer &&msg, uint16_t tag); + bool op_version(virtq_unserializer &&msg, uint16_t tag); + bool op_attach(virtq_unserializer &&msg, uint16_t tag); + bool op_walk(virtq_unserializer &&msg, uint16_t tag); + bool op_read(virtq_unserializer &&msg, uint16_t tag); + bool op_write(virtq_unserializer &&msg, uint16_t tag); + bool op_clunk(virtq_unserializer &&msg, uint16_t tag); + + bool send_reply(virtq_serializer &&msg, uint16_t tag, p9_opcode opcode); + bool send_ok(const virtq_unserializer &in_msg, uint16_t tag, p9_opcode opcode); + bool send_error(const virtq_unserializer &in_msg, uint16_t tag, p9_error error); + + virtio_p9fs_config_space *get_config() { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return reinterpret_cast(config_space.data()); + } + + p9_fid_state *get_fid_state(uint32_t fid) { + auto it = m_fids.find(fid); + return (it != m_fids.end()) ? &it->second : nullptr; + } + + uint32_t get_iounit() const { + return std::min(m_msize - P9_IOUNIT_HEADER_SIZE, P9_IOUNIT_MAX); + } +}; + +} // namespace cartesi + +#endif diff --git a/src/virtio-serializer.h b/src/virtio-serializer.h new file mode 100644 index 000000000..4ee5207fd --- /dev/null +++ b/src/virtio-serializer.h @@ -0,0 +1,178 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. 
+// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef VIRTIO_SERIALIZER_H +#define VIRTIO_SERIALIZER_H + +#include "virtio-device.h" + +namespace cartesi { + +/// \brief Utility for unpacking formatted bytes from a Virtqueue buffer +struct virtq_unserializer { + i_device_state_access *a; + virtq &vq; + uint32_t queue_idx; + uint32_t desc_idx; + uint32_t offset; + + explicit virtq_unserializer(i_device_state_access *a, virtq &vq, uint32_t queue_idx, uint32_t desc_idx, + uint32_t offset = 0) : + a(a), + vq(vq), + queue_idx(queue_idx), + desc_idx(desc_idx), + offset(offset) {} + virtq_unserializer() = delete; + ~virtq_unserializer() = default; + virtq_unserializer(const virtq_unserializer &other) = delete; + virtq_unserializer(virtq_unserializer &&other) = delete; + virtq_unserializer &operator=(const virtq_unserializer &other) = delete; + virtq_unserializer &operator=(virtq_unserializer &&other) = delete; + + bool read_bytes(unsigned char *data, uint32_t data_len) { + if (!vq.read_desc_mem(a, desc_idx, offset, data, data_len)) { + return false; + } + // Advance + offset += data_len; + return true; + } + + template + bool read_value(T *pval) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + if (!vq.read_desc_mem(a, desc_idx, offset, reinterpret_cast(pval), sizeof(T))) { + return false; + } + // Advance + offset += sizeof(T); + return true; + } + + template + bool read_value(char (*pval)[N]) { + return read_u16_string(&pval[0], N); + } + + bool read_u16_string(void *data, uint16_t data_max_len) { + // Read the string size + uint16_t len = 0; + if (!read_value(&len)) { + return false; + } + // Check if 
data has enough space for string size plus the NULL termination character + if (len + 1 > data_max_len) { + return false; + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + auto *data_uchar = reinterpret_cast(data); + // Read the string + if (!vq.read_desc_mem(a, desc_idx, offset, data_uchar, len)) { + return false; + } + data_uchar[len] = 0; + // Advance + offset += len; + return true; + } + + template + bool unpack(Args... args) { + return (read_value(args) && ...); + } +}; + +/// \brief Utility for packing formatted bytes into a Virtqueue buffer +struct virtq_serializer { + i_device_state_access *a; + virtq &vq; + uint32_t queue_idx; + uint32_t desc_idx; + uint32_t offset; + uint32_t length; + + explicit virtq_serializer(i_device_state_access *a, virtq &vq, uint32_t queue_idx, uint32_t desc_idx, + uint32_t offset = 0) : + a(a), + vq(vq), + queue_idx(queue_idx), + desc_idx(desc_idx), + offset(offset), + length(offset) {} + virtq_serializer() = delete; + ~virtq_serializer() = default; + virtq_serializer(const virtq_serializer &other) = delete; + virtq_serializer(virtq_serializer &&other) = delete; + virtq_serializer &operator=(const virtq_serializer &other) = delete; + virtq_serializer &operator=(virtq_serializer &&other) = delete; + + bool write_bytes(const unsigned char *data, uint32_t data_len) { + if (!vq.write_desc_mem(a, desc_idx, offset, data, data_len)) { + return false; + } + // Advance + offset += data_len; + length = std::max(length, offset); + return true; + } + + bool write_u16_string(const void *data, uint16_t data_len) { + // Write the string size + if (!write_value(&data_len)) { + return false; + } + // Write the string + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + if (!vq.write_desc_mem(a, desc_idx, offset, reinterpret_cast(data), data_len)) { + return false; + } + // Advance + offset += data_len; + length = std::max(length, offset); + return true; + } + + template + bool write_value(const T *pval) { + // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + if (!vq.write_desc_mem(a, desc_idx, offset, reinterpret_cast(pval), sizeof(T))) { + return false; + } + // Advance + offset += sizeof(T); + length = std::max(length, offset); + return true; + } + + bool write_value(const char *pval) { + return write_u16_string(pval, strlen(pval)); + } + + template + bool write_value(const char pval[N]) { + return write_u16_string(pval, strnlen(pval, N)); + } + + template + bool pack(Args... args) { + return (write_value(args) && ...); + } +}; + +} // namespace cartesi + +#endif