From 56b64d6e929a89c5758e364b13263e6ccaaad87a Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Thu, 4 Jan 2024 18:51:21 -0300 Subject: [PATCH] feat!: add VirtIO console device --- src/Makefile | 5 +- src/cartesi-machine.lua | 13 +- src/device-state-access.h | 10 +- src/dtb.cpp | 10 + src/htif.cpp | 3 +- src/i-device-state-access.h | 27 +- src/i-state-access.h | 35 +- src/interpret.cpp | 41 +- src/machine.cpp | 52 ++ src/machine.h | 36 ++ src/os.cpp | 305 +++++++++-- src/os.h | 68 ++- src/rtc.h | 5 +- src/state-access.h | 69 ++- src/virtio-console.cpp | 164 ++++++ src/virtio-console.h | 73 +++ src/virtio-device.cpp | 851 +++++++++++++++++++++++++++++ src/virtio-device.h | 415 ++++++++++++++ src/virtio-factory.cpp | 36 ++ src/virtio-factory.h | 35 ++ uarch/uarch-machine-state-access.h | 18 +- uarch/uarch-runtime.cpp | 7 +- 22 files changed, 2157 insertions(+), 121 deletions(-) create mode 100644 src/virtio-console.cpp create mode 100644 src/virtio-console.h create mode 100644 src/virtio-device.cpp create mode 100644 src/virtio-device.h create mode 100644 src/virtio-factory.cpp create mode 100644 src/virtio-factory.h diff --git a/src/Makefile b/src/Makefile index 1f7ee0025..0571dc635 100644 --- a/src/Makefile +++ b/src/Makefile @@ -316,6 +316,9 @@ LIBCARTESI_OBJS:= \ clint-factory.o \ plic.o \ plic-factory.o \ + virtio-factory.o \ + virtio-device.o \ + virtio-console.o \ dtb.o \ os.o \ htif.o \ @@ -481,7 +484,7 @@ test-c-api: c-api remote-cartesi-machine test-linux-workload: luacartesi $(LUA) ./cartesi-machine.lua -- "$(COVERAGE_WORKLOAD)" # Test interactive mode (to cover mcycle overwriting) - echo uname | $(LUA) ./cartesi-machine.lua -i sh + echo uname | $(LUA) ./cartesi-machine.lua -it sh # Test max mcycle (to cover max mcycle branch) $(LUA) ./cartesi-machine.lua --max-mcycle=1 diff --git a/src/cartesi-machine.lua b/src/cartesi-machine.lua index f9a8ce6ac..97a9ce118 100755 --- a/src/cartesi-machine.lua +++ b/src/cartesi-machine.lua @@ -654,18 +654,13 @@ local options = { function(all) if not all then return false end htif_console_getchar = true + -- Switch from HTIF Console (hvc0) to VirtIO console (hvc1) + bootargs = bootargs:gsub("console=hvc0", "console=hvc1") + -- Expose current terminal features to the virtual terminal local term, lang, lc_all = os.getenv("TERM"), os.getenv("LANG"), os.getenv("LC_ALL") if term then append_init = append_init .. "export TERM=" .. term .. "\n" end if lang then append_init = append_init .. "export LANG=" .. lang .. "\n" end if lc_all then append_init = append_init .. "export LC_ALL=" .. lc_all .. "\n" end - local stty = assert(io.popen("stty size")) - local line = assert(stty:read(), "command failed: stty size") - if line then - local rows, cols = line:match("^([0-9]+) ([0-9]+)$") - if rows and cols then - append_init = append_init .. "busybox stty rows " .. rows .. " cols " .. cols .. "\n" - end - end return true end, }, @@ -896,7 +891,7 @@ local options = { flash_length.root = nil flash_shared.root = nil table.remove(flash_label_order, 1) - bootargs = "quiet earlycon=sbi console=hvc0" + bootargs = bootargs:gsub(" rootfstype=.*$", "") return true end, }, diff --git a/src/device-state-access.h b/src/device-state-access.h index c761b1947..e2a90a8f7 100644 --- a/src/device-state-access.h +++ b/src/device-state-access.h @@ -139,11 +139,13 @@ class device_state_access : public i_device_state_access { return m_a.read_htif_iyield(); } - // LCOV_EXCL_START - void do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t log2_length) override { - return m_a.write_memory(paddr, data, log2_length); + bool do_read_memory(uint64_t paddr, unsigned char *data, uint64_t length) override { + return m_a.read_memory(paddr, data, length); + } + + bool do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t length) override { + return m_a.write_memory(paddr, data, length); } - // LCOV_EXCL_STOP uint64_t do_read_pma_istart(int p) override { return m_a.read_pma_istart(p); diff --git a/src/dtb.cpp b/src/dtb.cpp index ab48521f4..a59a5b98b 100644 --- a/src/dtb.cpp +++ b/src/dtb.cpp @@ -141,6 +141,16 @@ void dtb_init(const machine_config &c, unsigned char *dtb_start, uint64_t dtb_le fdt.prop_u32_list<2>("interrupts-extended", {INTC_PHANDLE, X_HOST}); fdt.end_node(); } + if (c.htif.console_getchar) { // virtio console + const uint32_t virtio_idx = 0; + const uint64_t virtio_paddr = PMA_FIRST_VIRTIO_START + virtio_idx * PMA_VIRTIO_LENGTH; + const uint32_t plic_irq_id = virtio_idx + 1; + fdt.begin_node_num("virtio", virtio_paddr); + fdt.prop_string("compatible", "virtio,mmio"); + fdt.prop_u64_list<2>("reg", {virtio_paddr, PMA_VIRTIO_LENGTH}); + fdt.prop_u32_list<2>("interrupts-extended", {PLIC_PHANDLE, plic_irq_id}); + fdt.end_node(); + } fdt.end_node(); } diff --git a/src/htif.cpp b/src/htif.cpp index d124b1feb..e9d64cb0d 100644 --- a/src/htif.cpp +++ b/src/htif.cpp @@ -109,7 +109,8 @@ static execute_status htif_console(htif_runtime_config *runtime_config, i_device // to every participant in a dispute: where would c come from? So if the code reached here in the // blockchain, there must be some serious bug // In interactive mode, we just get the next character from the console and send it back in the ack - const int c = os_getchar(); + os_poll_tty(0); + const int c = os_getchar() + 1; a->write_htif_fromhost(HTIF_BUILD(HTIF_DEVICE_CONSOLE, cmd, c)); } } diff --git a/src/i-device-state-access.h b/src/i-device-state-access.h index 190ca1655..b26198f14 100644 --- a/src/i-device-state-access.h +++ b/src/i-device-state-access.h @@ -168,17 +168,27 @@ class i_device_state_access { return do_read_htif_iyield(); } - // LCOV_EXCL_START + /// \brief Reads a chunk of data from a memory PMA range. + /// \param address Target physical address. + /// \param data Receives chunk of memory. + /// \param length Size of chunk. + /// \returns True if PMA was found and memory fully read, false otherwise. + /// \details The entire chunk of data must fit inside the same memory + /// PMA range, otherwise it fails. The search for the PMA range is implicit, and not logged. + bool read_memory(uint64_t paddr, unsigned char *data, uint64_t length) { + return do_read_memory(paddr, data, length); + } + /// \brief Writes a chunk of data to a memory PMA range. - /// \param paddr Target physical address. Must be aligned to data size. + /// \param paddr Target physical address. /// \param data Pointer to chunk of data. - /// \param log2_size Log 2 of data size. Must be >= 3 and < 64. + /// \param length Size of chunk. + /// \returns True if PMA was found and memory fully written, false otherwise. /// \details The entire chunk of data must fit inside the same memory - /// PMA range. The search for the PMA range is implicit, and not logged. - void write_memory(uint64_t paddr, const unsigned char *data, uint64_t log2_size) { - return do_write_memory(paddr, data, log2_size); + /// PMA range, otherwise it fails. The search for the PMA range is implicit, and not logged. + bool write_memory(uint64_t paddr, const unsigned char *data, uint64_t length) { + return do_write_memory(paddr, data, length); } - // LCOV_EXCL_STOP /// \brief Reads the istart field of a PMA entry /// \param p Index of PMA @@ -213,7 +223,8 @@ class i_device_state_access { virtual uint64_t do_read_htif_ihalt(void) = 0; virtual uint64_t do_read_htif_iconsole(void) = 0; virtual uint64_t do_read_htif_iyield(void) = 0; - virtual void do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t log2_size) = 0; + virtual bool do_read_memory(uint64_t paddr, unsigned char *data, uint64_t length) = 0; + virtual bool do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t length) = 0; virtual uint64_t do_read_pma_istart(int p) = 0; virtual uint64_t do_read_pma_ilength(int p) = 0; }; diff --git a/src/i-state-access.h b/src/i-state-access.h index 7b2862019..000454193 100644 --- a/src/i-state-access.h +++ b/src/i-state-access.h @@ -602,11 +602,16 @@ class i_state_access { // CRTP return derived().do_read_htif_iyield(); } - /// \brief Polls console for pending input. + /// \brief Poll for external interrupts. /// \param mcycle Current machine mcycle. - /// \returns The new machine mcycle advanced by the relative time elapsed while polling. - uint64_t poll_console(uint64_t mcycle) { - return derived().do_poll_console(mcycle); + /// \param mcycle_max Maximum mcycle to wait for interrupts. + /// \returns A pair, the first value is the new machine mcycle advanced by the relative elapsed time while + /// polling, the second value is a boolean that is true when the poll is stopped due do an external interrupt + /// request. + /// \details When mcycle_max is greater than mcycle, this function will sleep until an external interrupt + /// is triggered or mcycle_max relative elapsed time is reached. + std::pair poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { + return derived().do_poll_external_interrupts(mcycle, mcycle_max); } /// \brief Reads PMA at a given index. @@ -628,14 +633,26 @@ class i_state_access { // CRTP return derived().do_read_pma_ilength(p); } + /// \brief Reads a chunk of data from a memory PMA range. + /// \param paddr Target physical address. + /// \param data Receives chunk of memory. + /// \param length Size of chunk. + /// \returns True if PMA was found and memory fully read, false otherwise. + /// \details The entire chunk of data must fit inside the same memory + /// PMA range, otherwise it fails. The search for the PMA range is implicit, and not logged. + bool read_memory(uint64_t paddr, unsigned char *data, uint64_t length) { + return derived().do_read_memory(paddr, data, length); + } + /// \brief Writes a chunk of data to a memory PMA range. - /// \param paddr Target physical address. Must be aligned to data size. + /// \param paddr Target physical address. /// \param data Pointer to chunk of data. - /// \param log2_size Log 2 of data length. Must be >= 3 and < 64. + /// \param length Size of chunk. + /// \returns True if PMA was found and memory fully written, false otherwise. /// \details The entire chunk of data must fit inside the same memory - /// PMA range. The search for the PMA range is implicit, and not logged. - void write_memory(uint64_t paddr, const unsigned char *data, uint64_t log2_size) { - return derived().do_write_memory(paddr, data, log2_size); + /// PMA range, otherwise it fails. The search for the PMA range is implicit, and not logged. + bool write_memory(uint64_t paddr, const unsigned char *data, uint64_t length) { + return derived().do_write_memory(paddr, data, length); } /// \brief Reads a word from memory. diff --git a/src/interpret.cpp b/src/interpret.cpp index 1986edbc9..c30ff94b4 100644 --- a/src/interpret.cpp +++ b/src/interpret.cpp @@ -525,12 +525,10 @@ static inline uint64_t raise_interrupt_if_any(STATE_ACCESS &a, uint64_t pc) { /// \param mcycle Machine current cycle. template static inline void set_rtc_interrupt(STATE_ACCESS &a, uint64_t mcycle) { - if (rtc_is_tick(mcycle)) { - const uint64_t timecmp_cycle = rtc_time_to_cycle(a.read_clint_mtimecmp()); - if (timecmp_cycle <= mcycle && timecmp_cycle != 0) { - const uint64_t mip = a.read_mip(); - a.write_mip(mip | MIP_MTIP_MASK); - } + const uint64_t timecmp_cycle = rtc_time_to_cycle(a.read_clint_mtimecmp()); + if (timecmp_cycle <= mcycle && timecmp_cycle != 0) { + const uint64_t mip = a.read_mip(); + a.write_mip(mip | MIP_MTIP_MASK); } } @@ -912,7 +910,8 @@ static NO_INLINE std::pair write_virtual_memory_slow(S return {execute_status::success, pc}; } else if (likely(pma.get_istart_IO())) { const uint64_t offset = paddr - pma.get_start(); - auto status = a.write_device(pma, mcycle, offset, val64, log2_size::value); + auto status = + a.write_device(pma, mcycle, offset, static_cast(static_cast(val64)), log2_size::value); // If we do not know how to write, we treat this as a PMA violation if (likely(status != execute_status::failure)) { return {status, pc}; @@ -2617,9 +2616,19 @@ static FORCE_INLINE execute_status execute_WFI(STATE_ACCESS &a, uint64_t &pc, ui if (unlikely(priv == PRV_U || (priv < PRV_M && (mstatus & MSTATUS_TW_MASK)))) { return raise_illegal_insn_exception(a, pc, insn); } - // Poll console, this may advance mcycle when in interactive mode - mcycle = a.poll_console(mcycle); - return advance_to_next_insn(a, pc); + // We wait for interrupts until the next timer interrupt. + const uint64_t mcycle_max = rtc_time_to_cycle(a.read_clint_mtimecmp()); + execute_status status = execute_status::success; + if (mcycle_max > mcycle) { + // Poll for external interrupts (e.g console or network), + // this may advance mcycle only when interactive mode is enabled + const auto [next_mcycle, interrupted] = a.poll_external_interrupts(mcycle, mcycle_max); + mcycle = next_mcycle; + if (interrupted) { + status = execute_status::success_and_serve_interrupts; + } + } + return advance_to_next_insn(a, pc, status); } /// \brief Implementation of the FENCE instruction. @@ -5535,8 +5544,16 @@ NO_INLINE void interpret_loop(STATE_ACCESS &a, uint64_t mcycle_end, uint64_t mcy while (mcycle < mcycle_end) { INC_COUNTER(a.get_statistics(), outer_loop); - // Set interrupt flag for RTC - set_rtc_interrupt(a, mcycle); + if (rtc_is_tick(mcycle)) { + // Set interrupt flag for RTC + set_rtc_interrupt(a, mcycle); + + // Polling external interrupts only in WFI instructions is not enough + // because Linux won't execute WFI instructions while under heavy load, + // yet external interrupts still need to be triggered. + // Therefore we poll for external interrupt once a while in the interpreter loop. + a.poll_external_interrupts(mcycle, mcycle); + } // Raise the highest priority pending interrupt, if any pc = raise_interrupt_if_any(a, pc); diff --git a/src/machine.cpp b/src/machine.cpp index 82ab4e7f0..ab7760036 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -45,6 +45,8 @@ #include "uarch-step-state-access.h" #include "uarch-step.h" #include "unique-c-ptr.h" +#include "virtio-console.h" +#include "virtio-factory.h" /// \file /// \brief Cartesi machine implementation @@ -438,6 +440,16 @@ machine::machine(const machine_config &c, const machine_runtime_config &r) : // Register pma board shadow device register_pma_entry(make_shadow_pmas_pma_entry(PMA_SHADOW_PMAS_START, PMA_SHADOW_PMAS_LENGTH)); + // TODO(edubart): user should be able to configure these devices + if (m_c.htif.console_getchar) { + // Register VirtIO console device + auto vdev_console = std::make_unique(m_vdevs.size()); + register_pma_entry( + make_virtio_pma_entry(PMA_FIRST_VIRTIO_START + vdev_console->get_virtio_index() * PMA_VIRTIO_LENGTH, + PMA_VIRTIO_LENGTH, "VirtIO console device", &virtio_driver, vdev_console.get())); + m_vdevs.push_back(std::move(vdev_console)); + } + // Initialize DTB if (m_c.dtb.image_filename.empty()) { // Write the FDT (flattened device tree) into DTB @@ -494,6 +506,11 @@ machine::machine(const machine_config &c, const machine_runtime_config &r) : // Sort it by increasing start address std::sort(m_mrds.begin(), m_mrds.end(), [](const machine_memory_range_descr &a, const machine_memory_range_descr &b) { return a.start < b.start; }); + + // Disable SIGPIPE handler, because this signal be raised and terminate the emulator process, + // when calling write() on closed file descriptors. + // This can happen with the stdout console file descriptors or network file descriptors. + os_disable_sigpipe(); } static void load_hash(const std::string &dir, machine::hash_type &h) { @@ -520,6 +537,41 @@ machine::machine(const std::string &dir, const machine_runtime_config &r) : mach } } +void machine::prepare_virtio_devices_select(select_fd_sets *fds, uint64_t *timeout_us) { + for (auto &vdev : m_vdevs) { + vdev->prepare_select(fds, timeout_us); + } +} + +bool machine::poll_selected_virtio_devices(int select_ret, select_fd_sets *fds, i_device_state_access *da) { + bool interrupt_requested = false; + for (auto &vdev : m_vdevs) { + interrupt_requested |= vdev->poll_selected(select_ret, fds, da); + } + return interrupt_requested; +} + +bool machine::poll_virtio_devices(uint64_t *timeout_us, i_device_state_access *da) { + return os_select_fds( + [&](select_fd_sets *fds, uint64_t *timeout_us) -> void { prepare_virtio_devices_select(fds, timeout_us); }, + [&](int select_ret, select_fd_sets *fds) -> bool { return poll_selected_virtio_devices(select_ret, fds, da); }, + timeout_us); +} + +bool machine::has_virtio_devices() const { + return !m_vdevs.empty(); +} + +bool machine::has_virtio_console() const { + // When present, the console device is guaranteed to be the first VirtIO device, + // therefore we only need to check the first device. + return !m_vdevs.empty() && m_vdevs[0]->get_device_id() == VIRTIO_DEVICE_CONSOLE; +} + +bool machine::has_htif_console() const { + return static_cast(read_htif_iconsole() & (1 << HTIF_CONSOLE_GETCHAR)); +} + machine_config machine::get_serialization_config(void) const { // Initialize with copy of original config machine_config c = m_c; diff --git a/src/machine.h b/src/machine.h index 751fd3a98..391ffde15 100644 --- a/src/machine.h +++ b/src/machine.h @@ -22,6 +22,8 @@ #include +#include + #include "access-log.h" #include "htif.h" #include "interpret.h" @@ -30,8 +32,10 @@ #include "machine-merkle-tree.h" #include "machine-runtime-config.h" #include "machine-state.h" +#include "os.h" #include "uarch-interpret.h" #include "uarch-machine.h" +#include "virtio-device.h" namespace cartesi { @@ -62,6 +66,8 @@ class machine final { machine_runtime_config m_r; ///< Copy of initialization runtime config machine_memory_range_descrs m_mrds; ///< List of memory ranges returned by get_memory_ranges(). + boost::container::static_vector, VIRTIO_MAX> m_vdevs; ///< Array of VirtIO devices + static const pma_entry::flags m_dtb_flags; ///< PMA flags used for DTB static const pma_entry::flags m_ram_flags; ///< PMA flags used for RAM static const pma_entry::flags m_flash_drive_flags; ///< PMA flags used for flash drives @@ -288,6 +294,36 @@ class machine final { /// \brief Destructor. ~machine(); + /// \brief Fill file descriptors to be polled by select() for all VirtIO devices. + /// \param fds Pointer to sets of read, write and except file descriptors to be updated. + /// \param timeout_us Maximum amount of time to wait in microseconds, this may be updated (always to lower values). + void prepare_virtio_devices_select(select_fd_sets *fds, uint64_t *timeout_us); + + /// \brief Poll file descriptors that were marked as ready by select() for all VirtIO devices. + /// \param select_ret Return value from the most recent select() call. + /// \param fds Pointer to sets of read, write and except file descriptors to be checked. + /// \returns True if an interrupt was requested, false otherwise. + /// \details This function process pending events and trigger interrupt requests (if any). + bool poll_selected_virtio_devices(int select_ret, select_fd_sets *fds, i_device_state_access *da); + + /// \brief Poll file descriptors through select() for all VirtIO devices. + /// \details Basically call prepare_virtio_devices_select(), select() and poll_selected_virtio_devices(). + /// \param timeout_us Maximum amount of time to wait in microseconds, this may be updated (always to lower values). + /// \returns True if an interrupt was requested, false otherwise. + bool poll_virtio_devices(uint64_t *timeout_us, i_device_state_access *da); + + /// \brief Checks if the machine has VirtIO devices. + /// \returns True if at least one VirtIO device is present. + bool has_virtio_devices() const; + + /// \brief Checks if the machine has VirtIO console device. + /// \returns True if at least one VirtIO console is present. + bool has_virtio_console() const; + + /// \brief Checks if the machine has HTIF console device. + /// \returns True if HTIF console is present. + bool has_htif_console() const; + /// \brief Update the Merkle tree so it matches the contents of the machine state. /// \returns true if succeeded, false otherwise. bool update_merkle_tree(void) const; diff --git a/src/os.cpp b/src/os.cpp index 0236f6684..aa51903bc 100644 --- a/src/os.cpp +++ b/src/os.cpp @@ -26,18 +26,24 @@ #define HAVE_TERMIOS #endif +#if !defined(_WIN32) && !defined(__wasi__) && !defined(NO_IOCTL) +#define HAVE_IOCTL +#endif + #if !defined(_WIN32) && !defined(__wasi__) && !defined(NO_MMAP) #define HAVE_MMAP #endif -#if !defined(_WIN32) && !defined(NO_MKDIR) +#if !defined(NO_MKDIR) #define HAVE_MKDIR #endif #include #include +#include #include #include +#include #include #include #include @@ -58,6 +64,9 @@ #ifdef HAVE_TERMIOS #include // tcgetattr/tcsetattr +#ifdef HAVE_IOCTL +#include // ioctl +#endif #endif #ifdef HAVE_MMAP @@ -86,6 +95,9 @@ #define STDOUT_FILENO 0 #endif +#define plat_write _write +#define plat_mkdir(a, mode) _mkdir(a) + #else // not _WIN32 #if defined(HAVE_TTY) || defined(HAVE_MMAP) || defined(HAVE_TERMIOS) @@ -96,8 +108,14 @@ #include // select #endif +#define plat_write write +#define plat_mkdir mkdir + #endif // _WIN32 +// Enable these defines to debug +// #define DEBUG_OS + namespace cartesi { using namespace std::string_literals; @@ -106,9 +124,12 @@ using namespace std::string_literals; /// \brief TTY global state struct tty_state { bool initialized{false}; - std::array buf{}; // Characters in console input buffer + bool resize_pending{false}; + std::array buf{}; // Characters in console input buffer intptr_t buf_pos{}; intptr_t buf_len{}; + unsigned short cols{TTY_DEFAULT_COLS}; + unsigned short rows{TTY_DEFAULT_ROWS}; #ifdef HAVE_TERMIOS int ttyfd{-1}; termios oldtty{}; @@ -153,21 +174,71 @@ static int get_ttyfd(void) { } #endif // HAVE_TERMIOS +/// \brief Signal raised whenever TTY size changes +static void os_SIGWINCH_handler(int sig) { + (void) sig; + auto *s = get_state(); + if (!s->initialized) { + return; + } + // It's not safe to do complex logic in signal handlers, + // therefore we will actually update the console size in the next get size request. + s->resize_pending = true; +} + +bool os_update_tty_size(tty_state *s) { +#ifdef HAVE_TTY +#if defined(HAVE_TERMIOS) && defined(HAVE_IOCTL) + winsize ws{}; + if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) == 0) { + if (ws.ws_col >= 1 && ws.ws_row >= 1) { + s->cols = ws.ws_col; + s->rows = ws.ws_row; + return true; + } + } else { +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_update_tty_size(): ioctl TIOCGWINSZ failed\n"); +#endif + } + +#else + // TODO(edubart): what to do on Windows and MacOS? + +#endif // defined(HAVE_TERMIOS) && defined(HAVE_IOCTL) +#endif // HAVE_TTY + return false; +} + void os_open_tty(void) { #ifdef HAVE_TTY auto *s = get_state(); + if (s->initialized) { + // Already initialized + return; + } + s->initialized = true; #ifdef HAVE_TERMIOS if (s->ttyfd >= 0) { // Already open +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): tty already open\n"); +#endif return; } const int ttyfd = get_ttyfd(); if (ttyfd < 0) { // Failed to open tty fd +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): get_tty() failed\n"); +#endif return; } struct termios tty {}; if (tcgetattr(ttyfd, &tty) < 0) { // Failed to retrieve old mode +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): failed retrieve old mode\n"); +#endif close(ttyfd); return; } @@ -196,6 +267,9 @@ void os_open_tty(void) { tty.c_cc[VMIN] = 1; tty.c_cc[VTIME] = 0; if (tcsetattr(ttyfd, TCSANOW, &tty) < 0) { // Failed to set raw mode +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): failed to set raw mode\n"); +#endif close(ttyfd); return; } @@ -204,6 +278,9 @@ void os_open_tty(void) { // Get stdin handle s->hStdin = GetStdHandle(STD_INPUT_HANDLE); if (!s->hStdin) { +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): GetStdHandle() failed\n"); +#endif return; } // Set console in raw mode @@ -211,10 +288,28 @@ void os_open_tty(void) { DWORD dwMode = s->dwOldStdinMode; dwMode &= ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT); dwMode |= ENABLE_VIRTUAL_TERMINAL_INPUT; - SetConsoleMode(s->hStdin, dwMode); + if (!SetConsoleMode(s->hStdin, dwMode)) { +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): SetConsoleMode() failed\n"); +#endif + } } #endif // HAVE_TERMIOS + // Get tty initial size + os_update_tty_size(s); + + // TODO(edubart): does this handler works on Windows and MacOS? + // Install console resize signal handler + struct sigaction sigact {}; + sigact.sa_flags = SA_RESTART; + sigact.sa_handler = os_SIGWINCH_handler; + if (sigemptyset(&sigact.sa_mask) != 0 || sigaction(SIGWINCH, &sigact, nullptr) != 0) { +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_open_tty(): failed to install SIGWINCH handler\n"); +#endif + } + #else throw std::runtime_error("unable to open console input, stdin is unsupported in this platform"); #endif // HAVE_TTY @@ -241,39 +336,70 @@ void os_close_tty(void) { #endif // HAVE_TTY } -void os_poll_tty(uint64_t wait) { +void os_get_tty_size(uint16_t *pwidth, uint16_t *pheight) { + auto *s = get_state(); + if (!s->initialized) { + // fallback values + *pwidth = TTY_DEFAULT_COLS; + *pheight = TTY_DEFAULT_ROWS; + return; + } + // Update console size after a SIGWINCH signal + if (s->resize_pending) { + if (os_update_tty_size(s)) { + s->resize_pending = false; + } + } + *pwidth = s->cols; + *pheight = s->rows; +} + +void os_prepare_tty_select(select_fd_sets *fds) { #ifdef HAVE_TTY auto *s = get_state(); + // Ignore if TTY is not initialized or stdin was closed if (!s->initialized) { - throw std::runtime_error("can't poll console input, it is not initialized"); + return; } +#ifndef _WIN32 + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + fd_set *readfds = reinterpret_cast(fds->readfds); + FD_SET(STDIN_FILENO, readfds); + if (STDIN_FILENO > fds->maxfd) { + fds->maxfd = STDIN_FILENO; + } +#else + (void) data; +#endif +#endif +} + +bool os_poll_selected_tty(int select_ret, select_fd_sets *fds) { + auto *s = get_state(); + if (!s->initialized) { // We can't poll when TTY is not initialized + return false; + } + // If we have characters left in buffer, we don't need to obtain more characters if (s->buf_pos < s->buf_len) { - // Input buffer still has pending characters to be read - return; + return true; } #ifdef _WIN32 - s->buf_len = -1; + intptr_t len = -1; if (s->hStdin) { - // Wait for an input event - const uint64_t wait_millis = (wait + 999) / 1000; - if (WaitForSingleObject(s->hStdin, wait_millis) != WAIT_OBJECT_0) { - // No input events - return; - } // Consume input events until buffer is full or the event list is empty INPUT_RECORD inputRecord{}; DWORD numberOfEventsRead = 0; while (PeekConsoleInput(s->hStdin, &inputRecord, 1, &numberOfEventsRead)) { if (numberOfEventsRead == 0) { // Nothing to read - return; + return false; } else if (inputRecord.EventType == KEY_EVENT && inputRecord.Event.KeyEvent.bKeyDown) { // Key was pressed DWORD numberOfCharsRead = 0; // We must read input buffer through ReadConsole() to read raw terminal input if (ReadConsole(s->hStdin, s->buf.data(), s->buf.size(), &numberOfCharsRead, NULL)) { - s->buf_len = static_cast(numberOfCharsRead); + len = static_cast(numberOfCharsRead); } break; } else { @@ -282,49 +408,78 @@ void os_poll_tty(uint64_t wait) { } } } - #else - const int fd_max{0}; - fd_set rfds{}; - timeval tv{}; - tv.tv_usec = static_cast(wait); - FD_ZERO(&rfds); // NOLINT: suppress cause on MacOSX it resolves to __builtin_bzero - FD_SET(STDIN_FILENO, &rfds); - if (select(fd_max + 1, &rfds, nullptr, nullptr, &tv) <= 0 || !FD_ISSET(0, &rfds)) { - // Nothing to read - return; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + fd_set *readfds = reinterpret_cast(fds->readfds); + // If the stdin file description is not ready, we can't obtain more characters + if (select_ret <= 0 || !FD_ISSET(STDIN_FILENO, readfds)) { + return false; } - s->buf_len = static_cast(read(STDIN_FILENO, s->buf.data(), s->buf.size())); + const intptr_t len = static_cast(read(STDIN_FILENO, s->buf.data(), s->buf.size())); #endif // _WIN32 // If stdin is closed, pass EOF to client - if (s->buf_len <= 0) { + if (len <= 0) { s->buf_len = 1; - s->buf[0] = 4; // CTRL+D + s->buf[0] = TTY_CTRL_D; + } else { + s->buf_len = len; } s->buf_pos = 0; + return true; +} + +bool os_poll_tty(uint64_t timeout_us) { +#ifdef _WIN32 + auto *s = get_state(); + if (!s->initialized) { // We can't poll when TTY is not initialized + return false; + } + if (s->hStdin) { + // Wait for an input event + const uint64_t wait_millis = (wait_us + 999) / 1000; + if (WaitForSingleObject(s->hStdin, wait_millis) != WAIT_OBJECT_0) { + // No input events + return false; + } + } + return os_poll_selected_tty(-1, nullptr); #else - (void) wait; - throw std::runtime_error("can't poll console input, it is unsupported in this platform"); -#endif // HAVE_TTY + return os_select_fds( + [](select_fd_sets *fds, const uint64_t *timeout_us) -> void { + (void) timeout_us; + os_prepare_tty_select(fds); + }, + [](int select_ret, select_fd_sets *fds) -> bool { return os_poll_selected_tty(select_ret, fds); }, &timeout_us); + +#endif // _WIN32 } int os_getchar(void) { #ifdef HAVE_TTY auto *s = get_state(); if (!s->initialized) { - throw std::runtime_error("can't get char, console input is not initialized"); + return -1; } - os_poll_tty(0); if (s->buf_pos < s->buf_len) { - return s->buf[s->buf_pos++] + 1; + return s->buf[s->buf_pos++]; } -#else - throw std::runtime_error("can't get char, console input is unsupported in this platform"); #endif // HAVE_TTY - return 0; + return -1; +} + +size_t os_getchars(unsigned char *data, size_t max_len) { + size_t i = 0; + for (; i < max_len; ++i) { + const int c = os_getchar(); + if (c < 0) { + break; + } + data[i] = c; + } + return i; } static void fputc_with_line_buffering(uint8_t ch) { @@ -346,27 +501,24 @@ void os_putchar(uint8_t ch) { } else { // In interactive sessions we want to immediately write the character to stdout, // without any buffering. -#ifdef _WIN32 - if (_write(STDOUT_FILENO, &ch, 1) < 1) { + if (plat_write(STDOUT_FILENO, &ch, 1) < 1) { ; } -#else - if (write(STDOUT_FILENO, &ch, 1) < 1) { - ; - } -#endif } #else fputc_with_line_buffering(ch); #endif // HAVE_TTY } +void os_putchars(const uint8_t *data, size_t len) { + for (size_t i = 0; i < len; ++i) { + os_putchar(data[i]); + } +} + int os_mkdir(const char *path, int mode) { #ifdef HAVE_MKDIR - return mkdir(path, mode); -#elif defined(_WIN32) - (void) mode; - return _mkdir(path); + return plat_mkdir(path, mode); #else return -1; #endif // HAVE_MKDIR @@ -565,4 +717,59 @@ bool os_parallel_for(uint64_t n, const std::function(*timeout_us / 1000000); + tv.tv_usec = static_cast(*timeout_us % 1000000); + // Wait for events + // TODO(edubart): consider supporting other OSes + const int select_ret = select(fds.maxfd + 1, &readfds, &writefds, &exceptfds, &tv); + // Process ready fds + return after_cb(select_ret, &fds); +} + +void os_disable_sigpipe() { + struct sigaction sigact {}; + sigact.sa_handler = SIG_IGN; + sigact.sa_flags = SA_RESTART; + if (sigemptyset(&sigact.sa_mask) != 0 || sigaction(SIGPIPE, &sigact, nullptr) != 0) { +#ifdef DEBUG_OS + (void) fprintf(stderr, "os_disable_sigpipe(): failed to disable SIGPIPE handler\n"); +#endif + } +} + +void os_sleep_us(uint64_t timeout_us) { + if (timeout_us == 0) { + return; + } + // Select without fds just to sleep + os_select_fds( + [](select_fd_sets *fds, const uint64_t *timeout_us) -> void { + (void) fds; + (void) timeout_us; + }, + [](int select_ret, select_fd_sets *fds) -> bool { + (void) select_ret; + (void) fds; + return false; + }, + &timeout_us); +} } // namespace cartesi diff --git a/src/os.h b/src/os.h index 7440ef85e..6bb81ba47 100644 --- a/src/os.h +++ b/src/os.h @@ -26,24 +26,66 @@ namespace cartesi { +/// \brief TTY console constants +enum TTY_constants : uint32_t { + TTY_BUF_SIZE = 4096, ///< Number of characters in TTY input buffer + TTY_DEFAULT_COLS = 80, ///< Default width (columns) + TTY_DEFAULT_ROWS = 25, ///< Default height (rows) + TTY_CTRL_D = 4, ///< End of session character (Ctrl+D) +}; + +/// \brief Set of file descriptions to be polled with select(). +struct select_fd_sets { + int maxfd; + void *readfds; + void *writefds; + void *exceptfds; +}; + /// \brief Initialize console void os_open_tty(void); /// \brief Cleanup console initialization void os_close_tty(void); +/// \brief Fill file descriptors to be polled by select() with TTY's file descriptors. +/// \param fds Pointer to sets of read, write and except file descriptors to be updated. +void os_prepare_tty_select(select_fd_sets *fds); + +/// \brief Poll TTY's file descriptors that were marked as ready by select(). +/// \param select_ret Return value from the most recent select() call. +/// \param fds Pointer to sets of read, write and except file descriptors to be checked. +/// \returns True if there are pending TTY characters available to be read, false otherwise. +bool os_poll_selected_tty(int select_ret, select_fd_sets *fds); + /// \brief Polls console for input characters /// \param wait Timeout to wait for characters in microseconds -void os_poll_tty(uint64_t wait); +bool os_poll_tty(uint64_t timeout_us); + +/// \brief Get console size. +/// \param pwidth Receives the console width (number of columns). +/// \param pheight Receives the console height (amount of rows). +void os_get_tty_size(uint16_t *pwidth, uint16_t *pheight); -/// \brief Reads an input character from the console -/// \return Charater read from console +/// \brief Reads a character from the console input. +/// \return Character read from console, it may be -1 if there is no character. int os_getchar(void); -/// \brief Writes an output character to the console +/// \brief Reads multiple characters from the console input. +/// \param data Buffer to receive the console characters. +/// \param max_leng Maximum buffer length. +/// \returns Length of characters read, 0 if no characters were available. +size_t os_getchars(unsigned char *data, size_t max_len); + +/// \brief Writes a character to the console output. /// \param ch Character to write void os_putchar(uint8_t ch); +/// \brief Writes multiple characters to the console output. +/// \param data Buffer of characters to write. +/// \param len Length of buffer. +void os_putchars(const uint8_t *data, size_t len); + /// \brief Creates a new directory int os_mkdir(const char *path, int mode); @@ -88,6 +130,24 @@ struct parallel_for_mutex_guard { /// \return True if all thread tasks succeeded bool os_parallel_for(uint64_t n, const std::function &task); +// Callbacks used by os_select_fds(). +using os_select_before_callback = std::function; +using os_select_after_callback = std::function; + +/// \brief Poll file descriptions for events. +/// \param before_cb Callback called before calling select(). +/// \param after_cb Callback called after calling select(). +/// \param timeout_us Maximum amount of time in microseconds to wait for an event, +/// this value may be updated in case a before_cb() has an deadline timer before the timeout. +bool os_select_fds(const os_select_before_callback &before_cb, const os_select_after_callback &after_cb, + uint64_t *timeout_us); + +/// \brief Disable sigpipe +void os_disable_sigpipe(); + +/// \brief Sleep until timeout_us microseconds elapsed +void os_sleep_us(uint64_t timeout_us); + } // namespace cartesi #endif diff --git a/src/rtc.h b/src/rtc.h index 3e4cbdbce..7e1a21eb1 100644 --- a/src/rtc.h +++ b/src/rtc.h @@ -28,8 +28,9 @@ namespace cartesi { /// \brief RTC constants enum RTC_constants : uint64_t { - RTC_FREQ_DIV = RTC_FREQ_DIV_DEF, ///< Clock divisor is set stone in whitepaper - RTC_CLOCK_FREQ = RTC_CLOCK_FREQ_DEF, ///< Clock frequency + RTC_FREQ_DIV = RTC_FREQ_DIV_DEF, ///< Clock divisor is set stone in whitepaper + RTC_CLOCK_FREQ = RTC_CLOCK_FREQ_DEF, ///< Clock frequency + RTC_CYCLES_PER_US = RTC_CLOCK_FREQ / 1000000, ///< Clock cycles per microsecond }; /// \brief Converts from cycle count to time count diff --git a/src/state-access.h b/src/state-access.h index 1b8c8ee54..609bfd7e2 100644 --- a/src/state-access.h +++ b/src/state-access.h @@ -414,22 +414,41 @@ class state_access : public i_state_access { return m_m.get_state().htif.iyield; } - NO_INLINE uint64_t do_poll_console(uint64_t mcycle) { - const bool htif_console_getchar = static_cast(read_htif_iconsole() & (1 << HTIF_CONSOLE_GETCHAR)); - if (htif_console_getchar) { - const uint64_t warp_cycle = rtc_time_to_cycle(read_clint_mtimecmp()); - if (warp_cycle > mcycle) { - constexpr uint64_t cycles_per_us = RTC_CLOCK_FREQ / 1000000; // CLOCK_FREQ / 10^6 - const uint64_t wait = (warp_cycle - mcycle) / cycles_per_us; - const int64_t start = os_now_us(); - os_poll_tty(wait); - const int64_t end = os_now_us(); - const uint64_t elapsed_us = static_cast(std::max(end - start, INT64_C(0))); - const uint64_t tty_cycle = mcycle + (elapsed_us * cycles_per_us); - mcycle = std::min(std::max(tty_cycle, mcycle), warp_cycle); + NO_INLINE std::pair do_poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { + bool interrupt_raised = false; + const bool has_htif_console = m_m.has_htif_console(); + const bool has_virtio_devices = m_m.has_virtio_devices(); + const bool has_virtio_console = m_m.has_virtio_console(); + // Only poll external interrupts if we are in interactive mode (console is enabled or have VirtIO devices) + if (unlikely(has_htif_console || has_virtio_devices)) { + // Convert the relative interval of cycles we can wait to the interval of host time we can wait + uint64_t timeout_us = (mcycle_max - mcycle) / RTC_CYCLES_PER_US; + int64_t start_us = 0; + if (timeout_us > 0) { + start_us = os_now_us(); + } + device_state_access da(*this, mcycle); + // Poll virtio for events (e.g console stdin, network sockets) + // Timeout may be decremented in case a device has deadline timers (e.g network device) + if (has_virtio_devices && has_virtio_console) { // VirtIO + VirtIO console + m_m.poll_virtio_devices(&timeout_us, &da); + // VirtIO console device will poll TTY + } else if (has_virtio_devices) { // VirtIO without a console + HTIF console + m_m.poll_virtio_devices(&timeout_us, &da); + // Poll tty without waiting more time, because the pool above should have waited enough time + os_poll_tty(0); + } else { // Only HTIF console + os_poll_tty(timeout_us); + } + // If timeout is greater than zero, we should also increment mcycle relative to the elapsed time + if (timeout_us > 0) { + const int64_t end_us = os_now_us(); + const uint64_t elapsed_us = static_cast(std::max(end_us - start_us, INT64_C(0))); + uint64_t next_mcycle = mcycle + (elapsed_us * RTC_CYCLES_PER_US); + mcycle = std::min(std::max(next_mcycle, mcycle), mcycle_max); } } - return mcycle; + return {mcycle, interrupt_raised}; } uint64_t do_read_pma_istart(int i) const { @@ -464,8 +483,26 @@ class state_access : public i_state_access { aliased_aligned_write(hpage + hoffset, val); } - void do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t log2_size) { - m_m.write_memory(paddr, data, UINT64_C(1) << log2_size); + bool do_read_memory(uint64_t paddr, unsigned char *data, uint64_t length) const { + //??(edubart): Treating exceptions here is not ideal, we should probably + // move read_memory() method implementation inside state access later + try { + m_m.read_memory(paddr, data, length); + return true; + } catch (...) { + return false; + } + } + + bool do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t length) { + //??(edubart): Treating exceptions here is not ideal, we should probably + // move write_memory() method implementation inside state access later + try { + m_m.write_memory(paddr, data, length); + return true; + } catch (...) { + return false; + } } template diff --git a/src/virtio-console.cpp b/src/virtio-console.cpp new file mode 100644 index 000000000..b740390c7 --- /dev/null +++ b/src/virtio-console.cpp @@ -0,0 +1,164 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#include "virtio-console.h" +#include "os.h" + +namespace cartesi { + +virtio_console::virtio_console(uint32_t virtio_idx) : + virtio_device(virtio_idx, VIRTIO_DEVICE_CONSOLE, VIRTIO_CONSOLE_F_SIZE, sizeof(virtio_console_config_space)) {} + +void virtio_console::on_device_reset() { + m_stdin_ready = false; +} + +void virtio_console::on_device_ok(i_device_state_access *a) { + // Upon initialization, we need to notify the initial console size + notify_console_size_to_guest(a); +} + +bool virtio_console::on_device_queue_available(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len, uint32_t write_avail_len) { + (void) write_avail_len; + if (queue_idx == VIRTIO_CONSOLE_RECEIVEQ) { // Guest has a new slot available in the write queue + // Do nothing, host stdin characters will be written to the guest in the next poll + return false; + } else if (queue_idx == VIRTIO_CONSOLE_TRANSMITQ) { // Guest sent new characters to the host + // Write guest characters to host stdout + return write_next_chars_to_host(a, queue_idx, desc_idx, read_avail_len); + } else { + // Other queues are unexpected + notify_device_needs_reset(a); + return false; + } +} + +bool virtio_console::write_next_chars_to_host(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len) { + const virtq &vq = queue[queue_idx]; + // Read stdout characters from queue buffer in chunks + std::array chunk{}; + for (uint32_t off = 0; off < read_avail_len; off += chunk.size()) { + // Read from queue buffer + const uint32_t chunk_len = std::min(chunk.size(), read_avail_len - off); + if (!vq.read_desc_mem(a, desc_idx, off, chunk.data(), chunk_len)) { + notify_device_needs_reset(a); + return false; + } + // Write to stdout + os_putchars(chunk.data(), chunk_len); + } + // Consume the queue and notify the driver + if (!consume_and_notify_queue(a, queue_idx, desc_idx)) { + notify_device_needs_reset(a); + return false; + } + return true; +} + +bool virtio_console::write_next_chars_to_guest(i_device_state_access *a) { + if (!driver_ok) { + return false; + } + // Bytes from host stdin must be written to queue 0 (guest input) + constexpr uint32_t queue_idx = VIRTIO_CONSOLE_RECEIVEQ; + const virtq &vq = queue[queue_idx]; + // Prepare queue buffer for writing + uint16_t desc_idx{}; + uint32_t write_avail_len{}; + if (!prepare_queue_write(a, queue_idx, &desc_idx, &write_avail_len)) { + notify_device_needs_reset(a); + return false; + } + // Write buffer length can be zero in case the queue is not ready or full + if (write_avail_len == 0) { + return false; + } + // Read from stdin + std::array chunk{}; + const uint32_t chunk_len = os_getchars(chunk.data(), std::min(write_avail_len, chunk.size())); + // Chunk length is zero when there are no more characters available to write + if (chunk_len == 0) { + return false; + } + // Write to queue buffer + if (!vq.write_desc_mem(a, desc_idx, 0, chunk.data(), chunk_len)) { + notify_device_needs_reset(a); + return false; + } + // Consume the queue and notify the driver + if (!consume_and_notify_queue(a, queue_idx, desc_idx, chunk_len, VIRTQ_USED_F_NO_NOTIFY)) { + notify_device_needs_reset(a); + return false; + } + return true; +} + +bool virtio_console::notify_console_size_to_guest(i_device_state_access *a) { + // Get current console size + uint16_t cols{}; + uint16_t rows{}; + os_get_tty_size(&cols, &rows); + virtio_console_config_space *config = get_config(); + // Notify the driver only when console size changes + if (cols == config->cols && rows == config->rows) { + return false; + } + config->rows = rows; + config->cols = cols; + notify_config_change(a); + return true; +} + +void virtio_console::prepare_select(select_fd_sets *fds, uint64_t *timeout_us) { + // Ignore if driver is not initialized + if (!driver_ok) { + return; + } + // We should not poll console before the guest has started waiting for inputs, + // otherwise the inputs will be sent before the driver console is actually being used, + // then inputs will be consumed before the guest starts an interactive session, + // and this will cause piped commands to work incorrectly. + if (!m_stdin_ready) { + // Unfortunately the Linux driver does not send any event when stdin becomes "ready", + // but a trick is to consider stdin to be ready in the next WFI instruction, + // in that case timeout is non 0 because we will wait for interrupts. + //??(edubart) Maybe this workaround could be removed with multiport feature support? + if (*timeout_us != 0) { + m_stdin_ready = true; + } else { + return; + } + } + os_prepare_tty_select(fds); +} + +bool virtio_console::poll_selected(int select_ret, select_fd_sets *fds, i_device_state_access *da) { + // Ignore if driver is not initialized or stdin is not ready + if (!driver_ok || !m_stdin_ready) { + return false; + } + bool interrupt_requested = notify_console_size_to_guest(da); + if (os_poll_selected_tty(select_ret, fds)) { + while (write_next_chars_to_guest(da)) { + interrupt_requested = true; + } + } + return interrupt_requested; +} + +} // namespace cartesi diff --git a/src/virtio-console.h b/src/virtio-console.h new file mode 100644 index 000000000..d0c4e74a6 --- /dev/null +++ b/src/virtio-console.h @@ -0,0 +1,73 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef VIRTIO_CONSOLE_H +#define VIRTIO_CONSOLE_H + +#include "virtio-device.h" + +namespace cartesi { + +/// \brief VirtIO console features +enum virtio_console_features : uint64_t { + VIRTIO_CONSOLE_F_SIZE = (UINT64_C(1) << 0), ///< Console configuration cols and rows are valid. + VIRTIO_CONSOLE_F_MULTIPORT = (UINT64_C(1) << 1), ///< Device has support for multiple ports + VIRTIO_CONSOLE_F_EMERG_WRITE = (UINT64_C(1) << 2), ///< Device has support for emergency write. +}; + +/// \brief VirtIO console virtqueue indexes +enum virtio_console_virtq : uint32_t { + VIRTIO_CONSOLE_RECEIVEQ = 0, ///< Queue transmitting characters from host to guest + VIRTIO_CONSOLE_TRANSMITQ = 1, ///< Queue transmitting characters from guest to host +}; + +/// \brief VirtIO console config space +struct virtio_console_config_space { + uint16_t cols; ///< Console width + uint16_t rows; ///< Console height + uint32_t max_nr_ports; ///< Maximum number of ports supported + uint32_t emerg_wr; ///< Whether emergency write is supported +}; + +/// \brief VirtIO console device +class virtio_console final : public virtio_device { + bool m_stdin_ready = false; + +public: + virtio_console(uint32_t virtio_idx); + + void on_device_reset() override; + void on_device_ok(i_device_state_access *a) override; + bool on_device_queue_available(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len, uint32_t write_avail_len) override; + + bool write_next_chars_to_host(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len); + bool write_next_chars_to_guest(i_device_state_access *a); + bool notify_console_size_to_guest(i_device_state_access *a); + + void prepare_select(select_fd_sets *fds, uint64_t *timeout_us) override; + bool poll_selected(int select_ret, select_fd_sets *fds, i_device_state_access *da) override; + + virtio_console_config_space *get_config() { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return reinterpret_cast(config_space.data()); + } +}; + +} // namespace cartesi + +#endif diff --git a/src/virtio-device.cpp b/src/virtio-device.cpp new file mode 100644 index 000000000..ef6450a3e --- /dev/null +++ b/src/virtio-device.cpp @@ -0,0 +1,851 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +// Enable these defines to debug VirtIO +// #define DEBUG_VIRTIO +// #define DEBUG_VIRTIO_MMIO +// #define DEBUG_VIRTIO_ERRORS + +#include "virtio-device.h" +#include "plic.h" +#include "strict-aliasing.h" + +namespace cartesi { + +static inline bool is_power_of_2(uint32_t val) { + return (val & (val - 1)) == 0; +} + +static inline void set_low32(uint64_t *paddr, uint32_t val) { + *paddr = (*paddr & ~UINT64_C(0xffffffff)) | static_cast(val); +} + +static inline void set_high32(uint64_t *paddr, uint32_t val) { + *paddr = (*paddr & UINT64_C(0xffffffff)) | (static_cast(val) << 32); +} + +static bool virtq_get_avail_header(const virtq &vq, i_device_state_access *a, virtq_header *pavail_header) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return a->read_memory(vq.avail_addr, reinterpret_cast(pavail_header), sizeof(virtq_header)); +} + +static bool virtq_set_used_header(const virtq &vq, i_device_state_access *a, const virtq_header *pused_header) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return a->write_memory(vq.used_addr, reinterpret_cast(pused_header), sizeof(virtq_header)); +} + +static bool virtq_set_ring_used_elem(const virtq &vq, i_device_state_access *a, uint16_t ring_idx, + const virtq_used_elem *pused_elem) { + const uint64_t addr = vq.used_addr + sizeof(virtq_header) + (ring_idx & (vq.num - 1)) * sizeof(virtq_used_elem); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return a->write_memory(addr, reinterpret_cast(pused_elem), sizeof(virtq_used_elem)); +} + +static bool virtq_get_ring_avail_elem_desc_idx(const virtq &vq, i_device_state_access *a, uint16_t ring_idx, + uint16_t *pdesc_idx) { + const uint64_t addr = vq.avail_addr + sizeof(virtq_header) + (ring_idx & (vq.num - 1)) * sizeof(uint16_t); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return a->read_memory(addr, reinterpret_cast(pdesc_idx), sizeof(uint16_t)); +} + +static bool virtq_get_desc(const virtq &vq, i_device_state_access *a, uint16_t desc_idx, virtq_desc *pdesc) { + const uint64_t addr = vq.desc_addr + (desc_idx & (vq.num - 1)) * sizeof(virtq_desc); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return a->read_memory(addr, reinterpret_cast(pdesc), sizeof(virtq_desc)); +} + +#if defined(DEBUG_VIRTIO_MMIO) || defined(DEBUG_VIRTIO_ERRORS) +static const char *get_virtio_mmio_offset_name(uint64_t offset) { + if (offset >= VIRTIO_MMIO_CONFIG) { + return "VIRTIO_MMIO_CONFIG"; + } + switch (offset) { + case VIRTIO_MMIO_MAGIC_VALUE: + return "VIRTIO_MMIO_MAGIC_VALUE"; + case VIRTIO_MMIO_VERSION: + return "VIRTIO_MMIO_VERSION"; + case VIRTIO_MMIO_DEVICE_ID: + return "VIRTIO_MMIO_DEVICE_ID"; + case VIRTIO_MMIO_VENDOR_ID: + return "VIRTIO_MMIO_VENDOR_ID"; + case VIRTIO_MMIO_DEVICE_FEATURES: + return "VIRTIO_MMIO_DEVICE_FEATURES"; + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: + return "VIRTIO_MMIO_DEVICE_FEATURES_SEL"; + case VIRTIO_MMIO_DRIVER_FEATURES: + return "VIRTIO_MMIO_DRIVER_FEATURES"; + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: + return "VIRTIO_MMIO_DRIVER_FEATURES_SEL"; + case VIRTIO_MMIO_QUEUE_SEL: + return "VIRTIO_MMIO_QUEUE_SEL"; + case VIRTIO_MMIO_QUEUE_NUM_MAX: + return "VIRTIO_MMIO_QUEUE_NUM_MAX"; + case VIRTIO_MMIO_QUEUE_NUM: + return "VIRTIO_MMIO_QUEUE_NUM"; + case VIRTIO_MMIO_QUEUE_READY: + return "VIRTIO_MMIO_QUEUE_READY"; + case VIRTIO_MMIO_QUEUE_NOTIFY: + return "VIRTIO_MMIO_QUEUE_NOTIFY"; + case VIRTIO_MMIO_INTERRUPT_STATUS: + return "VIRTIO_MMIO_INTERRUPT_STATUS"; + case VIRTIO_MMIO_INTERRUPT_ACK: + return "VIRTIO_MMIO_INTERRUPT_ACK"; + case VIRTIO_MMIO_STATUS: + return "VIRTIO_MMIO_STATUS"; + case VIRTIO_MMIO_QUEUE_DESC_LOW: + return "VIRTIO_MMIO_QUEUE_DESC_LOW"; + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + return "VIRTIO_MMIO_QUEUE_DESC_HIGH"; + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + return "VIRTIO_MMIO_QUEUE_AVAIL_LOW"; + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + return "VIRTIO_MMIO_QUEUE_AVAIL_HIGH"; + case VIRTIO_MMIO_QUEUE_USED_LOW: + return "VIRTIO_MMIO_QUEUE_USED_LOW"; + case VIRTIO_MMIO_QUEUE_USED_HIGH: + return "VIRTIO_MMIO_QUEUE_USED_HIGH"; + case VIRTIO_MMIO_SHM_SEL: + return "VIRTIO_MMIO_SHM_SEL"; + case VIRTIO_MMIO_SHM_LEN_LOW: + return "VIRTIO_MMIO_SHM_LEN_LOW"; + case VIRTIO_MMIO_SHM_LEN_HIGH: + return "VIRTIO_MMIO_SHM_LEN_HIGH"; + case VIRTIO_MMIO_SHM_BASE_LOW: + return "VIRTIO_MMIO_SHM_BASE_LOW"; + case VIRTIO_MMIO_SHM_BASE_HIGH: + return "VIRTIO_MMIO_SHM_BASE_HIGH"; + case VIRTIO_MMIO_CONFIG_GENERATION: + return "VIRTIO_MMIO_CONFIG_GENERATION"; + case VIRTIO_MMIO_CONFIG: + return "VIRTIO_MMIO_CONFIG"; + default: + return "UNKNOWN"; + } +} +#endif + +bool virtq::get_desc_rw_avail_len(i_device_state_access *a, uint16_t desc_idx, uint32_t *pread_avail_len, + uint32_t *pwrite_avail_len) const { + // Traverse all buffers in queue + uint32_t read_len = 0; + uint32_t write_len = 0; + bool write_part = false; + bool ret = false; + while (true) { + virtq_desc desc{}; + // Retrieve queue buffer description + if (!virtq_get_desc(*this, a, desc_idx, &desc)) { + break; + } + // We are only interested in read-only buffers + if (desc.flags & VIRTQ_DESC_F_WRITE) { + write_len += desc.len; + write_part = true; + } else { + // The driver must never place a read buffer after a write buffer + if (write_part) { + break; + } + read_len += desc.len; + } + // Stop when there are no more buffers in queue + if (!(desc.flags & VIRTQ_DESC_F_NEXT)) { + ret = true; + break; + } + // Move to the next buffer description + desc_idx = desc.next; + } + if (pread_avail_len) { + *pread_avail_len = read_len; + } + if (pwrite_avail_len) { + *pwrite_avail_len = write_len; + } + return ret; +} + +bool virtq::read_desc_mem(i_device_state_access *a, uint16_t desc_idx, uint32_t start_off, unsigned char *data, + uint32_t len) const { + // Really do nothing when length is 0 + if (len == 0) { + return true; + } + const uint32_t end_off = start_off + len; + uint32_t buf_start_off = 0; + // Traverse all buffers in queue + while (true) { + virtq_desc desc{}; + // Retrieve queue buffer description + if (!virtq_get_desc(*this, a, desc_idx, &desc)) { + return false; + } + // We are only interested in read-only buffers + if (!(desc.flags & VIRTQ_DESC_F_WRITE)) { + // Read from target physical memory in chunks + const uint32_t buf_end_off = buf_start_off + desc.len; + const uint32_t chunk_start_off = std::max(buf_start_off, start_off); + const uint32_t chunk_end_off = std::min(buf_end_off, end_off); + // Copy chunk when it intersects with the desired interval + if (chunk_end_off > chunk_start_off) { + const uint32_t paddr_off = chunk_start_off - buf_start_off; + const uint32_t data_off = chunk_start_off - start_off; + const uint32_t chunk_len = chunk_end_off - chunk_start_off; + // Read chunk from physical memory + if (!a->read_memory(desc.paddr + paddr_off, data + data_off, chunk_len)) { + return false; + } + } + buf_start_off += desc.len; + // Stop when we reach the buffer end offset + if (chunk_end_off >= end_off) { + return true; + } + } + // Stop when there are no more buffers in queue + if (!(desc.flags & VIRTQ_DESC_F_NEXT)) { + // Operation failed because more chunks were expected + return false; + } + // Move to the next buffer description + desc_idx = desc.next; + } +} + +bool virtq::write_desc_mem(i_device_state_access *a, uint16_t desc_idx, uint32_t start_off, const unsigned char *data, + uint32_t len) const { + // Really do nothing when length is 0 + if (len == 0) { + return true; + } + const uint32_t end_off = start_off + len; + uint32_t buf_start_off = 0; + // Traverse all buffers in queue + while (true) { + virtq_desc desc{}; + // Retrieve queue buffer description + if (!virtq_get_desc(*this, a, desc_idx, &desc)) { + return false; + } + // We are only interested in write-only buffers + if (desc.flags & VIRTQ_DESC_F_WRITE) { + // Read from target physical memory in chunks + const uint32_t buf_end_off = buf_start_off + desc.len; + const uint32_t chunk_start_off = std::max(buf_start_off, start_off); + const uint32_t chunk_end_off = std::min(buf_end_off, end_off); + // Copy chunk when it intersects with the desired interval + if (chunk_end_off > chunk_start_off) { + const uint32_t paddr_off = chunk_start_off - buf_start_off; + const uint32_t data_off = chunk_start_off - start_off; + const uint32_t chunk_len = chunk_end_off - chunk_start_off; + // Read chunk from physical memory + if (!a->write_memory(desc.paddr + paddr_off, data + data_off, chunk_len)) { + return false; + } + } + buf_start_off += desc.len; + // Stop when we reach the buffer end offset + if (chunk_end_off >= end_off) { + return true; + } + } + // Stop when there are no more buffers in queue + if (!(desc.flags & VIRTQ_DESC_F_NEXT)) { + // Operation failed because more chunks were expected + return false; + } + // Move to the next buffer description + desc_idx = desc.next; + } +} + +bool virtq::consume_desc(i_device_state_access *a, uint16_t desc_idx, uint32_t written_len, uint16_t used_flags) { + // Sets the used ring element desc index and written length + virtq_used_elem used_elem{}; + used_elem.id = desc_idx; + used_elem.len = written_len; + if (!virtq_set_ring_used_elem(*this, a, last_used_idx, &used_elem)) { + return false; + } + // Note that this increment will eventually wrap around after 65535, + // in both driver and device. + const uint16_t next_last_used_idx = last_used_idx + 1; + // Advance the last used ring index + virtq_header used_header{}; + used_header.flags = used_flags; + used_header.idx = next_last_used_idx; + if (!virtq_set_used_header(*this, a, &used_header)) { + return false; + } + last_used_idx = next_last_used_idx; + return true; +} + +virtio_device::virtio_device(uint32_t virtio_idx, uint32_t device_id, uint64_t device_features, + uint32_t config_space_size) : + virtio_idx(virtio_idx), + device_id(device_id), + device_features(device_features | VIRTIO_F_VERSION_1), + config_space_size(config_space_size) {} + +void virtio_device::reset(i_device_state_access *a) { + on_device_reset(); + // The device MUST initialize device status to 0 upon reset. + device_status = 0; + driver_ok = false; + // The device MUST have all configuration change events cleared upon reset. + driver_features = 0; + queue_sel = 0; + shm_sel = 0; + device_features_sel = 0; + driver_features_sel = 0; + // The device MUST clear all bits in InterruptStatus upon reset. + int_status = 0; + // The device MUST clear ready bits in the QueueReady register for all queues in the device upon reset. + for (auto &vq : queue) { + vq.desc_addr = 0; + vq.avail_addr = 0; + vq.used_addr = 0; + vq.num = 0; + vq.last_used_idx = 0; + vq.ready = 0; + } + // The device MUST have all queue and configuration change events unmapped upon reset. + reset_irq(a, VIRTIO_INT_STATUS_USED_BUFFER | VIRTIO_INT_STATUS_CONFIG_CHANGE); +} + +void virtio_device::set_irq(i_device_state_access *a, uint32_t add_int_status) { + int_status |= add_int_status; +#ifdef DEBUG_VIRTIO + (void) fprintf(stderr, "virtio[%d]: set_irq int_status=%d\n", virtio_idx, int_status); +#endif + // When interrupt status is non-zero, we should set pending IRQ to the PLIC device + if (int_status != 0) { + plic_set_pending_irq(a, get_irq_id()); + } +} + +void virtio_device::reset_irq(i_device_state_access *a, uint32_t rem_int_status) { + int_status &= ~rem_int_status; +#ifdef DEBUG_VIRTIO + (void) fprintf(stderr, "virtio[%d]: reset_irq int_status=%d\n", virtio_idx, int_status); +#endif + // When interrupt status is zero, we should clear pending IRQ from the PLIC device + if (int_status == 0) { + plic_reset_pending_irq(a, get_irq_id()); + } else { + // The IRQ may have to be restored again + plic_set_pending_irq(a, get_irq_id()); + } +} + +void virtio_device::notify_queue_used(i_device_state_access *a) { +#if defined(DEBUG_VIRTIO) + (void) fprintf(stderr, "virtio[%d]: notify_queue_used\n", virtio_idx); +#endif + // A device MUST NOT consume buffers or send any used buffer notifications to the driver before DRIVER_OK. + if (driver_ok) { + set_irq(a, VIRTIO_INT_STATUS_USED_BUFFER); + } +} + +void virtio_device::notify_device_needs_reset(i_device_state_access *a) { + // A fatal failure happened while processing a queue. +#if defined(DEBUG_VIRTIO) || defined(DEBUG_VIRTIO_ERRORS) + (void) fprintf(stderr, "virtio[%d]: notify_device_needs_reset\n", virtio_idx); +#endif + // The device SHOULD set DEVICE_NEEDS_RESET when it enters an error state that a reset is needed. + device_status |= VIRTIO_STATUS_DEVICE_NEEDS_RESET; + // If DRIVER_OK is set, after it sets DEVICE_NEEDS_RESET, + // the device MUST send a device configuration change notification to the driver. + notify_config_change(a); +} + +void virtio_device::notify_config_change(i_device_state_access *a) { + // Whenever device changes the configuration, we MUST changed its config generation, + // so the driver knows that it should re-read its configuration. + config_generation++; +#if defined(DEBUG_VIRTIO) + (void) fprintf(stderr, "virtio[%d]: notify_config_change config_generation=%d\n", virtio_idx, config_generation); +#endif + // A device MUST NOT send config notifications until the driver initializes the device. + if (driver_ok) { + set_irq(a, VIRTIO_INT_STATUS_CONFIG_CHANGE); + } +} + +bool virtio_device::prepare_queue_write(i_device_state_access *a, uint32_t queue_idx, uint16_t *pdesc_idx, + uint32_t *pwrite_avail_len) const { + *pdesc_idx = 0; + *pwrite_avail_len = 0; + // A device MUST NOT send notifications until the driver initializes the device. + assert(driver_ok); + assert(queue_idx < VIRTIO_QUEUE_COUNT); + // Retrieve queue + const virtq &vq = queue[queue_idx]; + // Silently ignore when the queue is not ready yet + if (!vq.ready) { + return true; + } + // Retrieve available buffer + virtq_header avail_header{}; + if (!virtq_get_avail_header(vq, a, &avail_header)) { + return false; + } + const uint16_t last_avail_idx = avail_header.idx; + // Check if have an available index in the ring to write to. + // We can only use equality operator for this check, + // because the last available ring index may wraparound before the last used ring index, + // but eventually the last used ring index will also wraparound. + if (vq.last_used_idx == last_avail_idx) { + // Queue is full, we have to wait the driver to free a queue + return true; + } + // Retrieve descriptor index for the next available ring element + uint16_t desc_idx{}; + if (!virtq_get_ring_avail_elem_desc_idx(vq, a, vq.last_used_idx, &desc_idx)) { + return false; + } + *pdesc_idx = desc_idx; + // Retrieve maximum amount of bytes we can write to queue buffer + uint32_t write_avail_len{}; + if (!vq.get_desc_rw_avail_len(a, desc_idx, nullptr, &write_avail_len)) { + return false; + } + *pwrite_avail_len = write_avail_len; + return true; +} + +bool virtio_device::consume_and_notify_queue(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t written_len, uint16_t used_flags) { + // A device MUST NOT consume buffers or send any used buffer notifications to the driver before DRIVER_OK. + assert(driver_ok); + assert(queue_idx < VIRTIO_QUEUE_COUNT); + // Retrieve queue + virtq &vq = queue[queue_idx]; + // Consume the buffer, so the driver is free to reuse it again + if (!vq.consume_desc(a, desc_idx, written_len, used_flags)) { + return false; + } +#ifdef DEBUG_VIRTIO + (void) fprintf(stderr, "virtio[%d]: consume_and_notify_queue queue_idx=%d desc_idx=%d written_len=%d\n", virtio_idx, + queue_idx, desc_idx, written_len); +#endif + // After consuming a queue, we must notify the driver right-away + notify_queue_used(a); + return true; +} + +void virtio_device::on_device_queue_notify(i_device_state_access *a, uint32_t queue_idx) { + // The device MUST NOT consume buffers or notify the driver before DRIVER_OK + if (!driver_ok) { + return; + } + // Retrieve queue + const virtq &vq = queue[queue_idx]; + // The device MUST NOT access virtual queue contents when QueueReady is zero. + if (!vq.ready) { + return; + } + // When the driver wants to send a buffer to the device, it fills in a slot in the descriptor table + // (or chains several together), and writes the descriptor index into the available ring. + virtq_header avail_header{}; + if (!virtq_get_avail_header(vq, a, &avail_header)) { + notify_device_needs_reset(a); + return; + } + const uint16_t last_avail_idx = avail_header.idx; + // Process all queues until we reach the last available index + while (vq.last_used_idx != last_avail_idx) { + // Retrieve description index for this ring element + const uint32_t last_used_idx = vq.last_used_idx; + uint16_t desc_idx{}; + if (!virtq_get_ring_avail_elem_desc_idx(vq, a, last_used_idx, &desc_idx)) { + notify_device_needs_reset(a); + return; + } + uint32_t read_avail_len{}; + uint32_t write_avail_len{}; + if (!vq.get_desc_rw_avail_len(a, desc_idx, &read_avail_len, &write_avail_len)) { + notify_device_needs_reset(a); + return; + } +#if defined(DEBUG_VIRTIO) + (void) fprintf(stderr, + "virtio[%d]: on_device_queue_available queue_idx=%d last_avail_idx=%d last_used_idx=%d desc_idx=%d " + "read_avail_len=%d write_avail_len=%d\n", + virtio_idx, queue_idx, last_avail_idx, last_used_idx, desc_idx, read_avail_len, write_avail_len); +#endif + // Process the queue + if (!on_device_queue_available(a, queue_idx, desc_idx, read_avail_len, write_avail_len)) { + // The device doesn't want to continue consuming this queue + break; + } + // We expect the device receive to always consume queue before continuing + assert(last_used_idx != vq.last_used_idx); + } +} + +void virtio_device::prepare_select(select_fd_sets *fds, uint64_t *timeout_us) { + (void) fds; + (void) timeout_us; +} + +bool virtio_device::poll_selected(int select_ret, select_fd_sets *fds, i_device_state_access *da) { + (void) select_ret; + (void) fds; + (void) da; + return false; +}; + +bool virtio_device::poll_nowait(i_device_state_access *da) { + uint64_t timeout_us = 0; + return os_select_fds( + [&](select_fd_sets *fds, uint64_t *timeout_us) -> void { this->prepare_select(fds, timeout_us); }, + [&](int select_ret, select_fd_sets *fds) -> bool { return this->poll_selected(select_ret, fds, da); }, + &timeout_us); +} + +uint64_t virtio_device::read_shm_base(uint32_t shm_sel) { + (void) shm_sel; + // Reading from a non-existent region results in a base of 0xffffffffffffffff. + return UINT64_C(-1); +} + +uint64_t virtio_device::read_shm_length(uint32_t shm_sel) { + (void) shm_sel; + // Reading from a non-existent region results in a length of 0xffffffffffffffff. + return UINT64_C(-1); +} + +bool virtio_device::mmio_read_config(i_device_state_access *a, uint64_t offset, uint32_t *pval, int log2_size) { + (void) a; + const int size = 1 << log2_size; + // Only accept aligned reads + if ((offset & (size - 1)) != 0) { + return false; + } + // Only accept reads inside config space + if (offset + size > config_space_size) { + return false; + } + // Only accept 1,2,4 byte config reads + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const unsigned char *config_space_buf = reinterpret_cast(config_space.data()); + switch (log2_size) { + case 0: + *pval = aliased_aligned_read(&config_space_buf[offset]); + return true; + case 1: + *pval = aliased_aligned_read(&config_space_buf[offset]); + return true; + case 2: + *pval = aliased_aligned_read(&config_space_buf[offset]); + return true; + default: + return false; + } +} + +execute_status virtio_device::mmio_write_config(i_device_state_access *a, uint64_t offset, uint32_t val, + int log2_size) { + (void) a; + const int size = 1 << log2_size; + // Only accept aligned writes + if ((offset & (size - 1)) != 0) { + return execute_status::failure; + } + // Only accept writes inside config space + if (offset + size > config_space_size) { + return execute_status::failure; + } + // Only accept 1,2,4 byte config writes + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + unsigned char *config_space_buf = reinterpret_cast(config_space.data()); + switch (log2_size) { + case 0: + aliased_aligned_write(&config_space_buf[offset], val); + return execute_status::success; + case 1: + aliased_aligned_write(&config_space_buf[offset], val); + return execute_status::success; + case 2: + aliased_aligned_write(&config_space_buf[offset], val); + return execute_status::success; + default: + return execute_status::failure; + } +} + +bool virtio_device::mmio_read(i_device_state_access *a, uint64_t offset, uint32_t *pval, int log2_size) { + // If offset is equal or greater than VIRTIO_MMIO_CONFIG, the driver is actually reading a device config + if (offset >= VIRTIO_MMIO_CONFIG) { + return mmio_read_config(a, offset - VIRTIO_MMIO_CONFIG, pval, log2_size); + } + // The driver MUST only use 32 bit wide and aligned reads to access the control registers + if (offset & 3 || log2_size != 2) { + return false; + } + // Support only MMIO readable offsets according to the VirtIO spec + switch (offset) { + case VIRTIO_MMIO_MAGIC_VALUE: + *pval = VIRTIO_MAGIC_VALUE; + return true; + case VIRTIO_MMIO_VERSION: + *pval = VIRTIO_VERSION; + return true; + case VIRTIO_MMIO_DEVICE_ID: + *pval = device_id; + return true; + case VIRTIO_MMIO_VENDOR_ID: + *pval = VIRTIO_VENDOR_ID; + return true; + case VIRTIO_MMIO_DEVICE_FEATURES: + // Reading from this register returns 32 consecutive flag bits, + // the least significant bit depending on the last value written to DeviceFeaturesSel. + switch (device_features_sel) { + case 0: + *pval = static_cast(device_features); + return true; + case 1: + *pval = static_cast(device_features >> 32); + return true; + default: + *pval = 0; + return true; + } + case VIRTIO_MMIO_QUEUE_NUM_MAX: + // Reading from this register returns the maximum size (number of elements) of the queue the device is ready + // to process or zero if the queue is not available. + *pval = (queue_sel < VIRTIO_QUEUE_COUNT) ? static_cast(VIRTIO_QUEUE_NUM_MAX) : 0; + return true; + case VIRTIO_MMIO_QUEUE_READY: + // Reading from this register returns the last value written to it. + *pval = queue_sel < VIRTIO_QUEUE_COUNT ? queue[queue_sel].ready : 0; + return true; + case VIRTIO_MMIO_INTERRUPT_STATUS: + // Reading from this register returns a bit mask of events that caused the device interrupt to be asserted. + *pval = int_status; + return true; + case VIRTIO_MMIO_STATUS: + // Reading from this register returns the current device status flags. + *pval = device_status; + return true; + case VIRTIO_MMIO_CONFIG_GENERATION: + // Reading from this register returns a value describing a version of the device-specific configuration + // space. + *pval = config_generation; + return true; + case VIRTIO_MMIO_SHM_LEN_LOW: + *pval = static_cast(read_shm_length(shm_sel)); + return true; + case VIRTIO_MMIO_SHM_LEN_HIGH: + *pval = static_cast(read_shm_length(shm_sel) >> 32); + return true; + case VIRTIO_MMIO_SHM_BASE_LOW: + *pval = static_cast(read_shm_base(shm_sel)); + return true; + case VIRTIO_MMIO_SHM_BASE_HIGH: + *pval = static_cast(read_shm_base(shm_sel) >> 32); + return true; + default: + // Unsupported offset + return false; + } +} + +execute_status virtio_device::mmio_write(i_device_state_access *a, uint64_t offset, uint32_t val, int log2_size) { + // If offset is equal or greater than VIRTIO_MMIO_CONFIG, the driver is actually writing a device config + if (offset >= VIRTIO_MMIO_CONFIG) { + return mmio_write_config(a, offset - VIRTIO_MMIO_CONFIG, val, log2_size); + } + // The driver MUST only use 32 bit wide and aligned writes to access the control registers + if (offset & 3 || log2_size != 2) { + return execute_status::failure; + } + // Support only MMIO writable offsets according to the VirtIO spec + switch (offset) { + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: + // Writing to this register selects a set of 32 device feature bits accessible by reading from + // DeviceFeatures. + device_features_sel = val; + return execute_status::success; + case VIRTIO_MMIO_DRIVER_FEATURES: + // Writing to this register sets 32 consecutive flag bits, the least significant bit depending on the last + // value written to DriverFeaturesSel. + switch (driver_features_sel) { + case 0: + set_low32(&driver_features, val); + break; + case 1: + set_high32(&driver_features, val); + break; + default: + // Silently ignore it. + break; + } + return execute_status::success; + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: + // Writing to this register selects a set of 32 activated feature bits accessible by writing to + // DriverFeatures. + driver_features_sel = val; + return execute_status::success; + case VIRTIO_MMIO_QUEUE_SEL: + // Writing to this register selects the virtual queue that the following operations on + // QueueNumMax, QueueNum, QueueReady, QueueDescLow, QueueDescHigh, QueueAvailLow, QueueAvailHigh, + // QueueUsedLow and QueueUsedHigh apply to. + queue_sel = val; + return execute_status::success; + case VIRTIO_MMIO_QUEUE_NUM: + // Writing to this register notifies the device what size of the queue the driver will use. + // QueueSize value must always be less than QueueMax and a power of 2. + if (queue_sel < VIRTIO_QUEUE_COUNT && val <= VIRTIO_QUEUE_NUM_MAX && is_power_of_2(val)) { + queue[queue_sel].num = val; + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_READY: + // Writing one to this register notifies the device that it can execute requests from this virtual queue. + if (queue_sel < VIRTIO_QUEUE_COUNT) { + queue[queue_sel].ready = (val == 1) ? 1 : 0; + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_NOTIFY: + // Writing a value to this register notifies the device that there are new buffers to process in a queue. + // The value written should be the queue index. + if (val < VIRTIO_QUEUE_COUNT) { + on_device_queue_notify(a, val); + } + // Most of times we will need to serve interrupts due to either used buffer or config change + // notification + return (int_status != 0) ? execute_status::success_and_serve_interrupts : execute_status::success; + case VIRTIO_MMIO_INTERRUPT_ACK: + // Writing a value with bits set as defined in InterruptStatus to this register notifies the device that + // events causing the interrupt have been handled. + reset_irq(a, val); + return (int_status != 0) ? execute_status::success_and_serve_interrupts : execute_status::success; + case VIRTIO_MMIO_STATUS: + if (val == 0) { + // Writing zero to this registers triggers a device reset. + reset(a); + } else { + const uint32_t old_status = device_status; + const uint64_t enabling_status = (device_status ^ val) & val; + if (enabling_status & VIRTIO_STATUS_FEATURES_OK) { + // The driver will re-read device status to ensure the FEATURES_OK bit is really set. + // We allow the device initialization to succeed only if the driver supports our device + // features. + if (driver_features != device_features) { + return execute_status::success; + } + } + // Writing non-zero values to this register sets the status flags, indicating the driver progress. + device_status = val; + if (enabling_status & VIRTIO_STATUS_DRIVER_OK) { + // If DRIVER_OK is set, after it sets DEVICE_NEEDS_RESET, the device MUST send a device + // configuration change notification to the driver. + if (old_status & VIRTIO_STATUS_DEVICE_NEEDS_RESET) { + set_irq(a, VIRTIO_INT_STATUS_CONFIG_CHANGE); + } else { + driver_ok = true; + on_device_ok(a); + } + } + } + // We may have triggered an interrupt request + return (int_status != 0) ? execute_status::success_and_serve_interrupts : execute_status::success; + case VIRTIO_MMIO_QUEUE_DESC_LOW: + if (queue_sel < VIRTIO_QUEUE_COUNT) { + set_low32(&queue[queue_sel].desc_addr, val); + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + if (queue_sel < VIRTIO_QUEUE_COUNT) { + set_low32(&queue[queue_sel].avail_addr, val); + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_USED_LOW: + if (queue_sel < VIRTIO_QUEUE_COUNT) { + set_low32(&queue[queue_sel].used_addr, val); + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + if (queue_sel < VIRTIO_QUEUE_COUNT) { + set_high32(&queue[queue_sel].desc_addr, val); + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + if (queue_sel < VIRTIO_QUEUE_COUNT) { + set_high32(&queue[queue_sel].avail_addr, val); + } + return execute_status::success; + case VIRTIO_MMIO_QUEUE_USED_HIGH: + if (queue_sel < VIRTIO_QUEUE_COUNT) { + set_high32(&queue[queue_sel].used_addr, val); + } + return execute_status::success; + case VIRTIO_MMIO_SHM_SEL: + // Writing to this register selects the shared memory region + // following operations on SHMLenLow, SHMLenHigh, SHMBaseLow and SHMBaseHigh apply to. + shm_sel = val; + return execute_status::success; + default: + // Unsupported offset + return execute_status::failure; + } +} + +/// \brief VirtIO device read callback. See ::pma_read. +static bool virtio_read(void *context, i_device_state_access *a, uint64_t offset, uint64_t *pval, int log2_size) { + virtio_device *vdev = static_cast(context); + uint32_t val32 = 0; + const bool status = vdev->mmio_read(a, offset, &val32, log2_size); + if (status) { + *pval = val32; + } +#ifdef DEBUG_VIRTIO_MMIO + (void) fprintf(stderr, "virtio[%d]: mmio_read offset=0x%03lx (%s) value=%d size=%d\n", vdev->get_virtio_index(), + offset, get_virtio_mmio_offset_name(offset), val32, 1 << log2_size); +#endif +#if defined(DEBUG_VIRTIO_MMIO) || defined(DEBUG_VIRTIO_ERRORS) + if (!status) { + (void) fprintf(stderr, "virtio[%d]: mmio_read FAILED! offset=0x%03lx(%s) size=%d\n", vdev->get_virtio_index(), + offset, get_virtio_mmio_offset_name(offset), 1 << log2_size); + } +#endif + return status; +} + +/// \brief VirtIO device read callback. See ::pma_write. +static execute_status virtio_write(void *context, i_device_state_access *a, uint64_t offset, uint64_t val, + int log2_size) { + virtio_device *vdev = static_cast(context); +#ifdef DEBUG_VIRTIO_MMIO + (void) fprintf(stderr, "virtio[%d]: mmio_write offset=0x%03lx (%s) value=%ld size=%d\n", vdev->get_virtio_index(), + offset, get_virtio_mmio_offset_name(offset), val, 1 << log2_size); +#endif + const execute_status status = vdev->mmio_write(a, offset, val, log2_size); +#if defined(DEBUG_VIRTIO_MMIO) || defined(DEBUG_VIRTIO_ERRORS) + if (status == execute_status::failure) { + (void) fprintf(stderr, "virtio[%d]: mmio_write FAILED! offset=0x%03lx (%s) value=%ld size=%d\n", + vdev->get_virtio_index(), offset, get_virtio_mmio_offset_name(offset), val, 1 << log2_size); + } +#endif + return status; +} + +const pma_driver virtio_driver = {"VirtIO", virtio_read, virtio_write}; + +} // namespace cartesi diff --git a/src/virtio-device.h b/src/virtio-device.h new file mode 100644 index 000000000..5b5c0c096 --- /dev/null +++ b/src/virtio-device.h @@ -0,0 +1,415 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef VIRTIO_DEVICE_H +#define VIRTIO_DEVICE_H + +#include +#include +#include + +#include + +#include "i-device-state-access.h" +#include "os.h" +#include "pma.h" + +namespace cartesi { + +/// \brief VirtIO constants +enum virtio_constants : uint32_t { + VIRTIO_MAGIC_VALUE = 0x74726976, // Little-endian equivalent of the "virt" string + VIRTIO_VERSION = 0x2, ///< Compliance with VirtIO v1.2 specification for non-legacy devices + VIRTIO_VENDOR_ID = 0xffff, ///< Dummy vendor ID + VIRTIO_QUEUE_COUNT = 2, ///< All devices we implement so far just need 2 queues + VIRTIO_QUEUE_NUM_MAX = 128, ///< Number of elements in queue ring, it should be at least 128 for most drivers + VIRTIO_MAX_CONFIG_SPACE_SIZE = 256, ///< Maximum size of config space + VIRTIO_MAX = 31, ///< Maximum number of virtio devices +}; + +/// \brief VirtIO features flags +enum virtio_features : uint64_t { + VIRTIO_F_INDIRECT_DESC = + (UINT64_C(1) << 28), ///< The driver can use descriptors with the VIRTQ_DESC_F_INDIRECT flag. + VIRTIO_F_EVENT_IDX = (UINT64_C(1) << 29), ///< Enables the used_event and the avail_event fields. + VIRTIO_F_VERSION_1 = (UINT64_C(1) << 32), ///< Compliance with VirtIO v1.2 specification + VIRTIO_F_ACCESS_PLATFORM = (UINT64_C(1) << 33), ///< The device can be used on a platform where device access to + ///< data in memory is limited and/or translated. + VIRTIO_F_RING_PACKED = (UINT64_C(1) << 34), ///< Support for the packed virtqueue layout. + VIRTIO_F_IN_ORDER = (UINT64_C(1) + << 35), ///< All buffers are used by the device in the same order in which they have been made available. + VIRTIO_F_ORDER_PLATFORM = (UINT64_C(1) + << 36), ///< Memory accesses by the driver and the device are ordered in a way described by the platform. + VIRTIO_F_SR_IOV = (UINT64_C(1) << 37), ///< Supports Single Root I/O Virtualization (PCI only). + VIRTIO_F_NOTIFICATION_DATA = (UINT64_C(1) << 38), ///< The driver passes extra data in its device notifications. + VIRTIO_F_NOTIF_CONFIG_DATA = (UINT64_C(1) << 39), ///< The driver uses the data provided by the device as a + ///< virtqueue identifier in available buffer notifications. + VIRTIO_F_RING_RESET = (UINT64_C(1) << 40), ///< The driver can reset a queue individually. +}; + +/// \brief VirtIO device types as defined in VirtIO v1.2 specification +enum virtio_devices : uint32_t { + VIRTIO_DEVICE_INVALID = 0, + VIRTIO_DEVICE_NETWORK = 1, + VIRTIO_DEVICE_BLOCK = 2, + VIRTIO_DEVICE_CONSOLE = 3, + VIRTIO_DEVICE_ENTROPY = 4, + VIRTIO_DEVICE_MEMORY_BALLOONING = 5, + VIRTIO_DEVICE_IOMEM = 6, + VIRTIO_DEVICE_RPMSG = 7, + VIRTIO_DEVICE_SCSI = 8, + VIRTIO_DEVICE_9P = 9, + VIRTIO_DEVICE_WLAN = 10, + VIRTIO_DEVICE_RPROC = 11, + VIRTIO_DEVICE_CAIF = 12, + VIRTIO_DEVICE_MEMORY_BALLOON = 13, + VIRTIO_DEVICE_GPU = 16, + VIRTIO_DEVICE_CLOCK = 17, + VIRTIO_DEVICE_INPUT = 18, + VIRTIO_DEVICE_SOCKET = 19, + VIRTIO_DEVICE_CRYPTO = 20, + VIRTIO_DEVICE_SIGNAL_DISTRIBUTION = 21, + VIRTIO_DEVICE_PSTORE = 22, + VIRTIO_DEVICE_IOMMU = 23, + VIRTIO_DEVICE_MEMORY = 24, + VIRTIO_DEVICE_AUDIO = 25, + VIRTIO_DEVICE_FILE_SYSTEM = 26, + VIRTIO_DEVICE_PMEM = 27, + VIRTIO_DEVICE_RPMB = 28, + VIRTIO_DEVICE_MAC80211_HWSIM = 29, + VIRTIO_DEVICE_VIDEO_ENCODER = 30, + VIRTIO_DEVICE_VIDEO_DECODER = 31, + VIRTIO_DEVICE_SCMI = 32, + VIRTIO_DEVICE_NITROSECURE = 33, + VIRTIO_DEVICE_I2C = 34, + VIRTIO_DEVICE_WATCHDOG = 35, + VIRTIO_DEVICE_CAN = 36, + VIRTIO_DEVICE_PARAMETER_SERVER = 38, + VIRTIO_DEVICE_AUDIO_POLICY = 39, + VIRTIO_DEVICE_BLUETOOTH = 40, + VIRTIO_DEVICE_GPIO = 41, + VIRTIO_DEVICE_RDMA = 42, +}; + +/// \brief VirtIO device status bits +enum virtio_status : uint32_t { + VIRTIO_STATUS_ACKNOWLEDGE = (1 << 0), ///< Guest OS has found the device and recognized it as a valid virtio device. + VIRTIO_STATUS_DRIVER = (1 << 1), ///< Guest OS knows how to drive the device. + VIRTIO_STATUS_DRIVER_OK = (1 << 2), ///< The driver is set up and ready to drive the device. + VIRTIO_STATUS_FEATURES_OK = + (1 << 3), ///< The driver has acknowledged all the features it understands, and feature negotiation is complete. + VIRTIO_STATUS_DEVICE_NEEDS_RESET = (1 << 4), ///< Device has experienced an error from which it can't recover. + VIRTIO_STATUS_FAILED = (1 << 5), ///< Something went wrong in the guest, and it has given up on the device. This + ///< could be an internal error, or the driver didn't like the device for some + ///< reason, or even a fatal error during device operation. +}; + +/// \brief VirtIO memory mapped IO offsets +enum virtio_mmio_offsets : uint64_t { + VIRTIO_MMIO_MAGIC_VALUE = 0x000, ///< Magic value + VIRTIO_MMIO_VERSION = 0x004, ///< Device version number + VIRTIO_MMIO_DEVICE_ID = 0x008, ///< Virtio Subsystem Device ID + VIRTIO_MMIO_VENDOR_ID = 0x00c, ///< Virtio Subsystem Vendor ID + VIRTIO_MMIO_DEVICE_FEATURES = 0x010, ///< Flags representing features the device supports + VIRTIO_MMIO_DEVICE_FEATURES_SEL = 0x014, ///< Device (host) features word selection. + VIRTIO_MMIO_DRIVER_FEATURES = 0x020, ///< Flags representing device features understood and activated by the driver + VIRTIO_MMIO_DRIVER_FEATURES_SEL = 0x024, ///< Driver (guest) features word selection + VIRTIO_MMIO_QUEUE_SEL = 0x030, ///< Virtual queue index + VIRTIO_MMIO_QUEUE_NUM_MAX = 0x034, ///< Maximum virtual queue size + VIRTIO_MMIO_QUEUE_NUM = 0x038, ///< Virtual queue size + VIRTIO_MMIO_QUEUE_READY = 0x044, ///< Virtual queue ready bit + VIRTIO_MMIO_QUEUE_NOTIFY = 0x050, ///< Queue notifier + VIRTIO_MMIO_INTERRUPT_STATUS = 0x060, ///< Interrupt status + VIRTIO_MMIO_INTERRUPT_ACK = 0x064, ///< Interrupt acknowledge + VIRTIO_MMIO_STATUS = 0x070, ///< Device status + VIRTIO_MMIO_QUEUE_DESC_LOW = 0x080, ///< Virtual queue descriptor area - 64 bit long physical address + VIRTIO_MMIO_QUEUE_DESC_HIGH = 0x084, ///< Virtual queue descriptor area - 64 bit long physical address + VIRTIO_MMIO_QUEUE_AVAIL_LOW = 0x090, ///< Virtual queue driver area - 64 bit long physical address + VIRTIO_MMIO_QUEUE_AVAIL_HIGH = 0x094, ///< Virtual queue driver area - 64 bit long physical address + VIRTIO_MMIO_QUEUE_USED_LOW = 0x0a0, ///< Virtual queue device area - 64 bit long physical address + VIRTIO_MMIO_QUEUE_USED_HIGH = 0x0a4, ///< Virtual queue device area - 64 bit long physical address + VIRTIO_MMIO_SHM_SEL = 0x0ac, ///< Shared memory id + VIRTIO_MMIO_SHM_LEN_LOW = 0x0b0, ///< Shared memory region - 64 bit long length + VIRTIO_MMIO_SHM_LEN_HIGH = 0x0b4, ///< Shared memory region - 64 bit long length + VIRTIO_MMIO_SHM_BASE_LOW = 0x0b8, ///< Shared memory region - 64 bit long physical address + VIRTIO_MMIO_SHM_BASE_HIGH = 0x0bc, ///< Shared memory region - 64 bit long physical address + VIRTIO_MMIO_CONFIG_GENERATION = 0x0fc, ///< Configuration atomicity value + VIRTIO_MMIO_CONFIG = 0x100, ///< Configuration space +}; + +/// \brief VirtIO interrupt status +enum virtio_int_status : uint32_t { + VIRTIO_INT_STATUS_USED_BUFFER = 1 << 0, + VIRTIO_INT_STATUS_CONFIG_CHANGE = 1 << 1, +}; + +/// \brief Virtqueue descriptor flags +enum virtq_desc_flags : uint16_t { + VIRTQ_DESC_F_NEXT = 1, ///< This marks a buffer as continuing via the next field. + VIRTQ_DESC_F_WRITE = 2, ///< This marks a buffer as device write-only (otherwise device read-only). + VIRTQ_DESC_F_INDIRECT = 4, ///< This means the buffer contains a list of buffer descriptors. +}; + +/// \brief Virtqueue used flags +enum virtq_used_flags : uint16_t { + VIRTQ_USED_F_NO_NOTIFY = + 1, ///< The device uses this in used flags to advise the driver: don't kick me when you add a buffer. +}; + +/// \brief Virtqueue avail flags +enum virtq_avail_flags : uint16_t { + VIRTQ_AVAIL_F_NO_INTERRUPT = + 1, ///< The driver uses this in avail flags to advise the device: don't interrupt me when you consume a buffer. +}; + +/// \brief Virtqueue buffer descriptor +struct virtq_desc { + uint64_t paddr; ///< Guest physical address + uint32_t len; ///< Guest physical length + uint16_t flags; ///< Descriptor flags + uint16_t next; ///< Next field if flags & VIRTQ_DESC_F_NEXT +}; + +/// \brief Virtqueue used/avail header +struct virtq_header { + uint16_t flags; ///< Used or avail flags (see virtq_used_flags or virtq_avail_flags) + uint16_t idx; ///< Where the driver would put the next descriptor entry in the ring (modulo the queue size) +}; + +/// \brief Virtqueue used element +struct virtq_used_elem { + uint32_t id; ///< Index of start of used descriptor chain + uint32_t len; ///< Total length of the descriptor chain which was written to. +}; + +/// \brief VirtIO's split Virtqueue implementation +struct virtq { + uint64_t desc_addr; ///< Used for describing buffers + uint64_t avail_addr; ///< Data supplied by driver to the device (available ring) + uint64_t used_addr; ///< Data supplied by device to driver (used ring) + uint32_t num; ///< Maximum number of elements in the queue ring + uint16_t last_used_idx; ///< Last used ring index, this always increment + uint16_t ready; ///< Whether the queue is ready + + /// \brief Gets how many bytes are available in queue read/write buffers. + /// \param a The state accessor for the current device. + /// \param desc_idx Index of queue's descriptor be traversed. + /// \param pread_avail_len Receives the available length of the read buffer. + /// \param pwrite_avail_len Receives the available length of the write buffer. + /// \returns True if successful, false if an error happened while parsing the queue buffer. + bool get_desc_rw_avail_len(i_device_state_access *a, uint16_t desc_idx, uint32_t *pread_avail_len, + uint32_t *pwrite_avail_len) const; + + /// \brief Reads bytes from a queue buffer descriptor. + /// \param a The state accessor for the current device. + /// \param desc_idx Index of queue's descriptor be traversed. + /// \param start_off Starting offset in the queue read buffer to be read. + /// \param data Receives the data. + /// \param len Amount of bytes to be read. + /// \returns True if successful, false if an error happened while reading the queue buffer. + bool read_desc_mem(i_device_state_access *a, uint16_t desc_idx, uint32_t start_off, unsigned char *data, + uint32_t len) const; + + /// \brief Writes bytes to a queue buffer descriptor. + /// \param a The state accessor for the current device. + /// \param desc_idx Index of queue's descriptor be traversed. + /// \param start_off Starting offset in the queue write buffer to be written. + /// \param data Data to be written. + /// \param len Amount of bytes to be written. + /// \returns True if successful, false if an error happened while writing the queue buffer. + bool write_desc_mem(i_device_state_access *a, uint16_t desc_idx, uint32_t start_off, const unsigned char *data, + uint32_t len) const; + + /// \brief Consumes a queue buffer, marking it a used to the driver. + /// \brief The driver will notify later when the buffer becomes available again, + /// after it finishes processing the buffer. + /// \param a The state accessor for the current device. + /// \param desc_idx Index of queue's header descriptor to be consumed. + /// \param written_len Amount of bytes written in case of write-only queues, + /// should be 0 for read-only queues. + /// \param flags Used flags to passed to the driver. + /// \returns True if successful, false if an error happened. + bool consume_desc(i_device_state_access *a, uint16_t desc_idx, uint32_t written_len, uint16_t flags); +}; + +/// \brief VirtIO device common interface +class virtio_device { +protected: + uint32_t virtio_idx = 0; ///< VirtIO device index + uint32_t int_status = 0; ///< Interrupt status mask (see virtio_status) + uint32_t device_id = 0; ///< Device id (see virtio_devices) + uint64_t device_features = 0; ///< Features supported by the device + uint64_t driver_features = 0; ///< Features supported by the driver + uint32_t device_features_sel = 0; ///< Device features selector (high/low bits) + uint32_t driver_features_sel = 0; ///< Driver features selector (high/low bits) + uint32_t queue_sel = 0; ///< Queue selector + uint32_t shm_sel = 0; ///< Shared memory selector + uint32_t device_status = 0; ///< Device status mask (see virtio_status) + uint32_t config_generation = 0; ///< Configuration generation counter + uint32_t config_space_size = 0; ///< Configuration size + bool driver_ok = false; ///< True when the device was successfully initialized by the driver + + // Use an array of uint32 instead of uint8, to make sure we can perform 4-byte aligned reads on config space + std::array config_space{}; ///< Configuration space + std::array queue{}; ///< Virtqueues + +public: + explicit virtio_device(uint32_t virtio_idx, uint32_t device_id, uint64_t device_features, + uint32_t config_space_size); + virtio_device() = delete; + virtual ~virtio_device() = default; + virtio_device(const virtio_device &other) = delete; + virtio_device(virtio_device &&other) = delete; + virtio_device &operator=(const virtio_device &other) = delete; + virtio_device &operator=(virtio_device &&other) = delete; + + /// \brief Reset device to uninitialize state, cleaning all its internal state. + /// \details This is only requested by the driver when a fatal failure occurs + /// and the driver is about to reinitialize the device. + /// It's also request by the driver to de-initialize the device. + void reset(i_device_state_access *a); + + /// \brief Set an interrupt request. + /// \params add_int_status Interrupt status bits to be set. + void set_irq(i_device_state_access *a, uint32_t add_int_status); + + /// \brief Clear interrupt requests. + /// \params rem_int_status Interrupt status bits to be unset. + void reset_irq(i_device_state_access *a, uint32_t rem_int_status); + + /// \brief Notify the driver that a fatal failure occurred and it should reset the device state. + /// \details A good driver implementation will issue a reset and reinitialize the device this call. + void notify_device_needs_reset(i_device_state_access *a); + + /// \brief Notify the driver that a queue buffer has just been used. + void notify_queue_used(i_device_state_access *a); + + /// \brief Notify the driver that device has configuration changed. + /// \details The driver will eventually re-read the configuration space to detect the change. + void notify_config_change(i_device_state_access *a); + + /// \brief Prepare a queue descriptor for writing. + /// \param queue_idx Queue index to write to. + /// \param pdesc_idx Receives queue's available descriptor index that can be written to. + /// \param pwrite_avail_len Receives maximum length that can be written to. + /// \returns True if there are no errors, false otherwise. + /// \details In case the queue is full or not ready yet, this function will still return true, + /// however pwrite_avail_len will be set to 0. + bool prepare_queue_write(i_device_state_access *a, uint32_t queue_idx, uint16_t *pdesc_idx, + uint32_t *pwrite_avail_len) const; + + /// Consume an available queue's descriptor (sets it as used) and notify the driver. + /// \param queue_idx Queue index to consume and notify. + /// \param desc_idx Queue's available descriptor index to set as used. + /// \param written_len Amount of bytes written to the descriptor buffer. + /// \param used_flags Used flags, see virtq_used_flags. + /// \returns True if there are no errors, false otherwise. + bool consume_and_notify_queue(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t written_len = 0, uint16_t used_flags = 0); + + /// \brief Called when driver request a device reset, this function must clean-up all device internal state. + virtual void on_device_reset() = 0; + + /// \brief Called when driver finish initializing the device. + virtual void on_device_ok(i_device_state_access *a) = 0; + + /// \brief Process driver notification for pending available queue's descriptors. + /// \params queue_idx Index for the queue that is has an available descriptor to be processed. + void on_device_queue_notify(i_device_state_access *a, uint32_t queue_idx); + + /// \brief Called when driver notifies that a queue descriptor is available to be processed. + /// \param queue_idx Queue index that has at least one available descriptor. + /// \param desc_idx Queue's available descriptor index. + /// \param read_avail_len Total readable length in the descriptor buffer. + /// \param write_avail_len Total writable length in the descriptor buffer. + virtual bool on_device_queue_available(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, + uint32_t read_avail_len, uint32_t write_avail_len) = 0; + + /// \brief Fill file descriptors to be polled by select(). + /// \param fds Pointer to sets of read, write and except file descriptors to be updated. + /// \param timeout_us Maximum amount of time to wait, this may be updated (always to lower values). + virtual void prepare_select(select_fd_sets *fds, uint64_t *timeout_us); + + /// \brief Poll file descriptors that were marked as ready by select(). + /// \param select_ret Return value from the most recent select() call. + /// \param fds Pointer to sets of read, write and except file descriptors to be checked. + /// \returns True if an interrupt was requested, false otherwise. + /// \details This function process pending events and trigger interrupt requests (if any). + virtual bool poll_selected(int select_ret, select_fd_sets *fds, i_device_state_access *da); + + /// \brief Poll pending events without waiting (non-blocking). + /// \details Basically call prepare_select(), select() and poll_selected() with timeout set to 0. + /// \returns True if an interrupt was requested, false otherwise. + bool poll_nowait(i_device_state_access *da); + + /// \brief Reads device's shared memory base address. + /// \returns Guest a valid physical address, or -1 in case shared memory is not supported by the device. + virtual uint64_t read_shm_base(uint32_t shm_sel); + + /// \brief Reads device's shared memory length. + /// \returns Length in bytes, or -1 in case shared memory is not supported by the device. + virtual uint64_t read_shm_length(uint32_t shm_sel); + + /// \brief Reads a value from device's configuration space. + /// \param offset Offset to be read. + /// \param pval Receives the value. + /// \param log2_size log2 of size of value to read. + /// \returns True if there are no errors, false otherwise. + bool mmio_read_config(i_device_state_access *a, uint64_t offset, uint32_t *pval, int log2_size); + + /// \brief Writes a value to device's configuration space. + /// \param offset Offset to write to. + /// \param val The value to write. + /// \param log2_size log2 of size of value to write. + /// \returns A status of the execution, execute_status::failure when there is an error. + execute_status mmio_write_config(i_device_state_access *a, uint64_t offset, uint32_t val, int log2_size); + + /// \brief Reads a value from the device. + /// \param offset Offset to be read. + /// \param pval Receives the value. + /// \param log2_size log2 of size of value to read. + /// \returns True if there are no errors, false otherwise. + bool mmio_read(i_device_state_access *a, uint64_t offset, uint32_t *pval, int log2_size); + + /// \brief Writes a value to the device. + /// \param offset Offset to be written. + /// \param val The value to be written. + /// \param log2_size log2 of size of value to write. + /// \returns execute::failure if operation failed, otherwise other success enumeration if operation succeeded. + execute_status mmio_write(i_device_state_access *a, uint64_t offset, uint32_t val, int log2_size); + + /// \brief Returns the VirtIO device index for this VirtIO device. + uint32_t get_virtio_index() const { + return virtio_idx; + } + + /// \brief Returns the PLIC's interrupt request number for this VirtIO device. + uint32_t get_irq_id() const { + return virtio_idx + 1; + } + + /// \brief Returns the VirtIO device id. + uint32_t get_device_id() const { + return device_id; + } +}; + +/// \brief Global VirtIO driver instance +extern const pma_driver virtio_driver; + +} // namespace cartesi + +#endif diff --git a/src/virtio-factory.cpp b/src/virtio-factory.cpp new file mode 100644 index 000000000..90e12d91d --- /dev/null +++ b/src/virtio-factory.cpp @@ -0,0 +1,36 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#include "virtio-factory.h" + +namespace cartesi { + +pma_entry make_virtio_pma_entry(uint64_t start, uint64_t length, const std::string &description, + const pma_driver *driver, void *context) { + const pma_entry::flags f{ + true, // R + true, // W + false, // X + false, // IR + false, // IW + PMA_ISTART_DID::VIRTIO // DID + }; + // VirtIO devices are not verifiable yet, + // therefore peek will always fail and cause an runtime error when updating the Merkle tree. + return make_device_pma_entry(description, start, length, pma_peek_error, driver, context).set_flags(f); +} + +} // namespace cartesi diff --git a/src/virtio-factory.h b/src/virtio-factory.h new file mode 100644 index 000000000..5217ed977 --- /dev/null +++ b/src/virtio-factory.h @@ -0,0 +1,35 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef VIRTIO_FACTORY_H +#define VIRTIO_FACTORY_H + +#include + +#include "pma.h" + +namespace cartesi { + +/// \brief Creates a PMA entry for a VirtIO device +/// \param start Start address for memory range. +/// \param length Length of memory range. +/// \returns Corresponding PMA entry +pma_entry make_virtio_pma_entry(uint64_t start, uint64_t length, const std::string &description, + const pma_driver *driver, void *context); + +} // namespace cartesi + +#endif diff --git a/uarch/uarch-machine-state-access.h b/uarch/uarch-machine-state-access.h index 130a7781f..630ef994a 100644 --- a/uarch/uarch-machine-state-access.h +++ b/uarch/uarch-machine-state-access.h @@ -522,8 +522,8 @@ class uarch_machine_state_access : public i_state_access(shadow_state_get_csr_abs_addr(shadow_state_csr::htif_iyield)); } - uint64_t do_poll_console(uint64_t mcycle) { - return mcycle; + std::pair do_poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { + return {mcycle, false}; } uint64_t do_read_pma_istart(int i) { @@ -541,7 +541,17 @@ class uarch_machine_state_access : public i_state_access(paddr); } - void do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t log2_size) {} + bool do_read_memory(uint64_t paddr, unsigned char *data, uint64_t length) { + // This is not implemented yet because it's not being used + abort(); + return false; + } + + bool do_write_memory(uint64_t paddr, const unsigned char *data, uint64_t length) { + // This is not implemented yet because it's not being used + abort(); + return false; + } template void do_write_memory_word(uint64_t paddr, const unsigned char *hpage, uint64_t hoffset, T val) { @@ -614,6 +624,8 @@ class uarch_machine_state_access : public i_state_access