Review notes (free text placed ahead of the first "diff --git" header, where patch tools skip it):
  * nv2a.cpp, get_dma_obj: target address is now frame bits 12..31 (masked, not OR-ed) plus the ADJUST field moved down to bits 0..11.
  * pfifo.cpp, write: NV_PFIFO_INTR_EN_0 stores into its own slot; generic read/write index the bank relative to its first register.
  * pfifo.cpp, pusher: method count is extracted as a plain count; the incremented method stays inside the METHOD field.
  * pfifo.cpp, reset/deinit: CACHE1_STATUS goes to regs1; join only when the worker thread exists.
  * user.hpp / user.cpp: NV_USER register constants are absolute addresses like every other register; write reports a write.
  * pramin.hpp: the hunk that only re-added NV_PRAMIN, NV_PRAMIN_BASE and NV_PRAMIN_SIZE was dropped as redundant.
NOTE(review): in this copy the text inside angle brackets (template parameter lists, template arguments such as the
type given to get(), system header names) is missing. Those tokens are left exactly as found and must be restored from
the original commit. Indentation was also lost and has been re-created, so confirm whitespace before applying.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b8fbe23..9066fa3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,6 +77,7 @@ set(HEADERS
 	"${NXBX_ROOT_DIR}/src/hw/video/gpu/ptimer.hpp"
 	"${NXBX_ROOT_DIR}/src/hw/video/gpu/pvga.hpp"
 	"${NXBX_ROOT_DIR}/src/hw/video/gpu/pvideo.hpp"
+	"${NXBX_ROOT_DIR}/src/hw/video/gpu/user.hpp"
 )
 
 set(SOURCES
@@ -107,6 +108,7 @@ set(SOURCES
 	"${NXBX_ROOT_DIR}/src/hw/video/gpu/ptimer.cpp"
 	"${NXBX_ROOT_DIR}/src/hw/video/gpu/pvga.cpp"
 	"${NXBX_ROOT_DIR}/src/hw/video/gpu/pvideo.cpp"
+	"${NXBX_ROOT_DIR}/src/hw/video/gpu/user.cpp"
 )
 
 source_group(TREE ${NXBX_ROOT_DIR} PREFIX header FILES ${HEADERS})
diff --git a/src/hw/machine.hpp b/src/hw/machine.hpp
index f4957d4..307959c 100644
--- a/src/hw/machine.hpp
+++ b/src/hw/machine.hpp
@@ -53,6 +53,7 @@ class machine {
 	{
 		m_cpu.deinit();
 		m_cmos.deinit();
+		m_nv2a.deinit();
 	}
 	void start() { m_cpu.start(); }
 	void exit() { m_cpu.exit(); }
diff --git a/src/hw/video/gpu/nv2a.cpp b/src/hw/video/gpu/nv2a.cpp
index 31eaf22..028b8aa 100644
--- a/src/hw/video/gpu/nv2a.cpp
+++ b/src/hw/video/gpu/nv2a.cpp
@@ -3,6 +3,7 @@
 // SPDX-FileCopyrightText: 2024 ergo720
 
 #include "nv2a.hpp"
+#include "machine.hpp"
 
 
 bool
@@ -38,6 +39,9 @@ nv2a::init()
 	if (!m_pvideo.init()) {
 		return false;
 	}
+	if (!m_user.init()) {
+		return false;
+	}
 	return true;
 }
 
@@ -47,6 +51,29 @@ nv2a::get_next_update_time(uint64_t now)
 	return m_ptimer.get_next_alarm_time(now);
 }
 
+dma_obj
+nv2a::get_dma_obj(uint32_t addr)
+{
+	/*
+	A dma object has the following memory layout:
+	base+0: flags -> 0:11 class type, 12:13 page table stuff, 16:17 mem type, 20:31 low 12 bits of target addr (adjust)
+	base+4: limit -> 0:31 addr limit for the resource at the target addr
+	base+8: addr -> 12:31 high 20 bits of target addr (page frame)
+	*/
+
+	// TODO: this should also consider the endianness bit of NV_PFIFO_CACHE1_DMA_FETCH
+	uint32_t flags = m_pramin.read(NV_PRAMIN_BASE + addr);
+	uint32_t limit = m_pramin.read(NV_PRAMIN_BASE + addr + 4);
+	uint32_t addr_info = m_pramin.read(NV_PRAMIN_BASE + addr + 8);
+
+	return dma_obj{
+		.class_type = flags & NV_DMA_CLASS,
+		.mem_type = (flags & NV_DMA_TARGET) >> 16,
+		.target_addr = (((flags & NV_DMA_ADJUST) >> 20) | (addr_info & NV_DMA_ADDRESS)) & (RAM_SIZE128 - 1), // frame bits 12:31 masked in, adjust moved to bits 0:11
+		.limit = limit,
+	};
+}
+
 void
 nv2a::apply_log_settings()
 {
@@ -60,4 +87,11 @@ nv2a::apply_log_settings()
 	m_pfifo.update_io();
 	m_pvga.update_io();
 	m_pvideo.update_io();
+	m_user.update_io();
+}
+
+void
+nv2a::deinit()
+{
+	m_pfifo.deinit();
 }
diff --git a/src/hw/video/gpu/nv2a.hpp b/src/hw/video/gpu/nv2a.hpp
index 7761b9f..b2aa186 100644
--- a/src/hw/video/gpu/nv2a.hpp
+++ b/src/hw/video/gpu/nv2a.hpp
@@ -14,14 +14,24 @@
 #include "pfifo.hpp"
 #include "pvga.hpp"
 #include "pvideo.hpp"
+#include "user.hpp"
 #include "cpu.hpp"
 
 
+struct dma_obj {
+	uint32_t class_type;
+	uint32_t mem_type;
+	uint32_t target_addr;
+	uint32_t limit;
+};
+
 class nv2a {
 public:
 	nv2a(machine *machine) : m_pmc(machine), m_pcrtc(machine), m_pramdac(machine), m_ptimer(machine),
-		m_pfb(machine), m_pbus(machine), m_pramin(machine), m_pfifo(machine), m_pvga(machine), m_pvideo(machine) {}
+		m_pfb(machine), m_pbus(machine), m_pramin(machine), m_pfifo(machine), m_pvga(machine), m_pvideo(machine),
+		m_user(machine) {}
 	bool init();
+	void deinit();
 	uint64_t get_next_update_time(uint64_t now);
 	pmc &get_pmc() { return m_pmc; }
 	pcrtc &get_pcrtc() { return m_pcrtc; }
@@ -36,6 +46,9 @@ class nv2a {
 	void apply_log_settings();
 
 private:
+	dma_obj get_dma_obj(uint32_t addr);
+
+	friend class pfifo;
 	pmc m_pmc;
 	pcrtc m_pcrtc;
 	pramdac m_pramdac;
@@ -46,6 +59,7 @@ class nv2a {
 	pfifo m_pfifo;
 	pvga m_pvga;
 	pvideo m_pvideo;
+	user m_user;
 };
 
 template
diff --git a/src/hw/video/gpu/nv2a_defs.hpp b/src/hw/video/gpu/nv2a_defs.hpp
index fab1e0b..36e3ae9 100644
--- a/src/hw/video/gpu/nv2a_defs.hpp
+++ b/src/hw/video/gpu/nv2a_defs.hpp
@@ -11,3 +11,10 @@
 #define NV2A_VRAM_BASE 0xF0000000
 #define NV2A_VRAM_SIZE64 0x4000000 // = 64 MiB
 #define NV2A_VRAM_SIZE128 0x8000000 // = 128 MiB
+#define NV2A_MAX_NUM_CHANNELS 32 // max num of fifo queues
+
+// DMA object masks
+#define NV_DMA_CLASS 0x00000FFF
+#define NV_DMA_ADJUST 0xFFF00000
+#define NV_DMA_ADDRESS 0xFFFFF000
+#define NV_DMA_TARGET 0x00030000
diff --git a/src/hw/video/gpu/pfifo.cpp b/src/hw/video/gpu/pfifo.cpp
index e258874..84446a1 100644
--- a/src/hw/video/gpu/pfifo.cpp
+++ b/src/hw/video/gpu/pfifo.cpp
@@ -3,6 +3,7 @@
 // SPDX-FileCopyrightText: 2024 ergo720
 
 #include "machine.hpp"
+#include 
 
 #define MODULE_NAME pfifo
 
@@ -19,20 +20,45 @@ void pfifo::write(uint32_t addr, const uint32_t data)
 
 	switch (addr)
 	{
-	case NV_PFIFO_RAMHT:
-		ramht = data;
+	case NV_PFIFO_INTR_0:
+		regs0[REGS0_PFIFO_idx(NV_PFIFO_INTR_0)] &= ~data;
+		m_machine->get().update_irq();
 		break;
 
-	case NV_PFIFO_RAMFC:
-		ramfc = data;
+	case NV_PFIFO_INTR_EN_0:
+		regs0[REGS0_PFIFO_idx(NV_PFIFO_INTR_EN_0)] = data; // the enable mask has its own slot; the pending bits in INTR_0 must be left alone
+		m_machine->get().update_irq();
 		break;
 
-	case NV_PFIFO_RAMRO:
-		ramro = data;
+	case NV_PFIFO_CACHE1_DMA_PUSH:
+		// Mask out read-only bits
+		regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUSH)] = (data & ~(NV_PFIFO_CACHE1_DMA_PUSH_STATE_MASK | NV_PFIFO_CACHE1_DMA_PUSH_BUFFER_MASK));
 		break;
 
-	default:
-		nxbx_fatal("Unhandled write at address 0x%" PRIX32 " with value 0x%" PRIX32, addr, data);
+	case NV_PFIFO_CACHE1_DMA_PUT:
+		regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUT)] = data;
+		signal++;
+		signal.notify_one();
+		break;
+
+	case NV_PFIFO_CACHE1_DMA_GET:
+		regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_GET)] = data;
+		signal++;
+		signal.notify_one();
+		break;
+
+	default: {
+		uint32_t reg_idx = ((addr - NV2A_REGISTER_BASE) & 0x1000) >> 12;
+		uint32_t reg_offset = addr & 0xFFF;
+		uint32_t low_offset = reg_idx == 0 ? REGS0_OFFSET_BASE : REGS1_OFFSET_BASE;
+		uint32_t high_offset = reg_idx == 0 ? REGS0_OFFSET_END : REGS1_OFFSET_END;
+		if (util::in_range(reg_offset, low_offset, high_offset - 4)) {
+			regs[reg_idx][(reg_offset - low_offset) >> 2] = data; // index from the bank's first register, same as REGSn_PFIFO_idx
+		}
+		else {
+			nxbx_fatal("Unhandled write at address 0x%" PRIX32 " with value 0x%" PRIX32, addr, data);
+		}
+	}
 	}
 }
 
@@ -44,22 +70,14 @@ uint32_t pfifo::read(uint32_t addr)
 	}
 
 	uint32_t value = 0;
-
-	switch (addr)
-	{
-	case NV_PFIFO_RAMHT:
-		value = ramht;
-		break;
-
-	case NV_PFIFO_RAMFC:
-		value = ramfc;
-		break;
-
-	case NV_PFIFO_RAMRO:
-		value = ramro;
-		break;
-
-	default:
+	uint32_t reg_idx = ((addr - NV2A_REGISTER_BASE) & 0x1000) >> 12;
+	uint32_t reg_offset = addr & 0xFFF;
+	uint32_t low_offset = reg_idx == 0 ? REGS0_OFFSET_BASE : REGS1_OFFSET_BASE;
+	uint32_t high_offset = reg_idx == 0 ? REGS0_OFFSET_END : REGS1_OFFSET_END;
+	if (util::in_range(reg_offset, low_offset, high_offset - 4)) {
+		value = regs[reg_idx][(reg_offset - low_offset) >> 2]; // index from the bank's first register, same as REGSn_PFIFO_idx
+	}
+	else {
 		nxbx_fatal("Unhandled read at address 0x%" PRIX32, addr);
 	}
 
@@ -70,6 +88,167 @@ uint32_t pfifo::read(uint32_t addr)
 	return value;
 }
 
+void
+pfifo::pusher(auto &err_handler)
+{
+	if ((
+		((regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_PUSH0)] & NV_PFIFO_CACHE1_PUSH0_ACCESS_MASK) << 1) |
+		(regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUSH)] & (NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_MASK | NV_PFIFO_CACHE1_DMA_PUSH_STATUS_MASK))
+		) ^
+		(NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_MASK | (NV_PFIFO_CACHE1_PUSH0_ACCESS_MASK << 1))) {
+		// Pusher is either disabled or suspended, so don't do anything
+		return;
+	}
+
+	// We are running, so set the busy flag
+	regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUSH)] |= NV_PFIFO_CACHE1_DMA_PUSH_STATE_MASK;
+
+	uint32_t curr_pb_get = regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_GET)] & ~3;
+	uint32_t curr_pb_put = regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUT)] & ~3;
+	// Find the address of the new pb entries from the pb object
+	dma_obj pb_obj = m_machine->get().get_dma_obj((regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_INSTANCE)] & NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS_MASK) << 4);
+
+	// Process all entries until the fifo is empty
+	while (curr_pb_get != curr_pb_put) {
+		if (curr_pb_get >= pb_obj.limit) {
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] |= (NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION << 29); // set mem fault error
+			err_handler("Pusher error: curr_pb_get >= pb_obj.limit");
+			break;
+		}
+		uint8_t *pb_addr = m_ram + pb_obj.target_addr + curr_pb_get; // ram host base addr + pb base addr + pb offset
+		uint32_t pb_entry = *(uint32_t *)pb_addr;
+		curr_pb_get += 4;
+
+		uint32_t mthd_cnt = (regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] & NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT_MASK) >> 18; // parameter count of method, as a plain count (it is shifted back by 18 below)
+		if (mthd_cnt) {
+			// A method is already being processed, so the following words must be its parameters
+
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_DATA_SHADOW)] = pb_entry; // save in shadow reg the current entry
+
+			uint32_t cache1_put = regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_PUT)] & 0x1FC;
+			uint32_t dma_state = regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)];
+			uint32_t mthd_type = dma_state & NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_MASK; // method type
+			uint32_t mthd = dma_state & NV_PFIFO_CACHE1_DMA_STATE_METHOD_MASK; // the actual method specified
+			uint32_t mthd_subchan = dma_state & NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL_MASK; // the bound subchannel
+
+			// Add the method and its parameter to cache1
+			uint32_t method_entry = mthd_type | mthd | mthd_subchan;
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_METHOD(cache1_put >> 2))] = method_entry;
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DATA(cache1_put >> 2))] = pb_entry;
+
+			// Update dma state
+			if (mthd_type == 0) {
+				dma_state &= ~NV_PFIFO_CACHE1_DMA_STATE_METHOD_MASK;
+				dma_state |= ((mthd + 4) & NV_PFIFO_CACHE1_DMA_STATE_METHOD_MASK); // mthd is held in place (bits 2:12), so the next method is +4 inside the same field
+			}
+			mthd_cnt--;
+			dma_state &= ~NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT_MASK;
+			dma_state |= (mthd_cnt << 18);
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] = dma_state;
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_DCOUNT)]++;
+
+			// TODO: this should now either call or notify the puller that there's a new entry in cache1
+			nxbx_fatal("Puller not implemented");
+			break;
+		}
+		else {
+			// No method is currently active, so this must be a new one
+			regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_RSVD_SHADOW)] = pb_entry; // save in shadow reg the current entry
+
+			if ((pb_entry & 0xE0000003) == 0x20000000) {
+				// old jump (nv4+) -> save current pb get addr and jump to the specified addr
+				// 001JJJJJJJJJJJJJJJJJJJJJJJJJJJ00 -> J: jump addr
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW)] = curr_pb_get;
+				curr_pb_get = pb_entry & 0x1FFFFFFF;
+			}
+			else if ((pb_entry & 3) == 1) {
+				// jump (nv1a+) -> same as old jump, but with a different method encoding
+				// JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ01 -> J: jump addr
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW)] = curr_pb_get;
+				curr_pb_get = pb_entry & 0xFFFFFFFC;
+			}
+			else if ((pb_entry & 3) == 2) {
+				// call (nv1a+) -> save current pb get addr and calls the routine at the specified addr
+				// JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ10 -> J: call addr
+				if (regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_SUBROUTINE)] & 1) {
+					regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] |= (NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL << 29); // set call error
+					err_handler("Pusher error: call command while another subroutine is already active");
+					break;
+				}
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_SUBROUTINE)] = curr_pb_get | 1;
+				curr_pb_get = pb_entry & 0xFFFFFFFC;
+			}
+			else if (pb_entry == 0x00020000) {
+				// return (nv1a+) -> restore pb get addr from subroutine return addr saved with a previous call
+				// 00000000000000100000000000000000
+				if ((regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_SUBROUTINE)] & 1) == 0) {
+					regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] |= (NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN << 29); // set return error
+					err_handler("Pusher error: return command while subroutine is not active");
+					break;
+				}
+				curr_pb_get = regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_SUBROUTINE)] & ~3;
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_SUBROUTINE)] = 0;
+			}
+			else if (uint32_t value = pb_entry & 0xE0030003; (value == 0) // increasing methods
+				|| (value == 0x40000000)) { // non-increasing methods
+				// Specify a new method
+				// 00/10CCCCCCCCCCC00SSSMMMMMMMMMMM00 -> C: method count, S: subchannel, M: method
+				uint32_t mthd_state = value == 0 ? 0 : 1;
+				mthd_state |= ((pb_entry & NV_PFIFO_CACHE1_DMA_STATE_METHOD_MASK) | (pb_entry & NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL_MASK)
+					| (pb_entry & NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT_MASK));
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] = mthd_state;
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_DCOUNT)] = 0;
+			}
+			else {
+				regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_STATE)] |= (NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD << 29); // set invalid command error
+				err_handler("Pusher error: encountered unrecognized command");
+				break;
+			}
+		}
+	}
+
+	regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_GET)] = curr_pb_get;
+
+	// We are done with processing, so clear the busy flag
+	regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUSH)] &= ~NV_PFIFO_CACHE1_DMA_PUSH_STATE_MASK;
+}
+
+void
+pfifo::puller()
+{
+	// TODO
+}
+
+void
+pfifo::worker(std::stop_token stok)
+{
+	// This function is called in a separate thread, and acts as the pfifo pusher and puller
+
+	// This lambda is called when the pusher encounters an error
+	const auto lambda = [this](const char *msg) {
+		regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUSH)] |= NV_PFIFO_CACHE1_DMA_PUSH_STATUS_MASK; // suspend pusher
+		// Currently disabled, because it's not thread safe yet
+#if 0
+		regs0[REGS0_PFIFO_idx(NV_PFIFO_INTR_0)] |= NV_PFIFO_INTR_0_DMA_PUSHER; // raise pusher interrupt
+		m_machine->get().update_irq();
+#endif
+		nxbx_fatal(msg);
+	};
+
+	while (true) {
+		// Wait until there's some work to do
+		signal.wait(0);
+
+		if (stok.stop_requested()) [[unlikely]] {
+			return;
+		}
+
+		pusher(lambda);
+
+		signal--;
+	}
+}
+
 template
 auto pfifo::get_io_func(bool log, bool enabled, bool is_be)
 {
@@ -123,10 +302,13 @@ pfifo::update_io(bool is_update)
 void
 pfifo::reset()
 {
+	std::fill(std::begin(regs0), std::end(regs0), 0);
+	std::fill(std::begin(regs1), std::end(regs1), 0);
+	regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_STATUS)] = NV_PFIFO_CACHE1_STATUS_LOW_MARK_MASK; // CACHE1_STATUS is in the 0x3xxx bank (regs1): cache1 starts out empty
 	// Values dumped from a Retail 1.0 xbox
-	ramht = 0x00000100;
-	ramfc = 0x008A0110;
-	ramro = 0x00000114;
+	regs0[REGS0_PFIFO_idx(NV_PFIFO_RAMHT)] = 0x00000100;
+	regs0[REGS0_PFIFO_idx(NV_PFIFO_RAMFC)] = 0x008A0110;
+	regs0[REGS0_PFIFO_idx(NV_PFIFO_RAMRO)] = 0x00000114;
 }
 
 bool
@@ -137,5 +319,17 @@ pfifo::init()
 	}
 
 	reset();
+	signal = 0;
+	m_ram = get_ram_ptr(m_machine->get());
+	jthr = std::jthread(std::bind_front(&pfifo::worker, this));
 	return true;
 }
+
+void
+pfifo::deinit()
+{
+	jthr.request_stop();
+	signal++;
+	signal.notify_one();
+	if (jthr.joinable()) { jthr.join(); } // init() may have failed before the worker was started; join() on a non-joinable thread throws
+}
diff --git a/src/hw/video/gpu/pfifo.hpp b/src/hw/video/gpu/pfifo.hpp
index 65e5ae8..e2957c1 100644
--- a/src/hw/video/gpu/pfifo.hpp
+++ b/src/hw/video/gpu/pfifo.hpp
@@ -5,23 +5,81 @@
 #pragma once
 
 #include 
+#include 
+#include 
 #include "nv2a_defs.hpp"
 
 #define NV_PFIFO 0x00002000
 #define NV_PFIFO_BASE (NV2A_REGISTER_BASE + NV_PFIFO)
 #define NV_PFIFO_SIZE 0x2000
+#define NV_PFIFO_INTR_0 (NV2A_REGISTER_BASE + 0x00002100) // Pending pfifo interrupts. Writing a 0 has no effect, and writing a 1 clears the interrupt
+#define NV_PFIFO_INTR_0_DMA_PUSHER (1 << 12)
+#define NV_PFIFO_INTR_EN_0 (NV2A_REGISTER_BASE + 0x00002140) // Enable/disable pfifo interrupts
 #define NV_PFIFO_RAMHT (NV2A_REGISTER_BASE + 0x00002210) // Contains the base address and size of ramht in ramin
 #define NV_PFIFO_RAMFC (NV2A_REGISTER_BASE + 0x00002214) // Contains the base address and size of ramfc in ramin
 #define NV_PFIFO_RAMRO (NV2A_REGISTER_BASE + 0x00002218) // Contains the base address and size of ramro in ramin
+#define NV_PFIFO_MODE (NV2A_REGISTER_BASE + 0x00002504) // Indicates the submission mode, one bit for each channel
+#define NV_PFIFO_MODE_CHANNEL_MASK(id) (1 << (id)) // pio=0, dma=1
+#define NV_PFIFO_CACHE1_PUSH0 (NV2A_REGISTER_BASE + 0x00003200) // Enable/disable pusher access to cache1
+#define NV_PFIFO_CACHE1_PUSH0_ACCESS_MASK (1 << 0) // enabled=1
+#define NV_PFIFO_CACHE1_PUSH1 (NV2A_REGISTER_BASE + 0x00003204) // The currently active channel id and the mode it uses (cache1)
+#define NV_PFIFO_CACHE1_PUSH1_CHID_MASK 0x1F
+#define NV_PFIFO_CACHE1_PUSH1_MODE_MASK (1 << 8) // 1=dma
+#define NV_PFIFO_CACHE1_PUT (NV2A_REGISTER_BASE + 0x00003210) // The front pointer of cache1
+#define NV_PFIFO_CACHE1_STATUS (NV2A_REGISTER_BASE + 0x00003214) // Empty/full flag of cache1
+#define NV_PFIFO_CACHE1_STATUS_LOW_MARK_MASK (1 << 4) // 1=empty
+#define NV_PFIFO_CACHE1_STATUS_HIGH_MARK_MASK (1 << 8) // 1=full
+#define NV_PFIFO_CACHE1_DMA_PUSH (NV2A_REGISTER_BASE + 0x00003220) // Status bits of the pusher
+#define NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_MASK (1 << 0) // enabled=1
+#define NV_PFIFO_CACHE1_DMA_PUSH_STATE_MASK (1 << 4) // busy=1
+#define NV_PFIFO_CACHE1_DMA_PUSH_BUFFER_MASK (1 << 8)
+#define NV_PFIFO_CACHE1_DMA_PUSH_STATUS_MASK (1 << 12) // suspended=1
+#define NV_PFIFO_CACHE1_DMA_PUSH_ACQUIRE_MASK (1 << 16)
+#define NV_PFIFO_CACHE1_DMA_FETCH (NV2A_REGISTER_BASE + 0x00003224) // Dma fetch flags
+#define NV_PFIFO_CACHE1_DMA_FETCH_ENDIAN_MASK (1 << 31) // 1=big
+#define NV_PFIFO_CACHE1_DMA_STATE (NV2A_REGISTER_BASE + 0x00003228) // Current pb processing state of the pusher
+#define NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_MASK (1 << 0) // non-increasing=1
+#define NV_PFIFO_CACHE1_DMA_STATE_METHOD_MASK 0x00001FFC
+#define NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL_MASK 0x0000E000
+#define NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT_MASK 0x1FFC0000
+#define NV_PFIFO_CACHE1_DMA_STATE_ERROR_MASK 0xE0000000
+#define NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL 0x00000001
+#define NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN 0x00000003
+#define NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD 0x00000004
+#define NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION 0x00000006
+#define NV_PFIFO_CACHE1_DMA_INSTANCE (NV2A_REGISTER_BASE + 0x0000322C) // The addr of the dma pb object
+#define NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS_MASK 0xFFFF
+#define NV_PFIFO_CACHE1_DMA_PUT (NV2A_REGISTER_BASE + 0x00003240) // The front pointer of the active pb fifo
+#define NV_PFIFO_CACHE1_DMA_GET (NV2A_REGISTER_BASE + 0x00003244) // The back pointer of the active pb fifo
+#define NV_PFIFO_CACHE1_REF (NV2A_REGISTER_BASE + 0x00003248) // reference count of the active pb (set when the REF_CNT method is executed)
+#define NV_PFIFO_CACHE1_DMA_SUBROUTINE (NV2A_REGISTER_BASE + 0x0000324C) // copy of NV_PFIFO_CACHE1_DMA_GET before the call + subroutine active flag
+#define NV_PFIFO_CACHE1_GET (NV2A_REGISTER_BASE + 0x00003270) // The back pointer of cache1
+#define NV_PFIFO_CACHE1_DMA_DCOUNT (NV2A_REGISTER_BASE + 0x000032A0) // the number of parameters that have been processed for the current method
+#define NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW (NV2A_REGISTER_BASE + 0x000032A4) // copy of NV_PFIFO_CACHE1_DMA_GET before the jump
+#define NV_PFIFO_CACHE1_DMA_RSVD_SHADOW (NV2A_REGISTER_BASE + 0x000032A8) // copy of pb entry when new method is processed
+#define NV_PFIFO_CACHE1_DMA_DATA_SHADOW (NV2A_REGISTER_BASE + 0x000032AC) // copy of pb entry when the method's parameters are being processed
+#define NV_PFIFO_CACHE1_METHOD(i) (NV2A_REGISTER_BASE + 0x00003800 + (i) * 8) // cache1 register array of 128 entries (caches methods)
+#define NV_PFIFO_CACHE1_DATA(i) (NV2A_REGISTER_BASE + 0x00003804 + (i) * 8) // cache1 register array of 128 entries (caches parameters)
+
+#define REGS0_OFFSET_BASE (NV_PFIFO_INTR_0 & 0xFFF)
+#define REGS0_OFFSET_END ((NV_PFIFO_MODE + 4) & 0xFFF)
+#define REGS1_OFFSET_BASE (NV_PFIFO_CACHE1_PUSH0 & 0xFFF)
+#define REGS1_OFFSET_END ((NV_PFIFO_CACHE1_DATA(127) + 4) & 0xFFF)
+#define REGS0_PFIFO_idx(addr) (((addr) - NV_PFIFO_INTR_0) >> 2)
+#define REGS1_PFIFO_idx(addr) (((addr) - NV_PFIFO_CACHE1_PUSH0) >> 2)
 
 
 class machine;
+class nv2a;
+class pmc;
+class user;
 
 class pfifo {
 public:
 	pfifo(machine *machine) : m_machine(machine) {}
 	bool init();
+	void deinit();
 	void reset();
 	void update_io() { update_io(true); }
 	template
@@ -33,9 +91,20 @@ class pfifo {
 	bool update_io(bool is_update);
 	template
 	auto get_io_func(bool log, bool enabled, bool is_be);
+	void worker(std::stop_token stok);
+	void pusher(auto &err_handler);
+	void puller();
 
+	friend class nv2a;
+	friend class pmc;
+	friend class user;
+	std::jthread jthr;
+	std::atomic_uint32_t signal;
 	machine *const m_machine;
+	uint8_t *m_ram;
 	struct {
-		uint32_t ramht, ramfc, ramro;
+		std::atomic_uint32_t regs0[(REGS0_OFFSET_END - REGS0_OFFSET_BASE) / 4];
+		std::atomic_uint32_t regs1[(REGS1_OFFSET_END - REGS1_OFFSET_BASE) / 4];
+		std::atomic_uint32_t *regs[2] = { regs0, regs1 };
 	};
 };
diff --git a/src/hw/video/gpu/pmc.cpp b/src/hw/video/gpu/pmc.cpp
index 913ad01..93324dd 100644
--- a/src/hw/video/gpu/pmc.cpp
+++ b/src/hw/video/gpu/pmc.cpp
@@ -149,6 +149,14 @@ pmc::update_irq()
 		int_status &= ~(1 << NV_PMC_INTR_0_PTIMER);
 	}
 
+	// Check for pending PFIFO interrupts
+	if (m_machine->get().regs0[REGS0_PFIFO_idx(NV_PFIFO_INTR_0)] & m_machine->get().regs0[REGS0_PFIFO_idx(NV_PFIFO_INTR_EN_0)]) {
+		int_status |= (1 << NV_PMC_INTR_0_PFIFO);
+	}
+	else {
+		int_status &= ~(1 << NV_PMC_INTR_0_PFIFO);
+	}
+
 	switch (int_enabled)
 	{
 	default:
diff --git a/src/hw/video/gpu/pmc.hpp b/src/hw/video/gpu/pmc.hpp
index 61b554f..9129852 100644
--- a/src/hw/video/gpu/pmc.hpp
+++ b/src/hw/video/gpu/pmc.hpp
@@ -23,6 +23,7 @@
 #define NV_PMC_BOOT_1_ENDIAN24_LITTLE_MASK (0x00000000 << 24)
 #define NV_PMC_BOOT_1_ENDIAN24_BIG_MASK (0x00000001 << 24)
 #define NV_PMC_INTR_0 (NV2A_REGISTER_BASE + 0x00000100) // Pending interrupts of all engines
+#define NV_PMC_INTR_0_PFIFO 8
 #define NV_PMC_INTR_0_PTIMER 20
 #define NV_PMC_INTR_0_PCRTC 24
 #define NV_PMC_INTR_0_SOFTWARE 31
@@ -51,6 +52,7 @@ class pvideo;
 class pbus;
 class pramdac;
 class pramin;
+class user;
 
 class pmc {
 public:
@@ -77,6 +79,7 @@ class pmc {
 	friend class pbus;
 	friend class pramdac;
 	friend class pramin;
+	friend class user;
 	machine *const m_machine;
 	struct {
 		uint32_t endianness;
diff --git a/src/hw/video/gpu/user.cpp b/src/hw/video/gpu/user.cpp
new file mode 100644
index 0000000..208b243
--- /dev/null
+++ b/src/hw/video/gpu/user.cpp
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+// SPDX-FileCopyrightText: 2024 ergo720
+
+#include "machine.hpp"
+
+#define MODULE_NAME user
+
+
+template
+void user::write(uint32_t addr, const uint32_t data)
+{
+	if constexpr (log) {
+		log_io_write();
+	}
+
+	uint32_t chan_id = ((addr - NV_USER_BASE) >> 16) & (NV2A_MAX_NUM_CHANNELS - 1); // addr increases of 0x10000 for each channel
+	uint32_t curr_chan_info = m_machine->get().regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_PUSH1)];
+	uint32_t curr_chan_id = curr_chan_info & NV_PFIFO_CACHE1_PUSH1_CHID_MASK;
+	uint32_t curr_chan_mode = curr_chan_info & NV_PFIFO_CACHE1_PUSH1_MODE_MASK;
+
+	if (curr_chan_id == chan_id) {
+		if (curr_chan_mode == (NV_PFIFO_CACHE1_PUSH1_MODE_MASK)) {
+
+			// NV_USER is a window to the corresponding pfifo registers
+			switch (addr)
+			{
+			case NV_USER_DMA_PUT:
+				// The pb put pointer changed, so notify the pusher
+				m_machine->get().regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUT)] = data;
+				m_machine->get().signal++;
+				m_machine->get().signal.notify_one();
+				break;
+
+			case NV_USER_DMA_GET:
+				// This register is read-only
+				break;
+
+			case NV_USER_REF:
+				// This register is read-only
+				break;
+
+			default:
+				nxbx_fatal("Unhandled write at address 0x%" PRIX32 " with value 0x%" PRIX32, addr, data); // this is the write path: report it as a write, with the value
+			}
+		}
+		else {
+			nxbx_fatal("PIO channel mode is not supported");
+		}
+	}
+	else {
+		// This should save the current channel state to ramfc and do a context switch
+		nxbx_fatal("Context switch is not supported");
+	}
+}
+
+template
+uint32_t user::read(uint32_t addr)
+{
+	uint32_t value = 0;
+	uint32_t chan_id = ((addr - NV_USER_BASE) >> 16) & (NV2A_MAX_NUM_CHANNELS - 1); // addr increases of 0x10000 for each channel
+	uint32_t curr_chan_info = m_machine->get().regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_PUSH1)];
+	uint32_t curr_chan_id = curr_chan_info & NV_PFIFO_CACHE1_PUSH1_CHID_MASK;
+	uint32_t curr_chan_mode = curr_chan_info & NV_PFIFO_CACHE1_PUSH1_MODE_MASK;
+
+	if (curr_chan_id == chan_id) {
+		if (curr_chan_mode == (NV_PFIFO_CACHE1_PUSH1_MODE_MASK)) {
+
+			// NV_USER is a window to the corresponding pfifo registers
+			switch (addr)
+			{
+			case NV_USER_DMA_PUT:
+				value = m_machine->get().regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_PUT)];
+				break;
+
+			case NV_USER_DMA_GET:
+				value = m_machine->get().regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_DMA_GET)];
+				break;
+
+			case NV_USER_REF:
+				value = m_machine->get().regs1[REGS1_PFIFO_idx(NV_PFIFO_CACHE1_REF)];
+				break;
+
+			default:
+				nxbx_fatal("Unhandled read at address 0x%" PRIX32, addr);
+			}
+		}
+		else {
+			nxbx_fatal("PIO channel mode is not supported");
+		}
+	}
+	else {
+		// This should save the current channel state to ramfc and do a context switch
+		nxbx_fatal("Context switch is not supported");
+	}
+
+	if constexpr (log) {
+		log_io_read();
+	}
+
+	return value;
+}
+
+template
+auto user::get_io_func(bool log, bool is_be)
+{
+	if constexpr (is_write) {
+		if (log) {
+			return is_be ? nv2a_write, true> : nv2a_write>;
+		}
+		else {
+			return is_be ? nv2a_write, true> : nv2a_write>;
+		}
+	}
+	else {
+		if (log) {
+			return is_be ? nv2a_read, true> : nv2a_read>;
+		}
+		else {
+			return is_be ? nv2a_read, true> : nv2a_read>;
+		}
+	}
+}
+
+bool
+user::update_io(bool is_update)
+{
+	bool log = module_enabled();
+	bool is_be = m_machine->get().endianness & NV_PMC_BOOT_1_ENDIAN24_BIG_MASK;
+	if (!LC86_SUCCESS(mem_init_region_io(m_machine->get(), NV_USER_BASE, NV_USER_SIZE, false,
+		{
+			.fnr32 = get_io_func(log, is_be),
+			.fnw32 = get_io_func(log, is_be)
+		},
+		this, is_update, is_update))) {
+		logger_en(error, "Failed to update mmio region");
+		return false;
+	}
+
+	return true;
+}
+
+bool
+user::init()
+{
+	if (!update_io(false)) {
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/hw/video/gpu/user.hpp b/src/hw/video/gpu/user.hpp
new file mode 100644
index 0000000..4cc6ba4
--- /dev/null
+++ b/src/hw/video/gpu/user.hpp
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+// SPDX-FileCopyrightText: 2024 ergo720
+
+#pragma once
+
+#include 
+
+#define NV_USER 0x00800000
+#define NV_USER_BASE (NV2A_REGISTER_BASE + NV_USER)
+#define NV_USER_SIZE 0x800000
+
+#define NV_USER_DMA_PUT (NV2A_REGISTER_BASE + 0x00800040) // absolute address, because the io handlers receive absolute addresses
+#define NV_USER_DMA_GET (NV2A_REGISTER_BASE + 0x00800044)
+#define NV_USER_REF (NV2A_REGISTER_BASE + 0x00800048)
+
+
+class machine;
+
+class user {
+public:
+	user(machine *machine) : m_machine(machine) {}
+	bool init();
+	void update_io() { update_io(true); }
+	template
+	uint32_t read(uint32_t addr);
+	template
+	void write(uint32_t addr, const uint32_t data);
+
+private:
+	bool update_io(bool is_update);
+	template
+	auto get_io_func(bool log, bool is_be);
+
+	machine *const m_machine;
+};
diff --git a/src/logger.hpp b/src/logger.hpp
index c1037a5..032ef4c 100644
--- a/src/logger.hpp
+++ b/src/logger.hpp
@@ -52,6 +52,7 @@ enum class log_module : int32_t {
 	ptimer,
 	pvga,
 	pvideo,
+	user,
 	max,
 };
 
@@ -76,6 +77,7 @@ inline constexpr std::array module_to_str = {
 	"NV2A.PTIMER -> ",
 	"NV2A.PVGA -> ",
 	"NV2A.PVIDEO -> ",
+	"NV2A.USER -> ",
 };
 static_assert(module_to_str.size() == (uint32_t)(log_module::max));
 
diff --git a/src/settings.cpp b/src/settings.cpp
index e0af072..5a92c36 100644
--- a/src/settings.cpp
+++ b/src/settings.cpp
@@ -59,6 +59,7 @@ settings::load_config_values()
 	else {
 		// ...otherwise, use default log module settings
 		m_core.log_modules[0] = default_log_modules1;
+		logger("Mismatching logger version, using default log module settings");
 	}
 	nxbx::update_logging();
 }
diff --git a/src/settings.hpp b/src/settings.hpp
index b62035c..d50cd62 100644
--- a/src/settings.hpp
+++ b/src/settings.hpp
@@ -44,5 +44,5 @@ class settings {
 	std::string m_ini_path;
 	console_t m_type;
 	static constexpr uint32_t m_version = 1;
-	static constexpr uint32_t m_log_version = 1; // add one to this every time the log modules change
+	static constexpr uint32_t m_log_version = 2; // add one to this every time the log modules change
 };
diff --git a/src/util.hpp b/src/util.hpp
index 4d28ce6..6abc073 100644
--- a/src/util.hpp
+++ b/src/util.hpp
@@ -37,6 +37,12 @@ namespace util {
 		}
 	}
 
+	template
+	bool in_range(T val, T low, T high)
+	{
+		return !((val < low) || (high < val));
+	}
+
 	// Case-insensitive variant of std::char_traits, used to compare xbox strings
 	struct xbox_char_traits : public std::char_traits {
 		static bool eq(char c1, char c2)