Skip to content

Commit

Permalink
feat: optimize uarch reset to speed up CI testing
Browse files Browse the repository at this point in the history
  • Loading branch information
edubart committed Dec 20, 2024
1 parent b4a2907 commit 30704dd
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 10 deletions.
6 changes: 6 additions & 0 deletions src/compiler-defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,10 @@

#define PACKED __attribute__((packed))

/// \brief Requests -O3 optimization for a single function, regardless of the global flags.
/// \details The optimize function attribute is a GCC extension. Clang also defines __GNUC__
/// but does not implement this attribute and reports it as unknown, so Clang is excluded
/// here and the macro expands to nothing on it (as on any other non-GCC compiler).
#if defined(__GNUC__) && !defined(__clang__)
#define FORCE_OPTIMIZE_O3 __attribute__((optimize("-O3")))
#else
#define FORCE_OPTIMIZE_O3
#endif

#endif
43 changes: 43 additions & 0 deletions src/is-pristine.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright Cartesi and individual authors (see AUTHORS)
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option) any
// later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along
// with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>.
//

#ifndef IS_PRISTINE_H
#define IS_PRISTINE_H

#include "compiler-defines.h"
#include <stddef.h>
#include <stdint.h>

namespace cartesi {

/// \brief Checks whether a memory region is pristine, i.e., contains only zero bytes.
/// \param data Pointer to the first byte of the region
/// \param length Number of bytes to check
/// \returns True if every byte in the region is zero (also when length is zero), false otherwise.
/// \details Intended for regions no larger than a page: the whole region is always scanned,
/// there is no early exit on the first non-zero byte.
// NOLINTNEXTLINE(clang-diagnostic-unknown-attributes)
static inline bool FORCE_OPTIMIZE_O3 is_pristine(const unsigned char *data, size_t length) {
    // Every byte is OR-ed into a single accumulator instead of being tested individually.
    // This keeps the loop body free of data-dependent branches, so the compiler is able to
    // turn it into SIMD instructions.
    unsigned char accumulated = 0;
    const unsigned char *const end = data + length;
    for (const unsigned char *cursor = data; cursor != end; ++cursor) {
        accumulated |= *cursor;
    }
    return accumulated == 0;
}

} // namespace cartesi

#endif
6 changes: 2 additions & 4 deletions src/machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "htif.h"
#include "i-device-state-access.h"
#include "interpret.h"
#include "is-pristine.h"
#include "machine-config.h"
#include "machine-memory-range-descr.h"
#include "machine-runtime-config.h"
Expand Down Expand Up @@ -2030,10 +2031,7 @@ bool machine::update_merkle_tree() const {
return false;
}
if (page_data != nullptr) {
const bool is_pristine = std::all_of(page_data, page_data + PMA_PAGE_SIZE,
[](unsigned char pp) -> bool { return pp == '\0'; });

if (is_pristine) {
if (is_pristine(page_data, PMA_PAGE_SIZE)) {
// The update_page_node_hash function in the machine_merkle_tree is not thread
// safe, so we protect it with a mutex
const parallel_for_mutex_guard lock(mutex);
Expand Down
40 changes: 36 additions & 4 deletions src/pma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <system_error>
#include <tuple>

#include "is-pristine.h"
#include "os.h"
#include "pma-constants.h"
#include "pma-driver.h"
Expand Down Expand Up @@ -162,8 +163,24 @@ void pma_entry::write_memory(uint64_t paddr, const unsigned char *data, uint64_t
if (data == nullptr) {
throw std::invalid_argument{"invalid data buffer"};
}
memcpy(get_memory().get_host_memory() + (paddr - get_start()), data, size);
mark_dirty_pages(paddr, size);
// The case of writing a large range chunk is special and optimized for uarch reset
if (size > PMA_PAGE_SIZE) {
// Copy in chunks of page size, to avoid marking dirty pages unnecessarily
for (uint64_t offset = 0; offset < size; offset += PMA_PAGE_SIZE) {
const uint64_t paddr_offset = paddr + offset;
const uint64_t chunk_len = std::min<uint64_t>(PMA_PAGE_SIZE, size - offset);
const unsigned char *src = data + offset;
unsigned char *dest = get_memory().get_host_memory() + (paddr_offset - get_start());
if (memcmp(dest, src, chunk_len) != 0) {
// Page is different, we have to copy memory
memcpy(dest, src, chunk_len);
mark_dirty_pages(paddr + offset, chunk_len);
}
}
} else {
memcpy(get_memory().get_host_memory() + (paddr - get_start()), data, size);
mark_dirty_pages(paddr, size);
}
}

void pma_entry::fill_memory(uint64_t paddr, unsigned char value, uint64_t size) {
Expand All @@ -173,8 +190,23 @@ void pma_entry::fill_memory(uint64_t paddr, unsigned char value, uint64_t size)
if (!contains(paddr, size)) {
throw std::invalid_argument{"range not contained in pma"};
}
memset(get_memory().get_host_memory() + (paddr - get_start()), value, size);
mark_dirty_pages(paddr, size);
// The case of filling a large range with zeros is special and optimized for uarch reset
if (value == 0 && size > PMA_PAGE_SIZE) {
// Fill in chunks of page size, to avoid marking dirty pages unnecessarily
for (uint64_t offset = 0; offset < size; offset += PMA_PAGE_SIZE) {
const uint64_t paddr_offset = paddr + offset;
const uint64_t chunk_len = std::min<uint64_t>(PMA_PAGE_SIZE, size - offset);
unsigned char *dest = get_memory().get_host_memory() + (paddr_offset - get_start());
if (!is_pristine(dest, chunk_len)) {
// Page is different, we have to fill memory
memset(dest, 0, chunk_len);
mark_dirty_pages(paddr + offset, chunk_len);
}
}
} else {
memset(get_memory().get_host_memory() + (paddr - get_start()), value, size);
mark_dirty_pages(paddr, size);
}
}

bool pma_peek_error(const pma_entry & /*pma*/, const machine & /*m*/, uint64_t /*page_address*/,
Expand Down
3 changes: 2 additions & 1 deletion src/uarch-record-state-access.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,9 @@ class uarch_record_state_access : public i_uarch_state_access<uarch_record_state
for (int i = 1; i < UARCH_X_REG_COUNT; i++) {
m_us.x[i] = UARCH_X_INIT;
}
m_us.ram.fill_memory(m_us.ram.get_start(), 0, m_us.ram.get_length());
m_us.ram.write_memory(m_us.ram.get_start(), uarch_pristine_ram, uarch_pristine_ram_len);
m_us.ram.fill_memory(m_us.ram.get_start() + uarch_pristine_ram_len, 0,
m_us.ram.get_length() - uarch_pristine_ram_len);
if (m_log->get_log_type().has_large_data()) {
// log written data, if debug info is enabled
a.get_written().emplace(get_uarch_state_image());
Expand Down
3 changes: 2 additions & 1 deletion src/uarch-state-access.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,9 @@ class uarch_state_access : public i_uarch_state_access<uarch_state_access> {
if (uarch_pristine_ram_len > m_us.ram.get_length()) {
throw std::runtime_error("embedded uarch ram image does not fit in uarch ram pma");
}
m_us.ram.fill_memory(m_us.ram.get_start(), 0, m_us.ram.get_length());
m_us.ram.write_memory(m_us.ram.get_start(), uarch_pristine_ram, uarch_pristine_ram_len);
m_us.ram.fill_memory(m_us.ram.get_start() + uarch_pristine_ram_len, 0,
m_us.ram.get_length() - uarch_pristine_ram_len);
}
};

Expand Down

0 comments on commit 30704dd

Please sign in to comment.