Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dynamic check for pagemap support #30

Merged
merged 5 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 97 additions & 51 deletions include/chainbase/pagemap_accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@
namespace chainbase {

namespace bip = boost::interprocess;
namespace fs = std::filesystem;

// Scope guard: stores a callable and invokes it when the guard is destroyed.
// Any exception thrown by the callable is swallowed inside the destructor.
// A guard can be neither copied nor moved.
template<typename CB>
class scoped_exit {
public:
   scoped_exit(CB&& cb)
      : _on_exit(std::forward<CB>(cb)) {
   }

   scoped_exit(const scoped_exit&)            = delete;
   scoped_exit& operator=(const scoped_exit&) = delete;
   scoped_exit(scoped_exit&&) noexcept        = delete;

   ~scoped_exit() {
      try {
         _on_exit();
      } catch (...) {
         // deliberately ignored so that nothing propagates out of the destructor
      }
   }

private:
   CB _on_exit; // callable run exactly once, from the destructor
};

class pagemap_accessor {
public:
Expand All @@ -22,26 +37,44 @@ class pagemap_accessor {
}

bool clear_refs() const {
if constexpr (!_pagemap_supported)
if (!_pagemap_supported)
return false;

int fd = ::open("/proc/self/clear_refs", O_WRONLY);
if (fd < 0)
return false;

// Clear soft-dirty bits from the task's PTEs.
// This is done by writing "4" into the /proc/PID/clear_refs file of the task in question.
//
// After this, when the task tries to modify a page at some virtual address, the #PF occurs
// and the kernel sets the soft-dirty bit on the respective PTE.
// ----------------------------------------------------------------------------------------
const char *v = "4";
bool res = write(fd, v, 1) == 1;
::close(fd);
return res;
return _clear_refs();
}

static constexpr bool pagemap_supported() {
static bool pagemap_supported() {
assert(_pagemap_support_checked);
return _pagemap_supported;
}

// returns true if pagemap *is* supported and we successfully performed `clear_refs`
bool check_pagemap_support_and_clear_refs() {
if (!_pagemap_support_checked) {
_pagemap_support_checked = true;

#if defined(__linux__) && defined(__x86_64__)
fs::path path = fs::temp_directory_path() / "nodeos_pagemap_check";
if (!fs::exists(path)) {
std::ofstream ofs(path.generic_string(), std::ofstream::trunc);
ofs.close();
}
auto remove_file = scoped_exit([&path]() { fs::remove(path); });

fs::resize_file(path, pagesz);
bip::file_mapping mapping = bip::file_mapping(path.generic_string().c_str(), bip::read_write);
bip::mapped_region region = bip::mapped_region(mapping, bip::read_write);
char* p = (char *)region.get_address();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I doubt you need to go through the trouble of creating a temporary file; you can probably just use a variable on the stack.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, that was way overkill; I simplified it.


if (_clear_refs()) {
if (!_page_dirty((uintptr_t)p)) {
*p = 1;
if (_page_dirty((uintptr_t)p))
_pagemap_supported = true;
}
}
#endif
std::cerr << "CHAINBASE: Detect pagemap support: " << (_pagemap_supported ? "OK" : "Not supported") << '\n';
}
return _pagemap_supported;
}

Expand All @@ -53,13 +86,6 @@ class pagemap_accessor {
return pagesz;
}

bool page_dirty(uintptr_t vaddr) const {
uint64_t data;
if (!read(vaddr, { &data, 1 }))
return true;
return this->is_marked_dirty(data);
}

// /proc/pid/pagemap. This file lets a userspace process find out which physical frame each virtual page
// is mapped to. It contains one 64-bit value for each virtual page, containing the following data
// (from fs/proc/task_mmu.c, above pagemap_read):
Expand All @@ -78,24 +104,9 @@ class pagemap_accessor {
// Here we are just checking bit #55 (the soft-dirty bit).
// ----------------------------------------------------------------------------------------------------
bool read(uintptr_t vaddr, std::span<uint64_t> dest_uint64) const {
if constexpr (!_pagemap_supported)
if (!_pagemap_supported)
return false;

if (!_open()) // make sure file is open
return false;
assert(_pagemap_fd >= 0);
auto dest = std::as_writable_bytes(dest_uint64);
std::byte* cur = dest.data();
size_t bytes_remaining = dest.size();
uintptr_t offset = (vaddr / pagesz) * sizeof(uint64_t);
while (bytes_remaining != 0) {
ssize_t ret = pread(_pagemap_fd, cur, bytes_remaining, offset + (cur - dest.data()));
if (ret < 0)
return false;
bytes_remaining -= (size_t)ret;
cur += ret;
}
return true;
return _read(vaddr, dest_uint64);
}

// copies the modified pages with the virtual address space specified by `rgn` to an
Expand All @@ -104,7 +115,7 @@ class pagemap_accessor {
// region should exist in the disk file.
// --------------------------------------------------------------------------------------
bool update_file_from_region(std::span<std::byte> rgn, bip::file_mapping& mapping, size_t offset, bool flush, size_t& written_pages) const {
if constexpr (!_pagemap_supported)
if (!_pagemap_supported)
return false;

assert(rgn.size() % pagesz == 0);
Expand Down Expand Up @@ -135,8 +146,49 @@ class pagemap_accessor {
}

private:
// Opens /proc/self/clear_refs write-only and writes the single character "4" to it.
// Returns true only when the file could be opened and exactly one byte was written.
bool _clear_refs() const {
   const int refs_fd = ::open("/proc/self/clear_refs", O_WRONLY);
   if (refs_fd < 0)
      return false;

   // Writing "4" into the /proc/PID/clear_refs file of a task clears the soft-dirty
   // bits from that task's PTEs.
   //
   // From then on, the first modification of a page at some virtual address triggers
   // a #PF, and the kernel sets the soft-dirty bit on the corresponding PTE.
   // ----------------------------------------------------------------------------------
   const char* soft_dirty_cmd = "4";
   const bool  written        = (write(refs_fd, soft_dirty_cmd, 1) == 1);
   ::close(refs_fd);
   return written;
}

// Fills `dest_uint64` with consecutive 64-bit entries read from the pagemap file
// descriptor, starting at the entry for the page that contains `vaddr`
// (entry index = vaddr / pagesz, one uint64_t per page).
//
// Returns true once the whole span has been filled. Returns false if the pagemap
// file cannot be opened, if `pread` reports an error, or if `pread` returns 0
// (end of file) before the span is full.
bool _read(uintptr_t vaddr, std::span<uint64_t> dest_uint64) const {
   if (!_open()) // make sure file is open
      return false;
   assert(_pagemap_fd >= 0);
   auto dest = std::as_writable_bytes(dest_uint64);
   std::byte* cur = dest.data();
   size_t bytes_remaining = dest.size();
   uintptr_t offset = (vaddr / pagesz) * sizeof(uint64_t);
   while (bytes_remaining != 0) {
      // `pread` may return fewer bytes than requested, so keep reading from where we stopped.
      ssize_t ret = pread(_pagemap_fd, cur, bytes_remaining, offset + (cur - dest.data()));
      // ret < 0 is an error; ret == 0 is end-of-file, where no further progress is
      // possible. Failing on both avoids looping forever on a zero-length read.
      if (ret <= 0)
         return false;
      bytes_remaining -= (size_t)ret;
      cur += ret;
   }
   return true;
}

// Reads the single pagemap entry for the page containing `vaddr` and returns what
// `is_marked_dirty` reports for it. If the entry cannot be read, the page is
// reported as dirty.
bool _page_dirty(uintptr_t vaddr) const {
   uint64_t entry;
   const bool entry_read = _read(vaddr, { &entry, 1 });
   // short-circuit: `entry` is only inspected when the read succeeded
   return !entry_read || this->is_marked_dirty(entry);
}

bool _open() const {
assert(_pagemap_supported);
if (_pagemap_fd < 0) {
_pagemap_fd = ::open("/proc/self/pagemap", O_RDONLY);
if (_pagemap_fd < 0)
Expand All @@ -147,21 +199,15 @@ class pagemap_accessor {

bool _close() const {
if (_pagemap_fd >= 0) {
assert(_pagemap_supported);
::close(_pagemap_fd);
_pagemap_fd = -1;
}
return true;
}

static inline size_t pagesz = sysconf(_SC_PAGE_SIZE);

#if defined(__linux__) && defined(__x86_64__)
static constexpr bool _pagemap_supported = true;
#else
static constexpr bool _pagemap_supported = false;
#endif

static inline bool _pagemap_supported = false;
static inline bool _pagemap_support_checked = false;
mutable int _pagemap_fd = -1;
};

Expand Down
11 changes: 6 additions & 5 deletions src/pinnable_mapped_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,11 @@ void pinnable_mapped_file::setup_copy_on_write_mapping() {
// before we clear the Soft-Dirty bits for the whole process, make sure all writable,
// non-sharable chainbase dbs using mapped mode are flushed to disk
// ----------------------------------------------------------------------------------
for (auto pmm : _instance_tracker)
pmm->save_database_file(true);
for (auto pmm : _instance_tracker) {
// we only populate _instance_tracker if pagemap *is* supported
assert(pagemap_accessor::pagemap_supported());
pmm->save_database_file(true);
}

_file_mapped_region = bip::mapped_region(_file_mapping, bip::copy_on_write);
*((char*)_file_mapped_region.get_address()+header_dirty_bit_offset) = dirty; // set dirty bit in our memory mapping
Expand All @@ -238,9 +241,7 @@ void pinnable_mapped_file::setup_copy_on_write_mapping() {
// then clear the Soft-Dirty bits
// ------------------------------
pagemap_accessor pagemap;
if (pagemap.pagemap_supported()) {
if (!pagemap.clear_refs())
BOOST_THROW_EXCEPTION(std::system_error(make_error_code(db_error_code::clear_refs_failed)));
if (pagemap.check_pagemap_support_and_clear_refs()) {
_instance_tracker.push_back(this); // so we can save dirty pages before another instance calls `clear_refs()`
}
}
Expand Down