-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve mapped
and head
modes.
#21
Changes from 23 commits
e5cf68b
8213c33
c6735eb
93cd1d3
19ee2e0
235d956
66d3326
832805a
4cde714
4bc07e4
619ba1f
4dcbb00
219e89b
6b4eda4
6e1aa5a
d6c1dcc
4a8070e
c3352cc
d275422
65eefd4
abc648c
da2910c
7ae2b7c
e7a9b5a
6cce710
4ced7af
44c9a20
4b7cf64
4ab8944
173287c
7ff3038
d928ec5
7817736
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
#pragma once | ||
|
||
#include <fcntl.h> // open | ||
#include <unistd.h> // pread, sysconf | ||
#include <cstring> | ||
#include <cstdlib> | ||
#include <cassert> | ||
#include <optional> | ||
#include <iostream> | ||
#include <fstream> | ||
#include <filesystem> | ||
#include <vector> | ||
#include <span> | ||
#include <boost/interprocess/managed_external_buffer.hpp> | ||
#include <boost/interprocess/anonymous_shared_memory.hpp> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems to be unused, and causes a compiler warning. Similarly for the same include file in pinnable_mapped_file.cpp. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, I removed this and a couple of other unneeded includes. |
||
|
||
namespace chainbase { | ||
|
||
namespace bip = boost::interprocess; | ||
|
||
class pagemap_accessor { | ||
public: | ||
~pagemap_accessor() { | ||
_close(); | ||
} | ||
|
||
bool clear_refs() const { | ||
if constexpr (!_pagemap_supported) | ||
return false; | ||
|
||
int fd = ::open("/proc/self/clear_refs", O_WRONLY); | ||
if (fd < 0) | ||
return false; | ||
|
||
// Clear soft-dirty bits from the task's PTEs. | ||
// This is done by writing "4" into the /proc/PID/clear_refs file of the task in question. | ||
// | ||
// After this, when the task tries to modify a page at some virtual address, the #PF occurs | ||
// and the kernel sets the soft-dirty bit on the respective PTE. | ||
// ---------------------------------------------------------------------------------------- | ||
const char *v = "4"; | ||
heifner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
bool res = write(fd, v, 1) == 1; | ||
::close(fd); | ||
return res; | ||
} | ||
|
||
static constexpr bool pagemap_supported() { | ||
return _pagemap_supported; | ||
} | ||
|
||
static bool is_marked_dirty(uint64_t entry) { | ||
return !!(entry & (1Ull << 55)); | ||
} | ||
|
||
static size_t page_size() { | ||
return pagesz; | ||
} | ||
|
||
bool page_dirty(uintptr_t vaddr) const { | ||
uint64_t data; | ||
if (!read(vaddr, { &data, 1 })) | ||
return true; | ||
return this->is_marked_dirty(data); | ||
} | ||
|
||
// /proc/pid/pagemap. This file lets a userspace process find out which physical frame each virtual page | ||
// is mapped to. It contains one 64-bit value for each virtual page, containing the following data | ||
// (from fs/proc/task_mmu.c, above pagemap_read): | ||
// | ||
// Bits 0-54 page frame number (PFN) if present (note: field is zeroed for non-privileged users) | ||
// Bits 0-4 swap type if swapped | ||
// Bits 5-54 swap offset if swapped | ||
// Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) | ||
// Bit 56 page exclusively mapped (since 4.2) | ||
// Bit 57 pte is uffd-wp write-protected (since 5.13) (see Documentation/admin-guide/mm/userfaultfd.rst) | ||
// Bits 58-60 zero | ||
// Bit 61 page is file-page or shared-anon (since 3.5) | ||
// Bit 62 page swapped | ||
// Bit 63 page present | ||
// | ||
// Here we are just checking bit #55 (the soft-dirty bit). | ||
// ---------------------------------------------------------------------------------------------------- | ||
bool read(uintptr_t vaddr, std::span<uint64_t> dest_uint64) const { | ||
if constexpr (!_pagemap_supported) | ||
return false; | ||
|
||
if (!_open()) // make sure file is open | ||
return false; | ||
assert(_pagemap_fd >= 0); | ||
auto dest = std::as_writable_bytes(dest_uint64); | ||
std::byte* cur = dest.data(); | ||
size_t bytes_remaining = dest.size(); | ||
uintptr_t offset = (vaddr / pagesz) * sizeof(uint64_t); | ||
while (bytes_remaining != 0) { | ||
ssize_t ret = pread(_pagemap_fd, cur, bytes_remaining, offset + (cur - dest.data())); | ||
if (ret < 0) | ||
return false; | ||
bytes_remaining -= (size_t)ret; | ||
cur += ret; | ||
} | ||
return true; | ||
} | ||
|
||
// copies the modified pages with the virtual address space specified by `rgn` to an | ||
// equivalent region starting at `offest` within the (open) file pointed by `fd`. | ||
// The specified region *must* be a multiple of the system's page size, and the specified | ||
// region should exist in the disk file. | ||
// -------------------------------------------------------------------------------------- | ||
bool update_file_from_region(std::span<std::byte> rgn, bip::file_mapping& mapping, size_t offset, bool flush) const { | ||
if constexpr (!_pagemap_supported) | ||
return false; | ||
|
||
assert(rgn.size() % pagesz == 0); | ||
size_t num_pages = rgn.size() / pagesz; | ||
std::vector<uint64_t> pm(num_pages); | ||
|
||
// get modified pages | ||
if (!read((uintptr_t)rgn.data(), pm)) | ||
return false; | ||
bip::mapped_region map_rgn(mapping, bip::read_write, offset, rgn.size()); | ||
std::byte* dest = (std::byte*)map_rgn.get_address(); | ||
if (dest) { | ||
for (size_t i=0; i<num_pages; ++i) { | ||
if (is_marked_dirty(pm[i])) { | ||
size_t j = i + 1; | ||
while (j<num_pages && is_marked_dirty(pm[j])) | ||
++j; | ||
memcpy(dest + (i * pagesz), rgn.data() + (i * pagesz), pagesz * (j - i)); | ||
i += j - i - 1; | ||
} | ||
} | ||
if (flush && !map_rgn.flush(0, rgn.size(), /* async = */ false)) | ||
std::cerr << "CHAINBASE: ERROR: flushing buffers failed" << '\n'; | ||
return true; | ||
} | ||
return false; | ||
} | ||
|
||
private: | ||
bool _open() const { | ||
assert(_pagemap_supported); | ||
if (_pagemap_fd < 0) { | ||
_pagemap_fd = ::open("/proc/self/pagemap", O_RDONLY); | ||
if (_pagemap_fd < 0) | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
bool _close() const { | ||
if (_pagemap_fd >= 0) { | ||
assert(_pagemap_supported); | ||
::close(_pagemap_fd); | ||
_pagemap_fd = -1; | ||
} | ||
return true; | ||
} | ||
|
||
static inline size_t pagesz = sysconf(_SC_PAGE_SIZE); | ||
|
||
#if defined(__linux__) && defined(__x86_64__) | ||
static constexpr bool _pagemap_supported = true; | ||
#else | ||
static constexpr bool _pagemap_supported = false; | ||
#endif | ||
|
||
mutable int _pagemap_fd = -1; | ||
}; | ||
|
||
} // namespace chainbase |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not critical for this PR but something that can be done in the future,
This,
chainbase/CMakeLists.txt
Line 2 in 7817736
should be bumped to 3.12 as that's the first version that knows c++20.
Also this entire
if/elseif/endif
block is logically nonsensical. My guess is that it originally required C++11, and it would make sense in that case. I might suggest changing the way this is done to how the bls lib does it. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do in the next PR!