diff --git a/include/chainbase/pagemap_accessor.hpp b/include/chainbase/pagemap_accessor.hpp
index d52b7d2..c2edb76 100644
--- a/include/chainbase/pagemap_accessor.hpp
+++ b/include/chainbase/pagemap_accessor.hpp
@@ -22,26 +22,38 @@ class pagemap_accessor {
    }
 
    bool clear_refs() const {
-      if constexpr (!_pagemap_supported)
+      if (!_pagemap_supported)
          return false;
-
-      int fd = ::open("/proc/self/clear_refs", O_WRONLY);
-      if (fd < 0)
-         return false;
-
-      // Clear soft-dirty bits from the task's PTEs.
-      // This is done by writing "4" into the /proc/PID/clear_refs file of the task in question.
-      //
-      // After this, when the task tries to modify a page at some virtual address, the #PF occurs
-      // and the kernel sets the soft-dirty bit on the respective PTE.
-      // ----------------------------------------------------------------------------------------
-      const char *v = "4";
-      bool res = write(fd, v, 1) == 1;
-      ::close(fd);
-      return res;
+      return _clear_refs();
    }
 
-   static constexpr bool pagemap_supported() {
+   static bool pagemap_supported() {
+      assert(_pagemap_support_checked);
       return _pagemap_supported;
    }
+
+   // returns true if pagemap *is* supported and we successfully performed `clear_refs`
+   bool check_pagemap_support_and_clear_refs() {
+      if (!_pagemap_support_checked) {
+         _pagemap_support_checked = true;
+
+#if defined(__linux__) && defined(__x86_64__)
+         // probe page: memory obtained from `aligned_alloc` must be released with `free`, not `delete`
+         std::unique_ptr<char, decltype(&std::free)> p { static_cast<char*>(std::aligned_alloc(pagesz, pagesz)), &std::free };
+
+         // supported only if the probe page reads clean after `clear_refs` and dirty after a write
+         if (p && _clear_refs()) {
+            if (!_page_dirty((uintptr_t)p.get())) {
+               *p = 1;
+               if (_page_dirty((uintptr_t)p.get()))
+                  _pagemap_supported = true;
+            }
+         }
+#endif
+         std::cerr << "CHAINBASE: Detect Soft-Dirty pagemap support: " << (_pagemap_supported ? "OK" : "Not supported") << '\n';
+      }
+
+      // the probe above runs only once, but the Soft-Dirty bits must be cleared on *every* call
+      return _pagemap_supported && _clear_refs();
+   }
 
@@ -53,13 +65,6 @@ class pagemap_accessor {
       return pagesz;
    }
 
-   bool page_dirty(uintptr_t vaddr) const {
-      uint64_t data;
-      if (!read(vaddr, { &data, 1 }))
-         return true;
-      return this->is_marked_dirty(data);
-   }
-
    // /proc/pid/pagemap. This file lets a userspace process find out which physical frame each virtual page
    // is mapped to. It contains one 64-bit value for each virtual page, containing the following data
    // (from fs/proc/task_mmu.c, above pagemap_read):
@@ -78,24 +83,9 @@ class pagemap_accessor {
    // Here we are just checking bit #55 (the soft-dirty bit).
    // ----------------------------------------------------------------------------------------------------
    bool read(uintptr_t vaddr, std::span<uint64_t> dest_uint64) const {
-      if constexpr (!_pagemap_supported)
-         return false;
-
-      if (!_open()) // make sure file is open
+      if (!_pagemap_supported)
          return false;
-      assert(_pagemap_fd >= 0);
-      auto dest = std::as_writable_bytes(dest_uint64);
-      std::byte* cur = dest.data();
-      size_t bytes_remaining = dest.size();
-      uintptr_t offset = (vaddr / pagesz) * sizeof(uint64_t);
-      while (bytes_remaining != 0) {
-         ssize_t ret = pread(_pagemap_fd, cur, bytes_remaining, offset + (cur - dest.data()));
-         if (ret < 0)
-            return false;
-         bytes_remaining -= (size_t)ret;
-         cur += ret;
-      }
-      return true;
+      return _read(vaddr, dest_uint64);
    }
 
    // copies the modified pages with the virtual address space specified by `rgn` to an
@@ -104,7 +94,7 @@ class pagemap_accessor {
    // region should exist in the disk file.
    // --------------------------------------------------------------------------------------
    bool update_file_from_region(std::span<std::byte> rgn, bip::file_mapping& mapping, size_t offset, bool flush, size_t& written_pages) const {
-      if constexpr (!_pagemap_supported)
+      if (!_pagemap_supported)
          return false;
 
       assert(rgn.size() % pagesz == 0);
@@ -135,8 +125,51 @@ class pagemap_accessor {
    }
 
 private:
+   bool _clear_refs() const {
+      int fd = ::open("/proc/self/clear_refs", O_WRONLY);
+      if (fd < 0)
+         return false;
+
+      // Clear soft-dirty bits from the task's PTEs.
+      // This is done by writing "4" into the /proc/PID/clear_refs file of the task in question.
+      //
+      // After this, when the task tries to modify a page at some virtual address, the #PF occurs
+      // and the kernel sets the soft-dirty bit on the respective PTE.
+      // ----------------------------------------------------------------------------------------
+      const char *v = "4";
+      bool res = write(fd, v, 1) == 1;
+      ::close(fd);
+      return res;
+   }
+
+   // fills `dest_uint64` with pagemap entries (one uint64_t per page), starting at the page containing `vaddr`
+   bool _read(uintptr_t vaddr, std::span<uint64_t> dest_uint64) const {
+      if (!_open()) // make sure file is open
+         return false;
+      assert(_pagemap_fd >= 0);
+      auto dest = std::as_writable_bytes(dest_uint64);
+      std::byte* cur = dest.data();
+      size_t bytes_remaining = dest.size();
+      uintptr_t offset = (vaddr / pagesz) * sizeof(uint64_t);
+      while (bytes_remaining != 0) {
+         ssize_t ret = pread(_pagemap_fd, cur, bytes_remaining, offset + (cur - dest.data()));
+         if (ret <= 0) // error, or EOF (0 bytes read) which would otherwise make this loop spin forever
+            return false;
+         bytes_remaining -= (size_t)ret;
+         cur += ret;
+      }
+      return true;
+   }
+
+   // a page whose pagemap entry cannot be read is conservatively reported as dirty
+   bool _page_dirty(uintptr_t vaddr) const {
+      uint64_t data;
+      if (!_read(vaddr, { &data, 1 }))
+         return true;
+      return this->is_marked_dirty(data);
+   }
+
    bool _open() const {
-      assert(_pagemap_supported);
       if (_pagemap_fd < 0) {
          _pagemap_fd = ::open("/proc/self/pagemap", O_RDONLY);
          if (_pagemap_fd < 0)
@@ -147,7 +180,6 @@ class pagemap_accessor {
 
    bool _close() const {
       if (_pagemap_fd >= 0) {
-         assert(_pagemap_supported);
          ::close(_pagemap_fd);
          _pagemap_fd = -1;
       }
@@ -155,13 +187,8 @@ class pagemap_accessor {
    }
 
    static inline size_t pagesz = sysconf(_SC_PAGE_SIZE);
-
-#if defined(__linux__) && defined(__x86_64__)
-   static constexpr bool _pagemap_supported = true;
-#else
-   static constexpr bool _pagemap_supported = false;
-#endif
-
+   static inline bool _pagemap_supported = false;
+   static inline bool _pagemap_support_checked = false;
    mutable int _pagemap_fd = -1;
 };
 
diff --git a/src/pinnable_mapped_file.cpp b/src/pinnable_mapped_file.cpp
index e3d16a2..8d2e424 100644
--- a/src/pinnable_mapped_file.cpp
+++ b/src/pinnable_mapped_file.cpp
@@ -227,8 +227,11 @@ void pinnable_mapped_file::setup_copy_on_write_mapping() {
    // before we clear the Soft-Dirty bits for the whole process, make sure all writable,
    // non-sharable chainbase dbs using mapped mode are flushed to disk
    // ----------------------------------------------------------------------------------
-   for (auto pmm : _instance_tracker)
-      pmm->save_database_file(true);
+   for (auto pmm : _instance_tracker) {
+      // we only populate _instance_tracker if pagemap *is* supported
+      assert(pagemap_accessor::pagemap_supported());
+      pmm->save_database_file(true);
+   }
 
    _file_mapped_region = bip::mapped_region(_file_mapping, bip::copy_on_write);
    *((char*)_file_mapped_region.get_address()+header_dirty_bit_offset) = dirty; // set dirty bit in our memory mapping
@@ -238,9 +241,7 @@ void pinnable_mapped_file::setup_copy_on_write_mapping() {
    // then clear the Soft-Dirty bits
    // ------------------------------
    pagemap_accessor pagemap;
-   if (pagemap.pagemap_supported()) {
-      if (!pagemap.clear_refs())
-         BOOST_THROW_EXCEPTION(std::system_error(make_error_code(db_error_code::clear_refs_failed)));
+   if (pagemap.check_pagemap_support_and_clear_refs()) {
       _instance_tracker.push_back(this); // so we can save dirty pages before another instance calls `clear_refs()`
    }
 }