Skip to content

Commit

Permalink
Add --fast-state-serializer and tools for jemalloc
Browse files Browse the repository at this point in the history
* Disable fast state serializer by default unless RAM is >= 90GB
* Print jemalloc stats once a minute
* Dump jemalloc profile on request
  • Loading branch information
SpyCheese committed Aug 5, 2024
1 parent 8714477 commit c81aca6
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 2 deletions.
48 changes: 48 additions & 0 deletions tdutils/td/utils/port/Stat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,4 +413,52 @@ Result<CpuStat> cpu_stat() {
#endif
}

// Returns the total amount of RAM in bytes.
//
// On Linux the value is parsed from the "MemTotal" line of /proc/meminfo; the
// number found there is multiplied by 1024 before being returned. On every
// other platform a "Not supported" error is returned.
//
// Errors: failure to open or read /proc/meminfo, a file that does not fit into
// the local buffer, a missing "MemTotal" line, or a value that is not a valid
// uint64 / would overflow after the multiplication.
Result<uint64> get_total_ram() {
#if TD_LINUX
  TRY_RESULT(fd, FileFd::open("/proc/meminfo", FileFd::Read));
  SCOPE_EXIT {
    fd.close();
  };
  constexpr int TMEM_SIZE = 10000;
  char mem[TMEM_SIZE];
  TRY_RESULT(size, fd.read(MutableSlice(mem, TMEM_SIZE - 1)));
  if (size >= TMEM_SIZE - 1) {
    // The buffer was filled completely, so the content may have been truncated.
    return Status::Error("Failed to read /proc/meminfo: file is too large");
  }
  mem[size] = 0;
  const char *s = mem;
  while (*s) {
    // Advance s to the end of the current line; [name_begin, s) is the line without '\n'.
    const char *name_begin = s;
    while (*s != 0 && *s != '\n') {
      s++;
    }
    // The field name is the leading run of alphabetic characters of the line.
    auto name_end = name_begin;
    while (is_alpha(*name_end)) {
      name_end++;
    }
    Slice name(name_begin, name_end);
    if (name == "MemTotal") {
      Slice value(name_end, s);
      if (!value.empty() && value[0] == ':') {
        value.remove_prefix(1);
      }
      value = trim(value);
      // Keep only the first token of the value (presumably drops the unit suffix).
      value = split(value).first;
      // Named differently from the `mem` buffer above to avoid shadowing it.
      TRY_RESULT_PREFIX(total_kib, to_integer_safe<uint64>(value), "Invalid value of MemTotal");
      // Reject values for which the multiplication by 1024 below would overflow uint64.
      if (total_kib >= 1ULL << (64 - 10)) {
        return Status::Error("Invalid value of MemTotal");
      }
      return total_kib * 1024;
    }
    if (*s == 0) {
      break;
    }
    s++;  // skip '\n'
  }
  return Status::Error("No MemTotal in /proc/meminfo");
#else
  return Status::Error("Not supported");
#endif
}

} // namespace td
2 changes: 2 additions & 0 deletions tdutils/td/utils/port/Stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,6 @@ Status update_atime(CSlice path) TD_WARN_UNUSED_RESULT;

#endif

// Returns the total amount of RAM in bytes, or an error status if it cannot be
// determined (the implementation only supports Linux, where /proc/meminfo is parsed).
Result<uint64> get_total_ram() TD_WARN_UNUSED_RESULT;

} // namespace td
4 changes: 4 additions & 0 deletions validator-engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,9 @@ add_executable(validator-engine ${VALIDATOR_ENGINE_SOURCE})
target_link_libraries(validator-engine overlay tdutils tdactor adnl tl_api dht
rldp rldp2 catchain validatorsession full-node validator ton_validator validator
fift-lib memprof git ${JEMALLOC_LIBRARIES})
# When jemalloc is available, make its headers visible to validator-engine and
# define TON_USE_JEMALLOC=1, which enables the "#if TON_USE_JEMALLOC" code paths
# in validator-engine.cpp.
if (JEMALLOC_FOUND)
target_include_directories(validator-engine PRIVATE ${JEMALLOC_INCLUDE_DIR})
target_compile_definitions(validator-engine PRIVATE -DTON_USE_JEMALLOC=1)
endif()

install(TARGETS validator-engine RUNTIME DESTINATION bin)
91 changes: 89 additions & 2 deletions validator-engine/validator-engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@
#include "block/precompiled-smc/PrecompiledSmartContract.h"
#include "interfaces/validator-manager.h"

#if TON_USE_JEMALLOC
#include <jemalloc/jemalloc.h>
#endif

Config::Config() {
out_port = 3278;
full_node = ton::PublicKeyHash::zero();
Expand Down Expand Up @@ -1179,6 +1183,55 @@ class CheckDhtServerStatusQuery : public td::actor::Actor {
td::Promise<td::BufferSlice> promise_;
};

#if TON_USE_JEMALLOC
class JemallocStatsWriter : public td::actor::Actor {
public:
void start_up() override {
alarm();
}

void alarm() override {
alarm_timestamp() = td::Timestamp::in(60.0);
auto r_stats = get_stats();
if (r_stats.is_error()) {
LOG(WARNING) << "Jemalloc stats error : " << r_stats.move_as_error();
} else {
auto s = r_stats.move_as_ok();
LOG(WARNING) << "JEMALLOC_STATS : [ timestamp=" << (ton::UnixTime)td::Clocks::system()
<< " allocated=" << s.allocated << " active=" << s.active << " metadata=" << s.metadata
<< " resident=" << s.resident << " ]";
}
}

private:
struct JemallocStats {
size_t allocated, active, metadata, resident;
};

static td::Result<JemallocStats> get_stats() {
size_t sz = sizeof(size_t);
static size_t epoch = 1;
if (mallctl("epoch", &epoch, &sz, &epoch, sz)) {
return td::Status::Error("Failed to refrash stats");
}
JemallocStats stats;
if (mallctl("stats.allocated", &stats.allocated, &sz, nullptr, 0)) {
return td::Status::Error("Cannot get stats.allocated");
}
if (mallctl("stats.active", &stats.active, &sz, nullptr, 0)) {
return td::Status::Error("Cannot get stats.active");
}
if (mallctl("stats.metadata", &stats.metadata, &sz, nullptr, 0)) {
return td::Status::Error("Cannot get stats.metadata");
}
if (mallctl("stats.resident", &stats.resident, &sz, nullptr, 0)) {
return td::Status::Error("Cannot get stats.resident");
}
return stats;
}
};
#endif

// Stores the given string in local_config_.
// The parameter is taken by value, so it is moved into the member instead of
// being copied a second time.
void ValidatorEngine::set_local_config(std::string str) {
  local_config_ = std::move(str);
}
Expand All @@ -1202,6 +1255,9 @@ void ValidatorEngine::schedule_shutdown(double at) {
}
// Actor start-up hook: schedules the first alarm 1.0 to 2.0 seconds from now
// (with a random offset in 0.01 s steps) and, when built with jemalloc,
// launches the detached jemalloc statistics writer actor.
void ValidatorEngine::start_up() {
  const double random_delay = td::Random::fast(0, 100) * 0.01;
  alarm_timestamp() = td::Timestamp::in(1.0 + random_delay);
#if TON_USE_JEMALLOC
  auto stats_writer = td::actor::create_actor<JemallocStatsWriter>("mem-stat");
  stats_writer.release();
#endif
}

void ValidatorEngine::alarm() {
Expand Down Expand Up @@ -1412,6 +1468,18 @@ td::Status ValidatorEngine::load_global_config() {
}
validator_options_.write().set_hardforks(std::move(h));

auto r_total_ram = td::get_total_ram();
if (r_total_ram.is_error()) {
LOG(ERROR) << "Failed to get total RAM size: " << r_total_ram.move_as_error();
} else {
td::uint64 total_ram = r_total_ram.move_as_ok();
LOG(WARNING) << "Total RAM = " << td::format::as_size(total_ram);
if (total_ram >= (90ULL << 30)) {
fast_state_serializer_enabled_ = true;
}
}
validator_options_.write().set_fast_state_serializer_enabled(fast_state_serializer_enabled_);

return td::Status::OK();
}

Expand Down Expand Up @@ -3894,7 +3962,7 @@ void need_scheduler_status(int sig) {
need_scheduler_status_flag.store(true);
}

void dump_memory_stats() {
void dump_memprof_stats() {
if (!is_memprof_on()) {
return;
}
Expand All @@ -3919,8 +3987,20 @@ void dump_memory_stats() {
LOG(WARNING) << td::tag("fast_backtrace_success_rate", get_fast_backtrace_success_rate());
}

// Asks jemalloc to write a heap profile to /tmp/validator-jemalloc.dump via the
// "prof.dump" mallctl control and logs the outcome.
// Does nothing when the binary is built without TON_USE_JEMALLOC.
void dump_jemalloc_prof() {
#if TON_USE_JEMALLOC
  const char *filename = "/tmp/validator-jemalloc.dump";
  // "prof.dump" takes a pointer to the file name string as its new value.
  if (mallctl("prof.dump", nullptr, nullptr, &filename, sizeof(filename)) == 0) {
    // Success is not an error: report it at WARNING level like the other dump output.
    LOG(WARNING) << "Written jemalloc dump to " << filename;
  } else {
    LOG(ERROR) << "Failed to write jemalloc dump to " << filename;
  }
#endif
}

// Dumps runtime statistics: memprof statistics, a jemalloc heap profile and
// the default NamedThreadSafeCounter values.
void dump_stats() {
  // dump_memory_stats() was renamed to dump_memprof_stats(); only the new name is called.
  dump_memprof_stats();
  dump_jemalloc_prof();
  LOG(WARNING) << td::NamedThreadSafeCounter::get_default();
}

Expand Down Expand Up @@ -4158,6 +4238,13 @@ int main(int argc, char *argv[]) {
acts.push_back([&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_catchain_max_block_delay, v); });
return td::Status::OK();
});
p.add_option(
'\0', "fast-state-serializer",
"faster persistent state serializer, but requires more RAM (enabled automatically on machines with >= 90GB RAM)",
[&]() {
acts.push_back(
[&x]() { td::actor::send_closure(x, &ValidatorEngine::set_fast_state_serializer_enabled, true); });
});
auto S = p.run(argc, argv);
if (S.is_error()) {
LOG(ERROR) << "failed to parse options: " << S.move_as_error();
Expand Down
4 changes: 4 additions & 0 deletions validator-engine/validator-engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ class ValidatorEngine : public td::actor::Actor {
bool started_ = false;
ton::BlockSeqno truncate_seqno_{0};
std::string session_logs_file_;
// Whether the fast state serializer is requested. Set through
// set_fast_state_serializer_enabled(); load_global_config() also turns it on
// when the machine has at least 90 GiB of RAM and then forwards it to the
// validator options.
bool fast_state_serializer_enabled_ = false;

std::set<ton::CatchainSeqno> unsafe_catchains_;
std::map<ton::BlockSeqno, std::pair<ton::CatchainSeqno, td::uint32>> unsafe_catchain_rotations_;
Expand Down Expand Up @@ -299,6 +300,9 @@ class ValidatorEngine : public td::actor::Actor {
// Stores `value` in catchain_max_block_delay_.
void set_catchain_max_block_delay(double value) {
catchain_max_block_delay_ = value;
}
// Stores `value` in fast_state_serializer_enabled_.
// The --fast-state-serializer command-line option calls this with `true`.
void set_fast_state_serializer_enabled(bool value) {
fast_state_serializer_enabled_ = value;
}
void start_up() override;
// Default constructor with an empty body.
ValidatorEngine() {
}
Expand Down
3 changes: 3 additions & 0 deletions validator/state-serializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ class CachedCellDbReader : public vm::CellDbReader {
};

void AsyncStateSerializer::prepare_previous_state_cache(ShardIdFull shard) {
if (!opts_->get_fast_state_serializer_enabled()) {
return;
}
std::vector<ShardIdFull> prev_shards;
for (const auto& [_, prev_shard] : previous_state_files_) {
if (shard_intersects(shard, prev_shard)) {
Expand Down
7 changes: 7 additions & 0 deletions validator/validator-options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
// Returns the stored collator options (collator_options_).
td::Ref<CollatorOptions> get_collator_options() const override {
return collator_options_;
}
// Returns the stored fast-state-serializer flag (false unless it was set).
bool get_fast_state_serializer_enabled() const override {
return fast_state_serializer_enabled_;
}

void set_zero_block_id(BlockIdExt block_id) override {
zero_block_id_ = block_id;
Expand Down Expand Up @@ -233,6 +236,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
// Replaces the stored collator options; `value` is moved into the member.
void set_collator_options(td::Ref<CollatorOptions> value) override {
collator_options_ = std::move(value);
}
// Stores the fast-state-serializer flag returned by
// get_fast_state_serializer_enabled().
void set_fast_state_serializer_enabled(bool value) override {
fast_state_serializer_enabled_ = value;
}

ValidatorManagerOptionsImpl *make_copy() const override {
return new ValidatorManagerOptionsImpl(*this);
Expand Down Expand Up @@ -286,6 +292,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
td::optional<double> catchain_max_block_delay_;
bool state_serializer_enabled_ = true;
td::Ref<CollatorOptions> collator_options_{true};
// Fast state serializer flag; disabled by default.
bool fast_state_serializer_enabled_ = false;
};

} // namespace validator
Expand Down
2 changes: 2 additions & 0 deletions validator/validator.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ struct ValidatorManagerOptions : public td::CntObject {
virtual td::optional<double> get_catchain_max_block_delay() const = 0;
virtual bool get_state_serializer_enabled() const = 0;
virtual td::Ref<CollatorOptions> get_collator_options() const = 0;
// Whether the fast state serializer is enabled. When this returns false,
// AsyncStateSerializer::prepare_previous_state_cache() returns immediately.
virtual bool get_fast_state_serializer_enabled() const = 0;

virtual void set_zero_block_id(BlockIdExt block_id) = 0;
virtual void set_init_block_id(BlockIdExt block_id) = 0;
Expand Down Expand Up @@ -137,6 +138,7 @@ struct ValidatorManagerOptions : public td::CntObject {
virtual void set_catchain_max_block_delay(double value) = 0;
virtual void set_state_serializer_enabled(bool value) = 0;
virtual void set_collator_options(td::Ref<CollatorOptions> value) = 0;
// Enables or disables the fast state serializer.
virtual void set_fast_state_serializer_enabled(bool value) = 0;

static td::Ref<ValidatorManagerOptions> create(
BlockIdExt zero_block_id, BlockIdExt init_block_id,
Expand Down

0 comments on commit c81aca6

Please sign in to comment.