Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duration measurement issues in the client #24344

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/main/cpp/archive_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ struct PartialZipExtractor : public devtools_ijar::ZipExtractorProcessor {
// it is in place. Concurrency during extraction is handled by
// extracting in a tmp dir and then renaming it into place where it
// becomes visible atomically at the new path.
ExtractionDurationMillis ExtractData(const string &self_path,
DurationMillis ExtractData(const string &self_path,
const vector<string> &archive_contents,
const string &expected_install_md5,
const StartupOptions &startup_options,
Expand All @@ -126,8 +126,7 @@ ExtractionDurationMillis ExtractData(const string &self_path,
BlessFiles(tmp_install);

uint64_t et = GetMillisecondsMonotonic();
const ExtractionDurationMillis extract_data_duration(
et - st, /*archived_extracted=*/true);
const DurationMillis extract_data_duration(st, et);

// Now rename the completed installation to its final name.
int attempts = 0;
Expand Down Expand Up @@ -197,7 +196,7 @@ ExtractionDurationMillis ExtractData(const string &self_path,
<< expected_install_md5
<< "). Remove it or specify a different --install_base.";
}
return ExtractionDurationMillis();
return DurationMillis();
}
}

Expand Down
25 changes: 2 additions & 23 deletions src/main/cpp/archive_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <string>
#include <vector>

#include "src/main/cpp/blaze_util.h"
#include "src/main/cpp/startup_options.h"
#include "src/main/cpp/util/logging.h"

Expand All @@ -29,28 +30,6 @@ void DetermineArchiveContents(const std::string &archive_path,
std::vector<std::string> *files,
std::string *install_md5);

struct DurationMillis {
public:
const uint64_t millis;

DurationMillis() : millis(kUnknownDuration) {}
DurationMillis(const uint64_t ms) : millis(ms) {}

bool IsUnknown() const { return millis == kUnknownDuration; }

private:
// Value representing that a timing event never occurred or is unknown.
static constexpr uint64_t kUnknownDuration = 0;
};

// DurationMillis that tracks if an archive was extracted.
struct ExtractionDurationMillis : DurationMillis {
const bool archive_extracted;
ExtractionDurationMillis() : DurationMillis(), archive_extracted(false) {}
ExtractionDurationMillis(const uint64_t ms, const bool archive_extracted)
: DurationMillis(ms), archive_extracted(archive_extracted) {}
};

// The reason for a blaze server restart.
// Keep in sync with logging.proto.
enum RestartReason {
Expand Down Expand Up @@ -94,7 +73,7 @@ struct LoggingInfo {
// BlessFiles. If the install base, the location the archive is unpacked,
// already exists, extraction is skipped. Kills the client if an error is
// encountered.
ExtractionDurationMillis ExtractData(
DurationMillis ExtractData(
const std::string &self_path,
const std::vector<std::string> &archive_contents,
const std::string &expected_install_md5,
Expand Down
35 changes: 15 additions & 20 deletions src/main/cpp/blaze.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ class BlazeServer final {
explicit BlazeServer(const StartupOptions &startup_options);

// Acquire a lock for the output base this server is running in.
// Returns the number of milliseconds spent waiting for the lock.
uint64_t AcquireLock();
// Returns the time spent waiting for the lock.
DurationMillis AcquireLock();

// Whether there is an active connection to a server.
bool Connected() const { return client_.get(); }
Expand Down Expand Up @@ -275,7 +275,7 @@ static BlazeServer *blaze_server;
// _exit(2) (attributed with ATTRIBUTE_NORETURN) meaning we have to delete the
// objects before those.

uint64_t BlazeServer::AcquireLock() {
DurationMillis BlazeServer::AcquireLock() {
if (output_base_lock_.has_value()) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "AcquireLock() called but the lock is already held.";
Expand All @@ -284,11 +284,11 @@ uint64_t BlazeServer::AcquireLock() {
// commands may not run against the same output base. Note that this lock will
// be released by ReleaseLock() once the server is running, as it can handle
// concurrent clients on its own.
uint64_t wait_time;
output_base_lock_ = blaze::AcquireLock(
auto lock_and_time = blaze::AcquireLock(
"output base", output_base_.GetRelative("lock"), LockMode::kExclusive,
batch_, block_for_lock_, &wait_time);
return wait_time;
batch_, block_for_lock_);
output_base_lock_ = lock_and_time.first;
return lock_and_time.second;
}

void BlazeServer::ReleaseLock() {
Expand Down Expand Up @@ -557,17 +557,14 @@ static void AddLoggingArgs(const LoggingInfo &logging_info,

// The time in ms a command had to wait on a busy Blaze server process.
// This is part of startup_time.
if (command_wait_duration_ms.IsUnknown()) {
args->push_back("--command_wait_time=" +
blaze_util::ToString(command_wait_duration_ms.millis));
}
args->push_back("--command_wait_time=" +
blaze_util::ToString(command_wait_duration_ms.millis));

// The time in ms spent on extracting the new blaze version.
// This is part of startup_time.
if (extract_data_duration.IsUnknown()) {
args->push_back("--extract_data_time=" +
blaze_util::ToString(extract_data_duration.millis));
}
args->push_back("--extract_data_time=" +
blaze_util::ToString(extract_data_duration.millis));

if (logging_info.restart_reason != NO_RESTART) {
args->push_back(string("--restart_reason=") +
ReasonString(logging_info.restart_reason));
Expand Down Expand Up @@ -657,8 +654,7 @@ static void RunBatchMode(
server->KillRunningServer();
}

const DurationMillis client_startup_duration(GetMillisecondsMonotonic() -
logging_info->start_time_ms);
const DurationMillis client_startup_duration(logging_info->start_time_ms, GetMillisecondsMonotonic());

BAZEL_LOG(INFO) << "Starting " << startup_options.product_name
<< " in batch mode.";
Expand Down Expand Up @@ -1093,8 +1089,7 @@ static ATTRIBUTE_NORETURN void RunClientServerMode(
<< server->ProcessInfo().server_pid_ << ").";

// Wall clock time since process startup.
const DurationMillis client_startup_duration =
(GetMillisecondsMonotonic() - logging_info->start_time_ms);
const DurationMillis client_startup_duration(logging_info->start_time_ms, GetMillisecondsMonotonic());

SignalHandler::Get().Install(startup_options.product_name,
startup_options.output_base,
Expand Down Expand Up @@ -1403,7 +1398,7 @@ static void RunLauncher(const string &self_path,

WarnFilesystemType(startup_options.output_base);

const ExtractionDurationMillis extract_data_duration = ExtractData(
const DurationMillis extract_data_duration = ExtractData(
self_path, archive_contents, install_md5, startup_options, logging_info);

blaze_server->Connect();
Expand Down
18 changes: 18 additions & 0 deletions src/main/cpp/blaze_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,24 @@ class WithEnvVars {
~WithEnvVars();
};

struct DurationMillis {
public:
const uint64_t millis;

DurationMillis() : millis(0) {}
DurationMillis(const uint64_t start, const uint64_t end) : millis(ComputeDuration(start, end)) {}

private:
static uint64_t ComputeDuration(const uint64_t start, const uint64_t end) {
if (end < start) {
BAZEL_LOG(WARNING) << "Invalid duration: start=" << start << ", end=" << end;
return 0;
} else {
return end - start;
}
}
};

} // namespace blaze

#endif // BAZEL_SRC_MAIN_CPP_BLAZE_UTIL_H_
11 changes: 5 additions & 6 deletions src/main/cpp/blaze_util_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,11 @@ enum class LockMode {
// Acquires a `mode` lock on `path`, busy-waiting until it becomes available if
// `block` is true, and releasing it on exec if `batch_mode` is false.
// Crashes if the lock cannot be acquired. Returns a handle that can be
// subsequently passed to ReleaseLock. Sets `wait_time` to the number of
// milliseconds spent waiting for the lock. The `name` argument is used to
// distinguish it from other locks in human-readable error messages.
LockHandle AcquireLock(const std::string& name, const blaze_util::Path& path,
LockMode mode, bool batch_mode, bool block,
uint64_t* wait_time);
// subsequently passed to ReleaseLock as well as the time spent waiting for the
// lock. The `name` argument is used to distinguish it from other locks in
// human-readable error messages.
std::pair<LockHandle, DurationMillis> AcquireLock(
const std::string &name, const blaze_util::Path &path, LockMode mode, bool batch_mode, bool block);

// Releases a lock previously obtained from AcquireLock.
void ReleaseLock(LockHandle lock_handle);
Expand Down
14 changes: 8 additions & 6 deletions src/main/cpp/blaze_util_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -632,9 +632,11 @@ static int setlk(int fd, struct flock* lock) {
return -1;
}

LockHandle AcquireLock(const std::string& name, const blaze_util::Path& path,
LockMode mode, bool batch_mode, bool block,
uint64_t* wait_time) {
std::pair<LockHandle, DurationMillis> AcquireLock(const std::string &name,
const blaze_util::Path &path,
LockMode mode,
bool batch_mode,
bool block) {
int flags = O_CREAT;
switch (mode) {
case LockMode::kShared:
Expand Down Expand Up @@ -712,7 +714,8 @@ LockHandle AcquireLock(const std::string& name, const blaze_util::Path& path,
// avoid unnecessary noise in the logs. In this metric, we are only
// interested in knowing how long it took for other commands to complete, not
// how fast acquiring a lock is.
const uint64_t elapsed_time = !multiple_attempts ? 0 : end_time - start_time;
DurationMillis elapsed_time =
!multiple_attempts ? DurationMillis() : DurationMillis(start_time, end_time);

// If taking an exclusive lock, identify ourselves in the lockfile.
// The contents are printed for human consumption when another client
Expand All @@ -731,8 +734,7 @@ LockHandle AcquireLock(const std::string& name, const blaze_util::Path& path,
}
}

*wait_time = elapsed_time;
return static_cast<LockHandle>(fd);
return std::make_pair(static_cast<LockHandle>(fd), elapsed_time);
}

void ReleaseLock(LockHandle lock_handle) {
Expand Down
11 changes: 6 additions & 5 deletions src/main/cpp/blaze_util_windows.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1087,9 +1087,11 @@ uint64_t WindowsClock::GetMilliseconds() const {
return GetMillisecondsAsLargeInt(kFrequency).QuadPart;
}

LockHandle AcquireLock(const std::string& name, const blaze_util::Path& path,
LockMode mode, bool batch_mode, bool block,
uint64_t* wait_time) {
std::pair<LockHandle, DurationMillis> AcquireLock(const std::string &name,
const blaze_util::Path &path,
LockMode mode,
bool batch_mode,
bool block) {
DWORD desired_access = GENERIC_READ;
if (mode == LockMode::kExclusive) {
desired_access |= GENERIC_WRITE;
Expand Down Expand Up @@ -1164,8 +1166,7 @@ LockHandle AcquireLock(const std::string& name, const blaze_util::Path& path,
// a concurrent process can read and display it. On Windows we can't do so
// because locks are mandatory, thus we cannot read the file concurrently.

*wait_time = GetMillisecondsMonotonic() - start_time;
return reinterpret_cast<LockHandle>(handle);
return std::make_pair(reinterpret_cast<LockHandle>(handle), DurationMillis(start_time, GetMillisecondsMonotonic());
}

void ReleaseLock(LockHandle lock_handle) {
Expand Down
11 changes: 5 additions & 6 deletions src/test/cpp/blaze_archive_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ TEST_F(BlazeArchiveTest, TestZipExtractionAndFarOutMTimes) {
set_startup_options(startup_options, blaze_path, output_dir);
LoggingInfo logging_info(blaze_path, blaze::GetMillisecondsMonotonic());

ExtractionDurationMillis extraction_time =
DurationMillis extraction_time =
ExtractData(blaze_path, archive_contents, expected_install_md5,
startup_options, &logging_info);

Expand Down Expand Up @@ -155,15 +155,14 @@ TEST_F(BlazeArchiveTest, TestNoDataExtractionIfInstallBaseExists) {
set_startup_options(startup_options, blaze_path, output_dir);
LoggingInfo logging_info(blaze_path, blaze::GetMillisecondsMonotonic());

ExtractionDurationMillis extraction_time_one =
DurationMillis extraction_time_one =
ExtractData(blaze_path, archive_contents, expected_install_md5,
startup_options, &logging_info);
ASSERT_TRUE(extraction_time_one.archive_extracted);
ASSERT_NEQ(extraction_time_one, DurationMillis(0));

ExtractionDurationMillis extraction_time_two =
DurationMillis extraction_time_two =
ExtractData(blaze_path, archive_contents, expected_install_md5,
startup_options, &logging_info);

ASSERT_FALSE(extraction_time_two.archive_extracted);
ASSERT_EQ(extraction_time_two, DurationMillis(0));
}
} // namespace blaze