Skip to content

Commit

Permalink
log batch stats
Browse files (browse the repository at this point in the history)
  • Loading branch information
ispobock committed Jul 14, 2024
1 parent e04bf53 commit b1d261c
Showing 1 changed file with 19 additions and 12 deletions.
31 changes: 19 additions & 12 deletions src/turbomind/models/llama/SequenceManager.cc
Original file line number Diff line number Diff line change
@@ -402,27 +402,34 @@ auto SequenceManager::Materialize(Sequences sequences,
if (block_trie_->enabled()) {
// verify blocks in trie cache
int valid_count = block_trie_->verify();
if (rank_ == 0) {
TM_LOG_INFO("[PrefixCache] #all_cached_blocks: %d", valid_count - 1);
}

// stats
int num_hit_blocks = 0;
int num_hit_tokens = 0;
int num_input_tokens = 0;

// match prefix cache
for (int i = 0; i < sequences.size(); i++) {
if (!sequences[i]->prompt.empty() && sequences[i]->blocks.empty()) {
auto& seq = const_cast<Sequence&>(*sequences[i]);
block_trie_->match(seq);
seq.cache_len = seq.blocks.size() * block_seq_len_;
if (rank_ == 0) {
TM_LOG_INFO(
"[PrefixCache] sequence_id: %d, #cached_blocks: %d, #cached_tokens: %d, #input_tokens: %d, cache_hit_rate: %.2f%",
seq.id,
seq.blocks.size(),
seq.cache_len,
sequences[i]->prompt.size(),
(100.0 * seq.cache_len) / sequences[i]->prompt.size());
}
num_hit_blocks += seq.blocks.size();
num_hit_tokens += seq.cache_len;
num_input_tokens += seq.prompt.size();
}
}

// log stats info for a batch of sequences
if (rank_ == 0 && num_input_tokens != 0) {
TM_LOG_INFO(
"[PrefixCache] #total_cached_blocks: %d, #hit_cached_blocks: %d, #hit_cached_tokens: %d, #input_tokens: %d, avg_cache_hit_rate: %.2f%",
valid_count - 1,
num_hit_blocks,
num_hit_tokens,
num_input_tokens,
(100.0 * num_hit_tokens) / num_input_tokens);
}
}

auto [input_count1, input_count2] = adjust(sequences, context_lengths);
Expand Down

0 comments on commit b1d261c

Please sign in to comment.