Skip to content

Commit

Permalink
ckpt for nothing
Browse files Browse the repository at this point in the history
  • Loading branch information
Bob-Chen222 committed Nov 6, 2024
1 parent a493f2a commit 5250a3b
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/ops/kernels/inc_multihead_self_attention_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ void update_qkv_in_batch(IncMultiHeadSelfAttentionMeta const *m,
int const max_num_pages =
round_up_pages(BatchConfig::max_sequence_length() +
BatchConfig::max_spec_tree_token_num());
update_qkv_in_batch_verify_kernel<<<GET_BLOCKS(parallelism),
update_qkv_in_batch_kernel<<<GET_BLOCKS(parallelism),
min(CUDA_NUM_THREADS, parallelism),
0,
stream>>>(
Expand Down
8 changes: 7 additions & 1 deletion src/runtime/request_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,9 @@ BatchConfig RequestManager::prepare_decoding_batch() {
int idx_to_physical = append_token_to_block(request, request.tokens.back(), true);
bc.requestsInfo[request_index].num_kv_pages = get_num_blocks_allocated(request);
bc.requestsInfo[request_index].kv_last_page_len = get_len_last_block(request);
bc.requestsInfo[request_index].request_guid = request.guid;
printf("Request %d, token %d, idx_to_physical %d\n", request.guid, request.tokens.back(), idx_to_physical);
printf("Request %d, num_kv_pages %d, kv_last_page_len %d\n", request.guid, bc.requestsInfo[request_index].num_kv_pages, bc.requestsInfo[request_index].kv_last_page_len);

bc.num_tokens++;

Expand Down Expand Up @@ -2521,7 +2524,6 @@ void RequestManager::background_serving_task(
}
// page attention: initialize the page manager here
int kv_cache_size = rm->get_max_kv_cache_size();
printf("KV cache size: %d\n", kv_cache_size);
PageManager::get_page_manager(llm, rm->get_max_kv_cache_size());
if (rm->decoding_mode == INCREMENTAL_DECODING) {
// No SSMs: perform incremental decoding
Expand Down Expand Up @@ -2738,6 +2740,10 @@ void RequestManager::terminate_background_server_at_exit() {

void RequestManager::terminate_background_server() {
if (is_background_server_serving()) {
printf("profiling llm step times size: %ld\n",
profiling.llm_step_times.size());
printf("profiling requests per step size: %ld\n",
profiling.requests_per_step.size());
assert(profiling.llm_step_times.size() ==
profiling.requests_per_step.size());
// Write the last profiling statistics to output file
Expand Down

0 comments on commit 5250a3b

Please sign in to comment.