Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
MaybeShewill-CV committed Nov 26, 2024
1 parent cf0fdd3 commit 7dba7a0
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 14 deletions.
8 changes: 8 additions & 0 deletions conf/model/llm/llama/llama-3.2-1B-instruct.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Config for the Llama-3.2-1B-Instruct model (NOTE(review): assumes the project's
# INI parser accepts '#' comments — confirm; classic INI uses ';').
[LLAMA3]
# Path (relative to the binary's working dir) to the Q4_K_M-quantized GGUF weights.
model_file_path="../weights/llm/llama/Llama-3.2-1B-Instruct/llama-3.2-1B-instruct-Q4_K_M.gguf"
# Number of layers to offload to the GPU; 300 exceeds the 1B model's layer count,
# which presumably means "offload everything" — verify against the loader.
n_gpu_layers=300
# CUDA device index used as the main GPU.
main_gpu_device=0
# Sampling temperature for token generation (0.7 = mildly creative).
sampler_temp=0.7

[CONTEXT]
# Context window size in tokens for the llama context.
context_size=4096
6 changes: 3 additions & 3 deletions src/apps/model_benchmark/llm/llama3_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ int main(int argc, char** argv) {
return -1;
}

std::string input = "<user>\n"
"Can you recommend some beginner-friendly programming languages for someone new to coding?\n"
"</user>";
std::string input = "\n<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nWho creates you?<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n";
LOG(INFO) << "input prompt text: " << input;
std::string out;
model.run(input, out);
LOG(INFO) << "generated output: " << out;

return 0;
}
27 changes: 16 additions & 11 deletions src/models/llm/llama/llama3.inl
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,10 @@ private:
/***
*
* @param prompt_tokens
* @param generate_out
* @return
*/
StatusCode llama_generate(std::vector<llama_token>& prompt_tokens);
StatusCode llama_generate(std::vector<llama_token>& prompt_tokens, std::string& generate_out);
};

/***
Expand Down Expand Up @@ -248,9 +249,16 @@ StatusCode Llama3<INPUT, OUTPUT>::Impl::run(const INPUT& in, OUTPUT& out) {
// tokenize input prompt
std::vector<llama_token> prompt_tokens;
auto status = tokenize_prompt(prompt, prompt_tokens);
if (status != StatusCode::OK) {
return status;
}

// run llama3 generate
status = llama_generate(prompt_tokens);
std::string generate_out;
status = llama_generate(prompt_tokens, generate_out);

// transform output
out = llama_impl::transform_output<OUTPUT>(generate_out);

return status;
}
Expand Down Expand Up @@ -294,8 +302,7 @@ StatusCode Llama3<INPUT, OUTPUT>::Impl::tokenize_prompt(const std::string &promp
* @return
*/
template <typename INPUT, typename OUTPUT>
StatusCode Llama3<INPUT, OUTPUT>::Impl::llama_generate(std::vector<llama_token> &prompt_tokens) {
std::string response;
StatusCode Llama3<INPUT, OUTPUT>::Impl::llama_generate(std::vector<llama_token> &prompt_tokens, std::string& generate_out) {
// prepare a batch for the prompt
llama_batch batch = llama_batch_get_one(prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
llama_token new_token_id;
Expand All @@ -304,9 +311,8 @@ StatusCode Llama3<INPUT, OUTPUT>::Impl::llama_generate(std::vector<llama_token>
int n_ctx = llama_n_ctx(_m_ctx);
int n_ctx_used = llama_get_kv_cache_used_cells(_m_ctx);
if (n_ctx_used + batch.n_tokens > n_ctx) {
printf("\033[0m\n");
fprintf(stderr, "context size exceeded\n");
exit(0);
LOG(ERROR) << "context size exceeded";
return StatusCode::MODEL_RUN_SESSION_FAILED;
}

if (llama_decode(_m_ctx, batch)) {
Expand All @@ -330,14 +336,13 @@ StatusCode Llama3<INPUT, OUTPUT>::Impl::llama_generate(std::vector<llama_token>
return StatusCode::MODEL_RUN_SESSION_FAILED;
}
std::string piece(buf, n);
printf("%s", piece.c_str());
fflush(stdout);
response += piece;
// printf("%s", piece.c_str());
// fflush(stdout);
generate_out += piece;

// prepare the next batch with the sampled token
batch = llama_batch_get_one(&new_token_id, 1);
}
LOG(INFO) << "generate: " << response;

return StatusCode::OK;
}
Expand Down

0 comments on commit 7dba7a0

Please sign in to comment.