Commit

fix
goliaro committed Oct 15, 2024
1 parent fe40a7b commit a1f43e7
Showing 3 changed files with 15 additions and 10 deletions.
2 changes: 1 addition & 1 deletion include/flexflow/request_manager.h
@@ -68,7 +68,7 @@ struct Request {
   BatchConfig::RequestGuid guid;
   PEFTModelID peft_model_id = PEFTModelID::NO_ID;
   int max_length = -1;
-  int max_new_tokens = 128;
+  int max_new_tokens = -1;
   int initial_len;
   int ssm_cache_size = 0;
   int llm_cache_size = 0;
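The header change above flips the default so that both limits start out "unset". A minimal standalone sketch of what that buys (the struct here is a hypothetical stand-in for the Request struct above, not the real type):

// Sketch only: -1 now marks both limits as "not provided by the caller".
#include <cassert>

struct RequestSketch {        // hypothetical stand-in, mirrors the fields shown above
  int max_length = -1;        // cap on total sequence length; -1 = unset
  int max_new_tokens = -1;    // cap on generated tokens; -1 = unset (was 128)
};

int main() {
  RequestSketch req;          // caller sets neither limit
  assert(req.max_length == -1 && req.max_new_tokens == -1);
  // With the old default of 128, this case was indistinguishable from a caller
  // that explicitly asked for at most 128 new tokens.
  return 0;
}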
2 changes: 1 addition & 1 deletion inference/peft/peft.cc
@@ -340,7 +340,7 @@ void FlexFlow::top_level_task(Task const *task,
     printf("Inference prompt[%d]: %s\n", total_num_requests, text.c_str());
     Request inference_req;
     inference_req.prompt = text;
-    inference_req.max_length = 128;
+    inference_req.max_new_tokens = 128;
     inference_req.peft_model_id =
         (peft_model_id != nullptr) ? *peft_model_id : PEFTModelID::NO_ID;
     requests.push_back(inference_req);
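The peft.cc change switches the inference requests from a total-length cap to a new-token cap. Assuming the usual reading of the two fields (max_length bounds prompt plus generation, max_new_tokens bounds generation only, which the fallback to get_max_sequence_length below is consistent with), the practical difference is:

// Sketch only: how many tokens each cap leaves for generation.
#include <iostream>

int main() {
  int prompt_tokens = 100;   // hypothetical prompt length after tokenization
  int max_length = 128;      // old setting: total-length cap
  int max_new_tokens = 128;  // new setting: generated-token cap

  std::cout << "old cap leaves " << (max_length - prompt_tokens)
            << " new tokens\n";                 // 28 for a 100-token prompt
  std::cout << "new cap leaves " << max_new_tokens
            << " new tokens\n";                 // 128 regardless of prompt size
  return 0;
}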
21 changes: 13 additions & 8 deletions src/runtime/request_manager.cc
@@ -271,7 +271,13 @@ RequestManager::RequestGuid
   request.guid = next_available_guid++;
   request.max_length = request_.max_length;
   request.max_new_tokens = request_.max_new_tokens;
+  // both unset
+  if (request.max_length == -1 && request.max_new_tokens == -1) {
+    request.max_length = get_max_sequence_length();
+  }
+  // both set
   if (request.max_length != -1 && request.max_new_tokens != -1) {
+    request.max_length = -1;
     std::cout
         << "Both `max_new_tokens` (=" << request.max_new_tokens
         << ") and `max_length`(=" << request.max_length
@@ -372,15 +378,14 @@ RequestManager::RequestGuid
   request.initial_len = 0;
   request.max_length = request_.max_length;
   request.max_new_tokens = request_.max_new_tokens;
-  if (request.max_length != -1) {
-    std::cout << "Warning: max_length is set for PEFT finetuning, but it will "
-                 "be ignored."
-              << std::endl;
-  }
   if (request.max_new_tokens != -1) {
-    std::cout << "Warning: max_new_tokens is set for PEFT finetuning, but "
-                 "it will be ignored."
-              << std::endl;
+    std::cerr
+        << "Error: max_new_tokens is not allowed for PEFT finetuning requests"
+        << std::endl;
+    assert(false);
   }
+  if (request.max_length == -1) {
+    request.max_length = get_max_sequence_length();
+  }
   request.peft_model_id = request_.peft_model_id;
   request.req_type = RequestType::REQ_FINETUNING;
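Taken together, the request_manager.cc hunks make the limit handling explicit: inference requests with neither limit set fall back to the model's maximum sequence length, requests with both set have max_length dropped in favor of max_new_tokens, and PEFT finetuning requests reject max_new_tokens outright. A standalone paraphrase of that resolution (function and constant names here are placeholders, not FlexFlow symbols):

// Paraphrase only; mirrors the behavior added in this commit, not the real API.
#include <cassert>
#include <iostream>

constexpr int MAX_SEQ_LEN = 1024;  // stand-in for get_max_sequence_length()

int resolve_inference_max_length(int max_length, int max_new_tokens) {
  if (max_length == -1 && max_new_tokens == -1) {
    return MAX_SEQ_LEN;               // both unset: use the model maximum
  }
  if (max_length != -1 && max_new_tokens != -1) {
    std::cout << "Both limits set; max_length is ignored\n";
    return -1;                        // max_new_tokens takes precedence
  }
  return max_length;                  // exactly one limit given: keep it as-is
}

int main() {
  assert(resolve_inference_max_length(-1, -1) == MAX_SEQ_LEN);
  assert(resolve_inference_max_length(128, 64) == -1);
  assert(resolve_inference_max_length(256, -1) == 256);
  // Finetuning path: max_new_tokens triggers assert(false); max_length alone
  // applies, defaulting to the model maximum when unset.
  return 0;
}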
