Skip to content

Commit

Permalink
Addressing hwoo's CR
Browse files (browse the repository at this point in the history)
  • Loading branch information
nv-braf committed Oct 20, 2023
1 parent 40a1815 commit 37f337a
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 10 deletions.
10 changes: 6 additions & 4 deletions model_analyzer/config/generate/generator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,13 @@ def generate_doubled_list(min_value: int, max_value: int) -> List[int]:
return list

@staticmethod
def extract_max_tokens_from_request_parameter(request_parameter: str) -> int:
# format is max_tokens:<num>:int
_, max_tokens, _ = request_parameter.split(":")
def extract_value_from_request_parameter(request_parameter: str) -> int:
# Format is: <parameter>:<value>:<type>
# Example: max_tokens:10:int

return int(max_tokens)
_, value, _ = request_parameter.split(":")

return int(value)

@staticmethod
def extract_text_input_length_from_input_data(input_data: str) -> int:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,7 @@ def _update_perf_config_based_on_parameter_combination(
) -> None:
if "request-parameter" in parameter_combination:
request_parameter = parameter_combination["request-parameter"]
max_tokens = utils.extract_max_tokens_from_request_parameter(
request_parameter
)
max_tokens = utils.extract_value_from_request_parameter(request_parameter)
parameter_combination["request-period"] = (
max_tokens
if max_tokens < parameter_combination["request-period"]
Expand Down
2 changes: 1 addition & 1 deletion model_analyzer/perf_analyzer/perf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def extract_model_specific_parameters(self):
"concurrency-range": self._args["concurrency-range"],
"request-rate-range": self._args["request-rate-range"],
"periodic-concurrency-range": self._args["periodic-concurrency-range"],
"max-tokens": utils.extract_max_tokens_from_request_parameter(
"max-tokens": utils.extract_value_from_request_parameter(
self._args["request-parameter"]
),
"request-period": self._args["request-period"],
Expand Down
2 changes: 1 addition & 1 deletion model_analyzer/record/types/avg_first_token_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def header(cls, aggregation_tag=False):
metric.
"""

return "Avg First Token-to-Token latency (ms)"
return "Avg First Token latency (ms)"

def __eq__(self, other):
"""
Expand Down
2 changes: 1 addition & 1 deletion model_analyzer/result/result_table_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class ResultTableManager:
"concurrency": "Concurrency",
"periodic_concurrency": "Periodic Concurrency",
"text_input_length": "Text Input Length",
"max_tokens": "Max Token Count",
"max_tokens": "Max Tokens",
"request_period": "Request Period",
"request_rate": "Request Rate",
"model_config_path": "Model Config Path",
Expand Down

0 comments on commit 37f337a

Please sign in to comment.