Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding support for generate endpoint #80

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions genai-perf/genai_perf/inputs/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from .tensorrtllm_converter import TensorRTLLMConverter
from .tensorrtllm_engine_converter import TensorRTLLMEngineConverter
from .vllm_converter import VLLMConverter
from .triton_generate_converter import TritonGenerateConverter

__all__ = [
"OpenAIChatCompletionsConverter",
Expand All @@ -40,4 +41,5 @@
"TensorRTLLMConverter",
"TensorRTLLMEngineConverter",
"VLLMConverter",
"TritonGenerateConverter"
]
109 changes: 109 additions & 0 deletions genai-perf/genai_perf/inputs/converters/generate_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import random
from copy import deepcopy
Fixed Show fixed Hide fixed
from typing import Dict, List

from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import (
DEFAULT_OUTPUT_TOKENS_MEAN,
EMPTY_JSON_IN_OPENAI_PA_FORMAT,
)
Fixed Show fixed Hide fixed
from genai_perf.inputs.inputs_config import InputsConfig


class GenerateConverter(BaseConverter):
    """Convert a generic dataset into a JSON document of ``text_input`` payloads.

    Each dataset row becomes one ``{"payload": [{...}]}`` entry whose payload
    carries the row's joined text plus optional stream / token-limit / extra
    fields taken from the configuration.
    """

    def convert(
        self,
        generic_dataset: Dict,
        config: InputsConfig,
    ) -> Dict:
        """Build the payload document for every row of ``generic_dataset``.

        Args:
            generic_dataset: Mapping with a ``"rows"`` sequence; each row is a
                mapping of header name to content.
            config: Input settings forwarded to the payload builders.

        Returns:
            A dict of the form ``{"data": [{"payload": [{...}]}, ...]}`` with
            one entry per row.
        """
        # Header classification is delegated to a helper inherited from the
        # base class.
        (
            system_role_headers,
            user_role_headers,
            text_input_headers,
        ) = self._determine_json_feature_roles(generic_dataset)

        return self._create_pa_json(
            generic_dataset,
            system_role_headers,
            user_role_headers,
            text_input_headers,
            config,
        )

    def _create_pa_json(
        self,
        generic_dataset: Dict,
        system_role_headers: List[str],
        user_role_headers: List[str],
        text_input_headers: List[str],
        config: InputsConfig,
    ) -> Dict:
        """Assemble one payload per dataset row.

        Args:
            generic_dataset: Mapping with a ``"rows"`` sequence of header ->
                content mappings.
            system_role_headers: Headers passed through to
                ``_create_new_text_input``.
            user_role_headers: Headers passed through to
                ``_create_new_text_input``.
            text_input_headers: Headers passed through to
                ``_create_new_text_input``.
            config: Input settings used for the optional payload fields.

        Returns:
            ``{"data": [...]}`` where entry ``i`` holds the payload for row
            ``i``.
        """
        # Annotated assignment; the previous chained assignment also bound a
        # local called ``dict`` and shadowed the builtin.
        pa_json: Dict = {
            "data": [{"payload": [{}]} for _ in generic_dataset["rows"]]
        }

        for index, entry in enumerate(generic_dataset["rows"]):
            # NOTE(review): a per-row model name used to be selected here but
            # was never placed in the payload, so the unused lookup is gone.
            text_parts = []
            for header, content in entry.items():
                new_text_input = self._create_new_text_input(
                    header,
                    system_role_headers,
                    user_role_headers,
                    text_input_headers,
                    content,
                )
                # Falsy results (e.g. empty text) are skipped.
                if new_text_input:
                    text_parts.append(new_text_input)

            row_entry = pa_json["data"][index]
            row_entry["payload"][0]["text_input"] = " ".join(text_parts)
            self._add_optional_tags(row_entry, config)

        return pa_json

    def _add_optional_tags(
        self,
        pa_json: Dict,
        config: InputsConfig,
    ) -> None:
        """Add optional request fields to a single entry's payload in place.

        Args:
            pa_json: One ``{"payload": [{...}]}`` entry (not the whole
                document); its first payload dict is mutated.
            config: Supplies ``add_stream``, ``output_tokens_mean``,
                ``output_tokens_stddev`` and ``extra_inputs``.
        """
        payload = pa_json["payload"][0]
        if config.add_stream:
            payload["stream"] = True
        # A token limit is only emitted when the mean differs from the default.
        if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
            payload["max_tokens"] = int(
                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
            )
        # Extra inputs are applied last, so they override keys set above.
        for key, value in config.extra_inputs.items():
            payload[key] = value
109 changes: 109 additions & 0 deletions genai-perf/genai_perf/inputs/converters/triton_generate_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import random

Check notice

Code scanning / CodeQL

Unused import

Import of 'random' is not used.
from copy import deepcopy
Fixed Show fixed Hide fixed

Check notice

Code scanning / CodeQL

Unused import

Import of 'deepcopy' is not used.
from typing import Dict, List

from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import (
DEFAULT_OUTPUT_TOKENS_MEAN,
EMPTY_JSON_IN_OPENAI_PA_FORMAT,
)
Fixed Show fixed Hide fixed
from genai_perf.inputs.inputs_config import InputsConfig


class TritonGenerateConverter(BaseConverter):
    """Convert a generic dataset into a JSON document of ``text_input`` payloads.

    Each dataset row becomes one ``{"payload": [{...}]}`` entry whose payload
    carries the row's joined text plus optional stream / token-limit / extra
    fields taken from the configuration.
    """

    def convert(
        self,
        generic_dataset: Dict,
        config: InputsConfig,
    ) -> Dict:
        """Build the payload document for every row of ``generic_dataset``.

        Args:
            generic_dataset: Mapping with a ``"rows"`` sequence; each row is a
                mapping of header name to content.
            config: Input settings forwarded to the payload builders.

        Returns:
            A dict of the form ``{"data": [{"payload": [{...}]}, ...]}`` with
            one entry per row.
        """
        # Header classification is delegated to a helper inherited from the
        # base class.
        (
            system_role_headers,
            user_role_headers,
            text_input_headers,
        ) = self._determine_json_feature_roles(generic_dataset)

        return self._create_pa_json(
            generic_dataset,
            system_role_headers,
            user_role_headers,
            text_input_headers,
            config,
        )

    def _create_pa_json(
        self,
        generic_dataset: Dict,
        system_role_headers: List[str],
        user_role_headers: List[str],
        text_input_headers: List[str],
        config: InputsConfig,
    ) -> Dict:
        """Assemble one payload per dataset row.

        Args:
            generic_dataset: Mapping with a ``"rows"`` sequence of header ->
                content mappings.
            system_role_headers: Headers passed through to
                ``_create_new_text_input``.
            user_role_headers: Headers passed through to
                ``_create_new_text_input``.
            text_input_headers: Headers passed through to
                ``_create_new_text_input``.
            config: Input settings used for the optional payload fields.

        Returns:
            ``{"data": [...]}`` where entry ``i`` holds the payload for row
            ``i``.
        """
        # Annotated assignment; the previous chained assignment also bound a
        # local called ``dict`` and shadowed the builtin.
        pa_json: Dict = {
            "data": [{"payload": [{}]} for _ in generic_dataset["rows"]]
        }

        for index, entry in enumerate(generic_dataset["rows"]):
            # NOTE(review): a per-row model name used to be selected here but
            # was never placed in the payload, so the unused lookup is gone.
            text_parts = []
            for header, content in entry.items():
                new_text_input = self._create_new_text_input(
                    header,
                    system_role_headers,
                    user_role_headers,
                    text_input_headers,
                    content,
                )
                # Falsy results (e.g. empty text) are skipped.
                if new_text_input:
                    text_parts.append(new_text_input)

            row_entry = pa_json["data"][index]
            row_entry["payload"][0]["text_input"] = " ".join(text_parts)
            self._add_optional_tags(row_entry, config)

        return pa_json

    def _add_optional_tags(
        self,
        pa_json: Dict,
        config: InputsConfig,
    ) -> None:
        """Add optional request fields to a single entry's payload in place.

        Args:
            pa_json: One ``{"payload": [{...}]}`` entry (not the whole
                document); its first payload dict is mutated.
            config: Supplies ``add_stream``, ``output_tokens_mean``,
                ``output_tokens_stddev`` and ``extra_inputs``.
        """
        payload = pa_json["payload"][0]
        if config.add_stream:
            payload["stream"] = True
        # A token limit is only emitted when the mean differs from the default.
        if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
            payload["max_tokens"] = int(
                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
            )
        # Extra inputs are applied last, so they override keys set above.
        for key, value in config.extra_inputs.items():
            payload[key] = value
1 change: 1 addition & 0 deletions genai-perf/genai_perf/inputs/input_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class OutputFormat(Enum):
TENSORRTLLM = auto()
VLLM = auto()
TENSORRTLLM_ENGINE = auto()
TRITON_GENERATE = auto()

def to_lowercase(self):
return self.name.lower()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def create(output_format: OutputFormat):
OutputFormat.VLLM: VLLMConverter,
OutputFormat.TENSORRTLLM: TensorRTLLMConverter,
OutputFormat.TENSORRTLLM_ENGINE: TensorRTLLMEngineConverter,
OutputFormat.TRITON_GENERATE: TritonGenerateConverter,

}
if output_format not in converters:
raise GenAIPerfException(f"Output format {output_format} is not supported")
Expand Down
Loading
Loading