From 9444bb14b72483afec84a10108d750df8b4c68f8 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:57:41 -0800 Subject: [PATCH] LlmInputs - Updated API + generic framework + convert to vLLM format (#486) * Initial API changes. Unit tests passing * Creating and using generic format with openai * Initial code to support vllm output * Refactoring * General cleanup + todos * Fixing codeql * Fix codeql issue * Removing output_filename define --- .../genai-pa/genai_pa/constants.py | 4 + .../genai_pa/llm_inputs/llm_inputs.py | 376 +++++++++++++++--- .../genai-pa/tests/test_llm_inputs.py | 105 +++-- 3 files changed, 409 insertions(+), 76 deletions(-) diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py index 8726fb454..e31d471a8 100644 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py @@ -25,3 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
LOGGER_NAME: str = "genai-pa" + +OPEN_ORCA = "openorca" +CNN_DAILY_MAIL = "cnn_dailymail" +DEFAULT_INPUT_DATA_JSON = "llm_inputs.json" diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_inputs/llm_inputs.py index 816acb4a2..64013c48e 100644 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/llm_inputs/llm_inputs.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/llm_inputs/llm_inputs.py @@ -14,20 +14,38 @@ import json from copy import deepcopy +from enum import Enum, auto from typing import Dict, List, Optional, Tuple import requests +from genai_pa.constants import CNN_DAILY_MAIL, DEFAULT_INPUT_DATA_JSON, OPEN_ORCA from genai_pa.exceptions import GenAiPAException from requests import Response +class InputType(Enum): + URL = auto() + FILE = auto() + SYNTHETIC = auto() + + +class InputFormat(Enum): + OPENAI = auto() + TRTLLM = auto() + VLLM = auto() + + +class OutputFormat(Enum): + OPENAI = auto() + TRTLLM = auto() + VLLM = auto() + + class LlmInputs: """ A library of methods that control the generation of LLM Inputs """ - OUTPUT_FILENAME = "./llm_inputs.json" - OPEN_ORCA_URL = "https://datasets-server.huggingface.co/rows?dataset=Open-Orca%2FOpenOrca&config=default&split=train" CNN_DAILYMAIL_URL = "https://datasets-server.huggingface.co/rows?dataset=cnn_dailymail&config=1.0.0&split=train" @@ -37,54 +55,105 @@ class LlmInputs: DEFAULT_LENGTH = 100 MINIMUM_LENGTH = 1 + EMPTY_JSON_IN_VLLM_PA_FORMAT = {"data": []} + EMPTY_JSON_IN_TRTLLM_PA_FORMAT = {"data": []} EMPTY_JSON_IN_OPENAI_PA_FORMAT = {"data": [{"payload": []}]} + dataset_url_map = {OPEN_ORCA: OPEN_ORCA_URL, CNN_DAILY_MAIL: CNN_DAILYMAIL_URL} + @classmethod - def create_openai_llm_inputs( + def create_llm_inputs( cls, - url: str = OPEN_ORCA_URL, + input_type: InputType, + input_format: InputFormat, + output_format: OutputFormat, + model_name: str = "", + input_filename: str = "", starting_index: int = DEFAULT_STARTING_INDEX, length: int = 
DEFAULT_LENGTH, - model_name: str = None, + add_model_name: bool = False, add_stream: bool = False, ) -> Dict: """ - Given a URL and indexing parameters, it will write a string of LLM Inputs + Given an input type, input format, and output format, write a string of LLM Inputs (in a JSON dictionary) to a file - Parameters - ---------- - url: - URL to gather LLM Inputs from + Required Parameters + ------------------- + input_type: + Specify how the input is received (URL, file, or synthetic) + input_format: + Specify the input format + output_format: + Specify the output format + + Optional Parameters + ------------------- + model_name: + The model name starting_index: Offset from within the list to start gathering inputs length: Number of entries to gather - model_name: - If included adds this model name field to each payload + add_model_name: + If true adds a model name field to each payload add_stream: If true adds a steam field to each payload """ - LlmInputs._check_for_valid_args(starting_index, length) - configured_url = LlmInputs._create_configured_url(url, starting_index, length) - dataset = LlmInputs._download_dataset(configured_url, starting_index, length) - dataset_json = LlmInputs._convert_dataset_to_json(dataset) - json_in_pa_format = LlmInputs._convert_json_to_pa_format( - dataset_json, model_name, add_stream + LlmInputs._check_for_valid_args(input_type, model_name, starting_index, length) + + dataset = None + if input_type == InputType.URL: + dataset = LlmInputs._get_input_dataset_from_url( + model_name, starting_index, length + ) + else: + raise GenAiPAException( + "Using file/synthetic to supply LLM Input is not supported at this time" + ) + + generic_dataset_json = LlmInputs._convert_input_dataset_to_generic_json( + input_format, dataset + ) + + json_in_pa_format = LlmInputs._convert_generic_json_to_output_format( + output_format, generic_dataset_json, add_model_name, add_stream, model_name ) LlmInputs._write_json_to_file(json_in_pa_format) return
json_in_pa_format @classmethod - def _check_for_valid_args(cls, starting_index: int, length: int) -> None: + def _check_for_valid_args( + cls, input_type: InputType, model_name: str, starting_index: int, length: int + ) -> None: try: + LlmInputs._check_for_model_name_if_input_type_is_url(input_type, model_name) LlmInputs._check_for_valid_starting_index(starting_index) LlmInputs._check_for_valid_length(length) except Exception as e: raise GenAiPAException(e) + @classmethod + def _get_input_dataset_from_url( + cls, model_name: str, starting_index: int, length: int + ) -> Response: + url = LlmInputs._resolve_url(model_name) + configured_url = LlmInputs._create_configured_url(url, starting_index, length) + dataset = LlmInputs._download_dataset(configured_url, starting_index, length) + + return dataset + + @classmethod + def _resolve_url(cls, model_name: str) -> str: + if model_name in LlmInputs.dataset_url_map: + return LlmInputs.dataset_url_map[model_name] + else: + raise GenAiPAException( + f"{model_name} does not have a corresponding URL in the dataset_url_map." 
+ ) + @classmethod def _create_configured_url(cls, url: str, starting_index: int, length: int) -> str: starting_index_str = str(starting_index) @@ -100,28 +169,129 @@ def _download_dataset(cls, configured_url, starting_index, length) -> Response: return dataset @classmethod - def _convert_dataset_to_json(cls, dataset: Response) -> Dict: + def _convert_input_dataset_to_generic_json( + cls, input_format: InputFormat, dataset: Response + ) -> Dict: dataset_json = dataset.json() try: LlmInputs._check_for_error_in_json_of_dataset(dataset_json) except Exception as e: raise GenAiPAException(e) - return dataset_json + if input_format == InputFormat.OPENAI: + generic_dataset_json = LlmInputs._convert_openai_to_generic_input_json( + dataset_json + ) + else: + raise GenAiPAException( + f"Input format {input_format} is not supported at this time" + ) + + return generic_dataset_json @classmethod - def _convert_json_to_pa_format( - cls, dataset_json: Dict, model_name: str, add_stream: bool - ) -> Dict: - system_role_headers, user_role_headers = LlmInputs._determine_json_pa_roles( - dataset_json + def _convert_openai_to_generic_input_json(cls, dataset_json: Dict) -> Dict: + generic_input_json = LlmInputs._add_openai_features_to_generic_json( + {}, dataset_json + ) + generic_input_json = LlmInputs._add_openai_rows_to_generic_json( + generic_input_json, dataset_json ) - pa_json = LlmInputs._populate_openai_pa_json( + + return generic_input_json + + @classmethod + def _add_openai_features_to_generic_json( + cls, generic_input_json: Dict, dataset_json: Dict + ) -> Dict: + if "features" in dataset_json.keys(): + generic_input_json["features"] = [] + for feature in dataset_json["features"]: + generic_input_json["features"].append(feature["name"]) + + return generic_input_json + + @classmethod + def _add_openai_rows_to_generic_json( + cls, generic_input_json: Dict, dataset_json: Dict + ) -> Dict: + generic_input_json["rows"] = [] + for row in dataset_json["rows"]: + 
generic_input_json["rows"].append(row["row"]) + + return generic_input_json + + @classmethod + def _convert_generic_json_to_output_format( + cls, + output_format: OutputFormat, + generic_dataset: Dict, + add_model_name: bool, + add_stream: bool, + model_name: str = "", + ) -> Dict: + if output_format == OutputFormat.OPENAI: + output_json = LlmInputs._convert_generic_json_to_openai_format( + generic_dataset, add_model_name, add_stream, model_name + ) + elif output_format == OutputFormat.VLLM: + output_json = LlmInputs._convert_generic_json_to_vllm_format( + generic_dataset, add_model_name, add_stream, model_name + ) + else: + raise GenAiPAException( + f"Output format {output_format} is not currently supported" + ) + + return output_json + + @classmethod + def _convert_generic_json_to_openai_format( + cls, + dataset_json: Dict, + add_model_name: bool, + add_stream: bool, + model_name: str = "", + ) -> Dict: + # OPEN: Don't know how to select a role for `text_input` + ( + system_role_headers, + user_role_headers, + _, + ) = LlmInputs._determine_json_feature_roles(dataset_json) + pa_json = LlmInputs._populate_openai_output_json( dataset_json, system_role_headers, user_role_headers, + add_model_name, + add_stream, model_name, + ) + + return pa_json + + @classmethod + def _convert_generic_json_to_vllm_format( + cls, + dataset_json: Dict, + add_model_name: bool, + add_stream: bool, + model_name: str = "", + ) -> Dict: + ( + system_role_headers, + user_role_headers, + text_input_headers, + ) = LlmInputs._determine_json_feature_roles(dataset_json) + + pa_json = LlmInputs._populate_vllm_output_json( + dataset_json, + system_role_headers, + user_role_headers, + text_input_headers, + add_model_name, add_stream, + model_name, ) return pa_json @@ -129,55 +299,98 @@ def _convert_json_to_pa_format( @classmethod def _write_json_to_file(cls, json_in_pa_format: Dict): try: - f = open(LlmInputs.OUTPUT_FILENAME, "w") + f = open(DEFAULT_INPUT_DATA_JSON, "w") 
f.write(json.dumps(json_in_pa_format, indent=2)) finally: f.close() @classmethod - def _determine_json_pa_roles( + def _determine_json_feature_roles( cls, dataset_json: Dict ) -> Tuple[List[str], List[str]]: SYSTEM_ROLE_LIST = ["system_prompt"] USER_ROLE_LIST = ["question", "article"] + TEXT_INPUT_LIST = ["text_input"] - system_role_headers, user_role_headers = [], [] + system_role_headers, user_role_headers, text_input_headers = [], [], [] if "features" in dataset_json.keys(): for index, feature in enumerate(dataset_json["features"]): - if feature["name"] in SYSTEM_ROLE_LIST: - system_role_headers.append(feature["name"]) - if feature["name"] in USER_ROLE_LIST: - user_role_headers.append(feature["name"]) - - assert system_role_headers is not None or user_role_headers is not None + if feature in SYSTEM_ROLE_LIST: + system_role_headers.append(feature) + if feature in USER_ROLE_LIST: + user_role_headers.append(feature) + if feature in TEXT_INPUT_LIST: + user_role_headers.append(feature) + + assert ( + system_role_headers is not None + or user_role_headers is not None + or text_input_headers is not None + ) - return system_role_headers, user_role_headers + return system_role_headers, user_role_headers, text_input_headers @classmethod - def _populate_openai_pa_json( + def _populate_openai_output_json( cls, dataset_json: Dict, system_role_headers: List[str], user_role_headers: List[str], - model_name: str, + add_model_name: bool, add_stream: bool, + model_name: str = "", ) -> Dict: pa_json = LlmInputs._create_empty_openai_pa_json() - for entry in dataset_json["rows"]: + for index, entry in enumerate(dataset_json["rows"]): pa_json["data"][0]["payload"].append({"messages": []}) - for header in entry["row"]: + for header, content in entry.items(): new_message = LlmInputs._create_new_message( - header, system_role_headers, user_role_headers, entry["row"][header] + header, system_role_headers, user_role_headers, content ) pa_json = LlmInputs._add_new_message_to_json( - 
pa_json, entry["row_idx"], new_message + pa_json, index, new_message ) - pa_json = LlmInputs._add_optional_tags_to_json( - pa_json, entry["row_idx"], model_name, add_stream + pa_json = LlmInputs._add_optional_tags_to_openai_json( + pa_json, index, add_model_name, add_stream, model_name + ) + + return pa_json + + @classmethod + def _populate_vllm_output_json( + cls, + dataset_json: Dict, + system_role_headers: List[str], + user_role_headers: List[str], + text_input_headers: List[str], + add_model_name: bool, + add_stream: bool, + model_name: str = "", + ) -> Dict: + pa_json = LlmInputs._create_empty_vllm_pa_json() + + for index, entry in enumerate(dataset_json["rows"]): + pa_json["data"].append({"text_input": []}) + + for header, content in entry.items(): + new_text_input = LlmInputs._create_new_text_input( + header, + system_role_headers, + user_role_headers, + text_input_headers, + content, + ) + + pa_json = LlmInputs._add_new_text_input_to_json( + pa_json, index, new_text_input + ) + + pa_json = LlmInputs._add_optional_tags_to_vllm_json( + pa_json, index, add_model_name, add_stream, model_name ) return pa_json @@ -188,6 +401,12 @@ def _create_empty_openai_pa_json(cls) -> Dict: return empty_pa_json + @classmethod + def _create_empty_vllm_pa_json(cls) -> Dict: + empty_pa_json = deepcopy(LlmInputs.EMPTY_JSON_IN_VLLM_PA_FORMAT) + + return empty_pa_json + @classmethod def _create_new_message( cls, @@ -211,6 +430,26 @@ def _create_new_message( return new_message + @classmethod + def _create_new_text_input( + cls, + header: str, + system_role_headers: List[str], + user_role_headers: List[str], + text_input_headers: List[str], + content: str, + ) -> Optional[str]: + new_text_input = "" + + if ( + header in system_role_headers + or header in user_role_headers + or header in text_input_headers + ): + new_text_input = content + + return new_text_input + @classmethod def _add_new_message_to_json( cls, pa_json: Dict, index: int, new_message: Optional[Dict] @@ -221,16 +460,55 
@@ def _add_new_message_to_json( return pa_json @classmethod - def _add_optional_tags_to_json( - cls, pa_json: Dict, index: int, model_name: str, add_stream: bool + def _add_new_text_input_to_json( + cls, pa_json: Dict, index: int, new_text_input: str ) -> Dict: - if model_name: + if new_text_input: + pa_json["data"][index]["text_input"].append(new_text_input) + + return pa_json + + @classmethod + def _add_optional_tags_to_openai_json( + cls, + pa_json: Dict, + index: int, + add_model_name: bool, + add_stream: bool, + model_name: str = "", + ) -> Dict: + if add_model_name: pa_json["data"][0]["payload"][index]["model"] = model_name if add_stream: - pa_json["data"][0]["payload"][index]["steam"] = "true" + pa_json["data"][0]["payload"][index]["stream"] = [True] + + return pa_json + + @classmethod + def _add_optional_tags_to_vllm_json( + cls, + pa_json: Dict, + index: int, + add_model_name: bool, + add_stream: bool, + model_name: str = "", + ) -> Dict: + if add_model_name: + pa_json["data"][index]["model"] = model_name + if add_stream: + pa_json["data"][index]["stream"] = [True] return pa_json + @classmethod + def _check_for_model_name_if_input_type_is_url( + cls, input_type: InputType, model_name: str + ) -> None: + if input_type == InputType.URL and not model_name: + raise GenAiPAException( + "Input type is URL, but model_name is not specified." 
+ ) + @classmethod def _check_for_valid_starting_index(cls, starting_index: int) -> None: if not isinstance(starting_index, int): diff --git a/src/c++/perf_analyzer/genai-pa/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-pa/tests/test_llm_inputs.py index bc8855e7f..4832863e5 100644 --- a/src/c++/perf_analyzer/genai-pa/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-pa/tests/test_llm_inputs.py @@ -16,8 +16,14 @@ import os import pytest +from genai_pa.constants import CNN_DAILY_MAIL, DEFAULT_INPUT_DATA_JSON, OPEN_ORCA from genai_pa.exceptions import GenAiPAException -from genai_pa.llm_inputs.llm_inputs import LlmInputs +from genai_pa.llm_inputs.llm_inputs import ( + InputFormat, + InputType, + LlmInputs, + OutputFormat, +) class TestLlmInputs: @@ -31,33 +37,36 @@ def default_configured_url(self): yield default_configured_url + # TODO: Add tests that verify json schemas + + def test_input_type_url_no_model_name(self): + """ + Test for exception when input type is URL and no model name + """ + with pytest.raises(GenAiPAException): + _ = LlmInputs._check_for_model_name_if_input_type_is_url( + input_type=InputType.URL, model_name="" + ) + def test_illegal_starting_index(self): """ Test for exceptions when illegal values are given for starting index """ with pytest.raises(GenAiPAException): - _ = LlmInputs._check_for_valid_args( - starting_index="foo", length=LlmInputs.DEFAULT_LENGTH - ) + _ = LlmInputs._check_for_valid_starting_index(starting_index="foo") with pytest.raises(GenAiPAException): - _ = LlmInputs._check_for_valid_args( - starting_index=-1, length=LlmInputs.DEFAULT_LENGTH - ) + _ = LlmInputs._check_for_valid_starting_index(starting_index=-1) def test_illegal_length(self): """ Test for exceptions when illegal values are given for length """ with pytest.raises(GenAiPAException): - _ = LlmInputs._check_for_valid_args( - starting_index=LlmInputs.DEFAULT_STARTING_INDEX, length="foo" - ) + _ = LlmInputs._check_for_valid_length(length="foo") with 
pytest.raises(GenAiPAException): - _ = LlmInputs._check_for_valid_args( - starting_index=LlmInputs.DEFAULT_STARTING_INDEX, length=0 - ) + _ = LlmInputs._check_for_valid_length(length=0) def test_create_configured_url(self): """ @@ -92,10 +101,13 @@ def test_llm_inputs_error_in_server_response(self): Test for exception when length is out of range """ with pytest.raises(GenAiPAException): - _ = LlmInputs.create_openai_llm_inputs( - LlmInputs.OPEN_ORCA_URL, - LlmInputs.DEFAULT_STARTING_INDEX, - int(LlmInputs.DEFAULT_LENGTH * 100), + _ = LlmInputs.create_llm_inputs( + input_type=InputType.URL, + input_format=InputFormat.OPENAI, + output_format=OutputFormat.OPENAI, + model_name=OPEN_ORCA, + starting_index=LlmInputs.DEFAULT_STARTING_INDEX, + length=int(LlmInputs.DEFAULT_LENGTH * 100), ) def test_llm_inputs_with_defaults(self, default_configured_url): @@ -107,7 +119,9 @@ def test_llm_inputs_with_defaults(self, default_configured_url): LlmInputs.DEFAULT_STARTING_INDEX, LlmInputs.DEFAULT_LENGTH, ) - dataset_json = LlmInputs._convert_dataset_to_json(dataset) + dataset_json = LlmInputs._convert_input_dataset_to_generic_json( + input_format=InputFormat.OPENAI, dataset=dataset + ) assert dataset_json is not None assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH @@ -126,7 +140,9 @@ def test_llm_inputs_with_non_default_length(self): LlmInputs.DEFAULT_STARTING_INDEX, length=(int(LlmInputs.DEFAULT_LENGTH / 2)), ) - dataset_json = LlmInputs._convert_dataset_to_json(dataset) + dataset_json = LlmInputs._convert_input_dataset_to_generic_json( + input_format=InputFormat.OPENAI, dataset=dataset + ) assert dataset_json is not None assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH / 2 @@ -140,8 +156,15 @@ def test_convert_default_json_to_pa_format(self, default_configured_url): LlmInputs.DEFAULT_STARTING_INDEX, LlmInputs.DEFAULT_LENGTH, ) - dataset_json = LlmInputs._convert_dataset_to_json(dataset) - pa_json = LlmInputs._convert_json_to_pa_format(dataset_json, "", 
False) + dataset_json = LlmInputs._convert_input_dataset_to_generic_json( + input_format=InputFormat.OPENAI, dataset=dataset + ) + pa_json = LlmInputs._convert_generic_json_to_output_format( + output_format=OutputFormat.OPENAI, + generic_dataset=dataset_json, + add_model_name=False, + add_stream=False, + ) assert pa_json is not None assert len(pa_json["data"][0]["payload"]) == LlmInputs.DEFAULT_LENGTH @@ -150,9 +173,14 @@ def test_create_openai_llm_inputs_cnn_dailymail(self): """ Test CNN_DAILYMAIL can be accessed """ - pa_json = LlmInputs.create_openai_llm_inputs(LlmInputs.CNN_DAILYMAIL_URL) + pa_json = LlmInputs.create_llm_inputs( + input_type=InputType.URL, + input_format=InputFormat.OPENAI, + output_format=OutputFormat.OPENAI, + model_name=CNN_DAILY_MAIL, + ) - os.remove(LlmInputs.OUTPUT_FILENAME) + os.remove(DEFAULT_INPUT_DATA_JSON) assert pa_json is not None assert len(pa_json["data"][0]["payload"]) == LlmInputs.DEFAULT_LENGTH @@ -161,14 +189,37 @@ def test_write_to_file(self): """ Test that write to file is working correctly """ - pa_json = LlmInputs.create_openai_llm_inputs( - model_name="OpenOrca", add_stream=True + pa_json = LlmInputs.create_llm_inputs( + input_type=InputType.URL, + input_format=InputFormat.OPENAI, + output_format=OutputFormat.OPENAI, + model_name=OPEN_ORCA, + add_model_name=True, + add_stream=True, ) try: - f = open(LlmInputs.OUTPUT_FILENAME, "r") + f = open(DEFAULT_INPUT_DATA_JSON, "r") json_str = f.read() finally: f.close() - os.remove(LlmInputs.OUTPUT_FILENAME) + os.remove(DEFAULT_INPUT_DATA_JSON) assert pa_json == json.loads(json_str) + + def test_create_openai_to_vllm(self): + """ + Test conversion of openai to vllm + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=InputType.URL, + input_format=InputFormat.OPENAI, + output_format=OutputFormat.VLLM, + model_name=OPEN_ORCA, + add_model_name=False, + add_stream=True, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == 
LlmInputs.DEFAULT_LENGTH