Add resolve_model_relative_to_config_file config option
Signed-off-by: Leon Kiefer <[email protected]>
Legion2 committed Jan 22, 2024
1 parent 52c1c3c commit f6f7d1b
Showing 4 changed files with 45 additions and 5 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -127,6 +127,10 @@ Specifically,
and
[here](https://github.com/vllm-project/vllm/blob/ee8217e5bee5860469204ee57077a91138c9af02/vllm/engine/arg_utils.py#L201).

When using local model files, specify the path to the model in the `model` field.
By default, relative paths are resolved relative to the working directory of the Triton server process.
To specify a path relative to the `model.json` file, set the `resolve_model_relative_to_config_file` field to `true`.
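For example, a minimal `model.json` for this setup might look like the sketch below (the `local_model` directory name is illustrative):

```json
{
    "model": "./local_model",
    "resolve_model_relative_to_config_file": true
}
```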

For multi-GPU support, EngineArgs like tensor_parallel_size can be specified in
[model.json](samples/model_repository/vllm_model/1/model.json).
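A minimal sketch of such a `model.json`, assuming two GPUs are available (values are illustrative):

```json
{
    "model": "facebook/opt-125m",
    "tensor_parallel_size": 2
}
```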

15 changes: 15 additions & 0 deletions ci/L0_backend_vllm/vllm_backend/test.sh
@@ -39,12 +39,26 @@ SAMPLE_MODELS_REPO="../../../samples/model_repository"
EXPECTED_NUM_TESTS=3

rm -rf models && mkdir -p models

# operational vllm model
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt

# python model
mkdir -p models/add_sub/1/
wget -P models/add_sub/1/ https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/model.py
wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/config.pbtxt

# local vllm model
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_local
sed -i 's/"facebook\/opt-125m"/"./local_model"/' models/vllm_local/1/model.json
sed -i '/"model": /a "resolve_model_relative_to_config_file": true,' models/vllm_local/1/model.json
wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/config.json
wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/merges.txt
wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/pytorch_model.bin
wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/special_tokens_map.json
wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/tokenizer_config.json
wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/vocab.json

# Invalid model attribute
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
@@ -53,6 +67,7 @@ sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/m
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
sed -i 's/"facebook\/opt-125m"/"invalid_model"/' models/vllm_invalid_2/1/model.json


RET=0

run_server
23 changes: 18 additions & 5 deletions ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py
@@ -41,6 +41,7 @@ def setUp(self):
self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
self.vllm_model_name = "vllm_opt"
self.python_model_name = "add_sub"
self.local_vllm_model_name = "vllm_local"

def test_vllm_triton_backend(self):
# Load both vllm and add_sub models
@@ -60,9 +61,21 @@ def test_vllm_triton_backend(self):
self.assertFalse(self.triton_client.is_model_ready(self.python_model_name))

# Test vllm model and unload vllm model
self._test_vllm_model(send_parameters_as_tensor=True)
self._test_vllm_model(send_parameters_as_tensor=False)
self._test_vllm_model(self.vllm_model_name, send_parameters_as_tensor=True)
self._test_vllm_model(self.vllm_model_name, send_parameters_as_tensor=False)
self.triton_client.unload_model(self.vllm_model_name)

def test_local_vllm_model(self):
# Load local vllm model
self.triton_client.load_model(self.local_vllm_model_name)
self.assertTrue(self.triton_client.is_model_ready(self.local_vllm_model_name))

# Test local vllm model
self._test_vllm_model(self.local_vllm_model_name, send_parameters_as_tensor=True)
self._test_vllm_model(self.local_vllm_model_name, send_parameters_as_tensor=False)

# Unload local vllm model
self.triton_client.unload_model(self.local_vllm_model_name)

def test_model_with_invalid_attributes(self):
model_name = "vllm_invalid_1"
@@ -74,7 +87,7 @@ def test_vllm_invalid_model_name(self):
with self.assertRaises(InferenceServerException):
self.triton_client.load_model(model_name)

def _test_vllm_model(self, send_parameters_as_tensor):
def _test_vllm_model(self, model_name, send_parameters_as_tensor):
user_data = UserData()
stream = False
prompts = [
@@ -92,11 +105,11 @@ def _test_vllm_model(self, send_parameters_as_tensor):
i,
stream,
sampling_parameters,
self.vllm_model_name,
model_name,
send_parameters_as_tensor,
)
self.triton_client.async_stream_infer(
model_name=self.vllm_model_name,
model_name=model_name,
request_id=request_data["request_id"],
inputs=request_data["inputs"],
outputs=request_data["outputs"],
8 changes: 8 additions & 0 deletions src/model.py
@@ -112,6 +112,14 @@ def initialize(self, args):
with open(engine_args_filepath) as file:
vllm_engine_config = json.load(file)

# Resolve the model path relative to the config file
if vllm_engine_config.pop("resolve_model_relative_to_config_file", False):
vllm_engine_config["model"] = os.path.abspath(
os.path.join(
pb_utils.get_model_dir(), vllm_engine_config["model"]
)
)

# Create an AsyncLLMEngine from the config from JSON
self.llm_engine = AsyncLLMEngine.from_engine_args(
AsyncEngineArgs(**vllm_engine_config)
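To illustrate the resolution above, here is a minimal standalone sketch (the directory path is hypothetical; in the backend the base directory comes from `pb_utils.get_model_dir()`):

```python
import os

# Hypothetical model directory containing model.json
config_dir = "/opt/tritonserver/models/vllm_local/1"
# Relative path taken from the "model" field in model.json
relative_model = "./local_model"

# Same resolution as above: join with the config directory, then normalize
resolved = os.path.abspath(os.path.join(config_dir, relative_model))
print(resolved)  # /opt/tritonserver/models/vllm_local/1/local_model
```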
