From cbfb022a9a3fb5b8f2455c0596f1e69ddf0ef8e6 Mon Sep 17 00:00:00 2001 From: Agata Dobrzyniewicz Date: Thu, 9 Jan 2025 09:53:09 +0200 Subject: [PATCH] send placeholder_index_maps --- vllm/worker/hpu_model_runner.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index 673536cc4abe2..40c8433afb6af 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -865,12 +865,10 @@ def _prepare_prompt( # is always the first token in the sequence. input_positions.append(list(range(context_len, seq_len))) - computed_len = seq_data.get_num_computed_tokens() - positions_range = range(computed_len, seq_len) - if seq_group_metadata.multi_modal_data: + positions = input_positions[0] mm_data, placeholder_maps = MultiModalPlaceholderMap \ - .from_seq_group(seq_group_metadata, positions_range) + .from_seq_group(seq_group_metadata, range(positions[0], positions[0] + len(positions))) if self.mm_registry.has_processor(self.model_config): mm_kwargs = mm_data @@ -988,6 +986,12 @@ def _prepare_prompt( dtype=torch.long, device='cpu') + placeholder_index_maps = { + modality: placeholder_map.index_map() + for modality, placeholder_map in + multi_modal_placeholder_maps.items() + } + # Note: num_prefill_tokens is calculated using the length of # input_tokens after padding. num_prefill_tokens = input_tokens_tensor.numel() @@ -1021,8 +1025,7 @@ def _prepare_prompt( num_prefill_tokens=num_prefill_tokens, num_decode_tokens=0, slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps= - None # FIXME(kzawora): mutli-modality will not work here + multi_modal_placeholder_index_maps=placeholder_index_maps ) multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list) for t in multi_modal_kwargs: