From cbfb022a9a3fb5b8f2455c0596f1e69ddf0ef8e6 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz <adobrzyniewicz@habana.ai>
Date: Thu, 9 Jan 2025 09:53:09 +0200
Subject: [PATCH] Populate multi_modal_placeholder_index_maps in HPU _prepare_prompt

---
 vllm/worker/hpu_model_runner.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py
index 673536cc4abe2..40c8433afb6af 100755
--- a/vllm/worker/hpu_model_runner.py
+++ b/vllm/worker/hpu_model_runner.py
@@ -865,12 +865,10 @@ def _prepare_prompt(
             # is always the first token in the sequence.
             input_positions.append(list(range(context_len, seq_len)))
 
-            computed_len = seq_data.get_num_computed_tokens()
-            positions_range = range(computed_len, seq_len)
-
             if seq_group_metadata.multi_modal_data:
+                positions = input_positions[0]
                 mm_data, placeholder_maps = MultiModalPlaceholderMap \
-                    .from_seq_group(seq_group_metadata, positions_range)
+                    .from_seq_group(seq_group_metadata, range(positions[0], positions[0] + len(positions)))
 
                 if self.mm_registry.has_processor(self.model_config):
                     mm_kwargs = mm_data
@@ -988,6 +986,12 @@ def _prepare_prompt(
                                            dtype=torch.long,
                                            device='cpu')
 
+        placeholder_index_maps = {
+            modality: placeholder_map.index_map()
+            for modality, placeholder_map in
+            multi_modal_placeholder_maps.items()
+        }
+
         # Note: num_prefill_tokens is calculated using the length of
         # input_tokens after padding.
         num_prefill_tokens = input_tokens_tensor.numel()
@@ -1021,8 +1025,7 @@ def _prepare_prompt(
             num_prefill_tokens=num_prefill_tokens,
             num_decode_tokens=0,
             slot_mapping=slot_mapping,
-            multi_modal_placeholder_index_maps=
-            None  # FIXME(kzawora): mutli-modality will not work here
+            multi_modal_placeholder_index_maps=placeholder_index_maps
         )
         multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
         for t in multi_modal_kwargs: