send placeholder_index_maps
adobrzyniewicz-habana committed Jan 9, 2025
1 parent: 5c59ccd · commit: cbfb022
Showing 1 changed file with 9 additions and 6 deletions.
vllm/worker/hpu_model_runner.py
@@ -865,12 +865,10 @@ def _prepare_prompt(
             # is always the first token in the sequence.
             input_positions.append(list(range(context_len, seq_len)))

-            computed_len = seq_data.get_num_computed_tokens()
-            positions_range = range(computed_len, seq_len)
-
             if seq_group_metadata.multi_modal_data:
+                positions = input_positions[0]
                 mm_data, placeholder_maps = MultiModalPlaceholderMap \
-                    .from_seq_group(seq_group_metadata, positions_range)
+                    .from_seq_group(seq_group_metadata, range(positions[0], positions[0] + len(positions)))

                 if self.mm_registry.has_processor(self.model_config):
                     mm_kwargs = mm_data
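
The rewritten .from_seq_group(...) call derives the placeholder range directly from the position list built just above, instead of recomputing it from seq_data.get_num_computed_tokens(). A minimal standalone sketch of the equivalence, assuming input_positions[0] holds the contiguous positions context_len through seq_len - 1 (toy numbers, not from the commit):

    # Toy bookkeeping for one sequence (hypothetical values).
    context_len, seq_len = 4, 10
    positions = list(range(context_len, seq_len))  # what input_positions[0] holds

    # The expression the commit introduces rebuilds the same contiguous
    # range from the position list itself.
    rebuilt = range(positions[0], positions[0] + len(positions))
    assert list(rebuilt) == positions  # both are [4, 5, 6, 7, 8, 9]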
@@ -988,6 +986,12 @@ def _prepare_prompt(
                                            dtype=torch.long,
                                            device='cpu')

+        placeholder_index_maps = {
+            modality: placeholder_map.index_map()
+            for modality, placeholder_map in
+            multi_modal_placeholder_maps.items()
+        }
+
         # Note: num_prefill_tokens is calculated using the length of
         # input_tokens after padding.
         num_prefill_tokens = input_tokens_tensor.numel()
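
The new placeholder_index_maps dict converts each modality's accumulated MultiModalPlaceholderMap into the index map that the attention metadata consumes, via index_map(). A self-contained sketch of the pattern, using a toy stand-in class (ToyPlaceholderMap and its flat-list index_map() are hypothetical simplifications, not vLLM's real structure):

    class ToyPlaceholderMap:
        """Tracks where a modality's placeholder tokens sit in the prompt."""

        def __init__(self, dest_ranges):
            self.dest_ranges = dest_ranges

        def index_map(self):
            # Flatten the placeholder position ranges into one index list.
            return [i for r in self.dest_ranges for i in r]

    multi_modal_placeholder_maps = {
        "image": ToyPlaceholderMap([range(5, 9)]),
        "audio": ToyPlaceholderMap([range(12, 15)]),
    }

    # The same dict comprehension the commit adds:
    placeholder_index_maps = {
        modality: placeholder_map.index_map()
        for modality, placeholder_map in multi_modal_placeholder_maps.items()
    }
    print(placeholder_index_maps)
    # {'image': [5, 6, 7, 8], 'audio': [12, 13, 14]}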
@@ -1021,8 +1025,7 @@ def _prepare_prompt(
             num_prefill_tokens=num_prefill_tokens,
             num_decode_tokens=0,
             slot_mapping=slot_mapping,
-            multi_modal_placeholder_index_maps=
-            None  # FIXME(kzawora): mutli-modality will not work here
+            multi_modal_placeholder_index_maps=placeholder_index_maps
         )
         multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
         for t in multi_modal_kwargs:
