[Hardware] [HPU]add `mark_step` for hpu (#10239)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>

[Hardware] [HPU]add `mark_step` for hpu (#10239)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
76aab90a · Kunshang Ji · GitHub · 8d74b5ae · 76aab90a
Unverified Commit 76aab90a authored Nov 17, 2024 by Kunshang Ji Committed by GitHub Nov 17, 2024
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 0 deletions

vllm/worker/hpu_model_runner.py vllm/worker/hpu_model_runner.py +14 -0

No files found.
--- a/vllm/worker/hpu_model_runner.py
+++ b/vllm/worker/hpu_model_runner.py
@@ -272,6 +272,19 @@ def precompute_indices_and_offsets(block_size, slot_mapping, is_prompt):
    return indices, offsets


+def modify_decoder_layer(module: torch.nn.Module,
+                         suffix: str = "DecoderLayer") -> None:
+    """Register a `mark_step` forward hook on every matching submodule.
+
+    Walks `module` and all of its descendants. Each module whose class name
+    ends with `suffix` gets a forward hook that calls
+    `htorch.core.mark_step()` after that module's forward pass and returns
+    the output unchanged.
+
+    Args:
+        module: Root of the module tree to instrument (modified in place).
+        suffix: Class-name suffix that selects the modules to hook.
+    """
+    if module.__class__.__name__.endswith(suffix):
+
+        def forward_hook(module, args, output):
+            # NOTE(review): presumably this splits the HPU lazy-mode graph at
+            # each decoder layer boundary -- confirm against the Gaudi docs.
+            htorch.core.mark_step()
+            return output
+
+        module.register_forward_hook(forward_hook)
+
+    # Pass `suffix` down explicitly; otherwise a caller-supplied suffix would
+    # only apply to the root and children would fall back to the default.
+    for child_module in module.children():
+        modify_decoder_layer(child_module, suffix)
+
+
 class HpuModelAdapter:

    def __init__(self, model, block_size, dtype, enforce_eager):
@@ -636,6 +649,7 @@ class HPUModelRunnerBase(ModelRunnerBase[TModelInputForHPU]):
            else:
                self.model = self.model.to("hpu")
                htcore.mark_step()
+            modify_decoder_layer(self.model)
            torch.hpu.synchronize()

            with HabanaMemoryProfiler() as m_wrap: