Unverified commit 2e8cbb58, authored by fhl2000, committed by GitHub
Browse files

[BugFix] Fix full cuda graph slot_mapping (#21228)


Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com>
parent 752c6ade
...@@ -2079,7 +2079,7 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -2079,7 +2079,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
block_table_tensor=self.input_batch.block_table[ block_table_tensor=self.input_batch.block_table[
kv_cache_group_id].get_device_tensor()[:num_reqs], kv_cache_group_id].get_device_tensor()[:num_reqs],
slot_mapping=self.input_batch. slot_mapping=self.input_batch.
block_table[kv_cache_group_id].slot_mapping[:num_reqs]) block_table[kv_cache_group_id].slot_mapping[:num_tokens])
attn_metadata_i = self.attn_metadata_builders[ attn_metadata_i = self.attn_metadata_builders[
kv_cache_group_id].build_for_cudagraph_capture( kv_cache_group_id].build_for_cudagraph_capture(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment