Unverified commit 39f9ea0d, authored by Raphaël Rialland, committed by GitHub
Browse files

[Bugfix] Fix `cudagraph_mode:FULL` dispatch (This does not impact...

[Bugfix] Fix `cudagraph_mode:FULL` dispatch (This does not impact `FULL_AND_PIECEWISE` (default)) (#36165)
parent e4ae148a
@@ -293,16 +293,14 @@ class CudagraphDispatcher:
 )
 effective_num_active_loras = self.vllm_config.lora_config.max_loras + 1
+normalized_uniform = uniform_decode and self.cudagraph_mode.separate_routine()
 batch_desc = self._create_padded_batch_descriptor(
-    num_tokens, uniform_decode, has_lora, effective_num_active_loras
+    num_tokens, normalized_uniform, has_lora, effective_num_active_loras
 )
 if CUDAGraphMode.FULL in allowed_modes:
     # check if key exists for full cudagraph
-    # For pure FULL mode, keys are registered with uniform=False.
     batch_desc_to_check = batch_desc
-    if self.cudagraph_mode == CUDAGraphMode.FULL:
-        batch_desc_to_check = replace(batch_desc, uniform=False)
     if batch_desc_to_check in self.cudagraph_keys[CUDAGraphMode.FULL]:
         return CUDAGraphMode.FULL, batch_desc_to_check
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment