Unverified commit 8a74c68b, authored by Cody Yu and committed by GitHub
Browse files

[Misc] Minor patch for draft model runner (#6523)

parent 61e59274
...@@ -15,8 +15,12 @@ from vllm.worker.model_runner import (ModelInputForGPUWithSamplingMetadata, ...@@ -15,8 +15,12 @@ from vllm.worker.model_runner import (ModelInputForGPUWithSamplingMetadata,
logger = init_logger(__name__) logger = init_logger(__name__)
# A flag to enable debug prints for the updated input tensors
# before each step.
debug_advance_input = False debug_advance_input = False
enable_gpu_advance_step = True # A flag to allow GPU advance step for draft model runner.
# Set to False for debugging.
allow_gpu_advance_step = True
class TP1DraftModelRunner(ModelRunner): class TP1DraftModelRunner(ModelRunner):
...@@ -196,7 +200,7 @@ class TP1DraftModelRunner(ModelRunner): ...@@ -196,7 +200,7 @@ class TP1DraftModelRunner(ModelRunner):
3. No LORA 3. No LORA
4. No prompt_adapter_config 4. No prompt_adapter_config
""" """
if not enable_gpu_advance_step: if not allow_gpu_advance_step:
return False return False
# We allow multi-step GPU only in decode mode # We allow multi-step GPU only in decode mode
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment