"vscode:/vscode.git/clone" did not exist on "5d7e3d0176e0dbcf144c64b7d14d996c55e36c50"
Unverified commit a07c4c59, authored by Ofir Zafrir, committed by GitHub
Browse files

[BugFix][XPU] Fix speculative decoding on Intel XPU due to bug with...


[BugFix][XPU] Fix speculative decoding on Intel XPU due to bug with `IGC_ForceOCLSIMDWidth=16` (#35298)
Signed-off-by: Ofir Zafrir <ofir.zafrir@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
parent d3a51da9
@@ -201,9 +201,6 @@ class XPUPlatform(Platform):
 if vllm_config.lora_config is not None:
     compilation_config.mode = CompilationMode.NONE
-# decrease triton kernel compilation scratch space for speculative decoding
-if vllm_config.speculative_config is not None:
-    os.environ["IGC_ForceOCLSIMDWidth"] = "16"  # noqa: SIM112
 # check and update parallel config
 parallel_config = vllm_config.parallel_config
 # Only override worker_cls if it's still the default "auto"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment