Unverified commit 2386803f, authored by Li, Jiang; committed by GitHub
Browse files

[CPU] Change default block_size for CPU backend (#16002)


Signed-off-by: jiang1.li <jiang1.li@intel.com>
parent 95862f7b
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import os import os
import sys import sys
from importlib.util import find_spec
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
import psutil import psutil
...@@ -68,8 +69,15 @@ class CpuPlatform(Platform): ...@@ -68,8 +69,15 @@ class CpuPlatform(Platform):
cache_config = vllm_config.cache_config cache_config = vllm_config.cache_config
ipex_avaliable = find_spec("intel_extension_for_pytorch") is not None
if cache_config and cache_config.block_size is None: if cache_config and cache_config.block_size is None:
cache_config.block_size = 16 cache_config.block_size = 128 if ipex_avaliable else 16
if not ipex_avaliable and cache_config.block_size != 16:
raise RuntimeError(
f"--block-size={cache_config.block_size} requires"
" intel_extension_for_pytorch")
scheduler_config = vllm_config.scheduler_config scheduler_config = vllm_config.scheduler_config
if ((scheduler_config.chunked_prefill_enabled if ((scheduler_config.chunked_prefill_enabled
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment