Unverified Commit f256ebe4 authored by Kunshang Ji's avatar Kunshang Ji Committed by GitHub
Browse files

[Hardware][Intel GPU] add XPU bf16 support (#12392)


Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
parent f8ece6e1
......@@ -36,7 +36,7 @@ VLLM_TARGET_DEVICE=xpu python setup.py install
:::{note}
- FP16 is the default data type in the current XPU backend. The BF16 data
type will be supported in the future.
type is supported on Intel Data Center GPUs, but is not yet supported on Intel Arc GPUs.
:::
## Set up using Docker
......
......@@ -66,8 +66,13 @@ class XPUPlatform(Platform):
# check and update model config
model_config = vllm_config.model_config
if model_config.dtype == torch.bfloat16:
bf16_supported = cls.device_support_bf16()
if not bf16_supported:
logger.warning(
"bfloat16 is not fully supported on XPU, casting to float16.")
"bfloat16 is only supported on Intel Data Center GPU, "
"Intel Arc GPU is not supported yet. Your device is %s,"
"which is not supported. will fallback to float16",
cls.get_device_name())
model_config.dtype = torch.float16
if not model_config.enforce_eager:
logger.warning(
......@@ -116,3 +121,15 @@ class XPUPlatform(Platform):
) -> float:
torch.xpu.reset_peak_memory_stats(device)
return torch.xpu.max_memory_allocated(device)
@classmethod
def device_support_bf16(cls) -> bool:
    """Report whether the current XPU device is treated as bf16-capable.

    The decision is based solely on the name returned by
    ``cls.get_device_name()``, compared case-insensitively:

    * a name containing ``"arc"`` yields ``False``;
    * a name containing ``"data center gpu"`` yields ``True``;
    * any other name yields ``False`` after a warning is logged.

    Returns:
        ``True`` only when the device name identifies a data center GPU.
    """
    # Lower-case once so both substring checks are case-insensitive.
    device_name = cls.get_device_name().lower()
    # The "arc" check runs first, so it wins if both substrings appear.
    if "arc" in device_name:
        return False
    if "data center gpu" in device_name:
        return True
    # Unrecognized device: stay conservative and report no bf16 support.
    logger.warning("Unknown device name %s, always use float16",
                   device_name)
    return False
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment