Unverified commit baee28c4, authored by cloudhan and committed by GitHub
Browse files

Reorder kv dtype check to avoid nvcc not found error on AMD platform (#3104)

parent 29e70e3e
...@@ -330,15 +330,14 @@ class CacheConfig: ...@@ -330,15 +330,14 @@ class CacheConfig:
if self.cache_dtype == "auto": if self.cache_dtype == "auto":
pass pass
elif self.cache_dtype == "fp8_e5m2": elif self.cache_dtype == "fp8_e5m2":
if is_hip():
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
nvcc_cuda_version = get_nvcc_cuda_version() nvcc_cuda_version = get_nvcc_cuda_version()
if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"): if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
raise ValueError( raise ValueError(
"FP8 is not supported when cuda version is lower than 11.8." "FP8 is not supported when cuda version is lower than 11.8."
) )
device_name = torch.cuda.get_device_name()
if "AMD" in device_name:
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
logger.info( logger.info(
"Using fp8_e5m2 data type to store kv cache. It reduces " "Using fp8_e5m2 data type to store kv cache. It reduces "
"the GPU memory footprint and boosts the performance. " "the GPU memory footprint and boosts the performance. "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment