"vscode:/vscode.git/clone" did not exist on "43029870694de0789a10ab49f181f1cba6ec741a"
Unverified commit baee28c4, authored by cloudhan and committed by GitHub
Browse files

Reorder kv dtype check to avoid nvcc not found error on AMD platform (#3104)

parent 29e70e3e
...@@ -330,15 +330,14 @@ class CacheConfig: ...@@ -330,15 +330,14 @@ class CacheConfig:
if self.cache_dtype == "auto": if self.cache_dtype == "auto":
pass pass
elif self.cache_dtype == "fp8_e5m2": elif self.cache_dtype == "fp8_e5m2":
if is_hip():
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
nvcc_cuda_version = get_nvcc_cuda_version() nvcc_cuda_version = get_nvcc_cuda_version()
if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"): if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
raise ValueError( raise ValueError(
"FP8 is not supported when cuda version is lower than 11.8." "FP8 is not supported when cuda version is lower than 11.8."
) )
device_name = torch.cuda.get_device_name()
if "AMD" in device_name:
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
logger.info( logger.info(
"Using fp8_e5m2 data type to store kv cache. It reduces " "Using fp8_e5m2 data type to store kv cache. It reduces "
"the GPU memory footprint and boosts the performance. " "the GPU memory footprint and boosts the performance. "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment