Commit fd6bc480 authored by zhuwenwen
Browse files

skip fp8

parent 99b471c2
...@@ -167,7 +167,7 @@ set(VLLM_EXT_SRC ...@@ -167,7 +167,7 @@ set(VLLM_EXT_SRC
"csrc/layernorm_kernels.cu" "csrc/layernorm_kernels.cu"
"csrc/quantization/squeezellm/quant_cuda_kernel.cu" "csrc/quantization/squeezellm/quant_cuda_kernel.cu"
"csrc/quantization/gptq/q_gemm.cu" "csrc/quantization/gptq/q_gemm.cu"
"csrc/quantization/fp8/fp8_cuda_kernels.cu" # "csrc/quantization/fp8/fp8_cuda_kernels.cu"
"csrc/cuda_utils_kernels.cu" "csrc/cuda_utils_kernels.cu"
"csrc/moe_align_block_size_kernels.cu" "csrc/moe_align_block_size_kernels.cu"
"csrc/pybind.cpp") "csrc/pybind.cpp")
......
...@@ -119,7 +119,7 @@ function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG) ...@@ -119,7 +119,7 @@ function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG)
list(APPEND GPU_FLAGS list(APPEND GPU_FLAGS
"-DUSE_ROCM" "-DUSE_ROCM"
"-DENABLE_FP8_E4M3" # "-DENABLE_FP8_E4M3"
"-U__HIP_NO_HALF_CONVERSIONS__" "-U__HIP_NO_HALF_CONVERSIONS__"
"-U__HIP_NO_HALF_OPERATORS__" "-U__HIP_NO_HALF_OPERATORS__"
"-fno-gpu-rdc" "-fno-gpu-rdc"
......
...@@ -356,9 +356,9 @@ def get_vllm_version() -> str: ...@@ -356,9 +356,9 @@ def get_vllm_version() -> str:
version += f"+cu{cuda_version_str}" version += f"+cu{cuda_version_str}"
elif _is_hip(): elif _is_hip():
# Get the HIP version # Get the HIP version
hipcc_version = get_hipcc_rocm_version() # hipcc_version = get_hipcc_rocm_version()
if hipcc_version != MAIN_CUDA_VERSION: # if hipcc_version != MAIN_CUDA_VERSION:
rocm_version_str = hipcc_version.replace(".", "")[:3] # rocm_version_str = hipcc_version.replace(".", "")[:3]
# version += f"+rocm{rocm_version_str}" # version += f"+rocm{rocm_version_str}"
version = get_version() version = get_version()
elif _is_neuron(): elif _is_neuron():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment