Commit fd6bc480 authored by zhuwenwen's avatar zhuwenwen
Browse files

skip fp8

parent 99b471c2
......@@ -167,7 +167,7 @@ set(VLLM_EXT_SRC
"csrc/layernorm_kernels.cu"
"csrc/quantization/squeezellm/quant_cuda_kernel.cu"
"csrc/quantization/gptq/q_gemm.cu"
"csrc/quantization/fp8/fp8_cuda_kernels.cu"
# "csrc/quantization/fp8/fp8_cuda_kernels.cu"
"csrc/cuda_utils_kernels.cu"
"csrc/moe_align_block_size_kernels.cu"
"csrc/pybind.cpp")
......
......@@ -119,7 +119,7 @@ function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG)
list(APPEND GPU_FLAGS
"-DUSE_ROCM"
"-DENABLE_FP8_E4M3"
# "-DENABLE_FP8_E4M3"
"-U__HIP_NO_HALF_CONVERSIONS__"
"-U__HIP_NO_HALF_OPERATORS__"
"-fno-gpu-rdc"
......
......@@ -356,9 +356,9 @@ def get_vllm_version() -> str:
version += f"+cu{cuda_version_str}"
elif _is_hip():
# Get the HIP version
hipcc_version = get_hipcc_rocm_version()
if hipcc_version != MAIN_CUDA_VERSION:
rocm_version_str = hipcc_version.replace(".", "")[:3]
# hipcc_version = get_hipcc_rocm_version()
# if hipcc_version != MAIN_CUDA_VERSION:
# rocm_version_str = hipcc_version.replace(".", "")[:3]
# version += f"+rocm{rocm_version_str}"
version = get_version()
elif _is_neuron():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment