"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "6e1100889e6a675d17ad82815acf8f02f1cc419e"
Commit e983c804 authored by zhuwenwen
Browse files

add --gpu-max-threads-per-block=1024 to fix rms_norm_kernel accuracy

parent 0dc4c8e9
......@@ -119,10 +119,11 @@ function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG)
list(APPEND GPU_FLAGS
"-DUSE_ROCM"
#"-DENABLE_FP8"
# "-DENABLE_FP8"
"-U__HIP_NO_HALF_CONVERSIONS__"
"-U__HIP_NO_HALF_OPERATORS__"
"-fno-gpu-rdc")
"-fno-gpu-rdc"
"--gpu-max-threads-per-block=1024")
endif()
set(${OUT_GPU_FLAGS} ${GPU_FLAGS} PARENT_SCOPE)
......@@ -451,4 +452,4 @@ function (define_gpu_extension_target GPU_MOD_NAME)
endif()
install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME})
endfunction()
endfunction()
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment