Commit 8864e4f7 authored by zhuwenwen's avatar zhuwenwen
Browse files

[Feature] Add --gpu-max-threads-per-block=1024

parent 21723d7a
...@@ -123,7 +123,8 @@ function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG) ...@@ -123,7 +123,8 @@ function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG)
"-U__HIP_NO_HALF_CONVERSIONS__" "-U__HIP_NO_HALF_CONVERSIONS__"
"-U__HIP_NO_HALF_OPERATORS__" "-U__HIP_NO_HALF_OPERATORS__"
"-Werror=unused-variable" "-Werror=unused-variable"
"-fno-gpu-rdc") "-fno-gpu-rdc"
"--gpu-max-threads-per-block=1024")
endif() endif()
set(${OUT_GPU_FLAGS} ${GPU_FLAGS} PARENT_SCOPE) set(${OUT_GPU_FLAGS} ${GPU_FLAGS} PARENT_SCOPE)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment