Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6b9a4bd5
"ml/backend/git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "40b8fdbdcacb41b9cf42869051df765f66750036"
Commit
6b9a4bd5
authored
Apr 23, 2024
by
Jun Liu
Browse files
Merge branch 'amd-develop-staging-0423' into amd-master
parents
56de337f
c5f1cdf7
Changes
364
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
282 additions
and
84 deletions
+282
-84
.github/CODEOWNERS
.github/CODEOWNERS
+1
-1
CMakeLists.txt
CMakeLists.txt
+19
-3
Jenkinsfile
Jenkinsfile
+6
-3
client_example/02_gemm_add_add_fastgelu/CMakeLists.txt
client_example/02_gemm_add_add_fastgelu/CMakeLists.txt
+18
-16
client_example/03_gemm_layernorm/CMakeLists.txt
client_example/03_gemm_layernorm/CMakeLists.txt
+6
-4
client_example/04_contraction/CMakeLists.txt
client_example/04_contraction/CMakeLists.txt
+12
-11
client_example/07_grouped_convnd_fwd/CMakeLists.txt
client_example/07_grouped_convnd_fwd/CMakeLists.txt
+6
-4
client_example/08_fused_attention/CMakeLists.txt
client_example/08_fused_attention/CMakeLists.txt
+6
-4
client_example/09_quantization/CMakeLists.txt
client_example/09_quantization/CMakeLists.txt
+15
-15
client_example/15_convnd_bwd_data/CMakeLists.txt
client_example/15_convnd_bwd_data/CMakeLists.txt
+6
-4
client_example/15_gemm_add_multiply/CMakeLists.txt
client_example/15_gemm_add_multiply/CMakeLists.txt
+4
-3
client_example/16_convnd_fwd/CMakeLists.txt
client_example/16_convnd_fwd/CMakeLists.txt
+13
-0
client_example/16_convnd_fwd/common.hpp
client_example/16_convnd_fwd/common.hpp
+5
-3
client_example/16_convnd_fwd/conv3d_fwd_bf8.cpp
client_example/16_convnd_fwd/conv3d_fwd_bf8.cpp
+46
-0
client_example/16_convnd_fwd/conv3d_fwd_bf8_fp8.cpp
client_example/16_convnd_fwd/conv3d_fwd_bf8_fp8.cpp
+50
-0
client_example/16_convnd_fwd/conv3d_fwd_fp8_bf8.cpp
client_example/16_convnd_fwd/conv3d_fwd_fp8_bf8.cpp
+50
-0
client_example/17_grouped_gemm_fastgelu/CMakeLists.txt
client_example/17_grouped_gemm_fastgelu/CMakeLists.txt
+4
-2
client_example/20_splitk_gemm/CMakeLists.txt
client_example/20_splitk_gemm/CMakeLists.txt
+1
-1
client_example/21_grouped_gemm_bias/CMakeLists.txt
client_example/21_grouped_gemm_bias/CMakeLists.txt
+4
-2
client_example/22_grouped_gemm/CMakeLists.txt
client_example/22_grouped_gemm/CMakeLists.txt
+10
-8
No files found.
.github/CODEOWNERS
View file @
6b9a4bd5
* @zjing14 @junliume @illsilin @carlushuang @aosewski
* @zjing14 @junliume @illsilin @carlushuang @aosewski
@yigex
# Documentation files
# Documentation files
docs/* @ROCm/rocm-documentation
docs/* @ROCm/rocm-documentation
*.md @ROCm/rocm-documentation
*.md @ROCm/rocm-documentation
...
...
CMakeLists.txt
View file @
6b9a4bd5
...
@@ -81,9 +81,6 @@ endif()
...
@@ -81,9 +81,6 @@ endif()
include
(
getopt
)
include
(
getopt
)
# CK config file to record supported datatypes, etc.
configure_file
(
include/ck/config.h.in
${
CMAKE_CURRENT_BINARY_DIR
}
/include/ck/config.h
)
# CK version file to record release version as well as git commit hash
# CK version file to record release version as well as git commit hash
find_package
(
Git REQUIRED
)
find_package
(
Git REQUIRED
)
execute_process
(
COMMAND
"
${
GIT_EXECUTABLE
}
"
rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process
(
COMMAND
"
${
GIT_EXECUTABLE
}
"
rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE
)
...
@@ -143,6 +140,25 @@ if(GPU_TARGETS)
...
@@ -143,6 +140,25 @@ if(GPU_TARGETS)
else
()
else
()
message
(
"Building CK for the following targets:
${
AMDGPU_TARGETS
}
"
)
message
(
"Building CK for the following targets:
${
AMDGPU_TARGETS
}
"
)
endif
()
endif
()
if
(
GPU_TARGETS
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_definitions
(
-DCK_USE_XDL
)
set
(
CK_USE_XDL
"ON"
)
endif
()
if
(
GPU_TARGETS MATCHES
"gfx11"
)
add_definitions
(
-DCK_USE_WMMA
)
set
(
CK_USE_WMMA
"ON"
)
endif
()
else
()
add_definitions
(
-DCK_USE_WMMA -DCK_USE_XDL
)
set
(
CK_USE_XDL
"ON"
)
set
(
CK_USE_WMMA
"ON"
)
endif
()
# CK config file to record supported datatypes, etc.
configure_file
(
include/ck/config.h.in
${
CMAKE_CURRENT_BINARY_DIR
}
/include/ck/config.h
)
find_package
(
hip
)
find_package
(
hip
)
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
# SWDEV-413293 and https://reviews.llvm.org/D155213
# SWDEV-413293 and https://reviews.llvm.org/D155213
...
...
Jenkinsfile
View file @
6b9a4bd5
...
@@ -619,6 +619,8 @@ def process_results(Map conf=[:]){
...
@@ -619,6 +619,8 @@ def process_results(Map conf=[:]){
dir
(
"script"
){
dir
(
"script"
){
if
(
params
.
RUN_FULL_QA
){
if
(
params
.
RUN_FULL_QA
){
// unstash perf files to master
// unstash perf files to master
unstash
"ckprofiler_0.2.0_amd64.deb"
sh
"sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
unstash
"perf_gemm.log"
unstash
"perf_gemm.log"
unstash
"perf_resnet50_N256.log"
unstash
"perf_resnet50_N256.log"
unstash
"perf_resnet50_N4.log"
unstash
"perf_resnet50_N4.log"
...
@@ -632,8 +634,6 @@ def process_results(Map conf=[:]){
...
@@ -632,8 +634,6 @@ def process_results(Map conf=[:]){
unstash
"perf_onnx_gemm.log"
unstash
"perf_onnx_gemm.log"
unstash
"perf_mixed_gemm.log"
unstash
"perf_mixed_gemm.log"
sh
"./process_qa_data.sh"
sh
"./process_qa_data.sh"
unstash
"ckprofiler_0.2.0_amd64.deb"
sh
"sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
}
}
else
{
else
{
// unstash perf files to master
// unstash perf files to master
...
@@ -645,10 +645,13 @@ def process_results(Map conf=[:]){
...
@@ -645,10 +645,13 @@ def process_results(Map conf=[:]){
}
}
}
}
catch
(
e
){
catch
(
e
){
echo
"
t
hrowing error exception while processing performance test results"
echo
"
T
hrowing error exception while processing performance test results"
echo
'Exception occurred: '
+
e
.
toString
()
echo
'Exception occurred: '
+
e
.
toString
()
throw
e
throw
e
}
}
finally
{
echo
"Finished processing performance test results"
}
}
}
}
}
}
}
...
...
client_example/02_gemm_add_add_fastgelu/CMakeLists.txt
View file @
6b9a4bd5
add_custom_target
(
client_gemm_fastgelu_examples
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_custom_target
(
client_gemm_fastgelu_examples
)
add_executable
(
client_gemm_add_add_fastgelu gemm_add_add_fastgelu.cpp
)
add_executable
(
client_gemm_add_add_fastgelu gemm_add_add_fastgelu.cpp
)
target_link_libraries
(
client_gemm_add_add_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_add_add_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_gemm_add_fastgelu gemm_add_fastgelu.cpp
)
add_executable
(
client_gemm_add_fastgelu gemm_add_fastgelu.cpp
)
target_link_libraries
(
client_gemm_add_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_add_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_gemm_fastgelu gemm_fastgelu.cpp
)
add_executable
(
client_gemm_fastgelu gemm_fastgelu.cpp
)
target_link_libraries
(
client_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
add_dependencies
(
client_gemm_fastgelu_examples client_gemm_add_add_fastgelu client_gemm_add_fastgelu
add_dependencies
(
client_gemm_fastgelu_examples client_gemm_add_add_fastgelu client_gemm_add_fastgelu
client_gemm_fastgelu
)
client_gemm_fastgelu
)
add_custom_target
(
client_gemm_fastgelu_generic_examples
)
add_custom_target
(
client_gemm_fastgelu_generic_examples
)
add_executable
(
client_gemm_add_add_fastgelu_generic gemm_add_add_fastgelu_generic.cpp
)
add_executable
(
client_gemm_add_add_fastgelu_generic gemm_add_add_fastgelu_generic.cpp
)
target_link_libraries
(
client_gemm_add_add_fastgelu_generic composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_add_add_fastgelu_generic composable_kernel::device_gemm_operations
)
add_executable
(
client_gemm_add_fastgelu_generic gemm_add_fastgelu_generic.cpp
)
add_executable
(
client_gemm_add_fastgelu_generic gemm_add_fastgelu_generic.cpp
)
target_link_libraries
(
client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_gemm_fastgelu_generic gemm_fastgelu_generic.cpp
)
add_executable
(
client_gemm_fastgelu_generic gemm_fastgelu_generic.cpp
)
target_link_libraries
(
client_gemm_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations
)
add_dependencies
(
client_gemm_fastgelu_generic_examples client_gemm_add_add_fastgelu_generic
add_dependencies
(
client_gemm_fastgelu_generic_examples client_gemm_add_add_fastgelu_generic
client_gemm_add_fastgelu_generic client_gemm_fastgelu_generic
)
client_gemm_add_fastgelu_generic client_gemm_fastgelu_generic
)
endif
()
client_example/03_gemm_layernorm/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_gemm_add_add_layernorm_naive gemm_add_add_layernorm_naive.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations
)
add_executable
(
client_gemm_add_add_layernorm_naive gemm_add_add_layernorm_naive.cpp
)
target_link_libraries
(
client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations
)
add_executable
(
client_gemm_add_relu_add_layernorm_welford gemm_add_relu_add_layernorm_welford.cpp
)
add_executable
(
client_gemm_add_relu_add_layernorm_welford gemm_add_relu_add_layernorm_welford.cpp
)
target_link_libraries
(
client_gemm_add_relu_add_layernorm_welford PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations
)
target_link_libraries
(
client_gemm_add_relu_add_layernorm_welford PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations
)
endif
()
client_example/04_contraction/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_contraction_scale_fp32 contraction_scale_fp32.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_contraction_scale_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_contraction_scale_fp32 contraction_scale_fp32.cpp
)
target_link_libraries
(
client_contraction_scale_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_contraction_bilinear_fp32 contraction_bilinear_fp32.cpp
)
add_executable
(
client_contraction_bilinear_fp32 contraction_bilinear_fp32.cpp
)
target_link_libraries
(
client_contraction_bilinear_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_contraction_bilinear_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_contraction_scale_fp64 contraction_scale_fp64.cpp
)
add_executable
(
client_contraction_scale_fp64 contraction_scale_fp64.cpp
)
target_link_libraries
(
client_contraction_scale_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_contraction_scale_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_contraction_bilinear_fp64 contraction_bilinear_fp64.cpp
)
add_executable
(
client_contraction_bilinear_fp64 contraction_bilinear_fp64.cpp
)
target_link_libraries
(
client_contraction_bilinear_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_contraction_bilinear_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
add_executable
(
contraction_g1m2n3k1_add_xdl_fp16 contraction_g1m2n3k1_add_xdl_fp16.cpp
)
target_link_libraries
(
contraction_g1m2n3k1_add_xdl_fp16 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
add_executable
(
contraction_g1m2n3k1_add_xdl_fp16 contraction_g1m2n3k1_add_xdl_fp16.cpp
)
target_link_libraries
(
contraction_g1m2n3k1_add_xdl_fp16 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations
)
endif
()
client_example/07_grouped_convnd_fwd/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_grouped_conv2d_fwd grouped_conv2d_fwd.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_grouped_conv2d_fwd PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_grouped_conv2d_fwd grouped_conv2d_fwd.cpp
)
target_link_libraries
(
client_grouped_conv2d_fwd PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp
)
add_executable
(
client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp
)
target_link_libraries
(
client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations
)
endif
()
\ No newline at end of file
client_example/08_fused_attention/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_fused_attention fused_attention.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_fused_attention PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_fused_attention fused_attention.cpp
)
target_link_libraries
(
client_fused_attention PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_fused_attention_bias fused_attention_bias.cpp
)
add_executable
(
client_fused_attention_bias fused_attention_bias.cpp
)
target_link_libraries
(
client_fused_attention_bias PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_fused_attention_bias PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
endif
()
client_example/09_quantization/CMakeLists.txt
View file @
6b9a4bd5
if
(
DTYPES MATCHES
"int8"
OR NOT DEFINED DTYPES
)
if
(
GPU_TARGETS MATCHES
"gfx9"
AND
(
DTYPES MATCHES
"int8"
OR NOT DEFINED DTYPES
)
)
add_executable
(
client_conv2d_fwd_bias_tanh_perchannel_quantization conv2d_fwd_bias_tanh_perchannel_quantization.cpp
)
add_executable
(
client_conv2d_fwd_bias_tanh_perchannel_quantization conv2d_fwd_bias_tanh_perchannel_quantization.cpp
)
target_link_libraries
(
client_conv2d_fwd_bias_tanh_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_conv2d_fwd_bias_tanh_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_conv2d_fwd_bias_relu_perchannel_quantization conv2d_fwd_bias_relu_perchannel_quantization.cpp
)
add_executable
(
client_conv2d_fwd_bias_relu_perchannel_quantization conv2d_fwd_bias_relu_perchannel_quantization.cpp
)
target_link_libraries
(
client_conv2d_fwd_bias_relu_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_conv2d_fwd_bias_relu_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_conv2d_fwd_bias_tanh_perlayer_quantization conv2d_fwd_bias_tanh_perlayer_quantization.cpp
)
add_executable
(
client_conv2d_fwd_bias_tanh_perlayer_quantization conv2d_fwd_bias_tanh_perlayer_quantization.cpp
)
target_link_libraries
(
client_conv2d_fwd_bias_tanh_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_conv2d_fwd_bias_tanh_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_conv2d_fwd_bias_relu_perlayer_quantization conv2d_fwd_bias_relu_perlayer_quantization.cpp
)
add_executable
(
client_conv2d_fwd_bias_relu_perlayer_quantization conv2d_fwd_bias_relu_perlayer_quantization.cpp
)
target_link_libraries
(
client_conv2d_fwd_bias_relu_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_conv2d_fwd_bias_relu_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_conv2d_fwd_perchannel_quantization conv2d_fwd_perchannel_quantization.cpp
)
add_executable
(
client_conv2d_fwd_perchannel_quantization conv2d_fwd_perchannel_quantization.cpp
)
target_link_libraries
(
client_conv2d_fwd_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_conv2d_fwd_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_conv2d_fwd_perlayer_quantization conv2d_fwd_perlayer_quantization.cpp
)
add_executable
(
client_conv2d_fwd_perlayer_quantization conv2d_fwd_perlayer_quantization.cpp
)
target_link_libraries
(
client_conv2d_fwd_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_conv2d_fwd_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
add_executable
(
client_gemm_quantization gemm_quantization.cpp
)
add_executable
(
client_gemm_quantization gemm_quantization.cpp
)
target_link_libraries
(
client_gemm_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations
)
endif
()
endif
()
client_example/15_convnd_bwd_data/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_conv3d_bwd_data_fp16 conv3d_bwd_data_fp16.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_executable
(
client_conv3d_bwd_data_fp32 conv3d_bwd_data_fp32.cpp
)
add_executable
(
client_conv3d_bwd_data_fp16 conv3d_bwd_data_fp16.cpp
)
add_executable
(
client_conv3d_bwd_data_fp32 conv3d_bwd_data_fp32.cpp
)
target_link_libraries
(
client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_conv_operations
)
endif
()
client_example/15_gemm_add_multiply/CMakeLists.txt
View file @
6b9a4bd5
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_executable
(
client_gemm_add_multiply gemm_add_multiply.cpp
)
add_executable
(
client_gemm_add_multiply gemm_add_multiply.cpp
)
target_link_libraries
(
client_gemm_add_multiply PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_gemm_add_multiply PRIVATE composable_kernel::device_gemm_operations
)
\ No newline at end of file
endif
()
client_example/16_convnd_fwd/CMakeLists.txt
View file @
6b9a4bd5
...
@@ -12,6 +12,19 @@ if((DTYPES MATCHES "fp8") OR NOT DEFINED DTYPES)
...
@@ -12,6 +12,19 @@ if((DTYPES MATCHES "fp8") OR NOT DEFINED DTYPES)
target_link_libraries
(
client_conv3d_fwd_fp8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_fp8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
endif
()
if
((
DTYPES MATCHES
"bf8"
)
OR NOT DEFINED DTYPES
)
add_executable
(
client_conv3d_fwd_bf8 conv3d_fwd_bf8.cpp
)
target_link_libraries
(
client_conv3d_fwd_bf8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"bf8"
)
OR NOT DEFINED DTYPES
)
add_executable
(
client_conv3d_fwd_fp8_bf8 conv3d_fwd_fp8_bf8.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp8_bf8 PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_conv3d_fwd_bf8_fp8 conv3d_fwd_bf8_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_bf8_fp8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
if
((
DTYPES MATCHES
"fp32"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp32"
)
OR NOT DEFINED DTYPES
)
add_executable
(
client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp
)
add_executable
(
client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_conv_operations
)
...
...
client_example/16_convnd_fwd/common.hpp
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <cstdlib>
#include <iomanip>
#include <iomanip>
...
@@ -95,7 +95,8 @@ template <ck::index_t NumDimSpatial,
...
@@ -95,7 +95,8 @@ template <ck::index_t NumDimSpatial,
typename
WeiLayout
,
typename
WeiLayout
,
typename
OutLayout
,
typename
OutLayout
,
ck
::
index_t
NumNonSpatialDim
=
3
,
ck
::
index_t
NumNonSpatialDim
=
3
,
typename
ComputeType
=
InDataType
>
typename
AComputeType
=
InDataType
,
typename
BComputeType
=
AComputeType
>
bool
run_grouped_conv_fwd
(
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
in_lengths
,
bool
run_grouped_conv_fwd
(
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
in_lengths
,
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
wei_lengths
,
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
wei_lengths
,
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
out_lengths
)
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
out_lengths
)
...
@@ -186,7 +187,8 @@ bool run_grouped_conv_fwd(std::array<ck::index_t, NumDimSpatial + NumNonSpatialD
...
@@ -186,7 +187,8 @@ bool run_grouped_conv_fwd(std::array<ck::index_t, NumDimSpatial + NumNonSpatialD
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
ComputeType
>
;
AComputeType
,
BComputeType
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/16_convnd_fwd/conv3d_fwd_bf8.cpp
0 → 100644
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
using
InDataType
=
ck
::
bf8_t
;
using
WeiDataType
=
ck
::
bf8_t
;
using
OutDataType
=
ck
::
f8_t
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKZYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGK
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
3
;
static
constexpr
ck
::
index_t
G
=
1
;
static
constexpr
ck
::
index_t
N
=
64
;
static
constexpr
ck
::
index_t
K
=
128
;
static
constexpr
ck
::
index_t
C
=
64
;
static
constexpr
ck
::
index_t
Z
=
3
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
Di
=
28
;
static
constexpr
ck
::
index_t
Hi
=
28
;
static
constexpr
ck
::
index_t
Wi
=
3
;
static
constexpr
ck
::
index_t
Do
=
28
;
static
constexpr
ck
::
index_t
Ho
=
28
;
static
constexpr
ck
::
index_t
Wo
=
3
;
int
main
()
{
return
run_grouped_conv_fwd
<
NumDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
InLayout
,
WeiLayout
,
OutLayout
,
3
,
ck
::
bf8_t
>
(
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
?
EXIT_SUCCESS
:
EXIT_FAILURE
;
}
client_example/16_convnd_fwd/conv3d_fwd_bf8_fp8.cpp
0 → 100644
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
using
InDataType
=
ck
::
bf8_t
;
using
WeiDataType
=
ck
::
f8_t
;
using
OutDataType
=
ck
::
f8_t
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKZYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGK
;
using
AComputeType
=
ck
::
bf8_t
;
using
BComputeType
=
ck
::
f8_t
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
3
;
static
constexpr
ck
::
index_t
G
=
1
;
static
constexpr
ck
::
index_t
N
=
64
;
static
constexpr
ck
::
index_t
K
=
128
;
static
constexpr
ck
::
index_t
C
=
64
;
static
constexpr
ck
::
index_t
Z
=
3
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
Di
=
28
;
static
constexpr
ck
::
index_t
Hi
=
28
;
static
constexpr
ck
::
index_t
Wi
=
3
;
static
constexpr
ck
::
index_t
Do
=
28
;
static
constexpr
ck
::
index_t
Ho
=
28
;
static
constexpr
ck
::
index_t
Wo
=
3
;
int
main
()
{
return
run_grouped_conv_fwd
<
NumDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
InLayout
,
WeiLayout
,
OutLayout
,
3
,
AComputeType
,
BComputeType
>
(
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
?
EXIT_SUCCESS
:
EXIT_FAILURE
;
}
client_example/16_convnd_fwd/conv3d_fwd_fp8_bf8.cpp
0 → 100644
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
using
InDataType
=
ck
::
f8_t
;
using
WeiDataType
=
ck
::
bf8_t
;
using
OutDataType
=
ck
::
f8_t
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKZYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGK
;
using
AComputeType
=
ck
::
f8_t
;
using
BComputeType
=
ck
::
bf8_t
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
3
;
static
constexpr
ck
::
index_t
G
=
1
;
static
constexpr
ck
::
index_t
N
=
64
;
static
constexpr
ck
::
index_t
K
=
128
;
static
constexpr
ck
::
index_t
C
=
64
;
static
constexpr
ck
::
index_t
Z
=
3
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
Di
=
28
;
static
constexpr
ck
::
index_t
Hi
=
28
;
static
constexpr
ck
::
index_t
Wi
=
3
;
static
constexpr
ck
::
index_t
Do
=
28
;
static
constexpr
ck
::
index_t
Ho
=
28
;
static
constexpr
ck
::
index_t
Wo
=
3
;
int
main
()
{
return
run_grouped_conv_fwd
<
NumDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
InLayout
,
WeiLayout
,
OutLayout
,
3
,
AComputeType
,
BComputeType
>
(
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
?
EXIT_SUCCESS
:
EXIT_FAILURE
;
}
client_example/17_grouped_gemm_fastgelu/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_grouped_gemm_fastgelu grouped_gemm_fastgelu.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_grouped_gemm_fastgelu grouped_gemm_fastgelu.cpp
)
\ No newline at end of file
target_link_libraries
(
client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
endif
()
client_example/20_splitk_gemm/CMakeLists.txt
View file @
6b9a4bd5
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"fp16"
)
OR NOT DEFINED DTYPES
)
if
(
GPU_TARGETS MATCHES
"gfx9"
AND
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"fp16"
)
OR NOT DEFINED DTYPES
)
)
add_executable
(
client_splitK_gemm splitK_gemm_fp16_f8.cpp
)
add_executable
(
client_splitK_gemm splitK_gemm_fp16_f8.cpp
)
target_link_libraries
(
client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations
)
endif
()
endif
()
client_example/21_grouped_gemm_bias/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_grouped_gemm_fixed_nk_bias_fp16 grouped_gemm_fixed_nk_bias_fp16.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_bias_fp16 PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_grouped_gemm_fixed_nk_bias_fp16 grouped_gemm_fixed_nk_bias_fp16.cpp
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_bias_fp16 PRIVATE composable_kernel::device_gemm_operations
)
endif
()
client_example/22_grouped_gemm/CMakeLists.txt
View file @
6b9a4bd5
add_executable
(
client_grouped_gemm_fixed_nk_fp16 grouped_gemm_fixed_nk_fp16.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_fp16 PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_grouped_gemm_fixed_nk_fp16 grouped_gemm_fixed_nk_fp16.cpp
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_fp16 PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_grouped_gemm_fixed_nk_fp8 grouped_gemm_fixed_nk_fp8.cpp
)
add_executable
(
client_grouped_gemm_fixed_nk_fp8 grouped_gemm_fixed_nk_fp8.cpp
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_fp8 PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_fp8 PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_grouped_gemm_fixed_nk_i8 grouped_gemm_fixed_nk_i8.cpp
)
add_executable
(
client_grouped_gemm_fixed_nk_i8 grouped_gemm_fixed_nk_i8.cpp
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_i8 PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_i8 PRIVATE composable_kernel::device_gemm_operations
)
add_executable
(
client_grouped_gemm_fixed_nk_bf16 grouped_gemm_fixed_nk_bf16.cpp
)
add_executable
(
client_grouped_gemm_fixed_nk_bf16 grouped_gemm_fixed_nk_bf16.cpp
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_bf16 PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_grouped_gemm_fixed_nk_bf16 PRIVATE composable_kernel::device_gemm_operations
)
endif
()
Prev
1
2
3
4
5
…
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment