Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
dda18da0
Unverified
Commit
dda18da0
authored
Oct 14, 2024
by
Illia Silin
Committed by
GitHub
Oct 14, 2024
Browse files
Merge branch 'develop' into ck_migraphx_integration
parents
3b2a7aee
4cf70b36
Changes
86
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
893 additions
and
20 deletions
+893
-20
profiler/src/CMakeLists.txt
profiler/src/CMakeLists.txt
+6
-6
script/cmake-ck-dev.sh
script/cmake-ck-dev.sh
+2
-1
script/cmake-ck-release.sh
script/cmake-ck-release.sh
+2
-1
test/CMakeLists.txt
test/CMakeLists.txt
+4
-12
test/data_type/CMakeLists.txt
test/data_type/CMakeLists.txt
+5
-0
test/data_type/test_custom_type.cpp
test/data_type/test_custom_type.cpp
+874
-0
No files found.
profiler/src/CMakeLists.txt
View file @
dda18da0
...
@@ -24,7 +24,7 @@ set(PROFILER_SOURCES
...
@@ -24,7 +24,7 @@ set(PROFILER_SOURCES
profile_permute_scale.cpp
profile_permute_scale.cpp
)
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx9"
)
if
(
DTYPES MATCHES
"fp32"
OR DTYPES MATCHES
"fp64"
OR NOT DEFINED DTYPES
)
if
(
DTYPES MATCHES
"fp32"
OR DTYPES MATCHES
"fp64"
OR NOT DEFINED DTYPES
)
list
(
APPEND PROFILER_SOURCES profile_contraction_bilinear.cpp
)
list
(
APPEND PROFILER_SOURCES profile_contraction_bilinear.cpp
)
list
(
APPEND PROFILER_SOURCES profile_contraction_scale.cpp
)
list
(
APPEND PROFILER_SOURCES profile_contraction_scale.cpp
)
...
@@ -49,7 +49,7 @@ if(GPU_TARGETS MATCHES "gfx9")
...
@@ -49,7 +49,7 @@ if(GPU_TARGETS MATCHES "gfx9")
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp
)
endif
()
endif
()
list
(
APPEND PROFILER_SOURCES profile_gemm_multiply_add.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_multiply_add.cpp
)
if
(
GPU_TARGETS MATCHES
"gfx94"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx94"
)
list
(
APPEND PROFILER_SOURCES profile_gemm_multiply_multiply.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_multiply_multiply.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_ab_scale.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_ab_scale.cpp
)
endif
()
endif
()
...
@@ -69,7 +69,7 @@ if(GPU_TARGETS MATCHES "gfx9")
...
@@ -69,7 +69,7 @@ if(GPU_TARGETS MATCHES "gfx9")
endif
()
endif
()
if
(
GPU_TARGETS MATCHES
"gfx11"
OR GPU_TARGETS MATCHES
"gfx12"
OR GPU_TARGETS MATCHES
"gfx9"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx11"
OR
SUPPORTED_
GPU_TARGETS MATCHES
"gfx12"
OR
SUPPORTED_
GPU_TARGETS MATCHES
"gfx9"
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
list
(
APPEND PROFILER_SOURCES profile_gemm_bilinear.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_bilinear.cpp
)
endif
()
endif
()
...
@@ -111,7 +111,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_inst
...
@@ -111,7 +111,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_inst
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_transpose_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_transpose_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_permute_scale_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_permute_scale_instance
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx9"
)
if
(
DTYPES MATCHES
"fp32"
OR DTYPES MATCHES
"fp64"
OR NOT DEFINED DTYPES
)
if
(
DTYPES MATCHES
"fp32"
OR DTYPES MATCHES
"fp64"
OR NOT DEFINED DTYPES
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_contraction_bilinear_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_contraction_bilinear_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_contraction_scale_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_contraction_scale_instance
)
...
@@ -135,7 +135,7 @@ if(GPU_TARGETS MATCHES "gfx9")
...
@@ -135,7 +135,7 @@ if(GPU_TARGETS MATCHES "gfx9")
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_batched_gemm_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_batched_gemm_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_batched_gemm_reduce_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_batched_gemm_reduce_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_multiply_add_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_multiply_add_instance
)
if
(
GPU_TARGETS MATCHES
"gfx94"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx94"
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_multiply_multiply_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_multiply_multiply_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_ab_scale_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_ab_scale_instance
)
endif
()
endif
()
...
@@ -159,7 +159,7 @@ if(GPU_TARGETS MATCHES "gfx9")
...
@@ -159,7 +159,7 @@ if(GPU_TARGETS MATCHES "gfx9")
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_grouped_conv3d_fwd_convinvscale_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_grouped_conv3d_fwd_convinvscale_instance
)
endif
()
endif
()
if
(
GPU_TARGETS MATCHES
"gfx9"
OR GPU_TARGETS MATCHES
"gfx11"
OR GPU_TARGETS MATCHES
"gfx12"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx9"
OR
SUPPORTED_
GPU_TARGETS MATCHES
"gfx11"
OR
SUPPORTED_
GPU_TARGETS MATCHES
"gfx12"
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_bilinear_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_bilinear_instance
)
endif
()
endif
()
...
...
script/cmake-ck-dev.sh
View file @
dda18da0
...
@@ -7,7 +7,8 @@ MY_PROJECT_SOURCE=$1
...
@@ -7,7 +7,8 @@ MY_PROJECT_SOURCE=$1
if
[
$#
-ge
2
]
;
then
if
[
$#
-ge
2
]
;
then
GPU_TARGETS
=
$2
GPU_TARGETS
=
$2
REST_ARGS
=
${
@
:3
}
shift
2
REST_ARGS
=
$@
else
else
GPU_TARGETS
=
"gfx908;gfx90a;gfx940"
GPU_TARGETS
=
"gfx908;gfx90a;gfx940"
REST_ARGS
=
REST_ARGS
=
...
...
script/cmake-ck-release.sh
View file @
dda18da0
...
@@ -7,7 +7,8 @@ MY_PROJECT_SOURCE=$1
...
@@ -7,7 +7,8 @@ MY_PROJECT_SOURCE=$1
if
[
$#
-ge
2
]
;
then
if
[
$#
-ge
2
]
;
then
GPU_TARGETS
=
$2
GPU_TARGETS
=
$2
REST_ARGS
=
${
@
:3
}
shift
2
REST_ARGS
=
$@
else
else
GPU_TARGETS
=
"gfx908;gfx90a;gfx940"
GPU_TARGETS
=
"gfx908;gfx90a;gfx940"
REST_ARGS
=
REST_ARGS
=
...
...
test/CMakeLists.txt
View file @
dda18da0
...
@@ -41,11 +41,7 @@ function(add_test_executable TEST_NAME)
...
@@ -41,11 +41,7 @@ function(add_test_executable TEST_NAME)
endforeach
()
endforeach
()
endif
()
endif
()
if
(
INSTANCES_ONLY
)
set
(
TEST_TARGETS
${
SUPPORTED_GPU_TARGETS
}
)
set
(
TEST_TARGETS
${
DEFAULT_GPU_TARGETS
}
)
else
()
set
(
TEST_TARGETS
${
GPU_TARGETS
}
)
endif
()
foreach
(
source IN LISTS ARGN
)
foreach
(
source IN LISTS ARGN
)
if
(
NOT DEFINED DL_KERNELS AND source MATCHES
"_dl"
)
if
(
NOT DEFINED DL_KERNELS AND source MATCHES
"_dl"
)
...
@@ -122,11 +118,7 @@ function(add_gtest_executable TEST_NAME)
...
@@ -122,11 +118,7 @@ function(add_gtest_executable TEST_NAME)
endforeach
()
endforeach
()
endif
()
endif
()
if
(
INSTANCES_ONLY
)
set
(
TEST_TARGETS
${
SUPPORTED_GPU_TARGETS
}
)
set
(
TEST_TARGETS
${
DEFAULT_GPU_TARGETS
}
)
else
()
set
(
TEST_TARGETS
${
GPU_TARGETS
}
)
endif
()
foreach
(
source IN LISTS ARGN
)
foreach
(
source IN LISTS ARGN
)
if
(
NOT DEFINED DL_KERNELS AND source MATCHES
"_dl"
)
if
(
NOT DEFINED DL_KERNELS AND source MATCHES
"_dl"
)
...
@@ -211,10 +203,10 @@ add_subdirectory(conv_tensor_rearrange)
...
@@ -211,10 +203,10 @@ add_subdirectory(conv_tensor_rearrange)
add_subdirectory
(
transpose
)
add_subdirectory
(
transpose
)
add_subdirectory
(
permute_scale
)
add_subdirectory
(
permute_scale
)
add_subdirectory
(
wrapper
)
add_subdirectory
(
wrapper
)
if
(
GPU_TARGETS MATCHES
"gfx11"
)
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx11"
)
add_subdirectory
(
wmma_op
)
add_subdirectory
(
wmma_op
)
endif
()
endif
()
if
(
GPU_TARGETS MATCHES
"gfx942"
AND CK_HIP_VERSION_MAJOR GREATER_EQUAL 6 AND CK_HIP_VERSION_MINOR GREATER_EQUAL 2
)
# smfmac needs ROCm6.2
if
(
SUPPORTED_
GPU_TARGETS MATCHES
"gfx942"
AND CK_HIP_VERSION_MAJOR GREATER_EQUAL 6 AND CK_HIP_VERSION_MINOR GREATER_EQUAL 2
)
# smfmac needs ROCm6.2
add_subdirectory
(
smfmac_op
)
add_subdirectory
(
smfmac_op
)
endif
()
endif
()
add_subdirectory
(
position_embedding
)
add_subdirectory
(
position_embedding
)
test/data_type/CMakeLists.txt
View file @
dda18da0
...
@@ -18,4 +18,9 @@ if(result EQUAL 0)
...
@@ -18,4 +18,9 @@ if(result EQUAL 0)
target_link_libraries
(
test_bf8 PRIVATE utility
)
target_link_libraries
(
test_bf8 PRIVATE utility
)
endif
()
endif
()
add_gtest_executable
(
test_custom_type test_custom_type.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_custom_type PRIVATE utility
)
endif
()
add_gtest_executable
(
test_type_convert_const type_convert_const.cpp
)
add_gtest_executable
(
test_type_convert_const type_convert_const.cpp
)
test/data_type/test_custom_type.cpp
0 → 100644
View file @
dda18da0
This diff is collapsed.
Click to expand it.
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment