Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ce9d7c8d
Commit
ce9d7c8d
authored
Oct 11, 2023
by
Artur Wojcik
Browse files
enable compilation on Windows
parent
a4f72a31
Changes
36
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
61 additions
and
45 deletions
+61
-45
example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp
example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp
+1
-1
example/34_batchnorm/batchnorm_backward_nhwc.cpp
example/34_batchnorm/batchnorm_backward_nhwc.cpp
+3
-3
example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp
example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp
+3
-3
example/34_batchnorm/batchnorm_forward_training_nhwc.cpp
example/34_batchnorm/batchnorm_forward_training_nhwc.cpp
+6
-6
example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp
...34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp
+6
-6
example/34_batchnorm/batchnorm_infer_impl.hpp
example/34_batchnorm/batchnorm_infer_impl.hpp
+2
-2
example/CMakeLists.txt
example/CMakeLists.txt
+4
-4
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp
.../grid/gridwise_elementwise_layernorm_welford_variance.hpp
+2
-2
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
+1
-1
library/CMakeLists.txt
library/CMakeLists.txt
+3
-0
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+5
-1
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
.../src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
+1
-3
library/src/utility/CMakeLists.txt
library/src/utility/CMakeLists.txt
+10
-6
profiler/src/CMakeLists.txt
profiler/src/CMakeLists.txt
+1
-1
test/CMakeLists.txt
test/CMakeLists.txt
+8
-6
No files found.
example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp
View file @
ce9d7c8d
...
...
@@ -79,7 +79,7 @@ std::ostream& show_2d_matrix(std::ostream& os, Tensor<DataType>& matrix)
}
int
main
(
int
argc
,
char
*
argv
[])
{
bool
do_verification
=
0
;
bool
do_verification
=
false
;
int
init_method
=
0
;
bool
time_kernel
=
false
;
...
...
example/34_batchnorm/batchnorm_backward_nhwc.cpp
View file @
ce9d7c8d
...
...
@@ -112,7 +112,7 @@ bool bnorm_bwd_nhwc_test(bool do_verification,
bool
time_kernel
,
const
std
::
vector
<
size_t
>
inOutLengths
,
bool
haveSavedMeanInvVar
,
double
epsilon
)
double
_
epsilon
)
{
// for NHWC BatchNorm calculation of mean and meansquare
constexpr
index_t
Rank
=
4
;
...
...
@@ -292,7 +292,7 @@ bool bnorm_bwd_nhwc_test(bool do_verification,
bnScale_dev
.
GetDeviceBuffer
(),
haveSavedMeanInvVar
?
savedMean_dev
.
GetDeviceBuffer
()
:
nullptr
,
haveSavedMeanInvVar
?
savedInvVar_dev
.
GetDeviceBuffer
()
:
nullptr
,
epsilon
,
_
epsilon
,
PassThroughOp
{},
dx_dev
.
GetDeviceBuffer
(),
dscale_dev
.
GetDeviceBuffer
(),
...
...
@@ -371,7 +371,7 @@ bool bnorm_bwd_nhwc_test(bool do_verification,
bnScale
.
mData
.
data
(),
haveSavedMeanInvVar
?
savedMean
.
mData
.
data
()
:
nullptr
,
haveSavedMeanInvVar
?
savedInvVar
.
mData
.
data
()
:
nullptr
,
epsilon
,
_
epsilon
,
PassThroughOp
{},
dx_ref
.
mData
.
data
(),
dscale_ref
.
mData
.
data
(),
...
...
example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp
View file @
ce9d7c8d
...
...
@@ -119,7 +119,7 @@ bool bnorm_infer_nhwc_test(bool do_verification,
int
init_method
,
bool
time_kernel
,
const
std
::
vector
<
size_t
>
inOutLengths
,
double
epsilon
)
double
_
epsilon
)
{
// for NHWC BatchNorm calculation of mean and meansquare
constexpr
int
Rank
=
4
;
...
...
@@ -251,7 +251,7 @@ bool bnorm_infer_nhwc_test(bool do_verification,
x_dev
.
GetDeviceBuffer
(),
bnScale_dev
.
GetDeviceBuffer
(),
bnBias_dev
.
GetDeviceBuffer
(),
epsilon
,
_
epsilon
,
estimatedMean_dev
.
GetDeviceBuffer
(),
estimatedVariance_dev
.
GetDeviceBuffer
(),
y_dev
.
GetDeviceBuffer
());
...
...
@@ -289,7 +289,7 @@ bool bnorm_infer_nhwc_test(bool do_verification,
x
.
mData
.
data
(),
bnScale
.
mData
.
data
(),
bnBias
.
mData
.
data
(),
epsilon
,
_
epsilon
,
PassThroughOp
{},
estimatedMean
.
mData
.
data
(),
estimatedVariance
.
mData
.
data
(),
...
...
example/34_batchnorm/batchnorm_forward_training_nhwc.cpp
View file @
ce9d7c8d
...
...
@@ -135,8 +135,8 @@ bool bnorm_fwd_nhwc_test(bool do_verification,
const
std
::
vector
<
size_t
>
inOutLengths
,
bool
updateMovingAverage
,
bool
saveMeanAndInvVariance
,
double
averageFactor
,
double
epsilon
)
double
_
averageFactor
,
double
_
epsilon
)
{
// for NHWC BatchNorm calculation of mean and meansquare
constexpr
int
Rank
=
4
;
...
...
@@ -310,12 +310,12 @@ bool bnorm_fwd_nhwc_test(bool do_verification,
x_dev
.
GetDeviceBuffer
(),
bnScale_dev
.
GetDeviceBuffer
(),
bnBias_dev
.
GetDeviceBuffer
(),
epsilon
,
_
epsilon
,
PassThroughOp
{},
y_dev
.
GetDeviceBuffer
(),
saveMeanAndInvVariance
?
resultSaveMean_dev
.
GetDeviceBuffer
()
:
nullptr
,
saveMeanAndInvVariance
?
resultSaveInvVariance_dev
.
GetDeviceBuffer
()
:
nullptr
,
averageFactor
,
_
averageFactor
,
updateMovingAverage
?
resultRunningMean_dev
.
GetDeviceBuffer
()
:
nullptr
,
updateMovingAverage
?
resultRunningVariance_dev
.
GetDeviceBuffer
()
:
nullptr
);
...
...
@@ -392,12 +392,12 @@ bool bnorm_fwd_nhwc_test(bool do_verification,
x
.
mData
.
data
(),
bnScale
.
mData
.
data
(),
bnBias
.
mData
.
data
(),
epsilon
,
_
epsilon
,
PassThroughOp
{},
y_ref
.
mData
.
data
(),
saveMeanAndInvVariance
?
resultSaveMean_ref
.
mData
.
data
()
:
nullptr
,
saveMeanAndInvVariance
?
resultSaveInvVariance_ref
.
mData
.
data
()
:
nullptr
,
averageFactor
,
_
averageFactor
,
updateMovingAverage
?
resultRunningMean_ref
.
mData
.
data
()
:
nullptr
,
updateMovingAverage
?
resultRunningVariance_ref
.
mData
.
data
()
:
nullptr
);
...
...
example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp
View file @
ce9d7c8d
...
...
@@ -135,8 +135,8 @@ bool bnorm_fwd_nhwc_test(bool do_verification,
const
std
::
vector
<
size_t
>
inOutLengths
,
bool
updateMovingAverage
,
bool
saveMeanAndInvVariance
,
double
averageFactor
,
double
epsilon
)
double
_
averageFactor
,
double
_
epsilon
)
{
// for NHWC BatchNorm calculation of mean and meansquare
constexpr
int
Rank
=
4
;
...
...
@@ -310,12 +310,12 @@ bool bnorm_fwd_nhwc_test(bool do_verification,
x_dev
.
GetDeviceBuffer
(),
bnScale_dev
.
GetDeviceBuffer
(),
bnBias_dev
.
GetDeviceBuffer
(),
epsilon
,
_
epsilon
,
PassThroughOp
{},
y_dev
.
GetDeviceBuffer
(),
saveMeanAndInvVariance
?
resultSaveMean_dev
.
GetDeviceBuffer
()
:
nullptr
,
saveMeanAndInvVariance
?
resultSaveInvVariance_dev
.
GetDeviceBuffer
()
:
nullptr
,
averageFactor
,
_
averageFactor
,
updateMovingAverage
?
resultRunningMean_dev
.
GetDeviceBuffer
()
:
nullptr
,
updateMovingAverage
?
resultRunningVariance_dev
.
GetDeviceBuffer
()
:
nullptr
);
...
...
@@ -392,12 +392,12 @@ bool bnorm_fwd_nhwc_test(bool do_verification,
x
.
mData
.
data
(),
bnScale
.
mData
.
data
(),
bnBias
.
mData
.
data
(),
epsilon
,
_
epsilon
,
PassThroughOp
{},
y_ref
.
mData
.
data
(),
saveMeanAndInvVariance
?
resultSaveMean_ref
.
mData
.
data
()
:
nullptr
,
saveMeanAndInvVariance
?
resultSaveInvVariance_ref
.
mData
.
data
()
:
nullptr
,
averageFactor
,
_
averageFactor
,
updateMovingAverage
?
resultRunningMean_ref
.
mData
.
data
()
:
nullptr
,
updateMovingAverage
?
resultRunningVariance_ref
.
mData
.
data
()
:
nullptr
);
...
...
example/34_batchnorm/batchnorm_infer_impl.hpp
View file @
ce9d7c8d
...
...
@@ -36,7 +36,7 @@ int bnorm_infer(
const
void
*
p_x
,
const
void
*
p_scale
,
const
void
*
p_bias
,
double
epsilon
,
double
_
epsilon
,
const
void
*
p_estimatedMean
,
const
void
*
p_estimatedVariance
,
void
*
p_y
)
...
...
@@ -101,7 +101,7 @@ int bnorm_infer(
{
yStrides
},
{
p_x
,
p_estimatedMean
,
p_estimatedVariance
,
p_scale
,
p_bias
},
{
p_y
},
NormalizeInInfer
{
epsilon
});
NormalizeInInfer
{
_
epsilon
});
if
(
!
dev_normalize
.
IsSupportedArgument
(
argument_ptr1
.
get
()))
{
...
...
example/CMakeLists.txt
View file @
ce9d7c8d
...
...
@@ -51,7 +51,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
#only continue if there are some source files left on the list
if
(
FILE_NAME
)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE utility
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE utility
getopt::getopt
)
add_test
(
NAME
${
EXAMPLE_NAME
}
COMMAND $<TARGET_FILE:
${
EXAMPLE_NAME
}
>
${
ARGN
}
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
add_dependencies
(
check
${
EXAMPLE_NAME
}
)
...
...
@@ -60,7 +60,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
endif
()
#message("add_example returns ${result}")
set
(
result
${
result
}
PARENT_SCOPE
)
endfunction
(
add_example_executable EXAMPLE_NAME
)
endfunction
()
function
(
add_example_executable_no_testing EXAMPLE_NAME FILE_NAME
)
message
(
"adding example
${
EXAMPLE_NAME
}
"
)
...
...
@@ -108,14 +108,14 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
#only continue if there are some source files left on the list
if
(
FILE_NAME
)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE utility
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE utility
getopt::getopt
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
rocm_install
(
TARGETS
${
EXAMPLE_NAME
}
COMPONENT examples
)
set
(
result 0
)
endif
()
#message("add_example returns ${result}")
set
(
result
${
result
}
PARENT_SCOPE
)
endfunction
(
add_example_executable_no_testing EXAMPLE_NAME
)
endfunction
()
# add all example subdir
file
(
GLOB dir_list LIST_DIRECTORIES true *
)
...
...
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
ce9d7c8d
...
...
@@ -111,6 +111,11 @@ struct PassThrough
{
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
int4_t
,
int
>
(
int4_t
&
y
,
const
int
&
x
)
const
{
y
=
type_convert
<
int4_t
>
(
x
);
}
#endif
#if defined CK_ENABLE_FP8
...
...
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp
View file @
ce9d7c8d
...
...
@@ -119,7 +119,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
index_t
num_k_block_tile_iteration
,
AccDataType
epsilon
,
const
InDataTypePointerTuple
p_in_global_tuple
,
XDataType
*
const
__restrict__
p_x_lds
,
XDataType
*
const
__restrict__
_
p_x_lds
,
const
GammaDataType
*
const
__restrict__
p_gamma_global
,
const
BetaDataType
*
const
__restrict__
p_beta_global
,
YDataType
*
const
__restrict__
p_y_global
,
...
...
@@ -149,7 +149,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
p_y_global
,
y_grid_desc_m_k
.
GetElementSpaceSize
());
auto
x_lds_val_buf
=
make_dynamic_buffer
<
AddressSpaceEnum
::
Lds
>
(
p_x_lds
,
x_grid_desc_m_k
.
GetElementSpaceSize
()
/
grid_size
);
_
p_x_lds
,
x_grid_desc_m_k
.
GetElementSpaceSize
()
/
grid_size
);
auto
in_thread_buf_tuple
=
generate_tuple
(
[
&
](
auto
)
{
...
...
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
View file @
ce9d7c8d
...
...
@@ -328,7 +328,7 @@ struct WmmaSelector
}
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
template
<
>
static
constexpr
auto
GetWmma
<
int4_t
,
int
,
16
,
16
>
()
static
constexpr
auto
GetWmma
<
int4_t
,
int4_t
,
int
,
16
,
16
>
()
{
return
WmmaInstr
::
wmma_i32_16x16x16_iu4
;
}
...
...
library/CMakeLists.txt
View file @
ce9d7c8d
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
add_subdirectory
(
src/tensor_operation_instance/gpu
)
add_subdirectory
(
src/utility
)
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
ce9d7c8d
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
function
(
add_instance_library INSTANCE_NAME
)
message
(
"adding instance
${
INSTANCE_NAME
}
"
)
set
(
result 1
)
...
...
@@ -128,7 +131,6 @@ set(DEV_OPS_INC_DIRS
${
PROJECT_SOURCE_DIR
}
/library/include/ck/
)
target_compile_features
(
device_operations PUBLIC
)
set_target_properties
(
device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
device_operations PUBLIC
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>
...
...
@@ -154,6 +156,8 @@ target_include_directories(device_operations PUBLIC
target_compile_options
(
device_operations PRIVATE
--offload-arch=gfx908
--offload-arch=gfx90a
--offload-arch=gfx1030
--offload-arch=gfx1100
)
# install(TARGETS device_operations LIBRARY DESTINATION lib)
...
...
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
View file @
ce9d7c8d
set
(
DEVICE_SOFTMAX_INSTANCES
)
list
(
APPEND DEVICE_SOFTMAX_INSTANCES
add_instance_library
(
device_softmax_instance
device_softmax_f16_f16_instance_rank3_reduce1.cpp
device_softmax_f16_f16_instance_rank3_reduce2.cpp
device_softmax_f16_f16_instance_rank3_reduce3.cpp
...
...
@@ -14,4 +13,3 @@ list(APPEND DEVICE_SOFTMAX_INSTANCES
device_softmax_f32_f32_instance_rank4_reduce2.cpp
device_softmax_f32_f32_instance_rank4_reduce3.cpp
device_softmax_f32_f32_instance_rank4_reduce4.cpp
)
add_instance_library
(
device_softmax_instance
${
DEVICE_SOFTMAX_INSTANCES
}
)
library/src/utility/CMakeLists.txt
View file @
ce9d7c8d
## utility
set
(
UTILITY_SOURCE
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
add_library
(
utility STATIC
device_memory.cpp
host_tensor.cpp
convolution_parameter.cpp
)
add_library
(
utility STATIC
${
UTILITY_SOURCE
}
)
add_library
(
composable_kernel::utility ALIAS utility
)
set_target_properties
(
utility PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_compile_options
(
utility PRIVATE
${
CMAKE_COMPILER_WARNINGS
}
)
target_include_directories
(
utility PUBLIC
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/utility>"
)
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/utility>"
)
if
(
WIN32
)
target_compile_definitions
(
utility PUBLIC NOMINMAX
)
endif
()
rocm_install
(
TARGETS utility
...
...
profiler/src/CMakeLists.txt
View file @
ce9d7c8d
...
...
@@ -51,7 +51,7 @@ set(PROFILER_EXECUTABLE ckProfiler)
add_executable
(
${
PROFILER_EXECUTABLE
}
${
PROFILER_SOURCES
}
)
target_compile_options
(
${
PROFILER_EXECUTABLE
}
PRIVATE -Wno-global-constructors
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE utility
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE utility
getopt::getopt
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_splitk_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_add_multiply_instance
)
...
...
test/CMakeLists.txt
View file @
ce9d7c8d
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
include_directories
(
BEFORE
${
PROJECT_SOURCE_DIR
}
/
${
PROJECT_SOURCE_DIR
}
/profiler/include
)
include
(
googlet
est
)
include
(
GT
est
)
add_custom_target
(
tests
)
...
...
@@ -54,6 +57,7 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list
if
(
ARGN
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE getopt::getopt
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
...
...
@@ -62,9 +66,7 @@ function(add_test_executable TEST_NAME)
endif
()
#message("add_test returns ${result}")
set
(
result
${
result
}
PARENT_SCOPE
)
endfunction
(
add_test_executable TEST_NAME
)
include
(
GoogleTest
)
endfunction
()
function
(
add_gtest_executable TEST_NAME
)
message
(
"adding gtest
${
TEST_NAME
}
"
)
...
...
@@ -117,14 +119,14 @@ function(add_gtest_executable TEST_NAME)
# suppress gtest warnings
target_compile_options
(
${
TEST_NAME
}
PRIVATE -Wno-global-constructors -Wno-undef
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE gtest_main
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE gtest_main
getopt::getopt
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
rocm_install
(
TARGETS
${
TEST_NAME
}
COMPONENT tests
)
set
(
result 0
)
endif
()
#message("add_gtest returns ${result}")
set
(
result
${
result
}
PARENT_SCOPE
)
endfunction
(
add_gtest_executable TEST_NAME
)
endfunction
()
add_subdirectory
(
magic_number_division
)
add_subdirectory
(
space_filling_curve
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment