gaoqiong / composable_kernel_ROCM

Commit 77fa9fda (unverified)
Merge branch 'develop' into codegen_hiprtc

Authored Nov 27, 2024 by arai713; committed via GitHub on Nov 27, 2024
Parents: 760ea189, e7b62864
Changes: 72 files
Showing 20 changed files with 31 additions and 16 deletions (+31 -16):

  +1  -1   client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_bias_fastgelu_xdl_bf16_i8.cpp
  +1  -1   client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_xdl_bf16_i8.cpp
  +1  -0   codegen/CMakeLists.txt
  +2  -2   example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp
  +1  -1   example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp
  +2  -2   example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp
  +2  -2   example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp
  +2  -2   example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp
  +16 -2   example/15_grouped_gemm/run_grouped_gemm_example.inc
  +1  -2   example/ck_tile/03_gemm/gemm_mem_pipeline.cpp
  +2  -1   example/ck_tile/03_gemm/run_gemm_example.inc
  +0  -0   include/ck/library/utility/algorithm.hpp (moved)
  +0  -0   include/ck/library/utility/check_err.hpp (moved)
  +0  -0   include/ck/library/utility/conv_common.hpp (moved)
  +0  -0   include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp (moved)
  +0  -0   include/ck/library/utility/convolution_parameter.hpp (moved)
  +0  -0   include/ck/library/utility/device_memory.hpp (moved)
  +0  -0   include/ck/library/utility/fill.hpp (moved)
  +0  -0   include/ck/library/utility/host_common_util.hpp (moved)
  +0  -0   include/ck/library/utility/host_gemm.hpp (moved)
client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_bias_fastgelu_xdl_bf16_i8.cpp

@@ -121,7 +121,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
     constexpr ck::index_t NumDTensor = 2;
 
     using GroupedGemmKernelArgument =
-        ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<NumDTensor>;
+        ck::tensor_operation::device::GroupedGemmKernelArgument<NumDTensor>;
 
     std::vector<GroupedGemmKernelArgument> grouped_gemm_kernel_args_;
     grouped_gemm_kernel_args_.reserve(group_count);
client_example/31_grouped_gemm_bf16Aint8B/grouped_gemm_multiply_xdl_bf16_i8.cpp

@@ -120,7 +120,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
     constexpr ck::index_t NumDTensor = 1;
 
     using GroupedGemmKernelArgument =
-        ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<NumDTensor>;
+        ck::tensor_operation::device::GroupedGemmKernelArgument<NumDTensor>;
 
     std::vector<GroupedGemmKernelArgument> grouped_gemm_kernel_args_;
     grouped_gemm_kernel_args_.reserve(group_count);
codegen/CMakeLists.txt

@@ -7,6 +7,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+configure_file(${CK_ROOT}/include/ck/config.h.in ${CK_ROOT}/include/ck/config.h)
 
 find_package(ROCM)
 include(ROCMInstallTargets)
example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp

@@ -246,7 +246,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
     // do GEMM
     auto argument = gemm.MakeArgument(
         p_As, p_Bs, p_Ds, p_Cs, gemm_descs, a_element_op, b_element_op, cde_element_op);
-    gemm.SetKBatchSize(argument, config.k_batch);
+    gemm.SetKBatchSize(&argument, config.k_batch);
     if(!gemm.IsSupportedArgument(argument))
     {
         throw std::runtime_error(

@@ -257,7 +257,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
     gemm.SetWorkSpacePointer(&argument, gemm_workspace_dev.GetDeviceBuffer());
 
     DeviceMem gemm_arg_dev_mem(gemm.GetDeviceKernelArgSize(&argument));
 
-    gemm.SetDeviceKernelArgs(argument, gemm_arg_dev_mem.GetDeviceBuffer());
+    gemm.SetDeviceKernelArgs(&argument, gemm_arg_dev_mem.GetDeviceBuffer());
 
     invoker.Run(argument, StreamConfig{nullptr, false, 1});
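Note: the change in this example (and in the fixed_nk examples further down) is purely at the call sites: the argument object is now passed to the device op's setters by address. The following is a minimal, self-contained illustration of that call-site shape only; the types and signatures are stand-ins invented for the sketch, not composable_kernel's actual grouped-GEMM interface.

    // Hypothetical sketch of the call-site pattern only; Argument and
    // GroupedGemmOp are stand-ins, not ck's real types.
    #include <iostream>

    struct Argument // stand-in for the device op's argument struct
    {
        int   k_batch         = 1;
        void* kernel_args_dev = nullptr;
    };

    struct GroupedGemmOp // stand-in device op whose setters take a pointer
    {
        void SetKBatchSize(Argument* p_arg, int k_batch) const { p_arg->k_batch = k_batch; }
        void SetDeviceKernelArgs(Argument* p_arg, void* p_dev) const
        {
            p_arg->kernel_args_dev = p_dev;
        }
    };

    int main()
    {
        GroupedGemmOp gemm;
        Argument      argument;
        int           dummy_device_buffer = 0;

        // As in the diffs above, the argument is passed by address:
        gemm.SetKBatchSize(&argument, 8);
        gemm.SetDeviceKernelArgs(&argument, &dummy_device_buffer);

        std::cout << "k_batch = " << argument.k_batch << '\n'; // prints "k_batch = 8"
    }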
example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp

@@ -91,7 +91,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
 {
     auto group_count = problem_size.group_count;
 
-    using KernelArguments = ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<NumDs>;
+    using KernelArguments = ck::tensor_operation::device::GroupedGemmKernelArgument<NumDs>;
     using GemmDesc        = ck::tensor_operation::device::GemmDesc;
 
     // GEMM shape
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include <iostream>
 #include <numeric>

@@ -254,7 +254,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
                                     gemm.GetDeviceKernelArgSize(&argument),
                                     hipMemcpyHostToDevice));
 
-    gemm.SetDeviceKernelArgs(argument, gemm_kernel_args_dev.GetDeviceBuffer());
+    gemm.SetDeviceKernelArgs(&argument, gemm_kernel_args_dev.GetDeviceBuffer());
     gemm.SetKBatch(argument, config.k_batch);
 
     invoker.Run(argument, StreamConfig{nullptr, false});
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include <iostream>
 #include <numeric>

@@ -239,7 +239,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             "not support this GEMM problem");
     }
 
-    gemm.SetDeviceKernelArgs(argument, gemm_arg_dev_mem.GetDeviceBuffer());
+    gemm.SetDeviceKernelArgs(&argument, gemm_arg_dev_mem.GetDeviceBuffer());
     gemm.SetKBatch(argument, config.k_batch);
 
     invoker.Run(argument, StreamConfig{nullptr, false});
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include <iostream>
 #include <numeric>

@@ -240,7 +240,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             "not support this GEMM problem");
     }
 
-    gemm.SetDeviceKernelArgs(argument, gemm_arg_dev_mem.GetDeviceBuffer());
+    gemm.SetDeviceKernelArgs(&argument, gemm_arg_dev_mem.GetDeviceBuffer());
     gemm.SetKBatch(argument, config.k_batch);
 
     invoker.Run(argument, StreamConfig{nullptr, false});
example/15_grouped_gemm/run_grouped_gemm_example.inc

@@ -168,9 +168,23 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
     auto argument = gemm.MakeArgument(
         p_a, p_b, p_Ds, p_c, gemm_descs, a_element_op, b_element_op, c_element_op);
 
-    DeviceMem gemm_desc_workspace(gemm.GetWorkSpaceSize(&argument));
+    std::size_t workspace_size = gemm.GetWorkSpaceSize(&argument);
+    std::size_t kargs_size     = gemm.GetDeviceKernelArgSize(&argument);
 
-    gemm.SetWorkSpacePointer(&argument, gemm_desc_workspace.GetDeviceBuffer());
+    DeviceMem gemm_workspace, gemm_kargs;
+
+    // The following is necessary since TwoStage kernel is using additional memory both
+    // for Workspace and kernel arguments.
+    if(kargs_size > 0)
+    {
+        gemm_kargs.Realloc(kargs_size);
+        gemm.SetDeviceKernelArgs(&argument, gemm_kargs.GetDeviceBuffer());
+    }
+    if(workspace_size > 0 && workspace_size != kargs_size)
+    {
+        gemm_workspace.Realloc(workspace_size);
+        gemm.SetWorkSpacePointer(&argument, gemm_workspace.GetDeviceBuffer());
+    }
 
     if(!gemm.IsSupportedArgument(argument))
     {
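The hunk above replaces a single unconditional workspace allocation with two independently sized buffers: both sizes are queried up front, the kernel-argument buffer is allocated whenever its size is non-zero, and a separate workspace is allocated only when its size is non-zero and differs from the kernel-argument size (per the in-diff comment, only the TwoStage kernel needs both). The host-only sketch below reproduces just that branching so it can be read in isolation; Buffer is a stand-in for ck's DeviceMem, and the sizes are made-up values rather than anything queried from a real device op.

    // Host-only sketch of the allocation branching above; Buffer stands in for DeviceMem.
    #include <cstddef>
    #include <cstdlib>
    #include <iostream>

    struct Buffer
    {
        void*       ptr   = nullptr;
        std::size_t bytes = 0;
        void Realloc(std::size_t n) { std::free(ptr); ptr = std::malloc(n); bytes = n; }
        ~Buffer() { std::free(ptr); }
    };

    int main()
    {
        // Made-up stand-ins for gemm.GetDeviceKernelArgSize(&argument) and
        // gemm.GetWorkSpaceSize(&argument).
        const std::size_t kargs_size     = 1024;
        const std::size_t workspace_size = 4096;

        Buffer gemm_kargs, gemm_workspace;

        if(kargs_size > 0)
        {
            gemm_kargs.Realloc(kargs_size); // then gemm.SetDeviceKernelArgs(&argument, ...)
        }
        // A distinct workspace is allocated only when its size differs from the
        // kernel-argument size (e.g. for the TwoStage kernel, per the comment above).
        if(workspace_size > 0 && workspace_size != kargs_size)
        {
            gemm_workspace.Realloc(workspace_size); // then gemm.SetWorkSpacePointer(&argument, ...)
        }

        std::cout << "kargs: " << gemm_kargs.bytes << " B, workspace: " << gemm_workspace.bytes
                  << " B\n";
    }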
example/ck_tile/03_gemm/gemm_mem_pipeline.cpp

@@ -30,7 +30,6 @@ float gemm_calc(const gemm_basic_args& args, const ck_tile::stream_config& s)
     constexpr ck_tile::index_t M_Warp_Tile = 32;
     constexpr ck_tile::index_t N_Warp_Tile = 32;
     constexpr ck_tile::index_t K_Warp_Tile = 8;
-
 #else
     // Compute friendly for Intrawave scheduler
     constexpr ck_tile::index_t M_Tile = 256;

@@ -84,7 +83,7 @@ float gemm_calc(const gemm_basic_args& args, const ck_tile::stream_config& s)
                                            AccDataType,
                                            GemmShape,
                                            Traits,
-                                           ck_tile::GemmPipelineScheduler::Intrawave,
+                                           ck_tile::GemmPipelineScheduler::Interwave,
                                            has_hot_loop_v,
                                            tail_number_v>>;
     using Kernel = ck_tile::GemmKernel<TilePartitioner, GemmPipeline, GemmEpilogue>;
example/ck_tile/03_gemm/run_gemm_example.inc

@@ -200,7 +200,8 @@ int run_gemm_example(int argc, char* argv[])
         return run_gemm_example_with_layouts(argc, argv, Row{}, Col{}, Row{});
     }
     // TODO: Fixme: with latest changes to GemmPipelineAGmemBGmemCRegV1DefaultPolicy below do not
-    // work. else if(a_layout == "C" && b_layout == "C")
+    // work.
+    // else if(a_layout == "C" && b_layout == "C")
     // {
     //     return run_gemm_example_with_layouts(argc, argv, Col{}, Col{}, Row{});
     // }
Files moved (no content changes):

  library/include/ck/library/utility/algorithm.hpp → include/ck/library/utility/algorithm.hpp
  library/include/ck/library/utility/check_err.hpp → include/ck/library/utility/check_err.hpp
  library/include/ck/library/utility/conv_common.hpp → include/ck/library/utility/conv_common.hpp
  library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp → include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp
  library/include/ck/library/utility/convolution_parameter.hpp → include/ck/library/utility/convolution_parameter.hpp
  library/include/ck/library/utility/device_memory.hpp → include/ck/library/utility/device_memory.hpp
  library/include/ck/library/utility/fill.hpp → include/ck/library/utility/fill.hpp
  library/include/ck/library/utility/host_common_util.hpp → include/ck/library/utility/host_common_util.hpp
  library/include/ck/library/utility/host_gemm.hpp → include/ck/library/utility/host_gemm.hpp