Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
e29e5bf8
Commit
e29e5bf8
authored
Apr 05, 2023
by
Adam Osewski
Browse files
Enable set kbatch in profiler.
parent
3af2a90c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
18 deletions
+51
-18
profiler/include/profiler/profile_grouped_gemm_impl.hpp
profiler/include/profiler/profile_grouped_gemm_impl.hpp
+25
-1
profiler/src/profile_grouped_gemm.cpp
profiler/src/profile_grouped_gemm.cpp
+26
-17
No files found.
profiler/include/profiler/profile_grouped_gemm_impl.hpp
View file @
e29e5bf8
...
...
@@ -8,6 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_gemm.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp"
...
...
@@ -39,7 +40,8 @@ bool profile_grouped_gemm_impl(int do_verification,
const
std
::
vector
<
int
>&
Ks
,
const
std
::
vector
<
int
>&
StrideAs
,
const
std
::
vector
<
int
>&
StrideBs
,
const
std
::
vector
<
int
>&
StrideCs
)
const
std
::
vector
<
int
>&
StrideCs
,
int
kbatch
=
1
)
{
bool
pass
=
true
;
...
...
@@ -197,6 +199,28 @@ bool profile_grouped_gemm_impl(int do_verification,
{
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
if
(
kbatch
>
1
)
{
using
DeviceOpSplitK
=
ck
::
tensor_operation
::
device
::
DeviceGroupedGemmSplitK
<
ALayout
,
BLayout
,
ck
::
Tuple
<>
,
CLayout
,
ADataType
,
BDataType
,
ck
::
Tuple
<>
,
CDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
if
(
dynamic_cast
<
DeviceOpSplitK
*>
(
gemm_ptr
.
get
())
!=
nullptr
)
{
dynamic_cast
<
DeviceOpSplitK
*>
(
gemm_ptr
.
get
())
->
SetKBatchSize
(
argument_ptr
.
get
(),
kbatch
);
}
}
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
...
...
profiler/src/profile_grouped_gemm.cpp
View file @
e29e5bf8
...
...
@@ -52,20 +52,24 @@ std::vector<int> argToIntArray(char* input)
int
profile_grouped_gemm
(
int
argc
,
char
*
argv
[])
{
if
(
!
(
argc
==
14
)
)
if
(
argc
<
14
)
{
printf
(
"arg1: tensor operation ("
OP_NAME
": "
OP_DESC
")
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
printf
(
" 2: A[k, m] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg6: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7: time kernel (0=n0, 1=yes)
\n
"
);
printf
(
"arg8 to 13: Ms, Ns, Ks, StrideAs, StrideBs, StrideCs (e.g., 256,256 128,128 64,64 "
"64,64 64,64 128,128)
\n
"
);
std
::
cout
<<
"arg1: tensor operation ("
OP_NAME
": "
OP_DESC
")
\n
"
<<
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)
\n
"
<<
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
<<
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
<<
" 2: A[k, m] * B[k, n] = C[m, n];
\n
"
<<
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
<<
"arg4: verification (0: no; 1: yes)
\n
"
<<
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
<<
"arg6: print tensor value (0: no; 1: yes)
\n
"
<<
"arg7: time kernel (0=no, 1=yes)
\n
"
<<
"arg8 to 13: Ms, Ns, Ks, StrideAs, StrideBs, StrideCs (e.g., 256,256 128,128 64,64 "
"64,64 64,64 128,128)
\n
"
<<
"arg14: kbatch value (default 1)
\n
"
<<
std
::
endl
;
exit
(
1
);
}
...
...
@@ -83,6 +87,7 @@ int profile_grouped_gemm(int argc, char* argv[])
const
auto
StrideAs
=
argToIntArray
(
argv
[
11
]);
const
auto
StrideBs
=
argToIntArray
(
argv
[
12
]);
const
auto
StrideCs
=
argToIntArray
(
argv
[
13
]);
const
int
kbatch
=
argc
==
15
?
std
::
stoi
(
argv
[
14
])
:
1
;
if
(
data_type
==
GemmDataType
::
F16_F16_F16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
...
...
@@ -101,7 +106,8 @@ int profile_grouped_gemm(int argc, char* argv[])
Ks
,
StrideAs
,
StrideBs
,
StrideCs
);
StrideCs
,
kbatch
);
}
else
if
(
data_type
==
GemmDataType
::
F16_F16_F16
&&
layout
==
GemmMatrixLayout
::
MK_NK_MN
)
{
...
...
@@ -120,7 +126,8 @@ int profile_grouped_gemm(int argc, char* argv[])
Ks
,
StrideAs
,
StrideBs
,
StrideCs
);
StrideCs
,
kbatch
);
}
else
if
(
data_type
==
GemmDataType
::
F16_F16_F16
&&
layout
==
GemmMatrixLayout
::
KM_KN_MN
)
{
...
...
@@ -139,7 +146,8 @@ int profile_grouped_gemm(int argc, char* argv[])
Ks
,
StrideAs
,
StrideBs
,
StrideCs
);
StrideCs
,
kbatch
);
}
else
if
(
data_type
==
GemmDataType
::
F16_F16_F16
&&
layout
==
GemmMatrixLayout
::
KM_NK_MN
)
{
...
...
@@ -158,7 +166,8 @@ int profile_grouped_gemm(int argc, char* argv[])
Ks
,
StrideAs
,
StrideBs
,
StrideCs
);
StrideCs
,
kbatch
);
}
else
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment