Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
fd1cf141
Commit
fd1cf141
authored
Jul 21, 2023
by
Jing Zhang
Browse files
add fp32 out client example
parent
abef7c4e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
9 deletions
+5
-9
client_example/20_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16_fp32_out.cpp
...ed_gemm_bias/grouped_gemm_fixed_nk_bias_fp16_fp32_out.cpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp
...brary/tensor_operation_instance/gpu/grouped_gemm_bias.hpp
+4
-8
No files found.
client_example/20_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16_fp32_out
put
.cpp
→
client_example/20_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16_fp32_out.cpp
View file @
fd1cf141
...
...
@@ -26,7 +26,7 @@ using ADataType = F16;
using
BDataType
=
F16
;
using
D0DataType
=
F32
;
using
DsDataType
=
ck
::
Tuple
<
D0DataType
>
;
using
EDataType
=
F
16
;
using
EDataType
=
F
32
;
using
ALayout
=
Row
;
using
BLayout
=
Col
;
...
...
library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp
View file @
fd1cf141
...
...
@@ -16,7 +16,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
//fp16_output
//
fp16_output
void
add_device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedGemmFixedNK
<
Row
,
Row
,
...
...
@@ -43,7 +43,7 @@ void add_device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instances(
PassThrough
,
AddBias
>>>&
instances
);
//fp32_output
//
fp32_output
void
add_device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedGemmFixedNK
<
Row
,
Row
,
...
...
@@ -70,10 +70,6 @@ void add_device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instances(
PassThrough
,
AddBias
>>>&
instances
);
template
<
typename
ALayout
,
typename
BLayout
,
typename
ELayout
,
...
...
@@ -109,7 +105,7 @@ struct DeviceOperationInstanceFactory<
{
std
::
vector
<
std
::
unique_ptr
<
DeviceOp
>>
op_ptrs
;
//fp16_output
//
fp16_output
if
constexpr
(
is_same_v
<
ADataType
,
half_t
>
&&
is_same_v
<
BDataType
,
half_t
>
&&
is_same_v
<
EDataType
,
half_t
>
)
{
...
...
@@ -125,7 +121,7 @@ struct DeviceOperationInstanceFactory<
}
}
//fp32_output
//
fp32_output
if
constexpr
(
is_same_v
<
ADataType
,
half_t
>
&&
is_same_v
<
BDataType
,
half_t
>
&&
is_same_v
<
EDataType
,
float
>
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment