Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4cf9a393
Commit
4cf9a393
authored
Aug 28, 2023
by
Jing Zhang
Browse files
fixed threadwise_copy
parent
405a15ec
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
22 deletions
+11
-22
example/01_gemm/gemm_xdl_bf16_rtn.cpp
example/01_gemm/gemm_xdl_bf16_rtn.cpp
+1
-18
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+6
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp
+2
-2
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
...library/reference_tensor_operation/cpu/reference_gemm.hpp
+2
-2
No files found.
example/01_gemm/gemm_xdl_bf16_rtn.cpp
View file @
4cf9a393
...
...
@@ -16,26 +16,9 @@ using ALayout = Row;
using
BLayout
=
Col
;
using
CLayout
=
Row
;
struct
ConvertBF16RTN_
{
// convert to bf16 using round to nearest (rtn)
template
<
typename
Y
,
typename
X
>
__host__
__device__
void
operator
()(
Y
&
y
,
const
X
&
x
)
const
{
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
ck
::
bhalf_t
,
float
>
(
ck
::
bhalf_t
&
y
,
const
float
&
x
)
const
{
y
=
ck
::
bf16_convert_rtn
<
ck
::
bhalf_t
,
float
>
(
x
);
}
};
using
AElementOp
=
PassThrough
;
using
BElementOp
=
PassThrough
;
using
CElementOp
=
ConvertBF16RTN_
;
using
CElementOp
=
ck
::
tensor_operation
::
element_wise
::
ConvertBF16RTN
;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
...
...
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
4cf9a393
...
...
@@ -39,6 +39,12 @@ struct PassThrough
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
half_t
,
float
>
(
half_t
&
y
,
const
float
&
x
)
const
{
y
=
type_convert
<
half_t
>
(
x
);
}
template
<
>
__host__
__device__
void
operator
()
<
bhalf_t
,
bhalf_t
>
(
bhalf_t
&
y
,
const
bhalf_t
&
x
)
const
{
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp
View file @
4cf9a393
...
...
@@ -104,13 +104,13 @@ struct ThreadwiseTensorSliceTransfer_v6r1
// apply pointwise operation
static_for
<
0
,
ScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
Src
Data
v
;
Dst
Data
v
;
// apply element-wise operation
element_op_
(
v
,
src_vector_container
.
template
AsType
<
SrcData
>()[
i
]);
// apply type convert
dst_vector_container
.
template
AsType
<
DstData
>()(
i
)
=
type_convert
<
DstData
>
(
v
)
;
dst_vector_container
.
template
AsType
<
DstData
>()(
i
)
=
v
;
});
const
bool
is_dst_valid
=
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
View file @
4cf9a393
...
...
@@ -92,11 +92,11 @@ struct ReferenceGemm : public device::BaseOperator
ck
::
type_convert
<
AccDataType
>
(
v_a
)
*
ck
::
type_convert
<
AccDataType
>
(
v_b
);
}
Acc
DataType
v_c
;
C
DataType
v_c
;
arg
.
c_element_op_
(
v_c
,
v_acc
);
arg
.
c_m_n_
(
m
,
n
)
=
ck
::
type_convert
<
CDataType
>
(
v_c
)
;
arg
.
c_m_n_
(
m
,
n
)
=
v_c
;
};
make_ParallelTensorFunctor
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment