gaoqiong / composable_kernel_ROCM

Commit 1670bba9 ("clang-format-12")
Authored Oct 21, 2024 by chenjun
Parent commit: 09852d3b
Showing 6 changed files with 27 additions and 24 deletions (+27 −24):

include/ck/host_utility/flush_cache.hpp (+11 −11)
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp (+4 −2)
include/ck/utility/amd_xdlops.hpp (+5 −5)
library/src/tensor_operation_instance/gpu/gemm_multiply_multiply/device_gemm_multiply_multiply_xdl_i8_i8_bf16/device_gemm_multiply_multiply_xdl_i8_i8_bf16_mk_nk_mn.hpp (+1 −1)
profiler/include/profiler/profile_gemm_multiply_multiply_impl.hpp (+3 −2)
profiler/src/profile_gemm_multiply_multiply.cpp (+3 −3)
include/ck/host_utility/flush_cache.hpp

@@ -282,7 +282,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
        hip_check_error(hipDeviceSynchronize());
        hip_check_error(hipEventRecord(start, stream_config.stream_id_));

        for(int i = 0; i < nrepeat; ++i)
        {
            if constexpr(!TimePreprocess)

@@ -307,15 +307,15 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
            hip_check_error(hipGetLastError());
            // end real kernel

            // hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
            // hip_check_error(hipEventSynchronize(stop));
            // float cur_time = 0;
            // hip_check_error(hipEventElapsedTime(&cur_time, start, stop));
            // #if MEDIAN
            // times.insert(cur_time);
            // #else
            // total_time += cur_time;
            // #endif

            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
            {

@@ -351,7 +351,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
        }
#else
        // return total_time / nrepeat;
        return (total_time - 0.01 * nrepeat) / nrepeat;
#endif
    }
    else
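For context, the commented-out lines above follow the standard HIP event-timing pattern, while the active code times the whole repeat loop with a single start/stop pair. A minimal sketch of that pattern, assuming a trivial placeholder kernel (dummy_kernel is an illustration, not CK code):

#include <hip/hip_runtime.h>
#include <cstdio>

__global__ void dummy_kernel() {}

int main()
{
    hipEvent_t start, stop;
    (void)hipEventCreate(&start);
    (void)hipEventCreate(&stop);

    const int nrepeat = 10;

    // Record once before the loop and once after it, as the CK helper does.
    (void)hipEventRecord(start, 0);
    for(int i = 0; i < nrepeat; ++i)
        hipLaunchKernelGGL(dummy_kernel, dim3(1), dim3(1), 0, 0);
    (void)hipEventRecord(stop, 0);
    (void)hipEventSynchronize(stop);

    float total_ms = 0.f;
    (void)hipEventElapsedTime(&total_ms, start, stop);
    std::printf("avg kernel time: %f ms\n", total_ms / nrepeat);

    (void)hipEventDestroy(start);
    (void)hipEventDestroy(stop);
    return 0;
}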
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp

@@ -277,7 +277,8 @@ struct MultiplyMultiply
    __host__ __device__ constexpr void operator()<ck::half_t, int, ck::half_t, ck::half_t>(
        ck::half_t& e, const int& c, const ck::half_t& d0, const ck::half_t& d1) const
    {
        const float x0_f =
            ck::type_convert<float>(c) * ck::type_convert<float>(d0) * ck::type_convert<float>(d1);

        e = ck::type_convert<ck::half_t>(x0_f);
    }

@@ -286,7 +287,8 @@ struct MultiplyMultiply
    __host__ __device__ constexpr void operator()<ck::bhalf_t, int, float, float>(
        ck::bhalf_t& e, const int& c, const float& d0, const float& d1) const
    {
        const float x0_f =
            ck::type_convert<float>(c) * ck::type_convert<float>(d0) * ck::type_convert<float>(d1);

        e = ck::type_convert<ck::bhalf_t>(x0_f);
    }
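The MultiplyMultiply overloads above take an int32 GEMM accumulator c and two auxiliary operands d0 and d1, multiply them in float, and narrow the result to the output type. A standalone sketch of the same arithmetic in plain C++ (an illustration only, not CK code; float stands in for ck::half_t / ck::bhalf_t to stay portable):

#include <cstdint>
#include <cstdio>

// Mirrors ck::type_convert<float>(c) * ck::type_convert<float>(d0) * ck::type_convert<float>(d1),
// followed by conversion to the (narrower) output element type.
float multiply_multiply(std::int32_t c, float d0, float d1)
{
    const float x0_f = static_cast<float>(c) * d0 * d1;
    return x0_f; // CK would convert this to ck::half_t or ck::bhalf_t here
}

int main()
{
    std::printf("%f\n", multiply_multiply(42, 0.5f, 0.25f)); // prints 5.250000
    return 0;
}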
include/ck/utility/amd_xdlops.hpp

@@ -328,11 +328,11 @@ struct intrin_mfma_i32_16x16x32i8<16, 16>
    {
        reg_c.template AsType<int32x4_t>()(Number<0>{}) =
            __builtin_amdgcn_mfma_i32_16x16x32_i8(bit_cast<int64_t>(reg_a),
                                                  bit_cast<int64_t>(reg_b),
                                                  reg_c.template AsType<int32x4_t>()[Number<0>{}],
                                                  0,
                                                  0,
                                                  0);
    }
};
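For reference, __builtin_amdgcn_mfma_i32_16x16x32_i8 is the compiler intrinsic this wrapper calls: it consumes two packed int8 operands passed as 64-bit integers and a 4 x int32 accumulator, with the trailing cbsz/abid/blgp modifiers set to zero here. A minimal HIP kernel sketch, assuming a gfx94x target where the builtin is available (mfma_i8_demo and the argument layout of the kernel are made up for illustration, not CK code):

#include <hip/hip_runtime.h>
#include <cstdint>

// Same 4 x int32 vector shape CK's register wrapper exposes via AsType<int32x4_t>().
using int32x4_t = __attribute__((__vector_size__(4 * sizeof(int)))) int;

__global__ void mfma_i8_demo(const std::int64_t* a, const std::int64_t* b, int32x4_t* c)
{
    // Per-wavefront 16x16 tile update: D = A * B + C, cbsz/abid/blgp = 0 as in the diff.
    c[blockIdx.x] = __builtin_amdgcn_mfma_i32_16x16x32_i8(
        a[threadIdx.x], b[threadIdx.x], c[blockIdx.x], 0, 0, 0);
}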
library/src/tensor_operation_instance/gpu/gemm_multiply_multiply/device_gemm_multiply_multiply_xdl_i8_i8_bf16/device_gemm_multiply_multiply_xdl_i8_i8_bf16_mk_nk_mn.hpp

@@ -14,7 +14,7 @@ namespace device {
namespace instance {

using I8   = int8_t;
using I32  = int;
using BF16 = bhalf_t;
using F32  = float;
profiler/include/profiler/profile_gemm_multiply_multiply_impl.hpp

@@ -274,8 +274,9 @@ bool profile_gemm_multiply_multiply_impl(int do_verification,
#if defined CK_ENABLE_FP8 || defined CK_ENABLE_INT8
        // set softer tolerances for fp8
        if constexpr((is_same_v<ADataType, f8_t> || is_same_v<BDataType, f8_t> ||
                      is_same_v<EDataType, f8_t>) ||
                     (is_same_v<ADataType, int8_t> || is_same_v<BDataType, int8_t> ||
                      is_same_v<EDataType, int8_t>))
        {
            std::string msg = "Error: Incorrect results!";
            double rtol     = 1e-1;
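The relaxed rtol = 1e-1 above reflects the coarser precision of the fp8/int8 paths. A hypothetical standalone version of such a relative/absolute tolerance check (close_enough and the atol default are assumptions for illustration, not the profiler's actual helper):

#include <cmath>
#include <cstdio>

// Accept `got` if it is within atol + rtol * |ref| of the reference value.
bool close_enough(double ref, double got, double rtol = 1e-1, double atol = 1e-3)
{
    return std::abs(ref - got) <= atol + rtol * std::abs(ref);
}

int main()
{
    std::printf("%d\n", close_enough(100.0, 109.0)); // within 10% -> 1
    std::printf("%d\n", close_enough(100.0, 120.0)); // outside 10% -> 0
    return 0;
}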
profiler/src/profile_gemm_multiply_multiply.cpp

@@ -27,7 +27,7 @@ enum struct GemmDataType
    F16_F8_F16,     // 5
    F16_F16_F16_F8, // 6
    F8_F8_BF16,     // 7
    INT8_INT8_BF16, // 8
};

#define OP_NAME "gemm_multiply_multiply"

@@ -90,8 +90,8 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
    using F32  = float;
    using BF16 = ck::bhalf_t;
    using F8   = ck::f8_t;
    using I8   = int8_t;
    using I32  = int;

    using Row = ck::tensor_layout::gemm::RowMajor;
    using Col = ck::tensor_layout::gemm::ColumnMajor;
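The numbered comments suggest each GemmDataType value corresponds to an integer the profiler accepts on its command line. A minimal sketch under that assumption (the CLI layout is hypothetical and earlier enum members are omitted; values 5-8 are taken from the comments in the hunk):

#include <cstdio>
#include <cstdlib>

enum struct GemmDataType
{
    F16_F8_F16     = 5,
    F16_F16_F16_F8 = 6,
    F8_F8_BF16     = 7,
    INT8_INT8_BF16 = 8,
};

int main(int argc, char* argv[])
{
    // First argument selects the data-type combination; default to the int8 path added here.
    const int selection = (argc > 1) ? std::atoi(argv[1]) : 8;
    if(selection == static_cast<int>(GemmDataType::INT8_INT8_BF16))
        std::printf("profiling int8 x int8 -> bf16 GEMM\n");
    return 0;
}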