Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
320bc57b
"torchvision/models/vscode:/vscode.git/clone" did not exist on "7998cdfa56b87e69387db6152e0d02749f852499"
Commit
320bc57b
authored
Jan 17, 2022
by
Chao Liu
Browse files
update
parent
8fecf4c3
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
7 deletions
+10
-7
example/3_gemm_xdl_bias_relu_add/gemm_xdl_bias_relu_add.cpp
example/3_gemm_xdl_bias_relu_add/gemm_xdl_bias_relu_add.cpp
+10
-7
No files found.
example/3_gemm_xdl_bias_relu_add/gemm_xdl_bias_relu_add.cpp
View file @
320bc57b
...
...
@@ -52,12 +52,12 @@ struct BiasReluAdd
}
};
struct
BiasAdd
struct
DoSomething
{
#if 1
// correct result
// no scratch memory, good VGPR allocation (59)
// good perf (101Tflops)
// good perf (101Tflops
@ 1089MHz
)
__host__
__device__
constexpr
float
operator
()(
float
v0
,
ck
::
half_t
v1
,
ck
::
half_t
v2
)
const
{
constexpr
float
alpha
=
0.1
;
...
...
@@ -80,21 +80,20 @@ struct BiasAdd
// wrong result
// lots of scratch memory
// huge perf drop
template
<
typename
T1
,
typename
T2
>
__host__
__device__
constexpr
float
operator
()(
float
v0
,
T1
v1
,
T2
v2
)
const
__host__
__device__
constexpr
float
operator
()(
float
v0
,
ck
::
half_t
v1
,
ck
::
half_t
v2
)
const
{
return
alpha
*
v0
+
beta
*
v1
+
gamma
*
v2
;
}
#elif 0
// correct result
// some scratch memory (68 dword)
// some perf drop (94Tflops)
// some perf drop (94Tflops
@ 1089MHz
)
// fp64 instructions are used
__host__
__device__
constexpr
auto
operator
()(
float
v0
,
ck
::
half_t
v1
,
ck
::
half_t
v2
)
const
{
return
0.1
*
v0
+
0.2
*
v1
+
0.3
*
v2
;
}
#elif
0
#elif
1
// wrong result
// lots of scratch memory
// huge perf drop
...
...
@@ -128,7 +127,11 @@ using CLayout = ck::tensor_layout::gemm::RowMajor;
using
AOp
=
PassThrough
;
using
BOp
=
PassThrough
;
using
COp
=
BiasReluAdd
;
#if 1
using
COp
=
BiasReLuAdd
#else
using
COp
=
DoSomething
;
#endif
// Compilation parameters for NT problem
// clang-format off
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment