Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
0144b4f4
Commit
0144b4f4
authored
May 30, 2024
by
letaoqin
Browse files
Merge branch 'develop' into jizhan/reduce_threadwise_multi_d
parents
300337cd
34f3dfdd
Changes
25
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
201 additions
and
71 deletions
+201
-71
include/ck_tile/host/timer.hpp
include/ck_tile/host/timer.hpp
+79
-0
include/ck_tile/ops/fmha/block/block_position_encoding.hpp
include/ck_tile/ops/fmha/block/block_position_encoding.hpp
+3
-3
include/ck_tile/ops/fmha/kernel/fmha_fwd_kernel.hpp
include/ck_tile/ops/fmha/kernel/fmha_fwd_kernel.hpp
+2
-2
include/ck_tile/ops/fmha/kernel/fmha_fwd_tile_partitioner.hpp
...ude/ck_tile/ops/fmha/kernel/fmha_fwd_tile_partitioner.hpp
+55
-4
test/position_embedding/position_embedding.cpp
test/position_embedding/position_embedding.cpp
+62
-62
No files found.
include/ck_tile/host/timer.hpp
0 → 100644
View file @
0144b4f4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck_tile/core/config.hpp"
#include "ck_tile/host/hip_check_error.hpp"
#include <hip/hip_runtime.h>
#include <cstddef>
#include <chrono>
namespace
ck_tile
{
/// Timer that measures the elapsed time between two HIP events recorded on a
/// stream.
///
/// Both events are created in the constructor and destroyed in the destructor.
/// Every HIP runtime call is routed through HIP_CHECK_ERROR. The destructor is
/// declared noexcept(false), presumably because HIP_CHECK_ERROR may throw on
/// failure (its definition is not visible here -- confirm).
///
/// Typical use: start(stream); <enqueue work>; stop(stream); duration();
struct gpu_timer
{
    /// Create the start and stop events.
    CK_TILE_HOST gpu_timer()
    {
        HIP_CHECK_ERROR(hipEventCreate(&start_evt));
        HIP_CHECK_ERROR(hipEventCreate(&stop_evt));
    }

    // This object owns its two event handles. A member-wise copy would share
    // them, and both destructors would then call hipEventDestroy on the same
    // handles, so copying is disabled. (The user-declared destructor already
    // suppresses the implicit move operations.)
    gpu_timer(const gpu_timer&)            = delete;
    gpu_timer& operator=(const gpu_timer&) = delete;

    /// Destroy both events.
    CK_TILE_HOST ~gpu_timer() noexcept(false)
    {
        HIP_CHECK_ERROR(hipEventDestroy(start_evt));
        HIP_CHECK_ERROR(hipEventDestroy(stop_evt));
    }

    /// Synchronize the whole device, then record the start event on stream s.
    CK_TILE_HOST void start(const hipStream_t& s)
    {
        // Device-wide sync first so previously submitted work is not included
        // in the measured interval.
        HIP_CHECK_ERROR(hipDeviceSynchronize());
        HIP_CHECK_ERROR(hipEventRecord(start_evt, s));
    }

    /// Record the stop event on stream s and block until it has completed.
    CK_TILE_HOST void stop(const hipStream_t& s)
    {
        HIP_CHECK_ERROR(hipEventRecord(stop_evt, s));
        HIP_CHECK_ERROR(hipEventSynchronize(stop_evt));
    }

    /// Elapsed time between the start and stop events.
    /// @return duration in milliseconds
    CK_TILE_HOST float duration() const
    {
        float ms = 0;
        HIP_CHECK_ERROR(hipEventElapsedTime(&ms, start_evt, stop_evt));
        return ms;
    }

    private:
    hipEvent_t start_evt{};
    hipEvent_t stop_evt{};
};
/// Host-side wall-clock timer with the same start/stop/duration interface as
/// the GPU event timer.
///
/// start() and stop() each synchronize the whole device before sampling the
/// clock; the stream argument is accepted but not used.
///
/// The interval is measured with std::chrono::steady_clock.
/// high_resolution_clock is permitted to be an alias of system_clock, which
/// can be adjusted while a measurement is in progress, so it is not a safe
/// choice for measuring durations.
struct cpu_timer
{
    /// Synchronize the device, then sample the start time.
    // torch.utils.benchmark.Timer(), there is a sync inside each timer callback
    CK_TILE_HOST void start(const hipStream_t&)
    {
        HIP_CHECK_ERROR(hipDeviceSynchronize());
        start_tick = clock_type::now();
    }

    /// Synchronize the device, then sample the stop time.
    // torch.utils.benchmark.Timer(), there is a sync inside each timer callback
    CK_TILE_HOST void stop(const hipStream_t&)
    {
        HIP_CHECK_ERROR(hipDeviceSynchronize());
        stop_tick = clock_type::now();
    }

    /// Elapsed time between the last start() and stop() samples.
    /// @return duration in milliseconds
    CK_TILE_HOST float duration() const
    {
        // Convert to seconds in double precision first, then scale to ms and
        // narrow to float only at the very end.
        const double sec =
            std::chrono::duration_cast<std::chrono::duration<double>>(stop_tick - start_tick)
                .count();
        return static_cast<float>(sec * 1e3);
    }

    private:
    // Monotonic clock: differences between two samples are never negative.
    using clock_type = std::chrono::steady_clock;

    clock_type::time_point start_tick{};
    clock_type::time_point stop_tick{};
};
}
// namespace ck_tile
include/ck_tile/ops/fmha/block/block_position_encoding.hpp
View file @
0144b4f4
...
@@ -23,13 +23,13 @@ VERTICAL:
...
@@ -23,13 +23,13 @@ VERTICAL:
[0] 1 2 3 4 5
[0] 1 2 3 4 5
[0] 1 2 3 4 5
[0] 1 2 3 4 5
TOP_LEFT:
TOP_LEFT
(but negative)
:
[0] 1 2 3 4 5
[0] 1 2 3 4 5
1 [0] 1 2 3 4
1 [0] 1 2 3 4
2 1 [0] 1 2 3
2 1 [0] 1 2 3
3 2 1 [0] 1 2
3 2 1 [0] 1 2
FROM_BOTTOM_RIGHT:
FROM_BOTTOM_RIGHT
(but negative)
:
2 1 [0] 1 2 3
2 1 [0] 1 2 3
3 2 1 [0] 1 2
3 2 1 [0] 1 2
4 3 2 1 [0] 1
4 3 2 1 [0] 1
...
@@ -54,7 +54,7 @@ struct Alibi
...
@@ -54,7 +54,7 @@ struct Alibi
index_t
x_total_
,
index_t
x_total_
,
AlibiMode
mode_
=
AlibiMode
::
VERTICAL
)
AlibiMode
mode_
=
AlibiMode
::
VERTICAL
)
{
{
slope
=
mode_
==
AlibiMode
::
VERTICAL
?
slope_
:
-
slope
;
slope
=
mode_
==
AlibiMode
::
VERTICAL
?
slope_
:
-
slope
_
;
shift_left_up
=
[
&
]()
{
shift_left_up
=
[
&
]()
{
if
(
RowMajor
)
if
(
RowMajor
)
...
...
include/ck_tile/ops/fmha/kernel/fmha_fwd_kernel.hpp
View file @
0144b4f4
...
@@ -76,7 +76,7 @@ struct FmhaFwdKernel
...
@@ -76,7 +76,7 @@ struct FmhaFwdKernel
return
n
.
empty
()
?
n
:
std
::
string
(
"p"
)
+
n
;
}();
return
n
.
empty
()
?
n
:
std
::
string
(
"p"
)
+
n
;
}();
return
return
_SS_
(
"fmha_fwd_d"
)
+
_TS_
(
bfs
::
kK0BlockLength
)
+
"_"
+
_SS_
(
t2s
<
QDataType
>::
name
)
+
_SS_
(
"fmha_fwd_d"
)
+
_TS_
(
bfs
::
kK0BlockLength
)
+
"_"
+
_SS_
(
t2s
<
QDataType
>::
name
)
+
"_"
+
(
kIsGroupMode
?
"group"
:
"batch"
)
+
"_"
+
"_"
+
(
kIsGroupMode
?
"group"
:
"batch"
)
+
"_"
+
_SS_
(
TilePartitioner
::
name
)
+
"_"
"b"
+
_TS_
(
bfs
::
kM0
)
+
"x"
+
_TS_
(
bfs
::
kN0
)
+
"x"
+
_TS_
(
bfs
::
kK0
)
+
"x"
+
"b"
+
_TS_
(
bfs
::
kM0
)
+
"x"
+
_TS_
(
bfs
::
kN0
)
+
"x"
+
_TS_
(
bfs
::
kK0
)
+
"x"
+
_TS_
(
bfs
::
kN1
)
+
"x"
+
_TS_
(
bfs
::
kK1
)
+
"x"
+
_TS_
(
bfs
::
kK0BlockLength
)
+
"_"
+
_TS_
(
bfs
::
kN1
)
+
"x"
+
_TS_
(
bfs
::
kK1
)
+
"x"
+
_TS_
(
bfs
::
kK0BlockLength
)
+
"_"
+
"r"
+
_TS_
(
gbr
::
at
(
ck_tile
::
number
<
0
>
{}))
+
"x"
+
_TS_
(
gbr
::
at
(
ck_tile
::
number
<
1
>
{}))
+
"x"
+
_TS_
(
gbr
::
at
(
ck_tile
::
number
<
2
>
{}))
+
"_"
+
"r"
+
_TS_
(
gbr
::
at
(
ck_tile
::
number
<
0
>
{}))
+
"x"
+
_TS_
(
gbr
::
at
(
ck_tile
::
number
<
1
>
{}))
+
"x"
+
_TS_
(
gbr
::
at
(
ck_tile
::
number
<
2
>
{}))
+
"_"
+
...
@@ -702,7 +702,7 @@ struct FmhaFwdKernel
...
@@ -702,7 +702,7 @@ struct FmhaFwdKernel
else
else
{
{
return
Alibi
<
SaccDataType
,
true
>
{
return
Alibi
<
SaccDataType
,
true
>
{
slope
,
kargs
.
seqlen_q
,
kargs
.
seqlen_k
,
AlibiMode
::
VERTICAL
};
slope
,
kargs
.
seqlen_q
,
kargs
.
seqlen_k
,
AlibiMode
::
FROM_BOTTOM_RIGHT
};
}
}
}
}
else
else
...
...
include/ck_tile/ops/fmha/kernel/fmha_fwd_tile_partitioner.hpp
View file @
0144b4f4
This diff is collapsed.
Click to expand it.
test/position_embedding/position_embedding.cpp
View file @
0144b4f4
This diff is collapsed.
Click to expand it.
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment