Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2f987dde
"docs/vscode:/vscode.git/clone" did not exist on "c16fb5dae88969f0e8b4ee3f9f7861fe91c8b2dc"
Commit
2f987dde
authored
Nov 19, 2024
by
zhuwenwen
Browse files
Merge remote-tracking branch 'origin/vllm-0.6.2-zhagnshao' into v0.6.2-dev
parents
99f557d2
c5877810
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
csrc/attention/attention_kernels_opt_tc.cu
csrc/attention/attention_kernels_opt_tc.cu
+4
-4
No files found.
csrc/attention/attention_kernels_opt_tc.cu
View file @
2f987dde
...
...
@@ -567,7 +567,7 @@ __global__ void paged_attention_v1_kernel_TC(
const
int
blocksparse_local_blocks
,
const
int
blocksparse_vert_stride
,
const
int
blocksparse_block_size
,
const
int
blocksparse_head_sliding_step
,
const
int
*
__restrict__
attn_masks
=
nullptr
,
const
int
attn_masks_stride
=
0
)
{
#ifdef
__gfx928__
#if defined(__gfx936__) || defined(__gfx928__)
paged_attention_kernel_TC
<
scalar_t
,
cache_t
,
HEAD_SIZE
,
BLOCK_SIZE
,
NUM_THREADS
,
KV_DTYPE
,
IS_BLOCK_SPARSE
,
REUSE_KV_TIMES
,
use_vmac
>
(
/* exp_sums */
nullptr
,
/* max_logits */
nullptr
,
out
,
q
,
k_cache
,
...
...
@@ -607,7 +607,7 @@ __global__ __launch_bounds__(256, 1) void paged_attention_v2_kernel_TC(
const
int
blocksparse_local_blocks
,
const
int
blocksparse_vert_stride
,
const
int
blocksparse_block_size
,
const
int
blocksparse_head_sliding_step
,
const
int
*
__restrict__
attn_masks
=
nullptr
,
const
int
attn_masks_stride
=
0
)
{
#ifdef
__gfx928__
#if defined(__gfx936__) || defined(__gfx928__)
paged_attention_kernel_TC
<
scalar_t
,
cache_t
,
HEAD_SIZE
,
BLOCK_SIZE
,
NUM_THREADS
,
KV_DTYPE
,
IS_BLOCK_SPARSE
,
REUSE_KV_TIMES
,
use_vmac
,
PARTITION_SIZE
>
(
...
...
@@ -952,7 +952,7 @@ void paged_attention_v1_opt_tc(
const
int64_t
attn_masks_stride
)
{
const
bool
is_block_sparse
=
(
blocksparse_vert_stride
>
1
);
if
(
kv_cache_dtype
!=
"auto"
||
query
.
dtype
()
==
at
::
ScalarType
::
Float
||
is_block_sparse
||
block_size
!=
16
||
query
.
size
(
2
)
!=
128
||
get_device_name
()
!=
"gfx928"
){
block_size
!=
16
||
query
.
size
(
2
)
!=
128
||
(
get_device_name
()
!=
"gfx928"
&&
get_device_name
()
!=
"gfx936"
)
){
paged_attention_v1_opt
(
out
,
query
,
key_cache
,
value_cache
,
num_kv_heads
,
scale
,
block_tables
,
seq_lens
,
block_size
,
max_seq_len
,
alibi_slopes
,
kv_cache_dtype
,
k_scale
,
v_scale
,
tp_rank
,
blocksparse_local_blocks
,
blocksparse_vert_stride
,
...
...
@@ -1182,7 +1182,7 @@ void paged_attention_v2_opt_tc(
const
int64_t
attn_masks_stride
)
{
const
bool
is_block_sparse
=
(
blocksparse_vert_stride
>
1
);
if
(
kv_cache_dtype
!=
"auto"
||
query
.
dtype
()
==
at
::
ScalarType
::
Float
||
is_block_sparse
||
block_size
!=
16
||
query
.
size
(
2
)
!=
128
||
get_device_name
()
!=
"gfx928"
){
block_size
!=
16
||
query
.
size
(
2
)
!=
128
||
(
get_device_name
()
!=
"gfx928"
&&
get_device_name
()
!=
"gfx936"
)
){
paged_attention_v2_opt
(
out
,
exp_sums
,
max_logits
,
tmp_out
,
query
,
key_cache
,
value_cache
,
num_kv_heads
,
scale
,
block_tables
,
seq_lens
,
block_size
,
max_seq_len
,
alibi_slopes
,
kv_cache_dtype
,
k_scale
,
v_scale
,
tp_rank
,
blocksparse_local_blocks
,
blocksparse_vert_stride
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment