Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5d3fb1d4
Commit
5d3fb1d4
authored
Jul 01, 2025
by
zhuwenwen
Browse files
解决PA崩溃
parent
9d5187eb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
21 deletions
+15
-21
csrc/attention/attention_kernels_opt_tc.cu
csrc/attention/attention_kernels_opt_tc.cu
+15
-21
No files found.
csrc/attention/attention_kernels_opt_tc.cu
View file @
5d3fb1d4
...
@@ -965,18 +965,14 @@ void get_numberthread_and_reuse_kv_v2(int& num_thread,int& reusekv,int& PARTITIO
...
@@ -965,18 +965,14 @@ void get_numberthread_and_reuse_kv_v2(int& num_thread,int& reusekv,int& PARTITIO
)
)
max_num_partitions
=
1
;
max_num_partitions
=
1
;
int
blocks
=
max_num_partitions
*
batchsize
*
qheads
;
int
blocks
=
max_num_partitions
*
batchsize
*
qheads
;
if
(
device_name
==
"gfx928"
||
batchsize
>
100
){
if
(
batchsize
>
100
&&
max_seq_len
>=
2000
){
if
(
batchsize
*
qheads
>
1024
&&
max_seq_len
>=
2000
){
if
(
max_seq_len
<
3900
)
reusekv
=
4
;
max_num_partitions
=
1
;
else
{
if
(
max_seq_len
<
2000
)
reusekv
=
8
;
PARTITION_SIZE
=
1024
;
else
if
(
max_seq_len
<
3900
)
reusekv
=
4
;
reusekv
=
8
;
else
{
max_num_partitions
=
DIVIDE_ROUND_UP
(
max_seq_len
,
PARTITION_SIZE
);
PARTITION_SIZE
=
2048
;
reusekv
=
8
;
max_num_partitions
=
DIVIDE_ROUND_UP
(
max_seq_len
,
PARTITION_SIZE
);
}
return
;
}
}
return
;
}
}
if
(
max_num_partitions
==
1
){
if
(
max_num_partitions
==
1
){
if
(
max_seq_len
<
512
){
if
(
max_seq_len
<
512
){
...
@@ -995,17 +991,14 @@ void get_numberthread_and_reuse_kv_v2(int& num_thread,int& reusekv,int& PARTITIO
...
@@ -995,17 +991,14 @@ void get_numberthread_and_reuse_kv_v2(int& num_thread,int& reusekv,int& PARTITIO
if
(
blocks
<
600
||
qheads
<=
kvheads
*
4
){
reusekv
=
4
;
return
;}
if
(
blocks
<
600
||
qheads
<=
kvheads
*
4
){
reusekv
=
4
;
return
;}
reusekv
=
8
;
return
;
reusekv
=
8
;
return
;
}
}
if
(
device_name
==
"gfx928"
){
if
(
batchsize
>
100
&&
max_seq_len
>=
2000
){
if
(
batchsize
*
qheads
>
1024
&&
max_seq_len
>=
2000
){
if
(
max_seq_len
<
3900
)
reusekv
=
4
;
max_num_partitions
=
1
;
else
{
if
(
max_seq_len
<
3900
)
reusekv
=
4
;
PARTITION_SIZE
=
2048
;
else
{
reusekv
=
4
;
PARTITION_SIZE
=
2048
;
max_num_partitions
=
DIVIDE_ROUND_UP
(
max_seq_len
,
PARTITION_SIZE
);
reusekv
=
4
;
max_num_partitions
=
DIVIDE_ROUND_UP
(
max_seq_len
,
PARTITION_SIZE
);
}
return
;
}
}
return
;
}
}
if
(
max_seq_len
<=
1000
||
if
(
max_seq_len
<=
1000
||
max_seq_len
<=
1500
&&
(
qheads
>
4
&&
batchsize
>=
16
||
batchsize
>=
64
))
max_seq_len
<=
1500
&&
(
qheads
>
4
&&
batchsize
>=
16
||
batchsize
>=
64
))
...
@@ -1068,6 +1061,7 @@ void paged_attention_v2_launcher_opt_tc(
...
@@ -1068,6 +1061,7 @@ void paged_attention_v2_launcher_opt_tc(
int
reusekv
,
num_thread
,
max_num_partitions
,
PARTITION_SIZE
=
512
;
int
reusekv
,
num_thread
,
max_num_partitions
,
PARTITION_SIZE
=
512
;
if
(
!
is_half
&&
max_seq_len
<=
8192
)
PARTITION_SIZE
=
256
;
if
(
!
is_half
&&
max_seq_len
<=
8192
)
PARTITION_SIZE
=
256
;
get_numberthread_and_reuse_kv_v2
(
num_thread
,
reusekv
,
PARTITION_SIZE
,
max_num_partitions
,
num_seqs
,
max_seq_len
,
num_heads
,
num_kv_heads
,
num_blocks
);
get_numberthread_and_reuse_kv_v2
(
num_thread
,
reusekv
,
PARTITION_SIZE
,
max_num_partitions
,
num_seqs
,
max_seq_len
,
num_heads
,
num_kv_heads
,
num_blocks
);
if
(
num_seqs
>
100
&&
max_num_partitions
>
16
)
max_num_partitions
=
16
;
if
(
PA_PARTITION_SIZE
!=
0
){
if
(
PA_PARTITION_SIZE
!=
0
){
PARTITION_SIZE
=
PA_PARTITION_SIZE
;
PARTITION_SIZE
=
PA_PARTITION_SIZE
;
max_num_partitions
=
DIVIDE_ROUND_UP
(
max_seq_len
,
PARTITION_SIZE
);
max_num_partitions
=
DIVIDE_ROUND_UP
(
max_seq_len
,
PARTITION_SIZE
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment