Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d074a953
Commit
d074a953
authored
Jun 12, 2025
by
zhuwenwen
Browse files
解决大batch长seq崩溃问题
parent
04629132
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
7 deletions
+7
-7
csrc/attention/attention_kernels_opt_tc.cu
csrc/attention/attention_kernels_opt_tc.cu
+4
-4
csrc/attention/attention_with_mask_kernels_opt_tc.cu
csrc/attention/attention_with_mask_kernels_opt_tc.cu
+3
-3
No files found.
csrc/attention/attention_kernels_opt_tc.cu
View file @
d074a953
...
@@ -1054,9 +1054,9 @@ void paged_attention_v2_launcher_opt_tc(
...
@@ -1054,9 +1054,9 @@ void paged_attention_v2_launcher_opt_tc(
static
float
*
max_logits_ptr
=
nullptr
;
static
float
*
max_logits_ptr
=
nullptr
;
static
T
*
tmp_out_ptr
=
nullptr
;
static
T
*
tmp_out_ptr
=
nullptr
;
if
(
exp_sums_ptr
==
nullptr
){
if
(
exp_sums_ptr
==
nullptr
){
hipMalloc
(
&
exp_sums_ptr
,
10000000
);
// 10m
hipMalloc
(
&
exp_sums_ptr
,
1000000
);
// 1m
hipMalloc
(
&
max_logits_ptr
,
10000000
);
// 10m
hipMalloc
(
&
max_logits_ptr
,
1000000
);
// 1m
hipMalloc
(
&
tmp_out_ptr
,
400000000
);
// 400m
hipMalloc
(
&
tmp_out_ptr
,
100000000
);
// 100m
}
}
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
query
));
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
query
));
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
...
...
csrc/attention/attention_with_mask_kernels_opt_tc.cu
View file @
d074a953
...
@@ -959,9 +959,9 @@ void paged_attention_v2_launcher_opt_tc_with_mask(
...
@@ -959,9 +959,9 @@ void paged_attention_v2_launcher_opt_tc_with_mask(
static
float
*
max_logits_ptr
=
nullptr
;
static
float
*
max_logits_ptr
=
nullptr
;
static
T
*
tmp_out_ptr
=
nullptr
;
static
T
*
tmp_out_ptr
=
nullptr
;
if
(
exp_sums_ptr
==
nullptr
){
if
(
exp_sums_ptr
==
nullptr
){
hipMalloc
(
&
exp_sums_ptr
,
10000000
);
// 10m
hipMalloc
(
&
exp_sums_ptr
,
1000000
);
// 1m
hipMalloc
(
&
max_logits_ptr
,
10000000
);
// 10m
hipMalloc
(
&
max_logits_ptr
,
1000000
);
// 1m
hipMalloc
(
&
tmp_out_ptr
,
400000000
);
// 400m
hipMalloc
(
&
tmp_out_ptr
,
100000000
);
// 100m
}
}
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
query
));
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
query
));
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment