Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
32d4b669
Unverified
Commit
32d4b669
authored
Apr 23, 2025
by
Yong Hoon Shin
Committed by
GitHub
Apr 23, 2025
Browse files
[BugFix][V1] Fix int32 token index overflow when preparing input ids (#16806)
parent
3cde34a4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
2 deletions
+4
-2
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+2
-1
vllm/v1/worker/tpu_model_runner.py
vllm/v1/worker/tpu_model_runner.py
+2
-1
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
32d4b669
...
@@ -241,10 +241,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -241,10 +241,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
device=self.device)
device=self.device)
# OPTIMIZATION: Cache the tensors rather than creating them every step.
# OPTIMIZATION: Cache the tensors rather than creating them every step.
# Keep in int64 to avoid overflow with long context
self.arange_np = np.arange(max(self.max_num_reqs + 1,
self.arange_np = np.arange(max(self.max_num_reqs + 1,
self.max_model_len,
self.max_model_len,
self.max_num_tokens),
self.max_num_tokens),
dtype=np.int32)
dtype=np.int64)
# NOTE(woosuk): These tensors are "stateless", i.e., they are literally
# NOTE(woosuk): These tensors are "stateless", i.e., they are literally
# a faster version of creating a new tensor every time. Thus, we should
# a faster version of creating a new tensor every time. Thus, we should
# not make any assumptions about the values in these tensors.
# not make any assumptions about the values in these tensors.
...
...
vllm/v1/worker/tpu_model_runner.py
View file @
32d4b669
...
@@ -219,7 +219,8 @@ class TPUModelRunner:
...
@@ -219,7 +219,8 @@ class TPUModelRunner:
# Range tensor with values [0 .. self.max_num_tokens - 1].
# Range tensor with values [0 .. self.max_num_tokens - 1].
# Used to initialize positions / context_lens / seq_lens
# Used to initialize positions / context_lens / seq_lens
self.arange_np = np.arange(self.max_num_tokens, dtype=np.int32)
# Keep in int64 to avoid overflow with long context
self.arange_np = np.arange(self.max_num_tokens, dtype=np.int64)
self.num_reqs_paddings = _get_req_paddings(
self.num_reqs_paddings = _get_req_paddings(
min_req_size=MIN_NUM_SEQS, max_req_size=self.max_num_reqs)
min_req_size=MIN_NUM_SEQS, max_req_size=self.max_num_reqs)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment