Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
72c5b972
Unverified
Commit
72c5b972
authored
Apr 28, 2025
by
idouba
Committed by
GitHub
Apr 28, 2025
Browse files
Update tpu_worker.py 's typo (#17288)
parent
fa93cd9f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
2 deletions
+2
-2
vllm/worker/tpu_worker.py
vllm/worker/tpu_worker.py
+2
-2
No files found.
vllm/worker/tpu_worker.py
View file @
72c5b972
...
@@ -163,8 +163,8 @@ class TPUWorker(LoRANotSupportedWorkerBase, LocalOrDistributedWorkerBase):
...
@@ -163,8 +163,8 @@ class TPUWorker(LoRANotSupportedWorkerBase, LocalOrDistributedWorkerBase):
usable_memory_size
=
int
(
total_memory_size
*
usable_memory_size
=
int
(
total_memory_size
*
self
.
cache_config
.
gpu_memory_utilization
)
self
.
cache_config
.
gpu_memory_utilization
)
tpu_kv_cache_bytes
=
max
(
usable_memory_size
-
profiled
,
0
)
tpu_kv_cache_bytes
=
max
(
usable_memory_size
-
profiled
,
0
)
dtype_b
t
yes
=
get_dtype_size
(
self
.
cache_dtype
)
dtype_by
t
es
=
get_dtype_size
(
self
.
cache_dtype
)
block_size_bytes
=
(
dtype_b
t
yes
*
self
.
cache_config
.
block_size
*
block_size_bytes
=
(
dtype_by
t
es
*
self
.
cache_config
.
block_size
*
num_layers
*
2
*
head_size
*
num_kv_heads
)
num_layers
*
2
*
head_size
*
num_kv_heads
)
num_tpu_blocks
=
tpu_kv_cache_bytes
//
block_size_bytes
num_tpu_blocks
=
tpu_kv_cache_bytes
//
block_size_bytes
num_tpu_blocks
=
(
num_tpu_blocks
//
8
)
*
8
# Round down to 8.
num_tpu_blocks
=
(
num_tpu_blocks
//
8
)
*
8
# Round down to 8.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment