Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
951fdd66
"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "7ef40bb9832f4a8fca9f9924a35ae77a69ee7076"
Unverified
Commit
951fdd66
authored
Aug 14, 2024
by
Woosuk Kwon
Committed by
GitHub
Aug 14, 2024
Browse files
[TPU] Set per-rank XLA cache (#7533)
parent
2ecf7b17
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
6 deletions
+6
-6
vllm/worker/tpu_worker.py
vllm/worker/tpu_worker.py
+6
-6
No files found.
vllm/worker/tpu_worker.py
View file @
951fdd66
...
@@ -102,12 +102,12 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
...
@@ -102,12 +102,12 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
# 30-40 graphs for decode. 128 is an arbitrary safe number.
# 30-40 graphs for decode. 128 is an arbitrary safe number.
torch
.
_dynamo
.
config
.
cache_size_limit
=
128
torch
.
_dynamo
.
config
.
cache_size_limit
=
128
# Use persistent cache to avoid XLA recompilation.
# Use persistent cache to avoid XLA recompilation.
# NOTE(woosuk):
This does not completely eliminate the recompilation
# NOTE(woosuk):
Set per-rank cache path since different ranks
#
overhead because dynamo does not cache the compiled result
s.
#
can have slightly different XLA graph
s.
# NOTE(woosuk): Set readonly=False only for the rank 0 process to avoid
world_size
=
self
.
parallel_config
.
world_size
# race conditions.
per_rank_path
=
os
.
path
.
join
(
envs
.
VLLM_XLA_CACHE_PATH
,
xr
.
initialize_cache
(
envs
.
VLLM_XLA_CACHE_PATH
,
f
"tp
{
world_size
}
_rank
{
self
.
rank
}
"
)
readonly
=
not
self
.
is_driver_worker
)
xr
.
initialize_cache
(
per_rank_path
,
readonly
=
False
)
def
load_model
(
self
):
def
load_model
(
self
):
self
.
model_runner
.
load_model
()
self
.
model_runner
.
load_model
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment