Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
73202dbe
Unverified
Commit
73202dbe
authored
Sep 11, 2024
by
bnellnm
Committed by
GitHub
Sep 11, 2024
Browse files
[Kernel][Misc] register ops to prevent graph breaks (#6917)
Co-authored-by:
Sage Moore
<
sage@neuralmagic.com
>
parent
7015417f
Changes
22
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
3 deletions
+9
-3
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+8
-3
vllm/worker/worker.py
vllm/worker/worker.py
+1
-0
No files found.
vllm/worker/model_runner.py
View file @
73202dbe
...
@@ -75,6 +75,10 @@ _NUM_WARMUP_ITERS = 2
...
@@ -75,6 +75,10 @@ _NUM_WARMUP_ITERS = 2
TModelInputForGPU
=
TypeVar
(
'TModelInputForGPU'
,
bound
=
"ModelInputForGPU"
)
TModelInputForGPU
=
TypeVar
(
'TModelInputForGPU'
,
bound
=
"ModelInputForGPU"
)
# For now, bump up cache limits for recompilations during CUDA graph warmups.
torch
.
_dynamo
.
config
.
cache_size_limit
=
128
torch
.
_dynamo
.
config
.
accumulated_cache_size_limit
=
128
@
dataclass
(
frozen
=
True
)
@
dataclass
(
frozen
=
True
)
class
ModelInputForGPU
(
ModelRunnerInputBase
):
class
ModelInputForGPU
(
ModelRunnerInputBase
):
...
@@ -1060,8 +1064,9 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
...
@@ -1060,8 +1064,9 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
"This may lead to less accurate results!"
)
"This may lead to less accurate results!"
)
if
envs
.
VLLM_TEST_DYNAMO_GRAPH_CAPTURE
and
supports_dynamo
():
if
envs
.
VLLM_TEST_DYNAMO_GRAPH_CAPTURE
and
supports_dynamo
():
self
.
model
=
torch
.
compile
(
self
.
model
,
self
.
model
=
torch
.
compile
(
fullgraph
=
True
,
self
.
model
,
fullgraph
=
envs
.
VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE
,
backend
=
"eager"
)
backend
=
"eager"
)
def
save_sharded_state
(
def
save_sharded_state
(
...
...
vllm/worker/worker.py
View file @
73202dbe
...
@@ -166,6 +166,7 @@ class Worker(LocalOrDistributedWorkerBase):
...
@@ -166,6 +166,7 @@ class Worker(LocalOrDistributedWorkerBase):
torch
.
cuda
.
set_device
(
self
.
device
)
torch
.
cuda
.
set_device
(
self
.
device
)
_check_if_gpu_supports_dtype
(
self
.
model_config
.
dtype
)
_check_if_gpu_supports_dtype
(
self
.
model_config
.
dtype
)
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
empty_cache
()
self
.
init_gpu_memory
=
torch
.
cuda
.
mem_get_info
()[
0
]
self
.
init_gpu_memory
=
torch
.
cuda
.
mem_get_info
()[
0
]
else
:
else
:
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment