Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
802f306c
Unverified
Commit
802f306c
authored
Mar 12, 2026
by
Sage
Committed by
GitHub
Mar 12, 2026
Browse files
[Tests] Skip model weight download for render-only test server (#36813)
Signed-off-by:
Sage Ahrac
<
sagiahrak@gmail.com
>
parent
894843eb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
9 deletions
+25
-9
tests/utils.py
tests/utils.py
+25
-9
No files found.
tests/utils.py
View file @
802f306c
...
@@ -144,6 +144,17 @@ class RemoteVLLMServer:
...
@@ -144,6 +144,17 @@ class RemoteVLLMServer:
"""Subclasses override this method to customize server process launch"""
"""Subclasses override this method to customize server process launch"""
raise
NotImplementedError
raise
NotImplementedError
def
_pre_download_model
(
self
,
model
:
str
,
args
)
->
None
:
"""Download model weights before starting the server to avoid timeout."""
is_local
=
os
.
path
.
isdir
(
model
)
if
not
is_local
:
engine_args
=
AsyncEngineArgs
.
from_cli_args
(
args
)
model_config
=
engine_args
.
create_model_config
()
load_config
=
engine_args
.
create_load_config
()
model_loader
=
get_model_loader
(
load_config
)
model_loader
.
download_model
(
model_config
)
def
__init__
(
def
__init__
(
self
,
self
,
model
:
str
,
model
:
str
,
...
@@ -195,15 +206,7 @@ class RemoteVLLMServer:
...
@@ -195,15 +206,7 @@ class RemoteVLLMServer:
getattr
(
args
,
"show_hidden_metrics_for_version"
,
None
)
is
not
None
getattr
(
args
,
"show_hidden_metrics_for_version"
,
None
)
is
not
None
)
)
# download the model before starting the server to avoid timeout
self
.
_pre_download_model
(
model
,
args
)
is_local
=
os
.
path
.
isdir
(
model
)
if
not
is_local
:
engine_args
=
AsyncEngineArgs
.
from_cli_args
(
args
)
model_config
=
engine_args
.
create_model_config
()
load_config
=
engine_args
.
create_load_config
()
model_loader
=
get_model_loader
(
load_config
)
model_loader
.
download_model
(
model_config
)
# Record GPU memory before server start so we know what
# Record GPU memory before server start so we know what
# "released" looks like.
# "released" looks like.
...
@@ -515,6 +518,19 @@ class RemoteLaunchRenderServer(RemoteVLLMServer):
...
@@ -515,6 +518,19 @@ class RemoteLaunchRenderServer(RemoteVLLMServer):
start_new_session
=
True
,
start_new_session
=
True
,
)
)
def
_pre_download_model
(
self
,
model
:
str
,
args
)
->
None
:
"""Download only the tokenizer files (no model weights needed)."""
is_local
=
os
.
path
.
isdir
(
model
)
if
not
is_local
:
engine_args
=
AsyncEngineArgs
.
from_cli_args
(
args
)
model_config
=
engine_args
.
create_model_config
()
get_tokenizer
(
model_config
.
tokenizer
,
tokenizer_mode
=
model_config
.
tokenizer_mode
,
trust_remote_code
=
model_config
.
trust_remote_code
,
revision
=
model_config
.
tokenizer_revision
,
)
def
_wait_for_gpu_memory_release
(
self
,
timeout
:
float
=
30.0
):
def
_wait_for_gpu_memory_release
(
self
,
timeout
:
float
=
30.0
):
pass
# No GPU used
pass
# No GPU used
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment