Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
38217877
Unverified
Commit
38217877
authored
Aug 20, 2025
by
rongfu.leng
Committed by
GitHub
Aug 20, 2025
Browse files
[Fix] fix offline env use local mode path (#22526)
Signed-off-by:
rongfu.leng
<
rongfu.leng@daocloud.io
>
parent
c6d80a7a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
3 deletions
+65
-3
tests/entrypoints/offline_mode/test_offline_mode.py
tests/entrypoints/offline_mode/test_offline_mode.py
+35
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+9
-1
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+21
-2
No files found.
tests/entrypoints/offline_mode/test_offline_mode.py
View file @
38217877
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for HF_HUB_OFFLINE mode"""
"""Tests for HF_HUB_OFFLINE mode"""
import
dataclasses
import
importlib
import
importlib
import
sys
import
sys
...
@@ -9,6 +10,7 @@ import urllib3
...
@@ -9,6 +10,7 @@ import urllib3
from
vllm
import
LLM
from
vllm
import
LLM
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.engine.arg_utils
import
EngineArgs
MODEL_CONFIGS
=
[
MODEL_CONFIGS
=
[
{
{
...
@@ -108,3 +110,36 @@ def _re_import_modules():
...
@@ -108,3 +110,36 @@ def _re_import_modules():
# Error this test if reloading a module failed
# Error this test if reloading a module failed
if
reload_exception
is
not
None
:
if
reload_exception
is
not
None
:
raise
reload_exception
raise
reload_exception
@pytest.mark.skip_global_cleanup
@pytest.mark.usefixtures("cache_models")
def test_model_from_huggingface_offline(monkeypatch: pytest.MonkeyPatch):
    """Build an ``LLM`` from a Hub model ID while HF is in offline mode.

    The test sets ``HF_HUB_OFFLINE=1``, makes every urllib3 HTTP(S)
    ``connect`` raise, reloads the affected modules, and then constructs
    an ``LLM`` from ``EngineArgs(model="facebook/opt-125m")``.  Any
    attempted network call fails the test with ``RuntimeError``.

    NOTE(review): presumably the ``cache_models`` fixture pre-populates
    the local HF cache with the model - confirm against the fixture.
    """
    try:
        # Set HF to offline mode and ensure we can still construct an LLM
        with monkeypatch.context() as m:
            m.setenv("HF_HUB_OFFLINE", "1")
            m.setenv("VLLM_NO_USAGE_STATS", "1")

            def disable_connect(*args, **kwargs):
                raise RuntimeError("No http calls allowed")

            m.setattr(
                urllib3.connection.HTTPConnection,
                "connect",
                disable_connect,
            )
            m.setattr(
                urllib3.connection.HTTPSConnection,
                "connect",
                disable_connect,
            )

            # Need to re-import huggingface_hub
            # and friends to setup offline mode
            _re_import_modules()
            engine_args = EngineArgs(model="facebook/opt-125m")
            LLM(**dataclasses.asdict(engine_args))
    finally:
        # Reset the environment after the test
        # NB: Assuming tests are run in online mode
        #
        # This must run AFTER the monkeypatch context has exited: only
        # then are HF_HUB_OFFLINE and the urllib3 ``connect`` methods
        # restored.  Re-importing inside the context would reload the
        # modules while the offline env var is still set, leaving them
        # in offline mode for whatever runs next in this process.
        _re_import_modules()
vllm/engine/arg_utils.py
View file @
38217877
...
@@ -15,6 +15,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
...
@@ -15,6 +15,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
Literal
,
Optional
,
Type
,
TypeVar
,
Union
,
cast
,
get_args
,
Literal
,
Optional
,
Type
,
TypeVar
,
Union
,
cast
,
get_args
,
get_origin
)
get_origin
)
import
huggingface_hub
import
regex
as
re
import
regex
as
re
import
torch
import
torch
from
pydantic
import
TypeAdapter
,
ValidationError
from
pydantic
import
TypeAdapter
,
ValidationError
...
@@ -39,7 +40,7 @@ from vllm.plugins import load_general_plugins
...
@@ -39,7 +40,7 @@ from vllm.plugins import load_general_plugins
from
vllm.ray.lazy_utils
import
is_ray_initialized
from
vllm.ray.lazy_utils
import
is_ray_initialized
from
vllm.reasoning
import
ReasoningParserManager
from
vllm.reasoning
import
ReasoningParserManager
from
vllm.test_utils
import
MODEL_WEIGHTS_S3_BUCKET
,
MODELS_ON_S3
from
vllm.test_utils
import
MODEL_WEIGHTS_S3_BUCKET
,
MODELS_ON_S3
from
vllm.transformers_utils.config
import
is_interleaved
from
vllm.transformers_utils.config
import
get_model_path
,
is_interleaved
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.utils
import
(
STR_DUAL_CHUNK_FLASH_ATTN_VAL
,
FlexibleArgumentParser
,
from
vllm.utils
import
(
STR_DUAL_CHUNK_FLASH_ATTN_VAL
,
FlexibleArgumentParser
,
GiB_bytes
,
get_ip
,
is_in_ray_actor
)
GiB_bytes
,
get_ip
,
is_in_ray_actor
)
...
@@ -457,6 +458,13 @@ class EngineArgs:
...
@@ -457,6 +458,13 @@ class EngineArgs:
# Setup plugins
# Setup plugins
from
vllm.plugins
import
load_general_plugins
from
vllm.plugins
import
load_general_plugins
load_general_plugins
()
load_general_plugins
()
# when use hf offline,replace model id to local model path
if
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
:
model_id
=
self
.
model
self
.
model
=
get_model_path
(
self
.
model
,
self
.
revision
)
logger
.
info
(
"HF_HUB_OFFLINE is True, replace model_id [%s] "
\
"to model_path [%s]"
,
model_id
,
self
.
model
)
@
staticmethod
@
staticmethod
def
add_cli_args
(
parser
:
FlexibleArgumentParser
)
->
FlexibleArgumentParser
:
def
add_cli_args
(
parser
:
FlexibleArgumentParser
)
->
FlexibleArgumentParser
:
...
...
vllm/transformers_utils/config.py
View file @
38217877
...
@@ -14,7 +14,7 @@ from huggingface_hub import get_safetensors_metadata, hf_hub_download
...
@@ -14,7 +14,7 @@ from huggingface_hub import get_safetensors_metadata, hf_hub_download
from
huggingface_hub
import
list_repo_files
as
hf_list_repo_files
from
huggingface_hub
import
list_repo_files
as
hf_list_repo_files
from
huggingface_hub
import
try_to_load_from_cache
from
huggingface_hub
import
try_to_load_from_cache
from
huggingface_hub.utils
import
(
EntryNotFoundError
,
HfHubHTTPError
,
from
huggingface_hub.utils
import
(
EntryNotFoundError
,
HfHubHTTPError
,
HFValidationError
,
LocalEntryNotFoundError
,
LocalEntryNotFoundError
,
RepositoryNotFoundError
,
RepositoryNotFoundError
,
RevisionNotFoundError
)
RevisionNotFoundError
)
from
transformers
import
GenerationConfig
,
PretrainedConfig
from
transformers
import
GenerationConfig
,
PretrainedConfig
...
@@ -335,6 +335,7 @@ def maybe_override_with_speculators_target_model(
...
@@ -335,6 +335,7 @@ def maybe_override_with_speculators_target_model(
gguf_model_repo
=
Path
(
model
).
parent
gguf_model_repo
=
Path
(
model
).
parent
else
:
else
:
gguf_model_repo
=
None
gguf_model_repo
=
None
kwargs
[
"local_files_only"
]
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
config_dict
,
_
=
PretrainedConfig
.
get_config_dict
(
config_dict
,
_
=
PretrainedConfig
.
get_config_dict
(
model
if
gguf_model_repo
is
None
else
gguf_model_repo
,
model
if
gguf_model_repo
is
None
else
gguf_model_repo
,
revision
=
revision
,
revision
=
revision
,
...
@@ -400,6 +401,7 @@ def get_config(
...
@@ -400,6 +401,7 @@ def get_config(
raise
ValueError
(
error_message
)
from
e
raise
ValueError
(
error_message
)
from
e
if
config_format
==
ConfigFormat
.
HF
:
if
config_format
==
ConfigFormat
.
HF
:
kwargs
[
"local_files_only"
]
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
config_dict
,
_
=
PretrainedConfig
.
get_config_dict
(
config_dict
,
_
=
PretrainedConfig
.
get_config_dict
(
model
,
model
,
revision
=
revision
,
revision
=
revision
,
...
@@ -532,7 +534,7 @@ def try_get_local_file(model: Union[str, Path],
...
@@ -532,7 +534,7 @@ def try_get_local_file(model: Union[str, Path],
revision
=
revision
)
revision
=
revision
)
if
isinstance
(
cached_filepath
,
str
):
if
isinstance
(
cached_filepath
,
str
):
return
Path
(
cached_filepath
)
return
Path
(
cached_filepath
)
except
HF
Val
idation
Error
:
except
Val
ue
Error
:
...
...
return
None
return
None
...
@@ -908,3 +910,20 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
...
@@ -908,3 +910,20 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
exc_info
=
e
)
exc_info
=
e
)
return
max_position_embeddings
return
max_position_embeddings
def get_model_path(
    model: Union[str, Path],
    revision: Optional[str] = None,
) -> Union[str, Path]:
    """Resolve a model identifier to a path on the local filesystem.

    Args:
        model: Either an existing filesystem path or a hub repository ID.
        revision: Optional revision forwarded to ``snapshot_download``.

    Returns:
        ``model`` unchanged when it already exists on disk; otherwise the
        value returned by ``snapshot_download`` (ModelScope's when
        ``envs.VLLM_USE_MODELSCOPE`` is set, huggingface_hub's otherwise),
        called with ``local_files_only=True``.

    Raises:
        AssertionError: If ``model`` is not an existing path and
            ``huggingface_hub.constants.HF_HUB_OFFLINE`` is false.
    """
    if os.path.exists(model):
        return model

    # Explicit raise rather than ``assert`` so the guard is not stripped
    # under ``python -O``.  AssertionError is kept so the exception type
    # seen by callers does not change.
    if not huggingface_hub.constants.HF_HUB_OFFLINE:
        raise AssertionError(
            "get_model_path() can only resolve a hub model id when "
            "HF_HUB_OFFLINE is enabled")

    common_kwargs = {
        # Offline mode is guaranteed by the guard above.
        "local_files_only": True,
        "revision": revision,
    }

    if envs.VLLM_USE_MODELSCOPE:
        from modelscope.hub.snapshot_download import snapshot_download
        return snapshot_download(model_id=model, **common_kwargs)

    from huggingface_hub import snapshot_download
    return snapshot_download(repo_id=model, **common_kwargs)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment