Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b854321f
Unverified
Commit
b854321f
authored
Jul 10, 2025
by
Simon Mo
Committed by
GitHub
Jul 10, 2025
Browse files
[Docs] Lazy import gguf (#20785)
Signed-off-by:
simon-mo
<
simon.mo@hey.com
>
parent
5b6fe23d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
2 deletions
+10
-2
vllm/entrypoints/score_utils.py
vllm/entrypoints/score_utils.py
+5
-1
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+5
-1
No files found.
vllm/entrypoints/score_utils.py
View file @
b854321f
...
@@ -11,7 +11,6 @@ from vllm.entrypoints.chat_utils import (
...
@@ -11,7 +11,6 @@ from vllm.entrypoints.chat_utils import (
ChatCompletionContentPartImageParam
,
ChatCompletionContentPartTextParam
,
ChatCompletionContentPartImageParam
,
ChatCompletionContentPartTextParam
,
MultiModalItemTracker
,
_ContentPart
,
_parse_chat_message_content_part
)
MultiModalItemTracker
,
_ContentPart
,
_parse_chat_message_content_part
)
from
vllm.inputs
import
TokensPrompt
from
vllm.inputs
import
TokensPrompt
from
vllm.model_executor.model_loader
import
get_model_cls
from
vllm.model_executor.models.interfaces
import
supports_score_template
from
vllm.model_executor.models.interfaces
import
supports_score_template
from
vllm.multimodal.inputs
import
MultiModalDataDict
from
vllm.multimodal.inputs
import
MultiModalDataDict
from
vllm.outputs
import
PoolingRequestOutput
from
vllm.outputs
import
PoolingRequestOutput
...
@@ -140,6 +139,8 @@ def apply_score_template(
...
@@ -140,6 +139,8 @@ def apply_score_template(
prompt_1
:
str
,
prompt_1
:
str
,
prompt_2
:
str
,
prompt_2
:
str
,
)
->
str
:
)
->
str
:
# NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
from
vllm.model_executor.model_loader
import
get_model_cls
model
=
get_model_cls
(
model_config
)
model
=
get_model_cls
(
model_config
)
if
supports_score_template
(
model
):
if
supports_score_template
(
model
):
...
@@ -162,6 +163,9 @@ def post_process_tokens(
...
@@ -162,6 +163,9 @@ def post_process_tokens(
Note:
Note:
This is an in-place operation.
This is an in-place operation.
"""
"""
# NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
from
vllm.model_executor.model_loader
import
get_model_cls
model
=
get_model_cls
(
model_config
)
model
=
get_model_cls
(
model_config
)
if
supports_score_template
(
model
):
if
supports_score_template
(
model
):
model
.
post_process_tokens
(
prompt
)
model
.
post_process_tokens
(
prompt
)
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
b854321f
...
@@ -14,7 +14,6 @@ from pathlib import Path
...
@@ -14,7 +14,6 @@ from pathlib import Path
from
typing
import
Any
,
Callable
,
Optional
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
Union
import
filelock
import
filelock
import
gguf
import
huggingface_hub.constants
import
huggingface_hub.constants
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
...
@@ -40,6 +39,11 @@ except (ImportError, OSError):
...
@@ -40,6 +39,11 @@ except (ImportError, OSError):
SafetensorsStreamer
=
runai_model_streamer
.
placeholder_attr
(
SafetensorsStreamer
=
runai_model_streamer
.
placeholder_attr
(
"SafetensorsStreamer"
)
"SafetensorsStreamer"
)
try
:
import
gguf
except
ImportError
:
gguf
=
PlaceholderModule
(
"gguf"
)
try
:
try
:
from
fastsafetensors
import
SafeTensorsFileLoader
,
SingleGroup
from
fastsafetensors
import
SafeTensorsFileLoader
,
SingleGroup
except
ImportError
:
except
ImportError
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment