Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4ca65a97
Unverified
Commit
4ca65a97
authored
Sep 02, 2024
by
Isotr0py
Committed by
GitHub
Sep 02, 2024
Browse files
[Core][Bugfix] Accept GGUF model without .gguf extension (#8056)
parent
e2b2aa5a
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
5 deletions
+23
-5
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+2
-1
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+3
-2
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+2
-2
vllm/transformers_utils/utils.py
vllm/transformers_utils/utils.py
+16
-0
No files found.
vllm/engine/arg_utils.py
View file @
4ca65a97
...
...
@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
from
vllm.executor.executor_base
import
ExecutorBase
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.utils
import
FlexibleArgumentParser
if
TYPE_CHECKING
:
...
...
@@ -753,7 +754,7 @@ class EngineArgs:
def
create_engine_config
(
self
)
->
EngineConfig
:
# gguf file needs a specific model loader and doesn't use hf_repo
if
self
.
model
.
endswith
(
".gguf"
):
if
check_gguf_file
(
self
.
model
):
self
.
quantization
=
self
.
load_format
=
"gguf"
# bitsandbytes quantization needs a specific model loader
...
...
vllm/transformers_utils/config.py
View file @
4ca65a97
...
...
@@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
MedusaConfig
,
MLPSpeculatorConfig
,
MPTConfig
,
NemotronConfig
,
RWConfig
,
UltravoxConfig
)
from
vllm.transformers_utils.utils
import
check_gguf_file
if
VLLM_USE_MODELSCOPE
:
from
modelscope
import
AutoConfig
...
...
@@ -56,7 +57,7 @@ def get_config(
)
->
PretrainedConfig
:
# Separate model folder from file path for GGUF models
is_gguf
=
Path
(
model
).
is_file
()
and
Path
(
model
).
suffix
==
".gguf"
is_gguf
=
check_gguf_file
(
model
)
if
is_gguf
:
kwargs
[
"gguf_file"
]
=
Path
(
model
).
name
model
=
Path
(
model
).
parent
...
...
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
if
VLLM_USE_MODELSCOPE
:
return
dict
()
# Separate model folder from file path for GGUF models
if
Path
(
model
).
is_file
()
and
Path
(
model
).
suffix
==
".gguf"
:
if
check_gguf_file
(
model
)
:
model
=
Path
(
model
).
parent
return
get_image_processor_config
(
model
,
revision
=
revision
,
**
kwargs
)
...
...
vllm/transformers_utils/tokenizer.py
View file @
4ca65a97
...
...
@@ -12,6 +12,7 @@ from vllm.logger import init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.tokenizers
import
(
BaichuanTokenizer
,
MistralTokenizer
)
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.utils
import
make_async
logger
=
init_logger
(
__name__
)
...
...
@@ -96,8 +97,7 @@ def get_tokenizer(
kwargs
[
"truncation_side"
]
=
"left"
# Separate model folder from file path for GGUF models
is_gguf
=
Path
(
tokenizer_name
).
is_file
()
and
Path
(
tokenizer_name
).
suffix
==
".gguf"
is_gguf
=
check_gguf_file
(
tokenizer_name
)
if
is_gguf
:
kwargs
[
"gguf_file"
]
=
Path
(
tokenizer_name
).
name
tokenizer_name
=
Path
(
tokenizer_name
).
parent
...
...
vllm/transformers_utils/utils.py
0 → 100644
View file @
4ca65a97
from
os
import
PathLike
from
pathlib
import
Path
from
typing
import
Union
def check_gguf_file(model: Union[str, PathLike]) -> bool:
    """Check whether ``model`` points to a GGUF model file.

    A path is recognised as GGUF when it is an existing regular file and
    either carries the ``.gguf`` suffix or starts with the 4-byte magic
    ``b"GGUF"``.

    Args:
        model: Path to inspect. Anything that is not an existing file
            yields ``False``.

    Returns:
        ``True`` if the path is an existing file recognised as GGUF,
        ``False`` otherwise, including when the file cannot be opened or
        read.
    """
    path = Path(model)
    if not path.is_file():
        return False
    if path.suffix == ".gguf":
        # The extension alone is accepted; the contents are not inspected.
        return True

    # No ".gguf" suffix: fall back to sniffing the magic bytes.
    try:
        with path.open("rb") as f:
            header = f.read(4)
    except OSError:
        # The file may be unreadable (permissions) or may have vanished
        # after the is_file() check; a predicate should answer "no"
        # rather than raise in that situation.
        return False
    return header == b"GGUF"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment