Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9f48b238
Commit
9f48b238
authored
May 26, 2025
by
王敏
Browse files
[fix]去掉merge-lora参数
parents
e14b43ff
19470842
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
4 deletions
+6
-4
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+6
-4
No files found.
vllm/engine/arg_utils.py
View file @
9f48b238
...
@@ -420,7 +420,7 @@ class EngineArgs:
...
@@ -420,7 +420,7 @@ class EngineArgs:
'--tokenizer-mode'
,
'--tokenizer-mode'
,
type
=
str
,
type
=
str
,
default
=
EngineArgs
.
tokenizer_mode
,
default
=
EngineArgs
.
tokenizer_mode
,
choices
=
[
'auto'
,
'slow'
,
'mistral'
,
'custom'
],
choices
=
[
'auto'
,
'cpm'
,
'slow'
,
'mistral'
,
'custom'
],
help
=
'The tokenizer mode.
\n\n
* "auto" will use the '
help
=
'The tokenizer mode.
\n\n
* "auto" will use the '
'fast tokenizer if available.
\n
* "slow" will '
'fast tokenizer if available.
\n
* "slow" will '
'always use the slow tokenizer.
\n
* '
'always use the slow tokenizer.
\n
* '
...
@@ -704,9 +704,6 @@ class EngineArgs:
...
@@ -704,9 +704,6 @@ class EngineArgs:
lora_group
.
add_argument
(
'--max-loras'
,
**
lora_kwargs
[
"max_loras"
])
lora_group
.
add_argument
(
'--max-loras'
,
**
lora_kwargs
[
"max_loras"
])
lora_group
.
add_argument
(
'--max-lora-rank'
,
lora_group
.
add_argument
(
'--max-lora-rank'
,
**
lora_kwargs
[
"max_lora_rank"
])
**
lora_kwargs
[
"max_lora_rank"
])
lora_group
.
add_argument
(
'--merge-lora'
,
action
=
argparse
.
BooleanOptionalAction
,
help
=
'If set to True, the weights of the base layer will be merged with the weights of Lora.'
)
lora_group
.
add_argument
(
'--lora-target-modules'
,
lora_group
.
add_argument
(
'--lora-target-modules'
,
**
lora_kwargs
[
"lora_target_modules"
])
**
lora_kwargs
[
"lora_target_modules"
])
lora_group
.
add_argument
(
'--lora-extra-vocab-size'
,
lora_group
.
add_argument
(
'--lora-extra-vocab-size'
,
...
@@ -1381,6 +1378,11 @@ class EngineArgs:
...
@@ -1381,6 +1378,11 @@ class EngineArgs:
from
vllm.attention.utils.fa_utils
import
(
from
vllm.attention.utils.fa_utils
import
(
flash_attn_supports_fp8
)
flash_attn_supports_fp8
)
supported
=
flash_attn_supports_fp8
()
supported
=
flash_attn_supports_fp8
()
int8_attention
=
self
.
kv_cache_dtype
.
startswith
(
"int8"
)
if
int8_attention
:
supported
=
True
if
not
supported
:
if
not
supported
:
_raise_or_fallback
(
feature_name
=
"--kv-cache-dtype"
,
_raise_or_fallback
(
feature_name
=
"--kv-cache-dtype"
,
recommend_to_remove
=
False
)
recommend_to_remove
=
False
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment