Commit 9f48b238 authored by 王敏
Browse files

[fix]去掉merge-lora参数

parents e14b43ff 19470842
......@@ -420,7 +420,7 @@ class EngineArgs:
'--tokenizer-mode',
type=str,
default=EngineArgs.tokenizer_mode,
choices=['auto', 'slow', 'mistral', 'custom'],
choices=['auto', 'cpm', 'slow', 'mistral', 'custom'],
help='The tokenizer mode.\n\n* "auto" will use the '
'fast tokenizer if available.\n* "slow" will '
'always use the slow tokenizer. \n* '
......@@ -704,9 +704,6 @@ class EngineArgs:
lora_group.add_argument('--max-loras', **lora_kwargs["max_loras"])
lora_group.add_argument('--max-lora-rank',
**lora_kwargs["max_lora_rank"])
lora_group.add_argument('--merge-lora',
action=argparse.BooleanOptionalAction,
help='If set to True, the weights of the base layer will be merged with the weights of Lora.')
lora_group.add_argument('--lora-target-modules',
**lora_kwargs["lora_target_modules"])
lora_group.add_argument('--lora-extra-vocab-size',
......@@ -1381,6 +1378,11 @@ class EngineArgs:
from vllm.attention.utils.fa_utils import (
flash_attn_supports_fp8)
supported = flash_attn_supports_fp8()
int8_attention = self.kv_cache_dtype.startswith("int8")
if int8_attention:
supported = True
if not supported:
_raise_or_fallback(feature_name="--kv-cache-dtype",
recommend_to_remove=False)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment