Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
105b8ce4
"docs/vscode:/vscode.git/clone" did not exist on "11599b0e1ffdbe7f7e5f7d222dfbef69b41b3ad2"
Unverified
Commit
105b8ce4
authored
Feb 22, 2025
by
Jee Jee Li
Committed by
GitHub
Feb 22, 2025
Browse files
[Misc] Reduce LoRA-related static variable (#13166)
parent
2cb8c154
Changes
41
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
35 additions
and
241 deletions
+35
-241
vllm/model_executor/models/internlm2.py
vllm/model_executor/models/internlm2.py
+0
-10
vllm/model_executor/models/jamba.py
vllm/model_executor/models/jamba.py
+0
-4
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+0
-4
vllm/model_executor/models/minicpm.py
vllm/model_executor/models/minicpm.py
+0
-8
vllm/model_executor/models/minicpm3.py
vllm/model_executor/models/minicpm3.py
+0
-16
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+0
-42
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+0
-4
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+0
-20
vllm/model_executor/models/nemotron.py
vllm/model_executor/models/nemotron.py
+0
-3
vllm/model_executor/models/phi.py
vllm/model_executor/models/phi.py
+0
-11
vllm/model_executor/models/phimoe.py
vllm/model_executor/models/phimoe.py
+0
-10
vllm/model_executor/models/qwen.py
vllm/model_executor/models/qwen.py
+0
-9
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+0
-20
vllm/model_executor/models/qwen2_5_vl.py
vllm/model_executor/models/qwen2_5_vl.py
+0
-21
vllm/model_executor/models/qwen2_rm.py
vllm/model_executor/models/qwen2_rm.py
+0
-10
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+0
-18
vllm/model_executor/models/qwen_vl.py
vllm/model_executor/models/qwen_vl.py
+0
-15
vllm/model_executor/models/solar.py
vllm/model_executor/models/solar.py
+0
-8
vllm/model_executor/models/transformers.py
vllm/model_executor/models/transformers.py
+35
-0
vllm/model_executor/models/ultravox.py
vllm/model_executor/models/ultravox.py
+0
-8
No files found.
vllm/model_executor/models/internlm2.py
View file @
105b8ce4
...
@@ -329,16 +329,6 @@ class InternLM2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -329,16 +329,6 @@ class InternLM2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
"gate_up_proj"
:
[
"w1"
,
"w3"
],
"gate_up_proj"
:
[
"w1"
,
"w3"
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"wqkv"
,
"wo"
,
"gate_up_proj"
,
"w2"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
def
__init__
(
self
,
*
,
*
,
vllm_config
:
VllmConfig
,
vllm_config
:
VllmConfig
,
...
...
vllm/model_executor/models/jamba.py
View file @
105b8ce4
...
@@ -380,10 +380,6 @@ class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
...
@@ -380,10 +380,6 @@ class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"embed_tokens"
,
"lm_head"
,
"up_proj"
,
"down_proj"
,
"gate_proj"
,
"out_proj"
,
"in_proj"
,
"x_proj"
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
,
"lm_head"
:
"output_embeddings"
,
...
...
vllm/model_executor/models/llama.py
View file @
105b8ce4
...
@@ -452,10 +452,6 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -452,10 +452,6 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
"embed_tokens"
,
"lm_head"
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
"lm_head"
:
"output_embeddings"
...
...
vllm/model_executor/models/minicpm.py
View file @
105b8ce4
...
@@ -522,14 +522,6 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -522,14 +522,6 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
"embed_tokens"
,
"lm_head"
,
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
,
"lm_head"
:
"output_embeddings"
,
...
...
vllm/model_executor/models/minicpm3.py
View file @
105b8ce4
...
@@ -227,21 +227,5 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
...
@@ -227,21 +227,5 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"kv_a_proj_with_mqa"
,
"q_a_proj"
,
"q_b_proj"
,
"kv_b_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
"embed_tokens"
,
"lm_head"
,
]
# `embedding_modules` and `embedding_padding_modules`
# are inherited from MiniCPMForCausalLM
def
_init_model
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
_init_model
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
return
MiniCPM3Model
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
return
MiniCPM3Model
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
vllm/model_executor/models/minicpmv.py
View file @
105b8ce4
...
@@ -1228,23 +1228,6 @@ class MiniCPMV2_5(MiniCPMVBaseModel, SupportsLoRA):
...
@@ -1228,23 +1228,6 @@ class MiniCPMV2_5(MiniCPMVBaseModel, SupportsLoRA):
"up_proj"
,
"up_proj"
,
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
# vision encoder
"fc1"
,
"fc2"
,
"out_proj"
,
# language model
"qkv_proj"
,
# same name with vision encoder
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
# resampler
"kv_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
...
@@ -1338,23 +1321,6 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
...
@@ -1338,23 +1321,6 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
"up_proj"
,
"up_proj"
,
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
# vision encoder
"fc1"
,
"fc2"
,
"out_proj"
,
# language model
"qkv_proj"
,
# same name with vision encoder
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
# resampler
"kv_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
...
@@ -1460,13 +1426,6 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
...
@@ -1460,13 +1426,6 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
which is not conducive to the current integration logic of LoRA and
which is not conducive to the current integration logic of LoRA and
bitsandbytes in vLLM. Therefore, it is necessary to separate them.
bitsandbytes in vLLM. Therefore, it is necessary to separate them.
"""
"""
# Ensure that the LoRA support check passes when the class is not
# initialized, but set all these attributes to empty.
# These will be updated when an instance class is selected
packed_modules_mapping
=
{}
supported_lora_modules
=
[]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__new__
(
cls
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__new__
(
cls
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
@@ -1487,7 +1446,6 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
...
@@ -1487,7 +1446,6 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
# quant_config references base class members,
# quant_config references base class members,
# so update values before init is called
# so update values before init is called
cls
.
packed_modules_mapping
.
update
(
instance_cls
.
packed_modules_mapping
)
cls
.
packed_modules_mapping
.
update
(
instance_cls
.
packed_modules_mapping
)
cls
.
supported_lora_modules
+=
instance_cls
.
supported_lora_modules
cls
.
embedding_modules
.
update
(
instance_cls
.
embedding_modules
)
cls
.
embedding_modules
.
update
(
instance_cls
.
embedding_modules
)
cls
.
embedding_padding_modules
+=
instance_cls
.
embedding_padding_modules
cls
.
embedding_padding_modules
+=
instance_cls
.
embedding_padding_modules
return
instance_cls
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
return
instance_cls
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
vllm/model_executor/models/mixtral.py
View file @
105b8ce4
...
@@ -332,10 +332,6 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -332,10 +332,6 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"embed_tokens"
,
"lm_head"
,
"w1"
,
"w2"
,
"w3"
,
"gate"
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
,
"lm_head"
:
"output_embeddings"
,
...
...
vllm/model_executor/models/molmo.py
View file @
105b8ce4
...
@@ -1440,26 +1440,6 @@ class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA,
...
@@ -1440,26 +1440,6 @@ class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA,
"merged_linear"
:
[
"gate_proj"
,
"up_proj"
]
# image_projector
"merged_linear"
:
[
"gate_proj"
,
"up_proj"
]
# image_projector
}
}
# LoRA specific attributes
supported_lora_modules
=
[
# language model
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
# same name with image_projector
# vision tower
"wq"
,
"wk"
,
"wv"
,
"wo"
,
"w1"
,
"w2"
,
# image_projector
"merged_linear"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
...
vllm/model_executor/models/nemotron.py
View file @
105b8ce4
...
@@ -389,9 +389,6 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -389,9 +389,6 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"up_proj"
,
"down_proj"
,
"embed_tokens"
,
"lm_head"
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
,
"lm_head"
:
"output_embeddings"
,
...
...
vllm/model_executor/models/phi.py
View file @
105b8ce4
...
@@ -273,17 +273,6 @@ class PhiForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -273,17 +273,6 @@ class PhiForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
]
]
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"dense"
,
"fc1"
,
"fc2"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
...
vllm/model_executor/models/phimoe.py
View file @
105b8ce4
...
@@ -526,16 +526,6 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -526,16 +526,6 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"embed_tokens"
,
"lm_head"
,
"w1"
,
"w2"
,
"w3"
,
"gate"
,
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
,
"lm_head"
:
"output_embeddings"
,
...
...
vllm/model_executor/models/qwen.py
View file @
105b8ce4
...
@@ -354,15 +354,6 @@ class QWenLMHeadModel(QWenBaseModel, SupportsPP, SupportsLoRA):
...
@@ -354,15 +354,6 @@ class QWenLMHeadModel(QWenBaseModel, SupportsPP, SupportsLoRA):
"w1"
,
"w1"
,
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"c_attn"
,
"gate_up_proj"
,
"c_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
...
vllm/model_executor/models/qwen2.py
View file @
105b8ce4
...
@@ -430,16 +430,6 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -430,16 +430,6 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
@@ -528,16 +518,6 @@ class Qwen2EmbeddingModel(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -528,16 +518,6 @@ class Qwen2EmbeddingModel(nn.Module, SupportsLoRA, SupportsPP):
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
...
...
vllm/model_executor/models/qwen2_5_vl.py
View file @
105b8ce4
...
@@ -734,27 +734,6 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -734,27 +734,6 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal,
"up_proj"
,
"up_proj"
,
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
# language model
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
# Same name with vision encoder
# vision tower
"qkv"
,
"gate_proj"
,
"up_proj"
,
"attn.proj"
,
# Distinguish patch_embed.proj
"fc1"
,
"fc2"
,
# projector
"mlp.0"
,
"mlp.2"
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
# To ensure correct weight loading and mapping.
# To ensure correct weight loading and mapping.
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
...
...
vllm/model_executor/models/qwen2_rm.py
View file @
105b8ce4
...
@@ -47,16 +47,6 @@ class Qwen2RewardBaseModel(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -47,16 +47,6 @@ class Qwen2RewardBaseModel(nn.Module, SupportsLoRA, SupportsPP):
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
105b8ce4
...
@@ -1048,24 +1048,6 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1048,24 +1048,6 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
# vision tower
"qkv"
,
"attn.proj"
,
# Distinguish patch_embed.proj
"fc1"
,
"fc2"
,
# projector
"mlp.0"
,
"mlp.2"
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
# To ensure correct weight loading and mapping.
# To ensure correct weight loading and mapping.
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"lm_head."
:
"language_model.lm_head."
,
"lm_head."
:
"language_model.lm_head."
,
...
...
vllm/model_executor/models/qwen_vl.py
View file @
105b8ce4
...
@@ -667,21 +667,6 @@ class QwenVLForConditionalGeneration(QWenBaseModel, SupportsPP, SupportsLoRA,
...
@@ -667,21 +667,6 @@ class QwenVLForConditionalGeneration(QWenBaseModel, SupportsPP, SupportsLoRA,
"w1"
,
"w1"
,
],
],
}
}
# LoRA specific attributes
supported_lora_modules
=
[
"c_attn"
,
"gate_up_proj"
,
"c_proj"
,
# visual module
"out_proj"
,
"in_proj"
,
"c_fc"
,
# resampler
"kv_proj"
,
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
def
get_mm_mapping
(
self
)
->
MultiModelKeys
:
def
get_mm_mapping
(
self
)
->
MultiModelKeys
:
"""
"""
...
...
vllm/model_executor/models/solar.py
View file @
105b8ce4
...
@@ -386,14 +386,6 @@ class SolarForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -386,14 +386,6 @@ class SolarForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
}
# LoRA specific attributes
# LoRA specific attributes
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
,
"embed_tokens"
,
"lm_head"
,
]
embedding_modules
=
{
embedding_modules
=
{
"embed_tokens"
:
"input_embeddings"
,
"embed_tokens"
:
"input_embeddings"
,
"lm_head"
:
"output_embeddings"
,
"lm_head"
:
"output_embeddings"
,
...
...
vllm/model_executor/models/transformers.py
View file @
105b8ce4
...
@@ -27,6 +27,11 @@ from vllm.config import VllmConfig
...
@@ -27,6 +27,11 @@ from vllm.config import VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.utils
import
divide
from
vllm.distributed.utils
import
divide
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.fully_sharded_layers
import
(
ColumnParallelLinearWithShardedLoRA
,
RowParallelLinearWithShardedLoRA
)
from
vllm.lora.layers
import
(
ColumnParallelLinearWithLoRA
,
ReplicatedLinearWithLoRA
,
RowParallelLinearWithLoRA
)
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ReplicatedLinear
,
ReplicatedLinear
,
RowParallelLinear
)
RowParallelLinear
)
...
@@ -103,6 +108,23 @@ def replace_linear_class(
...
@@ -103,6 +108,23 @@ def replace_linear_class(
"rowwise"
:
RowParallelLinear
,
"rowwise"
:
RowParallelLinear
,
}.
get
(
style
,
ReplicatedLinear
)
}.
get
(
style
,
ReplicatedLinear
)
lora_linear_cls
=
{
ColumnParallelLinear
:
{
True
:
ColumnParallelLinearWithShardedLoRA
,
# fully sharded
False
:
ColumnParallelLinearWithLoRA
# not fully sharded
},
RowParallelLinear
:
{
True
:
RowParallelLinearWithShardedLoRA
,
False
:
RowParallelLinearWithLoRA
},
# ReplicatedLinear doesn't support fully sharded LoRA yet,
# so we use the same class for both cases.
ReplicatedLinear
:
{
True
:
ReplicatedLinearWithLoRA
,
False
:
ReplicatedLinearWithLoRA
}
}
class
HFCompatibleLinear
(
vllm_linear_cls
):
class
HFCompatibleLinear
(
vllm_linear_cls
):
"""
"""
Wrapper class that removes `output_bias` from returned output.
Wrapper class that removes `output_bias` from returned output.
...
@@ -111,6 +133,19 @@ def replace_linear_class(
...
@@ -111,6 +133,19 @@ def replace_linear_class(
def
forward
(
self
,
input
:
torch
.
Tensor
)
->
torch
.
Tensor
:
def
forward
(
self
,
input
:
torch
.
Tensor
)
->
torch
.
Tensor
:
return
super
().
forward
(
input
)[
0
]
return
super
().
forward
(
input
)[
0
]
@
classmethod
def
get_lora_class
(
cls
,
fully_sharded
:
bool
=
False
):
"""
Get the LoRA class corresponding to the current transformer
linear class.
Args:
fully_sharded (bool): If True, select the LoRA class variant
that supports fully sharded LoRA. Defaults to False.
"""
return
lora_linear_cls
[
vllm_linear_cls
][
fully_sharded
]
return
HFCompatibleLinear
(
return
HFCompatibleLinear
(
input_size
=
linear
.
in_features
,
input_size
=
linear
.
in_features
,
output_size
=
linear
.
out_features
,
output_size
=
linear
.
out_features
,
...
...
vllm/model_executor/models/ultravox.py
View file @
105b8ce4
...
@@ -360,14 +360,6 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
...
@@ -360,14 +360,6 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
"gate_up_proj"
:
[
"gate_proj"
,
"up_proj"
]
"gate_up_proj"
:
[
"gate_proj"
,
"up_proj"
]
}
}
# LoRA specific attributes
# TODO : Add LoRA to the audio tower and projector.
supported_lora_modules
=
[
"qkv_proj"
,
"o_proj"
,
"gate_up_proj"
,
"down_proj"
]
embedding_modules
=
{}
embedding_padding_modules
=
[]
hf_to_vllm_mapper
=
WeightsMapper
(
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"audio_tower.model.encoder."
:
"audio_tower."
})
orig_to_new_prefix
=
{
"audio_tower.model.encoder."
:
"audio_tower."
})
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment