Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
196c34b0
Unverified
Commit
196c34b0
authored
Dec 24, 2024
by
Jee Jee Li
Committed by
GitHub
Dec 24, 2024
Browse files
[Misc] Move weights mapper (#11443)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
5c796324
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
74 additions
and
68 deletions
+74
-68
tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
...dd_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
+3
-2
vllm/model_executor/models/aria.py
vllm/model_executor/models/aria.py
+10
-10
vllm/model_executor/models/bert.py
vllm/model_executor/models/bert.py
+2
-2
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+30
-28
vllm/model_executor/models/phi3v.py
vllm/model_executor/models/phi3v.py
+8
-8
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+3
-2
vllm/model_executor/models/telechat2.py
vllm/model_executor/models/telechat2.py
+14
-13
vllm/model_executor/models/ultravox.py
vllm/model_executor/models/ultravox.py
+4
-3
No files found.
tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
View file @
196c34b0
...
...
@@ -13,6 +13,7 @@ from vllm.sequence import IntermediateTensors, PoolerOutput
class
MyGemma2Embedding
(
nn
.
Module
):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
...
...
@@ -62,8 +63,8 @@ class MyGemma2Embedding(nn.Module):
return
self
.
_pooler
(
hidden_states
,
pooling_metadata
)
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]]):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
weights
=
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
self
.
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
((
name
,
data
)
for
name
,
data
in
weights
if
not
name
.
startswith
(
"lm_head."
))
return
self
.
model
.
load_weights
(
weights
)
vllm/model_executor/models/aria.py
View file @
196c34b0
...
...
@@ -521,6 +521,15 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
This model combines a vision tower, a multi-modal projector, and a language
model to perform tasks that involve both image and text inputs.
"""
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"language_model.model"
:
"language_model"
,
"language_model.lm_head"
:
"lm_head"
,
},
orig_to_new_suffix
=
{
"router.weight"
:
"router_weight"
,
},
)
def
__init__
(
self
,
...
...
@@ -662,15 +671,6 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
return
next_tokens
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]]):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"language_model.model"
:
"language_model"
,
"language_model.lm_head"
:
"lm_head"
,
},
orig_to_new_suffix
=
{
"router.weight"
:
"router_weight"
,
},
)
loader
=
AutoWeightsLoader
(
self
)
loader
.
load_weights
(
weights
,
mapper
=
hf_to_vllm_mapper
)
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
vllm/model_executor/models/bert.py
View file @
196c34b0
...
...
@@ -409,6 +409,7 @@ class BertEmbeddingModel(nn.Module):
model: An instance of BertModel used for forward operations.
_pooler: An instance of Pooler used for pooling operations.
"""
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
...
...
@@ -441,8 +442,7 @@ class BertEmbeddingModel(nn.Module):
return
self
.
_pooler
(
hidden_states
,
pooling_metadata
)
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]]):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
weights
=
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
self
.
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
((
name
,
data
)
for
name
,
data
in
weights
if
not
name
.
startswith
(
"lm_head."
))
self
.
model
.
load_weights
(
weights
)
...
...
vllm/model_executor/models/molmo.py
View file @
196c34b0
...
...
@@ -1123,6 +1123,34 @@ def input_processor_for_molmo(ctx: InputContext, inputs: DecoderOnlyInputs):
@
INPUT_REGISTRY
.
register_input_processor
(
input_processor_for_molmo
)
class
MolmoForCausalLM
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_substr
=
{
# vision backbone mapping
"image_projector.w1."
:
"image_projector.gate_proj."
,
"image_projector.w3."
:
"image_projector.up_proj."
,
"image_projector.w2."
:
"image_projector.down_proj."
,
# language backbone mapping
"att_proj"
:
"self_attn.qkv_proj"
,
"attn_out"
:
"self_attn.o_proj"
,
"q_norm"
:
"self_attn.q_norm"
,
"k_norm"
:
"self_attn.k_norm"
,
"ff_proj"
:
"mlp.gate_up_proj"
,
"ff_out"
:
"mlp.down_proj"
,
"attn_norm"
:
"input_layernorm"
,
"ff_norm"
:
"post_attention_layernorm"
,
},
orig_to_new_prefix
=
{
# vision backbone mapping
"model.vision_backbone."
:
"vision_backbone."
,
# language backbone mapping
"model.transformer.blocks."
:
"model.layers."
,
"model.transformer.ln_f."
:
"model.norm."
,
# lm_head is renamed to model.transformer.mlp.down_proj firstly,
# we need to run a second renaming for it
"model.transformer.mlp.down_proj."
:
"lm_head."
,
},
)
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
...
...
@@ -1298,36 +1326,10 @@ class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
return
next_tokens
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]]):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_substr
=
{
# vision backbone mapping
"image_projector.w1."
:
"image_projector.gate_proj."
,
"image_projector.w3."
:
"image_projector.up_proj."
,
"image_projector.w2."
:
"image_projector.down_proj."
,
# language backbone mapping
"att_proj"
:
"self_attn.qkv_proj"
,
"attn_out"
:
"self_attn.o_proj"
,
"q_norm"
:
"self_attn.q_norm"
,
"k_norm"
:
"self_attn.k_norm"
,
"ff_proj"
:
"mlp.gate_up_proj"
,
"ff_out"
:
"mlp.down_proj"
,
"attn_norm"
:
"input_layernorm"
,
"ff_norm"
:
"post_attention_layernorm"
,
},
orig_to_new_prefix
=
{
# vision backbone mapping
"model.vision_backbone."
:
"vision_backbone."
,
# language backbone mapping
"model.transformer.blocks."
:
"model.layers."
,
"model.transformer.ln_f."
:
"model.norm."
,
# lm_head is renamed to model.transformer.mlp.down_proj firstly,
# we need to run a second renaming for it
"model.transformer.mlp.down_proj."
:
"lm_head."
,
},
)
loader
=
AutoWeightsLoader
(
self
)
weights
=
_get_weights_with_merged_embedding
(
weights
)
return
loader
.
load_weights
(
weights
,
mapper
=
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
def
_get_weights_with_merged_embedding
(
...
...
vllm/model_executor/models/phi3v.py
View file @
196c34b0
...
...
@@ -408,6 +408,13 @@ class Phi3VMultiModalProcessor(BaseMultiModalProcessor):
@
MULTIMODAL_REGISTRY
.
register_max_image_tokens
(
get_max_phi3v_image_tokens
)
@
MULTIMODAL_REGISTRY
.
register_processor
(
Phi3VMultiModalProcessor
)
class
Phi3VForCausalLM
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model.vision_embed_tokens.wte"
:
"embed_tokens"
,
"model.vision_embed_tokens."
:
"vision_embed_tokens."
,
"lm_head."
:
"language_model.lm_head."
,
"model."
:
"language_model.model."
,
})
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
...
...
@@ -616,17 +623,10 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]])
->
Set
[
str
]:
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model.vision_embed_tokens.wte"
:
"embed_tokens"
,
"model.vision_embed_tokens."
:
"vision_embed_tokens."
,
"lm_head."
:
"language_model.lm_head."
,
"model."
:
"language_model.model."
,
})
loader
=
AutoWeightsLoader
(
self
)
autoloaded_weights
=
loader
.
load_weights
(
weights
,
mapper
=
hf_to_vllm_mapper
)
mapper
=
self
.
hf_to_vllm_mapper
)
# The HF config doesn't specify whether these are tied,
# so we detect it this way
...
...
vllm/model_executor/models/qwen2.py
View file @
196c34b0
...
...
@@ -529,6 +529,8 @@ class Qwen2EmbeddingModel(nn.Module, SupportsLoRA, SupportsPP):
embedding_modules
=
{}
embedding_padding_modules
=
[]
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
...
...
@@ -577,8 +579,7 @@ class Qwen2EmbeddingModel(nn.Module, SupportsLoRA, SupportsPP):
return
self
.
_pooler
(
hidden_states
,
pooling_metadata
)
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]]):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model."
:
""
})
weights
=
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
self
.
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
((
name
,
data
)
for
name
,
data
in
weights
if
not
name
.
startswith
(
"lm_head."
))
self
.
model
.
load_weights
(
weights
)
vllm/model_executor/models/telechat2.py
View file @
196c34b0
...
...
@@ -31,6 +31,19 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
class
TeleChat2Model
(
LlamaModel
):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"transformer."
:
"model."
,
},
orig_to_new_substr
=
{
".h."
:
".layers."
,
".self_attention."
:
".self_attn."
,
".word_embeddings."
:
".embed_tokens."
,
".dense."
:
".o_proj."
,
".ln_f."
:
".norm."
,
},
)
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
# 1. Initialize the LlamaModel with bias
vllm_config
.
model_config
.
hf_config
.
bias
=
True
...
...
@@ -111,21 +124,9 @@ class TeleChat2ForCausalLM(LlamaForCausalLM):
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]])
->
Set
[
str
]:
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"transformer."
:
"model."
,
},
orig_to_new_substr
=
{
".h."
:
".layers."
,
".self_attention."
:
".self_attn."
,
".word_embeddings."
:
".embed_tokens."
,
".dense."
:
".o_proj."
,
".ln_f."
:
".norm."
,
},
)
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
([
"lm_head."
]
if
self
.
config
.
tie_word_embeddings
else
None
),
)
return
loader
.
load_weights
(
weights
,
mapper
=
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
vllm/model_executor/models/ultravox.py
View file @
196c34b0
...
...
@@ -302,6 +302,9 @@ class ModifiedWhisperEncoder(WhisperEncoder):
@
MULTIMODAL_REGISTRY
.
register_processor
(
UltravoxMultiModalProcessor
)
class
UltravoxModel
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"audio_tower.model.encoder."
:
"audio_tower."
})
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
...
...
@@ -494,9 +497,7 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
Tuple
[
str
,
torch
.
Tensor
]])
->
Set
[
str
]:
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"audio_tower.model.encoder."
:
"audio_tower."
})
loader
=
AutoWeightsLoader
(
self
,
ignore_unexpected_prefixes
=
[
"audio_tower."
])
return
loader
.
load_weights
(
weights
,
mapper
=
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment