Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f57ee565
Unverified
Commit
f57ee565
authored
Dec 26, 2024
by
Jee Jee Li
Committed by
GitHub
Dec 26, 2024
Browse files
[Model] Modify MolmoForCausalLM MLP (#11510)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
dcb1a944
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
18 deletions
+24
-18
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+24
-18
No files found.
vllm/model_executor/models/molmo.py
View file @
f57ee565
...
...
@@ -464,24 +464,27 @@ class MolmoAttention(nn.Module):
class
MolmoMLP
(
nn
.
Module
):
"""Molmo's LLM mlp."""
def
__init__
(
self
,
def
__init__
(
self
,
config
:
PretrainedConfig
,
input_dim
:
Optional
[
int
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
)
->
None
:
proj_name
:
str
=
"gate_up_proj"
)
->
None
:
super
().
__init__
()
self
.
hidden_size
=
config
.
hidden_size
self
.
intermediate_size
=
config
.
intermediate_size
//
2
# Feed-forward input projection.
self
.
gate_up_proj
=
MergedColumnParallelLinear
(
# Molmo's LLM proj weights are already merged on disk, while the
# image_projector proj weights are stored separately. If both used the same proj_name, it
# would create ambiguity and make it difficult to support BNB and LoRA.
self
.
proj_name
=
proj_name
setattr
(
self
,
proj_name
,
MergedColumnParallelLinear
(
input_dim
or
self
.
hidden_size
,
[
self
.
intermediate_size
]
*
2
,
bias
=
False
,
quant_config
=
quant_config
,
)
))
# Activation function.
self
.
act_fn
=
SiluAndMul
()
...
...
@@ -497,7 +500,7 @@ class MolmoMLP(nn.Module):
self
,
x
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
gate_up
,
_
=
self
.
gate_up_proj
(
x
)
gate_up
,
_
=
getattr
(
self
,
self
.
proj_name
)
(
x
)
x
=
self
.
act_fn
(
gate_up
)
x
,
_
=
self
.
down_proj
(
x
)
return
x
...
...
@@ -520,7 +523,9 @@ class MolmoDecoderLayer(nn.Module):
prefix
=
f
"
{
prefix
}
.self_attn"
)
# MLP block.
self
.
mlp
=
MolmoMLP
(
config
,
quant_config
=
quant_config
)
self
.
mlp
=
MolmoMLP
(
config
,
quant_config
=
quant_config
,
proj_name
=
"gate_up_proj"
)
# LayerNorm
assert
config
.
layer_norm_type
==
"rms"
...
...
@@ -616,6 +621,7 @@ class MolmoVisionBackbone(nn.Module):
config
,
input_dim
=
vision_config
.
image_emb_dim
,
quant_config
=
quant_config
,
proj_name
=
"merged_linear"
,
)
image_dim
=
vision_config
.
image_emb_dim
*
len
(
self
.
vit_layers
)
...
...
@@ -714,8 +720,8 @@ class MolmoVisionBackbone(nn.Module):
torch
.
Tensor
]])
->
Set
[
str
]:
stacked_params_mapping
=
[
# (param_name, shard_name, shard_id)
(
"
gate_up_proj
"
,
"gate_proj"
,
0
),
(
"
gate_up_proj
"
,
"up_proj"
,
1
),
(
"
merged_linear
"
,
"gate_proj"
,
0
),
(
"
merged_linear
"
,
"up_proj"
,
1
),
]
params_dict
=
dict
(
self
.
named_parameters
())
loaded_params
:
Set
[
str
]
=
set
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment