Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
70fdd8a2
"tests/vscode:/vscode.git/clone" did not exist on "ef292944fdda00f08448ea0eac801884cacb9f3e"
Commit
70fdd8a2
authored
Nov 15, 2025
by
guanyu1
Browse files
hunyuan分类模型适配
parent
e0ba5f60
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
672 additions
and
27 deletions
+672
-27
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+48
-1
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+11
-3
vllm/model_executor/models/adapters_custom/__init__.py
vllm/model_executor/models/adapters_custom/__init__.py
+0
-0
vllm/model_executor/models/adapters_custom/adapters_classify.py
...odel_executor/models/adapters_custom/adapters_classify.py
+562
-0
vllm/model_executor/models/hunyuan.py
vllm/model_executor/models/hunyuan.py
+51
-23
No files found.
vllm/model_executor/layers/linear.py
View file @
70fdd8a2
...
...
@@ -3,7 +3,7 @@
import
itertools
from
abc
import
abstractmethod
from
typing
import
Any
,
Literal
,
Optional
,
Union
from
typing
import
Any
,
Iterable
,
Literal
,
Optional
,
Union
import
vllm.envs
as
envs
import
torch
import
torch.nn
as
nn
...
...
@@ -414,6 +414,53 @@ class ReplicatedLinear(LinearBase):
else
:
self
.
register_parameter
(
"bias"
,
None
)
self
.
is_quantization
=
not
isinstance
(
self
.
quant_method
,
UnquantizedLinearMethod
)
def load_weights(
    self,
    weights: Iterable[tuple[str, torch.Tensor]],
) -> set[str]:
    """Copy ``(name, tensor)`` pairs into tensors owned directly by this layer.

    Each name is resolved, in order, against the layer's direct
    parameters, its direct buffers, and finally any attribute of the
    layer that is a ``torch.Tensor``. An empty name stands for
    ``"weight"``.

    Args:
        weights: Iterable of ``(name, tensor)`` pairs to load.

    Returns:
        The set of resolved names that were loaded.

    Raises:
        ValueError: If a buffer or tensor attribute has a different shape
            than the incoming tensor, or if a name matches nothing on
            this layer.
    """
    own_params = dict(self.named_parameters(recurse=False))
    own_buffers = dict(self.named_buffers(recurse=False))
    seen: set[str] = set()

    for raw_name, tensor in weights:
        # A missing/empty suffix refers to the primary weight parameter.
        key = raw_name if raw_name else "weight"

        if key in own_params:
            target = own_params[key]
            # Prefer a loader attached to the parameter itself; otherwise
            # fall back to this layer's own weight_loader.
            load_fn = getattr(target, "weight_loader", self.weight_loader)
            load_fn(target, tensor)
        elif key in own_buffers:
            dest = own_buffers[key]
            if dest.shape != tensor.shape:
                raise ValueError(
                    f"Shape mismatch when loading buffer '{key}': "
                    f"expected {dest.shape}, got {tensor.shape}")
            dest.copy_(tensor)
        else:
            dest = getattr(self, key, None)
            if not isinstance(dest, torch.Tensor):
                raise ValueError(f"Unexpected weight '{key}' for "
                                 f"{type(self).__name__}")
            if dest.shape != tensor.shape:
                raise ValueError(
                    f"Shape mismatch when loading tensor '{key}': "
                    f"expected {dest.shape}, got {tensor.shape}")
            dest.copy_(tensor)

        seen.add(key)

    return seen
def
weight_loader
(
self
,
param
:
Parameter
,
loaded_weight
:
torch
.
Tensor
):
# If the weight on disk does not have a shape, give it one
...
...
vllm/model_executor/model_loader/utils.py
View file @
70fdd8a2
...
...
@@ -30,7 +30,12 @@ from vllm.utils import is_pin_memory_available
import
vllm.envs
as
envs
logger
=
init_logger
(
__name__
)
from
..models.adapters_custom.adapters_classify
import
(
as_hunyuan_seq_cls_model
,
)
# Maps a model architecture name to the adapter that wraps its model class
# when the "classify" conversion is requested. Architectures listed here use
# their dedicated adapter; the classify branch in this module otherwise falls
# back to the generic as_seq_cls_model.
CLASSIFY_CLASSIFY_REGISTRY = {
    "HunYuanForCausalLM": as_hunyuan_seq_cls_model,
}
@
contextlib
.
contextmanager
def
set_default_torch_dtype
(
dtype
:
torch
.
dtype
):
...
...
@@ -257,8 +262,11 @@ def _get_model_architecture(
logger
.
debug_once
(
"Converting to embedding model."
)
model_cls
=
as_embedding_model
(
model_cls
)
elif
convert_type
==
"classify"
:
logger
.
debug_once
(
"Converting to sequence classification model."
)
model_cls
=
as_seq_cls_model
(
model_cls
)
if
arch
in
CLASSIFY_CLASSIFY_REGISTRY
.
keys
():
model_cls
=
CLASSIFY_CLASSIFY_REGISTRY
[
arch
](
model_cls
)
else
:
logger
.
debug_once
(
"Converting to sequence classification model."
)
model_cls
=
as_seq_cls_model
(
model_cls
)
elif
convert_type
==
"reward"
:
logger
.
debug_once
(
"Converting to reward model."
)
model_cls
=
as_reward_model
(
model_cls
)
...
...
vllm/model_executor/models/adapters_custom/__init__.py
0 → 100644
View file @
70fdd8a2
vllm/model_executor/models/adapters_custom/adapters_classify.py
0 → 100644
View file @
70fdd8a2
This diff is collapsed.
Click to expand it.
vllm/model_executor/models/hunyuan.py
View file @
70fdd8a2
...
...
@@ -36,6 +36,9 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
QKVParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
)
from
vllm.model_executor.layers.pooler
import
(
ClassifierPooler
,
DispatchPooler
,
Pooler
,
PoolingMethod
,
PoolingType
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
...
...
@@ -553,23 +556,23 @@ class HunYuanDecoderLayer(nn.Module):
residual
:
Optional
[
torch
.
Tensor
],
kv_states
:
Optional
[
Tuple
[
torch
.
Tensor
]]
=
None
,
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
# Self Attention
if
residual
is
None
:
residual
=
hidden_states
hidden_states
=
self
.
input_layernorm
(
hidden_states
)
else
:
hidden_states
,
residual
=
self
.
input_layernorm
(
hidden_states
,
residual
)
residual
=
hidden_states
hidden_states
=
self
.
input_layernorm
(
hidden_states
)
hidden_states
,
ori_kv_states
=
self
.
self_attn
(
positions
=
positions
,
hidden_states
=
hidden_states
,
kv_states
=
kv_states
,
)
hidden_states
=
residual
+
hidden_states
residual
=
hidden_states
hidden_states
=
self
.
post_attention_layernorm
(
hidden_states
)
hidden_states
=
self
.
mlp
(
hidden_states
)
hidden_states
=
hidden_states
+
residual
# Fully Connected
hidden_states
,
residual
=
self
.
post_attention_layernorm
(
hidden_states
,
residual
)
hidden_states
=
self
.
mlp
(
hidden_states
)
return
hidden_states
,
residual
,
ori_kv_states
...
...
@@ -614,11 +617,13 @@ class HunYuanModel(nn.Module):
prefix
=
prefix
,
),
prefix
=
f
"
{
prefix
}
.layers"
)
if
get_pp_group
().
is_last_rank
:
self
.
norm
=
RMSNorm
(
config
.
hidden_size
,
eps
=
config
.
rms_norm_eps
)
else
:
self
.
norm
=
PPMissingLayer
()
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    """Return the result of applying ``self.embed_tokens`` to *input_ids*."""
    embeddings = self.embed_tokens(input_ids)
    return embeddings
...
...
@@ -650,7 +655,7 @@ class HunYuanModel(nn.Module):
residual
,
prev_kv_states
,
)
if
(
getattr
(
self
.
config
,
"use_cla"
,
False
)
and
(
i
-
self
.
start_layer
)
%
cla_factor
==
0
):
prev_kv_states
=
kv_states
...
...
@@ -662,8 +667,8 @@ class HunYuanModel(nn.Module):
"hidden_states"
:
hidden_states
,
"residual"
:
residual
})
hidden_states
,
_
=
self
.
norm
(
hidden_states
,
residual
)
if
not
self
.
config
.
add_classification_head
:
hidden_states
,
_
=
self
.
norm
(
hidden_states
,
residual
)
return
hidden_states
def
_split_qkv_weight
(
self
,
qkv
:
torch
.
Tensor
):
...
...
@@ -732,6 +737,10 @@ class HunYuanModel(nn.Module):
loaded_params
:
set
[
str
]
=
set
()
expert_params_mapping
=
self
.
get_expert_mapping
()
for
name
,
loaded_weight
in
weights
:
if
name
.
startswith
(
"norm."
):
# Some checkpoints omit the final norm; treat as handled.
loaded_params
.
add
(
name
)
continue
if
"rotary_emb.inv_freq"
in
name
:
continue
if
"gate_proj_bias"
in
name
:
...
...
@@ -880,8 +889,16 @@ class HunYuanModel(nn.Module):
default_weight_loader
)
weight_loader
(
param
,
loaded_weight
)
loaded_params
.
add
(
name
)
if
"norm.weight"
in
params_dict
:
loaded_params
.
add
(
"norm.weight"
)
return
loaded_params
class
HunYuanForCausalLM
(
nn
.
Module
,
SupportsLoRA
,
SupportsPP
):
packed_modules_mapping
=
{
"qkv_proj"
:
[
...
...
@@ -902,8 +919,11 @@ class HunYuanForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
quant_config
=
vllm_config
.
quant_config
self
.
config
=
config
self
.
quant_config
=
quant_config
self
.
pad_id
=
self
.
config
.
pad_id
self
.
model
=
HunYuanModel
(
vllm_config
=
vllm_config
,
prefix
=
"model"
)
if
get_pp_group
().
is_last_rank
:
self
.
unpadded_vocab_size
=
config
.
vocab_size
self
.
lm_head
=
ParallelLMHead
(
...
...
@@ -924,6 +944,7 @@ class HunYuanForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
else
:
self
.
lm_head
=
PPMissingLayer
()
def
set_eplb_state
(
self
,
expert_load_view
:
torch
.
Tensor
,
...
...
@@ -957,18 +978,26 @@ class HunYuanForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
moe
.
n_physical_experts
=
num_physical_experts
moe
.
n_redundant_experts
=
self
.
num_redundant_experts
moe
.
experts
.
update_expert_map
()
def forward(
    self,
    input_ids: torch.Tensor,
    positions: torch.Tensor,
    intermediate_tensors: Optional[IntermediateTensors] = None,
    inputs_embeds: Optional[torch.Tensor] = None,
    token_type_ids: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, IntermediateTensors]:
    """Run the wrapped ``self.model`` and return its output unchanged.

    Args:
        input_ids: Forwarded to ``self.model`` as the first argument.
        positions: Forwarded to ``self.model`` as the second argument.
        intermediate_tensors: Forwarded to ``self.model`` as the third
            argument.
        inputs_embeds: Forwarded to ``self.model`` as the fourth argument.
        token_type_ids: Accepted but not used by this method.

    Returns:
        Whatever ``self.model`` returns for the forwarded arguments.
    """
    # Arguments are passed positionally, in this exact order.
    return self.model(
        input_ids,
        positions,
        intermediate_tensors,
        inputs_embeds,
    )
def
compute_logits
(
self
,
hidden_states
:
torch
.
Tensor
,
...
...
@@ -992,14 +1021,13 @@ class HunYuanForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def load_weights(self, weights: Iterable[tuple[str,
                                               torch.Tensor]]) -> set[str]:
    """Load checkpoint weights into this model via ``AutoWeightsLoader``.

    Args:
        weights: Iterable of ``(name, tensor)`` pairs from the checkpoint.

    Returns:
        The set of names reported as loaded by the loader.
    """
    # When the config ties word embeddings, checkpoint entries under
    # "lm_head." are skipped (presumably because the head shares the
    # embedding weights -- confirm against the model's __init__).
    skip_prefixes = (["lm_head."]
                     if self.config.tie_word_embeddings else [])
    # Build the loader exactly once; an earlier construction that was
    # immediately overwritten has been dropped.
    loader = AutoWeightsLoader(self, skip_prefixes=skip_prefixes)
    return loader.load_weights(weights)
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    """Delegate to ``self.model.get_input_embeddings`` for *input_ids*."""
    inner_model = self.model
    return inner_model.get_input_embeddings(input_ids)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment