Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3908ec24
Commit
3908ec24
authored
Nov 18, 2025
by
guanyu1
Browse files
test2
parent
3c29e139
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
237 additions
and
7 deletions
+237
-7
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+24
-3
vllm/model_executor/models/adapters_custom/adapters_classify.py
...odel_executor/models/adapters_custom/adapters_classify.py
+213
-4
No files found.
vllm/model_executor/model_loader/utils.py
View file @
3908ec24
...
...
@@ -31,10 +31,30 @@ import vllm.envs as envs
logger
=
init_logger
(
__name__
)
from
..models.adapters_custom.adapters_classify
import
(
as_hunyuan_seq_cls_model
,
new_hy_05b_dense_official_classification
,
hy_2b_dense_classification_official_hf_multihead_full_mask
)
def _hunyuan_classify_selector(model_cls: type[nn.Module],
                               hf_config) -> type[nn.Module]:
    """Select the HunYuan sequence-classification adapter by hidden size.

    Args:
        model_cls: The vLLM model class to wrap.
        hf_config: The HF config of the model; only ``hidden_size`` is read.

    Returns:
        ``model_cls`` wrapped by
        ``hy_2b_dense_classification_official_hf_multihead_full_mask`` when
        ``hidden_size == 2560``, by
        ``new_hy_05b_dense_official_classification`` when
        ``hidden_size == 1280``, and by the generic ``as_seq_cls_model``
        for every other size.
    """
    # Extend this table as needed for other variants.
    adapters_by_hidden_size = {
        2560: hy_2b_dense_classification_official_hf_multihead_full_mask,
        1280: new_hy_05b_dense_official_classification,
    }
    # Unknown sizes used to yield ``None``, which the caller then stored as
    # the model class; fall back to the generic adapter instead.
    adapter = adapters_by_hidden_size.get(hf_config.hidden_size,
                                          as_seq_cls_model)
    return adapter(model_cls)
# Architecture name -> selector used for the "classify" conversion.
# A dict literal keeps only the last value for a repeated key, so each
# architecture must be listed exactly once.
CLASSIFY_CLASSIFY_REGISTRY = {
    # Uses a selector that decides adapter by hidden_size
    "HunYuanForCausalLM": _hunyuan_classify_selector,
}
@
contextlib
.
contextmanager
...
...
@@ -263,7 +283,8 @@ def _get_model_architecture(
model_cls
=
as_embedding_model
(
model_cls
)
elif
convert_type
==
"classify"
:
if
arch
in
CLASSIFY_CLASSIFY_REGISTRY
.
keys
():
model_cls
=
CLASSIFY_CLASSIFY_REGISTRY
[
arch
](
model_cls
)
selector
=
CLASSIFY_CLASSIFY_REGISTRY
[
arch
]
model_cls
=
selector
(
model_cls
,
model_config
.
hf_config
)
else
:
logger
.
debug_once
(
"Converting to sequence classification model."
)
model_cls
=
as_seq_cls_model
(
model_cls
)
...
...
vllm/model_executor/models/adapters_custom/adapters_classify.py
View file @
3908ec24
...
...
@@ -5,7 +5,8 @@ import ast
import
inspect
from
collections.abc
import
Iterable
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
,
TypeVar
,
cast
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
import
torch
import
torch.nn
as
nn
...
...
@@ -269,7 +270,7 @@ def as_embedding_model(cls: _T) -> _T:
return
ModelForEmbedding
# type: ignore
def
as_hunyuan_seq_cls_model
(
cls
:
_T
)
->
_T
:
def
new_hy_05b_dense_official_classification
(
cls
:
_T
)
->
_T
:
"""
Subclass an existing vLLM model to support classify and score tasks.
...
...
@@ -301,8 +302,7 @@ def as_hunyuan_seq_cls_model(cls: _T) -> _T:
def
_init_pooler
(
self
,
vllm_config
:
"VllmConfig"
,
prefix
:
str
=
""
):
config
=
vllm_config
.
model_config
.
hf_config
quant_config
=
vllm_config
.
quant_config
hidden_size
=
get_model_hidden_size
(
config
)
self
.
pad_id
=
config
.
pad_id
pooler_config
=
vllm_config
.
model_config
.
pooler_config
if
self
.
config
.
add_classification_head
:
...
...
@@ -367,6 +367,215 @@ def as_hunyuan_seq_cls_model(cls: _T) -> _T:
intermediate_tensors
:
Optional
[
IntermediateTensors
]
=
None
,
inputs_embeds
:
Optional
[
torch
.
Tensor
]
=
None
,
)
->
torch
.
Tensor
:
self
.
input_ids
=
input_ids
return
super
().
forward
(
input_ids
,
positions
,
intermediate_tensors
,
inputs_embeds
)
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
    """Load weights, going through the seq-cls loader when configured.

    If the config defines neither ``classifier_from_token`` nor
    ``method``, loading is delegated unchanged to the parent class.
    Otherwise the weights are passed to ``seq_cls_model_loader``.
    """
    hf_cfg = self.config
    classifier_tokens = getattr(hf_cfg, "classifier_from_token", None)
    convert_method = getattr(hf_cfg, "method", None)

    # Either setting being present requests the online conversion of a
    # ForCausalLM checkpoint into a ForSequenceClassification model.
    if classifier_tokens is not None or convert_method is not None:
        return seq_cls_model_loader(self, weights)
    return super().load_weights(weights)
ModelForSequenceClassification
.
__name__
=
\
_get_pooling_model_name
(
cls
.
__name__
,
"ForSequenceClassification"
)
return
ModelForSequenceClassification
# type: ignore
def
hy_2b_dense_classification_official_hf_multihead_full_mask
(
cls
:
_T
)
->
_T
:
"""
Subclass an existing vLLM model to support classify and score tasks.
By default, the class probabilities are extracted from the softmaxed
hidden state corresponding to the last token.
Note:
We assume that the classification head is a single linear layer
stored as the attribute `score` of the top-level model;
please implement your own model if this is not the case.
"""
# Avoid modifying existing classification models
if
is_pooling_model
(
cls
):
return
cls
# Lazy import
from
vllm.model_executor.layers.linear
import
ReplicatedLinear
from
vllm.model_executor.layers.pooler
import
(
ClassifierPooler
,
DispatchPooler
,
Pooler
,
PoolingMethod
,
PoolingType
,
PoolerIdentity
)
from
vllm.model_executor.models.interfaces
import
SupportsCrossEncoding
from
vllm.sequence
import
IntermediateTensors
from
..utils
import
get_model_hidden_size
,
maybe_prefix
class
ModelForSequenceClassification
(
_create_pooling_model_cls
(
cls
),
SupportsCrossEncoding
):
def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
    """Create the classification-head layers and the ``classify`` pooler.

    Reads the HF config and quantization config from ``vllm_config``,
    stores ``config.pad_id`` on the instance and, when
    ``self.config.add_classification_head`` is truthy, builds the head
    layers (all with ``params_dtype=torch.float32``) and a
    ``DispatchPooler`` that serves only the ``"classify"`` task through
    ``self._classifier``.

    Args:
        vllm_config: Source of ``model_config.hf_config``,
            ``model_config.pooler_config`` and ``quant_config``.
        prefix: Name prefix used when building the layers' ``prefix``
            arguments.
    """
    config = vllm_config.model_config.hf_config
    quant_config = vllm_config.quant_config
    self.pad_id = config.pad_id
    pooler_config = vllm_config.model_config.pooler_config
    # NOTE(review): the flag is read from ``self.config`` while everything
    # else uses the local ``config`` -- presumably the same object; confirm.
    # NOTE(review): the body of this ``if`` is taken to run to the end of the
    # method (the assert message below refers to the classification head);
    # confirm against the original file.
    if self.config.add_classification_head:
        # First head layer: hidden_size -> hidden_size.
        self.pool_head = ReplicatedLinear(
            config.hidden_size,
            config.hidden_size,
            bias=True,
            quant_config=quant_config,
            params_dtype=torch.float32,
            prefix=maybe_prefix(prefix, "pool_head"),
            return_bias=False,
        )
        # Second head layer: hidden_size -> class_num.
        # NOTE(review): this is the only layer built with return_bias=True,
        # yet _classifier calls ``.contiguous()`` directly on its result --
        # confirm the layer does not return a tuple in that mode.
        self.pool_head2 = ReplicatedLinear(
            config.hidden_size,
            config.class_num,
            bias=True,
            quant_config=quant_config,
            params_dtype=torch.float32,
            prefix=maybe_prefix(prefix, "pool_head2"),
            return_bias=True,
        )
        # Layers used by encode_qfeat: a 2 -> 128 projection and a
        # 100-entry, 128-dim embedding table.
        self.qfeat_emb = ReplicatedLinear(
            2,
            128,
            bias=True,
            quant_config=quant_config,
            params_dtype=torch.float32,
            prefix=maybe_prefix(prefix, "qfeat_emb"),
            return_bias=False,
        )
        # NOTE(review): this prefix is built with an f-string while every
        # other layer uses maybe_prefix(); with the default prefix="" it
        # becomes ".qfeat_emb_topic" -- confirm this is intended.
        self.qfeat_emb_topic = VocabParallelEmbedding(
            100,
            128,
            quant_config=quant_config,
            prefix=f"{prefix}.qfeat_emb_topic",
        )
        # 256 -> 256 layer applied in encode_qfeat to the concatenated
        # outputs of the two layers above.
        self.qfeat_fc1 = ReplicatedLinear(
            256,
            256,
            bias=True,
            quant_config=quant_config,
            params_dtype=torch.float32,
            prefix=maybe_prefix(prefix, "qfeat_fc1"),
            return_bias=False,
        )
        # Two 256 -> 3 layers; _classifier uses their outputs as a weight
        # (qfeat_fc2) and a bias (qfeat_fc3) on the concatenated logits.
        self.qfeat_fc2 = ReplicatedLinear(
            256,
            3,
            bias=True,
            quant_config=quant_config,
            params_dtype=torch.float32,
            prefix=maybe_prefix(prefix, "qfeat_fc2"),
            return_bias=False,
        )
        self.qfeat_fc3 = ReplicatedLinear(
            256,
            3,
            bias=True,
            quant_config=quant_config,
            params_dtype=torch.float32,
            prefix=maybe_prefix(prefix, "qfeat_fc3"),
            return_bias=False,
        )
        # For ForSequenceClassification compatibility: point `score`
        # directly at the final classification head. No separate layer is
        # created; pool_head2 is the final scoring layer.
        #self.score = self.pool_head2
        # Mark this instance as pooling-capable and build DispatchPooler
        self.is_pooling_model = True
        assert pooler_config is not None, (
            "PoolerConfig must be provided to use classification head")
        # Determine pooling type (fallback to config.pool_type, then "LAST");
        # the name is upper-cased before the enum lookup.
        pooling_type_str = (pooler_config.pooling_type
                            if pooler_config.pooling_type is not None else
                            getattr(config, "pool_type", "LAST")).upper()
        # "LASTTOKEN" is accepted as an alias for "LAST".
        if pooling_type_str == "LASTTOKEN":
            pooling_type_str = "LAST"
        pooling_type = PoolingType[pooling_type_str]
        # Only the "classify" task is registered; PoolerIdentity() is passed
        # as act_fn.
        self.pooler = DispatchPooler({
            "classify":
            ClassifierPooler(
                pooling=PoolingMethod.from_pooling_type(pooling_type),
                classifier=self._classifier,
                act_fn=PoolerIdentity(),
            )
        })
def encode_qfeat(self, qfeat):
    """Encode the query-feature tensor into a hidden representation.

    Columns 0-1 of ``qfeat`` go through ``qfeat_emb``; column 2 is cast
    to ``torch.long`` and looked up in ``qfeat_emb_topic``. The two
    results are concatenated along dim 1, passed through ``qfeat_fc1``
    and rectified with ReLU.
    """
    dense_part = self.qfeat_emb(qfeat[:, :2])
    topic_ids = qfeat[:, 2].to(torch.long)
    topic_part = self.qfeat_emb_topic(topic_ids)
    fused = torch.cat([dense_part, topic_part], dim=1)
    # ReLU is the final activation; no softmax is applied.
    return torch.relu(self.qfeat_fc1(fused))
def _classifier(self, x: torch.Tensor):
    """Compute the reward from ``x`` using the classification head.

    This is the callable passed as ``classifier=`` to ``ClassifierPooler``
    in ``_init_pooler``. ``x`` goes through ``pool_head`` and ``tanh``,
    then ``pool_head2``; the result is reduced according to
    ``self.config.pool_type`` ("mean", "last", or anything else).

    NOTE(review): as written this method cannot run -- ``qfeat`` is read
    before it is assigned (see below).
    """
    pooled_output = self.pool_head(x)
    # Keep only the first element if the layer returned a tuple.
    if isinstance(pooled_output, tuple):
        pooled_output = pooled_output[0]
    pooled_output = torch.tanh(pooled_output)
    # All four tensors below come from the same ``pool_head2`` call on the
    # same input; they differ only in which position is read from them later.
    # NOTE(review): pool_head2 is built with return_bias=True in
    # _init_pooler; if it then returns a tuple, ``.contiguous()`` fails here
    # -- confirm.
    pooled_output_sat = self.pool_head2(pooled_output).contiguous()  # bs * class_num
    pooled_output_rel = self.pool_head2(pooled_output).contiguous()  # bs * class_num
    pooled_output_time = self.pool_head2(pooled_output).contiguous()  # bs * class_num
    pooled_output_auth = self.pool_head2(pooled_output).contiguous()  # bs * class_num
    # NOTE(review): ``qfeat`` is neither a parameter nor assigned earlier, and
    # the assignment makes it a local name, so this line raises
    # UnboundLocalError on every call. The intended source of the query
    # features is not visible here -- must be wired in before this can work.
    qfeat = qfeat.to(dtype=pooled_output.dtype)
    qhidden = self.encode_qfeat(qfeat)
    # Per-sample weight and bias (3 outputs each); only the "last" branch
    # uses them.
    a_wei = self.qfeat_fc2(qhidden)
    a_bias = self.qfeat_fc3(qhidden)
    if self.config.pool_type == "mean":
        # Average the tanh-activated pool_head output over dim 1.
        reward = pooled_output.mean(dim=1).squeeze(-1)
    elif self.config.pool_type == "last":
        # bs * hidden_size
        # Count of non-pad tokens per row, minus one.
        # NOTE(review): assumes self.input_ids is 2-D (batch, seq) -- confirm
        # that matches what forward() stores.
        seq_length = (self.input_ids != self.pad_id).long().sum(dim=1) - 1
        batch_size = self.input_ids.size(0)
        # The four logits are read at positions seq_length-1 .. seq_length-4
        # (seq_length already has one subtracted above).
        sat_logits = pooled_output_sat[torch.arange(batch_size, device=pooled_output.device), seq_length - 1]
        auth_logits = pooled_output_auth[torch.arange(batch_size, device=pooled_output.device), seq_length - 2]
        time_logits = pooled_output_time[torch.arange(batch_size, device=pooled_output.device), seq_length - 3]
        rel_logits = pooled_output_rel[torch.arange(batch_size, device=pooled_output.device), seq_length - 4]
        # a_score = torch.sigmoid(torch.concat([rel_logits, time_logits, auth_logits], dim=1))
        multii_logits = torch.concat([rel_logits, time_logits, auth_logits], dim=1)
        # Gate: sigmoid of the sum over dim 1 of the weighted, biased logits.
        task_logits = (a_wei * multii_logits + a_bias).sum(dim=1, keepdim=True)
        task_logits = torch.sigmoid(task_logits)
        #gate_time = (a_wei * multii_logits + wei_time).sum(dim=1, keepdim=True)
        #gate_time = torch.sigmoid(gate_time)
        #gate_auth = (a_wei * multii_logits + wei_auth).sum(dim=1, keepdim=True)
        #gate_auth = torch.sigmoid(gate_auth)
        # The gate scales the "sat" logits; that product is the output.
        sat_logits_new = task_logits * sat_logits
        #logits = 2.0 * sat_logits_new.detach() + 0.25 * (qfeat[:,0].float().unsqueeze(1)) * gate_time * time_logits.detach() + 0.5 * (qfeat[:,1].float().unsqueeze(1) + 0.4) * gate_auth * auth_logits.detach()
        logits = sat_logits_new
        reward = logits.squeeze(-1)
    else:
        # Any other pool_type: take index 0 along dim 1.
        reward = pooled_output[:, 0].squeeze(-1)
    return reward
def forward(
    self,
    input_ids: torch.Tensor,
    positions: torch.Tensor,
    intermediate_tensors: Optional[IntermediateTensors] = None,
    inputs_embeds: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Remember ``input_ids`` on the instance, then run the parent forward.

    The stored ``self.input_ids`` is what ``_classifier`` reads when it
    compares token ids against ``self.pad_id``.
    """
    # Store before delegating so the attribute is set for this step.
    self.input_ids = input_ids
    outputs = super().forward(
        input_ids,
        positions,
        intermediate_tensors,
        inputs_embeds,
    )
    return outputs
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment