Commit 3908ec24 authored by guanyu1's avatar guanyu1
Browse files

test2

parent 3c29e139
......@@ -31,10 +31,30 @@ import vllm.envs as envs
logger = init_logger(__name__)
from ..models.adapters_custom.adapters_classify import (
as_hunyuan_seq_cls_model,
new_hy_05b_dense_official_classification,
hy_2b_dense_classification_official_hf_multihead_full_mask
)
def _hunyuan_classify_selector(model_cls: type[nn.Module],
                               hf_config) -> type[nn.Module]:
    """Select the HunYuan sequence-classification adapter by hidden size.

    Args:
        model_cls: Base model class to wrap.
        hf_config: HF config object; only ``hidden_size`` is read.

    Returns:
        ``model_cls`` wrapped by the adapter matching ``hidden_size``:

        * 2560 -> ``hy_2b_dense_classification_official_hf_multihead_full_mask``
        * 1280 -> ``new_hy_05b_dense_official_classification``
        * anything else -> the generic ``as_seq_cls_model``
    """
    hidden_size = hf_config.hidden_size
    if hidden_size == 2560:  # extend as needed for other variants
        return hy_2b_dense_classification_official_hf_multihead_full_mask(
            model_cls)
    if hidden_size == 1280:
        return new_hy_05b_dense_official_classification(model_cls)
    # The caller assigns the result directly to ``model_cls``, so returning
    # None here would break model construction for unsupported sizes. Fall
    # back to the generic adapter instead.
    return as_seq_cls_model(model_cls)
# Maps an architecture name to a callable ``(model_cls, hf_config)`` that
# returns the classification-adapted model class.
CLASSIFY_CLASSIFY_REGISTRY = {
    # Uses a selector that decides adapter by hidden_size
    "HunYuanForCausalLM": _hunyuan_classify_selector,
}
@contextlib.contextmanager
......@@ -263,7 +283,8 @@ def _get_model_architecture(
model_cls = as_embedding_model(model_cls)
elif convert_type == "classify":
if arch in CLASSIFY_CLASSIFY_REGISTRY.keys():
model_cls = CLASSIFY_CLASSIFY_REGISTRY[arch](model_cls)
selector = CLASSIFY_CLASSIFY_REGISTRY[arch]
model_cls = selector(model_cls,model_config.hf_config)
else:
logger.debug_once("Converting to sequence classification model.")
model_cls = as_seq_cls_model(model_cls)
......
......@@ -5,7 +5,8 @@ import ast
import inspect
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Optional, TypeVar, cast
from vllm.model_executor.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
import torch
import torch.nn as nn
......@@ -269,7 +270,7 @@ def as_embedding_model(cls: _T) -> _T:
return ModelForEmbedding # type: ignore
def as_hunyuan_seq_cls_model(cls: _T) -> _T:
def new_hy_05b_dense_official_classification(cls: _T) -> _T:
"""
Subclass an existing vLLM model to support classify and score tasks.
......@@ -301,8 +302,7 @@ def as_hunyuan_seq_cls_model(cls: _T) -> _T:
def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
config = vllm_config.model_config.hf_config
quant_config = vllm_config.quant_config
hidden_size = get_model_hidden_size(config)
self.pad_id= config.pad_id
pooler_config = vllm_config.model_config.pooler_config
if self.config.add_classification_head:
......@@ -367,6 +367,215 @@ def as_hunyuan_seq_cls_model(cls: _T) -> _T:
intermediate_tensors: Optional[IntermediateTensors] = None,
inputs_embeds: Optional[torch.Tensor] = None,
) -> torch.Tensor:
self.input_ids =input_ids
return super().forward(input_ids, positions, intermediate_tensors,
inputs_embeds)
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
    """Load weights, converting a causal-LM checkpoint on the fly if needed.

    When the config defines neither ``classifier_from_token`` nor
    ``method``, loading is delegated to the parent class. Otherwise the
    weights go through ``seq_cls_model_loader``.
    """
    classifier_tokens = getattr(self.config, "classifier_from_token", None)
    convert_method = getattr(self.config, "method", None)

    needs_conversion = (classifier_tokens is not None
                        or convert_method is not None)
    if needs_conversion:
        # Online convert ForCausalLM into
        # ForSequenceClassification model.
        return seq_cls_model_loader(self, weights)

    return super().load_weights(weights)
ModelForSequenceClassification.__name__ = \
_get_pooling_model_name(cls.__name__, "ForSequenceClassification")
return ModelForSequenceClassification # type: ignore
def hy_2b_dense_classification_official_hf_multihead_full_mask(cls: _T) -> _T:
"""
Subclass an existing vLLM model to support classify and score tasks.
By default, the class probabilities are extracted from the softmaxed
hidden state corresponding to the last token.
Note:
We assume that the classification head is a single linear layer
stored as the attribute `score` of the top-level model;
please implement your own model if this is not the case.
"""
# Avoid modifying existing classification models
if is_pooling_model(cls):
return cls
# Lazy import
from vllm.model_executor.layers.linear import ReplicatedLinear
from vllm.model_executor.layers.pooler import (ClassifierPooler,
DispatchPooler, Pooler,
PoolingMethod, PoolingType,PoolerIdentity)
from vllm.model_executor.models.interfaces import SupportsCrossEncoding
from vllm.sequence import IntermediateTensors
from ..utils import get_model_hidden_size, maybe_prefix
class ModelForSequenceClassification(_create_pooling_model_cls(cls),
SupportsCrossEncoding):
def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
config = vllm_config.model_config.hf_config
quant_config = vllm_config.quant_config
self.pad_id= config.pad_id
pooler_config = vllm_config.model_config.pooler_config
if self.config.add_classification_head:
self.pool_head = ReplicatedLinear(
config.hidden_size,
config.hidden_size,
bias=True,
quant_config=quant_config,
params_dtype=torch.float32,
prefix=maybe_prefix(prefix, "pool_head"),
return_bias=False,
)
self.pool_head2 = ReplicatedLinear(
config.hidden_size,
config.class_num,
bias=True,
quant_config=quant_config,
params_dtype=torch.float32,
prefix=maybe_prefix(prefix, "pool_head2"),
return_bias=True,
)
self.qfeat_emb =ReplicatedLinear(
2,
128,
bias=True,
quant_config=quant_config,
params_dtype=torch.float32,
prefix=maybe_prefix(prefix, "qfeat_emb"),
return_bias=False,
)
self.qfeat_emb_topic = VocabParallelEmbedding(
100,
128,
quant_config=quant_config,
prefix=f"{prefix}.qfeat_emb_topic",
)
self.qfeat_fc1 =ReplicatedLinear(
256,
256,
bias=True,
quant_config=quant_config,
params_dtype=torch.float32,
prefix=maybe_prefix(prefix, "qfeat_fc1"),
return_bias=False,
)
self.qfeat_fc2 =ReplicatedLinear(
256,
3,
bias=True,
quant_config=quant_config,
params_dtype=torch.float32,
prefix=maybe_prefix(prefix, "qfeat_fc2"),
return_bias=False,
)
self.qfeat_fc3 =ReplicatedLinear(
256,
3,
bias=True,
quant_config=quant_config,
params_dtype=torch.float32,
prefix=maybe_prefix(prefix, "qfeat_fc3"),
return_bias=False,
)
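# ForSequenceClassification compatibility: `score` could point directly at the final classification head
# No separate layer is created; pool_head2 is the final scoring layer (the alias below is currently disabled)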
#self.score = self.pool_head2
#Mark this instance as pooling-capable and build DispatchPooler
self.is_pooling_model = True
assert pooler_config is not None, (
"PoolerConfig must be provided to use classification head")
# Determine pooling type (fallback to config.pool_type)
pooling_type_str = (pooler_config.pooling_type
if pooler_config.pooling_type is not None
else getattr(config, "pool_type", "LAST")).upper()
if pooling_type_str == "LASTTOKEN":
pooling_type_str = "LAST"
pooling_type = PoolingType[pooling_type_str]
self.pooler = DispatchPooler({
"classify": ClassifierPooler(
pooling=PoolingMethod.from_pooling_type(pooling_type),
classifier=self._classifier,
act_fn=PoolerIdentity(),
)
})
def encode_qfeat(self, qfeat):
    """Encode the query-feature tensor into a hidden representation.

    Columns 0-1 of ``qfeat`` go through ``qfeat_emb``; column 2 is cast to
    ``torch.long`` and looked up in ``qfeat_emb_topic``. The two results are
    concatenated along dim 1, passed through ``qfeat_fc1`` and a ReLU.
    """
    dense_part = self.qfeat_emb(qfeat[:, :2])
    topic_ids = qfeat[:, 2].to(torch.long)
    topic_part = self.qfeat_emb_topic(topic_ids)
    fused = torch.cat([dense_part, topic_part], dim=1)
    return torch.relu(self.qfeat_fc1(fused))
# Classifier callable passed to ClassifierPooler in _init_pooler.
# Takes hidden states `x` and returns a `reward` tensor whose computation
# depends on `self.config.pool_type` ("mean", "last", or anything else).
def _classifier(self, x: torch.Tensor):
# First projection (hidden_size -> hidden_size); keep only the output if the
# layer returned a tuple, then apply tanh.
pooled_output= self.pool_head(x)
if isinstance(pooled_output, tuple):
pooled_output = pooled_output[0]
pooled_output = torch.tanh(pooled_output)
# NOTE(review): all four "heads" below call the same `pool_head2` layer on
# the same input, so the four tensors are identical as written -- confirm
# whether separate heads were intended.
# NOTE(review): `pool_head2` is constructed with return_bias=True, so this
# call presumably returns an (output, bias) tuple and `.contiguous()` would
# fail on it -- TODO confirm.
pooled_output_sat = self.pool_head2(pooled_output).contiguous() # bs * class_num
pooled_output_rel = self.pool_head2(pooled_output).contiguous() # bs * class_num
pooled_output_time = self.pool_head2(pooled_output).contiguous() # bs * class_num
pooled_output_auth = self.pool_head2(pooled_output).contiguous() # bs * class_num
# NOTE(review): `qfeat` is neither a parameter nor assigned earlier in this
# function, so the next line reads a local name before assignment and raises
# UnboundLocalError as written. The query features still need to be wired in.
qfeat = qfeat.to(dtype=pooled_output.dtype)
# Per-sample weight (a_wei) and bias (a_bias) derived from the query features;
# both layers map 256 -> 3.
qhidden = self.encode_qfeat(qfeat)
a_wei = self.qfeat_fc2(qhidden)
a_bias = self.qfeat_fc3(qhidden)
# The comparisons below are against lower-case strings.
if self.config.pool_type == "mean":
reward = pooled_output.mean(dim=1).squeeze(-1)
elif self.config.pool_type == "last":
# bs * hidden_size
# Count of non-pad tokens per row, minus one. Uses the `input_ids` stashed on
# the instance by forward(); assumes it is (batch, seq) -- TODO confirm.
seq_length = (self.input_ids != self.pad_id).long().sum(dim=1) - 1
batch_size = self.input_ids.size(0)
# The four logits are read at consecutive positions seq_length-1 .. seq_length-4.
sat_logits = pooled_output_sat[torch.arange(batch_size, device=pooled_output.device), seq_length-1]
auth_logits = pooled_output_auth[torch.arange(batch_size, device=pooled_output.device), seq_length-2]
time_logits = pooled_output_time[torch.arange(batch_size, device=pooled_output.device), seq_length-3]
rel_logits = pooled_output_rel[torch.arange(batch_size, device=pooled_output.device), seq_length-4]
# a_score = torch.sigmoid(torch.concat([rel_logits, time_logits, auth_logits], dim=1))
# Gate: affine-combine the rel/time/auth logits with the query-derived weight
# and bias, sum over dim 1, and squash with a sigmoid.
multii_logits = torch.concat([rel_logits, time_logits, auth_logits], dim=1)
task_logits = (a_wei * multii_logits + a_bias).sum(dim=1, keepdim=True)
task_logits = torch.sigmoid(task_logits)
#gate_time = (a_wei * multii_logits + wei_time).sum(dim=1, keepdim=True)
#gate_time = torch.sigmoid(gate_time)
#gate_auth = (a_wei * multii_logits + wei_auth).sum(dim=1, keepdim=True)
#gate_auth = torch.sigmoid(gate_auth)
# The gate scales the "sat" logits; the result is the final score.
sat_logits_new = task_logits * sat_logits
#logits = 2.0 * sat_logits_new.detach() + 0.25 * (qfeat[:,0].float().unsqueeze(1)) * gate_time * time_logits.detach() + 0.5 * (qfeat[:,1].float().unsqueeze(1) + 0.4) * gate_auth * auth_logits.detach()
logits = sat_logits_new
reward = logits.squeeze(-1)
else:
# Any other pool_type: take index 0 along dim 1 of the tanh-projected output.
reward = pooled_output[:, 0].squeeze(-1)
return reward
def forward(
    self,
    input_ids: torch.Tensor,
    positions: torch.Tensor,
    intermediate_tensors: Optional[IntermediateTensors] = None,
    inputs_embeds: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Run the wrapped model's forward pass, remembering the token ids.

    ``input_ids`` is stored on the instance before delegating because
    ``_classifier`` reads ``self.input_ids`` when ``pool_type`` is "last".
    """
    # Stash first so the ids are available once the parent call returns.
    self.input_ids = input_ids
    parent_forward = super().forward
    return parent_forward(input_ids, positions, intermediate_tensors,
                          inputs_embeds)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment