test2

3908ec24 · guanyu1 · 3c29e139 · 3908ec24 · 3908ec24
Commit 3908ec24 authored Nov 18, 2025 by guanyu1
2 changed files
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -31,10 +31,30 @@ import vllm.envs as envs

 logger = init_logger(__name__)
 from ..models.adapters_custom.adapters_classify import (
-    as_hunyuan_seq_cls_model,
+    new_hy_05b_dense_official_classification,
+    hy_2b_dense_classification_official_hf_multihead_full_mask
 )
+def _hunyuan_classify_selector(model_cls: type[nn.Module],
+                               hf_config) -> type[nn.Module]:
+    """Select the HunYuan sequence-classification adapter by hidden size.
+
+    Args:
+        model_cls: Base model class to be wrapped by an adapter.
+        hf_config: HF config of the model; only ``hidden_size`` is read.
+
+    Returns:
+        ``model_cls`` wrapped by the adapter registered for its hidden size:
+        2560 uses
+        ``hy_2b_dense_classification_official_hf_multihead_full_mask`` and
+        1280 uses ``new_hy_05b_dense_official_classification``. Any other
+        size falls back to the generic ``as_seq_cls_model`` adapter.
+    """
+    hidden_size = hf_config.hidden_size
+    if hidden_size == 2560:
+        return hy_2b_dense_classification_official_hf_multihead_full_mask(
+            model_cls)
+    if hidden_size == 1280:
+        return new_hy_05b_dense_official_classification(model_cls)
+
+    # Never return None here: the caller assigns the result straight to
+    # ``model_cls``, so an unknown size must still yield a usable class.
+    logger.warning(
+        "No HunYuan classification adapter registered for hidden_size=%s; "
+        "falling back to the generic sequence classification adapter.",
+        hidden_size)
+    return as_seq_cls_model(model_cls)
+
+
 CLASSIFY_CLASSIFY_REGISTRY = {
-    "HunYuanForCausalLM": as_hunyuan_seq_cls_model,
+    # Uses a selector that decides adapter by hidden_size
+    "HunYuanForCausalLM": _hunyuan_classify_selector,
 }

 @contextlib.contextmanager
@@ -263,7 +283,8 @@ def _get_model_architecture(
        model_cls = as_embedding_model(model_cls)
    elif convert_type == "classify":
        if arch in CLASSIFY_CLASSIFY_REGISTRY.keys():
-            model_cls = CLASSIFY_CLASSIFY_REGISTRY[arch](model_cls)
+            selector = CLASSIFY_CLASSIFY_REGISTRY[arch]
+            model_cls = selector(model_cls,model_config.hf_config)
        else:
            logger.debug_once("Converting to sequence classification model.")
            model_cls = as_seq_cls_model(model_cls)

--- a/vllm/model_executor/models/adapters_custom/adapters_classify.py
+++ b/vllm/model_executor/models/adapters_custom/adapters_classify.py
@@ -5,7 +5,8 @@ import ast
 import inspect
 from collections.abc import Iterable
 from typing import TYPE_CHECKING, Any, Optional, TypeVar, cast
-
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
 import torch
 import torch.nn as nn

@@ -269,7 +270,7 @@ def as_embedding_model(cls: _T) -> _T:
    return ModelForEmbedding  # type: ignore


-def as_hunyuan_seq_cls_model(cls: _T) -> _T:
+def new_hy_05b_dense_official_classification(cls: _T) -> _T:
    """
    Subclass an existing vLLM model to support classify and score tasks.

@@ -301,8 +302,7 @@ def as_hunyuan_seq_cls_model(cls: _T) -> _T:
        def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
            config = vllm_config.model_config.hf_config
            quant_config = vllm_config.quant_config
-            hidden_size = get_model_hidden_size(config)
-
+            self.pad_id= config.pad_id
            pooler_config = vllm_config.model_config.pooler_config

            if self.config.add_classification_head:
@@ -367,6 +367,215 @@ def as_hunyuan_seq_cls_model(cls: _T) -> _T:
            intermediate_tensors: Optional[IntermediateTensors] = None,
            inputs_embeds: Optional[torch.Tensor] = None,
        ) -> torch.Tensor:
+            self.input_ids =input_ids
+            return super().forward(input_ids, positions, intermediate_tensors,
+                                   inputs_embeds)
+
+        def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+            """Load checkpoint weights, converting on the fly when configured.
+
+            If the config defines neither ``classifier_from_token`` nor
+            ``method``, loading is delegated unchanged to the parent class.
+            Otherwise the weights are handed to ``seq_cls_model_loader``.
+            """
+            cfg = self.config
+            from_token = getattr(cfg, "classifier_from_token", None)
+            convert_method = getattr(cfg, "method", None)
+            needs_conversion = not (from_token is None
+                                    and convert_method is None)
+
+            if needs_conversion:
+                # Online conversion of a ForCausalLM checkpoint into a
+                # ForSequenceClassification model.
+                return seq_cls_model_loader(self, weights)
+            return super().load_weights(weights)
+
+
+    ModelForSequenceClassification.__name__ = \
+        _get_pooling_model_name(cls.__name__, "ForSequenceClassification")
+
+    return ModelForSequenceClassification  # type: ignore
+
+
+
+def hy_2b_dense_classification_official_hf_multihead_full_mask(cls: _T) -> _T:
+    """
+    Subclass an existing vLLM model to support classify and score tasks.
+
+    By default, the class probabilities are extracted from the softmaxed
+    hidden state corresponding to the last token.
+
+    Note:
+        We assume that the classification head is a single linear layer
+        stored as the attribute `score` of the top-level model;
+        please implement your own model if this is not the case.
+    """
+    # Avoid modifying existing classification models
+    if is_pooling_model(cls):
+        return cls
+
+    # Lazy import
+    from vllm.model_executor.layers.linear import ReplicatedLinear
+    from vllm.model_executor.layers.pooler import (ClassifierPooler,
+                                                   DispatchPooler, Pooler,
+                                                   PoolingMethod, PoolingType,PoolerIdentity)
+    from vllm.model_executor.models.interfaces import SupportsCrossEncoding
+    from vllm.sequence import IntermediateTensors
+
+    from ..utils import get_model_hidden_size, maybe_prefix
+
+    class ModelForSequenceClassification(_create_pooling_model_cls(cls),
+                                         SupportsCrossEncoding):
+
+        def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
+            """Create the classification heads, query-feature layers and pooler.
+
+            Nothing beyond ``pad_id`` is set up unless
+            ``self.config.add_classification_head`` is truthy.
+
+            Args:
+                vllm_config: Supplies the HF config, quantization config and
+                    pooler config.
+                prefix: Parameter-name prefix for the created layers.
+
+            Raises:
+                AssertionError: If the classification head is enabled but no
+                    pooler config is provided.
+                KeyError: If the resolved pooling type is not a
+                    ``PoolingType`` member.
+            """
+            config = vllm_config.model_config.hf_config
+            quant_config = vllm_config.quant_config
+            # Kept so _classifier can tell padding tokens from real ones.
+            self.pad_id = config.pad_id
+            pooler_config = vllm_config.model_config.pooler_config
+
+            if self.config.add_classification_head:
+                # hidden -> hidden projection applied before the final head.
+                self.pool_head = ReplicatedLinear(
+                    config.hidden_size,
+                    config.hidden_size,
+                    bias=True,
+                    quant_config=quant_config,
+                    params_dtype=torch.float32,
+                    prefix=maybe_prefix(prefix, "pool_head"),
+                    return_bias=False,
+                )
+                # Final scoring layer (hidden -> class_num).
+                # NOTE(review): this is the only layer created with
+                # return_bias=True; its forward result presumably needs
+                # unpacking by the caller -- confirm this is intended.
+                self.pool_head2 = ReplicatedLinear(
+                    config.hidden_size,
+                    config.class_num,
+                    bias=True,
+                    quant_config=quant_config,
+                    params_dtype=torch.float32,
+                    prefix=maybe_prefix(prefix, "pool_head2"),
+                    return_bias=True,
+                )
+                # Query-feature encoder: 2 dense features -> 128 dims.
+                self.qfeat_emb = ReplicatedLinear(
+                    2,
+                    128,
+                    bias=True,
+                    quant_config=quant_config,
+                    params_dtype=torch.float32,
+                    prefix=maybe_prefix(prefix, "qfeat_emb"),
+                    return_bias=False,
+                )
+                # Embedding table with 100 entries of 128 dims. The prefix
+                # goes through maybe_prefix like every sibling layer so an
+                # empty ``prefix`` does not produce a leading dot.
+                self.qfeat_emb_topic = VocabParallelEmbedding(
+                    100,
+                    128,
+                    quant_config=quant_config,
+                    prefix=maybe_prefix(prefix, "qfeat_emb_topic"),
+                )
+                # 256 = 128 (dense features) + 128 (topic embedding).
+                self.qfeat_fc1 = ReplicatedLinear(
+                    256,
+                    256,
+                    bias=True,
+                    quant_config=quant_config,
+                    params_dtype=torch.float32,
+                    prefix=maybe_prefix(prefix, "qfeat_fc1"),
+                    return_bias=False,
+                )
+                # Two parallel 256 -> 3 projections; _classifier uses them as
+                # the multiplicative (fc2) and additive (fc3) gate terms.
+                self.qfeat_fc2 = ReplicatedLinear(
+                    256,
+                    3,
+                    bias=True,
+                    quant_config=quant_config,
+                    params_dtype=torch.float32,
+                    prefix=maybe_prefix(prefix, "qfeat_fc2"),
+                    return_bias=False,
+                )
+                self.qfeat_fc3 = ReplicatedLinear(
+                    256,
+                    3,
+                    bias=True,
+                    quant_config=quant_config,
+                    params_dtype=torch.float32,
+                    prefix=maybe_prefix(prefix, "qfeat_fc3"),
+                    return_bias=False,
+                )
+                # ForSequenceClassification compatibility: ``score`` could
+                # simply alias the final classification head instead of being
+                # a separate layer (pool_head2 is the final scoring layer).
+                # The alias is currently disabled:
+                # self.score = self.pool_head2
+
+                # Mark this instance as pooling-capable and build the
+                # DispatchPooler.
+                self.is_pooling_model = True
+                assert pooler_config is not None, (
+                    "PoolerConfig must be provided to use classification head")
+
+                # Determine pooling type (fallback to config.pool_type, then
+                # to "LAST"); "LASTTOKEN" is accepted as an alias of "LAST".
+                pooling_type_str = (pooler_config.pooling_type
+                                    if pooler_config.pooling_type is not None
+                                    else getattr(config, "pool_type", "LAST")).upper()
+                if pooling_type_str == "LASTTOKEN":
+                    pooling_type_str = "LAST"
+                pooling_type = PoolingType[pooling_type_str]
+
+                self.pooler = DispatchPooler({
+                    "classify": ClassifierPooler(
+                        pooling=PoolingMethod.from_pooling_type(pooling_type),
+                        classifier=self._classifier,
+                        act_fn=PoolerIdentity(),
+                    )
+                })
+        def encode_qfeat(self, qfeat):
+            """Encode query features into a hidden vector.
+
+            Columns 0-1 of ``qfeat`` go through the linear ``qfeat_emb``
+            layer; column 2 is cast to ``long`` and looked up in
+            ``qfeat_emb_topic``. Both results are concatenated along dim 1,
+            projected by ``qfeat_fc1`` and passed through ReLU.
+
+            Args:
+                qfeat: 2-D tensor with at least three columns
+                    (presumably one row per request -- confirm with caller).
+
+            Returns:
+                The ReLU-activated output of ``qfeat_fc1``.
+            """
+            dense_part = self.qfeat_emb(qfeat[:, :2])
+            topic_part = self.qfeat_emb_topic(qfeat[:, 2].to(torch.long))
+            fused = torch.cat([dense_part, topic_part], dim=1)
+            return torch.relu(self.qfeat_fc1(fused))
+        def _classifier(self, x: torch.Tensor):
+            """Turn hidden states into the final classification score.
+
+            ``x`` is projected by ``pool_head`` and squashed with tanh. The
+            result then depends on ``self.config.pool_type``:
+
+            * ``"mean"``: mean of the projected states over dim 1.
+            * ``"last"``: ``pool_head2`` logits are read at four positions
+              relative to the number of non-pad tokens in
+              ``self.input_ids``. Three of them are combined with
+              query-feature weights into a sigmoid gate that scales the
+              fourth.
+            * anything else: the projected state at index 0 of dim 1.
+
+            Args:
+                x: Hidden states handed over by the pooler.
+
+            Returns:
+                The score tensor with a trailing singleton dim squeezed.
+
+            Raises:
+                RuntimeError: In ``"last"`` mode when no query features are
+                    available on the instance.
+            """
+            pooled_output = self.pool_head(x)
+            if isinstance(pooled_output, tuple):
+                pooled_output = pooled_output[0]
+            pooled_output = torch.tanh(pooled_output)
+
+            pool_type = self.config.pool_type
+            if pool_type == "mean":
+                return pooled_output.mean(dim=1).squeeze(-1)
+            if pool_type != "last":
+                return pooled_output[:, 0].squeeze(-1)
+
+            # pool_head2 is built with return_bias=True, so unpack a possible
+            # (output, bias) tuple before calling tensor methods. All four
+            # task logits come from this same head: evaluate it once.
+            head_logits = self.pool_head2(pooled_output)
+            if isinstance(head_logits, tuple):
+                head_logits = head_logits[0]
+            head_logits = head_logits.contiguous()
+
+            # NOTE(review): the original read a local ``qfeat`` that was
+            # never assigned, so every call raised UnboundLocalError. SOURCE
+            # does not show where the query features come from; they are
+            # read from ``self.qfeat`` here by analogy with
+            # ``self.input_ids`` being stashed in forward() -- confirm the
+            # intended source.
+            qfeat = getattr(self, "qfeat", None)
+            if qfeat is None:
+                raise RuntimeError(
+                    "Query features (qfeat) are required for pool_type="
+                    "'last' but were not provided to the classifier.")
+            qfeat = qfeat.to(dtype=pooled_output.dtype)
+            qhidden = self.encode_qfeat(qfeat)
+            a_wei = self.qfeat_fc2(qhidden)
+            a_bias = self.qfeat_fc3(qhidden)
+
+            # NOTE(review): the indexing below assumes head_logits is
+            # (batch, seq, class_num) and input_ids is (batch, seq) --
+            # confirm against what the pooler actually passes in.
+            # seq_length already subtracts 1, and the offsets below subtract
+            # further; confirm the intended token positions.
+            seq_length = (self.input_ids != self.pad_id).long().sum(dim=1) - 1
+            rows = torch.arange(self.input_ids.size(0),
+                                device=pooled_output.device)
+            sat_logits = head_logits[rows, seq_length - 1]
+            auth_logits = head_logits[rows, seq_length - 2]
+            time_logits = head_logits[rows, seq_length - 3]
+            rel_logits = head_logits[rows, seq_length - 4]
+
+            # Gate computed from the three auxiliary logits and the
+            # query-feature derived weight/bias, then applied to sat_logits.
+            multi_logits = torch.concat(
+                [rel_logits, time_logits, auth_logits], dim=1)
+            task_gate = torch.sigmoid(
+                (a_wei * multi_logits + a_bias).sum(dim=1, keepdim=True))
+            return (task_gate * sat_logits).squeeze(-1)
+
+        def forward(
+            self,
+            input_ids: torch.Tensor,
+            positions: torch.Tensor,
+            intermediate_tensors: Optional[IntermediateTensors] = None,
+            inputs_embeds: Optional[torch.Tensor] = None,
+        ) -> torch.Tensor:
+            """Record ``input_ids`` on the instance, then run the parent forward.
+
+            All four arguments are passed through to the parent class
+            unchanged, and its result is returned as-is.
+            """
+            # Stashed because _classifier later reads self.input_ids to count
+            # non-pad tokens. NOTE(review): this is per-call state kept on
+            # the module -- confirm it cannot go stale between forward() and
+            # pooling.
+            self.input_ids =input_ids
            return super().forward(input_ids, positions, intermediate_tensors,
                                   inputs_embeds)