Unverified commit 98ac0cb3, authored by Isotr0py, committed by GitHub
Browse files

[Bugfix] Use `ReplicatedLinear` for SequenceClassification head (#23836)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent c8b3b299
......@@ -96,8 +96,5 @@ def test_rerank_models_mteb_tp(vllm_runner,
"tensor_parallel_size": 2,
}
mteb_test_rerank_models(Qwen3RerankerHfRunner,
vllm_runner,
model_info,
vllm_extra_kwargs,
atol=1.2e-2)
mteb_test_rerank_models(Qwen3RerankerHfRunner, vllm_runner, model_info,
vllm_extra_kwargs)
......@@ -248,7 +248,7 @@ def as_seq_cls_model(cls: _T) -> _T:
return cls
# Lazy import
from vllm.model_executor.layers.linear import RowParallelLinear
from vllm.model_executor.layers.linear import ReplicatedLinear
from vllm.model_executor.layers.pooler import (ClassifierPooler,
DispatchPooler, Pooler,
PoolingMethod, PoolingType)
......@@ -264,10 +264,9 @@ def as_seq_cls_model(cls: _T) -> _T:
config = vllm_config.model_config.hf_config
quant_config = vllm_config.quant_config
self.score = RowParallelLinear(
self.score = ReplicatedLinear(
config.hidden_size,
config.num_labels,
input_is_parallel=False,
bias=False,
params_dtype=torch.float32,
quant_config=quant_config,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment