Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
96860af6
Unverified
Commit
96860af6
authored
Jan 06, 2026
by
wang.yuqi
Committed by
GitHub
Jan 06, 2026
Browse files
[Model] rename use_pad_token to use_sep_token (#31784)
Signed-off-by:
wang.yuqi
<
yuqi.wang@daocloud.io
>
parent
0202971a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
33 additions
and
25 deletions
+33
-25
examples/pooling/score/convert_model_to_seq_cls.py
examples/pooling/score/convert_model_to_seq_cls.py
+7
-7
tests/entrypoints/pooling/score/test_utils.py
tests/entrypoints/pooling/score/test_utils.py
+8
-8
vllm/config/model.py
vllm/config/model.py
+12
-4
vllm/entrypoints/score_utils.py
vllm/entrypoints/score_utils.py
+3
-3
vllm/model_executor/models/adapters.py
vllm/model_executor/models/adapters.py
+3
-3
No files found.
examples/pooling/score/convert_model_to_seq_cls.py
View file @
96860af6
...
@@ -108,7 +108,7 @@ method_map = {
...
@@ -108,7 +108,7 @@ method_map = {
def
converting
(
def
converting
(
model_name
,
classifier_from_tokens
,
path
,
method
,
use_
pad
_token
=
False
,
device
=
"cpu"
model_name
,
classifier_from_tokens
,
path
,
method
,
use_
sep
_token
=
False
,
device
=
"cpu"
):
):
"""
"""
Main conversion function to transform a CausalLM model to SequenceClassification.
Main conversion function to transform a CausalLM model to SequenceClassification.
...
@@ -118,7 +118,7 @@ def converting(
...
@@ -118,7 +118,7 @@ def converting(
classifier_from_tokens: List of tokens used for classification
classifier_from_tokens: List of tokens used for classification
path: Output path to save the converted model
path: Output path to save the converted model
method: Conversion method ('from_2_way_softmax' or 'no_post_processing')
method: Conversion method ('from_2_way_softmax' or 'no_post_processing')
use_
pad
_token: Whether to use
padd
ing token in the sequence classification model
use_
sep
_token: Whether to use
separat
ing token in the sequence classification model
device: Device to load the model on ('cpu' or 'cuda')
device: Device to load the model on ('cpu' or 'cuda')
"""
"""
assert
method
in
method_map
,
f
"Unknown method:
{
method
}
"
assert
method
in
method_map
,
f
"Unknown method:
{
method
}
"
...
@@ -149,10 +149,10 @@ def converting(
...
@@ -149,10 +149,10 @@ def converting(
causal_lm
,
seq_cls_model
,
tokenizer
,
classifier_from_tokens
,
device
causal_lm
,
seq_cls_model
,
tokenizer
,
classifier_from_tokens
,
device
)
)
# Configure
padd
ing token settings
# Configure
separat
ing token settings
# Note:
Reranker models typically don't use padding tokens by default
# Note:
`llm as reranker` defaults to not using separating token.
seq_cls_model
.
config
.
use_
pad
_token
=
use_
pad
_token
seq_cls_model
.
config
.
use_
sep
_token
=
use_
sep
_token
seq_cls_model
.
config
.
pad
_token_id
=
tokenizer
.
pad
_token_id
seq_cls_model
.
config
.
sep
_token_id
=
tokenizer
.
sep
_token_id
# Save the converted model and tokenizer
# Save the converted model and tokenizer
seq_cls_model
.
save_pretrained
(
path
)
seq_cls_model
.
save_pretrained
(
path
)
...
@@ -203,6 +203,6 @@ if __name__ == "__main__":
...
@@ -203,6 +203,6 @@ if __name__ == "__main__":
model_name
=
args
.
model_name
,
model_name
=
args
.
model_name
,
classifier_from_tokens
=
json
.
loads
(
args
.
classifier_from_tokens
),
classifier_from_tokens
=
json
.
loads
(
args
.
classifier_from_tokens
),
method
=
args
.
method
,
method
=
args
.
method
,
use_
pad
_token
=
args
.
use_
pad
_token
,
use_
sep
_token
=
args
.
use_
sep
_token
,
path
=
args
.
path
,
path
=
args
.
path
,
)
)
tests/entrypoints/pooling/score/test_utils.py
View file @
96860af6
...
@@ -51,9 +51,9 @@ def llm_reranker_model_config():
...
@@ -51,9 +51,9 @@ def llm_reranker_model_config():
CROSS_ENCODER_MODEL_ID
,
CROSS_ENCODER_MODEL_ID
,
runner
=
"pooling"
,
runner
=
"pooling"
,
)
)
# use_
pad
_token is a property that reads from hf_config,
# use_
sep
_token is a property that reads from hf_config,
# so we set it there to override the default (True)
# so we set it there to override the default (True)
config
.
hf_config
.
use_
pad
_token
=
False
config
.
hf_config
.
use_
sep
_token
=
False
return
config
return
config
...
@@ -230,7 +230,7 @@ class TestGetScorePrompt:
...
@@ -230,7 +230,7 @@ class TestGetScorePrompt:
cross_encoder_tokenizer
,
full_prompt
,
engine_prompt
cross_encoder_tokenizer
,
full_prompt
,
engine_prompt
)
)
def
test_fallback_with_
pad
_token
(
def
test_fallback_with_
sep
_token
(
self
,
self
,
cross_encoder_model_config
,
cross_encoder_model_config
,
cross_encoder_tokenizer
,
cross_encoder_tokenizer
,
...
@@ -238,7 +238,7 @@ class TestGetScorePrompt:
...
@@ -238,7 +238,7 @@ class TestGetScorePrompt:
mock_model_no_score_template
,
mock_model_no_score_template
,
):
):
"""Test fallback path when ChatTemplateResolutionError
"""Test fallback path when ChatTemplateResolutionError
and use_
pad
_token=True."""
and use_
sep
_token=True."""
with
(
with
(
patch
(
patch
(
"vllm.model_executor.model_loader.get_model_cls"
,
"vllm.model_executor.model_loader.get_model_cls"
,
...
@@ -250,7 +250,7 @@ class TestGetScorePrompt:
...
@@ -250,7 +250,7 @@ class TestGetScorePrompt:
),
),
):
):
full_prompt
,
engine_prompt
=
get_score_prompt
(
full_prompt
,
engine_prompt
=
get_score_prompt
(
cross_encoder_model_config
,
# use_
pad
_token=True
cross_encoder_model_config
,
# use_
sep
_token=True
cross_encoder_tokenizer
,
cross_encoder_tokenizer
,
tokenization_kwargs
,
tokenization_kwargs
,
"query"
,
"query"
,
...
@@ -281,7 +281,7 @@ class TestGetScorePrompt:
...
@@ -281,7 +281,7 @@ class TestGetScorePrompt:
add_special_tokens
=
False
,
add_special_tokens
=
False
,
)
)
def
test_fallback_without_
pad
_token
(
def
test_fallback_without_
sep
_token
(
self
,
self
,
llm_reranker_model_config
,
llm_reranker_model_config
,
cross_encoder_tokenizer
,
cross_encoder_tokenizer
,
...
@@ -289,7 +289,7 @@ class TestGetScorePrompt:
...
@@ -289,7 +289,7 @@ class TestGetScorePrompt:
mock_model_no_score_template
,
mock_model_no_score_template
,
):
):
"""Test fallback path when ChatTemplateResolutionError
"""Test fallback path when ChatTemplateResolutionError
and use_
pad
_token=False."""
and use_
sep
_token=False."""
with
(
with
(
patch
(
patch
(
"vllm.model_executor.model_loader.get_model_cls"
,
"vllm.model_executor.model_loader.get_model_cls"
,
...
@@ -301,7 +301,7 @@ class TestGetScorePrompt:
...
@@ -301,7 +301,7 @@ class TestGetScorePrompt:
),
),
):
):
full_prompt
,
engine_prompt
=
get_score_prompt
(
full_prompt
,
engine_prompt
=
get_score_prompt
(
llm_reranker_model_config
,
# use_
pad
_token=False
llm_reranker_model_config
,
# use_
sep
_token=False
cross_encoder_tokenizer
,
cross_encoder_tokenizer
,
tokenization_kwargs
,
tokenization_kwargs
,
"query"
,
"query"
,
...
...
vllm/config/model.py
View file @
96860af6
...
@@ -1434,10 +1434,18 @@ class ModelConfig:
...
@@ -1434,10 +1434,18 @@ class ModelConfig:
return
getattr
(
self
.
hf_config
,
"matryoshka_dimensions"
,
None
)
return
getattr
(
self
.
hf_config
,
"matryoshka_dimensions"
,
None
)
@
property
@
property
def
use_pad_token
(
self
)
->
bool
:
def
use_sep_token
(
self
)
->
bool
:
# cross_encoder models defaults to using pad_token.
# cross_encoder models defaults to using separating token.
# `llm as reranker` models defaults to not using pad_token.
# `llm as reranker` defaults to not using separating token.
return
getattr
(
self
.
hf_config
,
"use_pad_token"
,
True
)
use_pad_token
=
getattr
(
self
.
hf_config
,
"use_pad_token"
,
None
)
if
use_pad_token
is
not
None
:
logger
.
warning_once
(
"use_pad_token has been deprecated; please use use_sep_token instead."
)
return
use_pad_token
return
getattr
(
self
.
hf_config
,
"use_sep_token"
,
True
)
@
property
@
property
def
head_dtype
(
self
)
->
torch
.
dtype
:
def
head_dtype
(
self
)
->
torch
.
dtype
:
...
...
vllm/entrypoints/score_utils.py
View file @
96860af6
...
@@ -199,14 +199,14 @@ def get_score_prompt(
...
@@ -199,14 +199,14 @@ def get_score_prompt(
full_prompt
=
_apply_model_score_template
(
model_config
,
prompt_1
,
prompt_2
)
full_prompt
=
_apply_model_score_template
(
model_config
,
prompt_1
,
prompt_2
)
prompt_inputs
=
tokenizer
(
full_prompt
,
**
tokenization_kwargs
)
prompt_inputs
=
tokenizer
(
full_prompt
,
**
tokenization_kwargs
)
else
:
else
:
if
model_config
.
use_
pad
_token
:
if
model_config
.
use_
sep
_token
:
# cross_encoder models defaults to using
pad_
token.
# cross_encoder models defaults to using
separating
token.
prompt_inputs
=
tokenizer
(
prompt_inputs
=
tokenizer
(
text
=
prompt_1
,
text_pair
=
prompt_2
,
**
tokenization_kwargs
text
=
prompt_1
,
text_pair
=
prompt_2
,
**
tokenization_kwargs
)
)
full_prompt
=
tokenizer
.
decode
(
prompt_inputs
[
"input_ids"
])
full_prompt
=
tokenizer
.
decode
(
prompt_inputs
[
"input_ids"
])
else
:
else
:
# `llm as reranker`
models
defaults to not using
pad_
token.
# `llm as reranker` defaults to not using
separating
token.
full_prompt
=
prompt_1
+
prompt_2
full_prompt
=
prompt_1
+
prompt_2
prompt_inputs
=
tokenizer
(
text
=
full_prompt
,
**
tokenization_kwargs
)
prompt_inputs
=
tokenizer
(
text
=
full_prompt
,
**
tokenization_kwargs
)
return
full_prompt
,
prompt_inputs
return
full_prompt
,
prompt_inputs
...
...
vllm/model_executor/models/adapters.py
View file @
96860af6
...
@@ -382,9 +382,9 @@ class SequenceClassificationConfig(VerifyAndUpdateConfig):
...
@@ -382,9 +382,9 @@ class SequenceClassificationConfig(VerifyAndUpdateConfig):
else
:
else
:
text_config
.
num_labels
=
len
(
tokens
)
text_config
.
num_labels
=
len
(
tokens
)
# `llm as reranker` defaults to not using
pad_
token
# `llm as reranker` defaults to not using
separating
token
.
use_
pad
_token
=
getattr
(
text_config
,
"use_
pad
_token"
,
False
)
use_
sep
_token
=
getattr
(
text_config
,
"use_
sep
_token"
,
False
)
text_config
.
use_
pad
_token
=
use_
pad
_token
text_config
.
use_
sep
_token
=
use_
sep
_token
def
load_weights_using_from_2_way_softmax
(
def
load_weights_using_from_2_way_softmax
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment