Unverified commit 96860af6, authored by wang.yuqi, committed by GitHub
Browse files

[Model] rename use_pad_token to use_sep_token (#31784)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 0202971a
......@@ -108,7 +108,7 @@ method_map = {
def converting(
model_name, classifier_from_tokens, path, method, use_pad_token=False, device="cpu"
model_name, classifier_from_tokens, path, method, use_sep_token=False, device="cpu"
):
"""
Main conversion function to transform a CausalLM model to SequenceClassification.
......@@ -118,7 +118,7 @@ def converting(
classifier_from_tokens: List of tokens used for classification
path: Output path to save the converted model
method: Conversion method ('from_2_way_softmax' or 'no_post_processing')
use_pad_token: Whether to use padding token in the sequence classification model
use_sep_token: Whether to use separating token in the sequence classification model
device: Device to load the model on ('cpu' or 'cuda')
"""
assert method in method_map, f"Unknown method: {method}"
......@@ -149,10 +149,10 @@ def converting(
causal_lm, seq_cls_model, tokenizer, classifier_from_tokens, device
)
# Configure padding token settings
# Note: Reranker models typically don't use padding tokens by default
seq_cls_model.config.use_pad_token = use_pad_token
seq_cls_model.config.pad_token_id = tokenizer.pad_token_id
# Configure separating token settings
# Note: `llm as reranker` defaults to not using separating token.
seq_cls_model.config.use_sep_token = use_sep_token
seq_cls_model.config.sep_token_id = tokenizer.sep_token_id
# Save the converted model and tokenizer
seq_cls_model.save_pretrained(path)
......@@ -203,6 +203,6 @@ if __name__ == "__main__":
model_name=args.model_name,
classifier_from_tokens=json.loads(args.classifier_from_tokens),
method=args.method,
use_pad_token=args.use_pad_token,
use_sep_token=args.use_sep_token,
path=args.path,
)
......@@ -51,9 +51,9 @@ def llm_reranker_model_config():
CROSS_ENCODER_MODEL_ID,
runner="pooling",
)
# use_pad_token is a property that reads from hf_config,
# use_sep_token is a property that reads from hf_config,
# so we set it there to override the default (True)
config.hf_config.use_pad_token = False
config.hf_config.use_sep_token = False
return config
......@@ -230,7 +230,7 @@ class TestGetScorePrompt:
cross_encoder_tokenizer, full_prompt, engine_prompt
)
def test_fallback_with_pad_token(
def test_fallback_with_sep_token(
self,
cross_encoder_model_config,
cross_encoder_tokenizer,
......@@ -238,7 +238,7 @@ class TestGetScorePrompt:
mock_model_no_score_template,
):
"""Test fallback path when ChatTemplateResolutionError
and use_pad_token=True."""
and use_sep_token=True."""
with (
patch(
"vllm.model_executor.model_loader.get_model_cls",
......@@ -250,7 +250,7 @@ class TestGetScorePrompt:
),
):
full_prompt, engine_prompt = get_score_prompt(
cross_encoder_model_config, # use_pad_token=True
cross_encoder_model_config, # use_sep_token=True
cross_encoder_tokenizer,
tokenization_kwargs,
"query",
......@@ -281,7 +281,7 @@ class TestGetScorePrompt:
add_special_tokens=False,
)
def test_fallback_without_pad_token(
def test_fallback_without_sep_token(
self,
llm_reranker_model_config,
cross_encoder_tokenizer,
......@@ -289,7 +289,7 @@ class TestGetScorePrompt:
mock_model_no_score_template,
):
"""Test fallback path when ChatTemplateResolutionError
and use_pad_token=False."""
and use_sep_token=False."""
with (
patch(
"vllm.model_executor.model_loader.get_model_cls",
......@@ -301,7 +301,7 @@ class TestGetScorePrompt:
),
):
full_prompt, engine_prompt = get_score_prompt(
llm_reranker_model_config, # use_pad_token=False
llm_reranker_model_config, # use_sep_token=False
cross_encoder_tokenizer,
tokenization_kwargs,
"query",
......
......@@ -1434,10 +1434,18 @@ class ModelConfig:
return getattr(self.hf_config, "matryoshka_dimensions", None)
@property
def use_pad_token(self) -> bool:
# cross_encoder models defaults to using pad_token.
# `llm as reranker` models defaults to not using pad_token.
return getattr(self.hf_config, "use_pad_token", True)
def use_sep_token(self) -> bool:
# cross_encoder models defaults to using separating token.
# `llm as reranker` defaults to not using separating token.
use_pad_token = getattr(self.hf_config, "use_pad_token", None)
if use_pad_token is not None:
logger.warning_once(
"use_pad_token has been deprecated; please use use_sep_token instead."
)
return use_pad_token
return getattr(self.hf_config, "use_sep_token", True)
@property
def head_dtype(self) -> torch.dtype:
......
......@@ -199,14 +199,14 @@ def get_score_prompt(
full_prompt = _apply_model_score_template(model_config, prompt_1, prompt_2)
prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs)
else:
if model_config.use_pad_token:
# cross_encoder models defaults to using pad_token.
if model_config.use_sep_token:
# cross_encoder models defaults to using separating token.
prompt_inputs = tokenizer(
text=prompt_1, text_pair=prompt_2, **tokenization_kwargs
)
full_prompt = tokenizer.decode(prompt_inputs["input_ids"])
else:
# `llm as reranker` models defaults to not using pad_token.
# `llm as reranker` defaults to not using separating token.
full_prompt = prompt_1 + prompt_2
prompt_inputs = tokenizer(text=full_prompt, **tokenization_kwargs)
return full_prompt, prompt_inputs
......
......@@ -382,9 +382,9 @@ class SequenceClassificationConfig(VerifyAndUpdateConfig):
else:
text_config.num_labels = len(tokens)
# `llm as reranker` defaults to not using pad_token
use_pad_token = getattr(text_config, "use_pad_token", False)
text_config.use_pad_token = use_pad_token
# `llm as reranker` defaults to not using separating token.
use_sep_token = getattr(text_config, "use_sep_token", False)
text_config.use_sep_token = use_sep_token
def load_weights_using_from_2_way_softmax(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment