Unverified commit 96860af6, authored by wang.yuqi, committed by GitHub
Browse files

[Model] rename use_pad_token to use_sep_token (#31784)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 0202971a
...@@ -108,7 +108,7 @@ method_map = { ...@@ -108,7 +108,7 @@ method_map = {
def converting( def converting(
model_name, classifier_from_tokens, path, method, use_pad_token=False, device="cpu" model_name, classifier_from_tokens, path, method, use_sep_token=False, device="cpu"
): ):
""" """
Main conversion function to transform a CausalLM model to SequenceClassification. Main conversion function to transform a CausalLM model to SequenceClassification.
...@@ -118,7 +118,7 @@ def converting( ...@@ -118,7 +118,7 @@ def converting(
classifier_from_tokens: List of tokens used for classification classifier_from_tokens: List of tokens used for classification
path: Output path to save the converted model path: Output path to save the converted model
method: Conversion method ('from_2_way_softmax' or 'no_post_processing') method: Conversion method ('from_2_way_softmax' or 'no_post_processing')
use_pad_token: Whether to use padding token in the sequence classification model use_sep_token: Whether to use separating token in the sequence classification model
device: Device to load the model on ('cpu' or 'cuda') device: Device to load the model on ('cpu' or 'cuda')
""" """
assert method in method_map, f"Unknown method: {method}" assert method in method_map, f"Unknown method: {method}"
...@@ -149,10 +149,10 @@ def converting( ...@@ -149,10 +149,10 @@ def converting(
causal_lm, seq_cls_model, tokenizer, classifier_from_tokens, device causal_lm, seq_cls_model, tokenizer, classifier_from_tokens, device
) )
# Configure padding token settings # Configure separating token settings
# Note: Reranker models typically don't use padding tokens by default # Note: `llm as reranker` defaults to not using separating token.
seq_cls_model.config.use_pad_token = use_pad_token seq_cls_model.config.use_sep_token = use_sep_token
seq_cls_model.config.pad_token_id = tokenizer.pad_token_id seq_cls_model.config.sep_token_id = tokenizer.sep_token_id
# Save the converted model and tokenizer # Save the converted model and tokenizer
seq_cls_model.save_pretrained(path) seq_cls_model.save_pretrained(path)
...@@ -203,6 +203,6 @@ if __name__ == "__main__": ...@@ -203,6 +203,6 @@ if __name__ == "__main__":
model_name=args.model_name, model_name=args.model_name,
classifier_from_tokens=json.loads(args.classifier_from_tokens), classifier_from_tokens=json.loads(args.classifier_from_tokens),
method=args.method, method=args.method,
use_pad_token=args.use_pad_token, use_sep_token=args.use_sep_token,
path=args.path, path=args.path,
) )
...@@ -51,9 +51,9 @@ def llm_reranker_model_config(): ...@@ -51,9 +51,9 @@ def llm_reranker_model_config():
CROSS_ENCODER_MODEL_ID, CROSS_ENCODER_MODEL_ID,
runner="pooling", runner="pooling",
) )
# use_pad_token is a property that reads from hf_config, # use_sep_token is a property that reads from hf_config,
# so we set it there to override the default (True) # so we set it there to override the default (True)
config.hf_config.use_pad_token = False config.hf_config.use_sep_token = False
return config return config
...@@ -230,7 +230,7 @@ class TestGetScorePrompt: ...@@ -230,7 +230,7 @@ class TestGetScorePrompt:
cross_encoder_tokenizer, full_prompt, engine_prompt cross_encoder_tokenizer, full_prompt, engine_prompt
) )
def test_fallback_with_pad_token( def test_fallback_with_sep_token(
self, self,
cross_encoder_model_config, cross_encoder_model_config,
cross_encoder_tokenizer, cross_encoder_tokenizer,
...@@ -238,7 +238,7 @@ class TestGetScorePrompt: ...@@ -238,7 +238,7 @@ class TestGetScorePrompt:
mock_model_no_score_template, mock_model_no_score_template,
): ):
"""Test fallback path when ChatTemplateResolutionError """Test fallback path when ChatTemplateResolutionError
and use_pad_token=True.""" and use_sep_token=True."""
with ( with (
patch( patch(
"vllm.model_executor.model_loader.get_model_cls", "vllm.model_executor.model_loader.get_model_cls",
...@@ -250,7 +250,7 @@ class TestGetScorePrompt: ...@@ -250,7 +250,7 @@ class TestGetScorePrompt:
), ),
): ):
full_prompt, engine_prompt = get_score_prompt( full_prompt, engine_prompt = get_score_prompt(
cross_encoder_model_config, # use_pad_token=True cross_encoder_model_config, # use_sep_token=True
cross_encoder_tokenizer, cross_encoder_tokenizer,
tokenization_kwargs, tokenization_kwargs,
"query", "query",
...@@ -281,7 +281,7 @@ class TestGetScorePrompt: ...@@ -281,7 +281,7 @@ class TestGetScorePrompt:
add_special_tokens=False, add_special_tokens=False,
) )
def test_fallback_without_pad_token( def test_fallback_without_sep_token(
self, self,
llm_reranker_model_config, llm_reranker_model_config,
cross_encoder_tokenizer, cross_encoder_tokenizer,
...@@ -289,7 +289,7 @@ class TestGetScorePrompt: ...@@ -289,7 +289,7 @@ class TestGetScorePrompt:
mock_model_no_score_template, mock_model_no_score_template,
): ):
"""Test fallback path when ChatTemplateResolutionError """Test fallback path when ChatTemplateResolutionError
and use_pad_token=False.""" and use_sep_token=False."""
with ( with (
patch( patch(
"vllm.model_executor.model_loader.get_model_cls", "vllm.model_executor.model_loader.get_model_cls",
...@@ -301,7 +301,7 @@ class TestGetScorePrompt: ...@@ -301,7 +301,7 @@ class TestGetScorePrompt:
), ),
): ):
full_prompt, engine_prompt = get_score_prompt( full_prompt, engine_prompt = get_score_prompt(
llm_reranker_model_config, # use_pad_token=False llm_reranker_model_config, # use_sep_token=False
cross_encoder_tokenizer, cross_encoder_tokenizer,
tokenization_kwargs, tokenization_kwargs,
"query", "query",
......
...@@ -1434,10 +1434,18 @@ class ModelConfig: ...@@ -1434,10 +1434,18 @@ class ModelConfig:
return getattr(self.hf_config, "matryoshka_dimensions", None) return getattr(self.hf_config, "matryoshka_dimensions", None)
@property @property
def use_pad_token(self) -> bool: def use_sep_token(self) -> bool:
# cross_encoder models defaults to using pad_token. # cross_encoder models defaults to using separating token.
# `llm as reranker` models defaults to not using pad_token. # `llm as reranker` defaults to not using separating token.
return getattr(self.hf_config, "use_pad_token", True)
use_pad_token = getattr(self.hf_config, "use_pad_token", None)
if use_pad_token is not None:
logger.warning_once(
"use_pad_token has been deprecated; please use use_sep_token instead."
)
return use_pad_token
return getattr(self.hf_config, "use_sep_token", True)
@property @property
def head_dtype(self) -> torch.dtype: def head_dtype(self) -> torch.dtype:
......
...@@ -199,14 +199,14 @@ def get_score_prompt( ...@@ -199,14 +199,14 @@ def get_score_prompt(
full_prompt = _apply_model_score_template(model_config, prompt_1, prompt_2) full_prompt = _apply_model_score_template(model_config, prompt_1, prompt_2)
prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs) prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs)
else: else:
if model_config.use_pad_token: if model_config.use_sep_token:
# cross_encoder models defaults to using pad_token. # cross_encoder models defaults to using separating token.
prompt_inputs = tokenizer( prompt_inputs = tokenizer(
text=prompt_1, text_pair=prompt_2, **tokenization_kwargs text=prompt_1, text_pair=prompt_2, **tokenization_kwargs
) )
full_prompt = tokenizer.decode(prompt_inputs["input_ids"]) full_prompt = tokenizer.decode(prompt_inputs["input_ids"])
else: else:
# `llm as reranker` models defaults to not using pad_token. # `llm as reranker` defaults to not using separating token.
full_prompt = prompt_1 + prompt_2 full_prompt = prompt_1 + prompt_2
prompt_inputs = tokenizer(text=full_prompt, **tokenization_kwargs) prompt_inputs = tokenizer(text=full_prompt, **tokenization_kwargs)
return full_prompt, prompt_inputs return full_prompt, prompt_inputs
......
...@@ -382,9 +382,9 @@ class SequenceClassificationConfig(VerifyAndUpdateConfig): ...@@ -382,9 +382,9 @@ class SequenceClassificationConfig(VerifyAndUpdateConfig):
else: else:
text_config.num_labels = len(tokens) text_config.num_labels = len(tokens)
# `llm as reranker` defaults to not using pad_token # `llm as reranker` defaults to not using separating token.
use_pad_token = getattr(text_config, "use_pad_token", False) use_sep_token = getattr(text_config, "use_sep_token", False)
text_config.use_pad_token = use_pad_token text_config.use_sep_token = use_sep_token
def load_weights_using_from_2_way_softmax( def load_weights_using_from_2_way_softmax(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment