Commit b28d2104 (unverified), authored by Michael Goin, committed by GitHub
Browse files

[Misc] Change dummy profiling and BOS fallback warns to log once (#8820)

parent 93d364da
...@@ -8,6 +8,7 @@ from vllm.logger import init_logger ...@@ -8,6 +8,7 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.prompt_adapter.request import PromptAdapterRequest from vllm.prompt_adapter.request import PromptAdapterRequest
from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup
from vllm.utils import print_warning_once
from .data import (EncoderDecoderLLMInputs, LLMInputs, PromptInputs, from .data import (EncoderDecoderLLMInputs, LLMInputs, PromptInputs,
SingletonPromptInputs) SingletonPromptInputs)
...@@ -71,20 +72,21 @@ class InputPreprocessor: ...@@ -71,20 +72,21 @@ class InputPreprocessor:
''' '''
if not self.is_encoder_decoder_model(): if not self.is_encoder_decoder_model():
logger.warning("Using None for decoder start token id because " print_warning_once("Using None for decoder start token id because "
"this is not an encoder/decoder model.") "this is not an encoder/decoder model.")
return None return None
if (self.model_config is None or self.model_config.hf_config is None): if (self.model_config is None or self.model_config.hf_config is None):
logger.warning("Using None for decoder start token id because " print_warning_once("Using None for decoder start token id because "
"model config is not available.") "model config is not available.")
return None return None
dec_start_token_id = getattr(self.model_config.hf_config, dec_start_token_id = getattr(self.model_config.hf_config,
'decoder_start_token_id', None) 'decoder_start_token_id', None)
if dec_start_token_id is None: if dec_start_token_id is None:
logger.warning("Falling back on <BOS> for decoder start token id " print_warning_once("Falling back on <BOS> for decoder start token "
"because decoder start token id is not available.") "id because decoder start token id is not "
"available.")
dec_start_token_id = self.get_bos_token_id() dec_start_token_id = self.get_bos_token_id()
return dec_start_token_id return dec_start_token_id
......
...@@ -9,7 +9,7 @@ from transformers import PretrainedConfig ...@@ -9,7 +9,7 @@ from transformers import PretrainedConfig
from typing_extensions import TypeVar from typing_extensions import TypeVar
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import get_allowed_kwarg_only_overrides from vllm.utils import get_allowed_kwarg_only_overrides, print_warning_once
from .data import LLMInputs from .data import LLMInputs
...@@ -235,9 +235,9 @@ class InputRegistry: ...@@ -235,9 +235,9 @@ class InputRegistry:
num_tokens = seq_data.prompt_token_ids num_tokens = seq_data.prompt_token_ids
if len(num_tokens) < seq_len: if len(num_tokens) < seq_len:
if is_encoder_data: if is_encoder_data:
logger.warning( print_warning_once(
"Expected at least %d dummy encoder tokens for profiling, " f"Expected at least {seq_len} dummy encoder tokens for "
"but found %d tokens instead.", seq_len, len(num_tokens)) f"profiling, but found {len(num_tokens)} tokens instead.")
else: else:
raise AssertionError( raise AssertionError(
f"Expected at least {seq_len} dummy tokens for profiling, " f"Expected at least {seq_len} dummy tokens for profiling, "
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment