Unverified commit 51931c5c, authored by Cyrus Leung and committed by GitHub
Browse files

[UX] Deduplicate sampling parameter startup logs (#32953)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 06b557ec
......@@ -1339,10 +1339,9 @@ class ModelConfig:
Returns:
A dictionary containing the non-default sampling parameters.
"""
if self.generation_config == "vllm":
config = {}
else:
config = self.try_get_generation_config()
src = self.generation_config
config = {} if src == "vllm" else self.try_get_generation_config()
# Overriding with given generation config
config.update(self.override_generation_config)
......@@ -1368,13 +1367,16 @@ class ModelConfig:
else:
diff_sampling_param = {}
if diff_sampling_param:
if diff_sampling_param and src != "vllm":
logger.warning_once(
"Default sampling parameters have been overridden by the "
"model's Hugging Face generation config recommended from the "
"model creator. If this is not intended, please relaunch "
"vLLM instance with `--generation-config vllm`."
"Default vLLM sampling parameters have been overridden by %s: `%s`. "
"If this is not intended, please relaunch vLLM instance "
"with `--generation-config vllm`.",
"the model's `generation_config.json`" if src == "auto" else src,
str(diff_sampling_param),
scope="local",
)
return diff_sampling_param
@property
......
......@@ -143,14 +143,6 @@ class OpenAIServingChat(OpenAIServing):
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.enable_force_include_usage = enable_force_include_usage
self.default_sampling_params = self.model_config.get_diff_sampling_param()
if self.default_sampling_params:
source = self.model_config.generation_config
source = "model" if source == "auto" else source
logger.info(
"Using default chat sampling params from %s: %s",
source,
self.default_sampling_params,
)
if self.model_config.hf_config.model_type == "kimi_k2":
self.tool_call_id_type = "kimi_k2"
else:
......
......@@ -72,16 +72,9 @@ class OpenAIServingCompletion(OpenAIServing):
self.logits_processors = self.model_config.logits_processors
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.default_sampling_params = self.model_config.get_diff_sampling_param()
self.enable_force_include_usage = enable_force_include_usage
if self.default_sampling_params:
source = self.model_config.generation_config
source = "model" if source == "auto" else source
logger.info(
"Using default completion sampling params from %s: %s",
source,
self.default_sampling_params,
)
self.default_sampling_params = self.model_config.get_diff_sampling_param()
async def render_completion_request(
self,
......
......@@ -221,15 +221,8 @@ class OpenAIServingResponses(OpenAIServing):
)
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.enable_force_include_usage = enable_force_include_usage
self.default_sampling_params = self.model_config.get_diff_sampling_param()
if self.default_sampling_params:
source = self.model_config.generation_config
source = "model" if source == "auto" else source
logger.info(
"Using default chat sampling params from %s: %s",
source,
self.default_sampling_params,
)
# If False (default), the "store" option is (silently) ignored and the
# response is not stored. If True, the response is stored in memory.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment