Unverified commit 51010a18, authored by cjackal, committed by GitHub
Browse files

[Misc] set single whitespace between log sentences (#13771)


Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
parent 7196a3b1
...@@ -290,7 +290,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -290,7 +290,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
expected_expr = str(expected_dims) expected_expr = str(expected_dims)
raise ValueError( raise ValueError(
"The expected shape of pixel values per image per batch " "The expected shape of pixel values per image per batch "
f" per patch is {expected_expr}. " f"per patch is {expected_expr}. "
f"You supplied {tuple(d.shape)}.") f"You supplied {tuple(d.shape)}.")
for d in data: for d in data:
......
...@@ -90,8 +90,8 @@ class GritLMPooler(nn.Module): ...@@ -90,8 +90,8 @@ class GritLMPooler(nn.Module):
# Return no instruction in case of missing BOS token. # Return no instruction in case of missing BOS token.
if prompt_token_ids[0] != self.token_ids["<s>"]: if prompt_token_ids[0] != self.token_ids["<s>"]:
logger.warning("BOS token not found in prompt," logger.warning("BOS token not found in prompt, "
"thus using empty string for instruction." "thus using empty string for instruction. "
"GritLM requires BOS token in prompt.") "GritLM requires BOS token in prompt.")
return instruction_len return instruction_len
...@@ -111,8 +111,8 @@ class GritLMPooler(nn.Module): ...@@ -111,8 +111,8 @@ class GritLMPooler(nn.Module):
if found_embed_pattern_idx != -1: if found_embed_pattern_idx != -1:
instruction_len = found_embed_pattern_idx + len(embed_pattern_ids) instruction_len = found_embed_pattern_idx + len(embed_pattern_ids)
else: else:
logger.warning("Query instruction not found in prompt," logger.warning("Query instruction not found in prompt, "
"thus using BOS token as instruction instead." "thus using BOS token as instruction instead. "
"GritLM requires query instruction in prompt.") "GritLM requires query instruction in prompt.")
instruction_len = 1 instruction_len = 1
......
...@@ -673,7 +673,7 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]): ...@@ -673,7 +673,7 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]):
for modality, count in counts.items(): for modality, count in counts.items():
if modality not in inputs or not inputs[modality]: if modality not in inputs or not inputs[modality]:
raise ValueError(f"None input data of {modality}." raise ValueError(f"None input data of {modality}."
"But prompt requires.") " But prompt requires.")
counter_key = self.get_modality_num_counter(modality) counter_key = self.get_modality_num_counter(modality)
if len(inputs[modality][counter_key]) != count: if len(inputs[modality][counter_key]) != count:
raise ValueError(f"The prompt requires {count} " raise ValueError(f"The prompt requires {count} "
......
...@@ -639,7 +639,7 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, ...@@ -639,7 +639,7 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP,
# 3D tensor # 3D tensor
return list(torch.unbind(image_data, dim=0)) return list(torch.unbind(image_data, dim=0))
raise ValueError( raise ValueError(
"We expect batched 2D tensors;" "We expect batched 2D tensors; "
"this can be either a list of 2D tensors or a single 3D tensor." "this can be either a list of 2D tensors or a single 3D tensor."
) )
......
...@@ -153,8 +153,8 @@ class PrithviGeoSpatialMAE(nn.Module, IsAttentionFree, SupportsMultiModal): ...@@ -153,8 +153,8 @@ class PrithviGeoSpatialMAE(nn.Module, IsAttentionFree, SupportsMultiModal):
vllm_config.model_config.hf_config.to_dict()["pretrained_cfg"]) vllm_config.model_config.hf_config.to_dict()["pretrained_cfg"])
if self.model is None: if self.model is None:
raise ValueError( raise ValueError(
"Unsupported task." "Unsupported task. "
"Only SemanticSegmentationTask is supported for now" "Only SemanticSegmentationTask is supported for now "
"by PrithviGeospatialMAE.") "by PrithviGeospatialMAE.")
def _parse_and_validate_multimodal_data( def _parse_and_validate_multimodal_data(
......
...@@ -160,7 +160,7 @@ class MultiModalProfiler(Generic[_I]): ...@@ -160,7 +160,7 @@ class MultiModalProfiler(Generic[_I]):
if mm_counts.keys() != mm_max_tokens_per_item.keys(): if mm_counts.keys() != mm_max_tokens_per_item.keys():
raise AssertionError( raise AssertionError(
"The keys returned by `get_supported_mm_limits`" "The keys returned by `get_supported_mm_limits` "
f"({set(mm_counts.keys())}) should be the same as those " f"({set(mm_counts.keys())}) should be the same as those "
"returned by `get_mm_max_tokens_per_item` " "returned by `get_mm_max_tokens_per_item` "
f"({set(mm_max_tokens_per_item.keys())})") f"({set(mm_max_tokens_per_item.keys())})")
......
...@@ -190,7 +190,7 @@ class CudaPlatformBase(Platform): ...@@ -190,7 +190,7 @@ class CudaPlatformBase(Platform):
"Cannot use FlashAttention-2 backend for FP8 KV cache.") "Cannot use FlashAttention-2 backend for FP8 KV cache.")
logger.warning( logger.warning(
"Please use FlashInfer backend with FP8 KV Cache for " "Please use FlashInfer backend with FP8 KV Cache for "
"better performance by setting environment variable " "better performance by setting environment variable "
"VLLM_ATTENTION_BACKEND=FLASHINFER") "VLLM_ATTENTION_BACKEND=FLASHINFER")
target_backend = _Backend.XFORMERS target_backend = _Backend.XFORMERS
elif block_size % 16 != 0: elif block_size % 16 != 0:
......
...@@ -97,7 +97,7 @@ class OpenVinoPlatform(Platform): ...@@ -97,7 +97,7 @@ class OpenVinoPlatform(Platform):
if envs.VLLM_OPENVINO_CPU_KV_CACHE_PRECISION == "u8": if envs.VLLM_OPENVINO_CPU_KV_CACHE_PRECISION == "u8":
if not OpenVinoPlatform.is_openvino_cpu(): if not OpenVinoPlatform.is_openvino_cpu():
logger.info("VLLM_OPENVINO_CPU_KV_CACHE_PRECISION is" logger.info("VLLM_OPENVINO_CPU_KV_CACHE_PRECISION is "
"ignored for GPU, f16 data type will be used.") "ignored for GPU, f16 data type will be used.")
cache_config.cache_dtype = ov.Type.f16 cache_config.cache_dtype = ov.Type.f16
else: else:
......
...@@ -73,7 +73,7 @@ class XPUPlatform(Platform): ...@@ -73,7 +73,7 @@ class XPUPlatform(Platform):
logger.warning( logger.warning(
"bfloat16 is only supported on Intel Data Center GPU, " "bfloat16 is only supported on Intel Data Center GPU, "
"Intel Arc GPU is not supported yet. Your device is %s," "Intel Arc GPU is not supported yet. Your device is %s,"
"which is not supported. will fallback to float16", " which is not supported. will fallback to float16",
cls.get_device_name()) cls.get_device_name())
model_config.dtype = torch.float16 model_config.dtype = torch.float16
if not model_config.enforce_eager: if not model_config.enforce_eager:
......
...@@ -226,7 +226,7 @@ class PromptAdapterModelManager(AdapterModelManager): ...@@ -226,7 +226,7 @@ class PromptAdapterModelManager(AdapterModelManager):
def pin_adapter(self, prompt_adapter_id: int) -> bool: def pin_adapter(self, prompt_adapter_id: int) -> bool:
"""Pin a PromptAdapterModel in the manager cache.""" """Pin a PromptAdapterModel in the manager cache."""
raise NotImplementedError( raise NotImplementedError(
"Pinning is not supported in PromptAdapterModelManager." "Pinning is not supported in PromptAdapterModelManager. "
"Use LRUCachePromptAdapterModelManager for pinning" "Use LRUCachePromptAdapterModelManager for pinning"
) # type: ignore ) # type: ignore
......
...@@ -16,7 +16,7 @@ try: ...@@ -16,7 +16,7 @@ try:
ROCmFlashAttentionMetadata as FlashAttentionMetadata) ROCmFlashAttentionMetadata as FlashAttentionMetadata)
except (ModuleNotFoundError, ImportError) as err: except (ModuleNotFoundError, ImportError) as err:
raise RuntimeError( raise RuntimeError(
"Draft model speculative decoding currently only supports" "Draft model speculative decoding currently only supports "
"CUDA and ROCm flash attention backend.") from err "CUDA and ROCm flash attention backend.") from err
from vllm.logger import init_logger from vllm.logger import init_logger
......
...@@ -212,26 +212,26 @@ class JAISConfig(PretrainedConfig): ...@@ -212,26 +212,26 @@ class JAISConfig(PretrainedConfig):
if (not isinstance(self.alibi_scaling, dict) if (not isinstance(self.alibi_scaling, dict)
or len(self.alibi_scaling) != 2): or len(self.alibi_scaling) != 2):
raise ValueError( raise ValueError(
"`alibi_scaling` must be a dictionary with two fields," "`alibi_scaling` must be a dictionary with two fields, "
"`type` and `factor` or `type` and `train_seq_len`, " "`type` and `factor` or `type` and `train_seq_len`, "
f"got {self.alibi_scaling}") f"got {self.alibi_scaling}")
alibi_scaling_type = self.alibi_scaling.get("type", None) alibi_scaling_type = self.alibi_scaling.get("type", None)
alibi_scaling_factor = self.alibi_scaling.get("factor", None) alibi_scaling_factor = self.alibi_scaling.get("factor", None)
alibi_dynamic_scaling = self.alibi_scaling.get("train_seq_len", None) alibi_dynamic_scaling = self.alibi_scaling.get("train_seq_len", None)
if alibi_scaling_type is None or alibi_scaling_type != "linear": if alibi_scaling_type is None or alibi_scaling_type != "linear":
raise ValueError(f"`alibi_scaling`'s type field must be 'linear'," raise ValueError(f"`alibi_scaling`'s type field must be 'linear', "
f"got {alibi_scaling_type}") f"got {alibi_scaling_type}")
if (alibi_scaling_factor is not None if (alibi_scaling_factor is not None
and not isinstance(alibi_scaling_factor, float) and not isinstance(alibi_scaling_factor, float)
or (alibi_scaling_factor is not None or (alibi_scaling_factor is not None
and alibi_scaling_factor <= 1.0)): and alibi_scaling_factor <= 1.0)):
raise ValueError( raise ValueError(
f"`alibi_scaling`'s factor field must be a float > 1.0," f"`alibi_scaling`'s factor field must be a float > 1.0, "
f"got {alibi_scaling_factor}") f"got {alibi_scaling_factor}")
if (alibi_dynamic_scaling is not None if (alibi_dynamic_scaling is not None
and not isinstance(alibi_dynamic_scaling, int) and not isinstance(alibi_dynamic_scaling, int)
or (alibi_dynamic_scaling is not None or (alibi_dynamic_scaling is not None
and alibi_dynamic_scaling <= 1)): and alibi_dynamic_scaling <= 1)):
raise ValueError( raise ValueError(
f"`alibi_scaling`'s `train_seq_len` field must be an" f"`alibi_scaling`'s `train_seq_len` field must be an "
f"integer > 1, got {alibi_dynamic_scaling}") f"integer > 1, got {alibi_dynamic_scaling}")
...@@ -447,7 +447,7 @@ def get_ip() -> str: ...@@ -447,7 +447,7 @@ def get_ip() -> str:
logger.warning( logger.warning(
"The environment variable HOST_IP is deprecated and ignored, as" "The environment variable HOST_IP is deprecated and ignored, as"
" it is often used by Docker and other software to" " it is often used by Docker and other software to"
"interact with the container's network stack. Please " " interact with the container's network stack. Please "
"use VLLM_HOST_IP instead to set the IP address for vLLM processes" "use VLLM_HOST_IP instead to set the IP address for vLLM processes"
" to communicate with each other.") " to communicate with each other.")
if host_ip: if host_ip:
...@@ -2091,8 +2091,8 @@ def set_ulimit(target_soft_limit=65535): ...@@ -2091,8 +2091,8 @@ def set_ulimit(target_soft_limit=65535):
(target_soft_limit, current_hard)) (target_soft_limit, current_hard))
except ValueError as e: except ValueError as e:
logger.warning( logger.warning(
"Found ulimit of %s and failed to automatically increase" "Found ulimit of %s and failed to automatically increase "
"with error %s. This can cause fd limit errors like" "with error %s. This can cause fd limit errors like "
"`OSError: [Errno 24] Too many open files`. Consider " "`OSError: [Errno 24] Too many open files`. Consider "
"increasing with ulimit -n", current_soft, e) "increasing with ulimit -n", current_soft, e)
......
...@@ -277,5 +277,5 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype): ...@@ -277,5 +277,5 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
raise ValueError( raise ValueError(
"Bfloat16 is only supported on GPUs with compute capability " "Bfloat16 is only supported on GPUs with compute capability "
f"of at least 8.0. Your {gpu_name} GPU {compute_str}. " f"of at least 8.0. Your {gpu_name} GPU {compute_str}. "
"You can use float16 instead by explicitly setting the" "You can use float16 instead by explicitly setting the "
"`dtype` flag in CLI, for example: --dtype=half.") "`dtype` flag in CLI, for example: --dtype=half.")
...@@ -545,7 +545,7 @@ class OpenVINOWorker(LoraNotSupportedWorkerBase): ...@@ -545,7 +545,7 @@ class OpenVINOWorker(LoraNotSupportedWorkerBase):
"value. This may cause low performance due to " "value. This may cause low performance due to "
"occupying the majority of available system " "occupying the majority of available system "
"memory. Please consider decreasing " "memory. Please consider decreasing "
"gpu_memory_utilization or explicitly setting" "gpu_memory_utilization or explicitly setting "
"`VLLM_OPENVINO_KVCACHE_SPACE` (GB) environment " "`VLLM_OPENVINO_KVCACHE_SPACE` (GB) environment "
"variable.", memory_utilization) "variable.", memory_utilization)
......
...@@ -525,7 +525,7 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype): ...@@ -525,7 +525,7 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
raise ValueError( raise ValueError(
"Bfloat16 is only supported on GPUs with compute capability " "Bfloat16 is only supported on GPUs with compute capability "
f"of at least 8.0. Your {gpu_name} GPU {compute_str}. " f"of at least 8.0. Your {gpu_name} GPU {compute_str}. "
"You can use float16 instead by explicitly setting the" "You can use float16 instead by explicitly setting the "
"`dtype` flag in CLI, for example: --dtype=half.") "`dtype` flag in CLI, for example: --dtype=half.")
...@@ -533,7 +533,7 @@ def raise_if_cache_size_invalid(num_gpu_blocks, block_size, is_attention_free, ...@@ -533,7 +533,7 @@ def raise_if_cache_size_invalid(num_gpu_blocks, block_size, is_attention_free,
max_model_len) -> None: max_model_len) -> None:
if is_attention_free and num_gpu_blocks != 0: if is_attention_free and num_gpu_blocks != 0:
raise ValueError("No memory should be allocated for the cache blocks " raise ValueError("No memory should be allocated for the cache blocks "
f"for an attention-free model, but {num_gpu_blocks}" f"for an attention-free model, but {num_gpu_blocks} "
"blocks are allocated.") "blocks are allocated.")
if not is_attention_free and num_gpu_blocks <= 0: if not is_attention_free and num_gpu_blocks <= 0:
raise ValueError("No available memory for the cache blocks. " raise ValueError("No available memory for the cache blocks. "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment