[Misc] set single whitespace between log sentences (#13771)

Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>

[Misc] set single whitespace between log sentences (#13771)
Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
51010a18 · cjackal · GitHub · 7196a3b1 · 51010a18 · 51010a18
Unverified commit 51010a18, authored Feb 25, 2025 by cjackal; committed by GitHub on Feb 25, 2025
16 changed files
--- a/vllm/model_executor/models/fuyu.py
+++ b/vllm/model_executor/models/fuyu.py
@@ -290,7 +290,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
                expected_expr = str(expected_dims)
                raise ValueError(
                    "The expected shape of pixel values per image per batch "
-                    f" per patch is {expected_expr}. "
+                    f"per patch is {expected_expr}. "
                    f"You supplied {tuple(d.shape)}.")
        for d in data:

--- a/vllm/model_executor/models/gritlm.py
+++ b/vllm/model_executor/models/gritlm.py
@@ -90,8 +90,8 @@ class GritLMPooler(nn.Module):
        # Return no instruction in case of missing BOS token.
        if prompt_token_ids[0] != self.token_ids["<s>"]:
-            logger.warning("BOS token not found in prompt,"
+            logger.warning("BOS token not found in prompt, "
-                           "thus using empty string for instruction."
+                           "thus using empty string for instruction. "
                           "GritLM requires BOS token in prompt.")
            return instruction_len
@@ -111,8 +111,8 @@ class GritLMPooler(nn.Module):
        if found_embed_pattern_idx != -1:
            instruction_len = found_embed_pattern_idx + len(embed_pattern_ids)
        else:
-            logger.warning("Query instruction not found in prompt,"
+            logger.warning("Query instruction not found in prompt, "
-                           "thus using BOS token as instruction instead."
+                           "thus using BOS token as instruction instead. "
                           "GritLM requires query instruction in prompt.")
            instruction_len = 1

--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -673,7 +673,7 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]):
        for modality, count in counts.items():
            if modality not in inputs or not inputs[modality]:
                raise ValueError(f"None input data of {modality}."
-                                 "But prompt requires.")
+                                 " But prompt requires.")
            counter_key = self.get_modality_num_counter(modality)
            if len(inputs[modality][counter_key]) != count:
                raise ValueError(f"The prompt requires {count} "

--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -639,7 +639,7 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP,
                # 3D tensor
                return list(torch.unbind(image_data, dim=0))
            raise ValueError(
-                "We expect batched 2D tensors;"
+                "We expect batched 2D tensors; "
                "this can be either a list of 2D tensors or a single 3D tensor."
            )

--- a/vllm/model_executor/models/prithvi_geospatial_mae.py
+++ b/vllm/model_executor/models/prithvi_geospatial_mae.py
@@ -153,8 +153,8 @@ class PrithviGeoSpatialMAE(nn.Module, IsAttentionFree, SupportsMultiModal):
            vllm_config.model_config.hf_config.to_dict()["pretrained_cfg"])
        if self.model is None:
            raise ValueError(
-                "Unsupported task."
+                "Unsupported task. "
-                "Only SemanticSegmentationTask is supported for now"
+                "Only SemanticSegmentationTask is supported for now "
                "by PrithviGeospatialMAE.")
    def _parse_and_validate_multimodal_data(

--- a/vllm/multimodal/profiling.py
+++ b/vllm/multimodal/profiling.py
@@ -160,7 +160,7 @@ class MultiModalProfiler(Generic[_I]):
        if mm_counts.keys() != mm_max_tokens_per_item.keys():
            raise AssertionError(
-                "The keys returned by `get_supported_mm_limits`"
+                "The keys returned by `get_supported_mm_limits` "
                f"({set(mm_counts.keys())}) should be the same as those "
                "returned by `get_mm_max_tokens_per_item` "
                f"({set(mm_max_tokens_per_item.keys())})")

--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -190,7 +190,7 @@ class CudaPlatformBase(Platform):
                "Cannot use FlashAttention-2 backend for FP8 KV cache.")
            logger.warning(
                "Please use FlashInfer backend with FP8 KV Cache for "
-                "better performance by setting environment variable  "
+                "better performance by setting environment variable "
                "VLLM_ATTENTION_BACKEND=FLASHINFER")
            target_backend = _Backend.XFORMERS
        elif block_size % 16 != 0:

--- a/vllm/platforms/openvino.py
+++ b/vllm/platforms/openvino.py
@@ -97,7 +97,7 @@ class OpenVinoPlatform(Platform):
        if envs.VLLM_OPENVINO_CPU_KV_CACHE_PRECISION == "u8":
            if not OpenVinoPlatform.is_openvino_cpu():
-                logger.info("VLLM_OPENVINO_CPU_KV_CACHE_PRECISION is"
+                logger.info("VLLM_OPENVINO_CPU_KV_CACHE_PRECISION is "
                            "ignored for GPU, f16 data type will be used.")
                cache_config.cache_dtype = ov.Type.f16
            else:

--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -73,7 +73,7 @@ class XPUPlatform(Platform):
                logger.warning(
                    "bfloat16 is only supported on Intel Data Center GPU, "
                    "Intel Arc GPU is not supported yet. Your device is %s,"
-                    "which is not supported. will fallback to float16",
+                    " which is not supported. will fallback to float16",
                    cls.get_device_name())
                model_config.dtype = torch.float16
        if not model_config.enforce_eager:

--- a/vllm/prompt_adapter/models.py
+++ b/vllm/prompt_adapter/models.py
@@ -226,7 +226,7 @@ class PromptAdapterModelManager(AdapterModelManager):
    def pin_adapter(self, prompt_adapter_id: int) -> bool:
        """Pin a PromptAdapterModel in the manager cache."""
        raise NotImplementedError(
-            "Pinning is not supported in PromptAdapterModelManager."
+            "Pinning is not supported in PromptAdapterModelManager. "
            "Use LRUCachePromptAdapterModelManager for pinning"
        )  # type: ignore

--- a/vllm/spec_decode/draft_model_runner.py
+++ b/vllm/spec_decode/draft_model_runner.py
@@ -16,7 +16,7 @@ try:
            ROCmFlashAttentionMetadata as FlashAttentionMetadata)
 except (ModuleNotFoundError, ImportError) as err:
    raise RuntimeError(
-        "Draft model speculative decoding currently only supports"
+        "Draft model speculative decoding currently only supports "
        "CUDA and ROCm flash attention backend.") from err
 from vllm.logger import init_logger

--- a/vllm/transformers_utils/configs/jais.py
+++ b/vllm/transformers_utils/configs/jais.py
@@ -212,26 +212,26 @@ class JAISConfig(PretrainedConfig):
        if (not isinstance(self.alibi_scaling, dict)
                or len(self.alibi_scaling) != 2):
            raise ValueError(
-                "`alibi_scaling` must be a dictionary with two fields,"
+                "`alibi_scaling` must be a dictionary with two fields, "
                "`type` and `factor` or `type` and `train_seq_len`, "
                f"got {self.alibi_scaling}")
        alibi_scaling_type = self.alibi_scaling.get("type", None)
        alibi_scaling_factor = self.alibi_scaling.get("factor", None)
        alibi_dynamic_scaling = self.alibi_scaling.get("train_seq_len", None)
        if alibi_scaling_type is None or alibi_scaling_type != "linear":
-            raise ValueError(f"`alibi_scaling`'s type field must be 'linear',"
+            raise ValueError(f"`alibi_scaling`'s type field must be 'linear', "
                             f"got {alibi_scaling_type}")
        if (alibi_scaling_factor is not None
                and not isinstance(alibi_scaling_factor, float)
                or (alibi_scaling_factor is not None
                    and alibi_scaling_factor <= 1.0)):
            raise ValueError(
-                f"`alibi_scaling`'s factor field must be a float > 1.0,"
+                f"`alibi_scaling`'s factor field must be a float > 1.0, "
                f"got {alibi_scaling_factor}")
        if (alibi_dynamic_scaling is not None
                and not isinstance(alibi_dynamic_scaling, int)
                or (alibi_dynamic_scaling is not None
                    and alibi_dynamic_scaling <= 1)):
            raise ValueError(
-                f"`alibi_scaling`'s `train_seq_len` field must be an"
+                f"`alibi_scaling`'s `train_seq_len` field must be an "
                f"integer > 1, got {alibi_dynamic_scaling}")
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -447,7 +447,7 @@ def get_ip() -> str:
        logger.warning(
            "The environment variable HOST_IP is deprecated and ignored, as"
            " it is often used by Docker and other software to"
-            "interact with the container's network stack. Please "
+            " interact with the container's network stack. Please "
            "use VLLM_HOST_IP instead to set the IP address for vLLM processes"
            " to communicate with each other.")
    if host_ip:
@@ -2091,8 +2091,8 @@ def set_ulimit(target_soft_limit=65535):
                               (target_soft_limit, current_hard))
        except ValueError as e:
            logger.warning(
-                "Found ulimit of %s and failed to automatically increase"
+                "Found ulimit of %s and failed to automatically increase "
-                "with error %s. This can cause fd limit errors like"
+                "with error %s. This can cause fd limit errors like "
                "`OSError: [Errno 24] Too many open files`. Consider "
                "increasing with ulimit -n", current_soft, e)

--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -277,5 +277,5 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
            raise ValueError(
                "Bfloat16 is only supported on GPUs with compute capability "
                f"of at least 8.0. Your {gpu_name} GPU {compute_str}. "
-                "You can use float16 instead by explicitly setting the"
+                "You can use float16 instead by explicitly setting the "
                "`dtype` flag in CLI, for example: --dtype=half.")
--- a/vllm/worker/openvino_worker.py
+++ b/vllm/worker/openvino_worker.py
@@ -545,7 +545,7 @@ class OpenVINOWorker(LoraNotSupportedWorkerBase):
                "value. This may cause low performance due to "
                "occupying the majority of available system "
                "memory. Please consider decreasing "
-                "gpu_memory_utilization or explicitly setting"
+                "gpu_memory_utilization or explicitly setting "
                "`VLLM_OPENVINO_KVCACHE_SPACE` (GB) environment "
                "variable.", memory_utilization)

--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -525,7 +525,7 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
            raise ValueError(
                "Bfloat16 is only supported on GPUs with compute capability "
                f"of at least 8.0. Your {gpu_name} GPU {compute_str}. "
-                "You can use float16 instead by explicitly setting the"
+                "You can use float16 instead by explicitly setting the "
                "`dtype` flag in CLI, for example: --dtype=half.")
@@ -533,7 +533,7 @@ def raise_if_cache_size_invalid(num_gpu_blocks, block_size, is_attention_free,
                                max_model_len) -> None:
    if is_attention_free and num_gpu_blocks != 0:
        raise ValueError("No memory should be allocated for the cache blocks "
-                         f"for an attention-free model, but {num_gpu_blocks}"
+                         f"for an attention-free model, but {num_gpu_blocks} "
                         "blocks are allocated.")
    if not is_attention_free and num_gpu_blocks <= 0:
        raise ValueError("No available memory for the cache blocks. "