[Misc] set single whitespace between log sentences (#13771)

Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>

[Misc] set single whitespace between log sentences (#13771)
Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
51010a18 · cjackal · GitHub · 7196a3b1 · 51010a18 · 51010a18
Unverified commit 51010a18, authored Feb 25, 2025 by cjackal; committed by GitHub on Feb 25, 2025.
20 changed files
--- a/vllm/attention/backends/flashinfer.py
+++ b/vllm/attention/backends/flashinfer.py
@@ -438,7 +438,7 @@ class FlashInferMetadata(AttentionMetadata):
                not in supported_head_sizes:
            raise ValueError(
                f"Only {supported_head_sizes} are supported for head_dim,",
-                f"received {self.head_dim}.")
+                f" received {self.head_dim}.")

    def begin_forward(self):
        if self.num_prefill_tokens > 0:

--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -533,7 +533,7 @@ class MLACommonMetadata(AttentionMetadata):
                not in supported_head_sizes:
            raise ValueError(
                f"Only {supported_head_sizes} are supported for head_dim,",
-                f"received {self.head_dim}.")
+                f" received {self.head_dim}.")

    @property
    def prefill_metadata(self) -> Optional["MLACommonMetadata"]:

--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -497,7 +497,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
            if logits_soft_cap is not None:
                raise ValueError(
                    "ROCm Triton FlashAttention does not support attention"
-                    "logits soft capping."
+                    " logits soft capping."
                    " please try using the ROCm CK "
                    "FA backend instead by setting the env var "
                    "`VLLM_USE_TRITON_FLASH_ATTN=0`")
@@ -528,7 +528,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
            if self.use_naive_attn:
                if logits_soft_cap is not None:
                    raise ValueError(
-                        "ROCm Naive FlashAttention does not support"
+                        "ROCm Naive FlashAttention does not support "
                        "attention logits soft capping.")

                self.attn_func = _sdpa_attention

--- a/vllm/config.py
+++ b/vllm/config.py
@@ -924,8 +924,8 @@ class ModelConfig:
            layers_block_type_value = getattr(self.hf_config,
                                              "layers_block_type", None)
            if layers_block_type_value is None:
-                raise ValueError("The model is an hybrid without a"
-                                 "layers_block_type in the hf_config,"
+                raise ValueError("The model is an hybrid without a "
+                                 "layers_block_type in the hf_config, "
                                 "cannot determine the num of "
                                 f"{block_type.value} layers")

@@ -2516,7 +2516,7 @@ def _get_and_verify_dtype(

            if current_platform.is_hpu() and config_dtype == torch.float16:
                logger.info(
-                    "For HPU, we cast models to bfloat16 instead of"
+                    "For HPU, we cast models to bfloat16 instead of "
                    "using float16 by default. Please specify `dtype` if you "
                    "want to use float16.")
                torch_dtype = torch.bfloat16
@@ -2732,7 +2732,7 @@ class DecodingConfig:
            backend=self.guided_decoding_backend).backend_name
        if backend not in valid_guided_backends:
            raise ValueError(f"Invalid guided_decoding_backend '{backend},"
-                             f"must be one of {valid_guided_backends}")
+                             f" must be one of {valid_guided_backends}")


 @dataclass
@@ -3008,7 +3008,7 @@ class CompilationConfig(BaseModel):
        def model_post_init(self, __context: Any) -> None:
            if not self.enable_reshape and self.enable_fusion:
                logger.warning_once(
-                    "Fusion enabled but reshape elimination disabled."
+                    "Fusion enabled but reshape elimination disabled. "
                    "RMSNorm + quant (fp8) fusion might not work")

    pass_config: PassConfig = Field(default_factory=PassConfig)
@@ -3563,7 +3563,7 @@ def set_current_vllm_config(vllm_config: VllmConfig, check_compile=False):
            logger.warning(
                "`torch.compile` is turned on, but the model %s"
                " does not support it. Please open an issue on GitHub"
-                "if you want it to be supported.",
+                " if you want it to be supported.",
                vllm_config.model_config.model)
        _current_vllm_config = old_vllm_config


--- a/vllm/distributed/device_communicators/pynccl_wrapper.py
+++ b/vllm/distributed/device_communicators/pynccl_wrapper.py
@@ -227,10 +227,10 @@ class NCCLLibrary:
            self.lib = NCCLLibrary.path_to_library_cache[so_file]
        except Exception as e:
            logger.error(
-                "Failed to load NCCL library from %s ."
+                "Failed to load NCCL library from %s. "
                "It is expected if you are not running on NVIDIA/AMD GPUs."
                "Otherwise, the nccl library might not exist, be corrupted "
-                "or it does not support the current platform %s."
+                "or it does not support the current platform %s. "
                "If you already have the library, please set the "
                "environment variable VLLM_NCCL_SO_PATH"
                " to point to the correct nccl library path.", so_file,

--- a/vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py
+++ b/vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py
@@ -137,7 +137,7 @@ class MooncakeTransferEngine:
            if metadata_backend not in supported_backend:
                raise ValueError(
                    "Mooncake Configuration error. `metadata_backend`"
-                    f"should be one of {supported_backend}.")
+                    f" should be one of {supported_backend}.")

            self.engine.initializeExt(local_hostname, metadata_server,
                                      protocol, device_name, metadata_backend)

--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -823,7 +823,7 @@ def _parse_chat_message_content_part(
    # content is empty, log a warning and skip
    if part_type in VALID_MESSAGE_CONTENT_MM_PART_TYPES and not content:
        logger.warning(
-            "Skipping multimodal part (type: '%s')"
+            "Skipping multimodal part (type: '%s') "
            "with empty / unparsable content.", part_type)
        return None


--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1342,7 +1342,7 @@ class LLM:
            return params

        if params.guided_decoding is not None:
-            raise ValueError("Cannot set both guided_options_request and"
+            raise ValueError("Cannot set both guided_options_request and "
                             "params.guided_decoding.")

        params.guided_decoding = GuidedDecodingParams(

--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -575,7 +575,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
 async def do_rerank_v1(request: RerankRequest, raw_request: Request):
    logger.warning_once(
        "To indicate that the rerank API is not part of the standard OpenAI"
-        " API, we have located it at `/rerank`. Please update your client"
+        " API, we have located it at `/rerank`. Please update your client "
        "accordingly. (Note: Conforms to JinaAI rerank API)")

    return await do_rerank(request, raw_request)

--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -513,7 +513,7 @@ class RayDistributedExecutor(DistributedExecutorBase):
        if cupy_spec is None and envs.VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL:
            raise ValueError(
                "cupy is not installed but required since "
-                "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL is set."
+                "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL is set. "
                "Run `pip install ray[adag]` and check cupy installation.")

    def _compiled_ray_dag(self, enable_asyncio: bool):

--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -317,7 +317,7 @@ def initialize_ray_cluster(
        if parallel_config.world_size > device_bundles:
            raise ValueError(
                f"The number of required {device_str}s exceeds the total "
-                f"number of available {device_str}s in the placement group."
+                f"number of available {device_str}s in the placement group. "
                f"Required number of devices: {parallel_config.world_size}. "
                f"Total number of devices: {device_bundles}.")
    else:

--- a/vllm/lora/models.py
+++ b/vllm/lora/models.py
@@ -437,7 +437,7 @@ class LoRAModelManager(AdapterModelManager):
    def pin_adapter(self, lora_id: int) -> bool:
        """Pin a LoRAModel in the manager cache."""
        raise NotImplementedError(
-            "Pinning is not supported in LoRAModelManager."
+            "Pinning is not supported in LoRAModelManager. "
            "Use LRUCacheLoRAModelManager for pinning")  # type: ignore

    def _set_adapter_mapping(self, mapping: LoRAMapping) -> None:

--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -71,7 +71,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
        if not (self.weight_quant.strategy == QuantizationStrategy.TENSOR
                and self.input_quant.strategy == QuantizationStrategy.TENSOR):
            raise ValueError(
-                "For FP8 Fused MoE layers, only per-tensor scales"
+                "For FP8 Fused MoE layers, only per-tensor scales "
                "for weights and activations are supported. Found "
                f"{self.weight_quant}, {self.input_quant}")


--- a/vllm/model_executor/layers/quantization/gptq.py
+++ b/vllm/model_executor/layers/quantization/gptq.py
@@ -74,7 +74,7 @@ class GPTQConfig(QuantizationConfig):
    def __repr__(self) -> str:
        return (f"GPTQConfig(weight_bits={self.weight_bits}, "
                f"group_size={self.group_size}, "
-                f"desc_act={self.desc_act}),"
+                f"desc_act={self.desc_act}), "
                f"lm_head_quantized={self.lm_head_quantized}), "
                f"dynamic={self.dynamic}")


--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -56,7 +56,7 @@ class ModelOptFp8Config(QuantizationConfig):
        quant_method = quant_config["quant_algo"]
        is_checkpoint_fp8_serialized = ("FP8" in quant_method)
        if not is_checkpoint_fp8_serialized:
-            raise ValueError("ModelOpt currently only supports static FP8"
+            raise ValueError("ModelOpt currently only supports static FP8 "
                             "quantization in vLLM. Please check the "
                             "`hf_quant_config.json` file for your model's "
                             "quant configuration.")

--- a/vllm/model_executor/layers/quantization/neuron_quant.py
+++ b/vllm/model_executor/layers/quantization/neuron_quant.py
@@ -25,8 +25,8 @@ class NeuronQuantConfig(QuantizationConfig):
        if self.quant_dtype not in SUPPORTED_QUANT_DTYPE_LIST:
            raise ValueError(
                f"Neuron quantization datatype {self.quant_dtype} is not valid,"
-                f"the quantization datatype should match one of the below types"
-                f"{SUPPORTED_QUANT_DTYPE_LIST}")
+                f" the quantization datatype should match one of the below "
+                f"types {SUPPORTED_QUANT_DTYPE_LIST}")
        self.dequant_dtype = dequant_dtype
        self.quantize_method = quantize_method


--- a/vllm/model_executor/layers/quantization/quark/quark_moe.py
+++ b/vllm/model_executor/layers/quantization/quark/quark_moe.py
@@ -55,7 +55,7 @@ class QuarkW8A8Fp8MoEMethod(QuarkMoEMethod):
        if not (weight_qscheme == "per_tensor"
                and input_qscheme == "per_tensor"):
            raise ValueError(
-                "For FP8 Fused MoE layers, only per-tensor scales"
+                "For FP8 Fused MoE layers, only per-tensor scales "
                "for weights and activations are supported. Found "
                f"{weight_qscheme}, {input_qscheme}")  # noqa E501


--- a/vllm/model_executor/layers/quantization/utils/marlin_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
@@ -118,7 +118,7 @@ def verify_marlin_supports_shape(output_size_per_partition: int,
            and input_size_per_partition % group_size != 0):
        raise ValueError(
            f"Weight input_size_per_partition = {input_size_per_partition}"
-            f" is not divisible by group_size = {group_size}."
+            f" is not divisible by group_size = {group_size}. "
            "Consider reducing tensor_parallel_size or running "
            "with --quantization gptq.")


--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -1088,7 +1088,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
        self.model_type = type(model).__name__

        logger.info("Loading weights with BitsAndBytes quantization. "
-                    " May take a while ...")
+                    "May take a while ...")

        quant_config = getattr(model_config.hf_config, "quantization_config",
                               None)

--- a/vllm/model_executor/models/deepseek_vl2.py
+++ b/vllm/model_executor/models/deepseek_vl2.py
@@ -562,7 +562,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
                # 3D tensor
                return list(torch.unbind(image_data, dim=0))
            raise ValueError(
-                "We expect batched 2D tensors;"
+                "We expect batched 2D tensors; "
                "this can be either a list of 2D tensors or a single 3D tensor."
            )