Unverified commit 51010a18, authored by cjackal, committed by GitHub
Browse files

[Misc] set single whitespace between log sentences (#13771)


Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
parent 7196a3b1
...@@ -438,7 +438,7 @@ class FlashInferMetadata(AttentionMetadata): ...@@ -438,7 +438,7 @@ class FlashInferMetadata(AttentionMetadata):
not in supported_head_sizes: not in supported_head_sizes:
raise ValueError( raise ValueError(
f"Only {supported_head_sizes} are supported for head_dim,", f"Only {supported_head_sizes} are supported for head_dim,",
f"received {self.head_dim}.") f" received {self.head_dim}.")
def begin_forward(self): def begin_forward(self):
if self.num_prefill_tokens > 0: if self.num_prefill_tokens > 0:
......
...@@ -533,7 +533,7 @@ class MLACommonMetadata(AttentionMetadata): ...@@ -533,7 +533,7 @@ class MLACommonMetadata(AttentionMetadata):
not in supported_head_sizes: not in supported_head_sizes:
raise ValueError( raise ValueError(
f"Only {supported_head_sizes} are supported for head_dim,", f"Only {supported_head_sizes} are supported for head_dim,",
f"received {self.head_dim}.") f" received {self.head_dim}.")
@property @property
def prefill_metadata(self) -> Optional["MLACommonMetadata"]: def prefill_metadata(self) -> Optional["MLACommonMetadata"]:
......
...@@ -497,7 +497,7 @@ class ROCmFlashAttentionImpl(AttentionImpl): ...@@ -497,7 +497,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
if logits_soft_cap is not None: if logits_soft_cap is not None:
raise ValueError( raise ValueError(
"ROCm Triton FlashAttention does not support attention" "ROCm Triton FlashAttention does not support attention"
"logits soft capping." " logits soft capping."
" please try using the ROCm CK " " please try using the ROCm CK "
"FA backend instead by setting the env var " "FA backend instead by setting the env var "
"`VLLM_USE_TRITON_FLASH_ATTN=0`") "`VLLM_USE_TRITON_FLASH_ATTN=0`")
...@@ -528,7 +528,7 @@ class ROCmFlashAttentionImpl(AttentionImpl): ...@@ -528,7 +528,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
if self.use_naive_attn: if self.use_naive_attn:
if logits_soft_cap is not None: if logits_soft_cap is not None:
raise ValueError( raise ValueError(
"ROCm Naive FlashAttention does not support" "ROCm Naive FlashAttention does not support "
"attention logits soft capping.") "attention logits soft capping.")
self.attn_func = _sdpa_attention self.attn_func = _sdpa_attention
......
...@@ -924,8 +924,8 @@ class ModelConfig: ...@@ -924,8 +924,8 @@ class ModelConfig:
layers_block_type_value = getattr(self.hf_config, layers_block_type_value = getattr(self.hf_config,
"layers_block_type", None) "layers_block_type", None)
if layers_block_type_value is None: if layers_block_type_value is None:
raise ValueError("The model is an hybrid without a" raise ValueError("The model is an hybrid without a "
"layers_block_type in the hf_config," "layers_block_type in the hf_config, "
"cannot determine the num of " "cannot determine the num of "
f"{block_type.value} layers") f"{block_type.value} layers")
...@@ -2516,7 +2516,7 @@ def _get_and_verify_dtype( ...@@ -2516,7 +2516,7 @@ def _get_and_verify_dtype(
if current_platform.is_hpu() and config_dtype == torch.float16: if current_platform.is_hpu() and config_dtype == torch.float16:
logger.info( logger.info(
"For HPU, we cast models to bfloat16 instead of" "For HPU, we cast models to bfloat16 instead of "
"using float16 by default. Please specify `dtype` if you " "using float16 by default. Please specify `dtype` if you "
"want to use float16.") "want to use float16.")
torch_dtype = torch.bfloat16 torch_dtype = torch.bfloat16
...@@ -2732,7 +2732,7 @@ class DecodingConfig: ...@@ -2732,7 +2732,7 @@ class DecodingConfig:
backend=self.guided_decoding_backend).backend_name backend=self.guided_decoding_backend).backend_name
if backend not in valid_guided_backends: if backend not in valid_guided_backends:
raise ValueError(f"Invalid guided_decoding_backend '{backend}," raise ValueError(f"Invalid guided_decoding_backend '{backend},"
f"must be one of {valid_guided_backends}") f" must be one of {valid_guided_backends}")
@dataclass @dataclass
...@@ -3008,7 +3008,7 @@ class CompilationConfig(BaseModel): ...@@ -3008,7 +3008,7 @@ class CompilationConfig(BaseModel):
def model_post_init(self, __context: Any) -> None: def model_post_init(self, __context: Any) -> None:
if not self.enable_reshape and self.enable_fusion: if not self.enable_reshape and self.enable_fusion:
logger.warning_once( logger.warning_once(
"Fusion enabled but reshape elimination disabled." "Fusion enabled but reshape elimination disabled. "
"RMSNorm + quant (fp8) fusion might not work") "RMSNorm + quant (fp8) fusion might not work")
pass_config: PassConfig = Field(default_factory=PassConfig) pass_config: PassConfig = Field(default_factory=PassConfig)
...@@ -3563,7 +3563,7 @@ def set_current_vllm_config(vllm_config: VllmConfig, check_compile=False): ...@@ -3563,7 +3563,7 @@ def set_current_vllm_config(vllm_config: VllmConfig, check_compile=False):
logger.warning( logger.warning(
"`torch.compile` is turned on, but the model %s" "`torch.compile` is turned on, but the model %s"
" does not support it. Please open an issue on GitHub" " does not support it. Please open an issue on GitHub"
"if you want it to be supported.", " if you want it to be supported.",
vllm_config.model_config.model) vllm_config.model_config.model)
_current_vllm_config = old_vllm_config _current_vllm_config = old_vllm_config
......
...@@ -227,10 +227,10 @@ class NCCLLibrary: ...@@ -227,10 +227,10 @@ class NCCLLibrary:
self.lib = NCCLLibrary.path_to_library_cache[so_file] self.lib = NCCLLibrary.path_to_library_cache[so_file]
except Exception as e: except Exception as e:
logger.error( logger.error(
"Failed to load NCCL library from %s ." "Failed to load NCCL library from %s. "
"It is expected if you are not running on NVIDIA/AMD GPUs." "It is expected if you are not running on NVIDIA/AMD GPUs."
"Otherwise, the nccl library might not exist, be corrupted " "Otherwise, the nccl library might not exist, be corrupted "
"or it does not support the current platform %s." "or it does not support the current platform %s. "
"If you already have the library, please set the " "If you already have the library, please set the "
"environment variable VLLM_NCCL_SO_PATH" "environment variable VLLM_NCCL_SO_PATH"
" to point to the correct nccl library path.", so_file, " to point to the correct nccl library path.", so_file,
......
...@@ -137,7 +137,7 @@ class MooncakeTransferEngine: ...@@ -137,7 +137,7 @@ class MooncakeTransferEngine:
if metadata_backend not in supported_backend: if metadata_backend not in supported_backend:
raise ValueError( raise ValueError(
"Mooncake Configuration error. `metadata_backend`" "Mooncake Configuration error. `metadata_backend`"
f"should be one of {supported_backend}.") f" should be one of {supported_backend}.")
self.engine.initializeExt(local_hostname, metadata_server, self.engine.initializeExt(local_hostname, metadata_server,
protocol, device_name, metadata_backend) protocol, device_name, metadata_backend)
......
...@@ -823,7 +823,7 @@ def _parse_chat_message_content_part( ...@@ -823,7 +823,7 @@ def _parse_chat_message_content_part(
# content is empty, log a warning and skip # content is empty, log a warning and skip
if part_type in VALID_MESSAGE_CONTENT_MM_PART_TYPES and not content: if part_type in VALID_MESSAGE_CONTENT_MM_PART_TYPES and not content:
logger.warning( logger.warning(
"Skipping multimodal part (type: '%s')" "Skipping multimodal part (type: '%s') "
"with empty / unparsable content.", part_type) "with empty / unparsable content.", part_type)
return None return None
......
...@@ -1342,7 +1342,7 @@ class LLM: ...@@ -1342,7 +1342,7 @@ class LLM:
return params return params
if params.guided_decoding is not None: if params.guided_decoding is not None:
raise ValueError("Cannot set both guided_options_request and" raise ValueError("Cannot set both guided_options_request and "
"params.guided_decoding.") "params.guided_decoding.")
params.guided_decoding = GuidedDecodingParams( params.guided_decoding = GuidedDecodingParams(
......
...@@ -575,7 +575,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request): ...@@ -575,7 +575,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
async def do_rerank_v1(request: RerankRequest, raw_request: Request): async def do_rerank_v1(request: RerankRequest, raw_request: Request):
logger.warning_once( logger.warning_once(
"To indicate that the rerank API is not part of the standard OpenAI" "To indicate that the rerank API is not part of the standard OpenAI"
" API, we have located it at `/rerank`. Please update your client" " API, we have located it at `/rerank`. Please update your client "
"accordingly. (Note: Conforms to JinaAI rerank API)") "accordingly. (Note: Conforms to JinaAI rerank API)")
return await do_rerank(request, raw_request) return await do_rerank(request, raw_request)
......
...@@ -513,7 +513,7 @@ class RayDistributedExecutor(DistributedExecutorBase): ...@@ -513,7 +513,7 @@ class RayDistributedExecutor(DistributedExecutorBase):
if cupy_spec is None and envs.VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL: if cupy_spec is None and envs.VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL:
raise ValueError( raise ValueError(
"cupy is not installed but required since " "cupy is not installed but required since "
"VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL is set." "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL is set. "
"Run `pip install ray[adag]` and check cupy installation.") "Run `pip install ray[adag]` and check cupy installation.")
def _compiled_ray_dag(self, enable_asyncio: bool): def _compiled_ray_dag(self, enable_asyncio: bool):
......
...@@ -317,7 +317,7 @@ def initialize_ray_cluster( ...@@ -317,7 +317,7 @@ def initialize_ray_cluster(
if parallel_config.world_size > device_bundles: if parallel_config.world_size > device_bundles:
raise ValueError( raise ValueError(
f"The number of required {device_str}s exceeds the total " f"The number of required {device_str}s exceeds the total "
f"number of available {device_str}s in the placement group." f"number of available {device_str}s in the placement group. "
f"Required number of devices: {parallel_config.world_size}. " f"Required number of devices: {parallel_config.world_size}. "
f"Total number of devices: {device_bundles}.") f"Total number of devices: {device_bundles}.")
else: else:
......
...@@ -437,7 +437,7 @@ class LoRAModelManager(AdapterModelManager): ...@@ -437,7 +437,7 @@ class LoRAModelManager(AdapterModelManager):
def pin_adapter(self, lora_id: int) -> bool: def pin_adapter(self, lora_id: int) -> bool:
"""Pin a LoRAModel in the manager cache.""" """Pin a LoRAModel in the manager cache."""
raise NotImplementedError( raise NotImplementedError(
"Pinning is not supported in LoRAModelManager." "Pinning is not supported in LoRAModelManager. "
"Use LRUCacheLoRAModelManager for pinning") # type: ignore "Use LRUCacheLoRAModelManager for pinning") # type: ignore
def _set_adapter_mapping(self, mapping: LoRAMapping) -> None: def _set_adapter_mapping(self, mapping: LoRAMapping) -> None:
......
...@@ -71,7 +71,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod): ...@@ -71,7 +71,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
if not (self.weight_quant.strategy == QuantizationStrategy.TENSOR if not (self.weight_quant.strategy == QuantizationStrategy.TENSOR
and self.input_quant.strategy == QuantizationStrategy.TENSOR): and self.input_quant.strategy == QuantizationStrategy.TENSOR):
raise ValueError( raise ValueError(
"For FP8 Fused MoE layers, only per-tensor scales" "For FP8 Fused MoE layers, only per-tensor scales "
"for weights and activations are supported. Found " "for weights and activations are supported. Found "
f"{self.weight_quant}, {self.input_quant}") f"{self.weight_quant}, {self.input_quant}")
......
...@@ -74,7 +74,7 @@ class GPTQConfig(QuantizationConfig): ...@@ -74,7 +74,7 @@ class GPTQConfig(QuantizationConfig):
def __repr__(self) -> str: def __repr__(self) -> str:
return (f"GPTQConfig(weight_bits={self.weight_bits}, " return (f"GPTQConfig(weight_bits={self.weight_bits}, "
f"group_size={self.group_size}, " f"group_size={self.group_size}, "
f"desc_act={self.desc_act})," f"desc_act={self.desc_act}), "
f"lm_head_quantized={self.lm_head_quantized}), " f"lm_head_quantized={self.lm_head_quantized}), "
f"dynamic={self.dynamic}") f"dynamic={self.dynamic}")
......
...@@ -56,7 +56,7 @@ class ModelOptFp8Config(QuantizationConfig): ...@@ -56,7 +56,7 @@ class ModelOptFp8Config(QuantizationConfig):
quant_method = quant_config["quant_algo"] quant_method = quant_config["quant_algo"]
is_checkpoint_fp8_serialized = ("FP8" in quant_method) is_checkpoint_fp8_serialized = ("FP8" in quant_method)
if not is_checkpoint_fp8_serialized: if not is_checkpoint_fp8_serialized:
raise ValueError("ModelOpt currently only supports static FP8" raise ValueError("ModelOpt currently only supports static FP8 "
"quantization in vLLM. Please check the " "quantization in vLLM. Please check the "
"`hf_quant_config.json` file for your model's " "`hf_quant_config.json` file for your model's "
"quant configuration.") "quant configuration.")
......
...@@ -25,8 +25,8 @@ class NeuronQuantConfig(QuantizationConfig): ...@@ -25,8 +25,8 @@ class NeuronQuantConfig(QuantizationConfig):
if self.quant_dtype not in SUPPORTED_QUANT_DTYPE_LIST: if self.quant_dtype not in SUPPORTED_QUANT_DTYPE_LIST:
raise ValueError( raise ValueError(
f"Neuron quantization datatype {self.quant_dtype} is not valid," f"Neuron quantization datatype {self.quant_dtype} is not valid,"
f"the quantization datatype should match one of the below types" f" the quantization datatype should match one of the below "
f"{SUPPORTED_QUANT_DTYPE_LIST}") f"types {SUPPORTED_QUANT_DTYPE_LIST}")
self.dequant_dtype = dequant_dtype self.dequant_dtype = dequant_dtype
self.quantize_method = quantize_method self.quantize_method = quantize_method
......
...@@ -55,7 +55,7 @@ class QuarkW8A8Fp8MoEMethod(QuarkMoEMethod): ...@@ -55,7 +55,7 @@ class QuarkW8A8Fp8MoEMethod(QuarkMoEMethod):
if not (weight_qscheme == "per_tensor" if not (weight_qscheme == "per_tensor"
and input_qscheme == "per_tensor"): and input_qscheme == "per_tensor"):
raise ValueError( raise ValueError(
"For FP8 Fused MoE layers, only per-tensor scales" "For FP8 Fused MoE layers, only per-tensor scales "
"for weights and activations are supported. Found " "for weights and activations are supported. Found "
f"{weight_qscheme}, {input_qscheme}") # noqa E501 f"{weight_qscheme}, {input_qscheme}") # noqa E501
......
...@@ -118,7 +118,7 @@ def verify_marlin_supports_shape(output_size_per_partition: int, ...@@ -118,7 +118,7 @@ def verify_marlin_supports_shape(output_size_per_partition: int,
and input_size_per_partition % group_size != 0): and input_size_per_partition % group_size != 0):
raise ValueError( raise ValueError(
f"Weight input_size_per_partition = {input_size_per_partition}" f"Weight input_size_per_partition = {input_size_per_partition}"
f" is not divisible by group_size = {group_size}." f" is not divisible by group_size = {group_size}. "
"Consider reducing tensor_parallel_size or running " "Consider reducing tensor_parallel_size or running "
"with --quantization gptq.") "with --quantization gptq.")
......
...@@ -1088,7 +1088,7 @@ class BitsAndBytesModelLoader(BaseModelLoader): ...@@ -1088,7 +1088,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
self.model_type = type(model).__name__ self.model_type = type(model).__name__
logger.info("Loading weights with BitsAndBytes quantization. " logger.info("Loading weights with BitsAndBytes quantization. "
" May take a while ...") "May take a while ...")
quant_config = getattr(model_config.hf_config, "quantization_config", quant_config = getattr(model_config.hf_config, "quantization_config",
None) None)
......
...@@ -562,7 +562,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -562,7 +562,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
# 3D tensor # 3D tensor
return list(torch.unbind(image_data, dim=0)) return list(torch.unbind(image_data, dim=0))
raise ValueError( raise ValueError(
"We expect batched 2D tensors;" "We expect batched 2D tensors; "
"this can be either a list of 2D tensors or a single 3D tensor." "this can be either a list of 2D tensors or a single 3D tensor."
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment