Unverified commit 6c046382, authored by Harry Mellor, committed by GitHub
Browse files

Fix per file ruff ignores related to line length (#26262)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 91ac7f76
......@@ -916,8 +916,9 @@ class Qwen3NextDecoderLayer(nn.Module):
)
else:
assert len(hidden_states.shape) == len(self.ffn_layer_scale.shape), (
f"shape must be the same {len(hidden_states.shape)}, {len(self.ffn_layer_scale.shape)}"
) # noqa: E501
f"shape must be the same {len(hidden_states.shape)}, "
f"{len(self.ffn_layer_scale.shape)}"
)
hidden_states = hidden_states * (
self.ffn_layer_scale.to(hidden_states.dtype) + 1
)
......
......@@ -255,8 +255,8 @@ def is_rocm_aiter_fp8bmm_enabled() -> bool:
if is_rocm_aiter_fp8bmm_enabled():
from aiter.ops.triton.batched_gemm_a8w8_a_per_token_group_prequant_w_per_batched_tensor_quant import ( # noqa: E501 # isort: skip
batched_gemm_a8w8_a_per_token_group_prequant_w_per_batched_tensor_quant as aiter_triton_fp8_bmm,
from aiter.ops.triton.batched_gemm_a8w8_a_per_token_group_prequant_w_per_batched_tensor_quant import ( # noqa: E501
batched_gemm_a8w8_a_per_token_group_prequant_w_per_batched_tensor_quant as aiter_triton_fp8_bmm, # noqa: E501
)
def dynamic_per_batched_tensor_quant(
......@@ -1284,8 +1284,10 @@ class MLACommonImpl(MLACommonBaseImpl[M], Generic[M]):
actual_seq_lens_q=prefill.query_seq_lens.view(-1, 1, 1, 1),
actual_seq_lens_kv=prefill.query_seq_lens.view(-1, 1, 1, 1),
causal=True,
return_lse=True, # do not support False for now
is_cuda_graph_compatible=True, # Indicates actual_seq_lens are on GPU or CPU.
# Do not support False for now
return_lse=True,
# Indicates actual_seq_lens are on GPU or CPU.
is_cuda_graph_compatible=True,
)
if return_softmax_lse:
return output, lse
......@@ -1342,7 +1344,8 @@ class MLACommonImpl(MLACommonBaseImpl[M], Generic[M]):
),
causal=False,
return_lse=True,
is_cuda_graph_compatible=True, # Indicates actual_seq_lens are on GPU or CPU.
# Indicates actual_seq_lens are on GPU or CPU.
is_cuda_graph_compatible=True,
)
def process_weights_after_loading(self, act_dtype: torch.dtype):
......
......@@ -872,10 +872,13 @@ def wait_for_engine_startup(
EngineHandshakeMetadata(
addresses=addresses,
parallel_config={
"data_parallel_master_ip": parallel_config.data_parallel_master_ip,
"data_parallel_master_port": parallel_config.data_parallel_master_port,
"_data_parallel_master_port_list": parallel_config._data_parallel_master_port_list,
"data_parallel_size": parallel_config.data_parallel_size,
k: getattr(parallel_config, k)
for k in (
"data_parallel_master_ip",
"data_parallel_master_port",
"_data_parallel_master_port_list",
"data_parallel_size",
)
},
)
)
......
......@@ -345,13 +345,15 @@ def report_usage_stats(
from vllm.model_executor.model_loader import get_architecture_class_name
parallel_config = vllm_config.parallel_config
usage_message.report_usage(
get_architecture_class_name(vllm_config.model_config),
usage_context,
extra_kvs={
# Common configuration
"dtype": str(vllm_config.model_config.dtype),
"tensor_parallel_size": vllm_config.parallel_config.tensor_parallel_size,
"tensor_parallel_size": parallel_config.tensor_parallel_size,
"block_size": vllm_config.cache_config.block_size,
"gpu_memory_utilization": vllm_config.cache_config.gpu_memory_utilization,
"kv_cache_memory_bytes": vllm_config.cache_config.kv_cache_memory_bytes,
......@@ -362,7 +364,7 @@ def report_usage_stats(
"enable_lora": bool(vllm_config.lora_config),
"enable_prefix_caching": vllm_config.cache_config.enable_prefix_caching,
"enforce_eager": vllm_config.model_config.enforce_eager,
"disable_custom_all_reduce": vllm_config.parallel_config.disable_custom_all_reduce,
"disable_custom_all_reduce": parallel_config.disable_custom_all_reduce,
},
)
......
......@@ -3391,7 +3391,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
attn_metadata[ubid][layer_name] = attn_metadata_i
else:
assert type(attn_metadata) is dict
attn_metadata_i = attn_group.get_metadata_builder().build_for_cudagraph_capture(
metadata_builder = attn_group.get_metadata_builder()
attn_metadata_i = metadata_builder.build_for_cudagraph_capture(
common_attn_metadata
)
for layer_name in attn_group.layer_names:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment