Unverified commit 6c046382, authored by Harry Mellor, committed by GitHub
Browse files

Fix per file ruff ignores related to line length (#26262)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 91ac7f76
......@@ -164,7 +164,7 @@ def invoke_main() -> None:
)
parser.add_argument(
"--batched", action="store_true", help="consider time to prepare batch"
) # noqa: E501
)
parser.add_argument(
"--num-iteration",
type=int,
......
......@@ -909,13 +909,13 @@ def create_argument_parser():
parser.add_argument(
"--tokenizer",
type=str,
help="Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
help="Name or path of the tokenizer, if not using the default tokenizer.",
)
parser.add_argument(
"--tokenizer-mode",
type=str,
default="auto",
help="Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
help="Name or path of the tokenizer, if not using the default tokenizer.",
)
parser.add_argument(
"--num-prompts",
......
......@@ -72,8 +72,8 @@ VLLMKernelScheduleTag: dict[
] = {
**KernelScheduleTag, # type: ignore
**{
MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",
MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",
MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",
MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized", # noqa: E501
MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong", # noqa: E501
MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative", # noqa: E501
},
}
......@@ -113,7 +113,7 @@ def run_e5_v(query: Query) -> ModelRequestData:
def _get_vlm2vec_prompt_image(query: Query, image_token: str):
if query["modality"] == "text":
text = query["text"]
prompt = f"Find me an everyday image that matches the given caption: {text}" # noqa: E501
prompt = f"Find me an everyday image that matches the given caption: {text}"
image = None
elif query["modality"] == "image":
prompt = f"{image_token} Find a day-to-day image that looks similar to the provided image." # noqa: E501
......
......@@ -203,9 +203,9 @@ class Proxy:
async with session.post(
url=url, json=data, headers=headers
) as response:
if 200 <= response.status < 300 or 400 <= response.status < 500: # noqa: E501
if 200 <= response.status < 300 or 400 <= response.status < 500:
if use_chunked:
async for chunk_bytes in response.content.iter_chunked( # noqa: E501
async for chunk_bytes in response.content.iter_chunked(
1024
):
yield chunk_bytes
......
......@@ -56,52 +56,6 @@ include = ["vllm*"]
"vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
# TEMPORARY! These ignores will be fixed forward
## Line length violations
"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
"tests/compile/piecewise/test_simple.py" = ["E501"]
"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
"tests/entrypoints/conftest.py" = ["E501"]
"tests/entrypoints/openai/test_audio.py" = ["E501"]
"tests/entrypoints/openai/test_chat.py" = ["E501"]
"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
"tests/entrypoints/openai/test_video.py" = ["E501"]
"tests/entrypoints/openai/test_vision.py" = ["E501"]
"tests/entrypoints/test_chat_utils.py" = ["E501"]
"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
"tests/models/language/generation/test_gemma.py" = ["E501"]
"tests/models/language/generation/test_mistral.py" = ["E501"]
"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
"tests/tool_use/test_tool_choice_required.py" = ["E501"]
"tests/v1/attention/utils.py" = ["E501"]
"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
"vllm/compilation/collective_fusion.py" = ["E501"]
"vllm/compilation/wrapper.py" = ["E501"]
"vllm/config/vllm.py" = ["E501"]
"vllm/distributed/device_communicators/all2all.py" = ["E501"]
"vllm/entrypoints/openai/protocol.py" = ["E501"]
"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
"vllm/model_executor/models/bailing_moe.py" = ["E501"]
"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
"vllm/model_executor/models/phi4mm.py" = ["E501"]
"vllm/model_executor/models/qwen3_next.py" = ["E501"]
"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
"vllm/v1/attention/backends/mla/common.py" = ["E501"]
"vllm/v1/engine/utils.py" = ["E501"]
"vllm/v1/utils.py" = ["E501"]
"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
# End of temporary ignores
[tool.ruff.lint]
select = [
......
......@@ -132,10 +132,14 @@ def test_simple_piecewise_compile(use_inductor):
splitting_ops=["silly.attention"],
use_inductor_graph_partition=False,
use_inductor=use_inductor,
expected_num_piecewise_graphs_seen=5, # 2 * num_layers + 1
expected_num_piecewise_capturable_graphs_seen=3, # 1 + num_layers
expected_num_backend_compilations=3, # num_piecewise_capturable_graphs_seen
expected_num_cudagraph_captured=6, # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
# 2 * num_layers + 1
expected_num_piecewise_graphs_seen=5,
# 1 + num_layers
expected_num_piecewise_capturable_graphs_seen=3,
# num_piecewise_capturable_graphs_seen
expected_num_backend_compilations=3,
# num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
expected_num_cudagraph_captured=6,
)
......@@ -147,14 +151,16 @@ def test_simple_inductor_graph_partition(splitting_ops):
pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
_run_simple_model(
# inductor graph partition automatically resets splitting_ops
# to be an empty list
# Inductor graph partition automatically resets splitting_ops to an empty list
splitting_ops=splitting_ops,
use_inductor_graph_partition=True,
use_inductor=True,
expected_num_piecewise_graphs_seen=1, # since not splitting at fx graph level
expected_num_piecewise_capturable_graphs_seen=1, # since not splitting at fx graph level
expected_num_backend_compilations=1, # since not splitting at fx graph level
expected_num_cudagraph_captured=6, # inductor graph partition still captures 6
# graph, same as fx graph partition.
# Since not splitting at fx graph level
expected_num_piecewise_graphs_seen=1,
# Since not splitting at fx graph level
expected_num_piecewise_capturable_graphs_seen=1,
# Since not splitting at fx graph level
expected_num_backend_compilations=1,
# Inductor graph partition still captures 6 graphs, same as fx graph partition
expected_num_cudagraph_captured=6,
)
......@@ -367,11 +367,14 @@ def test_toy_llama(use_inductor: bool):
kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}
with compilation_counter.expect(
num_graphs_seen=1, # one graph for the model
# One graph for the model
num_graphs_seen=1,
num_piecewise_graphs_seen=1,
num_piecewise_capturable_graphs_seen=1,
num_backend_compilations=1, # num_piecewise_capturable_graphs_seen
num_cudagraph_captured=2, # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
# num_piecewise_capturable_graphs_seen
num_backend_compilations=1,
# num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
num_cudagraph_captured=2,
**kwargs,
):
outputs.append(
......@@ -478,9 +481,10 @@ def benchmark():
# it is fine here, because we only use the lambda function once.
runtime = do_bench(
lambda: graphs[b][0]( # noqa
input_ids[:b], positions[:b]
input_ids[:b], # noqa
positions[:b], # noqa
)
)
) # noqa
piecewise_cudagraph_time[b] = runtime
else:
runtime = do_bench(lambda: graphs[b][0].replay()) # noqa
......
......@@ -243,7 +243,7 @@ def test_fix_functionalization(model_class: torch.nn.Module, do_fusion: bool):
# check if the functionalization pass is applied
for op in model.ops_in_model(do_fusion):
find_auto_fn(backend_no_func.graph_post_pass.nodes, op)
assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # noqa: E501
assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None
# make sure the ops were all de-functionalized
found = dict()
......
......@@ -565,7 +565,7 @@ def test_attention_quant_pattern(
elif quant_key.dtype == FP4_DTYPE:
assert attn_nodes_post[0].kwargs.get("output_block_scale") is not None, (
"Attention should have output_block_scale after FP4 fusion"
) # noqa: E501
)
# Check that results are close
torch.testing.assert_close(result_unfused, result_fused_1, atol=1e-2, rtol=1e-2)
......@@ -186,7 +186,7 @@ class TestQuantModel(torch.nn.Module):
):
# If fusion happens, the fused op is the one
# we check for (de)functionalization
return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default] # noqa: E501
return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default]
else:
# If no fusion, the original ops are checked
return [
......@@ -322,7 +322,7 @@ def sequence_parallelism_pass_on_test_model(
# check if the functionalization pass is applied
for op in model.ops_in_model():
find_auto_fn(backend_no_func.graph_post_pass.nodes, op)
assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # noqa: E501
assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None
# make sure the ops were all de-functionalized
found = dict()
......
......@@ -104,7 +104,7 @@ TEXT_GENERATION_MODELS = {
# [Decoder-only]
# Uses Llama
# "BAAI/AquilaChat-7B": PPTestSettings.fast(),
"Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"), # noqa: E501
"Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"),
"baichuan-inc/Baichuan-7B": PPTestSettings.fast(),
"baichuan-inc/Baichuan2-13B-Chat": PPTestSettings.fast(),
"bigscience/bloomz-1b1": PPTestSettings.fast(),
......@@ -138,7 +138,7 @@ TEXT_GENERATION_MODELS = {
# Uses Llama
# "mistralai/Mistral-7B-Instruct-v0.1": PPTestSettings.fast(),
"state-spaces/mamba-130m-hf": PPTestSettings.fast(),
"mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(load_format="dummy"), # noqa: E501
"mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(load_format="dummy"),
"mosaicml/mpt-7b": PPTestSettings.fast(),
"nvidia/Minitron-8B-Base": PPTestSettings.fast(),
"allenai/OLMo-1B-hf": PPTestSettings.fast(),
......@@ -151,13 +151,13 @@ TEXT_GENERATION_MODELS = {
"microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(),
"microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(
multi_node_only=True, load_format="dummy"
), # noqa: E501
),
"Qwen/Qwen-7B-Chat": PPTestSettings.fast(),
"Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(),
"Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
"stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(),
"bigcode/starcoder2-3b": PPTestSettings.fast(),
"upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"), # noqa: E501
"upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"),
# FIXME: Cannot load tokenizer in latest transformers version.
# Need to use tokenizer from `meta-llama/Llama-2-7b-chat-hf`
# "xverse/XVERSE-7B-Chat": PPTestSettings.fast(),
......
......@@ -83,7 +83,8 @@ def sample_complex_json_schema():
"type": "array",
"items": {
"type": "string",
"pattern": "^[a-z]{1,10}$", # Combining length and pattern restrictions
# Combining length and pattern restrictions
"pattern": "^[a-z]{1,10}$",
},
},
},
......
......@@ -145,7 +145,7 @@ async def test_single_chat_session_audio_base64encoded(
{
"type": "audio_url",
"audio_url": {
"url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}"
"url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}" # noqa: E501
},
},
{"type": "text", "text": "What's happening in this audio?"},
......
......@@ -835,17 +835,18 @@ async def test_extra_fields_allowed(client: openai.AsyncOpenAI):
@pytest.mark.asyncio
async def test_complex_message_content(client: openai.AsyncOpenAI):
content = [
{
"type": "text",
"text": "what is 1+1? please provide the result without any other text.",
}
]
resp = await client.chat.completions.create(
model=MODEL_NAME,
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "what is 1+1? please provide the result without any other text.",
}
],
"content": content,
}
],
temperature=0,
......
......@@ -76,8 +76,8 @@ def test_load_chat_template():
assert (
template_content
== """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}"""
) # noqa: E501
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" # noqa: E501
)
def test_no_load_chat_template_filelike():
......
......@@ -45,12 +45,13 @@ TOOLS = [
"properties": {
"city": {
"type": "string",
"description": "The city to find the weather for, e.g. 'San Francisco'",
"description": "The city to find the weather for, e.g. "
"'San Francisco'",
},
"state": {
"type": "string",
"description": "the two-letter abbreviation for the state that the city is"
" in, e.g. 'CA' which would mean 'California'",
"description": "the two-letter abbreviation for the state that "
"the city is in, e.g. 'CA' which would mean 'California'",
},
"unit": {
"type": "string",
......@@ -69,7 +70,8 @@ MESSAGES = [
{"role": "assistant", "content": "I'm doing well! How can I help you?"},
{
"role": "user",
"content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?",
"content": "Can you tell me what the temperate will be in Dallas, "
"in fahrenheit?",
},
]
......
......@@ -25,12 +25,14 @@ tools = [
"properties": {
"city": {
"type": "string",
"description": "The city to find the weather for, e.g. 'Vienna'",
"description": "The city to find the weather for, e.g. "
"'Vienna'",
"default": "Vienna",
},
"country": {
"type": "string",
"description": "The country that the city is in, e.g. 'Austria'",
"description": "The country that the city is in, e.g. "
"'Austria'",
},
"unit": {
"type": "string",
......@@ -85,12 +87,14 @@ tools = [
"properties": {
"city": {
"type": "string",
"description": "The city to get the forecast for, e.g. 'Vienna'",
"description": "The city to get the forecast for, e.g. "
"'Vienna'",
"default": "Vienna",
},
"country": {
"type": "string",
"description": "The country that the city is in, e.g. 'Austria'",
"description": "The country that the city is in, e.g. "
"'Austria'",
},
"days": {
"type": "integer",
......
......@@ -179,7 +179,7 @@ async def test_single_chat_session_video_base64encoded(
{
"type": "video_url",
"video_url": {
"url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}"
"url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" # noqa: E501
},
},
{"type": "text", "text": "What's in this video?"},
......@@ -238,7 +238,7 @@ async def test_single_chat_session_video_base64encoded_beamsearch(
{
"type": "video_url",
"video_url": {
"url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}"
"url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" # noqa: E501
},
},
{"type": "text", "text": "What's in this video?"},
......
......@@ -233,7 +233,7 @@ async def test_single_chat_session_image_base64encoded(
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"
"url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" # noqa: E501
},
},
{"type": "text", "text": content_text},
......@@ -300,7 +300,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"
"url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" # noqa: E501
},
},
{"type": "text", "text": "What's in this image?"},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment