Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
43c146ca
Unverified
Commit
43c146ca
authored
Oct 06, 2025
by
Roger Wang
Committed by
GitHub
Oct 06, 2025
Browse files
[Misc] Clean up unnecessary E501 ignore (#26274)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
7c2ec0fe
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
38 additions
and
38 deletions
+38
-38
vllm/benchmarks/datasets.py
vllm/benchmarks/datasets.py
+2
-2
vllm/benchmarks/throughput.py
vllm/benchmarks/throughput.py
+2
-2
vllm/compilation/inductor_pass.py
vllm/compilation/inductor_pass.py
+1
-1
vllm/model_executor/layers/quantization/__init__.py
vllm/model_executor/layers/quantization/__init__.py
+1
-1
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
...ers/quantization/compressed_tensors/compressed_tensors.py
+2
-2
vllm/model_executor/models/gemma3n_mm.py
vllm/model_executor/models/gemma3n_mm.py
+1
-1
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+28
-28
vllm/v1/attention/backends/gdn_attn.py
vllm/v1/attention/backends/gdn_attn.py
+1
-1
No files found.
vllm/benchmarks/datasets.py
View file @
43c146ca
...
...
@@ -626,7 +626,7 @@ class RandomDataset(BenchmarkDataset):
# Decode, then re-encode and truncate to preserve token count invariants
total_input_len
=
prefix_len
+
int
(
input_len
)
prompt
,
adjusted_token_sequence
,
token_mismatch
=
(
gen_prompt_decode_to_target_len
(
# noqa: E501
gen_prompt_decode_to_target_len
(
tokenizer
=
tokenizer
,
token_sequence
=
token_sequence
,
target_token_len
=
total_input_len
,
...
...
@@ -2855,7 +2855,7 @@ class PrefixRepetitionRandomDataset(BenchmarkDataset):
for
_
in
range
(
prompts_per_prefix
):
suffix_tokens
,
token_mistmatch
=
_generate_exact_length_tokens
(
suffix_len
)
# noqa: E501
)
token_mismatch_total
+=
token_mistmatch
combined_tokens
=
prefix_tokens
+
suffix_tokens
prompt
=
tokenizer
.
decode
(
combined_tokens
)
...
...
vllm/benchmarks/throughput.py
View file @
43c146ca
...
...
@@ -459,14 +459,14 @@ def validate_args(args):
):
assert
args
.
backend
==
"vllm-chat"
,
(
f
"
{
args
.
dataset_path
}
needs to use vllm-chat as the backend."
)
# noqa: E501
)
elif
args
.
dataset_path
in
(
InstructCoderDataset
.
SUPPORTED_DATASET_PATHS
|
AIMODataset
.
SUPPORTED_DATASET_PATHS
):
assert
args
.
backend
==
"vllm"
,
(
f
"
{
args
.
dataset_path
}
needs to use vllm as the backend."
)
# noqa: E501
)
else
:
raise
ValueError
(
f
"
{
args
.
dataset_path
}
is not supported by hf dataset."
)
...
...
vllm/compilation/inductor_pass.py
View file @
43c146ca
...
...
@@ -19,7 +19,7 @@ if is_torch_equal_or_newer("2.6"):
from
torch._inductor.custom_graph_pass
import
CustomGraphPass
else
:
# CustomGraphPass is not present in 2.5 or lower, import our version
from
.torch25_custom_graph_pass
import
(
# noqa: E501
from
.torch25_custom_graph_pass
import
(
Torch25CustomGraphPass
as
CustomGraphPass
,
)
...
...
vllm/model_executor/layers/quantization/__init__.py
View file @
43c146ca
...
...
@@ -95,7 +95,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
from
.awq_marlin
import
AWQMarlinConfig
from
.bitblas
import
BitBLASConfig
from
.bitsandbytes
import
BitsAndBytesConfig
from
.compressed_tensors.compressed_tensors
import
(
# noqa: E501
from
.compressed_tensors.compressed_tensors
import
(
CompressedTensorsConfig
,
)
from
.deepspeedfp
import
DeepSpeedFPConfig
...
...
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
View file @
43c146ca
...
...
@@ -26,7 +26,7 @@ from vllm.model_executor.layers.linear import (
UnquantizedLinearMethod
,
)
from
vllm.model_executor.layers.quantization
import
QuantizationMethods
from
vllm.model_executor.layers.quantization.base_config
import
(
# noqa: E501
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
,
QuantizeMethodBase
,
)
...
...
@@ -256,7 +256,7 @@ class CompressedTensorsConfig(QuantizationConfig):
)
else
:
target_scheme_map
[
target
][
"input_activations"
]
=
(
QuantizationArgs
.
model_validate
(
# noqa: E501
QuantizationArgs
.
model_validate
(
quant_config
.
get
(
"input_activations"
)
)
)
...
...
vllm/model_executor/models/gemma3n_mm.py
View file @
43c146ca
...
...
@@ -176,7 +176,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
processor
=
self
.
info
.
get_hf_processor
()
audio_feature_extractor
:
Gemma3nAudioFeatureExtractor
=
(
processor
.
feature_extractor
)
# noqa: E501
)
audio_len
=
audio_feature_extractor
.
fft_length
image_processor
:
SiglipImageProcessorFast
=
processor
.
image_processor
img_width
=
image_processor
.
size
.
get
(
"width"
,
224
)
...
...
vllm/model_executor/models/registry.py
View file @
43c146ca
...
...
@@ -120,7 +120,7 @@ _TEXT_GENERATION_MODELS = {
"JambaForCausalLM"
:
(
"jamba"
,
"JambaForCausalLM"
),
"Lfm2ForCausalLM"
:
(
"lfm2"
,
"Lfm2ForCausalLM"
),
"LlamaForCausalLM"
:
(
"llama"
,
"LlamaForCausalLM"
),
"Llama4ForCausalLM"
:
(
"llama4"
,
"Llama4ForCausalLM"
),
# noqa: E501
"Llama4ForCausalLM"
:
(
"llama4"
,
"Llama4ForCausalLM"
),
# For decapoda-research/llama-*
"LLaMAForCausalLM"
:
(
"llama"
,
"LlamaForCausalLM"
),
"LongcatFlashForCausalLM"
:
(
"longcat_flash"
,
"LongcatFlashForCausalLM"
),
...
...
@@ -204,7 +204,7 @@ _EMBEDDING_MODELS = {
"LlavaNextForConditionalGeneration"
:
(
"llava_next"
,
"LlavaNextForConditionalGeneration"
,
),
# noqa: E501
),
"Phi3VForCausalLM"
:
(
"phi3v"
,
"Phi3VForCausalLM"
),
"Qwen2VLForConditionalGeneration"
:
(
"qwen2_vl"
,
"Qwen2VLForConditionalGeneration"
),
# noqa: E501
# Technically Terratorch models work on images, both in
...
...
@@ -240,46 +240,46 @@ _MULTIMODAL_MODELS = {
"AyaVisionForConditionalGeneration"
:
(
"aya_vision"
,
"AyaVisionForConditionalGeneration"
,
),
# noqa: E501
),
"Blip2ForConditionalGeneration"
:
(
"blip2"
,
"Blip2ForConditionalGeneration"
),
"ChameleonForConditionalGeneration"
:
(
"chameleon"
,
"ChameleonForConditionalGeneration"
,
),
# noqa: E501
),
"Cohere2VisionForConditionalGeneration"
:
(
"cohere2_vision"
,
"Cohere2VisionForConditionalGeneration"
,
),
# noqa: E501
),
"DeepseekVLV2ForCausalLM"
:
(
"deepseek_vl2"
,
"DeepseekVLV2ForCausalLM"
),
"DotsOCRForCausalLM"
:
(
"dots_ocr"
,
"DotsOCRForCausalLM"
),
"Ernie4_5_VLMoeForConditionalGeneration"
:
(
"ernie45_vl"
,
"Ernie4_5_VLMoeForConditionalGeneration"
,
),
# noqa: E501
),
"FuyuForCausalLM"
:
(
"fuyu"
,
"FuyuForCausalLM"
),
"Gemma3ForConditionalGeneration"
:
(
"gemma3_mm"
,
"Gemma3ForConditionalGeneration"
),
# noqa: E501
"Gemma3nForConditionalGeneration"
:
(
"gemma3n_mm"
,
"Gemma3nForConditionalGeneration"
,
),
# noqa: E501
),
"GLM4VForCausalLM"
:
(
"glm4v"
,
"GLM4VForCausalLM"
),
"Glm4vForConditionalGeneration"
:
(
"glm4_1v"
,
"Glm4vForConditionalGeneration"
),
# noqa: E501
"Glm4vMoeForConditionalGeneration"
:
(
"glm4_1v"
,
"Glm4vMoeForConditionalGeneration"
),
# noqa: E501
"GraniteSpeechForConditionalGeneration"
:
(
"granite_speech"
,
"GraniteSpeechForConditionalGeneration"
,
),
# noqa: E501
),
"H2OVLChatModel"
:
(
"h2ovl"
,
"H2OVLChatModel"
),
"InternVLChatModel"
:
(
"internvl"
,
"InternVLChatModel"
),
"NemotronH_Nano_VL_V2"
:
(
"nano_nemotron_vl"
,
"NemotronH_Nano_VL_V2"
),
"InternS1ForConditionalGeneration"
:
(
"interns1"
,
"InternS1ForConditionalGeneration"
,
),
# noqa: E501
),
"InternVLForConditionalGeneration"
:
(
"interns1"
,
"InternS1ForConditionalGeneration"
,
),
# noqa: E501
),
"Idefics3ForConditionalGeneration"
:
(
"idefics3"
,
"Idefics3ForConditionalGeneration"
,
...
...
@@ -289,7 +289,7 @@ _MULTIMODAL_MODELS = {
"KeyeVL1_5ForConditionalGeneration"
:
(
"keye_vl1_5"
,
"KeyeVL1_5ForConditionalGeneration"
,
),
# noqa: E501
),
"RForConditionalGeneration"
:
(
"rvl"
,
"RForConditionalGeneration"
),
"KimiVLForConditionalGeneration"
:
(
"kimi_vl"
,
"KimiVLForConditionalGeneration"
),
# noqa: E501
"Llama_Nemotron_Nano_VL"
:
(
"nemotron_vl"
,
"LlamaNemotronVLChatModel"
),
...
...
@@ -298,27 +298,27 @@ _MULTIMODAL_MODELS = {
"LlavaNextForConditionalGeneration"
:
(
"llava_next"
,
"LlavaNextForConditionalGeneration"
,
),
# noqa: E501
),
"LlavaNextVideoForConditionalGeneration"
:
(
"llava_next_video"
,
"LlavaNextVideoForConditionalGeneration"
,
),
# noqa: E501
),
"LlavaOnevisionForConditionalGeneration"
:
(
"llava_onevision"
,
"LlavaOnevisionForConditionalGeneration"
,
),
# noqa: E501
),
"MantisForConditionalGeneration"
:
(
"llava"
,
"MantisForConditionalGeneration"
),
# noqa: E501
"MiDashengLMModel"
:
(
"midashenglm"
,
"MiDashengLMModel"
),
"MiniMaxVL01ForConditionalGeneration"
:
(
"minimax_vl_01"
,
"MiniMaxVL01ForConditionalGeneration"
,
),
# noqa: E501
),
"MiniCPMO"
:
(
"minicpmo"
,
"MiniCPMO"
),
"MiniCPMV"
:
(
"minicpmv"
,
"MiniCPMV"
),
"Mistral3ForConditionalGeneration"
:
(
"mistral3"
,
"Mistral3ForConditionalGeneration"
,
),
# noqa: E501
),
"MolmoForCausalLM"
:
(
"molmo"
,
"MolmoForCausalLM"
),
"NVLM_D"
:
(
"nvlm_d"
,
"NVLM_D_Model"
),
"Ovis"
:
(
"ovis"
,
"Ovis"
),
...
...
@@ -326,7 +326,7 @@ _MULTIMODAL_MODELS = {
"PaliGemmaForConditionalGeneration"
:
(
"paligemma"
,
"PaliGemmaForConditionalGeneration"
,
),
# noqa: E501
),
"Phi3VForCausalLM"
:
(
"phi3v"
,
"Phi3VForCausalLM"
),
"Phi4MMForCausalLM"
:
(
"phi4mm"
,
"Phi4MMForCausalLM"
),
"Phi4MultimodalForCausalLM"
:
(
"phi4_multimodal"
,
"Phi4MultimodalForCausalLM"
),
# noqa: E501
...
...
@@ -336,31 +336,31 @@ _MULTIMODAL_MODELS = {
"Qwen2_5_VLForConditionalGeneration"
:
(
"qwen2_5_vl"
,
"Qwen2_5_VLForConditionalGeneration"
,
),
# noqa: E501
),
"Qwen2AudioForConditionalGeneration"
:
(
"qwen2_audio"
,
"Qwen2AudioForConditionalGeneration"
,
),
# noqa: E501
),
"Qwen2_5OmniModel"
:
(
"qwen2_5_omni_thinker"
,
"Qwen2_5OmniThinkerForConditionalGeneration"
,
),
# noqa: E501
),
"Qwen2_5OmniForConditionalGeneration"
:
(
"qwen2_5_omni_thinker"
,
"Qwen2_5OmniThinkerForConditionalGeneration"
,
),
# noqa: E501
),
"Qwen3VLForConditionalGeneration"
:
(
"qwen3_vl"
,
"Qwen3VLForConditionalGeneration"
),
# noqa: E501
"Qwen3VLMoeForConditionalGeneration"
:
(
"qwen3_vl_moe"
,
"Qwen3VLMoeForConditionalGeneration"
,
),
# noqa: E501
),
"SkyworkR1VChatModel"
:
(
"skyworkr1v"
,
"SkyworkR1VChatModel"
),
"Step3VLForConditionalGeneration"
:
(
"step3_vl"
,
"Step3VLForConditionalGeneration"
),
# noqa: E501
"TarsierForConditionalGeneration"
:
(
"tarsier"
,
"TarsierForConditionalGeneration"
),
# noqa: E501
"Tarsier2ForConditionalGeneration"
:
(
"qwen2_vl"
,
"Tarsier2ForConditionalGeneration"
,
),
# noqa: E501
),
"UltravoxModel"
:
(
"ultravox"
,
"UltravoxModel"
),
"VoxtralForConditionalGeneration"
:
(
"voxtral"
,
"VoxtralForConditionalGeneration"
),
# noqa: E501
# [Encoder-decoder]
...
...
@@ -401,23 +401,23 @@ _TRANSFORMERS_BACKEND_MODELS = {
"TransformersMoEForMultimodalLM"
:
(
"transformers_moe"
,
"TransformersMoEForMultimodalLM"
,
),
# noqa: E501
),
"TransformersEmbeddingModel"
:
(
"transformers_pooling"
,
"TransformersEmbeddingModel"
,
),
# noqa: E501
),
"TransformersForSequenceClassification"
:
(
"transformers_pooling"
,
"TransformersForSequenceClassification"
,
),
# noqa: E501
),
"TransformersMoEForSequenceClassification"
:
(
"transformers_pooling"
,
"TransformersMoEForSequenceClassification"
,
),
# noqa: E501
),
"TransformersMoEEmbeddingModel"
:
(
"transformers_pooling"
,
"TransformersMoEEmbeddingModel"
,
),
# noqa: E501
),
}
_VLLM_MODELS
=
{
...
...
vllm/v1/attention/backends/gdn_attn.py
View file @
43c146ca
...
...
@@ -79,7 +79,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
self
.
speculative_config
=
vllm_config
.
speculative_config
self
.
kv_cache_spec
=
kv_cache_spec
if
self
.
speculative_config
:
self
.
num_spec
=
self
.
speculative_config
.
num_speculative_tokens
# noqa: E501
self
.
num_spec
=
self
.
speculative_config
.
num_speculative_tokens
else
:
self
.
num_spec
=
0
self
.
use_spec_decode
=
self
.
num_spec
>
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment