Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9e6bcda3
Unverified
Commit
9e6bcda3
authored
Nov 29, 2025
by
Cyrus Leung
Committed by
GitHub
Nov 28, 2025
Browse files
[mypy] Enable type checking for more directories (#29674)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
9eec282c
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
35 additions
and
31 deletions
+35
-31
tools/pre_commit/mypy.py
tools/pre_commit/mypy.py
+6
-6
vllm/distributed/kv_transfer/kv_connector/v1/base.py
vllm/distributed/kv_transfer/kv_connector/v1/base.py
+1
-1
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
...buted/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
+1
-1
vllm/distributed/kv_transfer/kv_connector/v1/metrics.py
vllm/distributed/kv_transfer/kv_connector/v1/metrics.py
+6
-6
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
...istributed/kv_transfer/kv_connector/v1/multi_connector.py
+2
-2
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
...distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+4
-4
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+3
-1
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+2
-2
vllm/triton_utils/__init__.py
vllm/triton_utils/__init__.py
+2
-1
vllm/v1/metrics/loggers.py
vllm/v1/metrics/loggers.py
+4
-4
vllm/v1/sample/logits_processor/__init__.py
vllm/v1/sample/logits_processor/__init__.py
+1
-1
vllm/v1/spec_decode/metrics.py
vllm/v1/spec_decode/metrics.py
+3
-2
No files found.
tools/pre_commit/mypy.py
View file @
9e6bcda3
...
...
@@ -27,19 +27,24 @@ FILES = [
"vllm/*.py"
,
"vllm/assets"
,
"vllm/distributed"
,
"vllm/engine"
,
"vllm/entrypoints"
,
"vllm/executor"
,
"vllm/inputs"
,
"vllm/logging_utils"
,
"vllm/multimodal"
,
"vllm/platforms"
,
"vllm/plugins"
,
"vllm/transformers_utils"
,
"vllm/triton_utils"
,
"vllm/usage"
,
"vllm/utils"
,
"vllm/worker"
,
"vllm/v1/core"
,
"vllm/v1/engine"
,
"vllm/v1/metrics"
,
"vllm/v1/pool"
,
"vllm/v1/sample"
,
"vllm/v1/worker"
,
]
...
...
@@ -50,24 +55,19 @@ SEPARATE_GROUPS = [
# v0 related
"vllm/attention"
,
"vllm/compilation"
,
"vllm/engine"
,
"vllm/inputs"
,
"vllm/lora"
,
"vllm/model_executor"
,
"vllm/plugins"
,
"vllm/worker"
,
# v1 related
"vllm/v1/attention"
,
"vllm/v1/executor"
,
"vllm/v1/kv_offload"
,
"vllm/v1/metrics"
,
"vllm/v1/sample"
,
"vllm/v1/spec_decode"
,
"vllm/v1/structured_output"
,
]
# TODO(woosuk): Include the code from Megatron and HuggingFace.
EXCLUDE
=
[
"vllm/engine/arg_utils.py"
,
"vllm/model_executor/parallel_utils"
,
"vllm/model_executor/models"
,
"vllm/model_executor/layers/fla/ops"
,
...
...
vllm/distributed/kv_transfer/kv_connector/v1/base.py
View file @
9e6bcda3
...
...
@@ -565,7 +565,7 @@ class KVConnectorBase_V1(ABC):
vllm_config
:
"VllmConfig"
,
metric_types
:
dict
[
type
[
"PromMetric"
],
type
[
"PromMetricT"
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
)
->
Optional
[
"KVConnectorPromMetrics"
]:
"""
Create a KVConnectorPromMetrics subclass which should register
...
...
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
View file @
9e6bcda3
...
...
@@ -806,7 +806,7 @@ class LMCacheMPConnector(KVConnectorBase_V1):
vllm_config
:
"VllmConfig"
,
metric_types
:
dict
[
type
[
"PromMetric"
],
type
[
"PromMetricT"
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
)
->
Optional
[
"KVConnectorPromMetrics"
]:
"""
Create a KVConnectorPromMetrics subclass which should register
...
...
vllm/distributed/kv_transfer/kv_connector/v1/metrics.py
View file @
9e6bcda3
...
...
@@ -52,13 +52,13 @@ class KVConnectorStats:
class
KVConnectorLogging
:
def
__init__
(
self
,
kv_tranfer_config
:
KVTransferConfig
):
def
__init__
(
self
,
kv_tran
s
fer_config
:
KVTransferConfig
|
None
):
# This should be called on frontend process.
assert
not
has_kv_transfer_group
()
# Instantiate the connector's stats class.
if
kv_tranfer_config
and
kv_tranfer_config
.
kv_connector
:
if
kv_tran
s
fer_config
and
kv_tran
s
fer_config
.
kv_connector
:
self
.
connector_cls
=
KVConnectorFactory
.
get_connector_class
(
kv_tranfer_config
kv_tran
s
fer_config
)
self
.
reset
()
...
...
@@ -120,7 +120,7 @@ class KVConnectorPromMetrics:
vllm_config
:
VllmConfig
,
metric_types
:
dict
[
type
[
PromMetric
],
type
[
PromMetricT
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
):
self
.
_kv_transfer_config
=
vllm_config
.
kv_transfer_config
self
.
_gauge_cls
=
metric_types
[
Gauge
]
...
...
@@ -129,7 +129,7 @@ class KVConnectorPromMetrics:
self
.
_labelnames
=
labelnames
self
.
_per_engine_labelvalues
=
per_engine_labelvalues
def
make_per_engine
(
self
,
metric
:
PromMetric
)
->
PromMetric
:
def
make_per_engine
(
self
,
metric
:
PromMetric
)
->
dict
[
int
,
PromMetric
]
:
"""
Create a per-engine child of a prometheus_client.Metric with
the appropriate labels set. The parent metric must be created
...
...
@@ -165,7 +165,7 @@ class KVConnectorPrometheus:
self
,
vllm_config
:
VllmConfig
,
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
):
self
.
prom_metrics
:
KVConnectorPromMetrics
|
None
=
None
kv_transfer_config
=
vllm_config
.
kv_transfer_config
...
...
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
View file @
9e6bcda3
...
...
@@ -85,7 +85,7 @@ class MultiKVConnectorPromMetrics(KVConnectorPromMetrics):
vllm_config
:
"VllmConfig"
,
metric_types
:
dict
[
type
[
PromMetric
],
type
[
PromMetricT
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
prom_metrics
:
dict
[
str
,
KVConnectorPromMetrics
],
):
super
().
__init__
(
vllm_config
,
metric_types
,
labelnames
,
per_engine_labelvalues
)
...
...
@@ -434,7 +434,7 @@ class MultiConnector(KVConnectorBase_V1):
vllm_config
:
"VllmConfig"
,
metric_types
:
dict
[
type
[
"PromMetric"
],
type
[
"PromMetricT"
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
)
->
KVConnectorPromMetrics
:
prom_metrics
:
dict
[
str
,
KVConnectorPromMetrics
]
=
{}
for
connector_cls
,
temp_config
in
cls
.
_get_connector_classes_and_configs
(
...
...
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
View file @
9e6bcda3
...
...
@@ -288,7 +288,7 @@ class NixlConnector(KVConnectorBase_V1):
vllm_config
:
VllmConfig
,
metric_types
:
dict
[
type
[
PromMetric
],
type
[
PromMetricT
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
)
->
KVConnectorPromMetrics
:
return
NixlPromMetrics
(
vllm_config
,
metric_types
,
labelnames
,
per_engine_labelvalues
...
...
@@ -2345,9 +2345,9 @@ class NixlKVConnectorStats(KVConnectorStats):
return
{
"Num successful transfers"
:
n
,
"Avg xfer time (ms)"
:
round
(
xfer_time
.
mean
()
*
1e3
,
3
),
"P90 xfer time (ms)"
:
round
(
np
.
percentile
(
xfer_time
,
90
)
*
1e3
,
3
),
"P90 xfer time (ms)"
:
round
(
np
.
percentile
(
xfer_time
,
90
)
.
item
()
*
1e3
,
3
),
"Avg post time (ms)"
:
round
(
post_time
.
mean
()
*
1e3
,
3
),
"P90 post time (ms)"
:
round
(
np
.
percentile
(
post_time
,
90
)
*
1e3
,
3
),
"P90 post time (ms)"
:
round
(
np
.
percentile
(
post_time
,
90
)
.
item
()
*
1e3
,
3
),
"Avg MB per transfer"
:
round
(
avg_mb
,
3
),
"Throughput (MB/s)"
:
round
(
throughput_mb_s
,
3
),
"Avg number of descriptors"
:
round
(
descs
.
mean
(),
1
),
...
...
@@ -2364,7 +2364,7 @@ class NixlPromMetrics(KVConnectorPromMetrics):
vllm_config
:
VllmConfig
,
metric_types
:
dict
[
type
[
PromMetric
],
type
[
PromMetricT
]],
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
):
super
().
__init__
(
vllm_config
,
metric_types
,
labelnames
,
per_engine_labelvalues
)
...
...
vllm/engine/arg_utils.py
View file @
9e6bcda3
...
...
@@ -1954,7 +1954,9 @@ class EngineArgs:
self
.
enable_prefix_caching
=
False
def
_set_default_max_num_seqs_and_batched_tokens_args
(
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
self
,
usage_context
:
UsageContext
|
None
,
model_config
:
ModelConfig
,
):
world_size
=
self
.
pipeline_parallel_size
*
self
.
tensor_parallel_size
(
...
...
vllm/transformers_utils/config.py
View file @
9e6bcda3
...
...
@@ -614,12 +614,12 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
def
maybe_override_with_speculators
(
model
:
str
,
tokenizer
:
str
,
tokenizer
:
str
|
None
,
trust_remote_code
:
bool
,
revision
:
str
|
None
=
None
,
vllm_speculative_config
:
dict
[
str
,
Any
]
|
None
=
None
,
**
kwargs
,
)
->
tuple
[
str
,
str
,
dict
[
str
,
Any
]
|
None
]:
)
->
tuple
[
str
,
str
|
None
,
dict
[
str
,
Any
]
|
None
]:
"""
Resolve model configuration when speculators are detected.
...
...
vllm/triton_utils/__init__.py
View file @
9e6bcda3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
TYPE_CHECKING
from
vllm.triton_utils.importing
import
(
HAS_TRITON
,
...
...
@@ -7,7 +8,7 @@ from vllm.triton_utils.importing import (
TritonPlaceholder
,
)
if
HAS_TRITON
:
if
TYPE_CHECKING
or
HAS_TRITON
:
import
triton
import
triton.language
as
tl
import
triton.language.extra.libdevice
as
tldevice
...
...
vllm/v1/metrics/loggers.py
View file @
9e6bcda3
...
...
@@ -104,8 +104,8 @@ class LoggingStatLogger(StatLoggerBase):
self
.
mm_caching_metrics
=
CachingMetrics
()
self
.
spec_decoding_logging
=
SpecDecodingLogging
()
kv_tranfer_config
=
self
.
vllm_config
.
kv_transfer_config
self
.
kv_connector_logging
=
KVConnectorLogging
(
kv_tranfer_config
)
kv_tran
s
fer_config
=
self
.
vllm_config
.
kv_transfer_config
self
.
kv_connector_logging
=
KVConnectorLogging
(
kv_tran
s
fer_config
)
self
.
last_prompt_throughput
:
float
=
0.0
self
.
last_generation_throughput
:
float
=
0.0
self
.
engine_is_idle
=
False
...
...
@@ -380,7 +380,7 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
model_name
=
vllm_config
.
model_config
.
served_model_name
max_model_len
=
vllm_config
.
model_config
.
max_model_len
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]]
=
{
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]]
=
{
idx
:
[
model_name
,
str
(
idx
)]
for
idx
in
engine_indexes
}
...
...
@@ -1052,7 +1052,7 @@ PromMetric: TypeAlias = Gauge | Counter | Histogram
def
make_per_engine
(
metric
:
PromMetric
,
engine_idxs
:
list
[
int
],
model_name
:
str
metric
:
PromMetric
,
engine_idxs
:
list
[
int
],
model_name
:
object
)
->
dict
[
int
,
PromMetric
]:
return
{
idx
:
metric
.
labels
(
model_name
,
str
(
idx
))
for
idx
in
engine_idxs
}
...
...
vllm/v1/sample/logits_processor/__init__.py
View file @
9e6bcda3
...
...
@@ -313,7 +313,7 @@ class AdapterLogitsProcessor(LogitsProcessor):
if
(
len
(
inspect
.
signature
(
req_lp
).
parameters
)
==
3
)
else
[
output_ids
]
)
return
partial
(
req_lp
,
*
args
)
return
partial
(
req_lp
,
*
args
)
# type: ignore[misc]
return
None
def
update_state
(
self
,
batch_update
:
BatchUpdate
|
None
):
...
...
vllm/v1/spec_decode/metrics.py
View file @
9e6bcda3
...
...
@@ -144,7 +144,7 @@ class SpecDecodingProm:
self
,
speculative_config
:
SpeculativeConfig
|
None
,
labelnames
:
list
[
str
],
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]],
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
):
self
.
spec_decoding_enabled
=
speculative_config
is
not
None
if
not
self
.
spec_decoding_enabled
:
...
...
@@ -215,7 +215,8 @@ class SpecDecodingProm:
def
make_per_engine
(
counter
:
prometheus_client
.
Counter
,
per_engine_labelvalues
:
dict
[
int
,
list
[
str
]]
counter
:
prometheus_client
.
Counter
,
per_engine_labelvalues
:
dict
[
int
,
list
[
object
]],
):
"""Create a counter for each label value."""
return
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment