Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
02d411fd
Unverified
Commit
02d411fd
authored
Sep 03, 2025
by
Didier Durand
Committed by
GitHub
Sep 02, 2025
Browse files
[Doc]: fix typos in Python comments (#24115)
Signed-off-by:
Didier Durand
<
durand.didier@gmail.com
>
parent
d7e1e599
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
11 additions
and
11 deletions
+11
-11
.buildkite/nightly-benchmarks/scripts/compare-json-results.py
...ldkite/nightly-benchmarks/scripts/compare-json-results.py
+1
-1
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+1
-1
benchmarks/benchmark_serving_structured_output.py
benchmarks/benchmark_serving_structured_output.py
+1
-1
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+1
-1
tools/profiler/visualize_layerwise_profile.py
tools/profiler/visualize_layerwise_profile.py
+1
-1
vllm/compilation/collective_fusion.py
vllm/compilation/collective_fusion.py
+1
-1
vllm/engine/multiprocessing/engine.py
vllm/engine/multiprocessing/engine.py
+1
-1
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+1
-1
vllm/model_executor/model_loader/default_loader.py
vllm/model_executor/model_loader/default_loader.py
+1
-1
vllm/v1/worker/xpu_worker.py
vllm/v1/worker/xpu_worker.py
+1
-1
vllm/worker/worker.py
vllm/worker/worker.py
+1
-1
No files found.
.buildkite/nightly-benchmarks/scripts/compare-json-results.py
View file @
02d411fd
...
@@ -218,7 +218,7 @@ if __name__ == "__main__":
...
@@ -218,7 +218,7 @@ if __name__ == "__main__":
"--xaxis"
,
"--xaxis"
,
type
=
str
,
type
=
str
,
default
=
"# of max concurrency."
,
default
=
"# of max concurrency."
,
help
=
"column name to use as X Axis in comparis
i
on graph"
,
help
=
"column name to use as X Axis in comparison graph"
,
)
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
...
...
benchmarks/benchmark_serving.py
View file @
02d411fd
...
@@ -1104,7 +1104,7 @@ def create_argument_parser():
...
@@ -1104,7 +1104,7 @@ def create_argument_parser():
"--percentile-metrics"
,
"--percentile-metrics"
,
type
=
str
,
type
=
str
,
default
=
"ttft,tpot,itl"
,
default
=
"ttft,tpot,itl"
,
help
=
"Comma-separated list of selected metrics to report percentils. "
help
=
"Comma-separated list of selected metrics to report percentil
e
s. "
"This argument specifies the metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Default value is "ttft,tpot,itl".'
,
'Default value is "ttft,tpot,itl".'
,
...
...
benchmarks/benchmark_serving_structured_output.py
View file @
02d411fd
...
@@ -998,7 +998,7 @@ def create_argument_parser():
...
@@ -998,7 +998,7 @@ def create_argument_parser():
"--percentile-metrics"
,
"--percentile-metrics"
,
type
=
str
,
type
=
str
,
default
=
"ttft,tpot,itl"
,
default
=
"ttft,tpot,itl"
,
help
=
"Comma-separated list of selected metrics to report percentils. "
help
=
"Comma-separated list of selected metrics to report percentil
e
s. "
"This argument specifies the metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Default value is "ttft,tpot,itl".'
,
'Default value is "ttft,tpot,itl".'
,
...
...
benchmarks/benchmark_throughput.py
View file @
02d411fd
...
@@ -719,7 +719,7 @@ def create_argument_parser():
...
@@ -719,7 +719,7 @@ def create_argument_parser():
"[length * (1 - range_ratio), length * (1 + range_ratio)]."
,
"[length * (1 - range_ratio), length * (1 + range_ratio)]."
,
)
)
# hf dtaset
# hf d
a
taset
parser
.
add_argument
(
parser
.
add_argument
(
"--hf-subset"
,
type
=
str
,
default
=
None
,
help
=
"Subset of the HF dataset."
"--hf-subset"
,
type
=
str
,
default
=
None
,
help
=
"Subset of the HF dataset."
)
)
...
...
tools/profiler/visualize_layerwise_profile.py
View file @
02d411fd
...
@@ -119,7 +119,7 @@ def attempt_to_make_names_unique(entries_and_traces):
...
@@ -119,7 +119,7 @@ def attempt_to_make_names_unique(entries_and_traces):
if
not
all_the_same
(
trace_eles
)),
None
)
if
not
all_the_same
(
trace_eles
)),
None
)
if
first_trace_difference
is
None
:
if
first_trace_difference
is
None
:
# can't create a unique name, leave the
m
names as the
# can't create a unique name, leave the names as the
y
# are they will get aggregated by the pivot_table call
# are they will get aggregated by the pivot_table call
continue
continue
...
...
vllm/compilation/collective_fusion.py
View file @
02d411fd
...
@@ -513,7 +513,7 @@ if flashinfer_comm is not None:
...
@@ -513,7 +513,7 @@ if flashinfer_comm is not None:
torch
.
ops
.
_C
.
static_scaled_fp8_quant
(
torch
.
ops
.
_C
.
static_scaled_fp8_quant
(
quant_out
,
norm_out
,
scale_factor
)
quant_out
,
norm_out
,
scale_factor
)
if
scale_factor
is
None
or
norm_out
is
not
None
:
if
scale_factor
is
None
or
norm_out
is
not
None
:
# we need to return allreduce outp
p
ut
# we need to return allreduce output
# in cases of non quant fused AR + RMS norm
# in cases of non quant fused AR + RMS norm
# and fused AR + RMS norm + quant without fused add
# and fused AR + RMS norm + quant without fused add
allreduce_in
.
copy_
(
allreduce_out
)
allreduce_in
.
copy_
(
allreduce_out
)
...
...
vllm/engine/multiprocessing/engine.py
View file @
02d411fd
...
@@ -49,7 +49,7 @@ class MQLLMEngine:
...
@@ -49,7 +49,7 @@ class MQLLMEngine:
This class is used to wrap the
This class is used to wrap the
[`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
[`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
in concurr
n
et manner. It runs a background loop and uses zeromq to
in concurre
n
t manner. It runs a background loop and uses zeromq to
receive new requests and stream outputs incrementally via ipc.
receive new requests and stream outputs incrementally via ipc.
The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
...
...
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
View file @
02d411fd
...
@@ -23,7 +23,7 @@ TORCH_DEVICE_IDENTITY = None
...
@@ -23,7 +23,7 @@ TORCH_DEVICE_IDENTITY = None
# The condition to determine if it is on a platform that supports
# The condition to determine if it is on a platform that supports
# torch._scaled_mm rowwise feature.
# torch._scaled_mm rowwise feature.
# The condition is determined once as the operations
# The condition is determined once as the operations
# are time
consuming.
# are time
-
consuming.
USE_ROWWISE_TORCH_SCALED_MM
=
(
current_platform
.
is_rocm
()
and
version
.
parse
(
USE_ROWWISE_TORCH_SCALED_MM
=
(
current_platform
.
is_rocm
()
and
version
.
parse
(
torch
.
__version__
)
>=
version
.
parse
(
"2.7"
)
torch
.
__version__
)
>=
version
.
parse
(
"2.7"
)
and
current_platform
.
has_device_capability
(
94
))
and
current_platform
.
has_device_capability
(
94
))
...
...
vllm/model_executor/model_loader/default_loader.py
View file @
02d411fd
...
@@ -211,7 +211,7 @@ class DefaultModelLoader(BaseModelLoader):
...
@@ -211,7 +211,7 @@ class DefaultModelLoader(BaseModelLoader):
if
not
USE_TPU_COMMONS
:
if
not
USE_TPU_COMMONS
:
# In PyTorch XLA, we should call `xm.mark_step`
# In PyTorch XLA, we should call `xm.mark_step`
# requently so that not too many ops are accumulated
#
f
requently so that not too many ops are accumulated
# in the XLA program. import torch_xla.core.xla_model
# in the XLA program. import torch_xla.core.xla_model
# as xm
# as xm
import
torch_xla.core.xla_model
as
xm
import
torch_xla.core.xla_model
as
xm
...
...
vllm/v1/worker/xpu_worker.py
View file @
02d411fd
...
@@ -84,7 +84,7 @@ class XPUWorker(Worker):
...
@@ -84,7 +84,7 @@ class XPUWorker(Worker):
"""Profiles the peak memory usage of the model to determine how many
"""Profiles the peak memory usage of the model to determine how many
KV blocks may be allocated without OOMs.
KV blocks may be allocated without OOMs.
The engine will first conduct a profiling of the existing memory usage.
The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the maximum possible number of GPU and CPU blocks
Then, it calculate
s
the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
that can be allocated with the remaining free memory.
.. tip::
.. tip::
You may limit the usage of GPU memory
You may limit the usage of GPU memory
...
...
vllm/worker/worker.py
View file @
02d411fd
...
@@ -234,7 +234,7 @@ class Worker(LocalOrDistributedWorkerBase):
...
@@ -234,7 +234,7 @@ class Worker(LocalOrDistributedWorkerBase):
KV blocks may be allocated without OOMs.
KV blocks may be allocated without OOMs.
The engine will first conduct a profiling of the existing memory usage.
The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the maximum possible number of GPU and CPU blocks
Then, it calculate
s
the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
that can be allocated with the remaining free memory.
Tip:
Tip:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment