Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
02d411fd
Unverified
Commit
02d411fd
authored
Sep 03, 2025
by
Didier Durand
Committed by
GitHub
Sep 02, 2025
Browse files
[Doc]: fix typos in Python comments (#24115)
Signed-off-by:
Didier Durand
<
durand.didier@gmail.com
>
parent
d7e1e599
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
11 additions
and
11 deletions
+11
-11
.buildkite/nightly-benchmarks/scripts/compare-json-results.py
...ldkite/nightly-benchmarks/scripts/compare-json-results.py
+1
-1
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+1
-1
benchmarks/benchmark_serving_structured_output.py
benchmarks/benchmark_serving_structured_output.py
+1
-1
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+1
-1
tools/profiler/visualize_layerwise_profile.py
tools/profiler/visualize_layerwise_profile.py
+1
-1
vllm/compilation/collective_fusion.py
vllm/compilation/collective_fusion.py
+1
-1
vllm/engine/multiprocessing/engine.py
vllm/engine/multiprocessing/engine.py
+1
-1
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+1
-1
vllm/model_executor/model_loader/default_loader.py
vllm/model_executor/model_loader/default_loader.py
+1
-1
vllm/v1/worker/xpu_worker.py
vllm/v1/worker/xpu_worker.py
+1
-1
vllm/worker/worker.py
vllm/worker/worker.py
+1
-1
No files found.
.buildkite/nightly-benchmarks/scripts/compare-json-results.py
View file @
02d411fd
...
...
@@ -218,7 +218,7 @@ if __name__ == "__main__":
"--xaxis"
,
type
=
str
,
default
=
"# of max concurrency."
,
help
=
"column name to use as X Axis in comparis
i
on graph"
,
help
=
"column name to use as X Axis in comparison graph"
,
)
args
=
parser
.
parse_args
()
...
...
benchmarks/benchmark_serving.py
View file @
02d411fd
...
...
@@ -1104,7 +1104,7 @@ def create_argument_parser():
"--percentile-metrics"
,
type
=
str
,
default
=
"ttft,tpot,itl"
,
help
=
"Comma-separated list of selected metrics to report percentils. "
help
=
"Comma-separated list of selected metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Default value is "ttft,tpot,itl".'
,
...
...
benchmarks/benchmark_serving_structured_output.py
View file @
02d411fd
...
...
@@ -998,7 +998,7 @@ def create_argument_parser():
"--percentile-metrics"
,
type
=
str
,
default
=
"ttft,tpot,itl"
,
help
=
"Comma-separated list of selected metrics to report percentils. "
help
=
"Comma-separated list of selected metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Default value is "ttft,tpot,itl".'
,
...
...
benchmarks/benchmark_throughput.py
View file @
02d411fd
...
...
@@ -719,7 +719,7 @@ def create_argument_parser():
"[length * (1 - range_ratio), length * (1 + range_ratio)]."
,
)
# hf dtaset
# hf dataset
parser
.
add_argument
(
"--hf-subset"
,
type
=
str
,
default
=
None
,
help
=
"Subset of the HF dataset."
)
...
...
tools/profiler/visualize_layerwise_profile.py
View file @
02d411fd
...
...
@@ -119,7 +119,7 @@ def attempt_to_make_names_unique(entries_and_traces):
if
not
all_the_same
(
trace_eles
)),
None
)
if
first_trace_difference
is
None
:
# can't create a unique name, leave the
m
names as the
# can't create a unique name, leave the names as they
# are they will get aggregated by the pivot_table call
continue
...
...
vllm/compilation/collective_fusion.py
View file @
02d411fd
...
...
@@ -513,7 +513,7 @@ if flashinfer_comm is not None:
torch
.
ops
.
_C
.
static_scaled_fp8_quant
(
quant_out
,
norm_out
,
scale_factor
)
if
scale_factor
is
None
or
norm_out
is
not
None
:
# we need to return allreduce outp
p
ut
# we need to return allreduce output
# in cases of non quant fused AR + RMS norm
# and fused AR + RMS norm + quant without fused add
allreduce_in
.
copy_
(
allreduce_out
)
...
...
vllm/engine/multiprocessing/engine.py
View file @
02d411fd
...
...
@@ -49,7 +49,7 @@ class MQLLMEngine:
This class is used to wrap the
[`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
in concurr
n
et manner. It runs a background loop and uses zeromq to
in concurrent manner. It runs a background loop and uses zeromq to
receive new requests and stream outputs incrementally via ipc.
The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
...
...
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
View file @
02d411fd
...
...
@@ -23,7 +23,7 @@ TORCH_DEVICE_IDENTITY = None
# The condition to determine if it is on a platform that supports
# torch._scaled_mm rowwise feature.
# The condition is determined once as the operations
# are time
consuming.
# are time-consuming.
USE_ROWWISE_TORCH_SCALED_MM
=
(
current_platform
.
is_rocm
()
and
version
.
parse
(
torch
.
__version__
)
>=
version
.
parse
(
"2.7"
)
and
current_platform
.
has_device_capability
(
94
))
...
...
vllm/model_executor/model_loader/default_loader.py
View file @
02d411fd
...
...
@@ -211,7 +211,7 @@ class DefaultModelLoader(BaseModelLoader):
if
not
USE_TPU_COMMONS
:
# In PyTorch XLA, we should call `xm.mark_step`
# requently so that not too many ops are accumulated
# frequently so that not too many ops are accumulated
# in the XLA program. import torch_xla.core.xla_model
# as xm
import
torch_xla.core.xla_model
as
xm
...
...
vllm/v1/worker/xpu_worker.py
View file @
02d411fd
...
...
@@ -84,7 +84,7 @@ class XPUWorker(Worker):
"""Profiles the peak memory usage of the model to determine how many
KV blocks may be allocated without OOMs.
The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the maximum possible number of GPU and CPU blocks
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
.. tip::
You may limit the usage of GPU memory
...
...
vllm/worker/worker.py
View file @
02d411fd
...
...
@@ -234,7 +234,7 @@ class Worker(LocalOrDistributedWorkerBase):
KV blocks may be allocated without OOMs.
The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the maximum possible number of GPU and CPU blocks
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
Tip:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment