Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad5aa6bd
Unverified
Commit
ad5aa6bd
authored
Feb 19, 2026
by
Manrique Vargas
Committed by
GitHub
Feb 18, 2026
Browse files
fix(docs): fix typos in comments and docstrings (#34836)
Signed-off-by:
machov
<
mv1742@nyu.edu
>
parent
9681068c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
6 additions
and
6 deletions
+6
-6
vllm/compilation/backends.py
vllm/compilation/backends.py
+1
-1
vllm/model_executor/layers/fused_moe/oracle/fp8.py
vllm/model_executor/layers/fused_moe/oracle/fp8.py
+1
-1
vllm/model_executor/layers/fused_moe/runner/default_moe_runner.py
...el_executor/layers/fused_moe/runner/default_moe_runner.py
+1
-1
vllm/model_executor/models/gpt_oss.py
vllm/model_executor/models/gpt_oss.py
+1
-1
vllm/utils/torch_utils.py
vllm/utils/torch_utils.py
+2
-2
No files found.
vllm/compilation/backends.py
View file @
ad5aa6bd
...
@@ -285,7 +285,7 @@ class CompilerManager:
...
@@ -285,7 +285,7 @@ class CompilerManager:
with
self
.
compile_context
(
compile_range
):
with
self
.
compile_context
(
compile_range
):
# There is a compilation time optimization here.
# There is a compilation time optimization here.
#
#
# If the (input metdata, graph, compiler config) are the same, then
# If the (input metadata, graph, compiler config) are the same, then
# we want to avoid compiling the same artifact again. If we didn't
# we want to avoid compiling the same artifact again. If we didn't
# do this optimization, the backend compilation (InductorAdaptor or
# do this optimization, the backend compilation (InductorAdaptor or
# InductorStandaloneAdaptor)
# InductorStandaloneAdaptor)
...
...
vllm/model_executor/layers/fused_moe/oracle/fp8.py
View file @
ad5aa6bd
...
@@ -420,7 +420,7 @@ def make_fp8_moe_quant_config(
...
@@ -420,7 +420,7 @@ def make_fp8_moe_quant_config(
per_out_ch_quant
:
bool
=
False
,
per_out_ch_quant
:
bool
=
False
,
)
->
FusedMoEQuantConfig
|
None
:
)
->
FusedMoEQuantConfig
|
None
:
"""
"""
Create FusedMoEQuantConfig for the specifed FP8 Backend.
Create FusedMoEQuantConfig for the specified FP8 Backend.
The FusedMoEQuantConfig holds the scales that are used
The FusedMoEQuantConfig holds the scales that are used
at runtime by the Modular Kernel abstraction.
at runtime by the Modular Kernel abstraction.
...
...
vllm/model_executor/layers/fused_moe/runner/default_moe_runner.py
View file @
ad5aa6bd
...
@@ -151,7 +151,7 @@ class DefaultMoERunner(MoERunner):
...
@@ -151,7 +151,7 @@ class DefaultMoERunner(MoERunner):
kernels for different parallel execution modes.
kernels for different parallel execution modes.
Eventually, this class will be split up and specialized for different
Eventually, this class will be split up and specialized for different
configurations, e.g. the presen
s
e or absence of shared experts, a gate, etc.
configurations, e.g. the presence or absence of shared experts, a gate, etc.
"""
"""
def
__init__
(
def
__init__
(
...
...
vllm/model_executor/models/gpt_oss.py
View file @
ad5aa6bd
...
@@ -586,7 +586,7 @@ class GptOssModel(nn.Module):
...
@@ -586,7 +586,7 @@ class GptOssModel(nn.Module):
parts
=
name
.
split
(
"."
)
parts
=
name
.
split
(
"."
)
ids
=
[
s
for
s
in
parts
if
s
.
isdigit
()]
ids
=
[
s
for
s
in
parts
if
s
.
isdigit
()]
# for amd-quark format that each expert is sep
e
rated
# for amd-quark format that each expert is separated
# need to extract the parameter name with experts fused.
# need to extract the parameter name with experts fused.
# example model: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8
# example model: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8
if
len
(
ids
)
==
2
:
if
len
(
ids
)
==
2
:
...
...
vllm/utils/torch_utils.py
View file @
ad5aa6bd
...
@@ -567,8 +567,8 @@ def current_stream() -> torch.cuda.Stream:
...
@@ -567,8 +567,8 @@ def current_stream() -> torch.cuda.Stream:
return
_current_stream_tls
.
value
return
_current_stream_tls
.
value
# Global auxilary stream for running operations in background streams.
# Global auxiliary stream for running operations in background streams.
# We have single global auxilary stream to avoid an explosion of streams
# We have single global auxiliary stream to avoid an explosion of streams
# for every layer (and make profiling look sane).
# for every layer (and make profiling look sane).
#
#
# aux_stream() is currently used for:
# aux_stream() is currently used for:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment