Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e1cd7a5f
Unverified
Commit
e1cd7a5f
authored
Jan 05, 2026
by
Michael Goin
Committed by
GitHub
Jan 05, 2026
Browse files
[Bugfix] Add init_workspace_manager to moe kernel benchmarks (#31042)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
a68e703c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
0 deletions
+15
-0
benchmarks/kernels/benchmark_cutlass_moe_fp8.py
benchmarks/kernels/benchmark_cutlass_moe_fp8.py
+5
-0
benchmarks/kernels/benchmark_cutlass_moe_nvfp4.py
benchmarks/kernels/benchmark_cutlass_moe_nvfp4.py
+5
-0
benchmarks/kernels/benchmark_grouped_gemm_cutlass.py
benchmarks/kernels/benchmark_grouped_gemm_cutlass.py
+5
-0
No files found.
benchmarks/kernels/benchmark_cutlass_moe_fp8.py
View file @
e1cd7a5f
...
@@ -15,6 +15,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8
...
@@ -15,6 +15,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8
from
vllm.model_executor.layers.fused_moe.fused_moe
import
fused_experts
,
fused_topk
from
vllm.model_executor.layers.fused_moe.fused_moe
import
fused_experts
,
fused_topk
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.v1.worker.workspace
import
init_workspace_manager
# Weight shapes for different models: [num_experts, topk, hidden_size,
# Weight shapes for different models: [num_experts, topk, hidden_size,
# intermediate_size]
# intermediate_size]
...
@@ -297,6 +298,10 @@ def bench_run(
...
@@ -297,6 +298,10 @@ def bench_run(
def
main
(
args
):
def
main
(
args
):
# Initialize workspace manager (required for CUTLASS MoE kernels)
device
=
torch
.
device
(
"cuda:0"
)
init_workspace_manager
(
device
)
print
(
"Benchmarking models:"
)
print
(
"Benchmarking models:"
)
for
i
,
model
in
enumerate
(
args
.
models
):
for
i
,
model
in
enumerate
(
args
.
models
):
print
(
f
"[
{
i
}
]
{
model
}
"
)
print
(
f
"[
{
i
}
]
{
model
}
"
)
...
...
benchmarks/kernels/benchmark_cutlass_
fp4_moe
.py
→
benchmarks/kernels/benchmark_cutlass_
moe_nvfp4
.py
View file @
e1cd7a5f
...
@@ -21,6 +21,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4
...
@@ -21,6 +21,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4
from
vllm.model_executor.layers.fused_moe.fused_moe
import
fused_experts
,
fused_topk
from
vllm.model_executor.layers.fused_moe.fused_moe
import
fused_experts
,
fused_topk
from
vllm.scalar_type
import
scalar_types
from
vllm.scalar_type
import
scalar_types
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.v1.worker.workspace
import
init_workspace_manager
WEIGHT_SHAPES_MOE
=
{
WEIGHT_SHAPES_MOE
=
{
"nvidia/DeepSeek-R1-FP4"
:
[
"nvidia/DeepSeek-R1-FP4"
:
[
...
@@ -441,6 +442,10 @@ def bench_run(
...
@@ -441,6 +442,10 @@ def bench_run(
def
main
(
args
):
def
main
(
args
):
# Initialize workspace manager (required for CUTLASS MoE kernels)
device
=
torch
.
device
(
"cuda:0"
)
init_workspace_manager
(
device
)
print
(
"Benchmarking models:"
)
print
(
"Benchmarking models:"
)
for
i
,
model
in
enumerate
(
args
.
models
):
for
i
,
model
in
enumerate
(
args
.
models
):
print
(
f
"[
{
i
}
]
{
model
}
"
)
print
(
f
"[
{
i
}
]
{
model
}
"
)
...
...
benchmarks/kernels/benchmark_grouped_gemm_cutlass.py
View file @
e1cd7a5f
...
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
...
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
fused_topk
,
fused_topk
,
)
)
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.v1.worker.workspace
import
init_workspace_manager
DEFAULT_MODELS
=
[
DEFAULT_MODELS
=
[
"mistralai/Mixtral-8x7B-Instruct-v0.1"
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
,
...
@@ -364,6 +365,10 @@ def bench_run(
...
@@ -364,6 +365,10 @@ def bench_run(
def
main
(
args
):
def
main
(
args
):
# Initialize workspace manager (required for CUTLASS MoE kernels)
device
=
torch
.
device
(
"cuda:0"
)
init_workspace_manager
(
device
)
print
(
"Benchmarking models:"
)
print
(
"Benchmarking models:"
)
for
i
,
model
in
enumerate
(
args
.
models
):
for
i
,
model
in
enumerate
(
args
.
models
):
print
(
f
"[
{
i
}
]
{
model
}
"
)
print
(
f
"[
{
i
}
]
{
model
}
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment