Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
44d2e6af
Unverified
Commit
44d2e6af
authored
Jun 27, 2025
by
Michael Goin
Committed by
GitHub
Jun 26, 2025
Browse files
[Bugfix] Build moe_data for both sm100 and sm90 (#20086)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
2d7779f8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
6 deletions
+17
-6
CMakeLists.txt
CMakeLists.txt
+12
-2
csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
+5
-4
No files found.
CMakeLists.txt
View file @
44d2e6af
...
@@ -513,6 +513,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
...
@@ -513,6 +513,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
CUDA_ARCHS
"
${
FP4_ARCHS
}
"
)
CUDA_ARCHS
"
${
FP4_ARCHS
}
"
)
list
(
APPEND VLLM_EXT_SRC
"
${
SRCS
}
"
)
list
(
APPEND VLLM_EXT_SRC
"
${
SRCS
}
"
)
list
(
APPEND VLLM_GPU_FLAGS
"-DENABLE_NVFP4=1"
)
list
(
APPEND VLLM_GPU_FLAGS
"-DENABLE_NVFP4=1"
)
list
(
APPEND VLLM_GPU_FLAGS
"-DENABLE_CUTLASS_MOE_SM100=1"
)
message
(
STATUS
"Building NVFP4 for archs:
${
FP4_ARCHS
}
"
)
message
(
STATUS
"Building NVFP4 for archs:
${
FP4_ARCHS
}
"
)
else
()
else
()
message
(
STATUS
"Not building NVFP4 as no compatible archs were found."
)
message
(
STATUS
"Not building NVFP4 as no compatible archs were found."
)
...
@@ -547,8 +548,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
...
@@ -547,8 +548,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
# if it's possible to compile MoE kernels that use its output.
# if it's possible to compile MoE kernels that use its output.
cuda_archs_loose_intersection
(
SCALED_MM_ARCHS
"9.0a"
"
${
CUDA_ARCHS
}
"
)
cuda_archs_loose_intersection
(
SCALED_MM_ARCHS
"9.0a"
"
${
CUDA_ARCHS
}
"
)
if
(
${
CMAKE_CUDA_COMPILER_VERSION
}
VERSION_GREATER_EQUAL 12.3 AND SCALED_MM_ARCHS
)
if
(
${
CMAKE_CUDA_COMPILER_VERSION
}
VERSION_GREATER_EQUAL 12.3 AND SCALED_MM_ARCHS
)
set
(
SRCS
"csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x.cu"
set
(
SRCS
"csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x.cu"
)
"csrc/quantization/cutlass_w8a8/moe/moe_data.cu"
)
set_gencode_flags_for_srcs
(
set_gencode_flags_for_srcs
(
SRCS
"
${
SRCS
}
"
SRCS
"
${
SRCS
}
"
CUDA_ARCHS
"
${
SCALED_MM_ARCHS
}
"
)
CUDA_ARCHS
"
${
SCALED_MM_ARCHS
}
"
)
...
@@ -566,6 +566,16 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
...
@@ -566,6 +566,16 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
endif
()
endif
()
endif
()
endif
()
# moe_data.cu is used by all CUTLASS MoE kernels.
cuda_archs_loose_intersection
(
CUTLASS_MOE_DATA_ARCHS
"9.0a;10.0a"
"
${
CUDA_ARCHS
}
"
)
if
(
${
CMAKE_CUDA_COMPILER_VERSION
}
VERSION_GREATER_EQUAL 12.3 AND CUTLASS_MOE_DATA_ARCHS
)
set
(
SRCS
"csrc/quantization/cutlass_w8a8/moe/moe_data.cu"
)
set_gencode_flags_for_srcs
(
SRCS
"
${
SRCS
}
"
CUDA_ARCHS
"
${
CUTLASS_MOE_DATA_ARCHS
}
"
)
list
(
APPEND VLLM_EXT_SRC
"
${
SRCS
}
"
)
endif
()
#
#
# Machete kernels
# Machete kernels
...
...
csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
View file @
44d2e6af
...
@@ -241,7 +241,7 @@ void get_cutlass_moe_mm_data(
...
@@ -241,7 +241,7 @@ void get_cutlass_moe_mm_data(
// mm to run it for.
// mm to run it for.
int32_t
version_num
=
get_sm_version_num
();
int32_t
version_num
=
get_sm_version_num
();
#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) || \
#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) || \
(defined ENABLE_
SCALED_MM
_SM100 && ENABLE_
SCALED_MM
_SM
9
0)
(defined ENABLE_
CUTLASS_MOE
_SM100 && ENABLE_
CUTLASS_MOE
_SM
10
0)
get_cutlass_moe_mm_data_caller
(
topk_ids
,
expert_offsets
,
problem_sizes1
,
get_cutlass_moe_mm_data_caller
(
topk_ids
,
expert_offsets
,
problem_sizes1
,
problem_sizes2
,
input_permutation
,
problem_sizes2
,
input_permutation
,
output_permutation
,
num_experts
,
n
,
k
,
output_permutation
,
num_experts
,
n
,
k
,
...
@@ -252,7 +252,7 @@ void get_cutlass_moe_mm_data(
...
@@ -252,7 +252,7 @@ void get_cutlass_moe_mm_data(
false
,
false
,
"No compiled get_cutlass_moe_mm_data: no cutlass_scaled_mm kernel for "
"No compiled get_cutlass_moe_mm_data: no cutlass_scaled_mm kernel for "
"CUDA device capability: "
,
"CUDA device capability: "
,
version_num
,
". Required capability: 90"
);
version_num
,
". Required capability: 90
or 100
"
);
}
}
void
get_cutlass_pplx_moe_mm_data
(
torch
::
Tensor
&
expert_offsets
,
void
get_cutlass_pplx_moe_mm_data
(
torch
::
Tensor
&
expert_offsets
,
...
@@ -265,7 +265,8 @@ void get_cutlass_pplx_moe_mm_data(torch::Tensor& expert_offsets,
...
@@ -265,7 +265,8 @@ void get_cutlass_pplx_moe_mm_data(torch::Tensor& expert_offsets,
// This function currently gets compiled only if we have a valid cutlass moe
// This function currently gets compiled only if we have a valid cutlass moe
// mm to run it for.
// mm to run it for.
int32_t
version_num
=
get_sm_version_num
();
int32_t
version_num
=
get_sm_version_num
();
#if defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90
#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) || \
(defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100)
get_cutlass_pplx_moe_mm_data_caller
(
expert_offsets
,
problem_sizes1
,
get_cutlass_pplx_moe_mm_data_caller
(
expert_offsets
,
problem_sizes1
,
problem_sizes2
,
expert_num_tokens
,
problem_sizes2
,
expert_num_tokens
,
num_local_experts
,
padded_m
,
n
,
k
);
num_local_experts
,
padded_m
,
n
,
k
);
...
@@ -275,7 +276,7 @@ void get_cutlass_pplx_moe_mm_data(torch::Tensor& expert_offsets,
...
@@ -275,7 +276,7 @@ void get_cutlass_pplx_moe_mm_data(torch::Tensor& expert_offsets,
false
,
false
,
"No compiled get_cutlass_pplx_moe_mm_data: no cutlass_scaled_mm kernel "
"No compiled get_cutlass_pplx_moe_mm_data: no cutlass_scaled_mm kernel "
"for CUDA device capability: "
,
"for CUDA device capability: "
,
version_num
,
". Required capability: 90"
);
version_num
,
". Required capability: 90
or 100
"
);
}
}
void
cutlass_scaled_mm_azp
(
torch
::
Tensor
&
c
,
torch
::
Tensor
const
&
a
,
void
cutlass_scaled_mm_azp
(
torch
::
Tensor
&
c
,
torch
::
Tensor
const
&
a
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment