Unverified Commit d1e82408 authored by Lucas Wilkinson's avatar Lucas Wilkinson Committed by GitHub
Browse files

[Bugfix] Fix spurious "No compiled cutlass_scaled_mm ..." for W8A8 on Turing (#9487)

parent cb6fdaa0
......@@ -252,7 +252,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
else()
message(STATUS "Not building Marlin kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()
#
......@@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
else()
message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()
endif()
......
......@@ -137,9 +137,11 @@ void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
return;
}
if (version_num >= 75) {
// Turing
TORCH_CHECK(version_num >= 75);
cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
return;
}
#endif
TORCH_CHECK_NOT_IMPLEMENTED(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment