Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d31a6471
Unverified
Commit
d31a6471
authored
Jul 16, 2025
by
Lucas Wilkinson
Committed by
GitHub
Jul 15, 2025
Browse files
[BugFix] Fix import error on non-blackwell machines (#21020)
Signed-off-by:
Lucas Wilkinson
<
lwilkins@redhat.com
>
parent
85431bd9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
16 deletions
+12
-16
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
+10
-0
csrc/ops.h
csrc/ops.h
+0
-13
csrc/torch_bindings.cpp
csrc/torch_bindings.cpp
+2
-3
No files found.
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
View file @
d31a6471
...
...
@@ -18,6 +18,7 @@ limitations under the License.
* Taken from SGLANG PR https://github.com/sgl-project/sglang/pull/6929
* by Alcanderian JieXin Liang
*/
#include "core/registration.h"
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
...
...
@@ -270,4 +271,13 @@ int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_ba
}
#endif
// Binds the "sm100_cutlass_mla_decode" op name to the sm100_cutlass_mla_decode
// function under the CUDA dispatch key of the TORCH_EXTENSION_NAME library.
TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
  m.impl("sm100_cutlass_mla_decode", &sm100_cutlass_mla_decode);
}
// Binds the "sm100_cutlass_mla_get_workspace_size" op name to the
// sm100_cutlass_mla_get_workspace_size function under the CatchAll dispatch
// key of the TORCH_EXTENSION_NAME library.
TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CatchAll, m) {
  m.impl("sm100_cutlass_mla_get_workspace_size",
         &sm100_cutlass_mla_get_workspace_size);
}
// clang-format on
csrc/ops.h
View file @
d31a6471
...
...
@@ -167,19 +167,6 @@ void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope,
torch
::
Tensor
const
&
seq_lens
,
torch
::
Tensor
const
&
page_table
,
double
scale
);
// Declaration of the SM100 CUTLASS MLA decode entry point. All tensor
// arguments are taken by const reference; num_kv_splits defaults to 1.
void sm100_cutlass_mla_decode(
    torch::Tensor const& out, torch::Tensor const& q_nope,
    torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
    torch::Tensor const& seq_lens, torch::Tensor const& page_table,
    torch::Tensor const& workspace, double sm_scale,
    int64_t num_kv_splits =
        1 /* Set to 1 to avoid cuda_graph issue by default. */);
// Declaration of the SM100 CUTLASS MLA workspace-size query. sm_count
// defaults to 0 and num_kv_splits defaults to 1.
int64_t sm100_cutlass_mla_get_workspace_size(
    int64_t max_seq_len, int64_t num_batches, int64_t sm_count = 0,
    int64_t num_kv_splits =
        1 /* Set to 1 to avoid cuda_graph issue by default. */);
// Declaration: takes a mutable reference to a tensor named cpu_tensor and
// returns a torch::Tensor.
torch::Tensor get_cuda_view_from_cpu_tensor(torch::Tensor& cpu_tensor);
#ifndef USE_ROCM
...
...
csrc/torch_bindings.cpp
View file @
d31a6471
...
...
@@ -521,15 +521,14 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
" Tensor page_table, Tensor workspace, float "
"scale,"
" int num_kv_splits) -> ()"
);
ops
.
impl
(
"sm100_cutlass_mla_decode"
,
torch
::
kCUDA
,
&
sm100_cutlass_mla_decode
);
// conditionally compiled so impl in source file
// SM100 CUTLASS MLA workspace
ops
.
def
(
"sm100_cutlass_mla_get_workspace_size(int max_seq_len, int num_batches,"
" int sm_count, int num_kv_splits) "
"-> int"
);
ops
.
impl
(
"sm100_cutlass_mla_get_workspace_size"
,
&
sm100_cutlass_mla_get_workspace_size
);
// conditionally compiled so impl in source file
// Compute NVFP4 block quantized tensor.
ops
.
def
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment