Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ox696c
ktransformers
Commits
bee6291d
Unverified
Commit
bee6291d
authored
Apr 28, 2025
by
Atream
Committed by
GitHub
Apr 28, 2025
Browse files
Merge pull request #1220 from kvcache-ai/fix-hopper-flashinfer
fix-hopper-flashinfer
parents
b703cc9c
b0318fc0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
3 deletions
+6
-3
ktransformers/models/custom_modeling_deepseek_v2.py
ktransformers/models/custom_modeling_deepseek_v2.py
+2
-1
ktransformers/models/custom_modeling_deepseek_v3.py
ktransformers/models/custom_modeling_deepseek_v3.py
+2
-1
ktransformers/operators/flashinfer_wrapper.py
ktransformers/operators/flashinfer_wrapper.py
+2
-1
No files found.
ktransformers/models/custom_modeling_deepseek_v2.py
View file @
bee6291d
...
...
@@ -50,7 +50,8 @@ class KDeepseekV2ForCausalLM(DeepseekV2PreTrainedModel):
self
.
wrapper
=
flashinfer
.
mla
.
BatchMLAPagedAttentionWrapper
(
self
.
workspace_buffer
,
use_cuda_graph
=
use_cuda_graph
,
qo_indptr
=
self
.
qo_indptr_buf
,
kv_indptr
=
self
.
paged_kv_indptr_buf
,
kv_indices
=
self
.
paged_kv_indices_buf
,
kv_len_arr
=
self
.
paged_kv_len_buf
kv_indices
=
self
.
paged_kv_indices_buf
,
kv_len_arr
=
self
.
paged_kv_len_buf
,
backend
=
"fa2"
,
)
def
batch_embeddings
(
self
,
batch
:
ForwardBatchInput
,
device
=
"cuda:0"
):
...
...
ktransformers/models/custom_modeling_deepseek_v3.py
View file @
bee6291d
...
...
@@ -54,7 +54,8 @@ class KDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
self
.
workspace_buffer
,
use_cuda_graph
=
use_cuda_graph
,
qo_indptr
=
self
.
qo_indptr_buf
,
kv_indptr
=
self
.
paged_kv_indptr_buf
,
kv_indices
=
self
.
paged_kv_indices_buf
,
kv_len_arr
=
self
.
paged_kv_len_buf
,
bsz_tensor
=
self
.
bsz_tensor_buf
bsz_tensor
=
self
.
bsz_tensor_buf
,
backend
=
"fa2"
,
)
def
batch_embeddings
(
self
,
batch
:
ForwardBatchInput
,
device
=
"cuda:0"
):
...
...
ktransformers/operators/flashinfer_wrapper.py
View file @
bee6291d
...
...
@@ -100,7 +100,8 @@ class MLAWrapper():
kv_indptr
=
self
.
kv_indptr_buf
,
kv_indices
=
self
.
kv_indices_buf
,
kv_len_arr
=
self
.
kv_len_arr_buf
,
bsz_tensor
=
self
.
batch_size_tensor_buf
bsz_tensor
=
self
.
batch_size_tensor_buf
,
backend
=
"fa2"
,
)
self
.
need_plan
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment