Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2dd7d0c5
Unverified
Commit
2dd7d0c5
authored
Mar 04, 2025
by
Lianmin Zheng
Committed by
GitHub
Mar 04, 2025
Browse files
Revert "Fix nightly-test CI" (#4065)
parent
0d4e3228
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
13 deletions
+2
-13
python/sglang/srt/layers/attention/flashinfer_backend.py
python/sglang/srt/layers/attention/flashinfer_backend.py
+2
-13
No files found.
python/sglang/srt/layers/attention/flashinfer_backend.py
View file @
2dd7d0c5
...
@@ -427,10 +427,7 @@ class FlashInferAttnBackend(AttentionBackend):
...
@@ -427,10 +427,7 @@ class FlashInferAttnBackend(AttentionBackend):
else
:
else
:
o2
,
s2
=
prefill_wrapper_paged
.
forward_return_lse
(
o2
,
s2
=
prefill_wrapper_paged
.
forward_return_lse
(
q
.
contiguous
().
view
(
-
1
,
layer
.
tp_q_head_num
,
layer
.
head_dim
),
q
.
contiguous
().
view
(
-
1
,
layer
.
tp_q_head_num
,
layer
.
head_dim
),
self
.
_to_dtype
(
forward_batch
.
token_to_kv_pool
.
get_kv_buffer
(
layer
.
layer_id
),
forward_batch
.
token_to_kv_pool
.
get_kv_buffer
(
layer
.
layer_id
),
q
.
dtype
,
),
causal
=
False
,
causal
=
False
,
sm_scale
=
layer
.
scaling
,
sm_scale
=
layer
.
scaling
,
logits_soft_cap
=
layer
.
logit_cap
,
logits_soft_cap
=
layer
.
logit_cap
,
...
@@ -472,9 +469,7 @@ class FlashInferAttnBackend(AttentionBackend):
...
@@ -472,9 +469,7 @@ class FlashInferAttnBackend(AttentionBackend):
o
=
decode_wrapper
.
forward
(
o
=
decode_wrapper
.
forward
(
q
.
contiguous
().
view
(
-
1
,
layer
.
tp_q_head_num
,
layer
.
head_dim
),
q
.
contiguous
().
view
(
-
1
,
layer
.
tp_q_head_num
,
layer
.
head_dim
),
self
.
_to_dtype
(
forward_batch
.
token_to_kv_pool
.
get_kv_buffer
(
layer
.
layer_id
),
forward_batch
.
token_to_kv_pool
.
get_kv_buffer
(
layer
.
layer_id
),
q
.
dtype
),
sm_scale
=
layer
.
scaling
,
sm_scale
=
layer
.
scaling
,
logits_soft_cap
=
layer
.
logit_cap
,
logits_soft_cap
=
layer
.
logit_cap
,
k_scale
=
layer
.
k_scale
,
k_scale
=
layer
.
k_scale
,
...
@@ -483,12 +478,6 @@ class FlashInferAttnBackend(AttentionBackend):
...
@@ -483,12 +478,6 @@ class FlashInferAttnBackend(AttentionBackend):
return
o
.
view
(
-
1
,
layer
.
tp_q_head_num
*
layer
.
head_dim
)
return
o
.
view
(
-
1
,
layer
.
tp_q_head_num
*
layer
.
head_dim
)
def
_to_dtype
(
self
,
kv_tuple
,
dtype
):
if
kv_tuple
[
0
].
dtype
!=
dtype
:
return
tuple
(
t
.
to
(
dtype
)
for
t
in
kv_tuple
)
else
:
return
kv_tuple
def
_get_wrapper_idx
(
self
,
layer
:
RadixAttention
):
def
_get_wrapper_idx
(
self
,
layer
:
RadixAttention
):
if
self
.
num_wrappers
==
1
:
if
self
.
num_wrappers
==
1
:
return
0
return
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment