Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
d9ac6392
"torch_scatter/src/cpu.c" did not exist on "fe98b763dd35b1d8ad6c2d8100b60807532ca12b"
Unverified
Commit
d9ac6392
authored
Jul 02, 2024
by
Yueyang Pan
Committed by
GitHub
Jul 01, 2024
Browse files
Fix flashinfer version (#576)
parent
26294b2f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
9 deletions
+5
-9
python/sglang/srt/layers/radix_attention.py
python/sglang/srt/layers/radix_attention.py
+4
-8
python/sglang/srt/server.py
python/sglang/srt/server.py
+1
-1
No files found.
python/sglang/srt/layers/radix_attention.py
View file @
d9ac6392
...
...
@@ -30,12 +30,8 @@ class RadixAttention(nn.Module):
self
.
prefill_forward
=
self
.
prefill_forward_flashinfer
self
.
extend_forward
=
self
.
prefill_forward_flashinfer
self
.
decode_forward
=
self
.
decode_forward_flashinfer
# flashinfer only accepts a boolean logit_cap argument
if
logit_cap
>
0
:
assert
logit_cap
==
30
self
.
logit_cap
=
True
else
:
self
.
logit_cap
=
False
# flashinfer now accepts float logit_cap argument
self
.
logit_cap
=
logit_cap
if
logit_cap
>
0
else
0
else
:
self
.
prefill_forward
=
self
.
prefill_forward_triton
self
.
extend_forward
=
self
.
extend_forward_triton
...
...
@@ -110,7 +106,7 @@ class RadixAttention(nn.Module):
o
=
input_metadata
.
flashinfer_prefill_wrapper
.
forward
(
q
.
contiguous
().
view
(
-
1
,
self
.
tp_q_head_num
,
self
.
head_dim
),
input_metadata
.
token_to_kv_pool
.
kv_data
[
self
.
layer_id
],
logits_cap
=
self
.
logit_cap
,
logits_
soft_
cap
=
self
.
logit_cap
,
)
return
o
.
view
(
-
1
,
self
.
tp_q_head_num
*
self
.
head_dim
)
...
...
@@ -121,7 +117,7 @@ class RadixAttention(nn.Module):
o
=
input_metadata
.
flashinfer_decode_wrapper
.
forward
(
q
.
contiguous
().
view
(
-
1
,
self
.
tp_q_head_num
,
self
.
head_dim
),
input_metadata
.
token_to_kv_pool
.
kv_data
[
self
.
layer_id
],
logits_cap
=
self
.
logit_cap
,
logits_
soft_
cap
=
self
.
logit_cap
,
)
return
o
.
view
(
-
1
,
self
.
tp_q_head_num
*
self
.
head_dim
)
...
...
python/sglang/srt/server.py
View file @
d9ac6392
...
...
@@ -152,7 +152,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
if
server_args
.
disable_disk_cache
:
disable_cache
()
if
server_args
.
enable_flashinfer
:
assert_pkg_version
(
"flashinfer"
,
"0.0.
5
"
)
assert_pkg_version
(
"flashinfer"
,
"0.0.
7
"
)
if
server_args
.
chat_template
:
# TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api
(
server_args
.
chat_template
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment