Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2f79f588
Unverified
Commit
2f79f588
authored
Jan 27, 2025
by
Yineng Zhang
Committed by
GitHub
Jan 27, 2025
Browse files
feat: use sgl-kernel 0.0.3 in sglang (#3179)
parent
8a96f749
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
21 additions
and
25 deletions
+21
-25
python/pyproject.toml
python/pyproject.toml
+1
-1
python/sglang/srt/layers/activation.py
python/sglang/srt/layers/activation.py
+5
-5
python/sglang/srt/layers/layernorm.py
python/sglang/srt/layers/layernorm.py
+5
-5
python/sglang/srt/layers/sampler.py
python/sglang/srt/layers/sampler.py
+4
-8
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+3
-3
python/sglang/srt/models/minicpm3.py
python/sglang/srt/models/minicpm3.py
+3
-3
No files found.
python/pyproject.toml
View file @
2f79f588
...
...
@@ -27,7 +27,7 @@ runtime_common = [
]
srt
=
[
"sglang[runtime_common]"
,
"cuda-python"
,
"sgl-kernel>=0.0.
2.post18
"
,
"torch"
,
"vllm==0.6.4.post1"
,
"sgl-kernel>=0.0.
3
"
,
"torch"
,
"vllm==0.6.4.post1"
,
"flashinfer==0.1.6"
]
...
...
python/sglang/srt/layers/activation.py
View file @
2f79f588
...
...
@@ -20,10 +20,10 @@ import torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
sglang.srt.utils
import
is_
flashinfer
_available
from
sglang.srt.utils
import
is_
cuda
_available
if
is_
flashinfer
_available
():
from
flashinfer.activation
import
gelu_and_mul
,
gelu_tanh_and_mul
,
silu_and_mul
if
is_
cuda
_available
():
from
sgl_kernel
import
gelu_and_mul
,
gelu_tanh_and_mul
,
silu_and_mul
from
vllm.model_executor.custom_op
import
CustomOp
...
...
@@ -149,8 +149,8 @@ def get_act_fn(
return
act_fn
if
not
is_
flashinfer
_available
():
if
not
is_
cuda
_available
():
logger
.
info
(
"
FlashInfer
is not available on Non-NV platforms. Fallback to other kernel libraries."
"
sgl-kernel
is not available on Non-NV platforms. Fallback to other kernel libraries."
)
from
vllm.model_executor.layers.activation
import
GeluAndMul
,
SiluAndMul
python/sglang/srt/layers/layernorm.py
View file @
2f79f588
...
...
@@ -19,10 +19,10 @@ from typing import Optional, Tuple, Union
import
torch
import
torch.nn
as
nn
from
sglang.srt.utils
import
is_
flashinfer
_available
from
sglang.srt.utils
import
is_
cuda
_available
if
is_
flashinfer
_available
():
from
flashinfer.norm
import
(
if
is_
cuda
_available
():
from
sgl_kernel
import
(
fused_add_rmsnorm
,
gemma_fused_add_rmsnorm
,
gemma_rmsnorm
,
...
...
@@ -121,8 +121,8 @@ class GemmaRMSNorm(CustomOp):
return
out
if
not
is_
flashinfer
_available
():
if
not
is_
cuda
_available
():
logger
.
info
(
"
FlashInfer
is not available on Non-NV platforms. Fallback to other kernel libraries."
"
sgl-kernel
is not available on Non-NV platforms. Fallback to other kernel libraries."
)
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
,
RMSNorm
python/sglang/srt/layers/sampler.py
View file @
2f79f588
...
...
@@ -10,14 +10,10 @@ from sglang.srt.layers.dp_attention import get_attention_tp_group
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.sampling.sampling_batch_info
import
SamplingBatchInfo
from
sglang.srt.utils
import
(
crash_on_warnings
,
get_bool_env_var
,
is_flashinfer_available
,
)
if
is_flashinfer_available
():
from
flashinfer.sampling
import
(
from
sglang.srt.utils
import
crash_on_warnings
,
get_bool_env_var
,
is_cuda_available
if
is_cuda_available
():
from
sgl_kernel
import
(
min_p_sampling_from_probs
,
top_k_renorm_prob
,
top_k_top_p_sampling_from_probs
,
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
2f79f588
...
...
@@ -56,12 +56,12 @@ from sglang.srt.layers.vocab_parallel_embedding import (
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.utils
import
is_
flashinfer
_available
,
is_hip
from
sglang.srt.utils
import
is_
cuda
_available
,
is_hip
is_hip_
=
is_hip
()
if
is_
flashinfer
_available
():
from
flashinfer
import
bmm_fp8
if
is_
cuda
_available
():
from
sgl_kernel
import
bmm_fp8
class
DeepseekV2MLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/minicpm3.py
View file @
2f79f588
...
...
@@ -40,10 +40,10 @@ from sglang.srt.layers.vocab_parallel_embedding import (
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.utils
import
is_
flashinfer
_available
from
sglang.srt.utils
import
is_
cuda
_available
if
is_
flashinfer
_available
():
from
flashinfer
import
bmm_fp8
if
is_
cuda
_available
():
from
sgl_kernel
import
bmm_fp8
class
MiniCPM3MLP
(
nn
.
Module
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment