Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
17299f08
Unverified
Commit
17299f08
authored
May 13, 2025
by
JieXin Liang
Committed by
GitHub
May 13, 2025
Browse files
[misc] deep_gemm fallback to NVRTC when NVCC not found (#6252)
parent
5380cd7e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
1 deletion
+12
-1
python/sglang/srt/layers/quantization/deep_gemm.py
python/sglang/srt/layers/quantization/deep_gemm.py
+12
-1
No files found.
python/sglang/srt/layers/quantization/deep_gemm.py
View file @
17299f08
...
@@ -15,6 +15,7 @@ _ENABLE_JIT_DEEPGEMM = False
...
@@ -15,6 +15,7 @@ _ENABLE_JIT_DEEPGEMM = False
if
is_cuda
():
if
is_cuda
():
import
deep_gemm
import
deep_gemm
from
deep_gemm
import
get_num_sms
from
deep_gemm
import
get_num_sms
from
deep_gemm.jit.compiler
import
get_nvcc_compiler
from
deep_gemm.jit_kernels.gemm
import
get_best_configs
from
deep_gemm.jit_kernels.gemm
import
get_best_configs
from
deep_gemm.jit_kernels.runtime
import
FP8GemmRuntime
,
GemmType
from
deep_gemm.jit_kernels.runtime
import
FP8GemmRuntime
,
GemmType
from
deep_gemm.jit_kernels.tuner
import
jit_tuner
from
deep_gemm.jit_kernels.tuner
import
jit_tuner
...
@@ -48,7 +49,17 @@ os.environ["DG_JIT_CACHE_DIR"] = os.getenv(
...
@@ -48,7 +49,17 @@ os.environ["DG_JIT_CACHE_DIR"] = os.getenv(
# Refer to https://github.com/deepseek-ai/DeepGEMM/commit/d75b218b7b8f4a5dd5406ac87905039ead3ae42f
# Refer to https://github.com/deepseek-ai/DeepGEMM/commit/d75b218b7b8f4a5dd5406ac87905039ead3ae42f
# NVRTC may have performance loss with some cases.
# NVRTC may have performance loss with some cases.
# And NVCC JIT speed is also 9x faster in the ref commit
# And NVCC JIT speed is also 9x faster in the ref commit
os
.
environ
[
"DG_JIT_USE_NVRTC"
]
=
os
.
getenv
(
"SGL_DG_USE_NVRTC"
,
"0"
)
# Default to the NVCC JIT path ("0"); switch the default to NVRTC ("1") only
# when JIT DeepGEMM is enabled and get_nvcc_compiler() fails.
_USE_NVRTC_DEFAULT = "0"
if _ENABLE_JIT_DEEPGEMM:
    try:
        get_nvcc_compiler()
    except Exception:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt / SystemExit raised during import still propagate.
        logger.warning(
            "NVCC Compiler not found, use NVRTC for DeepGEMM JIT "
            "and may have performance loss with some cases."
        )
        _USE_NVRTC_DEFAULT = "1"
# An explicitly set SGL_DG_USE_NVRTC always takes precedence over the
# detected default.
os.environ["DG_JIT_USE_NVRTC"] = os.getenv("SGL_DG_USE_NVRTC", _USE_NVRTC_DEFAULT)
def
update_deep_gemm_config
(
gpu_id
:
int
,
server_args
:
ServerArgs
):
def
update_deep_gemm_config
(
gpu_id
:
int
,
server_args
:
ServerArgs
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment