Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2c7f01bc
Unverified
Commit
2c7f01bc
authored
Aug 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Aug 10, 2025
Browse files
Reorganize CI and test files (#9027)
parent
b58ae7a2
Changes
66
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
24 additions
and
59 deletions
+24
-59
scripts/ci/ci_start_disaggregation_servers.sh
scripts/ci/ci_start_disaggregation_servers.sh
+0
-0
scripts/ci/npu_ci_install_dependency.sh
scripts/ci/npu_ci_install_dependency.sh
+0
-0
scripts/ci_cache_models.sh
scripts/ci_cache_models.sh
+0
-40
sgl-kernel/CMakeLists.txt
sgl-kernel/CMakeLists.txt
+19
-14
sgl-kernel/README.md
sgl-kernel/README.md
+5
-5
test/srt/ascend/test_ascend_mla_w8a8int8.py
test/srt/ascend/test_ascend_mla_w8a8int8.py
+0
-0
test/srt/ascend/test_ascend_tp1_bf16.py
test/srt/ascend/test_ascend_tp1_bf16.py
+0
-0
test/srt/ascend/test_ascend_tp2_bf16.py
test/srt/ascend/test_ascend_tp2_bf16.py
+0
-0
test/srt/ep/test_deepep_internode.py
test/srt/ep/test_deepep_internode.py
+0
-0
test/srt/ep/test_deepep_intranode.py
test/srt/ep/test_deepep_intranode.py
+0
-0
test/srt/ep/test_deepep_large.py
test/srt/ep/test_deepep_large.py
+0
-0
test/srt/ep/test_deepep_low_latency.py
test/srt/ep/test_deepep_low_latency.py
+0
-0
test/srt/ep/test_deepep_small.py
test/srt/ep/test_deepep_small.py
+0
-0
test/srt/ep/test_eplb.py
test/srt/ep/test_eplb.py
+0
-0
test/srt/ep/test_hybrid_dp_ep_tp_mtp.py
test/srt/ep/test_hybrid_dp_ep_tp_mtp.py
+0
-0
test/srt/ep/test_moe_deepep.py
test/srt/ep/test_moe_deepep.py
+0
-0
test/srt/ep/test_moe_deepep_eval_accuracy_large.py
test/srt/ep/test_moe_deepep_eval_accuracy_large.py
+0
-0
test/srt/ep/test_moe_ep.py
test/srt/ep/test_moe_ep.py
+0
-0
test/srt/hicache/test_hicache.py
test/srt/hicache/test_hicache.py
+0
-0
test/srt/hicache/test_hicache_mla.py
test/srt/hicache/test_hicache_mla.py
+0
-0
No files found.
scripts/ci_start_disaggregation_servers.sh
→
scripts/ci
/ci
_start_disaggregation_servers.sh
View file @
2c7f01bc
File moved
scripts/npu_ci_install_dependency.sh
→
scripts/
ci/
npu_ci_install_dependency.sh
View file @
2c7f01bc
File moved
scripts/ci_cache_models.sh
deleted
100755 → 0
View file @
b58ae7a2
#!/bin/bash
set
-euxo
pipefail
mapfile
-t
models < <
(
python3
-c
"from sglang.test.test_utils import _get_default_models; print(_get_default_models())"
| jq
-r
'.[]'
)
if
[
${#
models
[@]
}
-eq
0
]
;
then
echo
"Failed to get default models."
exit
1
fi
cache_dir
=
"
${
DEFAULT_MODEL_CACHE_DIR
:-}
"
if
[
-z
"
$cache_dir
"
]
;
then
echo
"DEFAULT_MODEL_CACHE_DIR environment variable is not set."
exit
1
fi
failed_models
=()
for
model
in
"
${
models
[@]
}
"
;
do
local_model_dir
=
"
$cache_dir
/
$model
"
echo
"Caching model:
$model
to
$local_model_dir
"
mkdir
-p
"
$local_model_dir
"
if
!
huggingface-cli download
"
$model
"
\
--local-dir
"
$local_model_dir
"
\
--local-dir-use-symlinks
False 2>/dev/null
;
then
echo
"WARNING: Failed to cache model:
$model
"
rm
-rf
"
$local_model_dir
"
failed_models+
=(
"
$model
"
)
continue
fi
echo
"Successfully cached model:
$model
"
done
if
[
${#
failed_models
[@]
}
-gt
0
]
;
then
echo
-e
"
\n
[Summary] Failed to cache following models:"
printf
' - %s\n'
"
${
failed_models
[@]
}
"
else
echo
-e
"
\n
[Summary] All models cached successfully"
fi
sgl-kernel/CMakeLists.txt
View file @
2c7f01bc
...
...
@@ -87,6 +87,7 @@ FetchContent_Declare(
GIT_SHALLOW OFF
)
FetchContent_Populate
(
repo-flashinfer
)
# flash-attention
FetchContent_Declare
(
repo-flash-attention
...
...
@@ -95,6 +96,7 @@ FetchContent_Declare(
GIT_SHALLOW OFF
)
FetchContent_Populate
(
repo-flash-attention
)
# mscclpp
FetchContent_Declare
(
repo-mscclpp
...
...
@@ -232,6 +234,7 @@ set(SOURCES
"csrc/elementwise/activation.cu"
"csrc/elementwise/fused_add_rms_norm_kernel.cu"
"csrc/elementwise/rope.cu"
"csrc/common_extension.cc"
"csrc/gemm/awq_kernel.cu"
"csrc/gemm/bmm_fp8.cu"
"csrc/gemm/dsv3_fused_a_gemm.cu"
...
...
@@ -251,24 +254,10 @@ set(SOURCES
"csrc/gemm/per_token_quant_fp8.cu"
"csrc/gemm/qserve_w4a8_per_chn_gemm.cu"
"csrc/gemm/qserve_w4a8_per_group_gemm.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"
"csrc/moe/nvfp4_blockwise_moe.cu"
"csrc/moe/fp8_blockwise_moe_kernel.cu"
"csrc/moe/prepare_moe_input.cu"
"csrc/moe/ep_moe_reorder_kernel.cu"
"csrc/moe/ep_moe_silu_and_mul_kernel.cu"
"csrc/speculative/eagle_utils.cu"
"csrc/speculative/packbit.cu"
"csrc/spatial/greenctx_stream.cu"
"csrc/speculative/speculative_sampling.cu"
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu"
"csrc/kvcacheio/transfer.cu"
"csrc/moe/cutlass_moe/w4a8/scaled_mm_entry.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_moe_data.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cu"
"csrc/common_extension.cc"
"csrc/moe/marlin_moe_wna16/ops.cu"
"csrc/moe/marlin_moe_wna16/gptq_marlin_repack.cu"
"csrc/moe/marlin_moe_wna16/awq_marlin_repack.cu"
...
...
@@ -278,6 +267,19 @@ set(SOURCES
"csrc/moe/marlin_moe_wna16/kernel_fp16_ku4.cu"
"csrc/moe/marlin_moe_wna16/kernel_fp16_ku4b8.cu"
"csrc/moe/marlin_moe_wna16/kernel_fp16_ku8b128.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"
"csrc/moe/nvfp4_blockwise_moe.cu"
"csrc/moe/fp8_blockwise_moe_kernel.cu"
"csrc/moe/prepare_moe_input.cu"
"csrc/moe/ep_moe_reorder_kernel.cu"
"csrc/moe/ep_moe_silu_and_mul_kernel.cu"
"csrc/kvcacheio/transfer.cu"
"csrc/speculative/eagle_utils.cu"
"csrc/speculative/packbit.cu"
"csrc/spatial/greenctx_stream.cu"
"csrc/speculative/speculative_sampling.cu"
"
${
repo-flashinfer_SOURCE_DIR
}
/csrc/norm.cu"
"
${
repo-flashinfer_SOURCE_DIR
}
/csrc/renorm.cu"
"
${
repo-flashinfer_SOURCE_DIR
}
/csrc/sampling.cu"
...
...
@@ -312,12 +314,15 @@ else()
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-D_GLIBCXX_USE_CXX11_ABI=1"
)
set
(
CMAKE_CUDA_FLAGS
"
${
CMAKE_CUDA_FLAGS
}
-D_GLIBCXX_USE_CXX11_ABI=1"
)
endif
()
# mscclpp
set
(
MSCCLPP_USE_CUDA ON
)
set
(
MSCCLPP_BYPASS_GPU_CHECK ON
)
set
(
MSCCLPP_BUILD_TESTS OFF
)
add_subdirectory
(
${
repo-mscclpp_SOURCE_DIR
}
)
target_link_libraries
(
common_ops PRIVATE
${
TORCH_LIBRARIES
}
c10 cuda cublas cublasLt mscclpp_static
)
# flash attention
target_compile_definitions
(
common_ops PRIVATE
FLASHATTENTION_DISABLE_BACKWARD
FLASHATTENTION_DISABLE_DROPOUT
...
...
sgl-kernel/README.md
View file @
2c7f01bc
...
...
@@ -5,18 +5,18 @@
[

](https://pypi.org/project/sgl-kernel)
## Installation
For CUDA 11.8:
For CUDA 12.1 and above:
```
bash
pip3
install
sgl-kernel
-i
https://docs.sglang.ai/whl/cu118
pip3
install
sgl-kernel
```
For CUDA 1
2.1 or CUDA 12.4
:
For CUDA 1
1.8
:
```
bash
pip3
install
sgl-kernel
pip3
install
sgl-kernel
-i
https://docs.sglang.ai/whl/cu118
```
## Build from source
Development build:
...
...
test/srt/test_ascend_mla_w8a8int8.py
→
test/srt/
ascend/
test_ascend_mla_w8a8int8.py
View file @
2c7f01bc
File moved
test/srt/test_ascend_tp1_bf16.py
→
test/srt/
ascend/
test_ascend_tp1_bf16.py
View file @
2c7f01bc
File moved
test/srt/test_ascend_tp2_bf16.py
→
test/srt/
ascend/
test_ascend_tp2_bf16.py
View file @
2c7f01bc
File moved
test/srt/test_deepep_internode.py
→
test/srt/
ep/
test_deepep_internode.py
View file @
2c7f01bc
File moved
test/srt/test_deepep_intranode.py
→
test/srt/
ep/
test_deepep_intranode.py
View file @
2c7f01bc
File moved
test/srt/test_deepep_large.py
→
test/srt/
ep/
test_deepep_large.py
View file @
2c7f01bc
File moved
test/srt/test_deepep_low_latency.py
→
test/srt/
ep/
test_deepep_low_latency.py
View file @
2c7f01bc
File moved
test/srt/test_deepep_small.py
→
test/srt/
ep/
test_deepep_small.py
View file @
2c7f01bc
File moved
test/srt/test_eplb.py
→
test/srt/
ep/
test_eplb.py
View file @
2c7f01bc
File moved
test/srt/test_hybrid_dp_ep_tp_mtp.py
→
test/srt/
ep/
test_hybrid_dp_ep_tp_mtp.py
View file @
2c7f01bc
File moved
test/srt/test_moe_deepep.py
→
test/srt/
ep/
test_moe_deepep.py
View file @
2c7f01bc
File moved
test/srt/test_moe_deepep_eval_accuracy_large.py
→
test/srt/
ep/
test_moe_deepep_eval_accuracy_large.py
View file @
2c7f01bc
File moved
test/srt/test_moe_ep.py
→
test/srt/
ep/
test_moe_ep.py
View file @
2c7f01bc
File moved
test/srt/test_hicache.py
→
test/srt/
hicache/
test_hicache.py
View file @
2c7f01bc
File moved
test/srt/test_hicache_mla.py
→
test/srt/
hicache/
test_hicache_mla.py
View file @
2c7f01bc
File moved
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment