Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
5a6400ee
"test/vscode:/vscode.git/clone" did not exist on "e288f6ca01a5009ab32f1db010b31f14756e0303"
Unverified
Commit
5a6400ee
authored
Mar 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Mar 10, 2025
Browse files
Test no vllm custom allreduce (#4256)
parent
cf0ccd40
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
5 additions
and
5 deletions
+5
-5
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+1
-1
python/pyproject.toml
python/pyproject.toml
+1
-1
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+2
-2
scripts/ci_install_dependency.sh
scripts/ci_install_dependency.sh
+1
-1
No files found.
.github/workflows/pr-test.yml
View file @
5a6400ee
...
@@ -266,7 +266,7 @@ jobs:
...
@@ -266,7 +266,7 @@ jobs:
cd test/srt
cd test/srt
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
#
USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
-
name
:
Benchmark single latency + torch.compile (TP=2)
-
name
:
Benchmark single latency + torch.compile (TP=2)
timeout-minutes
:
10
timeout-minutes
:
10
...
...
python/pyproject.toml
View file @
5a6400ee
...
@@ -44,7 +44,7 @@ runtime_common = [
...
@@ -44,7 +44,7 @@ runtime_common = [
srt
=
[
srt
=
[
"sglang[runtime_common]"
,
"sglang[runtime_common]"
,
"sgl-kernel==0.0.4"
,
"sgl-kernel==0.0.4
.post1
"
,
"flashinfer_python==0.2.2.post1"
,
"flashinfer_python==0.2.2.post1"
,
"torch==2.5.1"
,
"torch==2.5.1"
,
"vllm>=0.6.4.post1,<=0.7.2"
,
"vllm>=0.6.4.post1,<=0.7.2"
,
...
...
python/sglang/srt/server_args.py
View file @
5a6400ee
...
@@ -480,7 +480,7 @@ class ServerArgs:
...
@@ -480,7 +480,7 @@ class ServerArgs:
"--chunked-prefill-size"
,
"--chunked-prefill-size"
,
type
=
int
,
type
=
int
,
default
=
ServerArgs
.
chunked_prefill_size
,
default
=
ServerArgs
.
chunked_prefill_size
,
help
=
"The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill"
,
help
=
"The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill
.
"
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--max-prefill-tokens"
,
"--max-prefill-tokens"
,
...
@@ -505,7 +505,7 @@ class ServerArgs:
...
@@ -505,7 +505,7 @@ class ServerArgs:
"--cpu-offload-gb"
,
"--cpu-offload-gb"
,
type
=
int
,
type
=
int
,
default
=
ServerArgs
.
cpu_offload_gb
,
default
=
ServerArgs
.
cpu_offload_gb
,
help
=
"How many GBs of RAM to reserve for CPU offloading"
,
help
=
"How many GBs of RAM to reserve for CPU offloading
.
"
,
)
)
# Other runtime options
# Other runtime options
...
...
scripts/ci_install_dependency.sh
View file @
5a6400ee
...
@@ -26,4 +26,4 @@ pip install transformers==4.45.2 sentence_transformers accelerate peft pandas da
...
@@ -26,4 +26,4 @@ pip install transformers==4.45.2 sentence_transformers accelerate peft pandas da
pip
install
cuda-python nvidia-cuda-nvrtc-cu12
pip
install
cuda-python nvidia-cuda-nvrtc-cu12
# reinstall sgl-kernel
# reinstall sgl-kernel
pip
install
sgl-kernel
==
0.0.4
--force-reinstall
--no-deps
pip
install
sgl-kernel
==
0.0.4
.post1
--force-reinstall
--no-deps
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment