Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d40ee62b
Unverified
Commit
d40ee62b
authored
Mar 12, 2025
by
Lianmin Zheng
Committed by
GitHub
Mar 12, 2025
Browse files
Update nightly tests (#4352)
parent
91b19949
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
10 deletions
+9
-10
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+7
-8
test/srt/test_nightly_gsm8k_eval.py
test/srt/test_nightly_gsm8k_eval.py
+2
-2
No files found.
python/sglang/srt/model_executor/model_runner.py
View file @
d40ee62b
...
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
...
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader.loader
import
(
DefaultModelLoader
,
device_loading_context
,
get_model_loader
,
)
from
sglang.srt.model_loader.utils
import
set_default_torch_dtype
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.sampling.sampling_batch_info
import
SamplingBatchInfo
from
sglang.srt.sampling.sampling_batch_info
import
SamplingBatchInfo
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
...
@@ -409,13 +415,6 @@ class ModelRunner:
...
@@ -409,13 +415,6 @@ class ModelRunner:
self
,
model_path
:
str
,
load_format
:
str
self
,
model_path
:
str
,
load_format
:
str
)
->
tuple
[
bool
,
str
]:
)
->
tuple
[
bool
,
str
]:
"""Update engine weights in-place from the disk."""
"""Update engine weights in-place from the disk."""
from
sglang.srt.model_loader.loader
import
(
DefaultModelLoader
,
device_loading_context
,
get_model_loader
,
)
from
sglang.srt.model_loader.utils
import
set_default_torch_dtype
logger
.
info
(
logger
.
info
(
f
"Update engine weights online from disk begin. "
f
"Update engine weights online from disk begin. "
f
"avail mem=
{
get_available_gpu_memory
(
self
.
device
,
self
.
gpu_id
):.
2
f
}
GB"
f
"avail mem=
{
get_available_gpu_memory
(
self
.
device
,
self
.
gpu_id
):.
2
f
}
GB"
...
@@ -425,7 +424,7 @@ class ModelRunner:
...
@@ -425,7 +424,7 @@ class ModelRunner:
self
.
model_config
.
model_path
=
model_path
self
.
model_config
.
model_path
=
model_path
load_config
=
LoadConfig
(
load_format
=
load_format
)
load_config
=
LoadConfig
(
load_format
=
load_format
)
# Only support
vllm
DefaultModelLoader for now
# Only support
the
DefaultModelLoader for now
loader
=
get_model_loader
(
load_config
)
loader
=
get_model_loader
(
load_config
)
if
not
isinstance
(
loader
,
DefaultModelLoader
):
if
not
isinstance
(
loader
,
DefaultModelLoader
):
message
=
f
"Failed to get model loader:
{
loader
}
."
message
=
f
"Failed to get model loader:
{
loader
}
."
...
...
test/srt/test_nightly_gsm8k_eval.py
View file @
d40ee62b
...
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
...
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
:
0.85
,
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
:
0.85
,
"google/gemma-2-27b-it"
:
0.92
,
"google/gemma-2-27b-it"
:
0.92
,
"meta-llama/Llama-3.1-70B-Instruct"
:
0.95
,
"meta-llama/Llama-3.1-70B-Instruct"
:
0.95
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
:
0.6
3
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
:
0.6
4
,
"Qwen/Qwen2-57B-A14B-Instruct"
:
0.86
,
"Qwen/Qwen2-57B-A14B-Instruct"
:
0.86
,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8"
:
0.83
,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8"
:
0.83
,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8"
:
0.54
,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8"
:
0.54
,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
:
0.84
,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
:
0.84
,
"neuralmagic/gemma-2-2b-it-FP8"
:
0.60
,
"neuralmagic/gemma-2-2b-it-FP8"
:
0.60
,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8"
:
0.94
,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8"
:
0.94
,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
:
0.6
2
,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
:
0.6
5
,
"neuralmagic/Qwen2-72B-Instruct-FP8"
:
0.94
,
"neuralmagic/Qwen2-72B-Instruct-FP8"
:
0.94
,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
:
0.82
,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
:
0.82
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
:
0.84
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
:
0.84
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment