Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d40ee62b
"tests/python/vscode:/vscode.git/clone" did not exist on "8909d1ff03974c9012b50a978faca31e3c86d9b3"
Unverified
Commit
d40ee62b
authored
Mar 12, 2025
by
Lianmin Zheng
Committed by
GitHub
Mar 12, 2025
Browse files
Update nightly tests (#4352)
parent
91b19949
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
10 deletions
+9
-10
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+7
-8
test/srt/test_nightly_gsm8k_eval.py
test/srt/test_nightly_gsm8k_eval.py
+2
-2
No files found.
python/sglang/srt/model_executor/model_runner.py
View file @
d40ee62b
...
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
...
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader.loader
import
(
DefaultModelLoader
,
device_loading_context
,
get_model_loader
,
)
from
sglang.srt.model_loader.utils
import
set_default_torch_dtype
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.sampling.sampling_batch_info
import
SamplingBatchInfo
from
sglang.srt.sampling.sampling_batch_info
import
SamplingBatchInfo
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
...
@@ -409,13 +415,6 @@ class ModelRunner:
...
@@ -409,13 +415,6 @@ class ModelRunner:
self
,
model_path
:
str
,
load_format
:
str
self
,
model_path
:
str
,
load_format
:
str
)
->
tuple
[
bool
,
str
]:
)
->
tuple
[
bool
,
str
]:
"""Update engine weights in-place from the disk."""
"""Update engine weights in-place from the disk."""
from
sglang.srt.model_loader.loader
import
(
DefaultModelLoader
,
device_loading_context
,
get_model_loader
,
)
from
sglang.srt.model_loader.utils
import
set_default_torch_dtype
logger
.
info
(
logger
.
info
(
f
"Update engine weights online from disk begin. "
f
"Update engine weights online from disk begin. "
f
"avail mem=
{
get_available_gpu_memory
(
self
.
device
,
self
.
gpu_id
):.
2
f
}
GB"
f
"avail mem=
{
get_available_gpu_memory
(
self
.
device
,
self
.
gpu_id
):.
2
f
}
GB"
...
@@ -425,7 +424,7 @@ class ModelRunner:
...
@@ -425,7 +424,7 @@ class ModelRunner:
self
.
model_config
.
model_path
=
model_path
self
.
model_config
.
model_path
=
model_path
load_config
=
LoadConfig
(
load_format
=
load_format
)
load_config
=
LoadConfig
(
load_format
=
load_format
)
# Only support
vllm
DefaultModelLoader for now
# Only support
the
DefaultModelLoader for now
loader
=
get_model_loader
(
load_config
)
loader
=
get_model_loader
(
load_config
)
if
not
isinstance
(
loader
,
DefaultModelLoader
):
if
not
isinstance
(
loader
,
DefaultModelLoader
):
message
=
f
"Failed to get model loader:
{
loader
}
."
message
=
f
"Failed to get model loader:
{
loader
}
."
...
...
test/srt/test_nightly_gsm8k_eval.py
View file @
d40ee62b
...
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
...
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
:
0.85
,
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
:
0.85
,
"google/gemma-2-27b-it"
:
0.92
,
"google/gemma-2-27b-it"
:
0.92
,
"meta-llama/Llama-3.1-70B-Instruct"
:
0.95
,
"meta-llama/Llama-3.1-70B-Instruct"
:
0.95
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
:
0.6
3
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
:
0.6
4
,
"Qwen/Qwen2-57B-A14B-Instruct"
:
0.86
,
"Qwen/Qwen2-57B-A14B-Instruct"
:
0.86
,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8"
:
0.83
,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8"
:
0.83
,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8"
:
0.54
,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8"
:
0.54
,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
:
0.84
,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
:
0.84
,
"neuralmagic/gemma-2-2b-it-FP8"
:
0.60
,
"neuralmagic/gemma-2-2b-it-FP8"
:
0.60
,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8"
:
0.94
,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8"
:
0.94
,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
:
0.6
2
,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
:
0.6
5
,
"neuralmagic/Qwen2-72B-Instruct-FP8"
:
0.94
,
"neuralmagic/Qwen2-72B-Instruct-FP8"
:
0.94
,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
:
0.82
,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
:
0.82
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
:
0.84
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
:
0.84
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment