Unverified Commit d40ee62b authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Update nightly tests (#4352)

parent 91b19949
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader import get_model
+from sglang.srt.model_loader.loader import (
+    DefaultModelLoader,
+    device_loading_context,
+    get_model_loader,
+)
+from sglang.srt.model_loader.utils import set_default_torch_dtype
 from sglang.srt.model_loader.weight_utils import default_weight_loader
 from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
 from sglang.srt.server_args import ServerArgs
@@ -409,13 +415,6 @@ class ModelRunner:
         self, model_path: str, load_format: str
     ) -> tuple[bool, str]:
         """Update engine weights in-place from the disk."""
-        from sglang.srt.model_loader.loader import (
-            DefaultModelLoader,
-            device_loading_context,
-            get_model_loader,
-        )
-        from sglang.srt.model_loader.utils import set_default_torch_dtype
         logger.info(
             f"Update engine weights online from disk begin. "
             f"avail mem={get_available_gpu_memory(self.device, self.gpu_id):.2f} GB"
@@ -425,7 +424,7 @@ class ModelRunner:
         self.model_config.model_path = model_path
         load_config = LoadConfig(load_format=load_format)
-        # Only support vllm DefaultModelLoader for now
+        # Only support the DefaultModelLoader for now
         loader = get_model_loader(load_config)
         if not isinstance(loader, DefaultModelLoader):
             message = f"Failed to get model loader: {loader}."
...
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
     "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
     "google/gemma-2-27b-it": 0.92,
     "meta-llama/Llama-3.1-70B-Instruct": 0.95,
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
     "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
     "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
     "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
     "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
     "neuralmagic/gemma-2-2b-it-FP8": 0.60,
     "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
-    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.62,
+    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.65,
     "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
     "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
     "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.84,
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.