Unverified commit f4a8987f, authored by Sai Enduri and committed by GitHub
Browse files

Update amd docker and nightly models. (#6687)

parent 41ba767f
...@@ -9,7 +9,7 @@ else ...@@ -9,7 +9,7 @@ else
fi fi
# Pull the image # Pull the image
IMAGE="ghcr.io/saienduri/sglang-aiter-backend-v0.1.2:518" IMAGE="lmsysorg/sglang:v0.4.6.post5-rocm630"
echo "Pulling Docker image: $IMAGE" echo "Pulling Docker image: $IMAGE"
docker pull "$IMAGE" docker pull "$IMAGE"
......
...@@ -26,18 +26,17 @@ MODEL_SCORE_THRESHOLDS = { ...@@ -26,18 +26,17 @@ MODEL_SCORE_THRESHOLDS = {
"meta-llama/Llama-3.1-70B-Instruct": 0.95, "meta-llama/Llama-3.1-70B-Instruct": 0.95,
"mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64, "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
"Qwen/Qwen2-57B-A14B-Instruct": 0.86, "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.82, "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54, "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94, "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
"neuralmagic/Qwen2-72B-Instruct-FP8": 0.94, "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.86, "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.86,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.61, "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.65,
"google/gemma-2-27b-it": 0.91, "google/gemma-2-27b-it": 0.91,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
} }
# Models currently failing on AMD MI300x.
failing_models = { failing_models = {
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
"neuralmagic/gemma-2-2b-it-FP8", "neuralmagic/gemma-2-2b-it-FP8",
} }
...@@ -61,6 +60,16 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = remove_failing_models( ...@@ -61,6 +60,16 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = remove_failing_models(
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2
) )
NO_MOE_PADDING_MODELS = {"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"}
DISABLE_HF_XET_MODELS = {
"Qwen/Qwen2-57B-A14B-Instruct",
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8",
}
TRITON_MOE_MODELS = {
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8",
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
}
def parse_models(model_string): def parse_models(model_string):
return [model.strip() for model in model_string.split(",") if model.strip()] return [model.strip() for model in model_string.split(",") if model.strip()]
...@@ -156,6 +165,16 @@ class TestNightlyGsm8KEval(unittest.TestCase): ...@@ -156,6 +165,16 @@ class TestNightlyGsm8KEval(unittest.TestCase):
for model_group, is_fp8, is_tp2 in self.model_groups: for model_group, is_fp8, is_tp2 in self.model_groups:
for model in model_group: for model in model_group:
with self.subTest(model=model): with self.subTest(model=model):
os.environ["SGLANG_MOE_PADDING"] = (
"0" if model in NO_MOE_PADDING_MODELS else "1"
)
os.environ["HF_HUB_DISABLE_XET"] = (
"1" if model in DISABLE_HF_XET_MODELS else "0"
)
os.environ["SGLANG_AITER_MOE"] = (
"0" if model in TRITON_MOE_MODELS else "1"
)
process = popen_launch_server_wrapper(self.base_url, model, is_tp2) process = popen_launch_server_wrapper(self.base_url, model, is_tp2)
args = SimpleNamespace( args = SimpleNamespace(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment