Unverified commit bc0a5a0c, authored by Vadim Gimpelson, committed by GitHub
Browse files

[CI] Add Qwen3-Next-FP8 to Blackwell model tests (#31049)


Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
parent bfa2c0bb
# Evaluation config for the Qwen3-Next FP8 model test.
model_name: "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
# Minimum accuracy the run is expected to reach.
accuracy_threshold: 0.85
num_questions: 1319
num_fewshot: 5
# Server command-line flags. The folded scalar (>-) joins the indented
# lines below into one space-separated string with no trailing newline.
server_args: >-
  --max-model-len 4096
  --tensor-parallel-size 2
  --enable-expert-parallel
  --async-scheduling
# Environment variables to set for the server process.
env:
  VLLM_USE_FLASHINFER_MOE_FP8: "1"
...@@ -4,3 +4,4 @@ Qwen1.5-MoE-W4A16-CT.yaml ...@@ -4,3 +4,4 @@ Qwen1.5-MoE-W4A16-CT.yaml
DeepSeek-V2-Lite-Instruct-FP8.yaml DeepSeek-V2-Lite-Instruct-FP8.yaml
Qwen3-30B-A3B-NVFP4.yaml Qwen3-30B-A3B-NVFP4.yaml
Qwen3-Next-80B-A3B-NVFP4-EP2.yaml Qwen3-Next-80B-A3B-NVFP4-EP2.yaml
Qwen3-Next-FP8-EP2.yaml
...@@ -71,6 +71,7 @@ def test_gsm8k_correctness(config_filename): ...@@ -71,6 +71,7 @@ def test_gsm8k_correctness(config_filename):
print(f"Number of questions: {eval_config['num_questions']}") print(f"Number of questions: {eval_config['num_questions']}")
print(f"Number of few-shot examples: {eval_config['num_fewshot']}") print(f"Number of few-shot examples: {eval_config['num_fewshot']}")
print(f"Server args: {' '.join(server_args)}") print(f"Server args: {' '.join(server_args)}")
print(f"Environment variables: {env_dict}")
# Launch server and run evaluation # Launch server and run evaluation
with RemoteOpenAIServer( with RemoteOpenAIServer(
......
...@@ -106,6 +106,7 @@ class RemoteOpenAIServer: ...@@ -106,6 +106,7 @@ class RemoteOpenAIServer:
env.update(env_dict) env.update(env_dict)
serve_cmd = ["vllm", "serve", model, *vllm_serve_args] serve_cmd = ["vllm", "serve", model, *vllm_serve_args]
print(f"Launching RemoteOpenAIServer with: {' '.join(serve_cmd)}") print(f"Launching RemoteOpenAIServer with: {' '.join(serve_cmd)}")
print(f"Environment variables: {env}")
self.proc: subprocess.Popen = subprocess.Popen( self.proc: subprocess.Popen = subprocess.Popen(
serve_cmd, serve_cmd,
env=env, env=env,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment