Unverified commit d98b81e2, authored by Qiaolin Yu and committed by GitHub
Browse files

Accelerate deepseek fp4 b200 ci (#11993)

parent bcecf27e
...@@ -32,6 +32,8 @@ class TestDeepseekV3FP4(CustomTestCase): ...@@ -32,6 +32,8 @@ class TestDeepseekV3FP4(CustomTestCase):
"flashinfer_trtllm", "flashinfer_trtllm",
"--quantization", "--quantization",
"modelopt_fp4", "modelopt_fp4",
"--kv-cache-dtype",
"fp8_e4m3",
] ]
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
...@@ -100,6 +102,9 @@ class TestDeepseekV3FP4MTP(CustomTestCase): ...@@ -100,6 +102,9 @@ class TestDeepseekV3FP4MTP(CustomTestCase):
"1", "1",
"--speculative-num-draft-tokens", "--speculative-num-draft-tokens",
"4", "4",
"--kv-cache-dtype",
"fp8_e4m3",
"--enable-beta-spec",
] ]
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment