[CI] Add smoke test for non-uniform AutoFP8 quantization (#6702)

01c16ede · Michael Goin · GitHub · 72fc7048 · 01c16ede
Unverified commit 01c16ede, authored Jul 23, 2024 by Michael Goin; committed by GitHub on Jul 23, 2024.
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 0 deletions

tests/quantization/test_fp8.py (+1 -0)

No files found.
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -13,6 +13,7 @@ from vllm.model_executor.layers.quantization.fp8 import (Fp8KVCacheMethod,
 MODELS = [
    "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
    "nm-testing/Phi-3-mini-128k-instruct-FP8",
+    "nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV",
 ]