[CI] Fix failing FP8 cpu offload test (#13170)

Signed-off-by: mgoin <mgoin64@gmail.com>

[CI] Fix failing FP8 cpu offload test (#13170)
Signed-off-by: mgoin <mgoin64@gmail.com>
14b7899d · Michael Goin · GitHub · 09972e71 · 14b7899d
Unverified commit 14b7899d, authored Feb 12, 2025 by Michael Goin; committed by GitHub on Feb 12, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 6 deletions

tests/quantization/test_cpu_offload.py tests/quantization/test_cpu_offload.py +6 -6

No files found.
--- a/tests/quantization/test_cpu_offload.py
+++ b/tests/quantization/test_cpu_offload.py
 # SPDX-License-Identifier: Apache-2.0
 # Expanded quantized model tests for CPU offloading
 # Base tests: tests/basic_correctness/test_cpu_offload.py
@@ -14,13 +14,13 @@ from ..utils import compare_two_settings
                    reason="fp8 is not supported on this GPU type.")
 def test_cpu_offload_fp8():
    # Test quantization of an unquantized checkpoint
-    compare_two_settings("meta-llama/Meta-Llama-3-8B-Instruct",
+    compare_two_settings("meta-llama/Llama-3.2-1B-Instruct",
                         ["--quantization", "fp8"],
-                         ["--quantization", "fp8", "--cpu-offload-gb", "2"],
+                         ["--quantization", "fp8", "--cpu-offload-gb", "1"],
                         max_wait_seconds=480)
    # Test loading a quantized checkpoint
-    compare_two_settings("neuralmagic/Meta-Llama-3-8B-Instruct-FP8", [],
+    compare_two_settings("neuralmagic/Qwen2-1.5B-Instruct-FP8", [],
-                         ["--cpu-offload-gb", "2"],
+                         ["--cpu-offload-gb", "1"],
                         max_wait_seconds=480)