[SpecDecoding] Update MLPSpeculator CI tests to use smaller model (#6714)

c882a7f5 · Nick Hill · GitHub · 5e8ca973 · c882a7f5
Unverified commit c882a7f5, authored Jul 24, 2024 by Nick Hill; committed by GitHub on Jul 24, 2024.
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

tests/spec_decode/e2e/test_mlp_correctness.py (+3 -3)

No files found.
--- a/tests/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/spec_decode/e2e/test_mlp_correctness.py
@@ -24,14 +24,14 @@ import pytest
 from .conftest import run_greedy_equality_correctness_test
 # main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"
 # speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3
 # precision
 PRECISION = "float32"