Unverified commit c882a7f5, authored by Nick Hill, committed by GitHub
Browse files

[SpecDecoding] Update MLPSpeculator CI tests to use smaller model (#6714)

parent 5e8ca973
@@ -24,14 +24,14 @@ import pytest
 from .conftest import run_greedy_equality_correctness_test
 # main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"
 # speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3
 # precision
 PRECISION = "float32"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment