Unverified commit f2b20fe4, authored by Harry Mellor and committed by GitHub
Browse files

Consolidate Llama model usage in tests (#13094)

parent 40932d7a
...@@ -14,7 +14,7 @@ from vllm import SamplingParams ...@@ -14,7 +14,7 @@ from vllm import SamplingParams
from ...conftest import VllmRunner from ...conftest import VllmRunner
MODEL = "meta-llama/Llama-3.2-1B" MODEL = "meta-llama/Llama-3.2-1B-Instruct"
DTYPE = "half" DTYPE = "half"
......
...@@ -11,7 +11,7 @@ RTOL = 0.03 ...@@ -11,7 +11,7 @@ RTOL = 0.03
EXPECTED_VALUE = 0.62 EXPECTED_VALUE = 0.62
# FIXME(rob): enable prefix caching once supported. # FIXME(rob): enable prefix caching once supported.
MODEL = "meta-llama/Llama-3.2-1B" MODEL = "meta-llama/Llama-3.2-1B-Instruct"
MODEL_ARGS = f"pretrained={MODEL},enforce_eager=True,enable_prefix_caching=False" # noqa: E501 MODEL_ARGS = f"pretrained={MODEL},enforce_eager=True,enable_prefix_caching=False" # noqa: E501
SERVER_ARGS = [ SERVER_ARGS = [
"--enforce_eager", "--no_enable_prefix_caching", "--disable-log-requests" "--enforce_eager", "--no_enable_prefix_caching", "--disable-log-requests"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment