Unverified commit c0292211, authored by Wallas Henrique, committed by GitHub
Browse files

[CI/Build] Replaced some models on tests for smaller ones (#9570)


Signed-off-by: Wallas Santos <wallashss@ibm.com>
parent 74692421
...@@ -19,7 +19,7 @@ from ..utils import multi_gpu_test ...@@ -19,7 +19,7 @@ from ..utils import multi_gpu_test
MODELS = [ MODELS = [
"facebook/opt-125m", "facebook/opt-125m",
"meta-llama/Llama-2-7b-hf", "meta-llama/Llama-3.2-1B",
] ]
TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4") TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")
......
...@@ -16,7 +16,7 @@ from ..utils import multi_gpu_test ...@@ -16,7 +16,7 @@ from ..utils import multi_gpu_test
MODELS = [ MODELS = [
"facebook/opt-125m", "facebook/opt-125m",
"meta-llama/Llama-2-7b-hf", "meta-llama/Llama-3.2-1B",
] ]
......
...@@ -2,5 +2,5 @@ from ..utils import compare_two_settings ...@@ -2,5 +2,5 @@ from ..utils import compare_two_settings
def test_cpu_offload(): def test_cpu_offload():
compare_two_settings("meta-llama/Llama-2-7b-hf", [], compare_two_settings("meta-llama/Llama-3.2-1B", [],
["--cpu-offload-gb", "4"]) ["--cpu-offload-gb", "1"])
...@@ -13,8 +13,7 @@ from ..utils import compare_all_settings ...@@ -13,8 +13,7 @@ from ..utils import compare_all_settings
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model, model_args, pp_size, tp_size, attn_backend, method, fullgraph", "model, model_args, pp_size, tp_size, attn_backend, method, fullgraph",
[ [
("meta-llama/Meta-Llama-3-8B", [], 2, 2, "FLASH_ATTN", "generate", ("meta-llama/Llama-3.2-1B", [], 2, 2, "FLASH_ATTN", "generate", True),
True),
("nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dyn-Per-Token-2048-Samples", ("nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dyn-Per-Token-2048-Samples",
["--quantization", "compressed-tensors" ["--quantization", "compressed-tensors"
], 1, 1, "FLASH_ATTN", "generate", True), ], 1, 1, "FLASH_ATTN", "generate", True),
......
...@@ -8,7 +8,7 @@ from ..openai.test_vision import TEST_IMAGE_URLS ...@@ -8,7 +8,7 @@ from ..openai.test_vision import TEST_IMAGE_URLS
def test_chat(): def test_chat():
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct") llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct")
prompt1 = "Explain the concept of entropy." prompt1 = "Explain the concept of entropy."
messages = [ messages = [
...@@ -26,7 +26,7 @@ def test_chat(): ...@@ -26,7 +26,7 @@ def test_chat():
def test_multi_chat(): def test_multi_chat():
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct") llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct")
prompt1 = "Explain the concept of entropy." prompt1 = "Explain the concept of entropy."
prompt2 = "Explain what among us is." prompt2 = "Explain what among us is."
......
...@@ -16,9 +16,6 @@ from .test_completion import zephyr_lora_files # noqa: F401 ...@@ -16,9 +16,6 @@ from .test_completion import zephyr_lora_files # noqa: F401
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -6,7 +6,7 @@ import pytest ...@@ -6,7 +6,7 @@ import pytest
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "meta-llama/Llama-3.2-1B"
@pytest.mark.asyncio @pytest.mark.asyncio
......
...@@ -46,9 +46,10 @@ def test_filter_subtensors(): ...@@ -46,9 +46,10 @@ def test_filter_subtensors():
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def llama_2_7b_files(): def llama_2_7b_files():
with TemporaryDirectory() as cache_dir: with TemporaryDirectory() as cache_dir:
input_dir = snapshot_download("meta-llama/Llama-2-7b-hf", input_dir = snapshot_download("meta-llama/Llama-3.2-1B",
cache_dir=cache_dir, cache_dir=cache_dir,
ignore_patterns="*.bin*") ignore_patterns=["*.bin*", "original/*"])
yield input_dir yield input_dir
...@@ -58,9 +59,12 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs): ...@@ -58,9 +59,12 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
# Dump worker states to output directory # Dump worker states to output directory
llm_sharded_writer.llm_engine.model_executor.save_sharded_state( llm_sharded_writer.llm_engine.model_executor.save_sharded_state(
path=output_dir) path=output_dir)
# Copy metadata files to output directory # Copy metadata files to output directory
for file in os.listdir(input_dir): for file in os.listdir(input_dir):
if not any(file.endswith(ext) for ext in weights_patterns): if not any(
file.endswith(ext) and not os.path.isdir(file)
for ext in weights_patterns):
shutil.copy(f"{input_dir}/{file}", output_dir) shutil.copy(f"{input_dir}/{file}", output_dir)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment