Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c0292211
Unverified
Commit
c0292211
authored
Oct 22, 2024
by
Wallas Henrique
Committed by
GitHub
Oct 22, 2024
Browse files
[CI/Build] Replaced some models on tests for smaller ones (#9570)
Signed-off-by:
Wallas Santos
<
wallashss@ibm.com
>
parent
74692421
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
15 additions
and
15 deletions
+15
-15
tests/basic_correctness/test_basic_correctness.py
tests/basic_correctness/test_basic_correctness.py
+1
-1
tests/basic_correctness/test_chunked_prefill.py
tests/basic_correctness/test_chunked_prefill.py
+1
-1
tests/basic_correctness/test_cpu_offload.py
tests/basic_correctness/test_cpu_offload.py
+2
-2
tests/compile/test_basic_correctness.py
tests/compile/test_basic_correctness.py
+1
-2
tests/entrypoints/llm/test_chat.py
tests/entrypoints/llm/test_chat.py
+2
-2
tests/entrypoints/openai/test_chat.py
tests/entrypoints/openai/test_chat.py
+0
-3
tests/entrypoints/openai/test_shutdown.py
tests/entrypoints/openai/test_shutdown.py
+1
-1
tests/test_sharded_state_loader.py
tests/test_sharded_state_loader.py
+7
-3
No files found.
tests/basic_correctness/test_basic_correctness.py
View file @
c0292211
...
...
@@ -19,7 +19,7 @@ from ..utils import multi_gpu_test
MODELS
=
[
"facebook/opt-125m"
,
"meta-llama/Llama-
2-7b-hf
"
,
"meta-llama/Llama-
3.2-1B
"
,
]
TARGET_TEST_SUITE
=
os
.
environ
.
get
(
"TARGET_TEST_SUITE"
,
"L4"
)
...
...
tests/basic_correctness/test_chunked_prefill.py
View file @
c0292211
...
...
@@ -16,7 +16,7 @@ from ..utils import multi_gpu_test
MODELS
=
[
"facebook/opt-125m"
,
"meta-llama/Llama-
2-7b-hf
"
,
"meta-llama/Llama-
3.2-1B
"
,
]
...
...
tests/basic_correctness/test_cpu_offload.py
View file @
c0292211
...
...
@@ -2,5 +2,5 @@ from ..utils import compare_two_settings
def
test_cpu_offload
():
compare_two_settings
(
"meta-llama/Llama-
2-7b-hf
"
,
[],
[
"--cpu-offload-gb"
,
"
4
"
])
compare_two_settings
(
"meta-llama/Llama-
3.2-1B
"
,
[],
[
"--cpu-offload-gb"
,
"
1
"
])
tests/compile/test_basic_correctness.py
View file @
c0292211
...
...
@@ -13,8 +13,7 @@ from ..utils import compare_all_settings
@
pytest
.
mark
.
parametrize
(
"model, model_args, pp_size, tp_size, attn_backend, method, fullgraph"
,
[
(
"meta-llama/Meta-Llama-3-8B"
,
[],
2
,
2
,
"FLASH_ATTN"
,
"generate"
,
True
),
(
"meta-llama/Llama-3.2-1B"
,
[],
2
,
2
,
"FLASH_ATTN"
,
"generate"
,
True
),
(
"nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dyn-Per-Token-2048-Samples"
,
[
"--quantization"
,
"compressed-tensors"
],
1
,
1
,
"FLASH_ATTN"
,
"generate"
,
True
),
...
...
tests/entrypoints/llm/test_chat.py
View file @
c0292211
...
...
@@ -8,7 +8,7 @@ from ..openai.test_vision import TEST_IMAGE_URLS
def
test_chat
():
llm
=
LLM
(
model
=
"meta-llama/
Meta-
Llama-3
-8
B-Instruct"
)
llm
=
LLM
(
model
=
"meta-llama/Llama-3
.2-1
B-Instruct"
)
prompt1
=
"Explain the concept of entropy."
messages
=
[
...
...
@@ -26,7 +26,7 @@ def test_chat():
def
test_multi_chat
():
llm
=
LLM
(
model
=
"meta-llama/
Meta-
Llama-3
-8
B-Instruct"
)
llm
=
LLM
(
model
=
"meta-llama/Llama-3
.2-1
B-Instruct"
)
prompt1
=
"Explain the concept of entropy."
prompt2
=
"Explain what among us is."
...
...
tests/entrypoints/openai/test_chat.py
View file @
c0292211
...
...
@@ -16,9 +16,6 @@ from .test_completion import zephyr_lora_files # noqa: F401
# any model with a chat template should work here
MODEL_NAME
=
"HuggingFaceH4/zephyr-7b-beta"
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME
=
"typeof/zephyr-7b-beta-lora"
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
tests/entrypoints/openai/test_shutdown.py
View file @
c0292211
...
...
@@ -6,7 +6,7 @@ import pytest
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"
HuggingFaceH4/zephyr-7b-beta
"
MODEL_NAME
=
"
meta-llama/Llama-3.2-1B
"
@
pytest
.
mark
.
asyncio
...
...
tests/test_sharded_state_loader.py
View file @
c0292211
...
...
@@ -46,9 +46,10 @@ def test_filter_subtensors():
@
pytest
.
fixture
(
scope
=
"module"
)
def
llama_2_7b_files
():
with
TemporaryDirectory
()
as
cache_dir
:
input_dir
=
snapshot_download
(
"meta-llama/Llama-
2-7b-hf
"
,
input_dir
=
snapshot_download
(
"meta-llama/Llama-
3.2-1B
"
,
cache_dir
=
cache_dir
,
ignore_patterns
=
"*.bin*"
)
ignore_patterns
=
[
"*.bin*"
,
"original/*"
])
yield
input_dir
...
...
@@ -58,9 +59,12 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
# Dump worker states to output directory
llm_sharded_writer
.
llm_engine
.
model_executor
.
save_sharded_state
(
path
=
output_dir
)
# Copy metadata files to output directory
for
file
in
os
.
listdir
(
input_dir
):
if
not
any
(
file
.
endswith
(
ext
)
for
ext
in
weights_patterns
):
if
not
any
(
file
.
endswith
(
ext
)
and
not
os
.
path
.
isdir
(
file
)
for
ext
in
weights_patterns
):
shutil
.
copy
(
f
"
{
input_dir
}
/
{
file
}
"
,
output_dir
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment