Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
24f6b9a7
Unverified
Commit
24f6b9a7
authored
Apr 09, 2025
by
Accelerator1996
Committed by
GitHub
Apr 09, 2025
Browse files
[Misc] Fix test_sharded_state_loader.py(#16004) (#16005)
Signed-off-by:
lvfei.lv
<
lvfei.lv@alibaba-inc.com
>
parent
9cdde472
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
9 deletions
+10
-9
tests/test_sharded_state_loader.py
tests/test_sharded_state_loader.py
+10
-9
No files found.
tests/test_sharded_state_loader.py
View file @
24f6b9a7
...
@@ -47,9 +47,7 @@ def test_filter_subtensors():
...
@@ -47,9 +47,7 @@ def test_filter_subtensors():
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
llama_3p2_1b_files
():
def
llama_3p2_1b_files
():
with
TemporaryDirectory
()
as
cache_dir
:
input_dir
=
snapshot_download
(
"meta-llama/Llama-3.2-1B-Instruct"
,
input_dir
=
snapshot_download
(
"meta-llama/Llama-3.2-1B-Instruct"
,
cache_dir
=
cache_dir
,
ignore_patterns
=
[
"*.bin*"
,
"original/*"
])
ignore_patterns
=
[
"*.bin*"
,
"original/*"
])
yield
input_dir
yield
input_dir
...
@@ -64,9 +62,9 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
...
@@ -64,9 +62,9 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
# Copy metadata files to output directory
# Copy metadata files to output directory
for
file
in
os
.
listdir
(
input_dir
):
for
file
in
os
.
listdir
(
input_dir
):
if
not
any
(
if
os
.
path
.
isdir
(
os
.
path
.
join
(
input_dir
,
file
)):
file
.
endswith
(
ext
)
and
not
os
.
path
.
isdir
(
file
)
continue
for
ext
in
weights_patterns
):
if
not
any
(
file
.
endswith
(
ext
)
for
ext
in
weights_patterns
):
shutil
.
copy
(
f
"
{
input_dir
}
/
{
file
}
"
,
output_dir
)
shutil
.
copy
(
f
"
{
input_dir
}
/
{
file
}
"
,
output_dir
)
...
@@ -81,7 +79,8 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
...
@@ -81,7 +79,8 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
@
pytest
.
mark
.
parametrize
(
"enable_lora"
,
[
False
,
True
])
@
pytest
.
mark
.
parametrize
(
"enable_lora"
,
[
False
,
True
])
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
1
,
2
])
def
test_sharded_state_loader
(
enable_lora
,
tp_size
,
num_gpus_available
,
def
test_sharded_state_loader
(
enable_lora
,
tp_size
,
num_gpus_available
,
llama_3p2_1b_files
):
llama_3p2_1b_files
,
monkeypatch
:
pytest
.
MonkeyPatch
):
if
num_gpus_available
<
tp_size
:
if
num_gpus_available
<
tp_size
:
pytest
.
skip
(
f
"Not enough GPUs for tensor parallelism
{
tp_size
}
"
)
pytest
.
skip
(
f
"Not enough GPUs for tensor parallelism
{
tp_size
}
"
)
...
@@ -89,6 +88,8 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
...
@@ -89,6 +88,8 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
gpu_memory_utilization
=
0.8
gpu_memory_utilization
=
0.8
input_dir
=
llama_3p2_1b_files
input_dir
=
llama_3p2_1b_files
ctx
=
mp
.
get_context
(
"spawn"
)
ctx
=
mp
.
get_context
(
"spawn"
)
# The interface in v1 engine has changed, run in v1 engine will hang.
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
# Run in separate processes for memory & CUDA isolation
# Run in separate processes for memory & CUDA isolation
with
TemporaryDirectory
()
as
output_dir
:
with
TemporaryDirectory
()
as
output_dir
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment