Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4706eb62
Unverified
Commit
4706eb62
authored
Aug 16, 2024
by
SangBin Cho
Committed by
GitHub
Aug 16, 2024
Browse files
[aDAG] Unflake aDAG + PP tests (#7600)
parent
bae888cb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
4 deletions
+9
-4
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+2
-2
tests/distributed/test_pipeline_parallel.py
tests/distributed/test_pipeline_parallel.py
+4
-0
tests/utils.py
tests/utils.py
+3
-2
No files found.
.buildkite/test-pipeline.yaml
View file @
4706eb62
...
...
@@ -314,11 +314,11 @@ steps:
num_gpus
:
4
source_file_dependencies
:
-
vllm/
-
tests/distributed/test_pipeline_parallel
-
tests/distributed/test_pp_cudagraph.py
-
tests/distributed/test_pipeline_parallel
commands
:
-
pytest -v -s distributed/test_pipeline_parallel.py
-
pytest -v -s distributed/test_pp_cudagraph.py
-
pytest -v -s distributed/test_pipeline_parallel.py
-
label
:
LoRA Long Context (Distributed)
# 11min
# This test runs llama 13B, so it is required to run on 4 GPUs.
...
...
tests/distributed/test_pipeline_parallel.py
View file @
4706eb62
...
...
@@ -80,6 +80,10 @@ def test_compare_tp(TP_SIZE, PP_SIZE, EAGER_MODE, CHUNKED_PREFILL, MODEL_NAME,
"VLLM_USE_RAY_SPMD_WORKER"
:
"1"
,
"VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL"
:
"1"
,
}
# Temporary. Currently when zeromq + SPMD is used, it does not properly
# terminate because of aDAG issue.
pp_args
.
append
(
"--disable-frontend-multiprocessing"
)
tp_args
.
append
(
"--disable-frontend-multiprocessing"
)
try
:
compare_two_settings
(
MODEL_NAME
,
pp_args
,
tp_args
,
pp_env
)
...
...
tests/utils.py
View file @
4706eb62
...
...
@@ -384,6 +384,7 @@ def fork_new_process_for_each_test(
os
.
setpgrp
()
from
_pytest.outcomes
import
Skipped
pid
=
os
.
fork
()
print
(
f
"Fork a new process to run a test
{
pid
}
"
)
if
pid
==
0
:
try
:
f
(
*
args
,
**
kwargs
)
...
...
@@ -401,11 +402,11 @@ def fork_new_process_for_each_test(
pgid
=
os
.
getpgid
(
pid
)
_pid
,
_exitcode
=
os
.
waitpid
(
pid
,
0
)
# ignore SIGTERM signal itself
old_si
ngla
_handler
=
signal
.
signal
(
signal
.
SIGTERM
,
signal
.
SIG_IGN
)
old_si
gnal
_handler
=
signal
.
signal
(
signal
.
SIGTERM
,
signal
.
SIG_IGN
)
# kill all child processes
os
.
killpg
(
pgid
,
signal
.
SIGTERM
)
# restore the signal handler
signal
.
signal
(
signal
.
SIGTERM
,
old_si
ngla
_handler
)
signal
.
signal
(
signal
.
SIGTERM
,
old_si
gnal
_handler
)
assert
_exitcode
==
0
,
(
f
"function
{
f
}
failed when called with"
f
" args
{
args
}
and kwargs
{
kwargs
}
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment