Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bd700134
Unverified
Commit
bd700134
authored
Jul 31, 2024
by
Cody Yu
Committed by
GitHub
Jul 31, 2024
Browse files
[MISC] Introduce pipeline parallelism partition strategies (#6920)
Co-authored-by:
youkaichao
<
youkaichao@126.com
>
parent
2ee8d3ba
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
66 additions
and
5 deletions
+66
-5
tests/distributed/test_pipeline_partition.py
tests/distributed/test_pipeline_partition.py
+34
-0
vllm/distributed/utils.py
vllm/distributed/utils.py
+27
-5
vllm/envs.py
vllm/envs.py
+5
-0
No files found.
tests/distributed/test_pipeline_partition.py
0 → 100644
View file @
bd700134
import
os
import
pytest
from
vllm.distributed.utils
import
get_pp_indices
def test_custom_layer_partition():
    """Check that ``VLLM_PP_LAYER_PARTITION`` controls ``get_pp_indices``.

    Covers valid custom partitions (even, balanced, uneven remainder) and the
    failure modes that must raise ``ValueError``: malformed partition strings,
    a partition count different from ``pp_size``, and a layer sum different
    from ``num_layers``.
    """
    env_key = "VLLM_PP_LAYER_PARTITION"

    def _verify(partition_str, num_layers, pp_size, goldens):
        """Set the partition env var and compare every rank with its golden.

        Args:
            partition_str: Value assigned to ``VLLM_PP_LAYER_PARTITION``.
            num_layers: Total number of layers passed to ``get_pp_indices``.
            pp_size: Pipeline-parallel size passed to ``get_pp_indices``.
            goldens: Expected ``(start, end)`` tuple for each rank, in rank
                order.

        The previous state of the environment variable is always restored,
        including when ``get_pp_indices`` raises or an assertion fails, so a
        failing or error-expecting case cannot leak its partition string into
        later cases or other tests.
        """
        bak = os.environ.get(env_key)
        os.environ[env_key] = partition_str
        try:
            for pp_rank, golden in enumerate(goldens):
                assert get_pp_indices(num_layers, pp_rank, pp_size) == golden
        finally:
            if bak is None:
                # The variable did not exist before: remove it rather than
                # leaving the test value behind.
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = bak

    # Even partition
    _verify("5,5,5,5", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
    # Balanced partition
    _verify("4,6,6,4", 20, 4, [(0, 4), (4, 10), (10, 16), (16, 20)])
    # Put remainder somewhere
    _verify("5,6,5,6", 22, 4, [(0, 5), (5, 11), (11, 16), (16, 22)])

    # Invalid partition strings
    with pytest.raises(ValueError):
        _verify("5,5,5,5,", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
    with pytest.raises(ValueError):
        _verify("5,5,5,a", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])

    # Wrong number of partitions
    with pytest.raises(ValueError):
        _verify("5,5,5", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])

    # Wrong number of layers
    with pytest.raises(ValueError):
        _verify("5,5,5,5", 21, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
vllm/distributed/utils.py
View file @
bd700134
...
...
@@ -6,6 +6,11 @@ from typing import Sequence, Tuple
import
torch
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
def
ensure_divisibility
(
numerator
,
denominator
):
"""Ensure that numerator is divisible by the denominator."""
...
...
@@ -54,6 +59,23 @@ def get_pp_indices(num_hidden_layers: int, pp_rank: int,
If the number of layers is not divisible by the number of partitions,
the last partition will have the remaining layers.
"""
partition_list_str
=
envs
.
VLLM_PP_LAYER_PARTITION
if
partition_list_str
is
not
None
:
try
:
partitions
=
[
int
(
layer
)
for
layer
in
partition_list_str
.
split
(
","
)
]
except
ValueError
as
err
:
raise
ValueError
(
"Invalid partition string: {}"
.
format
(
partition_list_str
))
from
err
if
len
(
partitions
)
!=
pp_size
:
raise
ValueError
(
f
"
{
len
(
partitions
)
=
}
does not match
{
pp_size
=
}
."
)
if
sum
(
partitions
)
!=
num_hidden_layers
:
raise
ValueError
(
f
"
{
sum
(
partitions
)
=
}
does not match
{
num_hidden_layers
=
}
."
)
start_layer
=
sum
(
partitions
[:
pp_rank
])
end_layer
=
start_layer
+
partitions
[
pp_rank
]
else
:
layers_per_partition
=
num_hidden_layers
//
pp_size
start_layer
=
pp_rank
*
layers_per_partition
end_layer
=
start_layer
+
layers_per_partition
...
...
vllm/envs.py
View file @
bd700134
...
...
@@ -28,6 +28,7 @@ if TYPE_CHECKING:
VLLM_LOGGING_CONFIG_PATH
:
Optional
[
str
]
=
None
VLLM_TRACE_FUNCTION
:
int
=
0
VLLM_ATTENTION_BACKEND
:
Optional
[
str
]
=
None
VLLM_PP_LAYER_PARTITION
:
Optional
[
str
]
=
None
VLLM_CPU_KVCACHE_SPACE
:
int
=
0
VLLM_CPU_OMP_THREADS_BIND
:
str
=
""
VLLM_OPENVINO_KVCACHE_SPACE
:
int
=
0
...
...
@@ -242,6 +243,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_ATTENTION_BACKEND"
:
lambda
:
os
.
getenv
(
"VLLM_ATTENTION_BACKEND"
,
None
),
# Pipeline stage partition strategy
"VLLM_PP_LAYER_PARTITION"
:
lambda
:
os
.
getenv
(
"VLLM_PP_LAYER_PARTITION"
,
None
),
# (CPU backend only) CPU key-value cache space.
# default is 4GB
"VLLM_CPU_KVCACHE_SPACE"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment