Unverified Commit bd700134 authored by Cody Yu's avatar Cody Yu Committed by GitHub
Browse files

[MISC] Introduce pipeline parallelism partition strategies (#6920)


Co-authored-by: default avataryoukaichao <youkaichao@126.com>
parent 2ee8d3ba
import os
import pytest
from vllm.distributed.utils import get_pp_indices
def test_custom_layer_partition():
def _verify(partition_str, num_layers, pp_size, goldens):
bak = os.environ.get("VLLM_PP_LAYER_PARTITION", None)
os.environ["VLLM_PP_LAYER_PARTITION"] = partition_str
for pp_rank, golden in enumerate(goldens):
assert get_pp_indices(num_layers, pp_rank, pp_size) == golden
if bak is not None:
os.environ["VLLM_PP_LAYER_PARTITION"] = bak
# Even partition
_verify("5,5,5,5", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
# Balanced partition
_verify("4,6,6,4", 20, 4, [(0, 4), (4, 10), (10, 16), (16, 20)])
# Put reminder somewhere
_verify("5,6,5,6", 22, 4, [(0, 5), (5, 11), (11, 16), (16, 22)])
# Invalid partition strings
with pytest.raises(ValueError):
_verify("5,5,5,5,", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
with pytest.raises(ValueError):
_verify("5,5,5,a", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
# Wrong number of partitions
with pytest.raises(ValueError):
_verify("5,5,5", 20, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
# Wrong number of layers
with pytest.raises(ValueError):
_verify("5,5,5,5", 21, 4, [(0, 5), (5, 10), (10, 15), (15, 20)])
...@@ -6,6 +6,11 @@ from typing import Sequence, Tuple ...@@ -6,6 +6,11 @@ from typing import Sequence, Tuple
import torch import torch
import vllm.envs as envs
from vllm.logger import init_logger
logger = init_logger(__name__)
def ensure_divisibility(numerator, denominator): def ensure_divisibility(numerator, denominator):
"""Ensure that numerator is divisible by the denominator.""" """Ensure that numerator is divisible by the denominator."""
...@@ -54,6 +59,23 @@ def get_pp_indices(num_hidden_layers: int, pp_rank: int, ...@@ -54,6 +59,23 @@ def get_pp_indices(num_hidden_layers: int, pp_rank: int,
If the number of layers is not divisible by the number of partitions, If the number of layers is not divisible by the number of partitions,
the last partition will have the remaining layers. the last partition will have the remaining layers.
""" """
partition_list_str = envs.VLLM_PP_LAYER_PARTITION
if partition_list_str is not None:
try:
partitions = [
int(layer) for layer in partition_list_str.split(",")
]
except ValueError as err:
raise ValueError("Invalid partition string: {}".format(
partition_list_str)) from err
if len(partitions) != pp_size:
raise ValueError(f"{len(partitions)=} does not match {pp_size=}.")
if sum(partitions) != num_hidden_layers:
raise ValueError(
f"{sum(partitions)=} does not match {num_hidden_layers=}.")
start_layer = sum(partitions[:pp_rank])
end_layer = start_layer + partitions[pp_rank]
else:
layers_per_partition = num_hidden_layers // pp_size layers_per_partition = num_hidden_layers // pp_size
start_layer = pp_rank * layers_per_partition start_layer = pp_rank * layers_per_partition
end_layer = start_layer + layers_per_partition end_layer = start_layer + layers_per_partition
......
...@@ -28,6 +28,7 @@ if TYPE_CHECKING: ...@@ -28,6 +28,7 @@ if TYPE_CHECKING:
VLLM_LOGGING_CONFIG_PATH: Optional[str] = None VLLM_LOGGING_CONFIG_PATH: Optional[str] = None
VLLM_TRACE_FUNCTION: int = 0 VLLM_TRACE_FUNCTION: int = 0
VLLM_ATTENTION_BACKEND: Optional[str] = None VLLM_ATTENTION_BACKEND: Optional[str] = None
VLLM_PP_LAYER_PARTITION: Optional[str] = None
VLLM_CPU_KVCACHE_SPACE: int = 0 VLLM_CPU_KVCACHE_SPACE: int = 0
VLLM_CPU_OMP_THREADS_BIND: str = "" VLLM_CPU_OMP_THREADS_BIND: str = ""
VLLM_OPENVINO_KVCACHE_SPACE: int = 0 VLLM_OPENVINO_KVCACHE_SPACE: int = 0
...@@ -242,6 +243,10 @@ environment_variables: Dict[str, Callable[[], Any]] = { ...@@ -242,6 +243,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_ATTENTION_BACKEND": "VLLM_ATTENTION_BACKEND":
lambda: os.getenv("VLLM_ATTENTION_BACKEND", None), lambda: os.getenv("VLLM_ATTENTION_BACKEND", None),
# Pipeline stage partition strategy
"VLLM_PP_LAYER_PARTITION":
lambda: os.getenv("VLLM_PP_LAYER_PARTITION", None),
# (CPU backend only) CPU key-value cache space. # (CPU backend only) CPU key-value cache space.
# default is 4GB # default is 4GB
"VLLM_CPU_KVCACHE_SPACE": "VLLM_CPU_KVCACHE_SPACE":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment