test_collective_rpc.py 1.16 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
import pytest
5
import torch
6
7
8

from vllm import LLM

9
from ...utils import create_new_process_for_each_test
10
11
12
13


@pytest.mark.parametrize("tp_size", [1, 2])
@pytest.mark.parametrize("backend", ["mp", "ray"])
@create_new_process_for_each_test()
def test_collective_rpc(tp_size, backend, monkeypatch):
    """Run a test-local function on the workers through ``collective_rpc``.

    An ``LLM`` is built with ``tensor_parallel_size=tp_size`` and the
    selected ``distributed_executor_backend``; the call to
    ``collective_rpc(echo_rank)`` is expected to return
    ``list(range(tp_size))``, i.e. each worker's ``rank`` in order.

    The test is skipped when fewer than ``tp_size`` CUDA devices are
    visible, and for the ``tp_size == 1`` / ``"ray"`` combination, which
    is treated as a duplicate case.
    """
    single_device = tp_size == 1

    if torch.cuda.device_count() < tp_size:
        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
    # For tp_size == 1 the backend is replaced by None below, so the "ray"
    # parametrization would only repeat the "mp" one.
    if single_device and backend == "ray":
        pytest.skip("Skip duplicate test case")

    executor_backend = None if single_device else backend

    # The function is deliberately declared inside the test body: the point
    # is to check that such a local callable can be serialized and shipped
    # to the workers.
    def echo_rank(self):
        return self.rank

    # NOTE(review): presumably required so that the locally defined function
    # above may be serialized for the workers -- confirm against vLLM docs.
    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

    engine = LLM(
        model="hmellor/tiny-random-LlamaForCausalLM",
        enforce_eager=True,
        load_format="dummy",
        tensor_parallel_size=tp_size,
        distributed_executor_backend=executor_backend,
    )

    reported_ranks = engine.collective_rpc(echo_rank)
    assert reported_ranks == list(range(tp_size))