test_serial_utils.py 8.84 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
from collections import UserDict
from dataclasses import dataclass
5
from typing import Optional
6

7
import msgspec
8
import numpy as np
9
import pytest
10
11
import torch

12
from vllm.multimodal.inputs import (MultiModalBatchedField,
13
                                    MultiModalFieldElem, MultiModalFlatField,
14
15
                                    MultiModalKwargsItem,
                                    MultiModalKwargsItems,
16
                                    MultiModalSharedField, NestedTensors)
17
18
from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder

19
20
pytestmark = pytest.mark.cpu_test

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

class UnrecognizedType(UserDict):
    """Empty ``UserDict`` subclass that carries one extra integer attribute.

    Used as the ``unrecognized`` field of ``MyType``.
    NOTE(review): presumably stands in for a type the msgpack encoder has no
    native handling for -- confirm against vllm.v1.serial_utils.
    """

    def __init__(self, an_int: int) -> None:
        # Initialise the underlying (empty) mapping first, then attach the
        # payload attribute that the round-trip test compares.
        super().__init__()
        self.an_int = an_int


@dataclass
class MyType:
    """Composite payload for the encode/decode round-trip test.

    Groups tensors of several sizes and memory layouts together with a
    string, a numpy array and an ``UnrecognizedType`` instance. Field order
    is significant: it defines the generated ``__init__`` signature.
    """

    # Plain tensor, string, list of tensors and numpy array.
    tensor1: torch.Tensor
    a_string: str
    list_of_tensors: list[torch.Tensor]
    numpy_array: np.ndarray
    # Custom UserDict subclass carrying an ``an_int`` attribute.
    unrecognized: UnrecognizedType
    # Transposed (f-contig) and sliced (non-contig) tensors, in a small and
    # a large variant each.
    small_f_contig_tensor: torch.Tensor
    large_f_contig_tensor: torch.Tensor
    small_non_contig_tensor: torch.Tensor
    large_non_contig_tensor: torch.Tensor
    # Zero-element tensor.
    empty_tensor: torch.Tensor
41
42


43
def test_encode_decode(monkeypatch: pytest.MonkeyPatch):
    """Test encode/decode loop with zero-copy tensors.

    Builds a ``MyType`` holding tensors of assorted dtypes, sizes and memory
    layouts plus a numpy array and an ``UnrecognizedType``, then checks that
    both ``encode`` and ``encode_into`` produce the expected number of
    buffers and that decoding them reproduces the original object.
    """
    with monkeypatch.context() as patched_env:
        # NOTE(review): presumably required so that the UnrecognizedType
        # field can be serialized -- confirm against vllm.v1.serial_utils.
        patched_env.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

        int_tensor = torch.randint(low=0,
                                   high=100,
                                   size=(1024, ),
                                   dtype=torch.int32)
        mixed_tensors = [
            torch.rand((1, 10), dtype=torch.float32),
            torch.rand((3, 5, 4000), dtype=torch.float64),
            torch.tensor(1984),  # test scalar too
            # Make sure to test bf16 which numpy doesn't support.
            torch.rand((3, 5, 1000), dtype=torch.bfloat16),
            torch.tensor([float("-inf"), float("inf")] * 1024,
                         dtype=torch.bfloat16),
        ]
        original = MyType(
            tensor1=int_tensor,
            a_string="hello",
            list_of_tensors=mixed_tensors,
            numpy_array=np.arange(512),
            unrecognized=UnrecognizedType(33),
            small_f_contig_tensor=torch.rand(5, 4).t(),
            large_f_contig_tensor=torch.rand(1024, 4).t(),
            small_non_contig_tensor=torch.rand(2, 4)[:, 1:3],
            large_non_contig_tensor=torch.rand(1024, 512)[:, 10:20],
            empty_tensor=torch.empty(0),
        )

        encoder = MsgpackEncoder(size_threshold=256)
        decoder = MsgpackDecoder(MyType)

        buffers = encoder.encode(original)

        # Expected 8 buffers: the main buffer, one per tensor that is well
        # above size_threshold (tensor1, the float64 and the two bfloat16
        # list entries, large_f_contig_tensor, large_non_contig_tensor),
        # and one for the numpy array. The remaining tensors are far below
        # the threshold and are expected to be encoded inline.
        assert len(buffers) == 8

        restored: MyType = decoder.decode(buffers)
        assert_equal(restored, original)

        # Test encode_into case: the caller-supplied bytearray must come
        # back as the first buffer.
        scratch = bytearray()
        buffers_into = encoder.encode_into(original, scratch)

        assert len(buffers_into) == 8
        assert buffers_into[0] is scratch

        restored_into: MyType = decoder.decode(buffers_into)
        assert_equal(restored_into, original)
99
100


101
class MyRequest(msgspec.Struct):
    """Mock request wrapper used by ``test_multimodal_kwargs``.

    Carries the multimodal kwargs in a typed field so that the decoder can
    be constructed with ``MyRequest`` as its target type.
    """

    # Optional list of multimodal kwargs collections.
    mm: Optional[list[MultiModalKwargsItems]]
103
104
105


def test_multimodal_kwargs():
    """Round-trip ``MultiModalKwargsItems`` wrapped in a ``MyRequest``.

    Builds one audio, one video and two image field elements, encodes the
    resulting collection, and checks the buffer count, the approximate
    encoded size, and that the decoded data matches the original.
    """
    audio_elem = MultiModalFieldElem(
        "audio",
        "a0",
        torch.zeros(1000, dtype=torch.bfloat16),
        MultiModalBatchedField(),
    )
    video_elem = MultiModalFieldElem(
        "video",
        "v0",
        [torch.zeros(1000, dtype=torch.int8) for _ in range(4)],
        MultiModalFlatField(
            [[slice(1, 2, 3), slice(4, 5, 6)], [slice(None, 2)]], 0),
    )
    shared_image_elem = MultiModalFieldElem(
        "image",
        "i0",
        torch.zeros(1000, dtype=torch.int32),
        MultiModalSharedField(4),
    )
    flat_image_elem = MultiModalFieldElem(
        "image",
        "i1",
        torch.zeros(1000, dtype=torch.int32),
        MultiModalFlatField([slice(1, 2, 3), slice(4, 5, 6)], 2),
    )

    # One item per modality; the image item holds both image elements.
    mm = MultiModalKwargsItems.from_seq([
        MultiModalKwargsItem.from_elems([audio_elem]),
        MultiModalKwargsItem.from_elems([video_elem]),
        MultiModalKwargsItem.from_elems([shared_image_elem,
                                         flat_image_elem]),
    ])

    # pack mm kwargs into a mock request so that it can be decoded properly
    req = MyRequest([mm])

    encoder = MsgpackEncoder()
    decoder = MsgpackDecoder(MyRequest)

    encoded = encoder.encode(req)

    # NOTE(review): presumably the main buffer plus one buffer for each of
    # the 7 tensors created above -- confirm against the encoder.
    assert len(encoded) == 8

    total_len = 0
    for buf in encoded:
        total_len += memoryview(buf).cast("B").nbytes

    # The total encoded size is expected to be about 14306 bytes; the
    # accepted window below leaves slack for minor encoding changes.
    assert 14275 <= total_len <= 14325

    decoded = decoder.decode(encoded).mm[0]
    assert isinstance(decoded, MultiModalKwargsItems)

    # check all modalities were recovered and do some basic sanity checks
    assert len(decoded) == 3
    images = decoded["image"]
    assert len(images) == 1
    assert len(images[0].items()) == 2
    assert list(images[0].keys()) == ["i0", "i1"]

    # check the tensor contents and layout in the main dict
    mm_data = mm.get_data()
    decoded_data = decoded.get_data()
    assert all(nested_equal(mm_data[k], decoded_data[k]) for k in mm_data)
155
156
157
158
159


def nested_equal(a: NestedTensors, b: NestedTensors) -> bool:
    """Return True if two nested tensor structures are element-wise equal.

    Args:
        a: A tensor, or a (possibly nested) sequence of tensors.
        b: The structure to compare against ``a``.

    Returns:
        True when every tensor in ``a`` equals its counterpart in ``b``
        (per ``torch.equal``) and every sequence level has the same length.
    """
    if isinstance(a, torch.Tensor):
        # A tensor can only equal another tensor; report a mismatch instead
        # of letting torch.equal raise on a non-tensor argument.
        return isinstance(b, torch.Tensor) and torch.equal(a, b)
    # zip() stops at the shorter input, so without the length check a
    # truncated sequence would wrongly compare equal.
    return len(a) == len(b) and all(
        nested_equal(x, y) for x, y in zip(a, b))
161
162


163
164
165
166
167
168
169
170
def assert_equal(obj1: MyType, obj2: MyType):
    """Assert that two ``MyType`` instances hold equal field values.

    Tensors are compared with ``torch.equal``, the numpy array with
    ``np.array_equal``, the string with ``==`` and the ``unrecognized``
    field through its ``an_int`` attribute.

    Raises:
        AssertionError: If any field differs, or if the two
            ``list_of_tensors`` have different lengths.
    """
    assert torch.equal(obj1.tensor1, obj2.tensor1)
    assert obj1.a_string == obj2.a_string

    # zip() stops at the shorter list, so compare the lengths explicitly;
    # otherwise missing trailing tensors would go unnoticed.
    assert len(obj1.list_of_tensors) == len(obj2.list_of_tensors)
    for idx, (a, b) in enumerate(
            zip(obj1.list_of_tensors, obj2.list_of_tensors)):
        assert torch.equal(a, b), f"list_of_tensors[{idx}] differs"

    assert np.array_equal(obj1.numpy_array, obj2.numpy_array)
    assert obj1.unrecognized.an_int == obj2.unrecognized.an_int

    # Remaining tensor fields, checked in declaration order; the field name
    # is used as the failure message.
    for field_name in (
            "small_f_contig_tensor",
            "large_f_contig_tensor",
            "small_non_contig_tensor",
            "large_non_contig_tensor",
            "empty_tensor",
    ):
        assert torch.equal(getattr(obj1, field_name),
                           getattr(obj2, field_name)), field_name
178
179


180
def test_dict_serialization():
    """Round-trip a plain dict through the encoder and an untyped decoder."""
    encoder = MsgpackEncoder()
    decoder = MsgpackDecoder()

    sample = {"key": "value", "number": 42}

    # Encode, then immediately decode the resulting buffers.
    restored = decoder.decode(encoder.encode(sample))

    assert sample == restored, (
        "Decoded object does not match the original object.")


198
def test_tensor_serialization():
    """Round-trip a single ``torch.Tensor`` through encoder and decoder."""
    encoder = MsgpackEncoder()
    decoder = MsgpackDecoder(torch.Tensor)

    original = torch.rand(10, 10)

    # Encode, then immediately decode the resulting buffers.
    restored = decoder.decode(encoder.encode(original))

    matches = torch.allclose(original, restored)
    assert matches, "Decoded tensor does not match the original tensor."


217
def test_numpy_array_serialization():
    """Round-trip a single numpy array through encoder and decoder."""
    encoder = MsgpackEncoder()
    decoder = MsgpackDecoder(np.ndarray)

    original = np.random.rand(10, 10)

    # Encode, then immediately decode the resulting buffers.
    restored = decoder.decode(encoder.encode(original))

    matches = np.allclose(original, restored)
    assert matches, "Decoded numpy array does not match the original array."


class CustomClass:
    """Minimal user-defined type with value-based equality.

    Used by the custom-class serialization tests in this file, which compare
    a decoded instance against the original with ``==``.
    """

    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Return NotImplemented (not False) for foreign types so that Python
        # can try the other operand's comparison before falling back to
        # identity; ``CustomClass(x) == <other type>`` still evaluates to
        # False in the ordinary case.
        if not isinstance(other, CustomClass):
            return NotImplemented
        return self.value == other.value

    def __repr__(self):
        # Readable representation so failed equality assertions show the
        # wrapped value rather than an object address.
        return f"{type(self).__name__}(value={self.value!r})"


246
247
def test_custom_class_serialization_allowed_with_pickle(
        monkeypatch: pytest.MonkeyPatch):
    """Test that a custom class round-trips when insecure serialization is on.

    ``VLLM_ALLOW_INSECURE_SERIALIZATION`` is set to "1" for the duration of
    the test; the encoder and decoder are created inside that context.
    """
    with monkeypatch.context() as patched_env:
        patched_env.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

        encoder = MsgpackEncoder()
        decoder = MsgpackDecoder(CustomClass)

        original = CustomClass("test_value")

        # Encode, then immediately decode the resulting buffers.
        restored = decoder.decode(encoder.encode(original))

        # Relies on CustomClass.__eq__ comparing the wrapped values.
        assert original == restored, (
            "Decoded object does not match the original object.")
266
267
268
269


def test_custom_class_serialization_disallowed_without_pickle():
    """Test that encoding a custom class fails without insecure serialization.

    ``VLLM_ALLOW_INSECURE_SERIALIZATION`` is explicitly removed for the
    duration of the test so that the outcome does not depend on whatever the
    surrounding environment happens to export.
    """
    with pytest.MonkeyPatch.context() as m:
        # raising=False: it is fine if the variable was never set.
        m.delenv("VLLM_ALLOW_INSECURE_SERIALIZATION", raising=False)

        encoder = MsgpackEncoder()

        obj = CustomClass("test_value")

        with pytest.raises(TypeError):
            # Attempt to encode the custom class
            encoder.encode(obj)