# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from dataclasses import asdict, is_dataclass
from typing import Any

import msgspec
import numpy as np
import torch
from msgspec import msgpack
from PIL import Image
from vllm.outputs import CompletionOutput, RequestOutput

# Type markers for custom serialization: sentinel keys injected into encoded
# dicts so the decoder can recognize payloads that must be rebuilt into
# rich objects (tensor / ndarray / PIL image).
_TENSOR_MARKER = "__tensor__"
_NDARRAY_MARKER = "__ndarray__"
_PIL_IMAGE_MARKER = "__pil_image__"

# Keys that identify a RequestOutput dict (for reconstruction)
_REQUEST_OUTPUT_KEYS = frozenset({"request_id", "prompt", "prompt_token_ids", "outputs", "finished"})

# Keys that identify a CompletionOutput dict (for reconstruction)
_COMPLETION_OUTPUT_KEYS = frozenset({"index", "text", "token_ids", "finish_reason"})

# Keys that identify an OmniRequestOutput dict (for reconstruction)
# OmniRequestOutput has 'final_output_type' which is unique, or can be identified by
# having 'finished' and ('images' or 'final_output_type')
# NOTE(review): this constant is not referenced anywhere in this module —
# OmniMsgpackDecoder._is_omni_request_output re-states the same keys inline.
_OMNI_REQUEST_OUTPUT_KEYS = frozenset({"finished", "final_output_type"})


class OmniMsgpackEncoder:
    """Msgpack encoder for Omni IPC payloads.

    Adapted from vLLM's MsgpackEncoder; zero-copy support is not implemented
    yet (TODO). Converts torch.Tensor, numpy.ndarray, PIL.Image,
    RequestOutput and CompletionOutput into plain-dict representations that
    msgpack can serialize.
    """

    def __init__(self):
        self.encoder = msgpack.Encoder(enc_hook=self._enc_hook)

    def encode(self, obj: Any) -> bytes:
        """Serialize *obj* into a msgpack byte string."""
        return self.encoder.encode(obj)

    def _enc_hook(self, obj: Any) -> Any:
        """Fallback hook invoked by msgspec for types it cannot serialize."""
        if isinstance(obj, torch.Tensor):
            return self._encode_tensor(obj)

        # Object/void ndarrays cannot round-trip through raw bytes, so they
        # fall through to the TypeError below.
        if isinstance(obj, np.ndarray) and obj.dtype.kind not in ("O", "V"):
            return self._encode_ndarray(obj)

        if isinstance(obj, Image.Image):
            return self._encode_pil_image(obj)

        # RequestOutput is not a dataclass and carries dynamic attributes, so
        # it needs dedicated handling (checked before the dataclass branch).
        if isinstance(obj, RequestOutput):
            return self._encode_request_output(obj)

        # CompletionOutput is a dataclass but is handled specially to keep
        # its dynamic multimodal payload.
        if isinstance(obj, CompletionOutput):
            return self._encode_completion_output(obj)

        # Any other dataclass instance (not a dataclass *type*).
        if is_dataclass(obj) and not isinstance(obj, type):
            return asdict(obj)

        # Slices degrade to a (start, stop, step) triple.
        if isinstance(obj, slice):
            return (obj.start, obj.stop, obj.step)

        raise TypeError(
            f"Object of type {type(obj).__name__} is not serializable. "
            "Supported types: torch.Tensor, np.ndarray, PIL.Image, dataclass, "
            "RequestOutput, and standard Python types (dict, list, str, int, float, bool, None, bytes)."
        )

    def _encode_tensor(self, tensor: torch.Tensor) -> dict[str, Any]:
        """Flatten a tensor into {marker, dtype name, shape, raw bytes}."""
        host = tensor.detach().contiguous().cpu()
        if host.dim() == 0:
            # view(torch.uint8) needs at least one dimension, so lift
            # scalars to shape (1,); the recorded shape stays [].
            host = host.reshape(1)
        raw = host.view(torch.uint8)
        return {
            _TENSOR_MARKER: True,
            "dtype": str(tensor.dtype).removeprefix("torch."),
            "shape": list(tensor.shape),
            "data": raw.numpy().tobytes(),
        }

    def _encode_ndarray(self, arr: np.ndarray) -> dict[str, Any]:
        """Flatten an ndarray into {marker, dtype string, shape, raw bytes}."""
        contiguous = arr if arr.flags.c_contiguous else np.ascontiguousarray(arr)
        return {
            _NDARRAY_MARKER: True,
            "dtype": contiguous.dtype.str,
            "shape": list(contiguous.shape),
            "data": contiguous.tobytes(),
        }

    def _encode_pil_image(self, img: Image.Image) -> dict[str, Any]:
        """Serialize a PIL image as raw uint8 pixel bytes plus its mode."""
        pixels = np.asarray(img, dtype=np.uint8)
        if not pixels.flags.c_contiguous:
            pixels = np.ascontiguousarray(pixels)
        return {
            _PIL_IMAGE_MARKER: True,
            "mode": img.mode,
            "shape": list(pixels.shape),
            "data": pixels.tobytes(),
        }

    def _encode_request_output(self, obj: RequestOutput) -> dict[str, Any]:
        """Encode RequestOutput to dict.

        RequestOutput is not a dataclass, so its attributes are extracted by
        hand. The dynamically added 'multimodal_output' attribute (when
        present) is preserved as an extra key.
        """
        # msgspec can serialize CompletionOutput dataclasses directly, but it
        # drops dynamic fields such as multimodal_output — encode them
        # through _encode_completion_output to keep multimodal payloads.
        completions = [
            self._encode_completion_output(entry)
            if isinstance(entry, CompletionOutput)
            else entry
            for entry in obj.outputs
        ]

        payload = {
            "request_id": obj.request_id,
            "prompt": obj.prompt,
            "prompt_token_ids": obj.prompt_token_ids,
            "prompt_logprobs": obj.prompt_logprobs,
            "outputs": completions,
            "finished": obj.finished,
            "metrics": obj.metrics,
            "lora_request": obj.lora_request,
            "encoder_prompt": obj.encoder_prompt,
            "encoder_prompt_token_ids": obj.encoder_prompt_token_ids,
            "num_cached_tokens": obj.num_cached_tokens,
            "multi_modal_placeholders": obj.multi_modal_placeholders,
            "kv_transfer_params": obj.kv_transfer_params,
        }
        mm = getattr(obj, "multimodal_output", None)
        if mm is not None:
            payload["multimodal_output"] = mm
        return payload

    def _encode_completion_output(self, obj: CompletionOutput) -> dict[str, Any]:
        """Encode CompletionOutput to dict, keeping its multimodal payload."""
        payload = asdict(obj)
        mm = getattr(obj, "multimodal_output", None)
        if mm is not None:
            payload["multimodal_output"] = mm
        return payload


class OmniMsgpackDecoder:
    """Msgpack decoder for Omni IPC payloads.

    Adapted from vLLM's MsgpackDecoder; zero-copy support is not implemented
    yet (TODO). Recursively reconstructs torch.Tensor, numpy.ndarray,
    PIL.Image, RequestOutput, CompletionOutput and OmniRequestOutput from the
    dict representations produced by OmniMsgpackEncoder.
    """

    def __init__(self):
        self.decoder = msgpack.Decoder()

    def decode(self, data: bytes | bytearray | memoryview) -> Any:
        """Decode msgpack bytes and restore any marked custom objects."""
        result = self.decoder.decode(data)
        return self._post_process(result)

    def _post_process(self, obj: Any) -> Any:
        """Recursively restore tensor/ndarray/image/RequestOutput/OmniRequestOutput
        objects from their dict representations."""
        if isinstance(obj, dict):
            # Explicit type markers take priority over structural matching.
            if obj.get(_TENSOR_MARKER):
                return self._decode_tensor(obj)
            if obj.get(_NDARRAY_MARKER):
                return self._decode_ndarray(obj)
            if obj.get(_PIL_IMAGE_MARKER):
                return self._decode_pil_image(obj)

            # Restore nested values first so reconstructed objects receive
            # already-decoded members.
            processed = {k: self._post_process(v) for k, v in obj.items()}

            # OmniRequestOutput is checked before RequestOutput since it may
            # also carry RequestOutput-like fields.
            if self._is_omni_request_output(processed):
                return self._decode_omni_request_output(processed)

            if _REQUEST_OUTPUT_KEYS.issubset(processed.keys()):
                return self._decode_request_output(processed)

            # NOTE(review): structural matching — a plain user dict that
            # happens to contain all CompletionOutput keys is converted too.
            if _COMPLETION_OUTPUT_KEYS.issubset(processed.keys()):
                return self._decode_completion_output(processed)

            return processed

        if isinstance(obj, list):
            return [self._post_process(item) for item in obj]

        # Defensive: msgpack decodes arrays as lists, but handle tuples from
        # callers that post-process already-materialized structures.
        if isinstance(obj, tuple):
            return tuple(self._post_process(item) for item in obj)

        return obj

    def _is_omni_request_output(self, obj: dict[str, Any]) -> bool:
        """Check if a dict looks like an OmniRequestOutput.

        OmniRequestOutput is identified by:
        - 'finished' plus 'final_output_type' (unique to OmniRequestOutput),
        - OR 'finished' plus 'images' (diffusion mode).
        """
        if "finished" not in obj:
            return False
        return "final_output_type" in obj or "images" in obj

    def _decode_omni_request_output(self, obj: dict[str, Any]) -> Any:
        """Decode dict to OmniRequestOutput (a dataclass).

        Tries msgspec.convert first, then direct construction, and finally
        returns the dict unchanged as a defensive fallback.
        """
        from vllm_omni.outputs import OmniRequestOutput

        try:
            # Use msgspec.convert for dataclass reconstruction.
            return msgspec.convert(obj, OmniRequestOutput)
        except Exception:
            try:
                # Fallback: construct directly if msgspec.convert fails
                # (e.g., if some fields are missing or have wrong types).
                return OmniRequestOutput(**obj)
            except Exception:
                # If both attempts fail, return dict as-is (defensive
                # fallback). Should rarely happen if _is_omni_request_output
                # is correct.
                return obj

    def _decode_tensor(self, obj: dict[str, Any]) -> torch.Tensor:
        """Decode dict to torch.Tensor.

        Raises:
            AttributeError: if the encoded dtype name is not a torch dtype.
        """
        dtype_str = obj["dtype"]
        shape = obj["shape"]
        data = obj["data"]

        torch_dtype = getattr(torch, dtype_str)
        if not data:
            # Zero-element tensor: nothing to copy, just allocate the shape.
            return torch.empty(shape, dtype=torch_dtype)

        # Wrap immutable payloads in a bytearray so torch.frombuffer gets a
        # writable buffer (presumably to avoid torch's non-writable-buffer
        # warning and read-only result tensors).
        buffer = bytearray(data) if isinstance(data, (bytes, memoryview)) else data
        arr = torch.frombuffer(buffer, dtype=torch.uint8)
        return arr.view(torch_dtype).reshape(shape)

    def _decode_ndarray(self, obj: dict[str, Any]) -> np.ndarray:
        """Decode dict to numpy.ndarray.

        np.frombuffer over immutable bytes yields a read-only array, which
        breaks callers mutating the decoded array in place; wrap the payload
        in a bytearray so the result is writable, consistent with the tensor
        decode path.
        """
        dtype = obj["dtype"]
        shape = obj["shape"]
        data = obj["data"]
        buffer = bytearray(data) if isinstance(data, (bytes, memoryview)) else data
        return np.frombuffer(buffer, dtype=dtype).reshape(shape)

    def _decode_pil_image(self, obj: dict[str, Any]) -> Image.Image:
        """Decode dict to PIL.Image.

        Uses the same writable-buffer wrap as the ndarray path so the
        intermediate pixel array (and any buffer PIL may share) is writable.
        """
        mode = obj["mode"]
        shape = obj["shape"]
        data = obj["data"]
        buffer = bytearray(data) if isinstance(data, (bytes, memoryview)) else data
        arr = np.frombuffer(buffer, dtype=np.uint8).reshape(shape)
        return Image.fromarray(arr, mode=mode)

    def _decode_completion_output(self, obj: dict[str, Any]) -> CompletionOutput:
        """Decode dict to CompletionOutput via msgspec.convert, restoring the
        dynamically added multimodal_output attribute if present."""
        mm_output = obj.pop("multimodal_output", None)
        co = msgspec.convert(obj, CompletionOutput)
        if mm_output is not None:
            setattr(co, "multimodal_output", mm_output)
        return co

    def _decode_request_output(self, obj: dict[str, Any]) -> RequestOutput:
        """Decode dict to RequestOutput.

        RequestOutput is not a dataclass, so msgspec.convert doesn't work;
        construct it manually via **kwargs (its __init__ accepts **kwargs for
        forward compatibility), then restore the dynamically added
        multimodal_output attribute.
        """
        mm_output = obj.pop("multimodal_output", None)
        ro = RequestOutput(**obj)
        if mm_output is not None:
            setattr(ro, "multimodal_output", mm_output)
        return ro


class OmniSerde:
    """Facade pairing OmniMsgpackEncoder and OmniMsgpackDecoder for Omni IPC."""

    def __init__(self):
        self.encoder = OmniMsgpackEncoder()
        self.decoder = OmniMsgpackDecoder()

    def serialize(self, obj: Any) -> bytes:
        """Encode *obj* into msgpack bytes."""
        return self.encoder.encode(obj)

    def deserialize(self, data: bytes | bytearray | memoryview) -> Any:
        """Decode msgpack *data* back into Python objects."""
        return self.decoder.decode(data)


# Global instance for simple interface: importers share this single
# module-level encoder/decoder pair instead of constructing their own.
OmniSerializer = OmniSerde()