"""
Copyright 2023-2024 SGLang Team
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

"""TokenizerManager is the process that tokenizes requests, forwards them to the router, and streams detokenized outputs back to callers."""

import asyncio
import concurrent.futures
import dataclasses
import logging
import multiprocessing as mp
import os
from typing import Dict, List, Tuple, Union

import numpy as np
import transformers
import uvloop
import zmq
import zmq.asyncio
from fastapi import BackgroundTasks

from sglang.srt.hf_transformers_utils import (
    get_config,
    get_context_length,
    get_processor,
    get_tokenizer,
)
from sglang.srt.managers.io_struct import (
    AbortReq,
    BatchEmbeddingOut,
    BatchStrOut,
    BatchTokenIDOut,
    EmbeddingReqInput,
    FlushCacheReq,
    GenerateReqInput,
    TokenizedEmbeddingReqInput,
    TokenizedGenerateReqInput,
)
from sglang.srt.mm_utils import expand2square, process_anyres_image
from sglang.srt.sampling_params import SamplingParams
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import is_generation_model, is_multimodal_model, load_image
from sglang.utils import get_exception_traceback

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

logger = logging.getLogger(__name__)


@dataclasses.dataclass
class ReqState:
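    """Per-request state: accumulated outputs, a finished flag, and an event that signals new output."""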
    out_list: List
    finished: bool
    event: asyncio.Event


class TokenizerManager:
    def __init__(
        self,
        server_args: ServerArgs,
        port_args: PortArgs,
        model_overide_args: dict = None,
    ):
        self.server_args = server_args

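        # ZMQ sockets: pull detokenized results from the detokenizer and
        # push tokenized requests to the router (controller).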
        context = zmq.asyncio.Context(2)
        self.recv_from_detokenizer = context.socket(zmq.PULL)
        self.recv_from_detokenizer.bind(f"tcp://127.0.0.1:{port_args.tokenizer_port}")

        self.send_to_router = context.socket(zmq.PUSH)
        self.send_to_router.connect(f"tcp://127.0.0.1:{port_args.controller_port}")

        self.model_path = server_args.model_path
        self.served_model_name = server_args.served_model_name
        self.hf_config = get_config(
            self.model_path,
            trust_remote_code=server_args.trust_remote_code,
            model_overide_args=model_overide_args,
        )
        self.is_generation = is_generation_model(self.hf_config.architectures)

        if server_args.context_length is not None:
            self.context_len = server_args.context_length
        else:
            self.context_len = get_context_length(self.hf_config)

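        # Multimodal models need a HuggingFace processor; image preprocessing
        # runs in a fork-based process pool so it does not block the event loop.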
        if is_multimodal_model(self.model_path):
            self.processor = get_processor(
                server_args.tokenizer_path,
                tokenizer_mode=server_args.tokenizer_mode,
                trust_remote_code=server_args.trust_remote_code,
            )
            self.tokenizer = self.processor.tokenizer
            os.environ["TOKENIZERS_PARALLELISM"] = "false"
            self.executor = concurrent.futures.ProcessPoolExecutor(
                initializer=init_global_processor,
                mp_context=mp.get_context("fork"),
                initargs=(server_args,),
            )
        else:
            self.tokenizer = get_tokenizer(
                server_args.tokenizer_path,
                tokenizer_mode=server_args.tokenizer_mode,
                trust_remote_code=server_args.trust_remote_code,
            )

        self.to_create_loop = True
        self.rid_to_state: Dict[str, ReqState] = {}

    async def get_pixel_values(self, image_data):
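        """Convert raw image data into pixel values, offloading to the process pool when available."""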
        aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None)
        grid_pinpoints = (
            self.hf_config.image_grid_pinpoints if aspect_ratio == "anyres" else None
        )
        if self.executor is not None:
            loop = asyncio.get_event_loop()
            return await loop.run_in_executor(
                self.executor,
                get_pixel_values,
                image_data,
                aspect_ratio,
                grid_pinpoints,
            )
        else:
            return get_pixel_values(
                image_data, aspect_ratio, grid_pinpoints, self.processor
            )

    async def generate_request(
        self, obj: Union[GenerateReqInput, EmbeddingReqInput], request=None
    ):
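        """Tokenize a generation/embedding request, forward it to the router, and stream results back."""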
        if self.to_create_loop:
            self.create_handle_loop()

        obj.post_init()
        is_single = obj.is_single

        if is_single:
            async for response in self._handle_single_request(obj, request):
                yield response
        else:
            if isinstance(obj, EmbeddingReqInput):
                raise NotImplementedError("Please send only one prompt per embedding request.")
            if obj.stream:
                raise ValueError("Streaming is not supported for batch mode.")

            async for response in self._handle_batch_request(obj, request):
                yield response

    async def _handle_single_request(
        self,
        obj: Union[GenerateReqInput, EmbeddingReqInput],
        request,
        index=None,
        is_cache_for_prefill=False,
    ):
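        """Handle a single prompt. When is_cache_for_prefill is set, send only a
        prefill request (max_new_tokens=0) that caches the common prompt for
        parallel sampling."""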
        if not is_cache_for_prefill:  # The normal case with a single prompt
            not_use_index = index is None

            rid = obj.rid if not_use_index else obj.rid[index]
            input_text = obj.text if not_use_index else obj.text[index]
            if obj.input_ids is None:
                input_ids = self.tokenizer.encode(input_text)
            else:
                input_ids = obj.input_ids if not_use_index else obj.input_ids[index]

            self._validate_input_length(input_ids)

            sampling_params = self._get_sampling_params(
                obj.sampling_params if not_use_index else obj.sampling_params[index]
            )

            if self.is_generation:
                pixel_values, image_hash, image_size = await self._get_pixel_values(
                    obj.image_data if not_use_index else obj.image_data[index]
                )
                return_logprob = (
                    obj.return_logprob if not_use_index else obj.return_logprob[index]
                )
                logprob_start_len = (
                    obj.logprob_start_len
                    if not_use_index
                    else obj.logprob_start_len[index]
                )
                top_logprobs_num = (
                    obj.top_logprobs_num
                    if not_use_index
                    else obj.top_logprobs_num[index]
                )
        else:  # A prefill request to cache the common prompt for parallel sampling
            assert self.is_generation
            if obj.text is not None:
                if isinstance(obj.text, list):
                    input_text = obj.text[index]
                    rid = obj.rid[index]
                else:
                    input_text = obj.text
                    rid = obj.rid[0]
                input_ids = self.tokenizer.encode(input_text)
            else:
                input_text = None
                if isinstance(obj.input_ids, list) and isinstance(
                    obj.input_ids[0], list
                ):
                    # when obj.input_ids is a List[List[int]]
                    input_ids = obj.input_ids[index]
                    rid = obj.rid[index]
                else:
                    input_ids = obj.input_ids
                    rid = obj.rid[0]

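            # max_new_tokens=0 makes this a prefill-only request: it populates the
            # cache for the shared prompt without generating any tokens.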
            sampling_params = SamplingParams(**obj.sampling_params[0])
            sampling_params.max_new_tokens = 0
            pixel_values, image_hash, image_size = await self._get_pixel_values(
                obj.image_data[0]
            )
            return_logprob = obj.return_logprob[0]
            logprob_start_len = obj.logprob_start_len[0]
            top_logprobs_num = obj.top_logprobs_num[0]

        if self.is_generation:
            tokenized_obj = TokenizedGenerateReqInput(
                rid,
                input_text,
                input_ids,
                pixel_values,
                image_hash,
                image_size,
                sampling_params,
                return_logprob,
                logprob_start_len,
                top_logprobs_num,
                obj.stream,
            )
        else:  # is embedding
            tokenized_obj = TokenizedEmbeddingReqInput(
                rid,
                input_text,
                input_ids,
                sampling_params,
            )

        self.send_to_router.send_pyobj(tokenized_obj)

        event = asyncio.Event()
        state = ReqState([], False, event)
        self.rid_to_state[rid] = state
        if not is_cache_for_prefill:
            async for response in self._wait_for_response(
                event, state, obj, rid, request
            ):
                yield response
        else:
            assert self.is_generation
            await self._wait_for_cache_prefill_response(event, state, obj, rid, request)
            yield input_ids

    async def _handle_batch_request(self, obj: GenerateReqInput, request):
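        """Dispatch all requests in the batch (including prefill requests for parallel sampling), then collect their responses."""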
        batch_size = obj.batch_size
        parallel_sample_num = obj.parallel_sample_num

        if parallel_sample_num != 1:
            # Send prefill requests to cache the common input
            parallel_sample_num += 1
            input_id_result = [] if obj.input_ids is None else None
            for i in range(batch_size):
                async for input_id in self._handle_single_request(
                    obj, request, index=i, is_cache_for_prefill=True
                ):
                    if input_id_result is not None:
                        input_id_result.append(input_id)
            if input_id_result is not None and len(input_id_result) > 1:
                obj.input_ids = input_id_result
            elif input_id_result is not None:
                obj.input_ids = input_id_result[0]

        # First send out all requests
        for i in range(batch_size):
            for j in range(parallel_sample_num):
                if j == 0 and parallel_sample_num != 1:
                    continue
                index = i * parallel_sample_num + j
                if parallel_sample_num != 1:
                    # With parallel sampling, the first batch_size rids were used
                    # by the prefill requests, so the real index is
                    # j + i * (parallel_sample_num - 1) + batch_size - 1.
                    index += batch_size - 1 - i
                rid = obj.rid[index]
                if parallel_sample_num == 1:
                    # select operation
                    if obj.input_ids is None:
                        input_text = obj.text[i]
                        input_ids = self.tokenizer.encode(obj.text[i])
                    else:
                        input_text = None
                        input_ids = obj.input_ids[i]
                else:
                    assert obj.input_ids is not None
                    if batch_size == 1:
                        input_text = None
                        input_ids = obj.input_ids
                    else:
                        input_text = None
                        input_ids = obj.input_ids[i]
                sampling_params = self._get_sampling_params(obj.sampling_params[index])
                pixel_values, image_hash, image_size = await self._get_pixel_values(
                    obj.image_data[index]
                )

                tokenized_obj = TokenizedGenerateReqInput(
                    rid,
                    input_text,
                    input_ids,
                    pixel_values,
                    image_hash,
                    image_size,
                    sampling_params,
                    obj.return_logprob[index],
                    obj.logprob_start_len[index],
                    obj.top_logprobs_num[index],
                    obj.stream,
                )
                self.send_to_router.send_pyobj(tokenized_obj)

                event = asyncio.Event()
                state = ReqState([], False, event)
                self.rid_to_state[rid] = state
        # Then wait for all responses
        output_list = []
        for i in range(batch_size):
            for j in range(parallel_sample_num):
                if j == 0 and parallel_sample_num != 1:
                    continue
                index = i * parallel_sample_num + j
                if parallel_sample_num != 1:
                    index += batch_size - 1 - i
                rid = obj.rid[index]
                state = self.rid_to_state[rid]

                while True:
                    try:
                        await asyncio.wait_for(state.event.wait(), timeout=4)
                        break
                    except asyncio.TimeoutError:
                        if request is not None and await request.is_disconnected():
                            for rid in obj.rid:
                                self.abort_request(rid)
                            raise ValueError(f"Abort request {rid}")
                        continue
                output_list.append(
                    self.convert_logprob_style(
                        state.out_list[-1],
                        obj.return_logprob[index],
                        obj.top_logprobs_num[index],
                        obj.return_text_in_logprobs,
                    )
                )
                assert state.finished
                del self.rid_to_state[rid]
        yield output_list

    def _validate_input_length(self, input_ids: List[int]):
        if len(input_ids) >= self.context_len:
            raise ValueError(
                f"The input ({len(input_ids)} tokens) is longer than the "
                f"model's context length ({self.context_len} tokens)."
            )

    def _get_sampling_params(self, sampling_params_data: dict):
        sampling_params = SamplingParams(**sampling_params_data)
        if sampling_params.max_new_tokens != 0:
            sampling_params.normalize(self.tokenizer)
            sampling_params.verify()
        return sampling_params

    async def _get_pixel_values(self, image_data):
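        # When a list of images is provided, only the first one is used.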
        if isinstance(image_data, list) and len(image_data) > 0:
            return await self.get_pixel_values(image_data[0])
        elif isinstance(image_data, str):
            return await self.get_pixel_values(image_data)
        else:
            return None, None, None

    async def _wait_for_response(
        self,
        event: asyncio.Event,
        state: ReqState,
        obj: Union[GenerateReqInput, EmbeddingReqInput],
        rid: str,
        request,
    ):
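        # Poll with a short timeout so a disconnected client can still abort the request.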
        while True:
            try:
                await asyncio.wait_for(event.wait(), timeout=4)
            except asyncio.TimeoutError:
                if request is not None and await request.is_disconnected():
                    self.abort_request(rid)
                    raise ValueError(f"Abort request {rid}")
                continue

            if self.is_generation:
                out = self.convert_logprob_style(
                    state.out_list[-1],
                    obj.return_logprob,
                    obj.top_logprobs_num,
                    obj.return_text_in_logprobs,
                )
            else:  # isinstance(obj, EmbeddingReqInput)
                out = state.out_list[-1]

            # Log requests
            if self.server_args.log_requests and state.finished:
                if obj.text is None:
                    in_obj = {"text": self.tokenizer.decode(obj.input_ids)}
                else:
                    in_obj = {"text": obj.text}
                logger.info(f"in={in_obj}, out={out}")

            state.out_list = []
            if state.finished:
                del self.rid_to_state[rid]
                yield out
                break

            event.clear()
            yield out

    async def _wait_for_cache_prefill_response(
        self,
        event: asyncio.Event,
        state: ReqState,
        obj: GenerateReqInput,
        rid: str,
        request,
    ):
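        # The prefill-only request yields no output; just wait until it finishes.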
        while True:
            try:
                await asyncio.wait_for(state.event.wait(), timeout=4)
                break
            except asyncio.TimeoutError:
                if request is not None and await request.is_disconnected():
                    for rid in obj.rid:
                        self.abort_request(rid)
                    raise ValueError(f"Abort request {rid}")
                continue

        assert state.finished
        del self.rid_to_state[rid]

    def flush_cache(self):
        req = FlushCacheReq()
        self.send_to_router.send_pyobj(req)

    def abort_request(self, rid: str):
        if rid not in self.rid_to_state:
            return
        del self.rid_to_state[rid]
        req = AbortReq(rid)
        self.send_to_router.send_pyobj(req)

    def create_abort_task(self, obj: GenerateReqInput):
        # Abort the request if the client is disconnected.
        async def abort_request():
            await asyncio.sleep(3)
            if obj.is_single:
                self.abort_request(obj.rid)
            else:
                for rid in obj.rid:
                    self.abort_request(rid)

        background_tasks = BackgroundTasks()
        background_tasks.add_task(abort_request)
        return background_tasks

    def create_handle_loop(self):
        self.to_create_loop = False
        loop = asyncio.get_event_loop()
        loop.create_task(self.handle_loop())

    async def handle_loop(self):
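        """Pull outputs from the detokenizer and dispatch them to waiting request states."""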
        while True:
            recv_obj: Union[BatchStrOut, BatchEmbeddingOut] = (
                await self.recv_from_detokenizer.recv_pyobj()
            )
            assert isinstance(recv_obj, (BatchStrOut, BatchEmbeddingOut))

            for i, rid in enumerate(recv_obj.rids):
                state = self.rid_to_state.get(rid, None)
                if state is None:
                    continue

                recv_obj.meta_info[i]["id"] = rid
                if isinstance(recv_obj, BatchStrOut):
                    out_dict = {
                        "text": recv_obj.output_strs[i],
                        "meta_info": recv_obj.meta_info[i],
                    }
                else:
                    assert isinstance(recv_obj, BatchEmbeddingOut)
                    out_dict = {
                        "embedding": recv_obj.embeddings[i],
                        "meta_info": recv_obj.meta_info[i],
                    }
                state.out_list.append(out_dict)
                state.finished = recv_obj.finished_reason[i] is not None
                state.event.set()

    def convert_logprob_style(
        self,
        ret: dict,
        return_logprob: bool,
        top_logprobs_num: int,
        return_text_in_logprobs: bool,
    ):
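        # Detokenize the logprob lists, optionally attaching decoded token text.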
        if return_logprob:
            ret["meta_info"]["input_token_logprobs"] = self.detokenize_logprob_tokens(
                ret["meta_info"]["input_token_logprobs"], return_text_in_logprobs
            )
            ret["meta_info"]["output_token_logprobs"] = self.detokenize_logprob_tokens(
                ret["meta_info"]["output_token_logprobs"], return_text_in_logprobs
            )

            if top_logprobs_num > 0:
                ret["meta_info"]["input_top_logprobs"] = (
                    self.detokenize_top_logprobs_tokens(
                        ret["meta_info"]["input_top_logprobs"],
                        return_text_in_logprobs,
                    )
                )
                ret["meta_info"]["output_top_logprobs"] = (
                    self.detokenize_top_logprobs_tokens(
                        ret["meta_info"]["output_top_logprobs"], return_text_in_logprobs
                    )
                )
        return ret

    def detokenize_logprob_tokens(
        self, token_logprobs: List[Tuple[float, int]], decode_to_text: bool
    ):
        if not decode_to_text:
            return [(logprob, token_id, None) for logprob, token_id in token_logprobs]

        token_ids = [tid for _, tid in token_logprobs]
        token_texts = self.tokenizer.batch_decode(token_ids)
        return [
            (logprob, token_id, token_text)
            for (logprob, token_id), token_text in zip(token_logprobs, token_texts)
        ]

    def detokenize_top_logprobs_tokens(self, top_logprobs, decode_to_text: bool):
        # TODO: The current implementation only batches the detokenization for top-k tokens per single position.
        # We should batch all top-k tokens in all positions.
        for i, token_top_logprobs in enumerate(top_logprobs):
            if token_top_logprobs:
                top_logprobs[i] = self.detokenize_logprob_tokens(
                    token_top_logprobs, decode_to_text
                )
        return top_logprobs


global global_processor


def init_global_processor(server_args: ServerArgs):
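    """Initialize the HuggingFace processor once in each ProcessPoolExecutor worker."""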
    global global_processor
    transformers.logging.set_verbosity_error()
    global_processor = get_processor(
        server_args.tokenizer_path,
        tokenizer_mode=server_args.tokenizer_mode,
        trust_remote_code=server_args.trust_remote_code,
    )


def get_pixel_values(
    image_data, image_aspect_ratio=None, image_grid_pinpoints=None, processor=None
):
    try:
        processor = processor or global_processor
        image, image_size = load_image(image_data)
        if image_size is not None:
            image_hash = hash(image_data)
            pixel_values = processor.image_processor(image)["pixel_values"]
            pixel_values = [v.astype(np.float16) for v in pixel_values]
            pixel_values = np.stack(pixel_values, axis=0)
            return pixel_values, image_hash, image_size
        else:
            image_hash = hash(image_data)
            if image_aspect_ratio == "pad":
                image = expand2square(
                    image,
                    tuple(int(x * 255) for x in processor.image_processor.image_mean),
                )
                pixel_values = processor.image_processor(image)["pixel_values"][0]
            elif image_aspect_ratio == "anyres":
                pixel_values = process_anyres_image(
                    image, processor.image_processor, image_grid_pinpoints
                )
            else:
                pixel_values = processor.image_processor(image)["pixel_values"][0]
            pixel_values = pixel_values.astype(np.float16)
            return pixel_values, image_hash, image.size
    except Exception:
        print("Exception in TokenizerManager:\n" + get_exception_traceback())