Unverified commit e2c8f1ed, authored by Andrew Sansom and committed by GitHub
Browse files

[PERF] Use pybase64 to more quickly decode prompt embeddings (#22469)


Signed-off-by: Andrew Sansom <andrew@protopia.ai>
parent 1ee5ead5
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import base64
import io
import json
import sys
......@@ -12,6 +11,7 @@ from http import HTTPStatus
from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
TypeVar, Union, cast, overload)
import pybase64
import torch
from fastapi import Request
from pydantic import BaseModel, ConfigDict, Field
......@@ -1008,7 +1008,8 @@ class OpenAIServing:
) -> list[EmbedsPrompt]:
def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
tensor = torch.load(io.BytesIO(base64.b64decode(embed)),
tensor = torch.load(io.BytesIO(
pybase64.b64decode(embed, validate=True)),
weights_only=True)
assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
torch.float32,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment