Unverified Commit d0a64c7e authored by b8zhong, committed by GitHub

vlm: enforce pybase64 for image and str encode/decode (#10700)

parent 05d3667a
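For reviewers unfamiliar with pybase64: it mirrors the standard library's `b64encode`/`b64decode` API, so the swaps below are mechanical, and passing `validate=True` makes malformed input fail loudly instead of being silently tolerated. A minimal sketch of that behavior, independent of the repo code:

```python
import pybase64

payload = b"hello world"

# pybase64 mirrors the stdlib base64 API, so it is a drop-in swap.
encoded = pybase64.b64encode(payload).decode("utf-8")
assert pybase64.b64decode(encoded, validate=True) == payload

# With validate=True, characters outside the base64 alphabet raise an
# error (binascii.Error, a ValueError subclass) rather than being
# silently discarded as they are without validation.
try:
    pybase64.b64decode("not*valid*base64", validate=True)
except ValueError:
    pass
```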
......@@ -75,12 +75,6 @@ CAT_SHORT2LONG = {
}
# DATA SAVING
def save_json(filename, ds):
with open(filename, "w") as f:
json.dump(ds, f, indent=4)
def get_multi_choice_info(options):
"""
Given the list of options for multiple choice question
......
......@@ -6,7 +6,6 @@ python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-
python3 llava_onevision_server.py
"""
import base64
import io
import os
import sys
......@@ -14,6 +13,7 @@ import time
import numpy as np
import openai
import pybase64
import requests
from decord import VideoReader, cpu
from PIL import Image
......@@ -213,7 +213,7 @@ def prepare_video_messages(video_path):
pil_img = Image.fromarray(frame)
buff = io.BytesIO()
pil_img.save(buff, format="JPEG")
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8")
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
base64_frames.append(base64_str)
messages = [{"role": "user", "content": []}]
......
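The hunk above encodes each video frame as a base64 JPEG before appending it to an OpenAI-style message. A self-contained sketch of that pattern (the frame array here is a placeholder; in the script it comes from decord's VideoReader, and the data-URL content shape is the one accepted by OpenAI-compatible vision endpoints):

```python
import io

import numpy as np
import pybase64
from PIL import Image

# Placeholder frame; the real script reads frames from a video file.
frame = np.zeros((64, 64, 3), dtype=np.uint8)

buff = io.BytesIO()
Image.fromarray(frame).save(buff, format="JPEG")
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")

message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this frame."},
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_str}"},
        },
    ],
}
```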
......@@ -31,7 +31,10 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
import aiohttp
import numpy as np
import pybase64
import requests
from datasets import load_dataset
from PIL import Image
from tqdm.asyncio import tqdm
from transformers import (
AutoProcessor,
......@@ -1020,14 +1023,6 @@ def sample_mmmu_requests(
Returns:
List of tuples (prompt, prompt_token_len, output_token_len).
"""
try:
import io
import pybase64
from datasets import load_dataset
except ImportError:
raise ImportError("Please install datasets: pip install datasets")
print("Loading MMMU dataset from HuggingFace...")
try:
......@@ -1396,13 +1391,6 @@ def sample_image_requests(
- Text lengths follow the 'random' dataset sampling rule. ``prompt_len``
only counts text tokens and excludes image data.
"""
try:
import pybase64
from PIL import Image
except ImportError as e:
raise ImportError(
"Please install Pillow to generate random images: pip install pillow"
) from e
# Parse resolution (supports presets and 'heightxwidth')
width, height = parse_image_resolution(image_resolution)
......
import base64
import pickle
import time
from pathlib import Path
from typing import Any, List, Optional
import pybase64
import torch
from sglang.srt.utils import MultiprocessingSerializer
......@@ -77,14 +77,16 @@ class NaiveDistributed:
)
_get_path(self._rank).write_text(
base64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
pybase64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
)
def _read_one(interesting_rank: int):
p = _get_path(interesting_rank)
while True:
if p.exists() and (text := p.read_text()).endswith(text_postfix):
return pickle.loads(base64.b64decode(text[: -len(text_postfix)]))
return pickle.loads(
    pybase64.b64decode(text[: -len(text_postfix)], validate=True)
)
time.sleep(0.001)
return [
......
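The NaiveDistributed change above base64-encodes a pickled object into a text file and validates it on the way back in. A standalone sketch of that round trip, using a temporary directory instead of the rank-based paths:

```python
import pickle
import tempfile
from pathlib import Path

import pybase64

# Sentinel marking a fully written file; the actual value in the repo may differ.
text_postfix = "\n"
obj = {"rank": 0, "payload": [1, 2, 3]}

with tempfile.TemporaryDirectory() as d:
    p = Path(d) / "rank_0.txt"

    # Write: pickle -> base64 text -> file.
    p.write_text(
        pybase64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
    )

    # Read: file -> strip sentinel -> validated base64 decode -> unpickle.
    text = p.read_text()
    assert text.endswith(text_postfix)
    restored = pickle.loads(
        pybase64.b64decode(text[: -len(text_postfix)], validate=True)
    )
    assert restored == obj
```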
......@@ -872,9 +872,9 @@ def get_image_bytes(image_file: Union[str, bytes]):
return f.read()
elif image_file.startswith("data:"):
image_file = image_file.split(",")[1]
return pybase64.b64decode(image_file)
return pybase64.b64decode(image_file, validate=True)
elif isinstance(image_file, str):
return pybase64.b64decode(image_file)
return pybase64.b64decode(image_file, validate=True)
else:
raise NotImplementedError(f"Invalid image: {image_file}")
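get_image_bytes now decodes both `data:` URLs and bare base64 strings with `validate=True`, so a truncated or corrupted payload raises immediately instead of yielding garbage image bytes. A small sketch of the data-URL branch:

```python
import pybase64

original = b"\x89PNG\r\n\x1a\nfake-image-bytes"
data_url = "data:image/png;base64," + pybase64.b64encode(original).decode("utf-8")

# Same split-and-decode the helper performs for "data:" inputs.
encoded = data_url.split(",")[1]
assert pybase64.b64decode(encoded, validate=True) == original
```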
......@@ -911,7 +911,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
vr = VideoReader(tmp_file.name, ctx=ctx)
elif video_file.startswith("data:"):
_, encoded = video_file.split(",", 1)
video_bytes = pybase64.b64decode(encoded)
video_bytes = pybase64.b64decode(encoded, validate=True)
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
tmp_file.write(video_bytes)
tmp_file.close()
......@@ -919,7 +919,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
elif os.path.isfile(video_file):
vr = VideoReader(video_file, ctx=ctx)
else:
video_bytes = pybase64.b64decode(video_file)
video_bytes = pybase64.b64decode(video_file, validate=True)
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
tmp_file.write(video_bytes)
tmp_file.close()
......@@ -2083,7 +2083,7 @@ class MultiprocessingSerializer:
if output_str:
# Convert bytes to base64-encoded string
output = base64.b64encode(output).decode("utf-8")
output = pybase64.b64encode(output).decode("utf-8")
return output
......
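For the MultiprocessingSerializer hunk above: when the caller asks for a string, the serialized bytes are re-encoded as base64 text so they can travel over text-only channels. A hypothetical, simplified stand-in (the real class has its own serialization path; `serialize_to_text` is an illustrative name, not the repo's API):

```python
import pickle

import pybase64


def serialize_to_text(obj, output_str: bool = False):
    # Illustrative only: the real MultiprocessingSerializer does more than
    # plain pickle. The point is the final step, where bytes become a
    # base64-encoded utf-8 string when a text payload is requested.
    output = pickle.dumps(obj)
    if output_str:
        output = pybase64.b64encode(output).decode("utf-8")
    return output
```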
......@@ -4,7 +4,6 @@ python3 -m unittest test_bnb.TestVisionModel.test_vlm
python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
"""
import base64
import io
import json
import multiprocessing as mp
......@@ -15,6 +14,7 @@ from types import SimpleNamespace
import numpy as np
import openai
import pybase64
import requests
from PIL import Image
......
......@@ -3,7 +3,6 @@ Usage:
python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill
"""
import base64
import io
import os
import unittest
......@@ -11,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor
from typing import Union
import numpy as np
import pybase64
import requests
from PIL import Image
......@@ -45,7 +45,7 @@ class TestVisionChunkedPrefill(CustomTestCase):
pil_img = Image.fromarray(frame)
buff = io.BytesIO()
pil_img.save(buff, format="JPEG")
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8")
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
base64_frames.append(base64_str)
messages = [{"role": "user", "content": []}]
......
import base64
import io
import os
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import openai
import pybase64
import requests
from PIL import Image
......@@ -386,7 +386,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
pil_img = Image.fromarray(frame)
buff = io.BytesIO()
pil_img.save(buff, format="JPEG")
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8")
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
base64_frames.append(base64_str)
messages = [{"role": "user", "content": []}]
......