"vscode:/vscode.git/clone" did not exist on "9fd2906bd13caf4d4142c14285d5bbd9fdebd613"
Unverified Commit d0a64c7e authored by b8zhong's avatar b8zhong Committed by GitHub
Browse files

vlm: enforce pybase64 for image and str encode/decode (#10700)

parent 05d3667a
...@@ -75,12 +75,6 @@ CAT_SHORT2LONG = { ...@@ -75,12 +75,6 @@ CAT_SHORT2LONG = {
} }
# DATA SAVING
def save_json(filename, ds):
    """Serialize ``ds`` to ``filename`` as JSON indented by four spaces.

    The target is opened in text write mode, so any existing file at that
    path is overwritten.

    Args:
        filename: Path of the file to write.
        ds: JSON-serializable object to dump.
    """
    with open(filename, "w") as f:
        json.dump(ds, f, indent=4)
def get_multi_choice_info(options): def get_multi_choice_info(options):
""" """
Given the list of options for multiple choice question Given the list of options for multiple choice question
......
...@@ -6,7 +6,6 @@ python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b- ...@@ -6,7 +6,6 @@ python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-
python3 llava_onevision_server.py python3 llava_onevision_server.py
""" """
import base64
import io import io
import os import os
import sys import sys
...@@ -14,6 +13,7 @@ import time ...@@ -14,6 +13,7 @@ import time
import numpy as np import numpy as np
import openai import openai
import pybase64
import requests import requests
from decord import VideoReader, cpu from decord import VideoReader, cpu
from PIL import Image from PIL import Image
...@@ -213,7 +213,7 @@ def prepare_video_messages(video_path): ...@@ -213,7 +213,7 @@ def prepare_video_messages(video_path):
pil_img = Image.fromarray(frame) pil_img = Image.fromarray(frame)
buff = io.BytesIO() buff = io.BytesIO()
pil_img.save(buff, format="JPEG") pil_img.save(buff, format="JPEG")
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
base64_frames.append(base64_str) base64_frames.append(base64_str)
messages = [{"role": "user", "content": []}] messages = [{"role": "user", "content": []}]
......
...@@ -31,7 +31,10 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union ...@@ -31,7 +31,10 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
import aiohttp import aiohttp
import numpy as np import numpy as np
import pybase64
import requests import requests
from datasets import load_dataset
from PIL import Image
from tqdm.asyncio import tqdm from tqdm.asyncio import tqdm
from transformers import ( from transformers import (
AutoProcessor, AutoProcessor,
...@@ -1020,14 +1023,6 @@ def sample_mmmu_requests( ...@@ -1020,14 +1023,6 @@ def sample_mmmu_requests(
Returns: Returns:
List of tuples (prompt, prompt_token_len, output_token_len). List of tuples (prompt, prompt_token_len, output_token_len).
""" """
try:
import io
import pybase64
from datasets import load_dataset
except ImportError:
raise ImportError("Please install datasets: pip install datasets")
print("Loading MMMU dataset from HuggingFace...") print("Loading MMMU dataset from HuggingFace...")
try: try:
...@@ -1396,13 +1391,6 @@ def sample_image_requests( ...@@ -1396,13 +1391,6 @@ def sample_image_requests(
- Text lengths follow the 'random' dataset sampling rule. ``prompt_len`` - Text lengths follow the 'random' dataset sampling rule. ``prompt_len``
only counts text tokens and excludes image data. only counts text tokens and excludes image data.
""" """
try:
import pybase64
from PIL import Image
except ImportError as e:
raise ImportError(
"Please install Pillow to generate random images: pip install pillow"
) from e
# Parse resolution (supports presets and 'heightxwidth') # Parse resolution (supports presets and 'heightxwidth')
width, height = parse_image_resolution(image_resolution) width, height = parse_image_resolution(image_resolution)
......
import base64
import pickle import pickle
import time import time
from pathlib import Path from pathlib import Path
from typing import Any, List, Optional from typing import Any, List, Optional
import pybase64
import torch import torch
from sglang.srt.utils import MultiprocessingSerializer from sglang.srt.utils import MultiprocessingSerializer
...@@ -77,14 +77,16 @@ class NaiveDistributed: ...@@ -77,14 +77,16 @@ class NaiveDistributed:
) )
_get_path(self._rank).write_text( _get_path(self._rank).write_text(
base64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix pybase64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
) )
def _read_one(interesting_rank: int): def _read_one(interesting_rank: int):
p = _get_path(interesting_rank) p = _get_path(interesting_rank)
while True: while True:
if p.exists() and (text := p.read_text()).endswith(text_postfix): if p.exists() and (text := p.read_text()).endswith(text_postfix):
return pickle.loads(base64.b64decode(text[: -len(text_postfix)])) return pickle.loads(
pybase64.b64decode(text[: -len(text_postfix)], validate=True)
)
time.sleep(0.001) time.sleep(0.001)
return [ return [
......
...@@ -872,9 +872,9 @@ def get_image_bytes(image_file: Union[str, bytes]): ...@@ -872,9 +872,9 @@ def get_image_bytes(image_file: Union[str, bytes]):
return f.read() return f.read()
elif image_file.startswith("data:"): elif image_file.startswith("data:"):
image_file = image_file.split(",")[1] image_file = image_file.split(",")[1]
return pybase64.b64decode(image_file) return pybase64.b64decode(image_file, validate=True)
elif isinstance(image_file, str): elif isinstance(image_file, str):
return pybase64.b64decode(image_file) return pybase64.b64decode(image_file, validate=True)
else: else:
raise NotImplementedError(f"Invalid image: {image_file}") raise NotImplementedError(f"Invalid image: {image_file}")
...@@ -911,7 +911,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True): ...@@ -911,7 +911,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
vr = VideoReader(tmp_file.name, ctx=ctx) vr = VideoReader(tmp_file.name, ctx=ctx)
elif video_file.startswith("data:"): elif video_file.startswith("data:"):
_, encoded = video_file.split(",", 1) _, encoded = video_file.split(",", 1)
video_bytes = pybase64.b64decode(encoded) video_bytes = pybase64.b64decode(encoded, validate=True)
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
tmp_file.write(video_bytes) tmp_file.write(video_bytes)
tmp_file.close() tmp_file.close()
...@@ -919,7 +919,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True): ...@@ -919,7 +919,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
elif os.path.isfile(video_file): elif os.path.isfile(video_file):
vr = VideoReader(video_file, ctx=ctx) vr = VideoReader(video_file, ctx=ctx)
else: else:
video_bytes = pybase64.b64decode(video_file) video_bytes = pybase64.b64decode(video_file, validate=True)
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
tmp_file.write(video_bytes) tmp_file.write(video_bytes)
tmp_file.close() tmp_file.close()
...@@ -2083,7 +2083,7 @@ class MultiprocessingSerializer: ...@@ -2083,7 +2083,7 @@ class MultiprocessingSerializer:
if output_str: if output_str:
# Convert bytes to base64-encoded string # Convert bytes to base64-encoded string
output = pybase64.b64encode(output).decode("utf-8") pybase64.b64encode(output).decode("utf-8")
return output return output
......
...@@ -4,7 +4,6 @@ python3 -m unittest test_bnb.TestVisionModel.test_vlm ...@@ -4,7 +4,6 @@ python3 -m unittest test_bnb.TestVisionModel.test_vlm
python3 -m unittest test_bnb.TestLanguageModel.test_mmlu python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
""" """
import base64
import io import io
import json import json
import multiprocessing as mp import multiprocessing as mp
...@@ -15,6 +14,7 @@ from types import SimpleNamespace ...@@ -15,6 +14,7 @@ from types import SimpleNamespace
import numpy as np import numpy as np
import openai import openai
import pybase64
import requests import requests
from PIL import Image from PIL import Image
......
...@@ -3,7 +3,6 @@ Usage: ...@@ -3,7 +3,6 @@ Usage:
python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill
""" """
import base64
import io import io
import os import os
import unittest import unittest
...@@ -11,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor ...@@ -11,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor
from typing import Union from typing import Union
import numpy as np import numpy as np
import pybase64
import requests import requests
from PIL import Image from PIL import Image
...@@ -45,7 +45,7 @@ class TestVisionChunkedPrefill(CustomTestCase): ...@@ -45,7 +45,7 @@ class TestVisionChunkedPrefill(CustomTestCase):
pil_img = Image.fromarray(frame) pil_img = Image.fromarray(frame)
buff = io.BytesIO() buff = io.BytesIO()
pil_img.save(buff, format="JPEG") pil_img.save(buff, format="JPEG")
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
base64_frames.append(base64_str) base64_frames.append(base64_str)
messages = [{"role": "user", "content": []}] messages = [{"role": "user", "content": []}]
......
import base64
import io import io
import os import os
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import numpy as np import numpy as np
import openai import openai
import pybase64
import requests import requests
from PIL import Image from PIL import Image
...@@ -386,7 +386,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase): ...@@ -386,7 +386,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
pil_img = Image.fromarray(frame) pil_img = Image.fromarray(frame)
buff = io.BytesIO() buff = io.BytesIO()
pil_img.save(buff, format="JPEG") pil_img.save(buff, format="JPEG")
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
base64_frames.append(base64_str) base64_frames.append(base64_str)
messages = [{"role": "user", "content": []}] messages = [{"role": "user", "content": []}]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment