Commit 8d75f22e authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.13.0rc1' into v0.13.0rc1-ori

parents ce888aa4 7d80c73d
...@@ -8,6 +8,7 @@ from transformers import AutoModelForSpeechSeq2Seq ...@@ -8,6 +8,7 @@ from transformers import AutoModelForSpeechSeq2Seq
from vllm.logprobs import SampleLogprobs from vllm.logprobs import SampleLogprobs
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.platforms import current_platform
from ....conftest import AudioTestAssets, HfRunner, PromptAudioInput, VllmRunner from ....conftest import AudioTestAssets, HfRunner, PromptAudioInput, VllmRunner
from ...registry import HF_EXAMPLE_MODELS from ...registry import HF_EXAMPLE_MODELS
...@@ -34,6 +35,12 @@ audio_lora_path = MODEL_NAME ...@@ -34,6 +35,12 @@ audio_lora_path = MODEL_NAME
models = [MODEL_NAME] models = [MODEL_NAME]
@pytest.fixture(autouse=True)
def set_attention_backend_for_rocm(monkeypatch):
if current_platform.is_rocm():
monkeypatch.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN")
def run_test( def run_test(
hf_runner: type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner], vllm_runner: type[VllmRunner],
...@@ -111,8 +118,12 @@ def run_test( ...@@ -111,8 +118,12 @@ def run_test(
@pytest.mark.parametrize("model", models) @pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize(
@pytest.mark.parametrize("max_model_len", [2048]) "dtype", ["float16"] if current_platform.is_rocm() else ["bfloat16"]
)
@pytest.mark.parametrize(
"max_model_len", [512] if current_platform.is_rocm() else [2048]
)
@pytest.mark.parametrize("max_tokens", [128]) @pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10]) @pytest.mark.parametrize("num_logprobs", [10])
def test_models( def test_models(
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
from collections.abc import Sequence
import librosa
import pytest
from huggingface_hub import snapshot_download
from vllm.assets.image import ImageAsset
from vllm.lora.request import LoRARequest
from vllm.multimodal.image import rescale_image_size
from ....conftest import (
IMAGE_ASSETS,
HfRunner,
PromptAudioInput,
PromptImageInput,
VllmRunner,
)
from ....utils import large_gpu_test
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts(
{
"stop_sign": "<|user|>\n<|image|>\nWhat's the content of the image?<|end|>\n<|assistant|>\n", # noqa: E501
"cherry_blossom": "<|user|>\n<|image|>\nPlease infer the season with reason in details.<|end|>\n<|assistant|>\n", # noqa: E501
}
)
HF_MULTIIMAGE_IMAGE_PROMPT = (
"<|user|>\n<|image|>\n<|image|>\nDescribe these images.<|end|>\n<|assistant|>\n" # noqa: E501
)
model_path = snapshot_download(
"microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
)
# Since the vision-lora and speech-lora co-exist with the base model,
# we have to manually specify the path of the lora weights.
vision_lora_path = os.path.join(model_path, "vision-lora")
speech_question = os.path.join(
model_path, "examples", "what_is_shown_in_this_image.wav"
)
models = [model_path]
target_dtype = "half"
def run_test(
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
inputs: Sequence[tuple[list[str], PromptImageInput, PromptAudioInput | None]],
model: str,
*,
max_model_len: int,
dtype: str,
max_tokens: int,
num_logprobs: int,
mm_limit: int,
tensor_parallel_size: int,
distributed_executor_backend: str | None = None,
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
Note, the text input is also adjusted to abide by vllm contract.
The text output is sanitized to be able to compare with hf.
"""
# NOTE: take care of the order. run vLLM first, and then run HF.
# vLLM needs a fresh new process without cuda initialization.
# if we run HF first, the cuda initialization will be done and it
# will hurt multiprocessing backend with fork method (the default method).
# max_model_len should be greater than image_feature_size
with vllm_runner(
model,
task="generate",
max_model_len=max_model_len,
max_num_seqs=2,
dtype=dtype,
limit_mm_per_prompt={"image": mm_limit},
tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend,
enable_lora=True,
max_lora_rank=320,
gpu_memory_utilization=0.8, # set to 0.8 to avoid OOM in CI
enforce_eager=True,
trust_remote_code=False,
) as vllm_model:
lora_request = LoRARequest("vision", 1, vision_lora_path)
vllm_outputs_per_case = [
vllm_model.generate_greedy_logprobs(
prompts,
max_tokens,
num_logprobs=num_logprobs,
images=images,
audios=audios,
lora_request=lora_request,
)
for prompts, images, audios in inputs
]
with hf_runner(model, dtype=dtype) as hf_model:
hf_model.model.load_adapter(
vision_lora_path,
adapter_name="vision",
)
hf_processor = hf_model.processor
eos_token_id = hf_processor.tokenizer.eos_token_id
hf_outputs_per_case = [
hf_model.generate_greedy_logprobs_limit(
prompts,
max_tokens,
num_logprobs=num_logprobs,
images=images,
audios=audios,
eos_token_id=eos_token_id,
)
for prompts, images, audios in inputs
]
for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, vllm_outputs_per_case):
check_logprobs_close(
outputs_0_lst=hf_outputs,
outputs_1_lst=vllm_outputs,
name_0="hf",
name_1="vllm",
)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
"size_factors",
[
# No image
[],
# Single-scale
[1.0],
# Single-scale, batched
[1.0, 1.0, 1.0],
# Multi-scale
[0.25, 0.5, 1.0],
],
)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_model_len", [12800])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_models(
hf_runner,
vllm_runner,
image_assets,
model,
size_factors,
dtype: str,
max_model_len: int,
max_tokens: int,
num_logprobs: int,
) -> None:
images = [asset.pil_image for asset in image_assets]
inputs_per_image = [
(
[prompt for _ in size_factors],
[rescale_image_size(image, factor) for factor in size_factors],
None,
)
for image, prompt in zip(images, HF_IMAGE_PROMPTS)
]
run_test(
hf_runner,
vllm_runner,
inputs_per_image,
model,
dtype=dtype,
max_model_len=max_model_len,
max_tokens=max_tokens,
num_logprobs=num_logprobs,
mm_limit=1,
tensor_parallel_size=1,
)
@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
"size_factors",
[
# No image
# [],
# Single-scale
[1.0],
# Single-scale, batched
[1.0, 1.0, 1.0],
# Multi-scale
[0.25, 0.5, 1.0],
],
)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_model_len", [25600])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_multi_images_models(
hf_runner,
vllm_runner,
image_assets,
model,
size_factors,
dtype: str,
max_model_len: int,
max_tokens: int,
num_logprobs: int,
) -> None:
images = [asset.pil_image for asset in image_assets]
inputs_per_case = [
(
[HF_MULTIIMAGE_IMAGE_PROMPT for _ in size_factors],
[
[rescale_image_size(image, factor) for image in images]
for factor in size_factors
],
None,
),
]
run_test(
hf_runner,
vllm_runner,
inputs_per_case,
model,
dtype=dtype,
max_model_len=max_model_len,
max_tokens=max_tokens,
num_logprobs=num_logprobs,
mm_limit=2,
tensor_parallel_size=1,
)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_model_len", [12800])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [10])
def test_vision_speech_models(
hf_runner,
vllm_runner,
model,
dtype: str,
max_model_len: int,
max_tokens: int,
num_logprobs: int,
) -> None:
# use the example speech question so that the model outputs are reasonable
audio = librosa.load(speech_question, sr=16000)
image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
inputs_vision_speech = [
(
["<|user|><|image|><|audio|><|end|><|assistant|>"],
[image],
[audio],
),
]
run_test(
hf_runner,
vllm_runner,
inputs_vision_speech,
model,
dtype=dtype,
max_model_len=max_model_len,
max_tokens=max_tokens,
num_logprobs=num_logprobs,
mm_limit=1,
tensor_parallel_size=1,
)
...@@ -15,6 +15,7 @@ from transformers import AutoProcessor ...@@ -15,6 +15,7 @@ from transformers import AutoProcessor
from vllm import SamplingParams, TextPrompt, TokensPrompt from vllm import SamplingParams, TextPrompt, TokensPrompt
from vllm.logprobs import Logprob, SampleLogprobs from vllm.logprobs import Logprob, SampleLogprobs
from vllm.multimodal import MultiModalDataBuiltins from vllm.multimodal import MultiModalDataBuiltins
from vllm.platforms import current_platform
from ....utils import VLLM_PATH, large_gpu_test from ....utils import VLLM_PATH, large_gpu_test
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
...@@ -165,6 +166,15 @@ def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs: ...@@ -165,6 +166,15 @@ def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs:
def test_chat( def test_chat(
vllm_runner, max_model_len: int, model: str, dtype: str, local_asset_server vllm_runner, max_model_len: int, model: str, dtype: str, local_asset_server
) -> None: ) -> None:
if (
model == MISTRAL_SMALL_3_1_ID
and max_model_len == 65536
and current_platform.is_rocm()
):
pytest.skip(
"OOM on ROCm: 24B model with 65536 context length exceeds GPU memory"
)
EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT[model]) EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT[model])
with vllm_runner( with vllm_runner(
model, model,
......
...@@ -62,6 +62,65 @@ def get_filtered_test_settings( ...@@ -62,6 +62,65 @@ def get_filtered_test_settings(
return matching_tests return matching_tests
def get_model_type_cases(
model_type: str,
test_info: VLMTestInfo,
test_type: VLMTestType,
):
# Ensure that something is wrapped as an iterable it's not already
ensure_wrapped = lambda e: e if isinstance(e, (list, tuple)) else (e,)
# This is essentially the same as nesting a bunch of mark.parametrize
# decorators, but we do it programmatically to allow overrides for on
# a per-model basis, while still being able to execute each of these
# as individual test cases in pytest.
iter_kwargs = OrderedDict(
[
("model", ensure_wrapped(test_info.models)),
("max_tokens", ensure_wrapped(test_info.max_tokens)),
("num_logprobs", ensure_wrapped(test_info.num_logprobs)),
("dtype", ensure_wrapped(test_info.dtype)),
(
"distributed_executor_backend",
ensure_wrapped(test_info.distributed_executor_backend),
),
]
)
# num_frames is video only
if test_type == VLMTestType.VIDEO:
iter_kwargs["num_video_frames"] = ensure_wrapped(test_info.num_video_frames)
iter_kwargs["needs_video_metadata"] = ensure_wrapped(
test_info.needs_video_metadata
)
# No sizes passed for custom inputs, since inputs are directly provided
if test_type not in (
VLMTestType.CUSTOM_INPUTS,
VLMTestType.AUDIO,
):
wrapped_sizes = get_wrapped_test_sizes(test_info, test_type)
if wrapped_sizes is None:
raise ValueError(f"Sizes must be set for test type {test_type}")
iter_kwargs["size_wrapper"] = wrapped_sizes
# Otherwise expand the custom test options instead
elif test_type == VLMTestType.CUSTOM_INPUTS:
if test_info.custom_test_opts is None:
raise ValueError("Test has type CUSTOM_INPUTS, but none given")
iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
# Wrap all model cases in a pytest parameter & pass marks through
return [
pytest.param(
model_type,
ExpandableVLMTestArgs(**{k: v for k, v in zip(iter_kwargs.keys(), case)}),
marks=test_info.marks if test_info.marks is not None else [],
)
for case in list(itertools.product(*iter_kwargs.values()))
]
def get_parametrized_options( def get_parametrized_options(
test_settings: dict[str, VLMTestInfo], test_settings: dict[str, VLMTestInfo],
test_type: VLMTestType, test_type: VLMTestType,
...@@ -76,64 +135,11 @@ def get_parametrized_options( ...@@ -76,64 +135,11 @@ def get_parametrized_options(
test_settings, test_type, create_new_process_for_each_test test_settings, test_type, create_new_process_for_each_test
) )
# Ensure that something is wrapped as an iterable it's not already
ensure_wrapped = lambda e: e if isinstance(e, (list, tuple)) else (e,)
def get_model_type_cases(model_type: str, test_info: VLMTestInfo):
# This is essentially the same as nesting a bunch of mark.parametrize
# decorators, but we do it programmatically to allow overrides for on
# a per-model basis, while still being able to execute each of these
# as individual test cases in pytest.
iter_kwargs = OrderedDict(
[
("model", ensure_wrapped(test_info.models)),
("max_tokens", ensure_wrapped(test_info.max_tokens)),
("num_logprobs", ensure_wrapped(test_info.num_logprobs)),
("dtype", ensure_wrapped(test_info.dtype)),
(
"distributed_executor_backend",
ensure_wrapped(test_info.distributed_executor_backend),
),
]
)
# num_frames is video only
if test_type == VLMTestType.VIDEO:
iter_kwargs["num_video_frames"] = ensure_wrapped(test_info.num_video_frames)
iter_kwargs["needs_video_metadata"] = ensure_wrapped(
test_info.needs_video_metadata
)
# No sizes passed for custom inputs, since inputs are directly provided
if test_type not in (VLMTestType.CUSTOM_INPUTS, VLMTestType.AUDIO):
wrapped_sizes = get_wrapped_test_sizes(test_info, test_type)
if wrapped_sizes is None:
raise ValueError(f"Sizes must be set for test type {test_type}")
iter_kwargs["size_wrapper"] = wrapped_sizes
# Otherwise expand the custom test options instead
elif test_type == VLMTestType.CUSTOM_INPUTS:
if test_info.custom_test_opts is None:
raise ValueError("Test has type CUSTOM_INPUTS, but none given")
iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
# Wrap all model cases in a pytest parameter & pass marks through
return [
pytest.param(
model_type,
ExpandableVLMTestArgs(
**{k: v for k, v in zip(iter_kwargs.keys(), case)}
),
marks=test_info.marks if test_info.marks is not None else [],
)
for case in list(itertools.product(*iter_kwargs.values()))
]
# Get a list per model type, where each entry contains a tuple of all of # Get a list per model type, where each entry contains a tuple of all of
# that model type's cases, then flatten them into the top level so that # that model type's cases, then flatten them into the top level so that
# we can consume them in one mark.parametrize call. # we can consume them in one mark.parametrize call.
cases_by_model_type = [ cases_by_model_type = [
get_model_type_cases(model_type, test_info) get_model_type_cases(model_type, test_info, test_type)
for model_type, test_info in matching_tests.items() for model_type, test_info in matching_tests.items()
] ]
return list(itertools.chain(*cases_by_model_type)) return list(itertools.chain(*cases_by_model_type))
......
...@@ -140,7 +140,7 @@ def video_with_metadata_glm4_1v(): ...@@ -140,7 +140,7 @@ def video_with_metadata_glm4_1v():
metadata = VIDEO_ASSETS[0].metadata metadata = VIDEO_ASSETS[0].metadata
question = "Describe the video." question = "Describe the video."
video_prompt = "<|begin_of_video|><|video|><|end_of_video|>" video_prompt = "<|begin_of_video|><|video|><|end_of_video|>"
formatted_prompt = f"<|user|>\n{video_prompt}{question}<|assistant|>\n" formatted_prompt = f"[gMASK]<|user|>\n{video_prompt}{question}<|assistant|>\n"
scales = [0.1, 0.2, 0.25] scales = [0.1, 0.2, 0.25]
video_input = [ video_input = [
......
...@@ -25,6 +25,7 @@ from transformers import ( ...@@ -25,6 +25,7 @@ from transformers import (
from transformers.video_utils import VideoMetadata from transformers.video_utils import VideoMetadata
from vllm.logprobs import SampleLogprobs from vllm.logprobs import SampleLogprobs
from vllm.platforms import current_platform
from vllm.utils.collection_utils import is_list_of from vllm.utils.collection_utils import is_list_of
from .....conftest import HfRunner, ImageAsset, ImageTestAssets from .....conftest import HfRunner, ImageAsset, ImageTestAssets
...@@ -366,6 +367,40 @@ def gemma3_vllm_to_hf_output(vllm_output: RunnerOutput, model: str) -> RunnerOut ...@@ -366,6 +367,40 @@ def gemma3_vllm_to_hf_output(vllm_output: RunnerOutput, model: str) -> RunnerOut
def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner: def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
"""Patches and returns an instance of the HfRunner to use for GLM4V.""" """Patches and returns an instance of the HfRunner to use for GLM4V."""
if current_platform.is_rocm():
import types
config = hf_model.model.config
if hasattr(config, "num_layers") and not hasattr(config, "num_hidden_layers"):
config.num_hidden_layers = config.num_layers
config.output_hidden_states = True
def patched_prepare_cache(
self, generation_config, model_kwargs, *args, **kwargs
):
model_kwargs["past_key_values"] = None
model_kwargs["use_cache"] = False
return model_kwargs
hf_model.model._prepare_cache_for_generation = types.MethodType(
patched_prepare_cache, hf_model.model
)
original_generate = hf_model.model.generate
def patched_generate(*args, **kwargs):
kwargs["output_hidden_states"] = True
kwargs["return_dict_in_generate"] = True
return original_generate(*args, **kwargs)
hf_model.model.generate = patched_generate
original_forward = hf_model.model.forward
def patched_forward(*args, **kwargs):
kwargs["output_hidden_states"] = True
return original_forward(*args, **kwargs)
hf_model.model.forward = patched_forward
hf_processor = hf_model.processor hf_processor = hf_model.processor
def processor(*args, text="", images=None, **kwargs): def processor(*args, text="", images=None, **kwargs):
...@@ -406,7 +441,15 @@ def glm4_1v_patch_hf_runner(hf_model: HfRunner) -> HfRunner: ...@@ -406,7 +441,15 @@ def glm4_1v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
if videos is not None and is_list_of(videos, tuple): if videos is not None and is_list_of(videos, tuple):
# If videos is a list of tuples, we assume each tuple contains # If videos is a list of tuples, we assume each tuple contains
# (video_array, metadata) as in the case of GLM4.1V. # (video_array, metadata) as in the case of GLM4.1V.
video_metadata = [[VideoMetadata(**video[1])] for video in videos] # Filter out 'do_sample_frames' as it's not a valid VideoMetadata arg
video_metadata = [
[
VideoMetadata(
**{k: v for k, v in video[1].items() if k != "do_sample_frames"}
)
]
for video in videos
]
videos = [[video[0]] for video in videos] videos = [[video[0]] for video in videos]
else: else:
video_metadata = None video_metadata = None
......
...@@ -50,8 +50,8 @@ MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PL ...@@ -50,8 +50,8 @@ MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PL
VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?" VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)] IMAGE_SIZE_FACTORS = [(1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)] EMBEDDING_SIZE_FACTORS = [(1.0,), (1.0, 1.0, 1.0)]
RunnerOutput = tuple[list[int], str, SampleLogprobs | None] RunnerOutput = tuple[list[int], str, SampleLogprobs | None]
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Pytest configuration for vLLM pooling tests."""
import os
import warnings
from vllm.platforms import current_platform
def pytest_collection_modifyitems(config, items):
"""Set FLEX_ATTENTION backend for SigLIP tests on ROCm."""
if not current_platform.is_rocm():
return
siglip_tests = [item for item in items if "test_siglip" in item.nodeid]
if siglip_tests:
os.environ["VLLM_ATTENTION_BACKEND"] = "FLEX_ATTENTION"
warnings.warn(
"ROCm: Set VLLM_ATTENTION_BACKEND=FLEX_ATTENTION for SigLIP tests",
UserWarning,
stacklevel=1,
)
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any
import pytest import pytest
from transformers import SiglipModel from transformers import SiglipModel
...@@ -35,7 +37,11 @@ def _run_test( ...@@ -35,7 +37,11 @@ def _run_test(
model: str, model: str,
*, *,
dtype: str, dtype: str,
tokenization_kwargs: dict[str, Any] | None = None,
) -> None: ) -> None:
if tokenization_kwargs is None:
tokenization_kwargs = {}
with vllm_runner( with vllm_runner(
model, model,
runner="pooling", runner="pooling",
...@@ -44,10 +50,14 @@ def _run_test( ...@@ -44,10 +50,14 @@ def _run_test(
max_model_len=64, max_model_len=64,
gpu_memory_utilization=0.7, gpu_memory_utilization=0.7,
) as vllm_model: ) as vllm_model:
vllm_outputs = vllm_model.embed(input_texts, images=input_images) vllm_outputs = vllm_model.embed(
input_texts, images=input_images, tokenization_kwargs=tokenization_kwargs
)
with hf_runner(model, dtype=dtype, auto_cls=SiglipModel) as hf_model: with hf_runner(model, dtype=dtype, auto_cls=SiglipModel) as hf_model:
all_inputs = hf_model.get_inputs(input_texts, images=input_images) all_inputs = hf_model.get_inputs(
input_texts, images=input_images, tokenization_kwargs=tokenization_kwargs
)
all_outputs = [] all_outputs = []
for inputs in all_inputs: for inputs in all_inputs:
...@@ -94,6 +104,10 @@ def test_models_text( ...@@ -94,6 +104,10 @@ def test_models_text(
input_images, # type: ignore input_images, # type: ignore
model, model,
dtype=dtype, dtype=dtype,
tokenization_kwargs={
"padding": "max_length",
"max_length": 64,
}, # siglip2 was trained with this padding setting.
) )
......
...@@ -20,7 +20,7 @@ from vllm.config.multimodal import ( ...@@ -20,7 +20,7 @@ from vllm.config.multimodal import (
) )
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
from vllm.multimodal.cache import MultiModalProcessorOnlyCache from vllm.multimodal.cache import MultiModalProcessorOnlyCache
from vllm.multimodal.inputs import MultiModalInputs from vllm.multimodal.inputs import MultiModalInputs, batched_tensors_equal
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
from vllm.tokenizers import ( from vllm.tokenizers import (
MistralTokenizer, MistralTokenizer,
...@@ -396,28 +396,6 @@ def test_processing_correctness( ...@@ -396,28 +396,6 @@ def test_processing_correctness(
) )
# Phi4MultimodalForCausalLM share same model repo with original format
# Phi4MMForCausalLM, so we add it as a separate test case
# Remove this test after conversion PR merged:
# https://huggingface.co/microsoft/Phi-4-multimodal-instruct/discussions/70
@pytest.mark.parametrize("model_arch", ["Phi4MultimodalForCausalLM"])
@pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
@pytest.mark.parametrize("num_batches", [32])
@pytest.mark.parametrize("simplify_rate", [1.0])
def test_processing_correctness_phi4_multimodal(
model_arch: str,
hit_rate: float,
num_batches: int,
simplify_rate: float,
):
_test_processing_correctness(
model_arch,
hit_rate=hit_rate,
num_batches=num_batches,
simplify_rate=simplify_rate,
)
def _assert_inputs_equal( def _assert_inputs_equal(
a: MultiModalInputs, a: MultiModalInputs,
b: MultiModalInputs, b: MultiModalInputs,
...@@ -440,4 +418,4 @@ def _assert_inputs_equal( ...@@ -440,4 +418,4 @@ def _assert_inputs_equal(
a_data.pop(key, None) a_data.pop(key, None)
b_data.pop(key, None) b_data.pop(key, None)
assert a_data == b_data, msg assert batched_tensors_equal(a_data, b_data), msg
...@@ -5,6 +5,7 @@ import pytest ...@@ -5,6 +5,7 @@ import pytest
from vllm.assets.video import VideoAsset from vllm.assets.video import VideoAsset
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import batched_tensors_equal
from vllm.multimodal.video import OpenCVDynamicVideoBackend, OpenCVVideoBackend from vllm.multimodal.video import OpenCVDynamicVideoBackend, OpenCVVideoBackend
from ...utils import build_model_context from ...utils import build_model_context
...@@ -103,7 +104,7 @@ def test_video_loader_consistency( ...@@ -103,7 +104,7 @@ def test_video_loader_consistency(
dynamic_outputs = processor.apply(prompt, dynamic_mm_data, hf_processor_mm_kwargs) dynamic_outputs = processor.apply(prompt, dynamic_mm_data, hf_processor_mm_kwargs)
assert static_outputs["prompt_token_ids"] == dynamic_outputs["prompt_token_ids"] assert static_outputs["prompt_token_ids"] == dynamic_outputs["prompt_token_ids"]
assert ( assert batched_tensors_equal(
static_outputs["mm_kwargs"].get_data() static_outputs["mm_kwargs"].get_data(),
== dynamic_outputs["mm_kwargs"].get_data() dynamic_outputs["mm_kwargs"].get_data(),
) )
...@@ -130,10 +130,9 @@ def create_batched_mm_kwargs( ...@@ -130,10 +130,9 @@ def create_batched_mm_kwargs(
hf_processor_mm_kwargs=processor_inputs.hf_processor_mm_kwargs, hf_processor_mm_kwargs=processor_inputs.hf_processor_mm_kwargs,
tokenization_kwargs=processor_inputs.tokenization_kwargs, tokenization_kwargs=processor_inputs.tokenization_kwargs,
)["mm_kwargs"].require_data() )["mm_kwargs"].require_data()
items = [item for modality in supported_mm_limits for item in mm_kwargs[modality]]
return group_mm_kwargs_by_modality( return group_mm_kwargs_by_modality(
items, [item for modality in supported_mm_limits for item in mm_kwargs[modality]]
merge_by_field_config=model_cls.merge_by_field_config,
) )
......
...@@ -47,6 +47,12 @@ QWEN2_CONFIG = GGUFTestConfig( ...@@ -47,6 +47,12 @@ QWEN2_CONFIG = GGUFTestConfig(
gguf_filename="qwen2.5-1.5b-instruct-q6_k.gguf", gguf_filename="qwen2.5-1.5b-instruct-q6_k.gguf",
) )
QWEN3_CONFIG = GGUFTestConfig(
original_model="Qwen/Qwen3-0.6B",
gguf_repo="unsloth/Qwen3-0.6B-GGUF",
gguf_filename="Qwen3-0.6B-BF16.gguf",
)
PHI3_CONFIG = GGUFTestConfig( PHI3_CONFIG = GGUFTestConfig(
original_model="microsoft/Phi-3.5-mini-instruct", original_model="microsoft/Phi-3.5-mini-instruct",
gguf_repo="bartowski/Phi-3.5-mini-instruct-GGUF", gguf_repo="bartowski/Phi-3.5-mini-instruct-GGUF",
...@@ -87,6 +93,7 @@ GEMMA3_CONFIG = GGUFTestConfig( ...@@ -87,6 +93,7 @@ GEMMA3_CONFIG = GGUFTestConfig(
MODELS = [ MODELS = [
# LLAMA_CONFIG, # broken: https://github.com/vllm-project/vllm/issues/19458 # LLAMA_CONFIG, # broken: https://github.com/vllm-project/vllm/issues/19458
QWEN2_CONFIG, QWEN2_CONFIG,
QWEN3_CONFIG,
PHI3_CONFIG, PHI3_CONFIG,
GPT2_CONFIG, GPT2_CONFIG,
STABLELM_CONFIG, STABLELM_CONFIG,
......
...@@ -211,10 +211,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -211,10 +211,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
trust_remote_code=True, trust_remote_code=True,
), ),
"CohereForCausalLM": _HfExamplesInfo( "CohereForCausalLM": _HfExamplesInfo(
"CohereForAI/c4ai-command-r-v01", trust_remote_code=True "CohereLabs/c4ai-command-r-v01", trust_remote_code=True
), ),
"Cohere2ForCausalLM": _HfExamplesInfo( "Cohere2ForCausalLM": _HfExamplesInfo(
"CohereForAI/c4ai-command-r7b-12-2024", "CohereLabs/c4ai-command-r7b-12-2024",
trust_remote_code=True, trust_remote_code=True,
), ),
"CwmForCausalLM": _HfExamplesInfo("facebook/cwm", min_transformers_version="4.58"), "CwmForCausalLM": _HfExamplesInfo("facebook/cwm", min_transformers_version="4.58"),
...@@ -416,7 +416,11 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -416,7 +416,11 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
trust_remote_code=True, trust_remote_code=True,
), ),
"Qwen2ForCausalLM": _HfExamplesInfo( "Qwen2ForCausalLM": _HfExamplesInfo(
"Qwen/Qwen2-0.5B-Instruct", extras={"2.5": "Qwen/Qwen2.5-0.5B-Instruct"} "Qwen/Qwen2-0.5B-Instruct",
extras={
"2.5": "Qwen/Qwen2.5-0.5B-Instruct",
"2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
},
), ),
"Qwen2MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen1.5-MoE-A2.7B-Chat"), "Qwen2MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen1.5-MoE-A2.7B-Chat"),
"Qwen3ForCausalLM": _HfExamplesInfo("Qwen/Qwen3-8B"), "Qwen3ForCausalLM": _HfExamplesInfo("Qwen/Qwen3-8B"),
...@@ -577,7 +581,7 @@ _AUTOMATIC_CONVERTED_MODELS = { ...@@ -577,7 +581,7 @@ _AUTOMATIC_CONVERTED_MODELS = {
_MULTIMODAL_EXAMPLE_MODELS = { _MULTIMODAL_EXAMPLE_MODELS = {
# [Decoder-only] # [Decoder-only]
"AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"), "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
"AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereForAI/aya-vision-8b"), "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"),
"BeeForConditionalGeneration": _HfExamplesInfo( "BeeForConditionalGeneration": _HfExamplesInfo(
"Open-Bee/Bee-8B-RL", "Open-Bee/Bee-8B-RL",
trust_remote_code=True, trust_remote_code=True,
...@@ -667,6 +671,10 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -667,6 +671,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"moonshotai/Kimi-VL-A3B-Instruct", "moonshotai/Kimi-VL-A3B-Instruct",
extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"}, extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"},
trust_remote_code=True, trust_remote_code=True,
max_transformers_version="4.53.3",
transformers_version_reason="HF model uses deprecated transformers API "
"(PytorchGELUTanh, DynamicCache.seen_tokens, and more). See: "
"https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/31",
), ),
"LightOnOCRForConditionalGeneration": _HfExamplesInfo( "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
"lightonai/LightOnOCR-1B", "lightonai/LightOnOCR-1B",
...@@ -767,10 +775,6 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -767,10 +775,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Phi4MMForCausalLM": _HfExamplesInfo( "Phi4MMForCausalLM": _HfExamplesInfo(
"microsoft/Phi-4-multimodal-instruct", trust_remote_code=True "microsoft/Phi-4-multimodal-instruct", trust_remote_code=True
), ),
"Phi4MultimodalForCausalLM": _HfExamplesInfo(
"microsoft/Phi-4-multimodal-instruct",
revision="refs/pr/70",
),
"PixtralForConditionalGeneration": _HfExamplesInfo( "PixtralForConditionalGeneration": _HfExamplesInfo(
"mistralai/Pixtral-12B-2409", "mistralai/Pixtral-12B-2409",
extras={ extras={
...@@ -831,7 +835,10 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -831,7 +835,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"), "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"),
"Tarsier2ForConditionalGeneration": _HfExamplesInfo( "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
"omni-research/Tarsier2-Recap-7b", "omni-research/Tarsier2-Recap-7b",
hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]}, hf_overrides={
"architectures": ["Tarsier2ForConditionalGeneration"],
"model_type": "tarsier2",
},
), ),
"VoxtralForConditionalGeneration": _HfExamplesInfo( "VoxtralForConditionalGeneration": _HfExamplesInfo(
"mistralai/Voxtral-Mini-3B-2507", "mistralai/Voxtral-Mini-3B-2507",
......
...@@ -203,7 +203,7 @@ class TestGGUFModelLoader: ...@@ -203,7 +203,7 @@ class TestGGUFModelLoader:
@patch("vllm.config.model.get_hf_image_processor_config", return_value=None) @patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
@patch("vllm.config.model.get_config") @patch("vllm.config.model.get_config")
@patch("vllm.config.model.is_gguf", return_value=False) @patch("vllm.config.model.is_gguf", return_value=False)
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False) @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
@patch("os.path.isfile", return_value=False) @patch("os.path.isfile", return_value=False)
def test_prepare_weights_invalid_format( def test_prepare_weights_invalid_format(
self, self,
......
...@@ -13,7 +13,6 @@ from vllm.model_executor.models import ( ...@@ -13,7 +13,6 @@ from vllm.model_executor.models import (
) )
from vllm.model_executor.models.adapters import ( from vllm.model_executor.models.adapters import (
as_embedding_model, as_embedding_model,
as_reward_model,
as_seq_cls_model, as_seq_cls_model,
) )
from vllm.model_executor.models.registry import ( from vllm.model_executor.models.registry import (
...@@ -46,7 +45,6 @@ def test_registry_imports(model_arch): ...@@ -46,7 +45,6 @@ def test_registry_imports(model_arch):
# All vLLM models should be convertible to a pooling model # All vLLM models should be convertible to a pooling model
assert is_pooling_model(as_seq_cls_model(model_cls)) assert is_pooling_model(as_seq_cls_model(model_cls))
assert is_pooling_model(as_embedding_model(model_cls)) assert is_pooling_model(as_embedding_model(model_cls))
assert is_pooling_model(as_reward_model(model_cls))
if model_arch in _MULTIMODAL_MODELS: if model_arch in _MULTIMODAL_MODELS:
assert supports_multimodal(model_cls) assert supports_multimodal(model_cls)
......
...@@ -51,7 +51,7 @@ def _dummy_elem( ...@@ -51,7 +51,7 @@ def _dummy_elem(
modality=modality, modality=modality,
key=key, key=key,
data=data, data=data,
field=MultiModalSharedField(1), field=MultiModalSharedField(batch_size=1),
) )
...@@ -85,12 +85,6 @@ def _dummy_items( ...@@ -85,12 +85,6 @@ def _dummy_items(
(_dummy_item("a", {"a1": 100}), 100), (_dummy_item("a", {"a1": 100}), 100),
(_dummy_item("a", {"a1": 100, "a2": 110}), 210), (_dummy_item("a", {"a1": 100, "a2": 110}), 210),
(_dummy_items({"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}), 460), # noqa: E501 (_dummy_items({"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}), 460), # noqa: E501
(
_dummy_items(
{"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}
).get_data(),
460,
), # noqa: E501
], ],
) )
def test_cache_item_size(item, expected_size): def test_cache_item_size(item, expected_size):
...@@ -107,6 +101,9 @@ def test_cache_item_size(item, expected_size): ...@@ -107,6 +101,9 @@ def test_cache_item_size(item, expected_size):
cache[""] = MultiModalProcessorCacheItemMetadata(item, [prompt_update]) cache[""] = MultiModalProcessorCacheItemMetadata(item, [prompt_update])
assert cache.currsize == expected_size assert cache.currsize == expected_size
cache[""] = item.get_data()
assert cache.currsize == expected_size
def _create_vllm_config( def _create_vllm_config(
*, *,
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors
pytestmark = pytest.mark.cpu_test
def assert_nested_tensors_equal(expected: NestedTensors, actual: NestedTensors):
assert type(expected) == type(actual) # noqa: E721
if isinstance(expected, torch.Tensor):
assert torch.equal(expected, actual)
else:
for expected_item, actual_item in zip(expected, actual):
assert_nested_tensors_equal(expected_item, actual_item)
def assert_multimodal_inputs_equal(
expected: MultiModalKwargs, actual: MultiModalKwargs
):
assert set(expected.keys()) == set(actual.keys())
for key in expected:
assert_nested_tensors_equal(expected[key], actual[key])
def test_multimodal_input_batch_single_tensor():
t = torch.rand([1, 2])
result = MultiModalKwargs.batch([{"image": t}])
assert_multimodal_inputs_equal(result, {"image": t.unsqueeze(0)})
def test_multimodal_input_batch_multiple_tensors():
a = torch.rand([1, 1, 2])
b = torch.rand([1, 1, 2])
c = torch.rand([1, 1, 2])
result = MultiModalKwargs.batch([{"image": a}, {"image": b}, {"image": c}])
assert_multimodal_inputs_equal(result, {"image": torch.stack([a, b, c])})
def test_multimodal_input_batch_multiple_heterogeneous_tensors():
a = torch.rand([1, 2, 2])
b = torch.rand([1, 3, 2])
c = torch.rand([1, 4, 2])
result = MultiModalKwargs.batch([{"image": a}, {"image": b}, {"image": c}])
assert_multimodal_inputs_equal(result, {"image": [a, b, c]})
def test_multimodal_input_batch_nested_tensors():
a = torch.rand([2, 3])
b = torch.rand([2, 3])
c = torch.rand([2, 3])
result = MultiModalKwargs.batch([{"image": [a]}, {"image": [b]}, {"image": [c]}])
assert_multimodal_inputs_equal(
result, {"image": torch.stack([a.unsqueeze(0), b.unsqueeze(0), c.unsqueeze(0)])}
)
def test_multimodal_input_batch_heterogeneous_lists():
a = torch.rand([1, 2, 3])
b = torch.rand([1, 2, 3])
c = torch.rand([1, 2, 3])
result = MultiModalKwargs.batch([{"image": [a, b]}, {"image": [c]}])
assert_multimodal_inputs_equal(
result, {"image": [torch.stack([a, b]), c.unsqueeze(0)]}
)
def test_multimodal_input_batch_multiple_batchable_lists():
a = torch.rand([1, 2, 3])
b = torch.rand([1, 2, 3])
c = torch.rand([1, 2, 3])
d = torch.rand([1, 2, 3])
result = MultiModalKwargs.batch([{"image": [a, b]}, {"image": [c, d]}])
assert_multimodal_inputs_equal(
result, {"image": torch.stack([torch.stack([a, b]), torch.stack([c, d])])}
)
def test_multimodal_input_batch_mixed_stacking_depths():
a = torch.rand([1, 2, 3])
b = torch.rand([1, 3, 3])
c = torch.rand([1, 4, 3])
result = MultiModalKwargs.batch([{"image": [a, b]}, {"image": [c]}])
assert_multimodal_inputs_equal(result, {"image": [[a, b], c.unsqueeze(0)]})
result = MultiModalKwargs.batch([{"image": [a]}, {"image": [b, c]}])
assert_multimodal_inputs_equal(result, {"image": [a.unsqueeze(0), [b, c]]})
...@@ -30,5 +30,6 @@ class DummyPlatform(Platform): ...@@ -30,5 +30,6 @@ class DummyPlatform(Platform):
use_mla, use_mla,
has_sink, has_sink,
use_sparse, use_sparse,
use_mm_prefix,
): ):
return "vllm_add_dummy_platform.dummy_attention_backend.DummyAttentionBackend" # noqa E501 return "vllm_add_dummy_platform.dummy_attention_backend.DummyAttentionBackend" # noqa E501
...@@ -10,10 +10,14 @@ import torch ...@@ -10,10 +10,14 @@ import torch
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.model_executor.layers.quantization.fp8 import ( from vllm.model_executor.layers.quantization.fp8 import (
Fp8Config,
Fp8KVCacheMethod, Fp8KVCacheMethod,
Fp8LinearMethod, Fp8LinearMethod,
Fp8MoEMethod,
) )
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.platforms import current_platform from vllm.platforms import current_platform
MODELS = [ MODELS = [
...@@ -261,3 +265,87 @@ def test_scaled_fp8_quant(dtype) -> None: ...@@ -261,3 +265,87 @@ def test_scaled_fp8_quant(dtype) -> None:
torch.narrow(y_nc_pad, 0, 0, x_nc.shape[0]), inv_scale_nc, dtype torch.narrow(y_nc_pad, 0, 0, x_nc.shape[0]), inv_scale_nc, dtype
), ),
) )
@pytest.mark.parametrize("method_cls", [Fp8LinearMethod, Fp8MoEMethod])
# FP8 weight reloading does not support online quantization
@pytest.mark.parametrize("is_checkpoint_fp8_serialized", [True]) # skip False
@pytest.mark.parametrize("weight_block_size", [None, [1, 1]])
# any postprocessing that is applied to the weights such as padding and repacking
# (excluding device sharding) must also be applied to the reloaded weights
#
# this is the case for marlin as well as per-tensor Fp8MoEMethod
@pytest.mark.parametrize("use_marlin", [False]) # skip True
def test_fp8_reloading(
method_cls, is_checkpoint_fp8_serialized, weight_block_size, use_marlin, dist_init
):
if is_checkpoint_fp8_serialized is False:
pytest.skip("FP8 weight reloading does not support online quantization")
if method_cls is Fp8MoEMethod and weight_block_size is None:
pytest.skip(
"FP8 Tensor weight reloading does not support fusing w13_weight_scale. "
"If this is your use case, consider using a restore function like #26327"
)
with torch.device("cuda:0"):
config = Fp8Config(
is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized,
weight_block_size=weight_block_size,
)
if method_cls is Fp8LinearMethod:
layer = torch.nn.Linear(1, 1)
method = method_cls(config)
method.create_weights(
layer=layer,
input_size_per_partition=1,
output_partition_sizes=[1],
input_size=1,
output_size=1,
params_dtype=torch.bfloat16,
weight_loader=default_weight_loader,
)
else:
layer = FusedMoE(
num_experts=1,
top_k=1,
hidden_size=1,
intermediate_size=1,
)
method = method_cls(config, layer)
method.create_weights(
layer=layer,
num_experts=1,
hidden_size=1,
intermediate_size_per_partition=1,
params_dtype=torch.bfloat16,
weight_loader=default_weight_loader,
)
method.use_marlin = use_marlin
# capture weights format during loading
original_metadata = [
(name, param.shape, getattr(param, "weight_loader", default_weight_loader))
for name, param in layer.named_parameters()
]
# test loading
for name, shape, _ in original_metadata:
param = getattr(layer, name)
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, torch.zeros(shape)) # cannot use empty
method.process_weights_after_loading(layer)
# test reloading works after loading
# assuming that no reshaping occurred
for name, shape, original_weight_loader in original_metadata:
param = getattr(layer, name)
weight_loader = getattr(param, "weight_loader", default_weight_loader)
assert weight_loader is original_weight_loader
weight_loader(param, torch.zeros(shape)) # cannot use empty
method.process_weights_after_loading(layer)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment