Commit ad58e9b3 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.6.1.post2' into v0.6.1.post2-dev

parents 408f663a 9ba0817f
......@@ -6,11 +6,9 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from ..conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import check_logprobs_close
_LIMIT_IMAGE_PER_PROMPT = 4
......@@ -197,7 +195,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype, max_tokens, num_logprobs) -> None:
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......
......@@ -8,10 +8,8 @@ from vllm.multimodal.utils import (rescale_video_size, resize_video,
sample_frames_from_video)
from vllm.sequence import SampleLogprobs
from ..conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
from ...utils import check_logprobs_close
_PREFACE = (
"A chat between a curious human and an artificial intelligence assistant. "
......
......@@ -9,10 +9,8 @@ from transformers import BatchEncoding
from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner
from ...utils import check_logprobs_close
# The image token is placed before "user" on purpose so that the test can pass
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
......@@ -65,7 +63,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......
......@@ -8,10 +8,8 @@ from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from vllm.utils import is_hip
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -69,7 +67,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......
......@@ -9,10 +9,8 @@ from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from vllm.utils import is_cpu, is_hip
from ..conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -71,7 +69,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
"""
import json
import uuid
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
import pytest
from mistral_common.protocol.instruct.messages import ImageURLChunk
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
from vllm import EngineArgs, LLMEngine, SamplingParams, TokensPrompt
from vllm.multimodal import MultiModalDataBuiltins
from vllm.sequence import Logprob, SampleLogprobs
from ....utils import VLLM_PATH
from ...utils import check_logprobs_close
if TYPE_CHECKING:
from _typeshed import StrPath
# Model checkpoints that the tests in this module are parametrized over.
MODELS = ["mistralai/Pixtral-12B-2409"]
# Image URLs attached to the prompts; MSGS and ENGINE_INPUTS are built from
# the first one, the first two, and all four of them.
IMG_URLS = [
"https://picsum.photos/id/237/400/300",
"https://picsum.photos/id/231/200/300",
"https://picsum.photos/id/27/500/500",
"https://picsum.photos/id/17/150/600",
]
# Text chunk that _create_msg_format places ahead of the image chunks.
PROMPT = "Describe each image in one short sentence."
def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
    """Build a single-turn chat conversation that asks about the given images.

    Args:
        urls: Image URLs to attach to the user message, in order.

    Returns:
        A one-element list holding a ``user`` message whose content is the
        ``PROMPT`` text chunk followed by one ``image_url`` chunk per URL.
    """
    content: List[Dict[str, Any]] = [{"type": "text", "text": PROMPT}]
    for image_url in urls:
        content.append({
            "type": "image_url",
            "image_url": {
                "url": image_url
            },
        })
    return [{"role": "user", "content": content}]
def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
    """Turn a chat request about the given images into an engine prompt.

    The conversation produced by ``_create_msg_format`` is encoded with the
    "pixtral" Mistral tokenizer, and every image chunk of the first message is
    converted with ``image_from_chunk`` and attached as multi-modal data.

    Args:
        urls: Image URLs to include in the request.

    Returns:
        A ``TokensPrompt`` carrying the prompt token ids together with the
        images under ``multi_modal_data``.
    """
    messages = _create_msg_format(urls)
    mistral_tokenizer = MistralTokenizer.from_model("pixtral")
    request = ChatCompletionRequest(messages=messages)  # type: ignore[type-var]
    token_ids = mistral_tokenizer.encode_chat_completion(request).tokens

    engine_inputs = TokensPrompt(prompt_token_ids=token_ids)

    # Only the image chunks of the (single) user message become images.
    images = [
        image_from_chunk(chunk) for chunk in request.messages[0].content
        if isinstance(chunk, ImageURLChunk)
    ]
    engine_inputs["multi_modal_data"] = MultiModalDataBuiltins(image=images)
    return engine_inputs
# Chat-format conversations carrying one, two, and four images respectively.
MSGS = [
_create_msg_format(IMG_URLS[:1]),
_create_msg_format(IMG_URLS[:2]),
_create_msg_format(IMG_URLS),
]
# The same three image subsets as tokenized engine prompts.
# NOTE(review): these are built at import time, and _create_engine_inputs calls
# image_from_chunk on URL chunks - presumably that fetches the images over the
# network while the module is being imported; confirm.
ENGINE_INPUTS = [
_create_engine_inputs(IMG_URLS[:1]),
_create_engine_inputs(IMG_URLS[:2]),
_create_engine_inputs(IMG_URLS),
]
# Sampling settings shared by every request in this module; temperature 0.0
# matches the "greedy sampling" stated in the module docstring.
SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5)
# Passed as limit_mm_per_prompt when the model / engine is constructed.
LIMIT_MM_PER_PROMPT = dict(image=4)
# Context lengths that test_chat is parametrized over.
MAX_MODEL_LEN = [8192, 65536]
# Directory holding the stored reference outputs; importing this module fails
# on the assert below if the directory is missing.
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
assert FIXTURES_PATH.exists()
# Reference outputs for test_chat and test_model_engine, respectively.
FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
# One (token ids, text, optional per-token logprobs) triple per output.
OutputsLogprobs = List[Tuple[List[int], str, Optional[SampleLogprobs]]]
# For the test author to store golden output in JSON
def _dump_outputs_w_logprobs(
    outputs: OutputsLogprobs,
    filename: "StrPath",
) -> None:
    """Serialize ``(tokens, text, logprobs)`` triples to a JSON file.

    Args:
        outputs: Triples to store. A ``None`` logprobs entry is written as an
            empty list; every logprob object is converted with ``asdict``.
        filename: Path of the JSON file to (over)write.
    """
    serializable = []
    for token_ids, text, logprobs in outputs:
        per_position = []
        for position_logprobs in (logprobs or []):
            per_position.append({
                token_id: asdict(logprob)
                for token_id, logprob in position_logprobs.items()
            })
        serializable.append((token_ids, text, per_position))

    with open(filename, "w") as fp:
        json.dump(serializable, fp)
def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs:
    """Read ``(tokens, text, logprobs)`` triples back from a JSON file.

    JSON object keys are strings, so each token-id key is converted back to
    ``int`` and each stored mapping is expanded into a ``Logprob``.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        One ``(tokens, text, logprobs)`` tuple per stored output.
    """
    with open(filename, "rb") as fp:
        stored_outputs = json.load(fp)

    loaded = []
    for token_ids, text, logprobs in stored_outputs:
        per_position = [{
            int(token_id): Logprob(**fields)
            for token_id, fields in position_logprobs.items()
        } for position_logprobs in logprobs]
        loaded.append((token_ids, text, per_position))
    return loaded
@pytest.mark.skip(
    reason=
    "Model is too big, test passed on A100 locally but will OOM on CI machine."
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN)
@pytest.mark.parametrize("dtype", ["bfloat16"])
def test_chat(
    vllm_runner,
    max_model_len: int,
    model: str,
    dtype: str,
) -> None:
    """Check chat outputs for every conversation in MSGS against the fixture.

    Each conversation is run through ``vllm_model.model.chat`` and the
    collected outputs are compared with the reference stored in
    ``FIXTURE_LOGPROBS_CHAT`` via ``check_logprobs_close``.
    """
    expected_logprobs = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT)

    runner_kwargs = dict(
        dtype=dtype,
        tokenizer_mode="mistral",
        enable_chunked_prefill=False,
        max_model_len=max_model_len,
        limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
    )
    with vllm_runner(model, **runner_kwargs) as vllm_model:
        outputs = [
            request_output for msg in MSGS
            for request_output in vllm_model.model.chat(
                msg, sampling_params=SAMPLING_PARAMS)
        ]

    actual_logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
    check_logprobs_close(outputs_0_lst=expected_logprobs,
                         outputs_1_lst=actual_logprobs,
                         name_0="h100_ref",
                         name_1="output")
@pytest.mark.skip(
    reason=
    "Model is too big, test passed on A100 locally but will OOM on CI machine."
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
def test_model_engine(vllm_runner, model: str, dtype: str) -> None:
    """Drive ``LLMEngine`` step by step and compare outputs with the fixture.

    Two requests are queued up front and a third one is added after the
    second engine step. Finished request outputs are collected in the order
    they complete and compared with ``FIXTURE_LOGPROBS_ENGINE``.
    """
    expected_logprobs = load_outputs_w_logprobs(FIXTURE_LOGPROBS_ENGINE)

    engine_args = EngineArgs(
        model=model,
        tokenizer_mode="mistral",
        enable_chunked_prefill=False,
        limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
        dtype=dtype,
    )
    engine = LLMEngine.from_engine_args(engine_args)

    for initial_inputs in (ENGINE_INPUTS[0], ENGINE_INPUTS[1]):
        engine.add_request(uuid.uuid4().hex, initial_inputs, SAMPLING_PARAMS)

    finished_outputs = []
    num_steps = 0
    while True:
        step_outputs = engine.step()
        num_steps += 1
        finished_outputs.extend(request_output
                                for request_output in step_outputs
                                if request_output.finished)

        # The third request joins while the first two are already in flight.
        if num_steps == 2:
            engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[2],
                               SAMPLING_PARAMS)
        if not engine.has_unfinished_requests():
            break

    actual_logprobs = vllm_runner._final_steps_generate_w_logprobs(
        finished_outputs)
    check_logprobs_close(outputs_0_lst=expected_logprobs,
                         outputs_1_lst=actual_logprobs,
                         name_0="h100_ref",
                         name_1="output")
import pathlib
from typing import Dict, List, Optional, Tuple, Type, Union
import pytest
import torch
from PIL.Image import Image
from vllm.config import ModelConfig
from vllm.inputs import InputContext, LLMInputs
from vllm.multimodal.base import MultiModalInputs
from vllm.multimodal.utils import cached_get_tokenizer, rescale_image_size
from ....conftest import (IMAGE_ASSETS, HfRunner, ImageAsset, PromptImageInput,
VllmRunner, _ImageAssets)
from ...utils import check_logprobs_close
# Models exercised only by the text-only load-and-run test.
text_only_models = [
"Qwen/Qwen-7B-Chat" # Has no visual component
]
# Models exercised by the single-image and multi-image end-to-end tests.
multimodal_models = ["Qwen/Qwen-VL"]
# Per-asset prompts; each carries one empty <img></img> placeholder that
# get_prompt_with_path fills with a local image path.
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
"Picture 1: <img></img>\nWhat's the content of the image?: ",
"cherry_blossom":
"Picture 1: <img></img>\nWhat is the season?: ",
})
# Two-image prompt used by the multi-image test; both <img></img> placeholders
# are filled with local image paths by get_prompt_with_path.
HF_MULTIIMAGE_IMAGE_PROMPT = "Picture 1: <img></img>\nPicture 2: <img></img>\nDescribe the two images in detail.\n"  # noqa: E501
### Multimodal preprocessing tests
# Image used as the raw (PIL) input in the input mapper tests.
SAMPLE_IMAGE = IMAGE_ASSETS[0].pil_image
# These values are specific to Qwen-VL/Chat; we can get these from the model
# config also, but they are hardcoded here to keep the parameterize/fixtures
# easy to read.
# Token ids counted by the input processor test: image start, end and padding.
IMG_START_ID = 151857
IMG_END_ID = 151858
IMG_PAD_ID = 151859
# Number of IMG_PAD_ID tokens the processor test expects per image.
TOKS_PER_IMG = 256
# Last dimension of the image-embedding tensors used in the mapper tests.
VIS_ENC_DIM = 4096
# Height and width of the pixel_values the mapper test expects for PIL inputs.
IMG_SIZE = 448
def build_model_context(model_name: str,
                        tokenizer_name: Optional[str] = None,
                        trust_remote_code: bool = False):
    """Create an ``InputContext`` wrapping a ``ModelConfig`` for a model.

    Args:
        model_name: Name of the model being considered.
        tokenizer_name: Name of the tokenizer being considered; the model
            name is used when this is ``None``.
        trust_remote_code: Whether or not to allow loading remote code.

    Returns:
        InputContext for the model being considered.
    """
    resolved_tokenizer = (model_name
                          if tokenizer_name is None else tokenizer_name)
    return InputContext(
        ModelConfig(
            model_name,
            resolved_tokenizer,
            tokenizer_mode="auto",
            trust_remote_code=trust_remote_code,
            dtype="float32",
            seed=0,
        ))
@pytest.fixture()
def input_mapper_for_qwen():
    """Provide Qwen's ``input_mapper_for_qwen`` callable to tests."""
    # Lazy import to avoid initializing CUDA during test collection
    from vllm.model_executor.models import qwen as qwen_module
    return qwen_module.input_mapper_for_qwen
@pytest.fixture()
def input_processor_for_qwen():
    """Provide Qwen's ``input_processor_for_qwen`` callable to tests."""
    # Lazy import to avoid initializing CUDA during test collection
    from vllm.model_executor.models import qwen as qwen_module
    return qwen_module.input_processor_for_qwen
@pytest.fixture()
def qwen_vl_context() -> InputContext:
    """Provide an InputContext built for "Qwen/Qwen-VL" with remote code on."""
    context = build_model_context(
        model_name="Qwen/Qwen-VL",
        trust_remote_code=True,
    )
    return context
# Happy path tests for single/multi-image scenarios for the multimodal
# input processor and mapper, respectively
@pytest.mark.parametrize("num_images", [1, 2])
def test_input_processor_valid_mm_data(input_processor_for_qwen,
                                       qwen_vl_context: InputContext,
                                       num_images: int):
    """Happy cases for image inputs to Qwen's multimodal input processor.

    Builds a prompt with ``num_images`` empty ``<img></img>`` placeholders,
    passes image embeddings as multi-modal data, and checks the number of
    image start / end / pad token ids in the processed prompt.
    """
    prompt = "".join(f"Picture {num}: <img></img>\n"
                     for num in range(1, num_images + 1))
    inputs = LLMInputs(
        prompt=prompt,
        # When processing multimodal data for a multimodal model, the qwen
        # input processor will overwrite the provided prompt_token_ids with
        # the image prompts
        prompt_token_ids=None,
        # Use the shared VIS_ENC_DIM constant so the embedding width stays in
        # sync with the mapper tests instead of repeating the literal.
        multi_modal_data={
            "image": torch.rand(num_images, TOKS_PER_IMG, VIS_ENC_DIM)
        },
    )
    proc_inputs = input_processor_for_qwen(qwen_vl_context, inputs)
    assert isinstance(proc_inputs, dict)

    # Each image should have one start / stop and a fixed context of 256
    proc_tokens = proc_inputs["prompt_token_ids"]
    assert proc_tokens.count(IMG_START_ID) == num_images
    assert proc_tokens.count(IMG_END_ID) == num_images
    assert proc_tokens.count(IMG_PAD_ID) == num_images * TOKS_PER_IMG
@pytest.mark.parametrize(
    "img_data,expected_shape",
    [
        # single / multi-image
        (SAMPLE_IMAGE, (1, 3, IMG_SIZE, IMG_SIZE)),
        (2 * [SAMPLE_IMAGE], (2, 3, IMG_SIZE, IMG_SIZE)),
        # single / multi-image embeddings
        (torch.rand(
            (TOKS_PER_IMG, VIS_ENC_DIM)), (1, TOKS_PER_IMG, VIS_ENC_DIM)),
        (torch.rand(
            (1, TOKS_PER_IMG, VIS_ENC_DIM)), (1, TOKS_PER_IMG, VIS_ENC_DIM)),
        (torch.rand(
            (2, TOKS_PER_IMG, VIS_ENC_DIM)), (2, TOKS_PER_IMG, VIS_ENC_DIM)),
    ])
def test_input_mapper_valid_mm_data(input_mapper_for_qwen,
                                    qwen_vl_context: InputContext,
                                    img_data: Union[torch.Tensor, List[Image],
                                                    Image],
                                    expected_shape: Tuple[int, ...]):
    """Happy cases for image inputs to Qwen's multimodal input mapper.

    ``expected_shape`` is a tuple: every parametrized value above is one, and
    the ``.shape`` it is compared with only compares equal to a tuple.
    """
    mapped_img_data = input_mapper_for_qwen(qwen_vl_context, img_data)
    # Ensure that we get the appropriately shaped pixel_values
    # for images and image embeddings, respectively.
    assert isinstance(mapped_img_data, MultiModalInputs)
    assert "pixel_values" in mapped_img_data
    assert mapped_img_data["pixel_values"].shape == expected_shape
# Sad path tests for the multimodal input processor and mapper, respectively
@pytest.mark.parametrize(
    "mm_data",
    [
        {"image": torch.rand(5)},
        {"image": torch.rand((5, 5, 5, 5, 5))},
    ],
)
def test_input_processor_invalid_mm_data(input_processor_for_qwen,
                                         qwen_vl_context: InputContext,
                                         mm_data: Dict[str, torch.Tensor]):
    """Test sad cases validated in Qwen's multimodal input processor."""
    prompt = "Picture 1: <img></img>\n"
    qwen_tokenizer = cached_get_tokenizer(
        qwen_vl_context.model_config.tokenizer,
        trust_remote_code=True,
    )
    inputs = LLMInputs(
        prompt=prompt,
        prompt_token_ids=qwen_tokenizer.encode(prompt),
        multi_modal_data=mm_data,
    )
    # Should fail since we have too many or too few dimensions for embeddings
    with pytest.raises(ValueError):
        input_processor_for_qwen(qwen_vl_context, inputs)
@pytest.mark.parametrize(
    "img_data",
    [
        # Wrong context length
        torch.rand((1, TOKS_PER_IMG + 10, VIS_ENC_DIM)),
        # Wrong visual encoder output size
        torch.rand((1, TOKS_PER_IMG, VIS_ENC_DIM + 10)),
    ],
)
def test_input_mapper_invalid_mm_data(
    input_mapper_for_qwen,
    qwen_vl_context: InputContext,
    img_data: Union[torch.Tensor, List[Image], Image],
):
    """Embeddings with a bad shape must make Qwen VL's input mapper raise."""
    with pytest.raises(ValueError):
        input_mapper_for_qwen(qwen_vl_context, img_data)
### End-to-end generation tests
def get_prompt_with_path(tmp_path: pathlib.Path, prompt: str,
                         assets: Union[_ImageAssets, List[ImageAsset]]) -> str:
    """Export image assets to disk and point the prompt's placeholders at them.

    Each asset is saved as ``<asset.name>.jpg`` under ``tmp_path`` and the
    empty ``<img></img>`` placeholders in ``prompt`` are filled, in order,
    with the resulting local paths, so that the HF version of Qwen-VL can
    resolve the path and load the image in its forward() call.

    Args:
        tmp_path: Tempdir for test under consideration.
        prompt: Prompt with image placeholders.
        assets: List of image assets whose len equals the num placeholders.

    Returns:
        The prompt with every placeholder replaced by ``<img>{path}</img>``.
    """
    # Ensure that the number of placeholders matches the number of assets;
    # If this is not true, the test is probably written incorrectly.
    assert prompt.count("<img></img>") == len(assets)

    # Replace the placeholders with local paths to the exported assets
    for asset in assets:
        image_tmp_path = tmp_path / f"{asset.name}.jpg"
        asset.pil_image.save(image_tmp_path)
        # count=1 so each asset consumes only the next unfilled placeholder.
        prompt = prompt.replace(
            "<img></img>",
            f"<img>{image_tmp_path}</img>",
            1,
        )
    return prompt
def run_test(
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
    inputs: List[Tuple[List[str], PromptImageInput]],
    model: str,
    *,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
    mm_limit: int,
    tensor_parallel_size: int,
    distributed_executor_backend: Optional[str] = None,
):
    """Check that greedy logprobs from vLLM and HF are close for each input.

    Every ``(prompts, images)`` pair in ``inputs`` is generated once with the
    vLLM runner and once with the HF runner; the paired results are then
    compared with ``check_logprobs_close``.
    """
    # NOTE: take care of the order. run vLLM first, and then run HF.
    # vLLM needs a fresh new process without cuda initialization.
    # if we run HF first, the cuda initialization will be done and it
    # will hurt multiprocessing backend with fork method (the default method).

    # max_model_len should be greater than image_feature_size
    # Qwen encodes each image into a fixed content size of 256
    vllm_kwargs = dict(
        max_model_len=1024,
        max_num_seqs=1,
        dtype=dtype,
        limit_mm_per_prompt={"image": mm_limit},
        tensor_parallel_size=tensor_parallel_size,
        distributed_executor_backend=distributed_executor_backend,
        enforce_eager=True,
    )
    with vllm_runner(model, **vllm_kwargs) as vllm_model:
        vllm_results = []
        for prompts, images in inputs:
            vllm_results.append(
                vllm_model.generate_greedy_logprobs(
                    prompts,
                    max_tokens,
                    num_logprobs=num_logprobs,
                    images=images,
                ))

    with hf_runner(model, dtype=dtype) as hf_model:
        hf_results = []
        for prompts, images in inputs:
            hf_results.append(
                hf_model.generate_greedy_logprobs_limit(
                    prompts,
                    max_tokens,
                    num_logprobs=num_logprobs,
                    images=images,
                ))

    for hf_outputs, vllm_outputs in zip(hf_results, vllm_results):
        check_logprobs_close(
            outputs_0_lst=hf_outputs,
            outputs_1_lst=vllm_outputs,
            name_0="hf",
            name_1="vllm",
        )
@pytest.mark.parametrize("model", multimodal_models)
@pytest.mark.parametrize(
    "size_factors",
    [
        # No image
        [],
        # Single-scale
        [1.0],
        # Single-scale, batched
        [1.0, 1.0, 1.0],
        # Multi-scale
        [0.25, 0.5, 1.0],
    ],
)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [8])
@pytest.mark.parametrize("num_logprobs", [5])
def test_multimodal_models_single_image(tmp_path: pathlib.PosixPath,
                                        hf_runner: Type[HfRunner],
                                        vllm_runner: Type[VllmRunner],
                                        image_assets: _ImageAssets, model: str,
                                        size_factors: List[float], dtype: str,
                                        max_tokens: int,
                                        num_logprobs: int) -> None:
    """Tests multimodal models with single image prompts."""
    images = [asset.pil_image for asset in image_assets]

    # One prompt per asset, with its placeholder pointing at the exported file.
    prompts = []
    for hf_prompt, asset in zip(HF_IMAGE_PROMPTS, image_assets):
        prompts.append(get_prompt_with_path(tmp_path, hf_prompt, [asset]))

    # For each asset: the prompt repeated once per scale factor, paired with
    # the image rescaled by each factor.
    inputs = []
    for image, prompt in zip(images, prompts):
        repeated_prompts = [prompt] * len(size_factors)
        rescaled_images = [
            rescale_image_size(image, factor) for factor in size_factors
        ]
        inputs.append((repeated_prompts, rescaled_images))

    run_test(
        hf_runner,
        vllm_runner,
        inputs,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        mm_limit=1,
        tensor_parallel_size=1,
    )
@pytest.mark.parametrize("model", multimodal_models)
@pytest.mark.parametrize(
    "size_factors",
    [
        # No image
        [],
        # Single-scale
        [1.0],
        # Single-scale, batched
        [1.0, 1.0, 1.0],
        # Multi-scale
        [0.25, 0.5, 1.0],
    ],
)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
def test_multimodal_models_multi_image(tmp_path: pathlib.PosixPath,
                                       hf_runner: Type[HfRunner],
                                       vllm_runner: Type[VllmRunner],
                                       image_assets: _ImageAssets, model: str,
                                       size_factors: List[float], dtype: str,
                                       max_tokens: int,
                                       num_logprobs: int) -> None:
    """Tests multimodal models with multi-image prompts."""
    images = [asset.pil_image for asset in image_assets]

    # Put all of the images into one prompt.
    prompt = get_prompt_with_path(tmp_path, HF_MULTIIMAGE_IMAGE_PROMPT,
                                  image_assets)

    # A single input group: the prompt repeated once per scale factor, each
    # paired with the full image list rescaled by that factor.
    repeated_prompts = [prompt] * len(size_factors)
    rescaled_image_sets = []
    for factor in size_factors:
        rescaled_image_sets.append(
            [rescale_image_size(image, factor) for image in images])
    inputs = [(repeated_prompts, rescaled_image_sets)]

    run_test(
        hf_runner,
        vllm_runner,
        inputs,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        mm_limit=2,
        tensor_parallel_size=1,
    )
# Ensure that a text-only Qwen model can still be loaded and
# used for inference in VLLM without throwing.
@pytest.mark.parametrize("model", text_only_models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
def test_text_only_qwen_model_can_be_loaded_and_run(
    vllm_runner: Type[VllmRunner],
    example_prompts: List[str],
    model: str,
    *,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
):
    """Smoke test: load a text-only Qwen model and generate without errors.

    The generated outputs are not inspected; the test passes as long as
    loading and generation do not raise.
    """
    with vllm_runner(model, dtype=dtype) as text_only_model:
        text_only_model.generate_greedy_logprobs(example_prompts,
                                                 max_tokens,
                                                 num_logprobs=num_logprobs)
"""Compare the outputs of HF and vLLM for BART models using greedy sampling.
Run `pytest tests/models/test_bart.py`.
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
"""
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Type
from vllm.utils import is_cpu
......@@ -16,8 +16,10 @@ if not is_cpu():
from vllm.sequence import SampleLogprobs
from ..conftest import DecoderPromptType
from .utils import check_logprobs_close
from ....conftest import (DecoderPromptType, ExplicitEncoderDecoderPrompt,
HfRunner, VllmRunner)
from ....utils import multi_gpu_test
from ...utils import check_logprobs_close
MODELS = ["facebook/bart-base", "facebook/bart-large-cnn"]
......@@ -34,20 +36,18 @@ if not is_cpu():
return output_ids, hf_output_str, out_logprobs
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float", "bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.parametrize("decoder_prompt_type", list(DecoderPromptType))
def test_models(
hf_runner,
vllm_runner,
example_encoder_decoder_prompts,
def run_test(
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
decoder_prompt_type: DecoderPromptType,
model: str,
*,
dtype: str,
max_tokens: int,
num_logprobs: int,
decoder_prompt_type: DecoderPromptType,
tensor_parallel_size: int,
distributed_executor_backend: Optional[str] = None,
) -> None:
'''
Test the vLLM BART model for a variety of encoder/decoder input prompts,
......@@ -116,8 +116,29 @@ if not is_cpu():
token during the process of validating the vLLM decoded output.
'''
test_case_prompts = example_encoder_decoder_prompts[
decoder_prompt_type]
# NOTE: take care of the order. run vLLM first, and then run HF.
# vLLM needs a fresh new process without cuda initialization.
# if we run HF first, the cuda initialization will be done and it
# will hurt multiprocessing backend with fork method (the default).
# Note: currently encoder/decoder models are only compatible with
# enforce_eager=True. Normally this is not a problem because
# for encoder/decoder models vLLM will
# default to enforce_eager=True if enforce_eager
# is left unspecified. However, the
# VllmRunner test fixture (which wraps around the LLM class) defaults to
# enforce_eager=False (a behavior which a number of already-existing
# decoder-only unit tests expect), so when testing an encoder/decoder
# model we must explicitly specify enforce_eager=True in the VllmRunner
# constructor.
with vllm_runner(
model,
dtype=dtype,
tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend,
enforce_eager=True) as vllm_model:
vllm_outputs = vllm_model.generate_encoder_decoder_greedy_logprobs(
prompts, max_tokens, num_logprobs)
# Configuration settings for HF baseline
hf_kwargs = {
......@@ -135,26 +156,12 @@ if not is_cpu():
auto_cls=AutoModelForSeq2SeqLM) as hf_model:
hf_outputs = (
hf_model.generate_encoder_decoder_greedy_logprobs_limit(
test_case_prompts,
prompts,
max_tokens,
num_logprobs,
**hf_kwargs,
))
# Note: currently encoder/decoder models are only compatible with
# enforce_eager=True. Normally this is not a problem because
# for encoder/decoder models vLLM will
# default to enforce_eager=True if enforce_eager
# is left unspecified. However, the
# VllmRunner test fixture (which wraps around the LLM class) defaults to
# enforce_eager=False (a behavior which a number of already-existing
# decoder-only unit tests expect), so when testing an encoder/decoder
# model we must explicitly specify enforce_eager=True in the VllmRunner
# constructor.
with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
vllm_outputs = vllm_model.generate_encoder_decoder_greedy_logprobs(
test_case_prompts, max_tokens, num_logprobs)
hf_skip_tokens = (1 if decoder_prompt_type == DecoderPromptType.NONE
else 0)
......@@ -168,3 +175,49 @@ if not is_cpu():
name_1="vllm",
num_outputs_0_skip_tokens=hf_skip_tokens,
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float", "bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.parametrize("decoder_prompt_type", list(DecoderPromptType))
def test_models(hf_runner, vllm_runner, example_encoder_decoder_prompts,
                model, dtype, max_tokens, num_logprobs,
                decoder_prompt_type) -> None:
    """Run the HF-vs-vLLM comparison with ``tensor_parallel_size=1``.

    Selects the prompts for the parametrized decoder prompt type and hands
    everything to ``run_test``.
    """
    prompts = example_encoder_decoder_prompts[decoder_prompt_type]
    run_test(
        hf_runner,
        vllm_runner,
        prompts,
        decoder_prompt_type,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        tensor_parallel_size=1,
    )
@multi_gpu_test(num_gpus=2)
@pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
@pytest.mark.parametrize("model", ["facebook/bart-large-cnn"])
@pytest.mark.parametrize("dtype", ["float"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.parametrize("decoder_prompt_type", [DecoderPromptType.CUSTOM])
def test_models_distributed(hf_runner, vllm_runner,
                            example_encoder_decoder_prompts,
                            distributed_executor_backend, model, dtype,
                            max_tokens, num_logprobs,
                            decoder_prompt_type) -> None:
    """Run the HF-vs-vLLM comparison with ``tensor_parallel_size=2``.

    Same flow as the single-GPU test, additionally forwarding the
    parametrized distributed executor backend to ``run_test``.
    """
    prompts = example_encoder_decoder_prompts[decoder_prompt_type]
    run_test(
        hf_runner,
        vllm_runner,
        prompts,
        decoder_prompt_type,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        tensor_parallel_size=2,
        distributed_executor_backend=distributed_executor_backend,
    )
[[[1784, 3937, 6122, 1261, 7244, 10575, 18970, 1408, 1261, 32656, 4691, 1046, 2], "The image shows a black dog sitting on a wooden surface.", [{"1784": {"logprob": -0.11687260121107101, "rank": 1, "decoded_token": "The"}, "4380": {"logprob": -2.366872549057007, "rank": 2, "decoded_token": "This"}, "1049": {"logprob": -4.741872787475586, "rank": 3, "decoded_token": "1"}, "117991": {"logprob": -5.991872787475586, "rank": 4, "decoded_token": "Certain"}, "1785": {"logprob": -5.991872787475586, "rank": 5, "decoded_token": "In"}}, {"3937": {"logprob": -0.28887900710105896, "rank": 1, "decoded_token": " image"}, "2158": {"logprob": -1.4138790369033813, "rank": 2, "decoded_token": " first"}, "3977": {"logprob": -5.788878917694092, "rank": 3, "decoded_token": " top"}, "7244": {"logprob": -6.163878917694092, "rank": 4, "decoded_token": " black"}, "8061": {"logprob": -6.788878917694092, "rank": 5, "decoded_token": " images"}}, {"6122": {"logprob": -0.9653709530830383, "rank": 1, "decoded_token": " shows"}, "51948": {"logprob": -1.4653708934783936, "rank": 2, "decoded_token": " depicts"}, "6971": {"logprob": -1.4653708934783936, "rank": 3, "decoded_token": " features"}, "25981": {"logprob": -2.8403708934783936, "rank": 4, "decoded_token": " displays"}, "8688": {"logprob": -2.8403708934783936, "rank": 5, "decoded_token": " contains"}}, {"1261": {"logprob": -0.003059827256947756, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -6.2530598640441895, "rank": 2, "decoded_token": " an"}, "2295": {"logprob": -7.8780598640441895, "rank": 3, "decoded_token": " two"}, "2342": {"logprob": -7.8780598640441895, "rank": 4, "decoded_token": " only"}, "1278": {"logprob": -8.628059387207031, "rank": 5, "decoded_token": " the"}}, {"7244": {"logprob": -0.17616479098796844, "rank": 1, "decoded_token": " black"}, "6231": {"logprob": -2.3011648654937744, "rank": 2, "decoded_token": " close"}, "4249": {"logprob": -3.4261648654937744, "rank": 3, "decoded_token": " single"}, "4329": {"logprob": 
-5.113664627075195, "rank": 4, "decoded_token": " large"}, "10575": {"logprob": -5.176164627075195, "rank": 5, "decoded_token": " dog"}}, {"10575": {"logprob": -0.10940006375312805, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.4844000339508057, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -4.109400272369385, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -7.296900272369385, "rank": 4, "decoded_token": " Lab"}, "7990": {"logprob": -7.421900272369385, "rank": 5, "decoded_token": " cat"}}, {"18970": {"logprob": -0.8322296738624573, "rank": 1, "decoded_token": " sitting"}, "1454": {"logprob": -1.5822296142578125, "rank": 2, "decoded_token": " with"}, "28528": {"logprob": -1.9572296142578125, "rank": 3, "decoded_token": " lying"}, "7283": {"logprob": -2.2072296142578125, "rank": 4, "decoded_token": " looking"}, "15866": {"logprob": -3.0197296142578125, "rank": 5, "decoded_token": " standing"}}, {"1408": {"logprob": -0.08769982308149338, "rank": 1, "decoded_token": " on"}, "1321": {"logprob": -3.7126998901367188, "rank": 2, "decoded_token": " and"}, "3675": {"logprob": -3.9626998901367188, "rank": 3, "decoded_token": " against"}, "41132": {"logprob": -4.587699890136719, "rank": 4, "decoded_token": " attent"}, "1454": {"logprob": -5.087699890136719, "rank": 5, "decoded_token": " with"}}, {"1261": {"logprob": -0.5400654673576355, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -0.9150654673576355, "rank": 2, "decoded_token": " wooden"}, "3977": {"logprob": -5.415065288543701, "rank": 3, "decoded_token": " top"}, "12603": {"logprob": -5.540065288543701, "rank": 4, "decoded_token": " wood"}, "44130": {"logprob": -6.290065288543701, "rank": 5, "decoded_token": " rust"}}, {"32656": {"logprob": -0.02516966126859188, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -4.400169849395752, "rank": 2, "decoded_token": " rust"}, "12603": {"logprob": -5.275169849395752, "rank": 3, "decoded_token": " wood"}, "3403": 
{"logprob": -5.525169849395752, "rank": 4, "decoded_token": " text"}, "17253": {"logprob": -6.962669849395752, "rank": 5, "decoded_token": " weather"}}, {"4691": {"logprob": -0.7264319658279419, "rank": 1, "decoded_token": " surface"}, "11237": {"logprob": -0.8514319658279419, "rank": 2, "decoded_token": " floor"}, "7042": {"logprob": -2.6014318466186523, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -5.226431846618652, "rank": 4, "decoded_token": " deck"}, "1615": {"logprob": -5.726431846618652, "rank": 5, "decoded_token": " pl"}}, {"1046": {"logprob": -0.4668232202529907, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -1.9668232202529907, "rank": 2, "decoded_token": ","}, "1321": {"logprob": -2.466823101043701, "rank": 3, "decoded_token": " and"}, "7283": {"logprob": -2.716823101043701, "rank": 4, "decoded_token": " looking"}, "1454": {"logprob": -2.716823101043701, "rank": 5, "decoded_token": " with"}}, {"2": {"logprob": -0.002247072057798505, "rank": 1, "decoded_token": "</s>"}, "1531": {"logprob": -6.627246856689453, "rank": 2, "decoded_token": " The"}, "1032": {"logprob": -7.127246856689453, "rank": 3, "decoded_token": " "}, "3730": {"logprob": -9.877246856689453, "rank": 4, "decoded_token": " There"}, "1256": {"logprob": -11.127246856689453, "rank": 5, "decoded_token": " "}}]], [[1049, 1046, 1349, 7244, 10575, 1454, 2327, 94766, 32961, 53048, 41132, 3923, 1408, 1261, 32656, 4691, 1626, 1050, 1046, 1349, 15375, 24361, 4521, 1454, 122203, 27469, 94973, 2425, 1261, 16152, 1121, 21283, 1046, 2], "1. A black dog with floppy ears sits attentively on a wooden surface.\n2. 
A vast mountain range with rugged peaks stretches under a cloudy sky.", [{"1049": {"logprob": -0.42824622988700867, "rank": 1, "decoded_token": "1"}, "1045": {"logprob": -1.553246259689331, "rank": 2, "decoded_token": "-"}, "1065": {"logprob": -2.428246259689331, "rank": 3, "decoded_token": "A"}, "1784": {"logprob": -4.053246021270752, "rank": 4, "decoded_token": "The"}, "69957": {"logprob": -4.428246021270752, "rank": 5, "decoded_token": "Sure"}}, {"1046": {"logprob": -1.9788545614574105e-05, "rank": 1, "decoded_token": "."}, "1058": {"logprob": -11.750020027160645, "rank": 2, "decoded_token": ":"}, "3590": {"logprob": -12.125020027160645, "rank": 3, "decoded_token": ".A"}, "1065": {"logprob": -13.062520027160645, "rank": 4, "decoded_token": "A"}, "1041": {"logprob": -13.750020027160645, "rank": 5, "decoded_token": ")"}}, {"1349": {"logprob": -0.14020134508609772, "rank": 1, "decoded_token": " A"}, "1429": {"logprob": -2.3902013301849365, "rank": 2, "decoded_token": " \""}, "1603": {"logprob": -3.7652013301849365, "rank": 3, "decoded_token": " **"}, "11967": {"logprob": -4.890201568603516, "rank": 4, "decoded_token": " Image"}, "1531": {"logprob": -5.015201568603516, "rank": 5, "decoded_token": " The"}}, {"7244": {"logprob": -0.2003599852323532, "rank": 1, "decoded_token": " black"}, "38462": {"logprob": -3.075360059738159, "rank": 2, "decoded_token": " curious"}, "68076": {"logprob": -3.575360059738159, "rank": 3, "decoded_token": " cute"}, "4329": {"logprob": -3.887860059738159, "rank": 4, "decoded_token": " large"}, "6231": {"logprob": -4.32535982131958, "rank": 5, "decoded_token": " close"}}, {"10575": {"logprob": -0.18818901479244232, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.0631890296936035, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -3.1881890296936035, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -6.9381890296936035, "rank": 4, "decoded_token": " Lab"}, "8636": {"logprob": -7.3131890296936035, 
"rank": 5, "decoded_token": " lab"}}, {"1454": {"logprob": -0.5699259042739868, "rank": 1, "decoded_token": " with"}, "53048": {"logprob": -1.2574259042739868, "rank": 2, "decoded_token": " sits"}, "1395": {"logprob": -3.0699257850646973, "rank": 3, "decoded_token": " is"}, "22524": {"logprob": -3.6324257850646973, "rank": 4, "decoded_token": " lies"}, "18970": {"logprob": -3.7574257850646973, "rank": 5, "decoded_token": " sitting"}}, {"2327": {"logprob": -1.2377738952636719, "rank": 1, "decoded_token": " fl"}, "1261": {"logprob": -1.3627738952636719, "rank": 2, "decoded_token": " a"}, "17300": {"logprob": -1.9252738952636719, "rank": 3, "decoded_token": " soul"}, "100089": {"logprob": -2.675273895263672, "rank": 4, "decoded_token": " expressive"}, "6444": {"logprob": -3.237773895263672, "rank": 5, "decoded_token": " soft"}}, {"94766": {"logprob": -0.0025601964443922043, "rank": 1, "decoded_token": "oppy"}, "124603": {"logprob": -6.315060138702393, "rank": 2, "decoded_token": "uffy"}, "1484": {"logprob": -7.877560138702393, "rank": 3, "decoded_token": "op"}, "24897": {"logprob": -8.81506061553955, "rank": 4, "decoded_token": "appy"}, "102477": {"logprob": -9.69006061553955, "rank": 5, "decoded_token": "opping"}}, {"32961": {"logprob": -5.113947918289341e-05, "rank": 1, "decoded_token": " ears"}, "16962": {"logprob": -11.250051498413086, "rank": 2, "decoded_token": " ear"}, "5731": {"logprob": -11.812551498413086, "rank": 3, "decoded_token": " eyes"}, "3351": {"logprob": -12.000051498413086, "rank": 4, "decoded_token": " years"}, "42071": {"logprob": -13.062551498413086, "rank": 5, "decoded_token": " cheeks"}}, {"53048": {"logprob": -0.6179640889167786, "rank": 1, "decoded_token": " sits"}, "10637": {"logprob": -1.9929640293121338, "rank": 2, "decoded_token": " looks"}, "1321": {"logprob": -2.430464029312134, "rank": 3, "decoded_token": " and"}, "1395": {"logprob": -2.617964029312134, "rank": 4, "decoded_token": " is"}, "18970": {"logprob": -3.055464029312134, 
"rank": 5, "decoded_token": " sitting"}}, {"41132": {"logprob": -0.3746516704559326, "rank": 1, "decoded_token": " attent"}, "1408": {"logprob": -2.3121516704559326, "rank": 2, "decoded_token": " on"}, "106534": {"logprob": -2.3746516704559326, "rank": 3, "decoded_token": " calmly"}, "12276": {"logprob": -2.6246516704559326, "rank": 4, "decoded_token": " alert"}, "6482": {"logprob": -5.124651908874512, "rank": 5, "decoded_token": " patient"}}, {"3923": {"logprob": -8.463501580990851e-05, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -9.50008487701416, "rank": 2, "decoded_token": "ive"}, "6655": {"logprob": -11.87508487701416, "rank": 3, "decoded_token": "atively"}, "3929": {"logprob": -14.00008487701416, "rank": 4, "decoded_token": "ently"}, "47885": {"logprob": -14.62508487701416, "rank": 5, "decoded_token": "edly"}}, {"1408": {"logprob": -0.06439964473247528, "rank": 1, "decoded_token": " on"}, "3675": {"logprob": -3.0643997192382812, "rank": 2, "decoded_token": " against"}, "1294": {"logprob": -4.939399719238281, "rank": 3, "decoded_token": " in"}, "7283": {"logprob": -5.689399719238281, "rank": 4, "decoded_token": " looking"}, "1044": {"logprob": -5.814399719238281, "rank": 5, "decoded_token": ","}}, {"1261": {"logprob": -0.2108541578054428, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -1.710854172706604, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": -5.5858540534973145, "rank": 3, "decoded_token": " weather"}, "44130": {"logprob": -6.0858540534973145, "rank": 4, "decoded_token": " rust"}, "12603": {"logprob": -6.9608540534973145, "rank": 5, "decoded_token": " wood"}}, {"32656": {"logprob": -0.08556432276964188, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -2.710564374923706, "rank": 2, "decoded_token": " rust"}, "17253": {"logprob": -4.710564136505127, "rank": 3, "decoded_token": " weather"}, "12603": {"logprob": -5.960564136505127, "rank": 4, "decoded_token": " wood"}, "3403": {"logprob": 
-5.960564136505127, "rank": 5, "decoded_token": " text"}}, {"4691": {"logprob": -0.7751782536506653, "rank": 1, "decoded_token": " surface"}, "11237": {"logprob": -0.7751782536506653, "rank": 2, "decoded_token": " floor"}, "7042": {"logprob": -2.9001781940460205, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -4.1501784324646, "rank": 4, "decoded_token": " deck"}, "92504": {"logprob": -6.1501784324646, "rank": 5, "decoded_token": " backdrop"}}, {"1626": {"logprob": -0.12918435037136078, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -2.3791842460632324, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -4.129184246063232, "rank": 3, "decoded_token": "."}, "1338": {"logprob": -5.129184246063232, "rank": 4, "decoded_token": ".\n\n"}, "7283": {"logprob": -5.629184246063232, "rank": 5, "decoded_token": " looking"}}, {"1050": {"logprob": -0.00017474555352237076, "rank": 1, "decoded_token": "2"}, "1256": {"logprob": -9.000174522399902, "rank": 2, "decoded_token": " "}, "1032": {"logprob": -10.875174522399902, "rank": 3, "decoded_token": " "}, "1293": {"logprob": -11.625174522399902, "rank": 4, "decoded_token": " "}, "1051": {"logprob": -12.125174522399902, "rank": 5, "decoded_token": "3"}}, {"1046": {"logprob": -7.629365427419543e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -12.875007629394531, "rank": 2, "decoded_token": ".A"}, "1626": {"logprob": -13.062507629394531, "rank": 3, "decoded_token": ".\n"}, "1338": {"logprob": -14.562507629394531, "rank": 4, "decoded_token": ".\n\n"}, "1058": {"logprob": -14.812507629394531, "rank": 5, "decoded_token": ":"}}, {"1349": {"logprob": -0.558266282081604, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.495766282081604, "rank": 2, "decoded_token": " Maj"}, "37159": {"logprob": -2.2457661628723145, "rank": 3, "decoded_token": " Snow"}, "113465": {"logprob": -3.9957661628723145, "rank": 4, "decoded_token": " Rug"}, "1531": {"logprob": -3.9957661628723145, "rank": 5, 
"decoded_token": " The"}}, {"15375": {"logprob": -0.6446555852890015, "rank": 1, "decoded_token": " vast"}, "37849": {"logprob": -2.019655704498291, "rank": 2, "decoded_token": " breat"}, "61082": {"logprob": -2.394655704498291, "rank": 3, "decoded_token": " panor"}, "10726": {"logprob": -3.082155704498291, "rank": 4, "decoded_token": " scen"}, "2169": {"logprob": -3.207155704498291, "rank": 5, "decoded_token": " ser"}}, {"24361": {"logprob": -0.7034653425216675, "rank": 1, "decoded_token": " mountain"}, "127945": {"logprob": -1.9534653425216675, "rank": 2, "decoded_token": " mountainous"}, "1044": {"logprob": -2.078465461730957, "rank": 3, "decoded_token": ","}, "4521": {"logprob": -2.328465461730957, "rank": 4, "decoded_token": " range"}, "28035": {"logprob": -2.453465461730957, "rank": 5, "decoded_token": " landscape"}}, {"4521": {"logprob": -0.07058106362819672, "rank": 1, "decoded_token": " range"}, "28035": {"logprob": -2.6955809593200684, "rank": 2, "decoded_token": " landscape"}, "37691": {"logprob": -8.320581436157227, "rank": 3, "decoded_token": " valley"}, "12248": {"logprob": -9.445581436157227, "rank": 4, "decoded_token": " peak"}, "13327": {"logprob": -9.695581436157227, "rank": 5, "decoded_token": " scene"}}, {"1454": {"logprob": -1.1448894739151, "rank": 1, "decoded_token": " with"}, "94973": {"logprob": -1.1448894739151, "rank": 2, "decoded_token": " stretches"}, "2425": {"logprob": -1.8948894739151, "rank": 3, "decoded_token": " under"}, "1395": {"logprob": -2.5198893547058105, "rank": 4, "decoded_token": " is"}, "13875": {"logprob": -3.0198893547058105, "rank": 5, "decoded_token": " covered"}}, {"122203": {"logprob": -1.0288245677947998, "rank": 1, "decoded_token": " rugged"}, "58127": {"logprob": -1.6538245677947998, "rank": 2, "decoded_token": " jag"}, "27469": {"logprob": -2.1538245677948, "rank": 3, "decoded_token": " peaks"}, "23745": {"logprob": -2.6538245677948, "rank": 4, "decoded_token": " snow"}, "95746": {"logprob": -2.8413245677948, 
"rank": 5, "decoded_token": " rocky"}}, {"27469": {"logprob": -0.20564845204353333, "rank": 1, "decoded_token": " peaks"}, "24765": {"logprob": -2.580648422241211, "rank": 2, "decoded_token": " terrain"}, "130655": {"logprob": -2.955648422241211, "rank": 3, "decoded_token": ""}, "1044": {"logprob": -3.580648422241211, "rank": 4, "decoded_token": ","}, "61263": {"logprob": -4.455648422241211, "rank": 5, "decoded_token": " slopes"}}, {"94973": {"logprob": -1.0839273929595947, "rank": 1, "decoded_token": " stretches"}, "1321": {"logprob": -1.1464273929595947, "rank": 2, "decoded_token": " and"}, "2425": {"logprob": -1.7714273929595947, "rank": 3, "decoded_token": " under"}, "13875": {"logprob": -3.0839273929595947, "rank": 4, "decoded_token": " covered"}, "1395": {"logprob": -3.2714273929595947, "rank": 5, "decoded_token": " is"}}, {"2425": {"logprob": -0.9016233682632446, "rank": 1, "decoded_token": " under"}, "5669": {"logprob": -1.0266233682632446, "rank": 2, "decoded_token": " across"}, "1848": {"logprob": -1.9016233682632446, "rank": 3, "decoded_token": " out"}, "2203": {"logprob": -3.151623249053955, "rank": 4, "decoded_token": " into"}, "8994": {"logprob": -4.026623249053955, "rank": 5, "decoded_token": " towards"}}, {"1261": {"logprob": -0.00555459875613451, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -5.380554676055908, "rank": 2, "decoded_token": " an"}, "1278": {"logprob": -7.630554676055908, "rank": 3, "decoded_token": " the"}, "2136": {"logprob": -9.31805419921875, "rank": 4, "decoded_token": " over"}, "16152": {"logprob": -9.38055419921875, "rank": 5, "decoded_token": " cloud"}}, {"16152": {"logprob": -0.6862213015556335, "rank": 1, "decoded_token": " cloud"}, "6133": {"logprob": -1.4362213611602783, "rank": 2, "decoded_token": " clear"}, "18416": {"logprob": -2.6862213611602783, "rank": 3, "decoded_token": " haz"}, "27254": {"logprob": -3.0612213611602783, "rank": 4, "decoded_token": " partly"}, "4391": {"logprob": -3.1862213611602783, 
"rank": 5, "decoded_token": " light"}}, {"1121": {"logprob": -0.10446903109550476, "rank": 1, "decoded_token": "y"}, "4527": {"logprob": -2.854469060897827, "rank": 2, "decoded_token": "less"}, "1286": {"logprob": -3.479469060897827, "rank": 3, "decoded_token": "ed"}, "114525": {"logprob": -5.479468822479248, "rank": 4, "decoded_token": "-covered"}, "77187": {"logprob": -5.479468822479248, "rank": 5, "decoded_token": "-filled"}}, {"21283": {"logprob": -0.003459066851064563, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -6.3784589767456055, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -6.8784589767456055, "rank": 3, "decoded_token": ","}, "26549": {"logprob": -7.8784589767456055, "rank": 4, "decoded_token": " gray"}, "34052": {"logprob": -8.503458976745605, "rank": 5, "decoded_token": " grey"}}, {"1046": {"logprob": -0.01103890035301447, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -4.636038780212402, "rank": 2, "decoded_token": ","}, "1338": {"logprob": -7.261038780212402, "rank": 3, "decoded_token": ".\n\n"}, "1294": {"logprob": -8.136038780212402, "rank": 4, "decoded_token": " in"}, "1454": {"logprob": -8.761038780212402, "rank": 5, "decoded_token": " with"}}, {"2": {"logprob": -9.059865078597795e-06, "rank": 1, "decoded_token": "</s>"}, "1032": {"logprob": -11.625008583068848, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -16.125009536743164, "rank": 3, "decoded_token": " "}, "1319": {"logprob": -17.375009536743164, "rank": 4, "decoded_token": " ("}, "1766": {"logprob": -18.750009536743164, "rank": 5, "decoded_token": " ["}}]], [[1049, 1046, 1349, 7244, 10575, 53048, 41132, 3923, 1408, 1261, 32656, 11237, 1626, 1050, 1046, 1349, 15375, 24361, 4521, 94973, 5669, 1278, 48932, 2425, 1261, 16152, 1121, 21283, 1626, 1051, 1046, 8342, 71284, 7377, 1394, 22140, 1294, 1278, 27208, 1513, 97558, 1626, 1052, 1046, 1349, 53301, 59396, 3549, 13335, 2645, 1261, 1295, 3506, 11223, 12097, 1046, 2], "1. 
A black dog sits attentively on a wooden floor.\n2. A vast mountain range stretches across the horizon under a cloudy sky.\n3. Surfers wait for waves in the ocean at sunset.\n4. A winding gravel path leads through a lush green park.", [{"1049": {"logprob": -0.05001257359981537, "rank": 1, "decoded_token": "1"}, "1045": {"logprob": -3.1750125885009766, "rank": 2, "decoded_token": "-"}, "69957": {"logprob": -5.925012588500977, "rank": 3, "decoded_token": "Sure"}, "11745": {"logprob": -6.425012588500977, "rank": 4, "decoded_token": "Here"}, "1065": {"logprob": -6.425012588500977, "rank": 5, "decoded_token": "A"}}, {"1046": {"logprob": -9.536697689327411e-06, "rank": 1, "decoded_token": "."}, "1058": {"logprob": -11.875009536743164, "rank": 2, "decoded_token": ":"}, "3590": {"logprob": -13.375009536743164, "rank": 3, "decoded_token": ".A"}, "1041": {"logprob": -14.750009536743164, "rank": 4, "decoded_token": ")"}, "1065": {"logprob": -15.687509536743164, "rank": 5, "decoded_token": "A"}}, {"1349": {"logprob": -0.12580634653568268, "rank": 1, "decoded_token": " A"}, "1429": {"logprob": -2.3758063316345215, "rank": 2, "decoded_token": " \""}, "1531": {"logprob": -4.6258063316345215, "rank": 3, "decoded_token": " The"}, "11967": {"logprob": -4.6258063316345215, "rank": 4, "decoded_token": " Image"}, "1603": {"logprob": -5.6258063316345215, "rank": 5, "decoded_token": " **"}}, {"7244": {"logprob": -0.15412142872810364, "rank": 1, "decoded_token": " black"}, "68076": {"logprob": -3.3416213989257812, "rank": 2, "decoded_token": " cute"}, "6231": {"logprob": -3.9666213989257812, "rank": 3, "decoded_token": " close"}, "38462": {"logprob": -4.216621398925781, "rank": 4, "decoded_token": " curious"}, "4329": {"logprob": -4.404121398925781, "rank": 5, "decoded_token": " large"}}, {"10575": {"logprob": -0.12086891382932663, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.3708689212799072, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": 
-3.9958689212799072, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -7.683368682861328, "rank": 4, "decoded_token": " Lab"}, "8636": {"logprob": -7.808368682861328, "rank": 5, "decoded_token": " lab"}}, {"53048": {"logprob": -0.8729249238967896, "rank": 1, "decoded_token": " sits"}, "1454": {"logprob": -1.1229249238967896, "rank": 2, "decoded_token": " with"}, "1395": {"logprob": -2.4354248046875, "rank": 3, "decoded_token": " is"}, "18970": {"logprob": -2.6854248046875, "rank": 4, "decoded_token": " sitting"}, "22524": {"logprob": -3.6854248046875, "rank": 5, "decoded_token": " lies"}}, {"41132": {"logprob": -0.5888903737068176, "rank": 1, "decoded_token": " attent"}, "106534": {"logprob": -1.2763903141021729, "rank": 2, "decoded_token": " calmly"}, "12276": {"logprob": -2.838890314102173, "rank": 3, "decoded_token": " alert"}, "1408": {"logprob": -2.901390314102173, "rank": 4, "decoded_token": " on"}, "6482": {"logprob": -5.026390552520752, "rank": 5, "decoded_token": " patient"}}, {"3923": {"logprob": -9.16677454370074e-05, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -9.625091552734375, "rank": 2, "decoded_token": "ive"}, "6655": {"logprob": -10.875091552734375, "rank": 3, "decoded_token": "atively"}, "3929": {"logprob": -13.125091552734375, "rank": 4, "decoded_token": "ently"}, "47885": {"logprob": -13.750091552734375, "rank": 5, "decoded_token": "edly"}}, {"1408": {"logprob": -0.052677519619464874, "rank": 1, "decoded_token": " on"}, "3675": {"logprob": -3.802677631378174, "rank": 2, "decoded_token": " against"}, "1454": {"logprob": -4.302677631378174, "rank": 3, "decoded_token": " with"}, "1294": {"logprob": -5.177677631378174, "rank": 4, "decoded_token": " in"}, "7283": {"logprob": -5.427677631378174, "rank": 5, "decoded_token": " looking"}}, {"1261": {"logprob": -0.36706605553627014, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -1.2420660257339478, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": 
-4.617065906524658, "rank": 3, "decoded_token": " weather"}, "44130": {"logprob": -5.742065906524658, "rank": 4, "decoded_token": " rust"}, "12603": {"logprob": -6.617065906524658, "rank": 5, "decoded_token": " wood"}}, {"32656": {"logprob": -0.07824385166168213, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -2.8282437324523926, "rank": 2, "decoded_token": " rust"}, "17253": {"logprob": -4.703243732452393, "rank": 3, "decoded_token": " weather"}, "12603": {"logprob": -5.828243732452393, "rank": 4, "decoded_token": " wood"}, "3403": {"logprob": -5.953243732452393, "rank": 5, "decoded_token": " text"}}, {"11237": {"logprob": -0.5853750705718994, "rank": 1, "decoded_token": " floor"}, "4691": {"logprob": -1.0853750705718994, "rank": 2, "decoded_token": " surface"}, "7042": {"logprob": -2.7103750705718994, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -3.5853750705718994, "rank": 4, "decoded_token": " deck"}, "92504": {"logprob": -6.08537483215332, "rank": 5, "decoded_token": " backdrop"}}, {"1626": {"logprob": -0.7340722680091858, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -0.8590722680091858, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -3.359072208404541, "rank": 3, "decoded_token": " with"}, "7283": {"logprob": -3.609072208404541, "rank": 4, "decoded_token": " looking"}, "1321": {"logprob": -4.109072208404541, "rank": 5, "decoded_token": " and"}}, {"1050": {"logprob": -1.1324817933200393e-05, "rank": 1, "decoded_token": "2"}, "1051": {"logprob": -11.625011444091797, "rank": 2, "decoded_token": "3"}, "1256": {"logprob": -14.000011444091797, "rank": 3, "decoded_token": " "}, "1049": {"logprob": -14.625011444091797, "rank": 4, "decoded_token": "1"}, "1032": {"logprob": -14.625011444091797, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -2.50339189733495e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.56250286102295, "rank": 2, "decoded_token": ".A"}, "1626": {"logprob": -15.43750286102295, 
"rank": 3, "decoded_token": ".\n"}, "4700": {"logprob": -15.50000286102295, "rank": 4, "decoded_token": ".M"}, "3051": {"logprob": -16.000001907348633, "rank": 5, "decoded_token": ".S"}}, {"1349": {"logprob": -0.6769706010818481, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.9269706010818481, "rank": 2, "decoded_token": " Maj"}, "37159": {"logprob": -2.1144704818725586, "rank": 3, "decoded_token": " Snow"}, "27260": {"logprob": -2.6144704818725586, "rank": 4, "decoded_token": " Mountain"}, "113465": {"logprob": -2.8644704818725586, "rank": 5, "decoded_token": " Rug"}}, {"15375": {"logprob": -0.9251430034637451, "rank": 1, "decoded_token": " vast"}, "10726": {"logprob": -2.300143003463745, "rank": 2, "decoded_token": " scen"}, "4521": {"logprob": -2.362643003463745, "rank": 3, "decoded_token": " range"}, "122203": {"logprob": -2.425143003463745, "rank": 4, "decoded_token": " rugged"}, "61082": {"logprob": -2.800143003463745, "rank": 5, "decoded_token": " panor"}}, {"24361": {"logprob": -0.5277582406997681, "rank": 1, "decoded_token": " mountain"}, "127945": {"logprob": -1.902758240699768, "rank": 2, "decoded_token": " mountainous"}, "28035": {"logprob": -2.5277581214904785, "rank": 3, "decoded_token": " landscape"}, "4521": {"logprob": -2.5277581214904785, "rank": 4, "decoded_token": " range"}, "1044": {"logprob": -2.7777581214904785, "rank": 5, "decoded_token": ","}}, {"4521": {"logprob": -0.055658817291259766, "rank": 1, "decoded_token": " range"}, "28035": {"logprob": -2.9306588172912598, "rank": 2, "decoded_token": " landscape"}, "37691": {"logprob": -8.430658340454102, "rank": 3, "decoded_token": " valley"}, "13327": {"logprob": -9.055658340454102, "rank": 4, "decoded_token": " scene"}, "3719": {"logprob": -9.805658340454102, "rank": 5, "decoded_token": " view"}}, {"94973": {"logprob": -0.6880245208740234, "rank": 1, "decoded_token": " stretches"}, "2425": {"logprob": -1.7505245208740234, "rank": 2, "decoded_token": " under"}, "1395": {"logprob": 
-2.3130245208740234, "rank": 3, "decoded_token": " is"}, "1454": {"logprob": -2.6880245208740234, "rank": 4, "decoded_token": " with"}, "7038": {"logprob": -3.2505245208740234, "rank": 5, "decoded_token": " extends"}}, {"5669": {"logprob": -0.4545598328113556, "rank": 1, "decoded_token": " across"}, "2425": {"logprob": -1.4545598030090332, "rank": 2, "decoded_token": " under"}, "1848": {"logprob": -2.454559803009033, "rank": 3, "decoded_token": " out"}, "2203": {"logprob": -4.204559803009033, "rank": 4, "decoded_token": " into"}, "25136": {"logprob": -4.642059803009033, "rank": 5, "decoded_token": " beneath"}}, {"1278": {"logprob": -0.23015151917934418, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -1.6051515340805054, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -5.605151653289795, "rank": 3, "decoded_token": " an"}, "2425": {"logprob": -7.167651653289795, "rank": 4, "decoded_token": " under"}, "1454": {"logprob": -10.167651176452637, "rank": 5, "decoded_token": " with"}}, {"48932": {"logprob": -0.2797861397266388, "rank": 1, "decoded_token": " horizon"}, "21283": {"logprob": -2.0297861099243164, "rank": 2, "decoded_token": " sky"}, "3937": {"logprob": -3.2797861099243164, "rank": 3, "decoded_token": " image"}, "28035": {"logprob": -3.6547861099243164, "rank": 4, "decoded_token": " landscape"}, "3044": {"logprob": -3.7797861099243164, "rank": 5, "decoded_token": " sk"}}, {"2425": {"logprob": -0.28862035274505615, "rank": 1, "decoded_token": " under"}, "1044": {"logprob": -2.4136204719543457, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -2.5386204719543457, "rank": 3, "decoded_token": " with"}, "1626": {"logprob": -3.7886204719543457, "rank": 4, "decoded_token": ".\n"}, "1408": {"logprob": -3.9136204719543457, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -0.04524127021431923, "rank": 1, "decoded_token": " a"}, "16152": {"logprob": -4.045241355895996, "rank": 2, "decoded_token": " cloud"}, "1420": {"logprob": 
-4.045241355895996, "rank": 3, "decoded_token": " an"}, "2136": {"logprob": -6.107741355895996, "rank": 4, "decoded_token": " over"}, "6133": {"logprob": -6.357741355895996, "rank": 5, "decoded_token": " clear"}}, {"16152": {"logprob": -0.19613930583000183, "rank": 1, "decoded_token": " cloud"}, "6133": {"logprob": -2.883639335632324, "rank": 2, "decoded_token": " clear"}, "27254": {"logprob": -3.508639335632324, "rank": 3, "decoded_token": " partly"}, "18416": {"logprob": -3.883639335632324, "rank": 4, "decoded_token": " haz"}, "4391": {"logprob": -4.321139335632324, "rank": 5, "decoded_token": " light"}}, {"1121": {"logprob": -0.05146069824695587, "rank": 1, "decoded_token": "y"}, "1286": {"logprob": -3.8014607429504395, "rank": 2, "decoded_token": "ed"}, "77187": {"logprob": -4.5514607429504395, "rank": 3, "decoded_token": "-filled"}, "114525": {"logprob": -4.9264607429504395, "rank": 4, "decoded_token": "-covered"}, "4527": {"logprob": -4.9264607429504395, "rank": 5, "decoded_token": "less"}}, {"21283": {"logprob": -0.00033122775494121015, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -8.875330924987793, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -9.500330924987793, "rank": 3, "decoded_token": ","}, "26549": {"logprob": -10.500330924987793, "rank": 4, "decoded_token": " gray"}, "34052": {"logprob": -11.375330924987793, "rank": 5, "decoded_token": " grey"}}, {"1626": {"logprob": -0.00012683063687290996, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -9.500126838684082, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -10.500126838684082, "rank": 3, "decoded_token": "."}, "1454": {"logprob": -10.875126838684082, "rank": 4, "decoded_token": " with"}, "1294": {"logprob": -13.375126838684082, "rank": 5, "decoded_token": " in"}}, {"1051": {"logprob": -3.2186455882765586e-06, "rank": 1, "decoded_token": "3"}, "1052": {"logprob": -12.75000286102295, "rank": 2, "decoded_token": "4"}, "1050": {"logprob": -15.00000286102295, 
"rank": 3, "decoded_token": "2"}, "1049": {"logprob": -17.000003814697266, "rank": 4, "decoded_token": "1"}, "1032": {"logprob": -17.937503814697266, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -1.9073468138230965e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -14.625001907348633, "rank": 2, "decoded_token": ".A"}, "5226": {"logprob": -15.625001907348633, "rank": 3, "decoded_token": ".D"}, "6847": {"logprob": -15.750001907348633, "rank": 4, "decoded_token": ".T"}, "4700": {"logprob": -16.750001907348633, "rank": 5, "decoded_token": ".M"}}, {"8342": {"logprob": -0.5928499102592468, "rank": 1, "decoded_token": " Sur"}, "1349": {"logprob": -1.6553499698638916, "rank": 2, "decoded_token": " A"}, "22468": {"logprob": -2.5303499698638916, "rank": 3, "decoded_token": " Several"}, "1488": {"logprob": -2.7178499698638916, "rank": 4, "decoded_token": " W"}, "15035": {"logprob": -3.2178499698638916, "rank": 5, "decoded_token": " People"}}, {"71284": {"logprob": -0.003268140833824873, "rank": 1, "decoded_token": "fers"}, "1102": {"logprob": -5.878268241882324, "rank": 2, "decoded_token": "f"}, "1726": {"logprob": -7.753268241882324, "rank": 3, "decoded_token": "fer"}, "61888": {"logprob": -12.315768241882324, "rank": 4, "decoded_token": "fline"}, "2119": {"logprob": -13.065768241882324, "rank": 5, "decoded_token": "fter"}}, {"7377": {"logprob": -1.4883846044540405, "rank": 1, "decoded_token": " wait"}, "1584": {"logprob": -1.7383846044540405, "rank": 2, "decoded_token": " are"}, "88014": {"logprob": -1.9258846044540405, "rank": 3, "decoded_token": " paddle"}, "1294": {"logprob": -1.9258846044540405, "rank": 4, "decoded_token": " in"}, "24434": {"logprob": -2.23838472366333, "rank": 5, "decoded_token": " ride"}}, {"1394": {"logprob": -0.6120346188545227, "rank": 1, "decoded_token": " for"}, "1294": {"logprob": -0.9870346188545227, "rank": 2, "decoded_token": " in"}, "1408": {"logprob": -2.737034559249878, "rank": 3, "decoded_token": " on"}, "6482": 
{"logprob": -4.487034797668457, "rank": 4, "decoded_token": " patient"}, "1321": {"logprob": -5.612034797668457, "rank": 5, "decoded_token": " and"}}, {"22140": {"logprob": -0.008224429562687874, "rank": 1, "decoded_token": " waves"}, "1278": {"logprob": -5.5082244873046875, "rank": 2, "decoded_token": " the"}, "1261": {"logprob": -5.6332244873046875, "rank": 3, "decoded_token": " a"}, "39460": {"logprob": -8.133224487304688, "rank": 4, "decoded_token": " incoming"}, "1321": {"logprob": -9.758224487304688, "rank": 5, "decoded_token": " and"}}, {"1294": {"logprob": -0.3204176723957062, "rank": 1, "decoded_token": " in"}, "1408": {"logprob": -2.195417642593384, "rank": 2, "decoded_token": " on"}, "1513": {"logprob": -2.320417642593384, "rank": 3, "decoded_token": " at"}, "3016": {"logprob": -3.695417642593384, "rank": 4, "decoded_token": " while"}, "1435": {"logprob": -3.820417642593384, "rank": 5, "decoded_token": " as"}}, {"1278": {"logprob": -0.004615250043570995, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -6.192115306854248, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -6.942115306854248, "rank": 3, "decoded_token": " an"}, "40466": {"logprob": -7.317115306854248, "rank": 4, "decoded_token": " shallow"}, "26517": {"logprob": -7.879615306854248, "rank": 5, "decoded_token": " calm"}}, {"27208": {"logprob": -0.06491076946258545, "rank": 1, "decoded_token": " ocean"}, "7786": {"logprob": -3.439910888671875, "rank": 2, "decoded_token": " distance"}, "5124": {"logprob": -5.314910888671875, "rank": 3, "decoded_token": " early"}, "26517": {"logprob": -5.377410888671875, "rank": 4, "decoded_token": " calm"}, "11196": {"logprob": -5.377410888671875, "rank": 5, "decoded_token": " sea"}}, {"1513": {"logprob": -1.144903540611267, "rank": 1, "decoded_token": " at"}, "1435": {"logprob": -1.269903540611267, "rank": 2, "decoded_token": " as"}, "3184": {"logprob": -1.394903540611267, "rank": 3, "decoded_token": " during"}, "3016": {"logprob": 
-3.0199036598205566, "rank": 4, "decoded_token": " while"}, "6117": {"logprob": -3.1449036598205566, "rank": 5, "decoded_token": " near"}}, {"97558": {"logprob": -0.12556149065494537, "rank": 1, "decoded_token": " sunset"}, "11729": {"logprob": -2.875561475753784, "rank": 2, "decoded_token": " sun"}, "1266": {"logprob": -3.375561475753784, "rank": 3, "decoded_token": " d"}, "54507": {"logprob": -4.000561714172363, "rank": 4, "decoded_token": " dawn"}, "1261": {"logprob": -5.125561714172363, "rank": 5, "decoded_token": " a"}}, {"1626": {"logprob": -0.26737067103385925, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -2.2673707008361816, "rank": 2, "decoded_token": ","}, "3016": {"logprob": -2.7673707008361816, "rank": 3, "decoded_token": " while"}, "1454": {"logprob": -3.5173707008361816, "rank": 4, "decoded_token": " with"}, "6117": {"logprob": -4.142370700836182, "rank": 5, "decoded_token": " near"}}, {"1052": {"logprob": -2.9802276912960224e-06, "rank": 1, "decoded_token": "4"}, "1051": {"logprob": -13.37500286102295, "rank": 2, "decoded_token": "3"}, "1049": {"logprob": -14.00000286102295, "rank": 3, "decoded_token": "1"}, "1053": {"logprob": -14.56250286102295, "rank": 4, "decoded_token": "5"}, "1032": {"logprob": -16.750003814697266, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -1.6689286894688848e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.500001907348633, "rank": 2, "decoded_token": ".A"}, "6847": {"logprob": -16.562501907348633, "rank": 3, "decoded_token": ".T"}, "1044": {"logprob": -17.312501907348633, "rank": 4, "decoded_token": ","}, "1349": {"logprob": -17.500001907348633, "rank": 5, "decoded_token": " A"}}, {"1349": {"logprob": -0.004883386194705963, "rank": 1, "decoded_token": " A"}, "2048": {"logprob": -5.504883289337158, "rank": 2, "decoded_token": " An"}, "10638": {"logprob": -7.754883289337158, "rank": 3, "decoded_token": " Two"}, "111463": {"logprob": -9.754883766174316, "rank": 4, "decoded_token": " Trees"}, 
"1531": {"logprob": -10.692383766174316, "rank": 5, "decoded_token": " The"}}, {"53301": {"logprob": -1.5612412691116333, "rank": 1, "decoded_token": " winding"}, "15192": {"logprob": -1.7487412691116333, "rank": 2, "decoded_token": " narrow"}, "47945": {"logprob": -2.1237411499023438, "rank": 3, "decoded_token": " dirt"}, "2169": {"logprob": -2.5612411499023438, "rank": 4, "decoded_token": " ser"}, "59396": {"logprob": -2.6862411499023438, "rank": 5, "decoded_token": " gravel"}}, {"59396": {"logprob": -0.9024254083633423, "rank": 1, "decoded_token": " gravel"}, "3549": {"logprob": -1.1524254083633423, "rank": 2, "decoded_token": " path"}, "47945": {"logprob": -1.6524254083633423, "rank": 3, "decoded_token": " dirt"}, "14801": {"logprob": -3.1524252891540527, "rank": 4, "decoded_token": " pathway"}, "15551": {"logprob": -4.277425289154053, "rank": 5, "decoded_token": " stone"}}, {"3549": {"logprob": -0.021290099248290062, "rank": 1, "decoded_token": " path"}, "14801": {"logprob": -3.8962900638580322, "rank": 2, "decoded_token": " pathway"}, "33659": {"logprob": -7.896290302276611, "rank": 3, "decoded_token": " trail"}, "9480": {"logprob": -9.521289825439453, "rank": 4, "decoded_token": " road"}, "7368": {"logprob": -9.646289825439453, "rank": 5, "decoded_token": "path"}}, {"13335": {"logprob": -0.16593234241008759, "rank": 1, "decoded_token": " leads"}, "39985": {"logprob": -2.8534324169158936, "rank": 2, "decoded_token": " cuts"}, "1639": {"logprob": -3.9784324169158936, "rank": 3, "decoded_token": " me"}, "11500": {"logprob": -4.1034321784973145, "rank": 4, "decoded_token": " runs"}, "2645": {"logprob": -4.2909321784973145, "rank": 5, "decoded_token": " through"}}, {"2645": {"logprob": -0.05767015367746353, "rank": 1, "decoded_token": " through"}, "8994": {"logprob": -4.0576701164245605, "rank": 2, "decoded_token": " towards"}, "2396": {"logprob": -4.1826701164245605, "rank": 3, "decoded_token": " between"}, "2203": {"logprob": -4.5576701164245605, "rank": 4, 
"decoded_token": " into"}, "1317": {"logprob": -5.5576701164245605, "rank": 5, "decoded_token": " to"}}, {"1261": {"logprob": -0.017209367826581, "rank": 1, "decoded_token": " a"}, "11223": {"logprob": -4.892209529876709, "rank": 2, "decoded_token": " green"}, "1295": {"logprob": -5.017209529876709, "rank": 3, "decoded_token": " l"}, "23170": {"logprob": -6.767209529876709, "rank": 4, "decoded_token": " grass"}, "1420": {"logprob": -7.267209529876709, "rank": 5, "decoded_token": " an"}}, {"1295": {"logprob": -0.9430665969848633, "rank": 1, "decoded_token": " l"}, "11223": {"logprob": -1.3180665969848633, "rank": 2, "decoded_token": " green"}, "23170": {"logprob": -1.9430665969848633, "rank": 3, "decoded_token": " grass"}, "12097": {"logprob": -2.4430665969848633, "rank": 4, "decoded_token": " park"}, "26428": {"logprob": -3.3180665969848633, "rank": 5, "decoded_token": " garden"}}, {"3506": {"logprob": -6.556489552167477e-06, "rank": 1, "decoded_token": "ush"}, "1374": {"logprob": -12.000006675720215, "rank": 2, "decoded_token": "us"}, "90716": {"logprob": -15.625006675720215, "rank": 3, "decoded_token": "USH"}, "16938": {"logprob": -15.875006675720215, "rank": 4, "decoded_token": "usher"}, "13326": {"logprob": -17.1875057220459, "rank": 5, "decoded_token": "inden"}}, {"11223": {"logprob": -0.36697858572006226, "rank": 1, "decoded_token": " green"}, "1044": {"logprob": -1.366978645324707, "rank": 2, "decoded_token": ","}, "26428": {"logprob": -3.491978645324707, "rank": 3, "decoded_token": " garden"}, "12097": {"logprob": -4.116978645324707, "rank": 4, "decoded_token": " park"}, "23170": {"logprob": -5.866978645324707, "rank": 5, "decoded_token": " grass"}}, {"12097": {"logprob": -0.5570574402809143, "rank": 1, "decoded_token": " park"}, "3727": {"logprob": -1.9320573806762695, "rank": 2, "decoded_token": " field"}, "28035": {"logprob": -2.1820573806762695, "rank": 3, "decoded_token": " landscape"}, "26428": {"logprob": -2.4320573806762695, "rank": 4, 
"decoded_token": " garden"}, "4457": {"logprob": -2.8070573806762695, "rank": 5, "decoded_token": " area"}}, {"1046": {"logprob": -0.7940837144851685, "rank": 1, "decoded_token": "."}, "1454": {"logprob": -1.2940837144851685, "rank": 2, "decoded_token": " with"}, "8994": {"logprob": -2.794083595275879, "rank": 3, "decoded_token": " towards"}, "54410": {"logprob": -3.544083595275879, "rank": 4, "decoded_token": " lined"}, "2425": {"logprob": -3.544083595275879, "rank": 5, "decoded_token": " under"}}, {"2": {"logprob": -2.145764938177308e-06, "rank": 1, "decoded_token": "</s>"}, "1032": {"logprob": -13.125001907348633, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -16.000001907348633, "rank": 3, "decoded_token": " "}, "1293": {"logprob": -18.750001907348633, "rank": 4, "decoded_token": " "}, "1319": {"logprob": -19.687501907348633, "rank": 5, "decoded_token": " ("}}]]]
\ No newline at end of file
[[[1784, 3937, 6122, 1261, 7244, 10575, 18970, 1408, 1261, 32656, 4691, 1046, 2], "The image shows a black dog sitting on a wooden surface.", [{"1784": {"logprob": -0.11685245484113693, "rank": 1, "decoded_token": "The"}, "4380": {"logprob": -2.3668525218963623, "rank": 2, "decoded_token": "This"}, "1049": {"logprob": -4.741852283477783, "rank": 3, "decoded_token": "1"}, "117991": {"logprob": -5.991852283477783, "rank": 4, "decoded_token": "Certain"}, "1785": {"logprob": -5.991852283477783, "rank": 5, "decoded_token": "In"}}, {"3937": {"logprob": -0.2591013014316559, "rank": 1, "decoded_token": " image"}, "2158": {"logprob": -1.5091012716293335, "rank": 2, "decoded_token": " first"}, "3977": {"logprob": -5.884101390838623, "rank": 3, "decoded_token": " top"}, "7244": {"logprob": -6.259101390838623, "rank": 4, "decoded_token": " black"}, "8061": {"logprob": -6.759101390838623, "rank": 5, "decoded_token": " images"}}, {"6122": {"logprob": -0.9660423994064331, "rank": 1, "decoded_token": " shows"}, "51948": {"logprob": -1.466042399406433, "rank": 2, "decoded_token": " depicts"}, "6971": {"logprob": -1.466042399406433, "rank": 3, "decoded_token": " features"}, "25981": {"logprob": -2.8410425186157227, "rank": 4, "decoded_token": " displays"}, "8688": {"logprob": -2.8410425186157227, "rank": 5, "decoded_token": " contains"}}, {"1261": {"logprob": -0.0030613720882683992, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -6.253061294555664, "rank": 2, "decoded_token": " an"}, "2295": {"logprob": -7.878061294555664, "rank": 3, "decoded_token": " two"}, "2342": {"logprob": -7.878061294555664, "rank": 4, "decoded_token": " only"}, "1278": {"logprob": -8.628061294555664, "rank": 5, "decoded_token": " the"}}, {"7244": {"logprob": -0.17649099230766296, "rank": 1, "decoded_token": " black"}, "6231": {"logprob": -2.3014910221099854, "rank": 2, "decoded_token": " close"}, "4249": {"logprob": -3.4264910221099854, "rank": 3, "decoded_token": " single"}, "4329": {"logprob": 
-5.113990783691406, "rank": 4, "decoded_token": " large"}, "10575": {"logprob": -5.176490783691406, "rank": 5, "decoded_token": " dog"}}, {"10575": {"logprob": -0.10929587483406067, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.4842958450317383, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -4.109295845031738, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -7.296795845031738, "rank": 4, "decoded_token": " Lab"}, "7990": {"logprob": -7.484295845031738, "rank": 5, "decoded_token": " cat"}}, {"18970": {"logprob": -0.830376148223877, "rank": 1, "decoded_token": " sitting"}, "1454": {"logprob": -1.580376148223877, "rank": 2, "decoded_token": " with"}, "28528": {"logprob": -1.955376148223877, "rank": 3, "decoded_token": " lying"}, "7283": {"logprob": -2.205376148223877, "rank": 4, "decoded_token": " looking"}, "15866": {"logprob": -3.017876148223877, "rank": 5, "decoded_token": " standing"}}, {"1408": {"logprob": -0.08554735779762268, "rank": 1, "decoded_token": " on"}, "1321": {"logprob": -3.71054744720459, "rank": 2, "decoded_token": " and"}, "3675": {"logprob": -3.96054744720459, "rank": 3, "decoded_token": " against"}, "41132": {"logprob": -4.71054744720459, "rank": 4, "decoded_token": " attent"}, "1454": {"logprob": -5.08554744720459, "rank": 5, "decoded_token": " with"}}, {"1261": {"logprob": -0.540847897529602, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -0.915847897529602, "rank": 2, "decoded_token": " wooden"}, "12603": {"logprob": -5.4158477783203125, "rank": 3, "decoded_token": " wood"}, "3977": {"logprob": -5.4158477783203125, "rank": 4, "decoded_token": " top"}, "17253": {"logprob": -6.2908477783203125, "rank": 5, "decoded_token": " weather"}}, {"32656": {"logprob": -0.025753861293196678, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -4.400753974914551, "rank": 2, "decoded_token": " rust"}, "12603": {"logprob": -5.275753974914551, "rank": 3, "decoded_token": " wood"}, "3403": 
{"logprob": -5.400753974914551, "rank": 4, "decoded_token": " text"}, "17253": {"logprob": -6.963253974914551, "rank": 5, "decoded_token": " weather"}}, {"4691": {"logprob": -0.7265751957893372, "rank": 1, "decoded_token": " surface"}, "11237": {"logprob": -0.8515751957893372, "rank": 2, "decoded_token": " floor"}, "7042": {"logprob": -2.6015751361846924, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -5.2265753746032715, "rank": 4, "decoded_token": " deck"}, "1615": {"logprob": -5.7265753746032715, "rank": 5, "decoded_token": " pl"}}, {"1046": {"logprob": -0.4868825674057007, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -1.9868825674057007, "rank": 2, "decoded_token": ","}, "1321": {"logprob": -2.3618826866149902, "rank": 3, "decoded_token": " and"}, "1454": {"logprob": -2.6118826866149902, "rank": 4, "decoded_token": " with"}, "7283": {"logprob": -2.7368826866149902, "rank": 5, "decoded_token": " looking"}}, {"2": {"logprob": -0.0026643513701856136, "rank": 1, "decoded_token": "</s>"}, "1531": {"logprob": -6.502664566040039, "rank": 2, "decoded_token": " The"}, "1032": {"logprob": -6.877664566040039, "rank": 3, "decoded_token": " "}, "3730": {"logprob": -9.752664566040039, "rank": 4, "decoded_token": " There"}, "1256": {"logprob": -11.002664566040039, "rank": 5, "decoded_token": " "}}]], [[1049, 1046, 1349, 7244, 10575, 1454, 2327, 94766, 32961, 53048, 41132, 3923, 1408, 1261, 32656, 4691, 1626, 1050, 1046, 1349, 15375, 24361, 4521, 94973, 5669, 1278, 48932, 2425, 1261, 16152, 1121, 21283, 1046, 2], "1. A black dog with floppy ears sits attentively on a wooden surface.\n2. 
A vast mountain range stretches across the horizon under a cloudy sky.", [{"1049": {"logprob": -0.42824622988700867, "rank": 1, "decoded_token": "1"}, "1045": {"logprob": -1.553246259689331, "rank": 2, "decoded_token": "-"}, "1065": {"logprob": -2.428246259689331, "rank": 3, "decoded_token": "A"}, "1784": {"logprob": -4.053246021270752, "rank": 4, "decoded_token": "The"}, "69957": {"logprob": -4.428246021270752, "rank": 5, "decoded_token": "Sure"}}, {"1046": {"logprob": -1.811964830267243e-05, "rank": 1, "decoded_token": "."}, "1058": {"logprob": -11.875018119812012, "rank": 2, "decoded_token": ":"}, "3590": {"logprob": -12.250018119812012, "rank": 3, "decoded_token": ".A"}, "1065": {"logprob": -13.062518119812012, "rank": 4, "decoded_token": "A"}, "1041": {"logprob": -13.750018119812012, "rank": 5, "decoded_token": ")"}}, {"1349": {"logprob": -0.13647246360778809, "rank": 1, "decoded_token": " A"}, "1429": {"logprob": -2.386472463607788, "rank": 2, "decoded_token": " \""}, "1603": {"logprob": -3.886472463607788, "rank": 3, "decoded_token": " **"}, "11967": {"logprob": -5.011472702026367, "rank": 4, "decoded_token": " Image"}, "1531": {"logprob": -5.011472702026367, "rank": 5, "decoded_token": " The"}}, {"7244": {"logprob": -0.18561004102230072, "rank": 1, "decoded_token": " black"}, "38462": {"logprob": -3.185610055923462, "rank": 2, "decoded_token": " curious"}, "68076": {"logprob": -3.623110055923462, "rank": 3, "decoded_token": " cute"}, "4329": {"logprob": -3.935610055923462, "rank": 4, "decoded_token": " large"}, "74168": {"logprob": -4.373109817504883, "rank": 5, "decoded_token": " gloss"}}, {"10575": {"logprob": -0.17297746241092682, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.1729774475097656, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -3.1729774475097656, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -6.985477447509766, "rank": 4, "decoded_token": " Lab"}, "8636": {"logprob": -7.360477447509766, "rank": 
5, "decoded_token": " lab"}}, {"1454": {"logprob": -0.5785807967185974, "rank": 1, "decoded_token": " with"}, "53048": {"logprob": -1.2660808563232422, "rank": 2, "decoded_token": " sits"}, "1395": {"logprob": -3.016080856323242, "rank": 3, "decoded_token": " is"}, "22524": {"logprob": -3.578580856323242, "rank": 4, "decoded_token": " lies"}, "18970": {"logprob": -3.703580856323242, "rank": 5, "decoded_token": " sitting"}}, {"2327": {"logprob": -1.2709298133850098, "rank": 1, "decoded_token": " fl"}, "1261": {"logprob": -1.3959298133850098, "rank": 2, "decoded_token": " a"}, "17300": {"logprob": -1.8959298133850098, "rank": 3, "decoded_token": " soul"}, "100089": {"logprob": -2.6459298133850098, "rank": 4, "decoded_token": " expressive"}, "6444": {"logprob": -3.1459298133850098, "rank": 5, "decoded_token": " soft"}}, {"94766": {"logprob": -0.002432247158139944, "rank": 1, "decoded_token": "oppy"}, "124603": {"logprob": -6.377432346343994, "rank": 2, "decoded_token": "uffy"}, "1484": {"logprob": -7.877432346343994, "rank": 3, "decoded_token": "op"}, "24897": {"logprob": -8.877431869506836, "rank": 4, "decoded_token": "appy"}, "102477": {"logprob": -9.752431869506836, "rank": 5, "decoded_token": "opping"}}, {"32961": {"logprob": -5.113947918289341e-05, "rank": 1, "decoded_token": " ears"}, "16962": {"logprob": -11.312551498413086, "rank": 2, "decoded_token": " ear"}, "5731": {"logprob": -11.750051498413086, "rank": 3, "decoded_token": " eyes"}, "3351": {"logprob": -12.000051498413086, "rank": 4, "decoded_token": " years"}, "42071": {"logprob": -13.000051498413086, "rank": 5, "decoded_token": " cheeks"}}, {"53048": {"logprob": -0.6131591200828552, "rank": 1, "decoded_token": " sits"}, "10637": {"logprob": -1.9881591796875, "rank": 2, "decoded_token": " looks"}, "1321": {"logprob": -2.4256591796875, "rank": 3, "decoded_token": " and"}, "1395": {"logprob": -2.6756591796875, "rank": 4, "decoded_token": " is"}, "18970": {"logprob": -3.0506591796875, "rank": 5, 
"decoded_token": " sitting"}}, {"41132": {"logprob": -0.36187249422073364, "rank": 1, "decoded_token": " attent"}, "1408": {"logprob": -2.361872434616089, "rank": 2, "decoded_token": " on"}, "106534": {"logprob": -2.424372434616089, "rank": 3, "decoded_token": " calmly"}, "12276": {"logprob": -2.611872434616089, "rank": 4, "decoded_token": " alert"}, "6482": {"logprob": -5.174372673034668, "rank": 5, "decoded_token": " patient"}}, {"3923": {"logprob": -8.451581379631534e-05, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -9.50008487701416, "rank": 2, "decoded_token": "ive"}, "6655": {"logprob": -11.87508487701416, "rank": 3, "decoded_token": "atively"}, "3929": {"logprob": -14.00008487701416, "rank": 4, "decoded_token": "ently"}, "47885": {"logprob": -14.75008487701416, "rank": 5, "decoded_token": "edly"}}, {"1408": {"logprob": -0.058125678449869156, "rank": 1, "decoded_token": " on"}, "3675": {"logprob": -3.1831257343292236, "rank": 2, "decoded_token": " against"}, "1294": {"logprob": -4.9331254959106445, "rank": 3, "decoded_token": " in"}, "7283": {"logprob": -5.8081254959106445, "rank": 4, "decoded_token": " looking"}, "1044": {"logprob": -5.9331254959106445, "rank": 5, "decoded_token": ","}}, {"1261": {"logprob": -0.21029606461524963, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -1.7102960348129272, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": -5.710296154022217, "rank": 3, "decoded_token": " weather"}, "44130": {"logprob": -6.085296154022217, "rank": 4, "decoded_token": " rust"}, "12603": {"logprob": -6.960296154022217, "rank": 5, "decoded_token": " wood"}}, {"32656": {"logprob": -0.08548421412706375, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -2.710484266281128, "rank": 2, "decoded_token": " rust"}, "17253": {"logprob": -4.710484027862549, "rank": 3, "decoded_token": " weather"}, "12603": {"logprob": -5.960484027862549, "rank": 4, "decoded_token": " wood"}, "3403": {"logprob": -5.960484027862549, "rank": 
5, "decoded_token": " text"}}, {"4691": {"logprob": -0.7172377109527588, "rank": 1, "decoded_token": " surface"}, "11237": {"logprob": -0.8422377109527588, "rank": 2, "decoded_token": " floor"}, "7042": {"logprob": -2.842237710952759, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -4.21723747253418, "rank": 4, "decoded_token": " deck"}, "92504": {"logprob": -6.21723747253418, "rank": 5, "decoded_token": " backdrop"}}, {"1626": {"logprob": -0.12971943616867065, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -2.3797194957733154, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -4.129719257354736, "rank": 3, "decoded_token": "."}, "1338": {"logprob": -5.129719257354736, "rank": 4, "decoded_token": ".\n\n"}, "7283": {"logprob": -5.504719257354736, "rank": 5, "decoded_token": " looking"}}, {"1050": {"logprob": -0.00015698630886618048, "rank": 1, "decoded_token": "2"}, "1256": {"logprob": -9.125157356262207, "rank": 2, "decoded_token": " "}, "1032": {"logprob": -10.875157356262207, "rank": 3, "decoded_token": " "}, "1293": {"logprob": -11.750157356262207, "rank": 4, "decoded_token": " "}, "1051": {"logprob": -12.125157356262207, "rank": 5, "decoded_token": "3"}}, {"1046": {"logprob": -6.6756979322235566e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.062506675720215, "rank": 2, "decoded_token": ".A"}, "1626": {"logprob": -13.187506675720215, "rank": 3, "decoded_token": ".\n"}, "1338": {"logprob": -14.750006675720215, "rank": 4, "decoded_token": ".\n\n"}, "1058": {"logprob": -14.937506675720215, "rank": 5, "decoded_token": ":"}}, {"1349": {"logprob": -0.5863217115402222, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.4613217115402222, "rank": 2, "decoded_token": " Maj"}, "37159": {"logprob": -2.2113218307495117, "rank": 3, "decoded_token": " Snow"}, "113465": {"logprob": -3.8988218307495117, "rank": 4, "decoded_token": " Rug"}, "1531": {"logprob": -3.9613218307495117, "rank": 5, "decoded_token": " The"}}, 
{"15375": {"logprob": -0.639299213886261, "rank": 1, "decoded_token": " vast"}, "37849": {"logprob": -2.014299154281616, "rank": 2, "decoded_token": " breat"}, "61082": {"logprob": -2.389299154281616, "rank": 3, "decoded_token": " panor"}, "10726": {"logprob": -3.139299154281616, "rank": 4, "decoded_token": " scen"}, "2169": {"logprob": -3.201799154281616, "rank": 5, "decoded_token": " ser"}}, {"24361": {"logprob": -0.702845573425293, "rank": 1, "decoded_token": " mountain"}, "127945": {"logprob": -1.952845573425293, "rank": 2, "decoded_token": " mountainous"}, "1044": {"logprob": -2.077845573425293, "rank": 3, "decoded_token": ","}, "4521": {"logprob": -2.327845573425293, "rank": 4, "decoded_token": " range"}, "28035": {"logprob": -2.452845573425293, "rank": 5, "decoded_token": " landscape"}}, {"4521": {"logprob": -0.07058162242174149, "rank": 1, "decoded_token": " range"}, "28035": {"logprob": -2.6955816745758057, "rank": 2, "decoded_token": " landscape"}, "37691": {"logprob": -8.320581436157227, "rank": 3, "decoded_token": " valley"}, "12248": {"logprob": -9.445581436157227, "rank": 4, "decoded_token": " peak"}, "13327": {"logprob": -9.695581436157227, "rank": 5, "decoded_token": " scene"}}, {"94973": {"logprob": -1.1164050102233887, "rank": 1, "decoded_token": " stretches"}, "1454": {"logprob": -1.1789050102233887, "rank": 2, "decoded_token": " with"}, "2425": {"logprob": -1.8664050102233887, "rank": 3, "decoded_token": " under"}, "1395": {"logprob": -2.5539050102233887, "rank": 4, "decoded_token": " is"}, "13875": {"logprob": -2.9914050102233887, "rank": 5, "decoded_token": " covered"}}, {"5669": {"logprob": -0.3286789357662201, "rank": 1, "decoded_token": " across"}, "1848": {"logprob": -2.078678846359253, "rank": 2, "decoded_token": " out"}, "2425": {"logprob": -2.328678846359253, "rank": 3, "decoded_token": " under"}, "2203": {"logprob": -3.328678846359253, "rank": 4, "decoded_token": " into"}, "8994": {"logprob": -4.766179084777832, "rank": 5, 
"decoded_token": " towards"}}, {"1278": {"logprob": -0.039004355669021606, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -3.289004325866699, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -7.414004325866699, "rank": 3, "decoded_token": " an"}, "2425": {"logprob": -9.0390043258667, "rank": 4, "decoded_token": " under"}, "1454": {"logprob": -9.2265043258667, "rank": 5, "decoded_token": " with"}}, {"48932": {"logprob": -0.2659883201122284, "rank": 1, "decoded_token": " horizon"}, "21283": {"logprob": -2.140988349914551, "rank": 2, "decoded_token": " sky"}, "3937": {"logprob": -3.015988349914551, "rank": 3, "decoded_token": " image"}, "28035": {"logprob": -3.515988349914551, "rank": 4, "decoded_token": " landscape"}, "3044": {"logprob": -4.265988349914551, "rank": 5, "decoded_token": " sk"}}, {"2425": {"logprob": -0.5356141328811646, "rank": 1, "decoded_token": " under"}, "1044": {"logprob": -1.5356141328811646, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -1.7856141328811646, "rank": 3, "decoded_token": " with"}, "25136": {"logprob": -3.785614013671875, "rank": 4, "decoded_token": " beneath"}, "1408": {"logprob": -5.785614013671875, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -0.006081883795559406, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -5.506082057952881, "rank": 2, "decoded_token": " an"}, "16152": {"logprob": -7.631082057952881, "rank": 3, "decoded_token": " cloud"}, "6133": {"logprob": -7.881082057952881, "rank": 4, "decoded_token": " clear"}, "2136": {"logprob": -8.006081581115723, "rank": 5, "decoded_token": " over"}}, {"16152": {"logprob": -0.6749536991119385, "rank": 1, "decoded_token": " cloud"}, "6133": {"logprob": -1.4249536991119385, "rank": 2, "decoded_token": " clear"}, "18416": {"logprob": -2.8624536991119385, "rank": 3, "decoded_token": " haz"}, "27254": {"logprob": -2.9874536991119385, "rank": 4, "decoded_token": " partly"}, "4391": {"logprob": -3.2374536991119385, "rank": 5, "decoded_token": " 
light"}}, {"1121": {"logprob": -0.10860869288444519, "rank": 1, "decoded_token": "y"}, "4527": {"logprob": -2.9836087226867676, "rank": 2, "decoded_token": "less"}, "1286": {"logprob": -3.4836087226867676, "rank": 3, "decoded_token": "ed"}, "77187": {"logprob": -4.608608722686768, "rank": 4, "decoded_token": "-filled"}, "114525": {"logprob": -4.858608722686768, "rank": 5, "decoded_token": "-covered"}}, {"21283": {"logprob": -0.002785732736811042, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -6.252785682678223, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -7.627785682678223, "rank": 3, "decoded_token": ","}, "26549": {"logprob": -8.627785682678223, "rank": 4, "decoded_token": " gray"}, "34052": {"logprob": -9.377785682678223, "rank": 5, "decoded_token": " grey"}}, {"1046": {"logprob": -0.047878943383693695, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -3.1728789806365967, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -5.547878742218018, "rank": 3, "decoded_token": " with"}, "1338": {"logprob": -7.172878742218018, "rank": 4, "decoded_token": ".\n\n"}, "1294": {"logprob": -9.172879219055176, "rank": 5, "decoded_token": " in"}}, {"2": {"logprob": -1.3351351299206726e-05, "rank": 1, "decoded_token": "</s>"}, "1032": {"logprob": -11.25001335144043, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -16.00001335144043, "rank": 3, "decoded_token": " "}, "1319": {"logprob": -17.25001335144043, "rank": 4, "decoded_token": " ("}, "1766": {"logprob": -18.50001335144043, "rank": 5, "decoded_token": " ["}}]], [[1049, 1046, 1349, 7244, 10575, 53048, 41132, 3923, 1408, 1261, 32656, 11237, 1626, 1050, 1046, 1349, 15375, 24361, 4521, 94973, 5669, 1278, 48932, 2425, 1261, 16152, 1121, 21283, 1626, 1051, 1046, 8342, 71284, 7377, 1394, 22140, 1294, 1278, 27208, 1513, 97558, 1626, 1052, 1046, 1349, 53301, 59396, 3549, 13335, 2645, 1261, 1295, 3506, 11223, 12097, 1046, 2], "1. A black dog sits attentively on a wooden floor.\n2. 
A vast mountain range stretches across the horizon under a cloudy sky.\n3. Surfers wait for waves in the ocean at sunset.\n4. A winding gravel path leads through a lush green park.", [{"1049": {"logprob": -0.05001257359981537, "rank": 1, "decoded_token": "1"}, "1045": {"logprob": -3.1750125885009766, "rank": 2, "decoded_token": "-"}, "69957": {"logprob": -5.925012588500977, "rank": 3, "decoded_token": "Sure"}, "11745": {"logprob": -6.425012588500977, "rank": 4, "decoded_token": "Here"}, "1065": {"logprob": -6.425012588500977, "rank": 5, "decoded_token": "A"}}, {"1046": {"logprob": -8.702239938429557e-06, "rank": 1, "decoded_token": "."}, "1058": {"logprob": -12.000008583068848, "rank": 2, "decoded_token": ":"}, "3590": {"logprob": -13.375008583068848, "rank": 3, "decoded_token": ".A"}, "1041": {"logprob": -14.750008583068848, "rank": 4, "decoded_token": ")"}, "1065": {"logprob": -15.687508583068848, "rank": 5, "decoded_token": "A"}}, {"1349": {"logprob": -0.14196155965328217, "rank": 1, "decoded_token": " A"}, "1429": {"logprob": -2.2669615745544434, "rank": 2, "decoded_token": " \""}, "1531": {"logprob": -4.516961574554443, "rank": 3, "decoded_token": " The"}, "11967": {"logprob": -4.516961574554443, "rank": 4, "decoded_token": " Image"}, "1603": {"logprob": -5.391961574554443, "rank": 5, "decoded_token": " **"}}, {"7244": {"logprob": -0.14889711141586304, "rank": 1, "decoded_token": " black"}, "68076": {"logprob": -3.398897171020508, "rank": 2, "decoded_token": " cute"}, "6231": {"logprob": -3.961397171020508, "rank": 3, "decoded_token": " close"}, "38462": {"logprob": -4.273897171020508, "rank": 4, "decoded_token": " curious"}, "4329": {"logprob": -4.398897171020508, "rank": 5, "decoded_token": " large"}}, {"10575": {"logprob": -0.12091328203678131, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.37091326713562, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -3.99591326713562, "rank": 3, "decoded_token": " Labrador"}, "15812": 
{"logprob": -7.683413505554199, "rank": 4, "decoded_token": " Lab"}, "8636": {"logprob": -7.808413505554199, "rank": 5, "decoded_token": " lab"}}, {"53048": {"logprob": -0.8691943287849426, "rank": 1, "decoded_token": " sits"}, "1454": {"logprob": -1.1191942691802979, "rank": 2, "decoded_token": " with"}, "1395": {"logprob": -2.431694269180298, "rank": 3, "decoded_token": " is"}, "18970": {"logprob": -2.744194269180298, "rank": 4, "decoded_token": " sitting"}, "22524": {"logprob": -3.681694269180298, "rank": 5, "decoded_token": " lies"}}, {"41132": {"logprob": -0.5939557552337646, "rank": 1, "decoded_token": " attent"}, "106534": {"logprob": -1.2814557552337646, "rank": 2, "decoded_token": " calmly"}, "12276": {"logprob": -2.8439557552337646, "rank": 3, "decoded_token": " alert"}, "1408": {"logprob": -2.8439557552337646, "rank": 4, "decoded_token": " on"}, "6482": {"logprob": -4.968955993652344, "rank": 5, "decoded_token": " patient"}}, {"3923": {"logprob": -0.00010084597306558862, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -9.500101089477539, "rank": 2, "decoded_token": "ive"}, "6655": {"logprob": -10.875101089477539, "rank": 3, "decoded_token": "atively"}, "3929": {"logprob": -13.000101089477539, "rank": 4, "decoded_token": "ently"}, "47885": {"logprob": -13.750101089477539, "rank": 5, "decoded_token": "edly"}}, {"1408": {"logprob": -0.056158196181058884, "rank": 1, "decoded_token": " on"}, "3675": {"logprob": -3.6811583042144775, "rank": 2, "decoded_token": " against"}, "1454": {"logprob": -4.306158065795898, "rank": 3, "decoded_token": " with"}, "1294": {"logprob": -5.181158065795898, "rank": 4, "decoded_token": " in"}, "7283": {"logprob": -5.431158065795898, "rank": 5, "decoded_token": " looking"}}, {"1261": {"logprob": -0.33056098222732544, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -1.3305609226226807, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": -4.70556116104126, "rank": 3, "decoded_token": " weather"}, "44130": 
{"logprob": -5.83056116104126, "rank": 4, "decoded_token": " rust"}, "12603": {"logprob": -6.58056116104126, "rank": 5, "decoded_token": " wood"}}, {"32656": {"logprob": -0.07081110030412674, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -2.9458110332489014, "rank": 2, "decoded_token": " rust"}, "17253": {"logprob": -4.6958112716674805, "rank": 3, "decoded_token": " weather"}, "12603": {"logprob": -5.8208112716674805, "rank": 4, "decoded_token": " wood"}, "3403": {"logprob": -6.0708112716674805, "rank": 5, "decoded_token": " text"}}, {"11237": {"logprob": -0.6428436636924744, "rank": 1, "decoded_token": " floor"}, "4691": {"logprob": -1.0178437232971191, "rank": 2, "decoded_token": " surface"}, "7042": {"logprob": -2.642843723297119, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -3.517843723297119, "rank": 4, "decoded_token": " deck"}, "92504": {"logprob": -6.017843723297119, "rank": 5, "decoded_token": " backdrop"}}, {"1626": {"logprob": -0.7337945103645325, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -0.8587945103645325, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -3.3587944507598877, "rank": 3, "decoded_token": " with"}, "7283": {"logprob": -3.6087944507598877, "rank": 4, "decoded_token": " looking"}, "1321": {"logprob": -4.108794689178467, "rank": 5, "decoded_token": " and"}}, {"1050": {"logprob": -1.0132738680113107e-05, "rank": 1, "decoded_token": "2"}, "1051": {"logprob": -11.75001049041748, "rank": 2, "decoded_token": "3"}, "1256": {"logprob": -14.00001049041748, "rank": 3, "decoded_token": " "}, "1049": {"logprob": -14.62501049041748, "rank": 4, "decoded_token": "1"}, "1032": {"logprob": -14.62501049041748, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -2.861018856492592e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.43750286102295, "rank": 2, "decoded_token": ".A"}, "4700": {"logprob": -15.37500286102295, "rank": 3, "decoded_token": ".M"}, "1626": {"logprob": 
-15.37500286102295, "rank": 4, "decoded_token": ".\n"}, "3051": {"logprob": -15.87500286102295, "rank": 5, "decoded_token": ".S"}}, {"1349": {"logprob": -0.6794427633285522, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.9294427633285522, "rank": 2, "decoded_token": " Maj"}, "37159": {"logprob": -2.116942882537842, "rank": 3, "decoded_token": " Snow"}, "27260": {"logprob": -2.616942882537842, "rank": 4, "decoded_token": " Mountain"}, "113465": {"logprob": -2.866942882537842, "rank": 5, "decoded_token": " Rug"}}, {"15375": {"logprob": -0.9194075465202332, "rank": 1, "decoded_token": " vast"}, "10726": {"logprob": -2.294407606124878, "rank": 2, "decoded_token": " scen"}, "4521": {"logprob": -2.356907606124878, "rank": 3, "decoded_token": " range"}, "122203": {"logprob": -2.419407606124878, "rank": 4, "decoded_token": " rugged"}, "61082": {"logprob": -2.856907606124878, "rank": 5, "decoded_token": " panor"}}, {"24361": {"logprob": -0.5804797410964966, "rank": 1, "decoded_token": " mountain"}, "127945": {"logprob": -1.8304797410964966, "rank": 2, "decoded_token": " mountainous"}, "28035": {"logprob": -2.455479621887207, "rank": 3, "decoded_token": " landscape"}, "4521": {"logprob": -2.455479621887207, "rank": 4, "decoded_token": " range"}, "1044": {"logprob": -2.705479621887207, "rank": 5, "decoded_token": ","}}, {"4521": {"logprob": -0.0493546724319458, "rank": 1, "decoded_token": " range"}, "28035": {"logprob": -3.0493545532226562, "rank": 2, "decoded_token": " landscape"}, "37691": {"logprob": -8.424354553222656, "rank": 3, "decoded_token": " valley"}, "13327": {"logprob": -9.049354553222656, "rank": 4, "decoded_token": " scene"}, "3719": {"logprob": -9.799354553222656, "rank": 5, "decoded_token": " view"}}, {"94973": {"logprob": -0.6676871180534363, "rank": 1, "decoded_token": " stretches"}, "2425": {"logprob": -1.792687177658081, "rank": 2, "decoded_token": " under"}, "1395": {"logprob": -2.292687177658081, "rank": 3, "decoded_token": " is"}, "1454": 
{"logprob": -2.730187177658081, "rank": 4, "decoded_token": " with"}, "7038": {"logprob": -3.292687177658081, "rank": 5, "decoded_token": " extends"}}, {"5669": {"logprob": -0.4542117118835449, "rank": 1, "decoded_token": " across"}, "2425": {"logprob": -1.454211711883545, "rank": 2, "decoded_token": " under"}, "1848": {"logprob": -2.454211711883545, "rank": 3, "decoded_token": " out"}, "2203": {"logprob": -4.204211711883545, "rank": 4, "decoded_token": " into"}, "25136": {"logprob": -4.641711711883545, "rank": 5, "decoded_token": " beneath"}}, {"1278": {"logprob": -0.23009441792964935, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -1.6050944328308105, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -5.6050944328308105, "rank": 3, "decoded_token": " an"}, "2425": {"logprob": -7.2300944328308105, "rank": 4, "decoded_token": " under"}, "1454": {"logprob": -10.167593955993652, "rank": 5, "decoded_token": " with"}}, {"48932": {"logprob": -0.3072167932987213, "rank": 1, "decoded_token": " horizon"}, "21283": {"logprob": -1.932216763496399, "rank": 2, "decoded_token": " sky"}, "3937": {"logprob": -3.1822168827056885, "rank": 3, "decoded_token": " image"}, "28035": {"logprob": -3.6822168827056885, "rank": 4, "decoded_token": " landscape"}, "3044": {"logprob": -3.6822168827056885, "rank": 5, "decoded_token": " sk"}}, {"2425": {"logprob": -0.2914469838142395, "rank": 1, "decoded_token": " under"}, "1044": {"logprob": -2.4164469242095947, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -2.5414469242095947, "rank": 3, "decoded_token": " with"}, "1626": {"logprob": -3.7914469242095947, "rank": 4, "decoded_token": ".\n"}, "1408": {"logprob": -3.7914469242095947, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -0.0460360012948513, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -3.9210360050201416, "rank": 2, "decoded_token": " an"}, "16152": {"logprob": -4.1085357666015625, "rank": 3, "decoded_token": " cloud"}, "2136": {"logprob": 
-6.1710357666015625, "rank": 4, "decoded_token": " over"}, "6133": {"logprob": -6.4210357666015625, "rank": 5, "decoded_token": " clear"}}, {"16152": {"logprob": -0.20367540419101715, "rank": 1, "decoded_token": " cloud"}, "6133": {"logprob": -2.8286755084991455, "rank": 2, "decoded_token": " clear"}, "27254": {"logprob": -3.5161755084991455, "rank": 3, "decoded_token": " partly"}, "18416": {"logprob": -3.8286755084991455, "rank": 4, "decoded_token": " haz"}, "4391": {"logprob": -4.328675270080566, "rank": 5, "decoded_token": " light"}}, {"1121": {"logprob": -0.05241352692246437, "rank": 1, "decoded_token": "y"}, "1286": {"logprob": -3.8024134635925293, "rank": 2, "decoded_token": "ed"}, "77187": {"logprob": -4.552413463592529, "rank": 3, "decoded_token": "-filled"}, "4527": {"logprob": -4.802413463592529, "rank": 4, "decoded_token": "less"}, "114525": {"logprob": -4.927413463592529, "rank": 5, "decoded_token": "-covered"}}, {"21283": {"logprob": -0.0003716255014296621, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -8.750371932983398, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -9.375371932983398, "rank": 3, "decoded_token": ","}, "26549": {"logprob": -10.375371932983398, "rank": 4, "decoded_token": " gray"}, "34052": {"logprob": -11.250371932983398, "rank": 5, "decoded_token": " grey"}}, {"1626": {"logprob": -0.00012730741582345217, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -9.500126838684082, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -10.500126838684082, "rank": 3, "decoded_token": "."}, "1454": {"logprob": -10.875126838684082, "rank": 4, "decoded_token": " with"}, "1294": {"logprob": -13.250126838684082, "rank": 5, "decoded_token": " in"}}, {"1051": {"logprob": -3.2186455882765586e-06, "rank": 1, "decoded_token": "3"}, "1052": {"logprob": -12.75000286102295, "rank": 2, "decoded_token": "4"}, "1050": {"logprob": -15.00000286102295, "rank": 3, "decoded_token": "2"}, "1049": {"logprob": -16.937503814697266, 
"rank": 4, "decoded_token": "1"}, "1032": {"logprob": -17.875003814697266, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -1.6689286894688848e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -14.687501907348633, "rank": 2, "decoded_token": ".A"}, "5226": {"logprob": -15.687501907348633, "rank": 3, "decoded_token": ".D"}, "6847": {"logprob": -15.812501907348633, "rank": 4, "decoded_token": ".T"}, "48426": {"logprob": -16.812501907348633, "rank": 5, "decoded_token": ".The"}}, {"8342": {"logprob": -0.5730464458465576, "rank": 1, "decoded_token": " Sur"}, "1349": {"logprob": -1.6980464458465576, "rank": 2, "decoded_token": " A"}, "22468": {"logprob": -2.5730464458465576, "rank": 3, "decoded_token": " Several"}, "1488": {"logprob": -2.6980464458465576, "rank": 4, "decoded_token": " W"}, "15035": {"logprob": -3.1980464458465576, "rank": 5, "decoded_token": " People"}}, {"71284": {"logprob": -0.0033258858602494, "rank": 1, "decoded_token": "fers"}, "1102": {"logprob": -5.878325939178467, "rank": 2, "decoded_token": "f"}, "1726": {"logprob": -7.628325939178467, "rank": 3, "decoded_token": "fer"}, "61888": {"logprob": -12.253325462341309, "rank": 4, "decoded_token": "fline"}, "2119": {"logprob": -13.003325462341309, "rank": 5, "decoded_token": "fter"}}, {"7377": {"logprob": -1.4996429681777954, "rank": 1, "decoded_token": " wait"}, "1584": {"logprob": -1.7496429681777954, "rank": 2, "decoded_token": " are"}, "88014": {"logprob": -1.9371429681777954, "rank": 3, "decoded_token": " paddle"}, "1294": {"logprob": -1.9371429681777954, "rank": 4, "decoded_token": " in"}, "24434": {"logprob": -2.187142848968506, "rank": 5, "decoded_token": " ride"}}, {"1394": {"logprob": -0.6126739382743835, "rank": 1, "decoded_token": " for"}, "1294": {"logprob": -0.9876739382743835, "rank": 2, "decoded_token": " in"}, "1408": {"logprob": -2.7376739978790283, "rank": 3, "decoded_token": " on"}, "6482": {"logprob": -4.425173759460449, "rank": 4, "decoded_token": " patient"}, 
"1321": {"logprob": -5.612673759460449, "rank": 5, "decoded_token": " and"}}, {"22140": {"logprob": -0.00729279313236475, "rank": 1, "decoded_token": " waves"}, "1278": {"logprob": -5.632292747497559, "rank": 2, "decoded_token": " the"}, "1261": {"logprob": -5.757292747497559, "rank": 3, "decoded_token": " a"}, "39460": {"logprob": -8.257292747497559, "rank": 4, "decoded_token": " incoming"}, "1321": {"logprob": -9.757292747497559, "rank": 5, "decoded_token": " and"}}, {"1294": {"logprob": -0.3071398138999939, "rank": 1, "decoded_token": " in"}, "1408": {"logprob": -2.1821398735046387, "rank": 2, "decoded_token": " on"}, "1513": {"logprob": -2.4321398735046387, "rank": 3, "decoded_token": " at"}, "3016": {"logprob": -3.6821398735046387, "rank": 4, "decoded_token": " while"}, "1435": {"logprob": -3.8071398735046387, "rank": 5, "decoded_token": " as"}}, {"1278": {"logprob": -0.004646694287657738, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -6.1921467781066895, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -6.9421467781066895, "rank": 3, "decoded_token": " an"}, "40466": {"logprob": -7.2546467781066895, "rank": 4, "decoded_token": " shallow"}, "26517": {"logprob": -7.8796467781066895, "rank": 5, "decoded_token": " calm"}}, {"27208": {"logprob": -0.0658877044916153, "rank": 1, "decoded_token": " ocean"}, "7786": {"logprob": -3.440887689590454, "rank": 2, "decoded_token": " distance"}, "5124": {"logprob": -5.253387928009033, "rank": 3, "decoded_token": " early"}, "26517": {"logprob": -5.315887928009033, "rank": 4, "decoded_token": " calm"}, "11196": {"logprob": -5.378387928009033, "rank": 5, "decoded_token": " sea"}}, {"1513": {"logprob": -1.1504861116409302, "rank": 1, "decoded_token": " at"}, "1435": {"logprob": -1.2754861116409302, "rank": 2, "decoded_token": " as"}, "3184": {"logprob": -1.4004861116409302, "rank": 3, "decoded_token": " during"}, "3016": {"logprob": -2.9004859924316406, "rank": 4, "decoded_token": " while"}, "6117": {"logprob": 
-3.1504859924316406, "rank": 5, "decoded_token": " near"}}, {"97558": {"logprob": -0.12151996046304703, "rank": 1, "decoded_token": " sunset"}, "11729": {"logprob": -2.8715200424194336, "rank": 2, "decoded_token": " sun"}, "1266": {"logprob": -3.4965200424194336, "rank": 3, "decoded_token": " d"}, "54507": {"logprob": -3.9965200424194336, "rank": 4, "decoded_token": " dawn"}, "1261": {"logprob": -5.121520042419434, "rank": 5, "decoded_token": " a"}}, {"1626": {"logprob": -0.3073118329048157, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -2.182311773300171, "rank": 2, "decoded_token": ","}, "3016": {"logprob": -2.557311773300171, "rank": 3, "decoded_token": " while"}, "1454": {"logprob": -3.432311773300171, "rank": 4, "decoded_token": " with"}, "6117": {"logprob": -4.05731201171875, "rank": 5, "decoded_token": " near"}}, {"1052": {"logprob": -3.3378546504536644e-06, "rank": 1, "decoded_token": "4"}, "1051": {"logprob": -13.25000286102295, "rank": 2, "decoded_token": "3"}, "1049": {"logprob": -13.93750286102295, "rank": 3, "decoded_token": "1"}, "1053": {"logprob": -14.43750286102295, "rank": 4, "decoded_token": "5"}, "1032": {"logprob": -16.687503814697266, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -1.6689286894688848e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.500001907348633, "rank": 2, "decoded_token": ".A"}, "6847": {"logprob": -16.437501907348633, "rank": 3, "decoded_token": ".T"}, "1044": {"logprob": -17.312501907348633, "rank": 4, "decoded_token": ","}, "1349": {"logprob": -17.375001907348633, "rank": 5, "decoded_token": " A"}}, {"1349": {"logprob": -0.004292916506528854, "rank": 1, "decoded_token": " A"}, "2048": {"logprob": -5.629292964935303, "rank": 2, "decoded_token": " An"}, "10638": {"logprob": -7.879292964935303, "rank": 3, "decoded_token": " Two"}, "111463": {"logprob": -10.004292488098145, "rank": 4, "decoded_token": " Trees"}, "1531": {"logprob": -10.879292488098145, "rank": 5, "decoded_token": " The"}}, 
{"53301": {"logprob": -1.5473321676254272, "rank": 1, "decoded_token": " winding"}, "15192": {"logprob": -1.7348321676254272, "rank": 2, "decoded_token": " narrow"}, "47945": {"logprob": -2.109832286834717, "rank": 3, "decoded_token": " dirt"}, "2169": {"logprob": -2.609832286834717, "rank": 4, "decoded_token": " ser"}, "59396": {"logprob": -2.672332286834717, "rank": 5, "decoded_token": " gravel"}}, {"59396": {"logprob": -0.8954829573631287, "rank": 1, "decoded_token": " gravel"}, "3549": {"logprob": -1.1454830169677734, "rank": 2, "decoded_token": " path"}, "47945": {"logprob": -1.6454830169677734, "rank": 3, "decoded_token": " dirt"}, "14801": {"logprob": -3.2704830169677734, "rank": 4, "decoded_token": " pathway"}, "15551": {"logprob": -4.270483016967773, "rank": 5, "decoded_token": " stone"}}, {"3549": {"logprob": -0.02117946185171604, "rank": 1, "decoded_token": " path"}, "14801": {"logprob": -3.896179437637329, "rank": 2, "decoded_token": " pathway"}, "33659": {"logprob": -8.14617919921875, "rank": 3, "decoded_token": " trail"}, "9480": {"logprob": -9.64617919921875, "rank": 4, "decoded_token": " road"}, "7368": {"logprob": -9.64617919921875, "rank": 5, "decoded_token": "path"}}, {"13335": {"logprob": -0.18962937593460083, "rank": 1, "decoded_token": " leads"}, "39985": {"logprob": -2.752129316329956, "rank": 2, "decoded_token": " cuts"}, "1639": {"logprob": -3.877129316329956, "rank": 3, "decoded_token": " me"}, "11500": {"logprob": -3.939629316329956, "rank": 4, "decoded_token": " runs"}, "2645": {"logprob": -4.189629554748535, "rank": 5, "decoded_token": " through"}}, {"2645": {"logprob": -0.05349981039762497, "rank": 1, "decoded_token": " through"}, "8994": {"logprob": -4.053499698638916, "rank": 2, "decoded_token": " towards"}, "2396": {"logprob": -4.303499698638916, "rank": 3, "decoded_token": " between"}, "2203": {"logprob": -4.678499698638916, "rank": 4, "decoded_token": " into"}, "1317": {"logprob": -5.678499698638916, "rank": 5, "decoded_token": " 
to"}}, {"1261": {"logprob": -0.017386287450790405, "rank": 1, "decoded_token": " a"}, "11223": {"logprob": -4.892386436462402, "rank": 2, "decoded_token": " green"}, "1295": {"logprob": -5.017386436462402, "rank": 3, "decoded_token": " l"}, "23170": {"logprob": -6.642386436462402, "rank": 4, "decoded_token": " grass"}, "1420": {"logprob": -7.267386436462402, "rank": 5, "decoded_token": " an"}}, {"1295": {"logprob": -0.9453322887420654, "rank": 1, "decoded_token": " l"}, "11223": {"logprob": -1.3203322887420654, "rank": 2, "decoded_token": " green"}, "23170": {"logprob": -1.9453322887420654, "rank": 3, "decoded_token": " grass"}, "12097": {"logprob": -2.4453322887420654, "rank": 4, "decoded_token": " park"}, "26428": {"logprob": -3.3203322887420654, "rank": 5, "decoded_token": " garden"}}, {"3506": {"logprob": -6.556489552167477e-06, "rank": 1, "decoded_token": "ush"}, "1374": {"logprob": -12.000006675720215, "rank": 2, "decoded_token": "us"}, "90716": {"logprob": -15.625006675720215, "rank": 3, "decoded_token": "USH"}, "16938": {"logprob": -15.875006675720215, "rank": 4, "decoded_token": "usher"}, "13326": {"logprob": -17.1875057220459, "rank": 5, "decoded_token": "inden"}}, {"11223": {"logprob": -0.3668670654296875, "rank": 1, "decoded_token": " green"}, "1044": {"logprob": -1.3668670654296875, "rank": 2, "decoded_token": ","}, "26428": {"logprob": -3.4918670654296875, "rank": 3, "decoded_token": " garden"}, "12097": {"logprob": -4.1168670654296875, "rank": 4, "decoded_token": " park"}, "23170": {"logprob": -5.8668670654296875, "rank": 5, "decoded_token": " grass"}}, {"12097": {"logprob": -0.5530153512954712, "rank": 1, "decoded_token": " park"}, "3727": {"logprob": -2.0530152320861816, "rank": 2, "decoded_token": " field"}, "28035": {"logprob": -2.1780152320861816, "rank": 3, "decoded_token": " landscape"}, "26428": {"logprob": -2.3030152320861816, "rank": 4, "decoded_token": " garden"}, "4457": {"logprob": -2.8030152320861816, "rank": 5, "decoded_token": " 
area"}}, {"1046": {"logprob": -0.7924000024795532, "rank": 1, "decoded_token": "."}, "1454": {"logprob": -1.2924000024795532, "rank": 2, "decoded_token": " with"}, "8994": {"logprob": -2.7923998832702637, "rank": 3, "decoded_token": " towards"}, "54410": {"logprob": -3.5423998832702637, "rank": 4, "decoded_token": " lined"}, "2425": {"logprob": -3.5423998832702637, "rank": 5, "decoded_token": " under"}}, {"2": {"logprob": -1.9073468138230965e-06, "rank": 1, "decoded_token": "</s>"}, "1032": {"logprob": -13.250001907348633, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -16.250001907348633, "rank": 3, "decoded_token": " "}, "1293": {"logprob": -19.000001907348633, "rank": 4, "decoded_token": " "}, "1319": {"logprob": -20.000001907348633, "rank": 5, "decoded_token": " ("}}]]]
\ No newline at end of file
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
"""
import pytest
from vllm.sampling_params import SamplingParams
# Apply the `vlm` pytest marker to every test collected from this module.
pytestmark = pytest.mark.vlm
# Model identifiers that `test_models` is parametrized over.
MODELS = ["mistralai/Pixtral-12B-2409"]
@pytest.mark.skip(
    reason=
    "Model is too big, test passed on A100 locally but will OOM on CI machine."
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models(
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
) -> None:
    """Check greedy chat completions for remote images against fixed text.

    For each (image URL, expected sentence) pair, a single-turn chat message
    holding the text question and the image URL is sent through
    ``vllm_model.model.chat`` and the first completion's text must equal the
    expected sentence exactly.

    NOTE(review): ``example_prompts``, ``max_tokens`` and ``num_logprobs``
    are accepted but not used in the body; the sampling limit is the
    hard-coded ``max_tokens=512`` below, not the parametrized value.
    """
    question = "Describe the image in one short sentence."

    # (image URL, expected completion) pairs, checked in this order.
    cases = [
        (
            "https://picsum.photos/id/237/200/300",
            "The image depicts a black dog lying on a wooden surface, looking directly at the camera with a calm expression.",  # noqa
        ),
        (
            "https://picsum.photos/seed/picsum/200/300",
            "The image depicts a serene landscape with a snow-covered mountain under a pastel-colored sky during sunset.",  # noqa
        ),
    ]

    # temperature=0.0 so the comparison is against a deterministic decode.
    greedy_params = SamplingParams(max_tokens=512, temperature=0.0)

    with vllm_runner(model, dtype=dtype,
                     tokenizer_mode="mistral") as vllm_model:
        for image_url, expected_text in cases:
            text_part = {"type": "text", "text": question}
            image_part = {
                "type": "image_url",
                "image_url": {
                    "url": image_url
                },
            }
            messages = [{"role": "user", "content": [text_part, image_part]}]

            outputs = vllm_model.model.chat(messages,
                                            sampling_params=greedy_params)
            assert outputs[0].outputs[0].text == expected_text
import pathlib
from typing import List, Optional, Type
import pytest
from vllm.multimodal.utils import rescale_image_size
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close
# Apply the `vlm` pytest marker to every test collected from this module.
pytestmark = pytest.mark.vlm
# Models used by the text-only load-and-run test below.
text_only_models = [
"Qwen/Qwen-7B-Chat" # Has no visual component
]
# Models used by the HF-vs-vLLM multimodal comparison test below.
multimodal_models = ["Qwen/Qwen-VL"]
# Prompt text per image asset name. The `<img></img>` placeholder is empty
# here; `run_test` substitutes the path of an exported image file into it.
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
"Picture 1: <img></img>\nWhat's the content of the image?: ",
"cherry_blossom":
"Picture 1: <img></img>\nWhat is the season?: ",
})
### Tests for multimodal Qwen models
def run_test(
    tmp_path: pathlib.PosixPath,
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
    image_assets: _ImageAssets,
    model: str,
    *,
    size_factors: List[float],
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
    tensor_parallel_size: int,
    distributed_executor_backend: Optional[str] = None,
):
    """Inference result should be the same between hf and vllm.

    All the image fixtures for the test come from the ``image_assets``
    fixture. Each image is exported to ``tmp_path`` and its path is
    substituted into the ``<img></img>`` placeholder of the matching prompt.
    Both runners receive the same prompts and the same rescaled images
    (one per entry in ``size_factors``), and their greedy logprob outputs
    are compared with ``check_logprobs_close``.

    Args:
        tmp_path: Directory the images are written to (one ``.jpg`` each).
        hf_runner: Context-manager factory for the HuggingFace runner.
        vllm_runner: Context-manager factory for the vLLM runner.
        image_assets: Image assets providing ``name`` and ``pil_image``.
        model: Model identifier passed to both runners.
        size_factors: Scale factors; one prompt/image pair is produced per
            factor for every asset (an empty list produces no inputs).
        dtype: dtype string forwarded to both runners.
        max_tokens: Maximum number of tokens to generate per prompt.
        num_logprobs: Number of logprobs requested per generated token.
        tensor_parallel_size: Forwarded to the vLLM runner.
        distributed_executor_backend: Forwarded to the vLLM runner.
    """
    images = [asset.pil_image for asset in image_assets]

    # Export the images to a tempdir and substitute it into the hf prompt;
    # the contents between <img>/</img> will be ignored by VLLM, but the
    # transformers implementation for the visual transformer parses this to
    # reload it in the forward call; the contents are treated as a URL or a
    # local path.
    #
    # The substituted prompts are kept in a local list. Writing them back
    # into the module-level HF_IMAGE_PROMPTS would remove the placeholder
    # for every later call in the same process, so those calls would keep
    # referring to the first call's tmp_path instead of their own.
    hf_prompts = []
    for idx, asset in enumerate(image_assets):
        image_tmp_path = tmp_path / f"{asset.name}.jpg"
        asset.pil_image.save(image_tmp_path)
        hf_prompts.append(HF_IMAGE_PROMPTS[idx].replace(
            "<img></img>", f"<img>{image_tmp_path}</img>"))

    # One (prompts, images) pair per asset, each expanded over size_factors.
    inputs_per_image = [(
        [prompt for _ in size_factors],
        [rescale_image_size(image, factor) for factor in size_factors],
    ) for image, prompt in zip(images, hf_prompts)]

    # NOTE: take care of the order. run vLLM first, and then run HF.
    # vLLM needs a fresh new process without cuda initialization.
    # if we run HF first, the cuda initialization will be done and it
    # will hurt multiprocessing backend with fork method (the default method).

    # max_model_len should be greater than image_feature_size
    # Qwen encodes images into a fixed content size of 256
    with vllm_runner(model,
                     max_model_len=300,
                     max_num_seqs=1,
                     dtype=dtype,
                     tensor_parallel_size=tensor_parallel_size,
                     distributed_executor_backend=distributed_executor_backend,
                     enforce_eager=True) as vllm_model:
        vllm_outputs_per_image = [
            vllm_model.generate_greedy_logprobs(prompts,
                                                max_tokens,
                                                num_logprobs=num_logprobs,
                                                images=scaled_images)
            for prompts, scaled_images in inputs_per_image
        ]

    with hf_runner(model, dtype=dtype) as hf_model:
        hf_outputs_per_image = [
            hf_model.generate_greedy_logprobs_limit(prompts,
                                                    max_tokens,
                                                    num_logprobs=num_logprobs,
                                                    images=scaled_images)
            for prompts, scaled_images in inputs_per_image
        ]

    for hf_outputs, vllm_outputs in zip(hf_outputs_per_image,
                                        vllm_outputs_per_image):
        check_logprobs_close(
            outputs_0_lst=hf_outputs,
            outputs_1_lst=vllm_outputs,
            name_0="hf",
            name_1="vllm",
        )
@pytest.mark.parametrize("model", multimodal_models)
@pytest.mark.parametrize(
    "size_factors",
    [
        # No image
        [],
        # Single-scale
        [1.0],
        # Single-scale, batched
        [1.0, 1.0, 1.0],
        # Multi-scale
        [0.25, 0.5, 1.0],
    ],
)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [8])
@pytest.mark.parametrize("num_logprobs", [5])
def test_multimodal_models(tmp_path, hf_runner, vllm_runner, image_assets,
                           model, size_factors, dtype, max_tokens,
                           num_logprobs) -> None:
    """Run the HF-vs-vLLM comparison in `run_test` on a single device.

    All fixtures and parametrized values are forwarded unchanged;
    tensor parallelism is fixed to 1.
    """
    run_test(
        tmp_path=tmp_path,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
        model=model,
        size_factors=size_factors,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        tensor_parallel_size=1,
    )
@pytest.mark.parametrize("model", text_only_models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
def test_text_only_qwen_model_can_be_loaded_and_run(
    vllm_runner: Type[VllmRunner],
    example_prompts,
    model: str,
    *,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
):
    """Ensure that a text-only Qwen model can still be loaded and
    used for inference in VLLM without throwing.

    The generated outputs are not inspected; the test passes as long as
    loading the model and the greedy generation call do not raise.
    """
    runner_ctx = vllm_runner(model, dtype=dtype)
    with runner_ctx as vllm_model:
        vllm_model.generate_greedy_logprobs(example_prompts,
                                            max_tokens,
                                            num_logprobs=num_logprobs)
# Test the AsyncLLMEngine with multi-step-decoding
from typing import List, Optional
import pytest
from tests.kernels.utils import override_backend_env_variable
from ..models.utils import check_logprobs_close
from ..utils import (completions_with_server_args, get_client_text_generations,
get_client_text_logprob_generations)
......@@ -33,8 +34,9 @@ DEFAULT_SERVER_ARGS: List[str] = [
@pytest.mark.parametrize("eager_mode", [False, True])
@pytest.mark.parametrize("num_scheduler_steps", NUM_SCHEDULER_STEPS)
@pytest.mark.parametrize("num_prompts", NUM_PROMPTS)
@pytest.mark.parametrize("num_logprobs", [None, 5])
@pytest.mark.parametrize("is_async", [False, True])
@pytest.mark.parametrize("num_logprobs", [5])
@pytest.mark.parametrize("is_async", [True])
@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
@pytest.mark.asyncio
async def test_multi_step(
example_prompts,
......@@ -46,6 +48,8 @@ async def test_multi_step(
num_prompts: int,
is_async: bool,
num_logprobs: Optional[int],
attention_backend: str,
monkeypatch,
) -> None:
"""Test vLLM engine with multi-step scheduling in an OpenAI-protocol
client/server environment.
......@@ -71,6 +75,8 @@ async def test_multi_step(
completions endpoint; `None` -> no logprobs
"""
override_backend_env_variable(monkeypatch, attention_backend)
prompts = example_prompts
if len(prompts) < num_prompts:
prompts = prompts * ((num_prompts // len(prompts)) + 1)
......
......@@ -10,6 +10,8 @@ import torch
from tests.quantization.utils import is_quant_method_supported
from ..utils import fork_new_process_for_each_test
models_4bit_to_test = [
('huggyllama/llama-7b', 'quantize model inflight'),
]
......@@ -29,6 +31,7 @@ models_pre_quant_8bit_to_test = [
@pytest.mark.skipif(not is_quant_method_supported("bitsandbytes"),
reason='bitsandbytes is not supported on this GPU type.')
@pytest.mark.parametrize("model_name, description", models_4bit_to_test)
@fork_new_process_for_each_test
def test_load_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
model_name, description) -> None:
......@@ -41,6 +44,7 @@ def test_load_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
reason='bitsandbytes is not supported on this GPU type.')
@pytest.mark.parametrize("model_name, description",
models_pre_qaunt_4bit_to_test)
@fork_new_process_for_each_test
def test_load_pre_quant_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
model_name, description) -> None:
......@@ -52,6 +56,7 @@ def test_load_pre_quant_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
reason='bitsandbytes is not supported on this GPU type.')
@pytest.mark.parametrize("model_name, description",
models_pre_quant_8bit_to_test)
@fork_new_process_for_each_test
def test_load_8bit_bnb_model(hf_runner, vllm_runner, example_prompts,
model_name, description) -> None:
......@@ -77,18 +82,8 @@ def validate_generated_texts(hf_runner,
model_name,
hf_model_kwargs=None):
if hf_model_kwargs is None:
hf_model_kwargs = {}
# Run with HF runner
with hf_runner(model_name, model_kwargs=hf_model_kwargs) as llm:
hf_outputs = llm.generate_greedy(prompts, 8)
hf_logs = log_generated_texts(prompts, hf_outputs, "HfRunner")
# Clean up the GPU memory for the next test
torch.cuda.synchronize()
gc.collect()
torch.cuda.empty_cache()
# NOTE: run vLLM first, as it requires a clean process
# when using distributed inference
#Run with vLLM runner
with vllm_runner(model_name,
......@@ -104,6 +99,19 @@ def validate_generated_texts(hf_runner,
gc.collect()
torch.cuda.empty_cache()
if hf_model_kwargs is None:
hf_model_kwargs = {}
# Run with HF runner
with hf_runner(model_name, model_kwargs=hf_model_kwargs) as llm:
hf_outputs = llm.generate_greedy(prompts, 8)
hf_logs = log_generated_texts(prompts, hf_outputs, "HfRunner")
# Clean up the GPU memory for the next test
torch.cuda.synchronize()
gc.collect()
torch.cuda.empty_cache()
# Compare the generated strings
for hf_log, vllm_log in zip(hf_logs, vllm_logs):
hf_str = hf_log["generated_text"]
......
import torch
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.platforms import current_platform
def is_quant_method_supported(quant_method: str) -> bool:
# Currently, all quantization methods require Nvidia or AMD GPUs
if not torch.cuda.is_available():
if not (current_platform.is_cuda() or current_platform.is_rocm()):
return False
capability = current_platform.get_device_capability()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment