Commit ca796e19 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.8.1' into v0.8.1-ori

parents e983c804 61c7a1b8
......@@ -34,7 +34,7 @@ def phi3v_model_config():
tokenizer=PHI3V_MODEL_ID,
tokenizer_mode="auto",
trust_remote_code=True,
dtype="bfloat16",
dtype="auto",
seed=0,
limit_mm_per_prompt={
"image": 2,
......@@ -58,7 +58,7 @@ def mllama_model_config():
tokenizer=MLLAMA_MODEL_ID,
tokenizer_mode="auto",
trust_remote_code=True,
dtype="bfloat16",
dtype="auto",
seed=0,
limit_mm_per_prompt={
"image": 2,
......@@ -669,7 +669,7 @@ def test_multimodal_image_parsing_matches_hf(model, image_url):
tokenizer=MLLAMA_MODEL_ID,
tokenizer_mode="auto",
trust_remote_code=True,
dtype="bfloat16",
dtype="auto",
seed=0,
limit_mm_per_prompt={
"image": 2,
......
......@@ -5,11 +5,10 @@ from typing import Optional
import numpy as np
import pytest
import pytest_asyncio
from transformers import AutoModel, AutoTokenizer, BatchEncoding
from transformers import AutoModel, AutoTokenizer
from vllm.multimodal.audio import resample_audio
from vllm.sequence import SampleLogprobs
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ....conftest import HfRunner, VllmRunner
from ....utils import RemoteOpenAIServer
......@@ -107,8 +106,6 @@ def run_test(
**kwargs,
):
"""Inference result should be the same between hf and vllm."""
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
# NOTE: take care of the order. run vLLM first, and then run HF.
# vLLM needs a fresh new process without cuda initialization.
# if we run HF first, the cuda initialization will be done and it
......@@ -124,15 +121,7 @@ def run_test(
for vllm_prompt, _, audio in prompts_and_audios
]
def process(hf_inputs: BatchEncoding, **kwargs):
hf_inputs["audio_values"] = hf_inputs["audio_values"] \
.to(torch_dtype) # type: ignore
return hf_inputs
with hf_runner(model,
dtype=dtype,
postprocess_inputs=process,
auto_cls=AutoModel) as hf_model:
with hf_runner(model, dtype=dtype, auto_cls=AutoModel) as hf_model:
hf_outputs_per_audio = [
hf_model.generate_greedy_logprobs_limit(
[hf_prompt],
......
......@@ -9,7 +9,7 @@ from pathlib import PosixPath
import pytest
from packaging.version import Version
from transformers import AutoModelForPreTraining, AutoModelForVision2Seq
from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
from transformers import __version__ as TRANSFORMERS_VERSION
from vllm.platforms import current_platform
......@@ -101,7 +101,7 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
convert_assets_to_embeddings=model_utils.get_llava_embeddings,
max_model_len=4096,
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
custom_test_opts=[CustomTestOptions(
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
......@@ -121,10 +121,7 @@ VLM_TEST_SETTINGS = {
"stop_sign": "caption es",
"cherry_blossom": "What is in the picture?",
}),
auto_cls=AutoModelForVision2Seq,
postprocess_inputs=model_utils.cast_dtype_post_processor(
"pixel_values"
),
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
dtype="bfloat16",
marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")], # noqa: E501
......@@ -179,7 +176,6 @@ VLM_TEST_SETTINGS = {
# "cherry_blossom": "<vlm_image>Please infer the season with reason.", # noqa: E501
# }),
# multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.", # noqa: E501
# postprocess_inputs=model_utils.cast_dtype_post_processor("pixel_values"), # noqa: E501
# stop_str=["<|im_end|>"],
# image_size_factors=[(0.10, 0.15)],
# max_tokens=64,
......@@ -190,7 +186,7 @@ VLM_TEST_SETTINGS = {
test_type=VLMTestType.IMAGE,
prompt_formatter=lambda img_prompt: f"Question: {img_prompt} Answer:",
img_idx_to_prompt=lambda idx: "",
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
),
"chameleon": VLMTestInfo(
......@@ -199,10 +195,7 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForVision2Seq,
postprocess_inputs=model_utils.cast_dtype_post_processor(
"pixel_values"
),
auto_cls=AutoModelForImageTextToText,
# For chameleon, we only compare the sequences
vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
hf_output_post_proc = lambda hf_output, model: hf_output[:2],
......@@ -222,7 +215,6 @@ VLM_TEST_SETTINGS = {
}),
multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?", # noqa: E501
patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
postprocess_inputs=model_utils.cast_dtype_post_processor("images"),
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
......@@ -240,6 +232,7 @@ VLM_TEST_SETTINGS = {
img_idx_to_prompt=lambda idx: "",
max_model_len=2048,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
use_tokenizer_eos=True,
vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
num_logprobs=10,
......@@ -256,9 +249,7 @@ VLM_TEST_SETTINGS = {
multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
# TODO: Use AutoModelForVision2Seq once transformers supports this
auto_cls=AutoModelForPreTraining,
dtype="bfloat16",
auto_cls=AutoModelForImageTextToText,
vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
patch_hf_runner=model_utils.gemma3_patch_hf_runner,
),
......@@ -272,7 +263,6 @@ VLM_TEST_SETTINGS = {
}),
max_model_len=2048,
max_num_seqs=2,
dtype="bfloat16",
get_stop_token_ids=lambda tok: [151329, 151336, 151338],
patch_hf_runner=model_utils.glm4v_patch_hf_runner,
# The image embeddings match with HF but the outputs of the language
......@@ -295,7 +285,6 @@ VLM_TEST_SETTINGS = {
}),
multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.", # noqa: E501
max_model_len=8192,
dtype="bfloat16",
use_tokenizer_eos=True,
num_logprobs=10,
patch_hf_runner=model_utils.h2ovl_patch_hf_runner,
......@@ -307,7 +296,7 @@ VLM_TEST_SETTINGS = {
img_idx_to_prompt=lambda idx: "<image>",
max_model_len=8192,
max_num_seqs=2,
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
hf_output_post_proc=model_utils.idefics3_trunc_hf_output,
),
"intern_vl": VLMTestInfo(
......@@ -324,10 +313,6 @@ VLM_TEST_SETTINGS = {
}),
multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.", # noqa: E501
max_model_len=4096,
# NOTE: Mono-InternVL-2B doesn't work with fp16,
# it will result NaN during inference.
# See: https://huggingface.co/OpenGVLab/Mono-InternVL-2B/discussions/9
dtype="bfloat16",
use_tokenizer_eos=True,
patch_hf_runner=model_utils.internvl_patch_hf_runner,
),
......@@ -336,7 +321,7 @@ VLM_TEST_SETTINGS = {
test_type=(VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
max_model_len=10240,
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
custom_test_opts=[CustomTestOptions(
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
......@@ -351,9 +336,6 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
num_video_frames=16,
max_model_len=16384,
postprocess_inputs=model_utils.cast_dtype_post_processor(
"pixel_values_videos"
),
auto_cls=AutoModelForVision2Seq,
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
custom_test_opts=[CustomTestOptions(
......@@ -378,11 +360,8 @@ VLM_TEST_SETTINGS = {
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt: f"<|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", # noqa: E501
max_model_len=4096,
postprocess_inputs=model_utils.cast_dtype_post_processor(
"pixel_values"
),
get_stop_token_ids=lambda tok: [128009],
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
patch_hf_runner=model_utils.mantis_patch_hf_runner,
marks=[
......@@ -400,8 +379,8 @@ VLM_TEST_SETTINGS = {
max_model_len=4096,
max_num_seqs=2,
get_stop_token_ids=lambda tok: [tok.eos_id, tok.eot_id],
postprocess_inputs=model_utils.wrap_inputs_post_processor,
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
patch_hf_runner=model_utils.minicpmv_25_patch_hf_runner,
),
"minicpmo_26": VLMTestInfo(
models=["openbmb/MiniCPM-o-2_6"],
......@@ -411,11 +390,8 @@ VLM_TEST_SETTINGS = {
max_model_len=4096,
max_num_seqs=2,
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
postprocess_inputs=model_utils.ignore_inputs_post_processor(
"image_sizes"
),
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
patch_hf_runner=model_utils.minicpmo_patch_hf_runner
patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
),
"minicpmv_26": VLMTestInfo(
models=["openbmb/MiniCPM-V-2_6"],
......@@ -425,10 +401,8 @@ VLM_TEST_SETTINGS = {
max_model_len=4096,
max_num_seqs=2,
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
postprocess_inputs=model_utils.ignore_inputs_post_processor(
"image_sizes"
),
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
),
"molmo": VLMTestInfo(
models=["allenai/Molmo-7B-D-0924"],
......@@ -437,7 +411,6 @@ VLM_TEST_SETTINGS = {
max_model_len=4096,
max_num_seqs=2,
patch_hf_runner=model_utils.molmo_patch_hf_runner,
postprocess_inputs=model_utils.molmo_post_processor,
),
# Tests for phi3v currently live in another file because of a bug in
# transformers. Once this issue is fixed, we can enable them here instead.
......@@ -463,7 +436,7 @@ VLM_TEST_SETTINGS = {
img_idx_to_prompt=lambda idx: "[IMG]",
max_model_len=8192,
max_num_seqs=2,
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
marks=[large_gpu_mark(min_gb=48)],
),
"qwen_vl": VLMTestInfo(
......@@ -481,10 +454,7 @@ VLM_TEST_SETTINGS = {
models=["facebook/chameleon-7b"],
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
max_model_len=4096,
auto_cls=AutoModelForVision2Seq,
postprocess_inputs=model_utils.cast_dtype_post_processor(
"pixel_values"
),
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
hf_output_post_proc = lambda hf_output, model: hf_output[:2],
comparator=check_outputs_equal,
......@@ -495,7 +465,7 @@ VLM_TEST_SETTINGS = {
models=["llava-hf/llava-1.5-7b-hf"],
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
max_model_len=4096,
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
marks=multi_gpu_marks(num_gpus=2),
**COMMON_BROADCAST_SETTINGS # type: ignore
......@@ -504,7 +474,7 @@ VLM_TEST_SETTINGS = {
models=["llava-hf/llava-v1.6-mistral-7b-hf"],
prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
max_model_len=10240,
auto_cls=AutoModelForVision2Seq,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
marks=multi_gpu_marks(num_gpus=2),
**COMMON_BROADCAST_SETTINGS # type: ignore
......@@ -529,9 +499,6 @@ VLM_TEST_SETTINGS = {
test_type=VLMTestType.CUSTOM_INPUTS,
max_model_len=16384,
max_num_seqs=2,
postprocess_inputs=model_utils.cast_dtype_post_processor(
"pixel_values"
),
auto_cls=AutoModelForVision2Seq,
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
custom_test_opts=[CustomTestOptions(
......
......@@ -4,7 +4,6 @@
Run `pytest tests/models/test_mistral.py`.
"""
import json
import uuid
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Optional
......@@ -16,8 +15,7 @@ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
from transformers import AutoProcessor
from vllm import (EngineArgs, LLMEngine, RequestOutput, SamplingParams,
TextPrompt, TokensPrompt)
from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt
from vllm.multimodal import MultiModalDataBuiltins
from vllm.multimodal.inputs import PlaceholderRange
from vllm.sequence import Logprob, SampleLogprobs
......@@ -28,7 +26,11 @@ from ...utils import check_logprobs_close
if TYPE_CHECKING:
from _typeshed import StrPath
MODELS = ["mistralai/Pixtral-12B-2409"]
PIXTRAL_ID = "mistralai/Pixtral-12B-2409"
MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]
IMG_URLS = [
"https://picsum.photos/id/237/400/300",
"https://picsum.photos/id/231/200/300",
......@@ -125,8 +127,10 @@ MAX_MODEL_LEN = [8192, 65536]
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
assert FIXTURES_PATH.exists()
FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
FIXTURE_LOGPROBS_CHAT = {
PIXTRAL_ID: FIXTURES_PATH / "pixtral_chat.json",
MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
}
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
......@@ -166,12 +170,12 @@ def test_chat(
model: str,
dtype: str,
) -> None:
EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT)
EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(
FIXTURE_LOGPROBS_CHAT[model])
with vllm_runner(
model,
dtype=dtype,
tokenizer_mode="mistral",
enable_chunked_prefill=False,
max_model_len=max_model_len,
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
) as vllm_model:
......@@ -183,70 +187,40 @@ def test_chat(
outputs.extend(output)
logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
# Remove last `None` prompt_logprobs to compare with fixture
for i in range(len(logprobs)):
assert logprobs[i][-1] is None
logprobs[i] = logprobs[i][:-1]
check_logprobs_close(outputs_0_lst=EXPECTED_CHAT_LOGPROBS,
outputs_1_lst=logprobs,
name_0="h100_ref",
name_1="output")
@large_gpu_test(min_gb=80)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
def test_model_engine(vllm_runner, model: str, dtype: str) -> None:
EXPECTED_ENGINE_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_ENGINE)
args = EngineArgs(
model=model,
tokenizer_mode="mistral",
enable_chunked_prefill=False,
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
dtype=dtype,
)
engine = LLMEngine.from_engine_args(args)
engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[0], SAMPLING_PARAMS)
engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[1], SAMPLING_PARAMS)
outputs = []
count = 0
while True:
out = engine.step()
count += 1
for request_output in out:
if request_output.finished:
outputs.append(request_output)
if count == 2:
engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[2],
SAMPLING_PARAMS)
if not engine.has_unfinished_requests():
break
logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
check_logprobs_close(outputs_0_lst=EXPECTED_ENGINE_LOGPROBS,
outputs_1_lst=logprobs,
name_0="h100_ref",
name_1="output")
@large_gpu_test(min_gb=48)
@pytest.mark.parametrize(
"prompt,expected_ranges",
[(_create_engine_inputs_hf(IMG_URLS[:1]), [{
"offset": 10,
"offset": 11,
"length": 494
}]),
(_create_engine_inputs_hf(IMG_URLS[1:4]), [{
"offset": 10,
"offset": 11,
"length": 266
}, {
"offset": 276,
"offset": 277,
"length": 1056
}, {
"offset": 1332,
"offset": 1333,
"length": 418
}])])
def test_multi_modal_placeholders(
vllm_runner, prompt, expected_ranges: list[PlaceholderRange]) -> None:
def test_multi_modal_placeholders(vllm_runner, prompt,
expected_ranges: list[PlaceholderRange],
monkeypatch) -> None:
# This placeholder checking test only works with V0 engine
# where `multi_modal_placeholders` is returned with `RequestOutput`
monkeypatch.setenv("VLLM_USE_V1", "0")
with vllm_runner(
"mistral-community/pixtral-12b",
max_model_len=8192,
......
......@@ -4,7 +4,6 @@ from typing import Any, Callable, Optional, Union
import torch
from PIL.Image import Image
from transformers import BatchEncoding
from transformers.models.auto.auto_factory import _BaseAutoModelClass
from vllm.config import TaskOption
......@@ -31,7 +30,6 @@ def run_test(
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
auto_cls: type[_BaseAutoModelClass],
use_tokenizer_eos: bool,
postprocess_inputs: Callable[[BatchEncoding], BatchEncoding],
comparator: Callable[..., None],
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
stop_str: Optional[list[str]],
......@@ -101,7 +99,6 @@ def run_test(
hf_model = hf_runner(model,
dtype=dtype,
auto_cls=auto_cls,
postprocess_inputs=postprocess_inputs,
model_kwargs=hf_model_kwargs)
# Some models need to patch things like the model processor, e.g., internvl
......
......@@ -6,16 +6,15 @@ typically specific to a small subset of models.
import re
import types
from pathlib import PosixPath
from typing import Callable, Optional, Union
from typing import Optional, Union
import torch
from PIL.Image import Image
from transformers import (AutoConfig, AutoTokenizer, BatchEncoding,
from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
GenerationConfig)
from vllm.sequence import SampleLogprobs
from vllm.transformers_utils.tokenizer import patch_padding_side
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from .....conftest import HfRunner, ImageAsset, _ImageAssets
from .types import RunnerOutput
......@@ -211,40 +210,6 @@ def get_llava_embeddings(image_assets: _ImageAssets):
return [asset.image_embeds for asset in image_assets]
####### postprocessors to run on HF BatchEncoding
def cast_dtype_post_processor(
hf_inp_key: str) -> Callable[[BatchEncoding, str], BatchEncoding]:
"""Gets a handle to a post processor which converts a given key into a
target data type."""
def process(hf_inputs: BatchEncoding, dtype: str):
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
hf_inputs[hf_inp_key] = hf_inputs[hf_inp_key].to(torch_dtype)
return hf_inputs
return process
def ignore_inputs_post_processor(
hf_inp_key: str) -> Callable[[BatchEncoding, str], BatchEncoding]:
"""Gets a handle to a post processor which ignores a given key."""
def process(hf_inputs: BatchEncoding, dtype: str):
del hf_inputs[hf_inp_key]
return hf_inputs
return process
def wrap_inputs_post_processor(hf_inputs: BatchEncoding, dtype: str):
return {"model_inputs": hf_inputs}
def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):
hf_inputs = cast_dtype_post_processor("images")(hf_inputs, dtype)
return {k: v.unsqueeze(0) for k, v in hf_inputs.items()}
####### Prompt path encoders for models that need models on disk
def qwen_prompt_path_encoder(
tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset],
......@@ -295,8 +260,7 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
for k in inputs.keys() # noqa
if k not in ("seq_lens", "sft_format")
}
inputs = BatchEncoding(data=inputs, tensor_type="pt")
return inputs
return BatchFeature(data=inputs, tensor_type="pt")
hf_model.processor = processor
hf_model.model.get_output_embeddings = lambda: \
......@@ -529,10 +493,52 @@ def mantis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return hf_model
def minicpmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def minicpmv_25_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
orig_generate = hf_model.model.generate
def _generate(self, *args, **kwargs):
def _generate(
self,
*args,
input_ids=None,
pixel_values=None,
image_sizes=None,
image_bound=None,
tgt_sizes=None,
**kwargs,
):
model_inputs = {
"input_ids": input_ids,
"pixel_values": pixel_values,
"image_sizes": image_sizes,
"image_bound": image_bound,
"tgt_sizes": tgt_sizes,
}
for k in list(model_inputs.keys()):
if model_inputs[k] is None:
model_inputs.pop(k)
return orig_generate(model_inputs, *args, decode_text=False, **kwargs)
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
return hf_model
def minicpmo_26_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
orig_generate = hf_model.model.generate
def _generate(self, *args, image_sizes=None, **kwargs):
return orig_generate(*args, decode_text=False, **kwargs)
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
return hf_model
def minicpmv_26_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
orig_generate = hf_model.model.generate
def _generate(self, *args, image_sizes=None, **kwargs):
return orig_generate(*args, decode_text=False, **kwargs)
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
......@@ -551,10 +557,11 @@ def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def _generate(self, max_new_tokens=None, do_sample=None, **kwargs):
batch = {
k: kwargs.pop(k)
k: kwargs.pop(k).unsqueeze(0)
for k in ("input_ids", "images", "image_input_idx", "image_masks")
if k in kwargs
}
batch = BatchFeature(batch).to(dtype=self.dtype)
return self.generate_from_batch(
batch,
......
......@@ -8,13 +8,12 @@ from typing import Any, Callable, NamedTuple, Optional, Union
import torch
from PIL.Image import Image
from pytest import MarkDecorator
from transformers import AutoModelForCausalLM, BatchEncoding
from transformers import AutoModelForCausalLM
from transformers.models.auto.auto_factory import _BaseAutoModelClass
from vllm.config import TaskOption
from vllm.sequence import SampleLogprobs
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import identity
from .....conftest import IMAGE_ASSETS, HfRunner, ImageAsset, _ImageAssets
from ....utils import check_logprobs_close
......@@ -110,11 +109,6 @@ class VLMTestInfo(NamedTuple):
# Indicates we should explicitly pass the EOS from the tokenizer
use_tokenizer_eos: bool = False
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
# Callable to pass to the HF runner to run on inputs; for now, we also pass
# the data type to input post processing, because almost all of the uses of
# postprocess_inputs are to fix the data types of BatchEncoding values.
postprocess_inputs: Callable[[BatchEncoding, str],
BatchEncoding] = identity
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]] = None
# Post processors that if defined, will run oun the outputs of the
......@@ -130,7 +124,7 @@ class VLMTestInfo(NamedTuple):
# is all combinations of .models + all fields below
max_tokens: Union[int, tuple[int]] = 128
num_logprobs: Union[int, tuple[int]] = 5
dtype: Union[str, Iterable[str]] = "half"
dtype: Union[str, Union[list[str], tuple[str, ...]]] = "auto"
distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
# Only expanded in video tests
num_video_frames: Union[int, tuple[int]] = 16
......@@ -171,7 +165,6 @@ class VLMTestInfo(NamedTuple):
"vllm_output_post_proc": self.vllm_output_post_proc,
"auto_cls": self.auto_cls,
"use_tokenizer_eos": self.use_tokenizer_eos,
"postprocess_inputs": self.postprocess_inputs,
"comparator": self.comparator,
"get_stop_token_ids": self.get_stop_token_ids,
"hf_model_kwargs": self.hf_model_kwargs,
......
# SPDX-License-Identifier: Apache-2.0
from functools import partial
from typing import Callable
import pytest
import torch
import torch.nn.functional as F
from PIL import Image
from transformers import BatchEncoding, Qwen2VLForConditionalGeneration
from transformers import Qwen2VLForConditionalGeneration
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from ....utils import large_gpu_test
......@@ -75,10 +75,6 @@ def apply_chat_template_and_add_eos(
return prompt
def postprocess_inputs(hf_model: HfRunner, inputs: BatchEncoding, **kwargs):
return hf_model.model.prepare_inputs_for_generation(**inputs, **kwargs)
def _run_test(
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
......@@ -118,14 +114,8 @@ def _run_test(
with hf_runner(model,
dtype=dtype,
auto_cls=Qwen2VLForConditionalGeneration) as hf_model:
hf_model.postprocess_inputs = partial(
postprocess_inputs,
hf_model,
cache_position=torch.arange(
0,
1, # 1 for batch size
requires_grad=False),
use_cache=False)
prompts = []
for text, image, embed_text in zip(input_texts, input_images,
embed_texts):
# dse requires non-standard input processing
......@@ -133,20 +123,34 @@ def _run_test(
messages = get_messages(image, text, embed_text)
prompt = apply_chat_template_and_add_eos(
messages, hf_model.processor.apply_chat_template)
inputs = hf_model.get_inputs(
prompts=[[prompt]],
images=[[image]],
)
with torch.no_grad():
prompts.append(prompt)
all_inputs = hf_model.get_inputs(
prompts=prompts,
images=input_images,
)
with torch.no_grad():
all_outputs = []
for inputs in all_inputs:
inputs = hf_model.model.prepare_inputs_for_generation(
**inputs,
cache_position=torch.arange(1), # 1 for batch size
use_cache=False,
)
outputs = hf_model.model(
**hf_model.wrap_device(inputs[0],
device=hf_model.model.device.type),
**hf_model.wrap_device(inputs),
return_dict=True,
output_hidden_states=True,
)
pooled_output = torch.nn.functional.normalize(
outputs.hidden_states[-1][0, -1], p=2, dim=-1)
hf_outputs.append(pooled_output.tolist())
pooled_output = F.normalize(outputs.hidden_states[-1][0, -1],
p=2,
dim=-1)
all_outputs.append(pooled_output.tolist())
hf_outputs = all_outputs
check_embeddings_close(
embeddings_0_lst=hf_outputs,
......
......@@ -2,7 +2,7 @@
import pytest
import torch.nn.functional as F
from transformers import AutoModelForVision2Seq
from transformers import AutoModelForImageTextToText
from vllm.platforms import current_platform
......@@ -70,7 +70,7 @@ def _run_test(
vllm_outputs = vllm_model.encode(input_texts, images=input_images)
with hf_runner(model, dtype=dtype,
auto_cls=AutoModelForVision2Seq) as hf_model:
auto_cls=AutoModelForImageTextToText) as hf_model:
# Patch the issue where generation_config.json is missing
hf_model.processor.patch_size = \
hf_model.model.config.vision_config.patch_size
......@@ -86,8 +86,7 @@ def _run_test(
for inputs in all_inputs:
# Based on: https://huggingface.co/royokong/e5-v
outputs = hf_model.model(
**hf_model.wrap_device(inputs,
device=hf_model.model.device.type),
**hf_model.wrap_device(inputs),
return_dict=True,
output_hidden_states=True,
)
......
......@@ -53,8 +53,7 @@ def _run_test(
for inputs in all_inputs:
# Based on: https://github.com/TIGER-AI-Lab/VLM2Vec/blob/db3b951bccabba220c1f53ab46a734e50dd2fc08/src/model.py
outputs = hf_model.model(
**hf_model.wrap_device(inputs,
device=hf_model.model.device.type),
**hf_model.wrap_device(inputs),
return_dict=True,
output_hidden_states=True,
)
......
......@@ -4,8 +4,7 @@ from typing import Optional, overload
import pytest
import torch
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
BatchEncoding)
from transformers import AutoConfig, AutoModelForImageTextToText, AutoTokenizer
from vllm import LLM, SamplingParams
from vllm.attention.backends.flash_attn import FlashAttentionMetadata
......@@ -227,14 +226,10 @@ def _run_test(
for prompts, images in inputs
]
def process(hf_inputs: BatchEncoding, **kwargs):
return hf_inputs
with hf_runner(model,
dtype=dtype,
model_kwargs={"device_map": "auto"},
postprocess_inputs=process,
auto_cls=AutoModelForVision2Seq) as hf_model:
auto_cls=AutoModelForImageTextToText) as hf_model:
hf_outputs_per_image = [
hf_model.generate_greedy_logprobs_limit(prompts,
max_tokens,
......
[[[1784, 3937, 6122, 1261, 7244, 10575, 28528, 1408, 1261, 32656, 11237, 1044, 7283, 2015, 1454, 1261, 38462, 4818, 1046, 2], "The image shows a black dog lying on a wooden floor, looking up with a curious expression.", [{"1784": {"logprob": -0.4740446209907532, "rank": 1, "decoded_token": "The"}, "1065": {"logprob": -1.0990445613861084, "rank": 2, "decoded_token": "A"}, "4380": {"logprob": -3.3490445613861084, "rank": 3, "decoded_token": "This"}, "1785": {"logprob": -5.0990447998046875, "rank": 4, "decoded_token": "In"}, "11745": {"logprob": -6.4740447998046875, "rank": 5, "decoded_token": "Here"}}, {"3937": {"logprob": -0.06349722295999527, "rank": 1, "decoded_token": " image"}, "7244": {"logprob": -2.813497304916382, "rank": 2, "decoded_token": " black"}, "16649": {"logprob": -7.563497066497803, "rank": 3, "decoded_token": " photo"}, "18390": {"logprob": -7.688497066497803, "rank": 4, "decoded_token": " photograph"}, "10575": {"logprob": -8.438497543334961, "rank": 5, "decoded_token": " dog"}}, {"6122": {"logprob": -0.25453490018844604, "rank": 1, "decoded_token": " shows"}, "6971": {"logprob": -1.8795349597930908, "rank": 2, "decoded_token": " features"}, "51948": {"logprob": -2.754534959793091, "rank": 3, "decoded_token": " depicts"}, "25981": {"logprob": -5.629534721374512, "rank": 4, "decoded_token": " displays"}, "1395": {"logprob": -6.129534721374512, "rank": 5, "decoded_token": " is"}}, {"1261": {"logprob": -0.0001245659514097497, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -9.00012493133545, "rank": 2, "decoded_token": " an"}, "1278": {"logprob": -14.25012493133545, "rank": 3, "decoded_token": " the"}, "7244": {"logprob": -14.87512493133545, "rank": 4, "decoded_token": " black"}, "1925": {"logprob": -16.125123977661133, "rank": 5, "decoded_token": " one"}}, {"7244": {"logprob": -0.009403933770954609, "rank": 1, "decoded_token": " black"}, "6231": {"logprob": -5.259403705596924, "rank": 2, "decoded_token": " close"}, "16450": {"logprob": -6.759403705596924, "rank": 3, "decoded_token": " sle"}, "8500": {"logprob": -7.009403705596924, "rank": 4, "decoded_token": " dark"}, "4329": {"logprob": -7.696903705596924, "rank": 5, "decoded_token": " large"}}, {"10575": {"logprob": -0.7522680163383484, "rank": 1, "decoded_token": " dog"}, "119075": {"logprob": -1.0022680759429932, "rank": 2, "decoded_token": " Labrador"}, "116572": {"logprob": -1.8772680759429932, "rank": 3, "decoded_token": " puppy"}, "8636": {"logprob": -5.627267837524414, "rank": 4, "decoded_token": " lab"}, "15812": {"logprob": -5.814767837524414, "rank": 5, "decoded_token": " Lab"}}, {"28528": {"logprob": -0.2941223084926605, "rank": 1, "decoded_token": " lying"}, "7283": {"logprob": -2.1691222190856934, "rank": 2, "decoded_token": " looking"}, "1454": {"logprob": -2.5441222190856934, "rank": 3, "decoded_token": " with"}, "60700": {"logprob": -3.2941222190856934, "rank": 4, "decoded_token": " laying"}, "18970": {"logprob": -4.794122219085693, "rank": 5, "decoded_token": " sitting"}}, {"1408": {"logprob": -0.3170951306819916, "rank": 1, "decoded_token": " on"}, "3151": {"logprob": -1.317095160484314, "rank": 2, "decoded_token": " down"}, "14038": {"logprob": -7.3170952796936035, "rank": 3, "decoded_token": " flat"}, "104248": {"logprob": -7.4420952796936035, "rank": 4, "decoded_token": " comfortably"}, "1321": {"logprob": -7.6920952796936035, "rank": 5, "decoded_token": " and"}}, {"1261": {"logprob": -0.08228635042905807, "rank": 1, "decoded_token": " a"}, "2246": {"logprob": -3.2072863578796387, "rank": 2, "decoded_token": " its"}, "32656": {"logprob": -3.3322863578796387, "rank": 3, "decoded_token": " wooden"}, "3977": {"logprob": -6.957286357879639, "rank": 4, "decoded_token": " top"}, "1278": {"logprob": -7.207286357879639, "rank": 5, "decoded_token": " the"}}, {"32656": {"logprob": -0.03605202957987785, "rank": 1, "decoded_token": " wooden"}, "3403": {"logprob": -3.9110519886016846, "rank": 2, "decoded_token": " text"}, "44130": {"logprob": -4.911052227020264, "rank": 3, "decoded_token": " rust"}, "12603": {"logprob": -6.036052227020264, "rank": 4, "decoded_token": " wood"}, "8500": {"logprob": -6.473552227020264, "rank": 5, "decoded_token": " dark"}}, {"11237": {"logprob": -0.6433407068252563, "rank": 1, "decoded_token": " floor"}, "4691": {"logprob": -0.7683407068252563, "rank": 2, "decoded_token": " surface"}, "1615": {"logprob": -5.268340587615967, "rank": 3, "decoded_token": " pl"}, "3403": {"logprob": -6.018340587615967, "rank": 4, "decoded_token": " text"}, "18645": {"logprob": -7.143340587615967, "rank": 5, "decoded_token": " flo"}}, {"1044": {"logprob": -0.6826052665710449, "rank": 1, "decoded_token": ","}, "1321": {"logprob": -1.682605266571045, "rank": 2, "decoded_token": " and"}, "7283": {"logprob": -1.807605266571045, "rank": 3, "decoded_token": " looking"}, "1046": {"logprob": -2.682605266571045, "rank": 4, "decoded_token": "."}, "1454": {"logprob": -3.182605266571045, "rank": 5, "decoded_token": " with"}}, {"7283": {"logprob": -0.07239976525306702, "rank": 1, "decoded_token": " looking"}, "11589": {"logprob": -3.197399854660034, "rank": 2, "decoded_token": " gaz"}, "35542": {"logprob": -3.822399854660034, "rank": 3, "decoded_token": " staring"}, "1454": {"logprob": -6.384899616241455, "rank": 4, "decoded_token": " with"}, "22116": {"logprob": -6.572399616241455, "rank": 5, "decoded_token": " facing"}}, {"2015": {"logprob": -0.9646494388580322, "rank": 2, "decoded_token": " up"}, "7655": {"logprob": -0.9646494388580322, "rank": 1, "decoded_token": " directly"}, "74606": {"logprob": -2.0896494388580322, "rank": 3, "decoded_token": " upwards"}, "40022": {"logprob": -3.0896494388580322, "rank": 4, "decoded_token": " upward"}, "1935": {"logprob": -4.152149200439453, "rank": 5, "decoded_token": " int"}}, {"1454": {"logprob": -0.8447978496551514, "rank": 1, "decoded_token": " with"}, "1513": {"logprob": -1.2197978496551514, "rank": 2, "decoded_token": " at"}, "41132": {"logprob": -2.2197978496551514, "rank": 3, "decoded_token": " attent"}, "1935": {"logprob": -2.9697978496551514, "rank": 4, "decoded_token": " int"}, "7655": {"logprob": -3.0947978496551514, "rank": 5, "decoded_token": " directly"}}, {"1261": {"logprob": -0.7162021994590759, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -1.3412022590637207, "rank": 2, "decoded_token": " an"}, "41132": {"logprob": -2.2162022590637207, "rank": 3, "decoded_token": " attent"}, "2246": {"logprob": -3.2162022590637207, "rank": 4, "decoded_token": " its"}, "38462": {"logprob": -3.9662022590637207, "rank": 5, "decoded_token": " curious"}}, {"38462": {"logprob": -0.7836517095565796, "rank": 1, "decoded_token": " curious"}, "26517": {"logprob": -1.8461517095565796, "rank": 2, "decoded_token": " calm"}, "26905": {"logprob": -2.533651828765869, "rank": 3, "decoded_token": " gentle"}, "11304": {"logprob": -3.408651828765869, "rank": 4, "decoded_token": " serious"}, "97680": {"logprob": -3.596151828765869, "rank": 5, "decoded_token": " thoughtful"}}, {"4818": {"logprob": -0.047154705971479416, "rank": 1, "decoded_token": " expression"}, "1321": {"logprob": -3.922154664993286, "rank": 2, "decoded_token": " and"}, "1505": {"logprob": -4.047154903411865, "rank": 3, "decoded_token": " or"}, "22131": {"logprob": -4.797154903411865, "rank": 4, "decoded_token": " gaze"}, "1044": {"logprob": -9.047154426574707, "rank": 5, "decoded_token": ","}}, {"1046": {"logprob": -0.0008031480247154832, "rank": 1, "decoded_token": "."}, "1408": {"logprob": -7.250802993774414, "rank": 2, "decoded_token": " on"}, "1321": {"logprob": -10.500802993774414, "rank": 3, "decoded_token": " and"}, "1338": {"logprob": -11.000802993774414, "rank": 4, "decoded_token": ".\n\n"}, "3016": {"logprob": -11.500802993774414, "rank": 5, "decoded_token": " while"}}, {"2": {"logprob": -0.0008517451351508498, "rank": 1, "decoded_token": " "}, "1032": {"logprob": -7.125851631164551, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -10.00085163116455, "rank": 3, "decoded_token": " The"}}]], [[1049, 1046, 1349, 7244, 10575, 1395, 28528, 1408, 1261, 32656, 11237, 1044, 7283, 2015, 1513, 1278, 13424, 1626, 1050, 1046, 1349, 10726, 1290, 3719, 1307, 122203, 35463, 1454, 11223, 1321, 95746, 24765, 2425, 1261, 6133, 21283, 1046, 2], "1. A black dog is lying on a wooden floor, looking up at the camera.\n2. A scenic view of rugged mountains with green and rocky terrain under a clear sky.", [{"1049": {"logprob": -0.05050129443407059, "rank": 1, "decoded_token": "1"}, "11745": {"logprob": -3.5505013465881348, "rank": 2, "decoded_token": "Here"}, "69957": {"logprob": -4.175501346588135, "rank": 3, "decoded_token": "Sure"}, "117991": {"logprob": -6.175501346588135, "rank": 4, "decoded_token": "Certain"}, "1045": {"logprob": -6.550501346588135, "rank": 5, "decoded_token": "-"}}, {"1046": {"logprob": -5.364403477869928e-06, "rank": 1, "decoded_token": "."}, "1041": {"logprob": -12.500005722045898, "rank": 2, "decoded_token": ")"}, "1058": {"logprob": -13.875005722045898, "rank": 3, "decoded_token": ":"}, "1044": {"logprob": -15.687505722045898, "rank": 4, "decoded_token": ","}, "1045": {"logprob": -15.875005722045898, "rank": 5, "decoded_token": "-"}}, {"1349": {"logprob": -0.4890742003917694, "rank": 1, "decoded_token": " A"}, "1531": {"logprob": -1.1140742301940918, "rank": 2, "decoded_token": " The"}, "1603": {"logprob": -3.364074230194092, "rank": 3, "decoded_token": " **"}, "1656": {"logprob": -4.364074230194092, "rank": 4, "decoded_token": " In"}, "2409": {"logprob": -4.989074230194092, "rank": 5, "decoded_token": " This"}}, {"7244": {"logprob": -0.08685152232646942, "rank": 1, "decoded_token": " black"}, "6231": {"logprob": -3.4618515968322754, "rank": 2, "decoded_token": " close"}, "16450": {"logprob": -3.5868515968322754, "rank": 3, "decoded_token": " sle"}, "4329": {"logprob": -4.899351596832275, "rank": 4, "decoded_token": " large"}, "8500": {"logprob": -5.399351596832275, "rank": 5, "decoded_token": " dark"}}, {"10575": {"logprob": -0.20338763296604156, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -1.8283876180648804, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -3.95338773727417, "rank": 3, "decoded_token": " Labrador"}, "28404": {"logprob": -6.95338773727417, "rank": 4, "decoded_token": " pup"}, "8636": {"logprob": -7.07838773727417, "rank": 5, "decoded_token": " lab"}}, {"1395": {"logprob": -0.532414972782135, "rank": 1, "decoded_token": " is"}, "22524": {"logprob": -1.7824149131774902, "rank": 2, "decoded_token": " lies"}, "1454": {"logprob": -2.1574149131774902, "rank": 3, "decoded_token": " with"}, "10637": {"logprob": -3.2824149131774902, "rank": 4, "decoded_token": " looks"}, "28528": {"logprob": -3.4074149131774902, "rank": 5, "decoded_token": " lying"}}, {"28528": {"logprob": -0.4258010685443878, "rank": 1, "decoded_token": " lying"}, "7283": {"logprob": -1.6758010387420654, "rank": 2, "decoded_token": " looking"}, "60700": {"logprob": -2.9258010387420654, "rank": 3, "decoded_token": " laying"}, "38235": {"logprob": -3.6758010387420654, "rank": 4, "decoded_token": " resting"}, "18970": {"logprob": -3.6758010387420654, "rank": 5, "decoded_token": " sitting"}}, {"1408": {"logprob": -0.3588743805885315, "rank": 1, "decoded_token": " on"}, "3151": {"logprob": -1.2338743209838867, "rank": 2, "decoded_token": " down"}, "41132": {"logprob": -6.358874320983887, "rank": 3, "decoded_token": " attent"}, "14038": {"logprob": -6.546374320983887, "rank": 4, "decoded_token": " flat"}, "1321": {"logprob": -6.733874320983887, "rank": 5, "decoded_token": " and"}}, {"1261": {"logprob": -0.07801607996225357, "rank": 1, "decoded_token": " a"}, "2246": {"logprob": -2.9530160427093506, "rank": 2, "decoded_token": " its"}, "32656": {"logprob": -4.20301628112793, "rank": 3, "decoded_token": " wooden"}, "1278": {"logprob": -5.20301628112793, "rank": 4, "decoded_token": " the"}, "3977": {"logprob": -6.57801628112793, "rank": 5, "decoded_token": " top"}}, {"32656": {"logprob": -0.06541638821363449, "rank": 1, "decoded_token": " wooden"}, "3403": {"logprob": -3.4404163360595703, "rank": 2, "decoded_token": " text"}, "44130": {"logprob": -3.9404163360595703, "rank": 3, "decoded_token": " rust"}, "17253": {"logprob": -5.81541633605957, "rank": 4, "decoded_token": " weather"}, "12603": {"logprob": -5.94041633605957, "rank": 5, "decoded_token": " wood"}}, {"11237": {"logprob": -0.4574064016342163, "rank": 1, "decoded_token": " floor"}, "4691": {"logprob": -1.0824064016342163, "rank": 2, "decoded_token": " surface"}, "1615": {"logprob": -4.082406520843506, "rank": 3, "decoded_token": " pl"}, "3403": {"logprob": -5.207406520843506, "rank": 4, "decoded_token": " text"}, "28984": {"logprob": -6.582406520843506, "rank": 5, "decoded_token": " deck"}}, {"1044": {"logprob": -0.9594833850860596, "rank": 1, "decoded_token": ","}, "7283": {"logprob": -1.2094833850860596, "rank": 2, "decoded_token": " looking"}, "1321": {"logprob": -2.2094833850860596, "rank": 3, "decoded_token": " and"}, "1454": {"logprob": -2.4594833850860596, "rank": 4, "decoded_token": " with"}, "1626": {"logprob": -2.5844833850860596, "rank": 5, "decoded_token": ".\n"}}, {"7283": {"logprob": -0.15972694754600525, "rank": 1, "decoded_token": " looking"}, "11589": {"logprob": -2.534726858139038, "rank": 2, "decoded_token": " gaz"}, "35542": {"logprob": -2.909726858139038, "rank": 3, "decoded_token": " staring"}, "22116": {"logprob": -6.034727096557617, "rank": 4, "decoded_token": " facing"}, "1454": {"logprob": -6.409727096557617, "rank": 5, "decoded_token": " with"}}, {"2015": {"logprob": -0.894250750541687, "rank": 1, "decoded_token": " up"}, "7655": {"logprob": -1.269250750541687, "rank": 2, "decoded_token": " directly"}, "74606": {"logprob": -1.769250750541687, "rank": 3, "decoded_token": " upwards"}, "40022": {"logprob": -2.6442508697509766, "rank": 4, "decoded_token": " upward"}, "1935": {"logprob": -4.081750869750977, "rank": 5, "decoded_token": " int"}}, {"1513": {"logprob": -0.5085363388061523, "rank": 1, "decoded_token": " at"}, "1454": {"logprob": -1.5085363388061523, "rank": 2, "decoded_token": " with"}, "1626": {"logprob": -2.6335363388061523, "rank": 3, "decoded_token": ".\n"}, "1935": {"logprob": -3.3835363388061523, "rank": 4, "decoded_token": " int"}, "41132": {"logprob": -3.6335363388061523, "rank": 5, "decoded_token": " attent"}}, {"1278": {"logprob": -0.0010482537327334285, "rank": 1, "decoded_token": " the"}, "4433": {"logprob": -7.0010480880737305, "rank": 2, "decoded_token": " something"}, "2246": {"logprob": -10.25104808807373, "rank": 3, "decoded_token": " its"}, "1261": {"logprob": -10.25104808807373, "rank": 4, "decoded_token": " a"}, "1636": {"logprob": -10.50104808807373, "rank": 5, "decoded_token": " you"}}, {"13424": {"logprob": -0.0003800861886702478, "rank": 1, "decoded_token": " camera"}, "56268": {"logprob": -8.250380516052246, "rank": 2, "decoded_token": " viewer"}, "68439": {"logprob": -9.250380516052246, "rank": 3, "decoded_token": " photographer"}, "2965": {"logprob": -12.375380516052246, "rank": 4, "decoded_token": " person"}, "37967": {"logprob": -12.500380516052246, "rank": 5, "decoded_token": " ceiling"}}, {"1626": {"logprob": -0.34197133779525757, "rank": 1, "decoded_token": ".\n"}, "1454": {"logprob": -1.4669713973999023, "rank": 2, "decoded_token": " with"}, "1046": {"logprob": -3.3419713973999023, "rank": 3, "decoded_token": "."}, "1338": {"logprob": -3.9669713973999023, "rank": 4, "decoded_token": ".\n\n"}, "1935": {"logprob": -5.966971397399902, "rank": 5, "decoded_token": " int"}}, {"1050": {"logprob": -0.002148107625544071, "rank": 1, "decoded_token": "2"}, "1256": {"logprob": -6.877148151397705, "rank": 2, "decoded_token": " "}, "1293": {"logprob": -7.127148151397705, "rank": 3, "decoded_token": " "}, "1032": {"logprob": -8.252147674560547, "rank": 4, "decoded_token": " "}, "1049": {"logprob": -10.752147674560547, "rank": 5, "decoded_token": "1"}}, {"1046": {"logprob": -7.510157047363464e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.437507629394531, "rank": 2, "decoded_token": ".A"}, "1626": {"logprob": -13.437507629394531, "rank": 3, "decoded_token": ".\n"}, "48426": {"logprob": -13.687507629394531, "rank": 4, "decoded_token": ".The"}, "1044": {"logprob": -14.062507629394531, "rank": 5, "decoded_token": ","}}, {"1349": {"logprob": -0.2843300700187683, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -2.034330129623413, "rank": 2, "decoded_token": " Maj"}, "113465": {"logprob": -3.534330129623413, "rank": 3, "decoded_token": " Rug"}, "22468": {"logprob": -4.409329891204834, "rank": 4, "decoded_token": " Several"}, "1531": {"logprob": -4.534329891204834, "rank": 5, "decoded_token": " The"}}, {"10726": {"logprob": -1.3984904289245605, "rank": 1, "decoded_token": " scen"}, "122203": {"logprob": -1.7734904289245605, "rank": 2, "decoded_token": " rugged"}, "61082": {"logprob": -1.7734904289245605, "rank": 3, "decoded_token": " panor"}, "15375": {"logprob": -2.5234904289245605, "rank": 4, "decoded_token": " vast"}, "13770": {"logprob": -2.6484904289245605, "rank": 5, "decoded_token": " maj"}}, {"1290": {"logprob": -3.099436753473128e-06, "rank": 1, "decoded_token": "ic"}, "2981": {"logprob": -13.56250286102295, "rank": 2, "decoded_token": "ically"}, "1702": {"logprob": -14.31250286102295, "rank": 3, "decoded_token": "ice"}, "4965": {"logprob": -16.625003814697266, "rank": 4, "decoded_token": "etic"}, "4336": {"logprob": -16.687503814697266, "rank": 5, "decoded_token": "icro"}}, {"3719": {"logprob": -0.1252945065498352, "rank": 1, "decoded_token": " view"}, "28035": {"logprob": -2.8752944469451904, "rank": 2, "decoded_token": " landscape"}, "24361": {"logprob": -3.2502944469451904, "rank": 3, "decoded_token": " mountain"}, "127945": {"logprob": -5.1252946853637695, "rank": 4, "decoded_token": " mountainous"}, "1044": {"logprob": -5.3752946853637695, "rank": 5, "decoded_token": ","}}, {"1307": {"logprob": -0.09058280289173126, "rank": 1, "decoded_token": " of"}, "89995": {"logprob": -3.465582847595215, "rank": 2, "decoded_token": " showc"}, "6122": {"logprob": -3.715582847595215, "rank": 3, "decoded_token": " shows"}, "6971": {"logprob": -4.590582847595215, "rank": 4, "decoded_token": " features"}, "66583": {"logprob": -5.090582847595215, "rank": 5, "decoded_token": " captures"}}, {"122203": {"logprob": -0.5323622226715088, "rank": 1, "decoded_token": " rugged"}, "1261": {"logprob": -2.032362222671509, "rank": 2, "decoded_token": " a"}, "6245": {"logprob": -2.532362222671509, "rank": 3, "decoded_token": " multiple"}, "127945": {"logprob": -3.157362222671509, "rank": 4, "decoded_token": " mountainous"}, "35463": {"logprob": -3.532362222671509, "rank": 5, "decoded_token": " mountains"}}, {"35463": {"logprob": -0.6520033478736877, "rank": 1, "decoded_token": " mountains"}, "1044": {"logprob": -1.027003288269043, "rank": 2, "decoded_token": ","}, "24361": {"logprob": -2.527003288269043, "rank": 3, "decoded_token": " mountain"}, "127945": {"logprob": -3.902003288269043, "rank": 4, "decoded_token": " mountainous"}, "11223": {"logprob": -4.652003288269043, "rank": 5, "decoded_token": " green"}}, {"1454": {"logprob": -0.39697548747062683, "rank": 1, "decoded_token": " with"}, "13875": {"logprob": -2.146975517272949, "rank": 2, "decoded_token": " covered"}, "1321": {"logprob": -2.271975517272949, "rank": 3, "decoded_token": " and"}, "2425": {"logprob": -3.459475517272949, "rank": 4, "decoded_token": " under"}, "47948": {"logprob": -4.459475517272949, "rank": 5, "decoded_token": " stretching"}}, {"11223": {"logprob": -1.3947651386260986, "rank": 1, "decoded_token": " green"}, "24880": {"logprob": -1.8947651386260986, "rank": 2, "decoded_token": " varying"}, "95746": {"logprob": -2.0822651386260986, "rank": 3, "decoded_token": " rocky"}, "1295": {"logprob": -3.0197651386260986, "rank": 4, "decoded_token": " l"}, "19546": {"logprob": -3.0822651386260986, "rank": 5, "decoded_token": " varied"}}, {"1321": {"logprob": -0.8649212121963501, "rank": 1, "decoded_token": " and"}, "61263": {"logprob": -1.73992121219635, "rank": 2, "decoded_token": " slopes"}, "47260": {"logprob": -1.86492121219635, "rank": 3, "decoded_token": " vegetation"}, "50373": {"logprob": -1.98992121219635, "rank": 4, "decoded_token": " patches"}, "23170": {"logprob": -3.4899210929870605, "rank": 5, "decoded_token": " grass"}}, {"95746": {"logprob": -0.21662631630897522, "rank": 1, "decoded_token": " rocky"}, "22980": {"logprob": -1.9666262865066528, "rank": 2, "decoded_token": " brown"}, "26549": {"logprob": -3.8416264057159424, "rank": 3, "decoded_token": " gray"}, "4266": {"logprob": -4.216626167297363, "rank": 4, "decoded_token": " bar"}, "34052": {"logprob": -4.966626167297363, "rank": 5, "decoded_token": " grey"}}, {"24765": {"logprob": -0.32041722536087036, "rank": 1, "decoded_token": " terrain"}, "57912": {"logprob": -1.8204171657562256, "rank": 2, "decoded_token": " terrains"}, "61263": {"logprob": -2.6954171657562256, "rank": 3, "decoded_token": " slopes"}, "84497": {"logprob": -3.9454171657562256, "rank": 4, "decoded_token": " landscapes"}, "17764": {"logprob": -4.695417404174805, "rank": 5, "decoded_token": " surfaces"}}, {"2425": {"logprob": -0.4664109945297241, "rank": 1, "decoded_token": " under"}, "1046": {"logprob": -1.4664109945297241, "rank": 2, "decoded_token": "."}, "1044": {"logprob": -3.4664111137390137, "rank": 3, "decoded_token": ","}, "22923": {"logprob": -3.9664111137390137, "rank": 4, "decoded_token": " extending"}, "47948": {"logprob": -4.091411113739014, "rank": 5, "decoded_token": " stretching"}}, {"1261": {"logprob": -0.015043734572827816, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -4.76504373550415, "rank": 2, "decoded_token": " an"}, "6133": {"logprob": -6.01504373550415, "rank": 3, "decoded_token": " clear"}, "1278": {"logprob": -6.26504373550415, "rank": 4, "decoded_token": " the"}, "16152": {"logprob": -7.26504373550415, "rank": 5, "decoded_token": " cloud"}}, {"6133": {"logprob": -0.7420746684074402, "rank": 1, "decoded_token": " clear"}, "18416": {"logprob": -1.492074728012085, "rank": 2, "decoded_token": " haz"}, "16152": {"logprob": -1.992074728012085, "rank": 3, "decoded_token": " cloud"}, "27254": {"logprob": -3.367074728012085, "rank": 4, "decoded_token": " partly"}, "4391": {"logprob": -3.617074728012085, "rank": 5, "decoded_token": " light"}}, {"21283": {"logprob": -0.007355513051152229, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -5.257355690002441, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -6.382355690002441, "rank": 3, "decoded_token": ","}, "1505": {"logprob": -8.257355690002441, "rank": 4, "decoded_token": " or"}, "3950": {"logprob": -10.132355690002441, "rank": 5, "decoded_token": " day"}}, {"1046": {"logprob": -0.01126158982515335, "rank": 1, "decoded_token": "."}, "1626": {"logprob": -4.636261463165283, "rank": 2, "decoded_token": ".\n"}, "1338": {"logprob": -7.761261463165283, "rank": 3, "decoded_token": ".\n\n"}, "1044": {"logprob": -7.761261463165283, "rank": 4, "decoded_token": ","}, "1395": {"logprob": -8.011261940002441, "rank": 5, "decoded_token": " is"}}, {"2": {"logprob": -0.00709608756005764, "rank": 1, "decoded_token": " "}, "1032": {"logprob": -5.007096290588379, "rank": 2, "decoded_token": " The"}, "1256": {"logprob": -8.132096290588379, "rank": 3, "decoded_token": " "}}]], [[1049, 1046, 1349, 7244, 10575, 1395, 28528, 1408, 1261, 32656, 11237, 1044, 7283, 2015, 1513, 1278, 13424, 1626, 1050, 1046, 1349, 122203, 24361, 28035, 1454, 11223, 1321, 95746, 24765, 2425, 1261, 6133, 21283, 1626, 1051, 1046, 1349, 2965, 1294, 1261, 4804, 4250, 12006, 4302, 48049, 4837, 1261, 29397, 1435, 22140, 21457, 22196, 1626, 1052, 1046, 1349, 53301, 59396, 3549, 1294, 1261, 12097, 1044, 121040, 1536, 11223, 23170, 1321, 17744, 34941, 16429, 2425, 1261, 10991, 21283, 1046, 2], "1. A black dog is lying on a wooden floor, looking up at the camera.\n2. A rugged mountain landscape with green and rocky terrain under a clear sky.\n3. A person in a red swimsuit walks along a beach as waves crash nearby.\n4. A winding gravel path in a park, bordered by green grass and blooming trees under a blue sky.", [{"1049": {"logprob": -0.17000193893909454, "rank": 1, "decoded_token": "1"}, "11745": {"logprob": -1.9200019836425781, "rank": 2, "decoded_token": "Here"}, "69957": {"logprob": -4.920001983642578, "rank": 3, "decoded_token": "Sure"}, "117991": {"logprob": -7.295001983642578, "rank": 4, "decoded_token": "Certain"}, "1784": {"logprob": -7.295001983642578, "rank": 5, "decoded_token": "The"}}, {"1046": {"logprob": -1.597391747054644e-05, "rank": 1, "decoded_token": "."}, "1041": {"logprob": -11.500016212463379, "rank": 2, "decoded_token": ")"}, "1058": {"logprob": -13.062516212463379, "rank": 3, "decoded_token": ":"}, "3590": {"logprob": -13.750016212463379, "rank": 4, "decoded_token": ".A"}, "48426": {"logprob": -14.312516212463379, "rank": 5, "decoded_token": ".The"}}, {"1349": {"logprob": -0.07567699253559113, "rank": 1, "decoded_token": " A"}, "1531": {"logprob": -3.075676918029785, "rank": 2, "decoded_token": " The"}, "1603": {"logprob": -3.950676918029785, "rank": 3, "decoded_token": " **"}, "2409": {"logprob": -6.075676918029785, "rank": 4, "decoded_token": " This"}, "8479": {"logprob": -6.575676918029785, "rank": 5, "decoded_token": " Black"}}, {"7244": {"logprob": -0.06906593590974808, "rank": 1, "decoded_token": " black"}, "16450": {"logprob": -3.694066047668457, "rank": 2, "decoded_token": " sle"}, "6231": {"logprob": -4.506566047668457, "rank": 3, "decoded_token": " close"}, "4329": {"logprob": -4.944066047668457, "rank": 4, "decoded_token": " large"}, "8500": {"logprob": -5.256566047668457, "rank": 5, "decoded_token": " dark"}}, {"10575": {"logprob": -0.11913803219795227, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.24413800239563, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -5.494138240814209, "rank": 3, "decoded_token": " Labrador"}, "28404": {"logprob": -7.181638240814209, "rank": 4, "decoded_token": " pup"}, "8636": {"logprob": -7.869138240814209, "rank": 5, "decoded_token": " lab"}}, {"1395": {"logprob": -0.782707154750824, "rank": 1, "decoded_token": " is"}, "22524": {"logprob": -1.1577072143554688, "rank": 2, "decoded_token": " lies"}, "1454": {"logprob": -2.9077072143554688, "rank": 3, "decoded_token": " with"}, "10637": {"logprob": -3.0327072143554688, "rank": 4, "decoded_token": " looks"}, "28528": {"logprob": -3.5327072143554688, "rank": 5, "decoded_token": " lying"}}, {"28528": {"logprob": -0.3443163335323334, "rank": 1, "decoded_token": " lying"}, "7283": {"logprob": -2.094316244125366, "rank": 2, "decoded_token": " looking"}, "60700": {"logprob": -2.844316244125366, "rank": 3, "decoded_token": " laying"}, "38235": {"logprob": -3.344316244125366, "rank": 4, "decoded_token": " resting"}, "18970": {"logprob": -3.469316244125366, "rank": 5, "decoded_token": " sitting"}}, {"1408": {"logprob": -0.29093095660209656, "rank": 1, "decoded_token": " on"}, "3151": {"logprob": -1.415930986404419, "rank": 2, "decoded_token": " down"}, "41132": {"logprob": -6.16593074798584, "rank": 3, "decoded_token": " attent"}, "1321": {"logprob": -6.85343074798584, "rank": 4, "decoded_token": " and"}, "14038": {"logprob": -6.97843074798584, "rank": 5, "decoded_token": " flat"}}, {"1261": {"logprob": -0.05553353577852249, "rank": 1, "decoded_token": " a"}, "2246": {"logprob": -3.6805336475372314, "rank": 2, "decoded_token": " its"}, "32656": {"logprob": -3.8055336475372314, "rank": 3, "decoded_token": " wooden"}, "1278": {"logprob": -5.305533409118652, "rank": 4, "decoded_token": " the"}, "3977": {"logprob": -7.430533409118652, "rank": 5, "decoded_token": " top"}}, {"32656": {"logprob": -0.039505477994680405, "rank": 1, "decoded_token": " wooden"}, "3403": {"logprob": -3.9145054817199707, "rank": 2, "decoded_token": " text"}, "44130": {"logprob": -4.414505481719971, "rank": 3, "decoded_token": " rust"}, "12603": {"logprob": -5.914505481719971, "rank": 4, "decoded_token": " wood"}, "17253": {"logprob": -6.539505481719971, "rank": 5, "decoded_token": " weather"}}, {"11237": {"logprob": -0.373188853263855, "rank": 1, "decoded_token": " floor"}, "4691": {"logprob": -1.248188853263855, "rank": 2, "decoded_token": " surface"}, "1615": {"logprob": -4.2481889724731445, "rank": 3, "decoded_token": " pl"}, "3403": {"logprob": -5.6231889724731445, "rank": 4, "decoded_token": " text"}, "28984": {"logprob": -5.9981889724731445, "rank": 5, "decoded_token": " deck"}}, {"1044": {"logprob": -1.378434181213379, "rank": 3, "decoded_token": ","}, "7283": {"logprob": -1.378434181213379, "rank": 1, "decoded_token": " looking"}, "1626": {"logprob": -1.378434181213379, "rank": 2, "decoded_token": ".\n"}, "1321": {"logprob": -2.378434181213379, "rank": 4, "decoded_token": " and"}, "1454": {"logprob": -2.628434181213379, "rank": 5, "decoded_token": " with"}}, {"7283": {"logprob": -0.17630912363529205, "rank": 1, "decoded_token": " looking"}, "11589": {"logprob": -2.551309108734131, "rank": 2, "decoded_token": " gaz"}, "35542": {"logprob": -2.676309108734131, "rank": 3, "decoded_token": " staring"}, "22116": {"logprob": -6.238809108734131, "rank": 4, "decoded_token": " facing"}, "11735": {"logprob": -6.488809108734131, "rank": 5, "decoded_token": " giving"}}, {"2015": {"logprob": -0.8436563014984131, "rank": 1, "decoded_token": " up"}, "7655": {"logprob": -1.343656301498413, "rank": 2, "decoded_token": " directly"}, "74606": {"logprob": -1.718656301498413, "rank": 3, "decoded_token": " upwards"}, "40022": {"logprob": -2.593656301498413, "rank": 4, "decoded_token": " upward"}, "11521": {"logprob": -4.406156539916992, "rank": 5, "decoded_token": " straight"}}, {"1513": {"logprob": -0.45780688524246216, "rank": 1, "decoded_token": " at"}, "1626": {"logprob": -1.7078068256378174, "rank": 2, "decoded_token": ".\n"}, "1454": {"logprob": -2.3328068256378174, "rank": 3, "decoded_token": " with"}, "1935": {"logprob": -3.5828068256378174, "rank": 4, "decoded_token": " int"}, "41132": {"logprob": -3.9578068256378174, "rank": 5, "decoded_token": " attent"}}, {"1278": {"logprob": -0.0004164305282756686, "rank": 1, "decoded_token": " the"}, "4433": {"logprob": -8.00041675567627, "rank": 2, "decoded_token": " something"}, "1261": {"logprob": -10.50041675567627, "rank": 3, "decoded_token": " a"}, "2246": {"logprob": -10.87541675567627, "rank": 4, "decoded_token": " its"}, "1636": {"logprob": -11.37541675567627, "rank": 5, "decoded_token": " you"}}, {"13424": {"logprob": -0.000399033073335886, "rank": 1, "decoded_token": " camera"}, "56268": {"logprob": -8.125398635864258, "rank": 2, "decoded_token": " viewer"}, "68439": {"logprob": -9.500398635864258, "rank": 3, "decoded_token": " photographer"}, "37967": {"logprob": -12.000398635864258, "rank": 4, "decoded_token": " ceiling"}, "2965": {"logprob": -12.312898635864258, "rank": 5, "decoded_token": " person"}}, {"1626": {"logprob": -0.10298559814691544, "rank": 1, "decoded_token": ".\n"}, "1046": {"logprob": -2.9779856204986572, "rank": 2, "decoded_token": "."}, "1454": {"logprob": -3.2279856204986572, "rank": 3, "decoded_token": " with"}, "1338": {"logprob": -5.227985382080078, "rank": 4, "decoded_token": ".\n\n"}, "1935": {"logprob": -6.852985382080078, "rank": 5, "decoded_token": " int"}}, {"1050": {"logprob": -0.002897590398788452, "rank": 1, "decoded_token": "2"}, "1256": {"logprob": -6.5028977394104, "rank": 2, "decoded_token": " "}, "1293": {"logprob": -6.6278977394104, "rank": 3, "decoded_token": " "}, "1032": {"logprob": -9.877897262573242, "rank": 4, "decoded_token": " "}, "1009": {"logprob": -11.627897262573242, "rank": 5, "decoded_token": "\t"}}, {"1046": {"logprob": -1.5497195136049413e-06, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -14.875001907348633, "rank": 2, "decoded_token": ","}, "3590": {"logprob": -15.000001907348633, "rank": 3, "decoded_token": ".A"}, "2247": {"logprob": -15.125001907348633, "rank": 4, "decoded_token": " ."}, "1058": {"logprob": -15.375001907348633, "rank": 5, "decoded_token": ":"}}, {"1349": {"logprob": -0.6107801198959351, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.360780119895935, "rank": 2, "decoded_token": " Maj"}, "113465": {"logprob": -2.3607802391052246, "rank": 3, "decoded_token": " Rug"}, "27260": {"logprob": -3.7357802391052246, "rank": 4, "decoded_token": " Mountain"}, "1531": {"logprob": -4.485780239105225, "rank": 5, "decoded_token": " The"}}, {"122203": {"logprob": -0.8547073602676392, "rank": 1, "decoded_token": " rugged"}, "15375": {"logprob": -2.1047072410583496, "rank": 2, "decoded_token": " vast"}, "10726": {"logprob": -2.1047072410583496, "rank": 3, "decoded_token": " scen"}, "61082": {"logprob": -2.6047072410583496, "rank": 4, "decoded_token": " panor"}, "2965": {"logprob": -3.2922072410583496, "rank": 5, "decoded_token": " person"}}, {"24361": {"logprob": -0.41217130422592163, "rank": 1, "decoded_token": " mountain"}, "1044": {"logprob": -1.6621713638305664, "rank": 2, "decoded_token": ","}, "127945": {"logprob": -2.6621713638305664, "rank": 3, "decoded_token": " mountainous"}, "28035": {"logprob": -3.5371713638305664, "rank": 4, "decoded_token": " landscape"}, "1321": {"logprob": -3.6621713638305664, "rank": 5, "decoded_token": " and"}}, {"28035": {"logprob": -0.6676621437072754, "rank": 1, "decoded_token": " landscape"}, "4521": {"logprob": -0.7926621437072754, "rank": 2, "decoded_token": " range"}, "24765": {"logprob": -4.542662143707275, "rank": 3, "decoded_token": " terrain"}, "13327": {"logprob": -5.167662143707275, "rank": 4, "decoded_token": " scene"}, "12248": {"logprob": -5.167662143707275, "rank": 5, "decoded_token": " peak"}}, {"1454": {"logprob": -0.31015345454216003, "rank": 1, "decoded_token": " with"}, "6971": {"logprob": -2.4351534843444824, "rank": 2, "decoded_token": " features"}, "94973": {"logprob": -3.3101534843444824, "rank": 3, "decoded_token": " stretches"}, "89995": {"logprob": -3.4351534843444824, "rank": 4, "decoded_token": " showc"}, "1395": {"logprob": -3.5601534843444824, "rank": 5, "decoded_token": " is"}}, {"11223": {"logprob": -1.547694206237793, "rank": 1, "decoded_token": " green"}, "95746": {"logprob": -1.922694206237793, "rank": 2, "decoded_token": " rocky"}, "27469": {"logprob": -2.172694206237793, "rank": 3, "decoded_token": " peaks"}, "6245": {"logprob": -2.297694206237793, "rank": 4, "decoded_token": " multiple"}, "47147": {"logprob": -2.360194206237793, "rank": 5, "decoded_token": " steep"}}, {"1321": {"logprob": -0.9617817401885986, "rank": 1, "decoded_token": " and"}, "61263": {"logprob": -1.3367817401885986, "rank": 2, "decoded_token": " slopes"}, "51187": {"logprob": -2.3367817401885986, "rank": 3, "decoded_token": " hills"}, "47260": {"logprob": -2.3367817401885986, "rank": 4, "decoded_token": " vegetation"}, "50373": {"logprob": -2.7117817401885986, "rank": 5, "decoded_token": " patches"}}, {"95746": {"logprob": -0.11686273664236069, "rank": 1, "decoded_token": " rocky"}, "22980": {"logprob": -2.7418627738952637, "rank": 2, "decoded_token": " brown"}, "4266": {"logprob": -3.8668627738952637, "rank": 3, "decoded_token": " bar"}, "26549": {"logprob": -4.491862773895264, "rank": 4, "decoded_token": " gray"}, "9091": {"logprob": -5.366862773895264, "rank": 5, "decoded_token": " rock"}}, {"24765": {"logprob": -0.22640009224414825, "rank": 1, "decoded_token": " terrain"}, "57912": {"logprob": -2.476400136947632, "rank": 2, "decoded_token": " terrains"}, "61263": {"logprob": -2.726400136947632, "rank": 3, "decoded_token": " slopes"}, "51187": {"logprob": -3.851400136947632, "rank": 4, "decoded_token": " hills"}, "27469": {"logprob": -3.976400136947632, "rank": 5, "decoded_token": " peaks"}}, {"2425": {"logprob": -0.7823817133903503, "rank": 1, "decoded_token": " under"}, "1626": {"logprob": -1.1573817729949951, "rank": 2, "decoded_token": ".\n"}, "94973": {"logprob": -2.657381772994995, "rank": 3, "decoded_token": " stretches"}, "1395": {"logprob": -2.782381772994995, "rank": 4, "decoded_token": " is"}, "7038": {"logprob": -3.532381772994995, "rank": 5, "decoded_token": " extends"}}, {"1261": {"logprob": -0.016132064163684845, "rank": 1, "decoded_token": " a"}, "6133": {"logprob": -5.39113187789917, "rank": 2, "decoded_token": " clear"}, "1420": {"logprob": -5.39113187789917, "rank": 3, "decoded_token": " an"}, "1278": {"logprob": -6.01613187789917, "rank": 4, "decoded_token": " the"}, "16152": {"logprob": -6.26613187789917, "rank": 5, "decoded_token": " cloud"}}, {"6133": {"logprob": -0.44541382789611816, "rank": 1, "decoded_token": " clear"}, "16152": {"logprob": -2.070413827896118, "rank": 2, "decoded_token": " cloud"}, "18416": {"logprob": -2.320413827896118, "rank": 3, "decoded_token": " haz"}, "27254": {"logprob": -3.195413827896118, "rank": 4, "decoded_token": " partly"}, "10991": {"logprob": -3.320413827896118, "rank": 5, "decoded_token": " blue"}}, {"21283": {"logprob": -0.003768961876630783, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -5.7537689208984375, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -7.6287689208984375, "rank": 3, "decoded_token": ","}, "1505": {"logprob": -10.753768920898438, "rank": 4, "decoded_token": " or"}, "3044": {"logprob": -11.128768920898438, "rank": 5, "decoded_token": " sk"}}, {"1626": {"logprob": -0.0008177988929674029, "rank": 1, "decoded_token": ".\n"}, "1046": {"logprob": -7.375817775726318, "rank": 2, "decoded_token": "."}, "1395": {"logprob": -9.750818252563477, "rank": 3, "decoded_token": " is"}, "1010": {"logprob": -10.125818252563477, "rank": 4, "decoded_token": "\n"}, "1044": {"logprob": -10.750818252563477, "rank": 5, "decoded_token": ","}}, {"1051": {"logprob": -0.00013457823661156, "rank": 1, "decoded_token": "3"}, "1052": {"logprob": -9.125134468078613, "rank": 2, "decoded_token": "4"}, "1256": {"logprob": -11.375134468078613, "rank": 3, "decoded_token": " "}, "1050": {"logprob": -11.875134468078613, "rank": 4, "decoded_token": "2"}, "1049": {"logprob": -13.000134468078613, "rank": 5, "decoded_token": "1"}}, {"1046": {"logprob": -7.152555099310121e-07, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -14.875000953674316, "rank": 2, "decoded_token": ".A"}, "48426": {"logprob": -15.937500953674316, "rank": 3, "decoded_token": ".The"}, "1349": {"logprob": -17.0, "rank": 4, "decoded_token": " A"}, "1338": {"logprob": -17.3125, "rank": 5, "decoded_token": ".\n\n"}}, {"1349": {"logprob": -0.03193942829966545, "rank": 1, "decoded_token": " A"}, "10638": {"logprob": -4.406939506530762, "rank": 2, "decoded_token": " Two"}, "2048": {"logprob": -5.031939506530762, "rank": 3, "decoded_token": " An"}, "1488": {"logprob": -5.156939506530762, "rank": 4, "decoded_token": " W"}, "15035": {"logprob": -5.906939506530762, "rank": 5, "decoded_token": " People"}}, {"2965": {"logprob": -0.41655251383781433, "rank": 1, "decoded_token": " person"}, "92731": {"logprob": -1.5415525436401367, "rank": 2, "decoded_token": " lone"}, "79013": {"logprob": -2.7915525436401367, "rank": 3, "decoded_token": " solitary"}, "29397": {"logprob": -3.5415525436401367, "rank": 4, "decoded_token": " beach"}, "2169": {"logprob": -4.729052543640137, "rank": 5, "decoded_token": " ser"}}, {"1294": {"logprob": -0.9845026135444641, "rank": 1, "decoded_token": " in"}, "1395": {"logprob": -1.2345025539398193, "rank": 2, "decoded_token": " is"}, "48049": {"logprob": -1.8595025539398193, "rank": 3, "decoded_token": " walks"}, "23737": {"logprob": -2.2345025539398193, "rank": 4, "decoded_token": " stands"}, "1285": {"logprob": -2.8595025539398193, "rank": 5, "decoded_token": " w"}}, {"1261": {"logprob": -0.32012784481048584, "rank": 1, "decoded_token": " a"}, "4804": {"logprob": -1.3201278448104858, "rank": 2, "decoded_token": " red"}, "1420": {"logprob": -5.820127964019775, "rank": 3, "decoded_token": " an"}, "64031": {"logprob": -6.570127964019775, "rank": 4, "decoded_token": " swim"}, "18168": {"logprob": -6.695127964019775, "rank": 5, "decoded_token": " bright"}}, {"4804": {"logprob": -0.10999592393636703, "rank": 1, "decoded_token": " red"}, "1285": {"logprob": -2.3599958419799805, "rank": 2, "decoded_token": " w"}, "4250": {"logprob": -5.6099958419799805, "rank": 3, "decoded_token": " sw"}, "18168": {"logprob": -6.0474958419799805, "rank": 4, "decoded_token": " bright"}, "18258": {"logprob": -6.4224958419799805, "rank": 5, "decoded_token": " wet"}}, {"4250": {"logprob": -0.2469252496957779, "rank": 1, "decoded_token": " sw"}, "1285": {"logprob": -2.3719253540039062, "rank": 2, "decoded_token": " w"}, "64031": {"logprob": -2.7469253540039062, "rank": 3, "decoded_token": " swim"}, "17513": {"logprob": -3.2469253540039062, "rank": 4, "decoded_token": " suit"}, "75948": {"logprob": -4.371925354003906, "rank": 5, "decoded_token": " outfit"}}, {"12006": {"logprob": -5.722029527532868e-06, "rank": 1, "decoded_token": "ims"}, "25763": {"logprob": -12.750005722045898, "rank": 2, "decoded_token": "immer"}, "7552": {"logprob": -13.687505722045898, "rank": 3, "decoded_token": "imm"}, "2097": {"logprob": -16.6875057220459, "rank": 4, "decoded_token": "ins"}, "19523": {"logprob": -16.7500057220459, "rank": 5, "decoded_token": "imb"}}, {"4302": {"logprob": -1.8000440832111053e-05, "rank": 1, "decoded_token": "uit"}, "17513": {"logprob": -11.875018119812012, "rank": 2, "decoded_token": " suit"}, "8036": {"logprob": -13.250018119812012, "rank": 3, "decoded_token": "irt"}, "36953": {"logprob": -13.500018119812012, "rank": 4, "decoded_token": "uiten"}, "1276": {"logprob": -14.437518119812012, "rank": 5, "decoded_token": "it"}}, {"48049": {"logprob": -0.41766560077667236, "rank": 1, "decoded_token": " walks"}, "1395": {"logprob": -1.4176656007766724, "rank": 2, "decoded_token": " is"}, "19710": {"logprob": -2.792665481567383, "rank": 3, "decoded_token": " walking"}, "23737": {"logprob": -3.917665481567383, "rank": 4, "decoded_token": " stands"}, "1285": {"logprob": -4.292665481567383, "rank": 5, "decoded_token": " w"}}, {"4837": {"logprob": -0.002689199522137642, "rank": 1, "decoded_token": " along"}, "9412": {"logprob": -6.627689361572266, "rank": 2, "decoded_token": " alone"}, "6117": {"logprob": -7.377689361572266, "rank": 3, "decoded_token": " near"}, "1408": {"logprob": -8.002689361572266, "rank": 4, "decoded_token": " on"}, "2203": {"logprob": -8.377689361572266, "rank": 5, "decoded_token": " into"}}, {"1261": {"logprob": -0.38749611377716064, "rank": 1, "decoded_token": " a"}, "1278": {"logprob": -1.1374961137771606, "rank": 2, "decoded_token": " the"}, "1420": {"logprob": -7.387495994567871, "rank": 3, "decoded_token": " an"}, "100991": {"logprob": -13.949995994567871, "rank": 4, "decoded_token": " sandy"}, "18258": {"logprob": -14.512495994567871, "rank": 5, "decoded_token": " wet"}}, {"29397": {"logprob": -0.5292408466339111, "rank": 1, "decoded_token": " beach"}, "100991": {"logprob": -0.9042408466339111, "rank": 2, "decoded_token": " sandy"}, "1627": {"logprob": -6.029240608215332, "rank": 3, "decoded_token": " sh"}, "46422": {"logprob": -6.529240608215332, "rank": 4, "decoded_token": " shore"}, "2169": {"logprob": -7.779240608215332, "rank": 5, "decoded_token": " ser"}}, {"1435": {"logprob": -0.29965779185295105, "rank": 1, "decoded_token": " as"}, "1454": {"logprob": -1.6746578216552734, "rank": 2, "decoded_token": " with"}, "1513": {"logprob": -3.7996578216552734, "rank": 3, "decoded_token": " at"}, "3016": {"logprob": -3.7996578216552734, "rank": 4, "decoded_token": " while"}, "6117": {"logprob": -4.799657821655273, "rank": 5, "decoded_token": " near"}}, {"22140": {"logprob": -0.015346773900091648, "rank": 1, "decoded_token": " waves"}, "1261": {"logprob": -4.515347003936768, "rank": 2, "decoded_token": " a"}, "1278": {"logprob": -6.140347003936768, "rank": 3, "decoded_token": " the"}, "27208": {"logprob": -6.890347003936768, "rank": 4, "decoded_token": " ocean"}, "4329": {"logprob": -7.265347003936768, "rank": 5, "decoded_token": " large"}}, {"21457": {"logprob": -0.013234862126410007, "rank": 1, "decoded_token": " crash"}, "33168": {"logprob": -5.138235092163086, "rank": 2, "decoded_token": " gently"}, "10401": {"logprob": -5.950735092163086, "rank": 3, "decoded_token": " roll"}, "4323": {"logprob": -6.700735092163086, "rank": 4, "decoded_token": " break"}, "5125": {"logprob": -7.138235092163086, "rank": 5, "decoded_token": " approach"}}, {"22196": {"logprob": -0.060372594743967056, "rank": 1, "decoded_token": " nearby"}, "6117": {"logprob": -3.3103725910186768, "rank": 2, "decoded_token": " near"}, "1294": {"logprob": -4.435372829437256, "rank": 3, "decoded_token": " in"}, "25644": {"logprob": -6.310372829437256, "rank": 4, "decoded_token": " beside"}, "1321": {"logprob": -6.560372829437256, "rank": 5, "decoded_token": " and"}}, {"1626": {"logprob": -0.005290080793201923, "rank": 1, "decoded_token": ".\n"}, "1294": {"logprob": -6.5052900314331055, "rank": 2, "decoded_token": " in"}, "1044": {"logprob": -7.0052900314331055, "rank": 3, "decoded_token": ","}, "1321": {"logprob": -7.1302900314331055, "rank": 4, "decoded_token": " and"}, "1513": {"logprob": -7.2552900314331055, "rank": 5, "decoded_token": " at"}}, {"1052": {"logprob": -7.748573807475623e-06, "rank": 1, "decoded_token": "4"}, "1051": {"logprob": -12.562507629394531, "rank": 2, "decoded_token": "3"}, "1053": {"logprob": -13.125007629394531, "rank": 3, "decoded_token": "5"}, "1256": {"logprob": -14.125007629394531, "rank": 4, "decoded_token": " "}, "1049": {"logprob": -14.312507629394531, "rank": 5, "decoded_token": "1"}}, {"1046": {"logprob": -1.2993727978027891e-05, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -12.62501335144043, "rank": 2, "decoded_token": ","}, "3590": {"logprob": -12.75001335144043, "rank": 3, "decoded_token": ".A"}, "1058": {"logprob": -13.00001335144043, "rank": 4, "decoded_token": ":"}, "2247": {"logprob": -13.37501335144043, "rank": 5, "decoded_token": " ."}}, {"1349": {"logprob": -0.00046957432641647756, "rank": 1, "decoded_token": " A"}, "2048": {"logprob": -8.250469207763672, "rank": 2, "decoded_token": " An"}, "1488": {"logprob": -10.125469207763672, "rank": 3, "decoded_token": " W"}, "2409": {"logprob": -10.375469207763672, "rank": 4, "decoded_token": " This"}, "12232": {"logprob": -10.500469207763672, "rank": 5, "decoded_token": " Gra"}}, {"53301": {"logprob": -0.35120296478271484, "rank": 1, "decoded_token": " winding"}, "59396": {"logprob": -1.8512029647827148, "rank": 2, "decoded_token": " gravel"}, "2169": {"logprob": -2.476202964782715, "rank": 3, "decoded_token": " ser"}, "54742": {"logprob": -3.851202964782715, "rank": 4, "decoded_token": " peaceful"}, "43536": {"logprob": -5.101202964782715, "rank": 5, "decoded_token": " curved"}}, {"59396": {"logprob": -0.2955280840396881, "rank": 1, "decoded_token": " gravel"}, "3549": {"logprob": -1.6705280542373657, "rank": 2, "decoded_token": " path"}, "14801": {"logprob": -2.7955281734466553, "rank": 3, "decoded_token": " pathway"}, "1044": {"logprob": -6.420527935028076, "rank": 4, "decoded_token": ","}, "18341": {"logprob": -6.670527935028076, "rank": 5, "decoded_token": " pathways"}}, {"3549": {"logprob": -0.03408379852771759, "rank": 1, "decoded_token": " path"}, "14801": {"logprob": -3.409083843231201, "rank": 2, "decoded_token": " pathway"}, "18341": {"logprob": -8.284083366394043, "rank": 3, "decoded_token": " pathways"}, "1505": {"logprob": -9.534083366394043, "rank": 4, "decoded_token": " or"}, "7368": {"logprob": -10.659083366394043, "rank": 5, "decoded_token": "path"}}, {"1294": {"logprob": -1.0857839584350586, "rank": 1, "decoded_token": " in"}, "13335": {"logprob": -1.4607839584350586, "rank": 2, "decoded_token": " leads"}, "2645": {"logprob": -1.9607839584350586, "rank": 3, "decoded_token": " through"}, "29817": {"logprob": -2.4607839584350586, "rank": 4, "decoded_token": " surrounded"}, "22416": {"logprob": -3.2107839584350586, "rank": 5, "decoded_token": " curves"}}, {"1261": {"logprob": -0.00011705666838679463, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -9.500117301940918, "rank": 2, "decoded_token": " an"}, "1278": {"logprob": -10.250117301940918, "rank": 3, "decoded_token": " the"}, "2549": {"logprob": -12.750117301940918, "rank": 4, "decoded_token": " what"}, "11223": {"logprob": -13.750117301940918, "rank": 5, "decoded_token": " green"}}, {"12097": {"logprob": -0.02791696786880493, "rank": 1, "decoded_token": " park"}, "2169": {"logprob": -4.65291690826416, "rank": 2, "decoded_token": " ser"}, "1295": {"logprob": -4.65291690826416, "rank": 3, "decoded_token": " l"}, "23170": {"logprob": -5.27791690826416, "rank": 4, "decoded_token": " grass"}, "26428": {"logprob": -6.52791690826416, "rank": 5, "decoded_token": " garden"}}, {"1044": {"logprob": -1.350893259048462, "rank": 1, "decoded_token": ","}, "1395": {"logprob": -1.600893259048462, "rank": 2, "decoded_token": " is"}, "29817": {"logprob": -2.350893259048462, "rank": 3, "decoded_token": " surrounded"}, "121313": {"logprob": -2.475893259048462, "rank": 4, "decoded_token": " flanked"}, "1454": {"logprob": -2.475893259048462, "rank": 5, "decoded_token": " with"}}, {"121040": {"logprob": -0.710591197013855, "rank": 1, "decoded_token": " bordered"}, "121313": {"logprob": -1.085591197013855, "rank": 2, "decoded_token": " flanked"}, "54410": {"logprob": -1.960591197013855, "rank": 3, "decoded_token": " lined"}, "29817": {"logprob": -3.8355913162231445, "rank": 4, "decoded_token": " surrounded"}, "1454": {"logprob": -5.8355913162231445, "rank": 5, "decoded_token": " with"}}, {"1536": {"logprob": -4.6491513785440475e-06, "rank": 1, "decoded_token": " by"}, "1454": {"logprob": -12.375004768371582, "rank": 2, "decoded_token": " with"}, "1408": {"logprob": -15.812504768371582, "rank": 3, "decoded_token": " on"}, "3326": {"logprob": -16.875003814697266, "rank": 4, "decoded_token": "by"}, "1295": {"logprob": -16.875003814697266, "rank": 5, "decoded_token": " l"}}, {"11223": {"logprob": -0.4314780533313751, "rank": 1, "decoded_token": " green"}, "1295": {"logprob": -1.4314780235290527, "rank": 2, "decoded_token": " l"}, "23170": {"logprob": -2.4314780235290527, "rank": 3, "decoded_token": " grass"}, "17744": {"logprob": -4.806478023529053, "rank": 4, "decoded_token": " blo"}, "95612": {"logprob": -5.181478023529053, "rank": 5, "decoded_token": " vibrant"}}, {"23170": {"logprob": -0.00035041390219703317, "rank": 1, "decoded_token": " grass"}, "69230": {"logprob": -8.125349998474121, "rank": 2, "decoded_token": " lawn"}, "128633": {"logprob": -10.750349998474121, "rank": 3, "decoded_token": " grasses"}, "87781": {"logprob": -11.437849998474121, "rank": 4, "decoded_token": "\u8349"}, "16429": {"logprob": -11.437849998474121, "rank": 5, "decoded_token": " trees"}}, {"1321": {"logprob": -0.0009494088008068502, "rank": 1, "decoded_token": " and"}, "1044": {"logprob": -7.125949382781982, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -9.25094985961914, "rank": 3, "decoded_token": " with"}, "2425": {"logprob": -11.75094985961914, "rank": 4, "decoded_token": " under"}, "1046": {"logprob": -11.75094985961914, "rank": 5, "decoded_token": "."}}, {"17744": {"logprob": -0.21488544344902039, "rank": 1, "decoded_token": " blo"}, "105368": {"logprob": -1.8398854732513428, "rank": 2, "decoded_token": " bloss"}, "87833": {"logprob": -3.8398854732513428, "rank": 3, "decoded_token": " flowering"}, "16429": {"logprob": -4.464885234832764, "rank": 4, "decoded_token": " trees"}, "117207": {"logprob": -7.589885234832764, "rank": 5, "decoded_token": " bloom"}}, {"34941": {"logprob": -7.152555099310121e-07, "rank": 1, "decoded_token": "oming"}, "35974": {"logprob": -14.375000953674316, "rank": 2, "decoded_token": "omed"}, "6325": {"logprob": -16.5625, "rank": 3, "decoded_token": "oms"}, "11009": {"logprob": -17.625, "rank": 4, "decoded_token": "omy"}, "9457": {"logprob": -18.875, "rank": 5, "decoded_token": "ming"}}, {"16429": {"logprob": -0.002424398437142372, "rank": 1, "decoded_token": " trees"}, "103796": {"logprob": -6.627424240112305, "rank": 2, "decoded_token": " cherry"}, "32152": {"logprob": -7.377424240112305, "rank": 3, "decoded_token": " flowers"}, "29151": {"logprob": -9.314924240112305, "rank": 4, "decoded_token": " shr"}, "20370": {"logprob": -9.564924240112305, "rank": 5, "decoded_token": " fruit"}}, {"2425": {"logprob": -0.3792523741722107, "rank": 1, "decoded_token": " under"}, "1046": {"logprob": -1.3792524337768555, "rank": 2, "decoded_token": "."}, "3675": {"logprob": -2.8792524337768555, "rank": 3, "decoded_token": " against"}, "1044": {"logprob": -5.1292524337768555, "rank": 4, "decoded_token": ","}, "1454": {"logprob": -7.2542524337768555, "rank": 5, "decoded_token": " with"}}, {"1261": {"logprob": -0.0002315968304174021, "rank": 1, "decoded_token": " a"}, "1278": {"logprob": -8.875231742858887, "rank": 2, "decoded_token": " the"}, "10991": {"logprob": -9.875231742858887, "rank": 3, "decoded_token": " blue"}, "6133": {"logprob": -10.375231742858887, "rank": 4, "decoded_token": " clear"}, "1420": {"logprob": -12.250231742858887, "rank": 5, "decoded_token": " an"}}, {"10991": {"logprob": -0.6372600197792053, "rank": 1, "decoded_token": " blue"}, "6133": {"logprob": -0.7622600197792053, "rank": 2, "decoded_token": " clear"}, "18168": {"logprob": -5.3872599601745605, "rank": 3, "decoded_token": " bright"}, "105573": {"logprob": -10.012260437011719, "rank": 4, "decoded_token": " sunny"}, "15330": {"logprob": -11.512260437011719, "rank": 5, "decoded_token": " Blue"}}, {"21283": {"logprob": -6.12716976320371e-05, "rank": 1, "decoded_token": " sky"}, "1044": {"logprob": -9.87506103515625, "rank": 2, "decoded_token": ","}, "19673": {"logprob": -12.00006103515625, "rank": 3, "decoded_token": " Sky"}, "1321": {"logprob": -13.31256103515625, "rank": 4, "decoded_token": " and"}, "124968": {"logprob": -14.81256103515625, "rank": 5, "decoded_token": " skies"}}, {"1046": {"logprob": -0.00013982271775603294, "rank": 1, "decoded_token": "."}, "2": {"logprob": -9.500140190124512, "rank": 2, "decoded_token": ".\n"}, "1626": {"logprob": -10.000140190124512, "rank": 3, "decoded_token": ".\n\n"}, "1338": {"logprob": -11.750140190124512, "rank": 4, "decoded_token": " with"}}, {"2": {"logprob": -0.0004533693427219987, "rank": 1, "decoded_token": " "}, "1032": {"logprob": -7.750453472137451, "rank": 2, "decoded_token": " Each"}, "1256": {"logprob": -11.125452995300293, "rank": 3, "decoded_token": " This"}}]]]
\ No newline at end of file
[[[1784, 3937, 6122, 1261, 7244, 10575, 18970, 1408, 1261, 32656, 4691, 1046, 2], "The image shows a black dog sitting on a wooden surface.", [{"1784": {"logprob": -0.11685245484113693, "rank": 1, "decoded_token": "The"}, "4380": {"logprob": -2.3668525218963623, "rank": 2, "decoded_token": "This"}, "1049": {"logprob": -4.741852283477783, "rank": 3, "decoded_token": "1"}, "117991": {"logprob": -5.991852283477783, "rank": 4, "decoded_token": "Certain"}, "1785": {"logprob": -5.991852283477783, "rank": 5, "decoded_token": "In"}}, {"3937": {"logprob": -0.2591013014316559, "rank": 1, "decoded_token": " image"}, "2158": {"logprob": -1.5091012716293335, "rank": 2, "decoded_token": " first"}, "3977": {"logprob": -5.884101390838623, "rank": 3, "decoded_token": " top"}, "7244": {"logprob": -6.259101390838623, "rank": 4, "decoded_token": " black"}, "8061": {"logprob": -6.759101390838623, "rank": 5, "decoded_token": " images"}}, {"6122": {"logprob": -0.9660423994064331, "rank": 1, "decoded_token": " shows"}, "51948": {"logprob": -1.466042399406433, "rank": 2, "decoded_token": " depicts"}, "6971": {"logprob": -1.466042399406433, "rank": 3, "decoded_token": " features"}, "25981": {"logprob": -2.8410425186157227, "rank": 4, "decoded_token": " displays"}, "8688": {"logprob": -2.8410425186157227, "rank": 5, "decoded_token": " contains"}}, {"1261": {"logprob": -0.0030613720882683992, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -6.253061294555664, "rank": 2, "decoded_token": " an"}, "2295": {"logprob": -7.878061294555664, "rank": 3, "decoded_token": " two"}, "2342": {"logprob": -7.878061294555664, "rank": 4, "decoded_token": " only"}, "1278": {"logprob": -8.628061294555664, "rank": 5, "decoded_token": " the"}}, {"7244": {"logprob": -0.17649099230766296, "rank": 1, "decoded_token": " black"}, "6231": {"logprob": -2.3014910221099854, "rank": 2, "decoded_token": " close"}, "4249": {"logprob": -3.4264910221099854, "rank": 3, "decoded_token": " single"}, "4329": {"logprob": -5.113990783691406, "rank": 4, "decoded_token": " large"}, "10575": {"logprob": -5.176490783691406, "rank": 5, "decoded_token": " dog"}}, {"10575": {"logprob": -0.10929587483406067, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.4842958450317383, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -4.109295845031738, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -7.296795845031738, "rank": 4, "decoded_token": " Lab"}, "7990": {"logprob": -7.484295845031738, "rank": 5, "decoded_token": " cat"}}, {"18970": {"logprob": -0.830376148223877, "rank": 1, "decoded_token": " sitting"}, "1454": {"logprob": -1.580376148223877, "rank": 2, "decoded_token": " with"}, "28528": {"logprob": -1.955376148223877, "rank": 3, "decoded_token": " lying"}, "7283": {"logprob": -2.205376148223877, "rank": 4, "decoded_token": " looking"}, "15866": {"logprob": -3.017876148223877, "rank": 5, "decoded_token": " standing"}}, {"1408": {"logprob": -0.08554735779762268, "rank": 1, "decoded_token": " on"}, "1321": {"logprob": -3.71054744720459, "rank": 2, "decoded_token": " and"}, "3675": {"logprob": -3.96054744720459, "rank": 3, "decoded_token": " against"}, "41132": {"logprob": -4.71054744720459, "rank": 4, "decoded_token": " attent"}, "1454": {"logprob": -5.08554744720459, "rank": 5, "decoded_token": " with"}}, {"1261": {"logprob": -0.540847897529602, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -0.915847897529602, "rank": 2, "decoded_token": " wooden"}, "12603": {"logprob": -5.4158477783203125, "rank": 3, "decoded_token": " wood"}, "3977": {"logprob": -5.4158477783203125, "rank": 4, "decoded_token": " top"}, "17253": {"logprob": -6.2908477783203125, "rank": 5, "decoded_token": " weather"}}, {"32656": {"logprob": -0.025753861293196678, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -4.400753974914551, "rank": 2, "decoded_token": " rust"}, "12603": {"logprob": -5.275753974914551, "rank": 3, "decoded_token": " wood"}, "3403": {"logprob": -5.400753974914551, "rank": 4, "decoded_token": " text"}, "17253": {"logprob": -6.963253974914551, "rank": 5, "decoded_token": " weather"}}, {"4691": {"logprob": -0.7265751957893372, "rank": 1, "decoded_token": " surface"}, "11237": {"logprob": -0.8515751957893372, "rank": 2, "decoded_token": " floor"}, "7042": {"logprob": -2.6015751361846924, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -5.2265753746032715, "rank": 4, "decoded_token": " deck"}, "1615": {"logprob": -5.7265753746032715, "rank": 5, "decoded_token": " pl"}}, {"1046": {"logprob": -0.4868825674057007, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -1.9868825674057007, "rank": 2, "decoded_token": ","}, "1321": {"logprob": -2.3618826866149902, "rank": 3, "decoded_token": " and"}, "1454": {"logprob": -2.6118826866149902, "rank": 4, "decoded_token": " with"}, "7283": {"logprob": -2.7368826866149902, "rank": 5, "decoded_token": " looking"}}, {"2": {"logprob": -0.0026643513701856136, "rank": 1, "decoded_token": "</s>"}, "1531": {"logprob": -6.502664566040039, "rank": 2, "decoded_token": " The"}, "1032": {"logprob": -6.877664566040039, "rank": 3, "decoded_token": " "}, "3730": {"logprob": -9.752664566040039, "rank": 4, "decoded_token": " There"}, "1256": {"logprob": -11.002664566040039, "rank": 5, "decoded_token": " "}}]], [[1049, 1046, 1349, 7244, 10575, 1454, 2327, 94766, 32961, 53048, 41132, 3923, 1408, 1261, 32656, 4691, 1626, 1050, 1046, 1349, 15375, 24361, 4521, 94973, 5669, 1278, 48932, 2425, 1261, 16152, 1121, 21283, 1046, 2], "1. A black dog with floppy ears sits attentively on a wooden surface.\n2. A vast mountain range stretches across the horizon under a cloudy sky.", [{"1049": {"logprob": -0.42824622988700867, "rank": 1, "decoded_token": "1"}, "1045": {"logprob": -1.553246259689331, "rank": 2, "decoded_token": "-"}, "1065": {"logprob": -2.428246259689331, "rank": 3, "decoded_token": "A"}, "1784": {"logprob": -4.053246021270752, "rank": 4, "decoded_token": "The"}, "69957": {"logprob": -4.428246021270752, "rank": 5, "decoded_token": "Sure"}}, {"1046": {"logprob": -1.811964830267243e-05, "rank": 1, "decoded_token": "."}, "1058": {"logprob": -11.875018119812012, "rank": 2, "decoded_token": ":"}, "3590": {"logprob": -12.250018119812012, "rank": 3, "decoded_token": ".A"}, "1065": {"logprob": -13.062518119812012, "rank": 4, "decoded_token": "A"}, "1041": {"logprob": -13.750018119812012, "rank": 5, "decoded_token": ")"}}, {"1349": {"logprob": -0.13647246360778809, "rank": 1, "decoded_token": " A"}, "1429": {"logprob": -2.386472463607788, "rank": 2, "decoded_token": " \""}, "1603": {"logprob": -3.886472463607788, "rank": 3, "decoded_token": " **"}, "11967": {"logprob": -5.011472702026367, "rank": 4, "decoded_token": " Image"}, "1531": {"logprob": -5.011472702026367, "rank": 5, "decoded_token": " The"}}, {"7244": {"logprob": -0.18561004102230072, "rank": 1, "decoded_token": " black"}, "38462": {"logprob": -3.185610055923462, "rank": 2, "decoded_token": " curious"}, "68076": {"logprob": -3.623110055923462, "rank": 3, "decoded_token": " cute"}, "4329": {"logprob": -3.935610055923462, "rank": 4, "decoded_token": " large"}, "74168": {"logprob": -4.373109817504883, "rank": 5, "decoded_token": " gloss"}}, {"10575": {"logprob": -0.17297746241092682, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.1729774475097656, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -3.1729774475097656, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -6.985477447509766, "rank": 4, "decoded_token": " Lab"}, "8636": {"logprob": -7.360477447509766, "rank": 5, "decoded_token": " lab"}}, {"1454": {"logprob": -0.5785807967185974, "rank": 1, "decoded_token": " with"}, "53048": {"logprob": -1.2660808563232422, "rank": 2, "decoded_token": " sits"}, "1395": {"logprob": -3.016080856323242, "rank": 3, "decoded_token": " is"}, "22524": {"logprob": -3.578580856323242, "rank": 4, "decoded_token": " lies"}, "18970": {"logprob": -3.703580856323242, "rank": 5, "decoded_token": " sitting"}}, {"2327": {"logprob": -1.2709298133850098, "rank": 1, "decoded_token": " fl"}, "1261": {"logprob": -1.3959298133850098, "rank": 2, "decoded_token": " a"}, "17300": {"logprob": -1.8959298133850098, "rank": 3, "decoded_token": " soul"}, "100089": {"logprob": -2.6459298133850098, "rank": 4, "decoded_token": " expressive"}, "6444": {"logprob": -3.1459298133850098, "rank": 5, "decoded_token": " soft"}}, {"94766": {"logprob": -0.002432247158139944, "rank": 1, "decoded_token": "oppy"}, "124603": {"logprob": -6.377432346343994, "rank": 2, "decoded_token": "uffy"}, "1484": {"logprob": -7.877432346343994, "rank": 3, "decoded_token": "op"}, "24897": {"logprob": -8.877431869506836, "rank": 4, "decoded_token": "appy"}, "102477": {"logprob": -9.752431869506836, "rank": 5, "decoded_token": "opping"}}, {"32961": {"logprob": -5.113947918289341e-05, "rank": 1, "decoded_token": " ears"}, "16962": {"logprob": -11.312551498413086, "rank": 2, "decoded_token": " ear"}, "5731": {"logprob": -11.750051498413086, "rank": 3, "decoded_token": " eyes"}, "3351": {"logprob": -12.000051498413086, "rank": 4, "decoded_token": " years"}, "42071": {"logprob": -13.000051498413086, "rank": 5, "decoded_token": " cheeks"}}, {"53048": {"logprob": -0.6131591200828552, "rank": 1, "decoded_token": " sits"}, "10637": {"logprob": -1.9881591796875, "rank": 2, "decoded_token": " looks"}, "1321": {"logprob": -2.4256591796875, "rank": 3, "decoded_token": " and"}, "1395": {"logprob": -2.6756591796875, "rank": 4, "decoded_token": " is"}, "18970": {"logprob": -3.0506591796875, "rank": 5, "decoded_token": " sitting"}}, {"41132": {"logprob": -0.36187249422073364, "rank": 1, "decoded_token": " attent"}, "1408": {"logprob": -2.361872434616089, "rank": 2, "decoded_token": " on"}, "106534": {"logprob": -2.424372434616089, "rank": 3, "decoded_token": " calmly"}, "12276": {"logprob": -2.611872434616089, "rank": 4, "decoded_token": " alert"}, "6482": {"logprob": -5.174372673034668, "rank": 5, "decoded_token": " patient"}}, {"3923": {"logprob": -8.451581379631534e-05, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -9.50008487701416, "rank": 2, "decoded_token": "ive"}, "6655": {"logprob": -11.87508487701416, "rank": 3, "decoded_token": "atively"}, "3929": {"logprob": -14.00008487701416, "rank": 4, "decoded_token": "ently"}, "47885": {"logprob": -14.75008487701416, "rank": 5, "decoded_token": "edly"}}, {"1408": {"logprob": -0.058125678449869156, "rank": 1, "decoded_token": " on"}, "3675": {"logprob": -3.1831257343292236, "rank": 2, "decoded_token": " against"}, "1294": {"logprob": -4.9331254959106445, "rank": 3, "decoded_token": " in"}, "7283": {"logprob": -5.8081254959106445, "rank": 4, "decoded_token": " looking"}, "1044": {"logprob": -5.9331254959106445, "rank": 5, "decoded_token": ","}}, {"1261": {"logprob": -0.21029606461524963, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -1.7102960348129272, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": -5.710296154022217, "rank": 3, "decoded_token": " weather"}, "44130": {"logprob": -6.085296154022217, "rank": 4, "decoded_token": " rust"}, "12603": {"logprob": -6.960296154022217, "rank": 5, "decoded_token": " wood"}}, {"32656": {"logprob": -0.08548421412706375, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -2.710484266281128, "rank": 2, "decoded_token": " rust"}, "17253": {"logprob": -4.710484027862549, "rank": 3, "decoded_token": " weather"}, "12603": {"logprob": -5.960484027862549, "rank": 4, "decoded_token": " wood"}, "3403": {"logprob": -5.960484027862549, "rank": 5, "decoded_token": " text"}}, {"4691": {"logprob": -0.7172377109527588, "rank": 1, "decoded_token": " surface"}, "11237": {"logprob": -0.8422377109527588, "rank": 2, "decoded_token": " floor"}, "7042": {"logprob": -2.842237710952759, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -4.21723747253418, "rank": 4, "decoded_token": " deck"}, "92504": {"logprob": -6.21723747253418, "rank": 5, "decoded_token": " backdrop"}}, {"1626": {"logprob": -0.12971943616867065, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -2.3797194957733154, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -4.129719257354736, "rank": 3, "decoded_token": "."}, "1338": {"logprob": -5.129719257354736, "rank": 4, "decoded_token": ".\n\n"}, "7283": {"logprob": -5.504719257354736, "rank": 5, "decoded_token": " looking"}}, {"1050": {"logprob": -0.00015698630886618048, "rank": 1, "decoded_token": "2"}, "1256": {"logprob": -9.125157356262207, "rank": 2, "decoded_token": " "}, "1032": {"logprob": -10.875157356262207, "rank": 3, "decoded_token": " "}, "1293": {"logprob": -11.750157356262207, "rank": 4, "decoded_token": " "}, "1051": {"logprob": -12.125157356262207, "rank": 5, "decoded_token": "3"}}, {"1046": {"logprob": -6.6756979322235566e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.062506675720215, "rank": 2, "decoded_token": ".A"}, "1626": {"logprob": -13.187506675720215, "rank": 3, "decoded_token": ".\n"}, "1338": {"logprob": -14.750006675720215, "rank": 4, "decoded_token": ".\n\n"}, "1058": {"logprob": -14.937506675720215, "rank": 5, "decoded_token": ":"}}, {"1349": {"logprob": -0.5863217115402222, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.4613217115402222, "rank": 2, "decoded_token": " Maj"}, "37159": {"logprob": -2.2113218307495117, "rank": 3, "decoded_token": " Snow"}, "113465": {"logprob": -3.8988218307495117, "rank": 4, "decoded_token": " Rug"}, "1531": {"logprob": -3.9613218307495117, "rank": 5, "decoded_token": " The"}}, {"15375": {"logprob": -0.639299213886261, "rank": 1, "decoded_token": " vast"}, "37849": {"logprob": -2.014299154281616, "rank": 2, "decoded_token": " breat"}, "61082": {"logprob": -2.389299154281616, "rank": 3, "decoded_token": " panor"}, "10726": {"logprob": -3.139299154281616, "rank": 4, "decoded_token": " scen"}, "2169": {"logprob": -3.201799154281616, "rank": 5, "decoded_token": " ser"}}, {"24361": {"logprob": -0.702845573425293, "rank": 1, "decoded_token": " mountain"}, "127945": {"logprob": -1.952845573425293, "rank": 2, "decoded_token": " mountainous"}, "1044": {"logprob": -2.077845573425293, "rank": 3, "decoded_token": ","}, "4521": {"logprob": -2.327845573425293, "rank": 4, "decoded_token": " range"}, "28035": {"logprob": -2.452845573425293, "rank": 5, "decoded_token": " landscape"}}, {"4521": {"logprob": -0.07058162242174149, "rank": 1, "decoded_token": " range"}, "28035": {"logprob": -2.6955816745758057, "rank": 2, "decoded_token": " landscape"}, "37691": {"logprob": -8.320581436157227, "rank": 3, "decoded_token": " valley"}, "12248": {"logprob": -9.445581436157227, "rank": 4, "decoded_token": " peak"}, "13327": {"logprob": -9.695581436157227, "rank": 5, "decoded_token": " scene"}}, {"94973": {"logprob": -1.1164050102233887, "rank": 1, "decoded_token": " stretches"}, "1454": {"logprob": -1.1789050102233887, "rank": 2, "decoded_token": " with"}, "2425": {"logprob": -1.8664050102233887, "rank": 3, "decoded_token": " under"}, "1395": {"logprob": -2.5539050102233887, "rank": 4, "decoded_token": " is"}, "13875": {"logprob": -2.9914050102233887, "rank": 5, "decoded_token": " covered"}}, {"5669": {"logprob": -0.3286789357662201, "rank": 1, "decoded_token": " across"}, "1848": {"logprob": -2.078678846359253, "rank": 2, "decoded_token": " out"}, "2425": {"logprob": -2.328678846359253, "rank": 3, "decoded_token": " under"}, "2203": {"logprob": -3.328678846359253, "rank": 4, "decoded_token": " into"}, "8994": {"logprob": -4.766179084777832, "rank": 5, "decoded_token": " towards"}}, {"1278": {"logprob": -0.039004355669021606, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -3.289004325866699, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -7.414004325866699, "rank": 3, "decoded_token": " an"}, "2425": {"logprob": -9.0390043258667, "rank": 4, "decoded_token": " under"}, "1454": {"logprob": -9.2265043258667, "rank": 5, "decoded_token": " with"}}, {"48932": {"logprob": -0.2659883201122284, "rank": 1, "decoded_token": " horizon"}, "21283": {"logprob": -2.140988349914551, "rank": 2, "decoded_token": " sky"}, "3937": {"logprob": -3.015988349914551, "rank": 3, "decoded_token": " image"}, "28035": {"logprob": -3.515988349914551, "rank": 4, "decoded_token": " landscape"}, "3044": {"logprob": -4.265988349914551, "rank": 5, "decoded_token": " sk"}}, {"2425": {"logprob": -0.5356141328811646, "rank": 1, "decoded_token": " under"}, "1044": {"logprob": -1.5356141328811646, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -1.7856141328811646, "rank": 3, "decoded_token": " with"}, "25136": {"logprob": -3.785614013671875, "rank": 4, "decoded_token": " beneath"}, "1408": {"logprob": -5.785614013671875, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -0.006081883795559406, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -5.506082057952881, "rank": 2, "decoded_token": " an"}, "16152": {"logprob": -7.631082057952881, "rank": 3, "decoded_token": " cloud"}, "6133": {"logprob": -7.881082057952881, "rank": 4, "decoded_token": " clear"}, "2136": {"logprob": -8.006081581115723, "rank": 5, "decoded_token": " over"}}, {"16152": {"logprob": -0.6749536991119385, "rank": 1, "decoded_token": " cloud"}, "6133": {"logprob": -1.4249536991119385, "rank": 2, "decoded_token": " clear"}, "18416": {"logprob": -2.8624536991119385, "rank": 3, "decoded_token": " haz"}, "27254": {"logprob": -2.9874536991119385, "rank": 4, "decoded_token": " partly"}, "4391": {"logprob": -3.2374536991119385, "rank": 5, "decoded_token": " light"}}, {"1121": {"logprob": -0.10860869288444519, "rank": 1, "decoded_token": "y"}, "4527": {"logprob": -2.9836087226867676, "rank": 2, "decoded_token": "less"}, "1286": {"logprob": -3.4836087226867676, "rank": 3, "decoded_token": "ed"}, "77187": {"logprob": -4.608608722686768, "rank": 4, "decoded_token": "-filled"}, "114525": {"logprob": -4.858608722686768, "rank": 5, "decoded_token": "-covered"}}, {"21283": {"logprob": -0.002785732736811042, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -6.252785682678223, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -7.627785682678223, "rank": 3, "decoded_token": ","}, "26549": {"logprob": -8.627785682678223, "rank": 4, "decoded_token": " gray"}, "34052": {"logprob": -9.377785682678223, "rank": 5, "decoded_token": " grey"}}, {"1046": {"logprob": -0.047878943383693695, "rank": 1, "decoded_token": "."}, "1044": {"logprob": -3.1728789806365967, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -5.547878742218018, "rank": 3, "decoded_token": " with"}, "1338": {"logprob": -7.172878742218018, "rank": 4, "decoded_token": ".\n\n"}, "1294": {"logprob": -9.172879219055176, "rank": 5, "decoded_token": " in"}}, {"2": {"logprob": -1.3351351299206726e-05, "rank": 1, "decoded_token": "</s>"}, "1032": {"logprob": -11.25001335144043, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -16.00001335144043, "rank": 3, "decoded_token": " "}, "1319": {"logprob": -17.25001335144043, "rank": 4, "decoded_token": " ("}, "1766": {"logprob": -18.50001335144043, "rank": 5, "decoded_token": " ["}}]], [[1049, 1046, 1349, 7244, 10575, 53048, 41132, 3923, 1408, 1261, 32656, 11237, 1626, 1050, 1046, 1349, 15375, 24361, 4521, 94973, 5669, 1278, 48932, 2425, 1261, 16152, 1121, 21283, 1626, 1051, 1046, 8342, 71284, 7377, 1394, 22140, 1294, 1278, 27208, 1513, 97558, 1626, 1052, 1046, 1349, 53301, 59396, 3549, 13335, 2645, 1261, 1295, 3506, 11223, 12097, 1046, 2], "1. A black dog sits attentively on a wooden floor.\n2. A vast mountain range stretches across the horizon under a cloudy sky.\n3. Surfers wait for waves in the ocean at sunset.\n4. A winding gravel path leads through a lush green park.", [{"1049": {"logprob": -0.05001257359981537, "rank": 1, "decoded_token": "1"}, "1045": {"logprob": -3.1750125885009766, "rank": 2, "decoded_token": "-"}, "69957": {"logprob": -5.925012588500977, "rank": 3, "decoded_token": "Sure"}, "11745": {"logprob": -6.425012588500977, "rank": 4, "decoded_token": "Here"}, "1065": {"logprob": -6.425012588500977, "rank": 5, "decoded_token": "A"}}, {"1046": {"logprob": -8.702239938429557e-06, "rank": 1, "decoded_token": "."}, "1058": {"logprob": -12.000008583068848, "rank": 2, "decoded_token": ":"}, "3590": {"logprob": -13.375008583068848, "rank": 3, "decoded_token": ".A"}, "1041": {"logprob": -14.750008583068848, "rank": 4, "decoded_token": ")"}, "1065": {"logprob": -15.687508583068848, "rank": 5, "decoded_token": "A"}}, {"1349": {"logprob": -0.14196155965328217, "rank": 1, "decoded_token": " A"}, "1429": {"logprob": -2.2669615745544434, "rank": 2, "decoded_token": " \""}, "1531": {"logprob": -4.516961574554443, "rank": 3, "decoded_token": " The"}, "11967": {"logprob": -4.516961574554443, "rank": 4, "decoded_token": " Image"}, "1603": {"logprob": -5.391961574554443, "rank": 5, "decoded_token": " **"}}, {"7244": {"logprob": -0.14889711141586304, "rank": 1, "decoded_token": " black"}, "68076": {"logprob": -3.398897171020508, "rank": 2, "decoded_token": " cute"}, "6231": {"logprob": -3.961397171020508, "rank": 3, "decoded_token": " close"}, "38462": {"logprob": -4.273897171020508, "rank": 4, "decoded_token": " curious"}, "4329": {"logprob": -4.398897171020508, "rank": 5, "decoded_token": " large"}}, {"10575": {"logprob": -0.12091328203678131, "rank": 1, "decoded_token": " dog"}, "116572": {"logprob": -2.37091326713562, "rank": 2, "decoded_token": " puppy"}, "119075": {"logprob": -3.99591326713562, "rank": 3, "decoded_token": " Labrador"}, "15812": {"logprob": -7.683413505554199, "rank": 4, "decoded_token": " Lab"}, "8636": {"logprob": -7.808413505554199, "rank": 5, "decoded_token": " lab"}}, {"53048": {"logprob": -0.8691943287849426, "rank": 1, "decoded_token": " sits"}, "1454": {"logprob": -1.1191942691802979, "rank": 2, "decoded_token": " with"}, "1395": {"logprob": -2.431694269180298, "rank": 3, "decoded_token": " is"}, "18970": {"logprob": -2.744194269180298, "rank": 4, "decoded_token": " sitting"}, "22524": {"logprob": -3.681694269180298, "rank": 5, "decoded_token": " lies"}}, {"41132": {"logprob": -0.5939557552337646, "rank": 1, "decoded_token": " attent"}, "106534": {"logprob": -1.2814557552337646, "rank": 2, "decoded_token": " calmly"}, "12276": {"logprob": -2.8439557552337646, "rank": 3, "decoded_token": " alert"}, "1408": {"logprob": -2.8439557552337646, "rank": 4, "decoded_token": " on"}, "6482": {"logprob": -4.968955993652344, "rank": 5, "decoded_token": " patient"}}, {"3923": {"logprob": -0.00010084597306558862, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -9.500101089477539, "rank": 2, "decoded_token": "ive"}, "6655": {"logprob": -10.875101089477539, "rank": 3, "decoded_token": "atively"}, "3929": {"logprob": -13.000101089477539, "rank": 4, "decoded_token": "ently"}, "47885": {"logprob": -13.750101089477539, "rank": 5, "decoded_token": "edly"}}, {"1408": {"logprob": -0.056158196181058884, "rank": 1, "decoded_token": " on"}, "3675": {"logprob": -3.6811583042144775, "rank": 2, "decoded_token": " against"}, "1454": {"logprob": -4.306158065795898, "rank": 3, "decoded_token": " with"}, "1294": {"logprob": -5.181158065795898, "rank": 4, "decoded_token": " in"}, "7283": {"logprob": -5.431158065795898, "rank": 5, "decoded_token": " looking"}}, {"1261": {"logprob": -0.33056098222732544, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -1.3305609226226807, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": -4.70556116104126, "rank": 3, "decoded_token": " weather"}, "44130": {"logprob": -5.83056116104126, "rank": 4, "decoded_token": " rust"}, "12603": {"logprob": -6.58056116104126, "rank": 5, "decoded_token": " wood"}}, {"32656": {"logprob": -0.07081110030412674, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -2.9458110332489014, "rank": 2, "decoded_token": " rust"}, "17253": {"logprob": -4.6958112716674805, "rank": 3, "decoded_token": " weather"}, "12603": {"logprob": -5.8208112716674805, "rank": 4, "decoded_token": " wood"}, "3403": {"logprob": -6.0708112716674805, "rank": 5, "decoded_token": " text"}}, {"11237": {"logprob": -0.6428436636924744, "rank": 1, "decoded_token": " floor"}, "4691": {"logprob": -1.0178437232971191, "rank": 2, "decoded_token": " surface"}, "7042": {"logprob": -2.642843723297119, "rank": 3, "decoded_token": " background"}, "28984": {"logprob": -3.517843723297119, "rank": 4, "decoded_token": " deck"}, "92504": {"logprob": -6.017843723297119, "rank": 5, "decoded_token": " backdrop"}}, {"1626": {"logprob": -0.7337945103645325, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -0.8587945103645325, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -3.3587944507598877, "rank": 3, "decoded_token": " with"}, "7283": {"logprob": -3.6087944507598877, "rank": 4, "decoded_token": " looking"}, "1321": {"logprob": -4.108794689178467, "rank": 5, "decoded_token": " and"}}, {"1050": {"logprob": -1.0132738680113107e-05, "rank": 1, "decoded_token": "2"}, "1051": {"logprob": -11.75001049041748, "rank": 2, "decoded_token": "3"}, "1256": {"logprob": -14.00001049041748, "rank": 3, "decoded_token": " "}, "1049": {"logprob": -14.62501049041748, "rank": 4, "decoded_token": "1"}, "1032": {"logprob": -14.62501049041748, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -2.861018856492592e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.43750286102295, "rank": 2, "decoded_token": ".A"}, "4700": {"logprob": -15.37500286102295, "rank": 3, "decoded_token": ".M"}, "1626": {"logprob": -15.37500286102295, "rank": 4, "decoded_token": ".\n"}, "3051": {"logprob": -15.87500286102295, "rank": 5, "decoded_token": ".S"}}, {"1349": {"logprob": -0.6794427633285522, "rank": 1, "decoded_token": " A"}, "11826": {"logprob": -1.9294427633285522, "rank": 2, "decoded_token": " Maj"}, "37159": {"logprob": -2.116942882537842, "rank": 3, "decoded_token": " Snow"}, "27260": {"logprob": -2.616942882537842, "rank": 4, "decoded_token": " Mountain"}, "113465": {"logprob": -2.866942882537842, "rank": 5, "decoded_token": " Rug"}}, {"15375": {"logprob": -0.9194075465202332, "rank": 1, "decoded_token": " vast"}, "10726": {"logprob": -2.294407606124878, "rank": 2, "decoded_token": " scen"}, "4521": {"logprob": -2.356907606124878, "rank": 3, "decoded_token": " range"}, "122203": {"logprob": -2.419407606124878, "rank": 4, "decoded_token": " rugged"}, "61082": {"logprob": -2.856907606124878, "rank": 5, "decoded_token": " panor"}}, {"24361": {"logprob": -0.5804797410964966, "rank": 1, "decoded_token": " mountain"}, "127945": {"logprob": -1.8304797410964966, "rank": 2, "decoded_token": " mountainous"}, "28035": {"logprob": -2.455479621887207, "rank": 3, "decoded_token": " landscape"}, "4521": {"logprob": -2.455479621887207, "rank": 4, "decoded_token": " range"}, "1044": {"logprob": -2.705479621887207, "rank": 5, "decoded_token": ","}}, {"4521": {"logprob": -0.0493546724319458, "rank": 1, "decoded_token": " range"}, "28035": {"logprob": -3.0493545532226562, "rank": 2, "decoded_token": " landscape"}, "37691": {"logprob": -8.424354553222656, "rank": 3, "decoded_token": " valley"}, "13327": {"logprob": -9.049354553222656, "rank": 4, "decoded_token": " scene"}, "3719": {"logprob": -9.799354553222656, "rank": 5, "decoded_token": " view"}}, {"94973": {"logprob": -0.6676871180534363, "rank": 1, "decoded_token": " stretches"}, "2425": {"logprob": -1.792687177658081, "rank": 2, "decoded_token": " under"}, "1395": {"logprob": -2.292687177658081, "rank": 3, "decoded_token": " is"}, "1454": {"logprob": -2.730187177658081, "rank": 4, "decoded_token": " with"}, "7038": {"logprob": -3.292687177658081, "rank": 5, "decoded_token": " extends"}}, {"5669": {"logprob": -0.4542117118835449, "rank": 1, "decoded_token": " across"}, "2425": {"logprob": -1.454211711883545, "rank": 2, "decoded_token": " under"}, "1848": {"logprob": -2.454211711883545, "rank": 3, "decoded_token": " out"}, "2203": {"logprob": -4.204211711883545, "rank": 4, "decoded_token": " into"}, "25136": {"logprob": -4.641711711883545, "rank": 5, "decoded_token": " beneath"}}, {"1278": {"logprob": -0.23009441792964935, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -1.6050944328308105, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -5.6050944328308105, "rank": 3, "decoded_token": " an"}, "2425": {"logprob": -7.2300944328308105, "rank": 4, "decoded_token": " under"}, "1454": {"logprob": -10.167593955993652, "rank": 5, "decoded_token": " with"}}, {"48932": {"logprob": -0.3072167932987213, "rank": 1, "decoded_token": " horizon"}, "21283": {"logprob": -1.932216763496399, "rank": 2, "decoded_token": " sky"}, "3937": {"logprob": -3.1822168827056885, "rank": 3, "decoded_token": " image"}, "28035": {"logprob": -3.6822168827056885, "rank": 4, "decoded_token": " landscape"}, "3044": {"logprob": -3.6822168827056885, "rank": 5, "decoded_token": " sk"}}, {"2425": {"logprob": -0.2914469838142395, "rank": 1, "decoded_token": " under"}, "1044": {"logprob": -2.4164469242095947, "rank": 2, "decoded_token": ","}, "1454": {"logprob": -2.5414469242095947, "rank": 3, "decoded_token": " with"}, "1626": {"logprob": -3.7914469242095947, "rank": 4, "decoded_token": ".\n"}, "1408": {"logprob": -3.7914469242095947, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -0.0460360012948513, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -3.9210360050201416, "rank": 2, "decoded_token": " an"}, "16152": {"logprob": -4.1085357666015625, "rank": 3, "decoded_token": " cloud"}, "2136": {"logprob": -6.1710357666015625, "rank": 4, "decoded_token": " over"}, "6133": {"logprob": -6.4210357666015625, "rank": 5, "decoded_token": " clear"}}, {"16152": {"logprob": -0.20367540419101715, "rank": 1, "decoded_token": " cloud"}, "6133": {"logprob": -2.8286755084991455, "rank": 2, "decoded_token": " clear"}, "27254": {"logprob": -3.5161755084991455, "rank": 3, "decoded_token": " partly"}, "18416": {"logprob": -3.8286755084991455, "rank": 4, "decoded_token": " haz"}, "4391": {"logprob": -4.328675270080566, "rank": 5, "decoded_token": " light"}}, {"1121": {"logprob": -0.05241352692246437, "rank": 1, "decoded_token": "y"}, "1286": {"logprob": -3.8024134635925293, "rank": 2, "decoded_token": "ed"}, "77187": {"logprob": -4.552413463592529, "rank": 3, "decoded_token": "-filled"}, "4527": {"logprob": -4.802413463592529, "rank": 4, "decoded_token": "less"}, "114525": {"logprob": -4.927413463592529, "rank": 5, "decoded_token": "-covered"}}, {"21283": {"logprob": -0.0003716255014296621, "rank": 1, "decoded_token": " sky"}, "10991": {"logprob": -8.750371932983398, "rank": 2, "decoded_token": " blue"}, "1044": {"logprob": -9.375371932983398, "rank": 3, "decoded_token": ","}, "26549": {"logprob": -10.375371932983398, "rank": 4, "decoded_token": " gray"}, "34052": {"logprob": -11.250371932983398, "rank": 5, "decoded_token": " grey"}}, {"1626": {"logprob": -0.00012730741582345217, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -9.500126838684082, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -10.500126838684082, "rank": 3, "decoded_token": "."}, "1454": {"logprob": -10.875126838684082, "rank": 4, "decoded_token": " with"}, "1294": {"logprob": -13.250126838684082, "rank": 5, "decoded_token": " in"}}, {"1051": {"logprob": -3.2186455882765586e-06, "rank": 1, "decoded_token": "3"}, "1052": {"logprob": -12.75000286102295, "rank": 2, "decoded_token": "4"}, "1050": {"logprob": -15.00000286102295, "rank": 3, "decoded_token": "2"}, "1049": {"logprob": -16.937503814697266, "rank": 4, "decoded_token": "1"}, "1032": {"logprob": -17.875003814697266, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -1.6689286894688848e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -14.687501907348633, "rank": 2, "decoded_token": ".A"}, "5226": {"logprob": -15.687501907348633, "rank": 3, "decoded_token": ".D"}, "6847": {"logprob": -15.812501907348633, "rank": 4, "decoded_token": ".T"}, "48426": {"logprob": -16.812501907348633, "rank": 5, "decoded_token": ".The"}}, {"8342": {"logprob": -0.5730464458465576, "rank": 1, "decoded_token": " Sur"}, "1349": {"logprob": -1.6980464458465576, "rank": 2, "decoded_token": " A"}, "22468": {"logprob": -2.5730464458465576, "rank": 3, "decoded_token": " Several"}, "1488": {"logprob": -2.6980464458465576, "rank": 4, "decoded_token": " W"}, "15035": {"logprob": -3.1980464458465576, "rank": 5, "decoded_token": " People"}}, {"71284": {"logprob": -0.0033258858602494, "rank": 1, "decoded_token": "fers"}, "1102": {"logprob": -5.878325939178467, "rank": 2, "decoded_token": "f"}, "1726": {"logprob": -7.628325939178467, "rank": 3, "decoded_token": "fer"}, "61888": {"logprob": -12.253325462341309, "rank": 4, "decoded_token": "fline"}, "2119": {"logprob": -13.003325462341309, "rank": 5, "decoded_token": "fter"}}, {"7377": {"logprob": -1.4996429681777954, "rank": 1, "decoded_token": " wait"}, "1584": {"logprob": -1.7496429681777954, "rank": 2, "decoded_token": " are"}, "88014": {"logprob": -1.9371429681777954, "rank": 3, "decoded_token": " paddle"}, "1294": {"logprob": -1.9371429681777954, "rank": 4, "decoded_token": " in"}, "24434": {"logprob": -2.187142848968506, "rank": 5, "decoded_token": " ride"}}, {"1394": {"logprob": -0.6126739382743835, "rank": 1, "decoded_token": " for"}, "1294": {"logprob": -0.9876739382743835, "rank": 2, "decoded_token": " in"}, "1408": {"logprob": -2.7376739978790283, "rank": 3, "decoded_token": " on"}, "6482": {"logprob": -4.425173759460449, "rank": 4, "decoded_token": " patient"}, "1321": {"logprob": -5.612673759460449, "rank": 5, "decoded_token": " and"}}, {"22140": {"logprob": -0.00729279313236475, "rank": 1, "decoded_token": " waves"}, "1278": {"logprob": -5.632292747497559, "rank": 2, "decoded_token": " the"}, "1261": {"logprob": -5.757292747497559, "rank": 3, "decoded_token": " a"}, "39460": {"logprob": -8.257292747497559, "rank": 4, "decoded_token": " incoming"}, "1321": {"logprob": -9.757292747497559, "rank": 5, "decoded_token": " and"}}, {"1294": {"logprob": -0.3071398138999939, "rank": 1, "decoded_token": " in"}, "1408": {"logprob": -2.1821398735046387, "rank": 2, "decoded_token": " on"}, "1513": {"logprob": -2.4321398735046387, "rank": 3, "decoded_token": " at"}, "3016": {"logprob": -3.6821398735046387, "rank": 4, "decoded_token": " while"}, "1435": {"logprob": -3.8071398735046387, "rank": 5, "decoded_token": " as"}}, {"1278": {"logprob": -0.004646694287657738, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -6.1921467781066895, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -6.9421467781066895, "rank": 3, "decoded_token": " an"}, "40466": {"logprob": -7.2546467781066895, "rank": 4, "decoded_token": " shallow"}, "26517": {"logprob": -7.8796467781066895, "rank": 5, "decoded_token": " calm"}}, {"27208": {"logprob": -0.0658877044916153, "rank": 1, "decoded_token": " ocean"}, "7786": {"logprob": -3.440887689590454, "rank": 2, "decoded_token": " distance"}, "5124": {"logprob": -5.253387928009033, "rank": 3, "decoded_token": " early"}, "26517": {"logprob": -5.315887928009033, "rank": 4, "decoded_token": " calm"}, "11196": {"logprob": -5.378387928009033, "rank": 5, "decoded_token": " sea"}}, {"1513": {"logprob": -1.1504861116409302, "rank": 1, "decoded_token": " at"}, "1435": {"logprob": -1.2754861116409302, "rank": 2, "decoded_token": " as"}, "3184": {"logprob": -1.4004861116409302, "rank": 3, "decoded_token": " during"}, "3016": {"logprob": -2.9004859924316406, "rank": 4, "decoded_token": " while"}, "6117": {"logprob": -3.1504859924316406, "rank": 5, "decoded_token": " near"}}, {"97558": {"logprob": -0.12151996046304703, "rank": 1, "decoded_token": " sunset"}, "11729": {"logprob": -2.8715200424194336, "rank": 2, "decoded_token": " sun"}, "1266": {"logprob": -3.4965200424194336, "rank": 3, "decoded_token": " d"}, "54507": {"logprob": -3.9965200424194336, "rank": 4, "decoded_token": " dawn"}, "1261": {"logprob": -5.121520042419434, "rank": 5, "decoded_token": " a"}}, {"1626": {"logprob": -0.3073118329048157, "rank": 1, "decoded_token": ".\n"}, "1044": {"logprob": -2.182311773300171, "rank": 2, "decoded_token": ","}, "3016": {"logprob": -2.557311773300171, "rank": 3, "decoded_token": " while"}, "1454": {"logprob": -3.432311773300171, "rank": 4, "decoded_token": " with"}, "6117": {"logprob": -4.05731201171875, "rank": 5, "decoded_token": " near"}}, {"1052": {"logprob": -3.3378546504536644e-06, "rank": 1, "decoded_token": "4"}, "1051": {"logprob": -13.25000286102295, "rank": 2, "decoded_token": "3"}, "1049": {"logprob": -13.93750286102295, "rank": 3, "decoded_token": "1"}, "1053": {"logprob": -14.43750286102295, "rank": 4, "decoded_token": "5"}, "1032": {"logprob": -16.687503814697266, "rank": 5, "decoded_token": " "}}, {"1046": {"logprob": -1.6689286894688848e-06, "rank": 1, "decoded_token": "."}, "3590": {"logprob": -13.500001907348633, "rank": 2, "decoded_token": ".A"}, "6847": {"logprob": -16.437501907348633, "rank": 3, "decoded_token": ".T"}, "1044": {"logprob": -17.312501907348633, "rank": 4, "decoded_token": ","}, "1349": {"logprob": -17.375001907348633, "rank": 5, "decoded_token": " A"}}, {"1349": {"logprob": -0.004292916506528854, "rank": 1, "decoded_token": " A"}, "2048": {"logprob": -5.629292964935303, "rank": 2, "decoded_token": " An"}, "10638": {"logprob": -7.879292964935303, "rank": 3, "decoded_token": " Two"}, "111463": {"logprob": -10.004292488098145, "rank": 4, "decoded_token": " Trees"}, "1531": {"logprob": -10.879292488098145, "rank": 5, "decoded_token": " The"}}, {"53301": {"logprob": -1.5473321676254272, "rank": 1, "decoded_token": " winding"}, "15192": {"logprob": -1.7348321676254272, "rank": 2, "decoded_token": " narrow"}, "47945": {"logprob": -2.109832286834717, "rank": 3, "decoded_token": " dirt"}, "2169": {"logprob": -2.609832286834717, "rank": 4, "decoded_token": " ser"}, "59396": {"logprob": -2.672332286834717, "rank": 5, "decoded_token": " gravel"}}, {"59396": {"logprob": -0.8954829573631287, "rank": 1, "decoded_token": " gravel"}, "3549": {"logprob": -1.1454830169677734, "rank": 2, "decoded_token": " path"}, "47945": {"logprob": -1.6454830169677734, "rank": 3, "decoded_token": " dirt"}, "14801": {"logprob": -3.2704830169677734, "rank": 4, "decoded_token": " pathway"}, "15551": {"logprob": -4.270483016967773, "rank": 5, "decoded_token": " stone"}}, {"3549": {"logprob": -0.02117946185171604, "rank": 1, "decoded_token": " path"}, "14801": {"logprob": -3.896179437637329, "rank": 2, "decoded_token": " pathway"}, "33659": {"logprob": -8.14617919921875, "rank": 3, "decoded_token": " trail"}, "9480": {"logprob": -9.64617919921875, "rank": 4, "decoded_token": " road"}, "7368": {"logprob": -9.64617919921875, "rank": 5, "decoded_token": "path"}}, {"13335": {"logprob": -0.18962937593460083, "rank": 1, "decoded_token": " leads"}, "39985": {"logprob": -2.752129316329956, "rank": 2, "decoded_token": " cuts"}, "1639": {"logprob": -3.877129316329956, "rank": 3, "decoded_token": " me"}, "11500": {"logprob": -3.939629316329956, "rank": 4, "decoded_token": " runs"}, "2645": {"logprob": -4.189629554748535, "rank": 5, "decoded_token": " through"}}, {"2645": {"logprob": -0.05349981039762497, "rank": 1, "decoded_token": " through"}, "8994": {"logprob": -4.053499698638916, "rank": 2, "decoded_token": " towards"}, "2396": {"logprob": -4.303499698638916, "rank": 3, "decoded_token": " between"}, "2203": {"logprob": -4.678499698638916, "rank": 4, "decoded_token": " into"}, "1317": {"logprob": -5.678499698638916, "rank": 5, "decoded_token": " to"}}, {"1261": {"logprob": -0.017386287450790405, "rank": 1, "decoded_token": " a"}, "11223": {"logprob": -4.892386436462402, "rank": 2, "decoded_token": " green"}, "1295": {"logprob": -5.017386436462402, "rank": 3, "decoded_token": " l"}, "23170": {"logprob": -6.642386436462402, "rank": 4, "decoded_token": " grass"}, "1420": {"logprob": -7.267386436462402, "rank": 5, "decoded_token": " an"}}, {"1295": {"logprob": -0.9453322887420654, "rank": 1, "decoded_token": " l"}, "11223": {"logprob": -1.3203322887420654, "rank": 2, "decoded_token": " green"}, "23170": {"logprob": -1.9453322887420654, "rank": 3, "decoded_token": " grass"}, "12097": {"logprob": -2.4453322887420654, "rank": 4, "decoded_token": " park"}, "26428": {"logprob": -3.3203322887420654, "rank": 5, "decoded_token": " garden"}}, {"3506": {"logprob": -6.556489552167477e-06, "rank": 1, "decoded_token": "ush"}, "1374": {"logprob": -12.000006675720215, "rank": 2, "decoded_token": "us"}, "90716": {"logprob": -15.625006675720215, "rank": 3, "decoded_token": "USH"}, "16938": {"logprob": -15.875006675720215, "rank": 4, "decoded_token": "usher"}, "13326": {"logprob": -17.1875057220459, "rank": 5, "decoded_token": "inden"}}, {"11223": {"logprob": -0.3668670654296875, "rank": 1, "decoded_token": " green"}, "1044": {"logprob": -1.3668670654296875, "rank": 2, "decoded_token": ","}, "26428": {"logprob": -3.4918670654296875, "rank": 3, "decoded_token": " garden"}, "12097": {"logprob": -4.1168670654296875, "rank": 4, "decoded_token": " park"}, "23170": {"logprob": -5.8668670654296875, "rank": 5, "decoded_token": " grass"}}, {"12097": {"logprob": -0.5530153512954712, "rank": 1, "decoded_token": " park"}, "3727": {"logprob": -2.0530152320861816, "rank": 2, "decoded_token": " field"}, "28035": {"logprob": -2.1780152320861816, "rank": 3, "decoded_token": " landscape"}, "26428": {"logprob": -2.3030152320861816, "rank": 4, "decoded_token": " garden"}, "4457": {"logprob": -2.8030152320861816, "rank": 5, "decoded_token": " area"}}, {"1046": {"logprob": -0.7924000024795532, "rank": 1, "decoded_token": "."}, "1454": {"logprob": -1.2924000024795532, "rank": 2, "decoded_token": " with"}, "8994": {"logprob": -2.7923998832702637, "rank": 3, "decoded_token": " towards"}, "54410": {"logprob": -3.5423998832702637, "rank": 4, "decoded_token": " lined"}, "2425": {"logprob": -3.5423998832702637, "rank": 5, "decoded_token": " under"}}, {"2": {"logprob": -1.9073468138230965e-06, "rank": 1, "decoded_token": "</s>"}, "1032": {"logprob": -13.250001907348633, "rank": 2, "decoded_token": " "}, "1256": {"logprob": -16.250001907348633, "rank": 3, "decoded_token": " "}, "1293": {"logprob": -19.000001907348633, "rank": 4, "decoded_token": " "}, "1319": {"logprob": -20.000001907348633, "rank": 5, "decoded_token": " ("}}]]]
\ No newline at end of file
......@@ -2,7 +2,7 @@
import warnings
from collections.abc import Sequence
from typing import Optional, Union
from typing import Any, Optional, Union
import torch
......@@ -254,9 +254,9 @@ def check_logprobs_close(
def build_model_context(
model_id: str,
task: TaskOption = "auto",
dtype: Optional[Union[str, torch.dtype]] = None,
mm_processor_kwargs: Optional[dict] = None,
limit_mm_per_prompt: Optional[dict] = None,
dtype: Union[str, torch.dtype] = "auto",
mm_processor_kwargs: Optional[dict[str, Any]] = None,
limit_mm_per_prompt: Optional[dict[str, int]] = None,
disable_mm_preprocessor_cache: bool = True,
):
"""Creates an InputContext for a given model.
......@@ -274,9 +274,6 @@ def build_model_context(
model_info.check_available_online(on_fail="skip")
model_info.check_transformers_version(on_fail="skip")
if dtype is None:
dtype = "half"
model_config = ModelConfig(
model_id,
task=task,
......
......@@ -7,19 +7,25 @@ from unittest.mock import MagicMock
import numpy as np
import pytest
import torch
from transformers import ProcessorMixin
from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargs,
MultiModalKwargsItem,
MultiModalSharedField)
# yapf conflicts with isort for this block
# yapf: disable
from vllm.multimodal.processing import (PlaceholderFeaturesInfo,
PromptIndexTargets, PromptInsertion,
PromptReplacement, apply_text_matches,
ProcessingCache, PromptIndexTargets,
PromptInsertion, PromptReplacement,
apply_text_matches,
apply_token_matches,
find_mm_placeholders,
find_text_matches, find_token_matches,
iter_token_matches)
iter_token_matches,
replace_token_matches)
# yapf: enable
from vllm.multimodal.profiling import MultiModalProfiler
from vllm.transformers_utils.tokenizer import (AnyTokenizer,
......@@ -89,6 +95,58 @@ def test_iter_token_matches(token_ids, match_ids, expected):
assert all(match_len == len(match_ids) for match_len in match_lens)
# yapf: disable
@pytest.mark.parametrize(
("token_ids", "match_ids", "new_ids", "expected"),
[
([], [], [-1], []),
([], [32000], [-1], []),
(
[32000, 32000, 32000],
[32000],
[-1],
[-1, -1, -1],
),
(
[32000, 32000, 32000],
[32000, 32000],
[-1],
[-1, 32000],
),
(
[32000, 32000, 32000],
[32000, 32000, 32000],
[-1],
[-1],
),
(
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
[28747, 32000],
[-1],
[9833, -1, 32000, 32000, 9833, -1, 32000, 918],
),
(
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
[28747, 32000, 32000, 32000],
[-1],
[9833, -1, 9833, 28747, 32000, 32000, 918],
),
(
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
[28747, 0, 32000],
[-1],
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
),
],
)
# yapf: enable
def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
result = replace_token_matches(token_ids, match_ids, new_ids)
# Manually constructed results
assert result == expected
# yapf: disable
@pytest.mark.parametrize(
("prompt", "target_by_key", "expected_by_key"),
......@@ -837,6 +895,45 @@ def test_find_mm_placeholders(
assert result == expected
def _dummy_elem(modality: str, key: str, size: int):
return MultiModalFieldElem(
modality=modality,
key=key,
data=torch.empty((size, ), dtype=torch.int8),
field=MultiModalSharedField(1),
)
def _dummy_item(modality: str, size_by_key: dict[str, int]):
return MultiModalKwargsItem.from_elems([
_dummy_elem(modality, key, size) for key, size in size_by_key.items()
])
def _dummy_kw(size_by_key_modality: dict[str, dict[str, int]]):
return MultiModalKwargs.from_items([
_dummy_item(modality, size_by_key)
for modality, size_by_key in size_by_key_modality.items()
])
# yapf: disable
@pytest.mark.parametrize(
("item", "expected_size"),
[
(_dummy_item("a", {"a1": 100}), 100),
(_dummy_item("a", {"a1": 100, "a2": 110}), 210),
(_dummy_kw({"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}), 460), # noqa: E501
],
)
# yapf: enable
def test_cache_item_size(item, expected_size):
cache = ProcessingCache.get_lru_cache(2048, type(item))
cache[""] = item
assert cache.currsize == expected_size
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
@pytest.mark.parametrize(
("limit", "num_supported", "is_valid"),
......@@ -853,7 +950,7 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
tokenizer_mode="auto",
trust_remote_code=False,
seed=0,
dtype="half",
dtype="auto",
revision=None,
limit_mm_per_prompt=limit_mm_per_prompt,
)
......@@ -892,7 +989,7 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
tokenizer_mode="auto",
trust_remote_code=False,
seed=0,
dtype="half",
dtype="auto",
revision=None,
limit_mm_per_prompt=limit_mm_per_prompt,
)
......@@ -965,7 +1062,7 @@ def test_hf_processor_kwargs(model_id, call_kwargs, expected_kwargs):
tokenizer_mode="auto",
trust_remote_code=False,
seed=0,
dtype="half",
dtype="auto",
revision=None,
)
......
......@@ -314,7 +314,7 @@ def get_active_block_tables(block_tables, query_lens, seq_lens, block_size,
# Test edge cases
(1, 128, 16, 1024, 4, 2, 16, False), # large decode batch
(16, 4, 8, 8192, 48, 1, 128, True), # large prefill batch
(16, 4, 8, 1024, 4, 2, 128, True), # large prefill batch
(4, 12, 32, 2048, 16, 1, 32, True), # multi-head attention (MHA)
(4, 12, 32, 2048, 16, 16, 32, True), # multi-query attention (MQA)
])
......
......@@ -15,6 +15,8 @@ from ..utils import compare_two_settings, create_new_process_for_each_test
models_4bit_to_test = [
("facebook/opt-125m", "quantize opt model inflight"),
("mistralai/Mistral-7B-Instruct-v0.3",
"quantize inflight model with both HF and Mistral format weights")
]
models_pre_qaunt_4bit_to_test = [
......
......@@ -166,7 +166,7 @@ def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
test_prompts = multilora_inference.create_test_prompts(lora_path)
# Serialize model before deserializing and binding LoRA adapters
with vllm_runner(model_ref, ) as vllm_model:
with vllm_runner(model_ref) as vllm_model:
model_path = tmp_path / (model_ref + ".tensors")
vllm_model.apply_model(
......@@ -208,7 +208,7 @@ def test_load_without_tensorizer_load_format(vllm_runner):
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
## Serialize model
with vllm_runner(model_ref, ) as vllm_model:
with vllm_runner(model_ref) as vllm_model:
model_path = tmp_path / (model_ref + ".tensors")
vllm_model.apply_model(
......
......@@ -34,7 +34,9 @@ with depyf.prepare_debug(temp_dir):
# disable custom dispatcher, let Dynamo takes over
# all the control
llm = LLM(model="google/gemma-2b",
llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct",
max_model_len=512,
max_num_seqs=64,
enforce_eager=True,
compilation_config={"level": CompilationLevel.DYNAMO_AS_IS})
outputs = llm.generate(prompts, sampling_params)
......@@ -44,38 +46,51 @@ with depyf.prepare_debug(temp_dir):
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
assert generated_text.startswith(answer)
compiled_code = sorted(
compiled_codes = sorted(
glob.glob(os.path.join(temp_dir, "__transformed_code*.py")))
# we should only trigger Dynamo compilation three times:
# one for the profiling phase without kv cache
# one for the prefill phase with symbolic shapes
# one for the decode phase with symbolic shapes
for i, compiled_code in enumerate(compiled_codes):
print("{} file: {}".format(i + 1, compiled_code))
# We should only trigger Dynamo compilation 4 times:
# 1. forward pass (symbolic)
# 2. compute_logits (symbolic)
# 3. forward pass (shape 16)
# 4. forward pass (shape 32)
# and later calls should not trigger Dynamo compilation again.
# NOTE: it might still trigger XLA compilation.
# NOTE: It might still trigger XLA compilation.
# Check we have 4 compiled codes
assert len(compiled_codes) == 4
# check we have three compiled code
# this is the assumption when we use the custom dispatcher
assert len(compiled_code) == 3
kv_cache_prefix = "kv_cache"
attn_prefix = "ragged_paged_attention"
# check all the compilations are as expected
compiled_fn = sorted(
# Check all the compilations are as expected
compiled_fns = sorted(
glob.glob(os.path.join(temp_dir, "__compiled_fn*Captured*.py")))
# the first compilation is the profiling phase,
# it should not have any kv cache
with open(compiled_fn[0]) as f:
for i, compiled_fn in enumerate(compiled_fns):
print("{} file: {}".format(i + 1, compiled_fn))
# The first compilation is symbolic, so it should not have any kv_caches
with open(compiled_fns[0]) as f:
content = f.read()
assert kv_cache_prefix not in content
# The second compilation is symbolic, so it should not have any kv_caches
with open(compiled_fns[1]) as f:
content = f.read()
assert "kv_caches" not in content
assert kv_cache_prefix not in content
# the second compilation is the prefill phase,
# it should have kv cache and the flash_attention op
with open(compiled_fn[1]) as f:
# The third compilation is shape 16, so it should have kv_caches and the
# ragged_paged_attention
with open(compiled_fns[2]) as f:
content = f.read()
assert "kv_caches" in content and "torch.ops.xla.flash_attention" in content
assert (kv_cache_prefix in content and attn_prefix in content)
# the third compilation is the decode phase,
# it should have kv cache and the paged_attention op
with open(compiled_fn[2]) as f:
# The forth compilation is shape 32, so it should have kv_caches and the
# ragged_paged_attention
with open(compiled_fns[3]) as f:
content = f.read()
assert "kv_caches" in content and "torch.ops.xla.paged_attention" in content
assert (kv_cache_prefix in content and attn_prefix in content)
......@@ -14,12 +14,17 @@ from ..utils import compare_two_settings
def test_custom_dispatcher(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as m:
m.setenv("VLLM_RPC_TIMEOUT", "30000")
compare_two_settings(
"google/gemma-2b",
arg1=[
"--enforce-eager",
f"-O{CompilationLevel.DYNAMO_ONCE}",
],
arg2=["--enforce-eager", f"-O{CompilationLevel.DYNAMO_AS_IS}"],
env1={},
env2={})
compare_two_settings("Qwen/Qwen2.5-1.5B-Instruct",
arg1=[
"--max-model-len=256",
"--max-num-seqs=32",
"--enforce-eager",
f"-O{CompilationLevel.DYNAMO_ONCE}",
],
arg2=[
"--max-model-len=256", "--max-num-seqs=32",
"--enforce-eager",
f"-O{CompilationLevel.DYNAMO_AS_IS}"
],
env1={},
env2={})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment