Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
469e903b
Commit
469e903b
authored
Mar 28, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.8.2' into v0.8.2-dev
parents
389ebcf7
25f560a6
Changes
535
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
624 additions
and
359 deletions
+624
-359
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
...els/decoder_only/vision_language/vlm_utils/model_utils.py
+81
-54
tests/models/decoder_only/vision_language/vlm_utils/runners.py
.../models/decoder_only/vision_language/vlm_utils/runners.py
+10
-11
tests/models/decoder_only/vision_language/vlm_utils/types.py
tests/models/decoder_only/vision_language/vlm_utils/types.py
+20
-27
tests/models/embedding/language/test_cls_models.py
tests/models/embedding/language/test_cls_models.py
+15
-9
tests/models/embedding/language/test_embedding.py
tests/models/embedding/language/test_embedding.py
+13
-11
tests/models/embedding/language/test_gritlm.py
tests/models/embedding/language/test_gritlm.py
+56
-47
tests/models/embedding/utils.py
tests/models/embedding/utils.py
+3
-3
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
+34
-30
tests/models/embedding/vision_language/test_llava_next.py
tests/models/embedding/vision_language/test_llava_next.py
+23
-9
tests/models/embedding/vision_language/test_phi3v.py
tests/models/embedding/vision_language/test_phi3v.py
+4
-7
tests/models/encoder_decoder/audio_language/test_whisper.py
tests/models/encoder_decoder/audio_language/test_whisper.py
+2
-2
tests/models/encoder_decoder/language/test_bart.py
tests/models/encoder_decoder/language/test_bart.py
+5
-5
tests/models/encoder_decoder/vision_language/test_florence2.py
.../models/encoder_decoder/vision_language/test_florence2.py
+90
-53
tests/models/encoder_decoder/vision_language/test_mllama.py
tests/models/encoder_decoder/vision_language/test_mllama.py
+67
-30
tests/models/fixtures/mistral_small_3_chat.json
tests/models/fixtures/mistral_small_3_chat.json
+1
-0
tests/models/fixtures/pixtral_chat_engine.json
tests/models/fixtures/pixtral_chat_engine.json
+0
-1
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+187
-41
tests/models/multimodal/processing/test_h2ovl.py
tests/models/multimodal/processing/test_h2ovl.py
+5
-6
tests/models/multimodal/processing/test_idefics3.py
tests/models/multimodal/processing/test_idefics3.py
+3
-7
tests/models/multimodal/processing/test_internvl.py
tests/models/multimodal/processing/test_internvl.py
+5
-6
No files found.
Too many changes to show.
To preserve performance only
535 of 535+
files are displayed.
Plain diff
Email patch
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
View file @
469e903b
...
...
@@ -6,16 +6,15 @@ typically specific to a small subset of models.
import
re
import
types
from
pathlib
import
PosixPath
from
typing
import
Callable
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Optional
,
Union
import
torch
from
PIL.Image
import
Image
from
transformers
import
(
AutoConfig
,
AutoTokenizer
,
Batch
Encoding
,
from
transformers
import
(
AutoConfig
,
AutoTokenizer
,
Batch
Feature
,
GenerationConfig
)
from
vllm.sequence
import
SampleLogprobs
from
vllm.transformers_utils.tokenizer
import
patch_padding_side
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
.....conftest
import
HfRunner
,
ImageAsset
,
_ImageAssets
from
.types
import
RunnerOutput
...
...
@@ -49,7 +48,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
def
qwen_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
...
...
@@ -60,7 +59,7 @@ def qwen_vllm_to_hf_output(
def
qwen2_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
...
...
@@ -78,7 +77,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
def
llava_video_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
config
=
AutoConfig
.
from_pretrained
(
model
)
mm_token_id
=
config
.
video_token_index
return
_llava_vllm_to_hf_output
(
vllm_output
,
model
,
mm_token_id
)
...
...
@@ -211,43 +210,9 @@ def get_llava_embeddings(image_assets: _ImageAssets):
return
[
asset
.
image_embeds
for
asset
in
image_assets
]
####### postprocessors to run on HF BatchEncoding
def
cast_dtype_post_processor
(
hf_inp_key
:
str
)
->
Callable
[[
BatchEncoding
,
str
],
BatchEncoding
]:
"""Gets a handle to a post processor which converts a given key into a
target data type."""
def
process
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
torch_dtype
=
STR_DTYPE_TO_TORCH_DTYPE
[
dtype
]
hf_inputs
[
hf_inp_key
]
=
hf_inputs
[
hf_inp_key
].
to
(
torch_dtype
)
return
hf_inputs
return
process
def
ignore_inputs_post_processor
(
hf_inp_key
:
str
)
->
Callable
[[
BatchEncoding
,
str
],
BatchEncoding
]:
"""Gets a handle to a post processor which ignores a given key."""
def
process
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
del
hf_inputs
[
hf_inp_key
]
return
hf_inputs
return
process
def
wrap_inputs_post_processor
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
return
{
"model_inputs"
:
hf_inputs
}
def
molmo_post_processor
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
hf_inputs
=
cast_dtype_post_processor
(
"images"
)(
hf_inputs
,
dtype
)
return
{
k
:
v
.
unsqueeze
(
0
)
for
k
,
v
in
hf_inputs
.
items
()}
####### Prompt path encoders for models that need models on disk
def
qwen_prompt_path_encoder
(
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
L
ist
[
ImageAsset
],
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
])
->
str
:
"""Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that
...
...
@@ -257,7 +222,7 @@ def qwen_prompt_path_encoder(
Args:
tmp_path: Tempdir for test under consideration.
prompt: Prompt with image placeholders.
assets:
L
ist of image assets whose len equals the num placeholders.
assets:
l
ist of image assets whose len equals the num placeholders.
"""
# Ensure that the number of placeholders matches the number of assets;
# If this is not true, the test is probably written incorrectly.
...
...
@@ -295,8 +260,7 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
for
k
in
inputs
.
keys
()
# noqa
if
k
not
in
(
"seq_lens"
,
"sft_format"
)
}
inputs
=
BatchEncoding
(
data
=
inputs
,
tensor_type
=
"pt"
)
return
inputs
return
BatchFeature
(
data
=
inputs
,
tensor_type
=
"pt"
)
hf_model
.
processor
=
processor
hf_model
.
model
.
get_output_embeddings
=
lambda
:
\
...
...
@@ -304,8 +268,20 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return
hf_model
def
glm_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
"""Patches and returns an instance of the HfRunner to use for GLM4."""
def
gemma3_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
"""Patches and returns an instance of the HfRunner to use for Gemma 3."""
hf_processor
=
hf_model
.
processor
def
processor
(
*
args
,
**
kwargs
):
return
hf_processor
(
*
args
,
do_pan_and_scan
=
True
,
**
kwargs
)
hf_model
.
processor
=
processor
return
hf_model
def
glm4v_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
"""Patches and returns an instance of the HfRunner to use for GLM4V."""
hf_processor
=
hf_model
.
processor
patch_padding_side
(
hf_processor
)
...
...
@@ -313,12 +289,20 @@ def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
if
images
is
None
:
return
hf_processor
(
*
args
,
**
kwargs
)
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
contents
=
re
.
findall
(
r
"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>"
,
text
,
)
assert
len
(
contents
)
==
len
(
images
)
return
hf_processor
.
apply_chat_template
(
[{
"role"
:
"user"
,
"image"
:
image
s
,
"content"
:
te
x
t
}],
"image"
:
image
,
"content"
:
con
te
n
t
}
for
image
,
content
in
zip
(
images
,
contents
)
],
add_generation_prompt
=
True
,
tokenize
=
True
,
return_dict
=
True
,
...
...
@@ -350,7 +334,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
L
ist
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
l
ist
[
Image
]],
**
kwargs
):
# yapf: disable
from
vllm.model_executor.models.h2ovl
import
(
...
...
@@ -410,7 +394,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
L
ist
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
l
ist
[
Image
]],
**
kwargs
):
from
vllm.model_executor.models.internvl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
...
...
@@ -509,10 +493,52 @@ def mantis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return
hf_model
def
minicpm
o
_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
def
minicpm
v_25
_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
orig_generate
=
hf_model
.
model
.
generate
def
_generate
(
self
,
*
args
,
**
kwargs
):
def
_generate
(
self
,
*
args
,
input_ids
=
None
,
pixel_values
=
None
,
image_sizes
=
None
,
image_bound
=
None
,
tgt_sizes
=
None
,
**
kwargs
,
):
model_inputs
=
{
"input_ids"
:
input_ids
,
"pixel_values"
:
pixel_values
,
"image_sizes"
:
image_sizes
,
"image_bound"
:
image_bound
,
"tgt_sizes"
:
tgt_sizes
,
}
for
k
in
list
(
model_inputs
.
keys
()):
if
model_inputs
[
k
]
is
None
:
model_inputs
.
pop
(
k
)
return
orig_generate
(
model_inputs
,
*
args
,
decode_text
=
False
,
**
kwargs
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
return
hf_model
def
minicpmo_26_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
orig_generate
=
hf_model
.
model
.
generate
def
_generate
(
self
,
*
args
,
image_sizes
=
None
,
**
kwargs
):
return
orig_generate
(
*
args
,
decode_text
=
False
,
**
kwargs
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
return
hf_model
def
minicpmv_26_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
orig_generate
=
hf_model
.
model
.
generate
def
_generate
(
self
,
*
args
,
image_sizes
=
None
,
**
kwargs
):
return
orig_generate
(
*
args
,
decode_text
=
False
,
**
kwargs
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
...
...
@@ -531,10 +557,11 @@ def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
_generate
(
self
,
max_new_tokens
=
None
,
do_sample
=
None
,
**
kwargs
):
batch
=
{
k
:
kwargs
.
pop
(
k
)
k
:
kwargs
.
pop
(
k
)
.
unsqueeze
(
0
)
for
k
in
(
"input_ids"
,
"images"
,
"image_input_idx"
,
"image_masks"
)
if
k
in
kwargs
}
batch
=
BatchFeature
(
batch
).
to
(
dtype
=
self
.
dtype
)
return
self
.
generate_from_batch
(
batch
,
...
...
tests/models/decoder_only/vision_language/vlm_utils/runners.py
View file @
469e903b
...
...
@@ -3,7 +3,6 @@
types / modalities.
"""
from
pathlib
import
PosixPath
from
typing
import
Type
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.
import
builders
,
core
...
...
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
####### Entrypoints for running different test types
def
run_single_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
...
...
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_multi_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
...
...
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_embedding_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
...
...
@@ -85,8 +84,8 @@ def run_video_test(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
video_assets
:
_VideoAssets
,
):
assert
test_case
.
size_wrapper
is
not
None
...
...
@@ -111,8 +110,8 @@ def run_video_test(
def
run_custom_inputs_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
]):
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
]):
# Custom test cases can provide inputs directly, but they need to
# explicitly provide a CustomTestConfig, which wraps the inputs and
# the limit_mm_per_prompt
...
...
tests/models/decoder_only/vision_language/vlm_utils/types.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
"""Types for writing multimodal model tests."""
from
collections.abc
import
Iterable
from
enum
import
Enum
from
pathlib
import
PosixPath
from
typing
import
(
Any
,
Callable
,
Dict
,
Iterable
,
List
,
NamedTuple
,
Optional
,
Tuple
,
Type
,
Union
)
from
typing
import
Any
,
Callable
,
NamedTuple
,
Optional
,
Union
import
torch
from
PIL.Image
import
Image
from
pytest
import
MarkDecorator
from
transformers
import
AutoModelForCausalLM
,
BatchEncoding
from
transformers
import
AutoModelForCausalLM
from
transformers.models.auto.auto_factory
import
_BaseAutoModelClass
from
vllm.config
import
TaskOption
from
vllm.sequence
import
SampleLogprobs
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.utils
import
identity
from
.....conftest
import
IMAGE_ASSETS
,
HfRunner
,
ImageAsset
,
_ImageAssets
from
....utils
import
check_logprobs_close
...
...
@@ -35,7 +34,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.25
,
0.5
,
1.0
)]
EMBEDDING_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
)]
RunnerOutput
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
RunnerOutput
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
# yapf: enable
...
...
@@ -53,8 +52,8 @@ class SizeType(Enum):
class
CustomTestOptions
(
NamedTuple
):
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
L
ist
[
Union
[
L
ist
[
Image
],
Image
]]]]
limit_mm_per_prompt
:
D
ict
[
str
,
int
]
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
l
ist
[
Union
[
l
ist
[
Image
],
Image
]]]]
limit_mm_per_prompt
:
d
ict
[
str
,
int
]
# kwarg to pass multimodal data in as to vllm/hf runner instances.
runner_mm_key
:
str
=
"images"
...
...
@@ -63,13 +62,13 @@ class ImageSizeWrapper(NamedTuple):
type
:
SizeType
# A size factor is a wrapper of 0+ floats,
# while a fixed size contains an iterable of integer pairs
data
:
Union
[
Iterable
[
float
],
Iterable
[
T
uple
[
int
,
int
]]]
data
:
Union
[
Iterable
[
float
],
Iterable
[
t
uple
[
int
,
int
]]]
class
VLMTestInfo
(
NamedTuple
):
"""Holds the configuration for 1+ tests for one model architecture."""
models
:
L
ist
[
str
]
models
:
l
ist
[
str
]
test_type
:
Union
[
VLMTestType
,
Iterable
[
VLMTestType
]]
# Should be None only if this is a CUSTOM_INPUTS test
...
...
@@ -97,24 +96,19 @@ class VLMTestInfo(NamedTuple):
max_num_seqs
:
int
=
256
task
:
TaskOption
=
"auto"
tensor_parallel_size
:
int
=
1
vllm_runner_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
vllm_runner_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Optional callable which gets a list of token IDs from the model tokenizer
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]]
=
None
# Optional list of strings to stop generation, useful when stop tokens are
# not special tokens in the tokenizer
stop_str
:
Optional
[
L
ist
[
str
]]
=
None
stop_str
:
Optional
[
l
ist
[
str
]]
=
None
# Exposed options for HF runner
hf_model_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
hf_model_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Indicates we should explicitly pass the EOS from the tokenizer
use_tokenizer_eos
:
bool
=
False
auto_cls
:
Type
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
# Callable to pass to the HF runner to run on inputs; for now, we also pass
# the data type to input post processing, because almost all of the uses of
# postprocess_inputs are to fix the data types of BatchEncoding values.
postprocess_inputs
:
Callable
[[
BatchEncoding
,
str
],
BatchEncoding
]
=
identity
auto_cls
:
type
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
patch_hf_runner
:
Optional
[
Callable
[[
HfRunner
],
HfRunner
]]
=
None
# Post processors that, if defined, will run on the outputs of the
...
...
@@ -128,12 +122,12 @@ class VLMTestInfo(NamedTuple):
# Default expandable params per test; these defaults can be overridden in
# instances of this object; the complete set of test cases for the model
# is all combinations of .models + all fields below
max_tokens
:
Union
[
int
,
T
uple
[
int
]]
=
128
num_logprobs
:
Union
[
int
,
T
uple
[
int
]]
=
5
dtype
:
Union
[
str
,
Iterable
[
str
]]
=
"
half
"
max_tokens
:
Union
[
int
,
t
uple
[
int
]]
=
128
num_logprobs
:
Union
[
int
,
t
uple
[
int
]]
=
5
dtype
:
Union
[
str
,
Union
[
list
[
str
],
tuple
[
str
,
...]
]]
=
"
auto
"
distributed_executor_backend
:
Optional
[
Union
[
str
,
Iterable
[
str
]]]
=
None
# Only expanded in video tests
num_video_frames
:
Union
[
int
,
T
uple
[
int
]]
=
16
num_video_frames
:
Union
[
int
,
t
uple
[
int
]]
=
16
# Fixed image sizes / image size factors; most tests use image_size_factors
# The values provided for these two fields will be stacked and expanded
...
...
@@ -141,19 +135,19 @@ class VLMTestInfo(NamedTuple):
# once per test (much like concatenating and wrapping in one parametrize
# call)
image_size_factors
:
Iterable
[
Iterable
[
float
]]
=
IMAGE_SIZE_FACTORS
image_sizes
:
Optional
[
Iterable
[
Iterable
[
T
uple
[
int
,
int
]]]]
=
None
image_sizes
:
Optional
[
Iterable
[
Iterable
[
t
uple
[
int
,
int
]]]]
=
None
# Hack for updating a prompt to take into a local path; currently only used
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for HF runner
prompt_path_encoder
:
Optional
[
Callable
[[
PosixPath
,
str
,
Union
[
L
ist
[
ImageAsset
],
_ImageAssets
]],
Callable
[[
PosixPath
,
str
,
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
]],
str
]]
=
None
# noqa: E501
# Allows configuring a test to run with custom inputs
custom_test_opts
:
Optional
[
L
ist
[
CustomTestOptions
]]
=
None
custom_test_opts
:
Optional
[
l
ist
[
CustomTestOptions
]]
=
None
marks
:
Optional
[
L
ist
[
MarkDecorator
]]
=
None
marks
:
Optional
[
l
ist
[
MarkDecorator
]]
=
None
def
get_non_parametrized_runner_kwargs
(
self
):
"""Returns a dictionary of expandable kwargs for items that are used
...
...
@@ -171,7 +165,6 @@ class VLMTestInfo(NamedTuple):
"vllm_output_post_proc"
:
self
.
vllm_output_post_proc
,
"auto_cls"
:
self
.
auto_cls
,
"use_tokenizer_eos"
:
self
.
use_tokenizer_eos
,
"postprocess_inputs"
:
self
.
postprocess_inputs
,
"comparator"
:
self
.
comparator
,
"get_stop_token_ids"
:
self
.
get_stop_token_ids
,
"hf_model_kwargs"
:
self
.
hf_model_kwargs
,
...
...
tests/models/embedding/language/test_cls_models.py
View file @
469e903b
...
...
@@ -9,6 +9,8 @@ import torch
from
transformers
import
AutoModelForSequenceClassification
from
....utils
import
models_path_prefix
from
vllm.platforms
import
current_platform
@
pytest
.
mark
.
parametrize
(
"model"
,
...
...
@@ -17,24 +19,24 @@ from ....utils import models_path_prefix
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
],
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
]
if
current_platform
.
is_rocm
()
else
[
"float"
])
def
test_classification_models
(
hf_runner
,
vllm_runner
,
example_prompts
,
model
:
str
,
dtype
:
str
,
monkeypatch
,
)
->
None
:
if
current_platform
.
is_rocm
():
# ROCm Triton FA does not currently support sliding window attention
# switch to use ROCm CK FA backend
monkeypatch
.
setenv
(
"VLLM_USE_TRITON_FLASH_ATTN"
,
"False"
)
with
vllm_runner
(
model
,
dtype
=
dtype
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
classify
(
example_prompts
)
# This test is for verifying whether the model's extra_repr
# can be printed correctly.
def
print_model
(
model
):
print
(
model
)
vllm_model
.
apply_model
(
print_model
)
with
hf_runner
(
model
,
dtype
=
dtype
,
auto_cls
=
AutoModelForSequenceClassification
)
as
hf_model
:
...
...
@@ -45,4 +47,8 @@ def test_classification_models(
hf_output
=
torch
.
tensor
(
hf_output
)
vllm_output
=
torch
.
tensor
(
vllm_output
)
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
1e-3
)
# the tolerance value of 1e-2 is selected based on the
# half datatype tests in
# tests/models/embedding/language/test_embedding.py
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
1e-3
if
dtype
==
"float"
else
1e-2
)
tests/models/embedding/language/test_embedding.py
View file @
469e903b
...
...
@@ -7,10 +7,11 @@ import os
import
pytest
from
vllm.config
import
PoolerConfig
from
....utils
import
models_path_prefix
from
vllm.platforms
import
current_platform
from
..utils
import
check_embeddings_close
from
vllm.platforms
import
current_platform
@
pytest
.
mark
.
parametrize
(
...
...
@@ -21,15 +22,15 @@ from vllm.platforms import current_platform
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/all-MiniLM-L12-v2"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/multilingual-e5-large"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
)),
# [Decoder-only]
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/bge-multilingual-gemma2"
),
marks
=
[
pytest
.
mark
.
core_model
]),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/e5-mistral-7b-instruct"
),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
)),
# [
Encoder-de
coder]
# [
Cross-En
coder]
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/stsb-roberta-base-v2"
)),
],
)
...
...
@@ -44,13 +45,21 @@ def test_models(
example_prompts
,
model
,
dtype
:
str
,
monkeypatch
,
)
->
None
:
if
model
==
"BAAI/bge-multilingual-gemma2"
and
current_platform
.
is_rocm
():
# ROCm Triton FA does not currently support sliding window attention
# switch to use ROCm CK FA backend
monkeypatch
.
setenv
(
"VLLM_USE_TRITON_FLASH_ATTN"
,
"False"
)
vllm_extra_kwargs
=
{}
if
model
==
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
):
vllm_extra_kwargs
[
"override_pooler_config"
]
=
\
PoolerConfig
(
pooling_type
=
"MEAN"
)
if
model
==
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
):
vllm_extra_kwargs
[
"hf_overrides"
]
=
{
"is_causal"
:
Fals
e
}
vllm_extra_kwargs
[
"hf_overrides"
]
=
{
"is_causal"
:
Tru
e
}
# The example_prompts has ending "\n", for example:
# "Write a short story about a robot that dreams for the first time.\n"
...
...
@@ -71,13 +80,6 @@ def test_models(
**
vllm_extra_kwargs
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
encode
(
example_prompts
)
# This test is for verifying whether the model's extra_repr
# can be printed correctly.
def
print_model
(
model
):
print
(
model
)
vllm_model
.
apply_model
(
print_model
)
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_1_lst
=
vllm_outputs
,
...
...
tests/models/embedding/language/test_gritlm.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
__future__
import
annotations
import
importlib.util
import
math
from
array
import
array
from
typing
import
List
import
os
import
openai
...
...
@@ -13,13 +13,14 @@ from scipy.spatial.distance import cosine
import
vllm
import
vllm.config
from
vllm.utils
import
STR_BACKEND_ENV_VAR
from
....utils
import
RemoteOpenAIServer
from
....utils
import
models_path_prefix
# GritLM embedding implementation is only supported by XFormers backend.
pytest
.
mark
.
skipif
(
not
importlib
.
util
.
find_spec
(
"xformers"
),
reason
=
"GritLM requires XFormers"
)
pytestmark
=
pytest
.
mark
.
skipif
(
not
importlib
.
util
.
find_spec
(
"xformers"
),
reason
=
"GritLM requires XFormers"
)
MODEL_NAME
=
os
.
path
.
join
(
models_path_prefix
,
"parasail-ai/GritLM-7B-vllm"
)
MAX_MODEL_LEN
=
4000
...
...
@@ -32,36 +33,34 @@ def _arr(arr):
return
array
(
"i"
,
arr
)
def
test_find_array
(
monkeypatch
):
def
test_find_array
(
monkeypatch
:
pytest
.
MonkeyPatch
):
# GritLM embedding implementation is only supported by XFormers backend.
monkeypatch
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
STR_BACKEND_ENV_VAR
,
"XFORMERS"
)
from
vllm.model_executor.models.gritlm
import
GritLMPooler
from
vllm.model_executor.models.gritlm
import
GritLMPooler
# Create an LLM object to get the model config.
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
pooler
=
GritLMPooler
(
model_config
=
llm
.
llm_engine
.
model_config
)
# Create an LLM object to get the model config.
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
pooler
=
GritLMPooler
(
model_config
=
llm
.
llm_engine
.
model_config
)
arr
=
_arr
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
])
arr
=
_arr
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
])
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
0
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
1
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
5
)
==
-
1
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
5
]),
start_idx
=
0
)
==
-
1
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
0
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
1
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
5
)
==
-
1
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
5
]),
start_idx
=
0
)
==
-
1
with
pytest
.
raises
(
ValueError
):
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=-
1
)
with
pytest
.
raises
(
ValueError
):
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=-
1
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
server_embedding
():
# GritLM embedding implementation is only supported by XFormers backend.
with
pytest
.
MonkeyPatch
.
context
()
as
mp
:
mp
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
args
=
[
"--task"
,
"embed"
,
"--max_model_len"
,
str
(
MAX_MODEL_LEN
)]
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
yield
remote_server
args
=
[
"--task"
,
"embed"
,
"--max_model_len"
,
str
(
MAX_MODEL_LEN
)]
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
@@ -72,9 +71,12 @@ def server_generate():
@
pytest_asyncio
.
fixture
async
def
client_embedding
(
server_embedding
:
RemoteOpenAIServer
):
async
with
server_embedding
.
get_async_client
()
as
async_client
:
yield
async_client
async
def
client_embedding
(
monkeypatch
:
pytest
.
MonkeyPatch
,
server_embedding
:
RemoteOpenAIServer
):
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
async
with
server_embedding
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest_asyncio
.
fixture
...
...
@@ -83,14 +85,20 @@ async def client_generate(server_generate: RemoteOpenAIServer):
yield
async_client
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
List
[
str
],
instruction
:
str
)
->
List
[
float
]:
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
list
[
str
],
instruction
:
str
,
)
->
list
[
float
]:
outputs
=
llm
.
encode
([
instruction
+
q
for
q
in
queries
],
)
return
[
output
.
outputs
.
embedding
for
output
in
outputs
]
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
List
[
str
],
instruction
:
str
)
->
List
[
float
]:
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
list
[
str
],
instruction
:
str
,
)
->
list
[
float
]:
outputs
=
await
client
.
embeddings
.
create
(
model
=
MODEL_NAME
,
input
=
[
instruction
+
q
for
q
in
queries
],
...
...
@@ -109,7 +117,7 @@ def get_test_data():
README.md in https://github.com/ContextualAI/gritlm
"""
q_instruction
=
gritlm_instruction
(
"Given a scientific paper title, retrieve the paper's abstract"
)
"Given a scientific paper title, retrieve the paper's abstract"
,
)
queries
=
[
"Bitcoin: A Peer-to-Peer Electronic Cash System"
,
"Generative Representational Instruction Tuning"
,
...
...
@@ -125,7 +133,7 @@ def get_test_data():
return
queries
,
q_instruction
,
documents
,
d_instruction
def
validate_embed_output
(
q_rep
:
L
ist
[
float
],
d_rep
:
L
ist
[
float
]):
def
validate_embed_output
(
q_rep
:
l
ist
[
float
],
d_rep
:
l
ist
[
float
]):
cosine_sim_q0_d0
=
1
-
cosine
(
q_rep
[
0
],
d_rep
[
0
])
assert
math
.
isclose
(
cosine_sim_q0_d0
,
0.609
,
abs_tol
=
0.001
)
...
...
@@ -139,31 +147,32 @@ def validate_embed_output(q_rep: List[float], d_rep: List[float]):
assert
math
.
isclose
(
cosine_sim_q1_d1
,
0.532
,
abs_tol
=
0.001
)
def
test_gritlm_offline_embedding
(
monkeypatch
):
def
test_gritlm_offline_embedding
(
monkeypatch
:
pytest
.
MonkeyPatch
):
# GritLM embedding implementation is only supported by XFormers backend.
monkeypatch
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
STR_BACKEND_ENV_VAR
,
"XFORMERS"
)
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
d_rep
=
run_llm_encode
(
llm
,
documents
,
d_instruction
,
)
q_rep
=
run_llm_encode
(
llm
,
queries
,
q_instruction
,
)
d_rep
=
run_llm_encode
(
llm
,
documents
,
d_instruction
,
)
q_rep
=
run_llm_encode
(
llm
,
queries
,
q_instruction
,
)
validate_embed_output
(
q_rep
,
d_rep
)
validate_embed_output
(
q_rep
,
d_rep
)
@
pytest
.
mark
.
asyncio
async
def
test_gritlm_api_server_embedding
(
client_embedding
:
openai
.
AsyncOpenAI
):
client_embedding
:
openai
.
AsyncOpenAI
,
):
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
d_rep
=
await
run_client_embeddings
(
...
...
tests/models/embedding/utils.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Sequence
from
collections.abc
import
Sequence
import
torch
import
torch.nn.functional
as
F
...
...
@@ -8,8 +8,8 @@ import torch.nn.functional as F
def
check_embeddings_close
(
*
,
embeddings_0_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_0_lst
:
Sequence
[
l
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
l
ist
[
float
]],
name_0
:
str
,
name_1
:
str
,
tol
:
float
=
1e-3
,
...
...
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
functools
import
partial
from
typing
import
Callable
,
Dict
,
List
,
Type
from
typing
import
Callable
import
os
import
pytest
import
torch
import
torch.nn.functional
as
F
from
PIL
import
Image
from
transformers
import
BatchEncoding
,
Qwen2VLForConditionalGeneration
from
transformers
import
Qwen2VLForConditionalGeneration
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
from
....utils
import
large_gpu_test
,
models_path_prefix
...
...
@@ -68,7 +68,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):
def
apply_chat_template_and_add_eos
(
messages
:
L
ist
[
D
ict
],
messages
:
l
ist
[
d
ict
],
apply_chat_template_fn
:
Callable
,
):
prompt
=
apply_chat_template_fn
(
...
...
@@ -76,16 +76,12 @@ def apply_chat_template_and_add_eos(
return
prompt
def
postprocess_inputs
(
hf_model
:
HfRunner
,
inputs
:
BatchEncoding
,
**
kwargs
):
return
hf_model
.
model
.
prepare_inputs_for_generation
(
**
inputs
,
**
kwargs
)
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
embed_texts
:
L
ist
[
bool
],
embed_texts
:
l
ist
[
bool
],
model
:
str
,
*
,
dtype
:
str
,
...
...
@@ -119,14 +115,8 @@ def _run_test(
with
hf_runner
(
model
,
dtype
=
dtype
,
auto_cls
=
Qwen2VLForConditionalGeneration
)
as
hf_model
:
hf_model
.
postprocess_inputs
=
partial
(
postprocess_inputs
,
hf_model
,
cache_position
=
torch
.
arange
(
0
,
1
,
# 1 for batch size
requires_grad
=
False
),
use_cache
=
False
)
prompts
=
[]
for
text
,
image
,
embed_text
in
zip
(
input_texts
,
input_images
,
embed_texts
):
# dse requires non-standard input processing
...
...
@@ -134,20 +124,34 @@ def _run_test(
messages
=
get_messages
(
image
,
text
,
embed_text
)
prompt
=
apply_chat_template_and_add_eos
(
messages
,
hf_model
.
processor
.
apply_chat_template
)
inputs
=
hf_model
.
get_inputs
(
prompts
=
[[
prompt
]],
images
=
[[
image
]],
)
with
torch
.
no_grad
():
prompts
.
append
(
prompt
)
all_inputs
=
hf_model
.
get_inputs
(
prompts
=
prompts
,
images
=
input_images
,
)
with
torch
.
no_grad
():
all_outputs
=
[]
for
inputs
in
all_inputs
:
inputs
=
hf_model
.
model
.
prepare_inputs_for_generation
(
**
inputs
,
cache_position
=
torch
.
arange
(
1
),
# 1 for batch size
use_cache
=
False
,
)
outputs
=
hf_model
.
model
(
**
hf_model
.
wrap_device
(
inputs
[
0
],
device
=
hf_model
.
model
.
device
.
type
),
**
hf_model
.
wrap_device
(
inputs
),
return_dict
=
True
,
output_hidden_states
=
True
,
)
pooled_output
=
torch
.
nn
.
functional
.
normalize
(
outputs
.
hidden_states
[
-
1
][
0
,
-
1
],
p
=
2
,
dim
=-
1
)
hf_outputs
.
append
(
pooled_output
.
tolist
())
pooled_output
=
F
.
normalize
(
outputs
.
hidden_states
[
-
1
][
0
,
-
1
],
p
=
2
,
dim
=-
1
)
all_outputs
.
append
(
pooled_output
.
tolist
())
hf_outputs
=
all_outputs
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
...
...
tests/models/embedding/vision_language/test_llava_next.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
os
import
pytest
import
torch.nn.functional
as
F
from
transformers
import
AutoModelForVision2Seq
from
transformers
import
AutoModelForImageTextToText
from
vllm.platforms
import
current_platform
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
from
....utils
import
large_gpu_test
,
models_path_prefix
from
..utils
import
check_embeddings_close
# Llava Next embedding implementation is only supported by CUDA.
# If run on ROCm, hf_model.model.resize_token_embeddings will
# cause the following error:
# RuntimeError: Calling torch.linalg.cholesky on a CUDA tensor
# requires compiling PyTorch with MAGMA. Please use PyTorch
# built with MAGMA support.
# If run on CPU, hf_model.model.resize_token_embeddings will
# cause the following error:
# RuntimeError: Calling torch.linalg.cholesky on a CPU tensor
# requires compiling PyTorch with LAPACK. Please use PyTorch
# built with LAPACK support.
pytestmark
=
pytest
.
mark
.
skipif
(
not
current_platform
.
is_cuda
(),
reason
=
"Llava Next model uses op that is only supported in CUDA"
)
llama3_template
=
'<|start_header_id|>user<|end_header_id|>
\n\n
{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\n\n
\n
'
# noqa: E501
HF_TEXT_PROMPTS
=
[
...
...
@@ -36,9 +51,9 @@ MODELS = [os.path.join(models_path_prefix, "royokong/e5-v")]
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
model
:
str
,
*
,
...
...
@@ -56,7 +71,7 @@ def _run_test(
vllm_outputs
=
vllm_model
.
encode
(
input_texts
,
images
=
input_images
)
with
hf_runner
(
model
,
dtype
=
dtype
,
auto_cls
=
AutoModelFor
Vision2Seq
)
as
hf_model
:
auto_cls
=
AutoModelFor
ImageTextToText
)
as
hf_model
:
# Patch the issue where generation_config.json is missing
hf_model
.
processor
.
patch_size
=
\
hf_model
.
model
.
config
.
vision_config
.
patch_size
...
...
@@ -72,8 +87,7 @@ def _run_test(
for
inputs
in
all_inputs
:
# Based on: https://huggingface.co/royokong/e5-v
outputs
=
hf_model
.
model
(
**
hf_model
.
wrap_device
(
inputs
,
device
=
hf_model
.
model
.
device
.
type
),
**
hf_model
.
wrap_device
(
inputs
),
return_dict
=
True
,
output_hidden_states
=
True
,
)
...
...
tests/models/embedding/vision_language/test_phi3v.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
os
import
pytest
import
torch.nn.functional
as
F
...
...
@@ -30,9 +28,9 @@ MODELS = [os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full")]
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
model
:
str
,
*
,
...
...
@@ -56,8 +54,7 @@ def _run_test(
for
inputs
in
all_inputs
:
# Based on: https://github.com/TIGER-AI-Lab/VLM2Vec/blob/db3b951bccabba220c1f53ab46a734e50dd2fc08/src/model.py
outputs
=
hf_model
.
model
(
**
hf_model
.
wrap_device
(
inputs
,
device
=
hf_model
.
model
.
device
.
type
),
**
hf_model
.
wrap_device
(
inputs
),
return_dict
=
True
,
output_hidden_states
=
True
,
)
...
...
tests/models/encoder_decoder/audio_language/test_whisper.py
View file @
469e903b
...
...
@@ -10,7 +10,7 @@ import pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm.assets.audio
import
AudioAsset
from
....utils
import
fork
_new_process_for_each_test
,
multi_gpu_test
from
....utils
import
create
_new_process_for_each_test
,
multi_gpu_test
PROMPTS
=
[
{
...
...
@@ -119,7 +119,7 @@ def run_test(
assert
output
.
outputs
[
0
].
text
==
expected
@
fork
_new_process_for_each_test
@
create
_new_process_for_each_test
()
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-small"
,
"openai/whisper-large-v3-turbo"
])
...
...
tests/models/encoder_decoder/language/test_bart.py
View file @
469e903b
...
...
@@ -4,7 +4,7 @@
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
"""
import
os
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
pytest
from
transformers
import
AutoModelForSeq2SeqLM
...
...
@@ -19,7 +19,7 @@ from ....utils import models_path_prefix
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
decoder_prompt_type
:
DecoderPromptType
,
):
"""Sanitize vllm output to be comparable with hf output."""
...
...
@@ -33,9 +33,9 @@ def vllm_to_hf_output(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
prompts
:
L
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts
:
l
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
decoder_prompt_type
:
DecoderPromptType
,
model
:
str
,
*
,
...
...
tests/models/encoder_decoder/vision_language/test_florence2.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
functools
import
partial
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
os
import
pytest
from
PIL
import
Image
from
vllm.inputs.data
import
ExplicitEncoderDecoderPrompt
from
vllm.inputs.data
import
ExplicitEncoderDecoderPrompt
,
TextPrompt
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
....conftest
import
HfRunner
,
VllmRunner
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
...utils
import
check_logprobs_close
from
....utils
import
models_path_prefix
Florence2Prompt
=
partial
(
ExplicitEncoderDecoderPrompt
,
decoder_prompt
=
None
,
mm_processor_kwargs
=
None
)
MODELS
=
[
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Florence-2-base"
)]
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
TOKENIZER
=
os
.
path
.
join
(
models_path_prefix
,
"facebook/bart-base"
)
PROMPTS
=
[
Florence2Prompt
(
encoder_prompt
=
"<CAPTION>"
),
Florence2Prompt
(
encoder_prompt
=
"<DETAILED_CAPTION>"
),
Florence2Prompt
(
encoder_prompt
=
"<MORE_DETAILED_CAPTION>"
),
Florence2Prompt
(
encoder_prompt
=
"<CAPTION_TO_PHRASE_GROUNDING>"
),
Florence2Prompt
(
encoder_prompt
=
"<DENSE_REGION_CAPTION>"
),
Florence2Prompt
(
encoder_prompt
=
"<REGION_PROPOSAL>"
),
Florence2Prompt
(
encoder_prompt
=
"<OCR_WITH_REGION>"
),
Florence2Prompt
(
encoder_prompt
=
"<OCR>"
),
Florence2Prompt
(
encoder_prompt
=
"<OD>"
),
]
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"<CAPTION>"
,
# special task token
"cherry_blossom"
:
"Describe in detail what is shown in the image."
,
})
def
get_hf_images_prompts
(
prompts_
:
list
[
ExplicitEncoderDecoderPrompt
[
str
,
TextPrompt
]],
)
->
tuple
[
list
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
list
[
Image
.
Image
]]:
prompts
,
images
=
[],
[]
for
prompt
in
prompts_
:
encoder_prompt
=
prompt
[
"encoder_prompt"
]
prompts
.
append
(
ExplicitEncoderDecoderPrompt
(
encoder_prompt
=
encoder_prompt
[
"prompt"
],
decoder_prompt
=
None
,
))
images
.
append
(
encoder_prompt
[
"multi_modal_data"
][
"image"
])
return
prompts
,
images
def
vllm_to_hf_output
(
vllm_output
:
Tuple
[
List
[
int
],
str
,
Optional
[
SampleLogprobs
]],
):
"""Sanitize vllm output to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
hf_output_str
=
"</s><s>"
+
output_str
+
"</s>"
def
hf_to_vllm_output
(
hf_output
:
tuple
[
list
[
int
],
str
,
Optional
[
SampleLogprobs
]]):
"""Sanitize hf output to be comparable with vllm output."""
output_ids
,
output_str
,
out_logprobs
=
hf_output
return
output_ids
,
hf_output_str
,
out_logprobs
output_str
=
output_str
.
replace
(
"</s>"
,
""
).
replace
(
"<s>"
,
""
)
output_ids
=
[
ids
for
ids
in
output_ids
if
ids
not
in
[
0
,
2
]]
return
output_ids
,
output_str
,
out_logprobs
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
prompts
:
L
ist
[
ExplicitEncoderDecoderPrompt
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
list
[
l
ist
[
ExplicitEncoderDecoderPrompt
]
]
,
model
:
str
,
*
,
dtype
:
str
,
...
...
@@ -58,46 +65,76 @@ def run_test(
distributed_executor_backend
:
Optional
[
str
]
=
None
,
)
->
None
:
with
vllm_runner
(
model
,
max_num_seqs
=
8
,
tokenizer_name
=
TOKENIZER
,
dtype
=
dtype
,
tensor_parallel_size
=
tensor_parallel_size
,
distributed_executor_backend
=
distributed_executor_backend
,
enforce_eager
=
True
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_encoder_decoder_greedy_logprobs
(
prompts
,
max_tokens
,
num_logprobs
)
vllm_outputs_per_case
=
[
vllm_model
.
generate_encoder_decoder_greedy_logprobs
(
prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
)
for
prompts
in
inputs
]
hf_inputs
=
[
get_hf_images_prompts
(
prompts
)
for
prompts
in
inputs
]
# Florence-2 processors require image inputs
dummy_image
=
Image
.
new
(
mode
=
"RGB"
,
size
=
(
2
,
2
))
with
hf_runner
(
model
,
dtype
=
dtype
,
skip_tokenizer_init
=
True
)
as
hf_model
:
hf_model
.
model
.
get_output_embeddings
=
lambda
:
\
hf_model
.
model
.
language_model
.
lm_head
hf_outputs
=
(
hf_model
.
generate_encoder_decoder_greedy_logprobs_limit
(
prompts
,
max_tokens
,
num_logprobs
,
images
=
[
dummy_image
]
*
len
(
prompts
),
))
check_logprobs_close
(
outputs_0_lst
=
hf_outputs
,
outputs_
1
_lst
=
[
vllm_to_hf_output
(
vllm_output
)
for
vllm_output
in
vllm_outputs
]
,
name_
0
=
"
hf
"
,
name_1
=
"vllm"
,
)
hf_outputs
_per_case
=
[
hf_model
.
generate_encoder_decoder_greedy_logprobs_limit
(
prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
,
images
=
images
)
for
prompts
,
images
in
hf_inputs
]
for
hf_outputs
,
vllm_outputs
in
zip
(
hf_outputs_per_case
,
vllm_outputs_per_case
):
check_logprobs_close
(
outputs_
0
_lst
=
[
hf_to_vllm_output
(
output
)
for
output
in
hf_outputs
],
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_
1
=
"
vllm
"
,
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
,
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
[
# No image
[],
# Single-scale
[
1.0
],
# Single-scale, batched
[
1.0
,
1.0
,
1.0
],
# Multi-scale
[
0.25
,
0.5
,
1.0
],
],
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
,
vllm_runner
,
model
,
dtype
,
max_tokens
,
num_logprobs
)
->
None
:
def
test_models
(
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
inputs_per_image
=
[[
ExplicitEncoderDecoderPrompt
(
encoder_prompt
=
TextPrompt
(
prompt
=
prompt
,
multi_modal_data
=
{
"image"
:
rescale_image_size
(
image
,
factor
)}),
decoder_prompt
=
None
,
)
for
factor
in
size_factors
]
for
image
,
prompt
in
zip
(
images
,
HF_IMAGE_PROMPTS
)]
run_test
(
hf_runner
,
vllm_runner
,
PROMPTS
,
inputs_per_image
,
model
,
dtype
=
dtype
,
max_tokens
=
max_tokens
,
...
...
tests/models/encoder_decoder/vision_language/test_mllama.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Type
,
overload
from
typing
import
Optional
,
overload
import
os
import
pytest
import
torch
from
transformers
import
(
AutoConfig
,
AutoModelForVision2Seq
,
AutoTokenizer
,
BatchEncoding
)
from
transformers
import
AutoConfig
,
AutoModelForImageTextToText
,
AutoTokenizer
from
vllm
import
LLM
,
SamplingParams
from
vllm.attention.backends.flash_attn
import
FlashAttentionMetadata
...
...
@@ -18,6 +17,7 @@ from vllm.sequence import SampleLogprobs
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
from
....quantization.utils
import
is_quant_method_supported
from
....utils
import
large_gpu_test
from
...utils
import
check_logprobs_close
from
....utils
import
models_path_prefix
...
...
@@ -66,7 +66,7 @@ prompt_data = {
}
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
def
vllm_to_hf_output
(
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
model
:
str
):
"""Sanitize vllm output to be comparable with hf output."""
...
...
@@ -93,9 +93,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def
_get_inputs
(
image_assets
:
_ImageAssets
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
)
->
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]]:
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
)
->
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]]:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
if
size_factors
is
not
None
:
...
...
@@ -125,12 +125,12 @@ def _get_inputs(
@
overload
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
*
,
size_factors
:
L
ist
[
float
],
size_factors
:
l
ist
[
float
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
...
...
@@ -142,12 +142,12 @@ def run_test(
@
overload
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
*
,
sizes
:
L
ist
[
T
uple
[
int
,
int
]],
sizes
:
l
ist
[
t
uple
[
int
,
int
]],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
...
...
@@ -158,13 +158,13 @@ def run_test(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
...
...
@@ -185,9 +185,9 @@ def run_test(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]],
model
:
str
,
*
,
dtype
:
str
,
...
...
@@ -217,7 +217,6 @@ def _run_test(
max_num_seqs
=
2
,
tensor_parallel_size
=
tensor_parallel_size
,
distributed_executor_backend
=
distributed_executor_backend
,
enforce_eager
=
True
,
limit_mm_per_prompt
=
{
"image"
:
_LIMIT_IMAGE_PER_PROMPT
})
as
vllm_model
:
vllm_outputs_per_image
=
[
...
...
@@ -228,14 +227,10 @@ def _run_test(
for
prompts
,
images
in
inputs
]
def
process
(
hf_inputs
:
BatchEncoding
,
**
kwargs
):
return
hf_inputs
with
hf_runner
(
model
,
dtype
=
dtype
,
model_kwargs
=
{
"device_map"
:
"auto"
},
postprocess_inputs
=
process
,
auto_cls
=
AutoModelForVision2Seq
)
as
hf_model
:
auto_cls
=
AutoModelForImageTextToText
)
as
hf_model
:
hf_outputs_per_image
=
[
hf_model
.
generate_greedy_logprobs_limit
(
prompts
,
max_tokens
,
...
...
@@ -399,6 +394,49 @@ def test_models_interleaved_images(hf_runner, vllm_runner, image_assets, model,
)
@
large_gpu_test
(
min_gb
=
48
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float16"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
reason
=
'bitsandbytes is not supported on this GPU type.'
)
def
test_bnb_regression
(
image_assets
:
_ImageAssets
,
model
:
str
,
dtype
:
str
,
max_tokens
:
int
,
):
stop_sign
=
image_assets
[
0
].
pil_image
prompts
=
[
{
"prompt"
:
"<|begin_of_text|>The content of the image <|image|> is"
,
"multi_modal_data"
:
{
"image"
:
stop_sign
},
},
{
"prompt"
:
"The color of the sky is blue but sometimes it can also be"
,
},
]
# Test regression about QKVCrossParallelLinear
llm
=
LLM
(
model
=
model
,
dtype
=
dtype
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
quantization
=
"bitsandbytes"
,
load_format
=
"bitsandbytes"
,
)
sampling_params
=
SamplingParams
(
temperature
=
0
,
max_tokens
=
max_tokens
,
)
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
assert
outputs
@
large_gpu_test
(
min_gb
=
48
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
...
...
@@ -443,7 +481,6 @@ def test_explicit_implicit_prompt(
max_model_len
=
4096
,
max_num_seqs
=
2
,
tensor_parallel_size
=
1
,
enforce_eager
=
True
,
)
sampling_params
=
SamplingParams
(
temperature
=
0
,
...
...
@@ -475,14 +512,14 @@ def test_regression(vllm_runner, image_assets, model, dtype, max_tokens,
max_model_len
=
4096
,
max_num_seqs
=
2
,
tensor_parallel_size
=
1
,
enforce_eager
=
True
,
limit_mm_per_prompt
=
{
"image"
:
_LIMIT_IMAGE_PER_PROMPT
})
as
vllm_model
:
# Regression tests for https://github.com/vllm-project/vllm/issues/10648
# Number of image tags is greater than the number of images provided
prompt
=
"<|begin_of_text|><|image|><|image|> Compare the two images"
# noqa: E501
# Number of groups of image tokens is greater than the number of images
# provided (the whitespace between the tags is necessary)
prompt
=
"<|begin_of_text|><|image|> <|image|> Compare the two images"
# noqa: E501
image
=
stop_sign
with
pytest
.
raises
(
ValueError
):
vllm_model
.
generate_greedy_logprobs
([
prompt
],
...
...
tests/models/fixtures/mistral_small_3_chat.json
0 → 100644
View file @
469e903b
[[[
1784
,
3937
,
6122
,
1261
,
7244
,
10575
,
28528
,
1408
,
1261
,
32656
,
11237
,
1044
,
7283
,
2015
,
1454
,
1261
,
38462
,
4818
,
1046
,
2
],
"The image shows a black dog lying on a wooden floor, looking up with a curious expression."
,
[{
"1784"
:
{
"logprob"
:
-0.4740446209907532
,
"rank"
:
1
,
"decoded_token"
:
"The"
},
"1065"
:
{
"logprob"
:
-1.0990445613861084
,
"rank"
:
2
,
"decoded_token"
:
"A"
},
"4380"
:
{
"logprob"
:
-3.3490445613861084
,
"rank"
:
3
,
"decoded_token"
:
"This"
},
"1785"
:
{
"logprob"
:
-5.0990447998046875
,
"rank"
:
4
,
"decoded_token"
:
"In"
},
"11745"
:
{
"logprob"
:
-6.4740447998046875
,
"rank"
:
5
,
"decoded_token"
:
"Here"
}},
{
"3937"
:
{
"logprob"
:
-0.06349722295999527
,
"rank"
:
1
,
"decoded_token"
:
" image"
},
"7244"
:
{
"logprob"
:
-2.813497304916382
,
"rank"
:
2
,
"decoded_token"
:
" black"
},
"16649"
:
{
"logprob"
:
-7.563497066497803
,
"rank"
:
3
,
"decoded_token"
:
" photo"
},
"18390"
:
{
"logprob"
:
-7.688497066497803
,
"rank"
:
4
,
"decoded_token"
:
" photograph"
},
"10575"
:
{
"logprob"
:
-8.438497543334961
,
"rank"
:
5
,
"decoded_token"
:
" dog"
}},
{
"6122"
:
{
"logprob"
:
-0.25453490018844604
,
"rank"
:
1
,
"decoded_token"
:
" shows"
},
"6971"
:
{
"logprob"
:
-1.8795349597930908
,
"rank"
:
2
,
"decoded_token"
:
" features"
},
"51948"
:
{
"logprob"
:
-2.754534959793091
,
"rank"
:
3
,
"decoded_token"
:
" depicts"
},
"25981"
:
{
"logprob"
:
-5.629534721374512
,
"rank"
:
4
,
"decoded_token"
:
" displays"
},
"1395"
:
{
"logprob"
:
-6.129534721374512
,
"rank"
:
5
,
"decoded_token"
:
" is"
}},
{
"1261"
:
{
"logprob"
:
-0.0001245659514097497
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-9.00012493133545
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"1278"
:
{
"logprob"
:
-14.25012493133545
,
"rank"
:
3
,
"decoded_token"
:
" the"
},
"7244"
:
{
"logprob"
:
-14.87512493133545
,
"rank"
:
4
,
"decoded_token"
:
" black"
},
"1925"
:
{
"logprob"
:
-16.125123977661133
,
"rank"
:
5
,
"decoded_token"
:
" one"
}},
{
"7244"
:
{
"logprob"
:
-0.009403933770954609
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"6231"
:
{
"logprob"
:
-5.259403705596924
,
"rank"
:
2
,
"decoded_token"
:
" close"
},
"16450"
:
{
"logprob"
:
-6.759403705596924
,
"rank"
:
3
,
"decoded_token"
:
" sle"
},
"8500"
:
{
"logprob"
:
-7.009403705596924
,
"rank"
:
4
,
"decoded_token"
:
" dark"
},
"4329"
:
{
"logprob"
:
-7.696903705596924
,
"rank"
:
5
,
"decoded_token"
:
" large"
}},
{
"10575"
:
{
"logprob"
:
-0.7522680163383484
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"119075"
:
{
"logprob"
:
-1.0022680759429932
,
"rank"
:
2
,
"decoded_token"
:
" Labrador"
},
"116572"
:
{
"logprob"
:
-1.8772680759429932
,
"rank"
:
3
,
"decoded_token"
:
" puppy"
},
"8636"
:
{
"logprob"
:
-5.627267837524414
,
"rank"
:
4
,
"decoded_token"
:
" lab"
},
"15812"
:
{
"logprob"
:
-5.814767837524414
,
"rank"
:
5
,
"decoded_token"
:
" Lab"
}},
{
"28528"
:
{
"logprob"
:
-0.2941223084926605
,
"rank"
:
1
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-2.1691222190856934
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"1454"
:
{
"logprob"
:
-2.5441222190856934
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"60700"
:
{
"logprob"
:
-3.2941222190856934
,
"rank"
:
4
,
"decoded_token"
:
" laying"
},
"18970"
:
{
"logprob"
:
-4.794122219085693
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"1408"
:
{
"logprob"
:
-0.3170951306819916
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3151"
:
{
"logprob"
:
-1.317095160484314
,
"rank"
:
2
,
"decoded_token"
:
" down"
},
"14038"
:
{
"logprob"
:
-7.3170952796936035
,
"rank"
:
3
,
"decoded_token"
:
" flat"
},
"104248"
:
{
"logprob"
:
-7.4420952796936035
,
"rank"
:
4
,
"decoded_token"
:
" comfortably"
},
"1321"
:
{
"logprob"
:
-7.6920952796936035
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1261"
:
{
"logprob"
:
-0.08228635042905807
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-3.2072863578796387
,
"rank"
:
2
,
"decoded_token"
:
" its"
},
"32656"
:
{
"logprob"
:
-3.3322863578796387
,
"rank"
:
3
,
"decoded_token"
:
" wooden"
},
"3977"
:
{
"logprob"
:
-6.957286357879639
,
"rank"
:
4
,
"decoded_token"
:
" top"
},
"1278"
:
{
"logprob"
:
-7.207286357879639
,
"rank"
:
5
,
"decoded_token"
:
" the"
}},
{
"32656"
:
{
"logprob"
:
-0.03605202957987785
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"3403"
:
{
"logprob"
:
-3.9110519886016846
,
"rank"
:
2
,
"decoded_token"
:
" text"
},
"44130"
:
{
"logprob"
:
-4.911052227020264
,
"rank"
:
3
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-6.036052227020264
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"8500"
:
{
"logprob"
:
-6.473552227020264
,
"rank"
:
5
,
"decoded_token"
:
" dark"
}},
{
"11237"
:
{
"logprob"
:
-0.6433407068252563
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-0.7683407068252563
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"1615"
:
{
"logprob"
:
-5.268340587615967
,
"rank"
:
3
,
"decoded_token"
:
" pl"
},
"3403"
:
{
"logprob"
:
-6.018340587615967
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"18645"
:
{
"logprob"
:
-7.143340587615967
,
"rank"
:
5
,
"decoded_token"
:
" flo"
}},
{
"1044"
:
{
"logprob"
:
-0.6826052665710449
,
"rank"
:
1
,
"decoded_token"
:
","
},
"1321"
:
{
"logprob"
:
-1.682605266571045
,
"rank"
:
2
,
"decoded_token"
:
" and"
},
"7283"
:
{
"logprob"
:
-1.807605266571045
,
"rank"
:
3
,
"decoded_token"
:
" looking"
},
"1046"
:
{
"logprob"
:
-2.682605266571045
,
"rank"
:
4
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-3.182605266571045
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"7283"
:
{
"logprob"
:
-0.07239976525306702
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"11589"
:
{
"logprob"
:
-3.197399854660034
,
"rank"
:
2
,
"decoded_token"
:
" gaz"
},
"35542"
:
{
"logprob"
:
-3.822399854660034
,
"rank"
:
3
,
"decoded_token"
:
" staring"
},
"1454"
:
{
"logprob"
:
-6.384899616241455
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"22116"
:
{
"logprob"
:
-6.572399616241455
,
"rank"
:
5
,
"decoded_token"
:
" facing"
}},
{
"2015"
:
{
"logprob"
:
-0.9646494388580322
,
"rank"
:
2
,
"decoded_token"
:
" up"
},
"7655"
:
{
"logprob"
:
-0.9646494388580322
,
"rank"
:
1
,
"decoded_token"
:
" directly"
},
"74606"
:
{
"logprob"
:
-2.0896494388580322
,
"rank"
:
3
,
"decoded_token"
:
" upwards"
},
"40022"
:
{
"logprob"
:
-3.0896494388580322
,
"rank"
:
4
,
"decoded_token"
:
" upward"
},
"1935"
:
{
"logprob"
:
-4.152149200439453
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1454"
:
{
"logprob"
:
-0.8447978496551514
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"1513"
:
{
"logprob"
:
-1.2197978496551514
,
"rank"
:
2
,
"decoded_token"
:
" at"
},
"41132"
:
{
"logprob"
:
-2.2197978496551514
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"1935"
:
{
"logprob"
:
-2.9697978496551514
,
"rank"
:
4
,
"decoded_token"
:
" int"
},
"7655"
:
{
"logprob"
:
-3.0947978496551514
,
"rank"
:
5
,
"decoded_token"
:
" directly"
}},
{
"1261"
:
{
"logprob"
:
-0.7162021994590759
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-1.3412022590637207
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"41132"
:
{
"logprob"
:
-2.2162022590637207
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"2246"
:
{
"logprob"
:
-3.2162022590637207
,
"rank"
:
4
,
"decoded_token"
:
" its"
},
"38462"
:
{
"logprob"
:
-3.9662022590637207
,
"rank"
:
5
,
"decoded_token"
:
" curious"
}},
{
"38462"
:
{
"logprob"
:
-0.7836517095565796
,
"rank"
:
1
,
"decoded_token"
:
" curious"
},
"26517"
:
{
"logprob"
:
-1.8461517095565796
,
"rank"
:
2
,
"decoded_token"
:
" calm"
},
"26905"
:
{
"logprob"
:
-2.533651828765869
,
"rank"
:
3
,
"decoded_token"
:
" gentle"
},
"11304"
:
{
"logprob"
:
-3.408651828765869
,
"rank"
:
4
,
"decoded_token"
:
" serious"
},
"97680"
:
{
"logprob"
:
-3.596151828765869
,
"rank"
:
5
,
"decoded_token"
:
" thoughtful"
}},
{
"4818"
:
{
"logprob"
:
-0.047154705971479416
,
"rank"
:
1
,
"decoded_token"
:
" expression"
},
"1321"
:
{
"logprob"
:
-3.922154664993286
,
"rank"
:
2
,
"decoded_token"
:
" and"
},
"1505"
:
{
"logprob"
:
-4.047154903411865
,
"rank"
:
3
,
"decoded_token"
:
" or"
},
"22131"
:
{
"logprob"
:
-4.797154903411865
,
"rank"
:
4
,
"decoded_token"
:
" gaze"
},
"1044"
:
{
"logprob"
:
-9.047154426574707
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1046"
:
{
"logprob"
:
-0.0008031480247154832
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1408"
:
{
"logprob"
:
-7.250802993774414
,
"rank"
:
2
,
"decoded_token"
:
" on"
},
"1321"
:
{
"logprob"
:
-10.500802993774414
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1338"
:
{
"logprob"
:
-11.000802993774414
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"3016"
:
{
"logprob"
:
-11.500802993774414
,
"rank"
:
5
,
"decoded_token"
:
" while"
}},
{
"2"
:
{
"logprob"
:
-0.0008517451351508498
,
"rank"
:
1
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-7.125851631164551
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1256"
:
{
"logprob"
:
-10.00085163116455
,
"rank"
:
3
,
"decoded_token"
:
" The"
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
1395
,
28528
,
1408
,
1261
,
32656
,
11237
,
1044
,
7283
,
2015
,
1513
,
1278
,
13424
,
1626
,
1050
,
1046
,
1349
,
10726
,
1290
,
3719
,
1307
,
122203
,
35463
,
1454
,
11223
,
1321
,
95746
,
24765
,
2425
,
1261
,
6133
,
21283
,
1046
,
2
],
"1. A black dog is lying on a wooden floor, looking up at the camera.
\n
2. A scenic view of rugged mountains with green and rocky terrain under a clear sky."
,
[{
"1049"
:
{
"logprob"
:
-0.05050129443407059
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"11745"
:
{
"logprob"
:
-3.5505013465881348
,
"rank"
:
2
,
"decoded_token"
:
"Here"
},
"69957"
:
{
"logprob"
:
-4.175501346588135
,
"rank"
:
3
,
"decoded_token"
:
"Sure"
},
"117991"
:
{
"logprob"
:
-6.175501346588135
,
"rank"
:
4
,
"decoded_token"
:
"Certain"
},
"1045"
:
{
"logprob"
:
-6.550501346588135
,
"rank"
:
5
,
"decoded_token"
:
"-"
}},
{
"1046"
:
{
"logprob"
:
-5.364403477869928e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1041"
:
{
"logprob"
:
-12.500005722045898
,
"rank"
:
2
,
"decoded_token"
:
")"
},
"1058"
:
{
"logprob"
:
-13.875005722045898
,
"rank"
:
3
,
"decoded_token"
:
":"
},
"1044"
:
{
"logprob"
:
-15.687505722045898
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1045"
:
{
"logprob"
:
-15.875005722045898
,
"rank"
:
5
,
"decoded_token"
:
"-"
}},
{
"1349"
:
{
"logprob"
:
-0.4890742003917694
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1531"
:
{
"logprob"
:
-1.1140742301940918
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1603"
:
{
"logprob"
:
-3.364074230194092
,
"rank"
:
3
,
"decoded_token"
:
" **"
},
"1656"
:
{
"logprob"
:
-4.364074230194092
,
"rank"
:
4
,
"decoded_token"
:
" In"
},
"2409"
:
{
"logprob"
:
-4.989074230194092
,
"rank"
:
5
,
"decoded_token"
:
" This"
}},
{
"7244"
:
{
"logprob"
:
-0.08685152232646942
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"6231"
:
{
"logprob"
:
-3.4618515968322754
,
"rank"
:
2
,
"decoded_token"
:
" close"
},
"16450"
:
{
"logprob"
:
-3.5868515968322754
,
"rank"
:
3
,
"decoded_token"
:
" sle"
},
"4329"
:
{
"logprob"
:
-4.899351596832275
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"8500"
:
{
"logprob"
:
-5.399351596832275
,
"rank"
:
5
,
"decoded_token"
:
" dark"
}},
{
"10575"
:
{
"logprob"
:
-0.20338763296604156
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-1.8283876180648804
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-3.95338773727417
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"28404"
:
{
"logprob"
:
-6.95338773727417
,
"rank"
:
4
,
"decoded_token"
:
" pup"
},
"8636"
:
{
"logprob"
:
-7.07838773727417
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"1395"
:
{
"logprob"
:
-0.532414972782135
,
"rank"
:
1
,
"decoded_token"
:
" is"
},
"22524"
:
{
"logprob"
:
-1.7824149131774902
,
"rank"
:
2
,
"decoded_token"
:
" lies"
},
"1454"
:
{
"logprob"
:
-2.1574149131774902
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"10637"
:
{
"logprob"
:
-3.2824149131774902
,
"rank"
:
4
,
"decoded_token"
:
" looks"
},
"28528"
:
{
"logprob"
:
-3.4074149131774902
,
"rank"
:
5
,
"decoded_token"
:
" lying"
}},
{
"28528"
:
{
"logprob"
:
-0.4258010685443878
,
"rank"
:
1
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-1.6758010387420654
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"60700"
:
{
"logprob"
:
-2.9258010387420654
,
"rank"
:
3
,
"decoded_token"
:
" laying"
},
"38235"
:
{
"logprob"
:
-3.6758010387420654
,
"rank"
:
4
,
"decoded_token"
:
" resting"
},
"18970"
:
{
"logprob"
:
-3.6758010387420654
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"1408"
:
{
"logprob"
:
-0.3588743805885315
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3151"
:
{
"logprob"
:
-1.2338743209838867
,
"rank"
:
2
,
"decoded_token"
:
" down"
},
"41132"
:
{
"logprob"
:
-6.358874320983887
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"14038"
:
{
"logprob"
:
-6.546374320983887
,
"rank"
:
4
,
"decoded_token"
:
" flat"
},
"1321"
:
{
"logprob"
:
-6.733874320983887
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1261"
:
{
"logprob"
:
-0.07801607996225357
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-2.9530160427093506
,
"rank"
:
2
,
"decoded_token"
:
" its"
},
"32656"
:
{
"logprob"
:
-4.20301628112793
,
"rank"
:
3
,
"decoded_token"
:
" wooden"
},
"1278"
:
{
"logprob"
:
-5.20301628112793
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"3977"
:
{
"logprob"
:
-6.57801628112793
,
"rank"
:
5
,
"decoded_token"
:
" top"
}},
{
"32656"
:
{
"logprob"
:
-0.06541638821363449
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"3403"
:
{
"logprob"
:
-3.4404163360595703
,
"rank"
:
2
,
"decoded_token"
:
" text"
},
"44130"
:
{
"logprob"
:
-3.9404163360595703
,
"rank"
:
3
,
"decoded_token"
:
" rust"
},
"17253"
:
{
"logprob"
:
-5.81541633605957
,
"rank"
:
4
,
"decoded_token"
:
" weather"
},
"12603"
:
{
"logprob"
:
-5.94041633605957
,
"rank"
:
5
,
"decoded_token"
:
" wood"
}},
{
"11237"
:
{
"logprob"
:
-0.4574064016342163
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-1.0824064016342163
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"1615"
:
{
"logprob"
:
-4.082406520843506
,
"rank"
:
3
,
"decoded_token"
:
" pl"
},
"3403"
:
{
"logprob"
:
-5.207406520843506
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"28984"
:
{
"logprob"
:
-6.582406520843506
,
"rank"
:
5
,
"decoded_token"
:
" deck"
}},
{
"1044"
:
{
"logprob"
:
-0.9594833850860596
,
"rank"
:
1
,
"decoded_token"
:
","
},
"7283"
:
{
"logprob"
:
-1.2094833850860596
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"1321"
:
{
"logprob"
:
-2.2094833850860596
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1454"
:
{
"logprob"
:
-2.4594833850860596
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"1626"
:
{
"logprob"
:
-2.5844833850860596
,
"rank"
:
5
,
"decoded_token"
:
".
\n
"
}},
{
"7283"
:
{
"logprob"
:
-0.15972694754600525
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"11589"
:
{
"logprob"
:
-2.534726858139038
,
"rank"
:
2
,
"decoded_token"
:
" gaz"
},
"35542"
:
{
"logprob"
:
-2.909726858139038
,
"rank"
:
3
,
"decoded_token"
:
" staring"
},
"22116"
:
{
"logprob"
:
-6.034727096557617
,
"rank"
:
4
,
"decoded_token"
:
" facing"
},
"1454"
:
{
"logprob"
:
-6.409727096557617
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"2015"
:
{
"logprob"
:
-0.894250750541687
,
"rank"
:
1
,
"decoded_token"
:
" up"
},
"7655"
:
{
"logprob"
:
-1.269250750541687
,
"rank"
:
2
,
"decoded_token"
:
" directly"
},
"74606"
:
{
"logprob"
:
-1.769250750541687
,
"rank"
:
3
,
"decoded_token"
:
" upwards"
},
"40022"
:
{
"logprob"
:
-2.6442508697509766
,
"rank"
:
4
,
"decoded_token"
:
" upward"
},
"1935"
:
{
"logprob"
:
-4.081750869750977
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1513"
:
{
"logprob"
:
-0.5085363388061523
,
"rank"
:
1
,
"decoded_token"
:
" at"
},
"1454"
:
{
"logprob"
:
-1.5085363388061523
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1626"
:
{
"logprob"
:
-2.6335363388061523
,
"rank"
:
3
,
"decoded_token"
:
".
\n
"
},
"1935"
:
{
"logprob"
:
-3.3835363388061523
,
"rank"
:
4
,
"decoded_token"
:
" int"
},
"41132"
:
{
"logprob"
:
-3.6335363388061523
,
"rank"
:
5
,
"decoded_token"
:
" attent"
}},
{
"1278"
:
{
"logprob"
:
-0.0010482537327334285
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"4433"
:
{
"logprob"
:
-7.0010480880737305
,
"rank"
:
2
,
"decoded_token"
:
" something"
},
"2246"
:
{
"logprob"
:
-10.25104808807373
,
"rank"
:
3
,
"decoded_token"
:
" its"
},
"1261"
:
{
"logprob"
:
-10.25104808807373
,
"rank"
:
4
,
"decoded_token"
:
" a"
},
"1636"
:
{
"logprob"
:
-10.50104808807373
,
"rank"
:
5
,
"decoded_token"
:
" you"
}},
{
"13424"
:
{
"logprob"
:
-0.0003800861886702478
,
"rank"
:
1
,
"decoded_token"
:
" camera"
},
"56268"
:
{
"logprob"
:
-8.250380516052246
,
"rank"
:
2
,
"decoded_token"
:
" viewer"
},
"68439"
:
{
"logprob"
:
-9.250380516052246
,
"rank"
:
3
,
"decoded_token"
:
" photographer"
},
"2965"
:
{
"logprob"
:
-12.375380516052246
,
"rank"
:
4
,
"decoded_token"
:
" person"
},
"37967"
:
{
"logprob"
:
-12.500380516052246
,
"rank"
:
5
,
"decoded_token"
:
" ceiling"
}},
{
"1626"
:
{
"logprob"
:
-0.34197133779525757
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1454"
:
{
"logprob"
:
-1.4669713973999023
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1046"
:
{
"logprob"
:
-3.3419713973999023
,
"rank"
:
3
,
"decoded_token"
:
"."
},
"1338"
:
{
"logprob"
:
-3.9669713973999023
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1935"
:
{
"logprob"
:
-5.966971397399902
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1050"
:
{
"logprob"
:
-0.002148107625544071
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1256"
:
{
"logprob"
:
-6.877148151397705
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-7.127148151397705
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-8.252147674560547
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1049"
:
{
"logprob"
:
-10.752147674560547
,
"rank"
:
5
,
"decoded_token"
:
"1"
}},
{
"1046"
:
{
"logprob"
:
-7.510157047363464e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.437507629394531
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"1626"
:
{
"logprob"
:
-13.437507629394531
,
"rank"
:
3
,
"decoded_token"
:
".
\n
"
},
"48426"
:
{
"logprob"
:
-13.687507629394531
,
"rank"
:
4
,
"decoded_token"
:
".The"
},
"1044"
:
{
"logprob"
:
-14.062507629394531
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1349"
:
{
"logprob"
:
-0.2843300700187683
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-2.034330129623413
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"113465"
:
{
"logprob"
:
-3.534330129623413
,
"rank"
:
3
,
"decoded_token"
:
" Rug"
},
"22468"
:
{
"logprob"
:
-4.409329891204834
,
"rank"
:
4
,
"decoded_token"
:
" Several"
},
"1531"
:
{
"logprob"
:
-4.534329891204834
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"10726"
:
{
"logprob"
:
-1.3984904289245605
,
"rank"
:
1
,
"decoded_token"
:
" scen"
},
"122203"
:
{
"logprob"
:
-1.7734904289245605
,
"rank"
:
2
,
"decoded_token"
:
" rugged"
},
"61082"
:
{
"logprob"
:
-1.7734904289245605
,
"rank"
:
3
,
"decoded_token"
:
" panor"
},
"15375"
:
{
"logprob"
:
-2.5234904289245605
,
"rank"
:
4
,
"decoded_token"
:
" vast"
},
"13770"
:
{
"logprob"
:
-2.6484904289245605
,
"rank"
:
5
,
"decoded_token"
:
" maj"
}},
{
"1290"
:
{
"logprob"
:
-3.099436753473128e-06
,
"rank"
:
1
,
"decoded_token"
:
"ic"
},
"2981"
:
{
"logprob"
:
-13.56250286102295
,
"rank"
:
2
,
"decoded_token"
:
"ically"
},
"1702"
:
{
"logprob"
:
-14.31250286102295
,
"rank"
:
3
,
"decoded_token"
:
"ice"
},
"4965"
:
{
"logprob"
:
-16.625003814697266
,
"rank"
:
4
,
"decoded_token"
:
"etic"
},
"4336"
:
{
"logprob"
:
-16.687503814697266
,
"rank"
:
5
,
"decoded_token"
:
"icro"
}},
{
"3719"
:
{
"logprob"
:
-0.1252945065498352
,
"rank"
:
1
,
"decoded_token"
:
" view"
},
"28035"
:
{
"logprob"
:
-2.8752944469451904
,
"rank"
:
2
,
"decoded_token"
:
" landscape"
},
"24361"
:
{
"logprob"
:
-3.2502944469451904
,
"rank"
:
3
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-5.1252946853637695
,
"rank"
:
4
,
"decoded_token"
:
" mountainous"
},
"1044"
:
{
"logprob"
:
-5.3752946853637695
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1307"
:
{
"logprob"
:
-0.09058280289173126
,
"rank"
:
1
,
"decoded_token"
:
" of"
},
"89995"
:
{
"logprob"
:
-3.465582847595215
,
"rank"
:
2
,
"decoded_token"
:
" showc"
},
"6122"
:
{
"logprob"
:
-3.715582847595215
,
"rank"
:
3
,
"decoded_token"
:
" shows"
},
"6971"
:
{
"logprob"
:
-4.590582847595215
,
"rank"
:
4
,
"decoded_token"
:
" features"
},
"66583"
:
{
"logprob"
:
-5.090582847595215
,
"rank"
:
5
,
"decoded_token"
:
" captures"
}},
{
"122203"
:
{
"logprob"
:
-0.5323622226715088
,
"rank"
:
1
,
"decoded_token"
:
" rugged"
},
"1261"
:
{
"logprob"
:
-2.032362222671509
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"6245"
:
{
"logprob"
:
-2.532362222671509
,
"rank"
:
3
,
"decoded_token"
:
" multiple"
},
"127945"
:
{
"logprob"
:
-3.157362222671509
,
"rank"
:
4
,
"decoded_token"
:
" mountainous"
},
"35463"
:
{
"logprob"
:
-3.532362222671509
,
"rank"
:
5
,
"decoded_token"
:
" mountains"
}},
{
"35463"
:
{
"logprob"
:
-0.6520033478736877
,
"rank"
:
1
,
"decoded_token"
:
" mountains"
},
"1044"
:
{
"logprob"
:
-1.027003288269043
,
"rank"
:
2
,
"decoded_token"
:
","
},
"24361"
:
{
"logprob"
:
-2.527003288269043
,
"rank"
:
3
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-3.902003288269043
,
"rank"
:
4
,
"decoded_token"
:
" mountainous"
},
"11223"
:
{
"logprob"
:
-4.652003288269043
,
"rank"
:
5
,
"decoded_token"
:
" green"
}},
{
"1454"
:
{
"logprob"
:
-0.39697548747062683
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"13875"
:
{
"logprob"
:
-2.146975517272949
,
"rank"
:
2
,
"decoded_token"
:
" covered"
},
"1321"
:
{
"logprob"
:
-2.271975517272949
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"2425"
:
{
"logprob"
:
-3.459475517272949
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"47948"
:
{
"logprob"
:
-4.459475517272949
,
"rank"
:
5
,
"decoded_token"
:
" stretching"
}},
{
"11223"
:
{
"logprob"
:
-1.3947651386260986
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"24880"
:
{
"logprob"
:
-1.8947651386260986
,
"rank"
:
2
,
"decoded_token"
:
" varying"
},
"95746"
:
{
"logprob"
:
-2.0822651386260986
,
"rank"
:
3
,
"decoded_token"
:
" rocky"
},
"1295"
:
{
"logprob"
:
-3.0197651386260986
,
"rank"
:
4
,
"decoded_token"
:
" l"
},
"19546"
:
{
"logprob"
:
-3.0822651386260986
,
"rank"
:
5
,
"decoded_token"
:
" varied"
}},
{
"1321"
:
{
"logprob"
:
-0.8649212121963501
,
"rank"
:
1
,
"decoded_token"
:
" and"
},
"61263"
:
{
"logprob"
:
-1.73992121219635
,
"rank"
:
2
,
"decoded_token"
:
" slopes"
},
"47260"
:
{
"logprob"
:
-1.86492121219635
,
"rank"
:
3
,
"decoded_token"
:
" vegetation"
},
"50373"
:
{
"logprob"
:
-1.98992121219635
,
"rank"
:
4
,
"decoded_token"
:
" patches"
},
"23170"
:
{
"logprob"
:
-3.4899210929870605
,
"rank"
:
5
,
"decoded_token"
:
" grass"
}},
{
"95746"
:
{
"logprob"
:
-0.21662631630897522
,
"rank"
:
1
,
"decoded_token"
:
" rocky"
},
"22980"
:
{
"logprob"
:
-1.9666262865066528
,
"rank"
:
2
,
"decoded_token"
:
" brown"
},
"26549"
:
{
"logprob"
:
-3.8416264057159424
,
"rank"
:
3
,
"decoded_token"
:
" gray"
},
"4266"
:
{
"logprob"
:
-4.216626167297363
,
"rank"
:
4
,
"decoded_token"
:
" bar"
},
"34052"
:
{
"logprob"
:
-4.966626167297363
,
"rank"
:
5
,
"decoded_token"
:
" grey"
}},
{
"24765"
:
{
"logprob"
:
-0.32041722536087036
,
"rank"
:
1
,
"decoded_token"
:
" terrain"
},
"57912"
:
{
"logprob"
:
-1.8204171657562256
,
"rank"
:
2
,
"decoded_token"
:
" terrains"
},
"61263"
:
{
"logprob"
:
-2.6954171657562256
,
"rank"
:
3
,
"decoded_token"
:
" slopes"
},
"84497"
:
{
"logprob"
:
-3.9454171657562256
,
"rank"
:
4
,
"decoded_token"
:
" landscapes"
},
"17764"
:
{
"logprob"
:
-4.695417404174805
,
"rank"
:
5
,
"decoded_token"
:
" surfaces"
}},
{
"2425"
:
{
"logprob"
:
-0.4664109945297241
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1046"
:
{
"logprob"
:
-1.4664109945297241
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-3.4664111137390137
,
"rank"
:
3
,
"decoded_token"
:
","
},
"22923"
:
{
"logprob"
:
-3.9664111137390137
,
"rank"
:
4
,
"decoded_token"
:
" extending"
},
"47948"
:
{
"logprob"
:
-4.091411113739014
,
"rank"
:
5
,
"decoded_token"
:
" stretching"
}},
{
"1261"
:
{
"logprob"
:
-0.015043734572827816
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-4.76504373550415
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"6133"
:
{
"logprob"
:
-6.01504373550415
,
"rank"
:
3
,
"decoded_token"
:
" clear"
},
"1278"
:
{
"logprob"
:
-6.26504373550415
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"16152"
:
{
"logprob"
:
-7.26504373550415
,
"rank"
:
5
,
"decoded_token"
:
" cloud"
}},
{
"6133"
:
{
"logprob"
:
-0.7420746684074402
,
"rank"
:
1
,
"decoded_token"
:
" clear"
},
"18416"
:
{
"logprob"
:
-1.492074728012085
,
"rank"
:
2
,
"decoded_token"
:
" haz"
},
"16152"
:
{
"logprob"
:
-1.992074728012085
,
"rank"
:
3
,
"decoded_token"
:
" cloud"
},
"27254"
:
{
"logprob"
:
-3.367074728012085
,
"rank"
:
4
,
"decoded_token"
:
" partly"
},
"4391"
:
{
"logprob"
:
-3.617074728012085
,
"rank"
:
5
,
"decoded_token"
:
" light"
}},
{
"21283"
:
{
"logprob"
:
-0.007355513051152229
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-5.257355690002441
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-6.382355690002441
,
"rank"
:
3
,
"decoded_token"
:
","
},
"1505"
:
{
"logprob"
:
-8.257355690002441
,
"rank"
:
4
,
"decoded_token"
:
" or"
},
"3950"
:
{
"logprob"
:
-10.132355690002441
,
"rank"
:
5
,
"decoded_token"
:
" day"
}},
{
"1046"
:
{
"logprob"
:
-0.01126158982515335
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1626"
:
{
"logprob"
:
-4.636261463165283
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1338"
:
{
"logprob"
:
-7.761261463165283
,
"rank"
:
3
,
"decoded_token"
:
".
\n\n
"
},
"1044"
:
{
"logprob"
:
-7.761261463165283
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1395"
:
{
"logprob"
:
-8.011261940002441
,
"rank"
:
5
,
"decoded_token"
:
" is"
}},
{
"2"
:
{
"logprob"
:
-0.00709608756005764
,
"rank"
:
1
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-5.007096290588379
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1256"
:
{
"logprob"
:
-8.132096290588379
,
"rank"
:
3
,
"decoded_token"
:
" "
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
1395
,
28528
,
1408
,
1261
,
32656
,
11237
,
1044
,
7283
,
2015
,
1513
,
1278
,
13424
,
1626
,
1050
,
1046
,
1349
,
122203
,
24361
,
28035
,
1454
,
11223
,
1321
,
95746
,
24765
,
2425
,
1261
,
6133
,
21283
,
1626
,
1051
,
1046
,
1349
,
2965
,
1294
,
1261
,
4804
,
4250
,
12006
,
4302
,
48049
,
4837
,
1261
,
29397
,
1435
,
22140
,
21457
,
22196
,
1626
,
1052
,
1046
,
1349
,
53301
,
59396
,
3549
,
1294
,
1261
,
12097
,
1044
,
121040
,
1536
,
11223
,
23170
,
1321
,
17744
,
34941
,
16429
,
2425
,
1261
,
10991
,
21283
,
1046
,
2
],
"1. A black dog is lying on a wooden floor, looking up at the camera.
\n
2. A rugged mountain landscape with green and rocky terrain under a clear sky.
\n
3. A person in a red swimsuit walks along a beach as waves crash nearby.
\n
4. A winding gravel path in a park, bordered by green grass and blooming trees under a blue sky."
,
[{
"1049"
:
{
"logprob"
:
-0.17000193893909454
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"11745"
:
{
"logprob"
:
-1.9200019836425781
,
"rank"
:
2
,
"decoded_token"
:
"Here"
},
"69957"
:
{
"logprob"
:
-4.920001983642578
,
"rank"
:
3
,
"decoded_token"
:
"Sure"
},
"117991"
:
{
"logprob"
:
-7.295001983642578
,
"rank"
:
4
,
"decoded_token"
:
"Certain"
},
"1784"
:
{
"logprob"
:
-7.295001983642578
,
"rank"
:
5
,
"decoded_token"
:
"The"
}},
{
"1046"
:
{
"logprob"
:
-1.597391747054644e-05
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1041"
:
{
"logprob"
:
-11.500016212463379
,
"rank"
:
2
,
"decoded_token"
:
")"
},
"1058"
:
{
"logprob"
:
-13.062516212463379
,
"rank"
:
3
,
"decoded_token"
:
":"
},
"3590"
:
{
"logprob"
:
-13.750016212463379
,
"rank"
:
4
,
"decoded_token"
:
".A"
},
"48426"
:
{
"logprob"
:
-14.312516212463379
,
"rank"
:
5
,
"decoded_token"
:
".The"
}},
{
"1349"
:
{
"logprob"
:
-0.07567699253559113
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1531"
:
{
"logprob"
:
-3.075676918029785
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1603"
:
{
"logprob"
:
-3.950676918029785
,
"rank"
:
3
,
"decoded_token"
:
" **"
},
"2409"
:
{
"logprob"
:
-6.075676918029785
,
"rank"
:
4
,
"decoded_token"
:
" This"
},
"8479"
:
{
"logprob"
:
-6.575676918029785
,
"rank"
:
5
,
"decoded_token"
:
" Black"
}},
{
"7244"
:
{
"logprob"
:
-0.06906593590974808
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"16450"
:
{
"logprob"
:
-3.694066047668457
,
"rank"
:
2
,
"decoded_token"
:
" sle"
},
"6231"
:
{
"logprob"
:
-4.506566047668457
,
"rank"
:
3
,
"decoded_token"
:
" close"
},
"4329"
:
{
"logprob"
:
-4.944066047668457
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"8500"
:
{
"logprob"
:
-5.256566047668457
,
"rank"
:
5
,
"decoded_token"
:
" dark"
}},
{
"10575"
:
{
"logprob"
:
-0.11913803219795227
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.24413800239563
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-5.494138240814209
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"28404"
:
{
"logprob"
:
-7.181638240814209
,
"rank"
:
4
,
"decoded_token"
:
" pup"
},
"8636"
:
{
"logprob"
:
-7.869138240814209
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"1395"
:
{
"logprob"
:
-0.782707154750824
,
"rank"
:
1
,
"decoded_token"
:
" is"
},
"22524"
:
{
"logprob"
:
-1.1577072143554688
,
"rank"
:
2
,
"decoded_token"
:
" lies"
},
"1454"
:
{
"logprob"
:
-2.9077072143554688
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"10637"
:
{
"logprob"
:
-3.0327072143554688
,
"rank"
:
4
,
"decoded_token"
:
" looks"
},
"28528"
:
{
"logprob"
:
-3.5327072143554688
,
"rank"
:
5
,
"decoded_token"
:
" lying"
}},
{
"28528"
:
{
"logprob"
:
-0.3443163335323334
,
"rank"
:
1
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-2.094316244125366
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"60700"
:
{
"logprob"
:
-2.844316244125366
,
"rank"
:
3
,
"decoded_token"
:
" laying"
},
"38235"
:
{
"logprob"
:
-3.344316244125366
,
"rank"
:
4
,
"decoded_token"
:
" resting"
},
"18970"
:
{
"logprob"
:
-3.469316244125366
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"1408"
:
{
"logprob"
:
-0.29093095660209656
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3151"
:
{
"logprob"
:
-1.415930986404419
,
"rank"
:
2
,
"decoded_token"
:
" down"
},
"41132"
:
{
"logprob"
:
-6.16593074798584
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"1321"
:
{
"logprob"
:
-6.85343074798584
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"14038"
:
{
"logprob"
:
-6.97843074798584
,
"rank"
:
5
,
"decoded_token"
:
" flat"
}},
{
"1261"
:
{
"logprob"
:
-0.05553353577852249
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-3.6805336475372314
,
"rank"
:
2
,
"decoded_token"
:
" its"
},
"32656"
:
{
"logprob"
:
-3.8055336475372314
,
"rank"
:
3
,
"decoded_token"
:
" wooden"
},
"1278"
:
{
"logprob"
:
-5.305533409118652
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"3977"
:
{
"logprob"
:
-7.430533409118652
,
"rank"
:
5
,
"decoded_token"
:
" top"
}},
{
"32656"
:
{
"logprob"
:
-0.039505477994680405
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"3403"
:
{
"logprob"
:
-3.9145054817199707
,
"rank"
:
2
,
"decoded_token"
:
" text"
},
"44130"
:
{
"logprob"
:
-4.414505481719971
,
"rank"
:
3
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-5.914505481719971
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"17253"
:
{
"logprob"
:
-6.539505481719971
,
"rank"
:
5
,
"decoded_token"
:
" weather"
}},
{
"11237"
:
{
"logprob"
:
-0.373188853263855
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-1.248188853263855
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"1615"
:
{
"logprob"
:
-4.2481889724731445
,
"rank"
:
3
,
"decoded_token"
:
" pl"
},
"3403"
:
{
"logprob"
:
-5.6231889724731445
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"28984"
:
{
"logprob"
:
-5.9981889724731445
,
"rank"
:
5
,
"decoded_token"
:
" deck"
}},
{
"1044"
:
{
"logprob"
:
-1.378434181213379
,
"rank"
:
3
,
"decoded_token"
:
","
},
"7283"
:
{
"logprob"
:
-1.378434181213379
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"1626"
:
{
"logprob"
:
-1.378434181213379
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1321"
:
{
"logprob"
:
-2.378434181213379
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"1454"
:
{
"logprob"
:
-2.628434181213379
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"7283"
:
{
"logprob"
:
-0.17630912363529205
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"11589"
:
{
"logprob"
:
-2.551309108734131
,
"rank"
:
2
,
"decoded_token"
:
" gaz"
},
"35542"
:
{
"logprob"
:
-2.676309108734131
,
"rank"
:
3
,
"decoded_token"
:
" staring"
},
"22116"
:
{
"logprob"
:
-6.238809108734131
,
"rank"
:
4
,
"decoded_token"
:
" facing"
},
"11735"
:
{
"logprob"
:
-6.488809108734131
,
"rank"
:
5
,
"decoded_token"
:
" giving"
}},
{
"2015"
:
{
"logprob"
:
-0.8436563014984131
,
"rank"
:
1
,
"decoded_token"
:
" up"
},
"7655"
:
{
"logprob"
:
-1.343656301498413
,
"rank"
:
2
,
"decoded_token"
:
" directly"
},
"74606"
:
{
"logprob"
:
-1.718656301498413
,
"rank"
:
3
,
"decoded_token"
:
" upwards"
},
"40022"
:
{
"logprob"
:
-2.593656301498413
,
"rank"
:
4
,
"decoded_token"
:
" upward"
},
"11521"
:
{
"logprob"
:
-4.406156539916992
,
"rank"
:
5
,
"decoded_token"
:
" straight"
}},
{
"1513"
:
{
"logprob"
:
-0.45780688524246216
,
"rank"
:
1
,
"decoded_token"
:
" at"
},
"1626"
:
{
"logprob"
:
-1.7078068256378174
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1454"
:
{
"logprob"
:
-2.3328068256378174
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1935"
:
{
"logprob"
:
-3.5828068256378174
,
"rank"
:
4
,
"decoded_token"
:
" int"
},
"41132"
:
{
"logprob"
:
-3.9578068256378174
,
"rank"
:
5
,
"decoded_token"
:
" attent"
}},
{
"1278"
:
{
"logprob"
:
-0.0004164305282756686
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"4433"
:
{
"logprob"
:
-8.00041675567627
,
"rank"
:
2
,
"decoded_token"
:
" something"
},
"1261"
:
{
"logprob"
:
-10.50041675567627
,
"rank"
:
3
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-10.87541675567627
,
"rank"
:
4
,
"decoded_token"
:
" its"
},
"1636"
:
{
"logprob"
:
-11.37541675567627
,
"rank"
:
5
,
"decoded_token"
:
" you"
}},
{
"13424"
:
{
"logprob"
:
-0.000399033073335886
,
"rank"
:
1
,
"decoded_token"
:
" camera"
},
"56268"
:
{
"logprob"
:
-8.125398635864258
,
"rank"
:
2
,
"decoded_token"
:
" viewer"
},
"68439"
:
{
"logprob"
:
-9.500398635864258
,
"rank"
:
3
,
"decoded_token"
:
" photographer"
},
"37967"
:
{
"logprob"
:
-12.000398635864258
,
"rank"
:
4
,
"decoded_token"
:
" ceiling"
},
"2965"
:
{
"logprob"
:
-12.312898635864258
,
"rank"
:
5
,
"decoded_token"
:
" person"
}},
{
"1626"
:
{
"logprob"
:
-0.10298559814691544
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1046"
:
{
"logprob"
:
-2.9779856204986572
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-3.2279856204986572
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1338"
:
{
"logprob"
:
-5.227985382080078
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1935"
:
{
"logprob"
:
-6.852985382080078
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1050"
:
{
"logprob"
:
-0.002897590398788452
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1256"
:
{
"logprob"
:
-6.5028977394104
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-6.6278977394104
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-9.877897262573242
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1009"
:
{
"logprob"
:
-11.627897262573242
,
"rank"
:
5
,
"decoded_token"
:
"
\t
"
}},
{
"1046"
:
{
"logprob"
:
-1.5497195136049413e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-14.875001907348633
,
"rank"
:
2
,
"decoded_token"
:
","
},
"3590"
:
{
"logprob"
:
-15.000001907348633
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"2247"
:
{
"logprob"
:
-15.125001907348633
,
"rank"
:
4
,
"decoded_token"
:
" ."
},
"1058"
:
{
"logprob"
:
-15.375001907348633
,
"rank"
:
5
,
"decoded_token"
:
":"
}},
{
"1349"
:
{
"logprob"
:
-0.6107801198959351
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-1.360780119895935
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"113465"
:
{
"logprob"
:
-2.3607802391052246
,
"rank"
:
3
,
"decoded_token"
:
" Rug"
},
"27260"
:
{
"logprob"
:
-3.7357802391052246
,
"rank"
:
4
,
"decoded_token"
:
" Mountain"
},
"1531"
:
{
"logprob"
:
-4.485780239105225
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"122203"
:
{
"logprob"
:
-0.8547073602676392
,
"rank"
:
1
,
"decoded_token"
:
" rugged"
},
"15375"
:
{
"logprob"
:
-2.1047072410583496
,
"rank"
:
2
,
"decoded_token"
:
" vast"
},
"10726"
:
{
"logprob"
:
-2.1047072410583496
,
"rank"
:
3
,
"decoded_token"
:
" scen"
},
"61082"
:
{
"logprob"
:
-2.6047072410583496
,
"rank"
:
4
,
"decoded_token"
:
" panor"
},
"2965"
:
{
"logprob"
:
-3.2922072410583496
,
"rank"
:
5
,
"decoded_token"
:
" person"
}},
{
"24361"
:
{
"logprob"
:
-0.41217130422592163
,
"rank"
:
1
,
"decoded_token"
:
" mountain"
},
"1044"
:
{
"logprob"
:
-1.6621713638305664
,
"rank"
:
2
,
"decoded_token"
:
","
},
"127945"
:
{
"logprob"
:
-2.6621713638305664
,
"rank"
:
3
,
"decoded_token"
:
" mountainous"
},
"28035"
:
{
"logprob"
:
-3.5371713638305664
,
"rank"
:
4
,
"decoded_token"
:
" landscape"
},
"1321"
:
{
"logprob"
:
-3.6621713638305664
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"28035"
:
{
"logprob"
:
-0.6676621437072754
,
"rank"
:
1
,
"decoded_token"
:
" landscape"
},
"4521"
:
{
"logprob"
:
-0.7926621437072754
,
"rank"
:
2
,
"decoded_token"
:
" range"
},
"24765"
:
{
"logprob"
:
-4.542662143707275
,
"rank"
:
3
,
"decoded_token"
:
" terrain"
},
"13327"
:
{
"logprob"
:
-5.167662143707275
,
"rank"
:
4
,
"decoded_token"
:
" scene"
},
"12248"
:
{
"logprob"
:
-5.167662143707275
,
"rank"
:
5
,
"decoded_token"
:
" peak"
}},
{
"1454"
:
{
"logprob"
:
-0.31015345454216003
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"6971"
:
{
"logprob"
:
-2.4351534843444824
,
"rank"
:
2
,
"decoded_token"
:
" features"
},
"94973"
:
{
"logprob"
:
-3.3101534843444824
,
"rank"
:
3
,
"decoded_token"
:
" stretches"
},
"89995"
:
{
"logprob"
:
-3.4351534843444824
,
"rank"
:
4
,
"decoded_token"
:
" showc"
},
"1395"
:
{
"logprob"
:
-3.5601534843444824
,
"rank"
:
5
,
"decoded_token"
:
" is"
}},
{
"11223"
:
{
"logprob"
:
-1.547694206237793
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"95746"
:
{
"logprob"
:
-1.922694206237793
,
"rank"
:
2
,
"decoded_token"
:
" rocky"
},
"27469"
:
{
"logprob"
:
-2.172694206237793
,
"rank"
:
3
,
"decoded_token"
:
" peaks"
},
"6245"
:
{
"logprob"
:
-2.297694206237793
,
"rank"
:
4
,
"decoded_token"
:
" multiple"
},
"47147"
:
{
"logprob"
:
-2.360194206237793
,
"rank"
:
5
,
"decoded_token"
:
" steep"
}},
{
"1321"
:
{
"logprob"
:
-0.9617817401885986
,
"rank"
:
1
,
"decoded_token"
:
" and"
},
"61263"
:
{
"logprob"
:
-1.3367817401885986
,
"rank"
:
2
,
"decoded_token"
:
" slopes"
},
"51187"
:
{
"logprob"
:
-2.3367817401885986
,
"rank"
:
3
,
"decoded_token"
:
" hills"
},
"47260"
:
{
"logprob"
:
-2.3367817401885986
,
"rank"
:
4
,
"decoded_token"
:
" vegetation"
},
"50373"
:
{
"logprob"
:
-2.7117817401885986
,
"rank"
:
5
,
"decoded_token"
:
" patches"
}},
{
"95746"
:
{
"logprob"
:
-0.11686273664236069
,
"rank"
:
1
,
"decoded_token"
:
" rocky"
},
"22980"
:
{
"logprob"
:
-2.7418627738952637
,
"rank"
:
2
,
"decoded_token"
:
" brown"
},
"4266"
:
{
"logprob"
:
-3.8668627738952637
,
"rank"
:
3
,
"decoded_token"
:
" bar"
},
"26549"
:
{
"logprob"
:
-4.491862773895264
,
"rank"
:
4
,
"decoded_token"
:
" gray"
},
"9091"
:
{
"logprob"
:
-5.366862773895264
,
"rank"
:
5
,
"decoded_token"
:
" rock"
}},
{
"24765"
:
{
"logprob"
:
-0.22640009224414825
,
"rank"
:
1
,
"decoded_token"
:
" terrain"
},
"57912"
:
{
"logprob"
:
-2.476400136947632
,
"rank"
:
2
,
"decoded_token"
:
" terrains"
},
"61263"
:
{
"logprob"
:
-2.726400136947632
,
"rank"
:
3
,
"decoded_token"
:
" slopes"
},
"51187"
:
{
"logprob"
:
-3.851400136947632
,
"rank"
:
4
,
"decoded_token"
:
" hills"
},
"27469"
:
{
"logprob"
:
-3.976400136947632
,
"rank"
:
5
,
"decoded_token"
:
" peaks"
}},
{
"2425"
:
{
"logprob"
:
-0.7823817133903503
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1626"
:
{
"logprob"
:
-1.1573817729949951
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"94973"
:
{
"logprob"
:
-2.657381772994995
,
"rank"
:
3
,
"decoded_token"
:
" stretches"
},
"1395"
:
{
"logprob"
:
-2.782381772994995
,
"rank"
:
4
,
"decoded_token"
:
" is"
},
"7038"
:
{
"logprob"
:
-3.532381772994995
,
"rank"
:
5
,
"decoded_token"
:
" extends"
}},
{
"1261"
:
{
"logprob"
:
-0.016132064163684845
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"6133"
:
{
"logprob"
:
-5.39113187789917
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"1420"
:
{
"logprob"
:
-5.39113187789917
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"1278"
:
{
"logprob"
:
-6.01613187789917
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"16152"
:
{
"logprob"
:
-6.26613187789917
,
"rank"
:
5
,
"decoded_token"
:
" cloud"
}},
{
"6133"
:
{
"logprob"
:
-0.44541382789611816
,
"rank"
:
1
,
"decoded_token"
:
" clear"
},
"16152"
:
{
"logprob"
:
-2.070413827896118
,
"rank"
:
2
,
"decoded_token"
:
" cloud"
},
"18416"
:
{
"logprob"
:
-2.320413827896118
,
"rank"
:
3
,
"decoded_token"
:
" haz"
},
"27254"
:
{
"logprob"
:
-3.195413827896118
,
"rank"
:
4
,
"decoded_token"
:
" partly"
},
"10991"
:
{
"logprob"
:
-3.320413827896118
,
"rank"
:
5
,
"decoded_token"
:
" blue"
}},
{
"21283"
:
{
"logprob"
:
-0.003768961876630783
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-5.7537689208984375
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-7.6287689208984375
,
"rank"
:
3
,
"decoded_token"
:
","
},
"1505"
:
{
"logprob"
:
-10.753768920898438
,
"rank"
:
4
,
"decoded_token"
:
" or"
},
"3044"
:
{
"logprob"
:
-11.128768920898438
,
"rank"
:
5
,
"decoded_token"
:
" sk"
}},
{
"1626"
:
{
"logprob"
:
-0.0008177988929674029
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1046"
:
{
"logprob"
:
-7.375817775726318
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"1395"
:
{
"logprob"
:
-9.750818252563477
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"1010"
:
{
"logprob"
:
-10.125818252563477
,
"rank"
:
4
,
"decoded_token"
:
"
\n
"
},
"1044"
:
{
"logprob"
:
-10.750818252563477
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1051"
:
{
"logprob"
:
-0.00013457823661156
,
"rank"
:
1
,
"decoded_token"
:
"3"
},
"1052"
:
{
"logprob"
:
-9.125134468078613
,
"rank"
:
2
,
"decoded_token"
:
"4"
},
"1256"
:
{
"logprob"
:
-11.375134468078613
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1050"
:
{
"logprob"
:
-11.875134468078613
,
"rank"
:
4
,
"decoded_token"
:
"2"
},
"1049"
:
{
"logprob"
:
-13.000134468078613
,
"rank"
:
5
,
"decoded_token"
:
"1"
}},
{
"1046"
:
{
"logprob"
:
-7.152555099310121e-07
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-14.875000953674316
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"48426"
:
{
"logprob"
:
-15.937500953674316
,
"rank"
:
3
,
"decoded_token"
:
".The"
},
"1349"
:
{
"logprob"
:
-17.0
,
"rank"
:
4
,
"decoded_token"
:
" A"
},
"1338"
:
{
"logprob"
:
-17.3125
,
"rank"
:
5
,
"decoded_token"
:
".
\n\n
"
}},
{
"1349"
:
{
"logprob"
:
-0.03193942829966545
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"10638"
:
{
"logprob"
:
-4.406939506530762
,
"rank"
:
2
,
"decoded_token"
:
" Two"
},
"2048"
:
{
"logprob"
:
-5.031939506530762
,
"rank"
:
3
,
"decoded_token"
:
" An"
},
"1488"
:
{
"logprob"
:
-5.156939506530762
,
"rank"
:
4
,
"decoded_token"
:
" W"
},
"15035"
:
{
"logprob"
:
-5.906939506530762
,
"rank"
:
5
,
"decoded_token"
:
" People"
}},
{
"2965"
:
{
"logprob"
:
-0.41655251383781433
,
"rank"
:
1
,
"decoded_token"
:
" person"
},
"92731"
:
{
"logprob"
:
-1.5415525436401367
,
"rank"
:
2
,
"decoded_token"
:
" lone"
},
"79013"
:
{
"logprob"
:
-2.7915525436401367
,
"rank"
:
3
,
"decoded_token"
:
" solitary"
},
"29397"
:
{
"logprob"
:
-3.5415525436401367
,
"rank"
:
4
,
"decoded_token"
:
" beach"
},
"2169"
:
{
"logprob"
:
-4.729052543640137
,
"rank"
:
5
,
"decoded_token"
:
" ser"
}},
{
"1294"
:
{
"logprob"
:
-0.9845026135444641
,
"rank"
:
1
,
"decoded_token"
:
" in"
},
"1395"
:
{
"logprob"
:
-1.2345025539398193
,
"rank"
:
2
,
"decoded_token"
:
" is"
},
"48049"
:
{
"logprob"
:
-1.8595025539398193
,
"rank"
:
3
,
"decoded_token"
:
" walks"
},
"23737"
:
{
"logprob"
:
-2.2345025539398193
,
"rank"
:
4
,
"decoded_token"
:
" stands"
},
"1285"
:
{
"logprob"
:
-2.8595025539398193
,
"rank"
:
5
,
"decoded_token"
:
" w"
}},
{
"1261"
:
{
"logprob"
:
-0.32012784481048584
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"4804"
:
{
"logprob"
:
-1.3201278448104858
,
"rank"
:
2
,
"decoded_token"
:
" red"
},
"1420"
:
{
"logprob"
:
-5.820127964019775
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"64031"
:
{
"logprob"
:
-6.570127964019775
,
"rank"
:
4
,
"decoded_token"
:
" swim"
},
"18168"
:
{
"logprob"
:
-6.695127964019775
,
"rank"
:
5
,
"decoded_token"
:
" bright"
}},
{
"4804"
:
{
"logprob"
:
-0.10999592393636703
,
"rank"
:
1
,
"decoded_token"
:
" red"
},
"1285"
:
{
"logprob"
:
-2.3599958419799805
,
"rank"
:
2
,
"decoded_token"
:
" w"
},
"4250"
:
{
"logprob"
:
-5.6099958419799805
,
"rank"
:
3
,
"decoded_token"
:
" sw"
},
"18168"
:
{
"logprob"
:
-6.0474958419799805
,
"rank"
:
4
,
"decoded_token"
:
" bright"
},
"18258"
:
{
"logprob"
:
-6.4224958419799805
,
"rank"
:
5
,
"decoded_token"
:
" wet"
}},
{
"4250"
:
{
"logprob"
:
-0.2469252496957779
,
"rank"
:
1
,
"decoded_token"
:
" sw"
},
"1285"
:
{
"logprob"
:
-2.3719253540039062
,
"rank"
:
2
,
"decoded_token"
:
" w"
},
"64031"
:
{
"logprob"
:
-2.7469253540039062
,
"rank"
:
3
,
"decoded_token"
:
" swim"
},
"17513"
:
{
"logprob"
:
-3.2469253540039062
,
"rank"
:
4
,
"decoded_token"
:
" suit"
},
"75948"
:
{
"logprob"
:
-4.371925354003906
,
"rank"
:
5
,
"decoded_token"
:
" outfit"
}},
{
"12006"
:
{
"logprob"
:
-5.722029527532868e-06
,
"rank"
:
1
,
"decoded_token"
:
"ims"
},
"25763"
:
{
"logprob"
:
-12.750005722045898
,
"rank"
:
2
,
"decoded_token"
:
"immer"
},
"7552"
:
{
"logprob"
:
-13.687505722045898
,
"rank"
:
3
,
"decoded_token"
:
"imm"
},
"2097"
:
{
"logprob"
:
-16.6875057220459
,
"rank"
:
4
,
"decoded_token"
:
"ins"
},
"19523"
:
{
"logprob"
:
-16.7500057220459
,
"rank"
:
5
,
"decoded_token"
:
"imb"
}},
{
"4302"
:
{
"logprob"
:
-1.8000440832111053e-05
,
"rank"
:
1
,
"decoded_token"
:
"uit"
},
"17513"
:
{
"logprob"
:
-11.875018119812012
,
"rank"
:
2
,
"decoded_token"
:
" suit"
},
"8036"
:
{
"logprob"
:
-13.250018119812012
,
"rank"
:
3
,
"decoded_token"
:
"irt"
},
"36953"
:
{
"logprob"
:
-13.500018119812012
,
"rank"
:
4
,
"decoded_token"
:
"uiten"
},
"1276"
:
{
"logprob"
:
-14.437518119812012
,
"rank"
:
5
,
"decoded_token"
:
"it"
}},
{
"48049"
:
{
"logprob"
:
-0.41766560077667236
,
"rank"
:
1
,
"decoded_token"
:
" walks"
},
"1395"
:
{
"logprob"
:
-1.4176656007766724
,
"rank"
:
2
,
"decoded_token"
:
" is"
},
"19710"
:
{
"logprob"
:
-2.792665481567383
,
"rank"
:
3
,
"decoded_token"
:
" walking"
},
"23737"
:
{
"logprob"
:
-3.917665481567383
,
"rank"
:
4
,
"decoded_token"
:
" stands"
},
"1285"
:
{
"logprob"
:
-4.292665481567383
,
"rank"
:
5
,
"decoded_token"
:
" w"
}},
{
"4837"
:
{
"logprob"
:
-0.002689199522137642
,
"rank"
:
1
,
"decoded_token"
:
" along"
},
"9412"
:
{
"logprob"
:
-6.627689361572266
,
"rank"
:
2
,
"decoded_token"
:
" alone"
},
"6117"
:
{
"logprob"
:
-7.377689361572266
,
"rank"
:
3
,
"decoded_token"
:
" near"
},
"1408"
:
{
"logprob"
:
-8.002689361572266
,
"rank"
:
4
,
"decoded_token"
:
" on"
},
"2203"
:
{
"logprob"
:
-8.377689361572266
,
"rank"
:
5
,
"decoded_token"
:
" into"
}},
{
"1261"
:
{
"logprob"
:
-0.38749611377716064
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1278"
:
{
"logprob"
:
-1.1374961137771606
,
"rank"
:
2
,
"decoded_token"
:
" the"
},
"1420"
:
{
"logprob"
:
-7.387495994567871
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"100991"
:
{
"logprob"
:
-13.949995994567871
,
"rank"
:
4
,
"decoded_token"
:
" sandy"
},
"18258"
:
{
"logprob"
:
-14.512495994567871
,
"rank"
:
5
,
"decoded_token"
:
" wet"
}},
{
"29397"
:
{
"logprob"
:
-0.5292408466339111
,
"rank"
:
1
,
"decoded_token"
:
" beach"
},
"100991"
:
{
"logprob"
:
-0.9042408466339111
,
"rank"
:
2
,
"decoded_token"
:
" sandy"
},
"1627"
:
{
"logprob"
:
-6.029240608215332
,
"rank"
:
3
,
"decoded_token"
:
" sh"
},
"46422"
:
{
"logprob"
:
-6.529240608215332
,
"rank"
:
4
,
"decoded_token"
:
" shore"
},
"2169"
:
{
"logprob"
:
-7.779240608215332
,
"rank"
:
5
,
"decoded_token"
:
" ser"
}},
{
"1435"
:
{
"logprob"
:
-0.29965779185295105
,
"rank"
:
1
,
"decoded_token"
:
" as"
},
"1454"
:
{
"logprob"
:
-1.6746578216552734
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1513"
:
{
"logprob"
:
-3.7996578216552734
,
"rank"
:
3
,
"decoded_token"
:
" at"
},
"3016"
:
{
"logprob"
:
-3.7996578216552734
,
"rank"
:
4
,
"decoded_token"
:
" while"
},
"6117"
:
{
"logprob"
:
-4.799657821655273
,
"rank"
:
5
,
"decoded_token"
:
" near"
}},
{
"22140"
:
{
"logprob"
:
-0.015346773900091648
,
"rank"
:
1
,
"decoded_token"
:
" waves"
},
"1261"
:
{
"logprob"
:
-4.515347003936768
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1278"
:
{
"logprob"
:
-6.140347003936768
,
"rank"
:
3
,
"decoded_token"
:
" the"
},
"27208"
:
{
"logprob"
:
-6.890347003936768
,
"rank"
:
4
,
"decoded_token"
:
" ocean"
},
"4329"
:
{
"logprob"
:
-7.265347003936768
,
"rank"
:
5
,
"decoded_token"
:
" large"
}},
{
"21457"
:
{
"logprob"
:
-0.013234862126410007
,
"rank"
:
1
,
"decoded_token"
:
" crash"
},
"33168"
:
{
"logprob"
:
-5.138235092163086
,
"rank"
:
2
,
"decoded_token"
:
" gently"
},
"10401"
:
{
"logprob"
:
-5.950735092163086
,
"rank"
:
3
,
"decoded_token"
:
" roll"
},
"4323"
:
{
"logprob"
:
-6.700735092163086
,
"rank"
:
4
,
"decoded_token"
:
" break"
},
"5125"
:
{
"logprob"
:
-7.138235092163086
,
"rank"
:
5
,
"decoded_token"
:
" approach"
}},
{
"22196"
:
{
"logprob"
:
-0.060372594743967056
,
"rank"
:
1
,
"decoded_token"
:
" nearby"
},
"6117"
:
{
"logprob"
:
-3.3103725910186768
,
"rank"
:
2
,
"decoded_token"
:
" near"
},
"1294"
:
{
"logprob"
:
-4.435372829437256
,
"rank"
:
3
,
"decoded_token"
:
" in"
},
"25644"
:
{
"logprob"
:
-6.310372829437256
,
"rank"
:
4
,
"decoded_token"
:
" beside"
},
"1321"
:
{
"logprob"
:
-6.560372829437256
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1626"
:
{
"logprob"
:
-0.005290080793201923
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1294"
:
{
"logprob"
:
-6.5052900314331055
,
"rank"
:
2
,
"decoded_token"
:
" in"
},
"1044"
:
{
"logprob"
:
-7.0052900314331055
,
"rank"
:
3
,
"decoded_token"
:
","
},
"1321"
:
{
"logprob"
:
-7.1302900314331055
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"1513"
:
{
"logprob"
:
-7.2552900314331055
,
"rank"
:
5
,
"decoded_token"
:
" at"
}},
{
"1052"
:
{
"logprob"
:
-7.748573807475623e-06
,
"rank"
:
1
,
"decoded_token"
:
"4"
},
"1051"
:
{
"logprob"
:
-12.562507629394531
,
"rank"
:
2
,
"decoded_token"
:
"3"
},
"1053"
:
{
"logprob"
:
-13.125007629394531
,
"rank"
:
3
,
"decoded_token"
:
"5"
},
"1256"
:
{
"logprob"
:
-14.125007629394531
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1049"
:
{
"logprob"
:
-14.312507629394531
,
"rank"
:
5
,
"decoded_token"
:
"1"
}},
{
"1046"
:
{
"logprob"
:
-1.2993727978027891e-05
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-12.62501335144043
,
"rank"
:
2
,
"decoded_token"
:
","
},
"3590"
:
{
"logprob"
:
-12.75001335144043
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"1058"
:
{
"logprob"
:
-13.00001335144043
,
"rank"
:
4
,
"decoded_token"
:
":"
},
"2247"
:
{
"logprob"
:
-13.37501335144043
,
"rank"
:
5
,
"decoded_token"
:
" ."
}},
{
"1349"
:
{
"logprob"
:
-0.00046957432641647756
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"2048"
:
{
"logprob"
:
-8.250469207763672
,
"rank"
:
2
,
"decoded_token"
:
" An"
},
"1488"
:
{
"logprob"
:
-10.125469207763672
,
"rank"
:
3
,
"decoded_token"
:
" W"
},
"2409"
:
{
"logprob"
:
-10.375469207763672
,
"rank"
:
4
,
"decoded_token"
:
" This"
},
"12232"
:
{
"logprob"
:
-10.500469207763672
,
"rank"
:
5
,
"decoded_token"
:
" Gra"
}},
{
"53301"
:
{
"logprob"
:
-0.35120296478271484
,
"rank"
:
1
,
"decoded_token"
:
" winding"
},
"59396"
:
{
"logprob"
:
-1.8512029647827148
,
"rank"
:
2
,
"decoded_token"
:
" gravel"
},
"2169"
:
{
"logprob"
:
-2.476202964782715
,
"rank"
:
3
,
"decoded_token"
:
" ser"
},
"54742"
:
{
"logprob"
:
-3.851202964782715
,
"rank"
:
4
,
"decoded_token"
:
" peaceful"
},
"43536"
:
{
"logprob"
:
-5.101202964782715
,
"rank"
:
5
,
"decoded_token"
:
" curved"
}},
{
"59396"
:
{
"logprob"
:
-0.2955280840396881
,
"rank"
:
1
,
"decoded_token"
:
" gravel"
},
"3549"
:
{
"logprob"
:
-1.6705280542373657
,
"rank"
:
2
,
"decoded_token"
:
" path"
},
"14801"
:
{
"logprob"
:
-2.7955281734466553
,
"rank"
:
3
,
"decoded_token"
:
" pathway"
},
"1044"
:
{
"logprob"
:
-6.420527935028076
,
"rank"
:
4
,
"decoded_token"
:
","
},
"18341"
:
{
"logprob"
:
-6.670527935028076
,
"rank"
:
5
,
"decoded_token"
:
" pathways"
}},
{
"3549"
:
{
"logprob"
:
-0.03408379852771759
,
"rank"
:
1
,
"decoded_token"
:
" path"
},
"14801"
:
{
"logprob"
:
-3.409083843231201
,
"rank"
:
2
,
"decoded_token"
:
" pathway"
},
"18341"
:
{
"logprob"
:
-8.284083366394043
,
"rank"
:
3
,
"decoded_token"
:
" pathways"
},
"1505"
:
{
"logprob"
:
-9.534083366394043
,
"rank"
:
4
,
"decoded_token"
:
" or"
},
"7368"
:
{
"logprob"
:
-10.659083366394043
,
"rank"
:
5
,
"decoded_token"
:
"path"
}},
{
"1294"
:
{
"logprob"
:
-1.0857839584350586
,
"rank"
:
1
,
"decoded_token"
:
" in"
},
"13335"
:
{
"logprob"
:
-1.4607839584350586
,
"rank"
:
2
,
"decoded_token"
:
" leads"
},
"2645"
:
{
"logprob"
:
-1.9607839584350586
,
"rank"
:
3
,
"decoded_token"
:
" through"
},
"29817"
:
{
"logprob"
:
-2.4607839584350586
,
"rank"
:
4
,
"decoded_token"
:
" surrounded"
},
"22416"
:
{
"logprob"
:
-3.2107839584350586
,
"rank"
:
5
,
"decoded_token"
:
" curves"
}},
{
"1261"
:
{
"logprob"
:
-0.00011705666838679463
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-9.500117301940918
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"1278"
:
{
"logprob"
:
-10.250117301940918
,
"rank"
:
3
,
"decoded_token"
:
" the"
},
"2549"
:
{
"logprob"
:
-12.750117301940918
,
"rank"
:
4
,
"decoded_token"
:
" what"
},
"11223"
:
{
"logprob"
:
-13.750117301940918
,
"rank"
:
5
,
"decoded_token"
:
" green"
}},
{
"12097"
:
{
"logprob"
:
-0.02791696786880493
,
"rank"
:
1
,
"decoded_token"
:
" park"
},
"2169"
:
{
"logprob"
:
-4.65291690826416
,
"rank"
:
2
,
"decoded_token"
:
" ser"
},
"1295"
:
{
"logprob"
:
-4.65291690826416
,
"rank"
:
3
,
"decoded_token"
:
" l"
},
"23170"
:
{
"logprob"
:
-5.27791690826416
,
"rank"
:
4
,
"decoded_token"
:
" grass"
},
"26428"
:
{
"logprob"
:
-6.52791690826416
,
"rank"
:
5
,
"decoded_token"
:
" garden"
}},
{
"1044"
:
{
"logprob"
:
-1.350893259048462
,
"rank"
:
1
,
"decoded_token"
:
","
},
"1395"
:
{
"logprob"
:
-1.600893259048462
,
"rank"
:
2
,
"decoded_token"
:
" is"
},
"29817"
:
{
"logprob"
:
-2.350893259048462
,
"rank"
:
3
,
"decoded_token"
:
" surrounded"
},
"121313"
:
{
"logprob"
:
-2.475893259048462
,
"rank"
:
4
,
"decoded_token"
:
" flanked"
},
"1454"
:
{
"logprob"
:
-2.475893259048462
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"121040"
:
{
"logprob"
:
-0.710591197013855
,
"rank"
:
1
,
"decoded_token"
:
" bordered"
},
"121313"
:
{
"logprob"
:
-1.085591197013855
,
"rank"
:
2
,
"decoded_token"
:
" flanked"
},
"54410"
:
{
"logprob"
:
-1.960591197013855
,
"rank"
:
3
,
"decoded_token"
:
" lined"
},
"29817"
:
{
"logprob"
:
-3.8355913162231445
,
"rank"
:
4
,
"decoded_token"
:
" surrounded"
},
"1454"
:
{
"logprob"
:
-5.8355913162231445
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"1536"
:
{
"logprob"
:
-4.6491513785440475e-06
,
"rank"
:
1
,
"decoded_token"
:
" by"
},
"1454"
:
{
"logprob"
:
-12.375004768371582
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1408"
:
{
"logprob"
:
-15.812504768371582
,
"rank"
:
3
,
"decoded_token"
:
" on"
},
"3326"
:
{
"logprob"
:
-16.875003814697266
,
"rank"
:
4
,
"decoded_token"
:
"by"
},
"1295"
:
{
"logprob"
:
-16.875003814697266
,
"rank"
:
5
,
"decoded_token"
:
" l"
}},
{
"11223"
:
{
"logprob"
:
-0.4314780533313751
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"1295"
:
{
"logprob"
:
-1.4314780235290527
,
"rank"
:
2
,
"decoded_token"
:
" l"
},
"23170"
:
{
"logprob"
:
-2.4314780235290527
,
"rank"
:
3
,
"decoded_token"
:
" grass"
},
"17744"
:
{
"logprob"
:
-4.806478023529053
,
"rank"
:
4
,
"decoded_token"
:
" blo"
},
"95612"
:
{
"logprob"
:
-5.181478023529053
,
"rank"
:
5
,
"decoded_token"
:
" vibrant"
}},
{
"23170"
:
{
"logprob"
:
-0.00035041390219703317
,
"rank"
:
1
,
"decoded_token"
:
" grass"
},
"69230"
:
{
"logprob"
:
-8.125349998474121
,
"rank"
:
2
,
"decoded_token"
:
" lawn"
},
"128633"
:
{
"logprob"
:
-10.750349998474121
,
"rank"
:
3
,
"decoded_token"
:
" grasses"
},
"87781"
:
{
"logprob"
:
-11.437849998474121
,
"rank"
:
4
,
"decoded_token"
:
"
\u
8349"
},
"16429"
:
{
"logprob"
:
-11.437849998474121
,
"rank"
:
5
,
"decoded_token"
:
" trees"
}},
{
"1321"
:
{
"logprob"
:
-0.0009494088008068502
,
"rank"
:
1
,
"decoded_token"
:
" and"
},
"1044"
:
{
"logprob"
:
-7.125949382781982
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-9.25094985961914
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"2425"
:
{
"logprob"
:
-11.75094985961914
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"1046"
:
{
"logprob"
:
-11.75094985961914
,
"rank"
:
5
,
"decoded_token"
:
"."
}},
{
"17744"
:
{
"logprob"
:
-0.21488544344902039
,
"rank"
:
1
,
"decoded_token"
:
" blo"
},
"105368"
:
{
"logprob"
:
-1.8398854732513428
,
"rank"
:
2
,
"decoded_token"
:
" bloss"
},
"87833"
:
{
"logprob"
:
-3.8398854732513428
,
"rank"
:
3
,
"decoded_token"
:
" flowering"
},
"16429"
:
{
"logprob"
:
-4.464885234832764
,
"rank"
:
4
,
"decoded_token"
:
" trees"
},
"117207"
:
{
"logprob"
:
-7.589885234832764
,
"rank"
:
5
,
"decoded_token"
:
" bloom"
}},
{
"34941"
:
{
"logprob"
:
-7.152555099310121e-07
,
"rank"
:
1
,
"decoded_token"
:
"oming"
},
"35974"
:
{
"logprob"
:
-14.375000953674316
,
"rank"
:
2
,
"decoded_token"
:
"omed"
},
"6325"
:
{
"logprob"
:
-16.5625
,
"rank"
:
3
,
"decoded_token"
:
"oms"
},
"11009"
:
{
"logprob"
:
-17.625
,
"rank"
:
4
,
"decoded_token"
:
"omy"
},
"9457"
:
{
"logprob"
:
-18.875
,
"rank"
:
5
,
"decoded_token"
:
"ming"
}},
{
"16429"
:
{
"logprob"
:
-0.002424398437142372
,
"rank"
:
1
,
"decoded_token"
:
" trees"
},
"103796"
:
{
"logprob"
:
-6.627424240112305
,
"rank"
:
2
,
"decoded_token"
:
" cherry"
},
"32152"
:
{
"logprob"
:
-7.377424240112305
,
"rank"
:
3
,
"decoded_token"
:
" flowers"
},
"29151"
:
{
"logprob"
:
-9.314924240112305
,
"rank"
:
4
,
"decoded_token"
:
" shr"
},
"20370"
:
{
"logprob"
:
-9.564924240112305
,
"rank"
:
5
,
"decoded_token"
:
" fruit"
}},
{
"2425"
:
{
"logprob"
:
-0.3792523741722107
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1046"
:
{
"logprob"
:
-1.3792524337768555
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"3675"
:
{
"logprob"
:
-2.8792524337768555
,
"rank"
:
3
,
"decoded_token"
:
" against"
},
"1044"
:
{
"logprob"
:
-5.1292524337768555
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-7.2542524337768555
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"1261"
:
{
"logprob"
:
-0.0002315968304174021
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1278"
:
{
"logprob"
:
-8.875231742858887
,
"rank"
:
2
,
"decoded_token"
:
" the"
},
"10991"
:
{
"logprob"
:
-9.875231742858887
,
"rank"
:
3
,
"decoded_token"
:
" blue"
},
"6133"
:
{
"logprob"
:
-10.375231742858887
,
"rank"
:
4
,
"decoded_token"
:
" clear"
},
"1420"
:
{
"logprob"
:
-12.250231742858887
,
"rank"
:
5
,
"decoded_token"
:
" an"
}},
{
"10991"
:
{
"logprob"
:
-0.6372600197792053
,
"rank"
:
1
,
"decoded_token"
:
" blue"
},
"6133"
:
{
"logprob"
:
-0.7622600197792053
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"18168"
:
{
"logprob"
:
-5.3872599601745605
,
"rank"
:
3
,
"decoded_token"
:
" bright"
},
"105573"
:
{
"logprob"
:
-10.012260437011719
,
"rank"
:
4
,
"decoded_token"
:
" sunny"
},
"15330"
:
{
"logprob"
:
-11.512260437011719
,
"rank"
:
5
,
"decoded_token"
:
" Blue"
}},
{
"21283"
:
{
"logprob"
:
-6.12716976320371e-05
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"1044"
:
{
"logprob"
:
-9.87506103515625
,
"rank"
:
2
,
"decoded_token"
:
","
},
"19673"
:
{
"logprob"
:
-12.00006103515625
,
"rank"
:
3
,
"decoded_token"
:
" Sky"
},
"1321"
:
{
"logprob"
:
-13.31256103515625
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"124968"
:
{
"logprob"
:
-14.81256103515625
,
"rank"
:
5
,
"decoded_token"
:
" skies"
}},
{
"1046"
:
{
"logprob"
:
-0.00013982271775603294
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"2"
:
{
"logprob"
:
-9.500140190124512
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1626"
:
{
"logprob"
:
-10.000140190124512
,
"rank"
:
3
,
"decoded_token"
:
".
\n\n
"
},
"1338"
:
{
"logprob"
:
-11.750140190124512
,
"rank"
:
4
,
"decoded_token"
:
" with"
}},
{
"2"
:
{
"logprob"
:
-0.0004533693427219987
,
"rank"
:
1
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-7.750453472137451
,
"rank"
:
2
,
"decoded_token"
:
" Each"
},
"1256"
:
{
"logprob"
:
-11.125452995300293
,
"rank"
:
3
,
"decoded_token"
:
" This"
}}]]]
\ No newline at end of file
tests/models/fixtures/pixtral_chat_engine.json
deleted
100644 → 0
View file @
389ebcf7
[[[
1784
,
3937
,
6122
,
1261
,
7244
,
10575
,
18970
,
1408
,
1261
,
32656
,
4691
,
1046
,
2
],
"The image shows a black dog sitting on a wooden surface."
,
[{
"1784"
:
{
"logprob"
:
-0.11685245484113693
,
"rank"
:
1
,
"decoded_token"
:
"The"
},
"4380"
:
{
"logprob"
:
-2.3668525218963623
,
"rank"
:
2
,
"decoded_token"
:
"This"
},
"1049"
:
{
"logprob"
:
-4.741852283477783
,
"rank"
:
3
,
"decoded_token"
:
"1"
},
"117991"
:
{
"logprob"
:
-5.991852283477783
,
"rank"
:
4
,
"decoded_token"
:
"Certain"
},
"1785"
:
{
"logprob"
:
-5.991852283477783
,
"rank"
:
5
,
"decoded_token"
:
"In"
}},
{
"3937"
:
{
"logprob"
:
-0.2591013014316559
,
"rank"
:
1
,
"decoded_token"
:
" image"
},
"2158"
:
{
"logprob"
:
-1.5091012716293335
,
"rank"
:
2
,
"decoded_token"
:
" first"
},
"3977"
:
{
"logprob"
:
-5.884101390838623
,
"rank"
:
3
,
"decoded_token"
:
" top"
},
"7244"
:
{
"logprob"
:
-6.259101390838623
,
"rank"
:
4
,
"decoded_token"
:
" black"
},
"8061"
:
{
"logprob"
:
-6.759101390838623
,
"rank"
:
5
,
"decoded_token"
:
" images"
}},
{
"6122"
:
{
"logprob"
:
-0.9660423994064331
,
"rank"
:
1
,
"decoded_token"
:
" shows"
},
"51948"
:
{
"logprob"
:
-1.466042399406433
,
"rank"
:
2
,
"decoded_token"
:
" depicts"
},
"6971"
:
{
"logprob"
:
-1.466042399406433
,
"rank"
:
3
,
"decoded_token"
:
" features"
},
"25981"
:
{
"logprob"
:
-2.8410425186157227
,
"rank"
:
4
,
"decoded_token"
:
" displays"
},
"8688"
:
{
"logprob"
:
-2.8410425186157227
,
"rank"
:
5
,
"decoded_token"
:
" contains"
}},
{
"1261"
:
{
"logprob"
:
-0.0030613720882683992
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-6.253061294555664
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"2295"
:
{
"logprob"
:
-7.878061294555664
,
"rank"
:
3
,
"decoded_token"
:
" two"
},
"2342"
:
{
"logprob"
:
-7.878061294555664
,
"rank"
:
4
,
"decoded_token"
:
" only"
},
"1278"
:
{
"logprob"
:
-8.628061294555664
,
"rank"
:
5
,
"decoded_token"
:
" the"
}},
{
"7244"
:
{
"logprob"
:
-0.17649099230766296
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"6231"
:
{
"logprob"
:
-2.3014910221099854
,
"rank"
:
2
,
"decoded_token"
:
" close"
},
"4249"
:
{
"logprob"
:
-3.4264910221099854
,
"rank"
:
3
,
"decoded_token"
:
" single"
},
"4329"
:
{
"logprob"
:
-5.113990783691406
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"10575"
:
{
"logprob"
:
-5.176490783691406
,
"rank"
:
5
,
"decoded_token"
:
" dog"
}},
{
"10575"
:
{
"logprob"
:
-0.10929587483406067
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.4842958450317383
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-4.109295845031738
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"15812"
:
{
"logprob"
:
-7.296795845031738
,
"rank"
:
4
,
"decoded_token"
:
" Lab"
},
"7990"
:
{
"logprob"
:
-7.484295845031738
,
"rank"
:
5
,
"decoded_token"
:
" cat"
}},
{
"18970"
:
{
"logprob"
:
-0.830376148223877
,
"rank"
:
1
,
"decoded_token"
:
" sitting"
},
"1454"
:
{
"logprob"
:
-1.580376148223877
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"28528"
:
{
"logprob"
:
-1.955376148223877
,
"rank"
:
3
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-2.205376148223877
,
"rank"
:
4
,
"decoded_token"
:
" looking"
},
"15866"
:
{
"logprob"
:
-3.017876148223877
,
"rank"
:
5
,
"decoded_token"
:
" standing"
}},
{
"1408"
:
{
"logprob"
:
-0.08554735779762268
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"1321"
:
{
"logprob"
:
-3.71054744720459
,
"rank"
:
2
,
"decoded_token"
:
" and"
},
"3675"
:
{
"logprob"
:
-3.96054744720459
,
"rank"
:
3
,
"decoded_token"
:
" against"
},
"41132"
:
{
"logprob"
:
-4.71054744720459
,
"rank"
:
4
,
"decoded_token"
:
" attent"
},
"1454"
:
{
"logprob"
:
-5.08554744720459
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"1261"
:
{
"logprob"
:
-0.540847897529602
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"32656"
:
{
"logprob"
:
-0.915847897529602
,
"rank"
:
2
,
"decoded_token"
:
" wooden"
},
"12603"
:
{
"logprob"
:
-5.4158477783203125
,
"rank"
:
3
,
"decoded_token"
:
" wood"
},
"3977"
:
{
"logprob"
:
-5.4158477783203125
,
"rank"
:
4
,
"decoded_token"
:
" top"
},
"17253"
:
{
"logprob"
:
-6.2908477783203125
,
"rank"
:
5
,
"decoded_token"
:
" weather"
}},
{
"32656"
:
{
"logprob"
:
-0.025753861293196678
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"44130"
:
{
"logprob"
:
-4.400753974914551
,
"rank"
:
2
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-5.275753974914551
,
"rank"
:
3
,
"decoded_token"
:
" wood"
},
"3403"
:
{
"logprob"
:
-5.400753974914551
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"17253"
:
{
"logprob"
:
-6.963253974914551
,
"rank"
:
5
,
"decoded_token"
:
" weather"
}},
{
"4691"
:
{
"logprob"
:
-0.7265751957893372
,
"rank"
:
1
,
"decoded_token"
:
" surface"
},
"11237"
:
{
"logprob"
:
-0.8515751957893372
,
"rank"
:
2
,
"decoded_token"
:
" floor"
},
"7042"
:
{
"logprob"
:
-2.6015751361846924
,
"rank"
:
3
,
"decoded_token"
:
" background"
},
"28984"
:
{
"logprob"
:
-5.2265753746032715
,
"rank"
:
4
,
"decoded_token"
:
" deck"
},
"1615"
:
{
"logprob"
:
-5.7265753746032715
,
"rank"
:
5
,
"decoded_token"
:
" pl"
}},
{
"1046"
:
{
"logprob"
:
-0.4868825674057007
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-1.9868825674057007
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1321"
:
{
"logprob"
:
-2.3618826866149902
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1454"
:
{
"logprob"
:
-2.6118826866149902
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"7283"
:
{
"logprob"
:
-2.7368826866149902
,
"rank"
:
5
,
"decoded_token"
:
" looking"
}},
{
"2"
:
{
"logprob"
:
-0.0026643513701856136
,
"rank"
:
1
,
"decoded_token"
:
"</s>"
},
"1531"
:
{
"logprob"
:
-6.502664566040039
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1032"
:
{
"logprob"
:
-6.877664566040039
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"3730"
:
{
"logprob"
:
-9.752664566040039
,
"rank"
:
4
,
"decoded_token"
:
" There"
},
"1256"
:
{
"logprob"
:
-11.002664566040039
,
"rank"
:
5
,
"decoded_token"
:
" "
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
1454
,
2327
,
94766
,
32961
,
53048
,
41132
,
3923
,
1408
,
1261
,
32656
,
4691
,
1626
,
1050
,
1046
,
1349
,
15375
,
24361
,
4521
,
94973
,
5669
,
1278
,
48932
,
2425
,
1261
,
16152
,
1121
,
21283
,
1046
,
2
],
"1. A black dog with floppy ears sits attentively on a wooden surface.
\n
2. A vast mountain range stretches across the horizon under a cloudy sky."
,
[{
"1049"
:
{
"logprob"
:
-0.42824622988700867
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"1045"
:
{
"logprob"
:
-1.553246259689331
,
"rank"
:
2
,
"decoded_token"
:
"-"
},
"1065"
:
{
"logprob"
:
-2.428246259689331
,
"rank"
:
3
,
"decoded_token"
:
"A"
},
"1784"
:
{
"logprob"
:
-4.053246021270752
,
"rank"
:
4
,
"decoded_token"
:
"The"
},
"69957"
:
{
"logprob"
:
-4.428246021270752
,
"rank"
:
5
,
"decoded_token"
:
"Sure"
}},
{
"1046"
:
{
"logprob"
:
-1.811964830267243e-05
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1058"
:
{
"logprob"
:
-11.875018119812012
,
"rank"
:
2
,
"decoded_token"
:
":"
},
"3590"
:
{
"logprob"
:
-12.250018119812012
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"1065"
:
{
"logprob"
:
-13.062518119812012
,
"rank"
:
4
,
"decoded_token"
:
"A"
},
"1041"
:
{
"logprob"
:
-13.750018119812012
,
"rank"
:
5
,
"decoded_token"
:
")"
}},
{
"1349"
:
{
"logprob"
:
-0.13647246360778809
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1429"
:
{
"logprob"
:
-2.386472463607788
,
"rank"
:
2
,
"decoded_token"
:
"
\"
"
},
"1603"
:
{
"logprob"
:
-3.886472463607788
,
"rank"
:
3
,
"decoded_token"
:
" **"
},
"11967"
:
{
"logprob"
:
-5.011472702026367
,
"rank"
:
4
,
"decoded_token"
:
" Image"
},
"1531"
:
{
"logprob"
:
-5.011472702026367
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"7244"
:
{
"logprob"
:
-0.18561004102230072
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"38462"
:
{
"logprob"
:
-3.185610055923462
,
"rank"
:
2
,
"decoded_token"
:
" curious"
},
"68076"
:
{
"logprob"
:
-3.623110055923462
,
"rank"
:
3
,
"decoded_token"
:
" cute"
},
"4329"
:
{
"logprob"
:
-3.935610055923462
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"74168"
:
{
"logprob"
:
-4.373109817504883
,
"rank"
:
5
,
"decoded_token"
:
" gloss"
}},
{
"10575"
:
{
"logprob"
:
-0.17297746241092682
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.1729774475097656
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-3.1729774475097656
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"15812"
:
{
"logprob"
:
-6.985477447509766
,
"rank"
:
4
,
"decoded_token"
:
" Lab"
},
"8636"
:
{
"logprob"
:
-7.360477447509766
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"1454"
:
{
"logprob"
:
-0.5785807967185974
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"53048"
:
{
"logprob"
:
-1.2660808563232422
,
"rank"
:
2
,
"decoded_token"
:
" sits"
},
"1395"
:
{
"logprob"
:
-3.016080856323242
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"22524"
:
{
"logprob"
:
-3.578580856323242
,
"rank"
:
4
,
"decoded_token"
:
" lies"
},
"18970"
:
{
"logprob"
:
-3.703580856323242
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"2327"
:
{
"logprob"
:
-1.2709298133850098
,
"rank"
:
1
,
"decoded_token"
:
" fl"
},
"1261"
:
{
"logprob"
:
-1.3959298133850098
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"17300"
:
{
"logprob"
:
-1.8959298133850098
,
"rank"
:
3
,
"decoded_token"
:
" soul"
},
"100089"
:
{
"logprob"
:
-2.6459298133850098
,
"rank"
:
4
,
"decoded_token"
:
" expressive"
},
"6444"
:
{
"logprob"
:
-3.1459298133850098
,
"rank"
:
5
,
"decoded_token"
:
" soft"
}},
{
"94766"
:
{
"logprob"
:
-0.002432247158139944
,
"rank"
:
1
,
"decoded_token"
:
"oppy"
},
"124603"
:
{
"logprob"
:
-6.377432346343994
,
"rank"
:
2
,
"decoded_token"
:
"uffy"
},
"1484"
:
{
"logprob"
:
-7.877432346343994
,
"rank"
:
3
,
"decoded_token"
:
"op"
},
"24897"
:
{
"logprob"
:
-8.877431869506836
,
"rank"
:
4
,
"decoded_token"
:
"appy"
},
"102477"
:
{
"logprob"
:
-9.752431869506836
,
"rank"
:
5
,
"decoded_token"
:
"opping"
}},
{
"32961"
:
{
"logprob"
:
-5.113947918289341e-05
,
"rank"
:
1
,
"decoded_token"
:
" ears"
},
"16962"
:
{
"logprob"
:
-11.312551498413086
,
"rank"
:
2
,
"decoded_token"
:
" ear"
},
"5731"
:
{
"logprob"
:
-11.750051498413086
,
"rank"
:
3
,
"decoded_token"
:
" eyes"
},
"3351"
:
{
"logprob"
:
-12.000051498413086
,
"rank"
:
4
,
"decoded_token"
:
" years"
},
"42071"
:
{
"logprob"
:
-13.000051498413086
,
"rank"
:
5
,
"decoded_token"
:
" cheeks"
}},
{
"53048"
:
{
"logprob"
:
-0.6131591200828552
,
"rank"
:
1
,
"decoded_token"
:
" sits"
},
"10637"
:
{
"logprob"
:
-1.9881591796875
,
"rank"
:
2
,
"decoded_token"
:
" looks"
},
"1321"
:
{
"logprob"
:
-2.4256591796875
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1395"
:
{
"logprob"
:
-2.6756591796875
,
"rank"
:
4
,
"decoded_token"
:
" is"
},
"18970"
:
{
"logprob"
:
-3.0506591796875
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"41132"
:
{
"logprob"
:
-0.36187249422073364
,
"rank"
:
1
,
"decoded_token"
:
" attent"
},
"1408"
:
{
"logprob"
:
-2.361872434616089
,
"rank"
:
2
,
"decoded_token"
:
" on"
},
"106534"
:
{
"logprob"
:
-2.424372434616089
,
"rank"
:
3
,
"decoded_token"
:
" calmly"
},
"12276"
:
{
"logprob"
:
-2.611872434616089
,
"rank"
:
4
,
"decoded_token"
:
" alert"
},
"6482"
:
{
"logprob"
:
-5.174372673034668
,
"rank"
:
5
,
"decoded_token"
:
" patient"
}},
{
"3923"
:
{
"logprob"
:
-8.451581379631534e-05
,
"rank"
:
1
,
"decoded_token"
:
"ively"
},
"1556"
:
{
"logprob"
:
-9.50008487701416
,
"rank"
:
2
,
"decoded_token"
:
"ive"
},
"6655"
:
{
"logprob"
:
-11.87508487701416
,
"rank"
:
3
,
"decoded_token"
:
"atively"
},
"3929"
:
{
"logprob"
:
-14.00008487701416
,
"rank"
:
4
,
"decoded_token"
:
"ently"
},
"47885"
:
{
"logprob"
:
-14.75008487701416
,
"rank"
:
5
,
"decoded_token"
:
"edly"
}},
{
"1408"
:
{
"logprob"
:
-0.058125678449869156
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3675"
:
{
"logprob"
:
-3.1831257343292236
,
"rank"
:
2
,
"decoded_token"
:
" against"
},
"1294"
:
{
"logprob"
:
-4.9331254959106445
,
"rank"
:
3
,
"decoded_token"
:
" in"
},
"7283"
:
{
"logprob"
:
-5.8081254959106445
,
"rank"
:
4
,
"decoded_token"
:
" looking"
},
"1044"
:
{
"logprob"
:
-5.9331254959106445
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1261"
:
{
"logprob"
:
-0.21029606461524963
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"32656"
:
{
"logprob"
:
-1.7102960348129272
,
"rank"
:
2
,
"decoded_token"
:
" wooden"
},
"17253"
:
{
"logprob"
:
-5.710296154022217
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"44130"
:
{
"logprob"
:
-6.085296154022217
,
"rank"
:
4
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-6.960296154022217
,
"rank"
:
5
,
"decoded_token"
:
" wood"
}},
{
"32656"
:
{
"logprob"
:
-0.08548421412706375
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"44130"
:
{
"logprob"
:
-2.710484266281128
,
"rank"
:
2
,
"decoded_token"
:
" rust"
},
"17253"
:
{
"logprob"
:
-4.710484027862549
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"12603"
:
{
"logprob"
:
-5.960484027862549
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"3403"
:
{
"logprob"
:
-5.960484027862549
,
"rank"
:
5
,
"decoded_token"
:
" text"
}},
{
"4691"
:
{
"logprob"
:
-0.7172377109527588
,
"rank"
:
1
,
"decoded_token"
:
" surface"
},
"11237"
:
{
"logprob"
:
-0.8422377109527588
,
"rank"
:
2
,
"decoded_token"
:
" floor"
},
"7042"
:
{
"logprob"
:
-2.842237710952759
,
"rank"
:
3
,
"decoded_token"
:
" background"
},
"28984"
:
{
"logprob"
:
-4.21723747253418
,
"rank"
:
4
,
"decoded_token"
:
" deck"
},
"92504"
:
{
"logprob"
:
-6.21723747253418
,
"rank"
:
5
,
"decoded_token"
:
" backdrop"
}},
{
"1626"
:
{
"logprob"
:
-0.12971943616867065
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-2.3797194957733154
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1046"
:
{
"logprob"
:
-4.129719257354736
,
"rank"
:
3
,
"decoded_token"
:
"."
},
"1338"
:
{
"logprob"
:
-5.129719257354736
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"7283"
:
{
"logprob"
:
-5.504719257354736
,
"rank"
:
5
,
"decoded_token"
:
" looking"
}},
{
"1050"
:
{
"logprob"
:
-0.00015698630886618048
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1256"
:
{
"logprob"
:
-9.125157356262207
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-10.875157356262207
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-11.750157356262207
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1051"
:
{
"logprob"
:
-12.125157356262207
,
"rank"
:
5
,
"decoded_token"
:
"3"
}},
{
"1046"
:
{
"logprob"
:
-6.6756979322235566e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.062506675720215
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"1626"
:
{
"logprob"
:
-13.187506675720215
,
"rank"
:
3
,
"decoded_token"
:
".
\n
"
},
"1338"
:
{
"logprob"
:
-14.750006675720215
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1058"
:
{
"logprob"
:
-14.937506675720215
,
"rank"
:
5
,
"decoded_token"
:
":"
}},
{
"1349"
:
{
"logprob"
:
-0.5863217115402222
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-1.4613217115402222
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"37159"
:
{
"logprob"
:
-2.2113218307495117
,
"rank"
:
3
,
"decoded_token"
:
" Snow"
},
"113465"
:
{
"logprob"
:
-3.8988218307495117
,
"rank"
:
4
,
"decoded_token"
:
" Rug"
},
"1531"
:
{
"logprob"
:
-3.9613218307495117
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"15375"
:
{
"logprob"
:
-0.639299213886261
,
"rank"
:
1
,
"decoded_token"
:
" vast"
},
"37849"
:
{
"logprob"
:
-2.014299154281616
,
"rank"
:
2
,
"decoded_token"
:
" breat"
},
"61082"
:
{
"logprob"
:
-2.389299154281616
,
"rank"
:
3
,
"decoded_token"
:
" panor"
},
"10726"
:
{
"logprob"
:
-3.139299154281616
,
"rank"
:
4
,
"decoded_token"
:
" scen"
},
"2169"
:
{
"logprob"
:
-3.201799154281616
,
"rank"
:
5
,
"decoded_token"
:
" ser"
}},
{
"24361"
:
{
"logprob"
:
-0.702845573425293
,
"rank"
:
1
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-1.952845573425293
,
"rank"
:
2
,
"decoded_token"
:
" mountainous"
},
"1044"
:
{
"logprob"
:
-2.077845573425293
,
"rank"
:
3
,
"decoded_token"
:
","
},
"4521"
:
{
"logprob"
:
-2.327845573425293
,
"rank"
:
4
,
"decoded_token"
:
" range"
},
"28035"
:
{
"logprob"
:
-2.452845573425293
,
"rank"
:
5
,
"decoded_token"
:
" landscape"
}},
{
"4521"
:
{
"logprob"
:
-0.07058162242174149
,
"rank"
:
1
,
"decoded_token"
:
" range"
},
"28035"
:
{
"logprob"
:
-2.6955816745758057
,
"rank"
:
2
,
"decoded_token"
:
" landscape"
},
"37691"
:
{
"logprob"
:
-8.320581436157227
,
"rank"
:
3
,
"decoded_token"
:
" valley"
},
"12248"
:
{
"logprob"
:
-9.445581436157227
,
"rank"
:
4
,
"decoded_token"
:
" peak"
},
"13327"
:
{
"logprob"
:
-9.695581436157227
,
"rank"
:
5
,
"decoded_token"
:
" scene"
}},
{
"94973"
:
{
"logprob"
:
-1.1164050102233887
,
"rank"
:
1
,
"decoded_token"
:
" stretches"
},
"1454"
:
{
"logprob"
:
-1.1789050102233887
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"2425"
:
{
"logprob"
:
-1.8664050102233887
,
"rank"
:
3
,
"decoded_token"
:
" under"
},
"1395"
:
{
"logprob"
:
-2.5539050102233887
,
"rank"
:
4
,
"decoded_token"
:
" is"
},
"13875"
:
{
"logprob"
:
-2.9914050102233887
,
"rank"
:
5
,
"decoded_token"
:
" covered"
}},
{
"5669"
:
{
"logprob"
:
-0.3286789357662201
,
"rank"
:
1
,
"decoded_token"
:
" across"
},
"1848"
:
{
"logprob"
:
-2.078678846359253
,
"rank"
:
2
,
"decoded_token"
:
" out"
},
"2425"
:
{
"logprob"
:
-2.328678846359253
,
"rank"
:
3
,
"decoded_token"
:
" under"
},
"2203"
:
{
"logprob"
:
-3.328678846359253
,
"rank"
:
4
,
"decoded_token"
:
" into"
},
"8994"
:
{
"logprob"
:
-4.766179084777832
,
"rank"
:
5
,
"decoded_token"
:
" towards"
}},
{
"1278"
:
{
"logprob"
:
-0.039004355669021606
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-3.289004325866699
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-7.414004325866699
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"2425"
:
{
"logprob"
:
-9.0390043258667
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"1454"
:
{
"logprob"
:
-9.2265043258667
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"48932"
:
{
"logprob"
:
-0.2659883201122284
,
"rank"
:
1
,
"decoded_token"
:
" horizon"
},
"21283"
:
{
"logprob"
:
-2.140988349914551
,
"rank"
:
2
,
"decoded_token"
:
" sky"
},
"3937"
:
{
"logprob"
:
-3.015988349914551
,
"rank"
:
3
,
"decoded_token"
:
" image"
},
"28035"
:
{
"logprob"
:
-3.515988349914551
,
"rank"
:
4
,
"decoded_token"
:
" landscape"
},
"3044"
:
{
"logprob"
:
-4.265988349914551
,
"rank"
:
5
,
"decoded_token"
:
" sk"
}},
{
"2425"
:
{
"logprob"
:
-0.5356141328811646
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1044"
:
{
"logprob"
:
-1.5356141328811646
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-1.7856141328811646
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"25136"
:
{
"logprob"
:
-3.785614013671875
,
"rank"
:
4
,
"decoded_token"
:
" beneath"
},
"1408"
:
{
"logprob"
:
-5.785614013671875
,
"rank"
:
5
,
"decoded_token"
:
" on"
}},
{
"1261"
:
{
"logprob"
:
-0.006081883795559406
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-5.506082057952881
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"16152"
:
{
"logprob"
:
-7.631082057952881
,
"rank"
:
3
,
"decoded_token"
:
" cloud"
},
"6133"
:
{
"logprob"
:
-7.881082057952881
,
"rank"
:
4
,
"decoded_token"
:
" clear"
},
"2136"
:
{
"logprob"
:
-8.006081581115723
,
"rank"
:
5
,
"decoded_token"
:
" over"
}},
{
"16152"
:
{
"logprob"
:
-0.6749536991119385
,
"rank"
:
1
,
"decoded_token"
:
" cloud"
},
"6133"
:
{
"logprob"
:
-1.4249536991119385
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"18416"
:
{
"logprob"
:
-2.8624536991119385
,
"rank"
:
3
,
"decoded_token"
:
" haz"
},
"27254"
:
{
"logprob"
:
-2.9874536991119385
,
"rank"
:
4
,
"decoded_token"
:
" partly"
},
"4391"
:
{
"logprob"
:
-3.2374536991119385
,
"rank"
:
5
,
"decoded_token"
:
" light"
}},
{
"1121"
:
{
"logprob"
:
-0.10860869288444519
,
"rank"
:
1
,
"decoded_token"
:
"y"
},
"4527"
:
{
"logprob"
:
-2.9836087226867676
,
"rank"
:
2
,
"decoded_token"
:
"less"
},
"1286"
:
{
"logprob"
:
-3.4836087226867676
,
"rank"
:
3
,
"decoded_token"
:
"ed"
},
"77187"
:
{
"logprob"
:
-4.608608722686768
,
"rank"
:
4
,
"decoded_token"
:
"-filled"
},
"114525"
:
{
"logprob"
:
-4.858608722686768
,
"rank"
:
5
,
"decoded_token"
:
"-covered"
}},
{
"21283"
:
{
"logprob"
:
-0.002785732736811042
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-6.252785682678223
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-7.627785682678223
,
"rank"
:
3
,
"decoded_token"
:
","
},
"26549"
:
{
"logprob"
:
-8.627785682678223
,
"rank"
:
4
,
"decoded_token"
:
" gray"
},
"34052"
:
{
"logprob"
:
-9.377785682678223
,
"rank"
:
5
,
"decoded_token"
:
" grey"
}},
{
"1046"
:
{
"logprob"
:
-0.047878943383693695
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-3.1728789806365967
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-5.547878742218018
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1338"
:
{
"logprob"
:
-7.172878742218018
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1294"
:
{
"logprob"
:
-9.172879219055176
,
"rank"
:
5
,
"decoded_token"
:
" in"
}},
{
"2"
:
{
"logprob"
:
-1.3351351299206726e-05
,
"rank"
:
1
,
"decoded_token"
:
"</s>"
},
"1032"
:
{
"logprob"
:
-11.25001335144043
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1256"
:
{
"logprob"
:
-16.00001335144043
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1319"
:
{
"logprob"
:
-17.25001335144043
,
"rank"
:
4
,
"decoded_token"
:
" ("
},
"1766"
:
{
"logprob"
:
-18.50001335144043
,
"rank"
:
5
,
"decoded_token"
:
" ["
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
53048
,
41132
,
3923
,
1408
,
1261
,
32656
,
11237
,
1626
,
1050
,
1046
,
1349
,
15375
,
24361
,
4521
,
94973
,
5669
,
1278
,
48932
,
2425
,
1261
,
16152
,
1121
,
21283
,
1626
,
1051
,
1046
,
8342
,
71284
,
7377
,
1394
,
22140
,
1294
,
1278
,
27208
,
1513
,
97558
,
1626
,
1052
,
1046
,
1349
,
53301
,
59396
,
3549
,
13335
,
2645
,
1261
,
1295
,
3506
,
11223
,
12097
,
1046
,
2
],
"1. A black dog sits attentively on a wooden floor.
\n
2. A vast mountain range stretches across the horizon under a cloudy sky.
\n
3. Surfers wait for waves in the ocean at sunset.
\n
4. A winding gravel path leads through a lush green park."
,
[{
"1049"
:
{
"logprob"
:
-0.05001257359981537
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"1045"
:
{
"logprob"
:
-3.1750125885009766
,
"rank"
:
2
,
"decoded_token"
:
"-"
},
"69957"
:
{
"logprob"
:
-5.925012588500977
,
"rank"
:
3
,
"decoded_token"
:
"Sure"
},
"11745"
:
{
"logprob"
:
-6.425012588500977
,
"rank"
:
4
,
"decoded_token"
:
"Here"
},
"1065"
:
{
"logprob"
:
-6.425012588500977
,
"rank"
:
5
,
"decoded_token"
:
"A"
}},
{
"1046"
:
{
"logprob"
:
-8.702239938429557e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1058"
:
{
"logprob"
:
-12.000008583068848
,
"rank"
:
2
,
"decoded_token"
:
":"
},
"3590"
:
{
"logprob"
:
-13.375008583068848
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"1041"
:
{
"logprob"
:
-14.750008583068848
,
"rank"
:
4
,
"decoded_token"
:
")"
},
"1065"
:
{
"logprob"
:
-15.687508583068848
,
"rank"
:
5
,
"decoded_token"
:
"A"
}},
{
"1349"
:
{
"logprob"
:
-0.14196155965328217
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1429"
:
{
"logprob"
:
-2.2669615745544434
,
"rank"
:
2
,
"decoded_token"
:
"
\"
"
},
"1531"
:
{
"logprob"
:
-4.516961574554443
,
"rank"
:
3
,
"decoded_token"
:
" The"
},
"11967"
:
{
"logprob"
:
-4.516961574554443
,
"rank"
:
4
,
"decoded_token"
:
" Image"
},
"1603"
:
{
"logprob"
:
-5.391961574554443
,
"rank"
:
5
,
"decoded_token"
:
" **"
}},
{
"7244"
:
{
"logprob"
:
-0.14889711141586304
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"68076"
:
{
"logprob"
:
-3.398897171020508
,
"rank"
:
2
,
"decoded_token"
:
" cute"
},
"6231"
:
{
"logprob"
:
-3.961397171020508
,
"rank"
:
3
,
"decoded_token"
:
" close"
},
"38462"
:
{
"logprob"
:
-4.273897171020508
,
"rank"
:
4
,
"decoded_token"
:
" curious"
},
"4329"
:
{
"logprob"
:
-4.398897171020508
,
"rank"
:
5
,
"decoded_token"
:
" large"
}},
{
"10575"
:
{
"logprob"
:
-0.12091328203678131
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.37091326713562
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-3.99591326713562
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"15812"
:
{
"logprob"
:
-7.683413505554199
,
"rank"
:
4
,
"decoded_token"
:
" Lab"
},
"8636"
:
{
"logprob"
:
-7.808413505554199
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"53048"
:
{
"logprob"
:
-0.8691943287849426
,
"rank"
:
1
,
"decoded_token"
:
" sits"
},
"1454"
:
{
"logprob"
:
-1.1191942691802979
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1395"
:
{
"logprob"
:
-2.431694269180298
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"18970"
:
{
"logprob"
:
-2.744194269180298
,
"rank"
:
4
,
"decoded_token"
:
" sitting"
},
"22524"
:
{
"logprob"
:
-3.681694269180298
,
"rank"
:
5
,
"decoded_token"
:
" lies"
}},
{
"41132"
:
{
"logprob"
:
-0.5939557552337646
,
"rank"
:
1
,
"decoded_token"
:
" attent"
},
"106534"
:
{
"logprob"
:
-1.2814557552337646
,
"rank"
:
2
,
"decoded_token"
:
" calmly"
},
"12276"
:
{
"logprob"
:
-2.8439557552337646
,
"rank"
:
3
,
"decoded_token"
:
" alert"
},
"1408"
:
{
"logprob"
:
-2.8439557552337646
,
"rank"
:
4
,
"decoded_token"
:
" on"
},
"6482"
:
{
"logprob"
:
-4.968955993652344
,
"rank"
:
5
,
"decoded_token"
:
" patient"
}},
{
"3923"
:
{
"logprob"
:
-0.00010084597306558862
,
"rank"
:
1
,
"decoded_token"
:
"ively"
},
"1556"
:
{
"logprob"
:
-9.500101089477539
,
"rank"
:
2
,
"decoded_token"
:
"ive"
},
"6655"
:
{
"logprob"
:
-10.875101089477539
,
"rank"
:
3
,
"decoded_token"
:
"atively"
},
"3929"
:
{
"logprob"
:
-13.000101089477539
,
"rank"
:
4
,
"decoded_token"
:
"ently"
},
"47885"
:
{
"logprob"
:
-13.750101089477539
,
"rank"
:
5
,
"decoded_token"
:
"edly"
}},
{
"1408"
:
{
"logprob"
:
-0.056158196181058884
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3675"
:
{
"logprob"
:
-3.6811583042144775
,
"rank"
:
2
,
"decoded_token"
:
" against"
},
"1454"
:
{
"logprob"
:
-4.306158065795898
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1294"
:
{
"logprob"
:
-5.181158065795898
,
"rank"
:
4
,
"decoded_token"
:
" in"
},
"7283"
:
{
"logprob"
:
-5.431158065795898
,
"rank"
:
5
,
"decoded_token"
:
" looking"
}},
{
"1261"
:
{
"logprob"
:
-0.33056098222732544
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"32656"
:
{
"logprob"
:
-1.3305609226226807
,
"rank"
:
2
,
"decoded_token"
:
" wooden"
},
"17253"
:
{
"logprob"
:
-4.70556116104126
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"44130"
:
{
"logprob"
:
-5.83056116104126
,
"rank"
:
4
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-6.58056116104126
,
"rank"
:
5
,
"decoded_token"
:
" wood"
}},
{
"32656"
:
{
"logprob"
:
-0.07081110030412674
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"44130"
:
{
"logprob"
:
-2.9458110332489014
,
"rank"
:
2
,
"decoded_token"
:
" rust"
},
"17253"
:
{
"logprob"
:
-4.6958112716674805
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"12603"
:
{
"logprob"
:
-5.8208112716674805
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"3403"
:
{
"logprob"
:
-6.0708112716674805
,
"rank"
:
5
,
"decoded_token"
:
" text"
}},
{
"11237"
:
{
"logprob"
:
-0.6428436636924744
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-1.0178437232971191
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"7042"
:
{
"logprob"
:
-2.642843723297119
,
"rank"
:
3
,
"decoded_token"
:
" background"
},
"28984"
:
{
"logprob"
:
-3.517843723297119
,
"rank"
:
4
,
"decoded_token"
:
" deck"
},
"92504"
:
{
"logprob"
:
-6.017843723297119
,
"rank"
:
5
,
"decoded_token"
:
" backdrop"
}},
{
"1626"
:
{
"logprob"
:
-0.7337945103645325
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-0.8587945103645325
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-3.3587944507598877
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"7283"
:
{
"logprob"
:
-3.6087944507598877
,
"rank"
:
4
,
"decoded_token"
:
" looking"
},
"1321"
:
{
"logprob"
:
-4.108794689178467
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1050"
:
{
"logprob"
:
-1.0132738680113107e-05
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1051"
:
{
"logprob"
:
-11.75001049041748
,
"rank"
:
2
,
"decoded_token"
:
"3"
},
"1256"
:
{
"logprob"
:
-14.00001049041748
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1049"
:
{
"logprob"
:
-14.62501049041748
,
"rank"
:
4
,
"decoded_token"
:
"1"
},
"1032"
:
{
"logprob"
:
-14.62501049041748
,
"rank"
:
5
,
"decoded_token"
:
" "
}},
{
"1046"
:
{
"logprob"
:
-2.861018856492592e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.43750286102295
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"4700"
:
{
"logprob"
:
-15.37500286102295
,
"rank"
:
3
,
"decoded_token"
:
".M"
},
"1626"
:
{
"logprob"
:
-15.37500286102295
,
"rank"
:
4
,
"decoded_token"
:
".
\n
"
},
"3051"
:
{
"logprob"
:
-15.87500286102295
,
"rank"
:
5
,
"decoded_token"
:
".S"
}},
{
"1349"
:
{
"logprob"
:
-0.6794427633285522
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-1.9294427633285522
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"37159"
:
{
"logprob"
:
-2.116942882537842
,
"rank"
:
3
,
"decoded_token"
:
" Snow"
},
"27260"
:
{
"logprob"
:
-2.616942882537842
,
"rank"
:
4
,
"decoded_token"
:
" Mountain"
},
"113465"
:
{
"logprob"
:
-2.866942882537842
,
"rank"
:
5
,
"decoded_token"
:
" Rug"
}},
{
"15375"
:
{
"logprob"
:
-0.9194075465202332
,
"rank"
:
1
,
"decoded_token"
:
" vast"
},
"10726"
:
{
"logprob"
:
-2.294407606124878
,
"rank"
:
2
,
"decoded_token"
:
" scen"
},
"4521"
:
{
"logprob"
:
-2.356907606124878
,
"rank"
:
3
,
"decoded_token"
:
" range"
},
"122203"
:
{
"logprob"
:
-2.419407606124878
,
"rank"
:
4
,
"decoded_token"
:
" rugged"
},
"61082"
:
{
"logprob"
:
-2.856907606124878
,
"rank"
:
5
,
"decoded_token"
:
" panor"
}},
{
"24361"
:
{
"logprob"
:
-0.5804797410964966
,
"rank"
:
1
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-1.8304797410964966
,
"rank"
:
2
,
"decoded_token"
:
" mountainous"
},
"28035"
:
{
"logprob"
:
-2.455479621887207
,
"rank"
:
3
,
"decoded_token"
:
" landscape"
},
"4521"
:
{
"logprob"
:
-2.455479621887207
,
"rank"
:
4
,
"decoded_token"
:
" range"
},
"1044"
:
{
"logprob"
:
-2.705479621887207
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"4521"
:
{
"logprob"
:
-0.0493546724319458
,
"rank"
:
1
,
"decoded_token"
:
" range"
},
"28035"
:
{
"logprob"
:
-3.0493545532226562
,
"rank"
:
2
,
"decoded_token"
:
" landscape"
},
"37691"
:
{
"logprob"
:
-8.424354553222656
,
"rank"
:
3
,
"decoded_token"
:
" valley"
},
"13327"
:
{
"logprob"
:
-9.049354553222656
,
"rank"
:
4
,
"decoded_token"
:
" scene"
},
"3719"
:
{
"logprob"
:
-9.799354553222656
,
"rank"
:
5
,
"decoded_token"
:
" view"
}},
{
"94973"
:
{
"logprob"
:
-0.6676871180534363
,
"rank"
:
1
,
"decoded_token"
:
" stretches"
},
"2425"
:
{
"logprob"
:
-1.792687177658081
,
"rank"
:
2
,
"decoded_token"
:
" under"
},
"1395"
:
{
"logprob"
:
-2.292687177658081
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"1454"
:
{
"logprob"
:
-2.730187177658081
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"7038"
:
{
"logprob"
:
-3.292687177658081
,
"rank"
:
5
,
"decoded_token"
:
" extends"
}},
{
"5669"
:
{
"logprob"
:
-0.4542117118835449
,
"rank"
:
1
,
"decoded_token"
:
" across"
},
"2425"
:
{
"logprob"
:
-1.454211711883545
,
"rank"
:
2
,
"decoded_token"
:
" under"
},
"1848"
:
{
"logprob"
:
-2.454211711883545
,
"rank"
:
3
,
"decoded_token"
:
" out"
},
"2203"
:
{
"logprob"
:
-4.204211711883545
,
"rank"
:
4
,
"decoded_token"
:
" into"
},
"25136"
:
{
"logprob"
:
-4.641711711883545
,
"rank"
:
5
,
"decoded_token"
:
" beneath"
}},
{
"1278"
:
{
"logprob"
:
-0.23009441792964935
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-1.6050944328308105
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-5.6050944328308105
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"2425"
:
{
"logprob"
:
-7.2300944328308105
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"1454"
:
{
"logprob"
:
-10.167593955993652
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"48932"
:
{
"logprob"
:
-0.3072167932987213
,
"rank"
:
1
,
"decoded_token"
:
" horizon"
},
"21283"
:
{
"logprob"
:
-1.932216763496399
,
"rank"
:
2
,
"decoded_token"
:
" sky"
},
"3937"
:
{
"logprob"
:
-3.1822168827056885
,
"rank"
:
3
,
"decoded_token"
:
" image"
},
"28035"
:
{
"logprob"
:
-3.6822168827056885
,
"rank"
:
4
,
"decoded_token"
:
" landscape"
},
"3044"
:
{
"logprob"
:
-3.6822168827056885
,
"rank"
:
5
,
"decoded_token"
:
" sk"
}},
{
"2425"
:
{
"logprob"
:
-0.2914469838142395
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1044"
:
{
"logprob"
:
-2.4164469242095947
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-2.5414469242095947
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1626"
:
{
"logprob"
:
-3.7914469242095947
,
"rank"
:
4
,
"decoded_token"
:
".
\n
"
},
"1408"
:
{
"logprob"
:
-3.7914469242095947
,
"rank"
:
5
,
"decoded_token"
:
" on"
}},
{
"1261"
:
{
"logprob"
:
-0.0460360012948513
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-3.9210360050201416
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"16152"
:
{
"logprob"
:
-4.1085357666015625
,
"rank"
:
3
,
"decoded_token"
:
" cloud"
},
"2136"
:
{
"logprob"
:
-6.1710357666015625
,
"rank"
:
4
,
"decoded_token"
:
" over"
},
"6133"
:
{
"logprob"
:
-6.4210357666015625
,
"rank"
:
5
,
"decoded_token"
:
" clear"
}},
{
"16152"
:
{
"logprob"
:
-0.20367540419101715
,
"rank"
:
1
,
"decoded_token"
:
" cloud"
},
"6133"
:
{
"logprob"
:
-2.8286755084991455
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"27254"
:
{
"logprob"
:
-3.5161755084991455
,
"rank"
:
3
,
"decoded_token"
:
" partly"
},
"18416"
:
{
"logprob"
:
-3.8286755084991455
,
"rank"
:
4
,
"decoded_token"
:
" haz"
},
"4391"
:
{
"logprob"
:
-4.328675270080566
,
"rank"
:
5
,
"decoded_token"
:
" light"
}},
{
"1121"
:
{
"logprob"
:
-0.05241352692246437
,
"rank"
:
1
,
"decoded_token"
:
"y"
},
"1286"
:
{
"logprob"
:
-3.8024134635925293
,
"rank"
:
2
,
"decoded_token"
:
"ed"
},
"77187"
:
{
"logprob"
:
-4.552413463592529
,
"rank"
:
3
,
"decoded_token"
:
"-filled"
},
"4527"
:
{
"logprob"
:
-4.802413463592529
,
"rank"
:
4
,
"decoded_token"
:
"less"
},
"114525"
:
{
"logprob"
:
-4.927413463592529
,
"rank"
:
5
,
"decoded_token"
:
"-covered"
}},
{
"21283"
:
{
"logprob"
:
-0.0003716255014296621
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-8.750371932983398
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-9.375371932983398
,
"rank"
:
3
,
"decoded_token"
:
","
},
"26549"
:
{
"logprob"
:
-10.375371932983398
,
"rank"
:
4
,
"decoded_token"
:
" gray"
},
"34052"
:
{
"logprob"
:
-11.250371932983398
,
"rank"
:
5
,
"decoded_token"
:
" grey"
}},
{
"1626"
:
{
"logprob"
:
-0.00012730741582345217
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-9.500126838684082
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1046"
:
{
"logprob"
:
-10.500126838684082
,
"rank"
:
3
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-10.875126838684082
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"1294"
:
{
"logprob"
:
-13.250126838684082
,
"rank"
:
5
,
"decoded_token"
:
" in"
}},
{
"1051"
:
{
"logprob"
:
-3.2186455882765586e-06
,
"rank"
:
1
,
"decoded_token"
:
"3"
},
"1052"
:
{
"logprob"
:
-12.75000286102295
,
"rank"
:
2
,
"decoded_token"
:
"4"
},
"1050"
:
{
"logprob"
:
-15.00000286102295
,
"rank"
:
3
,
"decoded_token"
:
"2"
},
"1049"
:
{
"logprob"
:
-16.937503814697266
,
"rank"
:
4
,
"decoded_token"
:
"1"
},
"1032"
:
{
"logprob"
:
-17.875003814697266
,
"rank"
:
5
,
"decoded_token"
:
" "
}},
{
"1046"
:
{
"logprob"
:
-1.6689286894688848e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-14.687501907348633
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"5226"
:
{
"logprob"
:
-15.687501907348633
,
"rank"
:
3
,
"decoded_token"
:
".D"
},
"6847"
:
{
"logprob"
:
-15.812501907348633
,
"rank"
:
4
,
"decoded_token"
:
".T"
},
"48426"
:
{
"logprob"
:
-16.812501907348633
,
"rank"
:
5
,
"decoded_token"
:
".The"
}},
{
"8342"
:
{
"logprob"
:
-0.5730464458465576
,
"rank"
:
1
,
"decoded_token"
:
" Sur"
},
"1349"
:
{
"logprob"
:
-1.6980464458465576
,
"rank"
:
2
,
"decoded_token"
:
" A"
},
"22468"
:
{
"logprob"
:
-2.5730464458465576
,
"rank"
:
3
,
"decoded_token"
:
" Several"
},
"1488"
:
{
"logprob"
:
-2.6980464458465576
,
"rank"
:
4
,
"decoded_token"
:
" W"
},
"15035"
:
{
"logprob"
:
-3.1980464458465576
,
"rank"
:
5
,
"decoded_token"
:
" People"
}},
{
"71284"
:
{
"logprob"
:
-0.0033258858602494
,
"rank"
:
1
,
"decoded_token"
:
"fers"
},
"1102"
:
{
"logprob"
:
-5.878325939178467
,
"rank"
:
2
,
"decoded_token"
:
"f"
},
"1726"
:
{
"logprob"
:
-7.628325939178467
,
"rank"
:
3
,
"decoded_token"
:
"fer"
},
"61888"
:
{
"logprob"
:
-12.253325462341309
,
"rank"
:
4
,
"decoded_token"
:
"fline"
},
"2119"
:
{
"logprob"
:
-13.003325462341309
,
"rank"
:
5
,
"decoded_token"
:
"fter"
}},
{
"7377"
:
{
"logprob"
:
-1.4996429681777954
,
"rank"
:
1
,
"decoded_token"
:
" wait"
},
"1584"
:
{
"logprob"
:
-1.7496429681777954
,
"rank"
:
2
,
"decoded_token"
:
" are"
},
"88014"
:
{
"logprob"
:
-1.9371429681777954
,
"rank"
:
3
,
"decoded_token"
:
" paddle"
},
"1294"
:
{
"logprob"
:
-1.9371429681777954
,
"rank"
:
4
,
"decoded_token"
:
" in"
},
"24434"
:
{
"logprob"
:
-2.187142848968506
,
"rank"
:
5
,
"decoded_token"
:
" ride"
}},
{
"1394"
:
{
"logprob"
:
-0.6126739382743835
,
"rank"
:
1
,
"decoded_token"
:
" for"
},
"1294"
:
{
"logprob"
:
-0.9876739382743835
,
"rank"
:
2
,
"decoded_token"
:
" in"
},
"1408"
:
{
"logprob"
:
-2.7376739978790283
,
"rank"
:
3
,
"decoded_token"
:
" on"
},
"6482"
:
{
"logprob"
:
-4.425173759460449
,
"rank"
:
4
,
"decoded_token"
:
" patient"
},
"1321"
:
{
"logprob"
:
-5.612673759460449
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"22140"
:
{
"logprob"
:
-0.00729279313236475
,
"rank"
:
1
,
"decoded_token"
:
" waves"
},
"1278"
:
{
"logprob"
:
-5.632292747497559
,
"rank"
:
2
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-5.757292747497559
,
"rank"
:
3
,
"decoded_token"
:
" a"
},
"39460"
:
{
"logprob"
:
-8.257292747497559
,
"rank"
:
4
,
"decoded_token"
:
" incoming"
},
"1321"
:
{
"logprob"
:
-9.757292747497559
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1294"
:
{
"logprob"
:
-0.3071398138999939
,
"rank"
:
1
,
"decoded_token"
:
" in"
},
"1408"
:
{
"logprob"
:
-2.1821398735046387
,
"rank"
:
2
,
"decoded_token"
:
" on"
},
"1513"
:
{
"logprob"
:
-2.4321398735046387
,
"rank"
:
3
,
"decoded_token"
:
" at"
},
"3016"
:
{
"logprob"
:
-3.6821398735046387
,
"rank"
:
4
,
"decoded_token"
:
" while"
},
"1435"
:
{
"logprob"
:
-3.8071398735046387
,
"rank"
:
5
,
"decoded_token"
:
" as"
}},
{
"1278"
:
{
"logprob"
:
-0.004646694287657738
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-6.1921467781066895
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-6.9421467781066895
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"40466"
:
{
"logprob"
:
-7.2546467781066895
,
"rank"
:
4
,
"decoded_token"
:
" shallow"
},
"26517"
:
{
"logprob"
:
-7.8796467781066895
,
"rank"
:
5
,
"decoded_token"
:
" calm"
}},
{
"27208"
:
{
"logprob"
:
-0.0658877044916153
,
"rank"
:
1
,
"decoded_token"
:
" ocean"
},
"7786"
:
{
"logprob"
:
-3.440887689590454
,
"rank"
:
2
,
"decoded_token"
:
" distance"
},
"5124"
:
{
"logprob"
:
-5.253387928009033
,
"rank"
:
3
,
"decoded_token"
:
" early"
},
"26517"
:
{
"logprob"
:
-5.315887928009033
,
"rank"
:
4
,
"decoded_token"
:
" calm"
},
"11196"
:
{
"logprob"
:
-5.378387928009033
,
"rank"
:
5
,
"decoded_token"
:
" sea"
}},
{
"1513"
:
{
"logprob"
:
-1.1504861116409302
,
"rank"
:
1
,
"decoded_token"
:
" at"
},
"1435"
:
{
"logprob"
:
-1.2754861116409302
,
"rank"
:
2
,
"decoded_token"
:
" as"
},
"3184"
:
{
"logprob"
:
-1.4004861116409302
,
"rank"
:
3
,
"decoded_token"
:
" during"
},
"3016"
:
{
"logprob"
:
-2.9004859924316406
,
"rank"
:
4
,
"decoded_token"
:
" while"
},
"6117"
:
{
"logprob"
:
-3.1504859924316406
,
"rank"
:
5
,
"decoded_token"
:
" near"
}},
{
"97558"
:
{
"logprob"
:
-0.12151996046304703
,
"rank"
:
1
,
"decoded_token"
:
" sunset"
},
"11729"
:
{
"logprob"
:
-2.8715200424194336
,
"rank"
:
2
,
"decoded_token"
:
" sun"
},
"1266"
:
{
"logprob"
:
-3.4965200424194336
,
"rank"
:
3
,
"decoded_token"
:
" d"
},
"54507"
:
{
"logprob"
:
-3.9965200424194336
,
"rank"
:
4
,
"decoded_token"
:
" dawn"
},
"1261"
:
{
"logprob"
:
-5.121520042419434
,
"rank"
:
5
,
"decoded_token"
:
" a"
}},
{
"1626"
:
{
"logprob"
:
-0.3073118329048157
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-2.182311773300171
,
"rank"
:
2
,
"decoded_token"
:
","
},
"3016"
:
{
"logprob"
:
-2.557311773300171
,
"rank"
:
3
,
"decoded_token"
:
" while"
},
"1454"
:
{
"logprob"
:
-3.432311773300171
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"6117"
:
{
"logprob"
:
-4.05731201171875
,
"rank"
:
5
,
"decoded_token"
:
" near"
}},
{
"1052"
:
{
"logprob"
:
-3.3378546504536644e-06
,
"rank"
:
1
,
"decoded_token"
:
"4"
},
"1051"
:
{
"logprob"
:
-13.25000286102295
,
"rank"
:
2
,
"decoded_token"
:
"3"
},
"1049"
:
{
"logprob"
:
-13.93750286102295
,
"rank"
:
3
,
"decoded_token"
:
"1"
},
"1053"
:
{
"logprob"
:
-14.43750286102295
,
"rank"
:
4
,
"decoded_token"
:
"5"
},
"1032"
:
{
"logprob"
:
-16.687503814697266
,
"rank"
:
5
,
"decoded_token"
:
" "
}},
{
"1046"
:
{
"logprob"
:
-1.6689286894688848e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.500001907348633
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"6847"
:
{
"logprob"
:
-16.437501907348633
,
"rank"
:
3
,
"decoded_token"
:
".T"
},
"1044"
:
{
"logprob"
:
-17.312501907348633
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1349"
:
{
"logprob"
:
-17.375001907348633
,
"rank"
:
5
,
"decoded_token"
:
" A"
}},
{
"1349"
:
{
"logprob"
:
-0.004292916506528854
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"2048"
:
{
"logprob"
:
-5.629292964935303
,
"rank"
:
2
,
"decoded_token"
:
" An"
},
"10638"
:
{
"logprob"
:
-7.879292964935303
,
"rank"
:
3
,
"decoded_token"
:
" Two"
},
"111463"
:
{
"logprob"
:
-10.004292488098145
,
"rank"
:
4
,
"decoded_token"
:
" Trees"
},
"1531"
:
{
"logprob"
:
-10.879292488098145
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"53301"
:
{
"logprob"
:
-1.5473321676254272
,
"rank"
:
1
,
"decoded_token"
:
" winding"
},
"15192"
:
{
"logprob"
:
-1.7348321676254272
,
"rank"
:
2
,
"decoded_token"
:
" narrow"
},
"47945"
:
{
"logprob"
:
-2.109832286834717
,
"rank"
:
3
,
"decoded_token"
:
" dirt"
},
"2169"
:
{
"logprob"
:
-2.609832286834717
,
"rank"
:
4
,
"decoded_token"
:
" ser"
},
"59396"
:
{
"logprob"
:
-2.672332286834717
,
"rank"
:
5
,
"decoded_token"
:
" gravel"
}},
{
"59396"
:
{
"logprob"
:
-0.8954829573631287
,
"rank"
:
1
,
"decoded_token"
:
" gravel"
},
"3549"
:
{
"logprob"
:
-1.1454830169677734
,
"rank"
:
2
,
"decoded_token"
:
" path"
},
"47945"
:
{
"logprob"
:
-1.6454830169677734
,
"rank"
:
3
,
"decoded_token"
:
" dirt"
},
"14801"
:
{
"logprob"
:
-3.2704830169677734
,
"rank"
:
4
,
"decoded_token"
:
" pathway"
},
"15551"
:
{
"logprob"
:
-4.270483016967773
,
"rank"
:
5
,
"decoded_token"
:
" stone"
}},
{
"3549"
:
{
"logprob"
:
-0.02117946185171604
,
"rank"
:
1
,
"decoded_token"
:
" path"
},
"14801"
:
{
"logprob"
:
-3.896179437637329
,
"rank"
:
2
,
"decoded_token"
:
" pathway"
},
"33659"
:
{
"logprob"
:
-8.14617919921875
,
"rank"
:
3
,
"decoded_token"
:
" trail"
},
"9480"
:
{
"logprob"
:
-9.64617919921875
,
"rank"
:
4
,
"decoded_token"
:
" road"
},
"7368"
:
{
"logprob"
:
-9.64617919921875
,
"rank"
:
5
,
"decoded_token"
:
"path"
}},
{
"13335"
:
{
"logprob"
:
-0.18962937593460083
,
"rank"
:
1
,
"decoded_token"
:
" leads"
},
"39985"
:
{
"logprob"
:
-2.752129316329956
,
"rank"
:
2
,
"decoded_token"
:
" cuts"
},
"1639"
:
{
"logprob"
:
-3.877129316329956
,
"rank"
:
3
,
"decoded_token"
:
" me"
},
"11500"
:
{
"logprob"
:
-3.939629316329956
,
"rank"
:
4
,
"decoded_token"
:
" runs"
},
"2645"
:
{
"logprob"
:
-4.189629554748535
,
"rank"
:
5
,
"decoded_token"
:
" through"
}},
{
"2645"
:
{
"logprob"
:
-0.05349981039762497
,
"rank"
:
1
,
"decoded_token"
:
" through"
},
"8994"
:
{
"logprob"
:
-4.053499698638916
,
"rank"
:
2
,
"decoded_token"
:
" towards"
},
"2396"
:
{
"logprob"
:
-4.303499698638916
,
"rank"
:
3
,
"decoded_token"
:
" between"
},
"2203"
:
{
"logprob"
:
-4.678499698638916
,
"rank"
:
4
,
"decoded_token"
:
" into"
},
"1317"
:
{
"logprob"
:
-5.678499698638916
,
"rank"
:
5
,
"decoded_token"
:
" to"
}},
{
"1261"
:
{
"logprob"
:
-0.017386287450790405
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"11223"
:
{
"logprob"
:
-4.892386436462402
,
"rank"
:
2
,
"decoded_token"
:
" green"
},
"1295"
:
{
"logprob"
:
-5.017386436462402
,
"rank"
:
3
,
"decoded_token"
:
" l"
},
"23170"
:
{
"logprob"
:
-6.642386436462402
,
"rank"
:
4
,
"decoded_token"
:
" grass"
},
"1420"
:
{
"logprob"
:
-7.267386436462402
,
"rank"
:
5
,
"decoded_token"
:
" an"
}},
{
"1295"
:
{
"logprob"
:
-0.9453322887420654
,
"rank"
:
1
,
"decoded_token"
:
" l"
},
"11223"
:
{
"logprob"
:
-1.3203322887420654
,
"rank"
:
2
,
"decoded_token"
:
" green"
},
"23170"
:
{
"logprob"
:
-1.9453322887420654
,
"rank"
:
3
,
"decoded_token"
:
" grass"
},
"12097"
:
{
"logprob"
:
-2.4453322887420654
,
"rank"
:
4
,
"decoded_token"
:
" park"
},
"26428"
:
{
"logprob"
:
-3.3203322887420654
,
"rank"
:
5
,
"decoded_token"
:
" garden"
}},
{
"3506"
:
{
"logprob"
:
-6.556489552167477e-06
,
"rank"
:
1
,
"decoded_token"
:
"ush"
},
"1374"
:
{
"logprob"
:
-12.000006675720215
,
"rank"
:
2
,
"decoded_token"
:
"us"
},
"90716"
:
{
"logprob"
:
-15.625006675720215
,
"rank"
:
3
,
"decoded_token"
:
"USH"
},
"16938"
:
{
"logprob"
:
-15.875006675720215
,
"rank"
:
4
,
"decoded_token"
:
"usher"
},
"13326"
:
{
"logprob"
:
-17.1875057220459
,
"rank"
:
5
,
"decoded_token"
:
"inden"
}},
{
"11223"
:
{
"logprob"
:
-0.3668670654296875
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"1044"
:
{
"logprob"
:
-1.3668670654296875
,
"rank"
:
2
,
"decoded_token"
:
","
},
"26428"
:
{
"logprob"
:
-3.4918670654296875
,
"rank"
:
3
,
"decoded_token"
:
" garden"
},
"12097"
:
{
"logprob"
:
-4.1168670654296875
,
"rank"
:
4
,
"decoded_token"
:
" park"
},
"23170"
:
{
"logprob"
:
-5.8668670654296875
,
"rank"
:
5
,
"decoded_token"
:
" grass"
}},
{
"12097"
:
{
"logprob"
:
-0.5530153512954712
,
"rank"
:
1
,
"decoded_token"
:
" park"
},
"3727"
:
{
"logprob"
:
-2.0530152320861816
,
"rank"
:
2
,
"decoded_token"
:
" field"
},
"28035"
:
{
"logprob"
:
-2.1780152320861816
,
"rank"
:
3
,
"decoded_token"
:
" landscape"
},
"26428"
:
{
"logprob"
:
-2.3030152320861816
,
"rank"
:
4
,
"decoded_token"
:
" garden"
},
"4457"
:
{
"logprob"
:
-2.8030152320861816
,
"rank"
:
5
,
"decoded_token"
:
" area"
}},
{
"1046"
:
{
"logprob"
:
-0.7924000024795532
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-1.2924000024795532
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"8994"
:
{
"logprob"
:
-2.7923998832702637
,
"rank"
:
3
,
"decoded_token"
:
" towards"
},
"54410"
:
{
"logprob"
:
-3.5423998832702637
,
"rank"
:
4
,
"decoded_token"
:
" lined"
},
"2425"
:
{
"logprob"
:
-3.5423998832702637
,
"rank"
:
5
,
"decoded_token"
:
" under"
}},
{
"2"
:
{
"logprob"
:
-1.9073468138230965e-06
,
"rank"
:
1
,
"decoded_token"
:
"</s>"
},
"1032"
:
{
"logprob"
:
-13.250001907348633
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1256"
:
{
"logprob"
:
-16.250001907348633
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-19.000001907348633
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1319"
:
{
"logprob"
:
-20.000001907348633
,
"rank"
:
5
,
"decoded_token"
:
" ("
}}]]]
\ No newline at end of file
tests/models/multimodal/processing/test_common.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
import
copy
from
functools
import
partial
from
typing
import
Optional
,
Union
import
numpy
as
np
import
pytest
from
mistral_common.protocol.instruct.messages
import
(
ImageChunk
,
TextChunk
,
UserMessage
)
from
mistral_common.protocol.instruct.request
import
ChatCompletionRequest
from
PIL
import
Image
from
transformers
import
PreTrainedTokenizer
,
PreTrainedTokenizerFast
from
vllm.config
import
ModelConfig
from
vllm.inputs
import
InputProcessingContext
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.processing
import
ProcessingCache
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalDataDict
from
vllm.multimodal.inputs
import
MultiModalInputs
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
,
ProcessingCache
from
vllm.transformers_utils.tokenizer
import
(
MistralTokenizer
,
cached_tokenizer_from_config
)
from
....multimodal.utils
import
random_audio
,
random_image
,
random_video
from
...registry
import
HF_EXAMPLE_MODELS
...
...
@@ -21,6 +29,7 @@ def _test_processing_correctness(
hit_rate
:
float
,
num_batches
:
int
,
simplify_rate
:
float
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model_id
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
...
...
@@ -29,8 +38,8 @@ def _test_processing_correctness(
model_config
=
ModelConfig
(
model_id
,
task
=
"auto"
,
tokenizer
=
model_id
,
tokenizer_mode
=
"auto"
,
tokenizer
=
model_info
.
tokenizer
or
model_id
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
trust_remote_code
=
model_info
.
trust_remote_code
,
seed
=
0
,
dtype
=
"float16"
,
...
...
@@ -45,7 +54,7 @@ def _test_processing_correctness(
tokenizer
=
cached_tokenizer_from_config
(
model_config
),
)
# Ensure that it can fit all of the data
cache
=
ProcessingCache
(
capacity
=
1
<<
30
)
cache
=
ProcessingCache
(
capacity
_gb
=
2048
)
processing_info
=
factories
.
info
(
ctx
)
supported_mm_limits
=
processing_info
.
get_supported_mm_limits
()
...
...
@@ -82,14 +91,6 @@ def _test_processing_correctness(
partial
(
random_audio
,
rng
,
min_len
=
512
,
max_len
=
1024
,
sr
=
16000
),
}
tokenizer_encode_kwargs
=
{}
if
model_config
.
hf_config
.
model_type
==
"mllama"
:
# For Mllama, tokenizer will always add bos_token at the beginning of
# prompt by default, causing hf_processor outputs incorrect token ids.
# So we need use `add_special_tokens=False` here to leave bos_token
# to be added by the processor.
tokenizer_encode_kwargs
=
{
"add_special_tokens"
:
False
}
for
batch_idx
in
range
(
num_batches
):
mm_data
=
{
k
:
...
...
@@ -112,37 +113,131 @@ def _test_processing_correctness(
elif
len
(
mm_data
[
k
])
==
1
:
mm_data
[
k
]
=
mm_data
[
k
][
0
]
baseline_result
=
baseline_processor
.
apply
(
prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_result
=
cached_processor
.
apply
(
prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
if
isinstance
(
tokenizer
,
MistralTokenizer
):
_test_processing_correctness_mistral
(
model_config
,
tokenizer
,
prompt
,
mm_data
,
baseline_processor
,
cached_processor
,
batch_idx
,
ignore_mm_keys
=
ignore_mm_keys
,
)
else
:
_test_processing_correctness_hf
(
model_config
,
tokenizer
,
prompt
,
mm_data
,
baseline_processor
,
cached_processor
,
batch_idx
,
ignore_mm_keys
=
ignore_mm_keys
,
)
def
_test_processing_correctness_hf
(
model_config
:
ModelConfig
,
tokenizer
:
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
],
prompt
:
str
,
mm_data
:
MultiModalDataDict
,
baseline_processor
:
BaseMultiModalProcessor
,
cached_processor
:
BaseMultiModalProcessor
,
batch_idx
:
int
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
if
model_config
.
hf_config
.
model_type
in
(
"mllama"
,
"whisper"
,
"ultravox"
):
# For some multimodal models, tokenizer will always add bos_token
# at the beginning of prompt by default, causing hf_processor outputs
# incorrect token ids. So we need use `add_special_tokens=False` here
# to leave bos_token to be added by the processor.
token_prompt
=
tokenizer
.
encode
(
prompt
,
add_special_tokens
=
False
)
else
:
token_prompt
=
tokenizer
.
encode
(
prompt
)
baseline_result
=
baseline_processor
.
apply
(
prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_result
=
cached_processor
.
apply
(
prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
assert
_inputs_equal
(
baseline_result
,
cached_result
,
ignore_mm_keys
,
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
assert
baseline_result
==
cached_result
,
(
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
)
baseline_tokenized_result
=
baseline_processor
.
apply
(
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
baseline_tokenized_result
=
baseline_processor
.
apply
(
tokenizer
.
encode
(
prompt
,
**
tokenizer_encode_kwargs
)
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{}
,
)
assert
_inputs_equal
(
baseline_result
,
baseline_tokenized_result
,
ignore_mm_keys
,
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
assert
baseline_result
==
baseline_tokenized_result
,
(
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
)
cached_tokenized_result
=
cached_processor
.
apply
(
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_tokenized_result
=
cached_processor
.
apply
(
tokenizer
.
encode
(
prompt
,
**
tokenizer_encode_kwargs
)
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{}
,
)
assert
_inputs_equal
(
cached_result
,
cached_tokenized_result
,
ignore_mm_keys
,
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
assert
cached_result
==
cached_tokenized_result
,
(
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
)
def
_test_processing_correctness_mistral
(
model_config
:
ModelConfig
,
tokenizer
:
MistralTokenizer
,
prompt
:
str
,
mm_data
:
MultiModalDataDict
,
baseline_processor
:
BaseMultiModalProcessor
,
cached_processor
:
BaseMultiModalProcessor
,
batch_idx
:
int
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
images
=
mm_data
.
get
(
"image"
,
[])
if
not
isinstance
(
images
,
list
):
images
=
[
images
]
request
=
ChatCompletionRequest
(
messages
=
[
UserMessage
(
content
=
[
TextChunk
(
text
=
prompt
),
*
(
ImageChunk
(
image
=
image
)
for
image
in
images
),
]),
])
res
=
tokenizer
.
mistral
.
encode_chat_completion
(
request
)
token_prompt
=
res
.
tokens
# Mistral chat outputs tokens directly, rather than text prompts
baseline_tokenized_result
=
baseline_processor
.
apply
(
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_tokenized_result
=
cached_processor
.
apply
(
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
assert
_inputs_equal
(
baseline_tokenized_result
,
cached_tokenized_result
,
ignore_mm_keys
,
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
# yapf: disable
...
...
@@ -151,7 +246,9 @@ def _test_processing_correctness(
"Salesforce/blip2-opt-2.7b"
,
"facebook/chameleon-7b"
,
"deepseek-ai/deepseek-vl2-tiny"
,
"microsoft/Florence-2-base"
,
"adept/fuyu-8b"
,
"google/gemma-3-4b-it"
,
"THUDM/glm-4v-9b"
,
"h2oai/h2ovl-mississippi-800m"
,
"OpenGVLab/InternVL2-1B"
,
...
...
@@ -162,6 +259,7 @@ def _test_processing_correctness(
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
"meta-llama/Llama-3.2-11B-Vision-Instruct"
,
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
"mistralai/Pixtral-12B-2409"
,
"mistral-community/pixtral-12b"
,
"openbmb/MiniCPM-o-2_6"
,
"openbmb/MiniCPM-V-2_6"
,
...
...
@@ -173,6 +271,9 @@ def _test_processing_correctness(
"Qwen/Qwen2.5-VL-3B-Instruct"
,
"Qwen/Qwen2-Audio-7B-Instruct"
,
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
"openai/whisper-large-v3"
,
"google/paligemma-3b-mix-224"
,
"google/paligemma2-3b-ft-docci-448"
,
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
...
...
@@ -184,16 +285,24 @@ def test_processing_correctness(
num_batches
:
int
,
simplify_rate
:
float
,
):
ignore_mm_keys
=
None
if
'ultravox'
in
model_id
:
# In Ultravox, the audio_features can be different depending on padding
# The slight difference should not be a problem though, since
# attention_mask lets us ignore the difference.
ignore_mm_keys
=
[
'audio_features'
]
_test_processing_correctness
(
model_id
,
hit_rate
=
hit_rate
,
num_batches
=
num_batches
,
simplify_rate
=
simplify_rate
,
ignore_mm_keys
=
ignore_mm_keys
,
)
# yapf: disable
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"microsoft/Phi-3-vision-
128k-
instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"microsoft/Phi-3
.5
-vision-instruct"
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"simplify_rate"
,
[
1.0
])
...
...
@@ -217,3 +326,40 @@ def test_processing_correctness_phi3v(
num_batches
=
num_batches
,
simplify_rate
=
simplify_rate
,
)
def
_inputs_equal
(
a
:
MultiModalInputs
,
b
:
MultiModalInputs
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
return
_drop_mm_kwargs_keys
(
a
,
ignore_mm_keys
)
==
_drop_mm_kwargs_keys
(
b
,
ignore_mm_keys
)
def
_drop_mm_kwargs_keys
(
result
:
MultiModalInputs
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
)
->
MultiModalInputs
:
"""Drop specified keys from result['mm_kwargs'].
This is mainly to avoid doing exact match of audio_features in ultravox.
Args:
result: Result to drop keys from
ignore_mm_keys: List of keys to ignore, e.g. ['audio_features']
"""
if
not
ignore_mm_keys
:
return
result
if
'mm_kwargs'
in
result
:
result
=
copy
.
deepcopy
(
result
)
mm_kwargs
=
result
[
'mm_kwargs'
]
for
key
in
ignore_mm_keys
:
mm_kwargs
.
pop
(
key
,
None
)
for
items
in
mm_kwargs
.
_items_by_modality
.
values
():
for
item
in
items
:
for
key
in
ignore_mm_keys
:
item
.
pop
(
key
,
None
)
return
result
tests/models/multimodal/processing/test_h2ovl.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
"""Tests for H2OVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
pytest
from
PIL
import
Image
...
...
@@ -95,14 +96,14 @@ def _run_check(
tokenizer
=
processor
.
info
.
get_tokenizer
()
config
=
processor
.
info
.
get_hf_config
()
prompt
=
"<image>"
*
len
(
images
)
mm_data
=
{
"image"
:
images
}
total_expected_num_patches
=
sum
(
_get_expected_num_patches
(
config
,
image
,
len
(
images
),
min_num
,
max_num
)
for
image
in
images
)
processed_inputs
=
processor
.
apply
(
"<image>"
*
len
(
images
),
mm_data
,
mm_processor_kwargs
)
processed_inputs
=
processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
)
# Ensure we have the right number of placeholders per num_crops size
image_token_id
=
tokenizer
.
convert_tokens_to_ids
(
"<IMG_CONTEXT>"
)
...
...
@@ -151,9 +152,7 @@ def test_processor_override(
}
ctx
=
build_model_context
(
model_name
=
model_id
,
tokenizer_name
=
model_id
,
trust_remote_code
=
True
,
model_id
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
...
...
tests/models/multimodal/processing/test_idefics3.py
View file @
469e903b
...
...
@@ -11,10 +11,8 @@ from ....conftest import _ImageAssets
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceM4/Idefics3-8B-Llama3"
)]
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceM4/Idefics3-8B-Llama3"
)])
# yapf: disable
@
pytest
.
mark
.
parametrize
(
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
),
...
...
@@ -27,7 +25,7 @@ models = [os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3")]
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
def
test_processor_override
(
image_assets
:
_ImageAssets
,
model
:
str
,
model
_id
:
str
,
mm_processor_kwargs
:
dict
[
str
,
object
],
expected_toks_per_img
:
int
,
num_imgs
:
int
,
...
...
@@ -38,9 +36,7 @@ def test_processor_override(
# in this test and assume that the kwargs will be correctly expanded by
# the partial when calling the custom input processor.
ctx
=
build_model_context
(
model_name
=
model
,
tokenizer_name
=
model
,
trust_remote_code
=
True
,
model_id
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
...
...
tests/models/multimodal/processing/test_internvl.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
"""Tests for InternVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
os
import
pytest
...
...
@@ -57,14 +58,14 @@ def _run_check(
tokenizer
=
processor
.
info
.
get_tokenizer
()
config
=
processor
.
info
.
get_hf_config
()
prompt
=
"<image>"
*
len
(
images
)
mm_data
=
{
"image"
:
images
}
total_expected_num_patches
=
sum
(
_get_expected_num_patches
(
config
,
image
,
len
(
images
),
min_num
,
max_num
)
for
image
in
images
)
processed_inputs
=
processor
.
apply
(
"<image>"
*
len
(
images
),
mm_data
,
mm_processor_kwargs
)
processed_inputs
=
processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
)
# Ensure we have the right number of placeholders per num_crops size
image_token_id
=
tokenizer
.
convert_tokens_to_ids
(
"<IMG_CONTEXT>"
)
...
...
@@ -110,9 +111,7 @@ def test_processor_override(
}
ctx
=
build_model_context
(
model_name
=
model_id
,
tokenizer_name
=
model_id
,
trust_remote_code
=
True
,
model_id
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
...
...
Prev
1
…
20
21
22
23
24
25
26
27
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment