Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
469e903b
Commit
469e903b
authored
Mar 28, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.8.2' into v0.8.2-dev
parents
389ebcf7
25f560a6
Changes
535
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
624 additions
and
359 deletions
+624
-359
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
...els/decoder_only/vision_language/vlm_utils/model_utils.py
+81
-54
tests/models/decoder_only/vision_language/vlm_utils/runners.py
.../models/decoder_only/vision_language/vlm_utils/runners.py
+10
-11
tests/models/decoder_only/vision_language/vlm_utils/types.py
tests/models/decoder_only/vision_language/vlm_utils/types.py
+20
-27
tests/models/embedding/language/test_cls_models.py
tests/models/embedding/language/test_cls_models.py
+15
-9
tests/models/embedding/language/test_embedding.py
tests/models/embedding/language/test_embedding.py
+13
-11
tests/models/embedding/language/test_gritlm.py
tests/models/embedding/language/test_gritlm.py
+56
-47
tests/models/embedding/utils.py
tests/models/embedding/utils.py
+3
-3
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
+34
-30
tests/models/embedding/vision_language/test_llava_next.py
tests/models/embedding/vision_language/test_llava_next.py
+23
-9
tests/models/embedding/vision_language/test_phi3v.py
tests/models/embedding/vision_language/test_phi3v.py
+4
-7
tests/models/encoder_decoder/audio_language/test_whisper.py
tests/models/encoder_decoder/audio_language/test_whisper.py
+2
-2
tests/models/encoder_decoder/language/test_bart.py
tests/models/encoder_decoder/language/test_bart.py
+5
-5
tests/models/encoder_decoder/vision_language/test_florence2.py
.../models/encoder_decoder/vision_language/test_florence2.py
+90
-53
tests/models/encoder_decoder/vision_language/test_mllama.py
tests/models/encoder_decoder/vision_language/test_mllama.py
+67
-30
tests/models/fixtures/mistral_small_3_chat.json
tests/models/fixtures/mistral_small_3_chat.json
+1
-0
tests/models/fixtures/pixtral_chat_engine.json
tests/models/fixtures/pixtral_chat_engine.json
+0
-1
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+187
-41
tests/models/multimodal/processing/test_h2ovl.py
tests/models/multimodal/processing/test_h2ovl.py
+5
-6
tests/models/multimodal/processing/test_idefics3.py
tests/models/multimodal/processing/test_idefics3.py
+3
-7
tests/models/multimodal/processing/test_internvl.py
tests/models/multimodal/processing/test_internvl.py
+5
-6
No files found.
Too many changes to show.
To preserve performance only
535 of 535+
files are displayed.
Plain diff
Email patch
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
View file @
469e903b
...
@@ -6,16 +6,15 @@ typically specific to a small subset of models.
...
@@ -6,16 +6,15 @@ typically specific to a small subset of models.
import
re
import
re
import
types
import
types
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
Callable
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Optional
,
Union
import
torch
import
torch
from
PIL.Image
import
Image
from
PIL.Image
import
Image
from
transformers
import
(
AutoConfig
,
AutoTokenizer
,
Batch
Encoding
,
from
transformers
import
(
AutoConfig
,
AutoTokenizer
,
Batch
Feature
,
GenerationConfig
)
GenerationConfig
)
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.transformers_utils.tokenizer
import
patch_padding_side
from
vllm.transformers_utils.tokenizer
import
patch_padding_side
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
.....conftest
import
HfRunner
,
ImageAsset
,
_ImageAssets
from
.....conftest
import
HfRunner
,
ImageAsset
,
_ImageAssets
from
.types
import
RunnerOutput
from
.types
import
RunnerOutput
...
@@ -49,7 +48,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
...
@@ -49,7 +48,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
def
qwen_vllm_to_hf_output
(
def
qwen_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
output_ids
,
output_str
,
out_logprobs
=
vllm_output
...
@@ -60,7 +59,7 @@ def qwen_vllm_to_hf_output(
...
@@ -60,7 +59,7 @@ def qwen_vllm_to_hf_output(
def
qwen2_vllm_to_hf_output
(
def
qwen2_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
output_ids
,
output_str
,
out_logprobs
=
vllm_output
...
@@ -78,7 +77,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
...
@@ -78,7 +77,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
def
llava_video_vllm_to_hf_output
(
def
llava_video_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
config
=
AutoConfig
.
from_pretrained
(
model
)
config
=
AutoConfig
.
from_pretrained
(
model
)
mm_token_id
=
config
.
video_token_index
mm_token_id
=
config
.
video_token_index
return
_llava_vllm_to_hf_output
(
vllm_output
,
model
,
mm_token_id
)
return
_llava_vllm_to_hf_output
(
vllm_output
,
model
,
mm_token_id
)
...
@@ -211,43 +210,9 @@ def get_llava_embeddings(image_assets: _ImageAssets):
...
@@ -211,43 +210,9 @@ def get_llava_embeddings(image_assets: _ImageAssets):
return
[
asset
.
image_embeds
for
asset
in
image_assets
]
return
[
asset
.
image_embeds
for
asset
in
image_assets
]
####### postprocessors to run on HF BatchEncoding
def
cast_dtype_post_processor
(
hf_inp_key
:
str
)
->
Callable
[[
BatchEncoding
,
str
],
BatchEncoding
]:
"""Gets a handle to a post processor which converts a given key into a
target data type."""
def
process
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
torch_dtype
=
STR_DTYPE_TO_TORCH_DTYPE
[
dtype
]
hf_inputs
[
hf_inp_key
]
=
hf_inputs
[
hf_inp_key
].
to
(
torch_dtype
)
return
hf_inputs
return
process
def
ignore_inputs_post_processor
(
hf_inp_key
:
str
)
->
Callable
[[
BatchEncoding
,
str
],
BatchEncoding
]:
"""Gets a handle to a post processor which ignores a given key."""
def
process
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
del
hf_inputs
[
hf_inp_key
]
return
hf_inputs
return
process
def
wrap_inputs_post_processor
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
return
{
"model_inputs"
:
hf_inputs
}
def
molmo_post_processor
(
hf_inputs
:
BatchEncoding
,
dtype
:
str
):
hf_inputs
=
cast_dtype_post_processor
(
"images"
)(
hf_inputs
,
dtype
)
return
{
k
:
v
.
unsqueeze
(
0
)
for
k
,
v
in
hf_inputs
.
items
()}
####### Prompt path encoders for models that need models on disk
####### Prompt path encoders for models that need models on disk
def
qwen_prompt_path_encoder
(
def
qwen_prompt_path_encoder
(
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
L
ist
[
ImageAsset
],
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
])
->
str
:
_ImageAssets
])
->
str
:
"""Given a temporary dir path, export one or more image assets into the
"""Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that
tempdir & replace its contents with the local path to the string so that
...
@@ -257,7 +222,7 @@ def qwen_prompt_path_encoder(
...
@@ -257,7 +222,7 @@ def qwen_prompt_path_encoder(
Args:
Args:
tmp_path: Tempdir for test under consideration.
tmp_path: Tempdir for test under consideration.
prompt: Prompt with image placeholders.
prompt: Prompt with image placeholders.
assets:
L
ist of image assets whose len equals the num placeholders.
assets:
l
ist of image assets whose len equals the num placeholders.
"""
"""
# Ensure that the number of placeholders matches the number of assets;
# Ensure that the number of placeholders matches the number of assets;
# If this is not true, the test is probably written incorrectly.
# If this is not true, the test is probably written incorrectly.
...
@@ -295,8 +260,7 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -295,8 +260,7 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
for
k
in
inputs
.
keys
()
# noqa
for
k
in
inputs
.
keys
()
# noqa
if
k
not
in
(
"seq_lens"
,
"sft_format"
)
if
k
not
in
(
"seq_lens"
,
"sft_format"
)
}
}
inputs
=
BatchEncoding
(
data
=
inputs
,
tensor_type
=
"pt"
)
return
BatchFeature
(
data
=
inputs
,
tensor_type
=
"pt"
)
return
inputs
hf_model
.
processor
=
processor
hf_model
.
processor
=
processor
hf_model
.
model
.
get_output_embeddings
=
lambda
:
\
hf_model
.
model
.
get_output_embeddings
=
lambda
:
\
...
@@ -304,8 +268,20 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -304,8 +268,20 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return
hf_model
return
hf_model
def
glm_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
def
gemma3_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
"""Patches and returns an instance of the HfRunner to use for GLM4."""
"""Patches and returns an instance of the HfRunner to use for Gemma 3."""
hf_processor
=
hf_model
.
processor
def
processor
(
*
args
,
**
kwargs
):
return
hf_processor
(
*
args
,
do_pan_and_scan
=
True
,
**
kwargs
)
hf_model
.
processor
=
processor
return
hf_model
def
glm4v_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
"""Patches and returns an instance of the HfRunner to use for GLM4V."""
hf_processor
=
hf_model
.
processor
hf_processor
=
hf_model
.
processor
patch_padding_side
(
hf_processor
)
patch_padding_side
(
hf_processor
)
...
@@ -313,12 +289,20 @@ def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -313,12 +289,20 @@ def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
if
images
is
None
:
if
images
is
None
:
return
hf_processor
(
*
args
,
**
kwargs
)
return
hf_processor
(
*
args
,
**
kwargs
)
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
contents
=
re
.
findall
(
r
"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>"
,
text
,
)
assert
len
(
contents
)
==
len
(
images
)
return
hf_processor
.
apply_chat_template
(
return
hf_processor
.
apply_chat_template
(
[{
[{
"role"
:
"user"
,
"role"
:
"user"
,
"image"
:
image
s
,
"image"
:
image
,
"content"
:
te
x
t
"content"
:
con
te
n
t
}],
}
for
image
,
content
in
zip
(
images
,
contents
)
],
add_generation_prompt
=
True
,
add_generation_prompt
=
True
,
tokenize
=
True
,
tokenize
=
True
,
return_dict
=
True
,
return_dict
=
True
,
...
@@ -350,7 +334,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -350,7 +334,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
self
.
image_size
=
self
.
vision_config
.
image_size
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
L
ist
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
l
ist
[
Image
]],
**
kwargs
):
**
kwargs
):
# yapf: disable
# yapf: disable
from
vllm.model_executor.models.h2ovl
import
(
from
vllm.model_executor.models.h2ovl
import
(
...
@@ -410,7 +394,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -410,7 +394,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
self
.
image_size
=
self
.
vision_config
.
image_size
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
L
ist
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
l
ist
[
Image
]],
**
kwargs
):
**
kwargs
):
from
vllm.model_executor.models.internvl
import
(
from
vllm.model_executor.models.internvl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
...
@@ -509,10 +493,52 @@ def mantis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -509,10 +493,52 @@ def mantis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return
hf_model
return
hf_model
def
minicpm
o
_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
def
minicpm
v_25
_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
orig_generate
=
hf_model
.
model
.
generate
orig_generate
=
hf_model
.
model
.
generate
def
_generate
(
self
,
*
args
,
**
kwargs
):
def
_generate
(
self
,
*
args
,
input_ids
=
None
,
pixel_values
=
None
,
image_sizes
=
None
,
image_bound
=
None
,
tgt_sizes
=
None
,
**
kwargs
,
):
model_inputs
=
{
"input_ids"
:
input_ids
,
"pixel_values"
:
pixel_values
,
"image_sizes"
:
image_sizes
,
"image_bound"
:
image_bound
,
"tgt_sizes"
:
tgt_sizes
,
}
for
k
in
list
(
model_inputs
.
keys
()):
if
model_inputs
[
k
]
is
None
:
model_inputs
.
pop
(
k
)
return
orig_generate
(
model_inputs
,
*
args
,
decode_text
=
False
,
**
kwargs
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
return
hf_model
def
minicpmo_26_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
orig_generate
=
hf_model
.
model
.
generate
def
_generate
(
self
,
*
args
,
image_sizes
=
None
,
**
kwargs
):
return
orig_generate
(
*
args
,
decode_text
=
False
,
**
kwargs
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
return
hf_model
def
minicpmv_26_patch_hf_runner
(
hf_model
:
HfRunner
)
->
HfRunner
:
orig_generate
=
hf_model
.
model
.
generate
def
_generate
(
self
,
*
args
,
image_sizes
=
None
,
**
kwargs
):
return
orig_generate
(
*
args
,
decode_text
=
False
,
**
kwargs
)
return
orig_generate
(
*
args
,
decode_text
=
False
,
**
kwargs
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
hf_model
.
model
.
generate
=
types
.
MethodType
(
_generate
,
hf_model
.
model
)
...
@@ -531,10 +557,11 @@ def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -531,10 +557,11 @@ def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
_generate
(
self
,
max_new_tokens
=
None
,
do_sample
=
None
,
**
kwargs
):
def
_generate
(
self
,
max_new_tokens
=
None
,
do_sample
=
None
,
**
kwargs
):
batch
=
{
batch
=
{
k
:
kwargs
.
pop
(
k
)
k
:
kwargs
.
pop
(
k
)
.
unsqueeze
(
0
)
for
k
in
(
"input_ids"
,
"images"
,
"image_input_idx"
,
"image_masks"
)
for
k
in
(
"input_ids"
,
"images"
,
"image_input_idx"
,
"image_masks"
)
if
k
in
kwargs
if
k
in
kwargs
}
}
batch
=
BatchFeature
(
batch
).
to
(
dtype
=
self
.
dtype
)
return
self
.
generate_from_batch
(
return
self
.
generate_from_batch
(
batch
,
batch
,
...
...
tests/models/decoder_only/vision_language/vlm_utils/runners.py
View file @
469e903b
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
types / modalities.
types / modalities.
"""
"""
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
Type
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.
import
builders
,
core
from
.
import
builders
,
core
...
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
...
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
####### Entrypoints for running different test types
####### Entrypoints for running different test types
def
run_single_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
def
run_single_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
...
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
...
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_multi_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
def
run_multi_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
...
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
...
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_embedding_test
(
*
,
model_test_info
:
VLMTestInfo
,
def
run_embedding_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
...
@@ -85,8 +84,8 @@ def run_video_test(
...
@@ -85,8 +84,8 @@ def run_video_test(
*
,
*
,
model_test_info
:
VLMTestInfo
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
video_assets
:
_VideoAssets
,
video_assets
:
_VideoAssets
,
):
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
...
@@ -111,8 +110,8 @@ def run_video_test(
...
@@ -111,8 +110,8 @@ def run_video_test(
def
run_custom_inputs_test
(
*
,
model_test_info
:
VLMTestInfo
,
def
run_custom_inputs_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
]):
vllm_runner
:
t
ype
[
VllmRunner
]):
# Custom test cases can provide inputs directly, but they need to
# Custom test cases can provide inputs directly, but they need to
# explicitly provide a CustomTestConfig, which wraps the inputs and
# explicitly provide a CustomTestConfig, which wraps the inputs and
# the limit_mm_per_prompt
# the limit_mm_per_prompt
...
...
tests/models/decoder_only/vision_language/vlm_utils/types.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Types for writing multimodal model tests."""
"""Types for writing multimodal model tests."""
from
collections.abc
import
Iterable
from
enum
import
Enum
from
enum
import
Enum
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
(
Any
,
Callable
,
Dict
,
Iterable
,
List
,
NamedTuple
,
Optional
,
from
typing
import
Any
,
Callable
,
NamedTuple
,
Optional
,
Union
Tuple
,
Type
,
Union
)
import
torch
import
torch
from
PIL.Image
import
Image
from
PIL.Image
import
Image
from
pytest
import
MarkDecorator
from
pytest
import
MarkDecorator
from
transformers
import
AutoModelForCausalLM
,
BatchEncoding
from
transformers
import
AutoModelForCausalLM
from
transformers.models.auto.auto_factory
import
_BaseAutoModelClass
from
transformers.models.auto.auto_factory
import
_BaseAutoModelClass
from
vllm.config
import
TaskOption
from
vllm.config
import
TaskOption
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.utils
import
identity
from
.....conftest
import
IMAGE_ASSETS
,
HfRunner
,
ImageAsset
,
_ImageAssets
from
.....conftest
import
IMAGE_ASSETS
,
HfRunner
,
ImageAsset
,
_ImageAssets
from
....utils
import
check_logprobs_close
from
....utils
import
check_logprobs_close
...
@@ -35,7 +34,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
...
@@ -35,7 +34,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.25
,
0.5
,
1.0
)]
IMAGE_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.25
,
0.5
,
1.0
)]
EMBEDDING_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
)]
EMBEDDING_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
)]
RunnerOutput
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
RunnerOutput
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
# yapf: enable
# yapf: enable
...
@@ -53,8 +52,8 @@ class SizeType(Enum):
...
@@ -53,8 +52,8 @@ class SizeType(Enum):
class
CustomTestOptions
(
NamedTuple
):
class
CustomTestOptions
(
NamedTuple
):
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
L
ist
[
Union
[
L
ist
[
Image
],
Image
]]]]
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
l
ist
[
Union
[
l
ist
[
Image
],
Image
]]]]
limit_mm_per_prompt
:
D
ict
[
str
,
int
]
limit_mm_per_prompt
:
d
ict
[
str
,
int
]
# kwarg under which multimodal data is passed to vllm/hf runner instances.
# kwarg under which multimodal data is passed to vllm/hf runner instances.
runner_mm_key
:
str
=
"images"
runner_mm_key
:
str
=
"images"
...
@@ -63,13 +62,13 @@ class ImageSizeWrapper(NamedTuple):
...
@@ -63,13 +62,13 @@ class ImageSizeWrapper(NamedTuple):
type
:
SizeType
type
:
SizeType
# A size factor is a wrapper of 0+ floats,
# A size factor is a wrapper of 0+ floats,
# while a fixed size contains an iterable of integer pairs
# while a fixed size contains an iterable of integer pairs
data
:
Union
[
Iterable
[
float
],
Iterable
[
T
uple
[
int
,
int
]]]
data
:
Union
[
Iterable
[
float
],
Iterable
[
t
uple
[
int
,
int
]]]
class
VLMTestInfo
(
NamedTuple
):
class
VLMTestInfo
(
NamedTuple
):
"""Holds the configuration for 1+ tests for one model architecture."""
"""Holds the configuration for 1+ tests for one model architecture."""
models
:
L
ist
[
str
]
models
:
l
ist
[
str
]
test_type
:
Union
[
VLMTestType
,
Iterable
[
VLMTestType
]]
test_type
:
Union
[
VLMTestType
,
Iterable
[
VLMTestType
]]
# Should be None only if this is a CUSTOM_INPUTS test
# Should be None only if this is a CUSTOM_INPUTS test
...
@@ -97,24 +96,19 @@ class VLMTestInfo(NamedTuple):
...
@@ -97,24 +96,19 @@ class VLMTestInfo(NamedTuple):
max_num_seqs
:
int
=
256
max_num_seqs
:
int
=
256
task
:
TaskOption
=
"auto"
task
:
TaskOption
=
"auto"
tensor_parallel_size
:
int
=
1
tensor_parallel_size
:
int
=
1
vllm_runner_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
vllm_runner_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Optional callable which gets a list of token IDs from the model tokenizer
# Optional callable which gets a list of token IDs from the model tokenizer
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]]
=
None
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]]
=
None
# Optional list of strings to stop generation, useful when stop tokens are
# Optional list of strings to stop generation, useful when stop tokens are
# not special tokens in the tokenizer
# not special tokens in the tokenizer
stop_str
:
Optional
[
L
ist
[
str
]]
=
None
stop_str
:
Optional
[
l
ist
[
str
]]
=
None
# Exposed options for HF runner
# Exposed options for HF runner
hf_model_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
hf_model_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Indicates we should explicitly pass the EOS from the tokenizer
# Indicates we should explicitly pass the EOS from the tokenizer
use_tokenizer_eos
:
bool
=
False
use_tokenizer_eos
:
bool
=
False
auto_cls
:
Type
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
auto_cls
:
type
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
# Callable to pass to the HF runner to run on inputs; for now, we also pass
# the data type to input post processing, because almost all of the uses of
# postprocess_inputs are to fix the data types of BatchEncoding values.
postprocess_inputs
:
Callable
[[
BatchEncoding
,
str
],
BatchEncoding
]
=
identity
patch_hf_runner
:
Optional
[
Callable
[[
HfRunner
],
HfRunner
]]
=
None
patch_hf_runner
:
Optional
[
Callable
[[
HfRunner
],
HfRunner
]]
=
None
# Post processors that, if defined, will run on the outputs of the
# Post processors that, if defined, will run on the outputs of the
...
@@ -128,12 +122,12 @@ class VLMTestInfo(NamedTuple):
...
@@ -128,12 +122,12 @@ class VLMTestInfo(NamedTuple):
# Default expandable params per test; these defaults can be overridden in
# Default expandable params per test; these defaults can be overridden in
# instances of this object; the complete set of test cases for the model
# instances of this object; the complete set of test cases for the model
# is all combinations of .models + all fields below
# is all combinations of .models + all fields below
max_tokens
:
Union
[
int
,
T
uple
[
int
]]
=
128
max_tokens
:
Union
[
int
,
t
uple
[
int
]]
=
128
num_logprobs
:
Union
[
int
,
T
uple
[
int
]]
=
5
num_logprobs
:
Union
[
int
,
t
uple
[
int
]]
=
5
dtype
:
Union
[
str
,
Iterable
[
str
]]
=
"
half
"
dtype
:
Union
[
str
,
Union
[
list
[
str
],
tuple
[
str
,
...]
]]
=
"
auto
"
distributed_executor_backend
:
Optional
[
Union
[
str
,
Iterable
[
str
]]]
=
None
distributed_executor_backend
:
Optional
[
Union
[
str
,
Iterable
[
str
]]]
=
None
# Only expanded in video tests
# Only expanded in video tests
num_video_frames
:
Union
[
int
,
T
uple
[
int
]]
=
16
num_video_frames
:
Union
[
int
,
t
uple
[
int
]]
=
16
# Fixed image sizes / image size factors; most tests use image_size_factors
# Fixed image sizes / image size factors; most tests use image_size_factors
# The values provided for these two fields will be stacked and expanded
# The values provided for these two fields will be stacked and expanded
...
@@ -141,19 +135,19 @@ class VLMTestInfo(NamedTuple):
...
@@ -141,19 +135,19 @@ class VLMTestInfo(NamedTuple):
# once per tests (much like concatenating and wrapping in one parametrize
# once per tests (much like concatenating and wrapping in one parametrize
# call)
# call)
image_size_factors
:
Iterable
[
Iterable
[
float
]]
=
IMAGE_SIZE_FACTORS
image_size_factors
:
Iterable
[
Iterable
[
float
]]
=
IMAGE_SIZE_FACTORS
image_sizes
:
Optional
[
Iterable
[
Iterable
[
T
uple
[
int
,
int
]]]]
=
None
image_sizes
:
Optional
[
Iterable
[
Iterable
[
t
uple
[
int
,
int
]]]]
=
None
# Hack for updating a prompt to take into a local path; currently only used
# Hack for updating a prompt to take into a local path; currently only used
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for HF runner
# for HF runner
prompt_path_encoder
:
Optional
[
prompt_path_encoder
:
Optional
[
Callable
[[
PosixPath
,
str
,
Union
[
L
ist
[
ImageAsset
],
_ImageAssets
]],
Callable
[[
PosixPath
,
str
,
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
]],
str
]]
=
None
# noqa: E501
str
]]
=
None
# noqa: E501
# Allows configuring a test to run with custom inputs
# Allows configuring a test to run with custom inputs
custom_test_opts
:
Optional
[
L
ist
[
CustomTestOptions
]]
=
None
custom_test_opts
:
Optional
[
l
ist
[
CustomTestOptions
]]
=
None
marks
:
Optional
[
L
ist
[
MarkDecorator
]]
=
None
marks
:
Optional
[
l
ist
[
MarkDecorator
]]
=
None
def
get_non_parametrized_runner_kwargs
(
self
):
def
get_non_parametrized_runner_kwargs
(
self
):
"""Returns a dictionary of expandable kwargs for items that are used
"""Returns a dictionary of expandable kwargs for items that are used
...
@@ -171,7 +165,6 @@ class VLMTestInfo(NamedTuple):
...
@@ -171,7 +165,6 @@ class VLMTestInfo(NamedTuple):
"vllm_output_post_proc"
:
self
.
vllm_output_post_proc
,
"vllm_output_post_proc"
:
self
.
vllm_output_post_proc
,
"auto_cls"
:
self
.
auto_cls
,
"auto_cls"
:
self
.
auto_cls
,
"use_tokenizer_eos"
:
self
.
use_tokenizer_eos
,
"use_tokenizer_eos"
:
self
.
use_tokenizer_eos
,
"postprocess_inputs"
:
self
.
postprocess_inputs
,
"comparator"
:
self
.
comparator
,
"comparator"
:
self
.
comparator
,
"get_stop_token_ids"
:
self
.
get_stop_token_ids
,
"get_stop_token_ids"
:
self
.
get_stop_token_ids
,
"hf_model_kwargs"
:
self
.
hf_model_kwargs
,
"hf_model_kwargs"
:
self
.
hf_model_kwargs
,
...
...
tests/models/embedding/language/test_cls_models.py
View file @
469e903b
...
@@ -9,6 +9,8 @@ import torch
...
@@ -9,6 +9,8 @@ import torch
from
transformers
import
AutoModelForSequenceClassification
from
transformers
import
AutoModelForSequenceClassification
from
....utils
import
models_path_prefix
from
....utils
import
models_path_prefix
from
vllm.platforms
import
current_platform
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model"
,
"model"
,
...
@@ -17,24 +19,24 @@ from ....utils import models_path_prefix
...
@@ -17,24 +19,24 @@ from ....utils import models_path_prefix
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
],
],
)
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
]
if
current_platform
.
is_rocm
()
else
[
"float"
])
def
test_classification_models
(
def
test_classification_models
(
hf_runner
,
hf_runner
,
vllm_runner
,
vllm_runner
,
example_prompts
,
example_prompts
,
model
:
str
,
model
:
str
,
dtype
:
str
,
dtype
:
str
,
monkeypatch
,
)
->
None
:
)
->
None
:
if
current_platform
.
is_rocm
():
# ROCm Triton FA does not currently support sliding window attention
# switch to use ROCm CK FA backend
monkeypatch
.
setenv
(
"VLLM_USE_TRITON_FLASH_ATTN"
,
"False"
)
with
vllm_runner
(
model
,
dtype
=
dtype
)
as
vllm_model
:
with
vllm_runner
(
model
,
dtype
=
dtype
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
classify
(
example_prompts
)
vllm_outputs
=
vllm_model
.
classify
(
example_prompts
)
# This test is for verifying whether the model's extra_repr
# can be printed correctly.
def
print_model
(
model
):
print
(
model
)
vllm_model
.
apply_model
(
print_model
)
with
hf_runner
(
model
,
with
hf_runner
(
model
,
dtype
=
dtype
,
dtype
=
dtype
,
auto_cls
=
AutoModelForSequenceClassification
)
as
hf_model
:
auto_cls
=
AutoModelForSequenceClassification
)
as
hf_model
:
...
@@ -45,4 +47,8 @@ def test_classification_models(
...
@@ -45,4 +47,8 @@ def test_classification_models(
hf_output
=
torch
.
tensor
(
hf_output
)
hf_output
=
torch
.
tensor
(
hf_output
)
vllm_output
=
torch
.
tensor
(
vllm_output
)
vllm_output
=
torch
.
tensor
(
vllm_output
)
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
1e-3
)
# the tolerance value of 1e-2 is selected based on the
# half datatype tests in
# tests/models/embedding/language/test_embedding.py
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
1e-3
if
dtype
==
"float"
else
1e-2
)
tests/models/embedding/language/test_embedding.py
View file @
469e903b
...
@@ -7,10 +7,11 @@ import os
...
@@ -7,10 +7,11 @@ import os
import
pytest
import
pytest
from
vllm.config
import
PoolerConfig
from
vllm.config
import
PoolerConfig
from
....utils
import
models_path_prefix
from
....utils
import
models_path_prefix
from
vllm.platforms
import
current_platform
from
..utils
import
check_embeddings_close
from
..utils
import
check_embeddings_close
from
vllm.platforms
import
current_platform
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
...
@@ -21,15 +22,15 @@ from vllm.platforms import current_platform
...
@@ -21,15 +22,15 @@ from vllm.platforms import current_platform
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/all-MiniLM-L12-v2"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/all-MiniLM-L12-v2"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/multilingual-e5-large"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/multilingual-e5-large"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
)),
# [Decoder-only]
# [Decoder-only]
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/bge-multilingual-gemma2"
),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/bge-multilingual-gemma2"
),
marks
=
[
pytest
.
mark
.
core_model
]),
marks
=
[
pytest
.
mark
.
core_model
]),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/e5-mistral-7b-instruct"
),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/e5-mistral-7b-instruct"
),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
)),
# [
Encoder-de
coder]
# [
Cross-En
coder]
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/stsb-roberta-base-v2"
)),
pytest
.
param
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/stsb-roberta-base-v2"
)),
],
],
)
)
...
@@ -44,13 +45,21 @@ def test_models(
...
@@ -44,13 +45,21 @@ def test_models(
example_prompts
,
example_prompts
,
model
,
model
,
dtype
:
str
,
dtype
:
str
,
monkeypatch
,
)
->
None
:
)
->
None
:
if
model
==
"BAAI/bge-multilingual-gemma2"
and
current_platform
.
is_rocm
():
# ROCm Triton FA does not currently support sliding window attention
# switch to use ROCm CK FA backend
monkeypatch
.
setenv
(
"VLLM_USE_TRITON_FLASH_ATTN"
,
"False"
)
vllm_extra_kwargs
=
{}
vllm_extra_kwargs
=
{}
if
model
==
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
):
if
model
==
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
):
vllm_extra_kwargs
[
"override_pooler_config"
]
=
\
vllm_extra_kwargs
[
"override_pooler_config"
]
=
\
PoolerConfig
(
pooling_type
=
"MEAN"
)
PoolerConfig
(
pooling_type
=
"MEAN"
)
if
model
==
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
):
if
model
==
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-Qwen2-7B-instruct"
):
vllm_extra_kwargs
[
"hf_overrides"
]
=
{
"is_causal"
:
Fals
e
}
vllm_extra_kwargs
[
"hf_overrides"
]
=
{
"is_causal"
:
Tru
e
}
# The example_prompts has ending "\n", for example:
# The example_prompts has ending "\n", for example:
# "Write a short story about a robot that dreams for the first time.\n"
# "Write a short story about a robot that dreams for the first time.\n"
...
@@ -71,13 +80,6 @@ def test_models(
...
@@ -71,13 +80,6 @@ def test_models(
**
vllm_extra_kwargs
)
as
vllm_model
:
**
vllm_extra_kwargs
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
encode
(
example_prompts
)
vllm_outputs
=
vllm_model
.
encode
(
example_prompts
)
# This test is for verifying whether the model's extra_repr
# can be printed correctly.
def
print_model
(
model
):
print
(
model
)
vllm_model
.
apply_model
(
print_model
)
check_embeddings_close
(
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_0_lst
=
hf_outputs
,
embeddings_1_lst
=
vllm_outputs
,
embeddings_1_lst
=
vllm_outputs
,
...
...
tests/models/embedding/language/test_gritlm.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
__future__
import
annotations
import
importlib.util
import
importlib.util
import
math
import
math
from
array
import
array
from
array
import
array
from
typing
import
List
import
os
import
os
import
openai
import
openai
...
@@ -13,13 +13,14 @@ from scipy.spatial.distance import cosine
...
@@ -13,13 +13,14 @@ from scipy.spatial.distance import cosine
import
vllm
import
vllm
import
vllm.config
import
vllm.config
from
vllm.utils
import
STR_BACKEND_ENV_VAR
from
....utils
import
RemoteOpenAIServer
from
....utils
import
RemoteOpenAIServer
from
....utils
import
models_path_prefix
from
....utils
import
models_path_prefix
# GritLM embedding implementation is only supported by XFormers backend.
# GritLM embedding implementation is only supported by XFormers backend.
pytest
.
mark
.
skipif
(
not
importlib
.
util
.
find_spec
(
"xformers"
),
pytestmark
=
pytest
.
mark
.
skipif
(
not
importlib
.
util
.
find_spec
(
"xformers"
),
reason
=
"GritLM requires XFormers"
)
reason
=
"GritLM requires XFormers"
)
MODEL_NAME
=
os
.
path
.
join
(
models_path_prefix
,
"parasail-ai/GritLM-7B-vllm"
)
MODEL_NAME
=
os
.
path
.
join
(
models_path_prefix
,
"parasail-ai/GritLM-7B-vllm"
)
MAX_MODEL_LEN
=
4000
MAX_MODEL_LEN
=
4000
...
@@ -32,36 +33,34 @@ def _arr(arr):
...
@@ -32,36 +33,34 @@ def _arr(arr):
return
array
(
"i"
,
arr
)
return
array
(
"i"
,
arr
)
def
test_find_array
(
monkeypatch
):
def
test_find_array
(
monkeypatch
:
pytest
.
MonkeyPatch
):
# GritLM embedding implementation is only supported by XFormers backend.
# GritLM embedding implementation is only supported by XFormers backend.
monkeypatch
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
STR_BACKEND_ENV_VAR
,
"XFORMERS"
)
from
vllm.model_executor.models.gritlm
import
GritLMPooler
from
vllm.model_executor.models.gritlm
import
GritLMPooler
# Create an LLM object to get the model config.
# Create an LLM object to get the model config.
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
pooler
=
GritLMPooler
(
model_config
=
llm
.
llm_engine
.
model_config
)
pooler
=
GritLMPooler
(
model_config
=
llm
.
llm_engine
.
model_config
)
arr
=
_arr
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
])
arr
=
_arr
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
])
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
0
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
0
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
1
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
1
)
==
3
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
5
)
==
-
1
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=
5
)
==
-
1
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
5
]),
start_idx
=
0
)
==
-
1
assert
pooler
.
_find_array
(
arr
,
_arr
([
3
,
5
]),
start_idx
=
0
)
==
-
1
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=-
1
)
pooler
.
_find_array
(
arr
,
_arr
([
3
,
4
,
5
]),
start_idx
=-
1
)
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
server_embedding
():
def
server_embedding
():
# GritLM embedding implementation is only supported by XFormers backend.
# GritLM embedding implementation is only supported by XFormers backend.
with
pytest
.
MonkeyPatch
.
context
()
as
mp
:
args
=
[
"--task"
,
"embed"
,
"--max_model_len"
,
str
(
MAX_MODEL_LEN
)]
mp
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
yield
remote_server
args
=
[
"--task"
,
"embed"
,
"--max_model_len"
,
str
(
MAX_MODEL_LEN
)]
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest
.
fixture
(
scope
=
"module"
)
...
@@ -72,9 +71,12 @@ def server_generate():
...
@@ -72,9 +71,12 @@ def server_generate():
@
pytest_asyncio
.
fixture
@
pytest_asyncio
.
fixture
async
def
client_embedding
(
server_embedding
:
RemoteOpenAIServer
):
async
def
client_embedding
(
monkeypatch
:
pytest
.
MonkeyPatch
,
async
with
server_embedding
.
get_async_client
()
as
async_client
:
server_embedding
:
RemoteOpenAIServer
):
yield
async_client
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
async
with
server_embedding
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest_asyncio
.
fixture
@
pytest_asyncio
.
fixture
...
@@ -83,14 +85,20 @@ async def client_generate(server_generate: RemoteOpenAIServer):
...
@@ -83,14 +85,20 @@ async def client_generate(server_generate: RemoteOpenAIServer):
yield
async_client
yield
async_client
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
List
[
str
],
def
run_llm_encode
(
instruction
:
str
)
->
List
[
float
]:
llm
:
vllm
.
LLM
,
queries
:
list
[
str
],
instruction
:
str
,
)
->
list
[
float
]:
outputs
=
llm
.
encode
([
instruction
+
q
for
q
in
queries
],
)
outputs
=
llm
.
encode
([
instruction
+
q
for
q
in
queries
],
)
return
[
output
.
outputs
.
embedding
for
output
in
outputs
]
return
[
output
.
outputs
.
embedding
for
output
in
outputs
]
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
List
[
str
],
async
def
run_client_embeddings
(
instruction
:
str
)
->
List
[
float
]:
client
:
vllm
.
LLM
,
queries
:
list
[
str
],
instruction
:
str
,
)
->
list
[
float
]:
outputs
=
await
client
.
embeddings
.
create
(
outputs
=
await
client
.
embeddings
.
create
(
model
=
MODEL_NAME
,
model
=
MODEL_NAME
,
input
=
[
instruction
+
q
for
q
in
queries
],
input
=
[
instruction
+
q
for
q
in
queries
],
...
@@ -109,7 +117,7 @@ def get_test_data():
...
@@ -109,7 +117,7 @@ def get_test_data():
README.md in https://github.com/ContextualAI/gritlm
README.md in https://github.com/ContextualAI/gritlm
"""
"""
q_instruction
=
gritlm_instruction
(
q_instruction
=
gritlm_instruction
(
"Given a scientific paper title, retrieve the paper's abstract"
)
"Given a scientific paper title, retrieve the paper's abstract"
,
)
queries
=
[
queries
=
[
"Bitcoin: A Peer-to-Peer Electronic Cash System"
,
"Bitcoin: A Peer-to-Peer Electronic Cash System"
,
"Generative Representational Instruction Tuning"
,
"Generative Representational Instruction Tuning"
,
...
@@ -125,7 +133,7 @@ def get_test_data():
...
@@ -125,7 +133,7 @@ def get_test_data():
return
queries
,
q_instruction
,
documents
,
d_instruction
return
queries
,
q_instruction
,
documents
,
d_instruction
def
validate_embed_output
(
q_rep
:
L
ist
[
float
],
d_rep
:
L
ist
[
float
]):
def
validate_embed_output
(
q_rep
:
l
ist
[
float
],
d_rep
:
l
ist
[
float
]):
cosine_sim_q0_d0
=
1
-
cosine
(
q_rep
[
0
],
d_rep
[
0
])
cosine_sim_q0_d0
=
1
-
cosine
(
q_rep
[
0
],
d_rep
[
0
])
assert
math
.
isclose
(
cosine_sim_q0_d0
,
0.609
,
abs_tol
=
0.001
)
assert
math
.
isclose
(
cosine_sim_q0_d0
,
0.609
,
abs_tol
=
0.001
)
...
@@ -139,31 +147,32 @@ def validate_embed_output(q_rep: List[float], d_rep: List[float]):
...
@@ -139,31 +147,32 @@ def validate_embed_output(q_rep: List[float], d_rep: List[float]):
assert
math
.
isclose
(
cosine_sim_q1_d1
,
0.532
,
abs_tol
=
0.001
)
assert
math
.
isclose
(
cosine_sim_q1_d1
,
0.532
,
abs_tol
=
0.001
)
def
test_gritlm_offline_embedding
(
monkeypatch
):
def
test_gritlm_offline_embedding
(
monkeypatch
:
pytest
.
MonkeyPatch
):
# GritLM embedding implementation is only supported by XFormers backend.
# GritLM embedding implementation is only supported by XFormers backend.
monkeypatch
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
"XFORMERS"
)
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
STR_BACKEND_ENV_VAR
,
"XFORMERS"
)
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
llm
=
vllm
.
LLM
(
MODEL_NAME
,
task
=
"embed"
,
max_model_len
=
MAX_MODEL_LEN
)
d_rep
=
run_llm_encode
(
d_rep
=
run_llm_encode
(
llm
,
llm
,
documents
,
documents
,
d_instruction
,
d_instruction
,
)
)
q_rep
=
run_llm_encode
(
q_rep
=
run_llm_encode
(
llm
,
llm
,
queries
,
queries
,
q_instruction
,
q_instruction
,
)
)
validate_embed_output
(
q_rep
,
d_rep
)
validate_embed_output
(
q_rep
,
d_rep
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_gritlm_api_server_embedding
(
async
def
test_gritlm_api_server_embedding
(
client_embedding
:
openai
.
AsyncOpenAI
):
client_embedding
:
openai
.
AsyncOpenAI
,
):
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
queries
,
q_instruction
,
documents
,
d_instruction
=
get_test_data
()
d_rep
=
await
run_client_embeddings
(
d_rep
=
await
run_client_embeddings
(
...
...
tests/models/embedding/utils.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Sequence
from
collections.abc
import
Sequence
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
...
@@ -8,8 +8,8 @@ import torch.nn.functional as F
...
@@ -8,8 +8,8 @@ import torch.nn.functional as F
def
check_embeddings_close
(
def
check_embeddings_close
(
*
,
*
,
embeddings_0_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_0_lst
:
Sequence
[
l
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
l
ist
[
float
]],
name_0
:
str
,
name_0
:
str
,
name_1
:
str
,
name_1
:
str
,
tol
:
float
=
1e-3
,
tol
:
float
=
1e-3
,
...
...
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
functools
import
partial
from
typing
import
Callable
from
typing
import
Callable
,
Dict
,
List
,
Type
import
os
import
os
import
pytest
import
pytest
import
torch
import
torch
import
torch.nn.functional
as
F
from
PIL
import
Image
from
PIL
import
Image
from
transformers
import
BatchEncoding
,
Qwen2VLForConditionalGeneration
from
transformers
import
Qwen2VLForConditionalGeneration
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
from
....utils
import
large_gpu_test
,
models_path_prefix
from
....utils
import
large_gpu_test
,
models_path_prefix
...
@@ -68,7 +68,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):
...
@@ -68,7 +68,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):
def
apply_chat_template_and_add_eos
(
def
apply_chat_template_and_add_eos
(
messages
:
L
ist
[
D
ict
],
messages
:
l
ist
[
d
ict
],
apply_chat_template_fn
:
Callable
,
apply_chat_template_fn
:
Callable
,
):
):
prompt
=
apply_chat_template_fn
(
prompt
=
apply_chat_template_fn
(
...
@@ -76,16 +76,12 @@ def apply_chat_template_and_add_eos(
...
@@ -76,16 +76,12 @@ def apply_chat_template_and_add_eos(
return
prompt
return
prompt
def
postprocess_inputs
(
hf_model
:
HfRunner
,
inputs
:
BatchEncoding
,
**
kwargs
):
return
hf_model
.
model
.
prepare_inputs_for_generation
(
**
inputs
,
**
kwargs
)
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
input_images
:
PromptImageInput
,
embed_texts
:
L
ist
[
bool
],
embed_texts
:
l
ist
[
bool
],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
@@ -119,14 +115,8 @@ def _run_test(
...
@@ -119,14 +115,8 @@ def _run_test(
with
hf_runner
(
model
,
with
hf_runner
(
model
,
dtype
=
dtype
,
dtype
=
dtype
,
auto_cls
=
Qwen2VLForConditionalGeneration
)
as
hf_model
:
auto_cls
=
Qwen2VLForConditionalGeneration
)
as
hf_model
:
hf_model
.
postprocess_inputs
=
partial
(
postprocess_inputs
,
prompts
=
[]
hf_model
,
cache_position
=
torch
.
arange
(
0
,
1
,
# 1 for batch size
requires_grad
=
False
),
use_cache
=
False
)
for
text
,
image
,
embed_text
in
zip
(
input_texts
,
input_images
,
for
text
,
image
,
embed_text
in
zip
(
input_texts
,
input_images
,
embed_texts
):
embed_texts
):
# dse requires non-standard input processing
# dse requires non-standard input processing
...
@@ -134,20 +124,34 @@ def _run_test(
...
@@ -134,20 +124,34 @@ def _run_test(
messages
=
get_messages
(
image
,
text
,
embed_text
)
messages
=
get_messages
(
image
,
text
,
embed_text
)
prompt
=
apply_chat_template_and_add_eos
(
prompt
=
apply_chat_template_and_add_eos
(
messages
,
hf_model
.
processor
.
apply_chat_template
)
messages
,
hf_model
.
processor
.
apply_chat_template
)
inputs
=
hf_model
.
get_inputs
(
prompts
=
[[
prompt
]],
prompts
.
append
(
prompt
)
images
=
[[
image
]],
)
all_inputs
=
hf_model
.
get_inputs
(
with
torch
.
no_grad
():
prompts
=
prompts
,
images
=
input_images
,
)
with
torch
.
no_grad
():
all_outputs
=
[]
for
inputs
in
all_inputs
:
inputs
=
hf_model
.
model
.
prepare_inputs_for_generation
(
**
inputs
,
cache_position
=
torch
.
arange
(
1
),
# 1 for batch size
use_cache
=
False
,
)
outputs
=
hf_model
.
model
(
outputs
=
hf_model
.
model
(
**
hf_model
.
wrap_device
(
inputs
[
0
],
**
hf_model
.
wrap_device
(
inputs
),
device
=
hf_model
.
model
.
device
.
type
),
return_dict
=
True
,
return_dict
=
True
,
output_hidden_states
=
True
,
output_hidden_states
=
True
,
)
)
pooled_output
=
torch
.
nn
.
functional
.
normalize
(
pooled_output
=
F
.
normalize
(
outputs
.
hidden_states
[
-
1
][
0
,
-
1
],
outputs
.
hidden_states
[
-
1
][
0
,
-
1
],
p
=
2
,
dim
=-
1
)
p
=
2
,
hf_outputs
.
append
(
pooled_output
.
tolist
())
dim
=-
1
)
all_outputs
.
append
(
pooled_output
.
tolist
())
hf_outputs
=
all_outputs
check_embeddings_close
(
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_0_lst
=
hf_outputs
,
...
...
tests/models/embedding/vision_language/test_llava_next.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
os
import
os
import
pytest
import
pytest
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
transformers
import
AutoModelForVision2Seq
from
transformers
import
AutoModelForImageTextToText
from
vllm.platforms
import
current_platform
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
from
....utils
import
large_gpu_test
,
models_path_prefix
from
....utils
import
large_gpu_test
,
models_path_prefix
from
..utils
import
check_embeddings_close
from
..utils
import
check_embeddings_close
# Llava Next embedding implementation is only supported by CUDA.
# If run on ROCm, hf_model.model.resize_token_embeddings will
# cause the following error:
# RuntimeError: Calling torch.linalg.cholesky on a CUDA tensor
# requires compiling PyTorch with MAGMA. Please use PyTorch
# built with MAGMA support.
# If run on CPU, hf_model.model.resize_token_embeddings will
# cause the following error:
# RuntimeError: Calling torch.linalg.cholesky on a CPU tensor
# requires compiling PyTorch with LAPACK. Please use PyTorch
# built with LAPACK support.
pytestmark
=
pytest
.
mark
.
skipif
(
not
current_platform
.
is_cuda
(),
reason
=
"Llava Next model uses op that is only supported in CUDA"
)
llama3_template
=
'<|start_header_id|>user<|end_header_id|>
\n\n
{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\n\n
\n
'
# noqa: E501
llama3_template
=
'<|start_header_id|>user<|end_header_id|>
\n\n
{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\n\n
\n
'
# noqa: E501
HF_TEXT_PROMPTS
=
[
HF_TEXT_PROMPTS
=
[
...
@@ -36,9 +51,9 @@ MODELS = [os.path.join(models_path_prefix, "royokong/e5-v")]
...
@@ -36,9 +51,9 @@ MODELS = [os.path.join(models_path_prefix, "royokong/e5-v")]
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
input_images
:
PromptImageInput
,
model
:
str
,
model
:
str
,
*
,
*
,
...
@@ -56,7 +71,7 @@ def _run_test(
...
@@ -56,7 +71,7 @@ def _run_test(
vllm_outputs
=
vllm_model
.
encode
(
input_texts
,
images
=
input_images
)
vllm_outputs
=
vllm_model
.
encode
(
input_texts
,
images
=
input_images
)
with
hf_runner
(
model
,
dtype
=
dtype
,
with
hf_runner
(
model
,
dtype
=
dtype
,
auto_cls
=
AutoModelFor
Vision2Seq
)
as
hf_model
:
auto_cls
=
AutoModelFor
ImageTextToText
)
as
hf_model
:
# Patch the issue where generation_config.json is missing
# Patch the issue where generation_config.json is missing
hf_model
.
processor
.
patch_size
=
\
hf_model
.
processor
.
patch_size
=
\
hf_model
.
model
.
config
.
vision_config
.
patch_size
hf_model
.
model
.
config
.
vision_config
.
patch_size
...
@@ -72,8 +87,7 @@ def _run_test(
...
@@ -72,8 +87,7 @@ def _run_test(
for
inputs
in
all_inputs
:
for
inputs
in
all_inputs
:
# Based on: https://huggingface.co/royokong/e5-v
# Based on: https://huggingface.co/royokong/e5-v
outputs
=
hf_model
.
model
(
outputs
=
hf_model
.
model
(
**
hf_model
.
wrap_device
(
inputs
,
**
hf_model
.
wrap_device
(
inputs
),
device
=
hf_model
.
model
.
device
.
type
),
return_dict
=
True
,
return_dict
=
True
,
output_hidden_states
=
True
,
output_hidden_states
=
True
,
)
)
...
...
tests/models/embedding/vision_language/test_phi3v.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
os
import
os
import
pytest
import
pytest
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
...
@@ -30,9 +28,9 @@ MODELS = [os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full")]
...
@@ -30,9 +28,9 @@ MODELS = [os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full")]
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
input_images
:
PromptImageInput
,
model
:
str
,
model
:
str
,
*
,
*
,
...
@@ -56,8 +54,7 @@ def _run_test(
...
@@ -56,8 +54,7 @@ def _run_test(
for
inputs
in
all_inputs
:
for
inputs
in
all_inputs
:
# Based on: https://github.com/TIGER-AI-Lab/VLM2Vec/blob/db3b951bccabba220c1f53ab46a734e50dd2fc08/src/model.py
# Based on: https://github.com/TIGER-AI-Lab/VLM2Vec/blob/db3b951bccabba220c1f53ab46a734e50dd2fc08/src/model.py
outputs
=
hf_model
.
model
(
outputs
=
hf_model
.
model
(
**
hf_model
.
wrap_device
(
inputs
,
**
hf_model
.
wrap_device
(
inputs
),
device
=
hf_model
.
model
.
device
.
type
),
return_dict
=
True
,
return_dict
=
True
,
output_hidden_states
=
True
,
output_hidden_states
=
True
,
)
)
...
...
tests/models/encoder_decoder/audio_language/test_whisper.py
View file @
469e903b
...
@@ -10,7 +10,7 @@ import pytest
...
@@ -10,7 +10,7 @@ import pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
from
....utils
import
fork
_new_process_for_each_test
,
multi_gpu_test
from
....utils
import
create
_new_process_for_each_test
,
multi_gpu_test
PROMPTS
=
[
PROMPTS
=
[
{
{
...
@@ -119,7 +119,7 @@ def run_test(
...
@@ -119,7 +119,7 @@ def run_test(
assert
output
.
outputs
[
0
].
text
==
expected
assert
output
.
outputs
[
0
].
text
==
expected
@
fork
_new_process_for_each_test
@
create
_new_process_for_each_test
()
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-small"
,
"openai/whisper-large-v3-turbo"
])
"model"
,
[
"openai/whisper-small"
,
"openai/whisper-large-v3-turbo"
])
...
...
tests/models/encoder_decoder/language/test_bart.py
View file @
469e903b
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
"""
"""
import
os
import
os
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
pytest
import
pytest
from
transformers
import
AutoModelForSeq2SeqLM
from
transformers
import
AutoModelForSeq2SeqLM
...
@@ -19,7 +19,7 @@ from ....utils import models_path_prefix
...
@@ -19,7 +19,7 @@ from ....utils import models_path_prefix
def
vllm_to_hf_output
(
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
decoder_prompt_type
:
DecoderPromptType
,
decoder_prompt_type
:
DecoderPromptType
,
):
):
"""Sanitize vllm output to be comparable with hf output."""
"""Sanitize vllm output to be comparable with hf output."""
...
@@ -33,9 +33,9 @@ def vllm_to_hf_output(
...
@@ -33,9 +33,9 @@ def vllm_to_hf_output(
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts
:
L
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
prompts
:
l
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
decoder_prompt_type
:
DecoderPromptType
,
decoder_prompt_type
:
DecoderPromptType
,
model
:
str
,
model
:
str
,
*
,
*
,
...
...
tests/models/encoder_decoder/vision_language/test_florence2.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
functools
import
partial
from
typing
import
Optional
from
typing
import
List
,
Optional
,
Tuple
,
Type
import
os
import
os
import
pytest
import
pytest
from
PIL
import
Image
from
PIL
import
Image
from
vllm.inputs.data
import
ExplicitEncoderDecoderPrompt
from
vllm.inputs.data
import
ExplicitEncoderDecoderPrompt
,
TextPrompt
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
....conftest
import
HfRunner
,
VllmRunner
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
...utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
from
....utils
import
models_path_prefix
from
....utils
import
models_path_prefix
Florence2Prompt
=
partial
(
ExplicitEncoderDecoderPrompt
,
decoder_prompt
=
None
,
mm_processor_kwargs
=
None
)
MODELS
=
[
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Florence-2-base"
)]
MODELS
=
[
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Florence-2-base"
)]
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
# Therefore, we borrow the BartTokenizer from the original Bart model
TOKENIZER
=
os
.
path
.
join
(
models_path_prefix
,
"facebook/bart-base"
)
TOKENIZER
=
os
.
path
.
join
(
models_path_prefix
,
"facebook/bart-base"
)
PROMPTS
=
[
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
Florence2Prompt
(
encoder_prompt
=
"<CAPTION>"
),
"stop_sign"
:
Florence2Prompt
(
encoder_prompt
=
"<DETAILED_CAPTION>"
),
"<CAPTION>"
,
# special task token
Florence2Prompt
(
encoder_prompt
=
"<MORE_DETAILED_CAPTION>"
),
"cherry_blossom"
:
Florence2Prompt
(
encoder_prompt
=
"<CAPTION_TO_PHRASE_GROUNDING>"
),
"Describe in detail what is shown in the image."
,
Florence2Prompt
(
encoder_prompt
=
"<DENSE_REGION_CAPTION>"
),
})
Florence2Prompt
(
encoder_prompt
=
"<REGION_PROPOSAL>"
),
Florence2Prompt
(
encoder_prompt
=
"<OCR_WITH_REGION>"
),
Florence2Prompt
(
encoder_prompt
=
"<OCR>"
),
Florence2Prompt
(
encoder_prompt
=
"<OD>"
),
]
def
get_hf_images_prompts
(
prompts_
:
list
[
ExplicitEncoderDecoderPrompt
[
str
,
TextPrompt
]],
)
->
tuple
[
list
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
list
[
Image
.
Image
]]:
prompts
,
images
=
[],
[]
for
prompt
in
prompts_
:
encoder_prompt
=
prompt
[
"encoder_prompt"
]
prompts
.
append
(
ExplicitEncoderDecoderPrompt
(
encoder_prompt
=
encoder_prompt
[
"prompt"
],
decoder_prompt
=
None
,
))
images
.
append
(
encoder_prompt
[
"multi_modal_data"
][
"image"
])
return
prompts
,
images
def
vllm_to_hf_output
(
vllm_output
:
Tuple
[
List
[
int
],
str
,
Optional
[
SampleLogprobs
]],
):
"""Sanitize vllm output to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
hf_output_str
=
"</s><s>"
+
output_str
+
"</s>"
def
hf_to_vllm_output
(
hf_output
:
tuple
[
list
[
int
],
str
,
Optional
[
SampleLogprobs
]]):
"""Sanitize hf output to be comparable with vllm output."""
output_ids
,
output_str
,
out_logprobs
=
hf_output
return
output_ids
,
hf_output_str
,
out_logprobs
output_str
=
output_str
.
replace
(
"</s>"
,
""
).
replace
(
"<s>"
,
""
)
output_ids
=
[
ids
for
ids
in
output_ids
if
ids
not
in
[
0
,
2
]]
return
output_ids
,
output_str
,
out_logprobs
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts
:
L
ist
[
ExplicitEncoderDecoderPrompt
],
inputs
:
list
[
l
ist
[
ExplicitEncoderDecoderPrompt
]
]
,
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
@@ -58,46 +65,76 @@ def run_test(
...
@@ -58,46 +65,76 @@ def run_test(
distributed_executor_backend
:
Optional
[
str
]
=
None
,
distributed_executor_backend
:
Optional
[
str
]
=
None
,
)
->
None
:
)
->
None
:
with
vllm_runner
(
model
,
with
vllm_runner
(
model
,
max_num_seqs
=
8
,
tokenizer_name
=
TOKENIZER
,
tokenizer_name
=
TOKENIZER
,
dtype
=
dtype
,
dtype
=
dtype
,
tensor_parallel_size
=
tensor_parallel_size
,
tensor_parallel_size
=
tensor_parallel_size
,
distributed_executor_backend
=
distributed_executor_backend
,
distributed_executor_backend
=
distributed_executor_backend
,
enforce_eager
=
True
)
as
vllm_model
:
enforce_eager
=
True
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_encoder_decoder_greedy_logprobs
(
vllm_outputs_per_case
=
[
prompts
,
max_tokens
,
num_logprobs
)
vllm_model
.
generate_encoder_decoder_greedy_logprobs
(
prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
)
for
prompts
in
inputs
]
hf_inputs
=
[
get_hf_images_prompts
(
prompts
)
for
prompts
in
inputs
]
# Florence-2 processors require image inputs
dummy_image
=
Image
.
new
(
mode
=
"RGB"
,
size
=
(
2
,
2
))
with
hf_runner
(
model
,
dtype
=
dtype
,
skip_tokenizer_init
=
True
)
as
hf_model
:
with
hf_runner
(
model
,
dtype
=
dtype
,
skip_tokenizer_init
=
True
)
as
hf_model
:
hf_model
.
model
.
get_output_embeddings
=
lambda
:
\
hf_model
.
model
.
get_output_embeddings
=
lambda
:
\
hf_model
.
model
.
language_model
.
lm_head
hf_model
.
model
.
language_model
.
lm_head
hf_outputs
=
(
hf_model
.
generate_encoder_decoder_greedy_logprobs_limit
(
hf_outputs
_per_case
=
[
prompts
,
hf_model
.
generate_encoder_decoder_greedy_logprobs_limit
(
max_tokens
,
prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
,
images
=
images
)
num_logprobs
,
for
prompts
,
images
in
hf_inputs
images
=
[
dummy_image
]
*
len
(
prompts
),
]
))
for
hf_outputs
,
vllm_outputs
in
zip
(
hf_outputs_per_case
,
check_logprobs_close
(
vllm_outputs_per_case
):
outputs_0_lst
=
hf_outputs
,
check_logprobs_close
(
outputs_
1
_lst
=
[
outputs_
0
_lst
=
[
hf_to_vllm_output
(
output
)
for
output
in
hf_outputs
],
vllm_to_hf_output
(
vllm_output
)
for
vllm_output
in
vllm_outputs
outputs_1_lst
=
vllm_outputs
,
]
,
name_0
=
"hf"
,
name_
0
=
"
hf
"
,
name_
1
=
"
vllm
"
,
name_1
=
"vllm"
,
)
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
,
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
[
# No image
[],
# Single-scale
[
1.0
],
# Single-scale, batched
[
1.0
,
1.0
,
1.0
],
# Multi-scale
[
0.25
,
0.5
,
1.0
],
],
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
,
vllm_runner
,
model
,
dtype
,
max_tokens
,
def
test_models
(
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
num_logprobs
)
->
None
:
image_assets
:
_ImageAssets
,
model
:
str
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
inputs_per_image
=
[[
ExplicitEncoderDecoderPrompt
(
encoder_prompt
=
TextPrompt
(
prompt
=
prompt
,
multi_modal_data
=
{
"image"
:
rescale_image_size
(
image
,
factor
)}),
decoder_prompt
=
None
,
)
for
factor
in
size_factors
]
for
image
,
prompt
in
zip
(
images
,
HF_IMAGE_PROMPTS
)]
run_test
(
run_test
(
hf_runner
,
hf_runner
,
vllm_runner
,
vllm_runner
,
PROMPTS
,
inputs_per_image
,
model
,
model
,
dtype
=
dtype
,
dtype
=
dtype
,
max_tokens
=
max_tokens
,
max_tokens
=
max_tokens
,
...
...
tests/models/encoder_decoder/vision_language/test_mllama.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Type
,
overload
from
typing
import
Optional
,
overload
import
os
import
os
import
pytest
import
pytest
import
torch
import
torch
from
transformers
import
(
AutoConfig
,
AutoModelForVision2Seq
,
AutoTokenizer
,
from
transformers
import
AutoConfig
,
AutoModelForImageTextToText
,
AutoTokenizer
BatchEncoding
)
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
vllm.attention.backends.flash_attn
import
FlashAttentionMetadata
from
vllm.attention.backends.flash_attn
import
FlashAttentionMetadata
...
@@ -18,6 +17,7 @@ from vllm.sequence import SampleLogprobs
...
@@ -18,6 +17,7 @@ from vllm.sequence import SampleLogprobs
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
_ImageAssets
)
from
....quantization.utils
import
is_quant_method_supported
from
....utils
import
large_gpu_test
from
....utils
import
large_gpu_test
from
...utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
from
....utils
import
models_path_prefix
from
....utils
import
models_path_prefix
...
@@ -66,7 +66,7 @@ prompt_data = {
...
@@ -66,7 +66,7 @@ prompt_data = {
}
}
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
def
vllm_to_hf_output
(
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
Optional
[
SampleLogprobs
]],
model
:
str
):
model
:
str
):
"""Sanitize vllm output to be comparable with hf output."""
"""Sanitize vllm output to be comparable with hf output."""
...
@@ -93,9 +93,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
...
@@ -93,9 +93,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def
_get_inputs
(
def
_get_inputs
(
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
*
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
)
->
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]]:
)
->
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]]:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
if
size_factors
is
not
None
:
if
size_factors
is
not
None
:
...
@@ -125,12 +125,12 @@ def _get_inputs(
...
@@ -125,12 +125,12 @@ def _get_inputs(
@
overload
@
overload
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
:
str
,
*
,
*
,
size_factors
:
L
ist
[
float
],
size_factors
:
l
ist
[
float
],
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
@@ -142,12 +142,12 @@ def run_test(
...
@@ -142,12 +142,12 @@ def run_test(
@
overload
@
overload
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
:
str
,
*
,
*
,
sizes
:
L
ist
[
T
uple
[
int
,
int
]],
sizes
:
l
ist
[
t
uple
[
int
,
int
]],
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
@@ -158,13 +158,13 @@ def run_test(
...
@@ -158,13 +158,13 @@ def run_test(
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
:
str
,
*
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
@@ -185,9 +185,9 @@ def run_test(
...
@@ -185,9 +185,9 @@ def run_test(
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
@@ -217,7 +217,6 @@ def _run_test(
...
@@ -217,7 +217,6 @@ def _run_test(
max_num_seqs
=
2
,
max_num_seqs
=
2
,
tensor_parallel_size
=
tensor_parallel_size
,
tensor_parallel_size
=
tensor_parallel_size
,
distributed_executor_backend
=
distributed_executor_backend
,
distributed_executor_backend
=
distributed_executor_backend
,
enforce_eager
=
True
,
limit_mm_per_prompt
=
{
"image"
:
_LIMIT_IMAGE_PER_PROMPT
limit_mm_per_prompt
=
{
"image"
:
_LIMIT_IMAGE_PER_PROMPT
})
as
vllm_model
:
})
as
vllm_model
:
vllm_outputs_per_image
=
[
vllm_outputs_per_image
=
[
...
@@ -228,14 +227,10 @@ def _run_test(
...
@@ -228,14 +227,10 @@ def _run_test(
for
prompts
,
images
in
inputs
for
prompts
,
images
in
inputs
]
]
def
process
(
hf_inputs
:
BatchEncoding
,
**
kwargs
):
return
hf_inputs
with
hf_runner
(
model
,
with
hf_runner
(
model
,
dtype
=
dtype
,
dtype
=
dtype
,
model_kwargs
=
{
"device_map"
:
"auto"
},
model_kwargs
=
{
"device_map"
:
"auto"
},
postprocess_inputs
=
process
,
auto_cls
=
AutoModelForImageTextToText
)
as
hf_model
:
auto_cls
=
AutoModelForVision2Seq
)
as
hf_model
:
hf_outputs_per_image
=
[
hf_outputs_per_image
=
[
hf_model
.
generate_greedy_logprobs_limit
(
prompts
,
hf_model
.
generate_greedy_logprobs_limit
(
prompts
,
max_tokens
,
max_tokens
,
...
@@ -399,6 +394,49 @@ def test_models_interleaved_images(hf_runner, vllm_runner, image_assets, model,
...
@@ -399,6 +394,49 @@ def test_models_interleaved_images(hf_runner, vllm_runner, image_assets, model,
)
)
@
large_gpu_test
(
min_gb
=
48
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float16"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
reason
=
'bitsandbytes is not supported on this GPU type.'
)
def
test_bnb_regression
(
image_assets
:
_ImageAssets
,
model
:
str
,
dtype
:
str
,
max_tokens
:
int
,
):
stop_sign
=
image_assets
[
0
].
pil_image
prompts
=
[
{
"prompt"
:
"<|begin_of_text|>The content of the image <|image|> is"
,
"multi_modal_data"
:
{
"image"
:
stop_sign
},
},
{
"prompt"
:
"The color of the sky is blue but sometimes it can also be"
,
},
]
# Test regression about QKVCrossParallelLinear
llm
=
LLM
(
model
=
model
,
dtype
=
dtype
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
quantization
=
"bitsandbytes"
,
load_format
=
"bitsandbytes"
,
)
sampling_params
=
SamplingParams
(
temperature
=
0
,
max_tokens
=
max_tokens
,
)
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
assert
outputs
@
large_gpu_test
(
min_gb
=
48
)
@
large_gpu_test
(
min_gb
=
48
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
...
@@ -443,7 +481,6 @@ def test_explicit_implicit_prompt(
...
@@ -443,7 +481,6 @@ def test_explicit_implicit_prompt(
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
tensor_parallel_size
=
1
,
tensor_parallel_size
=
1
,
enforce_eager
=
True
,
)
)
sampling_params
=
SamplingParams
(
sampling_params
=
SamplingParams
(
temperature
=
0
,
temperature
=
0
,
...
@@ -475,14 +512,14 @@ def test_regression(vllm_runner, image_assets, model, dtype, max_tokens,
...
@@ -475,14 +512,14 @@ def test_regression(vllm_runner, image_assets, model, dtype, max_tokens,
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
tensor_parallel_size
=
1
,
tensor_parallel_size
=
1
,
enforce_eager
=
True
,
limit_mm_per_prompt
=
{
"image"
:
limit_mm_per_prompt
=
{
"image"
:
_LIMIT_IMAGE_PER_PROMPT
})
as
vllm_model
:
_LIMIT_IMAGE_PER_PROMPT
})
as
vllm_model
:
# Regression tests for https://github.com/vllm-project/vllm/issues/10648
# Regression tests for https://github.com/vllm-project/vllm/issues/10648
# Number of image tags is greater than the number of images provided
# Number of groups of image tokens is greater than the number of images
prompt
=
"<|begin_of_text|><|image|><|image|> Compare the two images"
# noqa: E501
# provided (the whitespace between the tags is necessary)
prompt
=
"<|begin_of_text|><|image|> <|image|> Compare the two images"
# noqa: E501
image
=
stop_sign
image
=
stop_sign
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
vllm_model
.
generate_greedy_logprobs
([
prompt
],
vllm_model
.
generate_greedy_logprobs
([
prompt
],
...
...
tests/models/fixtures/mistral_small_3_chat.json
0 → 100644
View file @
469e903b
[[[
1784
,
3937
,
6122
,
1261
,
7244
,
10575
,
28528
,
1408
,
1261
,
32656
,
11237
,
1044
,
7283
,
2015
,
1454
,
1261
,
38462
,
4818
,
1046
,
2
],
"The image shows a black dog lying on a wooden floor, looking up with a curious expression."
,
[{
"1784"
:
{
"logprob"
:
-0.4740446209907532
,
"rank"
:
1
,
"decoded_token"
:
"The"
},
"1065"
:
{
"logprob"
:
-1.0990445613861084
,
"rank"
:
2
,
"decoded_token"
:
"A"
},
"4380"
:
{
"logprob"
:
-3.3490445613861084
,
"rank"
:
3
,
"decoded_token"
:
"This"
},
"1785"
:
{
"logprob"
:
-5.0990447998046875
,
"rank"
:
4
,
"decoded_token"
:
"In"
},
"11745"
:
{
"logprob"
:
-6.4740447998046875
,
"rank"
:
5
,
"decoded_token"
:
"Here"
}},
{
"3937"
:
{
"logprob"
:
-0.06349722295999527
,
"rank"
:
1
,
"decoded_token"
:
" image"
},
"7244"
:
{
"logprob"
:
-2.813497304916382
,
"rank"
:
2
,
"decoded_token"
:
" black"
},
"16649"
:
{
"logprob"
:
-7.563497066497803
,
"rank"
:
3
,
"decoded_token"
:
" photo"
},
"18390"
:
{
"logprob"
:
-7.688497066497803
,
"rank"
:
4
,
"decoded_token"
:
" photograph"
},
"10575"
:
{
"logprob"
:
-8.438497543334961
,
"rank"
:
5
,
"decoded_token"
:
" dog"
}},
{
"6122"
:
{
"logprob"
:
-0.25453490018844604
,
"rank"
:
1
,
"decoded_token"
:
" shows"
},
"6971"
:
{
"logprob"
:
-1.8795349597930908
,
"rank"
:
2
,
"decoded_token"
:
" features"
},
"51948"
:
{
"logprob"
:
-2.754534959793091
,
"rank"
:
3
,
"decoded_token"
:
" depicts"
},
"25981"
:
{
"logprob"
:
-5.629534721374512
,
"rank"
:
4
,
"decoded_token"
:
" displays"
},
"1395"
:
{
"logprob"
:
-6.129534721374512
,
"rank"
:
5
,
"decoded_token"
:
" is"
}},
{
"1261"
:
{
"logprob"
:
-0.0001245659514097497
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-9.00012493133545
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"1278"
:
{
"logprob"
:
-14.25012493133545
,
"rank"
:
3
,
"decoded_token"
:
" the"
},
"7244"
:
{
"logprob"
:
-14.87512493133545
,
"rank"
:
4
,
"decoded_token"
:
" black"
},
"1925"
:
{
"logprob"
:
-16.125123977661133
,
"rank"
:
5
,
"decoded_token"
:
" one"
}},
{
"7244"
:
{
"logprob"
:
-0.009403933770954609
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"6231"
:
{
"logprob"
:
-5.259403705596924
,
"rank"
:
2
,
"decoded_token"
:
" close"
},
"16450"
:
{
"logprob"
:
-6.759403705596924
,
"rank"
:
3
,
"decoded_token"
:
" sle"
},
"8500"
:
{
"logprob"
:
-7.009403705596924
,
"rank"
:
4
,
"decoded_token"
:
" dark"
},
"4329"
:
{
"logprob"
:
-7.696903705596924
,
"rank"
:
5
,
"decoded_token"
:
" large"
}},
{
"10575"
:
{
"logprob"
:
-0.7522680163383484
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"119075"
:
{
"logprob"
:
-1.0022680759429932
,
"rank"
:
2
,
"decoded_token"
:
" Labrador"
},
"116572"
:
{
"logprob"
:
-1.8772680759429932
,
"rank"
:
3
,
"decoded_token"
:
" puppy"
},
"8636"
:
{
"logprob"
:
-5.627267837524414
,
"rank"
:
4
,
"decoded_token"
:
" lab"
},
"15812"
:
{
"logprob"
:
-5.814767837524414
,
"rank"
:
5
,
"decoded_token"
:
" Lab"
}},
{
"28528"
:
{
"logprob"
:
-0.2941223084926605
,
"rank"
:
1
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-2.1691222190856934
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"1454"
:
{
"logprob"
:
-2.5441222190856934
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"60700"
:
{
"logprob"
:
-3.2941222190856934
,
"rank"
:
4
,
"decoded_token"
:
" laying"
},
"18970"
:
{
"logprob"
:
-4.794122219085693
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"1408"
:
{
"logprob"
:
-0.3170951306819916
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3151"
:
{
"logprob"
:
-1.317095160484314
,
"rank"
:
2
,
"decoded_token"
:
" down"
},
"14038"
:
{
"logprob"
:
-7.3170952796936035
,
"rank"
:
3
,
"decoded_token"
:
" flat"
},
"104248"
:
{
"logprob"
:
-7.4420952796936035
,
"rank"
:
4
,
"decoded_token"
:
" comfortably"
},
"1321"
:
{
"logprob"
:
-7.6920952796936035
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1261"
:
{
"logprob"
:
-0.08228635042905807
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-3.2072863578796387
,
"rank"
:
2
,
"decoded_token"
:
" its"
},
"32656"
:
{
"logprob"
:
-3.3322863578796387
,
"rank"
:
3
,
"decoded_token"
:
" wooden"
},
"3977"
:
{
"logprob"
:
-6.957286357879639
,
"rank"
:
4
,
"decoded_token"
:
" top"
},
"1278"
:
{
"logprob"
:
-7.207286357879639
,
"rank"
:
5
,
"decoded_token"
:
" the"
}},
{
"32656"
:
{
"logprob"
:
-0.03605202957987785
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"3403"
:
{
"logprob"
:
-3.9110519886016846
,
"rank"
:
2
,
"decoded_token"
:
" text"
},
"44130"
:
{
"logprob"
:
-4.911052227020264
,
"rank"
:
3
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-6.036052227020264
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"8500"
:
{
"logprob"
:
-6.473552227020264
,
"rank"
:
5
,
"decoded_token"
:
" dark"
}},
{
"11237"
:
{
"logprob"
:
-0.6433407068252563
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-0.7683407068252563
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"1615"
:
{
"logprob"
:
-5.268340587615967
,
"rank"
:
3
,
"decoded_token"
:
" pl"
},
"3403"
:
{
"logprob"
:
-6.018340587615967
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"18645"
:
{
"logprob"
:
-7.143340587615967
,
"rank"
:
5
,
"decoded_token"
:
" flo"
}},
{
"1044"
:
{
"logprob"
:
-0.6826052665710449
,
"rank"
:
1
,
"decoded_token"
:
","
},
"1321"
:
{
"logprob"
:
-1.682605266571045
,
"rank"
:
2
,
"decoded_token"
:
" and"
},
"7283"
:
{
"logprob"
:
-1.807605266571045
,
"rank"
:
3
,
"decoded_token"
:
" looking"
},
"1046"
:
{
"logprob"
:
-2.682605266571045
,
"rank"
:
4
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-3.182605266571045
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"7283"
:
{
"logprob"
:
-0.07239976525306702
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"11589"
:
{
"logprob"
:
-3.197399854660034
,
"rank"
:
2
,
"decoded_token"
:
" gaz"
},
"35542"
:
{
"logprob"
:
-3.822399854660034
,
"rank"
:
3
,
"decoded_token"
:
" staring"
},
"1454"
:
{
"logprob"
:
-6.384899616241455
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"22116"
:
{
"logprob"
:
-6.572399616241455
,
"rank"
:
5
,
"decoded_token"
:
" facing"
}},
{
"2015"
:
{
"logprob"
:
-0.9646494388580322
,
"rank"
:
2
,
"decoded_token"
:
" up"
},
"7655"
:
{
"logprob"
:
-0.9646494388580322
,
"rank"
:
1
,
"decoded_token"
:
" directly"
},
"74606"
:
{
"logprob"
:
-2.0896494388580322
,
"rank"
:
3
,
"decoded_token"
:
" upwards"
},
"40022"
:
{
"logprob"
:
-3.0896494388580322
,
"rank"
:
4
,
"decoded_token"
:
" upward"
},
"1935"
:
{
"logprob"
:
-4.152149200439453
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1454"
:
{
"logprob"
:
-0.8447978496551514
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"1513"
:
{
"logprob"
:
-1.2197978496551514
,
"rank"
:
2
,
"decoded_token"
:
" at"
},
"41132"
:
{
"logprob"
:
-2.2197978496551514
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"1935"
:
{
"logprob"
:
-2.9697978496551514
,
"rank"
:
4
,
"decoded_token"
:
" int"
},
"7655"
:
{
"logprob"
:
-3.0947978496551514
,
"rank"
:
5
,
"decoded_token"
:
" directly"
}},
{
"1261"
:
{
"logprob"
:
-0.7162021994590759
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-1.3412022590637207
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"41132"
:
{
"logprob"
:
-2.2162022590637207
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"2246"
:
{
"logprob"
:
-3.2162022590637207
,
"rank"
:
4
,
"decoded_token"
:
" its"
},
"38462"
:
{
"logprob"
:
-3.9662022590637207
,
"rank"
:
5
,
"decoded_token"
:
" curious"
}},
{
"38462"
:
{
"logprob"
:
-0.7836517095565796
,
"rank"
:
1
,
"decoded_token"
:
" curious"
},
"26517"
:
{
"logprob"
:
-1.8461517095565796
,
"rank"
:
2
,
"decoded_token"
:
" calm"
},
"26905"
:
{
"logprob"
:
-2.533651828765869
,
"rank"
:
3
,
"decoded_token"
:
" gentle"
},
"11304"
:
{
"logprob"
:
-3.408651828765869
,
"rank"
:
4
,
"decoded_token"
:
" serious"
},
"97680"
:
{
"logprob"
:
-3.596151828765869
,
"rank"
:
5
,
"decoded_token"
:
" thoughtful"
}},
{
"4818"
:
{
"logprob"
:
-0.047154705971479416
,
"rank"
:
1
,
"decoded_token"
:
" expression"
},
"1321"
:
{
"logprob"
:
-3.922154664993286
,
"rank"
:
2
,
"decoded_token"
:
" and"
},
"1505"
:
{
"logprob"
:
-4.047154903411865
,
"rank"
:
3
,
"decoded_token"
:
" or"
},
"22131"
:
{
"logprob"
:
-4.797154903411865
,
"rank"
:
4
,
"decoded_token"
:
" gaze"
},
"1044"
:
{
"logprob"
:
-9.047154426574707
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1046"
:
{
"logprob"
:
-0.0008031480247154832
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1408"
:
{
"logprob"
:
-7.250802993774414
,
"rank"
:
2
,
"decoded_token"
:
" on"
},
"1321"
:
{
"logprob"
:
-10.500802993774414
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1338"
:
{
"logprob"
:
-11.000802993774414
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"3016"
:
{
"logprob"
:
-11.500802993774414
,
"rank"
:
5
,
"decoded_token"
:
" while"
}},
{
"2"
:
{
"logprob"
:
-0.0008517451351508498
,
"rank"
:
1
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-7.125851631164551
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1256"
:
{
"logprob"
:
-10.00085163116455
,
"rank"
:
3
,
"decoded_token"
:
" The"
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
1395
,
28528
,
1408
,
1261
,
32656
,
11237
,
1044
,
7283
,
2015
,
1513
,
1278
,
13424
,
1626
,
1050
,
1046
,
1349
,
10726
,
1290
,
3719
,
1307
,
122203
,
35463
,
1454
,
11223
,
1321
,
95746
,
24765
,
2425
,
1261
,
6133
,
21283
,
1046
,
2
],
"1. A black dog is lying on a wooden floor, looking up at the camera.
\n
2. A scenic view of rugged mountains with green and rocky terrain under a clear sky."
,
[{
"1049"
:
{
"logprob"
:
-0.05050129443407059
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"11745"
:
{
"logprob"
:
-3.5505013465881348
,
"rank"
:
2
,
"decoded_token"
:
"Here"
},
"69957"
:
{
"logprob"
:
-4.175501346588135
,
"rank"
:
3
,
"decoded_token"
:
"Sure"
},
"117991"
:
{
"logprob"
:
-6.175501346588135
,
"rank"
:
4
,
"decoded_token"
:
"Certain"
},
"1045"
:
{
"logprob"
:
-6.550501346588135
,
"rank"
:
5
,
"decoded_token"
:
"-"
}},
{
"1046"
:
{
"logprob"
:
-5.364403477869928e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1041"
:
{
"logprob"
:
-12.500005722045898
,
"rank"
:
2
,
"decoded_token"
:
")"
},
"1058"
:
{
"logprob"
:
-13.875005722045898
,
"rank"
:
3
,
"decoded_token"
:
":"
},
"1044"
:
{
"logprob"
:
-15.687505722045898
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1045"
:
{
"logprob"
:
-15.875005722045898
,
"rank"
:
5
,
"decoded_token"
:
"-"
}},
{
"1349"
:
{
"logprob"
:
-0.4890742003917694
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1531"
:
{
"logprob"
:
-1.1140742301940918
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1603"
:
{
"logprob"
:
-3.364074230194092
,
"rank"
:
3
,
"decoded_token"
:
" **"
},
"1656"
:
{
"logprob"
:
-4.364074230194092
,
"rank"
:
4
,
"decoded_token"
:
" In"
},
"2409"
:
{
"logprob"
:
-4.989074230194092
,
"rank"
:
5
,
"decoded_token"
:
" This"
}},
{
"7244"
:
{
"logprob"
:
-0.08685152232646942
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"6231"
:
{
"logprob"
:
-3.4618515968322754
,
"rank"
:
2
,
"decoded_token"
:
" close"
},
"16450"
:
{
"logprob"
:
-3.5868515968322754
,
"rank"
:
3
,
"decoded_token"
:
" sle"
},
"4329"
:
{
"logprob"
:
-4.899351596832275
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"8500"
:
{
"logprob"
:
-5.399351596832275
,
"rank"
:
5
,
"decoded_token"
:
" dark"
}},
{
"10575"
:
{
"logprob"
:
-0.20338763296604156
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-1.8283876180648804
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-3.95338773727417
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"28404"
:
{
"logprob"
:
-6.95338773727417
,
"rank"
:
4
,
"decoded_token"
:
" pup"
},
"8636"
:
{
"logprob"
:
-7.07838773727417
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"1395"
:
{
"logprob"
:
-0.532414972782135
,
"rank"
:
1
,
"decoded_token"
:
" is"
},
"22524"
:
{
"logprob"
:
-1.7824149131774902
,
"rank"
:
2
,
"decoded_token"
:
" lies"
},
"1454"
:
{
"logprob"
:
-2.1574149131774902
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"10637"
:
{
"logprob"
:
-3.2824149131774902
,
"rank"
:
4
,
"decoded_token"
:
" looks"
},
"28528"
:
{
"logprob"
:
-3.4074149131774902
,
"rank"
:
5
,
"decoded_token"
:
" lying"
}},
{
"28528"
:
{
"logprob"
:
-0.4258010685443878
,
"rank"
:
1
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-1.6758010387420654
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"60700"
:
{
"logprob"
:
-2.9258010387420654
,
"rank"
:
3
,
"decoded_token"
:
" laying"
},
"38235"
:
{
"logprob"
:
-3.6758010387420654
,
"rank"
:
4
,
"decoded_token"
:
" resting"
},
"18970"
:
{
"logprob"
:
-3.6758010387420654
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"1408"
:
{
"logprob"
:
-0.3588743805885315
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3151"
:
{
"logprob"
:
-1.2338743209838867
,
"rank"
:
2
,
"decoded_token"
:
" down"
},
"41132"
:
{
"logprob"
:
-6.358874320983887
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"14038"
:
{
"logprob"
:
-6.546374320983887
,
"rank"
:
4
,
"decoded_token"
:
" flat"
},
"1321"
:
{
"logprob"
:
-6.733874320983887
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1261"
:
{
"logprob"
:
-0.07801607996225357
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-2.9530160427093506
,
"rank"
:
2
,
"decoded_token"
:
" its"
},
"32656"
:
{
"logprob"
:
-4.20301628112793
,
"rank"
:
3
,
"decoded_token"
:
" wooden"
},
"1278"
:
{
"logprob"
:
-5.20301628112793
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"3977"
:
{
"logprob"
:
-6.57801628112793
,
"rank"
:
5
,
"decoded_token"
:
" top"
}},
{
"32656"
:
{
"logprob"
:
-0.06541638821363449
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"3403"
:
{
"logprob"
:
-3.4404163360595703
,
"rank"
:
2
,
"decoded_token"
:
" text"
},
"44130"
:
{
"logprob"
:
-3.9404163360595703
,
"rank"
:
3
,
"decoded_token"
:
" rust"
},
"17253"
:
{
"logprob"
:
-5.81541633605957
,
"rank"
:
4
,
"decoded_token"
:
" weather"
},
"12603"
:
{
"logprob"
:
-5.94041633605957
,
"rank"
:
5
,
"decoded_token"
:
" wood"
}},
{
"11237"
:
{
"logprob"
:
-0.4574064016342163
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-1.0824064016342163
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"1615"
:
{
"logprob"
:
-4.082406520843506
,
"rank"
:
3
,
"decoded_token"
:
" pl"
},
"3403"
:
{
"logprob"
:
-5.207406520843506
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"28984"
:
{
"logprob"
:
-6.582406520843506
,
"rank"
:
5
,
"decoded_token"
:
" deck"
}},
{
"1044"
:
{
"logprob"
:
-0.9594833850860596
,
"rank"
:
1
,
"decoded_token"
:
","
},
"7283"
:
{
"logprob"
:
-1.2094833850860596
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"1321"
:
{
"logprob"
:
-2.2094833850860596
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1454"
:
{
"logprob"
:
-2.4594833850860596
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"1626"
:
{
"logprob"
:
-2.5844833850860596
,
"rank"
:
5
,
"decoded_token"
:
".
\n
"
}},
{
"7283"
:
{
"logprob"
:
-0.15972694754600525
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"11589"
:
{
"logprob"
:
-2.534726858139038
,
"rank"
:
2
,
"decoded_token"
:
" gaz"
},
"35542"
:
{
"logprob"
:
-2.909726858139038
,
"rank"
:
3
,
"decoded_token"
:
" staring"
},
"22116"
:
{
"logprob"
:
-6.034727096557617
,
"rank"
:
4
,
"decoded_token"
:
" facing"
},
"1454"
:
{
"logprob"
:
-6.409727096557617
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"2015"
:
{
"logprob"
:
-0.894250750541687
,
"rank"
:
1
,
"decoded_token"
:
" up"
},
"7655"
:
{
"logprob"
:
-1.269250750541687
,
"rank"
:
2
,
"decoded_token"
:
" directly"
},
"74606"
:
{
"logprob"
:
-1.769250750541687
,
"rank"
:
3
,
"decoded_token"
:
" upwards"
},
"40022"
:
{
"logprob"
:
-2.6442508697509766
,
"rank"
:
4
,
"decoded_token"
:
" upward"
},
"1935"
:
{
"logprob"
:
-4.081750869750977
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1513"
:
{
"logprob"
:
-0.5085363388061523
,
"rank"
:
1
,
"decoded_token"
:
" at"
},
"1454"
:
{
"logprob"
:
-1.5085363388061523
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1626"
:
{
"logprob"
:
-2.6335363388061523
,
"rank"
:
3
,
"decoded_token"
:
".
\n
"
},
"1935"
:
{
"logprob"
:
-3.3835363388061523
,
"rank"
:
4
,
"decoded_token"
:
" int"
},
"41132"
:
{
"logprob"
:
-3.6335363388061523
,
"rank"
:
5
,
"decoded_token"
:
" attent"
}},
{
"1278"
:
{
"logprob"
:
-0.0010482537327334285
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"4433"
:
{
"logprob"
:
-7.0010480880737305
,
"rank"
:
2
,
"decoded_token"
:
" something"
},
"2246"
:
{
"logprob"
:
-10.25104808807373
,
"rank"
:
3
,
"decoded_token"
:
" its"
},
"1261"
:
{
"logprob"
:
-10.25104808807373
,
"rank"
:
4
,
"decoded_token"
:
" a"
},
"1636"
:
{
"logprob"
:
-10.50104808807373
,
"rank"
:
5
,
"decoded_token"
:
" you"
}},
{
"13424"
:
{
"logprob"
:
-0.0003800861886702478
,
"rank"
:
1
,
"decoded_token"
:
" camera"
},
"56268"
:
{
"logprob"
:
-8.250380516052246
,
"rank"
:
2
,
"decoded_token"
:
" viewer"
},
"68439"
:
{
"logprob"
:
-9.250380516052246
,
"rank"
:
3
,
"decoded_token"
:
" photographer"
},
"2965"
:
{
"logprob"
:
-12.375380516052246
,
"rank"
:
4
,
"decoded_token"
:
" person"
},
"37967"
:
{
"logprob"
:
-12.500380516052246
,
"rank"
:
5
,
"decoded_token"
:
" ceiling"
}},
{
"1626"
:
{
"logprob"
:
-0.34197133779525757
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1454"
:
{
"logprob"
:
-1.4669713973999023
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1046"
:
{
"logprob"
:
-3.3419713973999023
,
"rank"
:
3
,
"decoded_token"
:
"."
},
"1338"
:
{
"logprob"
:
-3.9669713973999023
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1935"
:
{
"logprob"
:
-5.966971397399902
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1050"
:
{
"logprob"
:
-0.002148107625544071
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1256"
:
{
"logprob"
:
-6.877148151397705
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-7.127148151397705
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-8.252147674560547
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1049"
:
{
"logprob"
:
-10.752147674560547
,
"rank"
:
5
,
"decoded_token"
:
"1"
}},
{
"1046"
:
{
"logprob"
:
-7.510157047363464e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.437507629394531
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"1626"
:
{
"logprob"
:
-13.437507629394531
,
"rank"
:
3
,
"decoded_token"
:
".
\n
"
},
"48426"
:
{
"logprob"
:
-13.687507629394531
,
"rank"
:
4
,
"decoded_token"
:
".The"
},
"1044"
:
{
"logprob"
:
-14.062507629394531
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1349"
:
{
"logprob"
:
-0.2843300700187683
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-2.034330129623413
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"113465"
:
{
"logprob"
:
-3.534330129623413
,
"rank"
:
3
,
"decoded_token"
:
" Rug"
},
"22468"
:
{
"logprob"
:
-4.409329891204834
,
"rank"
:
4
,
"decoded_token"
:
" Several"
},
"1531"
:
{
"logprob"
:
-4.534329891204834
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"10726"
:
{
"logprob"
:
-1.3984904289245605
,
"rank"
:
1
,
"decoded_token"
:
" scen"
},
"122203"
:
{
"logprob"
:
-1.7734904289245605
,
"rank"
:
2
,
"decoded_token"
:
" rugged"
},
"61082"
:
{
"logprob"
:
-1.7734904289245605
,
"rank"
:
3
,
"decoded_token"
:
" panor"
},
"15375"
:
{
"logprob"
:
-2.5234904289245605
,
"rank"
:
4
,
"decoded_token"
:
" vast"
},
"13770"
:
{
"logprob"
:
-2.6484904289245605
,
"rank"
:
5
,
"decoded_token"
:
" maj"
}},
{
"1290"
:
{
"logprob"
:
-3.099436753473128e-06
,
"rank"
:
1
,
"decoded_token"
:
"ic"
},
"2981"
:
{
"logprob"
:
-13.56250286102295
,
"rank"
:
2
,
"decoded_token"
:
"ically"
},
"1702"
:
{
"logprob"
:
-14.31250286102295
,
"rank"
:
3
,
"decoded_token"
:
"ice"
},
"4965"
:
{
"logprob"
:
-16.625003814697266
,
"rank"
:
4
,
"decoded_token"
:
"etic"
},
"4336"
:
{
"logprob"
:
-16.687503814697266
,
"rank"
:
5
,
"decoded_token"
:
"icro"
}},
{
"3719"
:
{
"logprob"
:
-0.1252945065498352
,
"rank"
:
1
,
"decoded_token"
:
" view"
},
"28035"
:
{
"logprob"
:
-2.8752944469451904
,
"rank"
:
2
,
"decoded_token"
:
" landscape"
},
"24361"
:
{
"logprob"
:
-3.2502944469451904
,
"rank"
:
3
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-5.1252946853637695
,
"rank"
:
4
,
"decoded_token"
:
" mountainous"
},
"1044"
:
{
"logprob"
:
-5.3752946853637695
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1307"
:
{
"logprob"
:
-0.09058280289173126
,
"rank"
:
1
,
"decoded_token"
:
" of"
},
"89995"
:
{
"logprob"
:
-3.465582847595215
,
"rank"
:
2
,
"decoded_token"
:
" showc"
},
"6122"
:
{
"logprob"
:
-3.715582847595215
,
"rank"
:
3
,
"decoded_token"
:
" shows"
},
"6971"
:
{
"logprob"
:
-4.590582847595215
,
"rank"
:
4
,
"decoded_token"
:
" features"
},
"66583"
:
{
"logprob"
:
-5.090582847595215
,
"rank"
:
5
,
"decoded_token"
:
" captures"
}},
{
"122203"
:
{
"logprob"
:
-0.5323622226715088
,
"rank"
:
1
,
"decoded_token"
:
" rugged"
},
"1261"
:
{
"logprob"
:
-2.032362222671509
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"6245"
:
{
"logprob"
:
-2.532362222671509
,
"rank"
:
3
,
"decoded_token"
:
" multiple"
},
"127945"
:
{
"logprob"
:
-3.157362222671509
,
"rank"
:
4
,
"decoded_token"
:
" mountainous"
},
"35463"
:
{
"logprob"
:
-3.532362222671509
,
"rank"
:
5
,
"decoded_token"
:
" mountains"
}},
{
"35463"
:
{
"logprob"
:
-0.6520033478736877
,
"rank"
:
1
,
"decoded_token"
:
" mountains"
},
"1044"
:
{
"logprob"
:
-1.027003288269043
,
"rank"
:
2
,
"decoded_token"
:
","
},
"24361"
:
{
"logprob"
:
-2.527003288269043
,
"rank"
:
3
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-3.902003288269043
,
"rank"
:
4
,
"decoded_token"
:
" mountainous"
},
"11223"
:
{
"logprob"
:
-4.652003288269043
,
"rank"
:
5
,
"decoded_token"
:
" green"
}},
{
"1454"
:
{
"logprob"
:
-0.39697548747062683
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"13875"
:
{
"logprob"
:
-2.146975517272949
,
"rank"
:
2
,
"decoded_token"
:
" covered"
},
"1321"
:
{
"logprob"
:
-2.271975517272949
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"2425"
:
{
"logprob"
:
-3.459475517272949
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"47948"
:
{
"logprob"
:
-4.459475517272949
,
"rank"
:
5
,
"decoded_token"
:
" stretching"
}},
{
"11223"
:
{
"logprob"
:
-1.3947651386260986
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"24880"
:
{
"logprob"
:
-1.8947651386260986
,
"rank"
:
2
,
"decoded_token"
:
" varying"
},
"95746"
:
{
"logprob"
:
-2.0822651386260986
,
"rank"
:
3
,
"decoded_token"
:
" rocky"
},
"1295"
:
{
"logprob"
:
-3.0197651386260986
,
"rank"
:
4
,
"decoded_token"
:
" l"
},
"19546"
:
{
"logprob"
:
-3.0822651386260986
,
"rank"
:
5
,
"decoded_token"
:
" varied"
}},
{
"1321"
:
{
"logprob"
:
-0.8649212121963501
,
"rank"
:
1
,
"decoded_token"
:
" and"
},
"61263"
:
{
"logprob"
:
-1.73992121219635
,
"rank"
:
2
,
"decoded_token"
:
" slopes"
},
"47260"
:
{
"logprob"
:
-1.86492121219635
,
"rank"
:
3
,
"decoded_token"
:
" vegetation"
},
"50373"
:
{
"logprob"
:
-1.98992121219635
,
"rank"
:
4
,
"decoded_token"
:
" patches"
},
"23170"
:
{
"logprob"
:
-3.4899210929870605
,
"rank"
:
5
,
"decoded_token"
:
" grass"
}},
{
"95746"
:
{
"logprob"
:
-0.21662631630897522
,
"rank"
:
1
,
"decoded_token"
:
" rocky"
},
"22980"
:
{
"logprob"
:
-1.9666262865066528
,
"rank"
:
2
,
"decoded_token"
:
" brown"
},
"26549"
:
{
"logprob"
:
-3.8416264057159424
,
"rank"
:
3
,
"decoded_token"
:
" gray"
},
"4266"
:
{
"logprob"
:
-4.216626167297363
,
"rank"
:
4
,
"decoded_token"
:
" bar"
},
"34052"
:
{
"logprob"
:
-4.966626167297363
,
"rank"
:
5
,
"decoded_token"
:
" grey"
}},
{
"24765"
:
{
"logprob"
:
-0.32041722536087036
,
"rank"
:
1
,
"decoded_token"
:
" terrain"
},
"57912"
:
{
"logprob"
:
-1.8204171657562256
,
"rank"
:
2
,
"decoded_token"
:
" terrains"
},
"61263"
:
{
"logprob"
:
-2.6954171657562256
,
"rank"
:
3
,
"decoded_token"
:
" slopes"
},
"84497"
:
{
"logprob"
:
-3.9454171657562256
,
"rank"
:
4
,
"decoded_token"
:
" landscapes"
},
"17764"
:
{
"logprob"
:
-4.695417404174805
,
"rank"
:
5
,
"decoded_token"
:
" surfaces"
}},
{
"2425"
:
{
"logprob"
:
-0.4664109945297241
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1046"
:
{
"logprob"
:
-1.4664109945297241
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-3.4664111137390137
,
"rank"
:
3
,
"decoded_token"
:
","
},
"22923"
:
{
"logprob"
:
-3.9664111137390137
,
"rank"
:
4
,
"decoded_token"
:
" extending"
},
"47948"
:
{
"logprob"
:
-4.091411113739014
,
"rank"
:
5
,
"decoded_token"
:
" stretching"
}},
{
"1261"
:
{
"logprob"
:
-0.015043734572827816
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-4.76504373550415
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"6133"
:
{
"logprob"
:
-6.01504373550415
,
"rank"
:
3
,
"decoded_token"
:
" clear"
},
"1278"
:
{
"logprob"
:
-6.26504373550415
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"16152"
:
{
"logprob"
:
-7.26504373550415
,
"rank"
:
5
,
"decoded_token"
:
" cloud"
}},
{
"6133"
:
{
"logprob"
:
-0.7420746684074402
,
"rank"
:
1
,
"decoded_token"
:
" clear"
},
"18416"
:
{
"logprob"
:
-1.492074728012085
,
"rank"
:
2
,
"decoded_token"
:
" haz"
},
"16152"
:
{
"logprob"
:
-1.992074728012085
,
"rank"
:
3
,
"decoded_token"
:
" cloud"
},
"27254"
:
{
"logprob"
:
-3.367074728012085
,
"rank"
:
4
,
"decoded_token"
:
" partly"
},
"4391"
:
{
"logprob"
:
-3.617074728012085
,
"rank"
:
5
,
"decoded_token"
:
" light"
}},
{
"21283"
:
{
"logprob"
:
-0.007355513051152229
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-5.257355690002441
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-6.382355690002441
,
"rank"
:
3
,
"decoded_token"
:
","
},
"1505"
:
{
"logprob"
:
-8.257355690002441
,
"rank"
:
4
,
"decoded_token"
:
" or"
},
"3950"
:
{
"logprob"
:
-10.132355690002441
,
"rank"
:
5
,
"decoded_token"
:
" day"
}},
{
"1046"
:
{
"logprob"
:
-0.01126158982515335
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1626"
:
{
"logprob"
:
-4.636261463165283
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1338"
:
{
"logprob"
:
-7.761261463165283
,
"rank"
:
3
,
"decoded_token"
:
".
\n\n
"
},
"1044"
:
{
"logprob"
:
-7.761261463165283
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1395"
:
{
"logprob"
:
-8.011261940002441
,
"rank"
:
5
,
"decoded_token"
:
" is"
}},
{
"2"
:
{
"logprob"
:
-0.00709608756005764
,
"rank"
:
1
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-5.007096290588379
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1256"
:
{
"logprob"
:
-8.132096290588379
,
"rank"
:
3
,
"decoded_token"
:
" "
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
1395
,
28528
,
1408
,
1261
,
32656
,
11237
,
1044
,
7283
,
2015
,
1513
,
1278
,
13424
,
1626
,
1050
,
1046
,
1349
,
122203
,
24361
,
28035
,
1454
,
11223
,
1321
,
95746
,
24765
,
2425
,
1261
,
6133
,
21283
,
1626
,
1051
,
1046
,
1349
,
2965
,
1294
,
1261
,
4804
,
4250
,
12006
,
4302
,
48049
,
4837
,
1261
,
29397
,
1435
,
22140
,
21457
,
22196
,
1626
,
1052
,
1046
,
1349
,
53301
,
59396
,
3549
,
1294
,
1261
,
12097
,
1044
,
121040
,
1536
,
11223
,
23170
,
1321
,
17744
,
34941
,
16429
,
2425
,
1261
,
10991
,
21283
,
1046
,
2
],
"1. A black dog is lying on a wooden floor, looking up at the camera.
\n
2. A rugged mountain landscape with green and rocky terrain under a clear sky.
\n
3. A person in a red swimsuit walks along a beach as waves crash nearby.
\n
4. A winding gravel path in a park, bordered by green grass and blooming trees under a blue sky."
,
[{
"1049"
:
{
"logprob"
:
-0.17000193893909454
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"11745"
:
{
"logprob"
:
-1.9200019836425781
,
"rank"
:
2
,
"decoded_token"
:
"Here"
},
"69957"
:
{
"logprob"
:
-4.920001983642578
,
"rank"
:
3
,
"decoded_token"
:
"Sure"
},
"117991"
:
{
"logprob"
:
-7.295001983642578
,
"rank"
:
4
,
"decoded_token"
:
"Certain"
},
"1784"
:
{
"logprob"
:
-7.295001983642578
,
"rank"
:
5
,
"decoded_token"
:
"The"
}},
{
"1046"
:
{
"logprob"
:
-1.597391747054644e-05
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1041"
:
{
"logprob"
:
-11.500016212463379
,
"rank"
:
2
,
"decoded_token"
:
")"
},
"1058"
:
{
"logprob"
:
-13.062516212463379
,
"rank"
:
3
,
"decoded_token"
:
":"
},
"3590"
:
{
"logprob"
:
-13.750016212463379
,
"rank"
:
4
,
"decoded_token"
:
".A"
},
"48426"
:
{
"logprob"
:
-14.312516212463379
,
"rank"
:
5
,
"decoded_token"
:
".The"
}},
{
"1349"
:
{
"logprob"
:
-0.07567699253559113
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1531"
:
{
"logprob"
:
-3.075676918029785
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1603"
:
{
"logprob"
:
-3.950676918029785
,
"rank"
:
3
,
"decoded_token"
:
" **"
},
"2409"
:
{
"logprob"
:
-6.075676918029785
,
"rank"
:
4
,
"decoded_token"
:
" This"
},
"8479"
:
{
"logprob"
:
-6.575676918029785
,
"rank"
:
5
,
"decoded_token"
:
" Black"
}},
{
"7244"
:
{
"logprob"
:
-0.06906593590974808
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"16450"
:
{
"logprob"
:
-3.694066047668457
,
"rank"
:
2
,
"decoded_token"
:
" sle"
},
"6231"
:
{
"logprob"
:
-4.506566047668457
,
"rank"
:
3
,
"decoded_token"
:
" close"
},
"4329"
:
{
"logprob"
:
-4.944066047668457
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"8500"
:
{
"logprob"
:
-5.256566047668457
,
"rank"
:
5
,
"decoded_token"
:
" dark"
}},
{
"10575"
:
{
"logprob"
:
-0.11913803219795227
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.24413800239563
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-5.494138240814209
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"28404"
:
{
"logprob"
:
-7.181638240814209
,
"rank"
:
4
,
"decoded_token"
:
" pup"
},
"8636"
:
{
"logprob"
:
-7.869138240814209
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"1395"
:
{
"logprob"
:
-0.782707154750824
,
"rank"
:
1
,
"decoded_token"
:
" is"
},
"22524"
:
{
"logprob"
:
-1.1577072143554688
,
"rank"
:
2
,
"decoded_token"
:
" lies"
},
"1454"
:
{
"logprob"
:
-2.9077072143554688
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"10637"
:
{
"logprob"
:
-3.0327072143554688
,
"rank"
:
4
,
"decoded_token"
:
" looks"
},
"28528"
:
{
"logprob"
:
-3.5327072143554688
,
"rank"
:
5
,
"decoded_token"
:
" lying"
}},
{
"28528"
:
{
"logprob"
:
-0.3443163335323334
,
"rank"
:
1
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-2.094316244125366
,
"rank"
:
2
,
"decoded_token"
:
" looking"
},
"60700"
:
{
"logprob"
:
-2.844316244125366
,
"rank"
:
3
,
"decoded_token"
:
" laying"
},
"38235"
:
{
"logprob"
:
-3.344316244125366
,
"rank"
:
4
,
"decoded_token"
:
" resting"
},
"18970"
:
{
"logprob"
:
-3.469316244125366
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"1408"
:
{
"logprob"
:
-0.29093095660209656
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3151"
:
{
"logprob"
:
-1.415930986404419
,
"rank"
:
2
,
"decoded_token"
:
" down"
},
"41132"
:
{
"logprob"
:
-6.16593074798584
,
"rank"
:
3
,
"decoded_token"
:
" attent"
},
"1321"
:
{
"logprob"
:
-6.85343074798584
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"14038"
:
{
"logprob"
:
-6.97843074798584
,
"rank"
:
5
,
"decoded_token"
:
" flat"
}},
{
"1261"
:
{
"logprob"
:
-0.05553353577852249
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-3.6805336475372314
,
"rank"
:
2
,
"decoded_token"
:
" its"
},
"32656"
:
{
"logprob"
:
-3.8055336475372314
,
"rank"
:
3
,
"decoded_token"
:
" wooden"
},
"1278"
:
{
"logprob"
:
-5.305533409118652
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"3977"
:
{
"logprob"
:
-7.430533409118652
,
"rank"
:
5
,
"decoded_token"
:
" top"
}},
{
"32656"
:
{
"logprob"
:
-0.039505477994680405
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"3403"
:
{
"logprob"
:
-3.9145054817199707
,
"rank"
:
2
,
"decoded_token"
:
" text"
},
"44130"
:
{
"logprob"
:
-4.414505481719971
,
"rank"
:
3
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-5.914505481719971
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"17253"
:
{
"logprob"
:
-6.539505481719971
,
"rank"
:
5
,
"decoded_token"
:
" weather"
}},
{
"11237"
:
{
"logprob"
:
-0.373188853263855
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-1.248188853263855
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"1615"
:
{
"logprob"
:
-4.2481889724731445
,
"rank"
:
3
,
"decoded_token"
:
" pl"
},
"3403"
:
{
"logprob"
:
-5.6231889724731445
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"28984"
:
{
"logprob"
:
-5.9981889724731445
,
"rank"
:
5
,
"decoded_token"
:
" deck"
}},
{
"1044"
:
{
"logprob"
:
-1.378434181213379
,
"rank"
:
3
,
"decoded_token"
:
","
},
"7283"
:
{
"logprob"
:
-1.378434181213379
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"1626"
:
{
"logprob"
:
-1.378434181213379
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1321"
:
{
"logprob"
:
-2.378434181213379
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"1454"
:
{
"logprob"
:
-2.628434181213379
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"7283"
:
{
"logprob"
:
-0.17630912363529205
,
"rank"
:
1
,
"decoded_token"
:
" looking"
},
"11589"
:
{
"logprob"
:
-2.551309108734131
,
"rank"
:
2
,
"decoded_token"
:
" gaz"
},
"35542"
:
{
"logprob"
:
-2.676309108734131
,
"rank"
:
3
,
"decoded_token"
:
" staring"
},
"22116"
:
{
"logprob"
:
-6.238809108734131
,
"rank"
:
4
,
"decoded_token"
:
" facing"
},
"11735"
:
{
"logprob"
:
-6.488809108734131
,
"rank"
:
5
,
"decoded_token"
:
" giving"
}},
{
"2015"
:
{
"logprob"
:
-0.8436563014984131
,
"rank"
:
1
,
"decoded_token"
:
" up"
},
"7655"
:
{
"logprob"
:
-1.343656301498413
,
"rank"
:
2
,
"decoded_token"
:
" directly"
},
"74606"
:
{
"logprob"
:
-1.718656301498413
,
"rank"
:
3
,
"decoded_token"
:
" upwards"
},
"40022"
:
{
"logprob"
:
-2.593656301498413
,
"rank"
:
4
,
"decoded_token"
:
" upward"
},
"11521"
:
{
"logprob"
:
-4.406156539916992
,
"rank"
:
5
,
"decoded_token"
:
" straight"
}},
{
"1513"
:
{
"logprob"
:
-0.45780688524246216
,
"rank"
:
1
,
"decoded_token"
:
" at"
},
"1626"
:
{
"logprob"
:
-1.7078068256378174
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1454"
:
{
"logprob"
:
-2.3328068256378174
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1935"
:
{
"logprob"
:
-3.5828068256378174
,
"rank"
:
4
,
"decoded_token"
:
" int"
},
"41132"
:
{
"logprob"
:
-3.9578068256378174
,
"rank"
:
5
,
"decoded_token"
:
" attent"
}},
{
"1278"
:
{
"logprob"
:
-0.0004164305282756686
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"4433"
:
{
"logprob"
:
-8.00041675567627
,
"rank"
:
2
,
"decoded_token"
:
" something"
},
"1261"
:
{
"logprob"
:
-10.50041675567627
,
"rank"
:
3
,
"decoded_token"
:
" a"
},
"2246"
:
{
"logprob"
:
-10.87541675567627
,
"rank"
:
4
,
"decoded_token"
:
" its"
},
"1636"
:
{
"logprob"
:
-11.37541675567627
,
"rank"
:
5
,
"decoded_token"
:
" you"
}},
{
"13424"
:
{
"logprob"
:
-0.000399033073335886
,
"rank"
:
1
,
"decoded_token"
:
" camera"
},
"56268"
:
{
"logprob"
:
-8.125398635864258
,
"rank"
:
2
,
"decoded_token"
:
" viewer"
},
"68439"
:
{
"logprob"
:
-9.500398635864258
,
"rank"
:
3
,
"decoded_token"
:
" photographer"
},
"37967"
:
{
"logprob"
:
-12.000398635864258
,
"rank"
:
4
,
"decoded_token"
:
" ceiling"
},
"2965"
:
{
"logprob"
:
-12.312898635864258
,
"rank"
:
5
,
"decoded_token"
:
" person"
}},
{
"1626"
:
{
"logprob"
:
-0.10298559814691544
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1046"
:
{
"logprob"
:
-2.9779856204986572
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-3.2279856204986572
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1338"
:
{
"logprob"
:
-5.227985382080078
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1935"
:
{
"logprob"
:
-6.852985382080078
,
"rank"
:
5
,
"decoded_token"
:
" int"
}},
{
"1050"
:
{
"logprob"
:
-0.002897590398788452
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1256"
:
{
"logprob"
:
-6.5028977394104
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-6.6278977394104
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-9.877897262573242
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1009"
:
{
"logprob"
:
-11.627897262573242
,
"rank"
:
5
,
"decoded_token"
:
"
\t
"
}},
{
"1046"
:
{
"logprob"
:
-1.5497195136049413e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-14.875001907348633
,
"rank"
:
2
,
"decoded_token"
:
","
},
"3590"
:
{
"logprob"
:
-15.000001907348633
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"2247"
:
{
"logprob"
:
-15.125001907348633
,
"rank"
:
4
,
"decoded_token"
:
" ."
},
"1058"
:
{
"logprob"
:
-15.375001907348633
,
"rank"
:
5
,
"decoded_token"
:
":"
}},
{
"1349"
:
{
"logprob"
:
-0.6107801198959351
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-1.360780119895935
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"113465"
:
{
"logprob"
:
-2.3607802391052246
,
"rank"
:
3
,
"decoded_token"
:
" Rug"
},
"27260"
:
{
"logprob"
:
-3.7357802391052246
,
"rank"
:
4
,
"decoded_token"
:
" Mountain"
},
"1531"
:
{
"logprob"
:
-4.485780239105225
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"122203"
:
{
"logprob"
:
-0.8547073602676392
,
"rank"
:
1
,
"decoded_token"
:
" rugged"
},
"15375"
:
{
"logprob"
:
-2.1047072410583496
,
"rank"
:
2
,
"decoded_token"
:
" vast"
},
"10726"
:
{
"logprob"
:
-2.1047072410583496
,
"rank"
:
3
,
"decoded_token"
:
" scen"
},
"61082"
:
{
"logprob"
:
-2.6047072410583496
,
"rank"
:
4
,
"decoded_token"
:
" panor"
},
"2965"
:
{
"logprob"
:
-3.2922072410583496
,
"rank"
:
5
,
"decoded_token"
:
" person"
}},
{
"24361"
:
{
"logprob"
:
-0.41217130422592163
,
"rank"
:
1
,
"decoded_token"
:
" mountain"
},
"1044"
:
{
"logprob"
:
-1.6621713638305664
,
"rank"
:
2
,
"decoded_token"
:
","
},
"127945"
:
{
"logprob"
:
-2.6621713638305664
,
"rank"
:
3
,
"decoded_token"
:
" mountainous"
},
"28035"
:
{
"logprob"
:
-3.5371713638305664
,
"rank"
:
4
,
"decoded_token"
:
" landscape"
},
"1321"
:
{
"logprob"
:
-3.6621713638305664
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"28035"
:
{
"logprob"
:
-0.6676621437072754
,
"rank"
:
1
,
"decoded_token"
:
" landscape"
},
"4521"
:
{
"logprob"
:
-0.7926621437072754
,
"rank"
:
2
,
"decoded_token"
:
" range"
},
"24765"
:
{
"logprob"
:
-4.542662143707275
,
"rank"
:
3
,
"decoded_token"
:
" terrain"
},
"13327"
:
{
"logprob"
:
-5.167662143707275
,
"rank"
:
4
,
"decoded_token"
:
" scene"
},
"12248"
:
{
"logprob"
:
-5.167662143707275
,
"rank"
:
5
,
"decoded_token"
:
" peak"
}},
{
"1454"
:
{
"logprob"
:
-0.31015345454216003
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"6971"
:
{
"logprob"
:
-2.4351534843444824
,
"rank"
:
2
,
"decoded_token"
:
" features"
},
"94973"
:
{
"logprob"
:
-3.3101534843444824
,
"rank"
:
3
,
"decoded_token"
:
" stretches"
},
"89995"
:
{
"logprob"
:
-3.4351534843444824
,
"rank"
:
4
,
"decoded_token"
:
" showc"
},
"1395"
:
{
"logprob"
:
-3.5601534843444824
,
"rank"
:
5
,
"decoded_token"
:
" is"
}},
{
"11223"
:
{
"logprob"
:
-1.547694206237793
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"95746"
:
{
"logprob"
:
-1.922694206237793
,
"rank"
:
2
,
"decoded_token"
:
" rocky"
},
"27469"
:
{
"logprob"
:
-2.172694206237793
,
"rank"
:
3
,
"decoded_token"
:
" peaks"
},
"6245"
:
{
"logprob"
:
-2.297694206237793
,
"rank"
:
4
,
"decoded_token"
:
" multiple"
},
"47147"
:
{
"logprob"
:
-2.360194206237793
,
"rank"
:
5
,
"decoded_token"
:
" steep"
}},
{
"1321"
:
{
"logprob"
:
-0.9617817401885986
,
"rank"
:
1
,
"decoded_token"
:
" and"
},
"61263"
:
{
"logprob"
:
-1.3367817401885986
,
"rank"
:
2
,
"decoded_token"
:
" slopes"
},
"51187"
:
{
"logprob"
:
-2.3367817401885986
,
"rank"
:
3
,
"decoded_token"
:
" hills"
},
"47260"
:
{
"logprob"
:
-2.3367817401885986
,
"rank"
:
4
,
"decoded_token"
:
" vegetation"
},
"50373"
:
{
"logprob"
:
-2.7117817401885986
,
"rank"
:
5
,
"decoded_token"
:
" patches"
}},
{
"95746"
:
{
"logprob"
:
-0.11686273664236069
,
"rank"
:
1
,
"decoded_token"
:
" rocky"
},
"22980"
:
{
"logprob"
:
-2.7418627738952637
,
"rank"
:
2
,
"decoded_token"
:
" brown"
},
"4266"
:
{
"logprob"
:
-3.8668627738952637
,
"rank"
:
3
,
"decoded_token"
:
" bar"
},
"26549"
:
{
"logprob"
:
-4.491862773895264
,
"rank"
:
4
,
"decoded_token"
:
" gray"
},
"9091"
:
{
"logprob"
:
-5.366862773895264
,
"rank"
:
5
,
"decoded_token"
:
" rock"
}},
{
"24765"
:
{
"logprob"
:
-0.22640009224414825
,
"rank"
:
1
,
"decoded_token"
:
" terrain"
},
"57912"
:
{
"logprob"
:
-2.476400136947632
,
"rank"
:
2
,
"decoded_token"
:
" terrains"
},
"61263"
:
{
"logprob"
:
-2.726400136947632
,
"rank"
:
3
,
"decoded_token"
:
" slopes"
},
"51187"
:
{
"logprob"
:
-3.851400136947632
,
"rank"
:
4
,
"decoded_token"
:
" hills"
},
"27469"
:
{
"logprob"
:
-3.976400136947632
,
"rank"
:
5
,
"decoded_token"
:
" peaks"
}},
{
"2425"
:
{
"logprob"
:
-0.7823817133903503
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1626"
:
{
"logprob"
:
-1.1573817729949951
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"94973"
:
{
"logprob"
:
-2.657381772994995
,
"rank"
:
3
,
"decoded_token"
:
" stretches"
},
"1395"
:
{
"logprob"
:
-2.782381772994995
,
"rank"
:
4
,
"decoded_token"
:
" is"
},
"7038"
:
{
"logprob"
:
-3.532381772994995
,
"rank"
:
5
,
"decoded_token"
:
" extends"
}},
{
"1261"
:
{
"logprob"
:
-0.016132064163684845
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"6133"
:
{
"logprob"
:
-5.39113187789917
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"1420"
:
{
"logprob"
:
-5.39113187789917
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"1278"
:
{
"logprob"
:
-6.01613187789917
,
"rank"
:
4
,
"decoded_token"
:
" the"
},
"16152"
:
{
"logprob"
:
-6.26613187789917
,
"rank"
:
5
,
"decoded_token"
:
" cloud"
}},
{
"6133"
:
{
"logprob"
:
-0.44541382789611816
,
"rank"
:
1
,
"decoded_token"
:
" clear"
},
"16152"
:
{
"logprob"
:
-2.070413827896118
,
"rank"
:
2
,
"decoded_token"
:
" cloud"
},
"18416"
:
{
"logprob"
:
-2.320413827896118
,
"rank"
:
3
,
"decoded_token"
:
" haz"
},
"27254"
:
{
"logprob"
:
-3.195413827896118
,
"rank"
:
4
,
"decoded_token"
:
" partly"
},
"10991"
:
{
"logprob"
:
-3.320413827896118
,
"rank"
:
5
,
"decoded_token"
:
" blue"
}},
{
"21283"
:
{
"logprob"
:
-0.003768961876630783
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-5.7537689208984375
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-7.6287689208984375
,
"rank"
:
3
,
"decoded_token"
:
","
},
"1505"
:
{
"logprob"
:
-10.753768920898438
,
"rank"
:
4
,
"decoded_token"
:
" or"
},
"3044"
:
{
"logprob"
:
-11.128768920898438
,
"rank"
:
5
,
"decoded_token"
:
" sk"
}},
{
"1626"
:
{
"logprob"
:
-0.0008177988929674029
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1046"
:
{
"logprob"
:
-7.375817775726318
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"1395"
:
{
"logprob"
:
-9.750818252563477
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"1010"
:
{
"logprob"
:
-10.125818252563477
,
"rank"
:
4
,
"decoded_token"
:
"
\n
"
},
"1044"
:
{
"logprob"
:
-10.750818252563477
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1051"
:
{
"logprob"
:
-0.00013457823661156
,
"rank"
:
1
,
"decoded_token"
:
"3"
},
"1052"
:
{
"logprob"
:
-9.125134468078613
,
"rank"
:
2
,
"decoded_token"
:
"4"
},
"1256"
:
{
"logprob"
:
-11.375134468078613
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1050"
:
{
"logprob"
:
-11.875134468078613
,
"rank"
:
4
,
"decoded_token"
:
"2"
},
"1049"
:
{
"logprob"
:
-13.000134468078613
,
"rank"
:
5
,
"decoded_token"
:
"1"
}},
{
"1046"
:
{
"logprob"
:
-7.152555099310121e-07
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-14.875000953674316
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"48426"
:
{
"logprob"
:
-15.937500953674316
,
"rank"
:
3
,
"decoded_token"
:
".The"
},
"1349"
:
{
"logprob"
:
-17.0
,
"rank"
:
4
,
"decoded_token"
:
" A"
},
"1338"
:
{
"logprob"
:
-17.3125
,
"rank"
:
5
,
"decoded_token"
:
".
\n\n
"
}},
{
"1349"
:
{
"logprob"
:
-0.03193942829966545
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"10638"
:
{
"logprob"
:
-4.406939506530762
,
"rank"
:
2
,
"decoded_token"
:
" Two"
},
"2048"
:
{
"logprob"
:
-5.031939506530762
,
"rank"
:
3
,
"decoded_token"
:
" An"
},
"1488"
:
{
"logprob"
:
-5.156939506530762
,
"rank"
:
4
,
"decoded_token"
:
" W"
},
"15035"
:
{
"logprob"
:
-5.906939506530762
,
"rank"
:
5
,
"decoded_token"
:
" People"
}},
{
"2965"
:
{
"logprob"
:
-0.41655251383781433
,
"rank"
:
1
,
"decoded_token"
:
" person"
},
"92731"
:
{
"logprob"
:
-1.5415525436401367
,
"rank"
:
2
,
"decoded_token"
:
" lone"
},
"79013"
:
{
"logprob"
:
-2.7915525436401367
,
"rank"
:
3
,
"decoded_token"
:
" solitary"
},
"29397"
:
{
"logprob"
:
-3.5415525436401367
,
"rank"
:
4
,
"decoded_token"
:
" beach"
},
"2169"
:
{
"logprob"
:
-4.729052543640137
,
"rank"
:
5
,
"decoded_token"
:
" ser"
}},
{
"1294"
:
{
"logprob"
:
-0.9845026135444641
,
"rank"
:
1
,
"decoded_token"
:
" in"
},
"1395"
:
{
"logprob"
:
-1.2345025539398193
,
"rank"
:
2
,
"decoded_token"
:
" is"
},
"48049"
:
{
"logprob"
:
-1.8595025539398193
,
"rank"
:
3
,
"decoded_token"
:
" walks"
},
"23737"
:
{
"logprob"
:
-2.2345025539398193
,
"rank"
:
4
,
"decoded_token"
:
" stands"
},
"1285"
:
{
"logprob"
:
-2.8595025539398193
,
"rank"
:
5
,
"decoded_token"
:
" w"
}},
{
"1261"
:
{
"logprob"
:
-0.32012784481048584
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"4804"
:
{
"logprob"
:
-1.3201278448104858
,
"rank"
:
2
,
"decoded_token"
:
" red"
},
"1420"
:
{
"logprob"
:
-5.820127964019775
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"64031"
:
{
"logprob"
:
-6.570127964019775
,
"rank"
:
4
,
"decoded_token"
:
" swim"
},
"18168"
:
{
"logprob"
:
-6.695127964019775
,
"rank"
:
5
,
"decoded_token"
:
" bright"
}},
{
"4804"
:
{
"logprob"
:
-0.10999592393636703
,
"rank"
:
1
,
"decoded_token"
:
" red"
},
"1285"
:
{
"logprob"
:
-2.3599958419799805
,
"rank"
:
2
,
"decoded_token"
:
" w"
},
"4250"
:
{
"logprob"
:
-5.6099958419799805
,
"rank"
:
3
,
"decoded_token"
:
" sw"
},
"18168"
:
{
"logprob"
:
-6.0474958419799805
,
"rank"
:
4
,
"decoded_token"
:
" bright"
},
"18258"
:
{
"logprob"
:
-6.4224958419799805
,
"rank"
:
5
,
"decoded_token"
:
" wet"
}},
{
"4250"
:
{
"logprob"
:
-0.2469252496957779
,
"rank"
:
1
,
"decoded_token"
:
" sw"
},
"1285"
:
{
"logprob"
:
-2.3719253540039062
,
"rank"
:
2
,
"decoded_token"
:
" w"
},
"64031"
:
{
"logprob"
:
-2.7469253540039062
,
"rank"
:
3
,
"decoded_token"
:
" swim"
},
"17513"
:
{
"logprob"
:
-3.2469253540039062
,
"rank"
:
4
,
"decoded_token"
:
" suit"
},
"75948"
:
{
"logprob"
:
-4.371925354003906
,
"rank"
:
5
,
"decoded_token"
:
" outfit"
}},
{
"12006"
:
{
"logprob"
:
-5.722029527532868e-06
,
"rank"
:
1
,
"decoded_token"
:
"ims"
},
"25763"
:
{
"logprob"
:
-12.750005722045898
,
"rank"
:
2
,
"decoded_token"
:
"immer"
},
"7552"
:
{
"logprob"
:
-13.687505722045898
,
"rank"
:
3
,
"decoded_token"
:
"imm"
},
"2097"
:
{
"logprob"
:
-16.6875057220459
,
"rank"
:
4
,
"decoded_token"
:
"ins"
},
"19523"
:
{
"logprob"
:
-16.7500057220459
,
"rank"
:
5
,
"decoded_token"
:
"imb"
}},
{
"4302"
:
{
"logprob"
:
-1.8000440832111053e-05
,
"rank"
:
1
,
"decoded_token"
:
"uit"
},
"17513"
:
{
"logprob"
:
-11.875018119812012
,
"rank"
:
2
,
"decoded_token"
:
" suit"
},
"8036"
:
{
"logprob"
:
-13.250018119812012
,
"rank"
:
3
,
"decoded_token"
:
"irt"
},
"36953"
:
{
"logprob"
:
-13.500018119812012
,
"rank"
:
4
,
"decoded_token"
:
"uiten"
},
"1276"
:
{
"logprob"
:
-14.437518119812012
,
"rank"
:
5
,
"decoded_token"
:
"it"
}},
{
"48049"
:
{
"logprob"
:
-0.41766560077667236
,
"rank"
:
1
,
"decoded_token"
:
" walks"
},
"1395"
:
{
"logprob"
:
-1.4176656007766724
,
"rank"
:
2
,
"decoded_token"
:
" is"
},
"19710"
:
{
"logprob"
:
-2.792665481567383
,
"rank"
:
3
,
"decoded_token"
:
" walking"
},
"23737"
:
{
"logprob"
:
-3.917665481567383
,
"rank"
:
4
,
"decoded_token"
:
" stands"
},
"1285"
:
{
"logprob"
:
-4.292665481567383
,
"rank"
:
5
,
"decoded_token"
:
" w"
}},
{
"4837"
:
{
"logprob"
:
-0.002689199522137642
,
"rank"
:
1
,
"decoded_token"
:
" along"
},
"9412"
:
{
"logprob"
:
-6.627689361572266
,
"rank"
:
2
,
"decoded_token"
:
" alone"
},
"6117"
:
{
"logprob"
:
-7.377689361572266
,
"rank"
:
3
,
"decoded_token"
:
" near"
},
"1408"
:
{
"logprob"
:
-8.002689361572266
,
"rank"
:
4
,
"decoded_token"
:
" on"
},
"2203"
:
{
"logprob"
:
-8.377689361572266
,
"rank"
:
5
,
"decoded_token"
:
" into"
}},
{
"1261"
:
{
"logprob"
:
-0.38749611377716064
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1278"
:
{
"logprob"
:
-1.1374961137771606
,
"rank"
:
2
,
"decoded_token"
:
" the"
},
"1420"
:
{
"logprob"
:
-7.387495994567871
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"100991"
:
{
"logprob"
:
-13.949995994567871
,
"rank"
:
4
,
"decoded_token"
:
" sandy"
},
"18258"
:
{
"logprob"
:
-14.512495994567871
,
"rank"
:
5
,
"decoded_token"
:
" wet"
}},
{
"29397"
:
{
"logprob"
:
-0.5292408466339111
,
"rank"
:
1
,
"decoded_token"
:
" beach"
},
"100991"
:
{
"logprob"
:
-0.9042408466339111
,
"rank"
:
2
,
"decoded_token"
:
" sandy"
},
"1627"
:
{
"logprob"
:
-6.029240608215332
,
"rank"
:
3
,
"decoded_token"
:
" sh"
},
"46422"
:
{
"logprob"
:
-6.529240608215332
,
"rank"
:
4
,
"decoded_token"
:
" shore"
},
"2169"
:
{
"logprob"
:
-7.779240608215332
,
"rank"
:
5
,
"decoded_token"
:
" ser"
}},
{
"1435"
:
{
"logprob"
:
-0.29965779185295105
,
"rank"
:
1
,
"decoded_token"
:
" as"
},
"1454"
:
{
"logprob"
:
-1.6746578216552734
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1513"
:
{
"logprob"
:
-3.7996578216552734
,
"rank"
:
3
,
"decoded_token"
:
" at"
},
"3016"
:
{
"logprob"
:
-3.7996578216552734
,
"rank"
:
4
,
"decoded_token"
:
" while"
},
"6117"
:
{
"logprob"
:
-4.799657821655273
,
"rank"
:
5
,
"decoded_token"
:
" near"
}},
{
"22140"
:
{
"logprob"
:
-0.015346773900091648
,
"rank"
:
1
,
"decoded_token"
:
" waves"
},
"1261"
:
{
"logprob"
:
-4.515347003936768
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1278"
:
{
"logprob"
:
-6.140347003936768
,
"rank"
:
3
,
"decoded_token"
:
" the"
},
"27208"
:
{
"logprob"
:
-6.890347003936768
,
"rank"
:
4
,
"decoded_token"
:
" ocean"
},
"4329"
:
{
"logprob"
:
-7.265347003936768
,
"rank"
:
5
,
"decoded_token"
:
" large"
}},
{
"21457"
:
{
"logprob"
:
-0.013234862126410007
,
"rank"
:
1
,
"decoded_token"
:
" crash"
},
"33168"
:
{
"logprob"
:
-5.138235092163086
,
"rank"
:
2
,
"decoded_token"
:
" gently"
},
"10401"
:
{
"logprob"
:
-5.950735092163086
,
"rank"
:
3
,
"decoded_token"
:
" roll"
},
"4323"
:
{
"logprob"
:
-6.700735092163086
,
"rank"
:
4
,
"decoded_token"
:
" break"
},
"5125"
:
{
"logprob"
:
-7.138235092163086
,
"rank"
:
5
,
"decoded_token"
:
" approach"
}},
{
"22196"
:
{
"logprob"
:
-0.060372594743967056
,
"rank"
:
1
,
"decoded_token"
:
" nearby"
},
"6117"
:
{
"logprob"
:
-3.3103725910186768
,
"rank"
:
2
,
"decoded_token"
:
" near"
},
"1294"
:
{
"logprob"
:
-4.435372829437256
,
"rank"
:
3
,
"decoded_token"
:
" in"
},
"25644"
:
{
"logprob"
:
-6.310372829437256
,
"rank"
:
4
,
"decoded_token"
:
" beside"
},
"1321"
:
{
"logprob"
:
-6.560372829437256
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1626"
:
{
"logprob"
:
-0.005290080793201923
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1294"
:
{
"logprob"
:
-6.5052900314331055
,
"rank"
:
2
,
"decoded_token"
:
" in"
},
"1044"
:
{
"logprob"
:
-7.0052900314331055
,
"rank"
:
3
,
"decoded_token"
:
","
},
"1321"
:
{
"logprob"
:
-7.1302900314331055
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"1513"
:
{
"logprob"
:
-7.2552900314331055
,
"rank"
:
5
,
"decoded_token"
:
" at"
}},
{
"1052"
:
{
"logprob"
:
-7.748573807475623e-06
,
"rank"
:
1
,
"decoded_token"
:
"4"
},
"1051"
:
{
"logprob"
:
-12.562507629394531
,
"rank"
:
2
,
"decoded_token"
:
"3"
},
"1053"
:
{
"logprob"
:
-13.125007629394531
,
"rank"
:
3
,
"decoded_token"
:
"5"
},
"1256"
:
{
"logprob"
:
-14.125007629394531
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1049"
:
{
"logprob"
:
-14.312507629394531
,
"rank"
:
5
,
"decoded_token"
:
"1"
}},
{
"1046"
:
{
"logprob"
:
-1.2993727978027891e-05
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-12.62501335144043
,
"rank"
:
2
,
"decoded_token"
:
","
},
"3590"
:
{
"logprob"
:
-12.75001335144043
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"1058"
:
{
"logprob"
:
-13.00001335144043
,
"rank"
:
4
,
"decoded_token"
:
":"
},
"2247"
:
{
"logprob"
:
-13.37501335144043
,
"rank"
:
5
,
"decoded_token"
:
" ."
}},
{
"1349"
:
{
"logprob"
:
-0.00046957432641647756
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"2048"
:
{
"logprob"
:
-8.250469207763672
,
"rank"
:
2
,
"decoded_token"
:
" An"
},
"1488"
:
{
"logprob"
:
-10.125469207763672
,
"rank"
:
3
,
"decoded_token"
:
" W"
},
"2409"
:
{
"logprob"
:
-10.375469207763672
,
"rank"
:
4
,
"decoded_token"
:
" This"
},
"12232"
:
{
"logprob"
:
-10.500469207763672
,
"rank"
:
5
,
"decoded_token"
:
" Gra"
}},
{
"53301"
:
{
"logprob"
:
-0.35120296478271484
,
"rank"
:
1
,
"decoded_token"
:
" winding"
},
"59396"
:
{
"logprob"
:
-1.8512029647827148
,
"rank"
:
2
,
"decoded_token"
:
" gravel"
},
"2169"
:
{
"logprob"
:
-2.476202964782715
,
"rank"
:
3
,
"decoded_token"
:
" ser"
},
"54742"
:
{
"logprob"
:
-3.851202964782715
,
"rank"
:
4
,
"decoded_token"
:
" peaceful"
},
"43536"
:
{
"logprob"
:
-5.101202964782715
,
"rank"
:
5
,
"decoded_token"
:
" curved"
}},
{
"59396"
:
{
"logprob"
:
-0.2955280840396881
,
"rank"
:
1
,
"decoded_token"
:
" gravel"
},
"3549"
:
{
"logprob"
:
-1.6705280542373657
,
"rank"
:
2
,
"decoded_token"
:
" path"
},
"14801"
:
{
"logprob"
:
-2.7955281734466553
,
"rank"
:
3
,
"decoded_token"
:
" pathway"
},
"1044"
:
{
"logprob"
:
-6.420527935028076
,
"rank"
:
4
,
"decoded_token"
:
","
},
"18341"
:
{
"logprob"
:
-6.670527935028076
,
"rank"
:
5
,
"decoded_token"
:
" pathways"
}},
{
"3549"
:
{
"logprob"
:
-0.03408379852771759
,
"rank"
:
1
,
"decoded_token"
:
" path"
},
"14801"
:
{
"logprob"
:
-3.409083843231201
,
"rank"
:
2
,
"decoded_token"
:
" pathway"
},
"18341"
:
{
"logprob"
:
-8.284083366394043
,
"rank"
:
3
,
"decoded_token"
:
" pathways"
},
"1505"
:
{
"logprob"
:
-9.534083366394043
,
"rank"
:
4
,
"decoded_token"
:
" or"
},
"7368"
:
{
"logprob"
:
-10.659083366394043
,
"rank"
:
5
,
"decoded_token"
:
"path"
}},
{
"1294"
:
{
"logprob"
:
-1.0857839584350586
,
"rank"
:
1
,
"decoded_token"
:
" in"
},
"13335"
:
{
"logprob"
:
-1.4607839584350586
,
"rank"
:
2
,
"decoded_token"
:
" leads"
},
"2645"
:
{
"logprob"
:
-1.9607839584350586
,
"rank"
:
3
,
"decoded_token"
:
" through"
},
"29817"
:
{
"logprob"
:
-2.4607839584350586
,
"rank"
:
4
,
"decoded_token"
:
" surrounded"
},
"22416"
:
{
"logprob"
:
-3.2107839584350586
,
"rank"
:
5
,
"decoded_token"
:
" curves"
}},
{
"1261"
:
{
"logprob"
:
-0.00011705666838679463
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-9.500117301940918
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"1278"
:
{
"logprob"
:
-10.250117301940918
,
"rank"
:
3
,
"decoded_token"
:
" the"
},
"2549"
:
{
"logprob"
:
-12.750117301940918
,
"rank"
:
4
,
"decoded_token"
:
" what"
},
"11223"
:
{
"logprob"
:
-13.750117301940918
,
"rank"
:
5
,
"decoded_token"
:
" green"
}},
{
"12097"
:
{
"logprob"
:
-0.02791696786880493
,
"rank"
:
1
,
"decoded_token"
:
" park"
},
"2169"
:
{
"logprob"
:
-4.65291690826416
,
"rank"
:
2
,
"decoded_token"
:
" ser"
},
"1295"
:
{
"logprob"
:
-4.65291690826416
,
"rank"
:
3
,
"decoded_token"
:
" l"
},
"23170"
:
{
"logprob"
:
-5.27791690826416
,
"rank"
:
4
,
"decoded_token"
:
" grass"
},
"26428"
:
{
"logprob"
:
-6.52791690826416
,
"rank"
:
5
,
"decoded_token"
:
" garden"
}},
{
"1044"
:
{
"logprob"
:
-1.350893259048462
,
"rank"
:
1
,
"decoded_token"
:
","
},
"1395"
:
{
"logprob"
:
-1.600893259048462
,
"rank"
:
2
,
"decoded_token"
:
" is"
},
"29817"
:
{
"logprob"
:
-2.350893259048462
,
"rank"
:
3
,
"decoded_token"
:
" surrounded"
},
"121313"
:
{
"logprob"
:
-2.475893259048462
,
"rank"
:
4
,
"decoded_token"
:
" flanked"
},
"1454"
:
{
"logprob"
:
-2.475893259048462
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"121040"
:
{
"logprob"
:
-0.710591197013855
,
"rank"
:
1
,
"decoded_token"
:
" bordered"
},
"121313"
:
{
"logprob"
:
-1.085591197013855
,
"rank"
:
2
,
"decoded_token"
:
" flanked"
},
"54410"
:
{
"logprob"
:
-1.960591197013855
,
"rank"
:
3
,
"decoded_token"
:
" lined"
},
"29817"
:
{
"logprob"
:
-3.8355913162231445
,
"rank"
:
4
,
"decoded_token"
:
" surrounded"
},
"1454"
:
{
"logprob"
:
-5.8355913162231445
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"1536"
:
{
"logprob"
:
-4.6491513785440475e-06
,
"rank"
:
1
,
"decoded_token"
:
" by"
},
"1454"
:
{
"logprob"
:
-12.375004768371582
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1408"
:
{
"logprob"
:
-15.812504768371582
,
"rank"
:
3
,
"decoded_token"
:
" on"
},
"3326"
:
{
"logprob"
:
-16.875003814697266
,
"rank"
:
4
,
"decoded_token"
:
"by"
},
"1295"
:
{
"logprob"
:
-16.875003814697266
,
"rank"
:
5
,
"decoded_token"
:
" l"
}},
{
"11223"
:
{
"logprob"
:
-0.4314780533313751
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"1295"
:
{
"logprob"
:
-1.4314780235290527
,
"rank"
:
2
,
"decoded_token"
:
" l"
},
"23170"
:
{
"logprob"
:
-2.4314780235290527
,
"rank"
:
3
,
"decoded_token"
:
" grass"
},
"17744"
:
{
"logprob"
:
-4.806478023529053
,
"rank"
:
4
,
"decoded_token"
:
" blo"
},
"95612"
:
{
"logprob"
:
-5.181478023529053
,
"rank"
:
5
,
"decoded_token"
:
" vibrant"
}},
{
"23170"
:
{
"logprob"
:
-0.00035041390219703317
,
"rank"
:
1
,
"decoded_token"
:
" grass"
},
"69230"
:
{
"logprob"
:
-8.125349998474121
,
"rank"
:
2
,
"decoded_token"
:
" lawn"
},
"128633"
:
{
"logprob"
:
-10.750349998474121
,
"rank"
:
3
,
"decoded_token"
:
" grasses"
},
"87781"
:
{
"logprob"
:
-11.437849998474121
,
"rank"
:
4
,
"decoded_token"
:
"
\u
8349"
},
"16429"
:
{
"logprob"
:
-11.437849998474121
,
"rank"
:
5
,
"decoded_token"
:
" trees"
}},
{
"1321"
:
{
"logprob"
:
-0.0009494088008068502
,
"rank"
:
1
,
"decoded_token"
:
" and"
},
"1044"
:
{
"logprob"
:
-7.125949382781982
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-9.25094985961914
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"2425"
:
{
"logprob"
:
-11.75094985961914
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"1046"
:
{
"logprob"
:
-11.75094985961914
,
"rank"
:
5
,
"decoded_token"
:
"."
}},
{
"17744"
:
{
"logprob"
:
-0.21488544344902039
,
"rank"
:
1
,
"decoded_token"
:
" blo"
},
"105368"
:
{
"logprob"
:
-1.8398854732513428
,
"rank"
:
2
,
"decoded_token"
:
" bloss"
},
"87833"
:
{
"logprob"
:
-3.8398854732513428
,
"rank"
:
3
,
"decoded_token"
:
" flowering"
},
"16429"
:
{
"logprob"
:
-4.464885234832764
,
"rank"
:
4
,
"decoded_token"
:
" trees"
},
"117207"
:
{
"logprob"
:
-7.589885234832764
,
"rank"
:
5
,
"decoded_token"
:
" bloom"
}},
{
"34941"
:
{
"logprob"
:
-7.152555099310121e-07
,
"rank"
:
1
,
"decoded_token"
:
"oming"
},
"35974"
:
{
"logprob"
:
-14.375000953674316
,
"rank"
:
2
,
"decoded_token"
:
"omed"
},
"6325"
:
{
"logprob"
:
-16.5625
,
"rank"
:
3
,
"decoded_token"
:
"oms"
},
"11009"
:
{
"logprob"
:
-17.625
,
"rank"
:
4
,
"decoded_token"
:
"omy"
},
"9457"
:
{
"logprob"
:
-18.875
,
"rank"
:
5
,
"decoded_token"
:
"ming"
}},
{
"16429"
:
{
"logprob"
:
-0.002424398437142372
,
"rank"
:
1
,
"decoded_token"
:
" trees"
},
"103796"
:
{
"logprob"
:
-6.627424240112305
,
"rank"
:
2
,
"decoded_token"
:
" cherry"
},
"32152"
:
{
"logprob"
:
-7.377424240112305
,
"rank"
:
3
,
"decoded_token"
:
" flowers"
},
"29151"
:
{
"logprob"
:
-9.314924240112305
,
"rank"
:
4
,
"decoded_token"
:
" shr"
},
"20370"
:
{
"logprob"
:
-9.564924240112305
,
"rank"
:
5
,
"decoded_token"
:
" fruit"
}},
{
"2425"
:
{
"logprob"
:
-0.3792523741722107
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1046"
:
{
"logprob"
:
-1.3792524337768555
,
"rank"
:
2
,
"decoded_token"
:
"."
},
"3675"
:
{
"logprob"
:
-2.8792524337768555
,
"rank"
:
3
,
"decoded_token"
:
" against"
},
"1044"
:
{
"logprob"
:
-5.1292524337768555
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-7.2542524337768555
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"1261"
:
{
"logprob"
:
-0.0002315968304174021
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1278"
:
{
"logprob"
:
-8.875231742858887
,
"rank"
:
2
,
"decoded_token"
:
" the"
},
"10991"
:
{
"logprob"
:
-9.875231742858887
,
"rank"
:
3
,
"decoded_token"
:
" blue"
},
"6133"
:
{
"logprob"
:
-10.375231742858887
,
"rank"
:
4
,
"decoded_token"
:
" clear"
},
"1420"
:
{
"logprob"
:
-12.250231742858887
,
"rank"
:
5
,
"decoded_token"
:
" an"
}},
{
"10991"
:
{
"logprob"
:
-0.6372600197792053
,
"rank"
:
1
,
"decoded_token"
:
" blue"
},
"6133"
:
{
"logprob"
:
-0.7622600197792053
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"18168"
:
{
"logprob"
:
-5.3872599601745605
,
"rank"
:
3
,
"decoded_token"
:
" bright"
},
"105573"
:
{
"logprob"
:
-10.012260437011719
,
"rank"
:
4
,
"decoded_token"
:
" sunny"
},
"15330"
:
{
"logprob"
:
-11.512260437011719
,
"rank"
:
5
,
"decoded_token"
:
" Blue"
}},
{
"21283"
:
{
"logprob"
:
-6.12716976320371e-05
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"1044"
:
{
"logprob"
:
-9.87506103515625
,
"rank"
:
2
,
"decoded_token"
:
","
},
"19673"
:
{
"logprob"
:
-12.00006103515625
,
"rank"
:
3
,
"decoded_token"
:
" Sky"
},
"1321"
:
{
"logprob"
:
-13.31256103515625
,
"rank"
:
4
,
"decoded_token"
:
" and"
},
"124968"
:
{
"logprob"
:
-14.81256103515625
,
"rank"
:
5
,
"decoded_token"
:
" skies"
}},
{
"1046"
:
{
"logprob"
:
-0.00013982271775603294
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"2"
:
{
"logprob"
:
-9.500140190124512
,
"rank"
:
2
,
"decoded_token"
:
".
\n
"
},
"1626"
:
{
"logprob"
:
-10.000140190124512
,
"rank"
:
3
,
"decoded_token"
:
".
\n\n
"
},
"1338"
:
{
"logprob"
:
-11.750140190124512
,
"rank"
:
4
,
"decoded_token"
:
" with"
}},
{
"2"
:
{
"logprob"
:
-0.0004533693427219987
,
"rank"
:
1
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-7.750453472137451
,
"rank"
:
2
,
"decoded_token"
:
" Each"
},
"1256"
:
{
"logprob"
:
-11.125452995300293
,
"rank"
:
3
,
"decoded_token"
:
" This"
}}]]]
\ No newline at end of file
tests/models/fixtures/pixtral_chat_engine.json
deleted
100644 → 0
View file @
389ebcf7
[[[
1784
,
3937
,
6122
,
1261
,
7244
,
10575
,
18970
,
1408
,
1261
,
32656
,
4691
,
1046
,
2
],
"The image shows a black dog sitting on a wooden surface."
,
[{
"1784"
:
{
"logprob"
:
-0.11685245484113693
,
"rank"
:
1
,
"decoded_token"
:
"The"
},
"4380"
:
{
"logprob"
:
-2.3668525218963623
,
"rank"
:
2
,
"decoded_token"
:
"This"
},
"1049"
:
{
"logprob"
:
-4.741852283477783
,
"rank"
:
3
,
"decoded_token"
:
"1"
},
"117991"
:
{
"logprob"
:
-5.991852283477783
,
"rank"
:
4
,
"decoded_token"
:
"Certain"
},
"1785"
:
{
"logprob"
:
-5.991852283477783
,
"rank"
:
5
,
"decoded_token"
:
"In"
}},
{
"3937"
:
{
"logprob"
:
-0.2591013014316559
,
"rank"
:
1
,
"decoded_token"
:
" image"
},
"2158"
:
{
"logprob"
:
-1.5091012716293335
,
"rank"
:
2
,
"decoded_token"
:
" first"
},
"3977"
:
{
"logprob"
:
-5.884101390838623
,
"rank"
:
3
,
"decoded_token"
:
" top"
},
"7244"
:
{
"logprob"
:
-6.259101390838623
,
"rank"
:
4
,
"decoded_token"
:
" black"
},
"8061"
:
{
"logprob"
:
-6.759101390838623
,
"rank"
:
5
,
"decoded_token"
:
" images"
}},
{
"6122"
:
{
"logprob"
:
-0.9660423994064331
,
"rank"
:
1
,
"decoded_token"
:
" shows"
},
"51948"
:
{
"logprob"
:
-1.466042399406433
,
"rank"
:
2
,
"decoded_token"
:
" depicts"
},
"6971"
:
{
"logprob"
:
-1.466042399406433
,
"rank"
:
3
,
"decoded_token"
:
" features"
},
"25981"
:
{
"logprob"
:
-2.8410425186157227
,
"rank"
:
4
,
"decoded_token"
:
" displays"
},
"8688"
:
{
"logprob"
:
-2.8410425186157227
,
"rank"
:
5
,
"decoded_token"
:
" contains"
}},
{
"1261"
:
{
"logprob"
:
-0.0030613720882683992
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-6.253061294555664
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"2295"
:
{
"logprob"
:
-7.878061294555664
,
"rank"
:
3
,
"decoded_token"
:
" two"
},
"2342"
:
{
"logprob"
:
-7.878061294555664
,
"rank"
:
4
,
"decoded_token"
:
" only"
},
"1278"
:
{
"logprob"
:
-8.628061294555664
,
"rank"
:
5
,
"decoded_token"
:
" the"
}},
{
"7244"
:
{
"logprob"
:
-0.17649099230766296
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"6231"
:
{
"logprob"
:
-2.3014910221099854
,
"rank"
:
2
,
"decoded_token"
:
" close"
},
"4249"
:
{
"logprob"
:
-3.4264910221099854
,
"rank"
:
3
,
"decoded_token"
:
" single"
},
"4329"
:
{
"logprob"
:
-5.113990783691406
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"10575"
:
{
"logprob"
:
-5.176490783691406
,
"rank"
:
5
,
"decoded_token"
:
" dog"
}},
{
"10575"
:
{
"logprob"
:
-0.10929587483406067
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.4842958450317383
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-4.109295845031738
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"15812"
:
{
"logprob"
:
-7.296795845031738
,
"rank"
:
4
,
"decoded_token"
:
" Lab"
},
"7990"
:
{
"logprob"
:
-7.484295845031738
,
"rank"
:
5
,
"decoded_token"
:
" cat"
}},
{
"18970"
:
{
"logprob"
:
-0.830376148223877
,
"rank"
:
1
,
"decoded_token"
:
" sitting"
},
"1454"
:
{
"logprob"
:
-1.580376148223877
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"28528"
:
{
"logprob"
:
-1.955376148223877
,
"rank"
:
3
,
"decoded_token"
:
" lying"
},
"7283"
:
{
"logprob"
:
-2.205376148223877
,
"rank"
:
4
,
"decoded_token"
:
" looking"
},
"15866"
:
{
"logprob"
:
-3.017876148223877
,
"rank"
:
5
,
"decoded_token"
:
" standing"
}},
{
"1408"
:
{
"logprob"
:
-0.08554735779762268
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"1321"
:
{
"logprob"
:
-3.71054744720459
,
"rank"
:
2
,
"decoded_token"
:
" and"
},
"3675"
:
{
"logprob"
:
-3.96054744720459
,
"rank"
:
3
,
"decoded_token"
:
" against"
},
"41132"
:
{
"logprob"
:
-4.71054744720459
,
"rank"
:
4
,
"decoded_token"
:
" attent"
},
"1454"
:
{
"logprob"
:
-5.08554744720459
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"1261"
:
{
"logprob"
:
-0.540847897529602
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"32656"
:
{
"logprob"
:
-0.915847897529602
,
"rank"
:
2
,
"decoded_token"
:
" wooden"
},
"12603"
:
{
"logprob"
:
-5.4158477783203125
,
"rank"
:
3
,
"decoded_token"
:
" wood"
},
"3977"
:
{
"logprob"
:
-5.4158477783203125
,
"rank"
:
4
,
"decoded_token"
:
" top"
},
"17253"
:
{
"logprob"
:
-6.2908477783203125
,
"rank"
:
5
,
"decoded_token"
:
" weather"
}},
{
"32656"
:
{
"logprob"
:
-0.025753861293196678
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"44130"
:
{
"logprob"
:
-4.400753974914551
,
"rank"
:
2
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-5.275753974914551
,
"rank"
:
3
,
"decoded_token"
:
" wood"
},
"3403"
:
{
"logprob"
:
-5.400753974914551
,
"rank"
:
4
,
"decoded_token"
:
" text"
},
"17253"
:
{
"logprob"
:
-6.963253974914551
,
"rank"
:
5
,
"decoded_token"
:
" weather"
}},
{
"4691"
:
{
"logprob"
:
-0.7265751957893372
,
"rank"
:
1
,
"decoded_token"
:
" surface"
},
"11237"
:
{
"logprob"
:
-0.8515751957893372
,
"rank"
:
2
,
"decoded_token"
:
" floor"
},
"7042"
:
{
"logprob"
:
-2.6015751361846924
,
"rank"
:
3
,
"decoded_token"
:
" background"
},
"28984"
:
{
"logprob"
:
-5.2265753746032715
,
"rank"
:
4
,
"decoded_token"
:
" deck"
},
"1615"
:
{
"logprob"
:
-5.7265753746032715
,
"rank"
:
5
,
"decoded_token"
:
" pl"
}},
{
"1046"
:
{
"logprob"
:
-0.4868825674057007
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-1.9868825674057007
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1321"
:
{
"logprob"
:
-2.3618826866149902
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1454"
:
{
"logprob"
:
-2.6118826866149902
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"7283"
:
{
"logprob"
:
-2.7368826866149902
,
"rank"
:
5
,
"decoded_token"
:
" looking"
}},
{
"2"
:
{
"logprob"
:
-0.0026643513701856136
,
"rank"
:
1
,
"decoded_token"
:
"</s>"
},
"1531"
:
{
"logprob"
:
-6.502664566040039
,
"rank"
:
2
,
"decoded_token"
:
" The"
},
"1032"
:
{
"logprob"
:
-6.877664566040039
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"3730"
:
{
"logprob"
:
-9.752664566040039
,
"rank"
:
4
,
"decoded_token"
:
" There"
},
"1256"
:
{
"logprob"
:
-11.002664566040039
,
"rank"
:
5
,
"decoded_token"
:
" "
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
1454
,
2327
,
94766
,
32961
,
53048
,
41132
,
3923
,
1408
,
1261
,
32656
,
4691
,
1626
,
1050
,
1046
,
1349
,
15375
,
24361
,
4521
,
94973
,
5669
,
1278
,
48932
,
2425
,
1261
,
16152
,
1121
,
21283
,
1046
,
2
],
"1. A black dog with floppy ears sits attentively on a wooden surface.
\n
2. A vast mountain range stretches across the horizon under a cloudy sky."
,
[{
"1049"
:
{
"logprob"
:
-0.42824622988700867
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"1045"
:
{
"logprob"
:
-1.553246259689331
,
"rank"
:
2
,
"decoded_token"
:
"-"
},
"1065"
:
{
"logprob"
:
-2.428246259689331
,
"rank"
:
3
,
"decoded_token"
:
"A"
},
"1784"
:
{
"logprob"
:
-4.053246021270752
,
"rank"
:
4
,
"decoded_token"
:
"The"
},
"69957"
:
{
"logprob"
:
-4.428246021270752
,
"rank"
:
5
,
"decoded_token"
:
"Sure"
}},
{
"1046"
:
{
"logprob"
:
-1.811964830267243e-05
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1058"
:
{
"logprob"
:
-11.875018119812012
,
"rank"
:
2
,
"decoded_token"
:
":"
},
"3590"
:
{
"logprob"
:
-12.250018119812012
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"1065"
:
{
"logprob"
:
-13.062518119812012
,
"rank"
:
4
,
"decoded_token"
:
"A"
},
"1041"
:
{
"logprob"
:
-13.750018119812012
,
"rank"
:
5
,
"decoded_token"
:
")"
}},
{
"1349"
:
{
"logprob"
:
-0.13647246360778809
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1429"
:
{
"logprob"
:
-2.386472463607788
,
"rank"
:
2
,
"decoded_token"
:
"
\"
"
},
"1603"
:
{
"logprob"
:
-3.886472463607788
,
"rank"
:
3
,
"decoded_token"
:
" **"
},
"11967"
:
{
"logprob"
:
-5.011472702026367
,
"rank"
:
4
,
"decoded_token"
:
" Image"
},
"1531"
:
{
"logprob"
:
-5.011472702026367
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"7244"
:
{
"logprob"
:
-0.18561004102230072
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"38462"
:
{
"logprob"
:
-3.185610055923462
,
"rank"
:
2
,
"decoded_token"
:
" curious"
},
"68076"
:
{
"logprob"
:
-3.623110055923462
,
"rank"
:
3
,
"decoded_token"
:
" cute"
},
"4329"
:
{
"logprob"
:
-3.935610055923462
,
"rank"
:
4
,
"decoded_token"
:
" large"
},
"74168"
:
{
"logprob"
:
-4.373109817504883
,
"rank"
:
5
,
"decoded_token"
:
" gloss"
}},
{
"10575"
:
{
"logprob"
:
-0.17297746241092682
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.1729774475097656
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-3.1729774475097656
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"15812"
:
{
"logprob"
:
-6.985477447509766
,
"rank"
:
4
,
"decoded_token"
:
" Lab"
},
"8636"
:
{
"logprob"
:
-7.360477447509766
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"1454"
:
{
"logprob"
:
-0.5785807967185974
,
"rank"
:
1
,
"decoded_token"
:
" with"
},
"53048"
:
{
"logprob"
:
-1.2660808563232422
,
"rank"
:
2
,
"decoded_token"
:
" sits"
},
"1395"
:
{
"logprob"
:
-3.016080856323242
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"22524"
:
{
"logprob"
:
-3.578580856323242
,
"rank"
:
4
,
"decoded_token"
:
" lies"
},
"18970"
:
{
"logprob"
:
-3.703580856323242
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"2327"
:
{
"logprob"
:
-1.2709298133850098
,
"rank"
:
1
,
"decoded_token"
:
" fl"
},
"1261"
:
{
"logprob"
:
-1.3959298133850098
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"17300"
:
{
"logprob"
:
-1.8959298133850098
,
"rank"
:
3
,
"decoded_token"
:
" soul"
},
"100089"
:
{
"logprob"
:
-2.6459298133850098
,
"rank"
:
4
,
"decoded_token"
:
" expressive"
},
"6444"
:
{
"logprob"
:
-3.1459298133850098
,
"rank"
:
5
,
"decoded_token"
:
" soft"
}},
{
"94766"
:
{
"logprob"
:
-0.002432247158139944
,
"rank"
:
1
,
"decoded_token"
:
"oppy"
},
"124603"
:
{
"logprob"
:
-6.377432346343994
,
"rank"
:
2
,
"decoded_token"
:
"uffy"
},
"1484"
:
{
"logprob"
:
-7.877432346343994
,
"rank"
:
3
,
"decoded_token"
:
"op"
},
"24897"
:
{
"logprob"
:
-8.877431869506836
,
"rank"
:
4
,
"decoded_token"
:
"appy"
},
"102477"
:
{
"logprob"
:
-9.752431869506836
,
"rank"
:
5
,
"decoded_token"
:
"opping"
}},
{
"32961"
:
{
"logprob"
:
-5.113947918289341e-05
,
"rank"
:
1
,
"decoded_token"
:
" ears"
},
"16962"
:
{
"logprob"
:
-11.312551498413086
,
"rank"
:
2
,
"decoded_token"
:
" ear"
},
"5731"
:
{
"logprob"
:
-11.750051498413086
,
"rank"
:
3
,
"decoded_token"
:
" eyes"
},
"3351"
:
{
"logprob"
:
-12.000051498413086
,
"rank"
:
4
,
"decoded_token"
:
" years"
},
"42071"
:
{
"logprob"
:
-13.000051498413086
,
"rank"
:
5
,
"decoded_token"
:
" cheeks"
}},
{
"53048"
:
{
"logprob"
:
-0.6131591200828552
,
"rank"
:
1
,
"decoded_token"
:
" sits"
},
"10637"
:
{
"logprob"
:
-1.9881591796875
,
"rank"
:
2
,
"decoded_token"
:
" looks"
},
"1321"
:
{
"logprob"
:
-2.4256591796875
,
"rank"
:
3
,
"decoded_token"
:
" and"
},
"1395"
:
{
"logprob"
:
-2.6756591796875
,
"rank"
:
4
,
"decoded_token"
:
" is"
},
"18970"
:
{
"logprob"
:
-3.0506591796875
,
"rank"
:
5
,
"decoded_token"
:
" sitting"
}},
{
"41132"
:
{
"logprob"
:
-0.36187249422073364
,
"rank"
:
1
,
"decoded_token"
:
" attent"
},
"1408"
:
{
"logprob"
:
-2.361872434616089
,
"rank"
:
2
,
"decoded_token"
:
" on"
},
"106534"
:
{
"logprob"
:
-2.424372434616089
,
"rank"
:
3
,
"decoded_token"
:
" calmly"
},
"12276"
:
{
"logprob"
:
-2.611872434616089
,
"rank"
:
4
,
"decoded_token"
:
" alert"
},
"6482"
:
{
"logprob"
:
-5.174372673034668
,
"rank"
:
5
,
"decoded_token"
:
" patient"
}},
{
"3923"
:
{
"logprob"
:
-8.451581379631534e-05
,
"rank"
:
1
,
"decoded_token"
:
"ively"
},
"1556"
:
{
"logprob"
:
-9.50008487701416
,
"rank"
:
2
,
"decoded_token"
:
"ive"
},
"6655"
:
{
"logprob"
:
-11.87508487701416
,
"rank"
:
3
,
"decoded_token"
:
"atively"
},
"3929"
:
{
"logprob"
:
-14.00008487701416
,
"rank"
:
4
,
"decoded_token"
:
"ently"
},
"47885"
:
{
"logprob"
:
-14.75008487701416
,
"rank"
:
5
,
"decoded_token"
:
"edly"
}},
{
"1408"
:
{
"logprob"
:
-0.058125678449869156
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3675"
:
{
"logprob"
:
-3.1831257343292236
,
"rank"
:
2
,
"decoded_token"
:
" against"
},
"1294"
:
{
"logprob"
:
-4.9331254959106445
,
"rank"
:
3
,
"decoded_token"
:
" in"
},
"7283"
:
{
"logprob"
:
-5.8081254959106445
,
"rank"
:
4
,
"decoded_token"
:
" looking"
},
"1044"
:
{
"logprob"
:
-5.9331254959106445
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"1261"
:
{
"logprob"
:
-0.21029606461524963
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"32656"
:
{
"logprob"
:
-1.7102960348129272
,
"rank"
:
2
,
"decoded_token"
:
" wooden"
},
"17253"
:
{
"logprob"
:
-5.710296154022217
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"44130"
:
{
"logprob"
:
-6.085296154022217
,
"rank"
:
4
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-6.960296154022217
,
"rank"
:
5
,
"decoded_token"
:
" wood"
}},
{
"32656"
:
{
"logprob"
:
-0.08548421412706375
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"44130"
:
{
"logprob"
:
-2.710484266281128
,
"rank"
:
2
,
"decoded_token"
:
" rust"
},
"17253"
:
{
"logprob"
:
-4.710484027862549
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"12603"
:
{
"logprob"
:
-5.960484027862549
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"3403"
:
{
"logprob"
:
-5.960484027862549
,
"rank"
:
5
,
"decoded_token"
:
" text"
}},
{
"4691"
:
{
"logprob"
:
-0.7172377109527588
,
"rank"
:
1
,
"decoded_token"
:
" surface"
},
"11237"
:
{
"logprob"
:
-0.8422377109527588
,
"rank"
:
2
,
"decoded_token"
:
" floor"
},
"7042"
:
{
"logprob"
:
-2.842237710952759
,
"rank"
:
3
,
"decoded_token"
:
" background"
},
"28984"
:
{
"logprob"
:
-4.21723747253418
,
"rank"
:
4
,
"decoded_token"
:
" deck"
},
"92504"
:
{
"logprob"
:
-6.21723747253418
,
"rank"
:
5
,
"decoded_token"
:
" backdrop"
}},
{
"1626"
:
{
"logprob"
:
-0.12971943616867065
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-2.3797194957733154
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1046"
:
{
"logprob"
:
-4.129719257354736
,
"rank"
:
3
,
"decoded_token"
:
"."
},
"1338"
:
{
"logprob"
:
-5.129719257354736
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"7283"
:
{
"logprob"
:
-5.504719257354736
,
"rank"
:
5
,
"decoded_token"
:
" looking"
}},
{
"1050"
:
{
"logprob"
:
-0.00015698630886618048
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1256"
:
{
"logprob"
:
-9.125157356262207
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1032"
:
{
"logprob"
:
-10.875157356262207
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-11.750157356262207
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1051"
:
{
"logprob"
:
-12.125157356262207
,
"rank"
:
5
,
"decoded_token"
:
"3"
}},
{
"1046"
:
{
"logprob"
:
-6.6756979322235566e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.062506675720215
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"1626"
:
{
"logprob"
:
-13.187506675720215
,
"rank"
:
3
,
"decoded_token"
:
".
\n
"
},
"1338"
:
{
"logprob"
:
-14.750006675720215
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1058"
:
{
"logprob"
:
-14.937506675720215
,
"rank"
:
5
,
"decoded_token"
:
":"
}},
{
"1349"
:
{
"logprob"
:
-0.5863217115402222
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-1.4613217115402222
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"37159"
:
{
"logprob"
:
-2.2113218307495117
,
"rank"
:
3
,
"decoded_token"
:
" Snow"
},
"113465"
:
{
"logprob"
:
-3.8988218307495117
,
"rank"
:
4
,
"decoded_token"
:
" Rug"
},
"1531"
:
{
"logprob"
:
-3.9613218307495117
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"15375"
:
{
"logprob"
:
-0.639299213886261
,
"rank"
:
1
,
"decoded_token"
:
" vast"
},
"37849"
:
{
"logprob"
:
-2.014299154281616
,
"rank"
:
2
,
"decoded_token"
:
" breat"
},
"61082"
:
{
"logprob"
:
-2.389299154281616
,
"rank"
:
3
,
"decoded_token"
:
" panor"
},
"10726"
:
{
"logprob"
:
-3.139299154281616
,
"rank"
:
4
,
"decoded_token"
:
" scen"
},
"2169"
:
{
"logprob"
:
-3.201799154281616
,
"rank"
:
5
,
"decoded_token"
:
" ser"
}},
{
"24361"
:
{
"logprob"
:
-0.702845573425293
,
"rank"
:
1
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-1.952845573425293
,
"rank"
:
2
,
"decoded_token"
:
" mountainous"
},
"1044"
:
{
"logprob"
:
-2.077845573425293
,
"rank"
:
3
,
"decoded_token"
:
","
},
"4521"
:
{
"logprob"
:
-2.327845573425293
,
"rank"
:
4
,
"decoded_token"
:
" range"
},
"28035"
:
{
"logprob"
:
-2.452845573425293
,
"rank"
:
5
,
"decoded_token"
:
" landscape"
}},
{
"4521"
:
{
"logprob"
:
-0.07058162242174149
,
"rank"
:
1
,
"decoded_token"
:
" range"
},
"28035"
:
{
"logprob"
:
-2.6955816745758057
,
"rank"
:
2
,
"decoded_token"
:
" landscape"
},
"37691"
:
{
"logprob"
:
-8.320581436157227
,
"rank"
:
3
,
"decoded_token"
:
" valley"
},
"12248"
:
{
"logprob"
:
-9.445581436157227
,
"rank"
:
4
,
"decoded_token"
:
" peak"
},
"13327"
:
{
"logprob"
:
-9.695581436157227
,
"rank"
:
5
,
"decoded_token"
:
" scene"
}},
{
"94973"
:
{
"logprob"
:
-1.1164050102233887
,
"rank"
:
1
,
"decoded_token"
:
" stretches"
},
"1454"
:
{
"logprob"
:
-1.1789050102233887
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"2425"
:
{
"logprob"
:
-1.8664050102233887
,
"rank"
:
3
,
"decoded_token"
:
" under"
},
"1395"
:
{
"logprob"
:
-2.5539050102233887
,
"rank"
:
4
,
"decoded_token"
:
" is"
},
"13875"
:
{
"logprob"
:
-2.9914050102233887
,
"rank"
:
5
,
"decoded_token"
:
" covered"
}},
{
"5669"
:
{
"logprob"
:
-0.3286789357662201
,
"rank"
:
1
,
"decoded_token"
:
" across"
},
"1848"
:
{
"logprob"
:
-2.078678846359253
,
"rank"
:
2
,
"decoded_token"
:
" out"
},
"2425"
:
{
"logprob"
:
-2.328678846359253
,
"rank"
:
3
,
"decoded_token"
:
" under"
},
"2203"
:
{
"logprob"
:
-3.328678846359253
,
"rank"
:
4
,
"decoded_token"
:
" into"
},
"8994"
:
{
"logprob"
:
-4.766179084777832
,
"rank"
:
5
,
"decoded_token"
:
" towards"
}},
{
"1278"
:
{
"logprob"
:
-0.039004355669021606
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-3.289004325866699
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-7.414004325866699
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"2425"
:
{
"logprob"
:
-9.0390043258667
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"1454"
:
{
"logprob"
:
-9.2265043258667
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"48932"
:
{
"logprob"
:
-0.2659883201122284
,
"rank"
:
1
,
"decoded_token"
:
" horizon"
},
"21283"
:
{
"logprob"
:
-2.140988349914551
,
"rank"
:
2
,
"decoded_token"
:
" sky"
},
"3937"
:
{
"logprob"
:
-3.015988349914551
,
"rank"
:
3
,
"decoded_token"
:
" image"
},
"28035"
:
{
"logprob"
:
-3.515988349914551
,
"rank"
:
4
,
"decoded_token"
:
" landscape"
},
"3044"
:
{
"logprob"
:
-4.265988349914551
,
"rank"
:
5
,
"decoded_token"
:
" sk"
}},
{
"2425"
:
{
"logprob"
:
-0.5356141328811646
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1044"
:
{
"logprob"
:
-1.5356141328811646
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-1.7856141328811646
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"25136"
:
{
"logprob"
:
-3.785614013671875
,
"rank"
:
4
,
"decoded_token"
:
" beneath"
},
"1408"
:
{
"logprob"
:
-5.785614013671875
,
"rank"
:
5
,
"decoded_token"
:
" on"
}},
{
"1261"
:
{
"logprob"
:
-0.006081883795559406
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-5.506082057952881
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"16152"
:
{
"logprob"
:
-7.631082057952881
,
"rank"
:
3
,
"decoded_token"
:
" cloud"
},
"6133"
:
{
"logprob"
:
-7.881082057952881
,
"rank"
:
4
,
"decoded_token"
:
" clear"
},
"2136"
:
{
"logprob"
:
-8.006081581115723
,
"rank"
:
5
,
"decoded_token"
:
" over"
}},
{
"16152"
:
{
"logprob"
:
-0.6749536991119385
,
"rank"
:
1
,
"decoded_token"
:
" cloud"
},
"6133"
:
{
"logprob"
:
-1.4249536991119385
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"18416"
:
{
"logprob"
:
-2.8624536991119385
,
"rank"
:
3
,
"decoded_token"
:
" haz"
},
"27254"
:
{
"logprob"
:
-2.9874536991119385
,
"rank"
:
4
,
"decoded_token"
:
" partly"
},
"4391"
:
{
"logprob"
:
-3.2374536991119385
,
"rank"
:
5
,
"decoded_token"
:
" light"
}},
{
"1121"
:
{
"logprob"
:
-0.10860869288444519
,
"rank"
:
1
,
"decoded_token"
:
"y"
},
"4527"
:
{
"logprob"
:
-2.9836087226867676
,
"rank"
:
2
,
"decoded_token"
:
"less"
},
"1286"
:
{
"logprob"
:
-3.4836087226867676
,
"rank"
:
3
,
"decoded_token"
:
"ed"
},
"77187"
:
{
"logprob"
:
-4.608608722686768
,
"rank"
:
4
,
"decoded_token"
:
"-filled"
},
"114525"
:
{
"logprob"
:
-4.858608722686768
,
"rank"
:
5
,
"decoded_token"
:
"-covered"
}},
{
"21283"
:
{
"logprob"
:
-0.002785732736811042
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-6.252785682678223
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-7.627785682678223
,
"rank"
:
3
,
"decoded_token"
:
","
},
"26549"
:
{
"logprob"
:
-8.627785682678223
,
"rank"
:
4
,
"decoded_token"
:
" gray"
},
"34052"
:
{
"logprob"
:
-9.377785682678223
,
"rank"
:
5
,
"decoded_token"
:
" grey"
}},
{
"1046"
:
{
"logprob"
:
-0.047878943383693695
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1044"
:
{
"logprob"
:
-3.1728789806365967
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-5.547878742218018
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1338"
:
{
"logprob"
:
-7.172878742218018
,
"rank"
:
4
,
"decoded_token"
:
".
\n\n
"
},
"1294"
:
{
"logprob"
:
-9.172879219055176
,
"rank"
:
5
,
"decoded_token"
:
" in"
}},
{
"2"
:
{
"logprob"
:
-1.3351351299206726e-05
,
"rank"
:
1
,
"decoded_token"
:
"</s>"
},
"1032"
:
{
"logprob"
:
-11.25001335144043
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1256"
:
{
"logprob"
:
-16.00001335144043
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1319"
:
{
"logprob"
:
-17.25001335144043
,
"rank"
:
4
,
"decoded_token"
:
" ("
},
"1766"
:
{
"logprob"
:
-18.50001335144043
,
"rank"
:
5
,
"decoded_token"
:
" ["
}}]],
[[
1049
,
1046
,
1349
,
7244
,
10575
,
53048
,
41132
,
3923
,
1408
,
1261
,
32656
,
11237
,
1626
,
1050
,
1046
,
1349
,
15375
,
24361
,
4521
,
94973
,
5669
,
1278
,
48932
,
2425
,
1261
,
16152
,
1121
,
21283
,
1626
,
1051
,
1046
,
8342
,
71284
,
7377
,
1394
,
22140
,
1294
,
1278
,
27208
,
1513
,
97558
,
1626
,
1052
,
1046
,
1349
,
53301
,
59396
,
3549
,
13335
,
2645
,
1261
,
1295
,
3506
,
11223
,
12097
,
1046
,
2
],
"1. A black dog sits attentively on a wooden floor.
\n
2. A vast mountain range stretches across the horizon under a cloudy sky.
\n
3. Surfers wait for waves in the ocean at sunset.
\n
4. A winding gravel path leads through a lush green park."
,
[{
"1049"
:
{
"logprob"
:
-0.05001257359981537
,
"rank"
:
1
,
"decoded_token"
:
"1"
},
"1045"
:
{
"logprob"
:
-3.1750125885009766
,
"rank"
:
2
,
"decoded_token"
:
"-"
},
"69957"
:
{
"logprob"
:
-5.925012588500977
,
"rank"
:
3
,
"decoded_token"
:
"Sure"
},
"11745"
:
{
"logprob"
:
-6.425012588500977
,
"rank"
:
4
,
"decoded_token"
:
"Here"
},
"1065"
:
{
"logprob"
:
-6.425012588500977
,
"rank"
:
5
,
"decoded_token"
:
"A"
}},
{
"1046"
:
{
"logprob"
:
-8.702239938429557e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1058"
:
{
"logprob"
:
-12.000008583068848
,
"rank"
:
2
,
"decoded_token"
:
":"
},
"3590"
:
{
"logprob"
:
-13.375008583068848
,
"rank"
:
3
,
"decoded_token"
:
".A"
},
"1041"
:
{
"logprob"
:
-14.750008583068848
,
"rank"
:
4
,
"decoded_token"
:
")"
},
"1065"
:
{
"logprob"
:
-15.687508583068848
,
"rank"
:
5
,
"decoded_token"
:
"A"
}},
{
"1349"
:
{
"logprob"
:
-0.14196155965328217
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"1429"
:
{
"logprob"
:
-2.2669615745544434
,
"rank"
:
2
,
"decoded_token"
:
"
\"
"
},
"1531"
:
{
"logprob"
:
-4.516961574554443
,
"rank"
:
3
,
"decoded_token"
:
" The"
},
"11967"
:
{
"logprob"
:
-4.516961574554443
,
"rank"
:
4
,
"decoded_token"
:
" Image"
},
"1603"
:
{
"logprob"
:
-5.391961574554443
,
"rank"
:
5
,
"decoded_token"
:
" **"
}},
{
"7244"
:
{
"logprob"
:
-0.14889711141586304
,
"rank"
:
1
,
"decoded_token"
:
" black"
},
"68076"
:
{
"logprob"
:
-3.398897171020508
,
"rank"
:
2
,
"decoded_token"
:
" cute"
},
"6231"
:
{
"logprob"
:
-3.961397171020508
,
"rank"
:
3
,
"decoded_token"
:
" close"
},
"38462"
:
{
"logprob"
:
-4.273897171020508
,
"rank"
:
4
,
"decoded_token"
:
" curious"
},
"4329"
:
{
"logprob"
:
-4.398897171020508
,
"rank"
:
5
,
"decoded_token"
:
" large"
}},
{
"10575"
:
{
"logprob"
:
-0.12091328203678131
,
"rank"
:
1
,
"decoded_token"
:
" dog"
},
"116572"
:
{
"logprob"
:
-2.37091326713562
,
"rank"
:
2
,
"decoded_token"
:
" puppy"
},
"119075"
:
{
"logprob"
:
-3.99591326713562
,
"rank"
:
3
,
"decoded_token"
:
" Labrador"
},
"15812"
:
{
"logprob"
:
-7.683413505554199
,
"rank"
:
4
,
"decoded_token"
:
" Lab"
},
"8636"
:
{
"logprob"
:
-7.808413505554199
,
"rank"
:
5
,
"decoded_token"
:
" lab"
}},
{
"53048"
:
{
"logprob"
:
-0.8691943287849426
,
"rank"
:
1
,
"decoded_token"
:
" sits"
},
"1454"
:
{
"logprob"
:
-1.1191942691802979
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"1395"
:
{
"logprob"
:
-2.431694269180298
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"18970"
:
{
"logprob"
:
-2.744194269180298
,
"rank"
:
4
,
"decoded_token"
:
" sitting"
},
"22524"
:
{
"logprob"
:
-3.681694269180298
,
"rank"
:
5
,
"decoded_token"
:
" lies"
}},
{
"41132"
:
{
"logprob"
:
-0.5939557552337646
,
"rank"
:
1
,
"decoded_token"
:
" attent"
},
"106534"
:
{
"logprob"
:
-1.2814557552337646
,
"rank"
:
2
,
"decoded_token"
:
" calmly"
},
"12276"
:
{
"logprob"
:
-2.8439557552337646
,
"rank"
:
3
,
"decoded_token"
:
" alert"
},
"1408"
:
{
"logprob"
:
-2.8439557552337646
,
"rank"
:
4
,
"decoded_token"
:
" on"
},
"6482"
:
{
"logprob"
:
-4.968955993652344
,
"rank"
:
5
,
"decoded_token"
:
" patient"
}},
{
"3923"
:
{
"logprob"
:
-0.00010084597306558862
,
"rank"
:
1
,
"decoded_token"
:
"ively"
},
"1556"
:
{
"logprob"
:
-9.500101089477539
,
"rank"
:
2
,
"decoded_token"
:
"ive"
},
"6655"
:
{
"logprob"
:
-10.875101089477539
,
"rank"
:
3
,
"decoded_token"
:
"atively"
},
"3929"
:
{
"logprob"
:
-13.000101089477539
,
"rank"
:
4
,
"decoded_token"
:
"ently"
},
"47885"
:
{
"logprob"
:
-13.750101089477539
,
"rank"
:
5
,
"decoded_token"
:
"edly"
}},
{
"1408"
:
{
"logprob"
:
-0.056158196181058884
,
"rank"
:
1
,
"decoded_token"
:
" on"
},
"3675"
:
{
"logprob"
:
-3.6811583042144775
,
"rank"
:
2
,
"decoded_token"
:
" against"
},
"1454"
:
{
"logprob"
:
-4.306158065795898
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1294"
:
{
"logprob"
:
-5.181158065795898
,
"rank"
:
4
,
"decoded_token"
:
" in"
},
"7283"
:
{
"logprob"
:
-5.431158065795898
,
"rank"
:
5
,
"decoded_token"
:
" looking"
}},
{
"1261"
:
{
"logprob"
:
-0.33056098222732544
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"32656"
:
{
"logprob"
:
-1.3305609226226807
,
"rank"
:
2
,
"decoded_token"
:
" wooden"
},
"17253"
:
{
"logprob"
:
-4.70556116104126
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"44130"
:
{
"logprob"
:
-5.83056116104126
,
"rank"
:
4
,
"decoded_token"
:
" rust"
},
"12603"
:
{
"logprob"
:
-6.58056116104126
,
"rank"
:
5
,
"decoded_token"
:
" wood"
}},
{
"32656"
:
{
"logprob"
:
-0.07081110030412674
,
"rank"
:
1
,
"decoded_token"
:
" wooden"
},
"44130"
:
{
"logprob"
:
-2.9458110332489014
,
"rank"
:
2
,
"decoded_token"
:
" rust"
},
"17253"
:
{
"logprob"
:
-4.6958112716674805
,
"rank"
:
3
,
"decoded_token"
:
" weather"
},
"12603"
:
{
"logprob"
:
-5.8208112716674805
,
"rank"
:
4
,
"decoded_token"
:
" wood"
},
"3403"
:
{
"logprob"
:
-6.0708112716674805
,
"rank"
:
5
,
"decoded_token"
:
" text"
}},
{
"11237"
:
{
"logprob"
:
-0.6428436636924744
,
"rank"
:
1
,
"decoded_token"
:
" floor"
},
"4691"
:
{
"logprob"
:
-1.0178437232971191
,
"rank"
:
2
,
"decoded_token"
:
" surface"
},
"7042"
:
{
"logprob"
:
-2.642843723297119
,
"rank"
:
3
,
"decoded_token"
:
" background"
},
"28984"
:
{
"logprob"
:
-3.517843723297119
,
"rank"
:
4
,
"decoded_token"
:
" deck"
},
"92504"
:
{
"logprob"
:
-6.017843723297119
,
"rank"
:
5
,
"decoded_token"
:
" backdrop"
}},
{
"1626"
:
{
"logprob"
:
-0.7337945103645325
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-0.8587945103645325
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-3.3587944507598877
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"7283"
:
{
"logprob"
:
-3.6087944507598877
,
"rank"
:
4
,
"decoded_token"
:
" looking"
},
"1321"
:
{
"logprob"
:
-4.108794689178467
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1050"
:
{
"logprob"
:
-1.0132738680113107e-05
,
"rank"
:
1
,
"decoded_token"
:
"2"
},
"1051"
:
{
"logprob"
:
-11.75001049041748
,
"rank"
:
2
,
"decoded_token"
:
"3"
},
"1256"
:
{
"logprob"
:
-14.00001049041748
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1049"
:
{
"logprob"
:
-14.62501049041748
,
"rank"
:
4
,
"decoded_token"
:
"1"
},
"1032"
:
{
"logprob"
:
-14.62501049041748
,
"rank"
:
5
,
"decoded_token"
:
" "
}},
{
"1046"
:
{
"logprob"
:
-2.861018856492592e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.43750286102295
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"4700"
:
{
"logprob"
:
-15.37500286102295
,
"rank"
:
3
,
"decoded_token"
:
".M"
},
"1626"
:
{
"logprob"
:
-15.37500286102295
,
"rank"
:
4
,
"decoded_token"
:
".
\n
"
},
"3051"
:
{
"logprob"
:
-15.87500286102295
,
"rank"
:
5
,
"decoded_token"
:
".S"
}},
{
"1349"
:
{
"logprob"
:
-0.6794427633285522
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"11826"
:
{
"logprob"
:
-1.9294427633285522
,
"rank"
:
2
,
"decoded_token"
:
" Maj"
},
"37159"
:
{
"logprob"
:
-2.116942882537842
,
"rank"
:
3
,
"decoded_token"
:
" Snow"
},
"27260"
:
{
"logprob"
:
-2.616942882537842
,
"rank"
:
4
,
"decoded_token"
:
" Mountain"
},
"113465"
:
{
"logprob"
:
-2.866942882537842
,
"rank"
:
5
,
"decoded_token"
:
" Rug"
}},
{
"15375"
:
{
"logprob"
:
-0.9194075465202332
,
"rank"
:
1
,
"decoded_token"
:
" vast"
},
"10726"
:
{
"logprob"
:
-2.294407606124878
,
"rank"
:
2
,
"decoded_token"
:
" scen"
},
"4521"
:
{
"logprob"
:
-2.356907606124878
,
"rank"
:
3
,
"decoded_token"
:
" range"
},
"122203"
:
{
"logprob"
:
-2.419407606124878
,
"rank"
:
4
,
"decoded_token"
:
" rugged"
},
"61082"
:
{
"logprob"
:
-2.856907606124878
,
"rank"
:
5
,
"decoded_token"
:
" panor"
}},
{
"24361"
:
{
"logprob"
:
-0.5804797410964966
,
"rank"
:
1
,
"decoded_token"
:
" mountain"
},
"127945"
:
{
"logprob"
:
-1.8304797410964966
,
"rank"
:
2
,
"decoded_token"
:
" mountainous"
},
"28035"
:
{
"logprob"
:
-2.455479621887207
,
"rank"
:
3
,
"decoded_token"
:
" landscape"
},
"4521"
:
{
"logprob"
:
-2.455479621887207
,
"rank"
:
4
,
"decoded_token"
:
" range"
},
"1044"
:
{
"logprob"
:
-2.705479621887207
,
"rank"
:
5
,
"decoded_token"
:
","
}},
{
"4521"
:
{
"logprob"
:
-0.0493546724319458
,
"rank"
:
1
,
"decoded_token"
:
" range"
},
"28035"
:
{
"logprob"
:
-3.0493545532226562
,
"rank"
:
2
,
"decoded_token"
:
" landscape"
},
"37691"
:
{
"logprob"
:
-8.424354553222656
,
"rank"
:
3
,
"decoded_token"
:
" valley"
},
"13327"
:
{
"logprob"
:
-9.049354553222656
,
"rank"
:
4
,
"decoded_token"
:
" scene"
},
"3719"
:
{
"logprob"
:
-9.799354553222656
,
"rank"
:
5
,
"decoded_token"
:
" view"
}},
{
"94973"
:
{
"logprob"
:
-0.6676871180534363
,
"rank"
:
1
,
"decoded_token"
:
" stretches"
},
"2425"
:
{
"logprob"
:
-1.792687177658081
,
"rank"
:
2
,
"decoded_token"
:
" under"
},
"1395"
:
{
"logprob"
:
-2.292687177658081
,
"rank"
:
3
,
"decoded_token"
:
" is"
},
"1454"
:
{
"logprob"
:
-2.730187177658081
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"7038"
:
{
"logprob"
:
-3.292687177658081
,
"rank"
:
5
,
"decoded_token"
:
" extends"
}},
{
"5669"
:
{
"logprob"
:
-0.4542117118835449
,
"rank"
:
1
,
"decoded_token"
:
" across"
},
"2425"
:
{
"logprob"
:
-1.454211711883545
,
"rank"
:
2
,
"decoded_token"
:
" under"
},
"1848"
:
{
"logprob"
:
-2.454211711883545
,
"rank"
:
3
,
"decoded_token"
:
" out"
},
"2203"
:
{
"logprob"
:
-4.204211711883545
,
"rank"
:
4
,
"decoded_token"
:
" into"
},
"25136"
:
{
"logprob"
:
-4.641711711883545
,
"rank"
:
5
,
"decoded_token"
:
" beneath"
}},
{
"1278"
:
{
"logprob"
:
-0.23009441792964935
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-1.6050944328308105
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-5.6050944328308105
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"2425"
:
{
"logprob"
:
-7.2300944328308105
,
"rank"
:
4
,
"decoded_token"
:
" under"
},
"1454"
:
{
"logprob"
:
-10.167593955993652
,
"rank"
:
5
,
"decoded_token"
:
" with"
}},
{
"48932"
:
{
"logprob"
:
-0.3072167932987213
,
"rank"
:
1
,
"decoded_token"
:
" horizon"
},
"21283"
:
{
"logprob"
:
-1.932216763496399
,
"rank"
:
2
,
"decoded_token"
:
" sky"
},
"3937"
:
{
"logprob"
:
-3.1822168827056885
,
"rank"
:
3
,
"decoded_token"
:
" image"
},
"28035"
:
{
"logprob"
:
-3.6822168827056885
,
"rank"
:
4
,
"decoded_token"
:
" landscape"
},
"3044"
:
{
"logprob"
:
-3.6822168827056885
,
"rank"
:
5
,
"decoded_token"
:
" sk"
}},
{
"2425"
:
{
"logprob"
:
-0.2914469838142395
,
"rank"
:
1
,
"decoded_token"
:
" under"
},
"1044"
:
{
"logprob"
:
-2.4164469242095947
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1454"
:
{
"logprob"
:
-2.5414469242095947
,
"rank"
:
3
,
"decoded_token"
:
" with"
},
"1626"
:
{
"logprob"
:
-3.7914469242095947
,
"rank"
:
4
,
"decoded_token"
:
".
\n
"
},
"1408"
:
{
"logprob"
:
-3.7914469242095947
,
"rank"
:
5
,
"decoded_token"
:
" on"
}},
{
"1261"
:
{
"logprob"
:
-0.0460360012948513
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-3.9210360050201416
,
"rank"
:
2
,
"decoded_token"
:
" an"
},
"16152"
:
{
"logprob"
:
-4.1085357666015625
,
"rank"
:
3
,
"decoded_token"
:
" cloud"
},
"2136"
:
{
"logprob"
:
-6.1710357666015625
,
"rank"
:
4
,
"decoded_token"
:
" over"
},
"6133"
:
{
"logprob"
:
-6.4210357666015625
,
"rank"
:
5
,
"decoded_token"
:
" clear"
}},
{
"16152"
:
{
"logprob"
:
-0.20367540419101715
,
"rank"
:
1
,
"decoded_token"
:
" cloud"
},
"6133"
:
{
"logprob"
:
-2.8286755084991455
,
"rank"
:
2
,
"decoded_token"
:
" clear"
},
"27254"
:
{
"logprob"
:
-3.5161755084991455
,
"rank"
:
3
,
"decoded_token"
:
" partly"
},
"18416"
:
{
"logprob"
:
-3.8286755084991455
,
"rank"
:
4
,
"decoded_token"
:
" haz"
},
"4391"
:
{
"logprob"
:
-4.328675270080566
,
"rank"
:
5
,
"decoded_token"
:
" light"
}},
{
"1121"
:
{
"logprob"
:
-0.05241352692246437
,
"rank"
:
1
,
"decoded_token"
:
"y"
},
"1286"
:
{
"logprob"
:
-3.8024134635925293
,
"rank"
:
2
,
"decoded_token"
:
"ed"
},
"77187"
:
{
"logprob"
:
-4.552413463592529
,
"rank"
:
3
,
"decoded_token"
:
"-filled"
},
"4527"
:
{
"logprob"
:
-4.802413463592529
,
"rank"
:
4
,
"decoded_token"
:
"less"
},
"114525"
:
{
"logprob"
:
-4.927413463592529
,
"rank"
:
5
,
"decoded_token"
:
"-covered"
}},
{
"21283"
:
{
"logprob"
:
-0.0003716255014296621
,
"rank"
:
1
,
"decoded_token"
:
" sky"
},
"10991"
:
{
"logprob"
:
-8.750371932983398
,
"rank"
:
2
,
"decoded_token"
:
" blue"
},
"1044"
:
{
"logprob"
:
-9.375371932983398
,
"rank"
:
3
,
"decoded_token"
:
","
},
"26549"
:
{
"logprob"
:
-10.375371932983398
,
"rank"
:
4
,
"decoded_token"
:
" gray"
},
"34052"
:
{
"logprob"
:
-11.250371932983398
,
"rank"
:
5
,
"decoded_token"
:
" grey"
}},
{
"1626"
:
{
"logprob"
:
-0.00012730741582345217
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-9.500126838684082
,
"rank"
:
2
,
"decoded_token"
:
","
},
"1046"
:
{
"logprob"
:
-10.500126838684082
,
"rank"
:
3
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-10.875126838684082
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"1294"
:
{
"logprob"
:
-13.250126838684082
,
"rank"
:
5
,
"decoded_token"
:
" in"
}},
{
"1051"
:
{
"logprob"
:
-3.2186455882765586e-06
,
"rank"
:
1
,
"decoded_token"
:
"3"
},
"1052"
:
{
"logprob"
:
-12.75000286102295
,
"rank"
:
2
,
"decoded_token"
:
"4"
},
"1050"
:
{
"logprob"
:
-15.00000286102295
,
"rank"
:
3
,
"decoded_token"
:
"2"
},
"1049"
:
{
"logprob"
:
-16.937503814697266
,
"rank"
:
4
,
"decoded_token"
:
"1"
},
"1032"
:
{
"logprob"
:
-17.875003814697266
,
"rank"
:
5
,
"decoded_token"
:
" "
}},
{
"1046"
:
{
"logprob"
:
-1.6689286894688848e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-14.687501907348633
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"5226"
:
{
"logprob"
:
-15.687501907348633
,
"rank"
:
3
,
"decoded_token"
:
".D"
},
"6847"
:
{
"logprob"
:
-15.812501907348633
,
"rank"
:
4
,
"decoded_token"
:
".T"
},
"48426"
:
{
"logprob"
:
-16.812501907348633
,
"rank"
:
5
,
"decoded_token"
:
".The"
}},
{
"8342"
:
{
"logprob"
:
-0.5730464458465576
,
"rank"
:
1
,
"decoded_token"
:
" Sur"
},
"1349"
:
{
"logprob"
:
-1.6980464458465576
,
"rank"
:
2
,
"decoded_token"
:
" A"
},
"22468"
:
{
"logprob"
:
-2.5730464458465576
,
"rank"
:
3
,
"decoded_token"
:
" Several"
},
"1488"
:
{
"logprob"
:
-2.6980464458465576
,
"rank"
:
4
,
"decoded_token"
:
" W"
},
"15035"
:
{
"logprob"
:
-3.1980464458465576
,
"rank"
:
5
,
"decoded_token"
:
" People"
}},
{
"71284"
:
{
"logprob"
:
-0.0033258858602494
,
"rank"
:
1
,
"decoded_token"
:
"fers"
},
"1102"
:
{
"logprob"
:
-5.878325939178467
,
"rank"
:
2
,
"decoded_token"
:
"f"
},
"1726"
:
{
"logprob"
:
-7.628325939178467
,
"rank"
:
3
,
"decoded_token"
:
"fer"
},
"61888"
:
{
"logprob"
:
-12.253325462341309
,
"rank"
:
4
,
"decoded_token"
:
"fline"
},
"2119"
:
{
"logprob"
:
-13.003325462341309
,
"rank"
:
5
,
"decoded_token"
:
"fter"
}},
{
"7377"
:
{
"logprob"
:
-1.4996429681777954
,
"rank"
:
1
,
"decoded_token"
:
" wait"
},
"1584"
:
{
"logprob"
:
-1.7496429681777954
,
"rank"
:
2
,
"decoded_token"
:
" are"
},
"88014"
:
{
"logprob"
:
-1.9371429681777954
,
"rank"
:
3
,
"decoded_token"
:
" paddle"
},
"1294"
:
{
"logprob"
:
-1.9371429681777954
,
"rank"
:
4
,
"decoded_token"
:
" in"
},
"24434"
:
{
"logprob"
:
-2.187142848968506
,
"rank"
:
5
,
"decoded_token"
:
" ride"
}},
{
"1394"
:
{
"logprob"
:
-0.6126739382743835
,
"rank"
:
1
,
"decoded_token"
:
" for"
},
"1294"
:
{
"logprob"
:
-0.9876739382743835
,
"rank"
:
2
,
"decoded_token"
:
" in"
},
"1408"
:
{
"logprob"
:
-2.7376739978790283
,
"rank"
:
3
,
"decoded_token"
:
" on"
},
"6482"
:
{
"logprob"
:
-4.425173759460449
,
"rank"
:
4
,
"decoded_token"
:
" patient"
},
"1321"
:
{
"logprob"
:
-5.612673759460449
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"22140"
:
{
"logprob"
:
-0.00729279313236475
,
"rank"
:
1
,
"decoded_token"
:
" waves"
},
"1278"
:
{
"logprob"
:
-5.632292747497559
,
"rank"
:
2
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-5.757292747497559
,
"rank"
:
3
,
"decoded_token"
:
" a"
},
"39460"
:
{
"logprob"
:
-8.257292747497559
,
"rank"
:
4
,
"decoded_token"
:
" incoming"
},
"1321"
:
{
"logprob"
:
-9.757292747497559
,
"rank"
:
5
,
"decoded_token"
:
" and"
}},
{
"1294"
:
{
"logprob"
:
-0.3071398138999939
,
"rank"
:
1
,
"decoded_token"
:
" in"
},
"1408"
:
{
"logprob"
:
-2.1821398735046387
,
"rank"
:
2
,
"decoded_token"
:
" on"
},
"1513"
:
{
"logprob"
:
-2.4321398735046387
,
"rank"
:
3
,
"decoded_token"
:
" at"
},
"3016"
:
{
"logprob"
:
-3.6821398735046387
,
"rank"
:
4
,
"decoded_token"
:
" while"
},
"1435"
:
{
"logprob"
:
-3.8071398735046387
,
"rank"
:
5
,
"decoded_token"
:
" as"
}},
{
"1278"
:
{
"logprob"
:
-0.004646694287657738
,
"rank"
:
1
,
"decoded_token"
:
" the"
},
"1261"
:
{
"logprob"
:
-6.1921467781066895
,
"rank"
:
2
,
"decoded_token"
:
" a"
},
"1420"
:
{
"logprob"
:
-6.9421467781066895
,
"rank"
:
3
,
"decoded_token"
:
" an"
},
"40466"
:
{
"logprob"
:
-7.2546467781066895
,
"rank"
:
4
,
"decoded_token"
:
" shallow"
},
"26517"
:
{
"logprob"
:
-7.8796467781066895
,
"rank"
:
5
,
"decoded_token"
:
" calm"
}},
{
"27208"
:
{
"logprob"
:
-0.0658877044916153
,
"rank"
:
1
,
"decoded_token"
:
" ocean"
},
"7786"
:
{
"logprob"
:
-3.440887689590454
,
"rank"
:
2
,
"decoded_token"
:
" distance"
},
"5124"
:
{
"logprob"
:
-5.253387928009033
,
"rank"
:
3
,
"decoded_token"
:
" early"
},
"26517"
:
{
"logprob"
:
-5.315887928009033
,
"rank"
:
4
,
"decoded_token"
:
" calm"
},
"11196"
:
{
"logprob"
:
-5.378387928009033
,
"rank"
:
5
,
"decoded_token"
:
" sea"
}},
{
"1513"
:
{
"logprob"
:
-1.1504861116409302
,
"rank"
:
1
,
"decoded_token"
:
" at"
},
"1435"
:
{
"logprob"
:
-1.2754861116409302
,
"rank"
:
2
,
"decoded_token"
:
" as"
},
"3184"
:
{
"logprob"
:
-1.4004861116409302
,
"rank"
:
3
,
"decoded_token"
:
" during"
},
"3016"
:
{
"logprob"
:
-2.9004859924316406
,
"rank"
:
4
,
"decoded_token"
:
" while"
},
"6117"
:
{
"logprob"
:
-3.1504859924316406
,
"rank"
:
5
,
"decoded_token"
:
" near"
}},
{
"97558"
:
{
"logprob"
:
-0.12151996046304703
,
"rank"
:
1
,
"decoded_token"
:
" sunset"
},
"11729"
:
{
"logprob"
:
-2.8715200424194336
,
"rank"
:
2
,
"decoded_token"
:
" sun"
},
"1266"
:
{
"logprob"
:
-3.4965200424194336
,
"rank"
:
3
,
"decoded_token"
:
" d"
},
"54507"
:
{
"logprob"
:
-3.9965200424194336
,
"rank"
:
4
,
"decoded_token"
:
" dawn"
},
"1261"
:
{
"logprob"
:
-5.121520042419434
,
"rank"
:
5
,
"decoded_token"
:
" a"
}},
{
"1626"
:
{
"logprob"
:
-0.3073118329048157
,
"rank"
:
1
,
"decoded_token"
:
".
\n
"
},
"1044"
:
{
"logprob"
:
-2.182311773300171
,
"rank"
:
2
,
"decoded_token"
:
","
},
"3016"
:
{
"logprob"
:
-2.557311773300171
,
"rank"
:
3
,
"decoded_token"
:
" while"
},
"1454"
:
{
"logprob"
:
-3.432311773300171
,
"rank"
:
4
,
"decoded_token"
:
" with"
},
"6117"
:
{
"logprob"
:
-4.05731201171875
,
"rank"
:
5
,
"decoded_token"
:
" near"
}},
{
"1052"
:
{
"logprob"
:
-3.3378546504536644e-06
,
"rank"
:
1
,
"decoded_token"
:
"4"
},
"1051"
:
{
"logprob"
:
-13.25000286102295
,
"rank"
:
2
,
"decoded_token"
:
"3"
},
"1049"
:
{
"logprob"
:
-13.93750286102295
,
"rank"
:
3
,
"decoded_token"
:
"1"
},
"1053"
:
{
"logprob"
:
-14.43750286102295
,
"rank"
:
4
,
"decoded_token"
:
"5"
},
"1032"
:
{
"logprob"
:
-16.687503814697266
,
"rank"
:
5
,
"decoded_token"
:
" "
}},
{
"1046"
:
{
"logprob"
:
-1.6689286894688848e-06
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"3590"
:
{
"logprob"
:
-13.500001907348633
,
"rank"
:
2
,
"decoded_token"
:
".A"
},
"6847"
:
{
"logprob"
:
-16.437501907348633
,
"rank"
:
3
,
"decoded_token"
:
".T"
},
"1044"
:
{
"logprob"
:
-17.312501907348633
,
"rank"
:
4
,
"decoded_token"
:
","
},
"1349"
:
{
"logprob"
:
-17.375001907348633
,
"rank"
:
5
,
"decoded_token"
:
" A"
}},
{
"1349"
:
{
"logprob"
:
-0.004292916506528854
,
"rank"
:
1
,
"decoded_token"
:
" A"
},
"2048"
:
{
"logprob"
:
-5.629292964935303
,
"rank"
:
2
,
"decoded_token"
:
" An"
},
"10638"
:
{
"logprob"
:
-7.879292964935303
,
"rank"
:
3
,
"decoded_token"
:
" Two"
},
"111463"
:
{
"logprob"
:
-10.004292488098145
,
"rank"
:
4
,
"decoded_token"
:
" Trees"
},
"1531"
:
{
"logprob"
:
-10.879292488098145
,
"rank"
:
5
,
"decoded_token"
:
" The"
}},
{
"53301"
:
{
"logprob"
:
-1.5473321676254272
,
"rank"
:
1
,
"decoded_token"
:
" winding"
},
"15192"
:
{
"logprob"
:
-1.7348321676254272
,
"rank"
:
2
,
"decoded_token"
:
" narrow"
},
"47945"
:
{
"logprob"
:
-2.109832286834717
,
"rank"
:
3
,
"decoded_token"
:
" dirt"
},
"2169"
:
{
"logprob"
:
-2.609832286834717
,
"rank"
:
4
,
"decoded_token"
:
" ser"
},
"59396"
:
{
"logprob"
:
-2.672332286834717
,
"rank"
:
5
,
"decoded_token"
:
" gravel"
}},
{
"59396"
:
{
"logprob"
:
-0.8954829573631287
,
"rank"
:
1
,
"decoded_token"
:
" gravel"
},
"3549"
:
{
"logprob"
:
-1.1454830169677734
,
"rank"
:
2
,
"decoded_token"
:
" path"
},
"47945"
:
{
"logprob"
:
-1.6454830169677734
,
"rank"
:
3
,
"decoded_token"
:
" dirt"
},
"14801"
:
{
"logprob"
:
-3.2704830169677734
,
"rank"
:
4
,
"decoded_token"
:
" pathway"
},
"15551"
:
{
"logprob"
:
-4.270483016967773
,
"rank"
:
5
,
"decoded_token"
:
" stone"
}},
{
"3549"
:
{
"logprob"
:
-0.02117946185171604
,
"rank"
:
1
,
"decoded_token"
:
" path"
},
"14801"
:
{
"logprob"
:
-3.896179437637329
,
"rank"
:
2
,
"decoded_token"
:
" pathway"
},
"33659"
:
{
"logprob"
:
-8.14617919921875
,
"rank"
:
3
,
"decoded_token"
:
" trail"
},
"9480"
:
{
"logprob"
:
-9.64617919921875
,
"rank"
:
4
,
"decoded_token"
:
" road"
},
"7368"
:
{
"logprob"
:
-9.64617919921875
,
"rank"
:
5
,
"decoded_token"
:
"path"
}},
{
"13335"
:
{
"logprob"
:
-0.18962937593460083
,
"rank"
:
1
,
"decoded_token"
:
" leads"
},
"39985"
:
{
"logprob"
:
-2.752129316329956
,
"rank"
:
2
,
"decoded_token"
:
" cuts"
},
"1639"
:
{
"logprob"
:
-3.877129316329956
,
"rank"
:
3
,
"decoded_token"
:
" me"
},
"11500"
:
{
"logprob"
:
-3.939629316329956
,
"rank"
:
4
,
"decoded_token"
:
" runs"
},
"2645"
:
{
"logprob"
:
-4.189629554748535
,
"rank"
:
5
,
"decoded_token"
:
" through"
}},
{
"2645"
:
{
"logprob"
:
-0.05349981039762497
,
"rank"
:
1
,
"decoded_token"
:
" through"
},
"8994"
:
{
"logprob"
:
-4.053499698638916
,
"rank"
:
2
,
"decoded_token"
:
" towards"
},
"2396"
:
{
"logprob"
:
-4.303499698638916
,
"rank"
:
3
,
"decoded_token"
:
" between"
},
"2203"
:
{
"logprob"
:
-4.678499698638916
,
"rank"
:
4
,
"decoded_token"
:
" into"
},
"1317"
:
{
"logprob"
:
-5.678499698638916
,
"rank"
:
5
,
"decoded_token"
:
" to"
}},
{
"1261"
:
{
"logprob"
:
-0.017386287450790405
,
"rank"
:
1
,
"decoded_token"
:
" a"
},
"11223"
:
{
"logprob"
:
-4.892386436462402
,
"rank"
:
2
,
"decoded_token"
:
" green"
},
"1295"
:
{
"logprob"
:
-5.017386436462402
,
"rank"
:
3
,
"decoded_token"
:
" l"
},
"23170"
:
{
"logprob"
:
-6.642386436462402
,
"rank"
:
4
,
"decoded_token"
:
" grass"
},
"1420"
:
{
"logprob"
:
-7.267386436462402
,
"rank"
:
5
,
"decoded_token"
:
" an"
}},
{
"1295"
:
{
"logprob"
:
-0.9453322887420654
,
"rank"
:
1
,
"decoded_token"
:
" l"
},
"11223"
:
{
"logprob"
:
-1.3203322887420654
,
"rank"
:
2
,
"decoded_token"
:
" green"
},
"23170"
:
{
"logprob"
:
-1.9453322887420654
,
"rank"
:
3
,
"decoded_token"
:
" grass"
},
"12097"
:
{
"logprob"
:
-2.4453322887420654
,
"rank"
:
4
,
"decoded_token"
:
" park"
},
"26428"
:
{
"logprob"
:
-3.3203322887420654
,
"rank"
:
5
,
"decoded_token"
:
" garden"
}},
{
"3506"
:
{
"logprob"
:
-6.556489552167477e-06
,
"rank"
:
1
,
"decoded_token"
:
"ush"
},
"1374"
:
{
"logprob"
:
-12.000006675720215
,
"rank"
:
2
,
"decoded_token"
:
"us"
},
"90716"
:
{
"logprob"
:
-15.625006675720215
,
"rank"
:
3
,
"decoded_token"
:
"USH"
},
"16938"
:
{
"logprob"
:
-15.875006675720215
,
"rank"
:
4
,
"decoded_token"
:
"usher"
},
"13326"
:
{
"logprob"
:
-17.1875057220459
,
"rank"
:
5
,
"decoded_token"
:
"inden"
}},
{
"11223"
:
{
"logprob"
:
-0.3668670654296875
,
"rank"
:
1
,
"decoded_token"
:
" green"
},
"1044"
:
{
"logprob"
:
-1.3668670654296875
,
"rank"
:
2
,
"decoded_token"
:
","
},
"26428"
:
{
"logprob"
:
-3.4918670654296875
,
"rank"
:
3
,
"decoded_token"
:
" garden"
},
"12097"
:
{
"logprob"
:
-4.1168670654296875
,
"rank"
:
4
,
"decoded_token"
:
" park"
},
"23170"
:
{
"logprob"
:
-5.8668670654296875
,
"rank"
:
5
,
"decoded_token"
:
" grass"
}},
{
"12097"
:
{
"logprob"
:
-0.5530153512954712
,
"rank"
:
1
,
"decoded_token"
:
" park"
},
"3727"
:
{
"logprob"
:
-2.0530152320861816
,
"rank"
:
2
,
"decoded_token"
:
" field"
},
"28035"
:
{
"logprob"
:
-2.1780152320861816
,
"rank"
:
3
,
"decoded_token"
:
" landscape"
},
"26428"
:
{
"logprob"
:
-2.3030152320861816
,
"rank"
:
4
,
"decoded_token"
:
" garden"
},
"4457"
:
{
"logprob"
:
-2.8030152320861816
,
"rank"
:
5
,
"decoded_token"
:
" area"
}},
{
"1046"
:
{
"logprob"
:
-0.7924000024795532
,
"rank"
:
1
,
"decoded_token"
:
"."
},
"1454"
:
{
"logprob"
:
-1.2924000024795532
,
"rank"
:
2
,
"decoded_token"
:
" with"
},
"8994"
:
{
"logprob"
:
-2.7923998832702637
,
"rank"
:
3
,
"decoded_token"
:
" towards"
},
"54410"
:
{
"logprob"
:
-3.5423998832702637
,
"rank"
:
4
,
"decoded_token"
:
" lined"
},
"2425"
:
{
"logprob"
:
-3.5423998832702637
,
"rank"
:
5
,
"decoded_token"
:
" under"
}},
{
"2"
:
{
"logprob"
:
-1.9073468138230965e-06
,
"rank"
:
1
,
"decoded_token"
:
"</s>"
},
"1032"
:
{
"logprob"
:
-13.250001907348633
,
"rank"
:
2
,
"decoded_token"
:
" "
},
"1256"
:
{
"logprob"
:
-16.250001907348633
,
"rank"
:
3
,
"decoded_token"
:
" "
},
"1293"
:
{
"logprob"
:
-19.000001907348633
,
"rank"
:
4
,
"decoded_token"
:
" "
},
"1319"
:
{
"logprob"
:
-20.000001907348633
,
"rank"
:
5
,
"decoded_token"
:
" ("
}}]]]
\ No newline at end of file
tests/models/multimodal/processing/test_common.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
copy
from
functools
import
partial
from
functools
import
partial
from
typing
import
Optional
,
Union
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
from
mistral_common.protocol.instruct.messages
import
(
ImageChunk
,
TextChunk
,
UserMessage
)
from
mistral_common.protocol.instruct.request
import
ChatCompletionRequest
from
PIL
import
Image
from
PIL
import
Image
from
transformers
import
PreTrainedTokenizer
,
PreTrainedTokenizerFast
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
from
vllm.inputs
import
InputProcessingContext
from
vllm.inputs
import
InputProcessingContext
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalDataDict
from
vllm.multimodal.processing
import
ProcessingCache
from
vllm.multimodal.inputs
import
MultiModalInputs
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
,
ProcessingCache
from
vllm.transformers_utils.tokenizer
import
(
MistralTokenizer
,
cached_tokenizer_from_config
)
from
....multimodal.utils
import
random_audio
,
random_image
,
random_video
from
....multimodal.utils
import
random_audio
,
random_image
,
random_video
from
...registry
import
HF_EXAMPLE_MODELS
from
...registry
import
HF_EXAMPLE_MODELS
...
@@ -21,6 +29,7 @@ def _test_processing_correctness(
...
@@ -21,6 +29,7 @@ def _test_processing_correctness(
hit_rate
:
float
,
hit_rate
:
float
,
num_batches
:
int
,
num_batches
:
int
,
simplify_rate
:
float
,
simplify_rate
:
float
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
):
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model_id
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model_id
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
...
@@ -29,8 +38,8 @@ def _test_processing_correctness(
...
@@ -29,8 +38,8 @@ def _test_processing_correctness(
model_config
=
ModelConfig
(
model_config
=
ModelConfig
(
model_id
,
model_id
,
task
=
"auto"
,
task
=
"auto"
,
tokenizer
=
model_id
,
tokenizer
=
model_info
.
tokenizer
or
model_id
,
tokenizer_mode
=
"auto"
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
trust_remote_code
=
model_info
.
trust_remote_code
,
trust_remote_code
=
model_info
.
trust_remote_code
,
seed
=
0
,
seed
=
0
,
dtype
=
"float16"
,
dtype
=
"float16"
,
...
@@ -45,7 +54,7 @@ def _test_processing_correctness(
...
@@ -45,7 +54,7 @@ def _test_processing_correctness(
tokenizer
=
cached_tokenizer_from_config
(
model_config
),
tokenizer
=
cached_tokenizer_from_config
(
model_config
),
)
)
# Ensure that it can fit all of the data
# Ensure that it can fit all of the data
cache
=
ProcessingCache
(
capacity
=
1
<<
30
)
cache
=
ProcessingCache
(
capacity
_gb
=
2048
)
processing_info
=
factories
.
info
(
ctx
)
processing_info
=
factories
.
info
(
ctx
)
supported_mm_limits
=
processing_info
.
get_supported_mm_limits
()
supported_mm_limits
=
processing_info
.
get_supported_mm_limits
()
...
@@ -82,14 +91,6 @@ def _test_processing_correctness(
...
@@ -82,14 +91,6 @@ def _test_processing_correctness(
partial
(
random_audio
,
rng
,
min_len
=
512
,
max_len
=
1024
,
sr
=
16000
),
partial
(
random_audio
,
rng
,
min_len
=
512
,
max_len
=
1024
,
sr
=
16000
),
}
}
tokenizer_encode_kwargs
=
{}
if
model_config
.
hf_config
.
model_type
==
"mllama"
:
# For Mllama, tokenizer will always add bos_token at the beginning of
# prompt by default, causing hf_processor outputs incorrect token ids.
# So we need use `add_special_tokens=False` here to leave bos_token
# to be added by the processor.
tokenizer_encode_kwargs
=
{
"add_special_tokens"
:
False
}
for
batch_idx
in
range
(
num_batches
):
for
batch_idx
in
range
(
num_batches
):
mm_data
=
{
mm_data
=
{
k
:
k
:
...
@@ -112,37 +113,131 @@ def _test_processing_correctness(
...
@@ -112,37 +113,131 @@ def _test_processing_correctness(
elif
len
(
mm_data
[
k
])
==
1
:
elif
len
(
mm_data
[
k
])
==
1
:
mm_data
[
k
]
=
mm_data
[
k
][
0
]
mm_data
[
k
]
=
mm_data
[
k
][
0
]
baseline_result
=
baseline_processor
.
apply
(
if
isinstance
(
tokenizer
,
MistralTokenizer
):
prompt
,
_test_processing_correctness_mistral
(
mm_data
=
mm_data
,
model_config
,
hf_processor_mm_kwargs
=
{},
tokenizer
,
)
prompt
,
cached_result
=
cached_processor
.
apply
(
mm_data
,
prompt
,
baseline_processor
,
mm_data
=
mm_data
,
cached_processor
,
hf_processor_mm_kwargs
=
{},
batch_idx
,
)
ignore_mm_keys
=
ignore_mm_keys
,
)
else
:
_test_processing_correctness_hf
(
model_config
,
tokenizer
,
prompt
,
mm_data
,
baseline_processor
,
cached_processor
,
batch_idx
,
ignore_mm_keys
=
ignore_mm_keys
,
)
def
_test_processing_correctness_hf
(
model_config
:
ModelConfig
,
tokenizer
:
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
],
prompt
:
str
,
mm_data
:
MultiModalDataDict
,
baseline_processor
:
BaseMultiModalProcessor
,
cached_processor
:
BaseMultiModalProcessor
,
batch_idx
:
int
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
if
model_config
.
hf_config
.
model_type
in
(
"mllama"
,
"whisper"
,
"ultravox"
):
# For some multimodal models, tokenizer will always add bos_token
# at the beginning of prompt by default, causing hf_processor outputs
# incorrect token ids. So we need use `add_special_tokens=False` here
# to leave bos_token to be added by the processor.
token_prompt
=
tokenizer
.
encode
(
prompt
,
add_special_tokens
=
False
)
else
:
token_prompt
=
tokenizer
.
encode
(
prompt
)
baseline_result
=
baseline_processor
.
apply
(
prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_result
=
cached_processor
.
apply
(
prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
assert
_inputs_equal
(
baseline_result
,
cached_result
,
ignore_mm_keys
,
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
assert
baseline_result
==
cached_result
,
(
baseline_tokenized_result
=
baseline_processor
.
apply
(
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
)
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
baseline_tokenized_result
=
baseline_processor
.
apply
(
assert
_inputs_equal
(
tokenizer
.
encode
(
prompt
,
**
tokenizer_encode_kwargs
)
,
baseline_result
,
mm_data
=
mm_data
,
baseline_tokenized_result
,
hf_processor_mm_kwargs
=
{}
,
ignore_mm_keys
,
)
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
assert
baseline_result
==
baseline_tokenized_result
,
(
cached_tokenized_result
=
cached_processor
.
apply
(
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
)
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_tokenized_result
=
cached_processor
.
apply
(
assert
_inputs_equal
(
tokenizer
.
encode
(
prompt
,
**
tokenizer_encode_kwargs
)
,
cached_result
,
mm_data
=
mm_data
,
cached_tokenized_result
,
hf_processor_mm_kwargs
=
{}
,
ignore_mm_keys
,
)
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
assert
cached_result
==
cached_tokenized_result
,
(
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
)
def
_test_processing_correctness_mistral
(
model_config
:
ModelConfig
,
tokenizer
:
MistralTokenizer
,
prompt
:
str
,
mm_data
:
MultiModalDataDict
,
baseline_processor
:
BaseMultiModalProcessor
,
cached_processor
:
BaseMultiModalProcessor
,
batch_idx
:
int
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
):
images
=
mm_data
.
get
(
"image"
,
[])
if
not
isinstance
(
images
,
list
):
images
=
[
images
]
request
=
ChatCompletionRequest
(
messages
=
[
UserMessage
(
content
=
[
TextChunk
(
text
=
prompt
),
*
(
ImageChunk
(
image
=
image
)
for
image
in
images
),
]),
])
res
=
tokenizer
.
mistral
.
encode_chat_completion
(
request
)
token_prompt
=
res
.
tokens
# Mistral chat outputs tokens directly, rather than text prompts
baseline_tokenized_result
=
baseline_processor
.
apply
(
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
cached_tokenized_result
=
cached_processor
.
apply
(
token_prompt
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
)
assert
_inputs_equal
(
baseline_tokenized_result
,
cached_tokenized_result
,
ignore_mm_keys
,
),
f
"Failed (
{
batch_idx
=
}
,
{
prompt
=
}
,
{
mm_data
=
}
)"
# yapf: disable
# yapf: disable
...
@@ -151,7 +246,9 @@ def _test_processing_correctness(
...
@@ -151,7 +246,9 @@ def _test_processing_correctness(
"Salesforce/blip2-opt-2.7b"
,
"Salesforce/blip2-opt-2.7b"
,
"facebook/chameleon-7b"
,
"facebook/chameleon-7b"
,
"deepseek-ai/deepseek-vl2-tiny"
,
"deepseek-ai/deepseek-vl2-tiny"
,
"microsoft/Florence-2-base"
,
"adept/fuyu-8b"
,
"adept/fuyu-8b"
,
"google/gemma-3-4b-it"
,
"THUDM/glm-4v-9b"
,
"THUDM/glm-4v-9b"
,
"h2oai/h2ovl-mississippi-800m"
,
"h2oai/h2ovl-mississippi-800m"
,
"OpenGVLab/InternVL2-1B"
,
"OpenGVLab/InternVL2-1B"
,
...
@@ -162,6 +259,7 @@ def _test_processing_correctness(
...
@@ -162,6 +259,7 @@ def _test_processing_correctness(
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
"meta-llama/Llama-3.2-11B-Vision-Instruct"
,
"meta-llama/Llama-3.2-11B-Vision-Instruct"
,
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
"mistralai/Pixtral-12B-2409"
,
"mistral-community/pixtral-12b"
,
"mistral-community/pixtral-12b"
,
"openbmb/MiniCPM-o-2_6"
,
"openbmb/MiniCPM-o-2_6"
,
"openbmb/MiniCPM-V-2_6"
,
"openbmb/MiniCPM-V-2_6"
,
...
@@ -173,6 +271,9 @@ def _test_processing_correctness(
...
@@ -173,6 +271,9 @@ def _test_processing_correctness(
"Qwen/Qwen2.5-VL-3B-Instruct"
,
"Qwen/Qwen2.5-VL-3B-Instruct"
,
"Qwen/Qwen2-Audio-7B-Instruct"
,
"Qwen/Qwen2-Audio-7B-Instruct"
,
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
"openai/whisper-large-v3"
,
"google/paligemma-3b-mix-224"
,
"google/paligemma2-3b-ft-docci-448"
,
])
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
...
@@ -184,16 +285,24 @@ def test_processing_correctness(
...
@@ -184,16 +285,24 @@ def test_processing_correctness(
num_batches
:
int
,
num_batches
:
int
,
simplify_rate
:
float
,
simplify_rate
:
float
,
):
):
ignore_mm_keys
=
None
if
'ultravox'
in
model_id
:
# In Ultravox, the audio_features can be different depending on padding
# The slight difference should not be a problem though, since
# attention_mask lets us ignore the difference.
ignore_mm_keys
=
[
'audio_features'
]
_test_processing_correctness
(
_test_processing_correctness
(
model_id
,
model_id
,
hit_rate
=
hit_rate
,
hit_rate
=
hit_rate
,
num_batches
=
num_batches
,
num_batches
=
num_batches
,
simplify_rate
=
simplify_rate
,
simplify_rate
=
simplify_rate
,
ignore_mm_keys
=
ignore_mm_keys
,
)
)
# yapf: disable
# yapf: disable
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"microsoft/Phi-3-vision-
128k-
instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"microsoft/Phi-3
.5
-vision-instruct"
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"simplify_rate"
,
[
1.0
])
@
pytest
.
mark
.
parametrize
(
"simplify_rate"
,
[
1.0
])
...
@@ -217,3 +326,40 @@ def test_processing_correctness_phi3v(
...
@@ -217,3 +326,40 @@ def test_processing_correctness_phi3v(
num_batches
=
num_batches
,
num_batches
=
num_batches
,
simplify_rate
=
simplify_rate
,
simplify_rate
=
simplify_rate
,
)
)
def _inputs_equal(
    a: MultiModalInputs,
    b: MultiModalInputs,
    ignore_mm_keys: Optional[list[str]] = None,
):
    """Compare two processed inputs while disregarding selected mm_kwargs keys.

    Both operands are passed through ``_drop_mm_kwargs_keys`` with the same
    ``ignore_mm_keys`` and the two results are compared with ``==``.

    Args:
        a: First processed input.
        b: Second processed input.
        ignore_mm_keys: Keys to strip before comparing; ``None`` or an empty
            list compares the inputs unchanged.
    """
    lhs = _drop_mm_kwargs_keys(a, ignore_mm_keys)
    rhs = _drop_mm_kwargs_keys(b, ignore_mm_keys)
    return lhs == rhs
def
_drop_mm_kwargs_keys
(
result
:
MultiModalInputs
,
ignore_mm_keys
:
Optional
[
list
[
str
]]
=
None
,
)
->
MultiModalInputs
:
"""Drop specified keys from result['mm_kwargs'].
This is mainly to avoid doing exact match of audio_features in ultravox.
Args:
result: Result to drop keys from
ignore_mm_keys: List of keys to ignore, e.g. ['audio_features']
"""
if
not
ignore_mm_keys
:
return
result
if
'mm_kwargs'
in
result
:
result
=
copy
.
deepcopy
(
result
)
mm_kwargs
=
result
[
'mm_kwargs'
]
for
key
in
ignore_mm_keys
:
mm_kwargs
.
pop
(
key
,
None
)
for
items
in
mm_kwargs
.
_items_by_modality
.
values
():
for
item
in
items
:
for
key
in
ignore_mm_keys
:
item
.
pop
(
key
,
None
)
return
result
tests/models/multimodal/processing/test_h2ovl.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Tests for H2OVL's multimodal preprocessing kwargs."""
"""Tests for H2OVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
pytest
import
pytest
from
PIL
import
Image
from
PIL
import
Image
...
@@ -95,14 +96,14 @@ def _run_check(
...
@@ -95,14 +96,14 @@ def _run_check(
tokenizer
=
processor
.
info
.
get_tokenizer
()
tokenizer
=
processor
.
info
.
get_tokenizer
()
config
=
processor
.
info
.
get_hf_config
()
config
=
processor
.
info
.
get_hf_config
()
prompt
=
"<image>"
*
len
(
images
)
mm_data
=
{
"image"
:
images
}
mm_data
=
{
"image"
:
images
}
total_expected_num_patches
=
sum
(
total_expected_num_patches
=
sum
(
_get_expected_num_patches
(
config
,
image
,
len
(
images
),
min_num
,
max_num
)
_get_expected_num_patches
(
config
,
image
,
len
(
images
),
min_num
,
max_num
)
for
image
in
images
)
for
image
in
images
)
processed_inputs
=
processor
.
apply
(
"<image>"
*
len
(
images
),
mm_data
,
processed_inputs
=
processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
)
mm_processor_kwargs
)
# Ensure we have the right number of placeholders per num_crops size
# Ensure we have the right number of placeholders per num_crops size
image_token_id
=
tokenizer
.
convert_tokens_to_ids
(
"<IMG_CONTEXT>"
)
image_token_id
=
tokenizer
.
convert_tokens_to_ids
(
"<IMG_CONTEXT>"
)
...
@@ -151,9 +152,7 @@ def test_processor_override(
...
@@ -151,9 +152,7 @@ def test_processor_override(
}
}
ctx
=
build_model_context
(
ctx
=
build_model_context
(
model_name
=
model_id
,
model_id
,
tokenizer_name
=
model_id
,
trust_remote_code
=
True
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
)
...
...
tests/models/multimodal/processing/test_idefics3.py
View file @
469e903b
...
@@ -11,10 +11,8 @@ from ....conftest import _ImageAssets
...
@@ -11,10 +11,8 @@ from ....conftest import _ImageAssets
from
...utils
import
build_model_context
from
...utils
import
build_model_context
from
....utils
import
models_path_prefix
from
....utils
import
models_path_prefix
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceM4/Idefics3-8B-Llama3"
)]
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceM4/Idefics3-8B-Llama3"
)])
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
# yapf: disable
# yapf: disable
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
),
(
"mm_processor_kwargs"
,
"expected_toks_per_img"
),
...
@@ -27,7 +25,7 @@ models = [os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3")]
...
@@ -27,7 +25,7 @@ models = [os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3")]
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
def
test_processor_override
(
def
test_processor_override
(
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
_id
:
str
,
mm_processor_kwargs
:
dict
[
str
,
object
],
mm_processor_kwargs
:
dict
[
str
,
object
],
expected_toks_per_img
:
int
,
expected_toks_per_img
:
int
,
num_imgs
:
int
,
num_imgs
:
int
,
...
@@ -38,9 +36,7 @@ def test_processor_override(
...
@@ -38,9 +36,7 @@ def test_processor_override(
# in this test and assume that the kwargs will be correctly expanded by
# in this test and assume that the kwargs will be correctly expanded by
# the partial when calling the custom input processor.
# the partial when calling the custom input processor.
ctx
=
build_model_context
(
ctx
=
build_model_context
(
model_name
=
model
,
model_id
,
tokenizer_name
=
model
,
trust_remote_code
=
True
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
...
...
tests/models/multimodal/processing/test_internvl.py
View file @
469e903b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Tests for InternVL's multimodal preprocessing kwargs."""
"""Tests for InternVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
os
import
os
import
pytest
import
pytest
...
@@ -57,14 +58,14 @@ def _run_check(
...
@@ -57,14 +58,14 @@ def _run_check(
tokenizer
=
processor
.
info
.
get_tokenizer
()
tokenizer
=
processor
.
info
.
get_tokenizer
()
config
=
processor
.
info
.
get_hf_config
()
config
=
processor
.
info
.
get_hf_config
()
prompt
=
"<image>"
*
len
(
images
)
mm_data
=
{
"image"
:
images
}
mm_data
=
{
"image"
:
images
}
total_expected_num_patches
=
sum
(
total_expected_num_patches
=
sum
(
_get_expected_num_patches
(
config
,
image
,
len
(
images
),
min_num
,
max_num
)
_get_expected_num_patches
(
config
,
image
,
len
(
images
),
min_num
,
max_num
)
for
image
in
images
)
for
image
in
images
)
processed_inputs
=
processor
.
apply
(
"<image>"
*
len
(
images
),
mm_data
,
processed_inputs
=
processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
)
mm_processor_kwargs
)
# Ensure we have the right number of placeholders per num_crops size
# Ensure we have the right number of placeholders per num_crops size
image_token_id
=
tokenizer
.
convert_tokens_to_ids
(
"<IMG_CONTEXT>"
)
image_token_id
=
tokenizer
.
convert_tokens_to_ids
(
"<IMG_CONTEXT>"
)
...
@@ -110,9 +111,7 @@ def test_processor_override(
...
@@ -110,9 +111,7 @@ def test_processor_override(
}
}
ctx
=
build_model_context
(
ctx
=
build_model_context
(
model_name
=
model_id
,
model_id
,
tokenizer_name
=
model_id
,
trust_remote_code
=
True
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
)
...
...
Prev
1
…
20
21
22
23
24
25
26
27
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment