Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cb072ce9
Unverified
Commit
cb072ce9
authored
Apr 17, 2025
by
Isotr0py
Committed by
GitHub
Apr 17, 2025
Browse files
[Bugfix] Update Florence-2 tokenizer to make grounding tasks work (#16734)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
95aca283
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
16 additions
and
10 deletions
+16
-10
examples/offline_inference/encoder_decoder_multimodal.py
examples/offline_inference/encoder_decoder_multimodal.py
+2
-1
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+1
-1
tests/conftest.py
tests/conftest.py
+2
-0
tests/models/encoder_decoder/vision_language/test_florence2.py
.../models/encoder_decoder/vision_language/test_florence2.py
+10
-7
tests/models/registry.py
tests/models/registry.py
+1
-1
No files found.
examples/offline_inference/encoder_decoder_multimodal.py
View file @
cb072ce9
...
...
@@ -22,7 +22,7 @@ class ModelRequestData(NamedTuple):
def
run_florence2
():
engine_args
=
EngineArgs
(
model
=
"microsoft/Florence-2-large"
,
tokenizer
=
"
facebook/bart-large
"
,
tokenizer
=
"
Isotr0py/Florence-2-tokenizer
"
,
max_num_seqs
=
8
,
trust_remote_code
=
True
,
limit_mm_per_prompt
=
{
"image"
:
1
},
...
...
@@ -165,6 +165,7 @@ def main(args):
temperature
=
0
,
top_p
=
1.0
,
max_tokens
=
64
,
skip_special_tokens
=
False
,
)
start
=
time
.
time
()
...
...
examples/offline_inference/vision_language.py
View file @
cb072ce9
...
...
@@ -150,7 +150,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:
engine_args
=
EngineArgs
(
model
=
"microsoft/Florence-2-large"
,
tokenizer
=
"
facebook/bart-large
"
,
tokenizer
=
"
Isotr0py/Florence-2-tokenizer
"
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
trust_remote_code
=
True
,
...
...
tests/conftest.py
View file @
cb072ce9
...
...
@@ -925,6 +925,7 @@ class VllmRunner:
max_tokens
:
int
,
num_logprobs
:
int
,
num_prompt_logprobs
:
Optional
[
int
]
=
None
,
skip_special_tokens
:
bool
=
True
,
)
->
Union
[
list
[
TokensTextLogprobs
],
list
[
TokensTextLogprobsPromptLogprobs
]]:
greedy_logprobs_params
=
SamplingParams
(
...
...
@@ -932,6 +933,7 @@ class VllmRunner:
max_tokens
=
max_tokens
,
logprobs
=
num_logprobs
,
prompt_logprobs
=
(
num_prompt_logprobs
),
skip_special_tokens
=
skip_special_tokens
,
)
'''
Greedy logprobs generation for vLLM encoder/decoder models
...
...
tests/models/encoder_decoder/vision_language/test_florence2.py
View file @
cb072ce9
...
...
@@ -13,12 +13,12 @@ from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from
...utils
import
check_logprobs_close
MODELS
=
[
"microsoft/Florence-2-base"
]
# Florence-2
uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we
borrow the BartTokenizer from the original Bart model
TOKENIZER
=
"
facebook/bart-base
"
# Florence-2
model repo's tokenizer config is missing some special tokens.
# Therefore, we
use a converted tokenizer from a forked repo
TOKENIZER
=
"
Isotr0py/Florence-2-tokenizer
"
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"<
CAPTION
>"
,
# special task token
"<
OD
>"
,
# special task token
which will output special tokens
"cherry_blossom"
:
"Describe in detail what is shown in the image."
,
})
...
...
@@ -45,7 +45,6 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,
output_ids
,
output_str
,
out_logprobs
=
hf_output
output_str
=
output_str
.
replace
(
"</s>"
,
""
).
replace
(
"<s>"
,
""
)
output_ids
=
[
ids
for
ids
in
output_ids
if
ids
not
in
[
0
,
2
]]
return
output_ids
,
output_str
,
out_logprobs
...
...
@@ -71,8 +70,11 @@ def run_test(
enforce_eager
=
True
)
as
vllm_model
:
vllm_outputs_per_case
=
[
vllm_model
.
generate_encoder_decoder_greedy_logprobs
(
prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
)
for
prompts
in
inputs
prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
,
skip_special_tokens
=
False
,
)
for
prompts
in
inputs
]
hf_inputs
=
[
get_hf_images_prompts
(
prompts
)
for
prompts
in
inputs
]
...
...
@@ -93,6 +95,7 @@ def run_test(
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
num_outputs_0_skip_tokens
=
1
,
)
...
...
tests/models/registry.py
View file @
cb072ce9
...
...
@@ -366,7 +366,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
"Florence2ForConditionalGeneration"
:
_HfExamplesInfo
(
"microsoft/Florence-2-base"
,
# noqa: E501
tokenizer
=
"
facebook/bart-base
"
,
tokenizer
=
"
Isotr0py/Florence-2-tokenizer
"
,
trust_remote_code
=
True
),
# noqa: E501
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-11B-Vision-Instruct"
),
# noqa: E501
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3"
),
# noqa: E501
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment