Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
17083b9b
Unverified
Commit
17083b9b
authored
May 05, 2023
by
Connor Henderson
Committed by
GitHub
May 05, 2023
Browse files
fix: Passing language as acronym to Whisper generate (#23141)
* add fix * address comments * remove error formatting
parent
40082d59
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
2 deletions
+21
-2
src/transformers/models/whisper/modeling_whisper.py
src/transformers/models/whisper/modeling_whisper.py
+6
-2
tests/models/whisper/test_modeling_whisper.py
tests/models/whisper/test_modeling_whisper.py
+15
-0
No files found.
src/transformers/models/whisper/modeling_whisper.py
View file @
17083b9b
...
@@ -1562,6 +1562,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
...
@@ -1562,6 +1562,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
generation_config
.
return_timestamps
=
False
generation_config
.
return_timestamps
=
False
if
language
is
not
None
:
if
language
is
not
None
:
language
=
language
.
lower
()
generation_config
.
language
=
language
generation_config
.
language
=
language
if
task
is
not
None
:
if
task
is
not
None
:
generation_config
.
task
=
task
generation_config
.
task
=
task
...
@@ -1573,10 +1574,13 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
...
@@ -1573,10 +1574,13 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
language_token
=
generation_config
.
language
language_token
=
generation_config
.
language
elif
generation_config
.
language
in
TO_LANGUAGE_CODE
.
keys
():
elif
generation_config
.
language
in
TO_LANGUAGE_CODE
.
keys
():
language_token
=
f
"<|
{
TO_LANGUAGE_CODE
[
generation_config
.
language
]
}
|>"
language_token
=
f
"<|
{
TO_LANGUAGE_CODE
[
generation_config
.
language
]
}
|>"
elif
generation_config
.
language
in
TO_LANGUAGE_CODE
.
values
():
language_token
=
f
"<|
{
generation_config
.
language
}
|>"
else
:
else
:
is_language_code
=
len
(
generation_config
.
language
)
==
2
raise
ValueError
(
raise
ValueError
(
f
"Unsupported language:
{
self
.
language
}
. Language should be one of:"
f
"Unsupported language:
{
generation_config
.
language
}
. Language should be one of:"
f
"
{
list
(
TO_LANGUAGE_CODE
.
key
s
())
if
generation_config
.
language
in
TO_LANGUAGE_CODE
.
keys
()
else
list
(
TO_LANGUAGE_CODE
.
value
s
())
}
."
f
"
{
list
(
TO_LANGUAGE_CODE
.
value
s
())
if
is_language_code
else
list
(
TO_LANGUAGE_CODE
.
key
s
())
}
."
)
)
forced_decoder_ids
.
append
((
1
,
generation_config
.
lang_to_id
[
language_token
]))
forced_decoder_ids
.
append
((
1
,
generation_config
.
lang_to_id
[
language_token
]))
else
:
else
:
...
...
tests/models/whisper/test_modeling_whisper.py
View file @
17083b9b
...
@@ -414,6 +414,21 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
...
@@ -414,6 +414,21 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
model
.
generate
(
input_features
)
model
.
generate
(
input_features
)
model
.
generate
(
input_features
,
num_beams
=
4
,
do_sample
=
True
,
early_stopping
=
False
,
num_return_sequences
=
3
)
model
.
generate
(
input_features
,
num_beams
=
4
,
do_sample
=
True
,
early_stopping
=
False
,
num_return_sequences
=
3
)
def
test_generate_language
(
self
):
config
,
input_dict
=
self
.
model_tester
.
prepare_config_and_inputs
()
input_features
=
input_dict
[
"input_features"
]
model
=
WhisperForConditionalGeneration
(
config
).
to
(
torch_device
)
# Hack to keep the test fast and not require downloading a model with a generation_config
model
.
generation_config
.
__setattr__
(
"lang_to_id"
,
{
"<|en|>"
:
1
})
model
.
generation_config
.
__setattr__
(
"task_to_id"
,
{
"transcribe"
:
2
})
# test language code
model
.
generate
(
input_features
,
language
=
"en"
)
# test tokenizer code
model
.
generate
(
input_features
,
language
=
"<|en|>"
)
# test language name
model
.
generate
(
input_features
,
language
=
"English"
)
def
test_forward_signature
(
self
):
def
test_forward_signature
(
self
):
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment