Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
814b9550
"magic_pdf/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "9a3fe26ee4c54ea920de6b392c204082f16c58a9"
Unverified
Commit
814b9550
authored
Dec 01, 2020
by
Rodolfo Quispe
Committed by
GitHub
Dec 01, 2020
Browse files
Fix doc for language code (#8848)
parent
4a9e502a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
6 deletions
+6
-6
src/transformers/models/mbart/tokenization_mbart.py
src/transformers/models/mbart/tokenization_mbart.py
+3
-3
src/transformers/models/mbart/tokenization_mbart_fast.py
src/transformers/models/mbart/tokenization_mbart_fast.py
+3
-3
No files found.
src/transformers/models/mbart/tokenization_mbart.py
View file @
814b9550
...
...
@@ -153,7 +153,7 @@ class MBartTokenizer(XLMRobertaTokenizer):
adding special tokens. An MBART sequence has the following format, where ``X`` represents the sequence:
- ``input_ids`` (for encoder) ``X [eos, src_lang_code]``
- ``decoder_input_ids``: (for decoder) ``[tgt_lang_code] X [eos]``
- ``decoder_input_ids``: (for decoder) ``X [eos, tgt_lang_code]``
BOS is never used. Pairs of sequences are not the expected use case, but they will be handled without a
separator.
...
...
@@ -220,13 +220,13 @@ class MBartTokenizer(XLMRobertaTokenizer):
return
model_inputs
def
set_src_lang_special_tokens
(
self
,
src_lang
)
->
None
:
"""Reset the special tokens to the source lang setting. No prefix and suffix=[eos, cur_lang_code]."""
"""Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code]."""
self
.
cur_lang_code
=
self
.
lang_code_to_id
[
src_lang
]
self
.
prefix_tokens
=
[]
self
.
suffix_tokens
=
[
self
.
eos_token_id
,
self
.
cur_lang_code
]
def
set_tgt_lang_special_tokens
(
self
,
lang
:
str
)
->
None
:
"""Reset the special tokens to the target language setting. Prefix [tgt_lang_code], suffix =[eos]."""
"""Reset the special tokens to the target language setting. No prefix and suffix=[eos, tgt_lang_code]."""
self
.
cur_lang_code
=
self
.
lang_code_to_id
[
lang
]
self
.
prefix_tokens
=
[]
self
.
suffix_tokens
=
[
self
.
eos_token_id
,
self
.
cur_lang_code
]
src/transformers/models/mbart/tokenization_mbart_fast.py
View file @
814b9550
...
...
@@ -152,7 +152,7 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
An MBART sequence has the following format, where ``X`` represents the sequence:
- ``input_ids`` (for encoder) ``X [eos, src_lang_code]``
- ``decoder_input_ids``: (for decoder) ``[tgt_lang_code] X [eos]``
- ``decoder_input_ids``: (for decoder) ``X [eos, tgt_lang_code]``
BOS is never used. Pairs of sequences are not the expected use case, but they will be handled without a
separator.
...
...
@@ -218,7 +218,7 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
return
model_inputs
def
set_src_lang_special_tokens
(
self
,
src_lang
)
->
None
:
"""Reset the special tokens to the source lang setting. No prefix and suffix=[eos, cur_lang_code]."""
"""Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code]."""
self
.
cur_lang_code
=
self
.
convert_tokens_to_ids
(
src_lang
)
self
.
prefix_tokens
=
[]
self
.
suffix_tokens
=
[
self
.
eos_token_id
,
self
.
cur_lang_code
]
...
...
@@ -233,7 +233,7 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
)
def
set_tgt_lang_special_tokens
(
self
,
lang
:
str
)
->
None
:
"""Reset the special tokens to the target language setting. Prefix [tgt_lang_code], suffix =[eos]."""
"""Reset the special tokens to the target language setting. No prefix and suffix=[eos, tgt_lang_code]."""
self
.
cur_lang_code
=
self
.
convert_tokens_to_ids
(
lang
)
self
.
prefix_tokens
=
[]
self
.
suffix_tokens
=
[
self
.
eos_token_id
,
self
.
cur_lang_code
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment