chenpangpang / transformers · Commits

Commit 13285220 (unverified signature)
Authored Mar 05, 2024 by Arthur; committed by GitHub, Mar 05, 2024
[`UdopTokenizer`] Fix post merge imports (#29451)
* update
* ...
* nits
* arf
* 🧼
* beat the last guy
* style everyone

Parent: fa7f3cf3
Showing 3 changed files with 16 additions and 14 deletions (+16 −14):
src/transformers/models/udop/tokenization_udop.py        +0  −7
src/transformers/models/udop/tokenization_udop_fast.py   +11 −6
tests/models/udop/test_tokenization_udop.py              +5  −1
src/transformers/models/udop/tokenization_udop.py

@@ -157,12 +157,6 @@ PRETRAINED_VOCAB_FILES_MAP = {
     }
 }
 
-# TODO(PVP) - this should be removed in Transformers v5
-PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
-    "microsoft/udop-large": 512,
-}
-
-
 class UdopTokenizer(PreTrainedTokenizer):
     """
     Adapted from [`LayoutXLMTokenizer`] and [`T5Tokenizer`]. Based on
@@ -256,7 +250,6 @@ class UdopTokenizer(PreTrainedTokenizer):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
     model_input_names = ["input_ids", "attention_mask"]
 
     def __init__(
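The slow tokenizer loses the deprecated PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES constant and the max_model_input_sizes class attribute that pointed at it; these were what the fast tokenizer module tried, and failed, to import after the merge. A minimal sanity check that the slow tokenizer still loads after the change — a hypothetical snippet, not part of the commit, assuming sentencepiece is installed and the microsoft/udop-large checkpoint is reachable:

# Hypothetical check: import the slow tokenizer from its concrete module,
# the same import path the updated test file switches to.
from transformers.models.udop.tokenization_udop import UdopTokenizer

tok = UdopTokenizer.from_pretrained("microsoft/udop-large")
print(tok.model_input_names)  # ['input_ids', 'attention_mask']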
src/transformers/models/udop/tokenization_udop_fast.py

@@ -29,11 +29,6 @@ from ...tokenization_utils_base import (
 )
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import PaddingStrategy, TensorType, add_end_docstrings, is_sentencepiece_available, logging
-from ..udop.tokenization_udop import (
-    PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES,
-    PRETRAINED_VOCAB_FILES_MAP,
-    VOCAB_FILES_NAMES,
-)
 
 
 if is_sentencepiece_available():
@@ -42,6 +37,17 @@ else:
     UdopTokenizer = None
 
+VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer.json"}
+
+PRETRAINED_VOCAB_FILES_MAP = {
+    "vocab_file": {
+        "microsoft/udop-large": "https://huggingface.co/microsoft/udop-large/resolve/main/spiece.model",
+    },
+    "tokenizer_file": {
+        "microsoft/udop-large": "https://huggingface.co/microsoft/udop-large/resolve/main/tokenizer.json",
+    },
+}
+
 logger = logging.get_logger(__name__)
 
 UDOP_ENCODE_KWARGS_DOCSTRING = r"""
@@ -197,7 +203,6 @@ class UdopTokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
     model_input_names = ["input_ids", "attention_mask"]
     slow_tokenizer_class = UdopTokenizer
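Rather than importing the file-name constants from the slow tokenizer module — the import that broke once PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES was deleted there — the fast tokenizer now defines VOCAB_FILES_NAMES and PRETRAINED_VOCAB_FILES_MAP locally, while the slow class itself stays behind the sentencepiece guard. A standalone sketch of that guard pattern; the module paths are the real ones from the diff, but the snippet itself is illustrative:

from transformers.utils import is_sentencepiece_available

if is_sentencepiece_available():
    # The slow tokenizer needs the sentencepiece backend.
    from transformers.models.udop.tokenization_udop import UdopTokenizer
else:
    # Without sentencepiece there is no slow class for the fast
    # tokenizer to convert from, so the name is bound to None.
    UdopTokenizer = None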
tests/models/udop/test_tokenization_udop.py

@@ -22,12 +22,12 @@ from typing import List
 from transformers import (
     AddedToken,
     SpecialTokensMixin,
-    UdopTokenizer,
     UdopTokenizerFast,
     is_tf_available,
     is_torch_available,
     logging,
 )
+from transformers.models.udop.tokenization_udop import UdopTokenizer
 from transformers.testing_utils import (
     get_tests_dir,
     is_pt_tf_cross_test,
@@ -1717,6 +1717,10 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_alignement_methods(self):
         pass
 
+    @unittest.skip("#TODO will be removed in main")
+    def test_pretrained_model_lists(self):
+        pass
+
     @unittest.skip("UDOP tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_pair_input(self):
         pass
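The tests now import the slow UdopTokenizer straight from its module, so the test file imports cleanly even where the top-level export is gated, and a new case is skipped: test_pretrained_model_lists, which exercised the removed pretrained-list constants. A self-contained sketch of the same skip pattern, using only the standard library; the test class and method names here are made up:

import unittest

class ExampleTokenizerTest(unittest.TestCase):
    @unittest.skip("tokenizer requires boxes besides sequences")
    def test_text_only_encoding(self):
        # Never runs; unittest reports it as skipped with the reason above.
        pass

if __name__ == "__main__":
    unittest.main()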