Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
3723f30a
Unverified
Commit
3723f30a
authored
Jun 05, 2020
by
Sam Shleifer
Committed by
GitHub
Jun 05, 2020
Browse files
[cleanup] MarianTokenizer: delete unused constants (#4802)
parent
acaa2e62
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
8 deletions
+0
-8
src/transformers/tokenization_marian.py
src/transformers/tokenization_marian.py
+0
-8
No files found.
src/transformers/tokenization_marian.py
View file @
3723f30a
...
...
@@ -7,7 +7,6 @@ from typing import Dict, List, Optional, Tuple, Union
import sentencepiece

from .file_utils import S3_BUCKET_PREFIX
from .tokenization_utils import BatchEncoding, PreTrainedTokenizer
...
...
@@ -17,11 +16,6 @@ vocab_files_names = {
"vocab"
:
"vocab.json"
,
"tokenizer_config_file"
:
"tokenizer_config.json"
,
}
MODEL_NAMES
=
(
"opus-mt-en-de"
,)
# TODO(SS): delete this, the only required constant is vocab_files_names
PRETRAINED_VOCAB_FILES_MAP
=
{
k
:
{
m
:
f
"
{
S3_BUCKET_PREFIX
}
/Helsinki-NLP/
{
m
}
/
{
fname
}
"
for
m
in
MODEL_NAMES
}
for
k
,
fname
in
vocab_files_names
.
items
()
}
# Example URL https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/vocab.json
...
...
@@ -41,8 +35,6 @@ class MarianTokenizer(PreTrainedTokenizer):
"""
vocab_files_names
=
vocab_files_names
pretrained_vocab_files_map
=
PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes
=
{
m
:
512
for
m
in
MODEL_NAMES
}
model_input_names
=
[
"attention_mask"
]
# actually attention_mask, decoder_attention_mask
language_code_re
=
re
.
compile
(
">>.+<<"
)
# type: re.Pattern
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment