Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
6dacc79d
Commit
6dacc79d
authored
Jul 05, 2019
by
thomwolf
Browse files
fix python2 tests
parent
36bca545
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
5 deletions
+7
-5
pytorch_transformers/tests/tokenization_tests_commons.py
pytorch_transformers/tests/tokenization_tests_commons.py
+2
-4
pytorch_transformers/tokenization_utils.py
pytorch_transformers/tokenization_utils.py
+5
-1
No files found.
pytorch_transformers/tests/tokenization_tests_commons.py
View file @
6dacc79d
...
@@ -12,9 +12,7 @@
...
@@ -12,9 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
from
__future__
import
division
from
__future__
import
print_function
import
os
import
os
import
sys
import
sys
...
@@ -47,7 +45,7 @@ def create_and_check_save_and_load_tokenizer(tester, tokenizer_class, *inputs, *
...
@@ -47,7 +45,7 @@ def create_and_check_save_and_load_tokenizer(tester, tokenizer_class, *inputs, *
def
create_and_check_pickle_tokenizer
(
tester
,
tokenizer_class
,
*
inputs
,
**
kwargs
):
def
create_and_check_pickle_tokenizer
(
tester
,
tokenizer_class
,
*
inputs
,
**
kwargs
):
tokenizer
=
tokenizer_class
(
*
inputs
,
**
kwargs
)
tokenizer
=
tokenizer_class
(
*
inputs
,
**
kwargs
)
text
=
"Munich and Berlin are nice cities"
text
=
u
"Munich and Berlin are nice cities"
filename
=
u
"/tmp/tokenizer.bin"
filename
=
u
"/tmp/tokenizer.bin"
subwords
=
tokenizer
.
tokenize
(
text
)
subwords
=
tokenizer
.
tokenize
(
text
)
...
...
pytorch_transformers/tokenization_utils.py
View file @
6dacc79d
...
@@ -101,8 +101,12 @@ class PreTrainedTokenizer(object):
...
@@ -101,8 +101,12 @@ class PreTrainedTokenizer(object):
max_len
=
cls
.
max_model_input_sizes
[
pretrained_model_name_or_path
]
max_len
=
cls
.
max_model_input_sizes
[
pretrained_model_name_or_path
]
kwargs
[
'max_len'
]
=
min
(
kwargs
.
get
(
'max_len'
,
int
(
1e12
)),
max_len
)
kwargs
[
'max_len'
]
=
min
(
kwargs
.
get
(
'max_len'
,
int
(
1e12
)),
max_len
)
# Merge resolved_vocab_files arguments in kwargs.
for
args_name
,
file_path
in
resolved_vocab_files
.
items
():
kwargs
[
args_name
]
=
file_path
# Instantiate tokenizer.
# Instantiate tokenizer.
tokenizer
=
cls
(
*
inputs
,
**
resolved_vocab_files
,
**
kwargs
)
tokenizer
=
cls
(
*
inputs
,
**
kwargs
)
return
tokenizer
return
tokenizer
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment