chenpangpang / transformers

Commit 6dacc79d: fix python2 tests
Authored Jul 05, 2019 by thomwolf
Parent commit: 36bca545
Showing 2 changed files with 7 additions and 5 deletions (+7 -5):

pytorch_transformers/tests/tokenization_tests_commons.py  +2 -4
pytorch_transformers/tokenization_utils.py                 +5 -1
pytorch_transformers/tests/tokenization_tests_commons.py

@@ -12,9 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import os
 import sys

@@ -47,7 +45,7 @@ def create_and_check_save_and_load_tokenizer(tester, tokenizer_class, *inputs, *
 def create_and_check_pickle_tokenizer(tester, tokenizer_class, *inputs, **kwargs):
     tokenizer = tokenizer_class(*inputs, **kwargs)
-    text = "Munich and Berlin are nice cities"
+    text = u"Munich and Berlin are nice cities"
     filename = u"/tmp/tokenizer.bin"
     subwords = tokenizer.tokenize(text)
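The only change in this test file, besides the consolidated __future__ import line, is the u prefix on a string literal. That only matters under Python 2, where a bare literal is a byte string; with the newly imported unicode_literals (or an explicit u"" prefix) the literal is unicode on both interpreter versions. A small illustrative snippet, not part of the commit:

# Illustrative only: how the literal type differs between Python 2 and 3.
from __future__ import unicode_literals

bare = "Munich and Berlin are nice cities"  # unicode on Python 2 thanks to unicode_literals
explicit = u"/tmp/tokenizer.bin"            # u"" prefix is valid on Python 2 and Python 3.3+

# Prints 'unicode' on Python 2 and 'str' on Python 3.
print(type(bare).__name__)
print(type(explicit).__name__)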
pytorch_transformers/tokenization_utils.py

@@ -101,8 +101,12 @@ class PreTrainedTokenizer(object):
             max_len = cls.max_model_input_sizes[pretrained_model_name_or_path]
             kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
 
+        # Merge resolved_vocab_files arguments in kwargs.
+        for args_name, file_path in resolved_vocab_files.items():
+            kwargs[args_name] = file_path
+
         # Instantiate tokenizer.
-        tokenizer = cls(*inputs, **resolved_vocab_files, **kwargs)
+        tokenizer = cls(*inputs, **kwargs)
 
         return tokenizer
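The removed line, cls(*inputs, **resolved_vocab_files, **kwargs), uses two ** unpackings in a single call. That is PEP 448 syntax, available only on Python 3.5+; under Python 2 it is a SyntaxError, which is likely why the Python 2 tests failed. The commit instead merges the resolved file paths into kwargs before a single ** unpacking. A minimal sketch of that portable pattern, using made-up names (DummyTokenizer, build) rather than the library's real API:

# Hypothetical stand-in for a tokenizer class; not part of the library.
class DummyTokenizer(object):
    def __init__(self, vocab_file=None, max_len=None):
        self.vocab_file = vocab_file
        self.max_len = max_len

def build(cls, inputs, resolved_vocab_files, **kwargs):
    # Fold the resolved file paths into kwargs first, then use a single
    # ** unpacking, which is valid on Python 2 and Python 3 alike.
    for args_name, file_path in resolved_vocab_files.items():
        kwargs[args_name] = file_path
    return cls(*inputs, **kwargs)

tok = build(DummyTokenizer, (), {'vocab_file': '/tmp/vocab.txt'}, max_len=512)
print(tok.vocab_file)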