Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
a175a9dc
Commit
a175a9dc
authored
Aug 27, 2019
by
thomwolf
Browse files
add kwargs to base encode function
parent
f1b01874
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
5 deletions
+6
-5
pytorch_transformers/tokenization_utils.py
pytorch_transformers/tokenization_utils.py
+6
-5
No files found.
pytorch_transformers/tokenization_utils.py
View file @
a175a9dc
...
@@ -563,7 +563,7 @@ class PreTrainedTokenizer(object):
...
@@ -563,7 +563,7 @@ class PreTrainedTokenizer(object):
def
_convert_token_to_id
(
self
,
token
):
def
_convert_token_to_id
(
self
,
token
):
raise
NotImplementedError
raise
NotImplementedError
def
encode
(
self
,
text
,
text_pair
=
None
,
add_special_tokens
=
False
):
def
encode
(
self
,
text
,
text_pair
=
None
,
add_special_tokens
=
False
,
**
kwargs
):
"""
"""
Converts a string into a sequence of ids (integers), using the tokenizer and vocabulary.
Converts a string into a sequence of ids (integers), using the tokenizer and vocabulary.
...
@@ -574,15 +574,16 @@ class PreTrainedTokenizer(object):
...
@@ -574,15 +574,16 @@ class PreTrainedTokenizer(object):
text_pair: Optional second sequence to be encoded.
text_pair: Optional second sequence to be encoded.
add_special_tokens: if set to ``True``, the sequences will be encoded with the special tokens relative
add_special_tokens: if set to ``True``, the sequences will be encoded with the special tokens relative
to their model.
to their model.
**kwargs: passed to the `self.tokenize()` method
"""
"""
if
text_pair
is
None
:
if
text_pair
is
None
:
if
add_special_tokens
:
if
add_special_tokens
:
return
self
.
add_special_tokens_single_sentence
(
self
.
convert_tokens_to_ids
(
self
.
tokenize
(
text
)))
return
self
.
add_special_tokens_single_sentence
(
self
.
convert_tokens_to_ids
(
self
.
tokenize
(
text
,
**
kwargs
)))
else
:
else
:
return
self
.
convert_tokens_to_ids
(
self
.
tokenize
(
text
))
return
self
.
convert_tokens_to_ids
(
self
.
tokenize
(
text
,
**
kwargs
))
first_sentence_tokens
=
[
self
.
_convert_token_to_id
(
token
)
for
token
in
self
.
tokenize
(
text
)]
first_sentence_tokens
=
[
self
.
_convert_token_to_id
(
token
)
for
token
in
self
.
tokenize
(
text
,
**
kwargs
)]
second_sentence_tokens
=
[
self
.
_convert_token_to_id
(
token
)
for
token
in
self
.
tokenize
(
text_pair
)]
second_sentence_tokens
=
[
self
.
_convert_token_to_id
(
token
)
for
token
in
self
.
tokenize
(
text_pair
,
**
kwargs
)]
if
add_special_tokens
:
if
add_special_tokens
:
return
self
.
add_special_tokens_sentences_pair
(
first_sentence_tokens
,
second_sentence_tokens
)
return
self
.
add_special_tokens_sentences_pair
(
first_sentence_tokens
,
second_sentence_tokens
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment