Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
f8aace6b
"src/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "c21799212f09753f27b64f6c9cbd56a685e05f2b"
Commit
f8aace6b
authored
Aug 30, 2019
by
thomwolf
Browse files
update tokenizers to use self.XX_token_id instead of converting self.XX_token
parent
8faf2e08
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
13 deletions
+13
-13
pytorch_transformers/tokenization_bert.py
pytorch_transformers/tokenization_bert.py
+3
-3
pytorch_transformers/tokenization_roberta.py
pytorch_transformers/tokenization_roberta.py
+3
-3
pytorch_transformers/tokenization_xlm.py
pytorch_transformers/tokenization_xlm.py
+3
-3
pytorch_transformers/tokenization_xlnet.py
pytorch_transformers/tokenization_xlnet.py
+4
-4
No files found.
pytorch_transformers/tokenization_bert.py
View file @
f8aace6b
...
@@ -171,15 +171,15 @@ class BertTokenizer(PreTrainedTokenizer):
...
@@ -171,15 +171,15 @@ class BertTokenizer(PreTrainedTokenizer):
def add_special_tokens_single_sentence(self, token_ids):
    """
    Adds special tokens to a sequence for sequence classification tasks.

    A BERT sequence has the following format: [CLS] X [SEP]

    Args:
        token_ids: list of token ids (ints) for the sequence.

    Returns:
        list of ints: ``[cls_token_id] + token_ids + [sep_token_id]``.
    """
    # Use the cached *_token_id properties instead of re-converting the
    # special-token strings on every call.
    return [self.cls_token_id] + token_ids + [self.sep_token_id]
def add_special_tokens_sentences_pair(self, token_ids_0, token_ids_1):
    """
    Adds special tokens to a sequence pair for sequence classification tasks.

    A BERT sequence pair has the following format: [CLS] A [SEP] B [SEP]

    Args:
        token_ids_0: list of token ids (ints) for the first sequence.
        token_ids_1: list of token ids (ints) for the second sequence.

    Returns:
        list of ints: ``[CLS] + token_ids_0 + [SEP] + token_ids_1 + [SEP]``.
    """
    # Cached id properties avoid converting the special-token strings
    # (self.cls_token / self.sep_token) through the vocab on every call.
    sep = [self.sep_token_id]
    cls = [self.cls_token_id]
    return cls + token_ids_0 + sep + token_ids_1 + sep
def
save_vocabulary
(
self
,
vocab_path
):
def
save_vocabulary
(
self
,
vocab_path
):
...
...
pytorch_transformers/tokenization_roberta.py
View file @
f8aace6b
...
@@ -86,13 +86,13 @@ class RobertaTokenizer(GPT2Tokenizer):
...
@@ -86,13 +86,13 @@ class RobertaTokenizer(GPT2Tokenizer):
def add_special_tokens_single_sentence(self, token_ids):
    """
    Adds special tokens to a sequence for sequence classification tasks.

    A RoBERTa sequence has the following format: [CLS] X [SEP]

    Args:
        token_ids: list of token ids (ints) for the sequence.

    Returns:
        list of ints: ``[cls_token_id] + token_ids + [sep_token_id]``.
    """
    # Use the cached *_token_id properties instead of re-converting the
    # special-token strings on every call.
    return [self.cls_token_id] + token_ids + [self.sep_token_id]
def add_special_tokens_sentences_pair(self, token_ids_0, token_ids_1):
    """
    Adds special tokens to a sequence pair for sequence classification tasks.

    A RoBERTa sequence pair has the following format: [CLS] A [SEP][SEP] B [SEP]
    (note the doubled separator between the two segments).

    Args:
        token_ids_0: list of token ids (ints) for the first sequence.
        token_ids_1: list of token ids (ints) for the second sequence.

    Returns:
        list of ints: ``[CLS] + token_ids_0 + [SEP, SEP] + token_ids_1 + [SEP]``.
    """
    # Cached id properties avoid converting the special-token strings
    # through the vocab on every call.
    sep = [self.sep_token_id]
    cls = [self.cls_token_id]
    return cls + token_ids_0 + sep + sep + token_ids_1 + sep
pytorch_transformers/tokenization_xlm.py
View file @
f8aace6b
...
@@ -220,15 +220,15 @@ class XLMTokenizer(PreTrainedTokenizer):
...
@@ -220,15 +220,15 @@ class XLMTokenizer(PreTrainedTokenizer):
def add_special_tokens_single_sentence(self, token_ids):
    """
    Adds special tokens to a sequence for sequence classification tasks.

    An XLM sequence has the following format: [CLS] X [SEP]

    Args:
        token_ids: list of token ids (ints) for the sequence.

    Returns:
        list of ints: ``[cls_token_id] + token_ids + [sep_token_id]``.
    """
    # Use the cached *_token_id properties instead of re-converting the
    # special-token strings on every call.
    return [self.cls_token_id] + token_ids + [self.sep_token_id]
def add_special_tokens_sentences_pair(self, token_ids_0, token_ids_1):
    """
    Adds special tokens to a sequence pair for sequence classification tasks.

    An XLM sequence pair has the following format: [CLS] A [SEP] B [SEP]

    Args:
        token_ids_0: list of token ids (ints) for the first sequence.
        token_ids_1: list of token ids (ints) for the second sequence.

    Returns:
        list of ints: ``[CLS] + token_ids_0 + [SEP] + token_ids_1 + [SEP]``.
    """
    # Cached id properties avoid converting the special-token strings
    # through the vocab on every call.
    sep = [self.sep_token_id]
    cls = [self.cls_token_id]
    return cls + token_ids_0 + sep + token_ids_1 + sep
def
save_vocabulary
(
self
,
save_directory
):
def
save_vocabulary
(
self
,
save_directory
):
...
...
pytorch_transformers/tokenization_xlnet.py
View file @
f8aace6b
...
@@ -182,8 +182,8 @@ class XLNetTokenizer(PreTrainedTokenizer):
...
@@ -182,8 +182,8 @@ class XLNetTokenizer(PreTrainedTokenizer):
def add_special_tokens_single_sentence(self, token_ids):
    """
    Adds special tokens to a sequence for sequence classification tasks.

    An XLNet sequence has the following format: X [SEP][CLS]
    (XLNet appends its special tokens rather than prepending them).

    NOTE(review): the original docstring described the *pair* format
    ("A [SEP] B [SEP][CLS]") although the body handles a single sequence;
    the wording here is corrected to match the code.

    Args:
        token_ids: list of token ids (ints) for the sequence.

    Returns:
        list of ints: ``token_ids + [sep_token_id] + [cls_token_id]``.
    """
    # Cached id properties avoid converting the special-token strings
    # through the vocab on every call.
    sep = [self.sep_token_id]
    cls = [self.cls_token_id]
    return token_ids + sep + cls
def add_special_tokens_sentences_pair(self, token_ids_0, token_ids_1):
    """
    Adds special tokens to a sequence pair for sequence classification tasks.

    An XLNet sequence pair has the following format: A [SEP] B [SEP][CLS]
    (XLNet appends its special tokens rather than prepending them).

    NOTE(review): the original docstring described the *single-sequence*
    format ("X [SEP][CLS]") although the body concatenates two sequences;
    the wording here is corrected to match the code.

    Args:
        token_ids_0: list of token ids (ints) for the first sequence.
        token_ids_1: list of token ids (ints) for the second sequence.

    Returns:
        list of ints: ``token_ids_0 + [SEP] + token_ids_1 + [SEP] + [CLS]``.
    """
    # Cached id properties avoid converting the special-token strings
    # through the vocab on every call.
    sep = [self.sep_token_id]
    cls = [self.cls_token_id]
    return token_ids_0 + sep + token_ids_1 + sep + cls
def
save_vocabulary
(
self
,
save_directory
):
def
save_vocabulary
(
self
,
save_directory
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment