"git@developer.sourcefind.cn:chenpangpang/diffusers.git" did not exist on "b1fe1706425aa0ca36a38dafab54e0dd4a6e6baf"
Commit c832f43a authored by LysandreJik

`output_token_type` -> `token_type_ids`

parent 3927d775
@@ -413,7 +413,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             max_length=max_seq_length,
             truncate_first_sequence=True  # We're truncating the first sequence as a priority
         )
-        input_ids, segment_ids = inputs["input_ids"], inputs["output_token_type"]
+        input_ids, segment_ids = inputs["input_ids"], inputs["token_type_ids"]
         # The mask has 1 for real tokens and 0 for padding tokens. Only real
         # tokens are attended to.
...
@@ -197,7 +197,7 @@ class CommonTestCases:
         seq_0 = "Test this method."
         seq_1 = "With these inputs."
         information = tokenizer.encode_plus(seq_0, seq_1, add_special_tokens=True, output_token_type=True)
-        sequences, mask = information["input_ids"], information["output_token_type"]
+        sequences, mask = information["input_ids"], information["token_type_ids"]
         assert len(sequences) == len(mask)

     def test_number_of_added_tokens(self):
...
@@ -765,7 +765,7 @@ class PreTrainedTokenizer(object):
             information["input_ids"] = sequence_tokens
             if output_token_type:
-                information["output_token_type"] = [0] * len(information["input_ids"])
+                information["token_type_ids"] = [0] * len(information["input_ids"])
         else:
             first_sentence_tokens = get_input_ids(text)
             second_sentence_tokens = get_input_ids(text_pair)
@@ -780,7 +780,7 @@ class PreTrainedTokenizer(object):
                 )
                 if output_token_type:
-                    information["output_token_type"] = self.create_mask_from_sequences(text, text_pair)
+                    information["token_type_ids"] = self.create_mask_from_sequences(text, text_pair)
             else:
                 logger.warning("No special tokens were added. The two sequences have been concatenated.")
                 sequence = first_sentence_tokens + second_sentence_tokens
@@ -789,7 +789,7 @@ class PreTrainedTokenizer(object):
                     information["overflowing_tokens"] = sequence[max_length - stride:]
                     sequence = sequence[:max_length]
                 if output_token_type:
-                    information["output_token_type"] = [0] * len(sequence)
+                    information["token_type_ids"] = [0] * len(sequence)
                 information["input_ids"] = sequence
...
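For reference, a minimal usage sketch of the renamed key follows. This is an illustration, not part of the commit: the import path and checkpoint name are assumptions, and note that only the key in the returned dict changes here; the `output_token_type=True` keyword argument (as used in the test above) is left untouched by this commit.

    # Sketch: encode_plus still takes `output_token_type=True`, but the
    # returned dict now exposes the segment ids under "token_type_ids"
    # instead of "output_token_type".
    from pytorch_transformers import BertTokenizer  # import path assumed for this era of the repo

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")  # checkpoint name illustrative
    inputs = tokenizer.encode_plus(
        "Test this method.",
        "With these inputs.",
        add_special_tokens=True,
        output_token_type=True,
    )
    input_ids = inputs["input_ids"]
    token_type_ids = inputs["token_type_ids"]  # was inputs["output_token_type"] before this commit
    assert len(input_ids) == len(token_type_ids)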