Commit 91e2171b authored by Chen Chen, committed by A. Unique TensorFlower
Browse files

Internal Change

PiperOrigin-RevId: 318942343
parent e30aa7d8
...@@ -148,6 +148,9 @@ def _tokenize_example(example, max_length, tokenizer, text_preprocessing=None): ...@@ -148,6 +148,9 @@ def _tokenize_example(example, max_length, tokenizer, text_preprocessing=None):
new_examples = [] new_examples = []
new_example = InputExample(sentence_id=example.sentence_id) new_example = InputExample(sentence_id=example.sentence_id)
for i, word in enumerate(example.words): for i, word in enumerate(example.words):
if any([x < 0 for x in example.label_ids]):
raise ValueError("Unexpected negative label_id: %s" % example.label_ids)
if text_preprocessing: if text_preprocessing:
word = text_preprocessing(word) word = text_preprocessing(word)
subwords = tokenizer.tokenize(word) subwords = tokenizer.tokenize(word)
...@@ -177,11 +180,7 @@ def _convert_single_example(example, max_seq_length, tokenizer): ...@@ -177,11 +180,7 @@ def _convert_single_example(example, max_seq_length, tokenizer):
tokens.extend(example.words) tokens.extend(example.words)
tokens.append("[SEP]") tokens.append("[SEP]")
input_ids = tokenizer.convert_tokens_to_ids(tokens) input_ids = tokenizer.convert_tokens_to_ids(tokens)
label_ids = [_PADDING_LABEL_ID] label_ids = [_PADDING_LABEL_ID]
if any([x < 0 for x in example.label_ids]):
raise ValueError("Unexpected negative label_id: %s" % example.label_ids)
label_ids.extend(example.label_ids) label_ids.extend(example.label_ids)
label_ids.append(_PADDING_LABEL_ID) label_ids.append(_PADDING_LABEL_ID)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment