"examples/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "36b583b705429d39c71b01f04446404d7417da74"
Commit df52abe3 authored by erenup's avatar erenup
Browse files

add sep_token between question and choice

parent 43c24325
...@@ -329,7 +329,12 @@ def convert_examples_to_features(examples, label_list, max_seq_length, ...@@ -329,7 +329,12 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
if example.question.find("_") != -1: if example.question.find("_") != -1:
tokens_b = tokenizer.tokenize(example.question.replace("_", ending)) tokens_b = tokenizer.tokenize(example.question.replace("_", ending))
else: else:
tokens_b = tokenizer.tokenize(example.question + " " + ending) tokens_b = tokenizer.tokenize(example.question)
tokens_b += [sep_token]
if sep_token_extra:
tokens_b += [sep_token]
tokens_b += tokenizer.tokenize(ending)
special_tokens_count = 4 if sep_token_extra else 3 special_tokens_count = 4 if sep_token_extra else 3
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count) _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
...@@ -425,10 +430,11 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length): ...@@ -425,10 +430,11 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length):
total_length = len(tokens_a) + len(tokens_b) total_length = len(tokens_a) + len(tokens_b)
if total_length <= max_length: if total_length <= max_length:
break break
if len(tokens_a) > len(tokens_b): # if len(tokens_a) > len(tokens_b):
tokens_a.pop() # tokens_a.pop()
else: # else:
tokens_b.pop() # tokens_b.pop()
tokens_a.pop()
processors = { processors = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment