Commit e1fdf2a8 authored by lintangsutawika
Browse files

update

parent 0e26c0bd
......@@ -6,7 +6,7 @@ dataset_name: wic
training_split: train
validation_split: validation
output_type: greedy_until
doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}}"
doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}} word: {{word}}"
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
......
......@@ -2,7 +2,7 @@ group:
- super-glue-lm-eval-v1
task: wsc
dataset_path: super_glue
dataset_name: wsc
dataset_name: wsc.fixed
output_type: multiple_choice
training_split: train
validation_split: validation
......
import re
from lm_eval.utils import general_detokenize
def t5_prompt_doc_to_text(x):
    """Build the T5-style WSC prompt with the second span wrapped in ``*``.

    Args:
        x: Document mapping. Reads ``x["text"]``, ``x["span2_text"]`` and
            ``x["span2_index"]``. The index is used as the number of
            whitespace-terminated tokens that precede the span.

    Returns:
        ``"wsc: "`` followed by the text, with the span surrounded by
        asterisks. If the span is not found at that token position the
        text is returned unmarked (still prefixed).
    """

    def _mark_span(text, span_str, span_idx, mark):
        # Skip exactly `span_idx` tokens (non-space run + one whitespace
        # char) from the start of the text, then require the span itself.
        # The span is escaped so punctuation such as "(", ".", "+" or "?"
        # is matched literally rather than interpreted as regex syntax.
        pattern = (
            r"^((?:\S+\s){" + str(span_idx) + "})(" + re.escape(span_str) + ")"
        )
        # A callable replacement keeps `mark` literal (no backslash or
        # group-reference processing of the replacement template).
        return re.sub(
            pattern,
            lambda m: "{0}{1}{2}{1}".format(m.group(1), mark, m.group(2)),
            text,
        )

    text = x["text"]
    text = _mark_span(text, x["span2_text"], x["span2_index"], "*")
    return "wsc: " + text
def default_doc_to_text(x):
raw_passage = x["text"]
# NOTE: HuggingFace span indices are word-based not character-based.
......
......@@ -6,7 +6,7 @@ dataset_name: wsc
training_split: train
validation_split: validation
output_type: greedy_until
doc_to_text: !function "preprocess_wsc.t5_prompt_doc_to_text"
doc_to_text: !function "t5_utils.t5_prompt_doc_to_text"
doc_to_target: span1_text
metric_list:
- metric: exact_match
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment