Commit 0d1ef037 authored by lintangsutawika
Browse files

solved merge conflict

parents aa44be3f ada4a31d
......@@ -19,4 +19,4 @@ metric_list:
aggregation: !function t5_utils.squad_f1_agg
higher_is_better: true
metadata:
- version: 0.0
version: 0.0
......@@ -3,14 +3,12 @@ import string
import collections
import numpy as np
from tqdm import tqdm
from datasets import Dataset, concatenate_datasets
from datasets import Dataset
from lm_eval.api.metrics import metric_max_over_ground_truths
def doc_to_text(doc):
passage = doc["passage"]
passage = re.sub(r"(\.|\?|\!|\"|\')\n@highlight\n", r"\1 ", passage)
passage = re.sub(r"\n@highlight\n", ". ", passage)
......@@ -34,7 +32,6 @@ def process_docs(dataset):
}
answers = doc.pop("answers")
for idx, answer in enumerate(answers):
for key in split_doc.keys():
if key in doc:
split_doc[key].append(doc[key])
......
......@@ -12,4 +12,4 @@ doc_to_choice: ['True', 'False']
metric_list:
- metric: acc
metadata:
- version: 0.0
version: 0.0
......@@ -19,4 +19,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
- version: 0.0
version: 0.0
......@@ -12,4 +12,4 @@ doc_to_choice: ['no', 'yes']
metric_list:
- metric: acc
metadata:
- version: 1.0
version: 1.0
......@@ -19,4 +19,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
- version: 0.0
version: 0.0
......@@ -12,4 +12,4 @@ doc_to_choice: ['no', 'yes']
metric_list:
- metric: acc
metadata:
- version: 1.0
version: 1.0
......@@ -20,4 +20,4 @@ filter_list:
filter:
- function: !function t5_utils.WSCPostprocess
metadata:
- version: 0.0
version: 0.0
......@@ -8,7 +8,6 @@ def doc_to_text(x):
def _wsc_inputs(x):
words = x["text"].split(" ")
# We would need some special logic to handle the case where the pronoun is the
......@@ -55,7 +54,6 @@ def _wsc_inputs(x):
class WSCPostprocess(Filter):
def __init__(self, **kwargs):
self.determiners = {
"a",
"an",
......@@ -86,10 +84,8 @@ class WSCPostprocess(Filter):
return " ".join([w for w in s.split(" ") if w not in self.determiners])
def apply(self, resps, docs):
filtered_resps = []
for prediction, reference in zip(*(resps, docs["span1_text"])):
prediction = self.clean(prediction[0])
reference = self.clean(reference)
......
......@@ -16,4 +16,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -15,4 +15,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
import argparse
from typing import Dict, List
import yaml
import sacrebleu
try:
import pycountry
......
......@@ -14,4 +14,4 @@ generation_kwargs:
temperature: 0.0
repeats: 1
metadata:
- version: 0.0
version: 1.0
......@@ -28,4 +28,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
- version: 2.0
version: 3.0
......@@ -76,4 +76,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 2.0
version: 3.0
......@@ -33,4 +33,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 2.0
version: 2.0
......@@ -10,4 +10,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 2.0
version: 2.0
......@@ -6,7 +6,6 @@ from rouge_score import rouge_scorer, scoring
def process_results_mc2(doc, results):
lls, is_greedy = zip(*results)
# Split on the first `0` as everything before it is true (`1`).
......@@ -20,7 +19,6 @@ def process_results_mc2(doc, results):
def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
    """Run ``preprocess_function`` over ``dataset`` via its ``map`` method.

    Args:
        dataset: The dataset whose ``map`` method is invoked.

    Returns:
        Whatever ``dataset.map(preprocess_function)`` returns.
    """
    mapped = dataset.map(preprocess_function)
    return mapped
......@@ -49,7 +47,6 @@ def preprocess_function(examples):
def process_results_gen(doc, results):
completion = results[0]
true_refs, false_refs = doc["correct_answers"], doc["incorrect_answers"]
all_refs = true_refs + false_refs
......
......@@ -17,4 +17,4 @@ metric_list:
ignore_case: false
ignore_punctuation: false
metadata:
- version: 1.0
version: 2.0
......@@ -17,4 +17,4 @@ metric_list:
ignore_case: false
ignore_punctuation: false
metadata:
- version: 1.0
version: 2.0
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment