Commit 0d1ef037 authored by lintangsutawika

solved merge conflict

parents aa44be3f ada4a31d
@@ -17,7 +17,25 @@ Homepage: https://www.cs.cmu.edu/~glai1/data/race/
 ### Citation
 ```
-BibTeX-formatted citation goes here
+@inproceedings{lai-etal-2017-race,
+    title = "{RACE}: Large-scale {R}e{A}ding Comprehension Dataset From Examinations",
+    author = "Lai, Guokun and
+      Xie, Qizhe and
+      Liu, Hanxiao and
+      Yang, Yiming and
+      Hovy, Eduard",
+    editor = "Palmer, Martha and
+      Hwa, Rebecca and
+      Riedel, Sebastian",
+    booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
+    month = sep,
+    year = "2017",
+    address = "Copenhagen, Denmark",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/D17-1082",
+    doi = "10.18653/v1/D17-1082",
+    pages = "785--794"
+}
 ```
 ### Groups and Tasks
@@ -11,4 +11,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -14,4 +14,4 @@ generation_kwargs:
   do_sample: false
   temperature: 0.0
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -108,7 +108,7 @@ def _num_cpu_cores():
 class _SCROLLSTask(Task):
-    VERSION = 1
+    VERSION = 2
     DATASET_PATH = "tau/scrolls"
     DATASET_NAME = None
     PRUNE_TOKENIZERS = None
@@ -235,7 +235,6 @@ class _SCROLLSMultipleChoiceTask(_SCROLLSTask):
         }
     def construct_requests(self, doc, ctx, **kwargs):
         request_list = [
             Instance(
                 request_type="loglikelihood",
@@ -6,11 +6,14 @@ training_split: train
 validation_split: validation
 doc_to_text: "Q: {{context}} {{question}}\nA:"
 target_delimiter: " "
-doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"]
-doc_to_target: "{{label}}"
+doc_to_choice:
+  - "{{answerA}}"
+  - "{{answerB}}"
+  - "{{answerC}}"
+doc_to_target: "{{ (label|int) - 1 }}"
 metric_list:
   - metric: acc
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -14,7 +14,6 @@ also determine when no answer is supported by the paragraph and abstain from answering.
 Homepage: https://rajpurkar.github.io/SQuAD-explorer/
 """
 import datasets
 from evaluate import load
 from math import exp
 from functools import partial
@@ -50,7 +49,7 @@ def _squad_agg(key, items):
 @register_task("squadv2")
 class SQuAD2(Task):
-    VERSION = 2
+    VERSION = 3
     DATASET_PATH = "squad_v2"
     DATASET_NAME = None
@@ -120,14 +119,14 @@ class SQuAD2(Task):
                 doc=doc,
                 arguments=(ctx, {"until": ["\n"]}),
                 idx=0,
-                **kwargs
+                **kwargs,
             ),
             Instance(
                 request_type="loglikelihood",
                 doc=doc,
                 arguments=(ctx, " " + "unanswerable"),
                 idx=0,
-                **kwargs
+                **kwargs,
             ),
         ]
@@ -15,4 +15,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -14,4 +14,4 @@ doc_to_decontamination_query: passage
 metric_list:
   - metric: acc
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -23,4 +23,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -19,4 +19,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -14,4 +14,4 @@ metric_list:
   - metric: f1
     aggregation: !function "aggregate.cb_multi_fi"
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -22,4 +22,4 @@ metric_list:
     aggregation: !function "t5_utils.agg_mean_3class_f1"
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -2,7 +2,6 @@ import sklearn.metrics
 def mean_3class_f1(predictions, references): # This is a passthrough function
     string_label = ["entailment", "contradiction", "neutral"]
     predictions = (
         string_label.index(predictions[0]) if predictions[0] in string_label else 0
@@ -13,7 +12,6 @@ def mean_3class_f1(predictions, references): # This is a passthrough function
 def agg_mean_3class_f1(items):
     predictions, references = zip(*items)
     """Computes the unweighted average of the F1 per class."""
@@ -12,4 +12,4 @@ doc_to_choice: !function utils.doc_to_choice
 metric_list:
   - metric: acc
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -19,4 +19,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -12,4 +12,4 @@ doc_to_choice: "['''{{answer}}\\nIs the answer correct? yes''', '''{{answer}}\\n
 metric_list:
   - metric: acc
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -20,4 +20,4 @@ metric_list:
     aggregation: !function t5_utils.agg_em
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -5,7 +5,6 @@ import sklearn.metrics
 def f1(predictions, references): # This is a passthrough function
     _prediction = predictions[0]
     _reference = references[0].split("_")[-1]
     string_label = ["False", "True"]
@@ -20,7 +19,6 @@ def f1(predictions, references): # This is a passthrough function
 def agg_f1(items):
     predictions, references = zip(*items)
     references, predictions = np.asarray(references), np.asarray(predictions)
@@ -28,7 +26,6 @@ def agg_f1(items):
 def em(predictions, references): # This is a passthrough function
     _prediction = predictions[0]
     _group, _reference = references[0].split("_")
     string_label = ["False", "True"]
@@ -17,4 +17,4 @@ metric_list:
     higher_is_better: True
     aggregation: mean
 metadata:
-  - version: 1.0
+  version: 1.0