Unverified commit cda25fef, authored by Lintang Sutawika and committed by GitHub

Merge branch 'main' into standardize_metrics

parents dfb41835 4d10ad56
@@ -13,4 +13,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
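The recurring one-line change in this merge drops the leading dash under `metadata:`, and the same fix repeats across the task configs below. A minimal sketch (assuming PyYAML is available) of why the dash matters:

```python
# With the leading dash, `metadata` parses as a list of one-key dicts;
# without it, it parses as a plain mapping, so ["version"] works directly.
import yaml

old_form = yaml.safe_load("metadata:\n  - version: 1.0\n")
new_form = yaml.safe_load("metadata:\n  version: 1.0\n")

print(old_form["metadata"])  # [{'version': 1.0}] -> needs [0]["version"]
print(new_form["metadata"])  # {'version': 1.0}  -> ["version"] reads directly
```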
@@ -19,4 +19,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -11,4 +11,4 @@ doc_to_choice: ["no", "yes"]
 metric_list:
   - metric: f1
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -15,4 +15,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -3,7 +3,6 @@ from functools import partial
 def process_docs(dataset, set_answer_type="bool"):
     FEATURES = ["title", "abstract", "question", "answer", "answer_type"]

     def _categorise_answer(answer_blob):
...
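The truncated `utils.py` hunk shows `process_docs` filtering QASPER documents by answer type via a nested `_categorise_answer` helper. A hedged sketch of what such a categoriser typically looks like; the field names follow the Hugging Face `allenai/qasper` answer schema, and the control flow is illustrative, not the file's exact code:

```python
# QASPER answers carry one of four mutually exclusive forms; the helper maps
# an answer blob to a (type, text) pair that process_docs can filter on.
# This body is a sketch under the assumed schema, not the real utils.py.
def _categorise_answer(answer_blob):
    if answer_blob["unanswerable"]:
        return "none", "unanswerable"
    if answer_blob["yes_no"] is not None:       # boolean questions
        return "bool", "yes" if answer_blob["yes_no"] else "no"
    if answer_blob["extractive_spans"]:         # spans copied from the paper
        return "extractive", ", ".join(answer_blob["extractive_spans"])
    return "free form answer", answer_blob["free_form_answer"]

blob = {"unanswerable": False, "yes_no": True,
        "extractive_spans": [], "free_form_answer": ""}
assert _categorise_answer(blob) == ("bool", "yes")
```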
@@ -17,7 +17,25 @@ Homepage: https://www.cs.cmu.edu/~glai1/data/race/
 ### Citation

 ```
-BibTeX-formatted citation goes here
+@inproceedings{lai-etal-2017-race,
+    title = "{RACE}: Large-scale {R}e{A}ding Comprehension Dataset From Examinations",
+    author = "Lai, Guokun and
+      Xie, Qizhe and
+      Liu, Hanxiao and
+      Yang, Yiming and
+      Hovy, Eduard",
+    editor = "Palmer, Martha and
+      Hwa, Rebecca and
+      Riedel, Sebastian",
+    booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
+    month = sep,
+    year = "2017",
+    address = "Copenhagen, Denmark",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/D17-1082",
+    doi = "10.18653/v1/D17-1082",
+    pages = "785--794"
+}
 ```

 ### Groups and Tasks
...
@@ -11,4 +11,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -14,4 +14,4 @@ generation_kwargs:
   do_sample: false
   temperature: 0.0
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -235,7 +235,6 @@ class _SCROLLSMultipleChoiceTask(_SCROLLSTask):
         }

     def construct_requests(self, doc, ctx, **kwargs):
         request_list = [
             Instance(
                 request_type="loglikelihood",
...
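The SCROLLS hunk shows the harness's standard multiple-choice scoring pattern: `construct_requests` builds one `loglikelihood` Instance per candidate answer, and the best-scoring continuation is later taken as the prediction. A hedged sketch of that pattern; the import path reflects recent lm-evaluation-harness layouts and may differ by version:

```python
# One loglikelihood request per answer choice; idx records which choice each
# request scores. The Instance fields mirror those visible in the diff above.
from lm_eval.api.instance import Instance

def build_choice_requests(doc, ctx, choices, **kwargs):
    return [
        Instance(
            request_type="loglikelihood",
            doc=doc,
            arguments=(ctx, f" {choice}"),  # leading space joins context/choice
            idx=i,
            **kwargs,
        )
        for i, choice in enumerate(choices)
    ]
```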
@@ -6,11 +6,14 @@ training_split: train
 validation_split: validation
 doc_to_text: "Q: {{context}} {{question}}\nA:"
 target_delimiter: " "
-doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"]
-doc_to_target: "{{label}}"
+doc_to_choice:
+  - "{{answerA}}"
+  - "{{answerB}}"
+  - "{{answerC}}"
+doc_to_target: "{{ (label|int) - 1 }}"
 metric_list:
   - metric: acc
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
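Beyond the cosmetic reflow of `doc_to_choice` into block style, this hunk fixes a real indexing bug: Social IQa stores `label` as the string "1"/"2"/"3", while `doc_to_choice` is a zero-indexed list, so `{{ (label|int) - 1 }}` casts to int and shifts down by one. The same logic in plain Python:

```python
# Equivalent of the Jinja change "{{ label }}" -> "{{ (label|int) - 1 }}":
# cast the string label to int and shift to a zero-based choice index.
def doc_to_target(doc):
    return int(doc["label"]) - 1  # "2" -> 1, i.e. the index of answerB

assert doc_to_target({"label": "2"}) == 1
```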
@@ -14,7 +14,6 @@ also determine when no answer is supported by the paragraph and abstain from answering.
 Homepage: https://rajpurkar.github.io/SQuAD-explorer/
 """
 import datasets
 from evaluate import load
 from math import exp
 from functools import partial
@@ -120,14 +119,14 @@ class SQuAD2(Task):
                 doc=doc,
                 arguments=(ctx, {"until": ["\n"]}),
                 idx=0,
-                **kwargs
+                **kwargs,
             ),
             Instance(
                 request_type="loglikelihood",
                 doc=doc,
                 arguments=(ctx, " " + "unanswerable"),
                 idx=0,
-                **kwargs
+                **kwargs,
             ),
         ]
...
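Aside from the trailing-comma formatting, this hunk exposes SQuAD2's dual-request design: a generation request for the answer text plus a `loglikelihood` request on " unanswerable" to decide abstention. A rough sketch of how such a pair can be combined downstream; the function and threshold here are illustrative, not the harness's exact `process_results`:

```python
# The `from math import exp` visible in the first sub-hunk is used for this
# kind of logprob -> probability conversion. Names and threshold are assumed.
from math import exp

def combine(generated_answer: str, unanswerable_logprob: float,
            no_answer_threshold: float = 0.5) -> str:
    if exp(unanswerable_logprob) > no_answer_threshold:
        return ""  # abstain: the model finds "unanswerable" likely enough
    return generated_answer.strip()

print(combine("Denver Broncos", -3.2))  # exp(-3.2) ~ 0.04 < 0.5 -> keep answer
```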
@@ -15,4 +15,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -14,4 +14,4 @@ doc_to_decontamination_query: passage
 metric_list:
   - metric: acc
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -23,4 +23,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -19,4 +19,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -14,4 +14,4 @@ metric_list:
   - metric: f1
     aggregation: !function "aggregate.cb_multi_fi"
 metadata:
-  - version: 1.0
+  version: 1.0
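The `!function "aggregate.cb_multi_fi"` value is the harness's YAML tag for binding a config field to a Python callable in a module alongside the task config. As the `t5_utils` hunk below shows, an aggregation hook receives the accumulated per-document `(prediction, reference)` items and returns a single float. A minimal, hypothetical example of that signature; the body (plain accuracy) is only a demo, not `cb_multi_fi` itself:

```python
# A hypothetical aggregation of the kind `!function` points at: the harness
# resolves the dotted name to a callable and hands it (prediction, reference)
# pairs. This accuracy body is illustrative only.
def my_aggregation(items):
    predictions, references = zip(*items)
    return sum(p == r for p, r in zip(predictions, references)) / len(items)

print(my_aggregation([("yes", "yes"), ("no", "yes")]))  # 0.5
```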
@@ -22,4 +22,4 @@ metric_list:
     aggregation: !function "t5_utils.agg_mean_3class_f1"
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -2,7 +2,6 @@ import sklearn.metrics

 def mean_3class_f1(predictions, references):  # This is a passthrough function
     string_label = ["entailment", "contradiction", "neutral"]
     predictions = (
         string_label.index(predictions[0]) if predictions[0] in string_label else 0
@@ -13,7 +12,6 @@ def mean_3class_f1(predictions, references):  # This is a passthrough function

 def agg_mean_3class_f1(items):
     predictions, references = zip(*items)
     """Computes the unweighted average of the F1 per class."""
...
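Per its docstring, `agg_mean_3class_f1` computes the unweighted average of per-class F1 over the three entailment labels, which is exactly macro F1. A minimal sklearn equivalent of what that docstring describes:

```python
# Unweighted (macro) average of per-class F1 over the three labels, in one
# sklearn call. A sketch of the described behavior, not the file's exact code.
import sklearn.metrics

def macro_f1(predictions, references):
    return sklearn.metrics.f1_score(references, predictions, average="macro")

print(macro_f1([0, 1, 2, 1], [0, 1, 2, 0]))  # ~0.778 on this toy example
```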
@@ -12,4 +12,4 @@ doc_to_choice: !function utils.doc_to_choice
 metric_list:
   - metric: acc
 metadata:
-  - version: 1.0
+  version: 1.0