Commit 0d1ef037 authored by lintangsutawika

solved merge conflict

parents aa44be3f ada4a31d
@@ -17,7 +17,25 @@ Homepage: https://www.cs.cmu.edu/~glai1/data/race/
 ### Citation
 ```
-BibTeX-formatted citation goes here
+@inproceedings{lai-etal-2017-race,
+    title = "{RACE}: Large-scale {R}e{A}ding Comprehension Dataset From Examinations",
+    author = "Lai, Guokun and
+      Xie, Qizhe and
+      Liu, Hanxiao and
+      Yang, Yiming and
+      Hovy, Eduard",
+    editor = "Palmer, Martha and
+      Hwa, Rebecca and
+      Riedel, Sebastian",
+    booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
+    month = sep,
+    year = "2017",
+    address = "Copenhagen, Denmark",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/D17-1082",
+    doi = "10.18653/v1/D17-1082",
+    pages = "785--794"
+}
 ```
 ### Groups and Tasks
...
@@ -11,4 +11,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -14,4 +14,4 @@ generation_kwargs:
   do_sample: false
   temperature: 0.0
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
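Note on the `metadata` hunks above (the same one-line change recurs in the YAML hunks below): in YAML, `- version: 2.0` turns `metadata` into a list holding a single one-key mapping, whereas `version: 2.0` makes `metadata` a plain mapping. A minimal sketch of the difference with PyYAML (illustrative, not part of the commit):

```python
import yaml

# The old spelling: "version" is wrapped in a single-element list.
before = yaml.safe_load("metadata:\n  - version: 2.0\n")
# The new spelling: "version" is a direct key of the metadata mapping.
after = yaml.safe_load("metadata:\n  version: 2.0\n")

assert before["metadata"] == [{"version": 2.0}]  # list of mappings
assert after["metadata"] == {"version": 2.0}     # plain mapping
```

The mapping form lets consumers read `config["metadata"]["version"]` directly instead of indexing into a one-element list.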
@@ -108,7 +108,7 @@ def _num_cpu_cores():
 class _SCROLLSTask(Task):
-    VERSION = 1
+    VERSION = 2
     DATASET_PATH = "tau/scrolls"
     DATASET_NAME = None
     PRUNE_TOKENIZERS = None
@@ -235,7 +235,6 @@ class _SCROLLSMultipleChoiceTask(_SCROLLSTask):
         }

     def construct_requests(self, doc, ctx, **kwargs):
         request_list = [
             Instance(
                 request_type="loglikelihood",
...
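The `construct_requests` hunk above emits one `loglikelihood` `Instance` per answer choice. For orientation, a hypothetical sketch of the per-choice scoring pattern those requests feed (here `score_choice` stands in for the model call and is not part of this commit):

```python
# Hypothetical sketch of multiple-choice scoring via per-choice
# log-likelihoods; score_choice(context, continuation) is assumed to
# return the total log-probability of `continuation` given `context`.
def pick_answer(context: str, choices: list[str], score_choice) -> int:
    scores = [score_choice(context, " " + choice) for choice in choices]
    # Predict the choice the model assigns the highest likelihood.
    return max(range(len(choices)), key=lambda i: scores[i])
```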
@@ -6,11 +6,14 @@ training_split: train
 validation_split: validation
 doc_to_text: "Q: {{context}} {{question}}\nA:"
 target_delimiter: " "
-doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"]
-doc_to_target: "{{label}}"
+doc_to_choice:
+  - "{{answerA}}"
+  - "{{answerB}}"
+  - "{{answerC}}"
+doc_to_target: "{{ (label|int) - 1 }}"
 metric_list:
   - metric: acc
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
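The `doc_to_target` change above is the substantive fix: Social IQa labels are the 1-indexed strings "1" through "3", while `doc_to_choice` is a 0-indexed list, so `{{ (label|int) - 1 }}` shifts the label onto the right choice. A quick illustration of what the template renders, using Jinja2 directly (the harness applies Jinja-style templating to these fields):

```python
from jinja2 import Template

# A Social IQa-style document: label "2" means answerB is correct.
doc = {"label": "2", "answerA": "a", "answerB": "b", "answerC": "c"}

old = Template("{{label}}").render(**doc)              # "2" -- off by one
new = Template("{{ (label|int) - 1 }}").render(**doc)  # "1" -- index of answerB
print(old, new)
```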
@@ -14,7 +14,6 @@ also determine when no answer is supported by the paragraph and abstain from answering.
 Homepage: https://rajpurkar.github.io/SQuAD-explorer/
 """
 import datasets
-from evaluate import load
 from math import exp
 from functools import partial
@@ -50,7 +49,7 @@ def _squad_agg(key, items):


 @register_task("squadv2")
 class SQuAD2(Task):
-    VERSION = 2
+    VERSION = 3
     DATASET_PATH = "squad_v2"
     DATASET_NAME = None
@@ -120,14 +119,14 @@ class SQuAD2(Task):
                 doc=doc,
                 arguments=(ctx, {"until": ["\n"]}),
                 idx=0,
-                **kwargs
+                **kwargs,
             ),
             Instance(
                 request_type="loglikelihood",
                 doc=doc,
                 arguments=(ctx, " " + "unanswerable"),
                 idx=0,
-                **kwargs
+                **kwargs,
             ),
         ]
...
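The squadv2 hunk above builds two requests per document: a greedy generation stopped at a newline, and the log-likelihood of the continuation " unanswerable". A hypothetical sketch of how the two results can be combined downstream (this is not the class's actual `process_results`; the 0.5 threshold is purely illustrative):

```python
from math import exp

def resolve_prediction(generated: str, unanswerable_logprob: float,
                       threshold: float = 0.5) -> str:
    # Convert the log-likelihood of " unanswerable" into a probability.
    no_answer_prob = exp(unanswerable_logprob)
    # Abstain (empty answer) when the model leans toward "unanswerable",
    # otherwise keep the generated span.
    return "" if no_answer_prob > threshold else generated.strip()
```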
@@ -15,4 +15,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -14,4 +14,4 @@ doc_to_decontamination_query: passage
 metric_list:
   - metric: acc
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -23,4 +23,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -19,4 +19,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -14,4 +14,4 @@ metric_list:
   - metric: f1
     aggregation: !function "aggregate.cb_multi_fi"
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -22,4 +22,4 @@ metric_list:
     aggregation: !function "t5_utils.agg_mean_3class_f1"
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -2,7 +2,6 @@ import sklearn.metrics
 def mean_3class_f1(predictions, references):  # This is a passthrough function
-
     string_label = ["entailment", "contradiction", "neutral"]
     predictions = (
         string_label.index(predictions[0]) if predictions[0] in string_label else 0
@@ -13,7 +12,6 @@ def mean_3class_f1(predictions, references):  # This is a passthrough function
 def agg_mean_3class_f1(items):
-
     predictions, references = zip(*items)
     """Computes the unweighted average of the F1 per class."""
...
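For context on the `cb` metric hunks: `agg_mean_3class_f1` receives `(prediction, reference)` pairs, maps the string labels to class indices, and averages F1 over the three classes. A sketch of that unweighted (macro-averaged) F1 with scikit-learn, which `t5_utils.py` already imports; the exact call in the file may differ:

```python
import sklearn.metrics

def macro_f1(items):
    # items: iterable of (prediction, reference) label pairs.
    predictions, references = zip(*items)
    # average="macro" weights each of the three classes equally,
    # regardless of how often it occurs.
    return sklearn.metrics.f1_score(references, predictions, average="macro")
```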
@@ -12,4 +12,4 @@ doc_to_choice: !function utils.doc_to_choice
 metric_list:
   - metric: acc
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -19,4 +19,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -12,4 +12,4 @@ doc_to_choice: "['''{{answer}}\\nIs the answer correct? yes''', '''{{answer}}\\n
 metric_list:
   - metric: acc
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -20,4 +20,4 @@ metric_list:
     aggregation: !function t5_utils.agg_em
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -5,7 +5,6 @@ import sklearn.metrics
 def f1(predictions, references):  # This is a passthrough function
-
     _prediction = predictions[0]
     _reference = references[0].split("_")[-1]
     string_label = ["False", "True"]
@@ -20,7 +19,6 @@ def f1(predictions, references):  # This is a passthrough function
 def agg_f1(items):
-
     predictions, references = zip(*items)
     references, predictions = np.asarray(references), np.asarray(predictions)
@@ -28,7 +26,6 @@ def agg_f1(items):
 def em(predictions, references):  # This is a passthrough function
-
     _prediction = predictions[0]
     _group, _reference = references[0].split("_")
     string_label = ["False", "True"]
...
@@ -17,4 +17,4 @@ metric_list:
     higher_is_better: True
     aggregation: mean
 metadata:
-  - version: 1.0
+  version: 1.0