Commit 8c93320a authored by lintangsutawika
Browse files

add multirc metrics

parent 4a768feb
...@@ -8,7 +8,7 @@ validation_split: validation ...@@ -8,7 +8,7 @@ validation_split: validation
output_type: greedy_until output_type: greedy_until
doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}" doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['False', 'True'] doc_to_choice: "{% set group_id = idx.question|string %}{{[group_id+'_False', group_id+'_True']}}"
generation_kwargs: generation_kwargs:
until: until:
- "</s>" - "</s>"
...@@ -18,3 +18,6 @@ metric_list: ...@@ -18,3 +18,6 @@ metric_list:
- metric: !function t5_utils.f1 - metric: !function t5_utils.f1
aggregation: !function t5_utils.agg_f1 aggregation: !function t5_utils.agg_f1
higher_is_better: true higher_is_better: true
- metric: !function t5_utils.em
aggregation: !function t5_utils.agg_em
higher_is_better: true
import collections
import numpy as np
import sklearn.metrics
def f1(predictions, references):
    """Convert one prediction/reference pair into integer class ids.

    This is a per-example passthrough: no score is computed here. The
    returned pair is meant to be collected and scored by an aggregation
    function.

    Args:
        predictions: Sequence whose first element is the predicted label
            string (expected to be ``"False"`` or ``"True"``).
        references: Sequence whose first element is a string of the form
            ``"<group>_<label>"``; only the part after the last underscore
            is used here.

    Returns:
        A ``(prediction, reference)`` tuple of ints, where ``0`` stands for
        ``"False"`` and ``1`` for ``"True"``.

    Raises:
        ValueError: If the reference label is neither ``"False"`` nor
            ``"True"``.
    """
    labels = ("False", "True")
    gold = labels.index(references[0].split("_")[-1])
    guess_text = predictions[0]
    if guess_text in labels:
        guess = labels.index(guess_text)
    else:
        # An unrecognised output is scored as the wrong class. Use integer
        # arithmetic (not ``not bool(...)``) so the id is always an int,
        # matching the type produced by the branch above.
        guess = 1 - gold
    return (guess, gold)
def agg_f1(items):
    """Compute the F1 score over all collected ``(prediction, reference)`` pairs.

    Args:
        items: Iterable of ``(prediction, reference)`` tuples, one per
            example.

    Returns:
        The value returned by ``sklearn.metrics.f1_score`` when given the
        references as ground truth and the predictions as estimates.
    """
    # Transpose the list of pairs into two parallel sequences.
    predicted, gold = zip(*items)
    y_true = np.asarray(gold)
    y_pred = np.asarray(predicted)
    return sklearn.metrics.f1_score(y_true, y_pred)
def em(predictions, references):
    """Convert one prediction/reference pair into a grouped triple.

    This is a per-example passthrough: no score is computed here. The
    returned triple is meant to be collected and scored per group by an
    aggregation function.

    Args:
        predictions: Sequence whose first element is the predicted label
            string (expected to be ``"False"`` or ``"True"``).
        references: Sequence whose first element is a string of the form
            ``"<group>_<label>"``.

    Returns:
        A ``(group, prediction, reference)`` tuple: ``group`` is the text
        before the last underscore, and the two labels are ints where ``0``
        stands for ``"False"`` and ``1`` for ``"True"``.

    Raises:
        ValueError: If the reference contains no underscore, or its label
            part is neither ``"False"`` nor ``"True"``.
    """
    labels = ("False", "True")
    # Split on the LAST underscore only, so a group id that itself contains
    # underscores stays intact instead of breaking the two-way unpacking.
    group, gold_text = references[0].rsplit("_", 1)
    gold = labels.index(gold_text)
    guess_text = predictions[0]
    if guess_text in labels:
        guess = labels.index(guess_text)
    else:
        # An unrecognised output is scored as the wrong class; keep it an
        # int so it has the same type as a parsed label.
        guess = 1 - gold
    return (group, guess, gold)
def agg_em(items):
    """Average per-group exact match over ``(group, prediction, reference)`` triples.

    A group scores 1.0 only when its predictions equal its references
    element for element (in arrival order); otherwise it scores 0.0.

    Args:
        items: Iterable of ``(group, prediction, reference)`` tuples.

    Returns:
        The mean of the per-group scores, as computed by ``np.mean``.
    """
    # Bucket (reference, prediction) pairs under their group key.
    by_group = collections.defaultdict(list)
    for group_id, predicted, gold in items:
        by_group[group_id].append((gold, predicted))

    per_group = [
        float(
            np.array_equal(
                [gold for gold, _ in pairs],
                [predicted for _, predicted in pairs],
            )
        )
        for pairs in by_group.values()
    ]
    return np.mean(per_group)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment