Commit 135b41ce authored by lintangsutawika

format

parent 0436b5d6
@@ -4,6 +4,7 @@ import numpy as np
from rouge_score import rouge_scorer, scoring

def process_results_mc2(doc, results):
    lls, is_greedy = zip(*results)
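For orientation, `results` here is assumed to be a sequence of (log-likelihood, is_greedy) pairs, one per answer choice, as produced by loglikelihood-style requests; that input shape is an assumption, not something shown in this diff. A minimal sketch:

# Hypothetical input shape (assumption): one (log-likelihood, is_greedy) pair per choice.
results = [(-0.4, True), (-2.1, False), (-3.0, False)]
lls, is_greedy = zip(*results)  # lls == (-0.4, -2.1, -3.0)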
@@ -24,7 +25,6 @@ def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
    def preprocess_function(examples):
        def _format_answers(answers):
            formatted_answers = []
            for answer in answers:
@@ -121,26 +121,27 @@ def process_results_gen(doc, results):

def bleu(refs, preds):
    """
    Returns `t5` style BLEU scores. See the related implementation:
    https://github.com/google-research/text-to-text-transfer-transformer/blob/3d10afd51ba97ac29eb66ae701eca274488202f7/t5/evaluation/metrics.py#L41

    :param refs:
        A `list` of `list` of reference `str`s.
    :param preds:
        A `list` of predicted `str`s.
    """
    score = sacrebleu.corpus_bleu(
        preds,
        refs,
        smooth_method="exp",
        smooth_value=0.0,
        force=False,
        lowercase=False,
        tokenize="intl",
        use_effective_order=False,
    ).score
    return score
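A minimal usage sketch for `bleu`, with made-up strings, following the shapes given in the docstring (`preds` is a flat list of hypotheses, `refs` is a list of reference streams):

# Hypothetical example strings; an exact match yields a corpus BLEU of 100.0.
preds = ["the cat sat on the mat"]
refs = [["the cat sat on the mat"]]
print(bleu(refs, preds))  # 100.0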
def rouge(refs, preds):
    """
@@ -169,6 +170,7 @@ def rouge(refs, preds):
    result = aggregator.aggregate()
    return {type: result[type].mid.fmeasure * 100 for type in rouge_types}

# def bleurt_max(predictions, references):
#     pass
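Similarly, a hedged sketch of calling `rouge`; the keys of the returned dict depend on the `rouge_types` defined in the elided part of the function, so the rouge1/rouge2/rougeLsum keys below are an assumption based on rouge_score's usual types:

# Hypothetical example; keys assume rouge_types = ["rouge1", "rouge2", "rougeLsum"].
refs = ["the cat sat on the mat"]
preds = ["the cat sat on the mat"]
print(rouge(refs, preds))  # e.g. {"rouge1": 100.0, "rouge2": 100.0, "rougeLsum": 100.0}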
...