Commit fc329d31 authored by lintangsutawika

update

parent 60ca1a27
@@ -21,27 +21,27 @@ filter_list:
       - function: remove_whitespace
       - function: take_first
 metric_list:
-  - metric: !function metric.exact
-    aggregation: mean
+  - metric: exact
+    aggregation: !function utils.exact
     higher_is_better: true
-  - metric: !function metric.f1
-    aggregation: mean
-    higher_is_better: true
-  - metric: !function metric.HasAns_exact
-    aggregation: mean
-    higher_is_better: true
-  - metric: !function metric.HasAns_f1
-    aggregation: mean
-    higher_is_better: true
-  - metric: !function metric.NoAns_exact
-    aggregation: mean
-    higher_is_better: true
-  - metric: !function metric.NoAns_f1
-    aggregation: mean
-    higher_is_better: true
-  - metric: !function metric.best_exact
-    aggregation: mean
-    higher_is_better: true
-  - metric: !function metric.best_f1
-    aggregation: mean
-    higher_is_better: true
+  # - metric: f1
+  #   aggregation: mean
+  #   higher_is_better: true
+  # - metric: HasAns_exact
+  #   aggregation: mean
+  #   higher_is_better: true
+  # - metric: HasAns_f1
+  #   aggregation: mean
+  #   higher_is_better: true
+  # - metric: NoAns_exact
+  #   aggregation: mean
+  #   higher_is_better: true
+  # - metric: NoAns_f1
+  #   aggregation: mean
+  #   higher_is_better: true
+  # - metric: best_exact
+  #   aggregation: mean
+  #   higher_is_better: true
+  # - metric: best_f1
+  #   aggregation: mean
+  #   higher_is_better: true
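Note on the YAML change above: only the exact metric stays active, and its aggregation now points at !function utils.exact instead of mean; the remaining SQuAD v2 metrics are commented out for the moment. A minimal sketch of what that wiring amounts to, assuming (this is an assumption, not a description of the harness's internals) that the per-document values returned by process_results are collected into a list and handed to the configured aggregation callable; aggregate_metric and collected_items are hypothetical names used only for illustration:

from typing import Any, Callable, List

def aggregate_metric(per_doc_values: List[Any],
                     agg_fn: Callable[[List[Any]], float]) -> float:
    # With `aggregation: mean` each document's score would simply be averaged.
    # With `aggregation: !function utils.exact` the whole list of collected
    # per-document values is scored in one call, which is what corpus-level
    # SQuAD v2 scoring needs.
    return agg_fn(per_doc_values)

# Hypothetical usage: final_score = aggregate_metric(collected_items, utils.exact)

The likely motivation is that SQuAD v2 scores such as best_exact and best_f1 are defined over the whole evaluation set (a threshold is chosen globally), so averaging per-document values would not reproduce them.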
import evaluate
from functools import partial

def _squad_metric(predictions, references):
    squad_metric = evaluate.load("squad_v2")
    return squad_metric.compute(predictions=predictions, references=references)

# Exact match (the normalized answer exactly match the gold answer)
def exact(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("exact", 0)

# The F-score of predicted tokens versus the gold answer
def f1(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("f1", 0)

# Exact match (the normalized answer exactly match the gold answer)
def HasAns_exact(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("HasAns_exact", 0)

# The F-score of predicted tokens versus the gold answer
def HasAns_f1(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("HasAns_f1", 0)

# Exact match (the normalized answer exactly match the gold answer)
def NoAns_exact(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("NoAns_exact", 0)

# The F-score of predicted tokens versus the gold answer
def NoAns_f1(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("NoAns_f1", 0)

# Best exact match (with varying threshold)
def best_exact(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("best_exact", 0)

# Best F1 (with varying threshold)
def best_f1(predictions, references):
    return _squad_metric(predictions=predictions, references=references).get("best_f1", 0)
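The full listing above — apparently the module the old YAML referenced via !function metric.* — delegates everything to the Hugging Face evaluate package's squad_v2 metric. For reference, a self-contained usage sketch; the id, answer text, and character offset below are invented for illustration:

import evaluate

squad_v2 = evaluate.load("squad_v2")

# squad_v2 expects one dict per example on each side.
predictions = [{
    "id": "example-0",                  # invented id, must match the reference id
    "prediction_text": "Normandy",
    "no_answer_probability": 0.0,       # probability that the question is unanswerable
}]
references = [{
    "id": "example-0",
    "answers": {"text": ["Normandy"], "answer_start": [159]},
}]

scores = squad_v2.compute(predictions=predictions, references=references)
# Returns a dict with keys such as "exact", "f1", "HasAns_exact", "NoAns_f1",
# "best_exact", "best_f1" (the HasAns_*/NoAns_* keys only appear when the
# corresponding answerable/unanswerable examples are present), which is why
# the functions above fall back to .get(key, 0).
print(scores["exact"], scores["f1"])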
@@ -20,8 +20,12 @@ def process_results(doc, results):
         "answers": doc["answers"],
     }
 
-    print(_squad_metric(predictions, references))
-    return _squad_metric(predictions, references)
+    return {
+        "predictions": predictions,
+        "reference": references
+    }
+    # return _squad_metric([predictions], [references])
+    # return {key: value if key in metrics for key, value in score.items()}
 
 
 def _squad_metric(predictions, references):
@@ -29,33 +33,36 @@ def _squad_metric(predictions, references):
     return squad_metric.compute(predictions=predictions, references=references)
 
 # Exact match (the normalized answer exactly match the gold answer)
-def exact(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("exact", 0)
+def exact(items):
+    print(items)
+    import sys; sys.exit()
+    predictions, references = zip(*items)
+    return _squad_metric(predictions=predictions, references=references)["exact"]
 
 # The F-score of predicted tokens versus the gold answer
 def f1(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("f1", 0)
+    return _squad_metric(predictions=predictions, references=references)["f1"]
 
 # Exact match (the normalized answer exactly match the gold answer)
 def HasAns_exact(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("HasAns_exact", 0)
+    return _squad_metric(predictions=predictions, references=references)["HasAns_exact"]
 
 # The F-score of predicted tokens versus the gold answer
 def HasAns_f1(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("HasAns_f1", 0)
+    return _squad_metric(predictions=predictions, references=references)["HasAns_f1"]
 
 # Exact match (the normalized answer exactly match the gold answer)
 def NoAns_exact(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("NoAns_exact", 0)
+    return _squad_metric(predictions=predictions, references=references)["NoAns_exact"]
 
 # The F-score of predicted tokens versus the gold answer
 def NoAns_f1(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("NoAns_f1", 0)
+    return _squad_metric(predictions=predictions, references=references)["NoAns_f1"]
 
 # Best exact match (with varying threshold)
 def best_exact(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("best_exact", 0)
+    return _squad_metric(predictions=predictions, references=references)["best_exact"]
 
 # Best F1 (with varying threshold)
 def best_f1(predictions, references):
-    return _squad_metric(predictions=predictions, references=references).get("best_f1", 0)
+    return _squad_metric(predictions=predictions, references=references)["best_f1"]
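In the new exact(items) above, print(items) and import sys; sys.exit() read as leftover debugging: as committed, the aggregation dumps the collected items and terminates the run before computing anything. A possible cleaned-up version is sketched below, assuming each collected item ends up as a (prediction_dict, reference_dict) pair — the commented-out alternatives in process_results suggest that shape is still being settled; the helper is repeated so the sketch stands alone:

import evaluate

def _squad_metric(predictions, references):
    squad_metric = evaluate.load("squad_v2")
    return squad_metric.compute(predictions=predictions, references=references)

def exact(items):
    # items: iterable of (prediction_dict, reference_dict) pairs collected
    # across documents (an assumption about the final shape, see above).
    predictions, references = zip(*items)
    # Corpus-level SQuAD v2 exact match, as a percentage, over the whole set.
    return _squad_metric(predictions=list(predictions), references=list(references))["exact"]

Indexing with ["exact"] (rather than .get("exact", 0)) matches the rest of this diff and fails loudly if the key is missing, which is arguably preferable for a required score.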