Commit 2d843472 authored by jon-tow
Browse files

feat: temp remove bootstrap calc

parent 83dbfbf6
...@@ -284,15 +284,12 @@ def evaluate( ...@@ -284,15 +284,12 @@ def evaluate(
# hotfix: bleu, chrf, ter seem to be really expensive to bootstrap # hotfix: bleu, chrf, ter seem to be really expensive to bootstrap
# so we run them less iterations. still looking for a cleaner way to do this # so we run them less iterations. still looking for a cleaner way to do this
stderr = lm_eval.metrics.stderr_for_metric( # stderr = lm_eval.metrics.stderr_for_metric(
metric=task.aggregation()[real_metric], # metric=task.aggregation()[real_metric],
bootstrap_iters=min(bootstrap_iters, 1000) # bootstrap_iters=min(bootstrap_iters, 1000)
if metric in ["bleu", "chrf", "ter"] # if metric in ["bleu", "chrf", "ter"]
else bootstrap_iters, # else bootstrap_iters,
) # )
if stderr is not None:
results[task_name][metric + "_stderr"] = stderr(items)
return {"results": dict(results), "versions": dict(versions)} return {"results": dict(results), "versions": dict(versions)}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment