feat: temp remove bootstrap calc

2d843472 · jon-tow · 83dbfbf6 · 2d843472
Commit 2d843472 authored May 01, 2023 by jon-tow
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 9 deletions

lm_eval/evaluator.py lm_eval/evaluator.py +6 -9

No files found.
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
@@ -284,15 +284,12 @@ def evaluate(
        # hotfix: bleu, chrf, ter seem to be really expensive to bootstrap
        # so we run them for fewer iterations. still looking for a cleaner way to do this

-        stderr = lm_eval.metrics.stderr_for_metric(
-            metric=task.aggregation()[real_metric],
-            bootstrap_iters=min(bootstrap_iters, 1000)
-            if metric in ["bleu", "chrf", "ter"]
-            else bootstrap_iters,
-        )
-
-        if stderr is not None:
-            results[task_name][metric + "_stderr"] = stderr(items)
+        # stderr = lm_eval.metrics.stderr_for_metric(
+        #     metric=task.aggregation()[real_metric],
+        #     bootstrap_iters=min(bootstrap_iters, 1000)
+        #     if metric in ["bleu", "chrf", "ter"]
+        #     else bootstrap_iters,
+        # )

    return {"results": dict(results), "versions": dict(versions)}