Unverified Commit aa457edc authored by Chenjie Luo, committed by GitHub

Fix float limit override (#2325)

* Fix float limit override

See: https://github.com/EleutherAI/lm-evaluation-harness/issues/2324

The float limit gets overridden by the int limit resolved for the previous task when multiple tasks are triggered together.

This PR fixes the issue by caching the original limit argument and resolving a per-task limit (a minimal sketch of the failure mode is included below).

* Update evaluator.py

* Update evaluator.py
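
A minimal sketch of the failure mode and of the caching fix. The `get_sample_size` below is a simplified stand-in for illustration only (the real helper in `lm_eval` takes a `Task` object), and the doc counts are made up:

```python
# Simplified stand-in for get_sample_size: a float limit is a fraction of the
# task's docs, an int limit is an absolute sample count (illustrative only).
def get_sample_size(n_docs: int, limit):
    if limit is None:
        return n_docs
    if isinstance(limit, float):
        return max(1, int(n_docs * limit))
    return min(int(limit), n_docs)


doc_counts = [1000, 50]  # two hypothetical tasks evaluated together
limit = 0.1              # user requests 10% of each task

# Before the fix: the loop variable `limit` is overwritten with the resolved
# int, so the second task sees 100 instead of the original 0.1.
buggy = []
for n in doc_counts:
    limit = get_sample_size(n, limit)
    buggy.append(limit)
print(buggy)   # [100, 50] -- the second task runs on all 50 docs, not 5

# After the fix: cache the original argument and resolve a fresh limit per task.
limit_arg = 0.1
limits = [get_sample_size(n, limit_arg) for n in doc_counts]
print(limits)  # [100, 5]
```

Keeping the per-task limits in a list also lets the later `zip(eval_tasks, limits)` loops report the effective sample count of each task, instead of whatever value the last iteration happened to leave in `limit`.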
parent fe3040f1
@@ -433,10 +433,14 @@ def evaluate(
             )
     # end multimodality validation check
+    # Cache the limit arg.
+    limit_arg = limit
+    limits = []
     for task_output in eval_tasks:
         task: Task = task_output.task
-        limit = get_sample_size(task, limit)
+        limit = get_sample_size(task, limit_arg)
+        limits.append(limit)
         task.build_all_requests(
             limit=limit,
             rank=lm.rank,
@@ -506,7 +510,7 @@ def evaluate(
     WORLD_SIZE = lm.world_size
     ### Postprocess outputs ###
     # TODO: del model here, maybe (idea: allow user to specify device of e.g. reward model separately)
-    for task_output in eval_tasks:
+    for task_output, limit in zip(eval_tasks, limits):
         task = task_output.task
         task.apply_filters()
@@ -655,7 +659,7 @@ def evaluate(
                         len(task_output.task.eval_docs),
                     ),
                 }
-                for task_output in eval_tasks
+                for task_output, limit in zip(eval_tasks, limits)
             },
         }
         if log_samples: