Unverified Commit aa457edc authored by Chenjie Luo, committed by GitHub

Fix float limit override (#2325)

* Fix float limit override

See: https://github.com/EleutherAI/lm-evaluation-harness/issues/2324

The float limit gets overridden by the int limit resolved for the previous task when multiple tasks are triggered together.

This PR fixes the issue by caching the original limit argument and resolving a per-task limit (a minimal sketch of the failure mode is included below).

* Update evaluator.py

* Update evaluator.py
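
A minimal sketch of the failure mode and of the caching fix. The `get_sample_size` below is a simplified stand-in for illustration only (the real helper in `lm_eval` takes a `Task` object), and the doc counts are made up:

```python
# Simplified stand-in for get_sample_size: a float limit is a fraction of the
# task's docs, an int limit is an absolute sample count (illustrative only).
def get_sample_size(n_docs: int, limit):
    if limit is None:
        return n_docs
    if isinstance(limit, float):
        return max(1, int(n_docs * limit))
    return min(int(limit), n_docs)


doc_counts = [1000, 50]  # two hypothetical tasks evaluated together
limit = 0.1              # user requests 10% of each task

# Before the fix: the loop variable `limit` is overwritten with the resolved
# int, so the second task sees 100 instead of the original 0.1.
buggy = []
for n in doc_counts:
    limit = get_sample_size(n, limit)
    buggy.append(limit)
print(buggy)   # [100, 50] -- the second task runs on all 50 docs, not 5

# After the fix: cache the original argument and resolve a fresh limit per task.
limit_arg = 0.1
limits = [get_sample_size(n, limit_arg) for n in doc_counts]
print(limits)  # [100, 5]
```

Keeping the per-task limits in a list also lets the later `zip(eval_tasks, limits)` loops report the effective sample count of each task, instead of whatever value the last iteration happened to leave in `limit`.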
parent fe3040f1
@@ -433,10 +433,14 @@ def evaluate(
             )
     # end multimodality validation check
+    # Cache the limit arg.
+    limit_arg = limit
+    limits = []
     for task_output in eval_tasks:
         task: Task = task_output.task
-        limit = get_sample_size(task, limit)
+        limit = get_sample_size(task, limit_arg)
+        limits.append(limit)
         task.build_all_requests(
             limit=limit,
             rank=lm.rank,
@@ -506,7 +510,7 @@ def evaluate(
     WORLD_SIZE = lm.world_size
     ### Postprocess outputs ###
     # TODO: del model here, maybe (idea: allow user to specify device of e.g. reward model separately)
-    for task_output in eval_tasks:
+    for task_output, limit in zip(eval_tasks, limits):
         task = task_output.task
         task.apply_filters()
@@ -655,7 +659,7 @@ def evaluate(
                         len(task_output.task.eval_docs),
                     ),
                 }
-                for task_output in eval_tasks
+                for task_output, limit in zip(eval_tasks, limits)
             },
         }
         if log_samples: