Commit d86aabc4 authored by Leo Gao
Browse files

more changes

parent 7b2b2a23
...@@ -22,7 +22,7 @@ def simple_evaluate(model, model_args=None, tasks=[], ...@@ -22,7 +22,7 @@ def simple_evaluate(model, model_args=None, tasks=[],
String arguments for each model class, see LM.create_from_arg_string. String arguments for each model class, see LM.create_from_arg_string.
Ignored if `model` argument is a LM object. Ignored if `model` argument is a LM object.
:param tasks: list[Union[str, Task]] :param tasks: list[Union[str, Task]]
List of task names or Task objects List of task names or Task objects. Task objects will be taken to have name task.EVAL_HARNESS_NAME if defined and type(task).__name__ otherwise.
:param num_fewshot: int :param num_fewshot: int
Number of examples in few-shot context Number of examples in few-shot context
:param batch_size: int, optional :param batch_size: int, optional
...@@ -64,7 +64,6 @@ def simple_evaluate(model, model_args=None, tasks=[], ...@@ -64,7 +64,6 @@ def simple_evaluate(model, model_args=None, tasks=[],
results = evaluate( results = evaluate(
lm=lm, lm=lm,
task_dict=task_dict, task_dict=task_dict,
provide_description=False,
num_fewshot=num_fewshot, num_fewshot=num_fewshot,
limit=limit, limit=limit,
description_dict=description_dict description_dict=description_dict
...@@ -87,13 +86,13 @@ def simple_evaluate(model, model_args=None, tasks=[], ...@@ -87,13 +86,13 @@ def simple_evaluate(model, model_args=None, tasks=[],
@positional_deprecated @positional_deprecated
def evaluate(lm, task_dict, provide_description, num_fewshot, limit, bootstrap_iters=100000, description_dict=None): def evaluate(lm, task_dict, provide_description=None, num_fewshot=0, limit=None, bootstrap_iters=100000, description_dict=None):
"""Instantiate and evaluate a model on a list of tasks. """Instantiate and evaluate a model on a list of tasks.
:param lm: obj :param lm: obj
Language Model Language Model
:param task_dict: dict[str, Task] :param task_dict: dict[str, Task]
Dictionary of tasks Dictionary of tasks. Tasks will be taken to have name task.EVAL_HARNESS_NAME if defined and type(task).__name__ otherwise.
:param provide_description: bool :param provide_description: bool
Not implemented, and this option is deprecated and will be removed in a future version in favor of a different description providing method Not implemented, and this option is deprecated and will be removed in a future version in favor of a different description providing method
:param num_fewshot: int :param num_fewshot: int
...@@ -111,6 +110,9 @@ def evaluate(lm, task_dict, provide_description, num_fewshot, limit, bootstrap_i ...@@ -111,6 +110,9 @@ def evaluate(lm, task_dict, provide_description, num_fewshot, limit, bootstrap_i
# TODO: todo: implement proper description-providing system # TODO: todo: implement proper description-providing system
assert not provide_description # not implemented. assert not provide_description # not implemented.
if provide_description is not None:
# nudge people to not specify it at all
print("WARNING: provide_description is deprecated and will be removed in a future version in favor of description_dict")
task_dict_items = [ task_dict_items = [
(name, task) (name, task)
......
...@@ -240,7 +240,7 @@ def get_task_name_from_object(task_object): ...@@ -240,7 +240,7 @@ def get_task_name_from_object(task_object):
return name return name
# this gives a mechanism for non-registered tasks to have a custom name anyways when reporting # this gives a mechanism for non-registered tasks to have a custom name anyways when reporting
return task_object.EVAL_HARNESS_NAME if hasattr(task_object, "EVAL_HARNESS_NAME") else task_object.__name__ return task_object.EVAL_HARNESS_NAME if hasattr(task_object, "EVAL_HARNESS_NAME") else type(task_object).__name__
def get_task_dict(task_name_list: List[Union[str, lm_eval.base.Task]]): def get_task_dict(task_name_list: List[Union[str, lm_eval.base.Task]]):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment