Commit 10dd7d38 authored by Jonathan Tow's avatar Jonathan Tow
Browse files

Make `evaluate` and `simple_evaluate` description args consistent

parent d1319950
...@@ -12,7 +12,7 @@ import numpy as np ...@@ -12,7 +12,7 @@ import numpy as np
def simple_evaluate(model, model_args, task_names, def simple_evaluate(model, model_args, task_names,
num_fewshot=0, batch_size=None, device=None, num_fewshot=0, batch_size=None, device=None,
no_cache=False, limit=None, bootstrap_iters=100000, no_cache=False, limit=None, bootstrap_iters=100000,
description_dict_path=None): description_dict=None):
"""Instantiate and evaluate a model on a list of tasks. """Instantiate and evaluate a model on a list of tasks.
:param model: str :param model: str
...@@ -33,8 +33,8 @@ def simple_evaluate(model, model_args, task_names, ...@@ -33,8 +33,8 @@ def simple_evaluate(model, model_args, task_names,
Limit the number of examples per task (only use this for testing) Limit the number of examples per task (only use this for testing)
:param bootstrap_iters: :param bootstrap_iters:
Number of iterations for bootstrap statistics Number of iterations for bootstrap statistics
:param description_dict_path: :param description_dict:
Path to a JSON file containing `task_name: description` key-values for custom prompts Dictionary of custom task descriptions of the form: `task_name: description`
:return :return
Dictionary of results Dictionary of results
""" """
...@@ -52,11 +52,6 @@ def simple_evaluate(model, model_args, task_names, ...@@ -52,11 +52,6 @@ def simple_evaluate(model, model_args, task_names,
task_dict = lm_eval.tasks.get_task_dict(task_names) task_dict = lm_eval.tasks.get_task_dict(task_names)
description_dict = {}
if description_dict_path:
with open(description_dict_path, 'r') as f:
description_dict = json.load(f)
results = evaluate(lm, task_dict, False, num_fewshot, limit, description_dict=description_dict) results = evaluate(lm, task_dict, False, num_fewshot, limit, description_dict=description_dict)
# add info about the model and few shot config # add info about the model and few shot config
...@@ -90,7 +85,7 @@ def evaluate(lm, task_dict, provide_description, num_fewshot, limit, bootstrap_i ...@@ -90,7 +85,7 @@ def evaluate(lm, task_dict, provide_description, num_fewshot, limit, bootstrap_i
:param bootstrap_iters: :param bootstrap_iters:
Number of iterations for bootstrap statistics Number of iterations for bootstrap statistics
:param description_dict: :param description_dict:
Dictionary of task descriptions of the form: `task_name: description` Dictionary of custom task descriptions of the form: `task_name: description`
:return :return
Dictionary of results Dictionary of results
""" """
......
...@@ -35,6 +35,11 @@ def main(): ...@@ -35,6 +35,11 @@ def main():
else: else:
task_names = args.tasks.split(",") task_names = args.tasks.split(",")
description_dict = {}
if args.description_dict_path:
with open(args.description_dict_path, 'r') as f:
description_dict = json.load(f)
results = evaluator.simple_evaluate( results = evaluator.simple_evaluate(
model=args.model, model=args.model,
model_args=args.model_args, model_args=args.model_args,
...@@ -44,6 +49,7 @@ def main(): ...@@ -44,6 +49,7 @@ def main():
device=args.device, device=args.device,
no_cache=args.no_cache, no_cache=args.no_cache,
limit=args.limit, limit=args.limit,
description_dict=description_dict
) )
dumped = json.dumps(results, indent=2) dumped = json.dumps(results, indent=2)
......
...@@ -159,7 +159,7 @@ description_dict = { ...@@ -159,7 +159,7 @@ description_dict = {
} }
``` ```
One can also interface with `evaluator.evaluate` from a higher level by simply passing a JSON file path to the `description_dict_path` arg of the command-line interface program, `main.py`. The JSON file pointed to should be structured the same way as the aforementioned `description_dict`. E.g. for some file at `/your/path/descriptions.json` you might have: One can also interface with `evaluator.evaluate`/`evaluator.simple_evaluate` from a higher level by simply passing a JSON file path to the `description_dict_path` arg of the command-line interface (CLI) programs, `main.py` and `write_out.py`. The JSON file pointed to should be structured the same way as the aforementioned `description_dict`. E.g. for some file at `/your/path/descriptions.json` you might have:
```json ```json
{ {
...@@ -168,7 +168,7 @@ One can also interface with `evaluator.evaluate` from a higher level by simply p ...@@ -168,7 +168,7 @@ One can also interface with `evaluator.evaluate` from a higher level by simply p
} }
``` ```
which can then be hooked up to the evaluator through the `main.py` CLI as: which can then be used, for example, in the `main.py` CLI as:
```bash ```bash
python main.py \ python main.py \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment