Commit 10dd7d38 authored by Jonathan Tow

Make `evaluate` and `simple_evaluate` description args consistent

parent d1319950
@@ -12,7 +12,7 @@ import numpy as np
 def simple_evaluate(model, model_args, task_names,
                     num_fewshot=0, batch_size=None, device=None,
                     no_cache=False, limit=None, bootstrap_iters=100000,
-                    description_dict_path=None):
+                    description_dict=None):
     """Instantiate and evaluate a model on a list of tasks.

     :param model: str
@@ -33,8 +33,8 @@ def simple_evaluate(model, model_args, task_names,
         Limit the number of examples per task (only use this for testing)
     :param bootstrap_iters:
         Number of iterations for bootstrap statistics
-    :param description_dict_path:
-        Path to a JSON file containing `task_name: description` key-values for custom prompts
+    :param description_dict:
+        Dictionary of custom task descriptions of the form: `task_name: description`
     :return
         Dictionary of results
     """
@@ -52,11 +52,6 @@ def simple_evaluate(model, model_args, task_names,
     task_dict = lm_eval.tasks.get_task_dict(task_names)

-    description_dict = {}
-    if description_dict_path:
-        with open(description_dict_path, 'r') as f:
-            description_dict = json.load(f)
-
     results = evaluate(lm, task_dict, False, num_fewshot, limit, description_dict=description_dict)

     # add info about the model and few shot config
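
With this change, callers of `evaluator.simple_evaluate` load any description JSON themselves and pass the resulting dict. A minimal sketch of the new calling convention, assuming the evaluator module is importable as `lm_eval.evaluator`; the model and task names below are placeholders, and the `model_args` string depends on your setup:

```python
import json

from lm_eval import evaluator

# The caller now reads the descriptions file itself; simple_evaluate only
# accepts the already-loaded dict, not a file path.
with open("/your/path/descriptions.json", "r") as f:
    description_dict = json.load(f)

results = evaluator.simple_evaluate(
    model="gpt2",            # placeholder model name
    model_args="",           # placeholder; depends on your backend/config
    task_names=["lambada"],  # placeholder task name
    num_fewshot=0,
    description_dict=description_dict,
)
```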
@@ -90,7 +85,7 @@ def evaluate(lm, task_dict, provide_description, num_fewshot, limit, bootstrap_i
     :param bootstrap_iters:
         Number of iterations for bootstrap statistics
     :param description_dict:
-        Dictionary of task descriptions of the form: `task_name: description`
+        Dictionary of custom task descriptions of the form: `task_name: description`
     :return
         Dictionary of results
     """
...
@@ -35,6 +35,11 @@ def main():
     else:
         task_names = args.tasks.split(",")

+    description_dict = {}
+    if args.description_dict_path:
+        with open(args.description_dict_path, 'r') as f:
+            description_dict = json.load(f)
+
     results = evaluator.simple_evaluate(
         model=args.model,
         model_args=args.model_args,
@@ -44,6 +49,7 @@
         device=args.device,
         no_cache=args.no_cache,
         limit=args.limit,
+        description_dict=description_dict
     )
     dumped = json.dumps(results, indent=2)
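
On the CLI side, `main.py` now performs the JSON load itself and forwards the dict to `simple_evaluate`. Assuming the argparse flag backing `args.description_dict_path` is spelled `--description_dict_path` (and using placeholder model/task values), an invocation might look like:

```bash
# Hypothetical flags; --description_dict_path mirrors args.description_dict_path above.
python main.py \
    --model gpt2 \
    --tasks lambada \
    --description_dict_path /your/path/descriptions.json
```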
...
@@ -159,7 +159,7 @@ description_dict = {
 }
 ```

-One can also interface with `evaluator.evaluate` from a higher level by simply passing a JSON file path to the `description_dict_path` arg of the command-line interface program, `main.py`. The JSON file pointed to should be structured the same way as the aforementioned `description_dict`. E.g. for some file at `/your/path/descriptions.json` you might have:
+One can also interface with `evaluator.evaluate`/`evaluator.simple_evaluate` from a higher level by simply passing a JSON file path to the `description_dict_path` arg of the command-line interface (CLI) programs, `main.py` and `write_out.py`. The JSON file pointed to should be structured the same way as the aforementioned `description_dict`. E.g. for some file at `/your/path/descriptions.json` you might have:
 ```json
 {
@@ -168,7 +168,7 @@ One can also interface with `evaluator.evaluate` from a higher level by simply p
 }
 ```

-which can then be hooked up to the evaluator through the `main.py` CLI as:
+which can then be used, for example, in the `main.py` CLI as:
 ```python
 python main.py \
...