Commit 1405697f authored by lintangsutawika
Browse files

edit

parent c37268bb
...@@ -79,7 +79,6 @@ def simple_evaluate( ...@@ -79,7 +79,6 @@ def simple_evaluate(
results = evaluate( results = evaluate(
lm=lm, lm=lm,
task_dict=task_dict, task_dict=task_dict,
num_fewshot=num_fewshot,
limit=limit, limit=limit,
bootstrap_iters=bootstrap_iters, bootstrap_iters=bootstrap_iters,
decontamination_ngrams_path=decontamination_ngrams_path, decontamination_ngrams_path=decontamination_ngrams_path,
...@@ -108,7 +107,6 @@ decontaminate_suffix = "_decontaminate" ...@@ -108,7 +107,6 @@ decontaminate_suffix = "_decontaminate"
def evaluate( def evaluate(
lm, lm,
task_dict, task_dict,
num_fewshot=0,
limit=None, limit=None,
bootstrap_iters=100000, bootstrap_iters=100000,
decontamination_ngrams_path=None, decontamination_ngrams_path=None,
...@@ -176,7 +174,6 @@ def evaluate( ...@@ -176,7 +174,6 @@ def evaluate(
for task_name, task in task_dict.items(): for task_name, task in task_dict.items():
task.apply_filters() task.apply_filters()
### Collect values of metrics on all datapoints ### ### Collect values of metrics on all datapoints ###
# TODO: make metric configurable, add metric registry # TODO: make metric configurable, add metric registry
vals = collections.defaultdict(list) vals = collections.defaultdict(list)
...@@ -193,8 +190,6 @@ def evaluate( ...@@ -193,8 +190,6 @@ def evaluate(
metrics = task.process_results(doc, [req.filtered_resps[key] for req in requests]) metrics = task.process_results(doc, [req.filtered_resps[key] for req in requests])
for metric, value in metrics.items(): for metric, value in metrics.items():
vals[(task_name, key, metric)].append(value) vals[(task_name, key, metric)].append(value)
### Aggregate results over all datapoints ### ### Aggregate results over all datapoints ###
# aggregate results ; run bootstrap CIs # aggregate results ; run bootstrap CIs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment