Commit 17172a26 authored by lintangsutawika
Browse files

temp save

parent 81b8e670
...@@ -38,6 +38,7 @@ def simple_evaluate( ...@@ -38,6 +38,7 @@ def simple_evaluate(
write_out: bool = False, write_out: bool = False,
log_samples: bool = True, log_samples: bool = True,
gen_kwargs: str = None, gen_kwargs: str = None,
weight_by_size: bool = False,
): ):
"""Instantiate and evaluate a model on a list of tasks. """Instantiate and evaluate a model on a list of tasks.
...@@ -155,6 +156,7 @@ def simple_evaluate( ...@@ -155,6 +156,7 @@ def simple_evaluate(
decontamination_ngrams_path=decontamination_ngrams_path, decontamination_ngrams_path=decontamination_ngrams_path,
write_out=write_out, write_out=write_out,
log_samples=log_samples, log_samples=log_samples,
weight_by_size=weight_by_size,
) )
if lm.rank == 0: if lm.rank == 0:
...@@ -192,6 +194,7 @@ def evaluate( ...@@ -192,6 +194,7 @@ def evaluate(
decontamination_ngrams_path=None, decontamination_ngrams_path=None,
write_out: bool = False, write_out: bool = False,
log_samples: bool = True, log_samples: bool = True,
weight_by_size: bool = False,
): ):
"""Instantiate and evaluate a model on a list of tasks. """Instantiate and evaluate a model on a list of tasks.
...@@ -474,17 +477,22 @@ def evaluate( ...@@ -474,17 +477,22 @@ def evaluate(
total_size = 0 total_size = 0
for task in task_list: for task in task_list:
print("###")
print(task)
print(metrics)
print("###")
metrics = results[task].copy() metrics = results[task].copy()
if "alias" in metrics: if "alias" in metrics:
metrics.pop("alias") metrics.pop("alias")
current_size = metrics.pop("samples")
# TODO: There should be a way for users # TODO: There should be a way for users
# to toggle between weighted and # to toggle between weighted and
# unweighted averaging # unweighted averaging
# For unweighted averaging, use: if weight_by_size:
# current_size = 1 current_size = metrics.pop("samples")
else:
current_size = 1
all_stderr = [] all_stderr = []
for metric in [ for metric in [
......
...@@ -4,9 +4,10 @@ task: ...@@ -4,9 +4,10 @@ task:
- group: arc_stuff - group: arc_stuff
task: task:
- arc_challenge - arc_challenge
- glue
- task: arc_easy - task: arc_easy
metric_list: metric_list:
- metric: acc - metric: acc
num_fewshot: 3 num_fewshot: 3
# - task: mmlu_stem # - task: mmlu
# num_fewshot: 2 # num_fewshot: 2
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment