Commit 24ba70a3 authored by Nathan Habib's avatar Nathan Habib
Browse files

cleanup

parent dfea19e8
@@ -22,7 +22,7 @@ from lm_eval.evaluator_utils import (
     run_task_tests,
 )
 from lm_eval.loggers import EvaluationTracker
-from lm_eval.loggers.utils import add_env_info, get_git_commit_hash
+from lm_eval.loggers.utils import add_env_info, add_tokenizer_info, get_git_commit_hash
 from lm_eval.tasks import TaskManager, get_task_dict
 from lm_eval.utils import (
     eval_logger,
@@ -271,6 +271,7 @@ def simple_evaluate(
             model_args=model_args,
             system_instruction=system_instruction,
             chat_template=lm.chat_template if apply_chat_template else None,
+            fewshot_as_multiturn=fewshot_as_multiturn,
         )

     results = evaluate(
@@ -325,6 +326,7 @@ def simple_evaluate(
         results["git_hash"] = get_git_commit_hash()
         results["date"] = start_date
         add_env_info(results)  # additional environment info to results
+        add_tokenizer_info(results, lm)  # additional info about tokenizer
         return results
     else:
         return None
@@ -607,16 +609,16 @@ def evaluate(
                 ]
                 # compute group's pooled metric and stderr
-                results[group][
-                    metric
-                ] = lm_eval.api.metrics.aggregate_subtask_metrics(metrics, sizes)
+                results[group][metric] = (
+                    lm_eval.api.metrics.aggregate_subtask_metrics(metrics, sizes)
+                )
                 # TODO: calculate grouped metric using aggregation fn
                 if "N/A" in stderrs:
                     results[group][stderr] = "N/A"
                 else:
-                    results[group][
-                        stderr
-                    ] = lm_eval.api.metrics.pooled_sample_stderr(stderrs, sizes)
+                    results[group][stderr] = (
+                        lm_eval.api.metrics.pooled_sample_stderr(stderrs, sizes)
+                    )
                 # TODO: allow GroupConfigs to choose which variance formula is used, for back-compatibility
                 # To use the old (likely incorrect) variance formula, comment out the above and uncomment this line:
                 # results[group][stderr] = lm_eval.api.metrics.combined_sample_stderr(stderrs, sizes, metrics=metrics)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment