Commit 96dfe976 authored by lintangsutawika
Browse files

pre-commit format

parent 3d1b8f43
...@@ -71,12 +71,12 @@ class GroupConfig(dict): ...@@ -71,12 +71,12 @@ class GroupConfig(dict):
group_alias: Optional[str] = None group_alias: Optional[str] = None
task: Optional[Union[str, list]] = None task: Optional[Union[str, list]] = None
tag_to_task: Optional[str] = False tag_to_task: Optional[str] = False
aggregate_metric: Optional[ aggregate_metric: Optional[Union[List[AggMetricConfig], AggMetricConfig, dict]] = (
Union[List[AggMetricConfig], AggMetricConfig, dict] None
] = None )
metadata: Optional[ metadata: Optional[dict] = (
dict None # by default, not used in the code. allows for users to pass arbitrary info to tasks
] = None # by default, not used in the code. allows for users to pass arbitrary info to tasks )
def __getitem__(self, item): def __getitem__(self, item):
return getattr(self, item) return getattr(self, item)
......
...@@ -688,10 +688,10 @@ def evaluate( ...@@ -688,10 +688,10 @@ def evaluate(
if "N/A" in stderrs: if "N/A" in stderrs:
results[group_or_task][stderr] = "N/A" results[group_or_task][stderr] = "N/A"
else: else:
results[group_or_task][ results[group_or_task][stderr] = (
stderr lm_eval.api.metrics.pooled_sample_stderr(
] = lm_eval.api.metrics.pooled_sample_stderr( stderrs, sizes
stderrs, sizes )
) )
# TODO: allow GroupConfigs to choose which variance formula is used, for back-compatibility # TODO: allow GroupConfigs to choose which variance formula is used, for back-compatibility
# To use the old (likely incorrect) variance formula, comment out the above and uncomment this line: # To use the old (likely incorrect) variance formula, comment out the above and uncomment this line:
......
...@@ -344,9 +344,9 @@ def consolidate_results( ...@@ -344,9 +344,9 @@ def consolidate_results(
metric_key metric_key
] ]
results[task_output.task_id]["samples"] = task_output.sample_len results[task_output.task_id]["samples"] = task_output.sample_len
results[task_output.task_id][ results[task_output.task_id][f"{metric}_stderr,{filter_key}"] = (
f"{metric}_stderr,{filter_key}" task_output.agg_metrics[f"{metric}_stderr,{filter_key}"]
] = task_output.agg_metrics[f"{metric}_stderr,{filter_key}"] )
return results, samples, configs, versions, num_fewshot, higher_is_better return results, samples, configs, versions, num_fewshot, higher_is_better
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment