Commit e5161a6d authored by lintangsutawika
Browse files

changes

parent 57f08e40
......@@ -61,21 +61,30 @@ def include_benchmarks(task_dir, benchmark_dir="benchmarks"):
if (subdirs == [] or subdirs == ["__pycache__"]) and (len(file_list) > 0):
for f in file_list:
if f.endswith(".yaml"):
benchmark_path = os.path.join(root, f)
with open(benchmark_path, "rb") as file:
yaml_config = yaml.full_load(file)
assert "group" in yaml_config
group = yaml_config["group"]
task_list = yaml_config["task"]
task_names = utils.pattern_match(task_list, ALL_TASKS)
for task in task_names:
if task in TASK_REGISTRY:
if group in GROUP_REGISTRY:
GROUP_REGISTRY[group].append(task)
else:
GROUP_REGISTRY[group] = [task]
try:
benchmark_path = os.path.join(root, f)
with open(benchmark_path, "rb") as file:
yaml_config = yaml.full_load(file)
assert "group" in yaml_config
group = yaml_config["group"]
task_list = yaml_config["task"]
task_names = utils.pattern_match(task_list, ALL_TASKS)
for task in task_names:
if task in TASK_REGISTRY:
if group in GROUP_REGISTRY:
GROUP_REGISTRY[group].append(task)
else:
GROUP_REGISTRY[group] = [task]
ALL_TASKS.add(group)
except Exception as error:
eval_logger.warning(
"Failed to load benchmark in\n"
f" {benchmark_path}\n"
" Benchmark will not be added to registry\n"
f" Error: {error}"
)
task_dir = os.path.dirname(os.path.abspath(__file__)) + "/"
......
......@@ -8,6 +8,6 @@ task:
- winogrande
- arc_challenge
- arc_easy
- logiqa
- blimp_*
- hendrycksTest*
# - logiqa
# - blimp_*
# - hendrycksTest*
group:
- super-glue-lm-eval-v1
task: winogrande
dataset_path: winogrande
dataset_name: winogrande_xl
output_type: multiple_choice
should_decontaminate: true
doc_to_decontamination_query: "{{sentence}}"
training_split: train
validation_split: validation
doc_to_text: !function preprocess_winogrande.doc_to_text
doc_to_target: !function preprocess_winogrande.doc_to_target
doc_to_choice: !function preprocess_winogrande.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: sentence
metric_list:
- metric: exact_match
- metric: acc
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
import re
from lm_eval.utils import general_detokenize
def partial_context(doc, option):
    """Return the sentence up to its first "_" with ``option`` appended.

    The first "_" in ``doc["sentence"]`` marks the blank to fill. Everything
    before it is kept, ``option`` is put in its place, and everything after
    the blank is dropped.

    Args:
        doc: Mapping with a ``"sentence"`` string containing a "_" blank.
        option: String substituted for the blank.

    Returns:
        The sentence prefix followed by ``option``.

    Raises:
        ValueError: If the sentence contains no "_" (raised by ``str.index``).
    """
    sentence = doc["sentence"]
    # Only the first "_" is treated as the blank; later ones are cut off
    # together with the rest of the sentence.
    blank_at = sentence.index("_")
    return sentence[:blank_at] + option
def partial_target(doc):
    """Return the text after the sentence's first "_" with one leading space.

    The remainder of ``doc["sentence"]`` following the first "_" is stripped
    of surrounding whitespace and then prefixed with a single space.

    Args:
        doc: Mapping with a ``"sentence"`` string containing a "_" blank.

    Returns:
        ``" "`` plus the stripped text that follows the blank. This is just
        ``" "`` when nothing but whitespace follows the blank.

    Raises:
        ValueError: If the sentence contains no "_" (raised by ``str.index``).
    """
    sentence = doc["sentence"]
    # Skip past the "_" itself so the target starts right after the blank.
    tail_start = sentence.index("_") + 1
    tail = sentence[tail_start:]
    return " " + tail.strip()
task: winogrande
dataset_path: winogrande
dataset_name: winogrande_xl
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: !function preprocess_winogrande.doc_to_text
doc_to_target: !function preprocess_winogrande.doc_to_target
doc_to_choice: !function preprocess_winogrande.doc_to_choice
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment