Commit 829280e8 authored by lintangsutawika

Adjust import origins: rename lm_eval.api.register to lm_eval.api.registry

parent 9c3f7227
@@ -5,11 +5,11 @@ from typing import List, Union
 from lm_eval import utils
 from lm_eval.logger import eval_logger
 from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
-from lm_eval.api.register import (
+from lm_eval.api.registry import (
     register_task,
     register_group,
-    task_registry,
-    group_registry,
+    TASK_REGISTRY,
+    GROUP_REGISTRY,
 )
@@ -48,8 +48,6 @@ for root, subdirs, file_list in os.walk(task_dir):
             " Config will not be added to registry"
         )
-TASK_REGISTRY = task_registry
-GROUP_REGISTRY = group_registry
 ALL_TASKS = sorted(list(TASK_REGISTRY.keys()) + list(GROUP_REGISTRY.keys()))
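For context on the rename: lm_eval.api.registry now exposes the TASK_REGISTRY and GROUP_REGISTRY dicts directly, so the local re-export aliases after the walk loop become redundant and are dropped. A minimal sketch of the registry module these imports imply; only the four imported names come from the diff, the bodies are assumptions:

# Hypothetical sketch of lm_eval/api/registry.py -- only the names
# TASK_REGISTRY, GROUP_REGISTRY, register_task, and register_group are
# taken from this diff; the implementations are illustrative.
TASK_REGISTRY = {}   # maps task name -> Task subclass
GROUP_REGISTRY = {}  # maps group name -> list of registered task classes


def register_task(name):
    def decorate(cls):
        assert name not in TASK_REGISTRY, f"task '{name}' is already registered"
        TASK_REGISTRY[name] = cls
        return cls

    return decorate


def register_group(group):
    def decorate(cls):
        GROUP_REGISTRY.setdefault(group, []).append(cls)
        return cls

    return decorate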
@@ -16,7 +16,7 @@ from lm_eval import utils
 from lm_eval.prompts import get_prompt
 from lm_eval.api.task import MultipleChoiceTask
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @article{Clark2018ThinkYH,
@@ -24,7 +24,7 @@ from lm_eval.api.instance import Instance
 from lm_eval.prompts import get_prompt
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @misc{cobbe2021training,
@@ -16,7 +16,7 @@ from lm_eval.api.task import Task
 from lm_eval.api.instance import Instance
 from lm_eval.api.metrics import mean, perplexity
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @misc{
@@ -12,7 +12,7 @@ Homepage: https://pile.eleuther.ai/
 from lm_eval.api.task import PerplexityTask
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @article{pile,
group:
  - super-glue-lm-eval-v1
task: "default"
dataset_path: super_glue
dataset_name: boolq
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: "{{passage}}\nQuestion: {{question}}\nAnswer:"
doc_to_target: "{{label}}" # this will be cast to an int.
template_aliases: "{% set answer_choices = ['no', 'yes'] %}"
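The doc_to_text and template_aliases fields are Jinja2 templates. As a quick illustration of what one rendered BoolQ prompt looks like (the sample document is invented, and plain jinja2 rendering semantics are assumed):

import jinja2

# Invented sample document with the two fields the template references.
doc = {
    "passage": "The aurora is visible near the poles.",
    "question": "is the aurora visible near the poles",
}
template = jinja2.Template("{{passage}}\nQuestion: {{question}}\nAnswer:")
print(template.render(**doc))
# The aurora is visible near the poles.
# Question: is the aurora visible near the poles
# Answer: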
import numpy as np
import sklearn.metrics  # bare "import sklearn" does not bind the metrics submodule


def cb_multi_fi(items):
    # items: (predicted label index, gold label index) pairs, one per document.
    preds, golds = zip(*items)
    preds = np.array(preds)
    golds = np.array(golds)
    # Binary F1 for each of the three CB labels, then their unweighted mean.
    f11 = sklearn.metrics.f1_score(y_true=golds == 0, y_pred=preds == 0)
    f12 = sklearn.metrics.f1_score(y_true=golds == 1, y_pred=preds == 1)
    f13 = sklearn.metrics.f1_score(y_true=golds == 2, y_pred=preds == 2)
    avg_f1 = np.mean([f11, f12, f13])
    return avg_f1
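A quick usage sketch for the aggregation above; the (prediction, gold) pairs are invented for illustration:

# Each item pairs a predicted label index with the gold label index.
items = [(0, 0), (1, 1), (2, 2), (0, 1), (2, 1)]
print(cb_multi_fi(items))  # ~0.611: unweighted mean of the three per-class F1s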
group:
  - super-glue-lm-eval-v1
task: "default"
dataset_path: super_glue
dataset_name: cb
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: "{{premise}}\nQuestion: {{hypothesis}}. True, False, or Neither?\nAnswer:"
doc_to_target: "{{label}}" # this will be cast to an int.
template_aliases: "{% set answer_choices = ['True', 'False', 'Neither'] %}"
metric_list:
  - metric: acc
  - metric: f1
    aggregation: !function "aggregate.cb_multi_fi"
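The !function "aggregate.cb_multi_fi" value tells the config loader to resolve a Python callable rather than keep a literal string. The harness's actual loader is not shown in this diff; one plausible sketch using a custom PyYAML constructor, where only the module.attribute convention is taken from the value above and everything else is an assumption:

import importlib
import yaml


def _function_constructor(loader, node):
    # "aggregate.cb_multi_fi" -> import module "aggregate" (it must be
    # importable, e.g. on sys.path), then fetch attribute "cb_multi_fi".
    module_name, attr = loader.construct_scalar(node).rsplit(".", 1)
    return getattr(importlib.import_module(module_name), attr)


yaml.SafeLoader.add_constructor("!function", _function_constructor)
config = yaml.safe_load(open("cb.yaml"))  # "cb.yaml" is a hypothetical path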
@@ -13,7 +13,7 @@ import re
 from lm_eval.api.task import PerplexityTask
-from lm_eval.api.register import register_task, register_group
+from lm_eval.api.registry import register_task, register_group
 _CITATION = """
 @misc{merity2016pointer,
 group:
   - wikitext_group
-task: wikitext_yaml
+task: default
 dataset_path: EleutherAI/wikitext_document_level
 dataset_name: wikitext-2-raw-v1
 output_type: loglikelihood_rolling
@@ -14,11 +14,5 @@ should_decontaminate: true
 doc_to_decontamination_query: "{{page}}"
 metric_list:
   - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
   - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
   - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
\ No newline at end of file
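The dropped aggregation and higher_is_better fields presumably fall back to per-metric defaults. For reference, a sketch of how these perplexity-style metrics are conventionally computed from per-document rolling log-likelihoods (assumed semantics, not code from this repository):

import math


def weighted_perplexity(pairs):
    # pairs: (loglikelihood, weight) per document, where weight is the
    # word count (word_perplexity) or byte count (byte_perplexity).
    lls, weights = zip(*pairs)
    return math.exp(-sum(lls) / sum(weights))


def bits_per_byte(pairs):
    # Negative log-likelihood per byte, converted from nats to bits;
    # lower is better for all three metrics.
    lls, nbytes = zip(*pairs)
    return -sum(lls) / sum(nbytes) / math.log(2)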