Commit 08218829 authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into t5v2-alt-plus

parents 51afaca2 a97fde23
"dataset_name": "basic_ancient_chinese"
"description": "以下是关于古汉语知识的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_basic_ancient_chinese"
"dataset_name": "couplet_prediction"
"description": "以下是关于对联的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_couplet_prediction"
"dataset_name": "homographic_character_resolution"
"description": "以下是关于通假字的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_homographic_character_resolution"
"dataset_name": "named_entity_recognition"
"description": "以下是关于古汉语命名体识别的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_named_entity_recognition"
"dataset_name": "poetry_appreciate"
"description": "以下是关于古诗词曲鉴赏的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_appreciate"
"dataset_name": "poetry_context_prediction"
"description": "以下是关于古诗词上下句预测的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_context_prediction"
"dataset_name": "poetry_quality_assessment"
"description": "以下是关于古诗词质量评估的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_quality_assessment"
"dataset_name": "poetry_sentiment_analysis"
"description": "以下是关于诗词情感分类的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_sentiment_analysis"
"dataset_name": "polysemy_resolution"
"description": "以下是关于古文单字多义的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_polysemy_resolution"
"dataset_name": "reading_comprehension"
"description": "以下是关于古文阅读理解的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_reading_comprehension"
"dataset_name": "sentence_segmentation"
"description": "以下是关于古文断句的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_sentence_segmentation"
......@@ -7,7 +7,7 @@ import os
import yaml
from tqdm import tqdm
from lm_eval.logger import eval_logger
from lm_eval.utils import eval_logger
SUBJECTS = {
......
......@@ -2,12 +2,14 @@
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
import yaml
from tqdm import tqdm
from lm_eval.logger import eval_logger
eval_logger = logging.getLogger("lm-eval")
SUBJECTS = {
......
......@@ -3,7 +3,7 @@ dataset_path: EleutherAI/advanced_ai_risk
output_type: multiple_choice
validation_split: validation
target_delimiter: ""
doc_to_text: "Human: {question}\n\nAssistant:"
doc_to_text: "Human: {{question}}\n\nAssistant:"
doc_to_target: 0
doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
should_decontaminate: true
......
......@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "lm_eval"
version = "0.4.1"
version = "0.4.2"
authors = [
{name="EleutherAI", email="contact@eleuther.ai"}
]
......
......@@ -25,8 +25,8 @@ class TEST_VLLM:
multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
MULTIPLE_CH: List[Instance] = multiple_choice_task.instances
generate_until_task = task_list["gsm8k"] # type: ignore
generate_until_task.build_all_requests(limit=10, rank=0, world_size=1)
generate_until_task._config.generation_kwargs["max_gen_toks"] = 10
generate_until_task.build_all_requests(limit=10, rank=0, world_size=1)
generate_until: List[Instance] = generate_until_task.instances
rolling_task = task_list["wikitext"] # type: ignore
rolling_task.build_all_requests(limit=10, rank=0, world_size=1)
......
import argparse
import pytest
import lm_eval.__main__
def test_cli_parse_error():
"""
Assert error raised if cli args argument doesn't have type
"""
with pytest.raises(ValueError):
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"--model", "-m", type=str, default="hf", help="Name of model e.g. `hf`"
)
parser.add_argument(
"--tasks",
"-t",
default=None,
metavar="task1,task2",
help="To get full list of tasks, use the command lm-eval --tasks list",
)
lm_eval.__main__.check_argument_types(parser)
def test_cli_parse_no_error():
"""
Assert typed arguments are parsed correctly
"""
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"--model", "-m", type=str, default="hf", help="Name of model e.g. `hf`"
)
parser.add_argument(
"--tasks",
"-t",
type=str,
default=None,
metavar="task1,task2",
help="To get full list of tasks, use the command lm-eval --tasks list",
)
lm_eval.__main__.check_argument_types(parser)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment