Commit 08218829 authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into t5v2-alt-plus

parents 51afaca2 a97fde23
"dataset_name": "basic_ancient_chinese"
"description": "以下是关于古汉语知识的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_basic_ancient_chinese"
"dataset_name": "couplet_prediction"
"description": "以下是关于对联的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_couplet_prediction"
"dataset_name": "homographic_character_resolution"
"description": "以下是关于通假字的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_homographic_character_resolution"
"dataset_name": "named_entity_recognition"
"description": "以下是关于古汉语命名体识别的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_named_entity_recognition"
"dataset_name": "poetry_appreciate"
"description": "以下是关于古诗词曲鉴赏的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_appreciate"
"dataset_name": "poetry_context_prediction"
"description": "以下是关于古诗词上下句预测的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_context_prediction"
"dataset_name": "poetry_quality_assessment"
"description": "以下是关于古诗词质量评估的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_quality_assessment"
"dataset_name": "poetry_sentiment_analysis"
"description": "以下是关于诗词情感分类的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_poetry_sentiment_analysis"
"dataset_name": "polysemy_resolution"
"description": "以下是关于古文单字多义的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_polysemy_resolution"
"dataset_name": "reading_comprehension"
"description": "以下是关于古文阅读理解的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_reading_comprehension"
"dataset_name": "sentence_segmentation"
"description": "以下是关于古文断句的单项选择题,请直接给出正确答案的选项。\n\n"
"include": "_default_template_yaml"
"task": "aclue_sentence_segmentation"
...@@ -7,7 +7,7 @@ import os ...@@ -7,7 +7,7 @@ import os
import yaml import yaml
from tqdm import tqdm from tqdm import tqdm
from lm_eval.logger import eval_logger from lm_eval.utils import eval_logger
SUBJECTS = { SUBJECTS = {
......
...@@ -2,12 +2,14 @@ ...@@ -2,12 +2,14 @@
Take in a YAML, and output all "other" splits with this YAML Take in a YAML, and output all "other" splits with this YAML
""" """
import argparse import argparse
import logging
import os import os
import yaml import yaml
from tqdm import tqdm from tqdm import tqdm
from lm_eval.logger import eval_logger
eval_logger = logging.getLogger("lm-eval")
SUBJECTS = { SUBJECTS = {
......
...@@ -3,7 +3,7 @@ dataset_path: EleutherAI/advanced_ai_risk ...@@ -3,7 +3,7 @@ dataset_path: EleutherAI/advanced_ai_risk
output_type: multiple_choice output_type: multiple_choice
validation_split: validation validation_split: validation
target_delimiter: "" target_delimiter: ""
doc_to_text: "Human: {question}\n\nAssistant:" doc_to_text: "Human: {{question}}\n\nAssistant:"
doc_to_target: 0 doc_to_target: 0
doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}" doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
should_decontaminate: true should_decontaminate: true
......
...@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" ...@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "lm_eval" name = "lm_eval"
version = "0.4.1" version = "0.4.2"
authors = [ authors = [
{name="EleutherAI", email="contact@eleuther.ai"} {name="EleutherAI", email="contact@eleuther.ai"}
] ]
......
...@@ -25,8 +25,8 @@ class TEST_VLLM: ...@@ -25,8 +25,8 @@ class TEST_VLLM:
multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1) multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
MULTIPLE_CH: List[Instance] = multiple_choice_task.instances MULTIPLE_CH: List[Instance] = multiple_choice_task.instances
generate_until_task = task_list["gsm8k"] # type: ignore generate_until_task = task_list["gsm8k"] # type: ignore
generate_until_task.build_all_requests(limit=10, rank=0, world_size=1)
generate_until_task._config.generation_kwargs["max_gen_toks"] = 10 generate_until_task._config.generation_kwargs["max_gen_toks"] = 10
generate_until_task.build_all_requests(limit=10, rank=0, world_size=1)
generate_until: List[Instance] = generate_until_task.instances generate_until: List[Instance] = generate_until_task.instances
rolling_task = task_list["wikitext"] # type: ignore rolling_task = task_list["wikitext"] # type: ignore
rolling_task.build_all_requests(limit=10, rank=0, world_size=1) rolling_task.build_all_requests(limit=10, rank=0, world_size=1)
......
import argparse
import pytest
import lm_eval.__main__
def test_cli_parse_error():
"""
Assert error raised if cli args argument doesn't have type
"""
with pytest.raises(ValueError):
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"--model", "-m", type=str, default="hf", help="Name of model e.g. `hf`"
)
parser.add_argument(
"--tasks",
"-t",
default=None,
metavar="task1,task2",
help="To get full list of tasks, use the command lm-eval --tasks list",
)
lm_eval.__main__.check_argument_types(parser)
def test_cli_parse_no_error():
"""
Assert typed arguments are parsed correctly
"""
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"--model", "-m", type=str, default="hf", help="Name of model e.g. `hf`"
)
parser.add_argument(
"--tasks",
"-t",
type=str,
default=None,
metavar="task1,task2",
help="To get full list of tasks, use the command lm-eval --tasks list",
)
lm_eval.__main__.check_argument_types(parser)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment