Unverified commit aa2dd2b5, authored by Fengzhe Zhou, committed by GitHub
Browse files

[Format] Add config lints (#892)

parent 3dbba119
...@@ -12,46 +12,46 @@ mbpp_infer_cfg = dict( ...@@ -12,46 +12,46 @@ mbpp_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个函数,从给定的两个元组列表中查找相似的元素。 你的代码应该通过这些测试:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n" '你是一名专业的 Python 程序员,你的任务是:编写一个函数,从给定的两个元组列表中查找相似的元素。 你的代码应该通过这些测试:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n " "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个 Python 函数来识别一个整数是否不是素数。 你的代码应该通过这些测试:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n" '你是一名专业的 Python 程序员,你的任务是:编写一个 Python 函数来识别一个整数是否不是素数。 你的代码应该通过这些测试:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n " "[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个函数,使用堆队列算法从给定的数字列表中查找最大整数。 你的代码应该通过这些测试:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n" '你是一名专业的 Python 程序员,你的任务是:编写一个函数,使用堆队列算法从给定的数字列表中查找最大整数。 你的代码应该通过这些测试:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n " "[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是: {text} 你的代码应该通过这些测试:\n\n {test_list} \n" '你是一名专业的 Python 程序员,你的任务是: {text} 你的代码应该通过这些测试:\n\n {test_list} \n'
), ),
dict(role="BOT", prompt="[BEGIN]\n"), dict(role='BOT', prompt='[BEGIN]\n'),
], )), ], )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)) inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT") mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role='BOT')
mbpp_cn_datasets = [ mbpp_cn_datasets = [
dict( dict(
......
...@@ -12,46 +12,46 @@ mbpp_infer_cfg = dict( ...@@ -12,46 +12,46 @@ mbpp_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个函数,从给定的两个元组列表中查找相似的元素。 你的代码应该通过这些测试:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n" '你是一名专业的 Python 程序员,你的任务是:编写一个函数,从给定的两个元组列表中查找相似的元素。 你的代码应该通过这些测试:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n " "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个 Python 函数来识别一个整数是否不是素数。 你的代码应该通过这些测试:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n" '你是一名专业的 Python 程序员,你的任务是:编写一个 Python 函数来识别一个整数是否不是素数。 你的代码应该通过这些测试:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n " "[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个函数,使用堆队列算法从给定的数字列表中查找最大整数。 你的代码应该通过这些测试:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n" '你是一名专业的 Python 程序员,你的任务是:编写一个函数,使用堆队列算法从给定的数字列表中查找最大整数。 你的代码应该通过这些测试:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n " "[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是: {text} 你的代码应该通过这些测试:\n\n {test_list} \n" '你是一名专业的 Python 程序员,你的任务是: {text} 你的代码应该通过这些测试:\n\n {test_list} \n'
), ),
dict(role="BOT", prompt="[BEGIN]\n"), dict(role='BOT', prompt='[BEGIN]\n'),
], )), ], )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)) inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT") mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role='BOT')
mbpp_cn_datasets = [ mbpp_cn_datasets = [
dict( dict(
......
from mmengine.config import read_base from mmengine.config import read_base
with read_base(): with read_base():
from .mbpp_cn_gen_9114d5 import mbpp_cn_datasets # noqa: F401, F403 from .mbpp_cn_gen_9114d5 import mbpp_cn_datasets # noqa: F401, F403
\ No newline at end of file
...@@ -12,46 +12,46 @@ mbpp_infer_cfg = dict( ...@@ -12,46 +12,46 @@ mbpp_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个函数,从给定的两个元组列表中查找相似的元素。 你的代码应该通过这些测试:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n" '你是一名专业的 Python 程序员,你的任务是:编写一个函数,从给定的两个元组列表中查找相似的元素。 你的代码应该通过这些测试:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n " "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个 Python 函数来识别一个整数是否不是素数。 你的代码应该通过这些测试:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n" '你是一名专业的 Python 程序员,你的任务是:编写一个 Python 函数来识别一个整数是否不是素数。 你的代码应该通过这些测试:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n " "[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是:编写一个函数,使用堆队列算法从给定的数字列表中查找最大整数。 你的代码应该通过这些测试:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n" '你是一名专业的 Python 程序员,你的任务是:编写一个函数,使用堆队列算法从给定的数字列表中查找最大整数。 你的代码应该通过这些测试:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n " "[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"你是一名专业的 Python 程序员,你的任务是: {text} 你的代码应该通过这些测试:\n\n {test_list} \n" '你是一名专业的 Python 程序员,你的任务是: {text} 你的代码应该通过这些测试:\n\n {test_list} \n'
), ),
dict(role="BOT", prompt="[BEGIN]\n"), dict(role='BOT', prompt='[BEGIN]\n'),
], )), ], )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)) inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT") mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role='BOT')
mbpp_cn_datasets = [ mbpp_cn_datasets = [
dict( dict(
......
...@@ -12,46 +12,46 @@ mbpp_plus_infer_cfg = dict( ...@@ -12,46 +12,46 @@ mbpp_plus_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the shared elements from the given two lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n" 'You are an expert Python programmer, and here is your task: Write a function to find the shared elements from the given two lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n return tuple(set(test_tup1) & set(test_tup2))' \n[DONE] \n\n " "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n return tuple(set(test_tup1) & set(test_tup2))' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n" 'You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import math\ndef is_not_prime(n):\n if n == 1:\n return True\n for i in range(2, int(math.sqrt(n))+1):\n if n % i == 0:\n return True\n return False' \n[DONE] \n\n " "[BEGIN]\n 'import math\ndef is_not_prime(n):\n if n == 1:\n return True\n for i in range(2, int(math.sqrt(n))+1):\n if n % i == 0:\n return True\n return False' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the n largest integers from a given list of numbers, returned in descending order. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n" 'You are an expert Python programmer, and here is your task: Write a function to find the n largest integers from a given list of numbers, returned in descending order. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums: list,n: int) -> list:\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE] \n\n " "[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums: list,n: int) -> list:\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n" 'You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n'
), ),
dict(role="BOT", prompt="[BEGIN]\n"), dict(role='BOT', prompt='[BEGIN]\n'),
], )), ], )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)) inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_plus_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator, metric='MBPPPlus'), pred_role="BOT") mbpp_plus_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator, metric='MBPPPlus'), pred_role='BOT')
mbpp_plus_datasets = [ mbpp_plus_datasets = [
dict( dict(
......
from mmengine.config import read_base from mmengine.config import read_base
with read_base(): with read_base():
from.mbpp_plus_gen_0b836a import mbpp_plus_datasets # noqa: F401, F403 from.mbpp_plus_gen_0b836a import mbpp_plus_datasets # noqa: F401, F403
\ No newline at end of file
...@@ -12,46 +12,46 @@ mbpp_plus_infer_cfg = dict( ...@@ -12,46 +12,46 @@ mbpp_plus_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the shared elements from the given two lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n" 'You are an expert Python programmer, and here is your task: Write a function to find the shared elements from the given two lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n return tuple(set(test_tup1) & set(test_tup2))' \n[DONE] \n\n " "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n return tuple(set(test_tup1) & set(test_tup2))' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n" 'You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import math\ndef is_not_prime(n):\n if n == 1:\n return True\n for i in range(2, int(math.sqrt(n))+1):\n if n % i == 0:\n return True\n return False' \n[DONE] \n\n " "[BEGIN]\n 'import math\ndef is_not_prime(n):\n if n == 1:\n return True\n for i in range(2, int(math.sqrt(n))+1):\n if n % i == 0:\n return True\n return False' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the n largest integers from a given list of numbers, returned in descending order. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n" 'You are an expert Python programmer, and here is your task: Write a function to find the n largest integers from a given list of numbers, returned in descending order. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
), ),
dict( dict(
role="BOT", role='BOT',
prompt= prompt=
"[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums: list,n: int) -> list:\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE] \n\n " "[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums: list,n: int) -> list:\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE] \n\n "
), ),
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n" 'You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n'
), ),
dict(role="BOT", prompt="[BEGIN]\n"), dict(role='BOT', prompt='[BEGIN]\n'),
], )), ], )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)) inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_plus_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator, metric='MBPPPlus'), pred_role="BOT") mbpp_plus_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator, metric='MBPPPlus'), pred_role='BOT')
mbpp_plus_datasets = [ mbpp_plus_datasets = [
dict( dict(
......
...@@ -5,32 +5,32 @@ from opencompass.openicl.icl_evaluator import JiebaRougeEvaluator ...@@ -5,32 +5,32 @@ from opencompass.openicl.icl_evaluator import JiebaRougeEvaluator
from opencompass.datasets import MGSMSDataset, MGSM_Evaluator, mgsm_postprocess from opencompass.datasets import MGSMSDataset, MGSM_Evaluator, mgsm_postprocess
ALL_LANGUAGES = ["bn", "de", "en", "es", "fr", "ja", "ru", "sw", "te", "th", "zh"] ALL_LANGUAGES = ['bn', 'de', 'en', 'es', 'fr', 'ja', 'ru', 'sw', 'te', 'th', 'zh']
LANG_TO_INSTRUCTIONS = { LANG_TO_INSTRUCTIONS = {
"en": """Solve this math problem. Give the reasoning steps before giving the final answer on the last line by itself in the format of "Answer:". Do not add anything other than the integer answer after "Answer:".\n\n{question}""", 'en': """Solve this math problem. Give the reasoning steps before giving the final answer on the last line by itself in the format of "Answer:". Do not add anything other than the integer answer after "Answer:".\n\n{question}""",
"bn": """এই গণিতের সমস্যাটি সমাধান করুন। চূড়ান্ত উত্তর দেওয়ার আগে যুক্তিসম্পন্ন পদক্ষেপ প্রদান করুন। চূড়ান্ত উত্তরটি একক সংখ্যা হিসাবে "উত্তর:" এর পরে শেষ লাইনে দিন। "উত্তর:" এর পরে অন্য কিছু যুক্ত করবেন না।.\n\n{question}""", 'bn': """এই গণিতের সমস্যাটি সমাধান করুন। চূড়ান্ত উত্তর দেওয়ার আগে যুক্তিসম্পন্ন পদক্ষেপ প্রদান করুন। চূড়ান্ত উত্তরটি একক সংখ্যা হিসাবে "উত্তর:" এর পরে শেষ লাইনে দিন। "উত্তর:" এর পরে অন্য কিছু যুক্ত করবেন না।.\n\n{question}""",
"de": """Löse dieses Mathematikproblem. Gib die Schritte zur Begründung an, bevor du die endgültige Antwort in der letzten Zeile alleine im Format "Antwort:" gibst. Füge nichts anderes als die ganzzahlige Antwort nach "Antwort:" hinzu.\n\n{question}""", 'de': """Löse dieses Mathematikproblem. Gib die Schritte zur Begründung an, bevor du die endgültige Antwort in der letzten Zeile alleine im Format "Antwort:" gibst. Füge nichts anderes als die ganzzahlige Antwort nach "Antwort:" hinzu.\n\n{question}""",
"es": """Resuelve este problema matemático. Proporciona los pasos de razonamiento antes de dar la respuesta final en la última línea por sí misma en el formato de "Respuesta:". No añadas nada más que la respuesta entera después de "Respuesta:".\n\n{question}""", 'es': """Resuelve este problema matemático. Proporciona los pasos de razonamiento antes de dar la respuesta final en la última línea por sí misma en el formato de "Respuesta:". No añadas nada más que la respuesta entera después de "Respuesta:".\n\n{question}""",
"fr": """Résolvez ce problème de mathématiques. Donnez les étapes de raisonnement avant de fournir la réponse finale sur la dernière ligne elle-même dans le format de "Réponse:". N'ajoutez rien d'autre que la réponse entière après "Réponse:".\n\n{question}""", 'fr': """Résolvez ce problème de mathématiques. Donnez les étapes de raisonnement avant de fournir la réponse finale sur la dernière ligne elle-même dans le format de "Réponse:". N'ajoutez rien d'autre que la réponse entière après "Réponse:".\n\n{question}""",
"ja": """の数学の問題を解いてください。最終的な答えを出す前に、解答の推論過程を記述してください。そして最後の行には "答え:" の形式で答えを記述し、その後には整数の答え以外何も追加しないでください。\n\n{question}""", 'ja': """の数学の問題を解いてください。最終的な答えを出す前に、解答の推論過程を記述してください。そして最後の行には "答え:" の形式で答えを記述し、その後には整数の答え以外何も追加しないでください。\n\n{question}""",
"ru": """Решите эту математическую задачу. Объясните шаги рассуждения перед тем, как дать окончательный ответ в последней строке сам по себе в формате "Ответ:". Не добавляйте ничего, кроме целочисленного ответа после "Ответ:".\n\n{question}""", 'ru': """Решите эту математическую задачу. Объясните шаги рассуждения перед тем, как дать окончательный ответ в последней строке сам по себе в формате "Ответ:". Не добавляйте ничего, кроме целочисленного ответа после "Ответ:".\n\n{question}""",
"sw": """Suluhisha tatizo hili la hesabu. Toa hatua za mantiki kabla ya kutoa jibu la mwisho kwenye mstari wa mwisho peke yake katika muundo wa "Jibu:". Usiongeze chochote kingine isipokuwa jibu la integer baada ya "Jibu:".\n\n{question}""", 'sw': """Suluhisha tatizo hili la hesabu. Toa hatua za mantiki kabla ya kutoa jibu la mwisho kwenye mstari wa mwisho peke yake katika muundo wa "Jibu:". Usiongeze chochote kingine isipokuwa jibu la integer baada ya "Jibu:".\n\n{question}""",
"te": """ఈ గణిత సమస్యను పరిష్కరించండి. చివరి సమాధానాన్ని ఇవ్వదానికి ముందు తర్కాత్మక అదుగులను ఇవ్వండి. చివరి పంక్తిలో మాత్రమే 'సమాధానం:' అనే ఆకారంలో చివరి సమాధానాద్ని ఇవ్వండి సమాధానం: తర్వాత పూర్ణాంక సమాధానానికి తప్పించి ఎదేనా చేర్చవద్దు.\n\n{question}""", 'te': """ఈ గణిత సమస్యను పరిష్కరించండి. చివరి సమాధానాన్ని ఇవ్వదానికి ముందు తర్కాత్మక అదుగులను ఇవ్వండి. చివరి పంక్తిలో మాత్రమే 'సమాధానం:' అనే ఆకారంలో చివరి సమాధానాద్ని ఇవ్వండి సమాధానం: తర్వాత పూర్ణాంక సమాధానానికి తప్పించి ఎదేనా చేర్చవద్దు.\n\n{question}""",
"th": """แก้ปัญหาคณิตศาสตร์นี้ ให้ให้ขั้นตอนการใช้เหตุผลก่อนที่จะให้คำตอบสุดท้ายในบรรทัดสุดท้ายโดยอยู่ในรูปแบบ "คำตอบ:" ไม่ควรเพิ่มอะไรนอกจากคำตอบที่เป็นจำนวนเต็มหลังจาก "คำตอบ:"\n\n{question}""", 'th': """แก้ปัญหาคณิตศาสตร์นี้ ให้ให้ขั้นตอนการใช้เหตุผลก่อนที่จะให้คำตอบสุดท้ายในบรรทัดสุดท้ายโดยอยู่ในรูปแบบ "คำตอบ:" ไม่ควรเพิ่มอะไรนอกจากคำตอบที่เป็นจำนวนเต็มหลังจาก "คำตอบ:"\n\n{question}""",
"zh": """解决这个数学问题。在最后一行给出答案前,请提供推理步骤。最后一行应该以 "答案: " 的形式独立给出答案。在 "答案:" 后不要添加除整数答案之外的任何内容。\n\n{question}""", 'zh': """解决这个数学问题。在最后一行给出答案前,请提供推理步骤。最后一行应该以 "答案: " 的形式独立给出答案。在 "答案:" 后不要添加除整数答案之外的任何内容。\n\n{question}""",
} }
mgsm_datasets = [] mgsm_datasets = []
for lang in ALL_LANGUAGES: for lang in ALL_LANGUAGES:
mgsm_reader_cfg = dict(input_columns=["question"], output_column="answer") mgsm_reader_cfg = dict(input_columns=['question'], output_column='answer')
mgsm_infer_cfg = dict( mgsm_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
round=[ round=[
dict(role="HUMAN", prompt=LANG_TO_INSTRUCTIONS[lang]), dict(role='HUMAN', prompt=LANG_TO_INSTRUCTIONS[lang]),
] ]
), ),
), ),
...@@ -40,15 +40,15 @@ for lang in ALL_LANGUAGES: ...@@ -40,15 +40,15 @@ for lang in ALL_LANGUAGES:
mgsm_eval_cfg = dict( mgsm_eval_cfg = dict(
evaluator=dict(type=MGSM_Evaluator), evaluator=dict(type=MGSM_Evaluator),
pred_role="BOT", pred_role='BOT',
pred_postprocessor=dict(type=mgsm_postprocess, lang=lang), pred_postprocessor=dict(type=mgsm_postprocess, lang=lang),
) )
mgsm_datasets.append( mgsm_datasets.append(
dict( dict(
type=MGSMSDataset, type=MGSMSDataset,
abbr=f"mgsm_{lang}", abbr=f'mgsm_{lang}',
path=f"data/mgsm/mgsm_{lang}.tsv", path=f'data/mgsm/mgsm_{lang}.tsv',
reader_cfg=mgsm_reader_cfg, reader_cfg=mgsm_reader_cfg,
infer_cfg=mgsm_infer_cfg, infer_cfg=mgsm_infer_cfg,
eval_cfg=mgsm_eval_cfg, eval_cfg=mgsm_eval_cfg,
......
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
...@@ -8,68 +8,68 @@ from opencompass.datasets import MMLUDatasetClean as MMLUDataset ...@@ -8,68 +8,68 @@ from opencompass.datasets import MMLUDatasetClean as MMLUDataset
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
...@@ -80,18 +80,18 @@ for _name in mmlu_all_sets: ...@@ -80,18 +80,18 @@ for _name in mmlu_all_sets:
type=PromptTemplate, type=PromptTemplate,
template={ template={
opt: opt:
f"{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}\n" f'{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}\n'
for opt in ["A", "B", "C", "D"] for opt in ['A', 'B', 'C', 'D']
}, },
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
opt: opt:
f"{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}" f'{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}'
for opt in ["A", "B", "C", "D"] for opt in ['A', 'B', 'C', 'D']
}, },
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
inferencer=dict(type=PPLInferencer), inferencer=dict(type=PPLInferencer),
...@@ -102,9 +102,9 @@ for _name in mmlu_all_sets: ...@@ -102,9 +102,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -9,8 +9,8 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess ...@@ -9,8 +9,8 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_prompt_template = dict( mmlu_prompt_template = dict(
...@@ -37,72 +37,72 @@ mmlu_eval_cfg = dict( ...@@ -37,72 +37,72 @@ mmlu_eval_cfg = dict(
pred_postprocessor=dict(type=first_capital_postprocess)) pred_postprocessor=dict(type=first_capital_postprocess))
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
for _name in mmlu_all_sets: for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg.copy(), infer_cfg=mmlu_infer_cfg.copy(),
......
...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_option_postprocess ...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
...@@ -81,25 +81,25 @@ for _name in mmlu_all_sets: ...@@ -81,25 +81,25 @@ for _name in mmlu_all_sets:
type=PromptTemplate, type=PromptTemplate,
template=dict(round=[ template=dict(round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: " f'{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: '
), ),
dict(role="BOT", prompt="{target}\n") dict(role='BOT', prompt='{target}\n')
]), ]),
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
begin="</E>", begin='</E>',
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt=f"{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: " prompt=f'{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: '
), ),
], ],
), ),
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
inferencer=dict(type=GenInferencer), inferencer=dict(type=GenInferencer),
...@@ -111,9 +111,9 @@ for _name in mmlu_all_sets: ...@@ -111,9 +111,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess ...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
...@@ -81,26 +81,26 @@ for _name in mmlu_all_sets: ...@@ -81,26 +81,26 @@ for _name in mmlu_all_sets:
type=PromptTemplate, type=PromptTemplate,
template=dict(round=[ template=dict(round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: " f'{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: '
), ),
dict(role="BOT", prompt="{target}\n") dict(role='BOT', prompt='{target}\n')
]), ]),
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
begin="</E>", begin='</E>',
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: " f'{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: '
), ),
], ],
), ),
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
inferencer=dict(type=GenInferencer), inferencer=dict(type=GenInferencer),
...@@ -112,9 +112,9 @@ for _name in mmlu_all_sets: ...@@ -112,9 +112,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -8,68 +8,68 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess ...@@ -8,68 +8,68 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
# None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader # None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
...@@ -79,13 +79,13 @@ for _name in mmlu_all_sets: ...@@ -79,13 +79,13 @@ for _name in mmlu_all_sets:
ice_template=dict( ice_template=dict(
type=PromptTemplate, type=PromptTemplate,
template= template=
"{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: {target}\n", '{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: {target}\n',
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template= template=
f"{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer:", f'{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer:',
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
inferencer=dict(type=GenInferencer), inferencer=dict(type=GenInferencer),
...@@ -98,9 +98,9 @@ for _name in mmlu_all_sets: ...@@ -98,9 +98,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_option_postprocess ...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
...@@ -81,26 +81,26 @@ for _name in mmlu_all_sets: ...@@ -81,26 +81,26 @@ for _name in mmlu_all_sets:
type=PromptTemplate, type=PromptTemplate,
template=dict(round=[ template=dict(round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: " f'{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: '
), ),
dict(role="BOT", prompt="{target}\n") dict(role='BOT', prompt='{target}\n')
]), ]),
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
begin="</E>", begin='</E>',
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: " f'{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: '
), ),
], ],
), ),
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
inferencer=dict(type=GenInferencer), inferencer=dict(type=GenInferencer),
...@@ -112,9 +112,9 @@ for _name in mmlu_all_sets: ...@@ -112,9 +112,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -24,8 +24,8 @@ D) {D} ...@@ -24,8 +24,8 @@ D) {D}
""".strip() """.strip()
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_datasets = [] mmlu_datasets = []
...@@ -35,7 +35,7 @@ for name in mmlu_all_sets: ...@@ -35,7 +35,7 @@ for name in mmlu_all_sets:
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
round=[ round=[
dict(role="HUMAN", prompt=QUERY_TEMPLATE), dict(role='HUMAN', prompt=QUERY_TEMPLATE),
], ],
), ),
), ),
...@@ -45,13 +45,13 @@ for name in mmlu_all_sets: ...@@ -45,13 +45,13 @@ for name in mmlu_all_sets:
mmlu_eval_cfg = dict( mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator), evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=match_answer_pattern, answer_pattern=r"(?i)ANSWER\s*:\s*([A-D])")) pred_postprocessor=dict(type=match_answer_pattern, answer_pattern=r'(?i)ANSWER\s*:\s*([A-D])'))
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{name}", abbr=f'lukaemon_mmlu_{name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=name, name=name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -8,68 +8,68 @@ from opencompass.datasets import MMLUDataset ...@@ -8,68 +8,68 @@ from opencompass.datasets import MMLUDataset
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
mmlu_datasets = [] mmlu_datasets = []
...@@ -79,12 +79,12 @@ for _name in mmlu_all_sets: ...@@ -79,12 +79,12 @@ for _name in mmlu_all_sets:
mmlu_infer_cfg = dict( mmlu_infer_cfg = dict(
ice_template=dict( ice_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={opt: f"{question_overall}\nAnswer: {opt}\n" for opt in ["A", "B", "C", "D"]}, template={opt: f'{question_overall}\nAnswer: {opt}\n' for opt in ['A', 'B', 'C', 'D']},
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={opt: f"{_hint}</E>{question_overall}\nAnswer: {opt}" for opt in ["A", "B", "C", "D"]}, template={opt: f'{_hint}</E>{question_overall}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']},
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
inferencer=dict(type=PPLInferencer), inferencer=dict(type=PPLInferencer),
...@@ -94,9 +94,9 @@ for _name in mmlu_all_sets: ...@@ -94,9 +94,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_option_postprocess ...@@ -9,68 +9,68 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict( mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"], input_columns=['input', 'A', 'B', 'C', 'D'],
output_column="target", output_column='target',
train_split='dev') train_split='dev')
mmlu_all_sets = [ mmlu_all_sets = [
"college_biology", 'college_biology',
"college_chemistry", 'college_chemistry',
"college_computer_science", 'college_computer_science',
"college_mathematics", 'college_mathematics',
"college_physics", 'college_physics',
"electrical_engineering", 'electrical_engineering',
"astronomy", 'astronomy',
"anatomy", 'anatomy',
"abstract_algebra", 'abstract_algebra',
"machine_learning", 'machine_learning',
"clinical_knowledge", 'clinical_knowledge',
"global_facts", 'global_facts',
"management", 'management',
"nutrition", 'nutrition',
"marketing", 'marketing',
"professional_accounting", 'professional_accounting',
"high_school_geography", 'high_school_geography',
"international_law", 'international_law',
"moral_scenarios", 'moral_scenarios',
"computer_security", 'computer_security',
"high_school_microeconomics", 'high_school_microeconomics',
"professional_law", 'professional_law',
"medical_genetics", 'medical_genetics',
"professional_psychology", 'professional_psychology',
"jurisprudence", 'jurisprudence',
"world_religions", 'world_religions',
"philosophy", 'philosophy',
"virology", 'virology',
"high_school_chemistry", 'high_school_chemistry',
"public_relations", 'public_relations',
"high_school_macroeconomics", 'high_school_macroeconomics',
"human_sexuality", 'human_sexuality',
"elementary_mathematics", 'elementary_mathematics',
"high_school_physics", 'high_school_physics',
"high_school_computer_science", 'high_school_computer_science',
"high_school_european_history", 'high_school_european_history',
"business_ethics", 'business_ethics',
"moral_disputes", 'moral_disputes',
"high_school_statistics", 'high_school_statistics',
"miscellaneous", 'miscellaneous',
"formal_logic", 'formal_logic',
"high_school_government_and_politics", 'high_school_government_and_politics',
"prehistory", 'prehistory',
"security_studies", 'security_studies',
"high_school_biology", 'high_school_biology',
"logical_fallacies", 'logical_fallacies',
"high_school_world_history", 'high_school_world_history',
"professional_medicine", 'professional_medicine',
"high_school_mathematics", 'high_school_mathematics',
"college_medicine", 'college_medicine',
"high_school_us_history", 'high_school_us_history',
"sociology", 'sociology',
"econometrics", 'econometrics',
"high_school_psychology", 'high_school_psychology',
"human_aging", 'human_aging',
"us_foreign_policy", 'us_foreign_policy',
"conceptual_physics", 'conceptual_physics',
] ]
...@@ -82,26 +82,26 @@ for _name in mmlu_all_sets: ...@@ -82,26 +82,26 @@ for _name in mmlu_all_sets:
type=PromptTemplate, type=PromptTemplate,
template=dict(round=[ template=dict(round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: " f'{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: '
), ),
dict(role="BOT", prompt="{target}\n") dict(role='BOT', prompt='{target}\n')
]), ]),
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
begin="</E>", begin='</E>',
round=[ round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt= prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nLet's think step by step. A: " f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nLet's think step by step. A: "
), ),
], ],
), ),
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=256), inferencer=dict(type=GenInferencer, max_out_len=256),
...@@ -113,9 +113,9 @@ for _name in mmlu_all_sets: ...@@ -113,9 +113,9 @@ for _name in mmlu_all_sets:
mmlu_datasets.append( mmlu_datasets.append(
dict( dict(
abbr=f"lukaemon_mmlu_{_name}", abbr=f'lukaemon_mmlu_{_name}',
type=MMLUDataset, type=MMLUDataset,
path="./data/mmlu/", path='./data/mmlu/',
name=_name, name=_name,
reader_cfg=mmlu_reader_cfg, reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg, infer_cfg=mmlu_infer_cfg,
......
...@@ -12,7 +12,7 @@ narrativeqa_reader_cfg = dict( ...@@ -12,7 +12,7 @@ narrativeqa_reader_cfg = dict(
narrativeqa_infer_cfg = dict( narrativeqa_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"), template='{evidence}\nAnswer these questions:\nQ: {question}?\nA:'),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict( inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
......
...@@ -9,7 +9,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess ...@@ -9,7 +9,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
few_shot_prompts = { few_shot_prompts = {
'single_choice_prompts': { 'single_choice_prompts': {
"single_choice_cn": [ 'single_choice_cn': [
dict(role='HUMAN', prompt='问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'), dict(role='HUMAN', prompt='问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'),
dict(role='BOT', prompt='回答: A'), dict(role='BOT', prompt='回答: A'),
dict(role='HUMAN', prompt='问题: 梅建平作为黄玉兰的姥姥,对黄玉兰的成长有重要影响。梅建平的妈妈是朱丽丽。蓝鑫把黄玉兰视为其母亲。焦慧不仅是朱丽丽的妈妈,还是朱丽丽的榜样。 \n在上面提供的打乱的家族关系文本中,"蓝鑫"的能够向上追溯到的最年长的亲人是谁?\nA. 梅建平\nB. 朱丽丽\nC. 蓝鑫\nD. 焦慧\n'), dict(role='HUMAN', prompt='问题: 梅建平作为黄玉兰的姥姥,对黄玉兰的成长有重要影响。梅建平的妈妈是朱丽丽。蓝鑫把黄玉兰视为其母亲。焦慧不仅是朱丽丽的妈妈,还是朱丽丽的榜样。 \n在上面提供的打乱的家族关系文本中,"蓝鑫"的能够向上追溯到的最年长的亲人是谁?\nA. 梅建平\nB. 朱丽丽\nC. 蓝鑫\nD. 焦慧\n'),
...@@ -20,7 +20,7 @@ few_shot_prompts = { ...@@ -20,7 +20,7 @@ few_shot_prompts = {
dict(role='BOT', prompt='回答: D'), dict(role='BOT', prompt='回答: D'),
dict(role='HUMAN', prompt='问题: {question}'), dict(role='HUMAN', prompt='问题: {question}'),
], ],
"single_choice_en": [ 'single_choice_en': [
dict(role='HUMAN', prompt="Question: Jasmine Lane plays the role of James Hill's father in James Hill's life. \nGiven the scrambled family relationships described above, who is the eldest relative that 'James Hill' can trace back to in the context?\nA. Jasmine Lane\nB. James Hill\nC. Christopher Flores\nD. Paula Reeves\n"), dict(role='HUMAN', prompt="Question: Jasmine Lane plays the role of James Hill's father in James Hill's life. \nGiven the scrambled family relationships described above, who is the eldest relative that 'James Hill' can trace back to in the context?\nA. Jasmine Lane\nB. James Hill\nC. Christopher Flores\nD. Paula Reeves\n"),
dict(role='BOT', prompt='Answer: A'), dict(role='BOT', prompt='Answer: A'),
dict(role='HUMAN', prompt="Question: For Michael Goodwin, Annette Valdez is not just a dad, but also a friend.For Annette Valdez, Mary Sanders is not just a father, but also a friend. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Michael Goodwin' can trace back to in the context?\nA. Michael Goodwin\nB. Annette Valdez\nC. Mary Sanders\nD. James Mitchell\n"), dict(role='HUMAN', prompt="Question: For Michael Goodwin, Annette Valdez is not just a dad, but also a friend.For Annette Valdez, Mary Sanders is not just a father, but also a friend. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Michael Goodwin' can trace back to in the context?\nA. Michael Goodwin\nB. Annette Valdez\nC. Mary Sanders\nD. James Mitchell\n"),
...@@ -31,7 +31,7 @@ few_shot_prompts = { ...@@ -31,7 +31,7 @@ few_shot_prompts = {
dict(role='BOT', prompt='Answer: B'), dict(role='BOT', prompt='Answer: B'),
dict(role='HUMAN', prompt='Question: {question}'), dict(role='HUMAN', prompt='Question: {question}'),
], ],
"single_choice_cn_reasoning": [ 'single_choice_cn_reasoning': [
dict(role='HUMAN', prompt='问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'), dict(role='HUMAN', prompt='问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'),
dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 刘红的爸爸是毕东。\n\n因此,"刘红"能够向上追溯到的最年长的亲人是毕东。所以答案是:A'), dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 刘红的爸爸是毕东。\n\n因此,"刘红"能够向上追溯到的最年长的亲人是毕东。所以答案是:A'),
...@@ -45,7 +45,7 @@ few_shot_prompts = { ...@@ -45,7 +45,7 @@ few_shot_prompts = {
dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 郑玉珍的外婆是谢平。\n- 谢平的奶奶是魏丽丽。\n- 魏丽丽的外公是魏阳。 \n- 魏阳的姥姥是郭兵。\n- 郭兵的爷爷是李建国。 \n- 李建国的外婆是丁颖。 \n- 丁颖的外公是武波。 \n- 武波的父亲是刘瑜。\n- 刘瑜的爷爷是许玲。\n- 许玲的父亲是余桂芳。 \n\n因此,"郑玉珍"能够向上追溯到的最年长的亲人是余桂芳。所以答案是:D'), dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 郑玉珍的外婆是谢平。\n- 谢平的奶奶是魏丽丽。\n- 魏丽丽的外公是魏阳。 \n- 魏阳的姥姥是郭兵。\n- 郭兵的爷爷是李建国。 \n- 李建国的外婆是丁颖。 \n- 丁颖的外公是武波。 \n- 武波的父亲是刘瑜。\n- 刘瑜的爷爷是许玲。\n- 许玲的父亲是余桂芳。 \n\n因此,"郑玉珍"能够向上追溯到的最年长的亲人是余桂芳。所以答案是:D'),
dict(role='HUMAN', prompt='问题: {question}'), dict(role='HUMAN', prompt='问题: {question}'),
], ],
"single_choice_en_reasoning": [ 'single_choice_en_reasoning': [
dict(role='HUMAN', prompt="Question: Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jessica Stewart' can trace back to in the context?\nA. Jack Burch\nB. Jessica Stewart\nC. Sharon House\nD. Carolyn Jackson\n"), dict(role='HUMAN', prompt="Question: Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jessica Stewart' can trace back to in the context?\nA. Jack Burch\nB. Jessica Stewart\nC. Sharon House\nD. Carolyn Jackson\n"),
dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing.\n\nTherefore, the eldest relative that 'Jessica Stewart' can trace back to in the context is Sharon House. The answer is: C"), dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing.\n\nTherefore, the eldest relative that 'Jessica Stewart' can trace back to in the context is Sharon House. The answer is: C"),
dict(role='HUMAN', prompt="Question: For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.Jacob Oconnor's paternal grandmother is Robert Hill. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jacob Oconnor' can trace back to in the context?\nA. Laura Holland\nB. Robert Hill\nC. Jacob Oconnor\nD. Mikayla Scott\n"), dict(role='HUMAN', prompt="Question: For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.Jacob Oconnor's paternal grandmother is Robert Hill. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jacob Oconnor' can trace back to in the context?\nA. Laura Holland\nB. Robert Hill\nC. Jacob Oconnor\nD. Mikayla Scott\n"),
...@@ -92,8 +92,8 @@ for _name in list(single_choice_prompts.keys()): ...@@ -92,8 +92,8 @@ for _name in list(single_choice_prompts.keys()):
needlebench_datasets = [] needlebench_datasets = []
needlebench_atc_reader_cfg = dict(input_columns=["question"], needlebench_atc_reader_cfg = dict(input_columns=['question'],
output_column="answer") output_column='answer')
for _name in list(single_choice_prompts.keys()): for _name in list(single_choice_prompts.keys()):
...@@ -116,7 +116,7 @@ for _name in list(single_choice_prompts.keys()): ...@@ -116,7 +116,7 @@ for _name in list(single_choice_prompts.keys()):
for num_needles in current_needle_num_list: for num_needles in current_needle_num_list:
abbr = (f'NeedleBenchATCDataset-' abbr = (f'NeedleBenchATCDataset-'
f'{num_needles}Needle-{"EN" if "en" in _name else "ZH"}') f'{num_needles}Needle-{"EN" if "en" in _name else "ZH"}')
language = "English" if "en" in _name else "Chinese" language = 'English' if 'en' in _name else 'Chinese'
if 'reasoning' in _name: if 'reasoning' in _name:
abbr += '-Reasoning' abbr += '-Reasoning'
dataset_dict = { dataset_dict = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment