Unverified commit aa2dd2b5, authored by Fengzhe Zhou, committed by GitHub
Browse files

[Format] Add config lints (#892)

parent 3dbba119
...@@ -11,13 +11,13 @@ gsm8k_infer_cfg = dict( ...@@ -11,13 +11,13 @@ gsm8k_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.\nFor the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.\nAngelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.\nHowever, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.\nThey also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.\nAnd they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.\nSo Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.\nThey want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75\nThey will need to plan to study 4 days to allow for all the time they need.\nThe answer is 4\n"), dict(role='BOT', prompt='Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.\nFor the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.\nAngelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.\nHowever, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.\nThey also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.\nAnd they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.\nSo Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.\nThey want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75\nThey will need to plan to study 4 days to allow for all the time they need.\nThe answer is 4\n'),
dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 6 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"), dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 6 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"),
dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"), dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"),
dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n"), dict(role='BOT', prompt='For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n'),
dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"),
], ],
)), )),
......
...@@ -11,18 +11,18 @@ gsm8k_infer_cfg = dict( ...@@ -11,18 +11,18 @@ gsm8k_infer_cfg = dict(
template=dict( template=dict(
round=[ round=[
dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.\nFor the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.\nAngelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.\nHowever, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.\nThey also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.\nAnd they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.\nSo Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.\nThey want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75\nThey will need to plan to study 4 days to allow for all the time they need.\nThe answer is 4\n"), dict(role='BOT', prompt='Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.\nFor the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.\nAngelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.\nHowever, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.\nThey also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.\nAnd they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.\nSo Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.\nThey want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75\nThey will need to plan to study 4 days to allow for all the time they need.\nThe answer is 4\n'),
dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 6 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"), dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 6 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"),
dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"), dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"),
dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n"), dict(role='BOT', prompt='For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n'),
dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"),
], ],
)), )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512, stopping_criteria=[":", "Question:", "Question"])) inferencer=dict(type=GenInferencer, max_out_len=512, stopping_criteria=[':', 'Question:', 'Question']))
gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator), gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
pred_postprocessor=dict(type=gsm8k_postprocess), pred_postprocessor=dict(type=gsm8k_postprocess),
......
...@@ -18,7 +18,7 @@ gsm8k_infer_cfg = dict( ...@@ -18,7 +18,7 @@ gsm8k_infer_cfg = dict(
inferencer=dict(type=GenInferencer, max_out_len=512)) inferencer=dict(type=GenInferencer, max_out_len=512))
gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator), gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
pred_role="BOT", pred_role='BOT',
pred_postprocessor=dict(type=gsm8k_postprocess), pred_postprocessor=dict(type=gsm8k_postprocess),
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess)) dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
......
...@@ -15,7 +15,7 @@ gsm8k_infer_cfg = dict( ...@@ -15,7 +15,7 @@ gsm8k_infer_cfg = dict(
dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"), dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"),
dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"),
dict(role='BOT', prompt="For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n"), dict(role='BOT', prompt='For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n'),
dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"), dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"),
], ],
)), )),
......
...@@ -39,7 +39,7 @@ gsm8k_infer_cfg = dict( ...@@ -39,7 +39,7 @@ gsm8k_infer_cfg = dict(
inferencer=dict(type=GenInferencer)) inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator), gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
pred_role="BOT", pred_role='BOT',
pred_postprocessor=dict(type=gsm8k_postprocess), pred_postprocessor=dict(type=gsm8k_postprocess),
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess)) dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
......
...@@ -7,7 +7,7 @@ from opencompass.datasets import GSM8KDataset, GSM8KReferenceSkywork ...@@ -7,7 +7,7 @@ from opencompass.datasets import GSM8KDataset, GSM8KReferenceSkywork
gsm8k_datasets = [] gsm8k_datasets = []
gsm8k_infer_cfg = dict( gsm8k_infer_cfg = dict(
prompt_template=dict(type=PromptTemplate, template="{question} {answer}"), prompt_template=dict(type=PromptTemplate, template='{question} {answer}'),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLOnlyInferencer), inferencer=dict(type=PPLOnlyInferencer),
) )
...@@ -33,7 +33,7 @@ for split in ['train', 'test']: ...@@ -33,7 +33,7 @@ for split in ['train', 'test']:
gsm8k_infer_cfg = dict( gsm8k_infer_cfg = dict(
prompt_template=dict(type=PromptTemplate, template="{text}"), prompt_template=dict(type=PromptTemplate, template='{text}'),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLOnlyInferencer), inferencer=dict(type=PPLOnlyInferencer),
) )
......
...@@ -10,13 +10,13 @@ gsmhard_infer_cfg = dict( ...@@ -10,13 +10,13 @@ gsmhard_infer_cfg = dict(
ice_template=dict( ice_template=dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
begin="</E>", begin='</E>',
round=[ round=[
dict(role='HUMAN', prompt="Question: {question}\nAnswer:"), dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
dict(role="BOT", prompt="The answer is {answer}"), dict(role='BOT', prompt='The answer is {answer}'),
], ],
), ),
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]), retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
......
...@@ -6,10 +6,10 @@ from opencompass.datasets import hellaswagDatasetwithICE ...@@ -6,10 +6,10 @@ from opencompass.datasets import hellaswagDatasetwithICE
from opencompass.utils.text_postprocessors import first_option_postprocess from opencompass.utils.text_postprocessors import first_option_postprocess
hellaswag_reader_cfg = dict( hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"], input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column="label", output_column='label',
train_split="train", train_split='train',
test_split="val", test_split='val',
) )
hellaswag_infer_cfg = dict( hellaswag_infer_cfg = dict(
...@@ -17,8 +17,8 @@ hellaswag_infer_cfg = dict( ...@@ -17,8 +17,8 @@ hellaswag_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
round=[ round=[
dict(role="HUMAN", prompt=f"{{ctx}}\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}\nWhat is the right option?"), dict(role='HUMAN', prompt=f'{{ctx}}\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}\nWhat is the right option?'),
dict(role="BOT", prompt="{label}\n"), dict(role='BOT', prompt='{label}\n'),
] ]
), ),
), ),
...@@ -26,15 +26,15 @@ hellaswag_infer_cfg = dict( ...@@ -26,15 +26,15 @@ hellaswag_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
begin=[ begin=[
dict(role="HUMAN", prompt="Continue the following text without adding any additional information or formatting:\n"), dict(role='HUMAN', prompt='Continue the following text without adding any additional information or formatting:\n'),
"</E>", '</E>',
], ],
round=[ round=[
dict(role="HUMAN", prompt=f"{{ctx}}\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}\nWhat is the right option?"), dict(role='HUMAN', prompt=f'{{ctx}}\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}\nWhat is the right option?'),
dict(role="BOT", prompt="{label}\n"), dict(role='BOT', prompt='{label}\n'),
], ],
), ),
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=list(range(10))), retriever=dict(type=FixKRetriever, fix_id_list=list(range(10))),
inferencer=dict(type=GenInferencer), inferencer=dict(type=GenInferencer),
...@@ -42,15 +42,15 @@ hellaswag_infer_cfg = dict( ...@@ -42,15 +42,15 @@ hellaswag_infer_cfg = dict(
hellaswag_eval_cfg = dict( hellaswag_eval_cfg = dict(
evaluator=dict(type=AccEvaluator), evaluator=dict(type=AccEvaluator),
pred_role="BOT", pred_role='BOT',
pred_postprocessor=dict(type=first_option_postprocess, options="ABCD"), pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
) )
hellaswag_datasets = [ hellaswag_datasets = [
dict( dict(
abbr="hellaswag", abbr='hellaswag',
type=hellaswagDatasetwithICE, type=hellaswagDatasetwithICE,
path="./data/hellaswag/", path='./data/hellaswag/',
reader_cfg=hellaswag_reader_cfg, reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg, infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg, eval_cfg=hellaswag_eval_cfg,
......
...@@ -6,23 +6,23 @@ from opencompass.datasets import hellaswagDatasetwithICE ...@@ -6,23 +6,23 @@ from opencompass.datasets import hellaswagDatasetwithICE
from opencompass.utils.text_postprocessors import first_capital_postprocess from opencompass.utils.text_postprocessors import first_capital_postprocess
hellaswag_reader_cfg = dict( hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"], input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column="label", output_column='label',
train_split="train", train_split='train',
test_split="val", test_split='val',
) )
hint = "Continue the following text without adding any additional information or formatting:" hint = 'Continue the following text without adding any additional information or formatting:'
question_and_options = "{ctx}\nA) {A}\nB) {B}\nC) {C}\nD) {D}\nWhat is the right option?" question_and_options = '{ctx}\nA) {A}\nB) {B}\nC) {C}\nD) {D}\nWhat is the right option?'
hellaswag_infer_cfg = dict( hellaswag_infer_cfg = dict(
ice_template=dict( ice_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={answer: f'{question_and_options}\n{answer}\n' for answer in ["A", "B", "C", "D"]}, template={answer: f'{question_and_options}\n{answer}\n' for answer in ['A', 'B', 'C', 'D']},
), ),
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={answer: f"{hint}\n</E>{question_and_options}\n{answer}" for answer in ["A", "B", "C", "D"]}, template={answer: f'{hint}\n</E>{question_and_options}\n{answer}' for answer in ['A', 'B', 'C', 'D']},
ice_token="</E>", ice_token='</E>',
), ),
retriever=dict(type=FixKRetriever, fix_id_list=list(range(10))), retriever=dict(type=FixKRetriever, fix_id_list=list(range(10))),
inferencer=dict(type=PPLInferencer), inferencer=dict(type=PPLInferencer),
...@@ -35,9 +35,9 @@ hellaswag_eval_cfg = dict( ...@@ -35,9 +35,9 @@ hellaswag_eval_cfg = dict(
hellaswag_datasets = [ hellaswag_datasets = [
dict( dict(
abbr="hellaswag", abbr='hellaswag',
type=hellaswagDatasetwithICE, type=hellaswagDatasetwithICE,
path="./data/hellaswag/", path='./data/hellaswag/',
reader_cfg=hellaswag_reader_cfg, reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg, infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg, eval_cfg=hellaswag_eval_cfg,
......
...@@ -13,8 +13,8 @@ hellaswag_infer_cfg = dict( ...@@ -13,8 +13,8 @@ hellaswag_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
i: dict(round=[ i: dict(round=[
dict(role="HUMAN", prompt="{ctx}"), dict(role='HUMAN', prompt='{ctx}'),
dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"), dict(role='BOT', prompt=f"{{{chr(ord('A') + i)}}}"),
]) ])
for i in range(4) for i in range(4)
}), }),
......
...@@ -6,8 +6,8 @@ from opencompass.datasets import hellaswagDataset_V2 ...@@ -6,8 +6,8 @@ from opencompass.datasets import hellaswagDataset_V2
from opencompass.utils.text_postprocessors import first_option_postprocess from opencompass.utils.text_postprocessors import first_option_postprocess
hellaswag_reader_cfg = dict( hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"], input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column="label", output_column='label',
) )
hellaswag_infer_cfg = dict( hellaswag_infer_cfg = dict(
...@@ -15,11 +15,11 @@ hellaswag_infer_cfg = dict( ...@@ -15,11 +15,11 @@ hellaswag_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template=dict(round=[ template=dict(round=[
dict( dict(
role="HUMAN", role='HUMAN',
prompt=("{ctx}\nQuestion: Which ending makes the most sense?\n" prompt=('{ctx}\nQuestion: Which ending makes the most sense?\n'
"A. {A}\nB. {B}\nC. {C}\nD. {D}\n" 'A. {A}\nB. {B}\nC. {C}\nD. {D}\n'
"You may choose from 'A', 'B', 'C', 'D'.\n" "You may choose from 'A', 'B', 'C', 'D'.\n"
"Answer:"), 'Answer:'),
), ),
]), ]),
), ),
...@@ -29,7 +29,7 @@ hellaswag_infer_cfg = dict( ...@@ -29,7 +29,7 @@ hellaswag_infer_cfg = dict(
hellaswag_eval_cfg = dict( hellaswag_eval_cfg = dict(
evaluator=dict(type=AccEvaluator), evaluator=dict(type=AccEvaluator),
pred_role="BOT", pred_role='BOT',
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
) )
......
...@@ -13,8 +13,8 @@ hellaswag_infer_cfg = dict( ...@@ -13,8 +13,8 @@ hellaswag_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
i: dict(round=[ i: dict(round=[
dict(role="HUMAN", prompt="{ctx}"), dict(role='HUMAN', prompt='{ctx}'),
dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"), dict(role='BOT', prompt=f"{{{chr(ord('A') + i)}}}"),
]) ])
for i in range(4) for i in range(4)
}), }),
......
...@@ -13,8 +13,8 @@ hellaswag_infer_cfg = dict( ...@@ -13,8 +13,8 @@ hellaswag_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
ans: dict(round=[ ans: dict(round=[
dict(role="HUMAN", prompt="{ctx}\nQuestion: Which ending makes the most sense?\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: "), dict(role='HUMAN', prompt='{ctx}\nQuestion: Which ending makes the most sense?\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '),
dict(role="BOT", prompt=f"{ans}"), dict(role='BOT', prompt=f'{ans}'),
]) for ans in ['A', 'B', 'C', 'D'] ]) for ans in ['A', 'B', 'C', 'D']
}), }),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
......
...@@ -13,10 +13,10 @@ hellaswag_infer_cfg = dict( ...@@ -13,10 +13,10 @@ hellaswag_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
0: "{ctx} {A}", 0: '{ctx} {A}',
1: "{ctx} {B}", 1: '{ctx} {B}',
2: "{ctx} {C}", 2: '{ctx} {C}',
3: "{ctx} {D}", 3: '{ctx} {D}',
}), }),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer)) inferencer=dict(type=PPLInferencer))
......
...@@ -12,17 +12,17 @@ hellaswag_infer_cfg = dict( ...@@ -12,17 +12,17 @@ hellaswag_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
"0": dict( '0': dict(
round=[dict(role="HUMAN", prompt="{query} {A}")] round=[dict(role='HUMAN', prompt='{query} {A}')]
), ),
"1": dict( '1': dict(
round=[dict(role="HUMAN", prompt="{query} {B}")] round=[dict(role='HUMAN', prompt='{query} {B}')]
), ),
"2": dict( '2': dict(
round=[dict(role="HUMAN", prompt="{query} {C}")] round=[dict(role='HUMAN', prompt='{query} {C}')]
), ),
"3": dict( '3': dict(
round=[dict(role="HUMAN", prompt="{query} {D}")] round=[dict(role='HUMAN', prompt='{query} {D}')]
), ),
}), }),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
......
from mmengine.config import read_base from mmengine.config import read_base
with read_base(): with read_base():
from .humanevalx_gen_620cfa import humanevalx_datasets # noqa: F401, F403 from .humanevalx_gen_620cfa import humanevalx_datasets # noqa: F401, F403
\ No newline at end of file
...@@ -33,10 +33,10 @@ Create a {lang} script for this problem: ...@@ -33,10 +33,10 @@ Create a {lang} script for this problem:
humanevalx_eval_cfg_dict = { humanevalx_eval_cfg_dict = {
lang: dict( lang: dict(
evaluator=dict( evaluator=dict(
type=HumanevalXEvaluator, type=HumanevalXEvaluator,
language=lang, language=lang,
ip_address= ip_address=
"localhost", # replace to your code_eval_server ip_address, port 'localhost', # replace to your code_eval_server ip_address, port
port=5001 port=5001
), # refer to https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html to launch a server ), # refer to https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html to launch a server
pred_role='BOT') pred_role='BOT')
...@@ -57,4 +57,4 @@ humanevalx_datasets = [ ...@@ -57,4 +57,4 @@ humanevalx_datasets = [
infer_cfg=humanevalx_infer_cfg[lang], infer_cfg=humanevalx_infer_cfg[lang],
eval_cfg=humanevalx_eval_cfg_dict[lang]) eval_cfg=humanevalx_eval_cfg_dict[lang])
for lang in ['python', 'cpp', 'go', 'java', 'js'] for lang in ['python', 'cpp', 'go', 'java', 'js']
] ]
\ No newline at end of file
...@@ -16,10 +16,10 @@ humanevalx_infer_cfg = dict( ...@@ -16,10 +16,10 @@ humanevalx_infer_cfg = dict(
humanevalx_eval_cfg_dict = { humanevalx_eval_cfg_dict = {
lang : dict( lang : dict(
evaluator=dict( evaluator=dict(
type=HumanevalXEvaluator, type=HumanevalXEvaluator,
language=lang, language=lang,
ip_address= ip_address=
"localhost", # replace to your code_eval_server ip_address, port 'localhost', # replace to your code_eval_server ip_address, port
port=5001), # refer to https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html to launch a server port=5001), # refer to https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html to launch a server
pred_role='BOT') pred_role='BOT')
for lang in ['python', 'cpp', 'go', 'java', 'js'] # do not support rust now for lang in ['python', 'cpp', 'go', 'java', 'js'] # do not support rust now
...@@ -38,4 +38,4 @@ humanevalx_datasets = [ ...@@ -38,4 +38,4 @@ humanevalx_datasets = [
infer_cfg=humanevalx_infer_cfg, infer_cfg=humanevalx_infer_cfg,
eval_cfg=humanevalx_eval_cfg_dict[lang]) eval_cfg=humanevalx_eval_cfg_dict[lang])
for lang in ['python', 'cpp', 'go', 'java', 'js'] for lang in ['python', 'cpp', 'go', 'java', 'js']
] ]
\ No newline at end of file
...@@ -71,7 +71,7 @@ hungarianmath_infer_cfg = dict( ...@@ -71,7 +71,7 @@ hungarianmath_infer_cfg = dict(
type=PromptTemplate, type=PromptTemplate,
template=dict( template=dict(
round=[ round=[
dict(role='HUMAN', prompt=template+"\n\nProblem:\n{question}\n\nSolution:\n"), dict(role='HUMAN', prompt=template+'\n\nProblem:\n{question}\n\nSolution:\n'),
], ],
)), )),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
......
...@@ -13,5 +13,5 @@ with read_base(): ...@@ -13,5 +13,5 @@ with read_base():
from .infinitebenchretrievenumber.infinitebench_retrievenumber_gen import InfiniteBench_retrievenumber_datasets from .infinitebenchretrievenumber.infinitebench_retrievenumber_gen import InfiniteBench_retrievenumber_datasets
from .infinitebenchretrievepasskey.infinitebench_retrievepasskey_gen import InfiniteBench_retrievepasskey_datasets from .infinitebenchretrievepasskey.infinitebench_retrievepasskey_gen import InfiniteBench_retrievepasskey_datasets
from .infinitebenchzhqa.infinitebench_zhqa_gen import InfiniteBench_zhqa_datasets from .infinitebenchzhqa.infinitebench_zhqa_gen import InfiniteBench_zhqa_datasets
infinitebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) infinitebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment