Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
6d709641
Commit
6d709641
authored
Feb 10, 2023
by
haileyschoelkopf
Browse files
add deepmind math task draft
parent
f9eca2c8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
342 additions
and
0 deletions
+342
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+52
-0
lm_eval/tasks/dm_math.py
lm_eval/tasks/dm_math.py
+290
-0
No files found.
lm_eval/tasks/__init__.py
View file @
6d709641
...
@@ -52,6 +52,7 @@ from . import gsm8k
...
@@ -52,6 +52,7 @@ from . import gsm8k
from
.
import
storycloze
from
.
import
storycloze
from
.
import
toxigen
from
.
import
toxigen
from
.
import
crowspairs
from
.
import
crowspairs
from
.
import
dm_math
########################################
########################################
# Translation tasks
# Translation tasks
...
@@ -306,6 +307,57 @@ TASK_REGISTRY = {
...
@@ -306,6 +307,57 @@ TASK_REGISTRY = {
"crows_pairs_french_nationality"
:
crowspairs
.
CrowsPairsFrenchNationality
,
"crows_pairs_french_nationality"
:
crowspairs
.
CrowsPairsFrenchNationality
,
"crows_pairs_french_physical_appearance"
:
crowspairs
.
CrowsPairsFrenchPhysicalAppearance
,
"crows_pairs_french_physical_appearance"
:
crowspairs
.
CrowsPairsFrenchPhysicalAppearance
,
"crows_pairs_french_autre"
:
crowspairs
.
CrowsPairsFrenchAutre
,
"crows_pairs_french_autre"
:
crowspairs
.
CrowsPairsFrenchAutre
,
"dm_math_alg_lin1d"
:
dm_math
.
DMMathLinAlg1d
,
"dm_math_alg_lin1d_comp"
:
dm_math
.
DMMathLinAlg1dComp
,
"dm_math_alg_lin2d"
:
dm_math
.
DMMathLinAlg2d
,
"dm_math_alg_lin2d_comp"
:
dm_math
.
DMMathLinAlg2dComp
,
"dm_math_alg_poly_roots"
:
dm_math
.
DMMathPolyRoots
,
"dm_math_alg_poly_roots_comp"
:
dm_math
.
DMMathPolyRootsComp
,
"dm_math_alg_seq_next_term"
:
dm_math
.
DMMathSeqNext
,
"dm_math_alg_seq_nth_term"
:
dm_math
.
DMMathSeqNth
,
"dm_math_arith_add_or_sub"
:
dm_math
.
DMMathAddOrSub
,
"dm_math_arith_add_or_sub_base"
:
dm_math
.
DMMathAddOrSubBase
,
"dm_math_arith_add_sub_comp"
:
dm_math
.
DMMathAddOrSubComp
,
"dm_math_arith_div"
:
dm_math
.
DMMathDiv
,
"dm_math_arith_mixed"
:
dm_math
.
DMMathMixed
,
"dm_math_arith_mul"
:
dm_math
.
DMMathMult
,
"dm_math_arith_mul_div_comp"
:
dm_math
.
DMMathMultDivComp
,
"dm_math_arith_simplify"
:
dm_math
.
DMMathSimplify
,
"dm_math_calc_diff"
:
dm_math
.
DMMathDiff
,
"dm_math_calc_diff_comp"
:
dm_math
.
DMMathDiffComp
,
"dm_math_comp_kth_largest"
:
dm_math
.
DMMathKthBiggest
,
"dm_math_comp_kth_largest_comp"
:
dm_math
.
DMMathKthBiggestComp
,
"dm_math_comp_pair"
:
dm_math
.
DMMathPair
,
"dm_math_comp_pair_comp"
:
dm_math
.
DMMathPairComp
,
"dm_math_comp_sort"
:
dm_math
.
DMMathSort
,
"dm_math_comp_sort_comp"
:
dm_math
.
DMMathSortComp
,
"dm_math_meas_conv"
:
dm_math
.
DMMathMeasConv
,
"dm_math_meas_time"
:
dm_math
.
DMMathMeasTime
,
"dm_math_num_base_conv"
:
dm_math
.
DMMathBaseConv
,
"dm_math_num_div_remainder"
:
dm_math
.
DMMathDivRemainder
,
"dm_math_num_div_remainder_comp"
:
dm_math
.
DMMathDivRemainderComp
,
"dm_math_num_gcd"
:
dm_math
.
DMMathGcd
,
"dm_math_num_gcd_comp"
:
dm_math
.
DMMathGcdComp
,
"dm_math_num_is_factor"
:
dm_math
.
DMMathIsFactor
,
"dm_math_num_is_factor_comp"
:
dm_math
.
DMMathIsFactorComp
,
"dm_math_num_lcm"
:
dm_math
.
DMMathLcm
,
"dm_math_num_lcm_comp"
:
dm_math
.
DMMathLcmComp
,
"dm_math_num_list_prime_factors"
:
dm_math
.
DMMathListPrimeFactors
,
"dm_math_num_list_prime_factors_comp"
:
dm_math
.
DMMathListPrimeFactorsComp
,
"dm_math_num_place_val"
:
dm_math
.
DMMathPlaceVal
,
"dm_math_num_place_val_comp"
:
dm_math
.
DMMathPlaceValComp
,
"dm_math_num_round"
:
dm_math
.
DMMathRoundNum
,
"dm_math_num_round_comp"
:
dm_math
.
DMMathRoundNumComp
,
"dm_math_poly_add"
:
dm_math
.
DMMathAddPoly
,
"dm_math_poly_coeff"
:
dm_math
.
DMMathPolyCoeff
,
"dm_math_poly_collect"
:
dm_math
.
DMMathPolyCollect
,
"dm_math_poly_compose"
:
dm_math
.
DMMathPolyComp
,
"dm_math_poly_eval"
:
dm_math
.
DMMathPolyEval
,
"dm_math_poly_eval_comp"
:
dm_math
.
DMMathPolyEvalComp
,
"dm_math_poly_expand"
:
dm_math
.
DMMathPolyExpand
,
"dm_math_poly_simplify"
:
dm_math
.
DMMathPolySimplify
,
"dm_math_prob_level_set"
:
dm_math
.
DMMathProbLevelSet
,
"dm_math_prob_seq"
:
dm_math
.
DMMathProbSeq
,
# Requires manual download of data.
# Requires manual download of data.
# "storycloze_2016": storycloze.StoryCloze2016,
# "storycloze_2016": storycloze.StoryCloze2016,
# "storycloze_2018": storycloze.StoryCloze2018,
# "storycloze_2018": storycloze.StoryCloze2018,
...
...
lm_eval/tasks/dm_math.py
0 → 100644
View file @
6d709641
"""
Analysing Mathematical Reasoning Abilities of Neural Models
https://arxiv.org/pdf/1904.01557.pdf
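The DeepMind Mathematics Dataset is a collection of generated mathematical
question/answer pairs, organised into subsets covering algebra, arithmetic,
calculus, comparison, measurement, numbers, polynomials and probability.
Each task defined in this file evaluates a model on one subset by comparing
its generated answer with the reference answer string.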
Homepage: https://github.com/deepmind/mathematics_dataset
"""
from
lm_eval.metrics
import
mean
from
lm_eval.base
import
Task
,
rf
_CITATION
=
"""
ADD CITATION HERE
"""
class DMMath(Task):
    """Base task for subsets of the ``math_dataset`` dataset.

    Concrete subclasses select a subset by overriding ``DATASET_NAME``.
    Each document is prompted as ``<question>Answer:``; the model generates
    greedily until a newline, and the generation is scored by string equality
    with the reference answer (metric ``acc``, aggregated with ``mean``).
    """

    DATASET_PATH = "math_dataset"
    DATASET_NAME = None

    @staticmethod
    def _decode_bytes_repr(text):
        """Return the text wrapped inside a ``"b'...'"`` bytes-literal string.

        The previous implementation used ``lstrip("b'")`` / ``rstrip("'")``,
        which strip *sets of characters* rather than a prefix/suffix and so
        also removed a leading ``b`` or quote that belonged to the content.
        Parsing the literal removes exactly the wrapper and handles both
        ``b'...'`` and ``b"..."`` forms.

        Args:
            text: A string, expected to look like the ``repr`` of a bytes
                object.

        Returns:
            The decoded contents, or ``text`` unchanged when it is not a
            well-formed bytes literal.
        """
        import ast  # local import so the fix needs no file-level change

        if not text.startswith(("b'", 'b"')):
            return text
        try:
            value = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            # Not a valid literal: leave the field untouched instead of guessing.
            return text
        if not isinstance(value, bytes):
            return text
        # NOTE(review): parsing also turns escape sequences inside the literal
        # (e.g. a trailing backslash-n) into the characters they denote.
        # Confirm against raw dataset rows that this is the desired text.
        try:
            return value.decode("utf-8")
        except UnicodeDecodeError:
            return text

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return False

    def has_test_docs(self):
        return True

    def training_docs(self):
        """Return a lazy iterator over the processed ``train`` split."""
        return map(self._process_doc, self.dataset["train"])

    def validation_docs(self):
        # has_validation_docs() is False; this sentinel value is kept
        # unchanged from the original implementation.
        return NotImplemented

    def test_docs(self):
        """Return a lazy iterator over the processed ``test`` split."""
        return map(self._process_doc, self.dataset["test"])

    def _process_doc(self, doc):
        """Unwrap the bytes-literal formatting of ``question`` and ``answer``.

        Dataset fields are in the format ``"b'{string contents here}'"``; the
        wrapper is removed in place and the same ``doc`` mapping is returned.
        """
        doc["answer"] = self._decode_bytes_repr(doc["answer"])
        doc["question"] = self._decode_bytes_repr(doc["question"])
        return doc

    def doc_to_text(self, doc):
        """Build the prompt: the question immediately followed by ``Answer:``."""
        return doc["question"] + "Answer:"

    def should_decontaminate(self):
        return True

    def doc_to_decontamination_query(self, doc):
        return doc["question"]

    def doc_to_target(self, doc):
        """Return the reference answer, prefixed with a single space."""
        return " " + doc["answer"]

    def construct_requests(self, doc, ctx):
        """Request a greedy generation from ``ctx`` that stops at a newline."""
        return rf.greedy_until(ctx, ["\n", "\n\n"])

    def process_results(self, doc, results):
        """Score one generation by exact match against the reference answer.

        Surrounding whitespace is ignored on both sides: ``doc_to_target``
        prefixes the target with a space, so a generation that starts with
        that space would otherwise never equal the bare answer.

        Args:
            doc: The processed document; only ``doc["answer"]`` is read.
            results: Sequence whose first element is the generated string.

        Returns:
            ``{"acc": 1}`` on a match, ``{"acc": 0}`` otherwise.
        """
        # For now, simple string comparison.
        # TODO: sympy answer checking, especially for harder subsets that
        # don't just return a number.
        expected = doc["answer"].strip()
        generated = results[0].strip()
        return {"acc": int(expected == generated)}

    def aggregation(self):
        return {"acc": mean}

    def higher_is_better(self):
        return {"acc": True}
class DMMathLinAlg1d(DMMath):
    """DMMath task bound to the ``algebra__linear_1d`` subset."""

    DATASET_NAME = "algebra__linear_1d"
    VERSION = 0
class DMMathLinAlg1dComp(DMMath):
    """DMMath task bound to the ``algebra__linear_1d_composed`` subset."""

    DATASET_NAME = "algebra__linear_1d_composed"
    VERSION = 0
class DMMathLinAlg2d(DMMath):
    """DMMath task bound to the ``algebra__linear_2d`` subset."""

    DATASET_NAME = "algebra__linear_2d"
    VERSION = 0
class DMMathLinAlg2dComp(DMMath):
    """DMMath task bound to the ``algebra__linear_2d_composed`` subset.

    The subset name previously duplicated ``DMMathLinAlg2d``'s
    (``algebra__linear_2d``), so this task evaluated the non-composed data.
    """

    VERSION = 0
    DATASET_NAME = "algebra__linear_2d_composed"
class DMMathPolyRoots(DMMath):
    """DMMath task bound to the ``algebra__polynomial_roots`` subset."""

    DATASET_NAME = "algebra__polynomial_roots"
    VERSION = 0
class DMMathPolyRootsComp(DMMath):
    """DMMath task bound to the ``algebra__polynomial_roots_composed`` subset."""

    DATASET_NAME = "algebra__polynomial_roots_composed"
    VERSION = 0
class DMMathSeqNext(DMMath):
    """DMMath task bound to the ``algebra__sequence_next_term`` subset."""

    DATASET_NAME = "algebra__sequence_next_term"
    VERSION = 0
class DMMathSeqNth(DMMath):
    """DMMath task bound to the ``algebra__sequence_nth_term`` subset."""

    DATASET_NAME = "algebra__sequence_nth_term"
    VERSION = 0
class DMMathAddOrSub(DMMath):
    """DMMath task bound to the ``arithmetic__add_or_sub`` subset."""

    DATASET_NAME = "arithmetic__add_or_sub"
    VERSION = 0
class DMMathAddOrSubBase(DMMath):
    """DMMath task bound to the ``arithmetic__add_or_sub_in_base`` subset."""

    DATASET_NAME = "arithmetic__add_or_sub_in_base"
    VERSION = 0
class DMMathAddOrSubComp(DMMath):
    """DMMath task bound to the ``arithmetic__add_sub_multiple`` subset."""

    DATASET_NAME = "arithmetic__add_sub_multiple"
    VERSION = 0
class DMMathDiv(DMMath):
    """DMMath task bound to the ``arithmetic__div`` subset."""

    DATASET_NAME = "arithmetic__div"
    VERSION = 0
class DMMathMixed(DMMath):
    """DMMath task bound to the ``arithmetic__mixed`` subset."""

    DATASET_NAME = "arithmetic__mixed"
    VERSION = 0
class DMMathMult(DMMath):
    """DMMath task bound to the ``arithmetic__mul`` subset."""

    DATASET_NAME = "arithmetic__mul"
    VERSION = 0
class DMMathMultDivComp(DMMath):
    """DMMath task bound to the ``arithmetic__mul_div_multiple`` subset."""

    DATASET_NAME = "arithmetic__mul_div_multiple"
    VERSION = 0
class DMMathNearestRoot(DMMath):
    """DMMath task bound to the ``arithmetic__nearest_integer_root`` subset."""

    # NOTE(review): no TASK_REGISTRY entry for this class is visible in the
    # accompanying change -- confirm whether it should be registered.
    DATASET_NAME = "arithmetic__nearest_integer_root"
    VERSION = 0
class DMMathSimplify(DMMath):
    """DMMath task bound to the ``arithmetic__simplify_surd`` subset."""

    DATASET_NAME = "arithmetic__simplify_surd"
    VERSION = 0
class DMMathDiff(DMMath):
    """DMMath task bound to the ``calculus__differentiate`` subset."""

    DATASET_NAME = "calculus__differentiate"
    VERSION = 0
class DMMathDiffComp(DMMath):
    """DMMath task bound to the ``calculus__differentiate_composed`` subset."""

    DATASET_NAME = "calculus__differentiate_composed"
    VERSION = 0
class DMMathClosest(DMMath):
    """DMMath task bound to the ``comparison__closest`` subset."""

    # NOTE(review): no TASK_REGISTRY entry for this class is visible in the
    # accompanying change -- confirm whether it should be registered.
    DATASET_NAME = "comparison__closest"
    VERSION = 0
class DMMathClosestComp(DMMath):
    """DMMath task bound to the ``comparison__closest_composed`` subset."""

    # NOTE(review): no TASK_REGISTRY entry for this class is visible in the
    # accompanying change -- confirm whether it should be registered.
    DATASET_NAME = "comparison__closest_composed"
    VERSION = 0
class DMMathKthBiggest(DMMath):
    """DMMath task bound to the ``comparison__kth_biggest`` subset."""

    DATASET_NAME = "comparison__kth_biggest"
    VERSION = 0
class DMMathKthBiggestComp(DMMath):
    """DMMath task bound to the ``comparison__kth_biggest_composed`` subset."""

    DATASET_NAME = "comparison__kth_biggest_composed"
    VERSION = 0
class DMMathPair(DMMath):
    """DMMath task bound to the ``comparison__pair`` subset."""

    DATASET_NAME = "comparison__pair"
    VERSION = 0
class DMMathPairComp(DMMath):
    """DMMath task bound to the ``comparison__pair_composed`` subset."""

    DATASET_NAME = "comparison__pair_composed"
    VERSION = 0
class DMMathSort(DMMath):
    """DMMath task bound to the ``comparison__sort`` subset."""

    DATASET_NAME = "comparison__sort"
    VERSION = 0
class DMMathSortComp(DMMath):
    """DMMath task bound to the ``comparison__sort_composed`` subset."""

    DATASET_NAME = "comparison__sort_composed"
    VERSION = 0
class DMMathMeasConv(DMMath):
    """DMMath task bound to the ``measurement__conversion`` subset."""

    DATASET_NAME = "measurement__conversion"
    VERSION = 0
class DMMathMeasTime(DMMath):
    """DMMath task bound to the ``measurement__time`` subset."""

    DATASET_NAME = "measurement__time"
    VERSION = 0
class DMMathBaseConv(DMMath):
    """DMMath task bound to the ``numbers__base_conversion`` subset."""

    DATASET_NAME = "numbers__base_conversion"
    VERSION = 0
class DMMathDivRemainder(DMMath):
    """DMMath task bound to the ``numbers__div_remainder`` subset."""

    DATASET_NAME = "numbers__div_remainder"
    VERSION = 0
class DMMathDivRemainderComp(DMMath):
    """DMMath task bound to the ``numbers__div_remainder_composed`` subset."""

    DATASET_NAME = "numbers__div_remainder_composed"
    VERSION = 0
class DMMathGcd(DMMath):
    """DMMath task bound to the ``numbers__gcd`` subset."""

    DATASET_NAME = "numbers__gcd"
    VERSION = 0
class DMMathGcdComp(DMMath):
    """DMMath task bound to the ``numbers__gcd_composed`` subset."""

    DATASET_NAME = "numbers__gcd_composed"
    VERSION = 0
class DMMathIsFactor(DMMath):
    """DMMath task bound to the ``numbers__is_factor`` subset."""

    DATASET_NAME = "numbers__is_factor"
    VERSION = 0
class DMMathIsFactorComp(DMMath):
    """DMMath task bound to the ``numbers__is_factor_composed`` subset."""

    DATASET_NAME = "numbers__is_factor_composed"
    VERSION = 0
class DMMathLcm(DMMath):
    """DMMath task bound to the ``numbers__lcm`` subset."""

    DATASET_NAME = "numbers__lcm"
    VERSION = 0
class DMMathLcmComp(DMMath):
    """DMMath task bound to the ``numbers__lcm_composed`` subset."""

    DATASET_NAME = "numbers__lcm_composed"
    VERSION = 0
class DMMathListPrimeFactors(DMMath):
    """DMMath task bound to the ``numbers__list_prime_factors`` subset."""

    DATASET_NAME = "numbers__list_prime_factors"
    VERSION = 0
class DMMathListPrimeFactorsComp(DMMath):
    """DMMath task bound to the ``numbers__list_prime_factors_composed`` subset."""

    DATASET_NAME = "numbers__list_prime_factors_composed"
    VERSION = 0
class DMMathPlaceVal(DMMath):
    """DMMath task bound to the ``numbers__place_value`` subset."""

    DATASET_NAME = "numbers__place_value"
    VERSION = 0
class DMMathPlaceValComp(DMMath):
    """DMMath task bound to the ``numbers__place_value_composed`` subset."""

    DATASET_NAME = "numbers__place_value_composed"
    VERSION = 0
class DMMathRoundNum(DMMath):
    """DMMath task bound to the ``numbers__round_number`` subset."""

    DATASET_NAME = "numbers__round_number"
    VERSION = 0
class DMMathRoundNumComp(DMMath):
    """DMMath task bound to the ``numbers__round_number_composed`` subset."""

    DATASET_NAME = "numbers__round_number_composed"
    VERSION = 0
class DMMathAddPoly(DMMath):
    """DMMath task bound to the ``polynomials__add`` subset."""

    DATASET_NAME = "polynomials__add"
    VERSION = 0
class DMMathPolyCoeff(DMMath):
    """DMMath task bound to the ``polynomials__coefficient_named`` subset."""

    DATASET_NAME = "polynomials__coefficient_named"
    VERSION = 0
class DMMathPolyCollect(DMMath):
    """DMMath task bound to the ``polynomials__collect`` subset."""

    DATASET_NAME = "polynomials__collect"
    VERSION = 0
class DMMathPolyComp(DMMath):
    """DMMath task bound to the ``polynomials__compose`` subset."""

    DATASET_NAME = "polynomials__compose"
    VERSION = 0
class DMMathPolyEval(DMMath):
    """DMMath task bound to the ``polynomials__evaluate`` subset."""

    DATASET_NAME = "polynomials__evaluate"
    VERSION = 0
class DMMathPolyEvalComp(DMMath):
    """DMMath task bound to the ``polynomials__evaluate_composed`` subset."""

    DATASET_NAME = "polynomials__evaluate_composed"
    VERSION = 0
class DMMathPolyExpand(DMMath):
    """DMMath task bound to the ``polynomials__expand`` subset."""

    DATASET_NAME = "polynomials__expand"
    VERSION = 0
class DMMathPolySimplify(DMMath):
    """DMMath task bound to the ``polynomials__simplify_power`` subset."""

    DATASET_NAME = "polynomials__simplify_power"
    VERSION = 0
class DMMathProbLevelSet(DMMath):
    """DMMath task bound to the ``probability__swr_p_level_set`` subset."""

    DATASET_NAME = "probability__swr_p_level_set"
    VERSION = 0
class DMMathProbSeq(DMMath):
    """DMMath task bound to the ``probability__swr_p_sequence`` subset."""

    DATASET_NAME = "probability__swr_p_sequence"
    VERSION = 0
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment