Commit 6d709641 authored by haileyschoelkopf's avatar haileyschoelkopf
Browse files

add deepmind math task draft

parent f9eca2c8
...@@ -52,6 +52,7 @@ from . import gsm8k ...@@ -52,6 +52,7 @@ from . import gsm8k
from . import storycloze from . import storycloze
from . import toxigen from . import toxigen
from . import crowspairs from . import crowspairs
from . import dm_math
######################################## ########################################
# Translation tasks # Translation tasks
...@@ -306,6 +307,57 @@ TASK_REGISTRY = { ...@@ -306,6 +307,57 @@ TASK_REGISTRY = {
"crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality, "crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality,
"crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance, "crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance,
"crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre, "crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre,
"dm_math_alg_lin1d": dm_math.DMMathLinAlg1d,
"dm_math_alg_lin1d_comp": dm_math.DMMathLinAlg1dComp,
"dm_math_alg_lin2d": dm_math.DMMathLinAlg2d,
"dm_math_alg_lin2d_comp": dm_math.DMMathLinAlg2dComp,
"dm_math_alg_poly_roots": dm_math.DMMathPolyRoots,
"dm_math_alg_poly_roots_comp": dm_math.DMMathPolyRootsComp,
"dm_math_alg_seq_next_term": dm_math.DMMathSeqNext,
"dm_math_alg_seq_nth_term": dm_math.DMMathSeqNth,
"dm_math_arith_add_or_sub": dm_math.DMMathAddOrSub,
"dm_math_arith_add_or_sub_base": dm_math.DMMathAddOrSubBase,
"dm_math_arith_add_sub_comp": dm_math.DMMathAddOrSubComp,
"dm_math_arith_div": dm_math.DMMathDiv,
"dm_math_arith_mixed": dm_math.DMMathMixed,
"dm_math_arith_mul": dm_math.DMMathMult,
"dm_math_arith_mul_div_comp": dm_math.DMMathMultDivComp,
"dm_math_arith_simplify": dm_math.DMMathSimplify,
"dm_math_calc_diff": dm_math.DMMathDiff,
"dm_math_calc_diff_comp": dm_math.DMMathDiffComp,
"dm_math_comp_kth_largest": dm_math.DMMathKthBiggest,
"dm_math_comp_kth_largest_comp": dm_math.DMMathKthBiggestComp,
"dm_math_comp_pair": dm_math.DMMathPair,
"dm_math_comp_pair_comp": dm_math.DMMathPairComp,
"dm_math_comp_sort": dm_math.DMMathSort,
"dm_math_comp_sort_comp": dm_math.DMMathSortComp,
"dm_math_meas_conv": dm_math.DMMathMeasConv,
"dm_math_meas_time": dm_math.DMMathMeasTime,
"dm_math_num_base_conv": dm_math.DMMathBaseConv,
"dm_math_num_div_remainder": dm_math.DMMathDivRemainder,
"dm_math_num_div_remainder_comp": dm_math.DMMathDivRemainderComp,
"dm_math_num_gcd": dm_math.DMMathGcd,
"dm_math_num_gcd_comp": dm_math.DMMathGcdComp,
"dm_math_num_is_factor": dm_math.DMMathIsFactor,
"dm_math_num_is_factor_comp": dm_math.DMMathIsFactorComp,
"dm_math_num_lcm": dm_math.DMMathLcm,
"dm_math_num_lcm_comp": dm_math.DMMathLcmComp,
"dm_math_num_list_prime_factors": dm_math.DMMathListPrimeFactors,
"dm_math_num_list_prime_factors_comp": dm_math.DMMathListPrimeFactorsComp,
"dm_math_num_place_val": dm_math.DMMathPlaceVal,
"dm_math_num_place_val_comp": dm_math.DMMathPlaceValComp,
"dm_math_num_round": dm_math.DMMathRoundNum,
"dm_math_num_round_comp": dm_math.DMMathRoundNumComp,
"dm_math_poly_add": dm_math.DMMathAddPoly,
"dm_math_poly_coeff": dm_math.DMMathPolyCoeff,
"dm_math_poly_collect": dm_math.DMMathPolyCollect,
"dm_math_poly_compose": dm_math.DMMathPolyComp,
"dm_math_poly_eval": dm_math.DMMathPolyEval,
"dm_math_poly_eval_comp": dm_math.DMMathPolyEvalComp,
"dm_math_poly_expand": dm_math.DMMathPolyExpand,
"dm_math_poly_simplify": dm_math.DMMathPolySimplify,
"dm_math_prob_level_set": dm_math.DMMathProbLevelSet,
"dm_math_prob_seq": dm_math.DMMathProbSeq,
# Requires manual download of data. # Requires manual download of data.
# "storycloze_2016": storycloze.StoryCloze2016, # "storycloze_2016": storycloze.StoryCloze2016,
# "storycloze_2018": storycloze.StoryCloze2018, # "storycloze_2018": storycloze.StoryCloze2018,
......
"""
Analysing Mathematical Reasoning Abilities of Neural Models
https://arxiv.org/pdf/1904.01557.pdf
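The DeepMind Mathematics Dataset is a collection of procedurally generated
question/answer pairs, split into modules covering algebra, arithmetic,
calculus, comparison, measurement, numbers, polynomials and probability.
Each task defined below evaluates one module of the dataset.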
Homepage: https://github.com/deepmind/mathematics_dataset
"""
from lm_eval.metrics import mean
from lm_eval.base import Task, rf
_CITATION = """
ADD CITATION HERE
"""
class DMMath(Task):
    """Base task for one module of the DeepMind Mathematics dataset.

    Subclasses select a module by setting ``DATASET_NAME``. The model is
    prompted with the question followed by ``"Answer:"`` and generates
    greedily until a newline; accuracy is exact string match against the
    reference answer after surrounding whitespace is removed.
    """

    DATASET_PATH = "math_dataset"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return False

    def has_test_docs(self):
        return True

    def training_docs(self):
        return map(self._process_doc, self.dataset["train"])

    def validation_docs(self):
        # No validation split is exposed (see has_validation_docs).
        return NotImplemented

    def test_docs(self):
        return map(self._process_doc, self.dataset["test"])

    @staticmethod
    def _decode_field(text):
        """Turn a stringified bytes literal (``"b'...'"``) into plain text.

        ``str.lstrip("b'")`` must not be used for this: it strips a *set* of
        characters, so a question that itself starts with ``b`` or ``'``
        would lose real content. The wrapper is instead parsed as the Python
        bytes literal it is, which also decodes escape sequences such as
        ``\\n``. Strings without the wrapper are returned unchanged.
        """
        import ast

        wrapped = (
            len(text) >= 3
            and text[0] == "b"
            and text[1] in "'\""
            and text[-1] == text[1]
        )
        if not wrapped:
            return text
        try:
            return ast.literal_eval(text).decode("utf-8")
        except (ValueError, SyntaxError, UnicodeDecodeError):
            # Not a parseable bytes literal: remove only the wrapper itself.
            return text[2:-1]

    def _process_doc(self, doc):
        # Dataset fields are in the format "b'{string contents here}'";
        # remove that extraneous bytes formatting from both fields.
        doc["answer"] = self._decode_field(doc["answer"])
        doc["question"] = self._decode_field(doc["question"])
        return doc

    def doc_to_text(self, doc):
        return doc["question"] + "Answer:"

    def should_decontaminate(self):
        return True

    def doc_to_decontamination_query(self, doc):
        return doc["question"]

    def doc_to_target(self, doc):
        return " " + doc["answer"]

    def construct_requests(self, doc, ctx):
        return rf.greedy_until(ctx, ["\n", "\n\n"])

    def process_results(self, doc, results):
        """Score one generation with exact string match.

        Surrounding whitespace is ignored on both sides: the target is
        rendered as ``" " + answer``, so a correct generation normally
        starts with a space that must not count as a mismatch.
        """
        # For now, simple string comparison.
        # TODO: sympy answer checking, especially for harder subsets that
        # don't just return a number.
        is_correct = int(doc["answer"].strip() == results[0].strip())
        return {"acc": is_correct}

    def aggregation(self):
        return {"acc": mean}

    def higher_is_better(self):
        return {"acc": True}
class DMMathLinAlg1d(DMMath):
    """DeepMind Mathematics module ``algebra__linear_1d``."""

    DATASET_NAME = "algebra__linear_1d"
    VERSION = 0


class DMMathLinAlg1dComp(DMMath):
    """DeepMind Mathematics module ``algebra__linear_1d_composed``."""

    DATASET_NAME = "algebra__linear_1d_composed"
    VERSION = 0


class DMMathLinAlg2d(DMMath):
    """DeepMind Mathematics module ``algebra__linear_2d``."""

    DATASET_NAME = "algebra__linear_2d"
    VERSION = 0
class DMMathLinAlg2dComp(DMMath):
    """DeepMind Mathematics module ``algebra__linear_2d_composed``."""

    VERSION = 0
    # Was "algebra__linear_2d", which made this task a duplicate of the
    # non-composed DMMathLinAlg2d task.
    DATASET_NAME = "algebra__linear_2d_composed"
class DMMathPolyRoots(DMMath):
    """DeepMind Mathematics module ``algebra__polynomial_roots``."""

    DATASET_NAME = "algebra__polynomial_roots"
    VERSION = 0


class DMMathPolyRootsComp(DMMath):
    """DeepMind Mathematics module ``algebra__polynomial_roots_composed``."""

    DATASET_NAME = "algebra__polynomial_roots_composed"
    VERSION = 0


class DMMathSeqNext(DMMath):
    """DeepMind Mathematics module ``algebra__sequence_next_term``."""

    DATASET_NAME = "algebra__sequence_next_term"
    VERSION = 0


class DMMathSeqNth(DMMath):
    """DeepMind Mathematics module ``algebra__sequence_nth_term``."""

    DATASET_NAME = "algebra__sequence_nth_term"
    VERSION = 0
class DMMathAddOrSub(DMMath):
    """DeepMind Mathematics module ``arithmetic__add_or_sub``."""

    DATASET_NAME = "arithmetic__add_or_sub"
    VERSION = 0


class DMMathAddOrSubBase(DMMath):
    """DeepMind Mathematics module ``arithmetic__add_or_sub_in_base``."""

    DATASET_NAME = "arithmetic__add_or_sub_in_base"
    VERSION = 0


class DMMathAddOrSubComp(DMMath):
    """DeepMind Mathematics module ``arithmetic__add_sub_multiple``."""

    DATASET_NAME = "arithmetic__add_sub_multiple"
    VERSION = 0


class DMMathDiv(DMMath):
    """DeepMind Mathematics module ``arithmetic__div``."""

    DATASET_NAME = "arithmetic__div"
    VERSION = 0


class DMMathMixed(DMMath):
    """DeepMind Mathematics module ``arithmetic__mixed``."""

    DATASET_NAME = "arithmetic__mixed"
    VERSION = 0


class DMMathMult(DMMath):
    """DeepMind Mathematics module ``arithmetic__mul``."""

    DATASET_NAME = "arithmetic__mul"
    VERSION = 0


class DMMathMultDivComp(DMMath):
    """DeepMind Mathematics module ``arithmetic__mul_div_multiple``."""

    DATASET_NAME = "arithmetic__mul_div_multiple"
    VERSION = 0


class DMMathNearestRoot(DMMath):
    """DeepMind Mathematics module ``arithmetic__nearest_integer_root``."""

    DATASET_NAME = "arithmetic__nearest_integer_root"
    VERSION = 0


class DMMathSimplify(DMMath):
    """DeepMind Mathematics module ``arithmetic__simplify_surd``."""

    DATASET_NAME = "arithmetic__simplify_surd"
    VERSION = 0
class DMMathDiff(DMMath):
    """DeepMind Mathematics module ``calculus__differentiate``."""

    DATASET_NAME = "calculus__differentiate"
    VERSION = 0


class DMMathDiffComp(DMMath):
    """DeepMind Mathematics module ``calculus__differentiate_composed``."""

    DATASET_NAME = "calculus__differentiate_composed"
    VERSION = 0
class DMMathClosest(DMMath):
    """DeepMind Mathematics module ``comparison__closest``."""

    DATASET_NAME = "comparison__closest"
    VERSION = 0


class DMMathClosestComp(DMMath):
    """DeepMind Mathematics module ``comparison__closest_composed``."""

    DATASET_NAME = "comparison__closest_composed"
    VERSION = 0


class DMMathKthBiggest(DMMath):
    """DeepMind Mathematics module ``comparison__kth_biggest``."""

    DATASET_NAME = "comparison__kth_biggest"
    VERSION = 0


class DMMathKthBiggestComp(DMMath):
    """DeepMind Mathematics module ``comparison__kth_biggest_composed``."""

    DATASET_NAME = "comparison__kth_biggest_composed"
    VERSION = 0


class DMMathPair(DMMath):
    """DeepMind Mathematics module ``comparison__pair``."""

    DATASET_NAME = "comparison__pair"
    VERSION = 0


class DMMathPairComp(DMMath):
    """DeepMind Mathematics module ``comparison__pair_composed``."""

    DATASET_NAME = "comparison__pair_composed"
    VERSION = 0


class DMMathSort(DMMath):
    """DeepMind Mathematics module ``comparison__sort``."""

    DATASET_NAME = "comparison__sort"
    VERSION = 0


class DMMathSortComp(DMMath):
    """DeepMind Mathematics module ``comparison__sort_composed``."""

    DATASET_NAME = "comparison__sort_composed"
    VERSION = 0
class DMMathMeasConv(DMMath):
    """DeepMind Mathematics module ``measurement__conversion``."""

    DATASET_NAME = "measurement__conversion"
    VERSION = 0


class DMMathMeasTime(DMMath):
    """DeepMind Mathematics module ``measurement__time``."""

    DATASET_NAME = "measurement__time"
    VERSION = 0
class DMMathBaseConv(DMMath):
    """DeepMind Mathematics module ``numbers__base_conversion``."""

    DATASET_NAME = "numbers__base_conversion"
    VERSION = 0


class DMMathDivRemainder(DMMath):
    """DeepMind Mathematics module ``numbers__div_remainder``."""

    DATASET_NAME = "numbers__div_remainder"
    VERSION = 0


class DMMathDivRemainderComp(DMMath):
    """DeepMind Mathematics module ``numbers__div_remainder_composed``."""

    DATASET_NAME = "numbers__div_remainder_composed"
    VERSION = 0


class DMMathGcd(DMMath):
    """DeepMind Mathematics module ``numbers__gcd``."""

    DATASET_NAME = "numbers__gcd"
    VERSION = 0


class DMMathGcdComp(DMMath):
    """DeepMind Mathematics module ``numbers__gcd_composed``."""

    DATASET_NAME = "numbers__gcd_composed"
    VERSION = 0


class DMMathIsFactor(DMMath):
    """DeepMind Mathematics module ``numbers__is_factor``."""

    DATASET_NAME = "numbers__is_factor"
    VERSION = 0


class DMMathIsFactorComp(DMMath):
    """DeepMind Mathematics module ``numbers__is_factor_composed``."""

    DATASET_NAME = "numbers__is_factor_composed"
    VERSION = 0


class DMMathLcm(DMMath):
    """DeepMind Mathematics module ``numbers__lcm``."""

    DATASET_NAME = "numbers__lcm"
    VERSION = 0


class DMMathLcmComp(DMMath):
    """DeepMind Mathematics module ``numbers__lcm_composed``."""

    DATASET_NAME = "numbers__lcm_composed"
    VERSION = 0


class DMMathListPrimeFactors(DMMath):
    """DeepMind Mathematics module ``numbers__list_prime_factors``."""

    DATASET_NAME = "numbers__list_prime_factors"
    VERSION = 0


class DMMathListPrimeFactorsComp(DMMath):
    """DeepMind Mathematics module ``numbers__list_prime_factors_composed``."""

    DATASET_NAME = "numbers__list_prime_factors_composed"
    VERSION = 0


class DMMathPlaceVal(DMMath):
    """DeepMind Mathematics module ``numbers__place_value``."""

    DATASET_NAME = "numbers__place_value"
    VERSION = 0


class DMMathPlaceValComp(DMMath):
    """DeepMind Mathematics module ``numbers__place_value_composed``."""

    DATASET_NAME = "numbers__place_value_composed"
    VERSION = 0


class DMMathRoundNum(DMMath):
    """DeepMind Mathematics module ``numbers__round_number``."""

    DATASET_NAME = "numbers__round_number"
    VERSION = 0


class DMMathRoundNumComp(DMMath):
    """DeepMind Mathematics module ``numbers__round_number_composed``."""

    DATASET_NAME = "numbers__round_number_composed"
    VERSION = 0
class DMMathAddPoly(DMMath):
    """DeepMind Mathematics module ``polynomials__add``."""

    DATASET_NAME = "polynomials__add"
    VERSION = 0


class DMMathPolyCoeff(DMMath):
    """DeepMind Mathematics module ``polynomials__coefficient_named``."""

    DATASET_NAME = "polynomials__coefficient_named"
    VERSION = 0


class DMMathPolyCollect(DMMath):
    """DeepMind Mathematics module ``polynomials__collect``."""

    DATASET_NAME = "polynomials__collect"
    VERSION = 0


class DMMathPolyComp(DMMath):
    """DeepMind Mathematics module ``polynomials__compose``."""

    DATASET_NAME = "polynomials__compose"
    VERSION = 0


class DMMathPolyEval(DMMath):
    """DeepMind Mathematics module ``polynomials__evaluate``."""

    DATASET_NAME = "polynomials__evaluate"
    VERSION = 0


class DMMathPolyEvalComp(DMMath):
    """DeepMind Mathematics module ``polynomials__evaluate_composed``."""

    DATASET_NAME = "polynomials__evaluate_composed"
    VERSION = 0


class DMMathPolyExpand(DMMath):
    """DeepMind Mathematics module ``polynomials__expand``."""

    DATASET_NAME = "polynomials__expand"
    VERSION = 0


class DMMathPolySimplify(DMMath):
    """DeepMind Mathematics module ``polynomials__simplify_power``."""

    DATASET_NAME = "polynomials__simplify_power"
    VERSION = 0
class DMMathProbLevelSet(DMMath):
    """DeepMind Mathematics module ``probability__swr_p_level_set``."""

    DATASET_NAME = "probability__swr_p_level_set"
    VERSION = 0


class DMMathProbSeq(DMMath):
    """DeepMind Mathematics module ``probability__swr_p_sequence``."""

    DATASET_NAME = "probability__swr_p_sequence"
    VERSION = 0
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment