Commit 358adaf7 (unverified), authored by Baber Abbasi, committed by GitHub
Browse files

add math_verify to some tasks (#2686)

* add math_verify to minerva math

* add math_verify to benchmark

* fix error

* increment version
parent 52df63b7
......@@ -11,5 +11,8 @@ aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
- metric: math_verify
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
......@@ -63,3 +63,6 @@ If other tasks on this dataset are already supported:
### Variant Wishlist
- [ ] zero-shot variant
### Changelog
version 2.0 (21-Feb-2025): added the `math_verify` (extraction) metric. For details, see the [Math-Verify leaderboard blog post](https://huggingface.co/blog/math_verify_leaderboard).
......@@ -19,9 +19,12 @@ metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
- metric: math_verify
aggregation: mean
higher_is_better: true
num_fewshot: 4
metadata:
version: 1.0
version: 2.0
dataset_kwargs:
trust_remote_code: true
fewshot_config:
......
import re
import signal
from importlib.metadata import version
from typing import Dict, List, Optional
import datasets
......@@ -8,13 +9,17 @@ from lm_eval.utils import eval_logger
try:
import antlr4
import sympy
from math_verify import parse, verify
from sympy.parsing.latex import parse_latex
except ModuleNotFoundError:
raise ModuleNotFoundError(
"`sympy` is required for generating translation task prompt templates. \
please install sympy via pip install lm-eval[math] or pip install -e .[math]",
)
assert version("antlr4-python3-runtime").startswith("4.11")
except (ModuleNotFoundError, AssertionError) as e:
raise type(e)(
"`sympy`, `math_verify` and `antlr4-python3-runtime==4.11` are required for generating translation task prompt templates. "
"Please install the required packages via pip install lm-eval[math] or pip install -e .[math]"
) from e
# taken from
......@@ -75,8 +80,13 @@ def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
else:
retval = 0
# math_verify
res = verify(parse(doc["answer"]), parse(candidates))
mathval = 1 if res else 0
results = {
"exact_match": retval,
"math_verify": mathval,
}
return results
......
......@@ -66,7 +66,7 @@ ibm_watsonx_ai = ["ibm_watsonx_ai>=1.1.22"]
ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
neuronx = ["optimum[neuronx]"]
mamba = ["mamba_ssm", "causal-conv1d==1.0.2"]
math = ["sympy>=1.12", "antlr4-python3-runtime==4.11"]
math = ["sympy>=1.12", "antlr4-python3-runtime==4.11", "math_verify[antlr4_11_0]"]
multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
optimum = ["optimum[openvino]"]
promptsource = ["promptsource>=0.2.3"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment