Unverified commit a11f8d5f, authored by Xiaotong Jiang and committed by GitHub
Browse files

[feat] Add math eval to CI (#2652)

parent 098d659c
......@@ -68,6 +68,17 @@ class TestEvalAccuracyLarge(unittest.TestCase):
metrics = run_eval(args)
self.assertGreater(metrics["score"], 0.835)
def test_math(self):
    """Run the "math" eval and check that its score clears the threshold.

    The eval arguments are built from ``self.base_url`` and ``self.model``,
    requesting 5000 examples on 1024 threads. The ``"score"`` entry of the
    metrics returned by ``run_eval`` must be at least ``0.519 - 0.01``.
    """
    # -1% to account for sampling variance
    min_score = 0.519 - 0.01

    eval_config = {
        "base_url": self.base_url,
        "model": self.model,
        "eval_name": "math",
        "num_examples": 5000,
        "num_threads": 1024,
    }
    results = run_eval(SimpleNamespace(**eval_config))
    self.assertGreaterEqual(results["score"], min_score)
# Script entry point: hand control to unittest's command-line runner when
# this file is executed directly rather than imported.
if __name__ == "__main__":
unittest.main()
......@@ -37,6 +37,18 @@ class TestEvalAccuracyMini(unittest.TestCase):
metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.65)
def test_math(self):
    """Run a small "math" eval and check that its score clears the threshold.

    The eval arguments are built from ``self.base_url`` and ``self.model``,
    requesting 64 examples on 32 threads with ``temperature=0.1``. The
    ``"score"`` entry of the metrics returned by ``run_eval`` must be at
    least ``0.519 - 0.03``.
    """
    # -3% to account for sampling variance
    min_score = 0.519 - 0.03

    eval_config = {
        "base_url": self.base_url,
        "model": self.model,
        "eval_name": "math",
        "num_examples": 64,
        "num_threads": 32,
        "temperature": 0.1,
    }
    results = run_eval(SimpleNamespace(**eval_config))
    self.assertGreaterEqual(results["score"], min_score)
# Script entry point: hand control to unittest's command-line runner when
# this file is executed directly rather than imported.
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment