Unverified commit a7164b62, authored by Lianmin Zheng and committed by GitHub
Browse files

Tune the threshold for accuracy tests in CI (#2071)

parent 11668533
......@@ -42,7 +42,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
)
metrics = run_eval(args)
assert metrics["score"] >= 0.71, f"{metrics}"
self.assertGreater(metrics["score"], 0.71)
def test_human_eval(self):
args = SimpleNamespace(
......@@ -54,7 +54,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
)
metrics = run_eval(args)
assert metrics["score"] >= 0.64, f"{metrics}"
self.assertGreater(metrics["score"], 0.64)
def test_mgsm_en(self):
args = SimpleNamespace(
......@@ -66,7 +66,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
)
metrics = run_eval(args)
assert metrics["score"] >= 0.84, f"{metrics}"
self.assertGreater(metrics["score"], 0.84)
if __name__ == "__main__":
......
......@@ -47,7 +47,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
)
metrics = run_eval(args)
assert metrics["score"] >= 0.62, f"{metrics}"
self.assertGreater(metrics["score"], 0.62)
def test_human_eval(self):
args = SimpleNamespace(
......@@ -59,7 +59,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
)
metrics = run_eval(args)
assert metrics["score"] >= 0.42, f"{metrics}"
self.assertGreater(metrics["score"], 0.41)
def test_mgsm_en(self):
args = SimpleNamespace(
......@@ -71,7 +71,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
)
metrics = run_eval(args)
assert metrics["score"] >= 0.62, f"{metrics}"
self.assertGreater(metrics["score"], 0.62)
if __name__ == "__main__":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment