Update threshold in test_nightly_gsm8k_eval.py (#2836)

f1769586 · Lianmin Zheng · GitHub · 5d6e9467 · f1769586
Unverified commit f1769586, authored Jan 10, 2025 by Lianmin Zheng; committed by GitHub on Jan 10, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

test/srt/test_nightly_gsm8k_eval.py +2 -2

No files found.
--- a/test/srt/test_nightly_gsm8k_eval.py
+++ b/test/srt/test_nightly_gsm8k_eval.py
@@ -26,8 +26,8 @@ MODEL_SCORE_THRESHOLDS = {
    "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
    "google/gemma-2-27b-it": 0.92,
    "meta-llama/Llama-3.1-70B-Instruct": 0.95,
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63,
-    "Qwen/Qwen2-57B-A14B-Instruct": 0.88,
+    "Qwen/Qwen2-57B-A14B-Instruct": 0.87,
    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
    "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
    "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,