Unverified commit f1769586, authored by Lianmin Zheng and committed by GitHub
Browse files

Update threshold in test_nightly_gsm8k_eval.py (#2836)

parent 5d6e9467
...@@ -26,8 +26,8 @@ MODEL_SCORE_THRESHOLDS = { ...@@ -26,8 +26,8 @@ MODEL_SCORE_THRESHOLDS = {
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85, "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
"google/gemma-2-27b-it": 0.92, "google/gemma-2-27b-it": 0.92,
"meta-llama/Llama-3.1-70B-Instruct": 0.95, "meta-llama/Llama-3.1-70B-Instruct": 0.95,
"mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64, "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63,
"Qwen/Qwen2-57B-A14B-Instruct": 0.88, "Qwen/Qwen2-57B-A14B-Instruct": 0.87,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83, "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54, "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84, "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment