"lib/vscode:/vscode.git/clone" did not exist on "6fe2152bfca7bb1fa681eae957b252c6cd0b7a53"
Unverified Commit 1a4f35e2 authored by Michael Goin's avatar Michael Goin Committed by GitHub
Browse files

Normalize lm-eval command between baseline and correctness test (#18560)


Signed-off-by: default avatarmgoin <mgoin64@gmail.com>
parent be1e128d
......@@ -46,6 +46,6 @@ while getopts "m:b:l:f:t:" OPT; do
done
lm_eval --model vllm \
--model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,distributed_executor_backend=ray,trust_remote_code=true,max_model_len=4096" \
--model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,add_bos_token=true,trust_remote_code=true,max_model_len=4096" \
--tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
--batch_size "$BATCH_SIZE"
......@@ -18,12 +18,14 @@ RTOL = 0.08
def launch_lm_eval(eval_config, tp_size):
trust_remote_code = eval_config.get("trust_remote_code", False)
max_model_len = eval_config.get("max_model_len", 4096)
model_args = (
f"pretrained={eval_config['model_name']},"
f"tensor_parallel_size={tp_size},"
f"enforce_eager=true,"
f"add_bos_token=true,"
f"trust_remote_code={trust_remote_code}"
f"trust_remote_code={trust_remote_code},"
f"max_model_len={max_model_len}"
)
results = lm_eval.simple_evaluate(
model="vllm",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment