Commit 292fdae5 authored by Yen-Ting Lin
Browse files

Add trust_remote_code and wandb_args to harness_eval.slurm, and add run_all.sh script

parent f4f44b96
...@@ -53,7 +53,7 @@ echo "TASKS: $tasks" ...@@ -53,7 +53,7 @@ echo "TASKS: $tasks"
export PROGRAM="\ export PROGRAM="\
lm_eval \ lm_eval \
--model hf \ --model hf \
--model_args pretrained=$model,parallelize=True \ --model_args pretrained=$model,parallelize=True,trust_remote_code=True \
--tasks $tasks \ --tasks $tasks \
--num_fewshot 0 \ --num_fewshot 0 \
--batch_size 8 \ --batch_size 8 \
...@@ -61,8 +61,8 @@ lm_eval \ ...@@ -61,8 +61,8 @@ lm_eval \
--write_out \ --write_out \
--log_samples \ --log_samples \
--verbosity DEBUG \ --verbosity DEBUG \
--wandb_args project=lm-eval-harness-integration,job_type=eval \ --wandb_args project=lm-eval-harness-integration,job_type=eval,name=$model \
--hf_hub_log_args=hub_results_org=yentinglin,hub_repo_name=twllm-eval-results \ --hf_hub_log_args hub_results_org=yentinglin,hub_repo_name=lm-eval-results,push_results_to_hub=True,push_samples_to_hub=True,public_repo=False \
--seed 42 \ --seed 42 \
--trust_remote_code \ --trust_remote_code \
" "
......
#!/bin/bash
#
# Submit one SLURM evaluation job per model.
#
# For every entry in `models`, this script runs
#   sbatch "$SLURM_SCRIPT" "$model" "$PARAMS"
# so the SLURM script receives the model identifier as its first argument
# and the comma-separated task list as its second argument.
#
# Exit status: 0 when every sbatch call succeeded, 1 when at least one
# submission failed (the remaining models are still attempted).

# Treat references to unset variables as errors instead of expanding to "".
set -u

# SLURM script to be used (resolved relative to the current directory).
readonly SLURM_SCRIPT="harness_eval.slurm"

# Parameters for the script: comma-separated list handed to the SLURM script
# as a single argument (presumably the evaluation task names -- confirm
# against harness_eval.slurm).
readonly PARAMS="tmlu,twllm_eval,tw_legal,ccp,pega,tmmluplus"

# Define the models to run.
declare -a models=(
  "yentinglin/Llama-3-Taiwan-70B-Instruct"
  "yentinglin/Taiwan-Llama-3-70B-Cooldown"
  "yentinglin/Taiwan-Llama-3-70B"
  "yentinglin/Taiwan-Llama-3-8B-Instruct"
  "yentinglin/Taiwan-Llama-3-8B-Cooldown"
  "yentinglin/Taiwan-Llama-3-8B"
  "meta-llama/Meta-Llama-3-70B-Instruct"
  "meta-llama/Meta-Llama-3-70B"
  "meta-llama/Meta-Llama-3-8B-Instruct"
  "meta-llama/Meta-Llama-3-8B"
  "Qwen/Qwen1.5-110B-Chat"
  "Qwen/Qwen1.5-72B-Chat"
  "deepseek-ai/DeepSeek-V2-Chat"
  "01-ai/Yi-34B-Chat"
  "CohereForAI/c4ai-command-r-plus"
  "mistralai/Mixtral-8x22B-Instruct-v0.1"
  "MediaTek-Research/Breeze-7B-Instruct-v1_0"
  "MediaTek-Research/Breeze-7B-Base-v1_0"
  "taide/Llama3-TAIDE-LX-8B-Chat-Alpha1"
  "taide/TAIDE-LX-7B-Chat"
  "taide/TAIDE-LX-7B"
  "microsoft/Phi-3-mini-4k-instruct"
  "apple/OpenELM-3B-Instruct"
)

# Loop through each model and submit a job. A failed submission is counted
# rather than aborting the loop, so one bad model does not block the others.
failed=0
for model in "${models[@]}"; do
  echo "Submitting job for $model"
  # Quote every expansion so each value reaches sbatch as exactly one argument.
  if ! sbatch "$SLURM_SCRIPT" "$model" "$PARAMS"; then
    echo "Failed to submit job for $model" >&2
    failed=$((failed + 1))
  fi
done

# Only claim success when every submission actually went through.
if ((failed > 0)); then
  echo "$failed of ${#models[@]} job submissions failed" >&2
  exit 1
fi
echo "All jobs submitted"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment