Add trust_remote_code and wandb_args to harness_eval.slurm, and add run_all.sh script

292fdae5 · Yen-Ting Lin · f4f44b96 · 292fdae5 · 292fdae5
Commit 292fdae5 authored May 07, 2024 by Yen-Ting Lin
Hide whitespace changes
Inline Side-by-side

Showing with 46 additions and 3 deletions

harness_eval.slurm harness_eval.slurm +3 -3

run_all.sh run_all.sh +43 -0

No files found.
--- a/harness_eval.slurm
+++ b/harness_eval.slurm
@@ -53,7 +53,7 @@ echo "TASKS: $tasks"
 export PROGRAM="\
 lm_eval \
 --model hf \
--model_args pretrained=$model,parallelize=True \
+--model_args pretrained=$model,parallelize=True,trust_remote_code=True \
 --tasks $tasks \
 --num_fewshot 0 \
 --batch_size 8 \
@@ -61,8 +61,8 @@ lm_eval \
 --write_out \
 --log_samples \
 --verbosity DEBUG \
--wandb_args project=lm-eval-harness-integration,job_type=eval \
+--wandb_args project=lm-eval-harness-integration,job_type=eval,name=$model \
--hf_hub_log_args=hub_results_org=yentinglin,hub_repo_name=twllm-eval-results \
+--hf_hub_log_args hub_results_org=yentinglin,hub_repo_name=lm-eval-results,push_results_to_hub=True,push_samples_to_hub=True,public_repo=False \
 --seed 42 \
 --trust_remote_code \
 "

--- a/run_all.sh
+++ b/run_all.sh
+#!/bin/bash
+# Define the models to run
+declare -a models=(
+"yentinglin/Llama-3-Taiwan-70B-Instruct"
+"yentinglin/Taiwan-Llama-3-70B-Cooldown"
+"yentinglin/Taiwan-Llama-3-70B"
+"yentinglin/Taiwan-Llama-3-8B-Instruct"
+"yentinglin/Taiwan-Llama-3-8B-Cooldown"
+"yentinglin/Taiwan-Llama-3-8B"
+"meta-llama/Meta-Llama-3-70B-Instruct"
+"meta-llama/Meta-Llama-3-70B"
+"meta-llama/Meta-Llama-3-8B-Instruct"
+"meta-llama/Meta-Llama-3-8B"
+"Qwen/Qwen1.5-110B-Chat"
+"Qwen/Qwen1.5-72B-Chat"
+"deepseek-ai/DeepSeek-V2-Chat"
+"01-ai/Yi-34B-Chat"
+"CohereForAI/c4ai-command-r-plus"
+"mistralai/Mixtral-8x22B-Instruct-v0.1"
+"MediaTek-Research/Breeze-7B-Instruct-v1_0"
+"MediaTek-Research/Breeze-7B-Base-v1_0"
+"taide/Llama3-TAIDE-LX-8B-Chat-Alpha1"
+"taide/TAIDE-LX-7B-Chat"
+"taide/TAIDE-LX-7B"
+"microsoft/Phi-3-mini-4k-instruct"
+"apple/OpenELM-3B-Instruct"
+)
+# SLURM script to be used
+SLURM_SCRIPT="harness_eval.slurm"
+# Parameters for the script
+PARAMS="tmlu,twllm_eval,tw_legal,ccp,pega,tmmluplus"
+# Loop through each model and submit a job
+for model in "${models[@]}"
+do
+  echo "Submitting job for $model"
+  sbatch $SLURM_SCRIPT $model $PARAMS
+done
+echo "All jobs submitted"