Commit f4f44b96 authored by Yen-Ting Lin
Browse files

Update .gitignore and harness_eval.slurm

parent ce53a60e
...@@ -23,4 +23,5 @@ wandb ...@@ -23,4 +23,5 @@ wandb
examples/wandb examples/wandb
evals/ evals/
harness_eval_main_log.txt harness_eval_main_log.txt
None/ None/
\ No newline at end of file logs/
\ No newline at end of file
...@@ -2,14 +2,14 @@ ...@@ -2,14 +2,14 @@
# this is a multi-node SLURM script using `accelerate` launcher # this is a multi-node SLURM script using `accelerate` launcher
#SBATCH --job-name=eval_llm #SBATCH --job-name=eval-harness
#SBATCH --partition=defq #SBATCH --partition=defq
#SBATCH --nodes=1 #SBATCH --nodes=1
#SBATCH --ntasks-per-node=1 # crucial - only 1 task per node #SBATCH --ntasks-per-node=1 # crucial - only 1 task per node
#SBATCH --gres=gpu:8 # EDIT this if it's not 8-gpus per node #SBATCH --gres=gpu:8 # EDIT this if it's not 8-gpus per node
#SBATCH --exclusive #SBATCH --exclusive
#SBATCH --output=/mnt/home/f08944064/lighteval/logs/%x-%j.out #SBATCH --output=logs/%x-%j.out
#SBATCH --error=/mnt/home/f08944064/lighteval/logs/%x-%j.err #SBATCH --error=logs/%x-%j.err
echo "START TIME: $(date)" echo "START TIME: $(date)"
...@@ -41,20 +41,31 @@ MASTER_PORT=6000 ...@@ -41,20 +41,31 @@ MASTER_PORT=6000
# 0 and the launcher will hang # 0 and the launcher will hang
# #
# same goes for `\$(hostname -s|tr -dc '0-9')` - we want it to interpolate at `srun` time # same goes for `\$(hostname -s|tr -dc '0-9')` - we want it to interpolate at `srun` time
LAUNCHER="lm_eval --model hf \ LAUNCHER=""
"
model=$1
tasks=$2
echo "MODEL: $model"
echo "TASKS: $tasks"
# EDIT the path+name of the python script and whatever args it needs # EDIT the path+name of the python script and whatever args it needs
export PROGRAM="\ export PROGRAM="\
--model_args pretrained={$1},parallelize=True \ lm_eval \
--tasks {$2} \ --model hf \
--batch_size auto \ --model_args pretrained=$model,parallelize=True \
--tasks $tasks \
--num_fewshot 0 \
--batch_size 8 \
--output_path evals \
--write_out \
--log_samples \
--verbosity DEBUG \ --verbosity DEBUG \
--wandb_args project=lm-eval-harness-integration,job_type=eval \
--hf_hub_log_args=hub_results_org=yentinglin,hub_repo_name=twllm-eval-results \
--seed 42 \
--trust_remote_code \
" "
#--tasks examples/tasks/open_llm_leaderboard_tasks.txt \
#--model_args "pretrained=${1},trust_remote_code=True" \
#--model_parallel \
#--use_chat_template \
export CMD="$LAUNCHER $PROGRAM" export CMD="$LAUNCHER $PROGRAM"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment