#!/bin/bash
#
# Launch the FastChat training entry point for one MPI local rank, bound to a
# NUMA node with numactl.
#
# Usage: <script> GPUS
#   GPUS  Positive integer. Device indices 0..GPUS-1 are exported as a
#         comma-separated list in HIP_VISIBLE_DEVICES (every rank gets the
#         same full list).
#
# Environment:
#   OMPI_COMM_WORLD_LOCAL_RANK  Local rank of this process; must be 0-7.
#                               Ranks 0-3 are bound to NUMA node 0 and ranks
#                               4-7 to NUMA node 3.
#
# Exit status: 2 on a bad GPUS argument, 1 on an unsupported local rank,
# otherwise the exit status of the launched command.

# Load site-specific environment settings. Looked up by bash's `source` rules
# because the name contains no slash.
source env.sh

GPUS=$1
# Reject a missing or non-numeric count up front; otherwise the arithmetic
# loop below fails and an empty device list is exported.
if [[ ! "$GPUS" =~ ^[1-9][0-9]*$ ]]; then
  printf 'usage: %s GPUS  (GPUS must be a positive integer, got "%s")\n' \
    "${0##*/}" "$GPUS" >&2
  exit 2
fi

# Build "0,1,...,GPUS-1".
devices=""
for ((i = 0; i < GPUS; i++)); do
  devices+="${devices:+,}$i"
done
export HIP_VISIBLE_DEVICES=$devices

# Training command kept as an array so each argument stays a single word
# without relying on word-splitting of a flat string.
train_cmd=(
  python3 FastChat-main/fastchat/train/train.py
  --deepspeed ds_config.json
  --model_name_or_path /data/llama/llama-13b-hf
  --data_path ./FastChat-main/playground/data/alpaca-data-conversation.json
  --output_dir /data/llama/checkpoints
  --num_train_epochs 3
  --per_device_train_batch_size 4
  --per_device_eval_batch_size 4
  --gradient_accumulation_steps 16
  --evaluation_strategy no
  --save_strategy steps
  --save_steps 50
  --save_total_limit 100
  --learning_rate 2e-5
  --weight_decay 0.
  --warmup_ratio 0.03
  --lr_scheduler_type cosine
  --logging_steps 1
  --model_max_length 2048
  --gradient_checkpointing True
  --lazy_preprocess True
  --fp16
)

local_rank=${OMPI_COMM_WORLD_LOCAL_RANK:-}
echo "$local_rank"

# Map the local rank to the NUMA node used for CPU and memory binding.
# NOTE(review): the 0-3 -> node 0 / 4-7 -> node 3 split presumably mirrors the
# host's GPU-to-NUMA topology - confirm against the target machine.
case "$local_rank" in
  [0-3])
    numa_node=0
    ;;
  [4-7])
    numa_node=3
    ;;
  *)
    # Without this arm an unset or out-of-range rank would launch nothing and
    # still exit 0, hiding the fact that no training was started.
    printf 'error: unsupported OMPI_COMM_WORLD_LOCAL_RANK "%s" (expected 0-7)\n' \
      "$local_rank" >&2
    exit 1
    ;;
esac

launch_cmd=(
  numactl "--cpunodebind=${numa_node}" "--membind=${numa_node}"
  "${train_cmd[@]}"
)

# Log the exact command line, then run it; being the last command, its exit
# status becomes the script's exit status.
echo "${launch_cmd[*]}"
"${launch_cmd[@]}"