Commit 77d22308 authored by zhougaofeng's avatar zhougaofeng
Browse files

Upload New File

parent 47f1dd37
Pipeline #1098 canceled with stages
#!/bin/bash
# Launch single-node LoRA SFT fine-tuning of internlm2-math-7b via torchrun.
# For a multi-node run, also launch this on the slave machine using
# slave_config.yaml (adjust NNODES / RANK / MASTER_ADDR there).
set -euo pipefail

# Distributed topology: 2 worker processes on this node, single node total.
readonly NPROC_PER_NODE=2
readonly NNODES=1
readonly RANK=0                  # node rank; 0 is the master node
readonly MASTER_ADDR=127.0.0.1   # rendezvous address (loopback for single node)
readonly MASTER_PORT=17170       # rendezvous port; must be free on the master

# Pin the run to GPUs 6 and 7; torchrun spawns one process per GPU.
CUDA_VISIBLE_DEVICES=6,7 torchrun \
  --nproc_per_node "$NPROC_PER_NODE" \
  --nnodes "$NNODES" \
  --node_rank "$RANK" \
  --master_addr "$MASTER_ADDR" \
  --master_port "$MASTER_PORT" \
  src/train.py \
  --stage sft --do_train \
  --model_name_or_path /home/practice/internlm2-math-7b \
  --dataset alpaca_en_demo --template intern2 --finetuning_type lora --lora_target q_proj,v_proj \
  --output_dir saves/intern2/lora/sft \
  --overwrite_output_dir \
  --overwrite_cache --per_device_train_batch_size 2 --gradient_accumulation_steps 32 --lr_scheduler_type cosine \
  --logging_steps 10 --save_steps 1000 --learning_rate 1e-4 --num_train_epochs 3.0 --plot_loss --fp16
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment