train.sh 625 Bytes
Newer Older
chenzk's avatar
v1.0.8  
chenzk committed
1
2
3
4
5
6
7
8
9
# --nproc_per_node=8:dp=2, pp=2, and tp=2
# --nproc_per_node=4:dp=1, pp=2, and tp=2
# --nproc_per_node=1:dp=1, pp=1, and tp=1

CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=4 run_train.py --config-file examples/config_tiny_llama.yaml
# CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=4 run_train.py --config-file examples/config_tiny_llama_cosmo2tokenizer.yaml

# CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=4 run_train.py --config-file examples/config_llama3_dummytokenizer.yaml
# CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=4 run_train.py --config-file examples/config_llama3.yaml