Commit 96eb05f5 authored by liangjing
Browse files

update

parent 2b09ea90
Pipeline #1890 passed with stage
......@@ -11,7 +11,9 @@ export NCCL_IB_TIMEOUT=22
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=0
export NVTE_FLASH_ATTN=1
export NVTE_FUSED_ATTN=0
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
RANK=$OMPI_COMM_WORLD_RANK
......
......@@ -6,14 +6,13 @@ export OMP_NUM_THREADS=1
export NCCL_P2P_LEVEL=SYS
export NCCL_ALGO=Ring
export NCCL_NCHANNELS_PER_PEER=16
export NCCL_MIN_NCHANNELS=20
export NCCL_IB_TIMEOUT=22
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_HCA=xx #based on your environment
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=0
export NVTE_FLASH_ATTN=1
export NVTE_FUSED_ATTN=0
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
RANK=$OMPI_COMM_WORLD_RANK
......@@ -49,7 +48,7 @@ TRAINING_ARGS=(
--clip-grad 1.0
--bf16
--use-distributed-optimizer
--use-flash-attn-triton
--use-flash-attn
--disable-bias-linear
--attention-dropout 0
--hidden-dropout 0
......
......@@ -10,7 +10,9 @@ export NCCL_IB_TIMEOUT=22
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=0
export NVTE_FLASH_ATTN=1
export NVTE_FUSED_ATTN=0
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
RANK=$OMPI_COMM_WORLD_RANK
......
......@@ -13,8 +13,9 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_HCA=xx #based on your environment
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=0
source /opt/dtk/env.sh
export NVTE_FLASH_ATTN=1
export NVTE_FUSED_ATTN=0
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment