Commit e195b4fe authored by lishen's avatar lishen
Browse files

修改mpirun启动脚本

parent 2f64c109
# Set default values. Both variables are forwarded later in this script
# as --testmode=${testmode} and --profiling=${profiling}.
testmode="internode"
profiling=""
for para in $*
do
if [[ $para == --testmode* ]];then
......@@ -15,7 +19,6 @@ LAUNCH_WITH_BINDING=${TEST_DIR}/launch_with_binding.sh # Please adjust the varia
DTK_ENV="/opt/dtk/env.sh" # location of DTK's env.sh
TEST_ENV=${TEST_DIR}/test_env.sh # test_env.sh located under TEST_DIR
#######################################################################################
# The variables below do not need to be modified
# HOSTFILE="hostfile_$(basename "$0" | sed -E 's/^run_(.+)\.sh$/\1/')"
......@@ -40,7 +43,7 @@ mpirun -np ${GPUS} --hostfile ${HOSTFILE} \
--testmode=${testmode} \
--profiling=${profiling}
"
#> log-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
# > log-${testmode}-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
wait
......
File mode changed from 100644 to 100755
......@@ -4,6 +4,7 @@
# Applies to: multi-node training launched with mpirun
# Network: InfiniBand (SHCA) or RoCE
# =============================================================================
# Set PYTHONPATH to the current working directory; any existing value is replaced.
export PYTHONPATH=$(pwd)
# rocSHMEM
export ROCSHMEM_GDA_NUM_QPS_DEFAULT_CTX=288
......@@ -16,6 +17,3 @@ export ROCSHMEM_TOPO_FILE_FORCE=$(pwd)/tests_mpi/topo.config
# Prepend the dushmem library directory to the existing library search path.
export LD_LIBRARY_PATH=/opt/dtk/dushmem/lib:$LD_LIBRARY_PATH
# Comma-separated entries for indices 0-7, each naming an mlx5_N device;
# presumably <GPU index>:<HCA name>:<port> - TODO confirm against the consumer.
export DEEP_EP_DEVICE_TO_HCA_MAPPING=0:mlx5_2:1,1:mlx5_3:1,2:mlx5_4:1,3:mlx5_5:1,4:mlx5_6:1,5:mlx5_7:1,6:mlx5_8:1,7:mlx5_9:1
# 10737418240 = 10 * 1024^3 (10 GiB if the value is interpreted as bytes - confirm).
export NVSHMEM_SYMMETRIC_SIZE=10737418240
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # adjust to match the hardware topology
# Set PYTHONPATH to the current working directory; any existing value is replaced.
export PYTHONPATH=$(pwd)
File mode changed from 100644 to 100755
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment