"vscode:/vscode.git/clone" did not exist on "7b5460556eed76f94fd051d31db8200b4ec24c96"
Commit ee64defa authored by one's avatar one
Browse files

Rename rocHPL patch file

parent 8633301e
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..3984e62 100755
index 155f502..94f7b0d 100755
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
......@@ -25,59 +25,65 @@ index 155f502..3984e62 100755
devices=
# #################################################
@@ -117,12 +124,13 @@ mpi_args=
#Check if using OpenMPI
if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
mpi_args+=" --map-by node --rank-by slot --bind-to none "
+ #mpi_args+=" --map-by numa:PE=16 --bind-to core --report-bindings "
#Check if this is OpenMPI+UCX
ompi_info=$(dirname ${mpi_bin})/ompi_info
if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
# ucx-specific args
@@ -111,21 +118,6 @@ num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
-#Default MPI options
-mpi_args=
-
-#Check if using OpenMPI
-if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
- mpi_args+=" --map-by node --rank-by slot --bind-to none "
-
- #Check if this is OpenMPI+UCX
- ompi_info=$(dirname ${mpi_bin})/ompi_info
- if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
- # ucx-specific args
- mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
+ mpi_args="--mca pml ucx ${mpi_args}"
fi
fi
@@ -153,7 +161,7 @@ while true; do
- fi
-fi
-
# #################################################
# Parameter parsing
# #################################################
@@ -153,7 +145,7 @@ while true; do
exit 0
;;
--version)
- ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+ ${mpi_bin} --allow-run-as-root -np 1 ${mpi_args} ${rochpl_runscript} --version
+ ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
exit 0
;;
-P)
@@ -218,5 +226,25 @@ if [ ! -z "${devices}" ]; then
@@ -218,5 +210,24 @@ if [ ! -z "${devices}" ]; then
rochpl_args+=" --devices=${devices}"
fi
+echo "Copying files..."
+scp -P 3333 $0 node02:/workspace/build/
+scp -P 3333 ./build/run_rochpl node02:/workspace/build/
+scp -P 3333 ./${filename} node02:/workspace/
+rsync -az -e 'ssh -p 3333' build tpl ${filename} node02:/workspace/
+
#run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ ${mpi_args} \
+ --map-by ppr:8:node --bind-to none \
+ --mca pml ucx \
+ --mca btl ^openib \
+ --mca btl_tcp_if_include p14p2 \
+ --mca plm_rsh_args "-p 3333" \
+ --mca coll_hcoll_enable 0 \
+ -x UCX_TLS=self,sm,rocm_ipc,rocm_copy,rc_mlx5,ud_mlx5 \
+ -x UCX_TLS=self,sm,rocm,rc \
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
+ -x UCX_MEMTYPE_CACHE=n \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -x UCX_NET_DEVICES=mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1,mlx5_10:1 \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ -x UCX_WARN_UNUSED_ENV_VARS=n \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -np 16 \
+ -H node01:8,node02:8 \
+ ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..81fed8a 100755
index 1522e5d..68c1958 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
......@@ -109,7 +115,7 @@ index 1522e5d..81fed8a 100755
#count the cpus per core
threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
@@ -361,9 +363,24 @@ export OMP_NUM_THREADS=${omp_num_threads}
@@ -361,9 +363,23 @@ export OMP_NUM_THREADS=${omp_num_threads}
export OMP_PLACES=${omp_places}
export OMP_PROC_BIND=true
......@@ -131,11 +137,10 @@ index 1522e5d..81fed8a 100755
if [[ $globalRank -lt $size ]]; then
- echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+ echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, IB: $myib, CPU Cores: $omp_num_threads - $places"
+# echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma"
fi
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +391,5 @@ else
@@ -374,4 +390,5 @@ else
fi
#run
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment