"googlemock/git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "af463c43ac22279239c1b8065ded7026b9224de1"
Commit 432e86b9 authored by one
Browse files

[rocHPL] Add test files for 32 ranks

parent 16f1130a
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
0 device out (6=stdout,7=stderr,file)
1 # of problems sizes (N)
512000 Ns
1 # of NBs
512 NBs
1 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
8 Ps
4 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
1 # of recursive stopping criterium
32 NBMINs (>= 1)
1 # of panels in recursion
2 NDIVs
1 # of recursive panel fact.
2 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
5 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
1 # of lookahead depth
1 DEPTHs (>=0)
1 SWAP (0=bin-exch,1=long,2=mix)
64 swapping threshold
0 L1 in (0=transposed,1=no-transposed) form
0 U in (0=transposed,1=no-transposed) form
0 Equilibration (0=no,1=yes)
8 memory alignment in double (> 0)
#!/bin/bash
# Runs the rocHPL launcher once per test configuration, from the largest
# process grid (8x4) down to the smallest (4x1).

# Launch one benchmark run.
#   $1 - value passed to -P
#   $2 - value passed to -Q
#   $3 - input file passed to -i
# Every run passes "--it 6" unchanged.
run_case() {
  ./build/mpirun_rochpl -P "$1" -Q "$2" --it 6 -i "$3"
}

run_case 8 4 HPL-32.dat
run_case 8 2 HPL-16.dat
run_case 4 2 HPL-8.dat # N=512*498
run_case 4 1 HPL-4.dat
......
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..72b25c1 100755
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@ filename=HPL.dat
inputfile=false
cmdrun=false
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
devices=
# #################################################
@@ -111,21 +118,6 @@ num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
-#Default MPI options
-mpi_args=
-
-#Check if using OpenMPI
-if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
- mpi_args+=" --map-by node --rank-by slot --bind-to none "
-
- #Check if this is OpenMPI+UCX
- ompi_info=$(dirname ${mpi_bin})/ompi_info
- if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
- # ucx-specific args
- mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
- fi
-fi
-
# #################################################
# Parameter parsing
# #################################################
@@ -153,7 +145,7 @@ while true; do
exit 0
;;
--version)
- ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+ ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
exit 0
;;
-P)
@@ -218,5 +210,33 @@ if [ ! -z "${devices}" ]; then
rochpl_args+=" --devices=${devices}"
fi
+echo "Copying files..."
+if [[ "${inputfile}" == false ]]; then
+ rsync -az -e 'ssh -p 3333' build tpl node02:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl node03:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl node04:/workspace/
+else
+ rsync -az -e 'ssh -p 3333' build tpl ${filename} node02:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl ${filename} node03:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl ${filename} node04:/workspace/
+fi
+
#run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ --map-by ppr:8:node --bind-to none \
+ --mca pml ucx \
+ --mca osc ucx \
+ --mca btl ^openib \
+ --mca btl_tcp_if_include p14p2 \
+ --mca plm_rsh_args "-p 3333" \
+ --mca coll_hcoll_enable 0 \
+ -x UCX_TLS=self,sm,rocm,rc \
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
+ -x UCX_MEMTYPE_CACHE=n \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -np 32 \
+ -H node01:8,node02:8,node03:8,node04:8 \
+ ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..68c1958 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -104,7 +104,9 @@ cmdrun=false
devices=
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib:$LD_LIBRARY_PATH
# #################################################
# Parameter parsing
@@ -274,7 +276,7 @@ myq=$((rank/p))
cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
#construct list of devices and their numa affinities
-devicelist=$(${rocm_dir}/bin/rocm-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
#count the cpus per core
threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
@@ -361,9 +363,23 @@ export OMP_NUM_THREADS=${omp_num_threads}
export OMP_PLACES=${omp_places}
export OMP_PROC_BIND=true
+# Hard-coded IB mapping for now
+declare -a IB_MAP=(
+ "mlx5_1:1" # GPU 0 -> NUMA 3
+ "mlx5_2:1" # GPU 1 -> NUMA 3
+ "mlx5_3:1" # GPU 2 -> NUMA 0
+ "mlx5_4:1" # GPU 3 -> NUMA 0
+ "mlx5_7:1" # GPU 4 -> NUMA 7
+ "mlx5_8:1" # GPU 5 -> NUMA 7
+ "mlx5_9:1" # GPU 6 -> NUMA 4
+ "mlx5_10:1" # GPU 7 -> NUMA 4
+)
+
+myib=${IB_MAP[$rank]}
+export UCX_NET_DEVICES=${myib}
if [[ $globalRank -lt $size ]]; then
- echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+ echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, IB: $myib, CPU Cores: $omp_num_threads - $places"
fi
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +390,5 @@ else
fi
#run
-${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+numactl -N ${mynuma} -m ${mynuma} ${rochpl_bin} ${rochpl_args}
diff --git a/scripts/wrapper.sh b/scripts/wrapper.sh
new file mode 100755
index 0000000..e11d6dc
--- /dev/null
+++ b/scripts/wrapper.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+tpl_dir=/workspace/tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${ompi_lib_dir}:${ucx_lib_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
+export PATH=${ompi_lib_dir}/../bin:${ucx_lib_dir}/../bin:${PATH}
+
+exec mpirun --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ -np 16 \
+ -H node01:8,node02:8 \
+ --mca plm_rsh_args "-p 3333" \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -x UCX_NET_DEVICES=mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_7,mlx5_8,mlx5_9,mlx5_10 \
+ "$@"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment