rochpl-scripts-bw.patch 4.02 KB
Newer Older
one's avatar
one committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..2344410 100755
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@ filename=HPL.dat
 inputfile=false
 cmdrun=false
 
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+# Prepend the OpenMPI/UCX libs under tpl/; add the old value only if non-empty (an empty entry makes ld.so search the cwd)
+export LD_LIBRARY_PATH="${ompi_lib_dir}:${ucx_lib_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+export OPAL_PREFIX=$ompi_prefix
 devices=
 
 # #################################################
@@ -111,21 +118,6 @@ num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
 num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
 total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
 
-#Default MPI options
-mpi_args=
-
-#Check if using OpenMPI
-if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
-  mpi_args+=" --map-by node --rank-by slot --bind-to none "
-
-  #Check if this is OpenMPI+UCX
-  ompi_info=$(dirname ${mpi_bin})/ompi_info
-  if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
-    # ucx-specific args
-    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
-  fi
-fi
-
 # #################################################
 # Parameter parsing
 # #################################################
@@ -153,7 +145,7 @@ while true; do
         exit 0
         ;;
     --version)
-        ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+        ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
         exit 0
         ;;
     -P)
@@ -219,4 +211,11 @@ if [ ! -z "${devices}" ]; then
 fi
 
 #run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+  --bind-to none \
+  --mca pml ucx \
+  --mca btl ^vader,tcp,openib,uct \
+  -x UCX_TLS=self,sm,rocm,rc \
+  -x UCX_MEMTYPE_CACHE=n \
+  -np ${np} \
+  ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..3f840a7 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -104,7 +104,9 @@ cmdrun=false
 
 devices=
 
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}  # no trailing ':' when unset: an empty entry makes ld.so search the cwd
 
 # #################################################
 # Parameter parsing
@@ -274,7 +276,7 @@ myq=$((rank/p))
 cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
 
 #construct list of devices and their numa affinities
-devicelist=$(${rocm_dir}/bin/rocm-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
 
 #count the cpus per core
 threads_per_core=$(echo "${cpulist}" | grep -c ".*	0	.*")
@@ -363,7 +365,7 @@ export OMP_PROC_BIND=true
 
 
 if [[ $globalRank -lt $size ]]; then
-  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, CPU Cores: $omp_num_threads - $places"
 fi
 
 rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +376,5 @@ else
 fi
 
 #run
-${rochpl_bin} ${rochpl_args}
+# Launch through numactl so both CPU scheduling and memory allocation are bound to NUMA node ${mynuma}
+numactl --cpunodebind=${mynuma} --membind=${mynuma} ${rochpl_bin} ${rochpl_args}