rochpl-scripts-bw.patch 4.13 KB
Newer Older
one's avatar
one committed
1
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
one's avatar
one committed
2
index 155f502..df6bd9b 100755
one's avatar
one committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@ filename=HPL.dat
 inputfile=false
 cmdrun=false
 
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
 devices=
 
 # #################################################
@@ -111,21 +118,6 @@ num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
 num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
 total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
 
-#Default MPI options
-mpi_args=
-
-#Check if using OpenMPI
-if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
-  mpi_args+=" --map-by node --rank-by slot --bind-to none "
-
-  #Check if this is OpenMPI+UCX
-  ompi_info=$(dirname ${mpi_bin})/ompi_info
-  if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
-    # ucx-specific args
-    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
-  fi
-fi
-
 # #################################################
 # Parameter parsing
 # #################################################
@@ -153,7 +145,7 @@ while true; do
         exit 0
         ;;
     --version)
-        ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+        ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
         exit 0
         ;;
     -P)
one's avatar
one committed
59
@@ -219,4 +211,15 @@ if [ ! -z "${devices}" ]; then
one's avatar
one committed
60
61
62
63
64
65
66
 fi
 
 #run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+  --bind-to none \
+  --mca pml ucx \
one's avatar
one committed
67
+  --mca osc ucx \
one's avatar
one committed
68
+  --mca btl ^vader,tcp,openib,uct \
one's avatar
one committed
69
70
71
+  --mca coll ^hcoll \
+  -x UCX_TLS=self,sm,rocm \
+  -x UCX_RNDV_SCHEME=put_zcopy \
one's avatar
one committed
72
+  -x UCX_MEMTYPE_CACHE=n \
one's avatar
one committed
73
+  -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
one's avatar
one committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
+  -np ${np} \
+  ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..3f840a7 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -104,7 +104,9 @@ cmdrun=false
 
 devices=
 
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib:$LD_LIBRARY_PATH
 
 # #################################################
 # Parameter parsing
@@ -274,7 +276,7 @@ myq=$((rank/p))
 cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
 
 #construct list of devices and their numa affinities
-devicelist=$(${rocm_dir}/bin/rocm-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
 
 #count the cpus per core
 threads_per_core=$(echo "${cpulist}" | grep -c ".*	0	.*")
@@ -363,7 +365,7 @@ export OMP_PROC_BIND=true
 
 
 if [[ $globalRank -lt $size ]]; then
-  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, CPU Cores: $omp_num_threads - $places"
 fi
 
 rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +376,5 @@ else
 fi
 
 #run
-${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+numactl --cpunodebind=${mynuma} --membind=${mynuma} ${rochpl_bin} ${rochpl_args}