rochpl-bw1000-16r.patch 4.93 KB
Newer Older
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..3984e62 100755
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@ filename=HPL.dat
 inputfile=false
 cmdrun=false
 
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
 devices=
 
 # #################################################
@@ -117,12 +124,13 @@ mpi_args=
 #Check if using OpenMPI
 if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
   mpi_args+=" --map-by node --rank-by slot --bind-to none "
+  #mpi_args+=" --map-by numa:PE=16 --bind-to core --report-bindings "
 
   #Check if this is OpenMPI+UCX
   ompi_info=$(dirname ${mpi_bin})/ompi_info
   if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
     # ucx-specific args
-    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
+    mpi_args="--mca pml ucx ${mpi_args}"
   fi
 fi
 
@@ -153,7 +161,7 @@ while true; do
         exit 0
         ;;
     --version)
-        ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+        ${mpi_bin} --allow-run-as-root -np 1 ${mpi_args} ${rochpl_runscript} --version
         exit 0
         ;;
     -P)
@@ -218,5 +226,25 @@ if [ ! -z "${devices}" ]; then
   rochpl_args+=" --devices=${devices}"
 fi
 
+echo "Copying files..."
+scp -P 3333 $0 node02:/workspace/build/
+scp -P 3333 ./build/run_rochpl node02:/workspace/build/
+scp -P 3333 ./${filename} node02:/workspace/
+
 #run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+  --prefix ${ompi_prefix} \
+  ${mpi_args} \
+  --mca btl ^openib \
+  --mca btl_tcp_if_include p14p2 \
+  --mca plm_rsh_args "-p 3333" \
+  --mca coll_hcoll_enable 0 \
+  -x UCX_TLS=self,sm,rocm_ipc,rocm_copy,rc_mlx5,ud_mlx5 \
+  -x UCX_MEMTYPE_CACHE=n \
+  -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+  -x UCX_NET_DEVICES=mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1,mlx5_10:1 \
+  -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+  -x UCX_WARN_UNUSED_ENV_VARS=n \
+  -np 16 \
+  -H node01:8,node02:8 \
+  ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..81fed8a 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -104,7 +104,9 @@ cmdrun=false
 
 devices=
 
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib:$LD_LIBRARY_PATH
 
 # #################################################
 # Parameter parsing
@@ -274,7 +276,7 @@ myq=$((rank/p))
 cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
 
 #construct list of devices and their numa affinities
-devicelist=$(${rocm_dir}/bin/rocm-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
 
 #count the cpus per core
 threads_per_core=$(echo "${cpulist}" | grep -c ".*	0	.*")
@@ -361,9 +363,24 @@ export OMP_NUM_THREADS=${omp_num_threads}
 export OMP_PLACES=${omp_places}
 export OMP_PROC_BIND=true
 
+# Hard-coded IB mapping for now
+declare -a IB_MAP=(
+    "mlx5_1:1"   # GPU 0 -> NUMA 3
+    "mlx5_2:1"   # GPU 1 -> NUMA 3
+    "mlx5_3:1"   # GPU 2 -> NUMA 0
+    "mlx5_4:1"   # GPU 3 -> NUMA 0
+    "mlx5_7:1"   # GPU 4 -> NUMA 7
+    "mlx5_8:1"   # GPU 5 -> NUMA 7
+    "mlx5_9:1"   # GPU 6 -> NUMA 4
+    "mlx5_10:1"  # GPU 7 -> NUMA 4
+)
+
+myib=${IB_MAP[$rank]}
+export UCX_NET_DEVICES=${myib}
 
 if [[ $globalRank -lt $size ]]; then
-  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, IB: $myib, CPU Cores: $omp_num_threads - $places"
+#  echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma"
 fi
 
 rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +391,5 @@ else
 fi
 
 #run
-${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+numactl -N ${mynuma} -m ${mynuma} ${rochpl_bin} ${rochpl_args}