Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
7e8d5453
"docs/XcodeGuide.md" did not exist on "642acbd61235dc68f606237193cf7e7c4a61af67"
Commit
7e8d5453
authored
Mar 02, 2026
by
one
Browse files
[rocHPL] Test single-node and multi-node using a single script
parent
d49f25a8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
863 additions
and
357 deletions
+863
-357
projects/rocHPL/mpirun_rochpl.in
projects/rocHPL/mpirun_rochpl.in
+310
-0
projects/rocHPL/rochpl-scripts-2xbw1000.patch
projects/rocHPL/rochpl-scripts-2xbw1000.patch
+0
-150
projects/rocHPL/rochpl-scripts-4xbw1000.patch
projects/rocHPL/rochpl-scripts-4xbw1000.patch
+0
-183
projects/rocHPL/rochpl-scripts-bw.patch
projects/rocHPL/rochpl-scripts-bw.patch
+159
-24
projects/rocHPL/run_rochpl.in
projects/rocHPL/run_rochpl.in
+394
-0
No files found.
projects/rocHPL/mpirun_rochpl.in
0 → 100644
View file @
7e8d5453
#!/usr/bin/env bash
# Author: Noel Chalmers
# set -x #echo on
# #################################################
# helper functions
# #################################################
#######################################
# Print the usage summary for the MPI launcher script.
# Every option accepted by the getopt call further down is listed here,
# including --devices, which is parsed and forwarded but was undocumented.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
display_help() {
  cat <<'EOF'
rocHPL MPI run helper script
./mpirun_rochpl
    [-P]          Specific MPI grid size: the number of
                  rows in MPI grid.
    [-Q]          Specific MPI grid size: the number of
                  columns in MPI grid.
    [-p]          Specific node-local MPI grid size: the number
                  of rows in node-local MPI grid. Must evenly
                  divide P.
    [-q]          Specific node-local MPI grid size: the number
                  of columns in node-local MPI grid. Must evenly
                  divide Q.
    [-N]          Specific matrix size: the number of
                  rows/columns in global matrix.
    [--NB]        Specific panel size: the number of
                  rows/columns in panels.
    [--it]        Iterations: the number of times to run each
                  problem size.
    [-f]          Specific split fraction: the percentage to
                  split the trailing submatrix.
    [-i]          Input file. When set, all other command
                  line parameters are ignored, and problem
                  parameters are read from input file.
    [--devices]   Device selection; forwarded unchanged to
                  run_rochpl as --devices=<value>.
    [-H|--hosts]  Comma-separated list of nodes to run on.
    [--tcp-iface] TCP interface to use for communication.
    [--port]      SSH port to use for remote connections.
    [-h|--help]   prints this help message
    [--version]   Print rocHPL version number.
EOF
}
#######################################
# Abort the script unless the running distribution is on the supported list.
# Globals:   ID (read) - distribution identifier; must be set before calling
#            (this script obtains it by sourcing /etc/os-release)
# Arguments: none
# Outputs:   a diagnostic on stderr when the check fails
# Returns:   0 when ${ID} is supported; otherwise the script exits with 2
#######################################
supported_distro() {
  # ${ID+set} expands to "set" only when ID is defined (even if empty).
  if [[ -z "${ID+set}" ]]; then
    printf '%s\n' 'supported_distro(): $ID must be set' >&2
    exit 2
  fi

  case "${ID}" in
    debian | linuxmint | ubuntu | centos | rhel | fedora | sles | tencentos | kylin | rocky)
      return 0
      ;;
    *)
      # Keep this list in step with the pattern above.
      printf '%s\n' "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky" >&2
      exit 2
      ;;
  esac
}
# #################################################
# Pre-requisites check
# #################################################
# Exit code 0: all is well
# Exit code 1: problems with getopt
# Exit code 2: problems with supported platforms
# getopt does all of the argument parsing below, so bail out early without it.
if ! type getopt > /dev/null; then
  echo "This script uses getopt to parse arguments; try installing the util-linux package"
  exit 1
fi

# The os-release file describes the system; supported_distro reads ${ID},
# which is expected to come from it.
if [[ ! -e "/etc/os-release" ]]; then
  echo "This script depends on the /etc/os-release file"
  exit 2
fi
source /etc/os-release

# Exits the script when an unsupported distro is detected.
supported_distro
# Detect the number of GPUs per node by counting the "Device ID" lines printed
# by `hy-smi --showid`. grep -c reports 0 when hy-smi is missing or prints
# nothing, in which case the count falls back to 8.
ngpu_per_node=$(hy-smi --showid 2> /dev/null | grep -ic "Device ID")
if [[ -n "${ngpu_per_node}" && "${ngpu_per_node}" -ne 0 ]]; then
  echo "Detected ${ngpu_per_node} GPUs per node."
else
  echo "Failed to get the number of GPUs per node via hy-smi. Defaulting to 8."
  ngpu_per_node=8
fi
# #################################################
# global variables
# #################################################
# Grab options from CMake config (the @...@ placeholders are filled in when
# this template is configured).
rochpl_bin="@CMAKE_INSTALL_PREFIX@/bin/rochpl"
mpi_bin="@MPIEXEC_EXECUTABLE@"
# Assume run_rochpl is in the same location as this script.
rochpl_runscript="$(dirname "$0")/run_rochpl"

# Defaults for the options handled by the parameter-parsing loop.
P=1
Q=1
p=-1
q=-1
N=45312
NB=384
it=1
frac=0.3
filename=HPL.dat
inputfile=false
cmdrun=false

# Bundled third-party libraries live in ../tpl relative to the real
# (symlink-resolved) location of this script.
tpl_dir="$(dirname "$(readlink -f "$0")")/../tpl"
ompi_prefix="${tpl_dir}/openmpi"
ompi_lib_dir="${tpl_dir}/openmpi/lib"
ucx_lib_dir="${tpl_dir}/ucx/lib"

# Append the inherited LD_LIBRARY_PATH only when it is non-empty: a trailing
# ":" would add an empty entry, which the dynamic loader treats as the
# current working directory.
export LD_LIBRARY_PATH="${ompi_lib_dir}:${ucx_lib_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export OPAL_PREFIX="${ompi_prefix}"

devices=
# Empty nodes means single-node mode; overridden by -H/--hosts.
nodes=
# Defaults for --tcp-iface and --port.
tcp_iface=p14p2
ssh_port=3333
# #################################################
# MPI Args
# #################################################
# Count the number of physical cores (cores per socket x sockets).
# LC_ALL=C pins lscpu's English field labels. Each label is matched in full
# (allowing leading indentation, which some lscpu versions add) so that
# look-alike rows such as "Core(s) per cluster:" cannot leak into the value.
lscpu_out=$(LC_ALL=C lscpu 2> /dev/null)
num_cpu_cores=$(awk -F: '$1 ~ /^[ \t]*Core\(s\) per socket$/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<< "${lscpu_out}")
num_cpu_sockets=$(awk -F: '$1 ~ /^[ \t]*Socket\(s\)$/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<< "${lscpu_out}")

# Fall back to 1 when a field is absent or non-numeric (lscpu missing, or a
# value such as "-"), so the product below is always valid arithmetic.
num_cpu_cores=${num_cpu_cores:-1}
num_cpu_sockets=${num_cpu_sockets:-1}
total_cpu_cores=$((num_cpu_cores * num_cpu_sockets))
# #################################################
# Parameter parsing
# #################################################
# A modern (enhanced) getopt that can handle whitespace and long parameters
# identifies itself by making `getopt -T` exit with status 4.
getopt -T
if [[ $? -ne 4 ]]; then
  echo "Need a new version of getopt"
  exit 1
fi

if ! GETOPT_PARSE=$(getopt --name "${0}" --longoptions NB:,it:,help,version,devices:,hosts:,port:,tcp-iface:, --options hP:Q:p:q:N:i:f:H: -- "$@"); then
  echo "getopt invocation failed; could not parse the command line"
  exit 1
fi

# Swap the positional parameters for getopt's normalized, quoted list.
eval set -- "${GETOPT_PARSE}"

# Walk the option/value pairs up to getopt's "--" end-of-options marker.
# Every arm that falls through to the bottom of the loop consumed one option
# and its value, hence the shared `shift 2`.
while [[ "${1}" != "--" ]]; do
  case "${1}" in
    -h | --help)
      display_help
      exit 0
      ;;
    --version)
      ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
      exit 0
      ;;
    -P) P=${2} ;;
    -Q) Q=${2} ;;
    -p) p=${2} ;;
    -q) q=${2} ;;
    -N)
      N=${2}
      cmdrun=true
      ;;
    --NB)
      NB=${2}
      cmdrun=true
      ;;
    --it) it=${2} ;;
    -f) frac=${2} ;;
    -i)
      filename=${2}
      inputfile=true
      ;;
    --devices) devices=${2} ;;
    -H | --hosts) nodes=${2} ;;
    --tcp-iface) tcp_iface=${2} ;;
    --port) ssh_port=${2} ;;
    *)
      echo "Unexpected command line parameter received; aborting"
      exit 1
      ;;
  esac
  shift 2
done
# Drop the "--" marker itself.
shift
# If neither an input file (-i) nor a command-line problem size (-N / --NB)
# was given, default to running with the default input file.
if [[ "${inputfile}" == false ]] && [[ "${cmdrun}" == false ]]; then
  inputfile=true
fi

# Number of MPI ranks requested: the size of the P x Q process grid.
np=$(( $P * $Q ))
if (( np < 1 )); then
  echo "Invalid MPI grid parameters; aborting"
  exit 1
fi
# Assemble the option string handed to run_rochpl: the grid options first,
# then either the input file or the explicit problem size, then the options
# common to both modes, and finally the optional device selection.
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q}"
if [[ "${inputfile}" == true ]]; then
  rochpl_args+=" -i ${filename}"
else
  rochpl_args+=" -N ${N} --NB ${NB}"
fi
rochpl_args+=" -f ${frac} --it ${it}"

if [[ -n "${devices}" ]]; then
  rochpl_args+=" --devices=${devices}"
fi
# Launch rocHPL. Without --hosts the job runs on the local node only; with
# --hosts the working files are first mirrored to the other nodes and the job
# is then started across all listed nodes.
# ${rochpl_args} is a space-separated option string and is expanded unquoted
# on purpose so that it splits into individual arguments.
if [[ -z "${nodes}" ]]; then
  echo "No compute nodes specified. Running in single-node mode."

  # shellcheck disable=SC2086
  "${mpi_bin}" --allow-run-as-root \
    --bind-to none \
    --mca pml ucx \
    --mca osc ucx \
    --mca btl ^vader,tcp,openib,uct \
    --mca coll ^hcoll \
    -x UCX_TLS=self,sm,rocm \
    -x UCX_RNDV_SCHEME=put_zcopy \
    -x UCX_MEMTYPE_CACHE=n \
    -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
    -np "${np}" \
    "${rochpl_runscript}" ${rochpl_args}
else
  echo "Running in multi-node mode. Using nodes: ${nodes}"
  echo "Using TCP interface: ${tcp_iface}"
  echo "Using SSH port: ${ssh_port}"

  # Set rank counts for hosts: "nodeA:<ngpu>,nodeB:<ngpu>,..."
  IFS=',' read -ra node_array <<< "${nodes}"
  hosts_string=""
  for node in "${node_array[@]}"; do
    hosts_string+="${node}:${ngpu_per_node},"
  done
  hosts_string="${hosts_string%,}"

  echo "MPI hosts: ${hosts_string}"

  # Every listed node except the one we are running on needs a copy.
  current_node=$(hostname)
  copyto_hosts=()
  for node in "${node_array[@]}"; do
    if [[ "${node}" != "${current_node}" ]]; then
      copyto_hosts+=("${node}")
    fi
  done

  # Copy files using rsync only if there are other nodes to copy to.
  if ((${#copyto_hosts[@]} > 0)); then
    echo "Copying files to other nodes in parallel: ${copyto_hosts[*]}"

    # The input file is shipped along unless the problem size came from the
    # command line (inputfile == false).
    rsync_sources=(build tpl)
    if [[ "${inputfile}" != false ]]; then
      rsync_sources+=("${filename}")
    fi

    # Start one transfer per node and remember each PID so that every
    # transfer's exit status can be checked individually.
    rsync_pids=()
    for node in "${copyto_hosts[@]}"; do
      rsync -az -e "ssh -p ${ssh_port}" "${rsync_sources[@]}" "${node}:/workspace/" &
      rsync_pids+=("$!")
    done

    rsync_failed=0
    for idx in "${!rsync_pids[@]}"; do
      if ! wait "${rsync_pids[idx]}"; then
        echo "Failed to copy files to ${copyto_hosts[idx]}" >&2
        rsync_failed=1
      fi
    done

    # Do not launch the job against nodes holding stale or missing files.
    if ((rsync_failed)); then
      echo "File synchronization failed; aborting" >&2
      exit 1
    fi
    echo "Files synchronized successfully."
  fi

  # Multi-node run
  # shellcheck disable=SC2086
  "${mpi_bin}" --allow-run-as-root \
    --prefix "${ompi_prefix}" \
    --map-by "ppr:${ngpu_per_node}:node" --bind-to none \
    --mca pml ucx \
    --mca osc ucx \
    --mca btl ^openib \
    --mca btl_tcp_if_include "${tcp_iface}" \
    --mca plm_rsh_args "-p ${ssh_port}" \
    --mca coll_hcoll_enable 0 \
    -x UCX_TLS=self,sm,rocm,rc \
    -x UCX_RNDV_SCHEME=put_zcopy \
    -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
    -x UCX_MEMTYPE_CACHE=n \
    -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
    -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
    -np "${np}" \
    -H "${hosts_string}" \
    "${rochpl_runscript}" ${rochpl_args}
fi
\ No newline at end of file
projects/rocHPL/rochpl-scripts-2xbw1000.patch
deleted
100644 → 0
View file @
d49f25a8
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..a0e8a41 100755
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@
supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@
filename=HPL.dat
inputfile=false
cmdrun=false
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
devices=
# #################################################
@@ -111,21 +118,6 @@
num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
-#Default MPI options
-mpi_args=
-
-#Check if using OpenMPI
-if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
- mpi_args+=" --map-by node --rank-by slot --bind-to none "
-
- #Check if this is OpenMPI+UCX
- ompi_info=$(dirname ${mpi_bin})/ompi_info
- if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
- # ucx-specific args
- mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
- fi
-fi
-
# #################################################
# Parameter parsing
# #################################################
@@ -153,7 +145,7 @@
while true; do
exit 0
;;
--version)
- ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+ ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
exit 0
;;
-P)
@@ -218,5 +210,25 @@
if [ ! -z "${devices}" ]; then
rochpl_args+=" --devices=${devices}"
fi
+echo "Copying files..."
+rsync -az -e 'ssh -p 3333' build tpl ${filename} node02:/workspace/
+
#run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ --map-by ppr:8:node --bind-to none \
+ --mca pml ucx \
+ --mca osc ucx \
+ --mca btl ^openib \
+ --mca btl_tcp_if_include p14p2 \
+ --mca plm_rsh_args "-p 3333" \
+ --mca coll_hcoll_enable 0 \
+ -x UCX_TLS=self,sm,rocm,rc \
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
+ -x UCX_MEMTYPE_CACHE=n \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -np 16 \
+ -H node01:8,node02:8 \
+ ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..68c1958 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@
supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -104,7 +104,9 @@
cmdrun=false
devices=
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib:$LD_LIBRARY_PATH
# #################################################
# Parameter parsing
@@ -274,7 +276,7 @@
myq=$((rank/p))
cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
#construct list of devices and their numa affinities
-devicelist=$(${rocm_dir}/bin/rocm-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
#count the cpus per core
threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
@@ -361,9 +363,23 @@
export OMP_NUM_THREADS=${omp_num_threads}
export OMP_PLACES=${omp_places}
export OMP_PROC_BIND=true
+# Hard-coded IB mapping for now
+declare -a IB_MAP=(
+ "mlx5_1:1" # GPU 0 -> NUMA 3
+ "mlx5_2:1" # GPU 1 -> NUMA 3
+ "mlx5_3:1" # GPU 2 -> NUMA 0
+ "mlx5_4:1" # GPU 3 -> NUMA 0
+ "mlx5_7:1" # GPU 4 -> NUMA 7
+ "mlx5_8:1" # GPU 5 -> NUMA 7
+ "mlx5_9:1" # GPU 6 -> NUMA 4
+ "mlx5_10:1" # GPU 7 -> NUMA 4
+)
+
+myib=${IB_MAP[$rank]}
+export UCX_NET_DEVICES=${myib}
if [[ $globalRank -lt $size ]]; then
- echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+ echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, IB: $myib, CPU Cores: $omp_num_threads - $places"
fi
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +390,5 @@
else
fi
#run
-${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+numactl -N ${mynuma} -m ${mynuma} ${rochpl_bin} ${rochpl_args}
projects/rocHPL/rochpl-scripts-4xbw1000.patch
deleted
100644 → 0
View file @
d49f25a8
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..72b25c1 100755
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@
supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@
filename=HPL.dat
inputfile=false
cmdrun=false
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
devices=
# #################################################
@@ -111,21 +118,6 @@
num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
-#Default MPI options
-mpi_args=
-
-#Check if using OpenMPI
-if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
- mpi_args+=" --map-by node --rank-by slot --bind-to none "
-
- #Check if this is OpenMPI+UCX
- ompi_info=$(dirname ${mpi_bin})/ompi_info
- if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
- # ucx-specific args
- mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
- fi
-fi
-
# #################################################
# Parameter parsing
# #################################################
@@ -153,7 +145,7 @@
while true; do
exit 0
;;
--version)
- ${mpi_bin} -np 1 ${mpi_args} ${rochpl_runscript} --version
+ ${mpi_bin} --allow-run-as-root -np 1 ${rochpl_runscript} --version
exit 0
;;
-P)
@@ -218,5 +210,33 @@
if [ ! -z "${devices}" ]; then
rochpl_args+=" --devices=${devices}"
fi
+echo "Copying files..."
+if [[ "${inputfile}" == false ]]; then
+ rsync -az -e 'ssh -p 3333' build tpl node02:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl node03:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl node04:/workspace/
+else
+ rsync -az -e 'ssh -p 3333' build tpl ${filename} node02:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl ${filename} node03:/workspace/
+ rsync -az -e 'ssh -p 3333' build tpl ${filename} node04:/workspace/
+fi
+
#run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ --map-by ppr:8:node --bind-to none \
+ --mca pml ucx \
+ --mca osc ucx \
+ --mca btl ^openib \
+ --mca btl_tcp_if_include p14p2 \
+ --mca plm_rsh_args "-p 3333" \
+ --mca coll_hcoll_enable 0 \
+ -x UCX_TLS=self,sm,rocm,rc \
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
+ -x UCX_MEMTYPE_CACHE=n \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -np 32 \
+ -H node01:8,node02:8,node03:8,node04:8 \
+ ${rochpl_runscript} ${rochpl_args}
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..68c1958 100755
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@
supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -104,7 +104,9 @@
cmdrun=false
devices=
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib:$LD_LIBRARY_PATH
# #################################################
# Parameter parsing
@@ -274,7 +276,7 @@
myq=$((rank/p))
cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
#construct list of devices and their numa affinities
-devicelist=$(${rocm_dir}/bin/rocm-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
#count the cpus per core
threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
@@ -361,9 +363,23 @@
export OMP_NUM_THREADS=${omp_num_threads}
export OMP_PLACES=${omp_places}
export OMP_PROC_BIND=true
+# Hard-coded IB mapping for now
+declare -a IB_MAP=(
+ "mlx5_1:1" # GPU 0 -> NUMA 3
+ "mlx5_2:1" # GPU 1 -> NUMA 3
+ "mlx5_3:1" # GPU 2 -> NUMA 0
+ "mlx5_4:1" # GPU 3 -> NUMA 0
+ "mlx5_7:1" # GPU 4 -> NUMA 7
+ "mlx5_8:1" # GPU 5 -> NUMA 7
+ "mlx5_9:1" # GPU 6 -> NUMA 4
+ "mlx5_10:1" # GPU 7 -> NUMA 4
+)
+
+myib=${IB_MAP[$rank]}
+export UCX_NET_DEVICES=${myib}
if [[ $globalRank -lt $size ]]; then
- echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+ echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, IB: $myib, CPU Cores: $omp_num_threads - $places"
fi
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +390,5 @@
else
fi
#run
-${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+numactl -N ${mynuma} -m ${mynuma} ${rochpl_bin} ${rochpl_args}
diff --git a/scripts/wrapper.sh b/scripts/wrapper.sh
new file mode 100755
index 0000000..e11d6dc
--- /dev/null
+++ b/scripts/wrapper.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+tpl_dir=/workspace/tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${ompi_lib_dir}:${ucx_lib_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
+export PATH=${ompi_lib_dir}/../bin:${ucx_lib_dir}/../bin:${PATH}
+
+exec mpirun --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ -np 16 \
+ -H node01:8,node02:8 \
+ --mca plm_rsh_args "-p 3333" \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -x UCX_NET_DEVICES=mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_7,mlx5_8,mlx5_9,mlx5_10 \
+ "$@"
projects/rocHPL/rochpl-scripts-bw.patch
View file @
7e8d5453
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
diff --git a/scripts/mpirun_rochpl.in b/scripts/mpirun_rochpl.in
index 155f502..
df6bd9b
100755
index 155f502..
0f15d38
100755
--- a/scripts/mpirun_rochpl.in
--- a/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
+++ b/scripts/mpirun_rochpl.in
@@ -46,7 +46,7 @@
supported_distro( )
@@ -31,6 +31,9 @@
function display_help()
echo " [-i] Input file. When set, all other commnand "
echo " line parameters are ignored, and problem "
echo " parameters are read from input file. "
+ echo " [-H|--hosts] Comma-separated list of nodes to run on. "
+ echo " [--tcp-iface] TCP interface to use for communication. "
+ echo " [--port] SSH port to use for remote connections. "
echo " [-h|--help] prints this help message "
echo " [--version] Print rocHPL version number. "
}
@@ -46,7 +49,7 @@
supported_distro( )
fi
fi
case "${ID}" in
case "${ID}" in
...
@@ -11,7 +21,23 @@ index 155f502..df6bd9b 100755
...
@@ -11,7 +21,23 @@ index 155f502..df6bd9b 100755
true
true
;;
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -101,6 +101,13 @@
filename=HPL.dat
@@ -80,6 +83,15 @@
fi
# The following function exits script if an unsupported distro is detected
supported_distro
+# Detect the number of GPUs per node
+ngpu_per_node=$(hy-smi --showid 2>/dev/null | grep -ic "Device ID")
+if [[ -z "${ngpu_per_node}" || "${ngpu_per_node}" -eq 0 ]]; then
+ echo "Failed to get the number of GPUs per node via hy-smi. Defaulting to 8."
+ ngpu_per_node=8
+else
+ echo "Detected ${ngpu_per_node} GPUs per node."
+fi
+
# #################################################
# global variables
# #################################################
@@ -101,7 +113,17 @@
filename=HPL.dat
inputfile=false
inputfile=false
cmdrun=false
cmdrun=false
...
@@ -23,9 +49,13 @@ index 155f502..df6bd9b 100755
...
@@ -23,9 +49,13 @@ index 155f502..df6bd9b 100755
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$ompi_lib_dir:$ucx_lib_dir:$LD_LIBRARY_PATH
+export OPAL_PREFIX=$ompi_prefix
+export OPAL_PREFIX=$ompi_prefix
devices=
devices=
+nodes=
+tcp_iface=p14p2
+ssh_port=3333
# #################################################
# #################################################
@@ -111,21 +118,6 @@
num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
# MPI Args
@@ -111,21 +133,6 @@
num_cpu_cores=$(lscpu | grep "Core(s)" | awk '{print $4}')
num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
num_cpu_sockets=$(lscpu | grep Socket | awk '{print $2}')
total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
total_cpu_cores=$(($num_cpu_cores*$num_cpu_sockets))
...
@@ -47,7 +77,16 @@ index 155f502..df6bd9b 100755
...
@@ -47,7 +77,16 @@ index 155f502..df6bd9b 100755
# #################################################
# #################################################
# Parameter parsing
# Parameter parsing
# #################################################
# #################################################
@@ -153,7 +145,7 @@
while true; do
@@ -133,7 +140,7 @@
fi
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ $? -eq 4 ]]; then
- GETOPT_PARSE=$(getopt --name "${0}" --longoptions NB:,it:,help,version,devices:, --options hP:Q:p:q:N:i:f: -- "$@")
+ GETOPT_PARSE=$(getopt --name "${0}" --longoptions NB:,it:,help,version,devices:,hosts:,port:,tcp-iface:, --options hP:Q:p:q:N:i:f:H: -- "$@")
else
echo "Need a new version of getopt"
exit 1
@@ -153,7 +160,7 @@
while true; do
exit 0
exit 0
;;
;;
--version)
--version)
...
@@ -56,25 +95,105 @@ index 155f502..df6bd9b 100755
...
@@ -56,25 +95,105 @@ index 155f502..df6bd9b 100755
exit 0
exit 0
;;
;;
-P)
-P)
@@ -219,4 +211,15 @@
if [ ! -z "${devices}" ]; then
@@ -189,6 +196,15 @@
while true; do
--devices)
devices=${2}
shift 2 ;;
+ -H|--hosts)
+ nodes=${2}
+ shift 2 ;;
+ --tcp-iface)
+ tcp_iface=${2}
+ shift 2 ;;
+ --port)
+ ssh_port=${2}
+ shift 2 ;;
--) shift ; break ;;
*) echo "Unexpected command line parameter received; aborting";
exit 1
@@ -218,5 +234,77 @@
if [ ! -z "${devices}" ]; then
rochpl_args+=" --devices=${devices}"
fi
fi
#run
-
#run
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
-${mpi_bin} -np ${np} ${mpi_args} ${rochpl_runscript} ${rochpl_args}
+${mpi_bin} --allow-run-as-root \
+# Run single-node test if --hosts is not set
+ --bind-to none \
+if [ -z "${nodes}" ]; then
+ --mca pml ucx \
+ echo "No compute nodes specified. Running in single-node mode."
+ --mca osc ucx \
+
+ --mca btl ^vader,tcp,openib,uct \
+ ${mpi_bin} --allow-run-as-root \
+ --mca coll ^hcoll \
+ --bind-to none \
+ -x UCX_TLS=self,sm,rocm \
+ --mca pml ucx \
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ --mca osc ucx \
+ -x UCX_MEMTYPE_CACHE=n \
+ --mca btl ^vader,tcp,openib,uct \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ --mca coll ^hcoll \
+ -np ${np} \
+ -x UCX_TLS=self,sm,rocm \
+ ${rochpl_runscript} ${rochpl_args}
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ -x UCX_MEMTYPE_CACHE=n \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ -np ${np} \
+ ${rochpl_runscript} ${rochpl_args}
+else
+ echo "Running in multi-node mode. Using nodes: ${nodes}"
+ echo "Using TCP interface: ${tcp_iface}"
+ echo "Using SSH port: ${ssh_port}"
+
+ # Set rank counts for hosts
+ IFS=',' read -ra node_array <<< "${nodes}"
+ hosts_string=""
+ for node in "${node_array[@]}"; do
+ hosts_string+="${node}:${ngpu_per_node},"
+ done
+ hosts_string="${hosts_string%,}"
+
+ echo "MPI hosts: ${hosts_string}"
+
+ # Copy files to other nodes
+ current_node=$(hostname)
+ copyto_hosts=()
+ for node in "${node_array[@]}"; do
+ if [[ "${node}" != "${current_node}" ]]; then
+ copyto_hosts+=("${node}")
+ fi
+ done
+
+ # Copy files using rsync only if there are other nodes to copy to
+ if [ ${#copyto_hosts[@]} -gt 0 ]; then
+ echo "Copying files to other nodes in parallel: ${copyto_hosts[@]}"
+ for node in "${copyto_hosts[@]}"; do
+ if [[ "${inputfile}" == false ]]; then
+ rsync -az -e "ssh -p ${ssh_port}" build tpl "${node}:/workspace/" &
+ else
+ rsync -az -e "ssh -p ${ssh_port}" build tpl ${filename} "${node}:/workspace/" &
+ fi
+ done
+ wait
+ echo "Files synchronized successfully."
+ fi
+
+ # Multi-node run
+ ${mpi_bin} --allow-run-as-root \
+ --prefix ${ompi_prefix} \
+ --map-by ppr:${ngpu_per_node}:node --bind-to none \
+ --mca pml ucx \
+ --mca osc ucx \
+ --mca btl ^openib \
+ --mca btl_tcp_if_include ${tcp_iface} \
+ --mca plm_rsh_args "-p ${ssh_port}" \
+ --mca coll_hcoll_enable 0 \
+ -x UCX_TLS=self,sm,rocm,rc \
+ -x UCX_RNDV_SCHEME=put_zcopy \
+ -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
+ -x UCX_MEMTYPE_CACHE=n \
+ -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
+ -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
+ -np ${np} \
+ -H ${hosts_string} \
+ ${rochpl_runscript} ${rochpl_args}
+fi
\
No newline at end of file
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
diff --git a/scripts/run_rochpl.in b/scripts/run_rochpl.in
index 1522e5d..
3f840a7
100755
index 1522e5d..
68c1958
100755
--- a/scripts/run_rochpl.in
--- a/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
+++ b/scripts/run_rochpl.in
@@ -46,7 +46,7 @@
supported_distro( )
@@ -46,7 +46,7 @@
supported_distro( )
...
@@ -106,19 +225,35 @@ index 1522e5d..3f840a7 100755
...
@@ -106,19 +225,35 @@ index 1522e5d..3f840a7 100755
#count the cpus per core
#count the cpus per core
threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
@@ -363,7 +365,7 @@
export OMP_PROC_BIND=true
@@ -361,9 +363,23 @@
export OMP_NUM_THREADS=${omp_num_threads}
export OMP_PLACES=${omp_places}
export OMP_PROC_BIND=true
+# Hard-coded IB mapping for now
+declare -a IB_MAP=(
+ "mlx5_1:1" # GPU 0 -> NUMA 3
+ "mlx5_2:1" # GPU 1 -> NUMA 3
+ "mlx5_3:1" # GPU 2 -> NUMA 0
+ "mlx5_4:1" # GPU 3 -> NUMA 0
+ "mlx5_7:1" # GPU 4 -> NUMA 7
+ "mlx5_8:1" # GPU 5 -> NUMA 7
+ "mlx5_9:1" # GPU 6 -> NUMA 4
+ "mlx5_10:1" # GPU 7 -> NUMA 4
+)
+
+myib=${IB_MAP[$rank]}
+export UCX_NET_DEVICES=${myib}
if [[ $globalRank -lt $size ]]; then
if [[ $globalRank -lt $size ]]; then
- echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
- echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, CPU Cores: $omp_num_threads - $places"
+ echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, CPU Cores: $omp_num_threads - $places"
+ echo "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma,
IB: $myib,
CPU Cores: $omp_num_threads - $places"
fi
fi
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
rochpl_args="-P ${P} -Q ${Q} -p ${p} -q ${q} -f ${frac} -it ${it}"
@@ -374,4 +3
76
,5 @@
else
@@ -374,4 +3
90
,5 @@
else
fi
fi
#run
#run
-${rochpl_bin} ${rochpl_args}
-${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+#${rochpl_bin} ${rochpl_args}
+numactl -
-cpunodebind=
${mynuma} -
-membind=
${mynuma} ${rochpl_bin} ${rochpl_args}
+numactl -
N
${mynuma} -
m
${mynuma} ${rochpl_bin} ${rochpl_args}
projects/rocHPL/run_rochpl.in
0 → 100644
View file @
7e8d5453
#!/usr/bin/env bash
# Author: Noel Chalmers
# set -x #echo on
# #################################################
# helper functions
# #################################################
#######################################
# Print the usage summary for the per-rank run script.
# Every option accepted by the getopt call further down is listed here,
# including --devices, which is parsed but was undocumented.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
display_help() {
  cat <<'EOF'
rocHPL run helper script
./run_rochpl
    [-P]        Specific MPI grid size: the number of
                rows in MPI grid.
    [-Q]        Specific MPI grid size: the number of
                columns in MPI grid.
    [-p]        Specific node-local MPI grid size: the number
                of rows in node-local MPI grid. Must evenly
                divide P.
    [-q]        Specific node-local MPI grid size: the number
                of columns in node-local MPI grid. Must evenly
                divide Q.
    [-N]        Specific matrix size: the number of
                rows/columns in global matrix.
    [--NB]      Specific panel size: the number of
                rows/columns in panels.
    [--it]      Iterations: the number of times to run each
                problem size.
    [-f]        Specific split fraction: the percentage to
                split the trailing submatrix.
    [-i]        Input file. When set, all other command
                line parameters are ignored, and problem
                parameters are read from input file.
    [--devices] Device selection to use for the run.
    [-h|--help] prints this help message
    [--version] Print rocHPL version number.
EOF
}
#######################################
# Abort the script unless the running distribution is on the supported list.
# Globals:   ID (read) - distribution identifier; must be set before calling
#            (this script obtains it by sourcing /etc/os-release)
# Arguments: none
# Outputs:   a diagnostic on stderr when the check fails
# Returns:   0 when ${ID} is supported; otherwise the script exits with 2
#######################################
supported_distro() {
  # ${ID+set} expands to "set" only when ID is defined (even if empty).
  if [[ -z "${ID+set}" ]]; then
    printf '%s\n' 'supported_distro(): $ID must be set' >&2
    exit 2
  fi

  case "${ID}" in
    debian | linuxmint | ubuntu | centos | rhel | fedora | sles | tencentos | kylin | rocky)
      return 0
      ;;
    *)
      # Keep this list in step with the pattern above.
      printf '%s\n' "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky" >&2
      exit 2
      ;;
  esac
}
# #################################################
# Pre-requisites check
# #################################################
# Exit code 0: all is well
# Exit code 1: problems with getopt
# Exit code 2: problems with supported platforms
# getopt does all of the argument parsing below, so bail out early without it.
if ! type getopt > /dev/null; then
  echo "This script uses getopt to parse arguments; try installing the util-linux package"
  exit 1
fi

# The os-release file describes the system; supported_distro reads ${ID},
# which is expected to come from it.
if [[ ! -e "/etc/os-release" ]]; then
  echo "This script depends on the /etc/os-release file"
  exit 2
fi
source /etc/os-release

# Exits the script when an unsupported distro is detected.
supported_distro
# #################################################
# global variables
# #################################################
# Grab options from CMake config (the @...@ placeholders are filled in when
# this template is configured).
rochpl_bin="@CMAKE_INSTALL_PREFIX@/bin/rochpl"
rocm_dir="@ROCM_PATH@"
rocblas_dir="@ROCBLAS_LIB_PATH@"
blas_dir="@HPL_BLAS_DIR@"

# Defaults for the options handled by the parameter-parsing loop.
P=1
Q=1
p=-1
q=-1
N=45312
NB=384
it=1
frac=0.3
filename=HPL.dat
inputfile=false
cmdrun=false
devices=

# The bundled UCX libraries live in ../tpl relative to the real
# (symlink-resolved) location of this script.
tpl_dir="$(dirname "$(readlink -f "$0")")/../tpl"
ucx_lib_dir="${tpl_dir}/ucx/lib"

# Append the inherited LD_LIBRARY_PATH only when it is non-empty: a trailing
# ":" would add an empty entry, which the dynamic loader treats as the
# current working directory.
export LD_LIBRARY_PATH="${rocblas_dir}:${blas_dir}:${ucx_lib_dir}:${rocm_dir}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# #################################################
# Parameter parsing
# #################################################
# check if we have a modern version of getopt that can handle whitespace and long parameters
# `getopt -T` is the self-test: this script only accepts a getopt that
# answers it with exit status 4
getopt -T
if [[ $? -ne 4 ]]; then
  echo "Need a new version of getopt"
  exit 1
fi

# Let getopt normalize the command line; the result is re-loaded into the
# positional parameters by the parsing loop that follows
GETOPT_PARSE=$(getopt --name "${0}" \
  --longoptions NB:,it:,help,version,devices:, \
  --options hP:Q:p:q:N:i:f: \
  -- "$@")
if [[ $? -ne 0 ]]; then
  echo "getopt invocation failed; could not parse the command line"
  exit 1
fi
# Replace the positional parameters with getopt's normalized option list and
# consume it one option at a time until the "--" terminator is reached.
eval set -- "${GETOPT_PARSE}"

while true; do
  case "${1}" in
    -h | --help)
      display_help
      exit 0
      ;;
    --version)
      ${rochpl_bin} --version
      exit 0
      ;;

    # global and node-local MPI grid
    -P) P=${2}; shift 2 ;;
    -Q) Q=${2}; shift 2 ;;
    -p) p=${2}; shift 2 ;;
    -q) q=${2}; shift 2 ;;

    # problem given on the command line (marks cmdrun)
    -N)
      N=${2}
      cmdrun=true
      shift 2
      ;;
    --NB)
      NB=${2}
      cmdrun=true
      shift 2
      ;;

    --it) it=${2}; shift 2 ;;
    -f) frac=${2}; shift 2 ;;

    # problem given through an input file (marks inputfile)
    -i)
      filename=${2}
      inputfile=true
      shift 2
      ;;

    --devices) devices=${2}; shift 2 ;;

    --)
      shift
      break
      ;;
    *)
      echo "Unexpected command line parameter received; aborting"
      exit 1
      ;;
  esac
done
#if neither an input file (-i) nor a problem size (-N/--NB) was given,
# default to running with the default input file
# No -i and no -N/--NB on the command line: fall back to the input file
if [[ "${inputfile}" == false ]] && [[ "${cmdrun}" == false ]]; then
  inputfile=true
fi

# Total number of MPI processes in the P x Q grid
np=$(( $P * $Q ))
if (( np < 1 )); then
  echo "Invalid MPI grid parameters; aborting"
  exit 1
fi
#######################################
# Now figure out the CPU core mappings
#######################################
# Get local process numbering
# Derive this process's global rank/size and node-local rank/size from the
# environment exported by the job launcher. nounset is relaxed while probing
# because the launcher variables may legitimately be absent.
set +u
if [[ -n ${OMPI_COMM_WORLD_LOCAL_RANK+x} ]]; then
  # OMPI_* variables are present
  globalRank=$OMPI_COMM_WORLD_RANK
  globalSize=$OMPI_COMM_WORLD_SIZE
  rank=$OMPI_COMM_WORLD_LOCAL_RANK
  size=$OMPI_COMM_WORLD_LOCAL_SIZE
elif [[ -n ${SLURM_LOCALID+x} ]]; then
  # SLURM_* variables are present
  globalRank=$SLURM_PROCID
  globalSize=$SLURM_NTASKS
  rank=$SLURM_LOCALID
  size=$SLURM_TASKS_PER_NODE
  #Slurm can return a string like "2(x2),1". Get the first number
  size=$(echo "$size" | sed -r 's/^([^.]+).*$/\1/; s/^[^0-9]*([0-9]+).*$/\1/')
elif [[ -n ${FLUX_TASK_LOCAL_ID+x} ]]; then
  # FLUX_* variables are present; the per-node size is derived by dividing
  # the job size evenly over the nodes
  globalRank=$FLUX_TASK_RANK
  globalSize=$FLUX_JOB_SIZE
  nnodes=$FLUX_JOB_NNODES
  rank=$FLUX_TASK_LOCAL_ID
  size=$((globalSize/nnodes))
else
  # Without this branch rank/size stay unset and the script only fails later
  # with an unrelated "unbound variable" message once nounset is back on.
  echo "Unable to determine the local MPI rank: none of OMPI_COMM_WORLD_LOCAL_RANK, SLURM_LOCALID or FLUX_TASK_LOCAL_ID is set; aborting" >&2
  exit 1
fi
set -u
#Determine node-local grid size
# Report that size processes per node cannot be arranged as p x q, and stop.
abort_local_grid() {
  echo "Invalid MPI grid parameters; Unable to form node-local grid; aborting"
  exit 1
}

# A value below 1 means the corresponding dimension was not specified.
if (( p < 1 && q < 1 )); then
  # no node-local grid was specified, pick defaults: use as many columns as
  # fit on the node, then derive p exactly as if q had been given
  q=$(( Q <= size ? Q : size ))
fi

if (( p < 1 )); then
  #q is known (specified or defaulted); it must divide the local size
  if (( size % q > 0 )); then
    abort_local_grid
  fi
  p=$(( size / q ))
elif (( q < 1 )); then
  #p was specified; it must divide the local size
  if (( size % p > 0 )); then
    abort_local_grid
  fi
  q=$(( size / p ))
else
  #Both p and q were specified; together they must use every local process
  if (( size != p * q )); then
    abort_local_grid
  fi
fi
# Check that the P rows of the global grid split evenly into node-local rows
if (( P % p > 0 )); then
  echo "Invalid MPI grid parameters; Must have the same number of P rows on every node; aborting"
  exit 1
fi

# Check that the Q columns of the global grid split evenly into node-local columns
if (( Q % q > 0 )); then
  echo "Invalid MPI grid parameters; Must have the same number of Q columns on every node; aborting"
  exit 1
fi

# Coordinates of this process in the node-local grid:
# row = local rank modulo p, column = local rank divided by p
myp=$(( rank % p ))
myq=$(( rank / p ))
#table of all logical cpus as tab-separated CPU, CORE, NODE columns:
# drop lscpu's '#' header lines, turn commas into tabs, then order the rows
# by core id with a stable sort
cpulist=$(
  lscpu --parse=CPU,CORE,NODE \
    | grep -v '#' \
    | tr ',' '\t' \
    | sort -k 2 -g -s
)
#table of devices and their numa affinities: skip the first (header) line of
# the CSV report and turn commas into tabs
devicelist=$(
  hy-smi --csv --showtoponuma \
    | sed '1d' \
    | tr ',' '\t'
)
# Builds the per-NUMA core tables from ${cpulist} (tab-separated CPU, CORE,
# NODE rows, ordered by core):
#   threads_per_core   logical cpus reported for core 0
#   corelist           one row per physical core, ordered by NUMA node
#   n_numa             number of NUMA nodes
#   cpus[]             cpu id of every physical core, in corelist order
#   numa_core_counts[] physical cores per NUMA node
#   numa_proc_counts[] processes per NUMA node (zero-initialised here)
#   numa_core_offsets[] index into cpus[] of the first core of each NUMA node

#count the cpus per core: rows whose CORE column is 0. The column is tested
# directly so the result does not depend on literal tab characters surviving
# inside a grep pattern.
threads_per_core=$(awk -F'\t' '$2 == 0 { n++ } END { print n + 0 }' <<< "${cpulist}")

#remove the extra cpus on each core to make a list of just physical cores, then sort by numa domain
corelist=$(awk -v tpc="${threads_per_core}" '(NR-1)%tpc==0' <<< "${cpulist}" | sort -k 3 -g -s)

#count numa domains: highest NODE id + 1. The whole list is scanned because
# cpulist is ordered by core, so its last row need not carry the highest node.
n_numa=$(awk -F'\t' '$3 + 0 > max { max = $3 + 0 } END { print max + 1 }' <<< "${cpulist}")

numa_core_counts=()
numa_proc_counts=()
for (( i = 0; i < n_numa; i++ )); do
  numa_core_counts+=(0)
  numa_proc_counts+=(0)
done

#parse the list of cpus to array and count cpus in each numa
# (the here-string is quoted: unquoted, older bash releases word-split it and
# collapse the list to a single line)
cpus=()
while read -r -a line; do
  # ignore blank/short rows instead of tripping nounset on a missing column
  (( ${#line[@]} >= 3 )) || continue
  cpus+=("${line[0]}")
  (( numa_core_counts[${line[2]}]++ ))
done <<< "${corelist}"

# Offset of NUMA node i = offset of node i-1 plus the number of cores in node
# i-1 (a running sum of the *preceding* counts). Adding the count of node i
# itself is only right when every node has the same number of cores.
numa_core_offsets=(0)
for (( i = 1; i < n_numa; i++ )); do
  numa_core_offsets+=( $(( numa_core_offsets[i-1] + numa_core_counts[i-1] )) )
done
#parse device to numa mapping: device_to_numa[k] is the second column of the
# k-th row of ${devicelist}
# NOTE(review): assumes the second column of the hy-smi report is the NUMA
# node of the device - confirm against the tool's CSV layout
device_to_numa=()
while read -r -a line; do
  # skip blank/short rows rather than aborting on an unset column under nounset
  (( ${#line[@]} >= 2 )) || continue
  device_to_numa+=("${line[1]}")
done <<< "${devicelist}"   # quoted so older bash does not collapse the rows
# rank_to_device[r] is the device assigned to node-local rank r. Devices are
# handed out round-robin, either from the --devices list or from all devices
# listed in ${devicelist}.
rank_to_device=()
if [[ -n "${devices}" ]]; then
  IFS=',' read -r -a device_array <<< "$devices"
  n_devices=${#device_array[@]}
  for (( i = 0; i < size; i++ )); do
    rank_to_device+=("${device_array[i % n_devices]}")
  done
  # restrict this process to its own device from the user-supplied list
  export ROCR_VISIBLE_DEVICES=${rank_to_device[rank]}
else
  n_devices=$(grep -c "card" <<< "${devicelist}")
  # Without this check the modulo below divides by zero and the script later
  # stops with an unrelated "unbound variable" message.
  if (( n_devices < 1 )); then
    echo "No devices were reported by hy-smi; aborting" >&2
    exit 1
  fi
  for (( i = 0; i < size; i++ )); do
    rank_to_device+=( $(( i % n_devices )) )
  done
fi
# Device and NUMA node of this process
mygpu=${rank_to_device[rank]}
mynuma=${device_to_numa[mygpu]}

# For every node-local rank, record the NUMA node of its device and count how
# many ranks land on each NUMA node
rank_to_numa=()
for (( i = 0; i < size; i++ )); do
  dev=${rank_to_device[i % n_devices]}
  numa=${device_to_numa[dev]}
  rank_to_numa+=(${numa})
  (( numa_proc_counts[numa]++ ))
done
# Cores of a NUMA node are shared equally by the processes placed on it
omp_num_threads=$(( numa_core_counts[mynuma] / numa_proc_counts[mynuma] ))

# Start at the first core of our NUMA node, then step past the share of every
# lower-numbered local rank that sits on the same node
core_offset=${numa_core_offsets[mynuma]}
for (( i = 0; i < rank; i++ )); do
  if (( rank_to_numa[i] == mynuma )); then
    (( core_offset += omp_num_threads ))
  fi
done
# One "{cpu}" place per thread, taken from consecutive entries of cpus[]
# starting at core_offset
first_cpu=${cpus[core_offset]}
omp_places="{${first_cpu}}"
for (( c = 1; c < omp_num_threads; c++ )); do
  omp_places+=",{${cpus[core_offset + c]}}"
done

# Compact "{first-last}" form, used only for the binding report
if (( omp_num_threads > 1 )); then
  last_cpu=${cpus[core_offset + omp_num_threads - 1]}
  places="{${first_cpu}-${last_cpu}}"
else
  places="{${first_cpu}}"
fi

# Export OpenMP config
export OMP_NUM_THREADS=${omp_num_threads}
export OMP_PLACES=${omp_places}
export OMP_PROC_BIND=true
# Hard-coded IB mapping for now: entry g is the UCX network device (name:port)
# for the process that drives GPU g
declare -a IB_MAP=(
  "mlx5_1:1"   # GPU 0 -> NUMA 3
  "mlx5_2:1"   # GPU 1 -> NUMA 3
  "mlx5_3:1"   # GPU 2 -> NUMA 0
  "mlx5_4:1"   # GPU 3 -> NUMA 0
  "mlx5_7:1"   # GPU 4 -> NUMA 7
  "mlx5_8:1"   # GPU 5 -> NUMA 7
  "mlx5_9:1"   # GPU 6 -> NUMA 4
  "mlx5_10:1"  # GPU 7 -> NUMA 4
)

# The table is keyed by GPU, so look it up with the GPU assigned to this
# process rather than with the local rank: the two differ as soon as --devices
# is used or more ranks than GPUs run on a node.
if [[ -z "${IB_MAP[mygpu]:-}" ]]; then
  echo "No IB device is mapped to GPU ${mygpu}; aborting" >&2
  exit 1
fi
myib=${IB_MAP[mygpu]}
export UCX_NET_DEVICES=${myib}
# Print the binding once per local rank: only processes whose global rank is
# below the node-local size report
if (( globalRank < size )); then
  printf '%s\n' "Node Binding: Process $rank [(p,q)=($myp,$myq)] GPU: $mygpu, NUMA: $mynuma, IB: $myib, CPU Cores: $omp_num_threads - $places"
fi
# Assemble the rocHPL command line as an array so that every value (notably an
# input file name containing spaces) reaches the binary as exactly one argument.
rochpl_args=(-P "${P}" -Q "${Q}" -p "${p}" -q "${q}" -f "${frac}" -it "${it}")
if [[ "${inputfile}" == true ]]; then
  # problem parameters come from the input file
  rochpl_args+=(-i "${filename}")
else
  # problem parameters come from the command line
  rochpl_args+=(-N "${N}" -NB "${NB}")
fi

#run
#"${rochpl_bin}" "${rochpl_args[@]}"
# -N and -m are both given this process's NUMA node (see numactl(8))
numactl -N "${mynuma}" -m "${mynuma}" "${rochpl_bin}" "${rochpl_args[@]}"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment