Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
b623c7e9
Commit
b623c7e9
authored
Apr 01, 2026
by
one
Browse files
Convert rochpcg script patch into shell script
parent
a10c3e15
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
186 additions
and
196 deletions
+186
-196
third_party/Makefile
third_party/Makefile
+1
-4
third_party/rochpcg-scripts-bw.patch
third_party/rochpcg-scripts-bw.patch
+0
-192
third_party/run_rochpcg.sh
third_party/run_rochpcg.sh
+185
-0
No files found.
third_party/Makefile
View file @
b623c7e9
...
...
@@ -196,12 +196,9 @@ endif
rocm_hpcg
:
sb_micro_path
ifneq
(,$(wildcard rocHPCG/install.sh))
cd
./rocHPCG
&&
\
if
[
!
-f
./run_rochpcg
]
;
then
\
git apply ../rochpcg-scripts-bw.patch
;
\
fi
&&
\
./install.sh
--with-rocm
=
$(ROCM_PATH)
--with-mpi
=
$(MPI_HOME)
--gpu-aware-mpi
=
on
cp
-v
./rocHPCG/build/release/rochpcg-install/bin/rochpcg
$(SB_MICRO_PATH)
/bin/
cp
-v
./
rocHPCG/
run_rochpcg
$(SB_MICRO_PATH)
/bin/
cp
-v
./run_rochpcg
.sh
$(SB_MICRO_PATH)
/bin/
run_rochpcg
chmod
+x
$(SB_MICRO_PATH)
/bin/rochpcg
$(SB_MICRO_PATH)
/bin/run_rochpcg
endif
...
...
third_party/rochpcg-scripts-bw.patch
deleted
100644 → 0
View file @
a10c3e15
diff --git a/run_rochpcg b/run_rochpcg
new file mode 100755
index 0000000..0f806fe
--- /dev/null
+++ b/run_rochpcg
@@ -0,0 +1,180 @@
+#!/bin/bash
+
+# =================================================
+# Helper functions
+# =================================================
+help() {
+ cat << EOF
+rocHPCG helper script
+Usage: $(basename "$0") [OPTIONS]
+
+OPTIONS:
+ -h, --help Show this help message and exit
+ --npx Number of processes in x dimension of process grid (default: ${npx})
+ --npy Number of processes in y dimension of process grid (default: ${npy})
+ --npz Number of processes in z dimension of process grid (default: ${npz})
+ --nx Problem size in x dimension (default: ${nx})
+ --ny Problem size in y dimension (default: ${ny})
+ --nz Problem size in z dimension (default: ${nz})
+ --rt Benchmarking time in seconds (> 1800s for official runs) (default: ${runtime})
+ --tol Residual tolerance, skip reference verification if set (default: ${tol})
+ --pz Partition boundary in z process dimension (default: 0, uniform grid)
+ --zl Local nz value for processes with z rank < pz (default: equal to ${nz})
+ --zu Local nz value for processes with z rank >= pz (default: equal to ${nz})
+EOF
+}
+
+# =================================================
+# Global variables
+# =================================================
+npx=1
+npy=1
+npz=1
+nx=560
+ny=280
+nz=280
+runtime=60
+tol=1
+pz=0
+zl=${nz}
+zu=${nz}
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+rochpcg_bin="${SCRIPT_DIR}/rochpcg"
+
+if [[ ! -x "${rochpcg_bin}" ]]; then
+ echo "Cannot find rochpcg binary at ${rochpcg_bin}"
+ exit 1
+fi
+
+# =================================================
+# Parameter parsing
+# =================================================
+GETOPT_PARSE=$(getopt --name "${0}" --options h --longoptions help,npx:,npy:,npz:,nx:,ny:,nz:,rt:,tol:,pz:,zl:,zu: -- "$@") \
+ || { echo "getopt invocation failed; could not parse the command line"; exit 1; }
+
+eval set -- "${GETOPT_PARSE}"
+
+while true; do
+ case "${1}" in
+ -h|--help) help; exit 0 ;;
+ --npx) npx=${2}; shift 2 ;;
+ --npy) npy=${2}; shift 2 ;;
+ --npz) npz=${2}; shift 2 ;;
+ --nx) nx=${2}; shift 2 ;;
+ --ny) ny=${2}; shift 2 ;;
+ --nz)
+ nz=${2}
+ zl=${nz}
+ zu=${nz}
+ shift 2 ;;
+ --rt) runtime=${2}; shift 2 ;;
+ --tol) tol=${2}; shift 2 ;;
+ --pz) pz=${2}; shift 2 ;;
+ --zl) zl=${2}; shift 2 ;;
+ --zu) zu=${2}; shift 2 ;;
+ --) shift ; break ;;
+ *) echo "Unexpected command line parameter received; aborting";
+ exit 1
+ ;;
+ esac
+done
+
+# Build rochpcg arguments
+rochpcg_args="--npx=${npx} --npy=${npy} --npz=${npz}"
+rochpcg_args+=" --nx=${nx} --ny=${ny} --nz=${nz}"
+rochpcg_args+=" --rt=${runtime}"
+rochpcg_args+=" --tol=${tol}"
+rochpcg_args+=" --pz=${pz}"
+rochpcg_args+=" --zl=${zl}"
+rochpcg_args+=" --zu=${zu}"
+
+# =================================================
+# Affinity setup
+# =================================================
+globalRank=${OMPI_COMM_WORLD_RANK:-0}
+rank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
+size=${OMPI_COMM_WORLD_LOCAL_SIZE:-1}
+
+#construct a list of all cpus, sorted by core
+cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
+
+#construct list of devices and their numa affinities
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+
+#count the cpus per core
+threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
+
+#remove the extra cpus on each core to make a list of just physical cores, then sort by numa domain
+corelist=$(echo "$cpulist" | awk -v tpc=${threads_per_core} '(NR-1)%tpc==0' | sort -k 3 -g -s)
+
+#count numa domains
+line=($(echo "$cpulist" | tail -n 1))
+n_numa=$((line[2]+1))
+
+numa_core_counts=()
+numa_proc_counts=()
+for i in $(seq 1 ${n_numa}); do numa_core_counts+=(0); numa_proc_counts+=(0); done
+
+#parse the list of cpus to array and count cpus in each numa
+cpus=()
+while read -a line; do
+ cpus+=(${line[0]})
+ ((numa_core_counts[${line[2]}]++)) || true
+done <<< "${corelist}"
+
+numa_core_offsets=(0)
+for i in $(seq 1 $((n_numa-1))); do numa_core_offsets+=($((numa_core_offsets[$((i-1))] + numa_core_counts[$i]))); done
+
+#parse device to numa mapping
+device_to_numa=()
+while read -a line; do
+ device_to_numa+=(${line[1]})
+done <<< "${devicelist}"
+
+rank_to_device=()
+n_devices=$(echo "${devicelist}" | grep -c "card")
+for i in $(seq 0 $((size-1))); do
+ rank_to_device+=($((i%n_devices)))
+done
+
+mygpu=${rank_to_device[rank]}
+mynuma=${device_to_numa[mygpu]}
+
+rank_to_numa=()
+for i in $(seq 0 $((size-1))); do
+ rank_to_numa+=(${device_to_numa[${rank_to_device[$((i%n_devices))]}]})
+done
+
+for i in $(seq 0 $((size-1))); do
+ numa=${rank_to_numa[$i]}
+ ((numa_proc_counts[numa]++)) || true
+done
+
+omp_num_threads=$((numa_core_counts[mynuma]/numa_proc_counts[mynuma]))
+
+core_offset=${numa_core_offsets[mynuma]}
+for i in $(seq 0 $((rank-1))); do
+ numa=${rank_to_numa[$i]}
+ if [[ $numa -eq $mynuma ]]; then
+ core_offset=$((core_offset + omp_num_threads))
+ fi
+done
+
+omp_places="{${cpus[core_offset]}}"
+for c in $(seq 1 $((omp_num_threads-1))); do
+ omp_places+=",{${cpus[core_offset+c]}}"
+done
+
+if [[ $omp_num_threads -gt 1 ]]; then
+ places="{${cpus[core_offset]}-${cpus[core_offset+$((omp_num_threads-1))]}}"
+else
+ places="{${cpus[core_offset]}}"
+fi
+
+# Export OpenMP config
+export OMP_NUM_THREADS=${omp_num_threads}
+export OMP_PLACES=${omp_places}
+export OMP_PROC_BIND=true
+
+if [[ $globalRank -lt $size ]]; then
+ echo "Node Binding: Process $rank [(nx,ny,nz)=(${nx},${ny},${nz})] GPU: $mygpu, NUMA: $mynuma, CPU Cores: $omp_num_threads - $places"
+fi
+
+# Run
+numactl -N ${mynuma} -m ${mynuma} ${rochpcg_bin} ${rochpcg_args}
\
No newline at end of file
third_party/run_rochpcg.sh
0 → 100644
View file @
b623c7e9
#!/bin/bash
# =================================================
# Helper functions
# =================================================
help
()
{
cat
<<
EOF
rocHPCG helper script
Usage:
$(
basename
"
$0
"
)
[OPTIONS]
OPTIONS:
-h, --help Show this help message and exit
--npx Number of processes in x dimension of process grid (default:
${
npx
}
)
--npy Number of processes in y dimension of process grid (default:
${
npy
}
)
--npz Number of processes in z dimension of process grid (default:
${
npz
}
)
--nx Problem size in x dimension (default:
${
nx
}
)
--ny Problem size in y dimension (default:
${
ny
}
)
--nz Problem size in z dimension (default:
${
nz
}
)
--rt Benchmarking time in seconds (> 1800s for official runs) (default:
${
runtime
}
)
--tol Residual tolerance, skip reference verification if set (default:
${
tol
}
)
--pz Partition boundary in z process dimension (default: 0, uniform grid)
--zl Local nz value for processes with z rank < pz (default: equal to
${
nz
}
)
--zu Local nz value for processes with z rank >= pz (default: equal to
${
nz
}
)
EOF
}
# =================================================
# Global variables
# =================================================
npx
=
1
npy
=
1
npz
=
1
nx
=
560
ny
=
280
nz
=
280
runtime
=
60
tol
=
1
pz
=
0
zl
=
${
nz
}
zu
=
${
nz
}
SCRIPT_DIR
=
"
$(
cd
"
$(
dirname
"
${
BASH_SOURCE
[0]
}
"
)
"
&&
pwd
)
"
rochpcg_bin
=
"
${
SCRIPT_DIR
}
/rochpcg"
if
[[
!
-x
"
${
rochpcg_bin
}
"
]]
;
then
echo
"Cannot find rochpcg binary at
${
rochpcg_bin
}
"
exit
1
fi
# =================================================
# Parameter parsing
# =================================================
GETOPT_PARSE
=
$(
getopt
--name
"
${
0
}
"
--options
h
--longoptions
help
,npx:,npy:,npz:,nx:,ny:,nz:,rt:,tol:,pz:,zl:,zu:
--
"
$@
"
)
\
||
{
echo
"getopt invocation failed; could not parse the command line"
;
exit
1
;
}
eval set
--
"
${
GETOPT_PARSE
}
"
while
true
;
do
case
"
${
1
}
"
in
-h
|
--help
)
help
;
exit
0
;;
--npx
)
npx
=
${
2
}
;
shift
2
;;
--npy
)
npy
=
${
2
}
;
shift
2
;;
--npz
)
npz
=
${
2
}
;
shift
2
;;
--nx
)
nx
=
${
2
}
;
shift
2
;;
--ny
)
ny
=
${
2
}
;
shift
2
;;
--nz
)
nz
=
${
2
}
zl
=
${
nz
}
zu
=
${
nz
}
shift
2
;;
--rt
)
runtime
=
${
2
}
;
shift
2
;;
--tol
)
tol
=
${
2
}
;
shift
2
;;
--pz
)
pz
=
${
2
}
;
shift
2
;;
--zl
)
zl
=
${
2
}
;
shift
2
;;
--zu
)
zu
=
${
2
}
;
shift
2
;;
--
)
shift
;
break
;;
*
)
echo
"Unexpected command line parameter received; aborting"
;
exit
1
;;
esac
done
# Build rochpcg arguments
rochpcg_args
=
"--npx=
${
npx
}
--npy=
${
npy
}
--npz=
${
npz
}
"
rochpcg_args+
=
" --nx=
${
nx
}
--ny=
${
ny
}
--nz=
${
nz
}
"
rochpcg_args+
=
" --rt=
${
runtime
}
"
rochpcg_args+
=
" --tol=
${
tol
}
"
rochpcg_args+
=
" --pz=
${
pz
}
"
rochpcg_args+
=
" --zl=
${
zl
}
"
rochpcg_args+
=
" --zu=
${
zu
}
"
# =================================================
# Affinity setup
# =================================================
globalRank
=
${
OMPI_COMM_WORLD_RANK
:-
0
}
rank
=
${
OMPI_COMM_WORLD_LOCAL_RANK
:-
0
}
size
=
${
OMPI_COMM_WORLD_LOCAL_SIZE
:-
1
}
#construct a list of all cpus, sorted by core
cpulist
=
$(
lscpu
--parse
=
CPU,CORE,NODE |
awk
'!/#/'
|
tr
','
"
\t
"
|
sort
-k
2
-g
-s
)
#construct list of devices and their numa affinities
devicelist
=
$(
hy-smi
--csv
--showtoponuma
|
tail
-n
+2 |
tr
','
"
\t
"
)
#count the cpus per core
threads_per_core
=
$(
echo
"
${
cpulist
}
"
|
grep
-c
".* 0 .*"
)
#remove the extra cpus on each core to make a list of just physical cores, then sort by numa domain
corelist
=
$(
echo
"
$cpulist
"
|
awk
-v
tpc
=
${
threads_per_core
}
'(NR-1)%tpc==0'
|
sort
-k
3
-g
-s
)
#count numa domains
line
=(
$(
echo
"
$cpulist
"
|
tail
-n
1
)
)
n_numa
=
$((
line[2]+1
))
numa_core_counts
=()
numa_proc_counts
=()
for
i
in
$(
seq
1
${
n_numa
}
)
;
do
numa_core_counts+
=(
0
)
;
numa_proc_counts+
=(
0
)
;
done
#parse the list of cpus to array and count cpus in each numa
cpus
=()
while
read
-a
line
;
do
cpus+
=(
${
line
[0]
}
)
((
numa_core_counts[
${
line
[2]
}
]
++
))
||
true
done
<<<
"
${
corelist
}
"
numa_core_offsets
=(
0
)
for
i
in
$(
seq
1
$((
n_numa-1
))
)
;
do
numa_core_offsets+
=(
$((
numa_core_offsets[
$((
i-1
))
]
+
numa_core_counts[
$i
]
))
)
;
done
#parse device to numa mapping
device_to_numa
=()
while
read
-a
line
;
do
device_to_numa+
=(
${
line
[1]
}
)
done
<<<
"
${
devicelist
}
"
rank_to_device
=()
n_devices
=
$(
echo
"
${
devicelist
}
"
|
grep
-c
"card"
)
for
i
in
$(
seq
0
$((
size-1
))
)
;
do
rank_to_device+
=(
$((
i%n_devices
))
)
done
mygpu
=
${
rank_to_device
[rank]
}
mynuma
=
${
device_to_numa
[mygpu]
}
rank_to_numa
=()
for
i
in
$(
seq
0
$((
size-1
))
)
;
do
rank_to_numa+
=(
${
device_to_numa
[
${
rank_to_device
[
$((
i%n_devices
))
]
}
]
}
)
done
for
i
in
$(
seq
0
$((
size-1
))
)
;
do
numa
=
${
rank_to_numa
[
$i
]
}
((
numa_proc_counts[numa]++
))
||
true
done
omp_num_threads
=
$((
numa_core_counts[mynuma]/numa_proc_counts[mynuma]
))
core_offset
=
${
numa_core_offsets
[mynuma]
}
for
i
in
$(
seq
0
$((
rank-1
))
)
;
do
numa
=
${
rank_to_numa
[
$i
]
}
if
[[
$numa
-eq
$mynuma
]]
;
then
core_offset
=
$((
core_offset
+
omp_num_threads
))
fi
done
omp_places
=
"{
${
cpus
[core_offset]
}
}"
for
c
in
$(
seq
1
$((
omp_num_threads-1
))
)
;
do
omp_places+
=
",{
${
cpus
[core_offset+c]
}
}"
done
if
[[
$omp_num_threads
-gt
1
]]
;
then
places
=
"{
${
cpus
[core_offset]
}
-
${
cpus
[core_offset+
$((
omp_num_threads-1
))
]
}
}"
else
places
=
"{
${
cpus
[core_offset]
}
}"
fi
# Export OpenMP config
export
OMP_NUM_THREADS
=
${
omp_num_threads
}
export
OMP_PLACES
=
${
omp_places
}
export
OMP_PROC_BIND
=
true
if
[[
$globalRank
-lt
$size
]]
;
then
echo
"Node Binding: Process
$rank
[(nx,ny,nz)=(
${
nx
}
,
${
ny
}
,
${
nz
}
)] GPU:
$mygpu
, NUMA:
$mynuma
, CPU Cores:
$omp_num_threads
-
$places
"
fi
# Run
numactl
-N
${
mynuma
}
-m
${
mynuma
}
${
rochpcg_bin
}
${
rochpcg_args
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment