Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
b623c7e9
Commit
b623c7e9
authored
Apr 01, 2026
by
one
Browse files
Convert rochpcg script patch into shell script
parent
a10c3e15
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
186 additions
and
196 deletions
+186
-196
third_party/Makefile
third_party/Makefile
+1
-4
third_party/rochpcg-scripts-bw.patch
third_party/rochpcg-scripts-bw.patch
+0
-192
third_party/run_rochpcg.sh
third_party/run_rochpcg.sh
+185
-0
No files found.
third_party/Makefile
View file @
b623c7e9
...
...
@@ -196,12 +196,9 @@ endif
rocm_hpcg
:
sb_micro_path
ifneq
(,$(wildcard rocHPCG/install.sh))
cd
./rocHPCG
&&
\
if
[
!
-f
./run_rochpcg
]
;
then
\
git apply ../rochpcg-scripts-bw.patch
;
\
fi
&&
\
./install.sh
--with-rocm
=
$(ROCM_PATH)
--with-mpi
=
$(MPI_HOME)
--gpu-aware-mpi
=
on
cp
-v
./rocHPCG/build/release/rochpcg-install/bin/rochpcg
$(SB_MICRO_PATH)
/bin/
cp
-v
./
rocHPCG/
run_rochpcg
$(SB_MICRO_PATH)
/bin/
cp
-v
./run_rochpcg
.sh
$(SB_MICRO_PATH)
/bin/
run_rochpcg
chmod
+x
$(SB_MICRO_PATH)
/bin/rochpcg
$(SB_MICRO_PATH)
/bin/run_rochpcg
endif
...
...
third_party/rochpcg-scripts-bw.patch
deleted
100644 → 0
View file @
a10c3e15
diff --git a/run_rochpcg b/run_rochpcg
new file mode 100755
index 0000000..0f806fe
--- /dev/null
+++ b/run_rochpcg
@@ -0,0 +1,180 @@
+#!/bin/bash
+
+# =================================================
+# Helper functions
+# =================================================
+help() {
+ cat << EOF
+rocHPCG helper script
+Usage: $(basename "$0") [OPTIONS]
+
+OPTIONS:
+ -h, --help Show this help message and exit
+ --npx Number of processes in x dimension of process grid (default: ${npx})
+ --npy Number of processes in y dimension of process grid (default: ${npy})
+ --npz Number of processes in z dimension of process grid (default: ${npz})
+ --nx Problem size in x dimension (default: ${nx})
+ --ny Problem size in y dimension (default: ${ny})
+ --nz Problem size in z dimension (default: ${nz})
+ --rt Benchmarking time in seconds (> 1800s for official runs) (default: ${runtime})
+ --tol Residual tolerance, skip reference verification if set (default: ${tol})
+ --pz Partition boundary in z process dimension (default: 0, uniform grid)
+ --zl Local nz value for processes with z rank < pz (default: equal to ${nz})
+ --zu Local nz value for processes with z rank >= pz (default: equal to ${nz})
+EOF
+}
+
+# =================================================
+# Global variables
+# =================================================
+npx=1
+npy=1
+npz=1
+nx=560
+ny=280
+nz=280
+runtime=60
+tol=1
+pz=0
+zl=${nz}
+zu=${nz}
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+rochpcg_bin="${SCRIPT_DIR}/rochpcg"
+
+if [[ ! -x "${rochpcg_bin}" ]]; then
+ echo "Cannot find rochpcg binary at ${rochpcg_bin}"
+ exit 1
+fi
+
+# =================================================
+# Parameter parsing
+# =================================================
+GETOPT_PARSE=$(getopt --name "${0}" --options h --longoptions help,npx:,npy:,npz:,nx:,ny:,nz:,rt:,tol:,pz:,zl:,zu: -- "$@") \
+ || { echo "getopt invocation failed; could not parse the command line"; exit 1; }
+
+eval set -- "${GETOPT_PARSE}"
+
+while true; do
+ case "${1}" in
+ -h|--help) help; exit 0 ;;
+ --npx) npx=${2}; shift 2 ;;
+ --npy) npy=${2}; shift 2 ;;
+ --npz) npz=${2}; shift 2 ;;
+ --nx) nx=${2}; shift 2 ;;
+ --ny) ny=${2}; shift 2 ;;
+ --nz)
+ nz=${2}
+ zl=${nz}
+ zu=${nz}
+ shift 2 ;;
+ --rt) runtime=${2}; shift 2 ;;
+ --tol) tol=${2}; shift 2 ;;
+ --pz) pz=${2}; shift 2 ;;
+ --zl) zl=${2}; shift 2 ;;
+ --zu) zu=${2}; shift 2 ;;
+ --) shift ; break ;;
+ *) echo "Unexpected command line parameter received; aborting";
+ exit 1
+ ;;
+ esac
+done
+
+# Build rochpcg arguments
+rochpcg_args="--npx=${npx} --npy=${npy} --npz=${npz}"
+rochpcg_args+=" --nx=${nx} --ny=${ny} --nz=${nz}"
+rochpcg_args+=" --rt=${runtime}"
+rochpcg_args+=" --tol=${tol}"
+rochpcg_args+=" --pz=${pz}"
+rochpcg_args+=" --zl=${zl}"
+rochpcg_args+=" --zu=${zu}"
+
+# =================================================
+# Affinity setup
+# =================================================
+globalRank=${OMPI_COMM_WORLD_RANK:-0}
+rank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
+size=${OMPI_COMM_WORLD_LOCAL_SIZE:-1}
+
+#construct a list of all cpus, sorted by core
+cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
+
+#construct list of devices and their numa affinities
+devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")
+
+#count the cpus per core
+threads_per_core=$(echo "${cpulist}" | grep -c ".* 0 .*")
+
+#remove the extra cpus on each core to make a list of just physical cores, then sort by numa domain
+corelist=$(echo "$cpulist" | awk -v tpc=${threads_per_core} '(NR-1)%tpc==0' | sort -k 3 -g -s)
+
+#count numa domains
+line=($(echo "$cpulist" | tail -n 1))
+n_numa=$((line[2]+1))
+
+numa_core_counts=()
+numa_proc_counts=()
+for i in $(seq 1 ${n_numa}); do numa_core_counts+=(0); numa_proc_counts+=(0); done
+
+#parse the list of cpus to array and count cpus in each numa
+cpus=()
+while read -a line; do
+ cpus+=(${line[0]})
+ ((numa_core_counts[${line[2]}]++)) || true
+done <<< "${corelist}"
+
+numa_core_offsets=(0)
+for i in $(seq 1 $((n_numa-1))); do numa_core_offsets+=($((numa_core_offsets[$((i-1))] + numa_core_counts[$i]))); done
+
+#parse device to numa mapping
+device_to_numa=()
+while read -a line; do
+ device_to_numa+=(${line[1]})
+done <<< "${devicelist}"
+
+rank_to_device=()
+n_devices=$(echo "${devicelist}" | grep -c "card")
+for i in $(seq 0 $((size-1))); do
+ rank_to_device+=($((i%n_devices)))
+done
+
+mygpu=${rank_to_device[rank]}
+mynuma=${device_to_numa[mygpu]}
+
+rank_to_numa=()
+for i in $(seq 0 $((size-1))); do
+ rank_to_numa+=(${device_to_numa[${rank_to_device[$((i%n_devices))]}]})
+done
+
+for i in $(seq 0 $((size-1))); do
+ numa=${rank_to_numa[$i]}
+ ((numa_proc_counts[numa]++)) || true
+done
+
+omp_num_threads=$((numa_core_counts[mynuma]/numa_proc_counts[mynuma]))
+
+core_offset=${numa_core_offsets[mynuma]}
+for i in $(seq 0 $((rank-1))); do
+ numa=${rank_to_numa[$i]}
+ if [[ $numa -eq $mynuma ]]; then
+ core_offset=$((core_offset + omp_num_threads))
+ fi
+done
+
+omp_places="{${cpus[core_offset]}}"
+for c in $(seq 1 $((omp_num_threads-1))); do
+ omp_places+=",{${cpus[core_offset+c]}}"
+done
+
+if [[ $omp_num_threads -gt 1 ]]; then
+ places="{${cpus[core_offset]}-${cpus[core_offset+$((omp_num_threads-1))]}}"
+else
+ places="{${cpus[core_offset]}}"
+fi
+
+# Export OpenMP config
+export OMP_NUM_THREADS=${omp_num_threads}
+export OMP_PLACES=${omp_places}
+export OMP_PROC_BIND=true
+
+if [[ $globalRank -lt $size ]]; then
+ echo "Node Binding: Process $rank [(nx,ny,nz)=(${nx},${ny},${nz})] GPU: $mygpu, NUMA: $mynuma, CPU Cores: $omp_num_threads - $places"
+fi
+
+# Run
+numactl -N ${mynuma} -m ${mynuma} ${rochpcg_bin} ${rochpcg_args}
\
No newline at end of file
third_party/run_rochpcg.sh
0 → 100644
View file @
b623c7e9
#!/bin/bash
# =================================================
# Helper functions
# =================================================
#######################################
# Print the usage text for this wrapper to stdout.
# Globals (read): npx, npy, npz, nx, ny, nz, runtime, tol -- whatever values
#   they hold at call time are interpolated into the "(default: ...)" hints.
# Arguments: none
# Outputs:   usage text on stdout
# Note: as a shell function this takes precedence over the bash `help`
#   builtin for the lifetime of the script.
#######################################
help() {
  cat << EOF
rocHPCG helper script
Usage: $(basename "$0") [OPTIONS]

OPTIONS:
  -h, --help  Show this help message and exit
  --npx       Number of processes in x dimension of process grid (default: ${npx})
  --npy       Number of processes in y dimension of process grid (default: ${npy})
  --npz       Number of processes in z dimension of process grid (default: ${npz})
  --nx        Problem size in x dimension (default: ${nx})
  --ny        Problem size in y dimension (default: ${ny})
  --nz        Problem size in z dimension (default: ${nz})
  --rt        Benchmarking time in seconds (> 1800s for official runs) (default: ${runtime})
  --tol       Residual tolerance, skip reference verification if set (default: ${tol})
  --pz        Partition boundary in z process dimension (default: 0, uniform grid)
  --zl        Local nz value for processes with z rank < pz (default: equal to ${nz})
  --zu        Local nz value for processes with z rank >= pz (default: equal to ${nz})
EOF
}
# =================================================
# Global variables
# =================================================
# Process grid: number of processes along each axis.
npx=1
npy=1
npz=1

# Problem size along each axis.
nx=560
ny=280
nz=280

# Benchmarking time in seconds and residual tolerance.
runtime=60
tol=1

# Partition boundary in the z process dimension (0 means a uniform grid) and
# the local nz values used below / at-or-above that boundary; both start out
# equal to nz.
pz=0
zl=${nz}
zu=${nz}
# Absolute directory of this script, so the rochpcg binary is looked up next
# to the script rather than relative to the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
rochpcg_bin="${SCRIPT_DIR}/rochpcg"

# Stop early when the benchmark binary is missing or not executable; the
# diagnostic goes to stderr so it is not mixed into benchmark output.
if [[ ! -x "${rochpcg_bin}" ]]; then
  echo "Cannot find rochpcg binary at ${rochpcg_bin}" >&2
  exit 1
fi
# =================================================
# Parameter parsing
# =================================================
#######################################
# Parse the wrapper's command line into the global option variables.
# Globals (written): npx, npy, npz, nx, ny, nz, runtime, tol, pz, zl, zu,
#   GETOPT_PARSE
# Arguments: the script's command-line arguments ("$@")
# Outputs:   diagnostics on stderr; usage text on stdout for -h/--help
# Exits:     0 after printing help, 1 on a parse error
#
# zl and zu follow nz unless they are given explicitly. This is resolved after
# the whole command line has been read, so "--zl 100 --nz 200" keeps zl=100
# instead of having --nz overwrite the earlier explicit value.
#######################################
parse_args() {
  local zl_given=0
  local zu_given=0

  GETOPT_PARSE=$(getopt --name "${0}" --options h \
    --longoptions help,npx:,npy:,npz:,nx:,ny:,nz:,rt:,tol:,pz:,zl:,zu: -- "$@") \
    || { echo "getopt invocation failed; could not parse the command line" >&2; exit 1; }

  # getopt emits a shell-quoted argument list; eval re-reads it as positionals.
  eval set -- "${GETOPT_PARSE}"

  while true; do
    case "${1}" in
      -h|--help) help; exit 0 ;;
      --npx) npx=${2}; shift 2 ;;
      --npy) npy=${2}; shift 2 ;;
      --npz) npz=${2}; shift 2 ;;
      --nx) nx=${2}; shift 2 ;;
      --ny) ny=${2}; shift 2 ;;
      --nz) nz=${2}; shift 2 ;;
      --rt) runtime=${2}; shift 2 ;;
      --tol) tol=${2}; shift 2 ;;
      --pz) pz=${2}; shift 2 ;;
      --zl) zl=${2}; zl_given=1; shift 2 ;;
      --zu) zu=${2}; zu_given=1; shift 2 ;;
      --) shift; break ;;
      *)
        echo "Unexpected command line parameter received; aborting" >&2
        exit 1
        ;;
    esac
  done

  # Anything not given explicitly tracks the final nz value.
  if (( ! zl_given )); then
    zl=${nz}
  fi
  if (( ! zu_given )); then
    zu=${nz}
  fi
}

parse_args "$@"
# Build rochpcg arguments
# Kept as one space-separated string because the launch command relies on
# word splitting of this variable; every value is a plain token.
rochpcg_args="--npx=${npx} --npy=${npy} --npz=${npz}"
rochpcg_args+=" --nx=${nx} --ny=${ny} --nz=${nz}"
rochpcg_args+=" --rt=${runtime}"
rochpcg_args+=" --tol=${tol}"
rochpcg_args+=" --pz=${pz}"
rochpcg_args+=" --zl=${zl}"
rochpcg_args+=" --zu=${zu}"
# =================================================
# Affinity setup
# =================================================
# Rank layout taken from the OMPI_COMM_WORLD_* environment variables. When
# they are unset (e.g. the script is started directly), fall back to a single
# process: global rank 0, local rank 0, local size 1.
globalRank=${OMPI_COMM_WORLD_RANK:-0}
rank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
size=${OMPI_COMM_WORLD_LOCAL_SIZE:-1}
#construct a list of all cpus, sorted by core
# Each row is "CPU<TAB>CORE<TAB>NODE"; lscpu's '#' header lines are dropped.
cpulist=$(lscpu --parse=CPU,CORE,NODE | awk '!/#/' | tr ',' "\t" | sort -k 2 -g -s)
if [[ -z "${cpulist}" ]]; then
  echo "Unable to read the CPU topology from lscpu" >&2
  exit 1
fi

#construct list of devices and their numa affinities
# tail -n +2 skips the CSV header row.
devicelist=$(hy-smi --csv --showtoponuma | tail -n +2 | tr ',' "\t")

#count the cpus per core
# Counted as the number of rows whose CORE field is 0, compared by field
# rather than by a whitespace-sensitive text pattern.
threads_per_core=$(awk -F'\t' '$2 == 0 { n++ } END { print n + 0 }' <<< "${cpulist}")
if (( threads_per_core < 1 )); then
  # A zero here would make the awk filter below compute a modulo by zero.
  echo "Unable to determine the number of hardware threads per core" >&2
  exit 1
fi

#remove the extra cpus on each core to make a list of just physical cores, then sort by numa domain
# cpulist is sorted by core, so keeping every tpc-th row keeps one cpu per core.
corelist=$(awk -v tpc="${threads_per_core}" '(NR-1)%tpc==0' <<< "${cpulist}" | sort -k 3 -g -s)

#count numa domains
# Highest NODE id seen on any cpu, plus one; this does not assume that the
# highest-numbered core belongs to the highest-numbered node.
n_numa=$(awk -F'\t' 'BEGIN { max = 0 } ($3 + 0) > max { max = $3 + 0 } END { print max + 1 }' <<< "${cpulist}")

numa_core_counts=()
numa_proc_counts=()
for ((i = 0; i < n_numa; i++)); do
  numa_core_counts+=(0)
  numa_proc_counts+=(0)
done

#parse the list of cpus to array and count cpus in each numa
cpus=()
while read -r -a topo; do
  # Skip blank rows (a here-string always yields at least one line).
  (( ${#topo[@]} >= 1 )) || continue
  cpus+=("${topo[0]}")
  # A missing NODE column is treated as node 0.
  node=${topo[2]:-0}
  numa_core_counts[node]=$(( ${numa_core_counts[node]:-0} + 1 ))
done <<< "${corelist}"

# Index into cpus[] of the first core of each numa domain: an exclusive
# prefix sum, i.e. the cores of all lower-numbered domains.
numa_core_offsets=(0)
for ((i = 1; i < n_numa; i++)); do
  numa_core_offsets+=($(( numa_core_offsets[i-1] + numa_core_counts[i-1] )))
done
#parse device to numa mapping
# Only rows that name a device ("card") are used, and the device count is
# taken from the same rows, so the table and the count cannot disagree.
device_to_numa=()
while IFS= read -r row; do
  [[ "${row}" == *card* ]] || continue
  read -r -a dev <<< "${row}"
  # Second column is the device's numa node; a missing value falls back to 0.
  device_to_numa+=("${dev[1]:-0}")
done <<< "${devicelist}"

n_devices=${#device_to_numa[@]}
if (( n_devices < 1 )); then
  # Without this check the modulo below would divide by zero.
  echo "No devices reported by hy-smi; cannot map ranks to GPUs" >&2
  exit 1
fi

# Local ranks are assigned to devices round-robin; while doing so, record
# each rank's numa domain and count how many ranks land on every domain.
rank_to_device=()
rank_to_numa=()
for ((i = 0; i < size; i++)); do
  rank_to_device+=($(( i % n_devices )))
  rank_to_numa+=("${device_to_numa[i % n_devices]}")
  numa=${rank_to_numa[i]}
  numa_proc_counts[numa]=$(( ${numa_proc_counts[numa]:-0} + 1 ))
done

# Device and numa domain of this process.
mygpu=${rank_to_device[rank]}
mynuma=${device_to_numa[mygpu]}
# The numa domain of this rank must be a valid index with at least one core,
# otherwise the divisions and array lookups below are meaningless.
if ! [[ "${mynuma}" =~ ^[0-9]+$ ]] || (( ${numa_core_counts[mynuma]:-0} < 1 )); then
  echo "NUMA node '${mynuma}' has no usable cores; cannot compute CPU binding" >&2
  exit 1
fi
numa_cores=${numa_core_counts[mynuma]}
numa_procs=${numa_proc_counts[mynuma]:-0}
if (( numa_procs < 1 )); then
  numa_procs=1
fi

# Cores of the domain are split evenly between the ranks placed on it. With
# more ranks than cores the integer division yields 0, which is not a usable
# thread count, so at least one thread is always requested.
omp_num_threads=$(( numa_cores / numa_procs ))
if (( omp_num_threads < 1 )); then
  omp_num_threads=1
fi

# Count the lower-numbered local ranks that share this numa domain; each of
# them owns omp_num_threads cores ahead of ours.
peers_before=0
for ((i = 0; i < rank; i++)); do
  numa=${rank_to_numa[i]}
  if [[ $numa -eq $mynuma ]]; then
    peers_before=$(( peers_before + 1 ))
  fi
done

# First core of this rank inside cpus[]. The modulo only takes effect when the
# domain is oversubscribed; it keeps the index inside the domain's core range.
core_offset=$(( numa_core_offsets[mynuma] + (peers_before * omp_num_threads) % numa_cores ))

# One OpenMP place per core owned by this rank: "{a},{b},..."
omp_places="{${cpus[core_offset]}}"
for ((c = 1; c < omp_num_threads; c++)); do
  omp_places+=",{${cpus[core_offset+c]}}"
done

# Compact "{first-last}" form, used only in the binding report.
if (( omp_num_threads > 1 )); then
  places="{${cpus[core_offset]}-${cpus[core_offset+omp_num_threads-1]}}"
else
  places="{${cpus[core_offset]}}"
fi
# Export OpenMP config
export OMP_NUM_THREADS="${omp_num_threads}"
export OMP_PLACES="${omp_places}"
export OMP_PROC_BIND=true

# Print the binding only for processes whose global rank is below the local
# size (presumably the ranks on the first node -- confirm against the
# launcher's rank placement), so multi-node runs do not repeat the report.
if [[ $globalRank -lt $size ]]; then
  echo "Node Binding: Process $rank [(nx,ny,nz)=(${nx},${ny},${nz})] GPU: $mygpu, NUMA: $mynuma, CPU Cores: $omp_num_threads - $places"
fi

# Run
# rochpcg_args is a space-separated string; split it into words explicitly so
# the words are not glob-expanded, and quote the binary path and numa id so
# they are passed through unchanged.
read -r -a rochpcg_argv <<< "${rochpcg_args}"
numactl -N "${mynuma}" -m "${mynuma}" "${rochpcg_bin}" "${rochpcg_argv[@]}"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment