Unverified commit 991c0051, authored by Hongtao Zhang, committed by GitHub
Browse files

microbenchmark - CPU Stream Benchmark Revise (#712)



In the current implementation, the CPU‑stream benchmark code renames the
binary before the microbench base class can verify its existence,
causing the default‐binary check to fail.

This PR adds a “default” binary—built with the standard compile
parameters—so that the base class can always find and validate it. Once
the default binary is in place, the CPU‑stream code will rename it as
needed and re‑check its presence before running the benchmark.

This PR also enables the CPU stream benchmark in the default settings.

---------
Co-authored-by: Hongtao Zhang <hongtaozhang@microsoft.com>
parent 431bf19c
......@@ -22,7 +22,7 @@ def __init__(self, name, parameters=''):
"""
super().__init__(name, parameters)
self._bin_name = 'streamZen3.exe'
self._bin_name = 'stream'
self.__cpu_arch = ['other', 'zen3', 'zen4', 'neo2']
def add_parser_arguments(self):
......@@ -32,7 +32,7 @@ def add_parser_arguments(self):
self._parser.add_argument(
'--cpu_arch',
type=str,
default='zen4',
default='other',
required=False,
help='The targeted cpu architectures to run \
STREAM. Default is zen4. Possible values are {}.'.format(' '.join(self.__cpu_arch))
......@@ -76,17 +76,15 @@ def _preprocess(self):
envar = 'OMP_SCHEDULE=static && OMP_DYNAMIC=false && OMP_MAX_ACTIVE_LEVELS=1 && OMP_STACKSIZE=256M && \
OMP_PROC_BIND=true && OMP_NUM_THREADS={} && OMP_PLACES={}'.format(len(self._args.cores), omp_places)
# set the binary name based on cpu architecture
if self._args.cpu_arch == 'zen3':
exe = 'streamZen3.exe'
self._bin_name = 'streamZen3'
elif self._args.cpu_arch == 'zen4':
exe = 'streamZen4.exe'
self._bin_name = 'streamZen4'
elif self._args.cpu_arch == 'neo2':
exe = 'streamNeo2.exe'
else:
exe = 'streamx86.exe'
self._bin_name = 'streamNeo2'
command = envar + ' ' + os.path.join(self._args.bin_dir, exe)
self._bin_name = exe
command = envar + ' ' + os.path.join(self._args.bin_dir, self._bin_name)
if not self._set_binary_path():
logger.error(
......
......@@ -17,7 +17,8 @@ def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/streamZen3.exe'])
cls.createMockFiles(cls, ['bin/stream'])
cls.createMockFiles(cls, ['bin/streamZen3'])
return True
@decorator.load_data('tests/data/streamResult.log')
......
......@@ -23,12 +23,12 @@ all: cuda rocm
cuda_with_msccl: cuda cuda_msccl
cuda: common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn megatron_lm megatron_deepspeed nvbandwidth
rocm: common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt megatron_deepspeed apex_rocm
cpu: common cpu_perftest cpu_stream
common: fio
cpu: common cpu_perftest
common: fio cpu_stream
# non aarch64 specific targets
ifneq ($(shell uname -m), aarch64)
common: fio cpu_hpl
common: cpu_hpl
directx_amd: directx_amf_encoding_latency
endif
......@@ -184,7 +184,7 @@ ifneq (,$(wildcard stream-tests/Makefile))
cd ./stream-tests && \
wget https://www.cs.virginia.edu/stream/FTP/Code/stream.c && \
make all
cp -v ./stream-tests/stream*.exe $(SB_MICRO_PATH)/bin/
cp -v ./stream-tests/stream* $(SB_MICRO_PATH)/bin/
endif
# Build AMD Encoder Latency Test
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
GENFLAGS := -DSTREAM_ARRAY_SIZE=400000000
GENFLAGS := -DSTREAM_ARRAY_SIZE=120000000
ZEN3FLAGS := -DSTREAM_ARRAY_SIZE=400000000 -march=znver3
ZEN4FLAGS := -DSTREAM_ARRAY_SIZE=800000000 -march=znver4
NEO2FLAGS := -DSTREAM_ARRAY_SIZE=120000000 -mcpu=neoverse-v2
GEN_OUTPUT := streamx86.exe
ZEN3_OUTPUT := streamZen3.exe
ZEN4_OUTPUT := streamZen4.exe
NEO2_OUTPUT := streamNeo2.exe
GEN_OUTPUT := stream
ZEN3_OUTPUT := streamZen3
ZEN4_OUTPUT := streamZen4
NEO2_OUTPUT := streamNeo2
ALL_TARGETS := GEN
ARCH := $(shell uname -m)
# ARM64 build gets NEO2 added
ifeq ($(ARCH), aarch64)
CFLAGS := -Ofast -fopenmp -DNTIMES=200
CC := gcc
all: NEO2
else
CC := /opt/AMD/aocc-compiler-4.0.0/bin/clang
CFLAGS := -Ofast -mcmodel=large -mavx2 -ffp-contract=fast -lomp -fopenmp -fnt-store=aggressive -DNTIMES=10
all: ZEN3 ZEN4 X86
CC := gcc
CFLAGS := -Ofast -fopenmp -DNTIMES=200
ALL_TARGETS += NEO2
endif
# AMD AOCC clang present? add ZEN3 and ZEN4
ifneq ("$(wildcard /opt/AMD/aocc-compiler-4.0.0/bin/clang)","")
CC := /opt/AMD/aocc-compiler-4.0.0/bin/clang
CFLAGS := -Ofast -mcmodel=large -mavx2 -ffp-contract=fast -lomp -fopenmp \
-fnt-store=aggressive -DNTIMES=10
ALL_TARGETS += ZEN3 ZEN4
endif
ZEN3: stream.c
# the one all: definition
all: $(ALL_TARGETS)
GEN:
$(CC) $(CFLAGS) $(GENFLAGS) stream.c -o $(GEN_OUTPUT)
ZEN3:
$(CC) $(CFLAGS) $(ZEN3FLAGS) stream.c -o $(ZEN3_OUTPUT)
ZEN4:
$(CC) $(CFLAGS) $(ZEN4FLAGS) stream.c -o $(ZEN4_OUTPUT)
X86:
$(CC) $(CFLAGS) $(GENFLAGS) stream.c -o $(GEN_OUTPUT)
NEO2:
$(CC) $(CFLAGS) $(NEO2FLAGS) stream.c -o $(NEO2_OUTPUT)
ifeq ($(ARCH), aarch64)
# clean up the generated files
clean:
rm $(NEO2_OUTPUT)
else
clean:
rm $(GEN_OUTPUT) $(ZEN3_OUTPUT) $(ZEN4_OUTPUT)
endif
rm -f $(GEN_OUTPUT) $(ZEN3_OUTPUT) $(ZEN4_OUTPUT) $(NEO2_OUTPUT)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment