# Copyright (c) Microsoft Corporation - All rights reserved
# Licensed under the MIT License

# Every setting below uses `?=`, so a value supplied through the environment or on the
# make command line takes precedence over the default given here.

# Prefix that receives the built benchmarks ($(SB_MICRO_PATH)/bin and $(SB_MICRO_PATH)/lib).
SB_MICRO_PATH ?= /usr/local

# Locations of the toolchains and libraries handed to the individual sub-builds.
HPCX_HOME ?= /opt/hpcx
MPI_HOME  ?= /usr/local/mpi
HIP_HOME  ?= /opt/rocm/hip
RCCL_HOME ?= /opt/rocm/rccl

# CUDA version string: from the 'release' line of `nvcc --version`, take the 6th field,
# drop its first character and keep the first two dot-separated components.
# `?=` defines a recursively expanded variable, so the pipeline runs on each expansion.
CUDA_VER ?= $(shell nvcc --version | grep 'release' | awk '{print $$6}' | cut -c2- | cut -d '.' -f1-2)

# rocBLAS tag to clone: "rocm-" followed by the first three dot-separated components of
# the 3rd column that `dpkg -l` prints for the 'rocm-dev ' package.
ROCBLAS_BRANCH ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3)

# Every target in this file is a command name, not a file the rule produces. Declaring
# them phony keeps a file or directory of the same name (e.g. ./fio) from making the
# target look up to date. cpu, cpu_perftest and sb_micro_path are included as well.
.PHONY: all cuda rocm cpu common sb_micro_path
.PHONY: cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_gpuburn
.PHONY: rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest
.PHONY: cpu_perftest cpu_stream cpu_hpl fio gpcnet

# Aggregate targets.
# `all` is the first ordinary rule here, which makes it the default goal; it builds the
# CUDA and the ROCm sets. `cpu` is not part of `all` and has to be requested explicitly.
all: cuda rocm

# CUDA benchmark set.
cuda: common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests \
      cuda_perftest gpcnet cuda_gpuburn

# ROCm benchmark set.
rocm: common rocm_perftest rocm_rccl_tests \
      rocm_rocblas rocm_bandwidthTest

# CPU-only benchmark set.
cpu: common cpu_perftest

# Benchmarks that every set above depends on.
common: cpu_hpl cpu_stream fio

# Make sure $(SB_MICRO_PATH)/bin and $(SB_MICRO_PATH)/lib exist. `mkdir -p` creates any
# missing parent directories and does not fail when a directory is already present.
sb_micro_path:
	mkdir -p $(addprefix $(SB_MICRO_PATH)/,bin lib)

# Build cutlass.
# The recipe is empty unless cutlass/CMakeLists.txt exists (checked while parsing).
# ARCHS is the architecture list passed as CUTLASS_NVCC_ARCHS; "90" is appended when
# `bc` reports $(CUDA_VER) >= 11.8. The choice is made when the recipe is expanded, not
# while the Makefile is parsed, so this rule triggers no nvcc/bc call at parse time.
# cmake installs binaries and libraries under $(SB_MICRO_PATH); the build uses the
# job count printed by `nproc --ignore=2`.
cuda_cutlass:
ifneq (,$(wildcard cutlass/CMakeLists.txt))
	$(eval ARCHS := $(if $(filter 1,$(shell echo $(CUDA_VER)">=11.8" | bc -l)),"70;75;80;86;90","70;75;80;86"))
	cmake -DCMAKE_INSTALL_BINDIR=$(SB_MICRO_PATH)/bin -DCMAKE_INSTALL_LIBDIR=$(SB_MICRO_PATH)/lib -DCMAKE_BUILD_TYPE=Release \
		-DCUTLASS_NVCC_ARCHS=$(ARCHS) -DCUTLASS_ENABLE_EXAMPLES=OFF -DCUTLASS_ENABLE_TESTS=OFF -S ./cutlass -B ./cutlass/build
	cmake --build ./cutlass/build -j $(shell nproc --ignore=2) --target install
endif

# Build cuda-samples/Samples/bandwidthTest.
# cuda-samples is released together with CUDA and carries the same version (v10.0, v11.1
# and so on), so the tag v$(CUDA_VER) is cloned to match the CUDA version in the
# environment or docker image.
# The bandwidthTest Makefile has no 'install' target, so the binary is copied into
# $(SB_MICRO_PATH)/bin/ by hand; the sb_micro_path prerequisite creates that directory.
# When `bc` reports $(CUDA_VER) >= 11.8 the sample is taken from Samples/1_Utilities and
# architecture 90 is added to the SMS list. The version test runs once, when the recipe
# is expanded, so this rule triggers no nvcc/bc call while the Makefile is parsed.
cuda_bandwidthTest: sb_micro_path
	$(eval CUDA_GE_11_8 := $(filter 1,$(shell echo $(CUDA_VER)">=11.8" | bc -l)))
	$(eval TEST_PATH := $(if $(CUDA_GE_11_8),"./cuda-samples/Samples/1_Utilities/bandwidthTest","./cuda-samples/Samples/bandwidthTest"))
	$(eval ARCHS := $(if $(CUDA_GE_11_8),"70 75 80 86 90","70 75 80 86"))
	if [ -d cuda-samples ]; then rm -rf cuda-samples; fi
	git clone -b v$(CUDA_VER) https://github.com/NVIDIA/cuda-samples.git
	cd ./$(TEST_PATH) && make clean && make TARGET_ARCH=x86_64 SMS=$(ARCHS)
	cp -v ./$(TEST_PATH)/bandwidthTest $(SB_MICRO_PATH)/bin/

# Build nccl-tests from commit 8274cb4 of default branch.
# The recipe is empty unless nccl-tests/Makefile exists. The sub-build is run with MPI=1
# and MPI_HOME=$(MPI_HOME); everything under nccl-tests/build is then copied into
# $(SB_MICRO_PATH)/bin/.
cuda_nccl_tests: sb_micro_path
ifneq (,$(wildcard nccl-tests/Makefile))
	cd ./nccl-tests && \
		make MPI=1 MPI_HOME=$(MPI_HOME) -j
	cp -v ./nccl-tests/build/* $(SB_MICRO_PATH)/bin/
endif

# Build perftest.
# The version we use is the tag v4.5-0.2.
# The cuda, rocm and cpu variants all build inside ./perftest and install under
# $(SB_MICRO_PATH); they differ only in the options given to ./configure. Each recipe is
# empty unless perftest/autogen.sh exists.

# Shared command line. $(1): ./configure options placed ahead of --prefix (may be empty).
define perftest_build
cd perftest && ./autogen.sh && ./configure $(strip $(1) --prefix=$(SB_MICRO_PATH)) && make -j && make install
endef

cuda_perftest:
ifneq (,$(wildcard perftest/autogen.sh))
	$(call perftest_build,CUDA_H_PATH=/usr/local/cuda/include/cuda.h)
endif

rocm_perftest:
ifneq (,$(wildcard perftest/autogen.sh))
	$(call perftest_build,--enable-rocm --with-rocm=/opt/rocm)
endif

cpu_perftest:
ifneq (,$(wildcard perftest/autogen.sh))
	$(call perftest_build,)
endif

# Build FIO from commit d83ac9 (fio-3.28 tag).
# The recipe is empty unless fio/Makefile exists. fio is configured with
# --disable-native and installed under $(SB_MICRO_PATH).
fio:
ifneq (,$(wildcard fio/Makefile))
	cd ./fio && \
		./configure --prefix=$(SB_MICRO_PATH) --disable-native && \
		make -j && \
		make install
endif

# Build rccl-tests from commit 2a18737 of default branch.
# The recipe is empty unless rccl-tests/Makefile exists. MPI_HOME, HIP_HOME and RCCL_HOME
# are forwarded to the sub-build; everything under rccl-tests/build is then copied into
# $(SB_MICRO_PATH)/bin/.
rocm_rccl_tests: sb_micro_path
ifneq (,$(wildcard rccl-tests/Makefile))
	cd ./rccl-tests && \
		make MPI=1 MPI_HOME=$(MPI_HOME) HIP_HOME=$(HIP_HOME) RCCL_HOME=$(RCCL_HOME) -j
	cp -v ./rccl-tests/build/* $(SB_MICRO_PATH)/bin/
endif

# Build rocblas-bench.
# rocBLAS is released together with ROCm (rocm-4.2.0 and so on); the tag cloned here is
# $(ROCBLAS_BRANCH), which follows the ROCm version found in the environment or docker.
# The build takes several hours, so the recipe is empty when rocblas-bench already
# exists in $(SB_MICRO_PATH)/bin.
rocm_rocblas: sb_micro_path
ifeq (,$(wildcard $(SB_MICRO_PATH)/bin/rocblas-bench))
	[ ! -d rocBLAS ] || rm -rf rocBLAS
	git clone -b $(ROCBLAS_BRANCH) https://github.com/ROCmSoftwarePlatform/rocBLAS.git ./rocBLAS
	cd ./rocBLAS && \
		./install.sh --dependencies --clients-only
	cp -v ./rocBLAS/build/release/clients/staging/rocblas-bench $(SB_MICRO_PATH)/bin/
endif

# Build hipBusBandwidth.
# The sample sources are copied from samples/1_Utils under the path printed by
# `hipconfig -p`, built out of tree with cmake in ./hipBusBandwidth/build, and the
# resulting binary is copied into $(SB_MICRO_PATH)/bin/.
rocm_bandwidthTest: sb_micro_path
	cp -r -v $(shell hipconfig -p)/samples/1_Utils/hipBusBandwidth ./
	cd ./hipBusBandwidth/ && \
		mkdir -p build && \
		cd build && \
		cmake .. && \
		make
	cp -v ./hipBusBandwidth/build/hipBusBandwidth $(SB_MICRO_PATH)/bin/

# Build GPCNET from commit c56fd9.
# The build runs inside one bash process: it sources $(HPCX_HOME)/hpcx-init.sh, calls
# hpcx_load, builds the `all` target in ./GPCNET with CC=mpicc, then calls hpcx_unload.
# Both resulting binaries are copied into $(SB_MICRO_PATH)/bin/.
gpcnet: sb_micro_path
	bash -c "source $(HPCX_HOME)/hpcx-init.sh && hpcx_load && make CC=mpicc -C GPCNET all && hpcx_unload"
	cp -v ./GPCNET/network_test ./GPCNET/network_load_test $(SB_MICRO_PATH)/bin/

# Build GPU burn from main branch (only branch that exists).
# The recipe is empty unless gpu-burn/Makefile exists. The gpu_burn binary and
# compare.ptx are both copied into $(SB_MICRO_PATH)/bin/.
cuda_gpuburn: sb_micro_path
ifneq (,$(wildcard gpu-burn/Makefile))
	cd ./gpu-burn && make
	cp -v $(addprefix ./gpu-burn/,gpu_burn compare.ptx) $(SB_MICRO_PATH)/bin/
endif

# Build HPL 2.3 with the Linux_zen3 and Linux_zen4 configurations.
# The recipe is empty unless hpl-tests/Makefile exists. Inside ./hpl-tests the HPL
# tarball is downloaded and unpacked, both Make.Linux_* files are copied into the
# unpacked tree, and the `all` target of hpl-tests/Makefile is run.
# wget is given an explicit output name (-O) so a re-run overwrites an earlier, possibly
# truncated, download; without it wget would save hpl-2.3.tar.gz.1 and tar would keep
# unpacking the old file.
# The two xhpl binaries are installed as xhpl_z3 / xhpl_z4 next to the helper scripts
# and the HPL.dat template in $(SB_MICRO_PATH)/bin/.
cpu_hpl: sb_micro_path
ifneq (,$(wildcard hpl-tests/Makefile))
	cd ./hpl-tests && \
		wget -O hpl-2.3.tar.gz https://netlib.org/benchmark/hpl/hpl-2.3.tar.gz && \
		tar xzf hpl-2.3.tar.gz && \
		cp Make.Linux_zen3 Make.Linux_zen4 hpl-2.3 && \
		make all
	cp -v ./hpl-tests/hpl-2.3/bin/Linux_zen3/xhpl $(SB_MICRO_PATH)/bin/xhpl_z3
	cp -v ./hpl-tests/hpl-2.3/bin/Linux_zen4/xhpl $(SB_MICRO_PATH)/bin/xhpl_z4
	cp -v ./hpl-tests/hpl_run.sh ./hpl-tests/bindmem.sh ./hpl-tests/template_hpl.dat $(SB_MICRO_PATH)/bin/
endif

# Build STREAM.
# The recipe is empty unless stream-tests/Makefile exists. stream.c is downloaded into
# ./stream-tests and the `all` target of stream-tests/Makefile is run; every resulting
# stream*.exe is copied into $(SB_MICRO_PATH)/bin/.
# wget is given an explicit output name (-O) so a re-run replaces stream.c; without it
# wget would save stream.c.1 and the build would keep using the old stream.c.
cpu_stream: sb_micro_path
ifneq (,$(wildcard stream-tests/Makefile))
	cd ./stream-tests && \
		wget -O stream.c https://www.cs.virginia.edu/stream/FTP/Code/stream.c && \
		make all
	cp -v ./stream-tests/stream*.exe $(SB_MICRO_PATH)/bin/
endif