# Makefile (commit "init" by wangkx1)
#
# This is the makefile for the eddy project. It can be used
# to compile the following variants of eddy:
#
#   - make cuda=1 (compiles a CUDA-capable version)
#   - make cpu=1  (compiles a multi-threaded CPU version)
#   - make        (equivalent to make cpu=1)
#
# The resulting eddy binary executables will be named like so:
#   - cpu:   eddy_cpu
#   - cuda:  eddy_cudaX.Y (where X.Y is the CUDA version that
#            the binary was compiled against)
#
# Both variants can be compiled in one invocation, e.g.:
#
#     make cpu=1 cuda=1
#
# Notes regarding the cuda variant:
#
#  - By default a fat binary with code for all the compute
#    capabilities allowed for by a given CUDA version will
#    be compiled. If one only needs 3.0--3.5 (for testing)
#    one can build with the fastbuild=1 option which saves
#    a lot of compilation time, e.g.:
#
#        make cuda=1 fastbuild=1
#
#  - Alternately, to compile for a specific compute
#    capability (or range thereof), you can specify the
#    GENCODEFLAGS variable, e.g.:
#
#        make cuda=1 GENCODEFLAGS="-gencode arch=compute_30,code=sm_30"
#
#  - Or you can specify a specific compute capability with
#    the COMPUTE_CAPABILITY variable, e.g.:
#
#        make cuda=1 COMPUTE_CAPABILITY="30"
#
#  - By default, all CUDA libraries (libcuda, libcudart, and those
#    specified in the CUDALIBS variable below) will be dynamically
#    linked in the resulting executable. The CUDA_STATIC variable
#    can be specified to statically link these libraries instead,
#    e.g.:
#
#        make cuda=1 CUDA_STATIC=1
#
#  - To compile against a specific version of the CUDA toolkit,
#    just make sure that the relevant nvcc command is on your $PATH
#    variable. For example:
#
#        PATH=/usr/local/cuda10.2/bin:$PATH make cuda=1
#
#    Alternately, you can set the $NVCC variable to refer to the
#    specific nvcc executable, e.g.:
#
#        NVCC=/usr/local/cuda11.3/bin/nvcc make cuda=1
#
# The CUDA_HOME, GENCODEFLAGS, and CUDA_STATIC variables are all
# handled in $FSLCONFDIR/buildSettings.mk.

include ${FSLCONFDIR}/default.mk

# Project name, plus the scripts (SCRIPTS) and executables
# (XFILES) selected for this invocation. Both lists start out
# empty and are filled in according to the cpu/cuda flags.
# NOTE(review): these names are presumably consumed by the rules
# pulled in from ${FSLCONFDIR} - confirm against the FSL config.
PROJNAME = eddy
SCRIPTS  =
XFILES   =

# cpu=1: build the CPU executable and ship the "eddy" script.
ifdef cpu
  XFILES  += eddy_cpu
  SCRIPTS += eddy
endif

# cuda=1: build the GPU executable, named after ${CUDA_VER}.
ifdef cuda
  XFILES += eddy_cuda${CUDA_VER}
endif

# Default to the cpu variant if no flags were provided. A plain
# "make" is documented as equivalent to "make cpu=1", so the
# "eddy" script must be selected here too, not just eddy_cpu.
ifeq (${XFILES},)
  XFILES  = eddy_cpu
  SCRIPTS = eddy
endif

# -rdynamic allows meaningful backtraces to be emitted on
# segmentation faults and other crashes (see
# fsl/utils/stack_dump.h). Understood by both clang++ and g++.
USRLDFLAGS   = -rdynamic
USRNVCCFLAGS = -DCOMPILE_GPU -Icuda

# Library search directories. The defaults are the locations
# that used to be hard-coded in the link lines below, so an
# unmodified invocation links exactly as before. Override them
# on the command line (or in the environment) to build on a
# different machine, e.g.:
#
#     make cuda=1 EDDY_FSL_LIBDIR=/opt/fsl/lib
EDDY_FSL_LIBDIR      ?= /datav/wkx/fsl/FSL-install-new/lib
EDDY_JEMALLOC_LIBDIR ?= /usr/local/lib/python3.10/dist-packages/ray/core

# FSL libraries that both the CPU and the GPU executable link
# against; kept in one place so the two link lines cannot drift.
eddy_fsl_libs = -lfsl-topup -lfsl-warpfns -lfsl-meshclass -lfsl-basisfield \
                -lfsl-newimage -lfsl-miscmaths -lfsl-cprob -lfsl-NewNifti \
                -lfsl-znz -lfsl-utils

# Libraries for the GPU executable. The search directory is
# given once (it was previously repeated three times).
# NOTE(review): the "cuda11.8" suffix of fsl-cudabasisfield is
# fixed here rather than derived from ${CUDA_VER} - confirm that
# this is intentional.
CUDALIBS     = -L $(EDDY_FSL_LIBDIR) -lfsl-cudabasisfield_cuda11.8 -lhipblas \
               $(eddy_fsl_libs) \
               -L $(EDDY_JEMALLOC_LIBDIR) -ljemalloc \
               -llapack -lblas -lz -lm -fvisibility=default -fPIC

# Libraries for the CPU executable.
LIBS         = -L $(EDDY_FSL_LIBDIR) $(eddy_fsl_libs)

# fastbuild=1 skips creation of the CUDA fat binary: the
# GENCODEFLAGS value coming from FSLDIR/config/buildSettings.mk
# is replaced by a single compute_30/sm_30 target.
ifeq (1,$(fastbuild))
  GENCODEFLAGS := -gencode arch=compute_30,code=sm_30
endif

# An explicitly requested COMPUTE_CAPABILITY is applied last,
# so it overrides both fastbuild and any earlier GENCODEFLAGS.
ifdef COMPUTE_CAPABILITY
  GENCODEFLAGS := -gencode arch=compute_${COMPUTE_CAPABILITY},code=sm_${COMPUTE_CAPABILITY}
endif

# Object files that go into every variant.
OBJS := \
  eddy.o \
  b0Predictor.o \
  BiasFieldEstimatorImpl.o \
  CPUStackResampler.o \
  DiffusionGP.o \
  fmriPredictor.o \
  ECModels.o \
  LongECModels.o \
  ECScanClasses.o \
  EddyCommandLineOptions.o \
  EddyHelperClasses.o \
  EddyUtils.o \
  HyParEstimator.o \
  KMatrix.o \
  MoveBySuscCF.o \
  PostEddyAlignShellsFunctions.o

# Object files that are only part of the CPU variant.
CPUOBJS := \
  LSResampler.o \
  PostEddyCF.o

# Object files that are only part of the cuda variant. Some
# names also appear in OBJS above; the two lists are given
# different build directories further down in this Makefile.
CUDAOBJS := \
  CudaVolume.o \
  DerivativeCalculator.o \
  DiffusionGP.o \
  fmriPredictor.o \
  EddyCudaHelperFunctions.o \
  EddyGpuUtils.o \
  EddyInternalGpuUtils.o \
  EddyKernels.o \
  EddyMatrixKernels.o \
  GpuPredictorChunk.o \
  LSResampler.o \
  PostEddyCF.o \
  StackResampler.o

# Each variant gets its own build directory. BUILDDIR and
# CUDABUILDDIR are read by the depend.mk rule in
# $FSLDIR/config/rules.mk, which generates the dependencies of
# each object file automatically. Dependencies of the CUDA
# object files are listed explicitly at the end of this
# Makefile.
CUDABUILDDIR = cudabuild/
BUILDDIR     = cpubuild/

# Turn the bare object names into build-directory paths:
#   - CPU variant:  everything lives in cpubuild/
#   - cuda variant: the common objects live in cudabuild/, the
#     cuda-only objects in cudabuild/cuda${CUDA_VER}/
CPUOBJS     := $(addprefix cpubuild/,$(OBJS) $(CPUOBJS))
CUDAOBJS    := $(addprefix cudabuild/,$(OBJS)) $(addprefix cudabuild/cuda${CUDA_VER}/,$(CUDAOBJS))

# "all" and "clean" name actions, not files. Declaring them
# phony keeps them working even if a file or directory called
# "all" or "clean" ever appears in this directory.
.PHONY: all clean

# Build every executable selected through the cpu/cuda flags.
all: ${XFILES}

# Remove the generated dependency file, the executables of all
# variants, and both build directories.
clean:
	@rm -f depend.mk eddy_cpu eddy_cuda*
	@rm -rf cudabuild cpubuild

# Root of the toolkit installation that provides the HIP
# headers and libraries. Defaults to the location that used to
# be hard-coded; override it to build against another install:
#
#     make cuda=1 EDDY_DTK_HOME=/path/to/dtk
EDDY_DTK_HOME ?= /opt/dtk

# Language-standard, PIC and debug (-g -O0) flags that appear
# in both HIPCXXFLAGS and HIPLDFLAGS.
eddy_hip_flags = -DARMA_ALLOW_FAKE_GCC -std=c++17 -fPIC -g -O0

# Compile flags: the shared flags plus the include paths of the
# toolkit, of the FSL installation and of this directory.
HIPCXXFLAGS = $(eddy_hip_flags) \
  -I $(EDDY_DTK_HOME)/include -I ${FSLCONFDIR}/../include -I .

# Link flags: the toolkit library directory and the HIP
# libraries, followed by the shared flags.
HIPLDFLAGS  = -L $(EDDY_DTK_HOME)/lib -l hipblas -l hipblaslt -l galaxyhip -l amdhip64 \
  $(eddy_hip_flags)
#################################
# CPU executable and object files
#################################

# Link all CPU objects into the eddy_cpu executable.
eddy_cpu: $(CPUOBJS)
	@mkdir -p cpubuild
	hipcc $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -fvisibility=default -fPIC $(LIBS)

# Compile a .cpp file from this directory into cpubuild/. The
# object is built with debug info and without optimisation
# (-g -O0).
cpubuild/%.o: %.cpp
	@mkdir -p $(@D)
	hipcc ${CXXFLAGS} -c -o $@ $< -g -O0 -fvisibility=default -fPIC

#################################
# GPU executable and object files
#################################

# Link all GPU objects into the eddy_cuda${CUDA_VER}
# executable. The link is done with hipcc, HIPLDFLAGS and
# CUDALIBS (this replaces an earlier ${NVCC}-based link command).
eddy_cuda$(CUDA_VER): $(CUDAOBJS)
	hipcc $(NVCCFLAGS) -o $@ $^ $(HIPLDFLAGS) $(CUDALIBS)

# Compile a source file from cuda/ into the version-specific
# object directory, using NVCCFLAGS.
cudabuild/cuda$(CUDA_VER)/%.o: cuda/%.cpp
	@mkdir -p $(@D)
	hipcc ${NVCCFLAGS} -c -o $@ $< -g -O0 -fvisibility=default -fPIC

# Compile a source file from this directory for the GPU
# variant, using CUDACXXFLAGS.
cudabuild/%.o: %.cpp
	@mkdir -p $(@D)
	hipcc ${CUDACXXFLAGS} -c -o $@ $< -g -O0 -g -fvisibility=default -fPIC