#
# This is the makefile for the eddy project. It can be used
# to compile the following variants of eddy:
#
#  - make cuda=1  (compiles a CUDA-capable version)
#  - make cpu=1   (compiles a multi-threaded CPU version)
#  - make         (equivalent to make cpu=1)
#
# The resulting eddy binary executables will be named like so:
#  - cpu:  eddy_cpu
#  - cuda: eddy_cudaX.Y (where X.Y is the CUDA version that
#                        the binary was compiled against)
#
# Both variants can be compiled in one invocation, e.g.:
#
#     make cpu=1 cuda=1
#
# Notes regarding the cuda variant:
#
#  - By default a fat binary with code for all the compute
#    capabilities allowed for by a given CUDA version will
#    be compiled. If one only needs 3.0--3.5 (for testing)
#    one can build with the fastbuild=1 option which saves
#    a lot of compilation time, e.g.:
#
#        make cuda=1 fastbuild=1
#
#  - Alternately, to compile for a specific compute
#    capability (or range thereof), you can specify the
#    GENCODEFLAGS variable, e.g.:
#
#        make cuda=1 GENCODEFLAGS="-gencode arch=compute_30,code=sm_30"
#
#  - Or you can specify a specific compute capability with
#    the COMPUTE_CAPABILITY variable, e.g.:
#
#        make cuda=1 COMPUTE_CAPABILITY="30"
#
#  - By default, all CUDA libraries (libcuda, libcudart, and those
#    specified in the CUDALIBS variable below) will be dynamically
#    linked in the resulting executable. The CUDA_STATIC variable
#    can be specified to statically link these libraries instead,
#    e.g.:
#
#        make cuda=1 CUDA_STATIC=1
#
#  - To compile against a specific version of the CUDA toolkit,
#    just make sure that the relevant nvcc command is on your $PATH
#    variable. For example:
#
#        PATH=/usr/local/cuda10.2/bin:$PATH make cuda=1
#
#    Alternately, you can set the $NVCC variable to refer to the
#    specific nvcc executable, e.g.:
#
#        NVCC=/usr/local/cuda11.3/bin/nvcc make cuda=1
#
# The CUDA_HOME, GENCODEFLAGS, and CUDA_STATIC variables are all
# handled in $FSLCONFDIR/buildSettings.mk.
include ${FSLCONFDIR}/default.mk

PROJNAME = eddy
SCRIPTS  =
XFILES   =

# Tool and install locations used by the rules below. Each one keeps
# its previous hard-coded value as the default, and can be overridden
# on the command line or from the environment, e.g.:
#
#     make cuda=1 HIPCC=/opt/dtk/bin/hipcc DTK_HOME=/opt/dtk
HIPCC           ?= hipcc
DTK_HOME        ?= /opt/dtk
EDDY_FSL_LIBDIR ?= /workspace/FSL-install/lib

ifdef cpu
  XFILES  += eddy_cpu
  SCRIPTS += eddy
endif
ifdef cuda
  XFILES += eddy_cuda${CUDA_VER}
endif

# Default to the cpu variant if no flags were provided.
# SCRIPTS is set here as well, so that a bare "make"
# selects exactly the same outputs as "make cpu=1".
ifeq (${XFILES},)
  XFILES  = eddy_cpu
  SCRIPTS = eddy
endif

# -rdynamic allows meaningful backtraces to
# be emitted on segmentation faults and
# other crashes (see fsl/utils/stack_dump.h).
# Understood by both clang++ and g++
USRLDFLAGS   = -rdynamic
USRNVCCFLAGS = -DCOMPILE_GPU -Icuda

# FSL libraries that every variant links against.
eddy_fsl_libs := -lfsl-topup -lfsl-warpfns -lfsl-meshclass -lfsl-basisfield \
                 -lfsl-newimage -lfsl-miscmaths -lfsl-cprob -lfsl-NewNifti \
                 -lfsl-znz -lfsl-utils

# Libraries for the GPU executable. The trailing -fvisibility/-fPIC
# options are passed on the link line together with the libraries.
CUDALIBS = -L${EDDY_FSL_LIBDIR} -lfsl-cudabasisfield_cuda11.8 -lhipblas \
           ${eddy_fsl_libs} \
           -llapack -lblas -lz -lm -fvisibility=default -fPIC

# Libraries for the CPU executable.
LIBS = -L${EDDY_FSL_LIBDIR} ${eddy_fsl_libs}

# Skip CUDA fat binary creation if fastbuild
# is set (overriding GENCODEFLAGS defined
# in FSLDIR/config/buildSettings.mk)
ifeq ($(fastbuild),1)
  GENCODEFLAGS := -gencode arch=compute_30,code=sm_30
endif

# Build a specific compute capability if
# requested (overriding fastbuild and
# GENCODEFLAGS)
ifdef COMPUTE_CAPABILITY
  GENCODEFLAGS := -gencode arch=compute_$(COMPUTE_CAPABILITY),code=sm_$(COMPUTE_CAPABILITY)
endif

# Compiled for all variants
OBJS := eddy.o b0Predictor.o BiasFieldEstimatorImpl.o \
        CPUStackResampler.o DiffusionGP.o fmriPredictor.o ECModels.o \
        LongECModels.o ECScanClasses.o EddyCommandLineOptions.o \
        EddyHelperClasses.o EddyUtils.o HyParEstimator.o \
        KMatrix.o MoveBySuscCF.o PostEddyAlignShellsFunctions.o

# Compiled for CPU variant
CPUOBJS := LSResampler.o PostEddyCF.o

# Compiled for cuda variant (sources live in the cuda/ sub-directory)
CUDAOBJS := CudaVolume.o DerivativeCalculator.o DiffusionGP.o fmriPredictor.o \
            EddyCudaHelperFunctions.o EddyGpuUtils.o EddyInternalGpuUtils.o \
            EddyKernels.o EddyMatrixKernels.o GpuPredictorChunk.o \
            LSResampler.o PostEddyCF.o StackResampler.o

# use separate build dirs for each variant. The
# BUILDDIR and CUDABUILDDIR variables are used by
# the depend.mk rule in $FSLDIR/config/rules.mk,
# which is used to automatically generate
# dependencies for each object file. Dependencies
# for CUDA object files are explicitly listed at
# the end of this Makefile.
CUDABUILDDIR = cudabuild/
BUILDDIR     = cpubuild/

# Map the bare object names above onto their per-variant build
# directories. The common objects are compiled once per variant.
CPUOBJS  := $(addprefix cpubuild/,${OBJS} ${CPUOBJS})
CUDAOBJS := $(addprefix cudabuild/,${OBJS}) \
            $(addprefix cudabuild/cuda${CUDA_VER}/,${CUDAOBJS})

# all and clean are commands, not files; declaring them phony keeps
# them working even if a file with the same name appears.
.PHONY: all clean

all: ${XFILES}

clean:
	@rm -f depend.mk eddy_cpu eddy_cuda*
	@rm -rf cudabuild cpubuild

# NOTE(review): HIPCXXFLAGS is not referenced by any rule in this
# file - confirm whether it is still needed.
HIPCXXFLAGS = -DARMA_ALLOW_FAKE_GCC -std=c++17 -fPIC -g -O0 \
              -I ${DTK_HOME}/include -I ${FSLCONFDIR}/../include -I .

# NOTE(review): the options after the libraries are compile options
# rather than link options; they are kept so that the link command
# line is unchanged.
HIPLDFLAGS = -L ${DTK_HOME}/lib -l hipblas -l hipblaslt -l galaxyhip -l amdhip64 \
             -DARMA_ALLOW_FAKE_GCC -std=c++17 -fPIC -g -O0

#################################
# CPU executable and object files
#################################

eddy_cpu: ${CPUOBJS}
	${HIPCC} ${CXXFLAGS} -o $@ $^ ${LDFLAGS} -fvisibility=default -fPIC ${LIBS}

cpubuild/%.o: %.cpp
	@mkdir -p $(@D)
	${HIPCC} ${CXXFLAGS} -c -o $@ $< -g -O0 -fvisibility=default -fPIC

#################################
# GPU executable and object files
#################################

# The GPU executable is linked with hipcc, HIPLDFLAGS and CUDALIBS
# (this rule previously used ${NVCC} with ${NVCCLDFLAGS}).
eddy_cuda${CUDA_VER}: ${CUDAOBJS}
	${HIPCC} ${NVCCFLAGS} -o $@ $^ ${HIPLDFLAGS} ${CUDALIBS}

# GPU-specific sources, taken from the cuda/ sub-directory
cudabuild/cuda${CUDA_VER}/%.o: cuda/%.cpp
	@mkdir -p $(@D)
	${HIPCC} ${NVCCFLAGS} -c -o $@ $< -g -O0 -fvisibility=default -fPIC --gpu-max-threads-per-block=1024

# Common sources, compiled with CUDACXXFLAGS for the GPU variant
cudabuild/%.o: %.cpp
	@mkdir -p $(@D)
	${HIPCC} ${CUDACXXFLAGS} -c -o $@ $< -g -O0 -fvisibility=default -fPIC