///////////////////////////////////////////////////////////////////// /// /// \file GpuPredictorChunk.cu /// \brief Definition of helper class for efficient prediction making on the Gpu. /// /// \author Jesper Andersson /// \version 1.0b, March, 2013. /// \Copyright (C) 2013 University of Oxford /// ///////////////////////////////////////////////////////////////////// // Because of a bug in cuda_fp16.hpp, that gets included by cublas_v2.h, it has to // be included before any include files that set up anything related to the std-lib. // If not, there will be an ambiguity in cuda_fp16.hpp about wether to use the // old-style C isinf or the new (since C++11) std::isinf. #include "cublas_v2.h" #include #include #include #pragma push #pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap #include "newimage/newimage.h" #pragma pop #include "EddyHelperClasses.h" #include "EddyCudaHelperFunctions.h" #include "cuda/GpuPredictorChunk.h" namespace EDDY { GpuPredictorChunk::GpuPredictorChunk(const EDDY::ECScanManager& sm, EDDY::ScanType st) EddyTry : _ntot(sm.NScans(st)), _curr(0) { // Find total global memory on "our" device int dev; cudaError_t err = cudaGetDevice(&dev); if (err != cudaSuccess) throw EddyException("GpuPredictorChunk::GpuPredictorChunk: Unable to get device: cudaGetDevice returned an error: " + EddyCudaHelperFunctions::cudaError2String(err)); struct cudaDeviceProp prop; err = cudaGetDeviceProperties(&prop,dev); if (err != cudaSuccess) throw EddyException("GpuPredictorChunk::GpuPredictorChunk: Unable to get device properties: cudaGetDeviceProperties reurned an error: " + EddyCudaHelperFunctions::cudaError2String(err)); // Check how much of that we can get float *skrutt = NULL; size_t memsz = my_min(static_cast(0.5*prop.totalGlobalMem),_ntot*my_sizeof(sm.Scan(0,st).GetIma())); for (; memsz > my_sizeof(sm.Scan(0,st).GetIma()); memsz *= 0.9) { if (cudaMalloc(&skrutt,memsz) == cudaSuccess) break; } if (memsz != _ntot*my_sizeof(sm.Scan(0,st).GetIma())) memsz *= 0.9; // Reduce a little further to accomodate changes in GPU use by other processes. if (memsz < my_sizeof(sm.Scan(0,st).GetIma())) throw EddyException("GpuPredictorChunk::GpuPredictorChunk: Not enough memory on device"); cudaFree(skrutt); // Calculate chunk-size and # of chunks. unsigned int chsz = my_min(_ntot, static_cast(memsz / my_sizeof(sm.Scan(0,st).GetIma()))); for (_nchnk=1; _nchnk*chsz < _ntot; _nchnk++) ; _ind.resize(_nchnk+1); _gind.resize(_nchnk); // Populate vectors with index and global index unsigned int indx = 0; for (unsigned int i=0; i<_nchnk; i++) { _ind[i].resize(my_min(_ntot-indx,chsz)); _gind[i].resize(my_min(_ntot-indx,chsz)); for (unsigned int j=0; j& ima) EddyTry : _ntot(ntot) { // Find total global memory on "our" device int dev; cudaError_t err = cudaGetDevice(&dev); if (err != cudaSuccess) throw EddyException("GpuPredictorChunk::GpuPredictorChunk: Unable to get device: cudaGetDevice returned an error: " + EddyCudaHelperFunctions::cudaError2String(err)); struct cudaDeviceProp prop; err = cudaGetDeviceProperties(&prop,dev); if (err != cudaSuccess) throw EddyException("GpuPredictorChunk::GpuPredictorChunk: Unable to get device properties: cudaGetDeviceProperties reurned an error: " + EddyCudaHelperFunctions::cudaError2String(err)); // Check how much of that we can get float *skrutt = NULL; size_t memsz; for (memsz = 0.5 * prop.totalGlobalMem; memsz > my_sizeof(ima); memsz *= 0.9) { if (cudaMalloc(&skrutt,memsz) == cudaSuccess) break; } memsz *= 0.9; // Reduce a little further to accomodate changes in GPU use by other processes. if (memsz < my_sizeof(ima)) throw EddyException("GpuPredictorChunk::GpuPredictorChunk: Not enough memory on device"); cudaFree(skrutt); // Calculate chunk-size and make vector of indicies _chsz = my_min(_ntot, static_cast(memsz / my_sizeof(ima))); _ind.resize(_chsz); for (unsigned int i=0; i<_chsz; i++) _ind[i] = i; } EddyCatch */ /* GpuPredictorChunk& GpuPredictorChunk::operator++() EddyTry // Prefix ++ { if (_ind.back() == (_ntot-1)) { // If we're at the end; _ind.resize(1); _ind[0] = _ntot; } else { unsigned int first = _ind.back() + 1; unsigned int last = first + _chsz - 1; last = (last >= _ntot) ? _ntot-1 : last; _ind.resize(last-first+1); for (unsigned int i=0; i<_ind.size(); i++) { _ind[i] = first + i; } } return(*this); } EddyCatch */ } // End namespace EDDY