Commit b9e18ed6 authored by wangkx1's avatar wangkx1
Browse files

init

parents
// Because of a bug in cuda_fp16.hpp, which gets included by hipblas.h, it has to
// be included before any include files that set up anything related to the std-lib.
// If not, there will be an ambiguity in cuda_fp16.hpp about whether to use the
// old-style C isinf or the new (since C++11) std::isinf.
#include "hipblas.h"
#include <cstdlib>
#include <string>
#include <sys/time.h>
#include <vector>
#include <cmath>
#include <hip/hip_runtime.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#include <thrust/inner_product.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#include "armawrap/newmat.h"
#include "miscmaths/miscmaths.h"
#ifndef EXPOSE_TREACHEROUS
#define I_CUDAVOLUME_H_DEFINED_ET
#define EXPOSE_TREACHEROUS // To allow us to use .sampling_mat()
#endif
#include "newimage/newimageall.h"
#pragma pop
#include "EddyHelperClasses.h"
#include "EddyCudaHelperFunctions.h"
#include "EddyKernels.h"
#include "EddyFunctors.h"
#include "CudaVolume.h"
using namespace EDDY;
using namespace EddyKernels;
// Takes over the three spatial sizes and the header of the 4D volume cv.
// The data vector is resized to this->Size() and the spline coefficients are
// dropped and flagged as invalid.
void CudaVolume::SetHdr(const CudaVolume4D& cv) EddyTry
{
  for (unsigned int d=0; d<3; d++) _sz[d] = cv._sz[d];
  _spv = false;
  _hdr = cv._hdr;
  try {
    _devec.resize(this->Size());
    _spcoef.clear();
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::SetHdr with message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Resamples this volume at the coordinates in coord and writes the result to smpl.
// Throws EddyException if the interpolation is neither spline nor trilinear, if
// the extrapolation is not one of extraslice/periodic/mirror, or if smpl and
// this volume compare unequal.
// For spline interpolation the coefficients are recalculated here whenever _spv
// says they are not valid.
void CudaVolume::Sample(const EDDY::CudaImageCoordinates& coord,
                        CudaVolume&                       smpl) const EddyTry
{
  const auto interp = Interp();
  const bool interp_ok = (interp==NEWIMAGE::spline || interp==NEWIMAGE::trilinear);
  if (!interp_ok) throw EddyException("CudaVolume::Sample: Invalid interpolation option");
  const auto extrap = Extrap();
  const bool extrap_ok = (extrap==NEWIMAGE::extraslice || extrap==NEWIMAGE::periodic || extrap==NEWIMAGE::mirror);
  if (!extrap_ok) throw EddyException("CudaVolume::Sample: Invalid extrapolation option");
  if (smpl!=*this) throw EddyException("CudaVolume::Sample: Dimension mismatch");

  const bool use_spline = (interp==NEWIMAGE::spline);
  if (use_spline && !_spv) { // Coefficients are missing or out of date
    if (_spcoef.size() != _devec.size()) {
      try {
        _spcoef.resize(_devec.size());
      }
      catch(thrust::system_error &e) {
        std::cerr << "thrust::system_error thrown in CudaVolume::Sample_1 after call to resize with message: " << e.what() << std::endl;
        throw;
      }
    }
    calculate_spline_coefs(_sz,_devec,_spcoef);
    _spv = true;
  }

  // One thread per voxel, rounding the number of blocks upwards
  const int block_sz = threads_per_block_interpolate;
  const int nvox = Size();
  const int grid_sz = nvox / block_sz + ((nvox % block_sz) ? 1 : 0);

  // Translate the NEWIMAGE extrapolation setting to the one used by the kernels
  EddyKernels::ExtrapType kernel_ep = EddyKernels::PERIODIC;
  if (extrap==NEWIMAGE::extraslice) kernel_ep = EddyKernels::CONSTANT;
  else if (extrap==NEWIMAGE::mirror) kernel_ep = EddyKernels::MIRROR;

  if (use_spline) {
    EddyKernels::spline_interpolate<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),sp_ptr(),coord.XPtr(),
                                                          coord.YPtr(),coord.ZPtr(),nvox,kernel_ep,smpl.GetPtr());
    EddyCudaHelperFunctions::CudaSync("EddyKernels::spline_interpolate");
  }
  else {
    EddyKernels::linear_interpolate<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),GetPtr(),coord.XPtr(),
                                                          coord.YPtr(),coord.ZPtr(),nvox,kernel_ep,smpl.GetPtr());
    EddyCudaHelperFunctions::CudaSync("EddyKernels::linear_interpolate");
  }
} EddyCatch
// Resamples this volume at the coordinates in coord. The resampled values go to
// smpl and three further outputs, taken from volumes 0, 1 and 2 of dsmpl, are
// handed to the interpolation kernel (presumably the x-, y- and z-derivatives —
// confirm against the kernels).
// Throws EddyException on an unsupported interpolation or extrapolation setting,
// if smpl or dsmpl compares unequal to this volume, or if dsmpl does not have
// exactly three volumes.
void CudaVolume::Sample(const EDDY::CudaImageCoordinates& coord,
                        CudaVolume&                       smpl,
                        CudaVolume4D&                     dsmpl) const EddyTry
{
  const auto interp = Interp();
  const bool interp_ok = (interp==NEWIMAGE::spline || interp==NEWIMAGE::trilinear);
  if (!interp_ok) throw EddyException("CudaVolume::Sample: Invalid interpolation option");
  const auto extrap = Extrap();
  const bool extrap_ok = (extrap==NEWIMAGE::extraslice || extrap==NEWIMAGE::periodic || extrap==NEWIMAGE::mirror);
  if (!extrap_ok) throw EddyException("CudaVolume::Sample: Invalid extrapolation option");
  if (smpl!=(*this) || dsmpl!=(*this)) throw EddyException("CudaVolume::Sample: Dimension mismatch");
  if (dsmpl.Size(3)!=3) throw EddyException("CudaVolume::Sample: dsmpl.Size(3) must be 3");

  const bool use_spline = (interp==NEWIMAGE::spline);
  if (use_spline && !_spv) { // Coefficients are missing or out of date
    if (_spcoef.size() != _devec.size()) {
      try {
        _spcoef.resize(_devec.size());
      }
      catch(thrust::system_error &e) {
        std::cerr << "thrust::system_error thrown in CudaVolume::Sample_2 after call to resize with message: " << e.what() << std::endl;
        throw;
      }
    }
    calculate_spline_coefs(_sz,_devec,_spcoef);
    _spv = true;
  }

  // One thread per voxel, rounding the number of blocks upwards
  const int block_sz = threads_per_block_interpolate;
  const int nvox = Size();
  const int grid_sz = nvox / block_sz + ((nvox % block_sz) ? 1 : 0);

  // Translate the NEWIMAGE extrapolation setting to the one used by the kernels
  EddyKernels::ExtrapType kernel_ep = EddyKernels::PERIODIC;
  if (extrap==NEWIMAGE::extraslice) kernel_ep = EddyKernels::CONSTANT;
  else if (extrap==NEWIMAGE::mirror) kernel_ep = EddyKernels::MIRROR;

  if (use_spline) {
    EddyKernels::spline_interpolate<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),sp_ptr(),coord.XPtr(),
                                                          coord.YPtr(),coord.ZPtr(),nvox,kernel_ep,smpl.GetPtr(),
                                                          dsmpl.GetPtr(0),dsmpl.GetPtr(1),dsmpl.GetPtr(2));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::spline_interpolate");
  }
  else {
    EddyKernels::linear_interpolate<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),GetPtr(),coord.XPtr(),
                                                          coord.YPtr(),coord.ZPtr(),nvox,kernel_ep,smpl.GetPtr(),
                                                          dsmpl.GetPtr(0),dsmpl.GetPtr(1),dsmpl.GetPtr(2));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::linear_interpolate");
  }
} EddyCatch
// Runs the valid_voxels kernel on the coordinates in coord, writing its output
// to mask. The kernel is given the volume size and the three flags returned by
// ExtrapValid() (presumably one per direction — confirm against ExtrapValid()).
void CudaVolume::ValidMask(const EDDY::CudaImageCoordinates& coord, CudaVolume& mask) const EddyTry
{
  // One thread per voxel, rounding the number of blocks upwards
  const int block_sz = threads_per_block_interpolate;
  const int nvox = Size();
  const int grid_sz = nvox / block_sz + ((nvox % block_sz) ? 1 : 0);
  std::vector<bool> valid = ExtrapValid();
  const bool valid_x = valid[0];
  const bool valid_y = valid[1];
  const bool valid_z = valid[2];
  EddyKernels::valid_voxels<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),valid_x,valid_y,valid_z,
                                                  coord.XPtr(),coord.YPtr(),coord.ZPtr(),nvox,mask.GetPtr());
  EddyCudaHelperFunctions::CudaSync("EddyKernels::valid_voxels");
} EddyCatch
// Geometry comparison with another CudaVolume: the matrix sizes and voxel sizes
// of both are handed to cuda_volume_utils::same_dim_size(). Voxel values are
// not inspected.
bool CudaVolume::operator==(const CudaVolume& rhs) const EddyTry {
  std::vector<int> my_size{ static_cast<int>(_sz[0]),
                            static_cast<int>(_sz[1]),
                            static_cast<int>(_sz[2]) };
  std::vector<int> other_size{ static_cast<int>(rhs._sz[0]),
                               static_cast<int>(rhs._sz[1]),
                               static_cast<int>(rhs._sz[2]) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> other_vxs{ rhs._hdr.xdim(), rhs._hdr.ydim(), rhs._hdr.zdim() };
  return cuda_volume_utils::same_dim_size(my_size,my_vxs,other_size,other_vxs);
} EddyCatch
// Geometry comparison with a host-side NEWIMAGE volume: the matrix sizes and
// voxel sizes of both are handed to cuda_volume_utils::same_dim_size(). Voxel
// values are not inspected.
bool CudaVolume::operator==(const NEWIMAGE::volume<float>& rhs) const EddyTry {
  std::vector<int> my_size{ static_cast<int>(_sz[0]),
                            static_cast<int>(_sz[1]),
                            static_cast<int>(_sz[2]) };
  std::vector<int> other_size{ static_cast<int>(rhs.xsize()),
                               static_cast<int>(rhs.ysize()),
                               static_cast<int>(rhs.zsize()) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> other_vxs{ rhs.xdim(), rhs.ydim(), rhs.zdim() };
  return cuda_volume_utils::same_dim_size(my_size,my_vxs,other_size,other_vxs);
} EddyCatch
// Geometry comparison with a 4D volume, using only its first three dimensions:
// the matrix sizes and voxel sizes of both are handed to
// cuda_volume_utils::same_dim_size(). Voxel values are not inspected.
bool CudaVolume::operator==(const CudaVolume4D& rhs) const EddyTry {
  std::vector<int> my_size{ static_cast<int>(_sz[0]),
                            static_cast<int>(_sz[1]),
                            static_cast<int>(_sz[2]) };
  std::vector<int> other_size{ static_cast<int>(rhs.Size(0)),
                               static_cast<int>(rhs.Size(1)),
                               static_cast<int>(rhs.Size(2)) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> other_vxs{ rhs.Vxs(0), rhs.Vxs(1), rhs.Vxs(2) };
  return cuda_volume_utils::same_dim_size(my_size,my_vxs,other_size,other_vxs);
} EddyCatch
// Voxel-wise addition of cv to this volume.
// Throws EddyException if the volumes compare unequal or this volume is empty.
// The spline coefficients are added too when both operands have valid ones;
// otherwise the coefficients of this volume are flagged as invalid.
CudaVolume& CudaVolume::operator+=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume::operator+=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume::operator+=: Empty volume");
  try {
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),thrust::plus<float>());
    const bool both_have_coefs = (_spv && cv._spv);
    if (both_have_coefs) {
      thrust::transform(_spcoef.begin(),_spcoef.end(),cv._spcoef.begin(),_spcoef.begin(),thrust::plus<float>());
    }
    _spv = both_have_coefs;
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator+= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Voxel-wise subtraction of cv from this volume.
// Throws EddyException if the volumes compare unequal or this volume is empty.
// The spline coefficients are subtracted too when both operands have valid
// ones; otherwise the coefficients of this volume are flagged as invalid.
CudaVolume& CudaVolume::operator-=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume::operator-=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume::operator-=: Empty volume");
  try {
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),thrust::minus<float>());
    const bool both_have_coefs = (_spv && cv._spv);
    if (both_have_coefs) {
      thrust::transform(_spcoef.begin(),_spcoef.end(),cv._spcoef.begin(),_spcoef.begin(),thrust::minus<float>());
    }
    _spv = both_have_coefs;
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator-= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Voxel-wise multiplication of this volume by cv.
// Throws EddyException if the volumes compare unequal or this volume is empty.
// The spline coefficients are always flagged as invalid afterwards.
CudaVolume& CudaVolume::operator*=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume::operator*=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume::operator*=: Empty volume");
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      cv._devec.begin(),
                      _devec.begin(),
                      thrust::multiplies<float>());
    _spv = false;
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator*= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Divides every voxel by the scalar a, implemented as a multiplication by 1/a.
// Valid spline coefficients are scaled by the same factor.
// Throws EddyException if a is zero.
CudaVolume& CudaVolume::operator/=(float a) EddyTry
{
  if (a == 0.0f) throw EddyException("CudaVolume::operator/=: Division by zero");
  try {
    thrust::transform(_devec.begin(),_devec.end(),_devec.begin(),EDDY::MulByScalar<float>(1.0/a));
    if (_spv) {
      thrust::transform(_spcoef.begin(),_spcoef.end(),_spcoef.begin(),EDDY::MulByScalar<float>(1.0/a));
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator/= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Smoothing restricted to a mask. Both the masked image and a copy of the mask
// are smoothed with the same fwhm (in mm), after which the image is divided by
// the smoothed mask through DivideWithinMask() and masked once more.
void CudaVolume::Smooth(float             fwhm,
                        const CudaVolume& mask) EddyTry
{
  CudaVolume smoothed_mask(mask);             // Working copy, taken before anything is modified
  (*this) *= mask;                            // Zero everything outside the mask
  this->Smooth(fwhm);                         // Smooth the masked image
  smoothed_mask.Smooth(fwhm);                 // Smooth the mask with the same kernel
  this->DivideWithinMask(smoothed_mask,mask); // Normalise by the smoothed mask
  (*this) *= mask;                            // Mask again
} EddyCatch
// Combines pv, scaled by a, into this volume voxel by voxel using the
// EDDY::MulAndAdd functor.
// Throws EddyException if pv and this volume compare unequal.
// If this volume has valid spline coefficients they are updated the same way
// when pv has valid ones too, and discarded otherwise.
void CudaVolume::MultiplyAndAddToMe(const CudaVolume& pv, float a) EddyTry
{
  if (pv!=*this) throw EddyException("CudaVolume::MultiplyAndAddToMe: Dimension mismatch");
  try {
    thrust::transform(_devec.begin(),_devec.end(),pv._devec.begin(),_devec.begin(),EDDY::MulAndAdd<float>(a));
    if (_spv && pv._spv) {
      thrust::transform(_spcoef.begin(),_spcoef.end(),pv._spcoef.begin(),_spcoef.begin(),EDDY::MulAndAdd<float>(a));
    }
    else if (_spv) {
      _spcoef.clear();
      _spv = false;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::MultiplyAndAddToMe: with message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Runs the subtract_multiply_and_add_to_me kernel with pv, nv and the scalar a
// as inputs and this volume as the output buffer (by its name it adds
// a*(pv-nv) to this volume — confirm against the kernel).
// Throws EddyException if pv or nv compares unequal to this volume.
// Any valid spline coefficients are discarded rather than updated.
void CudaVolume::SubtractMultiplyAndAddToMe(const CudaVolume& pv, const CudaVolume& nv, float a) EddyTry
{
  if (pv!=*this || nv!=*this) throw EddyException("CudaVolume::SubtractMultiplyAndAddToMe: Dimension mismatch");
  // One thread per voxel, rounding the number of blocks upwards
  const int block_sz = threads_per_block_smaatm;
  const int nvox = Size();
  const int grid_sz = nvox / block_sz + ((nvox % block_sz) ? 1 : 0);
  EddyKernels::subtract_multiply_and_add_to_me<<<grid_sz,block_sz>>>(pv.GetPtr(),nv.GetPtr(),a,nvox,GetPtr());
  EddyCudaHelperFunctions::CudaSync("EddyKernels::subtract_multiply_and_add_to_me");
  if (_spv) { // The coefficients could in principle be inferred from the inputs; they are dropped instead
    _spcoef.clear();
    _spv = false;
  }
} EddyCatch
// Runs the subtract_square_and_add_to_me kernel with pv and nv as inputs and
// this volume as the output buffer (by its name it adds (pv-nv)^2 to this
// volume — confirm against the kernel).
// Throws EddyException if pv or nv compares unequal to this volume.
// Any valid spline coefficients are discarded.
void CudaVolume::SubtractSquareAndAddToMe(const CudaVolume& pv, const CudaVolume& nv) EddyTry
{
  if (pv!=*this || nv!=*this) throw EddyException("CudaVolume::SubtractSquareAndAddToMe: Dimension mismatch");
  // One thread per voxel, rounding the number of blocks upwards
  const int block_sz = threads_per_block_ssaatm;
  const int nvox = Size();
  const int grid_sz = nvox / block_sz + ((nvox % block_sz) ? 1 : 0);
  EddyKernels::subtract_square_and_add_to_me<<<grid_sz,block_sz>>>(pv.GetPtr(),nv.GetPtr(),nvox,GetPtr());
  EddyCudaHelperFunctions::CudaSync("EddyKernels::subtract_square_and_add_to_me");
  if (_spv) {
    _spcoef.clear();
    _spv = false;
  }
} EddyCatch
// Divides this volume by divisor through cuda_volume_utils::divide_within_mask(),
// which also receives the mask.
// Throws EddyException if divisor or mask compares unequal to this volume.
// Any valid spline coefficients are discarded.
void CudaVolume::DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask) EddyTry
{
  if (divisor!=*this) throw EddyException("CudaVolume::DivideWithinMask: Dimension mismatch");
  if (mask!=*this) throw EddyException("CudaVolume::DivideWithinMask: Dimension mismatch");
  cuda_volume_utils::divide_within_mask(divisor._devec,mask._devec,_devec.begin(),_devec.end());
  if (_spv) {
    _spcoef.clear();
    _spv = false;
  }
} EddyCatch
// Replaces every voxel by the output of the EDDY::Binarise functor constructed
// from the single threshold tv. Any valid spline coefficients are discarded.
// Returns a reference to this volume.
CudaVolume& CudaVolume::Binarise(float tv) EddyTry
{
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      _devec.begin(),
                      EDDY::Binarise<float>(tv));
    if (_spv) {
      _spcoef.clear();
      _spv = false;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::Binarise_1: with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Replaces every voxel by the output of the EDDY::Binarise functor constructed
// from the limits ll and ul (presumably lower and upper — confirm against the
// functor). Any valid spline coefficients are discarded.
// Returns a reference to this volume.
CudaVolume& CudaVolume::Binarise(float ll, float ul) EddyTry
{
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      _devec.begin(),
                      EDDY::Binarise<float>(ll,ul));
    if (_spv) {
      _spcoef.clear();
      _spv = false;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::Binarise_2: with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Overwrites every voxel with the output of the EDDY::MakeNormRand functor,
// constructed from mu and sigma and applied to the voxel's linear index
// (presumably normally distributed values with mean mu and standard deviation
// sigma — confirm against the functor).
// Any valid spline coefficients are discarded. Returns a reference to this volume.
CudaVolume& CudaVolume::MakeNormRand(float mu, float sigma) EddyTry
{
  try {
    // The functor receives the indices 0,1,...,_devec.size()-1
    thrust::counting_iterator<unsigned int> index_seq_begin(0);
    thrust::transform(index_seq_begin,index_seq_begin+_devec.size(),_devec.begin(),EDDY::MakeNormRand<float>(mu,sigma));
    if (_spv) { _spcoef.clear(); _spv=false; }
  }
  catch(thrust::system_error &e) {
    // The diagnostic now names this function; it previously referred to a non-existent "MakeRandom"
    std::cerr << "thrust::system_error thrown in CudaVolume::MakeNormRand: with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Returns the sum of the voxel values, accumulated as double.
// With an empty mask (mask.Size()==0) all voxels are reduced with EDDY::Sum.
// Otherwise each voxel is paired with its mask value through EDDY::Product and
// the results are added; EddyException is thrown if mask and this volume
// compare unequal.
double CudaVolume::Sum(const CudaVolume& mask) const EddyTry
{
  double total = 0.0;
  if (!mask.Size()) { // No mask supplied
    try {
      total = thrust::reduce(_devec.begin(),_devec.end(),total,EDDY::Sum<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Sum in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else {
    if (mask != *this) throw EddyException("CudaVolume::Sum: Mismatched volumes");
    try {
      total = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),total,
                                    thrust::plus<double>(),EDDY::Product<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Sum in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return total;
} EddyCatch
// Returns the sum of squared voxel values, accumulated as double.
// With an empty mask (mask.Size()==0) all voxels are reduced with
// EDDY::SumSquare. Otherwise each voxel is paired with its mask value through
// EDDY::MaskedSquare and the results are added; EddyException is thrown if
// mask and this volume compare unequal.
double CudaVolume::SumOfSquares(const CudaVolume& mask) const EddyTry
{
  double total = 0.0;
  if (!mask.Size()) { // No mask supplied
    try {
      total = thrust::reduce(_devec.begin(),_devec.end(),total,EDDY::SumSquare<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::SumOfSquares in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else {
    if (mask != *this) throw EddyException("CudaVolume::SumOfSquares: Mismatched volumes");
    try {
      total = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),total,
                                    thrust::plus<double>(),EDDY::MaskedSquare<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::SumOfSquares in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return total;
} EddyCatch
// Returns the largest voxel value as a double, starting the search from the
// lowest representable double.
// With an empty mask (mask.Size()==0) all voxels are reduced with EDDY::Max.
// Otherwise each voxel is first combined with its mask value through
// EDDY::Product and the maximum of those results is returned; EddyException is
// thrown if mask and this volume compare unequal.
// NOTE(review): if Product multiplies, voxels where the mask is zero contribute
// 0 to the maximum — confirm that this is intended for all-negative data.
double CudaVolume::Max(const CudaVolume& mask) const EddyTry
{
  double largest = std::numeric_limits<double>::lowest();
  if (!mask.Size()) { // No mask supplied
    try {
      largest = thrust::reduce(_devec.begin(),_devec.end(),largest,EDDY::Max<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Max in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else {
    if (mask != *this) throw EddyException("CudaVolume::Max: Mismatched volumes");
    try {
      largest = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),largest,
                                      thrust::maximum<double>(),EDDY::Product<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Max in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return largest;
} EddyCatch
// Returns the result of reducing the voxel values with EDDY::MaxAbs, starting
// from 0 (by its name the largest absolute value — confirm against the functor).
// With an empty mask (mask.Size()==0) all voxels are reduced directly.
// Otherwise each voxel is first combined with its mask value through
// EDDY::Product; EddyException is thrown if mask and this volume compare unequal.
double CudaVolume::MaxAbs(const CudaVolume& mask) const EddyTry
{
  double largest = 0.0;
  if (!mask.Size()) { // No mask supplied
    try {
      largest = thrust::reduce(_devec.begin(),_devec.end(),largest,EDDY::MaxAbs<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::MaxAbs in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else {
    if (mask != *this) throw EddyException("CudaVolume::MaxAbs: Mismatched volumes");
    try {
      largest = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),largest,
                                      EDDY::MaxAbs<double,double>(),EDDY::Product<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::MaxAbs in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return largest;
} EddyCatch
// Sets every voxel to val. If the coefficient vector is non-empty it is filled
// with val as well and flagged as valid.
CudaVolume& CudaVolume::operator=(float val) EddyTry
{
  try {
    thrust::fill(_devec.begin(), _devec.end(), val);
    if (!_spcoef.empty()) {
      thrust::fill(_spcoef.begin(), _spcoef.end(), val);
      _spv = true;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Returns the matrix size along dimension indx (0, 1 or 2).
// Throws EddyException for any other index.
unsigned int CudaVolume::Size(unsigned int indx) const EddyTry
{
  if (indx >= 3) throw EddyException("CudaVolume::Size: Index out of range");
  return _sz[indx];
} EddyCatch
// Returns the voxel size stored in the header for dimension indx
// (0 -> xdim, 1 -> ydim, 2 -> zdim). Throws EddyException for any other index.
float CudaVolume::Vxs(unsigned int indx) const EddyTry
{
  if (indx >= 3) throw EddyException("CudaVolume::Vxs: Index out of range");
  float voxel_size;
  if (indx == 0) voxel_size = _hdr.xdim();
  else if (indx == 1) voxel_size = _hdr.ydim();
  else voxel_size = _hdr.zdim();
  return voxel_size;
} EddyCatch
// Returns the sampling matrix of the header.
NEWMAT::Matrix CudaVolume::Ima2WorldMatrix() const EddyTry
{
  return _hdr.sampling_mat();
} EddyCatch
// Returns the inverse of the sampling matrix of the header.
NEWMAT::Matrix CudaVolume::World2ImaMatrix() const EddyTry
{
  return _hdr.sampling_mat().i();
} EddyCatch
// Copies the voxel data from the device into ovol. ovol is reinitialised to the
// size of this volume and receives the basic header properties of _hdr.
void CudaVolume::GetVolume(NEWIMAGE::volume<float>& ovol) const EddyTry
{
  thrust::host_vector<float> host_data;
  try {
    host_data = _devec; // Device-to-host transfer
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::GetVolume with message: " << e.what() << std::endl;
    throw;
  }
  ovol.reinitialize(_sz[0],_sz[1],_sz[2]);
  NEWIMAGE::copybasicproperties(_hdr,ovol);
  // The linear buffer is unpacked with x varying fastest, then y, then z
  unsigned int pos = 0;
  for (int z=0; z<ovol.zsize(); z++) {
    for (int y=0; y<ovol.ysize(); y++) {
      for (int x=0; x<ovol.xsize(); x++) {
        ovol(x,y,z) = host_data[pos];
        ++pos;
      }
    }
  }
} EddyCatch
// Copies the spline coefficients from the device into ovol. ovol is
// reinitialised to the size of this volume and receives the basic header
// properties of _hdr.
// Throws EddyException if the coefficients are not currently valid.
void CudaVolume::GetSplineCoefs(NEWIMAGE::volume<float>& ovol) const EddyTry
{
  if (!_spv) throw EddyException("CudaVolume::GetSplineCoefs: Attempt to obtain invalid spline coefficients");
  thrust::host_vector<float> host_coefs;
  try {
    host_coefs = _spcoef; // Device-to-host transfer
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::GetSplineCoefs with message: " << e.what() << std::endl;
    throw;
  }
  ovol.reinitialize(_sz[0],_sz[1],_sz[2]);
  NEWIMAGE::copybasicproperties(_hdr,ovol);
  // The linear buffer is unpacked with x varying fastest, then y, then z
  unsigned int pos = 0;
  for (int z=0; z<ovol.zsize(); z++) {
    for (int y=0; y<ovol.ysize(); y++) {
      for (int x=0; x<ovol.xsize(); x++) {
        ovol(x,y,z) = host_coefs[pos];
        ++pos;
      }
    }
  }
} EddyCatch
// Shared implementation for initialising a CudaVolume from a NEWIMAGE volume.
// If ifvol is true the voxel data of vol is copied to the device; otherwise
// the device vector is only resized to hold a volume of that size. In both
// cases the sizes and basic header properties are taken from vol and the
// spline coefficients are cleared and flagged as invalid.
void CudaVolume::common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                                     bool                           ifvol) EddyTry
{
  if (!ifvol) { // Only the geometry of vol is wanted
    try {
      _devec.resize(vol.xsize()*vol.ysize()*vol.zsize());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::common_assignment_from_newimage_vol after resize() with message: " << e.what() << std::endl;
      throw;
    }
  }
  else { // The data is wanted as well
    thrust::host_vector<float> staging(vol.xsize()*vol.ysize()*vol.zsize());
    unsigned int pos = 0;
    for (NEWIMAGE::volume<float>::fast_const_iterator src=vol.fbegin(); src!=vol.fend(); src++) {
      staging[pos] = *src; // Gather the data in a host-side buffer
      pos++;
    }
    try {
      _devec = staging; // Host-to-device transfer
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::common_assignment_from_newimage_vol after transfer with message: " << e.what() << std::endl;
      throw;
    }
  }
  _sz[0] = vol.xsize();
  _sz[1] = vol.ysize();
  _sz[2] = vol.zsize();
  try {
    _spcoef.clear();
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::common_assignment_from_newimage_vol after clear() with message: " << e.what() << std::endl;
    throw;
  }
  _spv = false;
  // The header is kept as a 1x1x1 volume that only carries the properties of vol
  _hdr.reinitialize(1,1,1);
  NEWIMAGE::copybasicproperties(vol,_hdr);
} EddyCatch
// Calculates cubic spline coefficients for the image ima and stores them in coef.
// The image is first copied into coef, after which the cubic_spline_deconvolution
// kernel is run in place once along each of the three directions.
// \param[in]  sz   Matrix size of the volume (three elements are read).
// \param[in]  ima  Image data on the device.
// \param[out] coef Receives the coefficients; must already have the size of ima.
// Throws EddyException if ima and coef differ in size.
void CudaVolume::calculate_spline_coefs(const std::vector<unsigned int>& sz,
                                        const thrust::device_vector<float>& ima,
                                        thrust::device_vector<float>& coef) const EddyTry
{
  if (ima.size() != coef.size()) throw EddyException("CudaVolume::calculate_spline_coefs: Mismatched ima and coef");
  try {
    thrust::copy(ima.begin(),ima.end(),coef.begin()); // Copy image into coef
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::calculate_spline_coefs after copy() with message: " << e.what() << std::endl;
    throw;
  }
  float *cptr = nullptr;
  try {
    cptr = thrust::raw_pointer_cast(coef.data());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::calculate_spline_coefs after raw_pointer_cast() with message: " << e.what() << std::endl;
    throw;
  }
  const float z = -0.267949192431123f; // Valid for cubic spline
  // Length of "burn in" to obtain 1e-8 relative precision.
  // std::fabs is used on purpose: an unqualified abs() can resolve to the C
  // integer version, which would truncate |z| to 0 and reduce the burn in to 1.
  const unsigned int nburn = static_cast<unsigned int>((std::log(1e-8)/std::log(std::fabs(z))) + 1.5);
  std::vector<unsigned int> initn(3);
  // Make sure that burn in is not longer than length of data
  for (unsigned int i=0; i<3; i++) initn[i] = (nburn > sz[i]) ? sz[i] : nburn;
  const int tpb = threads_per_block_deconv;
  // NOTE(review): unlike in Sample(), mirror extrapolation is not mapped here
  // and therefore ends up as PERIODIC — confirm that this is intended.
  EddyKernels::ExtrapType ep = EddyKernels::PERIODIC;
  if (Extrap()==NEWIMAGE::extraslice) ep = EddyKernels::CONSTANT;
  for (unsigned int dir=0; dir<3; dir++) {
    // One thread per line along dir, i.e. the product of the other two sizes
    int nthreads = 1;
    for (unsigned int i=0; i<3; i++) if (i!=dir) nthreads *= sz[i];
    const int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
    EddyKernels::cubic_spline_deconvolution<<<nblocks,tpb>>>(cptr,sz[0],sz[1],sz[2],dir,
                                                             initn[dir],ep,nthreads);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::cubic_spline_deconvolution");
  }
  return;
} EddyCatch
// Returns a helper object constructed from this 4D volume and the index indx.
// Throws EddyException if indx is not smaller than the size of the fourth dimension.
CudaVolume3D_2_4D_Helper CudaVolume4D::operator[](unsigned int indx) EddyTry
{
  if (!(indx < _sz[3])) throw EddyException("CudaVolume4D::operator[]: indx out of range");
  return CudaVolume3D_2_4D_Helper(*this,indx);
} EddyCatch
// Copies the data of the 3D volume vol into position indx of this 4D volume.
// Throws EddyException if indx is out of range, if any of the three spatial
// sizes differ, or if NEWIMAGE::samedim() rejects the two headers.
void CudaVolume4D::SetVolume(unsigned int indx, const CudaVolume& vol) EddyTry
{
  if (!(indx < _sz[3])) throw EddyException("CudaVolume4D::SetVolume: indx out of range");
  const bool same_matrix = (_sz[0] == vol._sz[0]) && (_sz[1] == vol._sz[1]) && (_sz[2] == vol._sz[2]);
  if (!same_matrix) throw EddyException("CudaVolume4D::SetVolume: Mismatched volumes");
  if (!NEWIMAGE::samedim(_hdr,vol._hdr,3)) throw EddyException("CudaVolume4D::SetVolume: Mismatched volumes");
  // The destination starts indx*Size() elements into the 4D buffer
  thrust::copy(vol._devec.begin(),vol._devec.end(),_devec.begin()+indx*this->Size());
} EddyCatch
// Element-wise addition of cv to this 4D volume.
// Throws EddyException if the volumes compare unequal or this volume is empty.
CudaVolume4D& CudaVolume4D::operator+=(const CudaVolume4D& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator+=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume4D::operator+=: Empty volume");
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      cv._devec.begin(),
                      _devec.begin(),
                      thrust::plus<float>());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator+= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Element-wise subtraction of cv from this 4D volume.
// Throws EddyException if the volumes compare unequal or this volume is empty.
CudaVolume4D& CudaVolume4D::operator-=(const CudaVolume4D& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator-=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume4D::operator-=: Empty volume");
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      cv._devec.begin(),
                      _devec.begin(),
                      thrust::minus<float>());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator-= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Element-wise multiplication of this 4D volume by another 4D volume.
// Throws EddyException if the volumes compare unequal or this volume is empty.
CudaVolume4D& CudaVolume4D::operator*=(const CudaVolume4D& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator*=(const CudaVolume4D&): Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume4D::operator*=(const CudaVolume4D&): Empty volume");
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      cv._devec.begin(),
                      _devec.begin(),
                      thrust::multiplies<float>());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator*=(const CudaVolume4D&) with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Multiplies each 3D volume of this 4D volume, voxel by voxel, by the 3D volume cv.
// Throws EddyException if the volumes compare unequal or this volume is empty.
CudaVolume4D& CudaVolume4D::operator*=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator*=(const CudaVolume&): Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume4D::operator*=(const CudaVolume&): Empty volume");
  unsigned int vol = 0;
  while (vol < _sz[3]) {
    try {
      thrust::transform(this->volbegin(vol),this->volend(vol),cv._devec.begin(),this->volbegin(vol),thrust::multiplies<float>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume4D::operator*= with index: " << vol << ", and message: " << e.what() << std::endl;
      throw;
    }
    ++vol;
  }
  return *this;
} EddyCatch
// Returns a 3D volume holding the voxel-wise sum over the fourth dimension.
// The result takes its geometry from this volume through SetHdr(), starts out
// as a copy of volume 0 and has the remaining volumes added one at a time.
const CudaVolume CudaVolume4D::SumAlongFourthDim() const EddyTry
{
  CudaVolume total;
  total.SetHdr(*this);
  try {
    thrust::copy(this->volbegin(0),this->volend(0),total._devec.begin());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::SumAlongFourthDim when copying first volume: with message: " << e.what() << std::endl;
    throw;
  }
  unsigned int vol = 1;
  while (vol < _sz[3]) {
    try {
      thrust::transform(total._devec.begin(),total._devec.end(),this->volbegin(vol),total._devec.begin(),thrust::plus<float>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume4D::SumAlongFourthDim with index: " << vol << ", and message: " << e.what() << std::endl;
      throw;
    }
    ++vol;
  }
  return total;
} EddyCatch
// Fills the three volumes of this 4D volume from the x-, y- and z-coordinates
// in coord, using the CopyAndMultiply kernel with the voxel size of the
// corresponding direction as its scalar argument.
// Throws EddyException unless the fourth dimension is 3 and the first three
// sizes agree with those of coord.
void CudaVolume4D::CoordinatesToDisplacementField(const CudaImageCoordinates& coord) EddyTry
{
  if (Size(3) != 3) throw EddyException("CudaVolume4D::CoordinatesToDisplacementField: Only defined when fourth dimension is 3");
  const bool same_matrix = (Size(0) == coord.Size(0)) && (Size(1) == coord.Size(1)) && (Size(2) == coord.Size(2));
  if (!same_matrix) {
    throw EddyException("CudaVolume4D::CoordinatesToDisplacementField: Mismatch between volume and coordinates");
  }
  try {
    // One thread per coordinate, rounding the number of blocks upwards
    const int block_sz = threads_per_block;
    const int ncoord = coord.Size();
    const int grid_sz = ncoord / block_sz + ((ncoord % block_sz) ? 1 : 0);
    // x
    EddyKernels::CopyAndMultiply<<<grid_sz,block_sz>>>(coord.XPtr(),coord.Size(),this->Vxs(0),this->GetPtr(0));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::CopyAndMultiply: X");
    // y
    EddyKernels::CopyAndMultiply<<<grid_sz,block_sz>>>(coord.YPtr(),coord.Size(),this->Vxs(1),this->GetPtr(1));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::CopyAndMultiply: Y");
    // z
    EddyKernels::CopyAndMultiply<<<grid_sz,block_sz>>>(coord.ZPtr(),coord.Size(),this->Vxs(2),this->GetPtr(2));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::CopyAndMultiply: Z");
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::CoordinatesToDisplacementField with index message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Divides every 3D volume of this 4D volume by divisor through
// cuda_volume_utils::divide_within_mask(), which also receives the mask.
// Throws EddyException if divisor or mask compares unequal to this volume, or
// if this volume is empty.
void CudaVolume4D::DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask) EddyTry
{
  // The message now names CudaVolume4D; it previously pointed at the 3D class
  if (divisor!=*this || mask!=*this) throw EddyException("CudaVolume4D::DivideWithinMask: Dimension mismatch");
  if (!this->Size()) throw EddyException("CudaVolume4D::DivideWithinMask: Empty volume");
  for (unsigned int i=0; i<_sz[3]; i++) {
    cuda_volume_utils::divide_within_mask(divisor._devec,mask._devec,this->volbegin(i),this->volend(i));
  }
} EddyCatch
// Smoothing restricted to a mask, applied to every 3D volume. Each masked
// volume is smoothed through cuda_volume_utils::smooth(), a copy of the mask is
// smoothed with the same fwhm, and the result is divided by the smoothed mask
// through DivideWithinMask() and masked once more.
void CudaVolume4D::Smooth(float fwhm, const CudaVolume& mask) EddyTry
{
  (*this) *= mask;                          // Zero everything outside the mask
  unsigned int vol = 0;
  while (vol < _sz[3]) {                    // Smooth the masked volumes one by one
    cuda_volume_utils::smooth(fwhm,_sz,_hdr,this->GetPtr(vol));
    ++vol;
  }
  CudaVolume smoothed_mask(mask);           // Working copy of the mask
  smoothed_mask.Smooth(fwhm);
  DivideWithinMask(smoothed_mask,mask);     // Normalise by the smoothed mask
  (*this) *= mask;                          // Mask again
} EddyCatch
// Sets every element of the 4D volume to val.
CudaVolume4D& CudaVolume4D::operator=(float val) EddyTry
{
  try {
    thrust::fill(_devec.begin(),
                 _devec.end(),
                 val);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator= " << "with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Geometry comparison with another 4D volume, using only the first three
// dimensions: the matrix sizes and voxel sizes of both are handed to
// cuda_volume_utils::same_dim_size(). Neither the fourth dimension nor the
// voxel values are inspected.
bool CudaVolume4D::operator==(const CudaVolume4D& rhs) const EddyTry {
  std::vector<int> my_size{ static_cast<int>(_sz[0]),
                            static_cast<int>(_sz[1]),
                            static_cast<int>(_sz[2]) };
  std::vector<int> other_size{ static_cast<int>(rhs._sz[0]),
                               static_cast<int>(rhs._sz[1]),
                               static_cast<int>(rhs._sz[2]) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> other_vxs{ rhs._hdr.xdim(), rhs._hdr.ydim(), rhs._hdr.zdim() };
  return cuda_volume_utils::same_dim_size(my_size,my_vxs,other_size,other_vxs);
} EddyCatch
// Geometry comparison with a 3D CudaVolume: the first three matrix sizes and
// the voxel sizes of both are handed to cuda_volume_utils::same_dim_size().
// Voxel values are not inspected.
bool CudaVolume4D::operator==(const CudaVolume& rhs) const EddyTry {
  std::vector<int> my_size{ static_cast<int>(_sz[0]),
                            static_cast<int>(_sz[1]),
                            static_cast<int>(_sz[2]) };
  std::vector<int> other_size{ static_cast<int>(rhs.Size(0)),
                               static_cast<int>(rhs.Size(1)),
                               static_cast<int>(rhs.Size(2)) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> other_vxs{ rhs.Vxs(0), rhs.Vxs(1), rhs.Vxs(2) };
  return cuda_volume_utils::same_dim_size(my_size,my_vxs,other_size,other_vxs);
} EddyCatch
// Geometry comparison with a host-side NEWIMAGE volume: the first three matrix
// sizes and the voxel sizes of both are handed to
// cuda_volume_utils::same_dim_size(). Voxel values are not inspected.
bool CudaVolume4D::operator==(const NEWIMAGE::volume<float>& rhs) const EddyTry {
  std::vector<int> my_size{ static_cast<int>(_sz[0]),
                            static_cast<int>(_sz[1]),
                            static_cast<int>(_sz[2]) };
  std::vector<int> other_size{ static_cast<int>(rhs.xsize()),
                               static_cast<int>(rhs.ysize()),
                               static_cast<int>(rhs.zsize()) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> other_vxs{ rhs.xdim(), rhs.ydim(), rhs.zdim() };
  return cuda_volume_utils::same_dim_size(my_size,my_vxs,other_size,other_vxs);
} EddyCatch
// Returns the size along dimension indx (0 to 3).
// Throws EddyException for any other index.
unsigned int CudaVolume4D::Size(unsigned int indx) const EddyTry
{
  if (indx >= 4) throw EddyException("CudaVolume4D::Size: Index out of range");
  return _sz[indx];
} EddyCatch
// Returns the voxel size stored in the header for dimension indx
// (0 -> xdim, 1 -> ydim, 2 -> zdim). Throws EddyException for any other index.
float CudaVolume4D::Vxs(unsigned int indx) const EddyTry
{
  if (indx >= 3) throw EddyException("CudaVolume4D::Vxs: Index out of range");
  float voxel_size;
  if (indx == 0) voxel_size = _hdr.xdim();
  else if (indx == 1) voxel_size = _hdr.ydim();
  else voxel_size = _hdr.zdim();
  return voxel_size;
} EddyCatch
// Returns the sampling matrix of the header.
NEWMAT::Matrix CudaVolume4D::Ima2WorldMatrix() const EddyTry
{
  return _hdr.sampling_mat();
} EddyCatch
// Returns the inverse of the sampling matrix of the header.
NEWMAT::Matrix CudaVolume4D::World2ImaMatrix() const EddyTry
{
  return _hdr.sampling_mat().i();
} EddyCatch
/****************************************************************//**
*
* A highly specialised function intended for the case where the 4D
* CudaVolume is a displacement field in which only one direction is
* non-zero. It calculates the derivative of that field at the voxel
* centres, along that same direction, which also yields the Jacobian
* determinant.
* \param[in] dir Selects the non-zero volume, and with it the
* direction of differentiation. Only 0 (x) and 1 (y) are accepted.
* \param[in] mask Specifies where the field is valid. Optional: an
* empty mask selects the unmasked kernels.
* \param[out] deriv A 3D volume of the local derivative/Jacobian.
* \param[in] add_one If true, 1.0 is added to each derivative to
* directly make it the Jacobian determinant.
*
********************************************************************/
void CudaVolume4D::SampleTrilinearDerivOnVoxelCentres(unsigned int      dir,
                                                      const CudaVolume& mask,
                                                      CudaVolume&       deriv,
                                                      bool              add_one) const EddyTry
{
  if (Size(3) != 3) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: Only defined when fourth dimension is 3");
  if (dir > 1) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: Only defined for x- and y-directions");
  const auto extrap = Extrap();
  const bool extrap_ok = (extrap==NEWIMAGE::extraslice || extrap==NEWIMAGE::periodic || extrap==NEWIMAGE::mirror);
  if (!extrap_ok) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: Invalid extrapolation option");
  if (deriv!=*this) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: derive dimension mismatch");
  const bool has_mask = (mask.Size() != 0);
  if (has_mask && mask!=*this) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: mask dimension mismatch");

  // Translate the NEWIMAGE extrapolation setting to the one used by the kernels
  EddyKernels::ExtrapType kernel_ep = EddyKernels::PERIODIC;
  if (extrap==NEWIMAGE::extraslice) kernel_ep = EddyKernels::CONSTANT;
  else if (extrap==NEWIMAGE::mirror) kernel_ep = EddyKernels::MIRROR;

  // Launch geometry: Size(0) threads per block and Size(2) blocks
  const int block_sz = static_cast<int>(Size(0));
  const int grid_sz = static_cast<int>(Size(2));
  const int total_threads = grid_sz*block_sz;

  // dir can only be 0 or 1 here, so "not x" means y
  const bool along_x = (dir==0);
  if (has_mask && along_x) { // PE in x, masked
    EddyKernels::masked_sample_derivs_along_x<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),GetPtr(dir),mask.GetPtr(),add_one,kernel_ep,deriv.GetPtr(),total_threads);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::masked_sample_derivs_along_x");
  }
  else if (has_mask) { // PE in y, masked
    EddyKernels::masked_sample_derivs_along_y<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),GetPtr(dir),mask.GetPtr(),add_one,kernel_ep,deriv.GetPtr(),total_threads);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::masked_sample_derivs_along_y");
  }
  else if (along_x) { // PE in x, no mask
    EddyKernels::sample_derivs_along_x<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),GetPtr(dir),add_one,kernel_ep,deriv.GetPtr(),total_threads);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::sample_derivs_along_x");
  }
  else { // PE in y, no mask
    EddyKernels::sample_derivs_along_y<<<grid_sz,block_sz>>>(Size(0),Size(1),Size(2),GetPtr(dir),add_one,kernel_ep,deriv.GetPtr(),total_threads);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::sample_derivs_along_y");
  }
} EddyCatch
// Downloads the complete 4D data set from the device into ovol.
// ovol is re-dimensioned to the stored sizes and is given the basic
// properties of the stored header before the voxel data is written.
// A thrust::system_error from the transfer is reported on std::cerr and rethrown.
void CudaVolume4D::GetVolume(NEWIMAGE::volume4D<float>& ovol) const EddyTry
{
  // Shape the output and copy the header before any data is moved
  ovol.reinitialize(_sz[0],_sz[1],_sz[2],_sz[3]);
  NEWIMAGE::copybasicproperties(_hdr,ovol);

  // The assignment below is what performs the device -> host transfer
  thrust::host_vector<float> host_copy;
  try {
    host_copy = _devec;
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::GetVolume_1: with message: " << e.what() << std::endl;
    throw;
  }

  // Write the host copy into the volume in storage order
  NEWIMAGE::volume<float>::nonsafe_fast_iterator dst = ovol.nsfbegin();
  for (unsigned int k=0; k<host_copy.size(); ++k, ++dst) *dst = host_copy[k];
} EddyCatch
// Downloads the 3D volume with index indx from the device into ovol.
// ovol is re-dimensioned to the stored 3D size and is given the basic
// properties of the stored header.
// Throws EddyException if indx is out of range, if the device->host copy
// fails, or if anything goes wrong while filling ovol.
void CudaVolume4D::GetVolume(unsigned int indx, NEWIMAGE::volume<float>& ovol) const EddyTry
{
  if (indx >= _sz[3]) throw EddyException("CudaVolume4D::GetVolume(indx,ovol): indx out of range");
  ovol.reinitialize(_sz[0],_sz[1],_sz[2]);
  NEWIMAGE::copybasicproperties(_hdr,ovol); // Copy header
  // Host staging buffer. std::vector owns the memory, so it is released on
  // every exit path (including exceptions) without manual delete[] calls.
  std::vector<float> on_host(this->Size());
  hipError_t status = hipMemcpy(on_host.data(),this->GetPtr(indx),on_host.size()*sizeof(float),hipMemcpyDeviceToHost);
  if (status != hipSuccess) {
    throw EddyException("CudaVolume4D::GetVolume(indx,ovol): Failed to copy memory from device: hipMemcpy returned an error: " + EddyCudaHelperFunctions::cudaError2String(status));
  }
  try {
    unsigned int i=0;
    for (auto it=ovol.nsfbegin(); it!=ovol.nsfend(); ++it, ++i) *it = on_host[i];
  }
  catch(...) {
    // Any failure while filling the volume is reported with a single generic message
    throw EddyException("CudaVolume4D::GetVolume(indx,ovol): Unknown failure");
  }
  return;
} EddyCatch
// Shared implementation for (re)initialising this object from a NEWIMAGE volume.
//  vol            Volume supplying the dimensions, the header and, optionally, the data.
//  tsize_override If non-zero, used as the size of the fourth dimension instead of
//                 vol.tsize(). Cannot be combined with ifvol.
//  ifvol          If true, the voxel values of vol are uploaded to the device as well.
// The size and header members are only updated after the device storage has been
// set up. thrust::system_error is reported on std::cerr and rethrown.
void CudaVolume4D::common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                                       unsigned int                   tsize_override,
                                                       bool                           ifvol) EddyTry
{
  if (ifvol && tsize_override!=0) {
    throw EddyException("CudaVolume4D::common_assignment_from_newimage_vol: tsize_override cannot be combined with ifvol");
  }

  // Size of the fourth dimension: taken from vol unless overridden
  int tsize;
  if (tsize_override == 0) tsize = vol.tsize();
  else tsize = static_cast<int>(tsize_override);
  unsigned int size = static_cast<unsigned int>(vol.xsize()*vol.ysize()*vol.zsize()*tsize);

  // Make room on the device
  try {
    _devec.resize(size);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::common_assignment_from_newimage_vol after memory allocation with message: " << e.what() << std::endl;
    throw;
  }

  if (ifvol) { // Caller wants the voxel data transferred too
    thrust::host_vector<float> staging(size);
    unsigned int k=0;
    auto src = vol.fbegin();
    while (src != vol.fend()) { // Fill vector on host
      staging[k] = *src;
      ++src; ++k;
    }
    try {
      _devec = staging; // Host -> device transfer
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume4D::common_assignment_from_newimage_vol after transfer with message: " << e.what() << std::endl;
      throw;
    }
  }

  // Record the dimensions and keep a data-less (1x1x1) copy of the header
  _sz[0] = vol.xsize();
  _sz[1] = vol.ysize();
  _sz[2] = vol.zsize();
  _sz[3] = tsize;
  _hdr.reinitialize(1,1,1);
  NEWIMAGE::copybasicproperties(vol,_hdr);
} EddyCatch
/****************************************************************//**
*
* Gaussian smoothing, performed in place, of the image that imaptr
* points to. The smoothing is done as three consecutive 1D
* convolutions (x, y, z) that alternate between the image buffer
* and a temporary buffer of the same size.
*
* \param[in] fwhm FWHM in mm
* \param[in] sz 3 element vector with ima size in x, y and z
* \param[in] hdr Header for image pointed to by imaptr. Supplies the
* voxel sizes used to convert fwhm from mm to voxels.
* \param[in,out] imaptr Pointer to image on device
* \throw EddyException if the final device->device copy fails
*
********************************************************************/
void cuda_volume_utils::smooth(float                           fwhm,
                               const std::vector<unsigned int>& sz,
                               const NEWIMAGE::volume<float>&   hdr,
                               float                            *imaptr) EddyTry
{
  const unsigned int nvox = sz[0]*sz[1]*sz[2];

  // One Gaussian kernel per direction, each with fwhm expressed in voxels
  thrust::device_vector<float> kern_x = cuda_volume_utils::gaussian_1D_kernel(fwhm/hdr.xdim());
  thrust::device_vector<float> kern_y = cuda_volume_utils::gaussian_1D_kernel(fwhm/hdr.ydim());
  thrust::device_vector<float> kern_z = cuda_volume_utils::gaussian_1D_kernel(fwhm/hdr.zdim());

  // Temporary buffer that the passes ping-pong with
  thrust::device_vector<float> scratch(nvox);
  float *scratch_ptr = thrust::raw_pointer_cast(scratch.data());

  // Launch geometry: enough blocks to cover all voxels
  int tpb = threads_per_block_convolve_1D;
  int nthreads = nvox;
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  // x: image -> scratch
  EddyKernels::convolve_1D<<<nblocks,tpb>>>(sz[0],sz[1],sz[2],imaptr,thrust::raw_pointer_cast(kern_x.data()),
                                            kern_x.size(),0,nthreads,scratch_ptr);
  // y: scratch -> image
  EddyKernels::convolve_1D<<<nblocks,tpb>>>(sz[0],sz[1],sz[2],scratch_ptr,thrust::raw_pointer_cast(kern_y.data()),
                                            kern_y.size(),1,nthreads,imaptr);
  // z: image -> scratch
  EddyKernels::convolve_1D<<<nblocks,tpb>>>(sz[0],sz[1],sz[2],imaptr,thrust::raw_pointer_cast(kern_z.data()),
                                            kern_z.size(),2,nthreads,scratch_ptr);

  // The final result sits in scratch; copy it back to the caller's buffer
  hipError_t status = hipMemcpy(imaptr,scratch_ptr,nvox*sizeof(float),hipMemcpyDeviceToDevice);
  if (status != hipSuccess) {
    throw EddyException("cuda_volume_utils::smooth: Device->device copy failed: hipMemcpy returned an error: " + EddyCudaHelperFunctions::cudaError2String(status));
  }
} EddyCatch
// Builds a normalised (unit sum) 1D Gaussian kernel on the host.
//  fwhm  Full width at half maximum in voxels. Must be finite and non-negative.
// The kernel has an odd number of taps, 2*round(6*sigma)+1, centred on the middle
// tap, where sigma = fwhm/sqrt(8*ln(2)).
// A zero (or vanishingly small) fwhm yields the single-tap kernel {1}, i.e. the
// identity convolution, instead of the NaN that evaluating 0/0 would produce.
// Throws EddyException for a negative or non-finite fwhm, for which the
// float -> unsigned conversion of the kernel size would be undefined.
thrust::host_vector<float> cuda_volume_utils::gaussian_1D_kernel(float fwhm) EddyTry // fwhm in voxels
{
  if (!std::isfinite(fwhm) || fwhm < 0.0f) {
    throw EddyException("cuda_volume_utils::gaussian_1D_kernel: fwhm must be finite and non-negative");
  }
  float s = fwhm/std::sqrt(8.0*std::log(2.0)); // FWHM -> standard deviation
  const double two_var = 2.0*sqr(s);           // Denominator of the exponent
  if (!(two_var > 0.0)) { // Degenerate width: return a delta function
    thrust::host_vector<float> delta(1);
    delta[0] = 1.0f;
    return(delta);
  }
  unsigned int sz = 6*s + 0.5; // Half-width, rounded to nearest
  sz = 2*sz+1;                 // Full, odd, width
  thrust::host_vector<float> rval(sz);
  double sum=0.0;
  for (unsigned int i=0; i<sz; i++) {
    rval[i] = std::exp(-sqr(int(i)-int(sz)/2)/two_var);
    sum += rval[i];
  }
  for (unsigned int i=0; i<sz; i++) rval[i] /= sum; // Normalise to unit sum
  return(rval);
} EddyCatch
/****************************************************************//**
*
* In-place, element-wise division of the image in [imbegin,imend)
* by the image in divisor. Only elements for which the matching
* element of mask is non-zero are divided; the others are left
* as they were.
*
* \param[in] divisor Image to divide by
* \param[in] mask Only divide where mask is non-zero
* \param[in,out] imbegin Iterator to first element of image to divide
* \param[in,out] imend Iterator to one-past-last element of image to divide
*
* A thrust::system_error is reported on std::cerr and rethrown.
*
********************************************************************/
void cuda_volume_utils::divide_within_mask(const thrust::device_vector<float>&    divisor,
                                           const thrust::device_vector<float>&    mask,
                                           thrust::device_vector<float>::iterator imbegin,
                                           thrust::device_vector<float>::iterator imend) EddyTry
{
  try {
    // mask is the stencil: identity makes any non-zero mask value select the element
    thrust::transform_if(imbegin,
                         imend,
                         divisor.begin(),
                         mask.begin(),
                         imbegin,
                         thrust::divides<float>(),
                         thrust::identity<float>());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in cuda_volume_utils::divide_within_mask: with message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Decides whether two images have the same geometry.
//  sz1, sz2    Matrix sizes; the first three elements are compared for equality.
//  vxs1, vxs2  Voxel sizes; the first three elements are compared with a
//              relative tolerance of 5e-5 times their sum.
// Returns true only if all three sizes and all three voxel sizes agree.
bool cuda_volume_utils::same_dim_size(const std::vector<int>&   sz1,
                                      const std::vector<float>& vxs1,
                                      const std::vector<int>&   sz2,
                                      const std::vector<float>& vxs2) EddyTry
{
  // Matrix sizes must match exactly
  if (sz1[0]!=sz2[0] || sz1[1]!=sz2[1] || sz1[2]!=sz2[2]) return(false);
  // Voxel sizes may differ by a small relative amount
  for (int d=0; d<3; ++d) {
    if (!(std::fabs(vxs1[d]-vxs2[d]) <= 5e-5 * (vxs1[d]+vxs2[d]))) return(false);
  }
  return(true);
} EddyCatch
// Copies the 3D volume threed into the volume with index _indx of the 4D
// volume this helper refers to.
// Throws EddyException if the matrix sizes differ or if NEWIMAGE::samedim
// rejects the two headers.
void CudaVolume3D_2_4D_Helper::operator=(const CudaVolume& threed) EddyTry
{
  const bool sizes_agree = _fourd._sz[0] == threed._sz[0] &&
                           _fourd._sz[1] == threed._sz[1] &&
                           _fourd._sz[2] == threed._sz[2];
  if (!sizes_agree) throw EddyException("CudaVolume4D::operator=(CudaVolume): Mismatched 3D volume");
  if (!NEWIMAGE::samedim(_fourd._hdr,threed._hdr,3)) throw EddyException("CudaVolume4D::operator=(CudaVolume): Mismatched 3D volume");
  // Device -> device copy into the slot for volume _indx
  thrust::copy(threed._devec.begin(),threed._devec.end(),_fourd.volbegin(_indx));
} EddyCatch
// Applies the affine transform A to the coordinates on the device.
// Only the first three rows of A (twelve elements) are passed to the kernel.
// The current _init flag is passed to the kernel as well, and the
// coordinates are flagged as initialised afterwards.
void CudaImageCoordinates::Transform(const NEWMAT::Matrix& A) EddyTry
{
  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::affine_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                             A(1,1),A(1,2),A(1,3),A(1,4),
                                                             A(2,1),A(2,2),A(2,3),A(2,4),
                                                             A(3,1),A(3,2),A(3,3),A(3,4),
                                                             XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::affine_transform_coordinates");
  _init=true;
} EddyCatch
// Slice-wise affine transform of the coordinates on the device.
//  A  One matrix per slice; A.size() must equal Size(2).
// The matrices are packed into one device array before the kernel is
// launched. The coordinates are flagged as initialised afterwards.
// Throws EddyException if the number of matrices does not match.
void CudaImageCoordinates::Transform(const std::vector<NEWMAT::Matrix>& A) EddyTry
{
  if (A.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::Transform: Mismatched vector of matrices A");

  // Upload all matrices as one flat array
  thrust::device_vector<float> packedA = this->repack_vector_of_matrices(A);
  float *packedA_ptr = thrust::raw_pointer_cast(packedA.data());

  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::slice_wise_affine_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,packedA_ptr,
                                                                        XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_wise_affine_transform_coordinates");
  _init=true;
} EddyCatch
// General transform of the coordinates on the device: two affine matrices
// (A and B) together with the three component volumes of the displacement
// field dfield are handed to the general_transform_coordinates kernel.
// Only the first three rows of A and B are passed on.
// The coordinates are flagged as initialised afterwards.
void CudaImageCoordinates::Transform(const NEWMAT::Matrix&     A,
                                     const EDDY::CudaVolume4D& dfield,
                                     const NEWMAT::Matrix&     B) EddyTry
{
  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::general_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                              dfield.GetPtr(0),dfield.GetPtr(1),dfield.GetPtr(2),
                                                              A(1,1),A(1,2),A(1,3),A(1,4),
                                                              A(2,1),A(2,2),A(2,3),A(2,4),
                                                              A(3,1),A(3,2),A(3,3),A(3,4),
                                                              B(1,1),B(1,2),B(1,3),B(1,4),
                                                              B(2,1),B(2,2),B(2,3),B(2,4),
                                                              B(3,1),B(3,2),B(3,3),B(3,4),
                                                              XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::general_transform_coordinates");
  _init=true;
} EddyCatch
// Slice-wise version of the general transform: one A and one B matrix per
// slice, together with the three component volumes of dfield.
//  A, B  Vectors of matrices; both must have Size(2) elements.
// The coordinates are flagged as initialised afterwards.
// Throws EddyException if either vector has the wrong number of matrices.
void CudaImageCoordinates::Transform(const std::vector<NEWMAT::Matrix>& A,
                                     const EDDY::CudaVolume4D&          dfield,
                                     const std::vector<NEWMAT::Matrix>& B) EddyTry
{
  if (A.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::Transform: Mismatched vector of matrices A");
  if (B.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::Transform: Mismatched vector of matrices B");

  // Upload both sets of matrices as flat arrays
  thrust::device_vector<float> packedA = this->repack_vector_of_matrices(A);
  thrust::device_vector<float> packedB = this->repack_vector_of_matrices(B);

  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::slice_wise_general_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                                         dfield.GetPtr(0),dfield.GetPtr(1),dfield.GetPtr(2),
                                                                         thrust::raw_pointer_cast(packedA.data()),
                                                                         thrust::raw_pointer_cast(packedB.data()),
                                                                         XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_wise_general_transform_coordinates");
  _init=true;
} EddyCatch
// Computes slice-to-volume x-, y- and z-coordinates on the device.
//  M1, M2  Matrices whose upper-left 3x3 part must be diagonal.
//  R       One matrix per slice; R.size() must equal Size(2).
//  dfield  Displacement field whose three component volumes are passed to the kernel.
//  zcoord  Volume whose device buffer is passed to the kernel
//          (presumably to receive z-coordinates - confirm against the kernel).
// The coordinates are flagged as initialised afterwards.
// Throws EddyException if R has the wrong size or if M1 or M2 has non-zero
// off-diagonal elements in its upper-left 3x3 part.
void CudaImageCoordinates::GetSliceToVolXYZCoord(const NEWMAT::Matrix&              M1,
                                                 const std::vector<NEWMAT::Matrix>& R,
                                                 const EDDY::CudaVolume4D&          dfield,
                                                 const NEWMAT::Matrix&              M2,
                                                 EDDY::CudaVolume&                  zcoord) EddyTry
{
  if (R.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::GetSliceToVolXYZCoord: Mismatched vector of matrices R");
  // The exceptions below used to be constructed without being thrown,
  // so invalid matrices were silently accepted.
  if (M1(1,2) != 0.0 || M1(1,3) != 0.0 || M1(2,1) != 0.0 || M1(2,3) != 0.0 || M1(3,1) != 0.0 || M1(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolXYZCoord: Invalid M1 matrix");
  }
  if (M2(1,2) != 0.0 || M2(1,3) != 0.0 || M2(2,1) != 0.0 || M2(2,3) != 0.0 || M2(3,1) != 0.0 || M2(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolXYZCoord: Invalid M2 matrix");
  }
  // Upload the matrices as flat arrays
  thrust::device_vector<float> dM1 = this->repack_matrix(M1);
  thrust::device_vector<float> dR = this->repack_vector_of_matrices(R);
  thrust::device_vector<float> dM2 = this->repack_matrix(M2);
  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
  EddyKernels::slice_to_vol_xyz_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,dfield.GetPtr(0),dfield.GetPtr(1),
                                                             dfield.GetPtr(2),thrust::raw_pointer_cast(dM1.data()),
                                                             thrust::raw_pointer_cast(dR.data()),thrust::raw_pointer_cast(dM2.data()),
                                                             XPtr(),YPtr(),ZPtr(),zcoord.GetPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_to_vol_xyz_coordinates");
  _init=true;
  return;
} EddyCatch
// Runs the slice_to_vol_z_coordinates kernel on the coordinates on the device.
//  M1, M2  Matrices whose upper-left 3x3 part must be diagonal.
//  R       One matrix per slice; R.size() must equal Size(2).
//  dfield  Displacement field whose three component volumes are passed to the kernel.
// The coordinates are flagged as initialised afterwards.
// Throws EddyException if R has the wrong size or if M1 or M2 has non-zero
// off-diagonal elements in its upper-left 3x3 part.
void CudaImageCoordinates::GetSliceToVolZCoord(const NEWMAT::Matrix&              M1,
                                               const std::vector<NEWMAT::Matrix>& R,
                                               const EDDY::CudaVolume4D&          dfield,
                                               const NEWMAT::Matrix&              M2) EddyTry
{
  if (R.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::GetSliceToVolZCoord: Mismatched vector of matrices R");
  // The exceptions below used to be constructed without being thrown,
  // so invalid matrices were silently accepted.
  if (M1(1,2) != 0.0 || M1(1,3) != 0.0 || M1(2,1) != 0.0 || M1(2,3) != 0.0 || M1(3,1) != 0.0 || M1(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolZCoord: Invalid M1 matrix");
  }
  if (M2(1,2) != 0.0 || M2(1,3) != 0.0 || M2(2,1) != 0.0 || M2(2,3) != 0.0 || M2(3,1) != 0.0 || M2(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolZCoord: Invalid M2 matrix");
  }
  // Upload the matrices as flat arrays
  thrust::device_vector<float> dM1 = this->repack_matrix(M1);
  thrust::device_vector<float> dR = this->repack_vector_of_matrices(R);
  thrust::device_vector<float> dM2 = this->repack_matrix(M2);
  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
  EddyKernels::slice_to_vol_z_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,dfield.GetPtr(0),dfield.GetPtr(1),
                                                           dfield.GetPtr(2),thrust::raw_pointer_cast(dM1.data()),
                                                           thrust::raw_pointer_cast(dR.data()),thrust::raw_pointer_cast(dM2.data()),
                                                           XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_to_vol_z_coordinates");
  _init=true;
  return;
} EddyCatch
// Subtracts the coordinates in rhs from the coordinates in this object.
// If this object has not been initialised yet, init_coord() is called first.
// If rhs has not been initialised, no data is read from it; the
// implicit_coord_sub kernel is run on this object's coordinates instead.
// Otherwise the three coordinate arrays are subtracted element-wise.
// Throws EddyException on size mismatch. A thrust::system_error is reported
// on std::cerr and rethrown.
CudaImageCoordinates& CudaImageCoordinates::operator-=(const CudaImageCoordinates& rhs) EddyTry
{
  if (this->Size() != rhs.Size()) throw EddyException("CudaImageCoordinates::operator-=: Size mismatch.");
  if (!_init) init_coord();
  if (!rhs._init) {
    int tpb = threads_per_block;
    unsigned int nthreads = Size();
    int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
    EddyKernels::implicit_coord_sub<<<nblocks,tpb>>>(_xn,_yn,_zn,XPtr(),YPtr(),ZPtr(),nthreads);
    // Check the launch like every other kernel launch in this file does, so
    // that a failure is reported here rather than by some later, unrelated call.
    EddyCudaHelperFunctions::CudaSync("EddyKernels::implicit_coord_sub");
  }
  else {
    try {
      thrust::transform(_x.begin(),_x.end(),rhs._x.begin(),_x.begin(),thrust::minus<float>());
      thrust::transform(_y.begin(),_y.end(),rhs._y.begin(),_y.begin(),thrust::minus<float>());
      thrust::transform(_z.begin(),_z.end(),rhs._z.begin(),_z.begin(),thrust::minus<float>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaImageCoordinates::::operator-= with message: " << e.what() << std::endl;
      throw;
    }
  }
  return(*this);
} EddyCatch
// Returns the coordinates as a Size() x 3 matrix on the host, with x, y and
// z in columns 1, 2 and 3.
// NOTE(review): the device arrays are copied as they are, whatever the value
// of _init - confirm that callers only use this on initialised coordinates.
NEWMAT::Matrix CudaImageCoordinates::AsMatrix() const EddyTry
{
  NEWMAT::Matrix coords(Size(),3);
  // Each assignment below transfers one coordinate array from the device
  thrust::host_vector<float> hx = _x;
  thrust::host_vector<float> hy = _y;
  thrust::host_vector<float> hz = _z;
  for (unsigned int k=0; k<Size(); ++k) {
    const unsigned int row = k+1; // NEWMAT indexing is one-based
    coords(row,1) = hx[k];
    coords(row,2) = hy[k];
    coords(row,3) = hz[k];
  }
  return(coords);
} EddyCatch
// Writes the coordinates to the text file fname as an ascii matrix.
//  n  If non-zero and smaller than Size(), only the first n rows are written;
//     otherwise all rows are written.
void CudaImageCoordinates::Write(const std::string& fname,
                                 unsigned int       n) const EddyTry
{
  NEWMAT::Matrix all_coords = AsMatrix();
  const bool write_subset = n && n<Size();
  if (write_subset) {
    MISCMATHS::write_ascii_matrix(fname,all_coords.Rows(1,n));
  }
  else {
    MISCMATHS::write_ascii_matrix(fname,all_coords);
  }
} EddyCatch
// Runs the make_coordinates kernel on the three coordinate arrays and
// flags the coordinates as initialised.
void CudaImageCoordinates::init_coord() EddyTry
{
  // One thread per coordinate, rounded up to whole blocks
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::make_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,XPtr(),YPtr(),ZPtr(),nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::make_coordinates");
  _init = true;
} EddyCatch
// Packs the first three rows (four columns each) of A, row by row, into a
// twelve element array and returns it as a device vector.
thrust::device_vector<float> CudaImageCoordinates::repack_matrix(const NEWMAT::Matrix& A) EddyTry
{
  thrust::host_vector<float> packed(12);
  for (int r=0; r<3; ++r) {
    for (int c=0; c<4; ++c) packed[4*r+c] = A(r+1,c+1); // NEWMAT indexing is one-based
  }
  return(packed); // The host -> device transfer occurs in this conversion
} EddyCatch
// Packs the first three rows (four columns each) of every matrix in A into
// one flat array, twelve consecutive elements per matrix in row order, and
// returns it as a device vector.
thrust::device_vector<float> CudaImageCoordinates::repack_vector_of_matrices(const std::vector<NEWMAT::Matrix>& A) EddyTry
{
  thrust::host_vector<float> packed(12*A.size());
  for (unsigned int m=0; m<A.size(); ++m) {
    const NEWMAT::Matrix& mat = A[m];
    const unsigned int base = 12*m; // Start of the slot for matrix m
    for (int r=0; r<3; ++r) {
      for (int c=0; c<4; ++c) packed[base+4*r+c] = mat(r+1,c+1); // NEWMAT indexing is one-based
    }
  }
  return(packed); // The host -> device transfer occurs in this conversion
} EddyCatch
// Because of a bug in cuda_fp16.hpp, which gets included by cublas_v2.h, cublas_v2.h
// has to be included before any include files that set up anything related to the std-lib.
// If not, there will be an ambiguity in cuda_fp16.hpp about whether to use the
// old-style C isinf or the new (since C++11) std::isinf.
#include "cublas_v2.h"
#include <cstdlib>
#include <string>
#include <sys/time.h>
#include <vector>
#include <cmath>
#include <cuda.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#include <thrust/inner_product.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#include "armawrap/newmat.h"
#include "miscmaths/miscmaths.h"
#ifndef EXPOSE_TREACHEROUS
#define I_CUDAVOLUME_H_DEFINED_ET
#define EXPOSE_TREACHEROUS // To allow us to use .sampling_mat()
#endif
#include "newimage/newimageall.h"
#pragma pop
#include "EddyHelperClasses.h"
#include "EddyCudaHelperFunctions.h"
#include "EddyKernels.h"
#include "EddyFunctors.h"
#include "CudaVolume.h"
using namespace EDDY;
using namespace EddyKernels;
// Gives this 3D volume the matrix size and header of the 4D volume cv.
// The device data is resized to the new size, and any spline coefficients
// are discarded and flagged as invalid.
// A thrust::system_error is reported on std::cerr and rethrown.
void CudaVolume::SetHdr(const CudaVolume4D& cv) EddyTry
{
  // Take over the three spatial dimensions and the header
  for (unsigned int d=0; d<3; ++d) _sz[d] = cv._sz[d];
  _spv = false;
  _hdr = cv._hdr;
  try {
    _devec.resize(this->Size());
    _spcoef.clear();
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::SetHdr with message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Interpolates this volume at the coordinates in coord and writes the
// sampled values to smpl.
//  coord  Coordinates at which to sample.
//  smpl   Output volume; must compare equal (dimension-wise) to this volume.
// Spline or trilinear interpolation is used depending on Interp(). For
// spline interpolation the coefficients are (re)calculated first if they are
// not flagged as valid.
// Throws EddyException for unsupported interpolation/extrapolation settings
// or mismatched dimensions.
void CudaVolume::Sample(const EDDY::CudaImageCoordinates& coord,
                        CudaVolume&                       smpl) const EddyTry
{
  const bool use_spline = (Interp()==NEWIMAGE::spline);
  if (!use_spline && Interp()!=NEWIMAGE::trilinear) throw EddyException("CudaVolume::Sample: Invalid interpolation option");
  const bool extrap_ok = Extrap()==NEWIMAGE::extraslice || Extrap()==NEWIMAGE::periodic || Extrap()==NEWIMAGE::mirror;
  if (!extrap_ok) throw EddyException("CudaVolume::Sample: Invalid extrapolation option");
  if (smpl!=*this) throw EddyException("CudaVolume::Sample: Dimension mismatch");

  // Make sure that valid spline coefficients exist when they are needed
  if (use_spline && !_spv) {
    if (_spcoef.size() != _devec.size()) {
      try {
        _spcoef.resize(_devec.size());
      }
      catch(thrust::system_error &e) {
        std::cerr << "thrust::system_error thrown in CudaVolume::Sample_1 after call to resize with message: " << e.what() << std::endl;
        throw;
      }
    }
    calculate_spline_coefs(_sz,_devec,_spcoef);
    _spv = true;
  }

  // One thread per voxel, rounded up to whole blocks
  int tpb = threads_per_block_interpolate;
  int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  // Translate the NEWIMAGE extrapolation setting into the kernel's enum
  EddyKernels::ExtrapType ep;
  if (Extrap()==NEWIMAGE::extraslice) ep = EddyKernels::CONSTANT;
  else if (Extrap()==NEWIMAGE::mirror) ep = EddyKernels::MIRROR;
  else ep = EddyKernels::PERIODIC;

  if (use_spline) {
    EddyKernels::spline_interpolate<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),sp_ptr(),
                                                     coord.XPtr(),coord.YPtr(),coord.ZPtr(),
                                                     nthreads,ep,smpl.GetPtr());
    EddyCudaHelperFunctions::CudaSync("EddyKernels::spline_interpolate");
  }
  else {
    EddyKernels::linear_interpolate<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),GetPtr(),
                                                     coord.XPtr(),coord.YPtr(),coord.ZPtr(),
                                                     nthreads,ep,smpl.GetPtr());
    EddyCudaHelperFunctions::CudaSync("EddyKernels::linear_interpolate");
  }
} EddyCatch
// Interpolates this volume at the coordinates in coord. Writes the sampled
// values to smpl and passes the three volumes of dsmpl to the kernel as
// additional outputs (presumably the derivatives in x, y and z).
//  coord  Coordinates at which to sample.
//  smpl   Output volume; must compare equal (dimension-wise) to this volume.
//  dsmpl  4D output with exactly three volumes; must match this volume.
// Spline or trilinear interpolation is used depending on Interp(). For
// spline interpolation the coefficients are (re)calculated first if they are
// not flagged as valid.
// Throws EddyException for unsupported interpolation/extrapolation settings,
// mismatched dimensions or if dsmpl does not hold three volumes.
void CudaVolume::Sample(const EDDY::CudaImageCoordinates& coord,
                        CudaVolume&                       smpl,
                        CudaVolume4D&                     dsmpl) const EddyTry
{
  const bool use_spline = (Interp()==NEWIMAGE::spline);
  if (!use_spline && Interp()!=NEWIMAGE::trilinear) throw EddyException("CudaVolume::Sample: Invalid interpolation option");
  const bool extrap_ok = Extrap()==NEWIMAGE::extraslice || Extrap()==NEWIMAGE::periodic || Extrap()==NEWIMAGE::mirror;
  if (!extrap_ok) throw EddyException("CudaVolume::Sample: Invalid extrapolation option");
  if (smpl!=(*this) || dsmpl!=(*this)) throw EddyException("CudaVolume::Sample: Dimension mismatch");
  if (dsmpl.Size(3)!=3) throw EddyException("CudaVolume::Sample: dsmpl.Size(3) must be 3");

  // Make sure that valid spline coefficients exist when they are needed
  if (use_spline && !_spv) {
    if (_spcoef.size() != _devec.size()) {
      try {
        _spcoef.resize(_devec.size());
      }
      catch(thrust::system_error &e) {
        std::cerr << "thrust::system_error thrown in CudaVolume::Sample_2 after call to resize with message: " << e.what() << std::endl;
        throw;
      }
    }
    calculate_spline_coefs(_sz,_devec,_spcoef);
    _spv = true;
  }

  // One thread per voxel, rounded up to whole blocks
  int tpb = threads_per_block_interpolate;
  int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  // Translate the NEWIMAGE extrapolation setting into the kernel's enum
  EddyKernels::ExtrapType ep;
  if (Extrap()==NEWIMAGE::extraslice) ep = EddyKernels::CONSTANT;
  else if (Extrap()==NEWIMAGE::mirror) ep = EddyKernels::MIRROR;
  else ep = EddyKernels::PERIODIC;

  if (use_spline) {
    EddyKernels::spline_interpolate<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),sp_ptr(),
                                                     coord.XPtr(),coord.YPtr(),coord.ZPtr(),
                                                     nthreads,ep,smpl.GetPtr(),
                                                     dsmpl.GetPtr(0),dsmpl.GetPtr(1),dsmpl.GetPtr(2));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::spline_interpolate");
  }
  else {
    EddyKernels::linear_interpolate<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),GetPtr(),
                                                     coord.XPtr(),coord.YPtr(),coord.ZPtr(),
                                                     nthreads,ep,smpl.GetPtr(),
                                                     dsmpl.GetPtr(0),dsmpl.GetPtr(1),dsmpl.GetPtr(2));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::linear_interpolate");
  }
} EddyCatch
// Runs the valid_voxels kernel for the coordinates in coord and writes its
// output to mask. The per-direction flags returned by ExtrapValid() are
// passed to the kernel together with the matrix size of this volume.
void CudaVolume::ValidMask(const EDDY::CudaImageCoordinates& coord, CudaVolume& mask) const EddyTry
{
  // One thread per voxel, rounded up to whole blocks
  int tpb = threads_per_block_interpolate;
  int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  // One flag for each of x, y and z
  std::vector<bool> epval = ExtrapValid();
  const bool valid_x = epval[0];
  const bool valid_y = epval[1];
  const bool valid_z = epval[2];

  EddyKernels::valid_voxels<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),valid_x,valid_y,valid_z,
                                             coord.XPtr(),coord.YPtr(),coord.ZPtr(),nthreads,mask.GetPtr());
  EddyCudaHelperFunctions::CudaSync("EddyKernels::valid_voxels");
} EddyCatch
// Geometry comparison with another CudaVolume: true if matrix sizes and
// voxel sizes agree as judged by cuda_volume_utils::same_dim_size.
// The voxel data is not compared.
bool CudaVolume::operator==(const CudaVolume& rhs) const EddyTry
{
  const std::vector<int> my_dims{ static_cast<int>(_sz[0]), static_cast<int>(_sz[1]), static_cast<int>(_sz[2]) };
  const std::vector<int> rhs_dims{ static_cast<int>(rhs._sz[0]), static_cast<int>(rhs._sz[1]), static_cast<int>(rhs._sz[2]) };
  const std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  const std::vector<float> rhs_vxs{ rhs._hdr.xdim(), rhs._hdr.ydim(), rhs._hdr.zdim() };
  return cuda_volume_utils::same_dim_size(my_dims,my_vxs,rhs_dims,rhs_vxs);
} EddyCatch
// Geometry comparison with a NEWIMAGE volume: true if matrix sizes and
// voxel sizes agree as judged by cuda_volume_utils::same_dim_size.
// The voxel data is not compared.
bool CudaVolume::operator==(const NEWIMAGE::volume<float>& rhs) const EddyTry
{
  const std::vector<int> my_dims{ static_cast<int>(_sz[0]), static_cast<int>(_sz[1]), static_cast<int>(_sz[2]) };
  const std::vector<int> rhs_dims{ static_cast<int>(rhs.xsize()), static_cast<int>(rhs.ysize()), static_cast<int>(rhs.zsize()) };
  const std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  const std::vector<float> rhs_vxs{ rhs.xdim(), rhs.ydim(), rhs.zdim() };
  return cuda_volume_utils::same_dim_size(my_dims,my_vxs,rhs_dims,rhs_vxs);
} EddyCatch
// Geometry comparison with a 4D volume: true if the three spatial matrix
// sizes and voxel sizes agree as judged by cuda_volume_utils::same_dim_size.
// The fourth dimension and the voxel data are not compared.
bool CudaVolume::operator==(const CudaVolume4D& rhs) const EddyTry
{
  const std::vector<int> my_dims{ static_cast<int>(_sz[0]), static_cast<int>(_sz[1]), static_cast<int>(_sz[2]) };
  const std::vector<int> rhs_dims{ static_cast<int>(rhs.Size(0)), static_cast<int>(rhs.Size(1)), static_cast<int>(rhs.Size(2)) };
  const std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  const std::vector<float> rhs_vxs{ rhs.Vxs(0), rhs.Vxs(1), rhs.Vxs(2) };
  return cuda_volume_utils::same_dim_size(my_dims,my_vxs,rhs_dims,rhs_vxs);
} EddyCatch
// Element-wise addition of cv to this volume.
// If both operands have valid spline coefficients these are added as well;
// otherwise the coefficients of this volume are flagged as invalid.
// Throws EddyException for mismatched or empty volumes. A
// thrust::system_error is reported on std::cerr and rethrown.
CudaVolume& CudaVolume::operator+=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume::operator+=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume::operator+=: Empty volume");
  try {
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),thrust::plus<float>());
    const bool coefs_usable = _spv && cv._spv;
    if (!coefs_usable) {
      _spv = false;
    }
    else { // Adding the coefficients keeps them consistent with the data
      thrust::transform(_spcoef.begin(),_spcoef.end(),cv._spcoef.begin(),_spcoef.begin(),thrust::plus<float>());
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator+= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Element-wise subtraction of cv from this volume.
// If both operands have valid spline coefficients these are subtracted as
// well; otherwise the coefficients of this volume are flagged as invalid.
// Throws EddyException for mismatched or empty volumes. A
// thrust::system_error is reported on std::cerr and rethrown.
CudaVolume& CudaVolume::operator-=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume::operator-=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume::operator-=: Empty volume");
  try {
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),thrust::minus<float>());
    const bool coefs_usable = _spv && cv._spv;
    if (!coefs_usable) {
      _spv = false;
    }
    else { // Subtracting the coefficients keeps them consistent with the data
      thrust::transform(_spcoef.begin(),_spcoef.end(),cv._spcoef.begin(),_spcoef.begin(),thrust::minus<float>());
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator-= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Element-wise multiplication of this volume by cv. The spline coefficients
// of this volume are flagged as invalid afterwards.
// Throws EddyException for mismatched or empty volumes. A
// thrust::system_error is reported on std::cerr and rethrown.
CudaVolume& CudaVolume::operator*=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume::operator*=: Mismatched volumes");
  if (!this->Size()) throw EddyException("CudaVolume::operator*=: Empty volume");
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      cv._devec.begin(),
                      _devec.begin(),
                      thrust::multiplies<float>());
    _spv = false; // Only reached if the multiplication went through
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator*= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Divides every voxel by the scalar a, implemented as a multiplication by
// 1/a. Valid spline coefficients are scaled by the same factor.
// Throws EddyException if a is zero. A thrust::system_error is reported on
// std::cerr and rethrown.
CudaVolume& CudaVolume::operator/=(float a) EddyTry
{
  if (!a) throw EddyException("CudaVolume::operator/=: Division by zero");
  try {
    const EDDY::MulByScalar<float> scale(1.0/a);
    thrust::transform(_devec.begin(),_devec.end(),_devec.begin(),scale);
    if (_spv) {
      thrust::transform(_spcoef.begin(),_spcoef.end(),_spcoef.begin(),scale);
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator/= with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Smoothing restricted to a mask: the masked image and the mask itself are
// both smoothed, the smoothed image is divided by the smoothed mask inside
// the mask, and the result is masked once more.
//  fwhm  FWHM in mm
//  mask  Mask within which to smooth
void CudaVolume::Smooth(float             fwhm,
                        const CudaVolume& mask) EddyTry
{
  CudaVolume smoothed_mask(mask);
  (*this) *= mask;                                // Mask image
  this->Smooth(fwhm);                             // Smooth masked image
  smoothed_mask.Smooth(fwhm);                     // Smooth mask
  this->DivideWithinMask(smoothed_mask,mask);     // Normalise by the smoothed mask
  (*this) *= mask;                                // Mask the result
} EddyCatch
// Combines pv, scaled by a, into this volume using the EDDY::MulAndAdd functor.
// If this volume has valid spline coefficients they are updated the same way
// when pv has valid coefficients too, and discarded otherwise.
// Throws EddyException on dimension mismatch. A thrust::system_error is
// reported on std::cerr and rethrown.
void CudaVolume::MultiplyAndAddToMe(const CudaVolume& pv, float a) EddyTry
{
  if (pv!=*this) throw EddyException("CudaVolume::MultiplyAndAddToMe: Dimension mismatch");
  try {
    thrust::transform(_devec.begin(),_devec.end(),pv._devec.begin(),_devec.begin(),EDDY::MulAndAdd<float>(a));
    if (_spv && pv._spv) {
      thrust::transform(_spcoef.begin(),_spcoef.end(),pv._spcoef.begin(),_spcoef.begin(),EDDY::MulAndAdd<float>(a));
    }
    else if (_spv) { // No coefficients available for pv: ours can no longer be trusted
      _spcoef.clear();
      _spv = false;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::MultiplyAndAddToMe: with message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Runs the subtract_multiply_and_add_to_me kernel with pv, nv and the scalar
// a as inputs and this volume as the buffer that is updated.
// Any spline coefficients are discarded afterwards; they are not
// recalculated from the inputs.
// Throws EddyException on dimension mismatch.
void CudaVolume::SubtractMultiplyAndAddToMe(const CudaVolume& pv, const CudaVolume& nv, float a) EddyTry
{
  if (pv!=*this || nv!=*this) throw EddyException("CudaVolume::SubtractMultiplyAndAddToMe: Dimension mismatch");

  // One thread per voxel, rounded up to whole blocks
  int tpb = threads_per_block_smaatm;
  int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::subtract_multiply_and_add_to_me<<<nblocks,tpb>>>(pv.GetPtr(),nv.GetPtr(),a,nthreads,GetPtr());
  EddyCudaHelperFunctions::CudaSync("EddyKernels::subtract_multiply_and_add_to_me");

  if (_spv) {
    _spcoef.clear();
    _spv = false;
  }
} EddyCatch
// Runs the subtract_square_and_add_to_me kernel with pv and nv as inputs and
// this volume as the buffer that is updated.
// Any spline coefficients are discarded afterwards.
// Throws EddyException on dimension mismatch.
void CudaVolume::SubtractSquareAndAddToMe(const CudaVolume& pv, const CudaVolume& nv) EddyTry
{
  if (pv!=*this || nv!=*this) throw EddyException("CudaVolume::SubtractSquareAndAddToMe: Dimension mismatch");

  // One thread per voxel, rounded up to whole blocks
  int tpb = threads_per_block_ssaatm;
  int nthreads = Size();
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);

  EddyKernels::subtract_square_and_add_to_me<<<nblocks,tpb>>>(pv.GetPtr(),nv.GetPtr(),nthreads,GetPtr());
  EddyCudaHelperFunctions::CudaSync("EddyKernels::subtract_square_and_add_to_me");

  if (_spv) {
    _spcoef.clear();
    _spv = false;
  }
} EddyCatch
// Divides this volume by divisor for the voxels where mask is non-zero,
// using cuda_volume_utils::divide_within_mask.
// Any spline coefficients are discarded afterwards.
// Throws EddyException on dimension mismatch.
void CudaVolume::DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask) EddyTry
{
  if (divisor!=*this || mask!=*this) throw EddyException("CudaVolume::DivideWithinMask: Dimension mismatch");
  cuda_volume_utils::divide_within_mask(divisor._devec,
                                        mask._devec,
                                        _devec.begin(),
                                        _devec.end());
  if (_spv) {
    _spcoef.clear();
    _spv = false;
  }
} EddyCatch
// Replaces every voxel by the output of the EDDY::Binarise functor
// constructed with the single threshold tv.
// Any spline coefficients are discarded afterwards.
// A thrust::system_error is reported on std::cerr and rethrown.
CudaVolume& CudaVolume::Binarise(float tv) EddyTry
{
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      _devec.begin(),
                      EDDY::Binarise<float>(tv));
    if (_spv) {
      _spcoef.clear();
      _spv = false;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::Binarise_1: with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Replaces every voxel by the output of the EDDY::Binarise functor
// constructed with the two limits ll and ul (presumably lower and upper).
// Any spline coefficients are discarded afterwards.
// A thrust::system_error is reported on std::cerr and rethrown.
CudaVolume& CudaVolume::Binarise(float ll, float ul) EddyTry
{
  try {
    thrust::transform(_devec.begin(),
                      _devec.end(),
                      _devec.begin(),
                      EDDY::Binarise<float>(ll,ul));
    if (_spv) {
      _spcoef.clear();
      _spv = false;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::Binarise_2: with message: " << e.what() << std::endl;
    throw;
  }
  return *this;
} EddyCatch
// Overwrites every voxel with the output of the EDDY::MakeNormRand functor,
// constructed with mu and sigma and applied to the voxel's linear index
// (presumably normally distributed values with mean mu and standard deviation sigma).
// Any spline coefficients are discarded afterwards.
// A thrust::system_error is reported on std::cerr and rethrown.
CudaVolume& CudaVolume::MakeNormRand(float mu, float sigma) EddyTry
{
  try {
    // The functor is fed the indices 0,1,...,size-1
    thrust::counting_iterator<unsigned int> index_seq_begin(0);
    thrust::transform(index_seq_begin,index_seq_begin+_devec.size(),_devec.begin(),EDDY::MakeNormRand<float>(mu,sigma));
    if (_spv) { _spcoef.clear(); _spv=false; }
  }
  catch(thrust::system_error &e) {
    // The message used to name a non-existent CudaVolume::MakeRandom
    std::cerr << "thrust::system_error thrown in CudaVolume::MakeNormRand: with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Returns the sum of the voxel values, accumulated in double precision.
//  mask  If non-empty, the voxel values are combined with the mask values by
//        EDDY::Product before summation, and mask must match this volume.
//        If empty (mask.Size()==0), all voxels are summed.
// Throws EddyException if a non-empty mask does not match. A
// thrust::system_error is reported on std::cerr and rethrown.
double CudaVolume::Sum(const CudaVolume& mask) const EddyTry
{
  double total = 0.0;
  if (!mask.Size()) { // No mask: plain reduction over all voxels
    try {
      total = thrust::reduce(_devec.begin(),_devec.end(),total,EDDY::Sum<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Sum in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else { // Masked sum
    if (mask != *this) throw EddyException("CudaVolume::Sum: Mismatched volumes");
    try {
      total = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),total,
                                    thrust::plus<double>(),EDDY::Product<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Sum in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return total;
} EddyCatch
// Returns the sum of squared voxel values, accumulated in double precision.
//  mask  If non-empty, voxel and mask values are combined by
//        EDDY::MaskedSquare before summation, and mask must match this volume.
//        If empty (mask.Size()==0), all voxels contribute.
// Throws EddyException if a non-empty mask does not match. A
// thrust::system_error is reported on std::cerr and rethrown.
double CudaVolume::SumOfSquares(const CudaVolume& mask) const EddyTry
{
  double total = 0.0;
  if (!mask.Size()) { // No mask: plain reduction over all voxels
    try {
      total = thrust::reduce(_devec.begin(),_devec.end(),total,EDDY::SumSquare<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::SumOfSquares in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else { // Masked sum of squares
    if (mask != *this) throw EddyException("CudaVolume::SumOfSquares: Mismatched volumes");
    try {
      total = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),total,
                                    thrust::plus<double>(),EDDY::MaskedSquare<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::SumOfSquares in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return total;
} EddyCatch
// Returns the largest voxel value as a double. The search starts from the
// lowest representable double.
//  mask  If non-empty, the maximum is taken over the values that
//        EDDY::Product yields for each voxel/mask pair, and mask must match
//        this volume. If empty (mask.Size()==0), all voxels are considered.
// NOTE(review): if EDDY::Product is a plain product, voxels outside the mask
// contribute 0 to the masked maximum - confirm that this is intended.
// Throws EddyException if a non-empty mask does not match. A
// thrust::system_error is reported on std::cerr and rethrown.
double CudaVolume::Max(const CudaVolume& mask) const EddyTry
{
  double largest = std::numeric_limits<double>::lowest();
  if (!mask.Size()) { // No mask: plain reduction over all voxels
    try {
      largest = thrust::reduce(_devec.begin(),_devec.end(),largest,EDDY::Max<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Max in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else { // Masked maximum
    if (mask != *this) throw EddyException("CudaVolume::Max: Mismatched volumes");
    try {
      largest = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),largest,
                                      thrust::maximum<double>(),EDDY::Product<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::Max in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return largest;
} EddyCatch
// Returns the result of an EDDY::MaxAbs reduction over the voxels, starting
// from 0.0. A non-empty mask must compare equal to this volume (EddyException
// otherwise); in that case the reduction runs over the values produced by
// EDDY::Product for each voxel/mask pair.
double CudaVolume::MaxAbs(const CudaVolume& mask) const EddyTry
{
  double largest = 0.0;
  const bool use_mask = (mask.Size() != 0);
  if (use_mask && mask != *this) throw EddyException("CudaVolume::MaxAbs: Mismatched volumes");
  if (!use_mask) { // Plain reduction over the whole volume
    try {
      largest = thrust::reduce(_devec.begin(),_devec.end(),largest,EDDY::MaxAbs<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::MaxAbs in call reduce: with message: " << e.what() << std::endl;
      throw;
    }
  }
  else { // Reduction over voxel/mask pairs
    try {
      largest = thrust::inner_product(_devec.begin(),_devec.end(),mask._devec.begin(),largest,
                                      EDDY::MaxAbs<double,double>(),EDDY::Product<float,double>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::MaxAbs in call inner_product: with message: " << e.what() << std::endl;
      throw;
    }
  }
  return(largest);
} EddyCatch
// Sets every voxel to val. If spline coefficients are currently allocated
// they are set to val as well and flagged as valid.
CudaVolume& CudaVolume::operator=(float val) EddyTry
{
  try {
    thrust::fill(_devec.begin(), _devec.end(), val);
    if (!_spcoef.empty()) {
      thrust::fill(_spcoef.begin(), _spcoef.end(), val);
      _spv = true;
    }
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::operator= with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Returns the size along dimension indx (0, 1 or 2).
// Throws EddyException for any other index.
unsigned int CudaVolume::Size(unsigned int indx) const EddyTry
{
  if (indx <= 2) return(_sz[indx]);
  throw EddyException("CudaVolume::Size: Index out of range");
} EddyCatch
// Returns the header's xdim(), ydim() or zdim() for indx 0, 1 or 2.
// Throws EddyException for any other index.
float CudaVolume::Vxs(unsigned int indx) const EddyTry
{
  if (indx > 2) throw EddyException("CudaVolume::Vxs: Index out of range");
  float vxs = 0.0f;
  if (indx == 0) vxs = _hdr.xdim();
  else if (indx == 1) vxs = _hdr.ydim();
  else vxs = _hdr.zdim();
  return(vxs);
} EddyCatch
// Returns the sampling matrix of the header.
NEWMAT::Matrix CudaVolume::Ima2WorldMatrix() const EddyTry
{
  NEWMAT::Matrix ima2world = _hdr.sampling_mat();
  return(ima2world);
} EddyCatch
// Returns the inverse of the header's sampling matrix.
NEWMAT::Matrix CudaVolume::World2ImaMatrix() const EddyTry
{
  NEWMAT::Matrix world2ima = _hdr.sampling_mat().i();
  return(world2ima);
} EddyCatch
// Copies the image data into ovol. The data is first transferred into a host
// vector, ovol is then reinitialised to the size of this volume, given the
// basic properties of the header, and filled with x as the fastest running
// index.
void CudaVolume::GetVolume(NEWIMAGE::volume<float>& ovol) const EddyTry
{
  thrust::host_vector<float> hostbuf;
  try {
    hostbuf = _devec; // Device -> host
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::GetVolume with message: " << e.what() << std::endl;
    throw;
  }
  ovol.reinitialize(_sz[0],_sz[1],_sz[2]);
  NEWIMAGE::copybasicproperties(_hdr,ovol); // Header info
  unsigned int lin = 0; // Linear index into hostbuf
  for (int z=0; z<ovol.zsize(); ++z) {
    for (int y=0; y<ovol.ysize(); ++y) {
      for (int x=0; x<ovol.xsize(); ++x) {
        ovol(x,y,z) = hostbuf[lin];
        ++lin;
      }
    }
  }
} EddyCatch
// Copies the spline coefficients into ovol. Throws EddyException if the
// coefficients are not flagged as valid. The coefficients are transferred
// into a host vector, ovol is reinitialised to the size of this volume, given
// the basic properties of the header, and filled with x as the fastest
// running index.
void CudaVolume::GetSplineCoefs(NEWIMAGE::volume<float>& ovol) const EddyTry
{
  if (!_spv) throw EddyException("CudaVolume::GetSplineCoefs: Attempt to obtain invalid spline coefficients");
  thrust::host_vector<float> hostbuf;
  try {
    hostbuf = _spcoef; // Device -> host
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::GetSplineCoefs with message: " << e.what() << std::endl;
    throw;
  }
  ovol.reinitialize(_sz[0],_sz[1],_sz[2]);
  NEWIMAGE::copybasicproperties(_hdr,ovol); // Header info
  unsigned int lin = 0; // Linear index into hostbuf
  for (int z=0; z<ovol.zsize(); ++z) {
    for (int y=0; y<ovol.ysize(); ++y) {
      for (int x=0; x<ovol.xsize(); ++x) {
        ovol(x,y,z) = hostbuf[lin];
        ++lin;
      }
    }
  }
} EddyCatch
// Shared implementation for initialising a CudaVolume from a NEWIMAGE volume.
// If ifvol is true the voxel data of vol is copied to the device; otherwise
// the device vector is only resized to the number of voxels of vol. In both
// cases the size and header are taken from vol, and any spline coefficients
// are discarded and flagged as invalid.
void CudaVolume::common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                                     bool                           ifvol) EddyTry
{
  const auto nvox = vol.xsize()*vol.ysize()*vol.zsize();
  if (!ifvol) { // Header info only, just make room on the device
    try {
      _devec.resize(nvox);
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::common_assignment_from_newimage_vol after resize() with message: " << e.what() << std::endl;
      throw;
    }
  }
  else { // Data is to be copied as well
    thrust::host_vector<float> staging(nvox);
    unsigned int n = 0;
    for (auto src=vol.fbegin(); src!=vol.fend(); ++src, ++n) {
      staging[n] = *src; // Collect data on host
    }
    try {
      _devec = staging; // Host -> device
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume::common_assignment_from_newimage_vol after transfer with message: " << e.what() << std::endl;
      throw;
    }
  }
  _sz[0] = vol.xsize();
  _sz[1] = vol.ysize();
  _sz[2] = vol.zsize();
  try {
    _spcoef.clear();
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::common_assignment_from_newimage_vol after clear() with message: " << e.what() << std::endl;
    throw;
  }
  _spv = false;
  _hdr.reinitialize(1,1,1); // Header is kept as a 1x1x1 volume that only carries properties
  NEWIMAGE::copybasicproperties(vol,_hdr);
} EddyCatch
// Calculates cubic spline coefficients for the image ima and stores them in
// coef. The image is first copied into coef, which is then deconvolved in
// place by one kernel launch per direction (x, y and z).
//
// sz   - image size in x, y and z (at least three elements are read)
// ima  - image data on the device
// coef - receives the coefficients; must have the same number of elements as
//        ima, otherwise an EddyException is thrown
//
// Only PERIODIC and CONSTANT (for NEWIMAGE::extraslice) extrapolation is
// passed to the kernel here.
// NOTE(review): NEWIMAGE::mirror is mapped to PERIODIC in this function;
// confirm that this is intended.
void CudaVolume::calculate_spline_coefs(const std::vector<unsigned int>&     sz,
                                        const thrust::device_vector<float>&  ima,
                                        thrust::device_vector<float>&        coef) const EddyTry
{
  if (ima.size() != coef.size()) throw EddyException("CudaVolume::calculate_spline_coefs: Mismatched ima and coef");
  try {
    thrust::copy(ima.begin(),ima.end(),coef.begin()); // Copy image into coef
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::calculate_spline_coefs after copy() with message: " << e.what() << std::endl;
    throw;
  }
  float *cptr = nullptr;
  try {
    cptr = thrust::raw_pointer_cast(coef.data());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume::calculate_spline_coefs after raw_pointer_cast() with message: " << e.what() << std::endl;
    throw;
  }
  const float z = -0.267949192431123f; // Valid for cubic spline
  // Length of "burn in" to obtain 1e-8 relative precision. std::abs is used
  // explicitly: an unqualified abs() may resolve to the integer C version,
  // which would turn |z| into 0 and give a meaningless burn in length.
  const unsigned int nburn = static_cast<unsigned int>((std::log(1e-8)/std::log(std::abs(z))) + 1.5);
  std::vector<unsigned int> initn(3);
  // Make sure that burn in is not longer than length of data
  for (unsigned int i=0; i<3; i++) initn[i] = (nburn > sz[i]) ? sz[i] : nburn;
  int tpb = threads_per_block_deconv;
  EddyKernels::ExtrapType ep = EddyKernels::PERIODIC;
  if (Extrap()==NEWIMAGE::extraslice) ep = EddyKernels::CONSTANT;
  for (unsigned int dir=0; dir<3; dir++) {
    // One thread per line along dir, i.e. the product of the other two sizes
    int nthreads = 1;
    for (unsigned int i=0; i<3; i++) if (i!=dir) nthreads *= sz[i];
    int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
    EddyKernels::cubic_spline_deconvolution<<<nblocks,tpb>>> (cptr,sz[0],sz[1],sz[2],dir,
                                                              initn[dir],ep,nthreads);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::cubic_spline_deconvolution");
  }
  return;
} EddyCatch
// Returns a helper object referring to volume number indx of this 4D volume.
// Throws EddyException if indx is not smaller than the fourth dimension.
CudaVolume3D_2_4D_Helper CudaVolume4D::operator[](unsigned int indx) EddyTry
{
  if (indx < _sz[3]) {
    CudaVolume3D_2_4D_Helper helper(*this,indx);
    return(helper);
  }
  throw EddyException("CudaVolume4D::operator[]: indx out of range");
} EddyCatch
// Copies the 3D volume vol into volume number indx of this 4D volume.
// Throws EddyException if indx is out of range, or if the first three sizes
// or the header dimensions (NEWIMAGE::samedim) do not match.
void CudaVolume4D::SetVolume(unsigned int indx, const CudaVolume& vol) EddyTry
{
  if (indx >= _sz[3]) throw EddyException("CudaVolume4D::SetVolume: indx out of range");
  if (_sz[0] != vol._sz[0] || _sz[1] != vol._sz[1] || _sz[2] != vol._sz[2]) {
    throw EddyException("CudaVolume4D::SetVolume: Mismatched volumes");
  }
  if (!NEWIMAGE::samedim(_hdr,vol._hdr,3)) throw EddyException("CudaVolume4D::SetVolume: Mismatched volumes");
  auto dest = _devec.begin() + indx*this->Size(); // Start of volume indx
  thrust::copy(vol._devec.begin(),vol._devec.end(),dest);
} EddyCatch
// Adds cv to this volume, element by element.
// Throws EddyException if the volumes do not compare equal (operator!=) or
// if this volume is empty.
CudaVolume4D& CudaVolume4D::operator+=(const CudaVolume4D& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator+=: Mismatched volumes");
  if (this->Size() == 0) throw EddyException("CudaVolume4D::operator+=: Empty volume");
  try {
    thrust::plus<float> add;
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),add);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator+= with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Subtracts cv from this volume, element by element.
// Throws EddyException if the volumes do not compare equal (operator!=) or
// if this volume is empty.
CudaVolume4D& CudaVolume4D::operator-=(const CudaVolume4D& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator-=: Mismatched volumes");
  if (this->Size() == 0) throw EddyException("CudaVolume4D::operator-=: Empty volume");
  try {
    thrust::minus<float> subtract;
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),subtract);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator-= with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Multiplies this volume by cv, element by element.
// Throws EddyException if the volumes do not compare equal (operator!=) or
// if this volume is empty.
CudaVolume4D& CudaVolume4D::operator*=(const CudaVolume4D& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator*=(const CudaVolume4D&): Mismatched volumes");
  if (this->Size() == 0) throw EddyException("CudaVolume4D::operator*=(const CudaVolume4D&): Empty volume");
  try {
    thrust::multiplies<float> multiply;
    thrust::transform(_devec.begin(),_devec.end(),cv._devec.begin(),_devec.begin(),multiply);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator*=(const CudaVolume4D&) with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Multiplies each 3D volume of this 4D volume by the 3D volume cv, element
// by element. Throws EddyException if cv does not compare equal (operator!=)
// to this volume or if this volume is empty.
CudaVolume4D& CudaVolume4D::operator*=(const CudaVolume& cv) EddyTry
{
  if (*this != cv) throw EddyException("CudaVolume4D::operator*=(const CudaVolume&): Mismatched volumes");
  if (this->Size() == 0) throw EddyException("CudaVolume4D::operator*=(const CudaVolume&): Empty volume");
  for (unsigned int vol=0; vol<_sz[3]; ++vol) {
    try {
      thrust::transform(this->volbegin(vol),this->volend(vol),cv._devec.begin(),
                        this->volbegin(vol),thrust::multiplies<float>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume4D::operator*= with index: " << vol << ", and message: " << e.what() << std::endl;
      throw;
    }
  }
  return(*this);
} EddyCatch
// Returns a 3D volume, with header taken from this volume, that is the
// element-wise sum of all the 3D volumes along the fourth dimension.
// The first volume is copied and the remaining ones are added to it.
const CudaVolume CudaVolume4D::SumAlongFourthDim() const EddyTry
{
  CudaVolume total;
  total.SetHdr(*this);
  try {
    thrust::copy(this->volbegin(0),this->volend(0),total._devec.begin());
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::SumAlongFourthDim when copying first volume: with message: " << e.what() << std::endl;
    throw;
  }
  for (unsigned int vol=1; vol<_sz[3]; ++vol) {
    try {
      thrust::transform(total._devec.begin(),total._devec.end(),this->volbegin(vol),
                        total._devec.begin(),thrust::plus<float>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume4D::SumAlongFourthDim with index: " << vol << ", and message: " << e.what() << std::endl;
      throw;
    }
  }
  return(total);
} EddyCatch
// Fills the three volumes of this 4D volume from the x-, y- and
// z-coordinates in coord, passing the voxel size of the corresponding
// direction to the EddyKernels::CopyAndMultiply kernel.
// Throws EddyException unless the fourth dimension is 3 and the first three
// sizes agree with those of coord.
void CudaVolume4D::CoordinatesToDisplacementField(const CudaImageCoordinates& coord) EddyTry
{
  if (Size(3) != 3) throw EddyException("CudaVolume4D::CoordinatesToDisplacementField: Only defined when fourth dimension is 3");
  const bool same_size = Size(0) == coord.Size(0) && Size(1) == coord.Size(1) && Size(2) == coord.Size(2);
  if (!same_size) {
    throw EddyException("CudaVolume4D::CoordinatesToDisplacementField: Mismatch between volume and coordinates");
  }
  try {
    int tpb = threads_per_block;
    int nthreads = coord.Size();
    int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0); // Round up
    // x-direction
    EddyKernels::CopyAndMultiply<<<nblocks,tpb>>>(coord.XPtr(),coord.Size(),this->Vxs(0),this->GetPtr(0));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::CopyAndMultiply: X");
    // y-direction
    EddyKernels::CopyAndMultiply<<<nblocks,tpb>>>(coord.YPtr(),coord.Size(),this->Vxs(1),this->GetPtr(1));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::CopyAndMultiply: Y");
    // z-direction
    EddyKernels::CopyAndMultiply<<<nblocks,tpb>>>(coord.ZPtr(),coord.Size(),this->Vxs(2),this->GetPtr(2));
    EddyCudaHelperFunctions::CudaSync("EddyKernels::CopyAndMultiply: Z");
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::CoordinatesToDisplacementField with index message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Divides each 3D volume of this 4D volume by divisor for the voxels selected
// by mask (see cuda_volume_utils::divide_within_mask).
// Throws EddyException if divisor or mask do not compare equal (operator!=)
// to this volume, or if this volume is empty.
void CudaVolume4D::DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask) EddyTry
{
  // The message names CudaVolume4D so that the failure can be traced to this class
  if (divisor!=*this || mask!=*this) throw EddyException("CudaVolume4D::DivideWithinMask: Dimension mismatch");
  if (!this->Size()) throw EddyException("CudaVolume4D::DivideWithinMask: Empty volume");
  for (unsigned int i=0; i<_sz[3]; i++) {
    cuda_volume_utils::divide_within_mask(divisor._devec,mask._devec,this->volbegin(i),this->volend(i));
  }
} EddyCatch
// Smooths each 3D volume with a Gaussian of the given FWHM, using mask:
// the data is multiplied by mask, smoothed, divided within the mask by a
// smoothed copy of mask, and finally multiplied by mask again.
void CudaVolume4D::Smooth(float fwhm, const CudaVolume& mask) EddyTry
{
  this->operator*=(mask); // Apply mask before smoothing
  for (unsigned int vol=0; vol<_sz[3]; ++vol) {
    cuda_volume_utils::smooth(fwhm,_sz,_hdr,this->GetPtr(vol));
  }
  CudaVolume smoothed_mask = mask;
  smoothed_mask.Smooth(fwhm);
  this->DivideWithinMask(smoothed_mask,mask);
  this->operator*=(mask); // Apply mask again after smoothing
} EddyCatch
// Sets every voxel of every volume to val.
CudaVolume4D& CudaVolume4D::operator=(float val) EddyTry
{
  try {
    auto first = _devec.begin();
    auto last = _devec.end();
    thrust::fill(first, last, val);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::operator= " << "with message: " << e.what() << std::endl;
    throw;
  }
  return(*this);
} EddyCatch
// Compares the first three sizes and the voxel sizes of this volume with
// those of rhs using cuda_volume_utils::same_dim_size. The fourth dimension
// and the data are not part of the comparison.
bool CudaVolume4D::operator==(const CudaVolume4D& rhs) const EddyTry
{
  std::vector<int> my_size(3), rhs_size(3);
  for (unsigned int d=0; d<3; ++d) {
    my_size[d] = static_cast<int>(_sz[d]);
    rhs_size[d] = static_cast<int>(rhs._sz[d]);
  }
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> rhs_vxs{ rhs._hdr.xdim(), rhs._hdr.ydim(), rhs._hdr.zdim() };
  return(cuda_volume_utils::same_dim_size(my_size,my_vxs,rhs_size,rhs_vxs));
} EddyCatch
// Compares the first three sizes and the voxel sizes of this volume with
// those of the 3D volume rhs using cuda_volume_utils::same_dim_size.
// The data is not part of the comparison.
bool CudaVolume4D::operator==(const CudaVolume& rhs) const EddyTry
{
  std::vector<int> my_size(3);
  for (unsigned int d=0; d<3; ++d) my_size[d] = static_cast<int>(_sz[d]);
  std::vector<int> rhs_size{ static_cast<int>(rhs.Size(0)), static_cast<int>(rhs.Size(1)), static_cast<int>(rhs.Size(2)) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> rhs_vxs{ rhs.Vxs(0), rhs.Vxs(1), rhs.Vxs(2) };
  return(cuda_volume_utils::same_dim_size(my_size,my_vxs,rhs_size,rhs_vxs));
} EddyCatch
// Compares the first three sizes and the voxel sizes of this volume with
// those of the NEWIMAGE volume rhs using cuda_volume_utils::same_dim_size.
// The data is not part of the comparison.
bool CudaVolume4D::operator==(const NEWIMAGE::volume<float>& rhs) const EddyTry
{
  std::vector<int> my_size(3);
  for (unsigned int d=0; d<3; ++d) my_size[d] = static_cast<int>(_sz[d]);
  std::vector<int> rhs_size{ static_cast<int>(rhs.xsize()), static_cast<int>(rhs.ysize()), static_cast<int>(rhs.zsize()) };
  std::vector<float> my_vxs{ _hdr.xdim(), _hdr.ydim(), _hdr.zdim() };
  std::vector<float> rhs_vxs{ rhs.xdim(), rhs.ydim(), rhs.zdim() };
  return(cuda_volume_utils::same_dim_size(my_size,my_vxs,rhs_size,rhs_vxs));
} EddyCatch
// Returns the size along dimension indx (0, 1, 2 or 3).
// Throws EddyException for any other index.
unsigned int CudaVolume4D::Size(unsigned int indx) const EddyTry
{
  if (indx <= 3) return(_sz[indx]);
  throw EddyException("CudaVolume4D::Size: Index out of range");
} EddyCatch
// Returns the header's xdim(), ydim() or zdim() for indx 0, 1 or 2.
// Throws EddyException for any other index.
float CudaVolume4D::Vxs(unsigned int indx) const EddyTry
{
  if (indx > 2) throw EddyException("CudaVolume4D::Vxs: Index out of range");
  float vxs = 0.0f;
  if (indx == 0) vxs = _hdr.xdim();
  else if (indx == 1) vxs = _hdr.ydim();
  else vxs = _hdr.zdim();
  return(vxs);
} EddyCatch
// Returns the sampling matrix of the header.
NEWMAT::Matrix CudaVolume4D::Ima2WorldMatrix() const EddyTry
{
  NEWMAT::Matrix ima2world = _hdr.sampling_mat();
  return(ima2world);
} EddyCatch
// Returns the inverse of the header's sampling matrix.
NEWMAT::Matrix CudaVolume4D::World2ImaMatrix() const EddyTry
{
  NEWMAT::Matrix world2ima = _hdr.sampling_mat().i();
  return(world2ima);
} EddyCatch
/****************************************************************//**
*
* A highly specialised function for the case where the 4D CudaVolume
* is a displacement field with a single non-zero direction. It
* calculates the derivative of that field at the voxel centres along
* that same direction, and hence the Jacobian determinant.
* The kernels are launched with one block per slice (Size(2)) and
* one thread per voxel along x (Size(0)).
* Throws an EddyException if the fourth dimension is not 3, if dir
* is greater than 1, if the extrapolation is not one of extraslice,
* periodic or mirror, or if deriv or a non-empty mask does not match
* this volume.
* \param[in] dir Specifies which volume is non-zero, and also what
* direction to calculate the derivative along. Must be 0 (x) or 1 (y).
* \param[in] mask Specifies where the field is valid. An empty mask
* selects the unmasked kernels.
* \param[out] deriv A 3D volume of the local derivative/Jacobian.
* \param[in] add_one If true, 1.0 is added to each derivative to
* directly make it the Jacobian determinant.
*
********************************************************************/
void CudaVolume4D::SampleTrilinearDerivOnVoxelCentres(unsigned int      dir,
                                                      const CudaVolume& mask,
                                                      CudaVolume&       deriv,
                                                      bool              add_one) const EddyTry
{
  if (Size(3) != 3) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: Only defined when fourth dimension is 3");
  if (dir > 1) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: Only defined for x- and y-directions");
  if (Extrap()!=NEWIMAGE::extraslice && Extrap()!=NEWIMAGE::periodic && Extrap()!=NEWIMAGE::mirror) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: Invalid extrapolation option");
  if (deriv!=*this) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: derive dimension mismatch");
  if (mask.Size() && mask!=*this) throw EddyException("CudaVolume4D::SampleTrilinearDerivOnVoxelCentres: mask dimension mismatch");

  // Translate extrapolation option to the kernel's enum, periodic being the default
  EddyKernels::ExtrapType ep = EddyKernels::PERIODIC;
  if (Extrap()==NEWIMAGE::mirror) ep = EddyKernels::MIRROR;
  else if (Extrap()==NEWIMAGE::extraslice) ep = EddyKernels::CONSTANT;

  const int nblocks = static_cast<int>(Size(2));
  const int tpb = static_cast<int>(Size(0));
  const bool masked = (mask.Size() != 0);

  if (dir==0) { // PE in x
    if (masked) {
      EddyKernels::masked_sample_derivs_along_x<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),GetPtr(dir),mask.GetPtr(),add_one,ep,deriv.GetPtr(),nblocks*tpb);
      EddyCudaHelperFunctions::CudaSync("EddyKernels::masked_sample_derivs_along_x");
    }
    else {
      EddyKernels::sample_derivs_along_x<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),GetPtr(dir),add_one,ep,deriv.GetPtr(),nblocks*tpb);
      EddyCudaHelperFunctions::CudaSync("EddyKernels::sample_derivs_along_x");
    }
  }
  else { // PE in y, dir is known to be 1 because of the check above
    if (masked) {
      EddyKernels::masked_sample_derivs_along_y<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),GetPtr(dir),mask.GetPtr(),add_one,ep,deriv.GetPtr(),nblocks*tpb);
      EddyCudaHelperFunctions::CudaSync("EddyKernels::masked_sample_derivs_along_y");
    }
    else {
      EddyKernels::sample_derivs_along_y<<<nblocks,tpb>>>(Size(0),Size(1),Size(2),GetPtr(dir),add_one,ep,deriv.GetPtr(),nblocks*tpb);
      EddyCudaHelperFunctions::CudaSync("EddyKernels::sample_derivs_along_y");
    }
  }
} EddyCatch
// Copies all the image data into ovol. ovol is reinitialised to the size of
// this 4D volume and given the basic properties of the header; the data is
// then transferred into a host vector and written to ovol in storage order.
void CudaVolume4D::GetVolume(NEWIMAGE::volume4D<float>& ovol) const EddyTry
{
  ovol.reinitialize(_sz[0],_sz[1],_sz[2],_sz[3]);
  NEWIMAGE::copybasicproperties(_hdr,ovol); // Header info
  thrust::host_vector<float> hostbuf;
  try {
    hostbuf = _devec; // Device -> host
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::GetVolume_1: with message: " << e.what() << std::endl;
    throw;
  }
  auto dst = ovol.nsfbegin();
  for (auto src=hostbuf.begin(); src!=hostbuf.end(); ++src, ++dst) {
    *dst = *src; // Host vector -> volume
  }
} EddyCatch
// Copies 3D volume number indx into ovol. ovol is reinitialised to the first
// three sizes of this volume and given the basic properties of the header.
// The data is transferred with cudaMemcpy into a std::vector, which owns the
// host buffer so that it is released on every exit path.
// Throws EddyException if indx is out of range, if cudaMemcpy fails, or if
// anything goes wrong while writing to ovol.
void CudaVolume4D::GetVolume(unsigned int indx, NEWIMAGE::volume<float>& ovol) const EddyTry
{
  if (indx >= _sz[3]) throw EddyException("CudaVolume4D::GetVolume(indx,ovol): indx out of range");
  ovol.reinitialize(_sz[0],_sz[1],_sz[2]);
  NEWIMAGE::copybasicproperties(_hdr,ovol); // Copy header
  // Transfer from device
  std::vector<float> on_host(this->Size());
  cudaError_t status = cudaMemcpy(on_host.data(),this->GetPtr(indx),this->Size()*sizeof(float),cudaMemcpyDeviceToHost);
  if (status != cudaSuccess) {
    throw EddyException("CudaVolume4D::GetVolume(indx,ovol): Failed to copy memory from device: cudaMemcpy returned an error: " + EddyCudaHelperFunctions::cudaError2String(status));
  }
  try {
    unsigned int i=0;
    for (auto it=ovol.nsfbegin(); it!=ovol.nsfend(); ++it, ++i) *it = on_host[i]; // Copy data to volume
  }
  catch(...) {
    throw EddyException("CudaVolume4D::GetVolume(indx,ovol): Unknown failure");
  }
  return;
} EddyCatch
// Shared implementation for initialising a CudaVolume4D from a NEWIMAGE
// volume. The size in x, y and z is taken from vol. The size of the fourth
// dimension is tsize_override unless that is zero, in which case it is
// vol.tsize(). If ifvol is true the voxel data of vol is copied to the
// device, otherwise memory is just allocated. Combining a non-zero
// tsize_override with ifvol throws an EddyException.
void CudaVolume4D::common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                                       unsigned int                   tsize_override,
                                                       bool                           ifvol) EddyTry
{
  if (ifvol && tsize_override!=0) {
    throw EddyException("CudaVolume4D::common_assignment_from_newimage_vol: tsize_override cannot be combined with ifvol");
  }
  int tsize = static_cast<int>(tsize_override);
  if (tsize_override == 0) tsize = vol.tsize(); // No override, use what vol has
  unsigned int nelem = static_cast<unsigned int>(vol.xsize()*vol.ysize()*vol.zsize()*tsize);
  try {
    _devec.resize(nelem);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in CudaVolume4D::common_assignment_from_newimage_vol after memory allocation with message: " << e.what() << std::endl;
    throw;
  }
  if (ifvol) { // Data is to be copied as well
    thrust::host_vector<float> staging(nelem);
    unsigned int n = 0;
    for (auto src=vol.fbegin(); src!=vol.fend(); ++src, ++n) {
      staging[n] = *src; // Collect data on host
    }
    try {
      _devec = staging; // Host -> device
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaVolume4D::common_assignment_from_newimage_vol after transfer with message: " << e.what() << std::endl;
      throw;
    }
  }
  _sz[0] = vol.xsize();
  _sz[1] = vol.ysize();
  _sz[2] = vol.zsize();
  _sz[3] = tsize;
  _hdr.reinitialize(1,1,1); // Header is kept as a 1x1x1 volume that only carries properties
  NEWIMAGE::copybasicproperties(vol,_hdr);
} EddyCatch
/****************************************************************//**
*
* Smooths, in place, the image volume that imaptr points to by three
* successive 1D convolutions (x, y and z) with Gaussian kernels.
* The FWHM is converted to voxels for each direction by dividing by
* the voxel size in hdr. A temporary device volume is used as
* scratch space, and the final result is copied back to imaptr.
* Throws an EddyException if that final device->device copy fails.
*
* \param[in] fwhm FWHM in mm
* \param[in] sz 3 element vector with ima size in x, y and z
* \param[in] hdr Header for image pointed to by imaptr
* \param[in,out] imaptr Pointer to image on device
*
********************************************************************/
void cuda_volume_utils::smooth(float                            fwhm,
                               const std::vector<unsigned int>& sz,
                               const NEWIMAGE::volume<float>&   hdr,
                               float                            *imaptr) EddyTry
{
  // One Gaussian kernel per direction, with FWHM expressed in voxels
  thrust::device_vector<float> kern_x = cuda_volume_utils::gaussian_1D_kernel(fwhm/hdr.xdim());
  thrust::device_vector<float> kern_y = cuda_volume_utils::gaussian_1D_kernel(fwhm/hdr.ydim());
  thrust::device_vector<float> kern_z = cuda_volume_utils::gaussian_1D_kernel(fwhm/hdr.zdim());
  // Scratch volume
  const unsigned int nvox = sz[0]*sz[1]*sz[2];
  thrust::device_vector<float> scratch(nvox);
  float *scratchptr = thrust::raw_pointer_cast(scratch.data());
  // Launch configuration, one thread per voxel
  int tpb = threads_per_block_convolve_1D;
  int nthreads = nvox;
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0);
  // x: image -> scratch
  EddyKernels::convolve_1D<<<nblocks,tpb>>>(sz[0],sz[1],sz[2],imaptr,thrust::raw_pointer_cast(kern_x.data()),
                                            kern_x.size(),0,nthreads,scratchptr);
  // y: scratch -> image
  EddyKernels::convolve_1D<<<nblocks,tpb>>>(sz[0],sz[1],sz[2],scratchptr,thrust::raw_pointer_cast(kern_y.data()),
                                            kern_y.size(),1,nthreads,imaptr);
  // z: image -> scratch
  EddyKernels::convolve_1D<<<nblocks,tpb>>>(sz[0],sz[1],sz[2],imaptr,thrust::raw_pointer_cast(kern_z.data()),
                                            kern_z.size(),2,nthreads,scratchptr);
  // Final result is in scratch, so copy it back
  cudaError_t status = cudaMemcpy(imaptr,scratchptr,nvox*sizeof(float),cudaMemcpyDeviceToDevice);
  if (status != cudaSuccess) {
    throw EddyException("cuda_volume_utils::smooth: Device->device copy failed: cudaMemcpy returned an error: " + EddyCudaHelperFunctions::cudaError2String(status));
  }
} EddyCatch
// Returns a normalised (unit sum) 1D Gaussian kernel for the given FWHM,
// which is given in voxels. The kernel has 2*hw+1 elements, where hw is
// six standard deviations rounded to the nearest integer.
// When hw is zero (FWHM zero or very small) a single-element kernel with the
// value 1 is returned, i.e. a kernel that leaves the image unchanged. This
// avoids evaluating 0/0 when the standard deviation is, or underflows to, zero.
// Throws EddyException if fwhm is negative or not finite, since the kernel
// size cannot be determined in that case.
thrust::host_vector<float> cuda_volume_utils::gaussian_1D_kernel(float fwhm) EddyTry // fwhm in voxels
{
  if (!std::isfinite(fwhm) || fwhm < 0.0f) {
    throw EddyException("cuda_volume_utils::gaussian_1D_kernel: fwhm must be finite and non-negative");
  }
  float s = fwhm/std::sqrt(8.0*std::log(2.0)); // Standard deviation in voxels
  unsigned int hw = 6*s + 0.5;                 // Half-width of kernel
  if (hw == 0) return(thrust::host_vector<float>(1,1.0f));
  unsigned int sz = 2*hw+1;
  thrust::host_vector<float> rval(sz);
  double sum=0.0;
  for (unsigned int i=0; i<sz; i++) {
    rval[i] = exp(-sqr(int(i)-int(sz)/2)/(2.0*sqr(s)));
    sum += rval[i];
  }
  for (unsigned int i=0; i<sz; i++) rval[i] /= sum; // Normalise to unit sum
  return(rval);
} EddyCatch
/****************************************************************//**
*
* Divides, in place, the image in the range [imbegin,imend) by the
* image in divisor. The division is performed by thrust::transform_if
* with mask as stencil and thrust::identity as predicate, so only
* elements where mask is non-zero are changed.
*
* \param[in] divisor Image to divide by
* \param[in] mask Only divide where mask is non-zero
* \param[in,out] imbegin Iterator to first element of image to divide
* \param[in,out] imend Iterator to one-past-last element of image to divide
*
********************************************************************/
void cuda_volume_utils::divide_within_mask(const thrust::device_vector<float>&      divisor,
                                           const thrust::device_vector<float>&      mask,
                                           thrust::device_vector<float>::iterator   imbegin,
                                           thrust::device_vector<float>::iterator   imend) EddyTry
{
  thrust::divides<float> divide;     // What to do with selected elements
  thrust::identity<float> selected;  // Mask value itself decides if an element is selected
  try {
    thrust::transform_if(imbegin,imend,divisor.begin(),mask.begin(),imbegin,divide,selected);
  }
  catch(thrust::system_error &e) {
    std::cerr << "thrust::system_error thrown in cuda_volume_utils::divide_within_mask: with message: " << e.what() << std::endl;
    throw;
  }
} EddyCatch
// Returns true if the first three elements of sz1 and sz2 are identical and
// the first three voxel sizes in vxs1 and vxs2 agree to within a relative
// tolerance (absolute difference no greater than 5e-5 times their sum).
bool cuda_volume_utils::same_dim_size(const std::vector<int>&   sz1,
                                      const std::vector<float>& vxs1,
                                      const std::vector<int>&   sz2,
                                      const std::vector<float>& vxs2) EddyTry
{
  for (int d=0; d<3; ++d) {
    if (sz1[d] != sz2[d]) return(false);
  }
  for (int d=0; d<3; ++d) {
    const bool close_enough = std::fabs(vxs1[d]-vxs2[d]) <= 5e-5 * (vxs1[d]+vxs2[d]);
    if (!close_enough) return(false);
  }
  return(true);
} EddyCatch
// Copies the 3D volume threed into the volume of the 4D volume that this
// helper refers to. Throws EddyException if the first three sizes or the
// header dimensions (NEWIMAGE::samedim) do not match.
void CudaVolume3D_2_4D_Helper::operator=(const CudaVolume& threed) EddyTry
{
  if (_fourd._sz[0] != threed._sz[0] || _fourd._sz[1] != threed._sz[1] || _fourd._sz[2] != threed._sz[2]) {
    throw EddyException("CudaVolume4D::operator=(CudaVolume): Mismatched 3D volume");
  }
  if (!NEWIMAGE::samedim(_fourd._hdr,threed._hdr,3)) {
    throw EddyException("CudaVolume4D::operator=(CudaVolume): Mismatched 3D volume");
  }
  auto dest = _fourd.volbegin(_indx); // Start of target volume
  thrust::copy(threed._devec.begin(),threed._devec.end(),dest);
} EddyCatch
// Applies the affine transform given by the first three rows of A to the
// coordinates using the EddyKernels::affine_transform_coordinates kernel,
// one thread per coordinate. The coordinates are flagged as initialised
// afterwards.
void CudaImageCoordinates::Transform(const NEWMAT::Matrix& A) EddyTry
{
  unsigned int nthreads = Size();
  int tpb = threads_per_block;
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0); // Round up
  EddyKernels::affine_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                             A(1,1),A(1,2),A(1,3),A(1,4),
                                                             A(2,1),A(2,2),A(2,3),A(2,4),
                                                             A(3,1),A(3,2),A(3,3),A(3,4),
                                                             XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::affine_transform_coordinates");
  _init = true;
} EddyCatch
// Applies slice-wise affine transforms to the coordinates using the
// EddyKernels::slice_wise_affine_transform_coordinates kernel, one thread
// per coordinate. A must hold one matrix per slice (Size(2)), otherwise an
// EddyException is thrown. The coordinates are flagged as initialised
// afterwards.
void CudaImageCoordinates::Transform(const std::vector<NEWMAT::Matrix>& A) EddyTry
{
  if (A.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::Transform: Mismatched vector of matrices A");
  thrust::device_vector<float> devA = this->repack_vector_of_matrices(A); // Matrices on device
  unsigned int nthreads = Size();
  int tpb = threads_per_block;
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0); // Round up
  EddyKernels::slice_wise_affine_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                                        thrust::raw_pointer_cast(devA.data()),
                                                                        XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_wise_affine_transform_coordinates");
  _init = true;
} EddyCatch
// Applies a general transform, given by the first three rows of A, the
// displacement field dfield and the first three rows of B, to the
// coordinates using the EddyKernels::general_transform_coordinates kernel,
// one thread per coordinate. The coordinates are flagged as initialised
// afterwards.
void CudaImageCoordinates::Transform(const NEWMAT::Matrix&     A,
                                     const EDDY::CudaVolume4D& dfield,
                                     const NEWMAT::Matrix&     B) EddyTry
{
  unsigned int nthreads = Size();
  int tpb = threads_per_block;
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0); // Round up
  EddyKernels::general_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                              dfield.GetPtr(0),dfield.GetPtr(1),dfield.GetPtr(2),
                                                              A(1,1),A(1,2),A(1,3),A(1,4),
                                                              A(2,1),A(2,2),A(2,3),A(2,4),
                                                              A(3,1),A(3,2),A(3,3),A(3,4),
                                                              B(1,1),B(1,2),B(1,3),B(1,4),
                                                              B(2,1),B(2,2),B(2,3),B(2,4),
                                                              B(3,1),B(3,2),B(3,3),B(3,4),
                                                              XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::general_transform_coordinates");
  _init = true;
} EddyCatch
// Applies slice-wise general transforms, given by the matrices in A, the
// displacement field dfield and the matrices in B, to the coordinates using
// the EddyKernels::slice_wise_general_transform_coordinates kernel, one
// thread per coordinate. A and B must each hold one matrix per slice
// (Size(2)), otherwise an EddyException is thrown. The coordinates are
// flagged as initialised afterwards.
void CudaImageCoordinates::Transform(const std::vector<NEWMAT::Matrix>& A,
                                     const EDDY::CudaVolume4D&          dfield,
                                     const std::vector<NEWMAT::Matrix>& B) EddyTry
{
  if (A.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::Transform: Mismatched vector of matrices A");
  if (B.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::Transform: Mismatched vector of matrices B");
  // Matrices on device
  thrust::device_vector<float> devA = this->repack_vector_of_matrices(A);
  thrust::device_vector<float> devB = this->repack_vector_of_matrices(B);
  unsigned int nthreads = Size();
  int tpb = threads_per_block;
  int nblocks = nthreads / tpb + ((nthreads % tpb) ? 1 : 0); // Round up
  EddyKernels::slice_wise_general_transform_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,
                                                                         dfield.GetPtr(0),dfield.GetPtr(1),dfield.GetPtr(2),
                                                                         thrust::raw_pointer_cast(devA.data()),
                                                                         thrust::raw_pointer_cast(devB.data()),
                                                                         XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_wise_general_transform_coordinates");
  _init = true;
} EddyCatch
// Calculates coordinates with the EddyKernels::slice_to_vol_xyz_coordinates
// kernel from the matrix M1, the slice-wise matrices R, the displacement
// field dfield and the matrix M2. The kernel is also given a pointer to
// zcoord to write to. The coordinates are flagged as initialised afterwards.
//
// R must hold one matrix per slice (Size(2)), and the off-diagonal elements
// of the upper left 3x3 part of M1 and M2 must be exactly zero. An
// EddyException is thrown if any of these conditions is violated.
void CudaImageCoordinates::GetSliceToVolXYZCoord(const NEWMAT::Matrix&               M1,
                                                 const std::vector<NEWMAT::Matrix>&  R,
                                                 const EDDY::CudaVolume4D&           dfield,
                                                 const NEWMAT::Matrix&               M2,
                                                 EDDY::CudaVolume&                   zcoord) EddyTry
{
  if (R.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::GetSliceToVolXYZCoord: Mismatched vector of matrices R");
  // The exceptions below used to be constructed without being thrown, which
  // meant that invalid matrices were silently accepted.
  if (M1(1,2) != 0.0 || M1(1,3) != 0.0 || M1(2,1) != 0.0 || M1(2,3) != 0.0 || M1(3,1) != 0.0 || M1(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolXYZCoord: Invalid M1 matrix");
  }
  if (M2(1,2) != 0.0 || M2(1,3) != 0.0 || M2(2,1) != 0.0 || M2(2,3) != 0.0 || M2(3,1) != 0.0 || M2(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolXYZCoord: Invalid M2 matrix");
  }
  // Transfer matrices to device
  thrust::device_vector<float> dM1 = this->repack_matrix(M1);
  thrust::device_vector<float> dR = this->repack_vector_of_matrices(R);
  thrust::device_vector<float> dM2 = this->repack_matrix(M2);
  // One thread per coordinate
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
  EddyKernels::slice_to_vol_xyz_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,dfield.GetPtr(0),dfield.GetPtr(1),
                                                             dfield.GetPtr(2),thrust::raw_pointer_cast(dM1.data()),
                                                             thrust::raw_pointer_cast(dR.data()),thrust::raw_pointer_cast(dM2.data()),
                                                             XPtr(),YPtr(),ZPtr(),zcoord.GetPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_to_vol_xyz_coordinates");
  _init=true;
  return;
} EddyCatch
// Calculates coordinates with the EddyKernels::slice_to_vol_z_coordinates
// kernel from the matrix M1, the slice-wise matrices R, the displacement
// field dfield and the matrix M2. The coordinates are flagged as initialised
// afterwards.
//
// R must hold one matrix per slice (Size(2)), and the off-diagonal elements
// of the upper left 3x3 part of M1 and M2 must be exactly zero. An
// EddyException is thrown if any of these conditions is violated.
void CudaImageCoordinates::GetSliceToVolZCoord(const NEWMAT::Matrix&               M1,
                                               const std::vector<NEWMAT::Matrix>&  R,
                                               const EDDY::CudaVolume4D&           dfield,
                                               const NEWMAT::Matrix&               M2) EddyTry
{
  if (R.size() != this->Size(2)) throw EddyException("CudaImageCoordinates::GetSliceToVolZCoord: Mismatched vector of matrices R");
  // The exceptions below used to be constructed without being thrown, which
  // meant that invalid matrices were silently accepted.
  if (M1(1,2) != 0.0 || M1(1,3) != 0.0 || M1(2,1) != 0.0 || M1(2,3) != 0.0 || M1(3,1) != 0.0 || M1(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolZCoord: Invalid M1 matrix");
  }
  if (M2(1,2) != 0.0 || M2(1,3) != 0.0 || M2(2,1) != 0.0 || M2(2,3) != 0.0 || M2(3,1) != 0.0 || M2(3,2) != 0.0) {
    throw EddyException("CudaImageCoordinates::GetSliceToVolZCoord: Invalid M2 matrix");
  }
  // Transfer matrices to device
  thrust::device_vector<float> dM1 = this->repack_matrix(M1);
  thrust::device_vector<float> dR = this->repack_vector_of_matrices(R);
  thrust::device_vector<float> dM2 = this->repack_matrix(M2);
  // One thread per coordinate
  int tpb = threads_per_block;
  unsigned int nthreads = Size();
  int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
  EddyKernels::slice_to_vol_z_coordinates<<<nblocks,tpb>>>(_xn,_yn,_zn,dfield.GetPtr(0),dfield.GetPtr(1),
                                                           dfield.GetPtr(2),thrust::raw_pointer_cast(dM1.data()),
                                                           thrust::raw_pointer_cast(dR.data()),thrust::raw_pointer_cast(dM2.data()),
                                                           XPtr(),YPtr(),ZPtr(),_init,nthreads);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::slice_to_vol_z_coordinates");
  _init=true;
  return;
} EddyCatch
// Subtracts the coordinates in rhs from the coordinates in this object.
// Throws EddyException if the two objects differ in size. If this object has
// not been initialised its coordinates are first created by init_coord().
// If rhs has not been initialised, the EddyKernels::implicit_coord_sub kernel
// is used on this object's coordinates (rhs's stored coordinates are not
// read); otherwise the stored x-, y- and z-coordinates of rhs are subtracted
// element by element.
CudaImageCoordinates& CudaImageCoordinates::operator-=(const CudaImageCoordinates& rhs) EddyTry
{
  if (this->Size() != rhs.Size()) throw EddyException("CudaImageCoordinates::operator-=: Size mismatch.");
  if (!_init) init_coord();
  if (!rhs._init) {
    int tpb = threads_per_block;
    unsigned int nthreads = Size();
    int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb;
    EddyKernels::implicit_coord_sub<<<nblocks,tpb>>>(_xn,_yn,_zn,XPtr(),YPtr(),ZPtr(),nthreads);
    // Check the kernel launch in the same way as for all other kernels of this class
    EddyCudaHelperFunctions::CudaSync("EddyKernels::implicit_coord_sub");
  }
  else {
    try {
      thrust::transform(_x.begin(),_x.end(),rhs._x.begin(),_x.begin(),thrust::minus<float>());
      thrust::transform(_y.begin(),_y.end(),rhs._y.begin(),_y.begin(),thrust::minus<float>());
      thrust::transform(_z.begin(),_z.end(),rhs._z.begin(),_z.begin(),thrust::minus<float>());
    }
    catch(thrust::system_error &e) {
      std::cerr << "thrust::system_error thrown in CudaImageCoordinates::::operator-= with message: " << e.what() << std::endl;
      throw;
    }
  }
  return(*this);
} EddyCatch
/// Returns the coordinates as a Size() x 3 matrix with x, y and z in
/// columns 1, 2 and 3. The three device vectors are first copied to the host.
NEWMAT::Matrix CudaImageCoordinates::AsMatrix() const EddyTry
{
  NEWMAT::Matrix coords(Size(),3);
  // Device -> host copies
  thrust::host_vector<float> host_x(_x);
  thrust::host_vector<float> host_y(_y);
  thrust::host_vector<float> host_z(_z);
  // NEWMAT indexing is one-based, hence row = idx+1
  unsigned int idx = 0;
  while (idx < Size()) {
    const unsigned int row = idx + 1;
    coords(row,1) = host_x[idx];
    coords(row,2) = host_y[idx];
    coords(row,3) = host_z[idx];
    ++idx;
  }
  return coords;
} EddyCatch
/// Writes the coordinates (as returned by AsMatrix()) to the ascii file fname.
/// If n is non-zero and smaller than Size() only the first n rows are written,
/// otherwise the full matrix is written.
void CudaImageCoordinates::Write(const std::string& fname,
                                 unsigned int       n) const EddyTry
{
  NEWMAT::Matrix all_rows = AsMatrix();
  const bool write_everything = (n == 0) || (n >= Size());
  if (write_everything) {
    MISCMATHS::write_ascii_matrix(fname,all_rows);
  }
  else {
    MISCMATHS::write_ascii_matrix(fname,all_rows.Rows(1,n));
  }
} EddyCatch
/// Launches EddyKernels::make_coordinates with one thread per coordinate,
/// waits for it to finish and marks the object as initialised.
void CudaImageCoordinates::init_coord() EddyTry
{
  unsigned int total = Size();
  int block_size = threads_per_block;
  // Number of full blocks, plus one extra if there is a remainder
  int grid_size = total / block_size;
  if (total % block_size) grid_size = total / block_size + 1;
  EddyKernels::make_coordinates<<<grid_size,block_size>>>(_xn,_yn,_zn,XPtr(),YPtr(),ZPtr(),total);
  EddyCudaHelperFunctions::CudaSync("EddyKernels::make_coordinates");
  _init = true;
} EddyCatch
/// Packs the first three rows and four columns of A, row by row, into a
/// vector of 12 floats and returns it as a device vector.
thrust::device_vector<float> CudaImageCoordinates::repack_matrix(const NEWMAT::Matrix& A) EddyTry
{
  thrust::host_vector<float> packed(12);
  unsigned int k = 0;
  for (int row=1; row<=3; row++) {
    for (int col=1; col<=4; col++) {
      packed[k++] = A(row,col); // A is indexed one-based
    }
  }
  return packed; // Host-to-device transfer happens in the conversion to the return type
} EddyCatch
/// Packs the first three rows and four columns of every matrix in A, row by
/// row, into one contiguous vector (12 floats per matrix, matrices in the
/// order they appear in A) and returns it as a device vector.
thrust::device_vector<float> CudaImageCoordinates::repack_vector_of_matrices(const std::vector<NEWMAT::Matrix>& A) EddyTry
{
  thrust::host_vector<float> packed(12*A.size());
  for (unsigned int m=0; m<A.size(); m++) {
    const unsigned int base = 12*m; // Start of the slot for matrix m
    for (unsigned int row=0; row<3; row++) {
      for (unsigned int col=0; col<4; col++) {
        packed[base + 4*row + col] = A[m](row+1,col+1); // Matrices are indexed one-based
      }
    }
  }
  return packed; // Host-to-device transfer happens in the conversion to the return type
} EddyCatch
/////////////////////////////////////////////////////////////////////
///
/// \file CudaVolume.h
/// \brief Declarations of class intended to mimic some of the functionality of Newimage, but on the GPU.
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef CudaVolume_h
#define CudaVolume_h
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <hip/hip_runtime.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#pragma pop
#include "EddyHelperClasses.h"
#include "EddyKernels.h"
namespace EDDY {
class CudaImageCoordinates;
class CudaVolume4D;
class CudaVolume3D_2_4D_Helper;
class CudaVolume;
/****************************************************************//**
*
* \brief Helper class that implements some tasks common to
* CudaVolume and CudaVolume4D.
*
********************************************************************/
// Collection of static helpers. Every member is private, so only the two
// friend classes (CudaVolume and CudaVolume4D) can use them. The non-inline
// functions are only declared here; their definitions are not in this header.
class cuda_volume_utils
{
private:
friend class CudaVolume;
friend class CudaVolume4D;
// Presumably the block size used for the 1D convolution kernels -- confirm in the implementation file.
static const int threads_per_block_convolve_1D = 128;
/// Returns a*a.
static float sqr(float a) { return(a*a); }
/// Returns a host vector; judging by the name, a 1D Gaussian kernel with the given FWHM (confirm in implementation).
static thrust::host_vector<float> gaussian_1D_kernel(float fwhm); // fwhm in voxels
/// Smoothing helper called by CudaVolume::Smooth and CudaVolume4D::Smooth.
/// imaptr is the raw pointer those callers obtain from GetPtr().
static void smooth(float fwhm, // fwhm in mm
const std::vector<unsigned int>& sz,
const NEWIMAGE::volume<float>& hdr,
float *imaptr);
/// Operates on the range [imbegin,imend); judging by the name it divides by
/// divisor where mask is set (confirm in implementation).
static void divide_within_mask(const thrust::device_vector<float>& divisor,
const thrust::device_vector<float>& mask,
thrust::device_vector<float>::iterator imbegin,
thrust::device_vector<float>::iterator imend);
/// Compares two (matrix size, voxel size) pairs.
// NOTE(review): sz1/sz2 are std::vector<int> whereas CudaVolume and
// CudaVolume4D store their sizes as std::vector<unsigned int> -- confirm how callers convert.
static bool same_dim_size(const std::vector<int>& sz1,
const std::vector<float>& vxs1,
const std::vector<int>& sz2,
const std::vector<float>& vxs2);
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Helper class for transferring NEWIMAGE volumes to and from
/// a CUDA device.
///
/////////////////////////////////////////////////////////////////////
// Holds one 3D volume as a thrust::device_vector<float> (_devec) together
// with a NEWIMAGE::volume<float> header (_hdr), the matrix size (_sz) and a
// lazily valid cache of spline coefficients (_spcoef, flagged by _spv).
// Any non-const access to the data (GetPtr(), Begin(), End()) clears _spv.
class CudaVolume
{
public:
/// Default constructor. Creates a volume with matrix size 0x0x0.
CudaVolume() EddyTry : _spv(false), _sz(3,0) {} EddyCatch
/// Construct a CudaVolume from another. ifcv determines if data or only the header is copied.
/// When only the header is copied, memory for cv.Size() voxels is still allocated.
CudaVolume(const CudaVolume& cv, bool ifcv=true) EddyTry : _spv(false), _hdr(cv._hdr), _sz(cv._sz) {
if (ifcv) {_devec=cv._devec; _spcoef=cv._spcoef; _spv=cv._spv; } else _devec.resize(cv.Size());
} EddyCatch
/// Construct a CudaVolume from vol. ifvol determines if data or only the header is copied.
CudaVolume(const NEWIMAGE::volume<float>& vol, bool ifvol=true) EddyTry : _spv(false), _sz(3,0) {
common_assignment_from_newimage_vol(vol,ifvol);
} EddyCatch
/// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
/// Also discards any cached spline coefficients. No-op if cv is *this.
void SetHdr(const CudaVolume& cv) EddyTry {
if (this != &cv) { _spv=false; _sz=cv._sz; _hdr=cv._hdr; _devec.resize(cv.Size()); _spcoef.clear(); }
} EddyCatch
/// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
void SetHdr(const CudaVolume4D& cv);
/// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
void SetHdr(const NEWIMAGE::volume<float>& vol) EddyTry {
common_assignment_from_newimage_vol(vol,false);
} EddyCatch
/// Sets header and GPU data to that in cv (including cached spline coefficients).
CudaVolume& operator=(const CudaVolume& cv) EddyTry {
if (this != &cv) { _sz=cv._sz; _hdr=cv._hdr; _devec=cv._devec; _spcoef=cv._spcoef; _spv=cv._spv; } return(*this);
} EddyCatch
/// Sets header and GPU data to that in vol
CudaVolume& operator=(const NEWIMAGE::volume<float>& vol) EddyTry {
common_assignment_from_newimage_vol(vol,true); return(*this);
} EddyCatch
/// Samples volume at points given by coord and returns it in smpl
void Sample(const EDDY::CudaImageCoordinates& coord, CudaVolume& smpl) const;
/// Samples volume at points given by coord and returns it in smpl with derivs in dsmpl
void Sample(const EDDY::CudaImageCoordinates& coord, CudaVolume& smpl, CudaVolume4D& dsmpl) const;
/// Check which coordinates fall outside FOV and for which extrapolation isn't valid
void ValidMask(const EDDY::CudaImageCoordinates& coord, CudaVolume& mask) const;
/// Do an interpolation in z given columns with varying z-coordinates
// NOTE(review): oima is passed by value, so changes made to it cannot be
// seen by the caller -- confirm whether a reference was intended.
void ResampleStack(const CudaVolume& zcoord, const CudaVolume& inmask, CudaVolume oima) const;
/// Adds GPU data in rhs to *this
CudaVolume& operator+=(const CudaVolume& rhs);
/// Subtracts GPU data in rhs from *this
CudaVolume& operator-=(const CudaVolume& rhs);
/// Multiplies GPU data in *this with rhs
CudaVolume& operator*=(const CudaVolume& rhs);
/// Divides GPU data in *this with scalar
CudaVolume& operator/=(float a);
/// + operator. Implemented as a copy of *this followed by +=.
const CudaVolume operator+(const CudaVolume& rhs) const EddyTry { return(CudaVolume(*this) += rhs); } EddyCatch
/// - operator. Implemented as a copy of *this followed by -=.
const CudaVolume operator-(const CudaVolume& rhs) const EddyTry { return(CudaVolume(*this) -= rhs); } EddyCatch
/// * operator. Implemented as a copy of *this followed by *=.
const CudaVolume operator*(const CudaVolume& rhs) const EddyTry { return(CudaVolume(*this) *= rhs); } EddyCatch
/// /scalar operator. Implemented as a copy of *this followed by /=.
const CudaVolume operator/(float a) const EddyTry { return(CudaVolume(*this) /= a); } EddyCatch
/// Smooths to requested FWHM. Any cached spline coefficients are discarded.
void Smooth(float fwhm) EddyTry { cuda_volume_utils::smooth(fwhm,_sz,_hdr,this->GetPtr()); if (_spv) { _spcoef.clear(); _spv=false; } } EddyCatch
/// Smooths to requested FWHM within mask
void Smooth(float fwhm, const CudaVolume& mask);
/// Performs += a*pv;
void MultiplyAndAddToMe(const CudaVolume& pv, float a);
/// Performs += a * (pv - nv);
void SubtractMultiplyAndAddToMe(const CudaVolume& pv, const CudaVolume& nv, float a);
/// Performs += pow(pv-nv,2);
void SubtractSquareAndAddToMe(const CudaVolume& pv, const CudaVolume& nv);
/// Divides one image with another for all voxels within mask
void DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask);
/// Set all voxels greater than tv to one and the rest to zero
CudaVolume& Binarise(float tv);
/// Set all voxels > ll and < ul to one and the rest to zero
CudaVolume& Binarise(float ll, float ul);
/// Make volume with N(mu,sigma) distributed noise
CudaVolume& MakeNormRand(float mu, float sigma);
/// Returns sum of all voxel values inside mask
double Sum(const CudaVolume& mask) const;
/// Returns sum of all voxel values. Forwards a default-constructed (empty) volume as mask.
double Sum() const EddyTry { CudaVolume skrutt; return(Sum(skrutt)); } EddyCatch
/// Returns sum-of-squares of all voxel values inside mask
double SumOfSquares(const CudaVolume& mask) const;
/// Returns sum-of-squares of all voxel values. Forwards a default-constructed (empty) volume as mask.
double SumOfSquares() const EddyTry { CudaVolume skrutt; return(SumOfSquares(skrutt)); } EddyCatch
/// Returns max of all voxel values inside mask
double Max(const CudaVolume& mask) const;
/// Returns max of all voxel values. Forwards a default-constructed (empty) volume as mask.
double Max() const EddyTry { CudaVolume skrutt; return(Max(skrutt)); } EddyCatch
/// Returns max of the absolute values of all voxel values inside mask
double MaxAbs(const CudaVolume& mask) const;
/// Returns max of the absolute values of all voxel values. Forwards a default-constructed (empty) volume as mask.
double MaxAbs() const EddyTry { CudaVolume skrutt; return(MaxAbs(skrutt)); } EddyCatch
/// Assigns val to all voxels
CudaVolume& operator=(float val);
/// Returns true if basic image dimensions are the same. Does NOT consider the data.
bool operator==(const CudaVolume& rhs) const;
/// Same as !(lhs==rhs)
bool operator!=(const CudaVolume& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
/// Returns true if basic image dimensions are the same. Does NOT consider the data.
bool operator==(const NEWIMAGE::volume<float>& rhs) const;
/// Same as !(lhs==rhs)
bool operator!=(const NEWIMAGE::volume<float>& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
/// Returns true if basic image dimensions are the same. Does NOT consider the data.
bool operator==(const CudaVolume4D& rhs) const;
/// Same as !(lhs==rhs)
bool operator!=(const CudaVolume4D& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
/// Writes some useful debug info to the stream out. N.B. not a member function
friend std::ostream& operator<<(std::ostream& out, const CudaVolume& cv) EddyTry {
out << "Matrix size: " << cv._sz[0] << ", " << cv._sz[1] << ", " << cv._sz[2] << std::endl;
out << "Voxel size: " << cv._hdr.xdim() << "mm, " << cv._hdr.ydim() << "mm, " << cv._hdr.zdim() << "mm" << std::endl;
out << "_devec.size() = " << cv._devec.size() << ", _spv = " << cv._spv << ", _spcoef.size() = " << cv._spcoef.size();
return(out);
} EddyCatch
/// Returns a pointer to the memory on the GPU, or 0 if the volume is empty.
/// Marks the spline coefficients as invalid since the caller may modify the data.
float *GetPtr() EddyTry { _spv=false; return((Size()) ? thrust::raw_pointer_cast(_devec.data()) : 0); } EddyCatch
/// Returns a const pointer to the memory on the GPU, or 0 if the volume is empty.
const float *GetPtr() const EddyTry { return((Size()) ? thrust::raw_pointer_cast(_devec.data()) : 0); } EddyCatch
/// Returns an iterator to the start of the memory on the GPU. Marks the spline coefficients as invalid.
thrust::device_vector<float>::iterator Begin() { _spv=false; return(_devec.begin()); }
/// Returns an iterator to the end of the memory on the GPU. Marks the spline coefficients as invalid.
thrust::device_vector<float>::iterator End() { _spv=false; return(_devec.end()); }
/// Returns a const iterator to the start of the memory on the GPU
thrust::device_vector<float>::const_iterator Begin() const { return(_devec.begin()); }
/// Returns a const iterator to the end of the memory on the GPU
thrust::device_vector<float>::const_iterator End() const { return(_devec.end()); }
/// Returns the total size of the volume (number of voxels)
unsigned int Size() const { return(_sz[0]*_sz[1]*_sz[2]); }
/// Returns the matrix size in direction indx, indx=0,1,2.
unsigned int Size(unsigned int indx) const;
/// Returns the voxel size (mm) in direction indx, indx=0,1,2.
float Vxs(unsigned int indx) const;
/// Returns image-to-world mapping matrix
NEWMAT::Matrix Ima2WorldMatrix() const; // { return(_hdr.sampling_mat()); } Actual definition in .cu
/// Returns world-to-image mapping matrix (inverse of the image-to-world matrix)
NEWMAT::Matrix World2ImaMatrix() const; // { return(_hdr.sampling_mat().i()); } Actual definition in .cu
/// Returns interpolation method.
NEWIMAGE::interpolation Interp() const EddyTry { return(_hdr.getinterpolationmethod()); } EddyCatch
/// Returns extrapolation method.
NEWIMAGE::extrapolation Extrap() const EddyTry { return(_hdr.getextrapolationmethod()); } EddyCatch
/// Returns a vector indicating in which directions extrapolation is valid (e.g. periodic in the PE direction).
std::vector<bool> ExtrapValid() const EddyTry { return(_hdr.getextrapolationvalidity()); } EddyCatch
/// Sets interpolation method
void SetInterp(NEWIMAGE::interpolation im) EddyTry { _hdr.setinterpolationmethod(im); } EddyCatch
/// Sets extrapolation method
void SetExtrap(NEWIMAGE::extrapolation im) EddyTry { _hdr.setextrapolationmethod(im); } EddyCatch
/// Copies the data from GPU into provided volume.
void GetVolume(NEWIMAGE::volume<float>& ovol) const;
/// Copies the data from GPU into returned volume.
NEWIMAGE::volume<float> GetVolume() const EddyTry { NEWIMAGE::volume<float> ovol; GetVolume(ovol); return(ovol); } EddyCatch
/// Writes image to disc
void Write(const std::string& fname) const EddyTry { NEWIMAGE::write_volume(GetVolume(),fname); } EddyCatch
/// Copies the spline coefficients from GPU into provided volume.
void GetSplineCoefs(NEWIMAGE::volume<float>& ovol) const;
/// Copies the spline coefficients from GPU into returned volume.
NEWIMAGE::volume<float> GetSplineCoefs() const EddyTry { NEWIMAGE::volume<float> ovol; GetSplineCoefs(ovol); return(ovol); } EddyCatch
/// Writes spline coefficients to disc
void WriteSplineCoefs(const std::string& fname) const EddyTry { NEWIMAGE::write_volume(GetSplineCoefs(),fname); } EddyCatch
friend class CudaVolume4D; // To allow CudaVolume4D access to private members.
friend class CudaVolume3D_2_4D_Helper; // To allow CudaVolume3D_2_4D_Helper to access private members
private:
// Presumably block sizes for the corresponding kernel launches in the .cu file -- confirm there.
static const int threads_per_block_interpolate = 128;
static const int threads_per_block_deconv = 128;
static const int threads_per_block_smaatm = 128;
static const int threads_per_block_ssaatm = 128;
thrust::device_vector<float> _devec; // Voxel data on the GPU
mutable thrust::device_vector<float> _spcoef; // Spline coefficients for 3D deconv
mutable bool _spv; // True if spcoef valid
NEWIMAGE::volume<float> _hdr; // Header (voxel sizes, inter/extrapolation settings)
std::vector<unsigned int> _sz; // Matrix size in x, y and z
// Shared implementation for construction/assignment/SetHdr from a NEWIMAGE volume.
// ifvol is forwarded by the callers as "copy the data too" (true) or "header only" (false).
void common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
bool ifvol);
// Raw pointer to the spline coefficients on the GPU
const float *sp_ptr() const EddyTry { return(thrust::raw_pointer_cast(_spcoef.data())); } EddyCatch
// Declared here, defined in the .cu file. It is a const member, which is why _spcoef and _spv are mutable.
void calculate_spline_coefs(const std::vector<unsigned int>& sz,
const thrust::device_vector<float>& ima,
thrust::device_vector<float>& coef) const;
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Helper class for transferring NEWIMAGE 4D volumes to and
/// from a CUDA device.
///
/////////////////////////////////////////////////////////////////////
// Holds a 4D volume as one contiguous thrust::device_vector<float> (_devec)
// in which volume i occupies the elements [i*Size(), (i+1)*Size()).
// _sz holds the matrix size in x, y, z and the number of volumes, and _hdr
// is a 3D NEWIMAGE header shared by all volumes.
class CudaVolume4D
{
public:
  /// Default constructor. Creates a volume with size 0x0x0x0.
  CudaVolume4D() EddyTry : _sz(4,0) {} EddyCatch
  /// Construct from another 4D volume. ifcv determines if data or only the header is copied.
  CudaVolume4D(const CudaVolume4D& cv, bool ifcv=true) EddyTry : _sz(cv._sz), _hdr(cv._hdr) {
    if (ifcv) _devec = cv._devec;
    else _devec.resize(cv._devec.size());
  } EddyCatch
  /// Construct a 4D volume with nv volumes from a 3D volume. If ifcv is true the data in cv is copied into every volume.
  CudaVolume4D(const CudaVolume& cv, unsigned int nv, bool ifcv=true) EddyTry : _sz(4,0), _hdr(cv._hdr) {
    _sz[0]=cv._sz[0]; _sz[1]=cv._sz[1]; _sz[2]=cv._sz[2]; _sz[3]=nv;
    _devec.resize(_sz[3]*cv._devec.size());
    if (ifcv) for (unsigned int i=0; i<_sz[3]; i++) thrust::copy(cv._devec.begin(),cv._devec.end(),this->volbegin(i));
  } EddyCatch
  /// Construct from a NEWIMAGE volume. ifvol determines if data or only the header is copied.
  CudaVolume4D(const NEWIMAGE::volume<float>& vol, bool ifvol=true) EddyTry : _sz(4,0) {
    common_assignment_from_newimage_vol(vol,0,ifvol);
  } EddyCatch
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const CudaVolume4D& cv) EddyTry {
    if (this != &cv) { _sz=cv._sz; _hdr=cv._hdr; _devec.resize(cv._devec.size()); }
  } EddyCatch
  /// Sets header from a 3D volume and allocates memory for nv volumes on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const CudaVolume& cv, unsigned int nv) EddyTry {
    // Each spatial dimension must be copied from the same dimension of cv,
    // otherwise Size() no longer agrees with the memory allocated below.
    _sz[0]=cv._sz[0]; _sz[1]=cv._sz[1]; _sz[2]=cv._sz[2]; _sz[3]=nv;
    _hdr=cv._hdr;
    _devec.resize(nv*cv._devec.size());
  } EddyCatch
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const NEWIMAGE::volume<float>& vol, unsigned int tsize_override=0) EddyTry {
    common_assignment_from_newimage_vol(vol,tsize_override,false);
  } EddyCatch
  /// Sets header and GPU data to that in cv
  CudaVolume4D& operator=(const CudaVolume4D& cv) EddyTry {
    if (this != &cv) { _sz=cv._sz; _hdr=cv._hdr; _devec=cv._devec; } return(*this);
  } EddyCatch
  /// Sets header and GPU data to that in vol
  CudaVolume4D& operator=(const NEWIMAGE::volume<float>& vol) EddyTry {
    common_assignment_from_newimage_vol(vol,0,true); return(*this);
  } EddyCatch
  /// Allows for assignments of type FourD[i] = ThreeD;
  CudaVolume3D_2_4D_Helper operator[](unsigned int indx);
  /// Assigns a 3D CudaVolume to a "slot" in the 4D volume
  void SetVolume(unsigned int i, const CudaVolume& vol);
  /// Adds GPU data in cv to *this
  CudaVolume4D& operator+=(const CudaVolume4D& cv);
  /// Subtracts GPU data in cv from *this
  CudaVolume4D& operator-=(const CudaVolume4D& cv);
  /// Multiply cv with this. It is a volume-by-volume elementwise multiplication.
  CudaVolume4D& operator*=(const CudaVolume4D& cv);
  /// Multiplies (masks) 4D data with 3D volume
  CudaVolume4D& operator*=(const CudaVolume& cv);
  /// * operator. Implemented as a copy of *this followed by *=.
  const CudaVolume4D operator*(const CudaVolume4D& rhs) const EddyTry { return(CudaVolume4D(*this) *= rhs); } EddyCatch
  /// - operator. Implemented as a copy of *this followed by -=.
  const CudaVolume4D operator-(const CudaVolume4D& rhs) const EddyTry { return(CudaVolume4D(*this) -= rhs); } EddyCatch
  /// Sums all volumes, collapsing it into a 3D volume
  const CudaVolume SumAlongFourthDim() const;
  /// Converts a set of (delta) coordinates to a displacement field in mm
  void CoordinatesToDisplacementField(const CudaImageCoordinates& coord);
  /// Divides all volumes with another (3D volume) for all voxels within mask (3D).
  void DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask);
  /// Smooths (3D) to requested FWHM, one volume at a time
  void Smooth(float fwhm) EddyTry { for (unsigned int i=0; i<_sz[3]; i++) cuda_volume_utils::smooth(fwhm,_sz,_hdr,this->GetPtr(i)); } EddyCatch
  /// Smooths (3D) to requested FWHM within mask
  void Smooth(float fwhm, const CudaVolume& mask);
  /// Assigns val to all voxels
  CudaVolume4D& operator=(float val);
  /// Returns true if basic image dimensions are the same. Does NOT consider the data or the fourth dimension.
  bool operator==(const CudaVolume4D& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const CudaVolume4D& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns true if basic image dimensions are the same. Does NOT consider the data or the fourth dimension.
  bool operator==(const CudaVolume& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const CudaVolume& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns true if basic image dimensions are the same. Does NOT consider the data or the fourth dimension.
  bool operator==(const NEWIMAGE::volume<float>& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const NEWIMAGE::volume<float>& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns a pointer to the memory on the GPU
  float *GetPtr() EddyTry { return(thrust::raw_pointer_cast(_devec.data())); } EddyCatch
  /// Returns a const pointer to the memory on the GPU
  const float *GetPtr() const EddyTry { return(thrust::raw_pointer_cast(_devec.data())); } EddyCatch
  /// Returns a pointer to the memory for a specific volume on the GPU. Throws if i is out of range.
  float *GetPtr(unsigned int i) EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::GetPtr: index out of range");
    return(thrust::raw_pointer_cast(_devec.data())+i*this->Size());
  } EddyCatch
  /// Returns a const pointer to the memory for a specific volume on the GPU. Throws if i is out of range.
  const float *GetPtr(unsigned int i) const EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::GetPtr: index out of range");
    return(thrust::raw_pointer_cast(_devec.data())+i*this->Size());
  } EddyCatch
  /// Returns an iterator to the start of the memory for volume i on the GPU
  thrust::device_vector<float>::iterator Begin(unsigned int i) EddyTry { return(this->volbegin(i)); } EddyCatch
  /// Returns an iterator to the end of the memory for volume i on the GPU
  thrust::device_vector<float>::iterator End(unsigned int i) EddyTry { return(this->volend(i)); } EddyCatch
  /// Returns a const iterator to the start of the memory for volume i on the GPU
  thrust::device_vector<float>::const_iterator Begin(unsigned int i) const EddyTry { return(this->volbegin(i)); } EddyCatch
  /// Returns a const iterator to the end of the memory for volume i on the GPU
  thrust::device_vector<float>::const_iterator End(unsigned int i) const EddyTry { return(this->volend(i)); } EddyCatch
  /// Returns the total size of one volume (number of voxels)
  unsigned int Size() const { return(_sz[0]*_sz[1]*_sz[2]); }
  /// Returns the matrix size in direction indx, indx=0,1,2,3.
  unsigned int Size(unsigned int indx) const;
  /// Returns the voxel size (mm) in direction indx, indx=0,1,2.
  float Vxs(unsigned int indx) const;
  /// Returns image-to-world mapping matrix
  NEWMAT::Matrix Ima2WorldMatrix() const; // { return(_hdr.sampling_mat()); } Actual definition in .cu
  /// Returns world-to-image mapping matrix (inverse of the image-to-world matrix)
  NEWMAT::Matrix World2ImaMatrix() const; // { return(_hdr.sampling_mat().i()); } Actual definition in .cu
  /// Return information about inter/extrapolation
  NEWIMAGE::interpolation Interp() const EddyTry { return(_hdr.getinterpolationmethod()); } EddyCatch
  NEWIMAGE::extrapolation Extrap() const EddyTry { return(_hdr.getextrapolationmethod()); } EddyCatch
  std::vector<bool> ExtrapValid() const EddyTry { return(_hdr.getextrapolationvalidity()); } EddyCatch
  /// Sets interpolation method
  void SetInterp(NEWIMAGE::interpolation im) EddyTry { _hdr.setinterpolationmethod(im); } EddyCatch
  /// Calculates first derivative in direction i of volume i (i = dir) sampled trilinearly at voxel centres
  void SampleTrilinearDerivOnVoxelCentres(unsigned int dir, const CudaVolume& mask, CudaVolume& deriv, bool add_one=true) const;
  /// Copies the data from GPU into returned 4D volume.
  NEWIMAGE::volume4D<float> GetVolume() const EddyTry { NEWIMAGE::volume4D<float> ovol; GetVolume(ovol); return(ovol); } EddyCatch
  /// Copies the data from GPU into provided 4D volume.
  void GetVolume(NEWIMAGE::volume4D<float>& ovol) const;
  /// Copies the data from indx'th volume into returned 3D volume
  NEWIMAGE::volume<float> GetVolume(unsigned int indx) const EddyTry { NEWIMAGE::volume<float> ovol; GetVolume(indx,ovol); return(ovol); } EddyCatch
  /// Copies the data from indx'th volume into provided 3D volume
  void GetVolume(unsigned int indx, NEWIMAGE::volume<float>& ovol) const;
  /// Writes 4D volume to disc.
  void Write(const std::string& fname) const EddyTry { NEWIMAGE::write_volume4D(GetVolume(),fname); } EddyCatch
  /// Writes 3D volume to disc.
  void Write(unsigned int indx, const std::string& fname) const EddyTry { NEWIMAGE::write_volume(GetVolume(indx),fname); } EddyCatch
  friend class CudaVolume; // To allow CudaVolume to access private members
  friend class CudaVolume3D_2_4D_Helper; // To allow CudaVolume3D_2_4D_Helper to access private members
private:
  static const int threads_per_block = 128;
  std::vector<unsigned int> _sz;        // Matrix size in x, y, z and number of volumes
  NEWIMAGE::volume<float> _hdr;         // 3D header shared by all volumes
  thrust::device_vector<float> _devec;  // All volumes, stored back to back on the GPU
  // Shared implementation for construction/assignment/SetHdr from a NEWIMAGE volume.
  void common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                           unsigned int tsize_override,
                                           bool ifvol);
  // Iterator to the first element of volume i. Throws if i is out of range.
  thrust::device_vector<float>::iterator volbegin(unsigned int i) EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::volbegin: index out of range");
    return(_devec.begin()+i*this->Size());
  } EddyCatch
  thrust::device_vector<float>::const_iterator volbegin(unsigned int i) const EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::volbegin:const: index out of range");
    return(_devec.begin()+i*this->Size());
  } EddyCatch
  // Iterator to one past the last element of volume i. Throws if i is out of range.
  thrust::device_vector<float>::iterator volend(unsigned int i) EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::volend: index out of range");
    if (i<_sz[3]-1) return(_devec.begin()+(i+1)*this->Size());
    else return(_devec.end());
  } EddyCatch
  thrust::device_vector<float>::const_iterator volend(unsigned int i) const EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::End:const: index out of range");
    if (i<_sz[3]-1) return(_devec.begin()+(i+1)*this->Size());
    else return(_devec.end());
  } EddyCatch
};
/****************************************************************//**
*
* \brief Tiny helper class whose only purpose is to allow for
* skrutt[i] = plutt;
* where skrutt is of type CudaVolume4D and plutt of type CudaVolume.
*
* It stores a reference to the 4D volume and an index. Its constructor
* is private, so only its friend CudaVolume4D can create instances.
*
********************************************************************/
class CudaVolume3D_2_4D_Helper
{
public:
// Defined elsewhere; presumably stores threed as volume _indx of _fourd -- confirm in the .cu file.
void operator=(const CudaVolume& threed);
friend class CudaVolume4D; // To allow CudaVolume4D to access private members
private:
CudaVolume3D_2_4D_Helper(CudaVolume4D& fourd, unsigned int indx) EddyTry : _fourd(fourd), _indx(indx) {} EddyCatch // N.B. Private
CudaVolume4D& _fourd; // The 4D volume this helper refers to (not owned)
unsigned int _indx; // Index given at construction
};
/****************************************************************//**
*
* \brief Helper class that manages a set of image coordinates in a way that
* it enables calculation/implementation of partial derivatives of
* images w.r.t. transformation parameters.
*
********************************************************************/
// Holds xn*yn*zn coordinates as three thrust::device_vector<float> (_x, _y
// and _z). _init records whether the vectors have been filled in: it is set
// by init_coord() and cleared by Resize().
class CudaImageCoordinates
{
public:
  /// Creates an empty (0x0x0) set of coordinates.
  CudaImageCoordinates() EddyTry : _xn(0), _yn(0), _zn(0), _init(false) {} EddyCatch
  /// Allocates xn*yn*zn coordinates. If init is true they are filled in by init_coord().
  CudaImageCoordinates(unsigned int xn, unsigned int yn, unsigned int zn, bool init=false) EddyTry
  : _xn(xn), _yn(yn), _zn(zn), _x(xn*yn*zn), _y(xn*yn*zn), _z(xn*yn*zn), _init(init) { if (init) init_coord(); } EddyCatch
  /// Changes the dimensions to xn*yn*zn and marks the coordinates as
  /// uninitialised. If init is true they are then filled in by init_coord().
  void Resize(unsigned int xn, unsigned int yn, unsigned int zn, bool init=false) EddyTry {
    _xn=xn; _yn=yn; _zn=zn;
    const unsigned int n = xn*yn*zn;
    // All three vectors must be resized; kernels are given pointers to
    // each of them together with Size() as the number of elements.
    _x.resize(n); _y.resize(n); _z.resize(n);
    _init=false;
    if (init) init_coord();
  } EddyCatch
  /// Affine transform
  void Transform(const NEWMAT::Matrix& A);
  /// Slice-wise affine transform
  void Transform(const std::vector<NEWMAT::Matrix>& A);
  /// General transform
  void Transform(const NEWMAT::Matrix& A, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& B);
  /// Slice-wise general transform
  void Transform(const std::vector<NEWMAT::Matrix>& A, const EDDY::CudaVolume4D& dfield, const std::vector<NEWMAT::Matrix>& B);
  /// Calculate x, y and z-coordinates for slice-to-vol (the tricky direction) transform
  void GetSliceToVolXYZCoord(const NEWMAT::Matrix& M1, const std::vector<NEWMAT::Matrix>& R, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& M2, EDDY::CudaVolume& zcoord);
  /// Calculate z-coordinates for slice-to-vol (the tricky direction) transform
  void GetSliceToVolZCoord(const NEWMAT::Matrix& M1, const std::vector<NEWMAT::Matrix>& R, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& M2);
  /// Returns the total number of coordinates
  unsigned int Size() const { return(_xn*_yn*_zn); }
  /// Returns the size in direction indx, indx=0,1,2. Throws if indx is out of range.
  unsigned int Size(unsigned int indx) const EddyTry {
    if (indx>2) throw EddyException("CudaImageCoordinates::Size: Index out of range.");
    return((!indx) ? _xn : ((indx==1) ? _yn : _zn));
  } EddyCatch
  /// Subtracts the coordinates in rhs from those in *this
  CudaImageCoordinates& operator-=(const CudaImageCoordinates& rhs);
  /// Returns a raw pointer to the start of the memory on the GPU
  const float *XPtr() const EddyTry { return(thrust::raw_pointer_cast(_x.data())); } EddyCatch
  const float *YPtr() const EddyTry { return(thrust::raw_pointer_cast(_y.data())); } EddyCatch
  const float *ZPtr() const EddyTry { return(thrust::raw_pointer_cast(_z.data())); } EddyCatch
  /// Returns coordinates as nx3 matrix. For debugging only.
  NEWMAT::Matrix AsMatrix() const;
  /// Writes list of coordinates to text-file
  void Write(const std::string& fname, unsigned int n=0) const;
private:
  // Non-const raw pointers, for handing to kernels that write the coordinates
  float *XPtr() EddyTry { return(thrust::raw_pointer_cast(_x.data())); } EddyCatch
  float *YPtr() EddyTry { return(thrust::raw_pointer_cast(_y.data())); } EddyCatch
  float *ZPtr() EddyTry { return(thrust::raw_pointer_cast(_z.data())); } EddyCatch
  static const int threads_per_block = 128;
  unsigned int _xn;
  unsigned int _yn;
  unsigned int _zn;
  thrust::device_vector<float> _x;
  thrust::device_vector<float> _y;
  thrust::device_vector<float> _z;
  bool _init; // True once the coordinate vectors have been filled in
  void init_coord();
  // Pack matrices into flat device vectors (12 floats per matrix) for use by kernels
  thrust::device_vector<float> repack_matrix(const NEWMAT::Matrix& A);
  thrust::device_vector<float> repack_vector_of_matrices(const std::vector<NEWMAT::Matrix>& A);
};
} // End namespace EDDY
#ifdef I_CUDAVOLUME_H_DEFINED_ET
#undef I_CUDAVOLUME_H_DEFINED_ET
#undef EXPOSE_TREACHEROUS // Avoid exporting dodgy routines
#endif
#endif // End #ifndef CudaVolume_h
/////////////////////////////////////////////////////////////////////
///
/// \file CudaVolume.h
/// \brief Declarations of class intended to mimic some of the functionality of Newimage, but on the GPU.
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef CudaVolume_h
#define CudaVolume_h
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <cuda.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#pragma pop
#include "EddyHelperClasses.h"
#include "EddyKernels.h"
namespace EDDY {
class CudaImageCoordinates;
class CudaVolume4D;
class CudaVolume3D_2_4D_Helper;
class CudaVolume;
/****************************************************************//**
*
* \brief Helper class that implements some tasks common to
* CudaVolume and CudaVolume4D.
*
********************************************************************/
// Collection of static helpers. Every member is private, so only the two
// friend classes (CudaVolume and CudaVolume4D) can use them. The non-inline
// functions are only declared here; their definitions are not in this header.
class cuda_volume_utils
{
private:
friend class CudaVolume;
friend class CudaVolume4D;
// Presumably the block size used for the 1D convolution kernels -- confirm in the implementation file.
static const int threads_per_block_convolve_1D = 128;
/// Returns a*a.
static float sqr(float a) { return(a*a); }
/// Returns a host vector; judging by the name, a 1D Gaussian kernel with the given FWHM (confirm in implementation).
static thrust::host_vector<float> gaussian_1D_kernel(float fwhm); // fwhm in voxels
/// Smoothing helper; judging by the parameters it works in place on the
/// image that imaptr points to (confirm in implementation).
static void smooth(float fwhm, // fwhm in mm
const std::vector<unsigned int>& sz,
const NEWIMAGE::volume<float>& hdr,
float *imaptr);
/// Operates on the range [imbegin,imend); judging by the name it divides by
/// divisor where mask is set (confirm in implementation).
static void divide_within_mask(const thrust::device_vector<float>& divisor,
const thrust::device_vector<float>& mask,
thrust::device_vector<float>::iterator imbegin,
thrust::device_vector<float>::iterator imend);
/// Compares two (matrix size, voxel size) pairs.
static bool same_dim_size(const std::vector<int>& sz1,
const std::vector<float>& vxs1,
const std::vector<int>& sz2,
const std::vector<float>& vxs2);
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Helper class for transferring NEWIMAGE volumes to and from
/// a CUDA device.
///
/////////////////////////////////////////////////////////////////////
class CudaVolume
{
public:
  /// Default constructor. Creates a zero-sized volume with spline coefficients marked invalid.
  CudaVolume() EddyTry : _spv(false), _sz(3,0) {} EddyCatch
  /// Construct a CudaVolume from another. ifcv determines if data or only the header is copied.
  /// When ifcv is false, GPU memory of matching size is allocated but no data is copied.
  CudaVolume(const CudaVolume& cv, bool ifcv=true) EddyTry : _spv(false), _hdr(cv._hdr), _sz(cv._sz) {
    if (ifcv) {_devec=cv._devec; _spcoef=cv._spcoef; _spv=cv._spv; } else _devec.resize(cv.Size());
  } EddyCatch
  /// Construct a CudaVolume from vol. ifvol determines if data or only the header is copied.
  CudaVolume(const NEWIMAGE::volume<float>& vol, bool ifvol=true) EddyTry : _spv(false), _sz(3,0) {
    common_assignment_from_newimage_vol(vol,ifvol);
  } EddyCatch
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  /// Any existing spline coefficients are discarded. Self-assignment is a no-op.
  void SetHdr(const CudaVolume& cv) EddyTry {
    if (this != &cv) { _spv=false; _sz=cv._sz; _hdr=cv._hdr; _devec.resize(cv.Size()); _spcoef.clear(); }
  } EddyCatch
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const CudaVolume4D& cv);
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const NEWIMAGE::volume<float>& vol) EddyTry {
    common_assignment_from_newimage_vol(vol,false);
  } EddyCatch
  /// Sets header and GPU data (including spline coefficients and their validity flag) to that in cv
  CudaVolume& operator=(const CudaVolume& cv) EddyTry {
    if (this != &cv) { _sz=cv._sz; _hdr=cv._hdr; _devec=cv._devec; _spcoef=cv._spcoef; _spv=cv._spv; } return(*this);
  } EddyCatch
  /// Sets header and GPU data to that in vol
  CudaVolume& operator=(const NEWIMAGE::volume<float>& vol) EddyTry {
    common_assignment_from_newimage_vol(vol,true); return(*this);
  } EddyCatch
  /// Samples volume at points given by coord and returns it in smpl
  void Sample(const EDDY::CudaImageCoordinates& coord, CudaVolume& smpl) const;
  /// Samples volume at points given by coord and returns it in smpl with derivs in dsmpl
  void Sample(const EDDY::CudaImageCoordinates& coord, CudaVolume& smpl, CudaVolume4D& dsmpl) const;
  /// Check which coordinates falls outside FOV and for which extrapolation isn't valid
  void ValidMask(const EDDY::CudaImageCoordinates& coord, CudaVolume& mask) const;
  /// Do an interpolation in z given columns with varying z-coordinates.
  /// NOTE(review): oima looks like an output but is passed by value, so the caller's
  /// object cannot be updated through it. The signature must match the out-of-line
  /// definition, so it is left unchanged here -- confirm against the .cu file.
  void ResampleStack(const CudaVolume& zcoord, const CudaVolume& inmask, CudaVolume oima) const;
  /// Adds GPU data in rhs to *this
  CudaVolume& operator+=(const CudaVolume& rhs);
  /// Subtracts GPU data in rhs from *this
  CudaVolume& operator-=(const CudaVolume& rhs);
  /// Multiplies GPU data in *this with rhs
  CudaVolume& operator*=(const CudaVolume& rhs);
  /// Divides GPU data in *this with scalar
  CudaVolume& operator/=(float a);
  /// + operator. Implemented as a copy of *this followed by +=.
  const CudaVolume operator+(const CudaVolume& rhs) const EddyTry { return(CudaVolume(*this) += rhs); } EddyCatch
  /// - operator. Implemented as a copy of *this followed by -=.
  const CudaVolume operator-(const CudaVolume& rhs) const EddyTry { return(CudaVolume(*this) -= rhs); } EddyCatch
  /// * operator. Implemented as a copy of *this followed by *=.
  const CudaVolume operator*(const CudaVolume& rhs) const EddyTry { return(CudaVolume(*this) *= rhs); } EddyCatch
  /// /scalar operator. Implemented as a copy of *this followed by /=.
  const CudaVolume operator/(float a) const EddyTry { return(CudaVolume(*this) /= a); } EddyCatch
  /// Smooths to requested FWHM (mm) and discards any spline coefficients.
  void Smooth(float fwhm) EddyTry {
    cuda_volume_utils::smooth(fwhm,_sz,_hdr,this->GetPtr());
    // The non-const GetPtr() above has already reset _spv, so testing _spv here
    // (as the code used to) could never be true and the stale coefficients were
    // never cleared. Clear them unconditionally; clear() on an empty vector is a no-op.
    _spcoef.clear(); _spv=false;
  } EddyCatch
  /// Smooths to requested FWHM within mask
  void Smooth(float fwhm, const CudaVolume& mask);
  /// Performs += a*pv;
  void MultiplyAndAddToMe(const CudaVolume& pv, float a);
  /// Performs += a * (pv - nv);
  void SubtractMultiplyAndAddToMe(const CudaVolume& pv, const CudaVolume& nv, float a);
  /// Performs += pow(pv-nv,2);
  void SubtractSquareAndAddToMe(const CudaVolume& pv, const CudaVolume& nv);
  /// Divides one image with another for all voxels within mask
  void DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask);
  /// Set all voxels greater than tv to one and the rest to zero
  CudaVolume& Binarise(float tv);
  /// Set all voxels > ll and < ul to one and the rest to zero
  CudaVolume& Binarise(float ll, float ul);
  /// Make volume with N(mu,sigma) distributed noise
  CudaVolume& MakeNormRand(float mu, float sigma);
  /// Returns sum of all voxel values inside mask
  double Sum(const CudaVolume& mask) const;
  /// Returns sum of all voxel values. Forwards a default-constructed (zero-size) volume as mask.
  double Sum() const EddyTry { CudaVolume skrutt; return(Sum(skrutt)); } EddyCatch
  /// Returns sum-of-squares of all voxel values inside mask
  double SumOfSquares(const CudaVolume& mask) const;
  /// Returns sum-of-squares of all voxel values. Forwards a default-constructed (zero-size) volume as mask.
  double SumOfSquares() const EddyTry { CudaVolume skrutt; return(SumOfSquares(skrutt)); } EddyCatch
  /// Returns max of all voxel values inside mask
  double Max(const CudaVolume& mask) const;
  /// Returns max of all voxel values. Forwards a default-constructed (zero-size) volume as mask.
  double Max() const EddyTry { CudaVolume skrutt; return(Max(skrutt)); } EddyCatch
  /// Returns max of the absolute values of all voxel values inside mask
  double MaxAbs(const CudaVolume& mask) const;
  /// Returns max of the absolute values of all voxel values. Forwards a default-constructed (zero-size) volume as mask.
  double MaxAbs() const EddyTry { CudaVolume skrutt; return(MaxAbs(skrutt)); } EddyCatch
  /// Assigns val to all voxels
  CudaVolume& operator=(float val);
  /// Returns true if basic image dimensions are the same. Does NOT consider the data.
  bool operator==(const CudaVolume& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const CudaVolume& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns true if basic image dimensions are the same. Does NOT consider the data.
  bool operator==(const NEWIMAGE::volume<float>& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const NEWIMAGE::volume<float>& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns true if basic image dimensions are the same. Does NOT consider the data.
  bool operator==(const CudaVolume4D& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const CudaVolume4D& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Writes some useful debug info to the screen. N.B. not a member function
  friend std::ostream& operator<<(std::ostream& out, const CudaVolume& cv) EddyTry {
    out << "Matrix size: " << cv._sz[0] << ", " << cv._sz[1] << ", " << cv._sz[2] << std::endl;
    out << "Voxel size: " << cv._hdr.xdim() << "mm, " << cv._hdr.ydim() << "mm, " << cv._hdr.zdim() << "mm" << std::endl;
    out << "_devec.size() = " << cv._devec.size() << ", _spv = " << cv._spv << ", _spcoef.size() = " << cv._spcoef.size();
    return(out);
  } EddyCatch
  /// Returns a pointer to the memory on the GPU (0 for a zero-sized volume).
  /// Marks the spline coefficients invalid since the caller may modify the data.
  float *GetPtr() EddyTry { _spv=false; return((Size()) ? thrust::raw_pointer_cast(_devec.data()) : 0); } EddyCatch
  /// Returns a const pointer to the memory on the GPU (0 for a zero-sized volume)
  const float *GetPtr() const EddyTry { return((Size()) ? thrust::raw_pointer_cast(_devec.data()) : 0); } EddyCatch
  /// Returns an iterator to the start of the memory on the GPU. Marks the spline coefficients invalid.
  thrust::device_vector<float>::iterator Begin() { _spv=false; return(_devec.begin()); }
  /// Returns an iterator to the end of the memory on the GPU. Marks the spline coefficients invalid.
  thrust::device_vector<float>::iterator End() { _spv=false; return(_devec.end()); }
  /// Returns a const iterator to the start of the memory on the GPU
  thrust::device_vector<float>::const_iterator Begin() const { return(_devec.begin()); }
  /// Returns a const iterator to the end of the memory on the GPU
  thrust::device_vector<float>::const_iterator End() const { return(_devec.end()); }
  /// Returns the total size (number of voxels) of the volume
  unsigned int Size() const { return(_sz[0]*_sz[1]*_sz[2]); }
  /// Returns the matrix size in direction indx, indx=0,1,2.
  unsigned int Size(unsigned int indx) const;
  /// Returns the voxel size (mm) in direction indx, indx=0,1,2.
  float Vxs(unsigned int indx) const;
  /// Returns image-to-world mapping matrix
  NEWMAT::Matrix Ima2WorldMatrix() const; // { return(_hdr.sampling_mat()); } Actual definition in .cu
  /// Returns world-to-image mapping matrix
  NEWMAT::Matrix World2ImaMatrix() const; // { return(_hdr.sampling_mat().i()); } Actual definition in .cu
  /// Returns interpolation method.
  NEWIMAGE::interpolation Interp() const EddyTry { return(_hdr.getinterpolationmethod()); } EddyCatch
  /// Returns extrapolation method.
  NEWIMAGE::extrapolation Extrap() const EddyTry { return(_hdr.getextrapolationmethod()); } EddyCatch
  /// Returns a vector indicating in which directions extrapolation is valid (e.g. periodic in the PE direction).
  std::vector<bool> ExtrapValid() const EddyTry { return(_hdr.getextrapolationvalidity()); } EddyCatch
  /// Sets interpolation method
  void SetInterp(NEWIMAGE::interpolation im) EddyTry { _hdr.setinterpolationmethod(im); } EddyCatch
  /// Sets extrapolation method
  void SetExtrap(NEWIMAGE::extrapolation im) EddyTry { _hdr.setextrapolationmethod(im); } EddyCatch
  /// Copies the data from GPU into provided volume.
  void GetVolume(NEWIMAGE::volume<float>& ovol) const;
  /// Copies the data from GPU into returned volume.
  NEWIMAGE::volume<float> GetVolume() const EddyTry { NEWIMAGE::volume<float> ovol; GetVolume(ovol); return(ovol); } EddyCatch
  /// Writes image to disc
  void Write(const std::string& fname) const EddyTry { NEWIMAGE::write_volume(GetVolume(),fname); } EddyCatch
  /// Copies the spline coefficients from GPU into provided volume.
  void GetSplineCoefs(NEWIMAGE::volume<float>& ovol) const;
  /// Copies the spline coefficients from GPU into returned volume.
  NEWIMAGE::volume<float> GetSplineCoefs() const EddyTry { NEWIMAGE::volume<float> ovol; GetSplineCoefs(ovol); return(ovol); } EddyCatch
  /// Writes spline coefficients to disc
  void WriteSplineCoefs(const std::string& fname) const EddyTry { NEWIMAGE::write_volume(GetSplineCoefs(),fname); } EddyCatch
  friend class CudaVolume4D; // To allow CudaVolume4D access to private members.
  friend class CudaVolume3D_2_4D_Helper; // To allow CudaVolume3D_2_4D_Helper to access private members
private:
  // Block-size constants; the kernels that use them are launched from the .cu file.
  static const int threads_per_block_interpolate = 128;
  static const int threads_per_block_deconv = 128;
  static const int threads_per_block_smaatm = 128;
  static const int threads_per_block_ssaatm = 128;
  thrust::device_vector<float> _devec;           // Image data on the GPU
  mutable thrust::device_vector<float> _spcoef;  // Spline coefficients for 3D deconv
  mutable bool _spv;                             // True if spcoef valid
  NEWIMAGE::volume<float> _hdr;                  // Header information (voxel sizes, inter/extrapolation settings)
  std::vector<unsigned int> _sz;                 // Matrix size in x, y and z
  /// Common code for construction/assignment from a NEWIMAGE volume; ifvol selects whether data is copied.
  void common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                           bool ifvol);
  /// Returns a raw const pointer to the spline coefficients on the GPU
  const float *sp_ptr() const EddyTry { return(thrust::raw_pointer_cast(_spcoef.data())); } EddyCatch
  /// Calculates spline coefficients coef for image ima of size sz. Defined elsewhere.
  void calculate_spline_coefs(const std::vector<unsigned int>& sz,
                              const thrust::device_vector<float>& ima,
                              thrust::device_vector<float>& coef) const;
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Helper class for transferring NEWIMAGE 4D volumes to and
/// from a CUDA device.
///
/////////////////////////////////////////////////////////////////////
class CudaVolume4D
{
public:
  /// Default constructor. Creates a zero-sized 4D volume.
  CudaVolume4D() EddyTry : _sz(4,0) {} EddyCatch
  /// Construct from another CudaVolume4D. ifcv determines if data is copied or if memory is only allocated.
  CudaVolume4D(const CudaVolume4D& cv, bool ifcv=true) EddyTry : _sz(cv._sz), _hdr(cv._hdr) {
    if (ifcv) _devec = cv._devec;
    else _devec.resize(cv._devec.size());
  } EddyCatch
  /// Construct a 4D volume with nv volumes, each with the header/size of cv.
  /// If ifcv is true the data of cv is copied into every one of the nv volumes.
  CudaVolume4D(const CudaVolume& cv, unsigned int nv, bool ifcv=true) EddyTry : _sz(4,0), _hdr(cv._hdr) {
    _sz[0]=cv._sz[0]; _sz[1]=cv._sz[1]; _sz[2]=cv._sz[2]; _sz[3]=nv;
    _devec.resize(_sz[3]*cv._devec.size());
    // Unsigned index to match the type of _sz[3]
    if (ifcv) for (unsigned int i=0; i<_sz[3]; i++) thrust::copy(cv._devec.begin(),cv._devec.end(),this->volbegin(i));
  } EddyCatch
  /// Construct from a NEWIMAGE volume. ifvol determines if data or only the header is copied.
  CudaVolume4D(const NEWIMAGE::volume<float>& vol, bool ifvol=true) EddyTry : _sz(4,0) {
    common_assignment_from_newimage_vol(vol,0,ifvol);
  } EddyCatch
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const CudaVolume4D& cv) EddyTry {
    if (this != &cv) { _sz=cv._sz; _hdr=cv._hdr; _devec.resize(cv._devec.size()); }
  } EddyCatch
  /// Sets header from a 3D volume and allocates memory for nv such volumes on the GPU.
  /// Does NOT copy any data to the GPU.
  void SetHdr(const CudaVolume& cv, unsigned int nv) EddyTry {
    // Each spatial dimension is taken from the matching dimension of cv. The
    // previous code wrote _sz[0] twice, never set _sz[1] and copied cv._sz[0]
    // into _sz[2], which gave a wrong header for any non-cubic volume.
    _sz[0]=cv._sz[0]; _sz[1]=cv._sz[1]; _sz[2]=cv._sz[2]; _sz[3]=nv;
    _hdr=cv._hdr;
    _devec.resize(nv*cv._devec.size());
  } EddyCatch
  /// Sets header and allocates memory on GPU. Does NOT copy any data to the GPU.
  void SetHdr(const NEWIMAGE::volume<float>& vol, unsigned int tsize_override=0) EddyTry {
    common_assignment_from_newimage_vol(vol,tsize_override,false);
  } EddyCatch
  /// Sets header and GPU data to that in cv
  CudaVolume4D& operator=(const CudaVolume4D& cv) EddyTry {
    if (this != &cv) { _sz=cv._sz; _hdr=cv._hdr; _devec=cv._devec; } return(*this);
  } EddyCatch
  /// Sets header and GPU data to that in vol
  CudaVolume4D& operator=(const NEWIMAGE::volume<float>& vol) EddyTry {
    common_assignment_from_newimage_vol(vol,0,true); return(*this);
  } EddyCatch
  /// Allows for assignments of type FourD[i] = ThreeD;
  CudaVolume3D_2_4D_Helper operator[](unsigned int indx);
  /// Assigns a 3D CudaVolume to a "slot" in the 4D volume
  void SetVolume(unsigned int i, const CudaVolume& vol);
  /// Adds GPU data in cv to *this
  CudaVolume4D& operator+=(const CudaVolume4D& cv);
  /// Subtracts GPU data in cv from *this
  CudaVolume4D& operator-=(const CudaVolume4D& cv);
  /// Multiply cv with this. It is a volume-by-volume elementwise multiplication.
  CudaVolume4D& operator*=(const CudaVolume4D& cv);
  /// Multiplies (masks) 4D data with 3D volume
  CudaVolume4D& operator*=(const CudaVolume& cv);
  /// * operator. Implemented as a copy of *this followed by *=.
  const CudaVolume4D operator*(const CudaVolume4D& rhs) const EddyTry { return(CudaVolume4D(*this) *= rhs); } EddyCatch
  /// - operator. Implemented as a copy of *this followed by -=.
  const CudaVolume4D operator-(const CudaVolume4D& rhs) const EddyTry { return(CudaVolume4D(*this) -= rhs); } EddyCatch
  /// Sums all volumes, collapsing it into a 3D volume
  const CudaVolume SumAlongFourthDim() const;
  /// Converts a set of (delta) coordinates to a displacement field in mm
  void CoordinatesToDisplacementField(const CudaImageCoordinates& coord);
  /// Divides all volumes with another (3D volume) for all voxels within mask (3D).
  void DivideWithinMask(const CudaVolume& divisor, const CudaVolume& mask);
  /// Smooths (3D) to requested FWHM, one volume at a time
  void Smooth(float fwhm) EddyTry { for (unsigned int i=0; i<_sz[3]; i++) cuda_volume_utils::smooth(fwhm,_sz,_hdr,this->GetPtr(i)); } EddyCatch
  /// Smooths (3D) to requested FWHM within mask
  void Smooth(float fwhm, const CudaVolume& mask);
  /// Assigns val to all voxels
  CudaVolume4D& operator=(float val);
  /// Returns true if basic image dimensions are the same. Does NOT consider the data or the fourth dimension.
  bool operator==(const CudaVolume4D& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const CudaVolume4D& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns true if basic image dimensions are the same. Does NOT consider the data or the fourth dimension.
  bool operator==(const CudaVolume& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const CudaVolume& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns true if basic image dimensions are the same. Does NOT consider the data or the fourth dimension.
  bool operator==(const NEWIMAGE::volume<float>& rhs) const;
  /// Same as !(lhs==rhs)
  bool operator!=(const NEWIMAGE::volume<float>& rhs) const EddyTry { return(!(*this==rhs)); } EddyCatch
  /// Returns a pointer to the start of the memory on the GPU
  float *GetPtr() EddyTry { return(thrust::raw_pointer_cast(_devec.data())); } EddyCatch
  /// Returns a const pointer to the start of the memory on the GPU
  const float *GetPtr() const EddyTry { return(thrust::raw_pointer_cast(_devec.data())); } EddyCatch
  /// Returns a pointer to the memory for a specific volume on the GPU. Throws if i is out of range.
  float *GetPtr(unsigned int i) EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::GetPtr: index out of range");
    return(thrust::raw_pointer_cast(_devec.data())+i*this->Size());
  } EddyCatch
  /// Returns a const pointer to the memory for a specific volume on the GPU. Throws if i is out of range.
  const float *GetPtr(unsigned int i) const EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::GetPtr: index out of range");
    return(thrust::raw_pointer_cast(_devec.data())+i*this->Size());
  } EddyCatch
  /// Returns an iterator to the start of the memory for volume i on the GPU
  thrust::device_vector<float>::iterator Begin(unsigned int i) EddyTry { return(this->volbegin(i)); } EddyCatch
  /// Returns an iterator to the end of the memory for volume i on the GPU
  thrust::device_vector<float>::iterator End(unsigned int i) EddyTry { return(this->volend(i)); } EddyCatch
  /// Returns a const iterator to the start of the memory for volume i on the GPU
  thrust::device_vector<float>::const_iterator Begin(unsigned int i) const EddyTry { return(this->volbegin(i)); } EddyCatch
  /// Returns a const iterator to the end of the memory for volume i on the GPU
  thrust::device_vector<float>::const_iterator End(unsigned int i) const EddyTry { return(this->volend(i)); } EddyCatch
  /// Returns the total size (number of voxels) of one volume
  unsigned int Size() const { return(_sz[0]*_sz[1]*_sz[2]); }
  /// Returns the matrix size in direction indx, indx=0,1,2,3.
  unsigned int Size(unsigned int indx) const;
  /// Returns the voxel size (mm) in direction indx, indx=0,1,2.
  float Vxs(unsigned int indx) const;
  /// Returns image-to-world mapping matrix
  NEWMAT::Matrix Ima2WorldMatrix() const; // { return(_hdr.sampling_mat()); } Actual definition in .cu
  /// Returns world-to-image mapping matrix
  NEWMAT::Matrix World2ImaMatrix() const; // { return(_hdr.sampling_mat().i()); } Actual definition in .cu
  /// Returns interpolation method
  NEWIMAGE::interpolation Interp() const EddyTry { return(_hdr.getinterpolationmethod()); } EddyCatch
  /// Returns extrapolation method
  NEWIMAGE::extrapolation Extrap() const EddyTry { return(_hdr.getextrapolationmethod()); } EddyCatch
  /// Returns a vector indicating in which directions extrapolation is valid
  std::vector<bool> ExtrapValid() const EddyTry { return(_hdr.getextrapolationvalidity()); } EddyCatch
  /// Sets interpolation method
  void SetInterp(NEWIMAGE::interpolation im) EddyTry { _hdr.setinterpolationmethod(im); } EddyCatch
  /// Calculates first derivative in direction dir, sampled trilinearly at voxel centres
  void SampleTrilinearDerivOnVoxelCentres(unsigned int dir, const CudaVolume& mask, CudaVolume& deriv, bool add_one=true) const;
  /// Copies the data from GPU into returned 4D volume.
  NEWIMAGE::volume4D<float> GetVolume() const EddyTry { NEWIMAGE::volume4D<float> ovol; GetVolume(ovol); return(ovol); } EddyCatch
  /// Copies the data from GPU into provided 4D volume.
  void GetVolume(NEWIMAGE::volume4D<float>& ovol) const;
  /// Copies the data from index'th volume into returned 3D volume
  NEWIMAGE::volume<float> GetVolume(unsigned int indx) const EddyTry { NEWIMAGE::volume<float> ovol; GetVolume(indx,ovol); return(ovol); } EddyCatch
  /// Copies the data from index'th volume into provided 3D volume
  void GetVolume(unsigned int indx, NEWIMAGE::volume<float>& ovol) const;
  /// Writes 4D volume to disc.
  void Write(const std::string& fname) const EddyTry { NEWIMAGE::write_volume4D(GetVolume(),fname); } EddyCatch
  /// Writes 3D volume to disc.
  void Write(unsigned int indx, const std::string& fname) const EddyTry { NEWIMAGE::write_volume(GetVolume(indx),fname); } EddyCatch
  friend class CudaVolume; // To allow CudaVolume to access private members
  friend class CudaVolume3D_2_4D_Helper; // To allow CudaVolume3D_2_4D_Helper to access private members
private:
  static const int threads_per_block = 128;
  std::vector<unsigned int> _sz;        // Matrix size in x, y, z and number of volumes
  NEWIMAGE::volume<float> _hdr;         // Header information shared by all volumes
  thrust::device_vector<float> _devec;  // Data for all volumes, stored volume after volume
  /// Common code for construction/assignment from a NEWIMAGE volume. Defined elsewhere.
  void common_assignment_from_newimage_vol(const NEWIMAGE::volume<float>& vol,
                                           unsigned int tsize_override,
                                           bool ifvol);
  /// Iterator to the first voxel of volume i. Throws if i is out of range.
  thrust::device_vector<float>::iterator volbegin(unsigned int i) EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::volbegin: index out of range");
    return(_devec.begin()+i*this->Size());
  } EddyCatch
  /// Const iterator to the first voxel of volume i. Throws if i is out of range.
  thrust::device_vector<float>::const_iterator volbegin(unsigned int i) const EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::volbegin:const: index out of range");
    return(_devec.begin()+i*this->Size());
  } EddyCatch
  /// Iterator one past the last voxel of volume i. Throws if i is out of range.
  thrust::device_vector<float>::iterator volend(unsigned int i) EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::volend: index out of range");
    // _sz[3] >= 1 here (otherwise the check above would have thrown), so _sz[3]-1 cannot wrap
    if (i<_sz[3]-1) return(_devec.begin()+(i+1)*this->Size());
    else return(_devec.end());
  } EddyCatch
  /// Const iterator one past the last voxel of volume i. Throws if i is out of range.
  thrust::device_vector<float>::const_iterator volend(unsigned int i) const EddyTry {
    if (i>=_sz[3]) throw EddyException("CudaVolume4D::End:const: index out of range");
    // _sz[3] >= 1 here (otherwise the check above would have thrown), so _sz[3]-1 cannot wrap
    if (i<_sz[3]-1) return(_devec.begin()+(i+1)*this->Size());
    else return(_devec.end());
  } EddyCatch
};
/****************************************************************//**
*
* \brief Tiny helper class whose only purpose is to allow for
* skrutt[i] = plutt;
* where skrutt is of type CudaVolume4D and plutt of type CudaVolume.
*
********************************************************************/
class CudaVolume3D_2_4D_Helper
{
public:
/// Assignment from a 3D volume. Defined elsewhere; presumably stores threed
/// in slot _indx of the referenced 4D volume -- confirm in the .cu file.
void operator=(const CudaVolume& threed);
friend class CudaVolume4D; // To allow CudaVolume4D to access private members
private:
/// Private constructor, so only the friend class CudaVolume4D can create
/// helpers. Stores a reference to fourd and the volume index indx.
CudaVolume3D_2_4D_Helper(CudaVolume4D& fourd, unsigned int indx) EddyTry : _fourd(fourd), _indx(indx) {} EddyCatch // N.B. Private
CudaVolume4D& _fourd; // Referenced 4D volume; it must outlive this helper
unsigned int _indx; // Index of the volume within _fourd
};
/****************************************************************//**
*
* \brief Helper class that manages a set of image coordinates in a way that
* it enables calculation/implementation of partial derivatives of
* images w.r.t. transformation parameters.
*
********************************************************************/
class CudaImageCoordinates
{
public:
  /// Default constructor. Creates an empty (zero-sized) set of coordinates.
  CudaImageCoordinates() EddyTry : _xn(0), _yn(0), _zn(0), _init(false) {} EddyCatch
  /// Constructs coordinates for an xn*yn*zn grid. If init is true, init_coord() is called.
  CudaImageCoordinates(unsigned int xn, unsigned int yn, unsigned int zn, bool init=false) EddyTry
  : _xn(xn), _yn(yn), _zn(zn), _x(xn*yn*zn), _y(xn*yn*zn), _z(xn*yn*zn), _init(init) { if (init) init_coord(); } EddyCatch
  /// Resizes to an xn*yn*zn grid. If init is true, init_coord() is called afterwards.
  void Resize(unsigned int xn, unsigned int yn, unsigned int zn, bool init=false) EddyTry {
    _xn=xn; _yn=yn; _zn=zn;
    // All three coordinate vectors must be resized. The previous code resized
    // _y twice and never _z, leaving _z at its old length.
    _x.resize(xn*yn*zn); _y.resize(xn*yn*zn); _z.resize(xn*yn*zn); _init=false;
    if (init) init_coord();
  } EddyCatch
  /// Affine transform
  void Transform(const NEWMAT::Matrix& A);
  /// Slice-wise affine transform
  void Transform(const std::vector<NEWMAT::Matrix>& A);
  /// General transform
  void Transform(const NEWMAT::Matrix& A, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& B);
  /// Slice-wise general transform
  void Transform(const std::vector<NEWMAT::Matrix>& A, const EDDY::CudaVolume4D& dfield, const std::vector<NEWMAT::Matrix>& B);
  /// Calculate x, y and z-coordinates for slice-to-vol (the tricky direction) transform
  void GetSliceToVolXYZCoord(const NEWMAT::Matrix& M1, const std::vector<NEWMAT::Matrix>& R, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& M2, EDDY::CudaVolume& zcoord);
  /// Calculate z-coordinates for slice-to-vol (the tricky direction) transform
  void GetSliceToVolZCoord(const NEWMAT::Matrix& M1, const std::vector<NEWMAT::Matrix>& R, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& M2);
  /// Returns the total number of coordinates (xn*yn*zn)
  unsigned int Size() const { return(_xn*_yn*_zn); }
  /// Returns the grid size in direction indx, indx=0,1,2. Throws if indx is out of range.
  unsigned int Size(unsigned int indx) const EddyTry {
    if (indx>2) throw EddyException("CudaImageCoordinates::Size: Index out of range.");
    return((!indx) ? _xn : ((indx==1) ? _yn : _zn));
  } EddyCatch
  /// Subtracts the coordinates in rhs from *this. Defined elsewhere.
  CudaImageCoordinates& operator-=(const CudaImageCoordinates& rhs);
  /// Returns a raw const pointer to the start of the x-coordinates on the GPU
  const float *XPtr() const EddyTry { return(thrust::raw_pointer_cast(_x.data())); } EddyCatch
  /// Returns a raw const pointer to the start of the y-coordinates on the GPU
  const float *YPtr() const EddyTry { return(thrust::raw_pointer_cast(_y.data())); } EddyCatch
  /// Returns a raw const pointer to the start of the z-coordinates on the GPU
  const float *ZPtr() const EddyTry { return(thrust::raw_pointer_cast(_z.data())); } EddyCatch
  /// Returns coordinates as nx3 matrix. For debugging only.
  NEWMAT::Matrix AsMatrix() const;
  /// Writes list of coordinates to text-file
  void Write(const std::string& fname, unsigned int n=0) const;
private:
  /// Non-const raw pointers to the coordinate vectors; private so that only members can write through them
  float *XPtr() EddyTry { return(thrust::raw_pointer_cast(_x.data())); } EddyCatch
  float *YPtr() EddyTry { return(thrust::raw_pointer_cast(_y.data())); } EddyCatch
  float *ZPtr() EddyTry { return(thrust::raw_pointer_cast(_z.data())); } EddyCatch
  static const int threads_per_block = 128;
  unsigned int _xn;                 // Grid size in x
  unsigned int _yn;                 // Grid size in y
  unsigned int _zn;                 // Grid size in z
  thrust::device_vector<float> _x;  // x-coordinates on the GPU
  thrust::device_vector<float> _y;  // y-coordinates on the GPU
  thrust::device_vector<float> _z;  // z-coordinates on the GPU
  bool _init;                       // Set from the constructor's init argument, reset by Resize; presumably maintained by init_coord()
  /// Initialises the coordinates. Defined elsewhere.
  void init_coord();
  thrust::device_vector<float> repack_matrix(const NEWMAT::Matrix& A);
  thrust::device_vector<float> repack_vector_of_matrices(const std::vector<NEWMAT::Matrix>& A);
};
} // End namespace EDDY
#ifdef I_CUDAVOLUME_H_DEFINED_ET
#undef I_CUDAVOLUME_H_DEFINED_ET
#undef EXPOSE_TREACHEROUS // Avoid exporting dodgy routines
#endif
#endif // End #ifndef CudaVolume_h
/////////////////////////////////////////////////////////////////////
///
/// \file DerivativeCalculator.cu
/// \brief Definitions of class used to calculate the derivatives of a prediction in scan space w.r.t. all parameters.
///
/// \author Jesper Andersson
/// \version 1.0b, Dec., 2019.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
// Because of a bug in cuda_fp16.hpp, that gets included by hipblas.h, it has to
// be included before any include files that set up anything related to the std-lib.
// If not, there will be an ambiguity in cuda_fp16.hpp about whether to use the
// old-style C isinf or the new (since C++11) std::isinf.
#include "hipblas.h"
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <chrono>
#include <ctime>
#include <hip/hip_runtime.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#include <thrust/inner_product.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#pragma pop
#include "utils/FSLProfiler.h"
#include "EddyHelperClasses.h"
#include "EddyUtils.h"
#include "EddyCudaHelperFunctions.h"
#include "EddyGpuUtils.h"
#include "EddyKernels.h"
#include "EddyFunctors.h"
#include "DerivativeCalculator.h"
using namespace EDDY;
/****************************************************************//**
*
*  Dumps the calculator's current results to disc, mainly as a
*  debugging aid. Four files are written, in this order: the
*  derivatives, the prediction, the mask and the Jacobian, all
*  in scan space.
*
*  \param[in] basename Common prefix for the names of all output
*  files; a descriptive suffix is appended for each one.
*
********************************************************************/
void DerivativeCalculator::Write(const std::string& basename) const EddyTry
{
  _derivs.Write(basename + "_derivatives_in_scan_space");
  _pios.Write(basename + "_prediction_in_scan_space");
  _mios.Write(basename + "_mask_in_scan_space");
  _jac.Write(basename + "_jacobian_in_scan_space");
} EddyCatch
/****************************************************************//**
*
*  Calculates, in scan space, the derivatives of the prediction
*  pred with respect to the parameters selected by whichp.
*  Each derivative is a forward finite difference: one parameter
*  of a local copy of scan is offset by its derivative scale, the
*  prediction is transformed again, and the masked difference to
*  the unperturbed prediction is divided by that scale. According
*  to the original documentation this call uses only
*  functions/kernels that yield results identical to the
*  "original" implementation.
*
*  Results are left in the members _pios (prediction in scan
*  space), _mios (mask in scan space), _jac and _derivs.
*
*  \param[in] pred Prediction in model space
*  \param[in] pmask Predefined mask in model space
*  \param[in] scan Scan that we want derivatives for
*  \param[in] susc Susceptibility field (may be zero-sized)
*  \param[in] whichp Specifies which parameters we want the
*  derivatives with respect to
*
*  \throws EDDY::EddyException if pred or a non-empty susc does
*  not match the dimensions of the scan image.
*
********************************************************************/
void DerivativeCalculator::calculate_direct_derivatives(CudaVolume&       pred,
                                                        CudaVolume&       pmask,
                                                        ECScan&           scan,
                                                        const CudaVolume& susc,
                                                        ParametersType    whichp) EddyTry
{
  static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__));
  double total_key = prof.StartEntry("Total");

  // Validate that the inputs agree with the scan
  if (pred != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives: pred-scan size mismatch");
  }
  if (susc.Size()) { // A zero-sized susceptibility field is accepted as is
    if (susc != scan.GetIma()) {
      throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives: susc-scan size mismatch");
    }
  }

  EDDY::CudaVolume sampling_mask(pred,false);
  EDDY::CudaVolume4D no_grad; // Zero size placeholder

  // Unperturbed prediction in scan space; the reference for all differences below
  EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,_pios,sampling_mask,_jac,no_grad);

  // Bring the predefined mask into scan space
  EDDY::CudaVolume placeholder3D;   // Zero size placeholder
  EDDY::CudaVolume4D placeholder4D; // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pmask,scan,susc,false,_mios,placeholder3D,placeholder3D,placeholder4D);

  // Threshold the resampled mask and intersect it with the sampling mask
  _mios.Binarise(0.99);
  _mios *= sampling_mask;

  // One finite difference per parameter, using a scratch copy of the scan
  EDDY::CudaVolume shifted_pred(pred,false);
  EDDY::CudaVolume shifted_jac(pred,false);
  EDDY::ECScan work_scan = scan;
  for (unsigned int pi=0; pi<work_scan.NDerivs(whichp); pi++) {
    double saved_value = work_scan.GetDerivParam(pi,whichp);
    work_scan.SetDerivParam(pi,saved_value+work_scan.GetDerivScale(pi,whichp),whichp);
    EddyInternalGpuUtils::transform_model_to_scan_space(pred,work_scan,susc,true,shifted_pred,sampling_mask,shifted_jac,no_grad);
    _derivs[pi] = _mios * (shifted_pred-_pios)/work_scan.GetDerivScale(pi,whichp);
    work_scan.SetDerivParam(pi,saved_value,whichp); // Undo the perturbation before the next parameter
  }

  prof.EndEntry(total_key);
} EddyCatch
/****************************************************************//**
*
*  Calculates, in scan space, the derivatives of the prediction
*  pred with respect to the parameters selected by whichp.
*  Movement derivatives are obtained per "compound": the primary
*  derivative is calculated by finite difference and, for
*  slice-to-vol, the secondary derivatives are derived from it by
*  slice modulation. EC derivatives are calculated directly by
*  finite differences. Relies on movement derivatives being
*  ordered before EC derivatives.
*
*  The parameters of scan are temporarily perturbed and restored
*  to their original value after each derivative.
*
*  \param[in] pred Prediction in model space
*  \param[in] pmask Predefined mask in model space
*  \param[in] scan Scan that we want derivatives for
*  \param[in] susc Susceptibility field (may be zero-sized)
*  \param[in] whichp Specifies which parameters we want the
*  derivatives with respect to
*
*  \throws EDDY::EddyException if pred or a non-empty susc does
*  not match the dimensions of the scan image, or if spatial
*  modulation is requested.
*
********************************************************************/
void DerivativeCalculator::calculate_mixed_derivatives(CudaVolume&       pred,
                                                       CudaVolume&       pmask,
                                                       ECScan&           scan,
                                                       const CudaVolume& susc,
                                                       ParametersType    whichp) EddyTry
{
  // Validate that the inputs agree with the scan
  if (pred != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_mixed_derivatives: pred-scan size mismatch");
  }
  if (susc.Size()) { // A zero-sized susceptibility field is accepted as is
    if (susc != scan.GetIma()) {
      throw EDDY::EddyException("DerivativeCalculator::calculate_mixed_derivatives: susc-scan size mismatch");
    }
  }

  EDDY::CudaVolume total_mask(pred,false);  // Total mask
  EDDY::CudaVolume field_mask(pred,false);  // Mask where field is valid
  EDDY::CudaVolume4D placeholder4D;         // Zero size placeholder

  // Field for the model->scan_space transform
  this->get_field(scan,susc,placeholder4D,field_mask,_dfield,_jac);

  // Unperturbed prediction in scan space; the reference for all differences below
  this->transform_to_scan_space(pred,scan,_dfield,_pios,total_mask);
  _pios *= _jac;

  // Bring the predefined mask into scan space
  EDDY::CudaVolume placeholder3D; // Zero size placeholder
  this->transform_to_scan_space(pmask,scan,_dfield,_mios,placeholder3D);

  // Threshold the resampled mask and intersect it with the sampling and field masks
  _mios.Binarise(0.99);
  _mios *= total_mask;
  _mios *= field_mask;

  EDDY::CudaVolume4D shifted_field(pred,3,false);
  EDDY::CudaVolume shifted_jac(pred,false); // Jacobian determinant of field
  EDDY::CudaVolume shifted_pred(pred,false);

  const bool want_movement = (whichp == ParametersType::All || whichp == ParametersType::Movement);
  const bool want_ec = (whichp == ParametersType::All || whichp == ParametersType::EC);

  // The order of derivatives is assumed to be movement followed by EC.
  // Movement first, using modulation for the secondary derivatives.
  if (want_movement) {
    for (unsigned int ci=0; ci<scan.NCompoundDerivs(ParametersType::Movement); ci++) {
      // Primary derivative of this compound, by finite difference
      EDDY::DerivativeInstructions instr = scan.GetCompoundDerivInstructions(ci,ParametersType::Movement);
      double saved_value = scan.GetDerivParam(instr.GetPrimaryIndex(),ParametersType::Movement);
      scan.SetDerivParam(instr.GetPrimaryIndex(),saved_value+instr.GetPrimaryScale(),ParametersType::Movement);
      this->get_field(scan,susc,_dfield,field_mask,shifted_field,shifted_jac);
      this->transform_to_scan_space(pred,scan,shifted_field,shifted_pred,placeholder3D);
      shifted_pred *= shifted_jac;
      _derivs[instr.GetPrimaryIndex()] = _mios * (shifted_pred-_pios) / instr.GetPrimaryScale();
      scan.SetDerivParam(instr.GetPrimaryIndex(),saved_value,ParametersType::Movement); // Undo the perturbation

      // Secondary derivatives, obtained by modulating the primary one
      if (instr.IsSliceMod()) {
        for (unsigned int si=0; si<instr.NSecondary(); si++) {
          EDDY::SliceDerivModulator modulator = instr.GetSliceModulator(si);
          get_slice_modulated_deriv(_derivs,total_mask,instr.GetPrimaryIndex(),instr.GetSecondaryIndex(si),modulator);
        }
      }
      else if (instr.IsSpatiallyMod()) {
        throw EDDY::EddyException("DerivativeCalculator::calculate_mixed_derivatives: Spatial modulation requested");
      }
    }
  }

  // Then the EC (eddy current) derivatives, by direct finite differences
  if (want_ec) {
    // When both kinds are requested the EC derivatives are stored after the movement ones
    unsigned int first_ec = (whichp == ParametersType::All) ? scan.NDerivs(ParametersType::Movement) : 0;
    for (unsigned int ei=0; ei<scan.NDerivs(ParametersType::EC); ei++) {
      double saved_value = scan.GetDerivParam(ei,ParametersType::EC);
      scan.SetDerivParam(ei,saved_value+scan.GetDerivScale(ei,ParametersType::EC),ParametersType::EC);
      this->get_field(scan,susc,_dfield,field_mask,shifted_field,shifted_jac);
      this->transform_to_scan_space(pred,scan,shifted_field,shifted_pred,placeholder3D);
      shifted_pred *= shifted_jac;
      _derivs[first_ec+ei] = _mios * (shifted_pred-_pios) / scan.GetDerivScale(ei,ParametersType::EC);
      scan.SetDerivParam(ei,saved_value,ParametersType::EC); // Undo the perturbation
    }
  }
} EddyCatch
/****************************************************************//**
*
*  Calculates finite-difference derivatives of the prediction in scan
*  space with respect to the lecm.NDerivs() parameters exposed by lecm.
*  For each parameter the value is read through lecm, incremented by
*  lecm.GetDerivScale(i) on a private copy of scan, the prediction is
*  re-transformed, and (perturbed-base)/scale, multiplied by the mask
*  _mios, is stored in _derivs[i]. The parameter is then reset.
*
*  As a side effect _pios, _jac and _mios are (re)calculated.
*  The scan passed in is not modified.
*
*  \param[in] pred Prediction in model space
*  \param[in] pmask Predefined mask in model space
*  \param[in] scan Scan that we want derivatives for
*  \param[in] scindx Index forwarded to lecm when reading/writing parameters
*  \param[in] susc Susceptibility field. May have zero size.
*  \param[in] lecm Model used to get/set the parameters and their scales
*  \throw EddyException if pred, pmask or (a non-empty) susc does not
*  compare equal to scan.GetIma()
*
********************************************************************/
void DerivativeCalculator::calculate_long_ec_derivatives(const CudaVolume&  pred,
                                                         const CudaVolume&  pmask,
                                                         const ECScan&      scan,
                                                         unsigned int       scindx,
                                                         const CudaVolume&  susc,
                                                         const LongECModel& lecm) EddyTry
{
  // Sanity checks on the inputs
  if (pred != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_long_ec_derivatives: pred-scan size mismatch");
  }
  if (pmask != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_long_ec_derivatives: pmask-scan size mismatch");
  }
  if (susc.Size() && susc != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_long_ec_derivatives: susc-scan size mismatch");
  }

  // Base prediction in scan space. All differences below are taken relative to this.
  EDDY::CudaVolume   sampling_mask(pred,false);
  EDDY::CudaVolume4D no_grad;                   // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,_pios,sampling_mask,_jac,no_grad);

  // Predefined mask in scan space, binarised and combined with the sampling mask
  EDDY::CudaVolume   no_vol;                    // Zero size placeholder
  EDDY::CudaVolume4D no_vol4D;                  // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pmask,scan,susc,false,_mios,no_vol,no_vol,no_vol4D);
  _mios.Binarise(0.99);
  _mios *= sampling_mask;

  // One forward difference per parameter, on a copy so that scan is left untouched
  EDDY::CudaVolume shifted(pred,false);
  EDDY::CudaVolume shifted_jac(pred,false);
  EDDY::ECScan     work_scan(scan);
  for (unsigned int pi=0; pi<lecm.NDerivs(); ++pi) {
    const double orig_val = lecm.GetDerivParam(work_scan,pi,scindx);
    const auto   step = lecm.GetDerivScale(pi);
    lecm.SetDerivParam(work_scan,pi,scindx,orig_val+step);
    EddyInternalGpuUtils::transform_model_to_scan_space(pred,work_scan,susc,true,shifted,sampling_mask,shifted_jac,no_grad);
    _derivs[pi] = _mios * (shifted-_pios)/step;
    lecm.SetDerivParam(work_scan,pi,scindx,orig_val); // Reset parameter before moving on
  }
} EddyCatch
/****************************************************************//**
*
*  Calculates the displacement field, and optionally its Jacobian,
*  for the model-to-scan transformation.
*
*  The EC field of scan is obtained and, if susc has non-zero size,
*  the affinely transformed susc is added to it. The sum is converted
*  to voxel displacements, inverted, and finally rescaled by
*  voxel_2_mm_displacements along the phase-encode direction.
*
*  \param[in] scan Scan that we want the field for
*  \param[in] susc Susceptibility field. Ignored if it has zero size.
*  \param[in] infield Displacement field calculated by an earlier
*  call. If it has zero size it is ignored. If its size is non-zero it
*  is copied to field, rescaled by mm_2_voxel_displacements and passed
*  into the kernels calculating the inverse.
*  \param[in,out] mask If infield has zero size a mask is calculated
*  and returned in mask. If infield has non-zero size mask is used as
*  input to the inversion, i.e. it is expected to specify where the
*  field is valid.
*  \param[out] field The calculated displacement field
*  \param[out] jac Jacobian determinants of field. Only calculated
*  if jac has non-zero size on entry.
*
********************************************************************/
void DerivativeCalculator::get_field(// Input
                                     const EDDY::ECScan&       scan,
                                     const EDDY::CudaVolume&   susc,
                                     const EDDY::CudaVolume4D& infield,
                                     // Input/output
                                     EDDY::CudaVolume&         mask,
                                     // Output
                                     EDDY::CudaVolume4D&       field,
                                     EDDY::CudaVolume&         jac) const EddyTry
{
  EDDY::CudaVolume hz_total(scan.GetIma(),false);   // Total (EC and susc) field
  EDDY::CudaVolume susc_valid(scan.GetIma(),false); // Defines where transformed susc field is valid
  susc_valid = 1.0;
  // 0 if the first element of the phase-encode vector is non-zero, otherwise 1
  unsigned int pe_dir = (scan.GetAcqPara().PhaseEncodeVector()(1)!=0) ? 0 : 1;

  // EC field, plus susceptibility field moved with the subject when one is given
  EddyInternalGpuUtils::get_ec_field(scan,hz_total);
  if (susc.Size()) {
    EDDY::CudaVolume   moved_susc(susc,false);
    EDDY::CudaVolume4D no_grad;                     // Zero size placeholder
    if (!scan.IsSliceToVol()) {
      NEWMAT::Matrix R = scan.ForwardMovementMatrix();
      EddyInternalGpuUtils::affine_transform(susc,R,moved_susc,no_grad,susc_valid);
    }
    else {
      std::vector<NEWMAT::Matrix> R = EddyUtils::GetSliceWiseForwardMovementMatrices(scan);
      EddyInternalGpuUtils::affine_transform(susc,R,moved_susc,no_grad,susc_valid);
    }
    hz_total += moved_susc;
  }

  // Hz map -> voxel displacement field
  EDDY::CudaVolume4D vox_disp(hz_total,3,false);
  FieldGpuUtils::Hz2VoxelDisplacements(hz_total,scan.GetAcqPara(),vox_disp);

  // Invert the field, with or without help from an earlier inverse
  if (!infield.Size()) {
    this->invert_field(vox_disp,scan.GetAcqPara(),susc_valid,field,mask);
  }
  else {
    field = infield;
    this->mm_2_voxel_displacements(field,pe_dir);
    this->invert_field(vox_disp,scan.GetAcqPara(),mask,field,field);
  }

  // Jacobian of the inverted field, only if the caller supplied storage for it
  if (jac.Size()) {
    field.SampleTrilinearDerivOnVoxelCentres(pe_dir,mask,jac,true);
  }

  // Voxel displacements -> mm
  this->voxel_2_mm_displacements(field,pe_dir);
} EddyCatch
/****************************************************************//**
*
*  Resamples pred into scan space using the displacement field dfield
*  and the forward movement matrix/matrices of scan. An identity
*  matrix is passed as the other affine transform to general_transform.
*  For a slice-to-vol scan one movement matrix per slice is used.
*
*  \param[in] pred Volume (prediction or mask) in model space
*  \param[in] scan Scan supplying the movement parameters
*  \param[in] dfield Displacement field passed on to general_transform
*  \param[out] oima Resampled volume. SetHdr(pred) is called on it
*  first if it does not compare equal to pred.
*  \param[out] omask Mask output of general_transform. SetHdr(pred)
*  is called on it first if it does not compare equal to pred.
*
********************************************************************/
void DerivativeCalculator::transform_to_scan_space(// Input
                                                   const EDDY::CudaVolume&   pred,
                                                   const EDDY::ECScan&       scan,
                                                   const EDDY::CudaVolume4D& dfield,
                                                   // Output
                                                   EDDY::CudaVolume&         oima,
                                                   EDDY::CudaVolume&         omask) const EddyTry
{
  // Make the outputs agree with the input volume
  if (oima != pred) {
    oima.SetHdr(pred);
  }
  if (omask != pred) {
    omask.SetHdr(pred);
  }

  EDDY::CudaVolume4D     no_grad; // Zero size place holder
  NEWMAT::IdentityMatrix eye(4);

  if (!scan.IsSliceToVol()) {
    // Volumetric case, a single RB matrix
    NEWMAT::Matrix R = scan.ForwardMovementMatrix();
    EddyInternalGpuUtils::general_transform(pred,eye,dfield,R,oima,no_grad,omask);
    return;
  }

  // Slice-to-vol case, one RB matrix (and one identity matrix) per slice
  std::vector<NEWMAT::Matrix> R = EddyUtils::GetSliceWiseForwardMovementMatrices(scan);
  std::vector<NEWMAT::Matrix> eyes(R.size());
  for (NEWMAT::Matrix& m : eyes) {
    m = eye;
  }
  EddyInternalGpuUtils::general_transform(pred,eyes,dfield,R,oima,no_grad,omask);
} EddyCatch
/****************************************************************//**
*
*  Launches the kernel that inverts a displacement field along the
*  phase-encode direction and that also writes an output mask.
*
*  The kernel is launched with field.Size(2) blocks of field.Size(0)
*  threads. If element 1 of the phase-encode vector is non-zero the
*  x-kernel is run on component 0 of field/ifield. Otherwise, if
*  element 2 is non-zero, the y-kernel is run on component 1.
*
*  \param[in] field Displacement field to invert
*  \param[in] acqp Acquisition parameters supplying the phase-encode vector
*  \param[in] inmask Mask passed to the kernel together with field
*  \param[out] ifield Receives the kernel output for the phase-encode component
*  \param[out] omask Mask written by the kernel
*  \throw EddyException if neither element 1 nor element 2 of the
*  phase-encode vector is non-zero
*
********************************************************************/
void DerivativeCalculator::invert_field(// Input
                                        const CudaVolume4D&  field,
                                        const EDDY::AcqPara& acqp,
                                        const CudaVolume&    inmask,
                                        // Output
                                        CudaVolume4D&        ifield,
                                        CudaVolume&          omask) const EddyTry
{
  int tpb = field.Size(0);     // Threads per block
  int nblocks = field.Size(2); // Number of blocks
  if (acqp.PhaseEncodeVector()(1) != 0.0) { // If PE in x
    EddyKernels::invert_displacement_field_x<<<nblocks,tpb>>>(field.GetPtr(0),inmask.GetPtr(),field.Size(0),field.Size(1),
                                                              ifield.GetPtr(0),omask.GetPtr(),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_x");
  }
  else if (acqp.PhaseEncodeVector()(2) != 0.0) { // If PE in y
    EddyKernels::invert_displacement_field_y<<<nblocks,tpb>>>(field.GetPtr(1),inmask.GetPtr(),field.Size(0),field.Size(1),
                                                              ifield.GetPtr(1),omask.GetPtr(),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_y");
  }
  // Class name in the message was misspelled ("DerivativeCaclulator"), which defeated searches for it
  else throw EddyException("DerivativeCalculator::invert_field_1: Invalid phase encode vector");
  return;
} EddyCatch
/****************************************************************//**
*
*  Launches the kernel that inverts a displacement field along the
*  phase-encode direction, passing a previously calculated inverse
*  field (inifield) to the kernel in addition to the field itself.
*
*  The kernel is launched with field.Size(2) blocks of field.Size(0)
*  threads. If element 1 of the phase-encode vector is non-zero the
*  x-kernel is run on component 0 of field/inifield/ifield. Otherwise,
*  if element 2 is non-zero, the y-kernel is run on component 1.
*
*  \param[in] field Displacement field to invert
*  \param[in] acqp Acquisition parameters supplying the phase-encode vector
*  \param[in] inmask Mask passed to the kernel together with field
*  \param[in] inifield Inverse field from an earlier call, passed to the kernel
*  \param[out] ifield Receives the kernel output for the phase-encode component
*  \throw EddyException if neither element 1 nor element 2 of the
*  phase-encode vector is non-zero
*
********************************************************************/
void DerivativeCalculator::invert_field(// Input
                                        const CudaVolume4D&  field,
                                        const EDDY::AcqPara& acqp,
                                        const CudaVolume&    inmask,
                                        const CudaVolume4D&  inifield,
                                        // Output
                                        CudaVolume4D&        ifield) const EddyTry
{
  int tpb = field.Size(0);     // Threads per block
  int nblocks = field.Size(2); // Number of blocks
  if (acqp.PhaseEncodeVector()(1) != 0.0) { // If PE in x
    EddyKernels::invert_displacement_field_x<<<nblocks,tpb>>>(field.GetPtr(0),inmask.GetPtr(),inifield.GetPtr(0),
                                                              field.Size(0),field.Size(1),ifield.GetPtr(0),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_x");
  }
  else if (acqp.PhaseEncodeVector()(2) != 0.0) { // If PE in y
    EddyKernels::invert_displacement_field_y<<<nblocks,tpb>>>(field.GetPtr(1),inmask.GetPtr(),inifield.GetPtr(1),
                                                              field.Size(0),field.Size(1),ifield.GetPtr(1),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_y");
  }
  // Class name in the message was misspelled ("DerivativeCaclulator"), which defeated searches for it
  else throw EddyException("DerivativeCalculator::invert_field_2: Invalid phase encode vector");
  return;
} EddyCatch
/****************************************************************//**
*
*  Multiplies, in place, component dir of field by field.Vxs(dir).
*
*  \param[in,out] field Displacement field to rescale
*  \param[in] dir Index of the component to rescale
*
********************************************************************/
void DerivativeCalculator::voxel_2_mm_displacements(// Input/Output
                                                    CudaVolume4D& field,
                                                    // Input
                                                    unsigned int  dir) const EddyTry
{
  EDDY::MulByScalar<float> to_mm(field.Vxs(dir));
  thrust::transform(field.Begin(dir),field.End(dir),field.Begin(dir),to_mm);
} EddyCatch
/****************************************************************//**
*
*  Multiplies, in place, component dir of field by 1.0/field.Vxs(dir).
*
*  \param[in,out] field Displacement field to rescale
*  \param[in] dir Index of the component to rescale
*
********************************************************************/
void DerivativeCalculator::mm_2_voxel_displacements(// Input/Output
                                                    CudaVolume4D& field,
                                                    // Input
                                                    unsigned int  dir) const EddyTry
{
  EDDY::MulByScalar<float> to_voxels(1.0/field.Vxs(dir));
  thrust::transform(field.Begin(dir),field.End(dir),field.Begin(dir),to_voxels);
} EddyCatch
/****************************************************************//**
*
*  Launches the slice_modulate_deriv kernel, which reads volume primi
*  of derivs and writes volume scndi of derivs, using the modulation
*  vector returned by sdm.GetMod(). The kernel is launched with
*  derivs.Size(2) blocks of derivs.Size(0) threads.
*
*  \param[in,out] derivs Derivatives. Volume primi is input and
*  volume scndi is output.
*  \param[in] mask Mask passed to the kernel
*  \param[in] primi Index of the primary derivative
*  \param[in] scndi Index of the secondary (modulated) derivative
*  \param[in] sdm Supplies the modulation through GetMod()
*
********************************************************************/
void DerivativeCalculator::get_slice_modulated_deriv(// Input/Output
                                                     CudaVolume4D&              derivs,
                                                     // Input
                                                     const CudaVolume&          mask,
                                                     unsigned int               primi,
                                                     unsigned int               scndi,
                                                     const SliceDerivModulator& sdm) const EddyTry
{
  // Device copy of the modulation, it has to outlive the kernel call
  thrust::device_vector<float> dev_mod = sdm.GetMod();

  int threads_per_block = derivs.Size(0);
  int n_blocks = derivs.Size(2);
  EddyKernels::slice_modulate_deriv<<<n_blocks,threads_per_block>>>(derivs.GetPtr(primi),
                                                                     mask.GetPtr(),
                                                                     derivs.Size(0),
                                                                     derivs.Size(1),
                                                                     derivs.Size(2),
                                                                     thrust::raw_pointer_cast(dev_mod.data()),
                                                                     derivs.GetPtr(scndi),
                                                                     threads_per_block*n_blocks);
  EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_slice_modulated_deriv::slice_modulate_deriv");
} EddyCatch
// This section contains dead code that may (but probably will not) be
// useful in the future.
/*
auto start_invert = std::chrono::system_clock::now();
FieldGpuUtils::InvertDisplacementField(dfield,scan.GetAcqPara(),mask,idfield,omask);
auto end_invert = std::chrono::system_clock::now();
// Get jacobian of inverted field
auto start_jacobian = std::chrono::system_clock::now();
if (jac.Size()) FieldGpuUtils::GetJacobian(idfield,scan.GetAcqPara(),jac);
auto end_jacobian = std::chrono::system_clock::now();
CudaVolume new_jac(jac,false);
auto start_new_jacobian = std::chrono::system_clock::now();
auto end_new_jacobian = std::chrono::system_clock::now();
// Transform field to mm displacements
FieldGpuUtils::Voxel2MMDisplacements(idfield);
auto end = std::chrono::system_clock::now();
char fname[256];
sprintf(fname,"old_jac_%03d",cnt);
NEWIMAGE::write_volume(jac.GetVolume(),fname);
sprintf(fname,"new_jac_%03d",cnt);
NEWIMAGE::write_volume(new_jac.GetVolume(),fname);
std::chrono::duration<double> duration = end-start;
std::chrono::duration<double> inv_duration = end_invert-start_invert;
std::chrono::duration<double> jac_duration = end_jacobian-start_jacobian;
std::chrono::duration<double> new_jac_duration = end_new_jacobian-start_new_jacobian;
std::chrono::duration<double> duration1 = start_second_part - start;
std::chrono::duration<double> duration2 = end - start_second_part;
cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform took " << duration.count() << " sec, of which the inverse was " << 100.0*inv_duration.count()/duration.count() << " %, and the Jacobian was " << 100.0*jac_duration.count()/duration.count() << " %" << endl;
cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform took " << duration.count() << " sec, of which the first half was " << 100.0*duration1.count()/duration.count() << " %, and the second half " << 100.0*duration2.count()/duration.count() << " %" << endl;
cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform old Jacobian took " << jac_duration.count() << " sec, and new Jacobian took " << new_jac_duration.count() << " sec" << endl;
*/
/*
void DerivativeCalculator::calculate_direct_derivatives_very_fast(CudaVolume& pred,
CudaVolume& pmask,
ECScan& scan,
const CudaVolume& susc,
Parameters whichp) EddyTry
{
// Check input parameters
if (pred != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_very_fast: pred-scan size mismatch");
if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_very_fast: susc-scan size mismatch");
EDDY::CudaVolume4D dfield(pred,3,false); dfield = 0.0;
EDDY::CudaVolume fmask(pred,false); // Mask where field is valid
EDDY::CudaVolume mask(pred,false); // Mask where resampled image is valid
EDDY::CudaVolume base(pred,false); // zero-point for derivatives
thrust::device_vector<int> lbindx; // Lower bounds of range when calculating inverses
this->get_field(scan,susc,lbindx,dfield,fmask,_jac);
this->transform_to_scan_space(pred,scan,dfield,_pios,mask);
_pios *= _jac; // Used for returning resampled predictions
NEWIMAGE::interpolation old_interp = pred.Interp();
if (old_interp != NEWIMAGE::trilinear) {
pred.SetInterp(NEWIMAGE::trilinear);
this->transform_to_scan_space(pred,scan,dfield,base,mask);
}
else base = _pios;
// Transform predefined mask to scan space and combine with sampling mask
this->transform_to_scan_space(pmask,scan,dfield,_mios,mask);
// Binarise resampled prediction mask and combine with sampling mask
_mios.Binarise(0.99); _mios *= mask;
// char fname[256]; sprintf(fname,"field_%03d",1); dfield.Write(fname);
EDDY::CudaVolume jac(pred,false); // Jacobian determinant of field
EDDY::CudaVolume perturbed(pred,false);
for (unsigned int i=0; i<scan.NCompoundDerivs(whichp); i++) {
// First calculate primary derivative for the compound
EDDY::DerivativeInstructions di = scan.GetCompoundDerivInstructions(i,whichp);
double p = scan.GetDerivParam(di.GetPrimaryIndex(),whichp);
scan.SetDerivParam(di.GetPrimaryIndex(),p+di.GetPrimaryScale(),whichp);
this->get_field(scan,susc,lbindx,dfield,fmask,jac);
this->transform_to_scan_space(pred,scan,dfield,perturbed,mask);
perturbed *= jac;
_derivs[di.GetPrimaryIndex()] = (perturbed-base)/di.GetPrimaryScale();
scan.SetDerivParam(di.GetPrimaryIndex(),p,whichp);
// Next calculate any secondary/modulated derivatives
for (unsigned int j=0; j<di.NSecondary(); j++) {
if (di.IsSliceMod()) {
EDDY::SliceDerivModulator sdm = di.GetSliceModulator(j);
get_slice_modulated_deriv(_derivs,fmask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm);
}
else if (di.IsSpatiallyMod()) {
EDDY::SpatialDerivModulator sdm = di.GetSpatialModulator(j);
// get_spatially_modulated_deriv(_derivs,fmask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm);
}
}
}
if (old_interp != NEWIMAGE::trilinear) pred.SetInterp(old_interp);
return;
} EddyCatch
void DerivativeCalculator::calculate_direct_derivatives_fast(CudaVolume& pred,
CudaVolume& pmask,
ECScan& scan,
const CudaVolume& susc,
Parameters whichp) EddyTry
{
// Check input parameters
if (pred != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_fast: pred-scan size mismatch");
if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_fast: susc-scan size mismatch");
EDDY::CudaVolume4D skrutt;
EDDY::CudaVolume fmask(pred,false); // Mask where field is valid
EDDY::CudaVolume mask(pred,false); // Mask where resampled image is valid
this->get_field(scan,susc,skrutt,_dfield,fmask,_jac);
this->transform_to_scan_space(pred,scan,_dfield,_pios,mask);
_pios *= _jac; // Used for returning resampled predictions
// Transform predefined mask to scan space and combine with sampling mask
this->transform_to_scan_space(pmask,scan,dfield,_mios,mask);
// Binarise resampled prediction mask and combine with sampling mask
_mios.Binarise(0.99); _mios *= mask;
char fname[256]; sprintf(fname,"field_%03d",1); dfield.Write(fname);
EDDY::CudaVolume4D dfield(pred,3,false); // Displacements of perturbed field
EDDY::CudaVolume jac(pred,false); // Jacobian determinant of perturbed field
EDDY::CudaVolume perturbed(pred,false);
for (unsigned int i=0; i<scan.NDerivs(whichp); i++) {
double p = scan.GetDerivParam(i,whichp);
scan.SetDerivParam(i,p+scan.GetDerivScale(i,whichp),whichp);
// auto start_get_field = std::chrono::system_clock::now();
this->get_field(scan,susc,_dfield,dfield,fmask,jac);
// auto end_get_field = std::chrono::system_clock::now();
// std::chrono::duration<double> duration = end_get_field-start_get_field;
// cout << "this->get_field(scan,susc,lbindx,dfield,fmask,jac); took " << duration.count() << " seconds" << endl;
sprintf(fname,"field_%03d",i+2); dfield.Write(fname);
this->transform_to_scan_space(pred,scan,dfield,perturbed,mask);
perturbed *= jac;
sprintf(fname,"perturbed_%03d",i+1); perturbed.Write(fname);
_derivs[i] = (perturbed-_pios)/scan.GetDerivScale(i,whichp);
scan.SetDerivParam(i,p,whichp);
}
return;
} EddyCatch
void DerivativeCalculator::calculate_modulated_derivatives(CudaVolume& pred,
CudaVolume& pmask,
ECScan& scan,
const CudaVolume& susc,
Parameters whichp) EddyTry
{
// Check input parameters
if (pred != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_modulated_derivatives: pred-scan size mismatch");
if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_modulated_derivatives: susc-scan size mismatch");
// Check for the case of EC estimation without a susceptibility field, which means that
// the constant off-resonance field should not be estimated. That also means that one of
// the derivatives that we need in order to estimate the other should be estimated, but
// not be included with the other derivatives.
EDDY::CudaVolume offset_deriv;
if (scan.NDerivs() < scan.NParam()) { // Indicates that the constant off-resonance field is not directly estimated
if (susc.Size()) throw EDDY::EddyException("DerivativeCalculator::calculate_modulated_derivatives: pred-scan size mismatch");
offset_deriv.SetHdr(pred);
}
EDDY::CudaVolume mask(pred,false);
EDDY::CudaVolume4D grad; // Zero size placeholder
// Calculate prediction in scan space. Also serves as base for derivative calculations.
EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,_pios,mask,_jac,grad);
// Transform predefined mask to scan space and combine with sampling mask
EDDY::CudaVolume skrutt; // Zero size placeholder
EDDY::CudaVolume4D skrutt4D; // Zero size placeholder
EddyInternalGpuUtils::transform_model_to_scan_space(pmask,scan,susc,false,_mios,skrutt,skrutt,skrutt4D);
// Binarise resampled prediction mask and combine with sampling mask
_mios.Binarise(0.99); _mios *= mask;
EDDY::CudaVolume jac(pred,false); // Jacobian determinant of field
EDDY::CudaVolume perturbed(pred,false);
// cout << "scan.NCompoundDerivs(whichp) = " << scan.NCompoundDerivs(whichp) << endl; cout.flush();
for (unsigned int i=0; i<scan.NCompoundDerivs(whichp); i++) {
// cout << "i = " << i << endl; cout.flush();
// First calculate primary derivative for the compound
EDDY::DerivativeInstructions di = scan.GetCompoundDerivInstructions(i,whichp);
// cout << "di.NSecondary() = " << di.NSecondary() << endl; cout.flush();
if (offset_deriv.Size()) {
double p = scan.GetDerivParam(di.GetPrimaryIndex(),whichp,true);
scan.SetDerivParam(di.GetPrimaryIndex(),p+di.GetPrimaryScale(),whichp,true);
EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,perturbed,mask,jac,grad);
if (di.GetPrimaryIndex() == scan.NDerivs()) offset_deriv = _mios * (perturbed-_pios) / di.GetPrimaryScale();
else _derivs[di.GetPrimaryIndex()] = _mios * (perturbed-_pios) / di.GetPrimaryScale();
scan.SetDerivParam(di.GetPrimaryIndex(),p,whichp,true);
}
else {
double p = scan.GetDerivParam(di.GetPrimaryIndex(),whichp);
scan.SetDerivParam(di.GetPrimaryIndex(),p+di.GetPrimaryScale(),whichp);
EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,perturbed,mask,jac,grad);
_derivs[di.GetPrimaryIndex()] = _mios * (perturbed-_pios) / di.GetPrimaryScale();
scan.SetDerivParam(di.GetPrimaryIndex(),p,whichp);
}
// Next calculate any secondary/modulated derivatives
if (di.IsSliceMod()) {
for (unsigned int j=0; j<di.NSecondary(); j++) {
EDDY::SliceDerivModulator sdm = di.GetSliceModulator(j);
get_slice_modulated_deriv(_derivs,mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm);
}
}
else if (di.IsSpatiallyMod()) {
for (unsigned int j=0; j<di.NSecondary(); j++) {
EDDY::SpatialDerivModulator sdm = di.GetSpatialModulator(j);
if (offset_deriv.Size() && di.GetPrimaryIndex() == scan.NDerivs()) {
get_spatially_modulated_deriv(_derivs,mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm,offset_deriv);
}
else {
get_spatially_modulated_deriv(_derivs,mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm,skrutt);
}
}
}
}
return;
} EddyCatch
void DerivativeCalculator::get_lower_bound_indicies(// Input
const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
// Output
thrust::device_vector<int>& lbindx,
CudaVolume& omask) const EddyTry
{
if (lbindx.size() != field.Size()) {
lbindx.resize(field.Size());
}
int tpb = field.Size(0);
int nblocks = field.Size(2);
if (acqp.PhaseEncodeVector()(1) != 0.0) { // If PE in x
EddyKernels::get_lower_bound_of_inverse_x<<<nblocks,tpb>>>(field.GetPtr(0),inmask.GetPtr(),field.Size(0),field.Size(1),field.Size(2),
thrust::raw_pointer_cast(lbindx.data()),omask.GetPtr(),nblocks*tpb);
EddyCudaHelperFunctions::CudaSync("EddyKernels::get_lower_bound_of_inverse_x");
}
else if (acqp.PhaseEncodeVector()(2) != 0.0) { // If PE in y
EddyKernels::get_lower_bound_of_inverse_y<<<nblocks,tpb>>>(field.GetPtr(1),inmask.GetPtr(),field.Size(0),field.Size(1),field.Size(2),
thrust::raw_pointer_cast(lbindx.data()),omask.GetPtr(),nblocks*tpb);
EddyCudaHelperFunctions::CudaSync("EddyKernels::get_lower_bound_of_inverse_y");
}
else throw EddyException("DerivativeCaclulator::get_lower_bound_indicies: Invalid phase encode vector");
return;
} EddyCatch
void DerivativeCalculator::get_spatially_modulated_deriv(// Input/Output
CudaVolume4D& derivs,
// Input
const CudaVolume& mask,
unsigned int primi,
unsigned int scndi,
const SpatialDerivModulator& sdm,
const CudaVolume& offset) const EddyTry
{
std::vector<unsigned int> mod = sdm.GetModulation();
int tpb = derivs.Size(0);
int nblocks = derivs.Size(2);
const float *inptr = nullptr;
if (offset.Size()) inptr = offset.GetPtr();
else inptr = derivs.GetPtr(primi);
float *outptr = derivs.GetPtr(scndi);
if (mod[0]) {
for (unsigned int i=0; i<mod[0]; i++) {
EddyKernels::x_modulate_deriv<<<nblocks,tpb>>>(inptr,mask.GetPtr(),derivs.Size(0),derivs.Size(1),
derivs.Size(2),derivs.Vxs(0),outptr,tpb*nblocks);
EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_spatially_modulated_deriv::x_modulate_deriv");
inptr = outptr;
}
}
if (mod[1]) {
for (unsigned int i=0; i<mod[1]; i++) {
EddyKernels::y_modulate_deriv<<<nblocks,tpb>>>(inptr,mask.GetPtr(),derivs.Size(0),derivs.Size(1),
derivs.Size(2),derivs.Vxs(1),outptr,tpb*nblocks);
EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_spatially_modulated_deriv::y_modulate_deriv");
inptr = outptr;
}
}
if (mod[2]) {
for (unsigned int i=0; i<mod[2]; i++) {
EddyKernels::z_modulate_deriv<<<nblocks,tpb>>>(inptr,mask.GetPtr(),derivs.Size(0),derivs.Size(1),
derivs.Size(2),derivs.Vxs(2),outptr,tpb*nblocks);
EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_spatially_modulated_deriv::z_modulate_deriv");
inptr = outptr;
}
}
return;
} EddyCatch
*/
/////////////////////////////////////////////////////////////////////
///
/// \file DerivativeCalculator.cu
/// \brief Definitions of class used to calculate the derivatives of a prediction in scan space w.r.t. all parameters.
///
/// \author Jesper Andersson
/// \version 1.0b, Dec., 2019.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
// Because of a bug in cuda_fp16.hpp, that gets included by cublas_v2.h, it has to
// be included before any include files that set up anything related to the std-lib.
// If not, there will be an ambiguity in cuda_fp16.hpp about whether to use the
// old-style C isinf or the new (since C++11) std::isinf.
#include "cublas_v2.h"
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <chrono>
#include <ctime>
#include <cuda.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#include <thrust/inner_product.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#pragma pop
#include "utils/FSLProfiler.h"
#include "EddyHelperClasses.h"
#include "EddyUtils.h"
#include "EddyCudaHelperFunctions.h"
#include "EddyGpuUtils.h"
#include "EddyKernels.h"
#include "EddyFunctors.h"
#include "DerivativeCalculator.h"
using namespace EDDY;
/****************************************************************//**
*
*  Writes the derivatives, the prediction, the mask and the Jacobian
*  currently held by the object (_derivs, _pios, _mios and _jac).
*  Mainly useful for debugging. Each item is written through its own
*  Write() to a name made up of basename and a fixed suffix.
*
*  \param[in] basename Common "basename" for all output files.
*
********************************************************************/
void DerivativeCalculator::Write(const std::string& basename) const EddyTry
{
  const std::string derivs_name = basename+"_derivatives_in_scan_space";
  _derivs.Write(derivs_name);

  const std::string pred_name = basename+"_prediction_in_scan_space";
  _pios.Write(pred_name);

  const std::string mask_name = basename+"_mask_in_scan_space";
  _mios.Write(mask_name);

  const std::string jac_name = basename+"_jacobian_in_scan_space";
  _jac.Write(jac_name);
} EddyCatch
/****************************************************************//**
*
*  Calculates the derivatives of the prediction pred with respect
*  to the parameters given by whichp. The derivatives are in the
*  scan space. Every derivative is calculated "directly", i.e. as a
*  forward difference where one parameter at a time is incremented
*  by its scale on a private copy of scan, the full model-to-scan
*  transform is redone and (perturbed-base)/scale, multiplied by the
*  mask _mios, is stored in _derivs[i].
*
*  As a side effect _pios, _jac and _mios are (re)calculated.
*  The whole call is timed through an FSLProfiler entry named "Total".
*
*  \param[in] pred Prediction in model space
*  \param[in] pmask Predefined mask in model space
*  \param[in] scan Scan that we want derivatives for
*  \param[in] susc Susceptibility field. May have zero size.
*  \param[in] whichp Specifies which parameters we want the
*  derivatives with respect to
*  \throw EddyException if pred or (a non-empty) susc does not
*  compare equal to scan.GetIma()
*
********************************************************************/
void DerivativeCalculator::calculate_direct_derivatives(CudaVolume&       pred,
                                                        CudaVolume&       pmask,
                                                        ECScan&           scan,
                                                        const CudaVolume& susc,
                                                        ParametersType    whichp) EddyTry
{
  static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__));
  double total_key = prof.StartEntry("Total");

  // Sanity checks on the inputs
  if (pred != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives: pred-scan size mismatch");
  }
  if (susc.Size() && susc != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives: susc-scan size mismatch");
  }

  // Base prediction in scan space. All differences below are taken relative to this.
  EDDY::CudaVolume   sampling_mask(pred,false);
  EDDY::CudaVolume4D no_grad;                   // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,_pios,sampling_mask,_jac,no_grad);

  // Predefined mask in scan space, binarised and combined with the sampling mask
  EDDY::CudaVolume   no_vol;                    // Zero size placeholder
  EDDY::CudaVolume4D no_vol4D;                  // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pmask,scan,susc,false,_mios,no_vol,no_vol,no_vol4D);
  _mios.Binarise(0.99);
  _mios *= sampling_mask;

  // One forward difference per parameter, calculated on a copy of the scan
  EDDY::CudaVolume shifted(pred,false);
  EDDY::CudaVolume shifted_jac(pred,false);
  EDDY::ECScan     work_scan(scan);
  for (unsigned int pi=0; pi<work_scan.NDerivs(whichp); ++pi) {
    double orig_val = work_scan.GetDerivParam(pi,whichp);
    work_scan.SetDerivParam(pi,orig_val+work_scan.GetDerivScale(pi,whichp),whichp);
    EddyInternalGpuUtils::transform_model_to_scan_space(pred,work_scan,susc,true,shifted,sampling_mask,shifted_jac,no_grad);
    _derivs[pi] = _mios * (shifted-_pios)/work_scan.GetDerivScale(pi,whichp);
    work_scan.SetDerivParam(pi,orig_val,whichp); // Reset parameter before moving on
  }

  prof.EndEntry(total_key);
} EddyCatch
/****************************************************************//**
*
*  Calculates the derivatives of the prediction pred with respect
*  to the parameters given by whichp. It uses modulation for the
*  movement parameters in the case of slice-to-vol, but direct
*  calculation for the other parameters.
*
*  Each directly calculated derivative is a forward difference: one
*  parameter of scan is incremented by its scale, the field and the
*  resampled prediction are recalculated, (perturbed-base)/scale is
*  multiplied by the mask _mios and stored in _derivs, and the
*  parameter is then reset to its original value. When whichp is
*  ParametersType::All the EC derivatives are stored after the
*  scan.NDerivs(ParametersType::Movement) movement derivatives.
*
*  As a side effect _dfield, _jac, _pios and _mios are (re)calculated.
*
*  \param[in] pred Prediction in model space
*  \param[in] pmask Predefined mask in model space
*  \param[in,out] scan Scan that we want derivatives for. Its
*  parameters are changed temporarily during the call.
*  \param[in] susc Susceptibility field. May have zero size.
*  \param[in] whichp Specifies which parameters we want the
*  derivatives with respect to
*  \throw EddyException if pred or (a non-empty) susc does not
*  compare equal to scan.GetIma(), or if spatial modulation is
*  requested for a movement derivative
*
********************************************************************/
void DerivativeCalculator::calculate_mixed_derivatives(CudaVolume&       pred,
                                                       CudaVolume&       pmask,
                                                       ECScan&           scan,
                                                       const CudaVolume& susc,
                                                       ParametersType    whichp) EddyTry
{
  // Sanity checks on the inputs
  if (pred != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_mixed_derivatives: pred-scan size mismatch");
  }
  if (susc.Size() && susc != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_mixed_derivatives: susc-scan size mismatch");
  }

  EDDY::CudaVolume   total_mask(pred,false);  // Total mask
  EDDY::CudaVolume   field_mask(pred,false);  // Mask where field is valid
  EDDY::CudaVolume4D no_field;                // Zero size placeholder

  // Field for the model->scan_space transform
  this->get_field(scan,susc,no_field,field_mask,_dfield,_jac);

  // Base prediction in scan space. All differences below are taken relative to this.
  this->transform_to_scan_space(pred,scan,_dfield,_pios,total_mask);
  _pios *= _jac;

  // Predefined mask in scan space, binarised and combined with sampling and field masks
  EDDY::CudaVolume no_vol;                    // Zero size placeholder
  this->transform_to_scan_space(pmask,scan,_dfield,_mios,no_vol);
  _mios.Binarise(0.99);
  _mios *= total_mask;
  _mios *= field_mask;

  EDDY::CudaVolume4D shifted_field(pred,3,false);
  EDDY::CudaVolume   shifted_jac(pred,false); // Jacobian determinant of perturbed field
  EDDY::CudaVolume   shifted(pred,false);

  // We are relying on the order of derivatives being movement followed by EC.
  // Movement first: primary derivatives directly, secondary ones through modulation.
  if (whichp == ParametersType::All || whichp == ParametersType::Movement) {
    for (unsigned int ci=0; ci<scan.NCompoundDerivs(ParametersType::Movement); ++ci) {
      // Primary derivative of the compound
      EDDY::DerivativeInstructions di = scan.GetCompoundDerivInstructions(ci,ParametersType::Movement);
      double orig_val = scan.GetDerivParam(di.GetPrimaryIndex(),ParametersType::Movement);
      scan.SetDerivParam(di.GetPrimaryIndex(),orig_val+di.GetPrimaryScale(),ParametersType::Movement);
      this->get_field(scan,susc,_dfield,field_mask,shifted_field,shifted_jac);
      this->transform_to_scan_space(pred,scan,shifted_field,shifted,no_vol);
      shifted *= shifted_jac;
      _derivs[di.GetPrimaryIndex()] = _mios * (shifted-_pios) / di.GetPrimaryScale();
      scan.SetDerivParam(di.GetPrimaryIndex(),orig_val,ParametersType::Movement); // Reset parameter
      // Secondary/modulated derivatives, if any
      if (di.IsSliceMod()) {
        for (unsigned int si=0; si<di.NSecondary(); ++si) {
          EDDY::SliceDerivModulator sdm = di.GetSliceModulator(si);
          get_slice_modulated_deriv(_derivs,total_mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(si),sdm);
        }
      }
      else if (di.IsSpatiallyMod()) {
        throw EDDY::EddyException("DerivativeCalculator::calculate_mixed_derivatives: Spatial modulation requested");
      }
    }
  }

  // Then EC (eddy currents), always using "direct derivatives"
  if (whichp == ParametersType::All || whichp == ParametersType::EC) {
    // EC derivatives are stored after the movement derivatives when both are asked for
    unsigned int first_ec = (whichp == ParametersType::All) ? scan.NDerivs(ParametersType::Movement) : 0;
    for (unsigned int ei=0; ei<scan.NDerivs(ParametersType::EC); ++ei) {
      double orig_val = scan.GetDerivParam(ei,ParametersType::EC);
      scan.SetDerivParam(ei,orig_val+scan.GetDerivScale(ei,ParametersType::EC),ParametersType::EC);
      this->get_field(scan,susc,_dfield,field_mask,shifted_field,shifted_jac);
      this->transform_to_scan_space(pred,scan,shifted_field,shifted,no_vol);
      shifted *= shifted_jac;
      _derivs[first_ec+ei] = _mios * (shifted-_pios) / scan.GetDerivScale(ei,ParametersType::EC);
      scan.SetDerivParam(ei,orig_val,ParametersType::EC); // Reset parameter
    }
  }
} EddyCatch
/****************************************************************//**
*
*  Calculates finite-difference derivatives of the prediction in scan
*  space with respect to the lecm.NDerivs() parameters exposed by lecm.
*  For each parameter the value is read through lecm, incremented by
*  lecm.GetDerivScale(i) on a private copy of scan, the prediction is
*  re-transformed, and (perturbed-base)/scale, multiplied by the mask
*  _mios, is stored in _derivs[i]. The parameter is then reset.
*
*  As a side effect _pios, _jac and _mios are (re)calculated.
*  The scan passed in is not modified.
*
*  \param[in] pred Prediction in model space
*  \param[in] pmask Predefined mask in model space
*  \param[in] scan Scan that we want derivatives for
*  \param[in] scindx Index forwarded to lecm when reading/writing parameters
*  \param[in] susc Susceptibility field. May have zero size.
*  \param[in] lecm Model used to get/set the parameters and their scales
*  \throw EddyException if pred, pmask or (a non-empty) susc does not
*  compare equal to scan.GetIma()
*
********************************************************************/
void DerivativeCalculator::calculate_long_ec_derivatives(const CudaVolume&  pred,
                                                         const CudaVolume&  pmask,
                                                         const ECScan&      scan,
                                                         unsigned int       scindx,
                                                         const CudaVolume&  susc,
                                                         const LongECModel& lecm) EddyTry
{
  // Sanity checks on the inputs
  if (pred != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_long_ec_derivatives: pred-scan size mismatch");
  }
  if (pmask != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_long_ec_derivatives: pmask-scan size mismatch");
  }
  if (susc.Size() && susc != scan.GetIma()) {
    throw EDDY::EddyException("DerivativeCalculator::calculate_long_ec_derivatives: susc-scan size mismatch");
  }

  // Base prediction in scan space. All differences below are taken relative to this.
  EDDY::CudaVolume   sampling_mask(pred,false);
  EDDY::CudaVolume4D no_grad;                   // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,_pios,sampling_mask,_jac,no_grad);

  // Predefined mask in scan space, binarised and combined with the sampling mask
  EDDY::CudaVolume   no_vol;                    // Zero size placeholder
  EDDY::CudaVolume4D no_vol4D;                  // Zero size placeholder
  EddyInternalGpuUtils::transform_model_to_scan_space(pmask,scan,susc,false,_mios,no_vol,no_vol,no_vol4D);
  _mios.Binarise(0.99);
  _mios *= sampling_mask;

  // One forward difference per parameter, on a copy so that scan is left untouched
  EDDY::CudaVolume shifted(pred,false);
  EDDY::CudaVolume shifted_jac(pred,false);
  EDDY::ECScan     work_scan(scan);
  for (unsigned int pi=0; pi<lecm.NDerivs(); ++pi) {
    const double orig_val = lecm.GetDerivParam(work_scan,pi,scindx);
    const auto   step = lecm.GetDerivScale(pi);
    lecm.SetDerivParam(work_scan,pi,scindx,orig_val+step);
    EddyInternalGpuUtils::transform_model_to_scan_space(pred,work_scan,susc,true,shifted,sampling_mask,shifted_jac,no_grad);
    _derivs[pi] = _mios * (shifted-_pios)/step;
    lecm.SetDerivParam(work_scan,pi,scindx,orig_val); // Reset parameter before moving on
  }
} EddyCatch
/****************************************************************//**
*
* Builds the displacement field for the model-to-scan transformation.
*
* The EC field of the scan is fetched and, when a susceptibility
* field is supplied, that field is affinely resampled with the
* scan's forward movement matrix (one per slice for slice-to-vol
* scans) and added. The sum is converted from Hz to voxel
* displacements, inverted, optionally differentiated to give the
* Jacobian, and finally rescaled to mm.
*
* \param[in] scan Scan whose parameters define the field
* \param[in] susc Susceptibility field; ignored if it has zero size
* \param[in] infield Displacement field from an earlier call. If it
* has non-zero size it is converted to voxel units and handed to the
* bracketing version of invert_field; otherwise it is ignored.
* \param[in,out] mask With a zero-size infield this is an output
* written by invert_field. With a non-zero infield it is an input
* passed to invert_field as the mask of valid field values.
* \param[out] field The calculated displacement field
* \param[out] jac Jacobian determinants of field; only calculated if
* jac has non-zero size on entry
*
********************************************************************/
void DerivativeCalculator::get_field(// Input
                                     const EDDY::ECScan& scan,
                                     const EDDY::CudaVolume& susc,
                                     const EDDY::CudaVolume4D& infield,
                                     // Input/output
                                     EDDY::CudaVolume& mask,
                                     // Output
                                     EDDY::CudaVolume4D& field,
                                     EDDY::CudaVolume& jac) const EddyTry
{
  EDDY::CudaVolume hz_field(scan.GetIma(),false);    // Sum of EC and susceptibility fields
  EDDY::CudaVolume susc_valid(scan.GetIma(),false);  // Where the resampled susc field is valid
  susc_valid = 1.0;
  // Field component to work on: 0 when the first element of the PE vector is non-zero, else 1
  const unsigned int pe_dir = (scan.GetAcqPara().PhaseEncodeVector()(1)!=0) ? 0 : 1;

  // Start from the EC field, then add the susceptibility field resampled by the movement
  EddyInternalGpuUtils::get_ec_field(scan,hz_field);
  if (susc.Size()) {
    EDDY::CudaVolume susc_resampled(susc,false);
    EDDY::CudaVolume4D unused4d;                     // Zero size placeholder
    if (!scan.IsSliceToVol()) {
      NEWMAT::Matrix fwd = scan.ForwardMovementMatrix();
      EddyInternalGpuUtils::affine_transform(susc,fwd,susc_resampled,unused4d,susc_valid);
    }
    else {
      std::vector<NEWMAT::Matrix> fwd = EddyUtils::GetSliceWiseForwardMovementMatrices(scan);
      EddyInternalGpuUtils::affine_transform(susc,fwd,susc_resampled,unused4d,susc_valid);
    }
    hz_field += susc_resampled;
  }

  // Hz -> voxel displacements
  EDDY::CudaVolume4D vox_disp(hz_field,3,false);
  FieldGpuUtils::Hz2VoxelDisplacements(hz_field,scan.GetAcqPara(),vox_disp);

  // Invert, either from scratch or bracketed by the earlier field
  if (!infield.Size()) {
    this->invert_field(vox_disp,scan.GetAcqPara(),susc_valid,field,mask);
  }
  else {
    field = infield;
    this->mm_2_voxel_displacements(field,pe_dir);
    // field is deliberately passed as both the bracketing input and the output
    this->invert_field(vox_disp,scan.GetAcqPara(),mask,field,field);
  }

  // Jacobian of the inverted field, only if the caller supplied storage for it
  if (jac.Size()) field.SampleTrilinearDerivOnVoxelCentres(pe_dir,mask,jac,true);

  // Voxel displacements -> mm
  this->voxel_2_mm_displacements(field,pe_dir);
} EddyCatch
/// Resamples a model-space volume into scan space using a precomputed
/// model-to-scan displacement field and the scan's forward movement matrix
/// (one matrix per slice for slice-to-vol scans).
///
/// \param[in]  pred   Volume in model space
/// \param[in]  scan   Scan supplying the movement parameters
/// \param[in]  dfield Displacement field for the model-to-scan transform
/// \param[out] oima   Resampled volume; gets pred's header if it differs from pred
/// \param[out] omask  Mask from general_transform; gets pred's header if it differs from pred
void DerivativeCalculator::transform_to_scan_space(// Input
                                                   const EDDY::CudaVolume& pred,
                                                   const EDDY::ECScan& scan,
                                                   const EDDY::CudaVolume4D& dfield,
                                                   // Output
                                                   EDDY::CudaVolume& oima,
                                                   EDDY::CudaVolume& omask) const EddyTry
{
  // Make sure the outputs match the input
  if (oima != pred) oima.SetHdr(pred);
  if (omask != pred) omask.SetHdr(pred);

  EDDY::CudaVolume4D no_grad;          // Zero size place holder
  NEWMAT::IdentityMatrix eye(4);

  if (!scan.IsSliceToVol()) {
    // Single rigid-body matrix for the whole volume
    NEWMAT::Matrix fwd = scan.ForwardMovementMatrix();
    EddyInternalGpuUtils::general_transform(pred,eye,dfield,fwd,oima,no_grad,omask);
  }
  else {
    // One rigid-body matrix per slice, each paired with an identity matrix
    std::vector<NEWMAT::Matrix> fwd = EddyUtils::GetSliceWiseForwardMovementMatrices(scan);
    std::vector<NEWMAT::Matrix> eyes(fwd.size());
    for (auto& m : eyes) m = eye;
    EddyInternalGpuUtils::general_transform(pred,eyes,dfield,fwd,oima,no_grad,omask);
  }
} EddyCatch
/// Inverts a displacement field along the phase-encode direction.
///
/// Launches invert_displacement_field_x on field component 0 when the first
/// element of the phase-encode vector is non-zero, otherwise
/// invert_displacement_field_y on component 1 when the second element is
/// non-zero. The launch uses field.Size(2) blocks of field.Size(0) threads.
/// NOTE(review): field.Size(0) is used directly as threads-per-block, so it
/// presumably has to stay within the device limit -- confirm for wide volumes.
///
/// \param[in]  field  Displacement field to invert
/// \param[in]  acqp   Acquisition parameters supplying the phase-encode vector
/// \param[in]  inmask Mask passed to the kernel together with field
/// \param[out] ifield Receives the inverted field (same component as was read)
/// \param[out] omask  Receives the mask written by the kernel
/// \throws EddyException if neither of the first two phase-encode elements is non-zero
void DerivativeCalculator::invert_field(// Input
                                        const CudaVolume4D& field,
                                        const EDDY::AcqPara& acqp,
                                        const CudaVolume& inmask,
                                        // Output
                                        CudaVolume4D& ifield,
                                        CudaVolume& omask) const EddyTry
{
  int tpb = field.Size(0);
  int nblocks = field.Size(2);
  if (acqp.PhaseEncodeVector()(1) != 0.0) { // If PE in x
    EddyKernels::invert_displacement_field_x<<<nblocks,tpb>>>(field.GetPtr(0),inmask.GetPtr(),field.Size(0),field.Size(1),
                                                              ifield.GetPtr(0),omask.GetPtr(),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_x");
  }
  else if (acqp.PhaseEncodeVector()(2) != 0.0) { // If PE in y
    EddyKernels::invert_displacement_field_y<<<nblocks,tpb>>>(field.GetPtr(1),inmask.GetPtr(),field.Size(0),field.Size(1),
                                                              ifield.GetPtr(1),omask.GetPtr(),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_y");
  }
  // Class name was misspelled ("DerivativeCaclulator") in this message; the _1 suffix identifies this overload
  else throw EddyException("DerivativeCalculator::invert_field_1: Invalid phase encode vector");
  return;
} EddyCatch
/// Inverts a displacement field along the phase-encode direction, handing a
/// previously calculated inverse to the kernel (the class declaration
/// describes it as being used for bracketing the new inverse).
///
/// Launches invert_displacement_field_x on field component 0 when the first
/// element of the phase-encode vector is non-zero, otherwise
/// invert_displacement_field_y on component 1 when the second element is
/// non-zero. The launch uses field.Size(2) blocks of field.Size(0) threads.
/// NOTE(review): field.Size(0) is used directly as threads-per-block, so it
/// presumably has to stay within the device limit -- confirm for wide volumes.
/// get_field calls this with inifield and ifield referring to the same object.
///
/// \param[in]  field    Displacement field to invert
/// \param[in]  acqp     Acquisition parameters supplying the phase-encode vector
/// \param[in]  inmask   Mask passed to the kernel together with field
/// \param[in]  inifield Earlier inverse field passed to the kernel
/// \param[out] ifield   Receives the inverted field (same component as was read)
/// \throws EddyException if neither of the first two phase-encode elements is non-zero
void DerivativeCalculator::invert_field(// Input
                                        const CudaVolume4D& field,
                                        const EDDY::AcqPara& acqp,
                                        const CudaVolume& inmask,
                                        const CudaVolume4D& inifield,
                                        // Output
                                        CudaVolume4D& ifield) const EddyTry
{
  int tpb = field.Size(0);
  int nblocks = field.Size(2);
  if (acqp.PhaseEncodeVector()(1) != 0.0) { // If PE in x
    EddyKernels::invert_displacement_field_x<<<nblocks,tpb>>>(field.GetPtr(0),inmask.GetPtr(),inifield.GetPtr(0),
                                                              field.Size(0),field.Size(1),ifield.GetPtr(0),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_x");
  }
  else if (acqp.PhaseEncodeVector()(2) != 0.0) { // If PE in y
    EddyKernels::invert_displacement_field_y<<<nblocks,tpb>>>(field.GetPtr(1),inmask.GetPtr(),inifield.GetPtr(1),
                                                              field.Size(0),field.Size(1),ifield.GetPtr(1),nblocks*tpb);
    EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field_y");
  }
  // Class name was misspelled ("DerivativeCaclulator") in this message; the _2 suffix identifies this overload
  else throw EddyException("DerivativeCalculator::invert_field_2: Invalid phase encode vector");
  return;
} EddyCatch
/// Rescales one component of a displacement field from voxels to mm by
/// multiplying it, in place, by the voxel size along that direction.
///
/// \param[in,out] field Displacement field; only component dir is modified
/// \param[in]     dir   Index of the component (and voxel size) to use
void DerivativeCalculator::voxel_2_mm_displacements(// Input/Output
                                                    CudaVolume4D& field,
                                                    // Input
                                                    unsigned int dir) const EddyTry
{
  EDDY::MulByScalar<float> vox_to_mm(field.Vxs(dir));
  thrust::transform(field.Begin(dir),field.End(dir),field.Begin(dir),vox_to_mm);
} EddyCatch
/// Rescales one component of a displacement field from mm to voxels by
/// multiplying it, in place, by the reciprocal of the voxel size along
/// that direction.
///
/// \param[in,out] field Displacement field; only component dir is modified
/// \param[in]     dir   Index of the component (and voxel size) to use
void DerivativeCalculator::mm_2_voxel_displacements(// Input/Output
                                                    CudaVolume4D& field,
                                                    // Input
                                                    unsigned int dir) const EddyTry
{
  EDDY::MulByScalar<float> mm_to_vox(1.0/field.Vxs(dir));
  thrust::transform(field.Begin(dir),field.End(dir),field.Begin(dir),mm_to_vox);
} EddyCatch
/// Derives a secondary (slice-modulated) derivative image from a primary one.
///
/// The modulation vector from sdm is copied to the device and the
/// slice_modulate_deriv kernel reads derivs[primi] and writes derivs[scndi].
/// The launch uses derivs.Size(2) blocks of derivs.Size(0) threads.
///
/// \param[in,out] derivs Derivative images; volume scndi is overwritten
/// \param[in]     mask   Mask passed to the kernel
/// \param[in]     primi  Index of the primary derivative (kernel input)
/// \param[in]     scndi  Index of the secondary derivative (kernel output)
/// \param[in]     sdm    Supplies the modulation via GetMod()
void DerivativeCalculator::get_slice_modulated_deriv(// Input/Output
                                                     CudaVolume4D& derivs,
                                                     // Input
                                                     const CudaVolume& mask,
                                                     unsigned int primi,
                                                     unsigned int scndi,
                                                     const SliceDerivModulator& sdm) const EddyTry
{
  // Device-side copy of the modulation values
  thrust::device_vector<float> dev_mod = sdm.GetMod();

  // Launch configuration
  int threads_per_block = derivs.Size(0);
  int num_blocks = derivs.Size(2);

  EddyKernels::slice_modulate_deriv<<<num_blocks,threads_per_block>>>(derivs.GetPtr(primi),
                                                                      mask.GetPtr(),
                                                                      derivs.Size(0),
                                                                      derivs.Size(1),
                                                                      derivs.Size(2),
                                                                      thrust::raw_pointer_cast(dev_mod.data()),
                                                                      derivs.GetPtr(scndi),
                                                                      threads_per_block*num_blocks);
  EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_slice_modulated_deriv::slice_modulate_deriv");
} EddyCatch
// Everything below is commented-out dead code. It is kept only in case it
// turns out to be useful in the future, which is unlikely.
/*
auto start_invert = std::chrono::system_clock::now();
FieldGpuUtils::InvertDisplacementField(dfield,scan.GetAcqPara(),mask,idfield,omask);
auto end_invert = std::chrono::system_clock::now();
// Get jacobian of inverted field
auto start_jacobian = std::chrono::system_clock::now();
if (jac.Size()) FieldGpuUtils::GetJacobian(idfield,scan.GetAcqPara(),jac);
auto end_jacobian = std::chrono::system_clock::now();
CudaVolume new_jac(jac,false);
auto start_new_jacobian = std::chrono::system_clock::now();
auto end_new_jacobian = std::chrono::system_clock::now();
// Transform field to mm displacements
FieldGpuUtils::Voxel2MMDisplacements(idfield);
auto end = std::chrono::system_clock::now();
char fname[256];
sprintf(fname,"old_jac_%03d",cnt);
NEWIMAGE::write_volume(jac.GetVolume(),fname);
sprintf(fname,"new_jac_%03d",cnt);
NEWIMAGE::write_volume(new_jac.GetVolume(),fname);
std::chrono::duration<double> duration = end-start;
std::chrono::duration<double> inv_duration = end_invert-start_invert;
std::chrono::duration<double> jac_duration = end_jacobian-start_jacobian;
std::chrono::duration<double> new_jac_duration = end_new_jacobian-start_new_jacobian;
std::chrono::duration<double> duration1 = start_second_part - start;
std::chrono::duration<double> duration2 = end - start_second_part;
cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform took " << duration.count() << " sec, of which the inverse was " << 100.0*inv_duration.count()/duration.count() << " %, and the Jacobian was " << 100.0*jac_duration.count()/duration.count() << " %" << endl;
cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform took " << duration.count() << " sec, of which the first half was " << 100.0*duration1.count()/duration.count() << " %, and the second half " << 100.0*duration2.count()/duration.count() << " %" << endl;
cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform old Jacobian took " << jac_duration.count() << " sec, and new Jacobian took " << new_jac_duration.count() << " sec" << endl;
*/
/*
void DerivativeCalculator::calculate_direct_derivatives_very_fast(CudaVolume& pred,
CudaVolume& pmask,
ECScan& scan,
const CudaVolume& susc,
Parameters whichp) EddyTry
{
// Check input parameters
if (pred != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_very_fast: pred-scan size mismatch");
if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_very_fast: susc-scan size mismatch");
EDDY::CudaVolume4D dfield(pred,3,false); dfield = 0.0;
EDDY::CudaVolume fmask(pred,false); // Mask where field is valid
EDDY::CudaVolume mask(pred,false); // Mask where resampled image is valid
EDDY::CudaVolume base(pred,false); // zero-point for derivatives
thrust::device_vector<int> lbindx; // Lower bounds of range when calculating inverses
this->get_field(scan,susc,lbindx,dfield,fmask,_jac);
this->transform_to_scan_space(pred,scan,dfield,_pios,mask);
_pios *= _jac; // Used for returning resampled predictions
NEWIMAGE::interpolation old_interp = pred.Interp();
if (old_interp != NEWIMAGE::trilinear) {
pred.SetInterp(NEWIMAGE::trilinear);
this->transform_to_scan_space(pred,scan,dfield,base,mask);
}
else base = _pios;
// Transform predefined mask to scan space and combine with sampling mask
this->transform_to_scan_space(pmask,scan,dfield,_mios,mask);
// Binarise resampled prediction mask and combine with sampling mask
_mios.Binarise(0.99); _mios *= mask;
// char fname[256]; sprintf(fname,"field_%03d",1); dfield.Write(fname);
EDDY::CudaVolume jac(pred,false); // Jacobian determinant of field
EDDY::CudaVolume perturbed(pred,false);
for (unsigned int i=0; i<scan.NCompoundDerivs(whichp); i++) {
// First calculate primary derivative for the compound
EDDY::DerivativeInstructions di = scan.GetCompoundDerivInstructions(i,whichp);
double p = scan.GetDerivParam(di.GetPrimaryIndex(),whichp);
scan.SetDerivParam(di.GetPrimaryIndex(),p+di.GetPrimaryScale(),whichp);
this->get_field(scan,susc,lbindx,dfield,fmask,jac);
this->transform_to_scan_space(pred,scan,dfield,perturbed,mask);
perturbed *= jac;
_derivs[di.GetPrimaryIndex()] = (perturbed-base)/di.GetPrimaryScale();
scan.SetDerivParam(di.GetPrimaryIndex(),p,whichp);
// Next calculate any secondary/modulated derivatives
for (unsigned int j=0; j<di.NSecondary(); j++) {
if (di.IsSliceMod()) {
EDDY::SliceDerivModulator sdm = di.GetSliceModulator(j);
get_slice_modulated_deriv(_derivs,fmask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm);
}
else if (di.IsSpatiallyMod()) {
EDDY::SpatialDerivModulator sdm = di.GetSpatialModulator(j);
// get_spatially_modulated_deriv(_derivs,fmask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm);
}
}
}
if (old_interp != NEWIMAGE::trilinear) pred.SetInterp(old_interp);
return;
} EddyCatch
void DerivativeCalculator::calculate_direct_derivatives_fast(CudaVolume& pred,
CudaVolume& pmask,
ECScan& scan,
const CudaVolume& susc,
Parameters whichp) EddyTry
{
// Check input parameters
if (pred != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_fast: pred-scan size mismatch");
if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_direct_derivatives_fast: susc-scan size mismatch");
EDDY::CudaVolume4D skrutt;
EDDY::CudaVolume fmask(pred,false); // Mask where field is valid
EDDY::CudaVolume mask(pred,false); // Mask where resampled image is valid
this->get_field(scan,susc,skrutt,_dfield,fmask,_jac);
this->transform_to_scan_space(pred,scan,_dfield,_pios,mask);
_pios *= _jac; // Used for returning resampled predictions
// Transform predefined mask to scan space and combine with sampling mask
this->transform_to_scan_space(pmask,scan,dfield,_mios,mask);
// Binarise resampled prediction mask and combine with sampling mask
_mios.Binarise(0.99); _mios *= mask;
char fname[256]; sprintf(fname,"field_%03d",1); dfield.Write(fname);
EDDY::CudaVolume4D dfield(pred,3,false); // Displacements of perturbed field
EDDY::CudaVolume jac(pred,false); // Jacobian determinant of perturbed field
EDDY::CudaVolume perturbed(pred,false);
for (unsigned int i=0; i<scan.NDerivs(whichp); i++) {
double p = scan.GetDerivParam(i,whichp);
scan.SetDerivParam(i,p+scan.GetDerivScale(i,whichp),whichp);
// auto start_get_field = std::chrono::system_clock::now();
this->get_field(scan,susc,_dfield,dfield,fmask,jac);
// auto end_get_field = std::chrono::system_clock::now();
// std::chrono::duration<double> duration = end_get_field-start_get_field;
// cout << "this->get_field(scan,susc,lbindx,dfield,fmask,jac); took " << duration.count() << " seconds" << endl;
sprintf(fname,"field_%03d",i+2); dfield.Write(fname);
this->transform_to_scan_space(pred,scan,dfield,perturbed,mask);
perturbed *= jac;
sprintf(fname,"perturbed_%03d",i+1); perturbed.Write(fname);
_derivs[i] = (perturbed-_pios)/scan.GetDerivScale(i,whichp);
scan.SetDerivParam(i,p,whichp);
}
return;
} EddyCatch
void DerivativeCalculator::calculate_modulated_derivatives(CudaVolume& pred,
CudaVolume& pmask,
ECScan& scan,
const CudaVolume& susc,
Parameters whichp) EddyTry
{
// Check input parameters
if (pred != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_modulated_derivatives: pred-scan size mismatch");
if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("DerivativeCalculator::calculate_modulated_derivatives: susc-scan size mismatch");
// Check for the case of EC estimation without a susceptibiltity field, which means that
// the constant off-resonance field should not be estimated. That also means that one of
// the derivatives that we need in order to estimate the other should be estimated, but
// not be included with the other derivatives.
EDDY::CudaVolume offset_deriv;
if (scan.NDerivs() < scan.NParam()) { // Indicates that the constant off-resonance field is not directly estimated
if (susc.Size()) throw EDDY::EddyException("DerivativeCalculator::calculate_modulated_derivatives: pred-scan size mismatch");
offset_deriv.SetHdr(pred);
}
EDDY::CudaVolume mask(pred,false);
EDDY::CudaVolume4D grad; // Zero size placeholder
// Calculate prediction in scan space. Also serves as base for derivative calculations.
EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,_pios,mask,_jac,grad);
// Transform predefined mask to scan space and combine with sampling mask
EDDY::CudaVolume skrutt; // Zero size placeholder
EDDY::CudaVolume4D skrutt4D; // Zero size placeholder
EddyInternalGpuUtils::transform_model_to_scan_space(pmask,scan,susc,false,_mios,skrutt,skrutt,skrutt4D);
// Binarise resampled prediction mask and combine with sampling mask
_mios.Binarise(0.99); _mios *= mask;
EDDY::CudaVolume jac(pred,false); // Jacobian determinant of field
EDDY::CudaVolume perturbed(pred,false);
// cout << "scan.NCompoundDerivs(whichp) = " << scan.NCompoundDerivs(whichp) << endl; cout.flush();
for (unsigned int i=0; i<scan.NCompoundDerivs(whichp); i++) {
// cout << "i = " << i << endl; cout.flush();
// First calculate primary derivative for the compound
EDDY::DerivativeInstructions di = scan.GetCompoundDerivInstructions(i,whichp);
// cout << "di.NSecondary() = " << di.NSecondary() << endl; cout.flush();
if (offset_deriv.Size()) {
double p = scan.GetDerivParam(di.GetPrimaryIndex(),whichp,true);
scan.SetDerivParam(di.GetPrimaryIndex(),p+di.GetPrimaryScale(),whichp,true);
EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,perturbed,mask,jac,grad);
if (di.GetPrimaryIndex() == scan.NDerivs()) offset_deriv = _mios * (perturbed-_pios) / di.GetPrimaryScale();
else _derivs[di.GetPrimaryIndex()] = _mios * (perturbed-_pios) / di.GetPrimaryScale();
scan.SetDerivParam(di.GetPrimaryIndex(),p,whichp,true);
}
else {
double p = scan.GetDerivParam(di.GetPrimaryIndex(),whichp);
scan.SetDerivParam(di.GetPrimaryIndex(),p+di.GetPrimaryScale(),whichp);
EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,perturbed,mask,jac,grad);
_derivs[di.GetPrimaryIndex()] = _mios * (perturbed-_pios) / di.GetPrimaryScale();
scan.SetDerivParam(di.GetPrimaryIndex(),p,whichp);
}
// Next calculate any secondary/modulated derivatives
if (di.IsSliceMod()) {
for (unsigned int j=0; j<di.NSecondary(); j++) {
EDDY::SliceDerivModulator sdm = di.GetSliceModulator(j);
get_slice_modulated_deriv(_derivs,mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm);
}
}
else if (di.IsSpatiallyMod()) {
for (unsigned int j=0; j<di.NSecondary(); j++) {
EDDY::SpatialDerivModulator sdm = di.GetSpatialModulator(j);
if (offset_deriv.Size() && di.GetPrimaryIndex() == scan.NDerivs()) {
get_spatially_modulated_deriv(_derivs,mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm,offset_deriv);
}
else {
get_spatially_modulated_deriv(_derivs,mask,di.GetPrimaryIndex(),di.GetSecondaryIndex(j),sdm,skrutt);
}
}
}
}
return;
} EddyCatch
void DerivativeCalculator::get_lower_bound_indicies(// Input
const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
// Output
thrust::device_vector<int>& lbindx,
CudaVolume& omask) const EddyTry
{
if (lbindx.size() != field.Size()) {
lbindx.resize(field.Size());
}
int tpb = field.Size(0);
int nblocks = field.Size(2);
if (acqp.PhaseEncodeVector()(1) != 0.0) { // If PE in x
EddyKernels::get_lower_bound_of_inverse_x<<<nblocks,tpb>>>(field.GetPtr(0),inmask.GetPtr(),field.Size(0),field.Size(1),field.Size(2),
thrust::raw_pointer_cast(lbindx.data()),omask.GetPtr(),nblocks*tpb);
EddyCudaHelperFunctions::CudaSync("EddyKernels::get_lower_bound_of_inverse_x");
}
else if (acqp.PhaseEncodeVector()(2) != 0.0) { // If PE in y
EddyKernels::get_lower_bound_of_inverse_y<<<nblocks,tpb>>>(field.GetPtr(1),inmask.GetPtr(),field.Size(0),field.Size(1),field.Size(2),
thrust::raw_pointer_cast(lbindx.data()),omask.GetPtr(),nblocks*tpb);
EddyCudaHelperFunctions::CudaSync("EddyKernels::get_lower_bound_of_inverse_y");
}
else throw EddyException("DerivativeCaclulator::get_lower_bound_indicies: Invalid phase encode vector");
return;
} EddyCatch
void DerivativeCalculator::get_spatially_modulated_deriv(// Input/Output
CudaVolume4D& derivs,
// Input
const CudaVolume& mask,
unsigned int primi,
unsigned int scndi,
const SpatialDerivModulator& sdm,
const CudaVolume& offset) const EddyTry
{
std::vector<unsigned int> mod = sdm.GetModulation();
int tpb = derivs.Size(0);
int nblocks = derivs.Size(2);
const float *inptr = nullptr;
if (offset.Size()) inptr = offset.GetPtr();
else inptr = derivs.GetPtr(primi);
float *outptr = derivs.GetPtr(scndi);
if (mod[0]) {
for (unsigned int i=0; i<mod[0]; i++) {
EddyKernels::x_modulate_deriv<<<nblocks,tpb>>>(inptr,mask.GetPtr(),derivs.Size(0),derivs.Size(1),
derivs.Size(2),derivs.Vxs(0),outptr,tpb*nblocks);
EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_spatially_modulated_deriv::x_modulate_deriv");
inptr = outptr;
}
}
if (mod[1]) {
for (unsigned int i=0; i<mod[1]; i++) {
EddyKernels::y_modulate_deriv<<<nblocks,tpb>>>(inptr,mask.GetPtr(),derivs.Size(0),derivs.Size(1),
derivs.Size(2),derivs.Vxs(1),outptr,tpb*nblocks);
EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_spatially_modulated_deriv::y_modulate_deriv");
inptr = outptr;
}
}
if (mod[2]) {
for (unsigned int i=0; i<mod[2]; i++) {
EddyKernels::z_modulate_deriv<<<nblocks,tpb>>>(inptr,mask.GetPtr(),derivs.Size(0),derivs.Size(1),
derivs.Size(2),derivs.Vxs(2),outptr,tpb*nblocks);
EddyCudaHelperFunctions::CudaSync("DerivativeCalculator::get_spatially_modulated_deriv::z_modulate_deriv");
inptr = outptr;
}
}
return;
} EddyCatch
*/
/////////////////////////////////////////////////////////////////////
///
/// \file DerivativeCalculator.h
/// \brief Declarations of class used to calculate the derivatives of a prediction in scan space w.r.t. all parameters.
///
/// \author Jesper Andersson
/// \version 1.0b, Dec., 2019.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef DerivativeCalculator_h
#define DerivativeCalculator_h
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <hip/hip_runtime.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#pragma pop
#include "EddyHelperClasses.h"
#include "ECScanClasses.h"
#include "EddyKernels.h"
#include "CudaVolume.h"
#include "EddyInternalGpuUtils.h"
namespace EDDY {
/// Selects how DerivativeCalculator computes its derivatives.
/// Old    : the constructor calls calculate_direct_derivatives.
/// Mixed  : the constructor calls calculate_mixed_derivatives.
/// LongEC : set by the long-EC constructor, which calls calculate_long_ec_derivatives.
enum class DerivType { Old, Mixed, LongEC };
/****************************************************************//**
*
* \brief Calculates derivatives for use in EddyInternalUtils::param_update
*
* All calculations are started from the constructors. Afterwards the
* results (prediction, mask and Jacobian in scan space, and the
* partial derivative images) are available through const accessors.
*
********************************************************************/
class DerivativeCalculator
{
public:
/// Constructor that calculates all the derivatives (movement and EC) in scan space.
/// DerivType::Old dispatches to calculate_direct_derivatives and DerivType::Mixed to
/// calculate_mixed_derivatives; any other value of dt throws an EddyException.
/// If fwhm is non-zero the derivative images are subsequently smoothed.
DerivativeCalculator(CudaVolume& pred, ///< [in] Prediction in model space
CudaVolume& pmask, ///< [in] Predefined mask in model space
ECScan& scan, ///< [in] Scan
const CudaVolume& susc, ///< [in] Susceptibility field
ParametersType whichp, ///< [in] Specifies which parameters to calculate derivatives for
float fwhm, ///< [in] FWHM for optional smoothing of derivative images
DerivType dt=DerivType::Old) ///< [in] Specify details of how to calculate derivatives
EddyTry : _dt(dt), _fwhm(fwhm), _whichp(whichp), _derivs(pred,scan.NDerivs(whichp),false), _dfield(pred,3,false), _pios(pred,false), _mios(pmask,false), _jac(pred,false)
{
if (dt == DerivType::Old) calculate_direct_derivatives(pred,pmask,scan,susc,whichp);
else if (dt == DerivType::Mixed) calculate_mixed_derivatives(pred,pmask,scan,susc,whichp);
else throw EDDY::EddyException("DerivativeCalculator::DerivativeCalculator: Unknown derivative type");
// Smoothing is skipped when fwhm is zero
if (fwhm) _derivs.Smooth(fwhm,_mios);
} EddyCatch
/// Constructor that calculates the long EC derivatives in scan space.
/// Sets the derivative type to DerivType::LongEC and allocates lecm.NDerivs() derivative images.
/// If fwhm is non-zero the derivative images are subsequently smoothed.
DerivativeCalculator(const CudaVolume& pred, ///< [in] Prediction in model space
const CudaVolume& pmask, ///< [in] Predefined mask in model space
const ECScan& scan, ///< [in] Scan
unsigned int scindx, ///< [in] Scan index, given ScanType::Any
const CudaVolume& susc, ///< [in] Susceptibility field
const LongECModel& lecm, ///< [in] Long EC model whose parameters derivatives are calculated for
float fwhm) ///< [in] FWHM for optional smoothing of derivative images
EddyTry : _dt(DerivType::LongEC), _fwhm(fwhm), _whichp(ParametersType::All), _derivs(pred,lecm.NDerivs(),false), _dfield(pred,3,false), _pios(pred,false), _mios(pmask,false), _jac(pred,false)
{
calculate_long_ec_derivatives(pred,pmask,scan,scindx,susc,lecm);
// Smoothing is skipped when fwhm is zero
if (fwhm) _derivs.Smooth(fwhm,_mios);
} EddyCatch
/// Returns prediction in scan space
const CudaVolume& PredInScanSpace() const { return(_pios); }
/// Returns mask in scan space
const CudaVolume& MaskInScanSpace() const { return(_mios); }
/// Returns Jacobian determinant map in scan space
const CudaVolume& JacInScanSpace() const { return(_jac); }
/// Returns a const reference to the pre-calculated derivatives
const CudaVolume4D& Derivatives() const { return(_derivs); }
/// Returns a value indicating how the derivatives were calculated.
DerivType WhichDerivativeType() const { return(_dt); }
/// Writes derivatives as 4D nifti and other images as 3D niftis
void Write(const std::string& basename) const;
private:
// NOTE: the constructors' initialiser lists follow this declaration order; keep the two in sync.
DerivType _dt; ///< Flag that indicates how derivatives were calculated
float _fwhm; ///< FWHM of optional smoothing of derivative images
ParametersType _whichp; ///< Specifies which parameters to calculate derivatives for
CudaVolume4D _derivs; ///< The partial derivative images
CudaVolume4D _dfield; ///< The displacement field for (original) model->scan transformation
CudaVolume _pios; ///< Prediction in scan (original) space
CudaVolume _mios; ///< Indicates where pred in scan space is valid
CudaVolume _jac; ///< Jacobian in scan space
/// Calculates partial derivatives using finite differences and interpolation given by scan
void calculate_direct_derivatives(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, ParametersType whichp);
/// Calculates partial derivatives. Started out as experimental, but is now the method of choice
void calculate_mixed_derivatives(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, ParametersType whichp);
/// Calculate derivatives for long time-constant EC model.
void calculate_long_ec_derivatives(const CudaVolume& pred, const CudaVolume& pmask, const ECScan& scan, unsigned int scindx, const CudaVolume& susc, const LongECModel& lecm);
/// Calculates field for model-to-scan transform given parameters in scan
void get_field(const EDDY::ECScan& scan,
const EDDY::CudaVolume& susc,
const EDDY::CudaVolume4D& infield,
EDDY::CudaVolume& mask,
EDDY::CudaVolume4D& field,
EDDY::CudaVolume& jac) const;
/// Transform from model to scan space, given a model-to-scan-field as input.
void transform_to_scan_space(const EDDY::CudaVolume& pred,
const EDDY::ECScan& scan,
const EDDY::CudaVolume4D& dfield,
EDDY::CudaVolume& oima,
EDDY::CudaVolume& omask) const;
/// Inverts the field.
void invert_field(const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
CudaVolume4D& ifield,
CudaVolume& omask) const;
/// Inverts the field, using the inifield for bracketing the new inverse
void invert_field(const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
const CudaVolume4D& inifield,
CudaVolume4D& ifield) const;
/// Converts component dir of field from voxel displacements to mm, in place
void voxel_2_mm_displacements(CudaVolume4D& field, unsigned int dir) const;
/// Converts component dir of field from mm to voxel displacements, in place
void mm_2_voxel_displacements(CudaVolume4D& field, unsigned int dir) const;
/// Calculates the slice-modulated derivative with index scndi from the primary derivative with index primi
void get_slice_modulated_deriv(// Input/Output
CudaVolume4D& derivs,
// Input
const CudaVolume& mask,
unsigned int primi,
unsigned int scndi,
const SliceDerivModulator& sdm) const;
};
/****************************************************************//**
*
* \fn DerivativeCalculator::DerivativeCalculator(CudaVolume& pred,
*                                                CudaVolume& pmask,
*                                                ECScan& scan,
*                                                const CudaVolume& susc,
*                                                ParametersType whichp,
*                                                float fwhm,
*                                                DerivType dt=DerivType::Old)
* \brief Constructor for the DerivativeCalculator class
*
*
********************************************************************/
} // End namespace EDDY
#endif // End #ifndef DerivativeCalculator_h
// Dead code
/*
/// Calculates partial derivatives using finite differences and tri-linear interpolation
void calculate_direct_derivatives_fast(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, Parameters whichp);
/// Experimental
void calculate_direct_derivatives_very_fast(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, Parameters whichp);
/// Experimental
void calculate_modulated_derivatives(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, Parameters whichp);
/// Finds the two bracketing indices for each point of the inverse field, and returns the lower index
void get_lower_bound_indicies(const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
thrust::device_vector<int>& lbindx,
CudaVolume& omask) const;
void get_spatially_modulated_deriv(// Input/Output
CudaVolume4D& derivs,
// Input
const CudaVolume& mask,
unsigned int primi,
unsigned int scndi,
const SpatialDerivModulator& sdm,
const CudaVolume& offset) const;
*/
/////////////////////////////////////////////////////////////////////
///
/// \file DerivativeCalculator.h
/// \brief Declarations of class used to calculate the derivatives of a prediction in scan space w.r.t. all parameters.
///
/// \author Jesper Andersson
/// \version 1.0b, Dec., 2019.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef DerivativeCalculator_h
#define DerivativeCalculator_h
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <cuda.h>
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <thrust/fill.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#pragma pop
#include "EddyHelperClasses.h"
#include "ECScanClasses.h"
#include "EddyKernels.h"
#include "CudaVolume.h"
#include "EddyInternalGpuUtils.h"
namespace EDDY {
/// Selects how DerivativeCalculator computes its derivatives.
/// Old    : the constructor calls calculate_direct_derivatives.
/// Mixed  : the constructor calls calculate_mixed_derivatives.
/// LongEC : set by the long-EC constructor, which calls calculate_long_ec_derivatives.
enum class DerivType { Old, Mixed, LongEC };
/****************************************************************//**
*
* \brief Calculates derivatives for use in EddyInternalUtils::param_update
*
* The constructors perform the calculation (and the optional smoothing
* of the derivative images). The public accessors then return const
* references to the stored results: derivative images, prediction,
* mask and Jacobian in scan space.
*
********************************************************************/
class DerivativeCalculator
{
public:
/// Constructor that calculates all the derivatives (movement and EC) in scan space.
/// dt selects the method: DerivType::Old uses calculate_direct_derivatives and
/// DerivType::Mixed uses calculate_mixed_derivatives. Any other value (including
/// DerivType::LongEC) makes this constructor throw an EddyException.
/// If fwhm is non-zero the derivative images are smoothed using the scan-space mask.
DerivativeCalculator(CudaVolume& pred, ///< [in] Prediction in model space
CudaVolume& pmask, ///< [in] Predefined mask in model space
ECScan& scan, ///< [in] Scan
const CudaVolume& susc, ///< [in] Susceptibility field
ParametersType whichp, ///< [in] Specifies which parameters to calculate derivatives for
float fwhm, ///< [in] FWHM for optional smoothing of derivative images (0 means no smoothing)
DerivType dt=DerivType::Old) ///< [in] Specify details of how to calculate derivatives
EddyTry : _dt(dt), _fwhm(fwhm), _whichp(whichp), _derivs(pred,scan.NDerivs(whichp),false), _dfield(pred,3,false), _pios(pred,false), _mios(pmask,false), _jac(pred,false)
{
if (dt == DerivType::Old) calculate_direct_derivatives(pred,pmask,scan,susc,whichp);
else if (dt == DerivType::Mixed) calculate_mixed_derivatives(pred,pmask,scan,susc,whichp);
else throw EDDY::EddyException("DerivativeCalculator::DerivativeCalculator: Unknown derivative type");
// Smooth the derivative images, passing the scan-space mask along
if (fwhm) _derivs.Smooth(fwhm,_mios);
} EddyCatch
/// Constructor that calculates the long EC derivatives in scan space.
/// Always records DerivType::LongEC and ParametersType::All; the number of
/// derivative images is taken from lecm.NDerivs().
/// If fwhm is non-zero the derivative images are smoothed using the scan-space mask.
DerivativeCalculator(const CudaVolume& pred, ///< [in] Prediction in model space
const CudaVolume& pmask, ///< [in] Predefined mask in model space
const ECScan& scan, ///< [in] Scan
unsigned int scindx, ///< [in] Scan index, given ScanType::Any
const CudaVolume& susc, ///< [in] Susceptibility field
const LongECModel& lecm, ///< [in] Long time-constant EC model; lecm.NDerivs() gives the number of derivatives
float fwhm) ///< [in] FWHM for optional smoothing of derivative images (0 means no smoothing)
EddyTry : _dt(DerivType::LongEC), _fwhm(fwhm), _whichp(ParametersType::All), _derivs(pred,lecm.NDerivs(),false), _dfield(pred,3,false), _pios(pred,false), _mios(pmask,false), _jac(pred,false)
{
calculate_long_ec_derivatives(pred,pmask,scan,scindx,susc,lecm);
// Smooth the derivative images, passing the scan-space mask along
if (fwhm) _derivs.Smooth(fwhm,_mios);
} EddyCatch
/// Returns prediction in scan space
const CudaVolume& PredInScanSpace() const { return(_pios); }
/// Returns mask in scan space
const CudaVolume& MaskInScanSpace() const { return(_mios); }
/// Returns Jacobian determinant map in scan space
const CudaVolume& JacInScanSpace() const { return(_jac); }
/// Returns a const reference to the pre-calculated derivatives
const CudaVolume4D& Derivatives() const { return(_derivs); }
/// Returns a value indicating how the derivatives were calculated.
DerivType WhichDerivativeType() const { return(_dt); }
/// Writes derivatives as 4D nifti and other images as 3D niftis
void Write(const std::string& basename) const;
private:
DerivType _dt; ///< Flag that indicates how derivatives were calculated
float _fwhm; ///< FWHM of optional smoothing of derivative images
ParametersType _whichp; ///< Specifies which parameters to calculate derivatives for
CudaVolume4D _derivs; ///< The partial derivative images
CudaVolume4D _dfield; ///< The displacement field for (original) model->scan transformation
CudaVolume _pios; ///< Prediction in scan (original) space
CudaVolume _mios; ///< Indicates where pred in scan space is valid
CudaVolume _jac; ///< Jacobian in scan space
/// Calculates partial derivatives using finite differences and interpolation given by scan
void calculate_direct_derivatives(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, ParametersType whichp);
/// Calculates partial derivatives. Started out as experimental, but is now the method of choice
void calculate_mixed_derivatives(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, ParametersType whichp);
/// Calculate derivatives for long time-constant EC model.
void calculate_long_ec_derivatives(const CudaVolume& pred, const CudaVolume& pmask, const ECScan& scan, unsigned int scindx, const CudaVolume& susc, const LongECModel& lecm);
/// Calculates field for model-to-scan transform given parameters in scan
void get_field(const EDDY::ECScan& scan,
const EDDY::CudaVolume& susc,
const EDDY::CudaVolume4D& infield,
EDDY::CudaVolume& mask,
EDDY::CudaVolume4D& field,
EDDY::CudaVolume& jac) const;
/// Transform from model to scan space, given a model-to-scan-field as input.
void transform_to_scan_space(const EDDY::CudaVolume& pred,
const EDDY::ECScan& scan,
const EDDY::CudaVolume4D& dfield,
EDDY::CudaVolume& oima,
EDDY::CudaVolume& omask) const;
/// Inverts the field.
void invert_field(const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
CudaVolume4D& ifield,
CudaVolume& omask) const;
/// Inverts the field, using the inifield for bracketing the new inverse
void invert_field(const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
const CudaVolume4D& inifield,
CudaVolume4D& ifield) const;
/// NOTE(review): by its name converts displacements in field from voxels to mm along dir - confirm against the definition
void voxel_2_mm_displacements(CudaVolume4D& field, unsigned int dir) const;
/// NOTE(review): by its name converts displacements in field from mm to voxels along dir - confirm against the definition
void mm_2_voxel_displacements(CudaVolume4D& field, unsigned int dir) const;
/// NOTE(review): semantics of primi/scndi are not visible in this header; derivs is both read and written
void get_slice_modulated_deriv(// Input/Output
CudaVolume4D& derivs,
// Input
const CudaVolume& mask,
unsigned int primi,
unsigned int scndi,
const SliceDerivModulator& sdm) const;
};
/****************************************************************//**
*
* \fn DerivativeCalculator::DerivativeCalculator(CudaVolume& pred,
* CudaVolume& pmask,
* ECScan& scan,
* const CudaVolume& susc,
* ParametersType whichp,
* float fwhm,
* DerivType dt=DerivType::Old)
* \brief Constructor for the DerivativeCalculator class
*
*
********************************************************************/
} // End namespace EDDY
#endif // End #ifndef DerivativeCalculator_h
// Dead code
/*
/// Calculates partial derivatives using finite differences and tri-linear interpolation
void calculate_direct_derivatives_fast(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, Parameters whichp);
/// Experimental
void calculate_direct_derivatives_very_fast(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, Parameters whichp);
/// Experimental
void calculate_modulated_derivatives(CudaVolume& pred, CudaVolume& pmask, ECScan& scan, const CudaVolume& susc, Parameters whichp);
/// Finds the two bracketing indicies for each point of the inverse field, and returns the lower index
void get_lower_bound_indicies(const CudaVolume4D& field,
const EDDY::AcqPara& acqp,
const CudaVolume& inmask,
thrust::device_vector<int>& lbindx,
CudaVolume& omask) const;
void get_spatially_modulated_deriv(// Input/Output
CudaVolume4D& derivs,
// Input
const CudaVolume& mask,
unsigned int primi,
unsigned int scndi,
const SpatialDerivModulator& sdm,
const CudaVolume& offset) const;
*/
/*! \file DiffusionGP.cu
\brief Contains definitions for class for making Gaussian process based predictions about DWI data.
\author Jesper Andersson
\version 1.0b, Feb., 2013.
*/
// Definitions of class to make Gaussian-Process
// based predictions about diffusion data.
//
// DiffusionGP.cu
//
// Jesper Andersson, FMRIB Image Analysis Group
//
// Copyright (C) 2011 University of Oxford
//
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#pragma pop
#include "miscmaths/miscmaths.h"
#include "EddyHelperClasses.h"
#include "EddyUtils.h"
#include "DiffusionGP.h"
#include "CudaVolume.h"
using namespace EDDY;
/****************************************************************//**
* \brief Returns prediction for point given by indx
*
* Returns a predicted image for the given index, where indx refers
* to the corresponding bvec (i.e. bvecs[indx]).
* The prediction is accumulated on the GPU as the pvec-weighted sum of
* the stored volumes (_sptrs), after which the mean volume selected by
* which_mean(indx) is added and the result is copied back into pi.
* It should be noted that this implementation isn't very efficient as
* it reloads all images to the GPU. That means that if there are N images
* and we want to predict them all it takes N*N transfers to the GPU.
* \param indx refers to the corresponding bvec (i.e. bvecs[indx]).
* \param exclude Decides if indx itself should be used in the prediction
* (exclude=false) or not (exclude=true)
* \param pvec Prediction vector for indx. Volume s is weighted by
* pvec(s+1), except that when exclude is true volume indx is skipped
* and every later volume s is weighted by pvec(s).
* \param pi The "predicted image". Re-initialised to the dimensions of
* the first stored volume if its size differs.
* \throw EDDY::EddyException if no volumes are stored.
*
********************************************************************/
void DiffusionGP::predict_image_gpu(// Input
                                    unsigned int indx,
                                    bool exclude,
                                    const NEWMAT::RowVector& pvec,
                                    // Output
                                    NEWIMAGE::volume<float>& pi) const EddyTry
{
  // _sptrs[0] is used as the size/header template below; indexing an empty vector would be undefined behaviour
  if (_sptrs.empty()) throw EDDY::EddyException("DiffusionGP::predict_image_gpu: no volumes have been loaded");
  const auto& templ = *_sptrs[0];
  if (!NEWIMAGE::samesize(pi,templ)) {
    pi.reinitialize(templ.xsize(),templ.ysize(),templ.zsize());
    NEWIMAGE::copybasicproperties(templ,pi);
  }
  EDDY::CudaVolume pcv(pi,false);
  for (unsigned int s=0; s<_sptrs.size(); s++) {
    // Next row shows what the function below does (a little faster)
    // pcv += pvec(s+1) * EDDY::CudaVolume(*(_sptrs[s]));
    if (exclude) {
      if (s < indx) pcv.MultiplyAndAddToMe(EDDY::CudaVolume(*(_sptrs[s])),pvec(s+1));
      // Do nothing if (s == indx)
      else if (s > indx) pcv.MultiplyAndAddToMe(EDDY::CudaVolume(*(_sptrs[s])),pvec(s));
    }
    else pcv.MultiplyAndAddToMe(EDDY::CudaVolume(*(_sptrs[s])),pvec(s+1));
  }
  pcv += EDDY::CudaVolume(*_mptrs[which_mean(indx)]);
  pcv.GetVolume(pi);
  return;
} EddyCatch
/// Predicts several images in one pass over the stored volumes.
/// Each stored volume is transferred to the GPU once and added, with its
/// weight, to every requested prediction.
/// indicies[p], pvecs[p] and pi[p] describe prediction p and must have
/// equal lengths, otherwise an EddyException is thrown.
/// Volume v is weighted by pvecs[p](v+1), except that when exclude is true
/// volume indicies[p] is skipped and every later volume v uses pvecs[p](v).
/// The mean picked by which_mean(indicies[p]) is added before the result
/// is copied back into pi[p].
void DiffusionGP::predict_images_gpu(// Input
                                     const std::vector<unsigned int>& indicies,
                                     bool exclude,
                                     const std::vector<NEWMAT::RowVector>& pvecs,
                                     // Output
                                     std::vector<NEWIMAGE::volume<float> >& pi) const EddyTry
{
  const bool lengths_agree = (indicies.size() == pvecs.size()) && (indicies.size() == pi.size());
  if (!lengths_agree) {
    throw EDDY::EddyException("DiffusionGP::predict_images_gpu: mismatch among indicies, pvecs and pi");
  }

  // One GPU accumulator per requested prediction
  std::vector<EDDY::CudaVolume> pcvs(indicies.size());
  for (unsigned int p=0; p<indicies.size(); p++) {
    NEWIMAGE::volume<float>& out = pi[p];
    if (!NEWIMAGE::samesize(out,*_sptrs[0])) {
      out.reinitialize(_sptrs[0]->xsize(),_sptrs[0]->ysize(),_sptrs[0]->zsize());
      NEWIMAGE::copybasicproperties(*_sptrs[0],out);
    }
    pcvs[p].SetHdr(out);
  }

  // Upload every mean image once
  std::vector<EDDY::CudaVolume> means(_mptrs.size());
  for (unsigned int k=0; k<means.size(); k++) {
    means[k] = *(_mptrs[k]);
  }

  // Outer loop over stored volumes so each is uploaded only once
  for (unsigned int v=0; v<_sptrs.size(); v++) {
    EDDY::CudaVolume cv = *(_sptrs[v]);
    for (unsigned int p=0; p<indicies.size(); p++) {
      if (exclude && v == indicies[p]) continue; // The predicted volume contributes nothing to itself
      // With exclude set, pvecs[p] has no slot for indicies[p], so later volumes shift down by one
      const unsigned int col = (exclude && v > indicies[p]) ? v : v+1;
      pcvs[p].MultiplyAndAddToMe(cv,pvecs[p](col));
    }
  }

  // Add the means and download the finished predictions
  for (unsigned int p=0; p<indicies.size(); p++) {
    pcvs[p] += means[which_mean(indicies[p])];
    pcvs[p].GetVolume(pi[p]);
  }
} EddyCatch
/*! \file DiffusionGP.cu
\brief Contains definitions for class for making Gaussian process based predictions about DWI data.
\author Jesper Andersson
\version 1.0b, Feb., 2013.
*/
// Definitions of class to make Gaussian-Process
// based predictions about diffusion data.
//
// DiffusionGP.cu
//
// Jesper Andersson, FMRIB Image Analysis Group
//
// Copyright (C) 2011 University of Oxford
//
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#pragma pop
#include "miscmaths/miscmaths.h"
#include "EddyHelperClasses.h"
#include "EddyUtils.h"
#include "DiffusionGP.h"
#include "CudaVolume.h"
using namespace EDDY;
/****************************************************************//**
* \brief Returns prediction for point given by indx
*
* Returns a predicted image for the given index, where indx refers
* to the corresponding bvec (i.e. bvecs[indx]).
* The prediction is accumulated on the GPU as the pvec-weighted sum of
* the stored volumes (_sptrs), after which the mean volume selected by
* which_mean(indx) is added and the result is copied back into pi.
* It should be noted that this implementation isn't very efficient as
* it reloads all images to the GPU. That means that if there are N images
* and we want to predict them all it takes N*N transfers to the GPU.
* \param indx refers to the corresponding bvec (i.e. bvecs[indx]).
* \param exclude Decides if indx itself should be used in the prediction
* (exclude=false) or not (exclude=true)
* \param pvec Prediction vector for indx. Volume s is weighted by
* pvec(s+1), except that when exclude is true volume indx is skipped
* and every later volume s is weighted by pvec(s).
* \param pi The "predicted image". Re-initialised to the dimensions of
* the first stored volume if its size differs.
* \throw EDDY::EddyException if no volumes are stored.
*
********************************************************************/
void DiffusionGP::predict_image_gpu(// Input
                                    unsigned int indx,
                                    bool exclude,
                                    const NEWMAT::RowVector& pvec,
                                    // Output
                                    NEWIMAGE::volume<float>& pi) const EddyTry
{
  // _sptrs[0] is used as the size/header template below; indexing an empty vector would be undefined behaviour
  if (_sptrs.empty()) throw EDDY::EddyException("DiffusionGP::predict_image_gpu: no volumes have been loaded");
  const auto& templ = *_sptrs[0];
  if (!NEWIMAGE::samesize(pi,templ)) {
    pi.reinitialize(templ.xsize(),templ.ysize(),templ.zsize());
    NEWIMAGE::copybasicproperties(templ,pi);
  }
  EDDY::CudaVolume pcv(pi,false);
  for (unsigned int s=0; s<_sptrs.size(); s++) {
    // Next row shows what the function below does (a little faster)
    // pcv += pvec(s+1) * EDDY::CudaVolume(*(_sptrs[s]));
    if (exclude) {
      if (s < indx) pcv.MultiplyAndAddToMe(EDDY::CudaVolume(*(_sptrs[s])),pvec(s+1));
      // Do nothing if (s == indx)
      else if (s > indx) pcv.MultiplyAndAddToMe(EDDY::CudaVolume(*(_sptrs[s])),pvec(s));
    }
    else pcv.MultiplyAndAddToMe(EDDY::CudaVolume(*(_sptrs[s])),pvec(s+1));
  }
  pcv += EDDY::CudaVolume(*_mptrs[which_mean(indx)]);
  pcv.GetVolume(pi);
  return;
} EddyCatch
/// Predicts several images in one pass over the stored volumes.
/// Each stored volume is transferred to the GPU once and added, with its
/// weight, to every requested prediction.
/// indicies[p], pvecs[p] and pi[p] describe prediction p and must have
/// equal lengths, otherwise an EddyException is thrown.
/// Volume v is weighted by pvecs[p](v+1), except that when exclude is true
/// volume indicies[p] is skipped and every later volume v uses pvecs[p](v).
/// The mean picked by which_mean(indicies[p]) is added before the result
/// is copied back into pi[p].
void DiffusionGP::predict_images_gpu(// Input
                                     const std::vector<unsigned int>& indicies,
                                     bool exclude,
                                     const std::vector<NEWMAT::RowVector>& pvecs,
                                     // Output
                                     std::vector<NEWIMAGE::volume<float> >& pi) const EddyTry
{
  const bool lengths_agree = (indicies.size() == pvecs.size()) && (indicies.size() == pi.size());
  if (!lengths_agree) {
    throw EDDY::EddyException("DiffusionGP::predict_images_gpu: mismatch among indicies, pvecs and pi");
  }

  // One GPU accumulator per requested prediction
  std::vector<EDDY::CudaVolume> pcvs(indicies.size());
  for (unsigned int p=0; p<indicies.size(); p++) {
    NEWIMAGE::volume<float>& out = pi[p];
    if (!NEWIMAGE::samesize(out,*_sptrs[0])) {
      out.reinitialize(_sptrs[0]->xsize(),_sptrs[0]->ysize(),_sptrs[0]->zsize());
      NEWIMAGE::copybasicproperties(*_sptrs[0],out);
    }
    pcvs[p].SetHdr(out);
  }

  // Upload every mean image once
  std::vector<EDDY::CudaVolume> means(_mptrs.size());
  for (unsigned int k=0; k<means.size(); k++) {
    means[k] = *(_mptrs[k]);
  }

  // Outer loop over stored volumes so each is uploaded only once
  for (unsigned int v=0; v<_sptrs.size(); v++) {
    EDDY::CudaVolume cv = *(_sptrs[v]);
    for (unsigned int p=0; p<indicies.size(); p++) {
      if (exclude && v == indicies[p]) continue; // The predicted volume contributes nothing to itself
      // With exclude set, pvecs[p] has no slot for indicies[p], so later volumes shift down by one
      const unsigned int col = (exclude && v > indicies[p]) ? v : v+1;
      pcvs[p].MultiplyAndAddToMe(cv,pvecs[p](col));
    }
  }

  // Add the means and download the finished predictions
  for (unsigned int p=0; p<indicies.size(); p++) {
    pcvs[p] += means[which_mean(indicies[p])];
    pcvs[p].GetVolume(pi[p]);
  }
} EddyCatch
#include "hipblas.h"
#include <cstdlib>
#include <string>
#include <hip/hip_runtime.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#include "EddyHelperClasses.h"
#pragma pop
#include "EddyCudaHelperFunctions.h"
namespace EDDY {
/// Formats a HIP runtime error code as a single line holding the numeric
/// code, the name from hipGetErrorName and the text from hipGetErrorString.
std::string EddyCudaHelperFunctions::cudaError2String(const hipError_t& ce) EddyTry
{
  std::ostringstream description;
  description << "hipError_t = " << ce;
  description << ", cudaErrorName = " << hipGetErrorName(ce);
  description << ", cudaErrorString = " << hipGetErrorString(ce);
  return description.str();
} EddyCatch
/// Calls hipDeviceSynchronize and throws an EddyException if it reports
/// anything other than hipSuccess.
/// \param msg Name of the preceding call; only used in the error message.
/// NOTE(review): the message is prefixed "EddyKernels::CudaSync" although
/// this function is EddyCudaHelperFunctions::CudaSync.
void EddyCudaHelperFunctions::CudaSync(const std::string& msg) EddyTry
{
  const hipError_t status = hipDeviceSynchronize();
  if (status == hipSuccess) return;

  std::ostringstream report;
  report << "EddyKernels::CudaSync: CUDA error after call to " << msg << ", " << EddyCudaHelperFunctions::cudaError2String(status);
  throw EDDY::EddyException(report.str());
} EddyCatch
/// Probes the GPU once per process: queries the current device, performs a
/// small trial allocation and release, then synchronises.
/// Later calls return immediately once a probe has succeeded.
/// The "initialized" flag is only set after every step has succeeded, so a
/// call that fails with an exception is retried by the next call instead of
/// being silently treated as a success.
/// \param verbose If true, prints the device number to stdout.
/// \throw EddyException if hipGetDevice, hipMalloc or hipFree report an
/// error, or if the final CudaSync throws.
void EddyCudaHelperFunctions::InitGpu(bool verbose) EddyTry
{
  static bool initialized=false;
  if (initialized) return;

  int device;
  hipError_t ce;
  if ((ce = hipGetDevice(&device)) != hipSuccess) throw EddyException("EddyCudaHelperFunctions::InitGpu: hipGetDevice returned an error: " + EddyCudaHelperFunctions::cudaError2String(ce));
  if (verbose) printf("\n...................Allocated GPU # %d...................\n", device);

  // Trial allocation to check that device memory can be obtained and released
  int *q = nullptr;
  if ((ce = hipMalloc(reinterpret_cast<void **>(&q), sizeof(int))) != hipSuccess) {
    throw EddyException("EddyCudaHelperFunctions::InitGpu: hipMalloc returned an error: " + EddyCudaHelperFunctions::cudaError2String(ce));
  }
  if ((ce = hipFree(q)) != hipSuccess) {
    throw EddyException("EddyCudaHelperFunctions::InitGpu: hipFree returned an error: " + EddyCudaHelperFunctions::cudaError2String(ce));
  }
  EddyCudaHelperFunctions::CudaSync("EddyGpuUtils::InitGpu");

  initialized=true; // Only reached when every step above succeeded
} EddyCatch
/// Maps a hipblasStatus_t value to the spelling of its enumerator.
/// Values without a case below yield "Unkown CUBLAS status code".
std::string EddyCudaHelperFunctions::cuBLASGetErrorName(const hipblasStatus_t& cs) EddyTry
{
  switch (cs) {
    case HIPBLAS_STATUS_SUCCESS:          return "HIPBLAS_STATUS_SUCCESS";
    case HIPBLAS_STATUS_NOT_INITIALIZED:  return "HIPBLAS_STATUS_NOT_INITIALIZED";
    case HIPBLAS_STATUS_ALLOC_FAILED:     return "HIPBLAS_STATUS_ALLOC_FAILED";
    case HIPBLAS_STATUS_INVALID_VALUE:    return "HIPBLAS_STATUS_INVALID_VALUE";
    case HIPBLAS_STATUS_ARCH_MISMATCH:    return "HIPBLAS_STATUS_ARCH_MISMATCH";
    case HIPBLAS_STATUS_MAPPING_ERROR:    return "HIPBLAS_STATUS_MAPPING_ERROR";
    case HIPBLAS_STATUS_EXECUTION_FAILED: return "HIPBLAS_STATUS_EXECUTION_FAILED";
    case HIPBLAS_STATUS_INTERNAL_ERROR:   return "HIPBLAS_STATUS_INTERNAL_ERROR";
    case HIPBLAS_STATUS_NOT_SUPPORTED:    return "HIPBLAS_STATUS_NOT_SUPPORTED";
    case HIPBLAS_STATUS_UNKNOWN:          return "HIPBLAS_STATUS_UNKNOWN";
    default:                              return "Unkown CUBLAS status code";
  }
} EddyCatch
/// Returns an explanatory text for a hipblasStatus_t value.
/// Most texts are carried over from the cuBLASGetErrorString of the CUDA
/// build and still refer to cuBLAS/CUDA.
/// HIPBLAS_STATUS_UNKNOWN used to return the NVIDIA licence-error text that
/// belongs to CUBLAS_STATUS_LICENSE_ERROR. hipBLAS has no licence status, so
/// it now gets its own description.
/// Values without a case below yield a generic "unknown status" text.
std::string EddyCudaHelperFunctions::cuBLASGetErrorString(const hipblasStatus_t& cs) EddyTry
{
  std::string rval;
  switch (cs) {
    case HIPBLAS_STATUS_SUCCESS:
      rval = "The operation completed successfully";
      break;
    case HIPBLAS_STATUS_NOT_INITIALIZED:
      rval = "The cuBLAS library was not initialized. This is usually caused by the lack of a prior hipblasCreate() call, an error in the CUDA Runtime API called by the cuBLAS routine, or an error in the hardware setup.\n\nTo correct: call hipblasCreate() prior to the function call; and check that the hardware, an appropriate version of the driver, and the cuBLAS library are correctly installed.";
      break;
    case HIPBLAS_STATUS_ALLOC_FAILED:
      rval = "Resource allocation failed inside the cuBLAS library. This is usually caused by a hipMalloc() failure.\n\nTo correct: prior to the function call, deallocate previously allocated memory as much as possible.";
      break;
    case HIPBLAS_STATUS_INVALID_VALUE:
      rval = "An unsupported value or parameter was passed to the function (a negative vector size, for example).\n\nTo correct: ensure that all the parameters being passed have valid values.";
      break;
    case HIPBLAS_STATUS_ARCH_MISMATCH:
      rval = "The function requires a feature absent from the device architecture; usually caused by the lack of support for double precision.\n\nTo correct: compile and run the application on a device with appropriate compute capability, which is 1.3 for double precision.";
      break;
    case HIPBLAS_STATUS_MAPPING_ERROR:
      rval = "An access to GPU memory space failed, which is usually caused by a failure to bind a texture.\n\nTo correct: prior to the function call, unbind any previously bound textures.";
      break;
    case HIPBLAS_STATUS_EXECUTION_FAILED:
      rval = "The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.\n\nTo correct: check that the hardware, an appropriate version of the driver, and the cuBLAS library are correctly installed.";
      break;
    case HIPBLAS_STATUS_INTERNAL_ERROR:
      rval = "An internal cuBLAS operation failed. This error is usually caused by a hipMemcpyAsync() failure.\n\nTo correct: check that the hardware, an appropriate version of the driver, and the cuBLAS library are correctly installed. Also, check that the memory passed as a parameter to the routine is not being deallocated prior to the routine’s completion.";
      break;
    case HIPBLAS_STATUS_NOT_SUPPORTED:
      rval = "The functionality requested is not supported";
      break;
    case HIPBLAS_STATUS_UNKNOWN:
      // Not a licence problem: hipBLAS uses this value when its back-end hands back a status it cannot map
      rval = "The hipBLAS back-end returned a status code that hipBLAS does not recognise.";
      break;
    default:
      rval = "An unknown hipblasStatus_t values was encountered";
  }
  return(rval);
} EddyCatch
/// Formats a hipblasStatus_t value as two lines: the numeric code plus the
/// name from cuBLASGetErrorName, then the text from cuBLASGetErrorString.
std::string EddyCudaHelperFunctions::cublasError2String(const hipblasStatus_t& ce) EddyTry
{
  std::ostringstream description;
  description << "hipblasStatus_t = " << ce;
  description << ", cublasErrorName = " << cuBLASGetErrorName(ce) << ",";
  description << '\n';
  description << "cublasErrorString = " << cuBLASGetErrorString(ce);
  return description.str();
} EddyCatch
} // End namespace EDDY
#include "cublas_v2.h"
#include <cstdlib>
#include <string>
#include <cuda.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#include "EddyHelperClasses.h"
#pragma pop
#include "EddyCudaHelperFunctions.h"
namespace EDDY {
/// Formats a CUDA runtime error code as a single line holding the numeric
/// code, the name from cudaGetErrorName and the text from cudaGetErrorString.
std::string EddyCudaHelperFunctions::cudaError2String(const cudaError_t& ce) EddyTry
{
  std::ostringstream description;
  description << "cudaError_t = " << ce;
  description << ", cudaErrorName = " << cudaGetErrorName(ce);
  description << ", cudaErrorString = " << cudaGetErrorString(ce);
  return description.str();
} EddyCatch
/// Calls cudaDeviceSynchronize and throws an EddyException if it reports
/// anything other than cudaSuccess.
/// \param msg Name of the preceding call; only used in the error message.
/// NOTE(review): the message is prefixed "EddyKernels::CudaSync" although
/// this function is EddyCudaHelperFunctions::CudaSync.
void EddyCudaHelperFunctions::CudaSync(const std::string& msg) EddyTry
{
  const cudaError_t status = cudaDeviceSynchronize();
  if (status == cudaSuccess) return;

  std::ostringstream report;
  report << "EddyKernels::CudaSync: CUDA error after call to " << msg << ", " << EddyCudaHelperFunctions::cudaError2String(status);
  throw EDDY::EddyException(report.str());
} EddyCatch
/// Probes the GPU once per process: queries the current device, performs a
/// small trial allocation and release, then synchronises.
/// Later calls return immediately once a probe has succeeded.
/// The "initialized" flag is only set after every step has succeeded, so a
/// call that fails with an exception is retried by the next call instead of
/// being silently treated as a success.
/// \param verbose If true, prints the device number to stdout.
/// \throw EddyException if cudaGetDevice, cudaMalloc or cudaFree report an
/// error, or if the final CudaSync throws.
void EddyCudaHelperFunctions::InitGpu(bool verbose) EddyTry
{
  static bool initialized=false;
  if (initialized) return;

  int device;
  cudaError_t ce;
  if ((ce = cudaGetDevice(&device)) != cudaSuccess) throw EddyException("EddyCudaHelperFunctions::InitGpu: cudaGetDevice returned an error: " + EddyCudaHelperFunctions::cudaError2String(ce));
  if (verbose) printf("\n...................Allocated GPU # %d...................\n", device);

  // Trial allocation to check that device memory can be obtained and released
  int *q = nullptr;
  if ((ce = cudaMalloc(reinterpret_cast<void **>(&q), sizeof(int))) != cudaSuccess) {
    throw EddyException("EddyCudaHelperFunctions::InitGpu: cudaMalloc returned an error: " + EddyCudaHelperFunctions::cudaError2String(ce));
  }
  if ((ce = cudaFree(q)) != cudaSuccess) {
    throw EddyException("EddyCudaHelperFunctions::InitGpu: cudaFree returned an error: " + EddyCudaHelperFunctions::cudaError2String(ce));
  }
  EddyCudaHelperFunctions::CudaSync("EddyGpuUtils::InitGpu");

  initialized=true; // Only reached when every step above succeeded
} EddyCatch
/// Maps a cublasStatus_t value to the spelling of its enumerator.
/// Values without a case below yield "Unkown CUBLAS status code".
std::string EddyCudaHelperFunctions::cuBLASGetErrorName(const cublasStatus_t& cs) EddyTry
{
  switch (cs) {
    case CUBLAS_STATUS_SUCCESS:          return "CUBLAS_STATUS_SUCCESS";
    case CUBLAS_STATUS_NOT_INITIALIZED:  return "CUBLAS_STATUS_NOT_INITIALIZED";
    case CUBLAS_STATUS_ALLOC_FAILED:     return "CUBLAS_STATUS_ALLOC_FAILED";
    case CUBLAS_STATUS_INVALID_VALUE:    return "CUBLAS_STATUS_INVALID_VALUE";
    case CUBLAS_STATUS_ARCH_MISMATCH:    return "CUBLAS_STATUS_ARCH_MISMATCH";
    case CUBLAS_STATUS_MAPPING_ERROR:    return "CUBLAS_STATUS_MAPPING_ERROR";
    case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
    case CUBLAS_STATUS_INTERNAL_ERROR:   return "CUBLAS_STATUS_INTERNAL_ERROR";
    case CUBLAS_STATUS_NOT_SUPPORTED:    return "CUBLAS_STATUS_NOT_SUPPORTED";
    case CUBLAS_STATUS_LICENSE_ERROR:    return "CUBLAS_STATUS_LICENSE_ERROR";
    default:                             return "Unkown CUBLAS status code";
  }
} EddyCatch
/// Returns an explanatory text (cause and, where given, remedy) for a
/// cublasStatus_t value.
/// Values without a case below yield a generic "unknown status" text.
std::string EddyCudaHelperFunctions::cuBLASGetErrorString(const cublasStatus_t& cs) EddyTry
{
  switch (cs) {
    case CUBLAS_STATUS_SUCCESS:
      return "The operation completed successfully";
    case CUBLAS_STATUS_NOT_INITIALIZED:
      return "The cuBLAS library was not initialized. This is usually caused by the lack of a prior cublasCreate() call, an error in the CUDA Runtime API called by the cuBLAS routine, or an error in the hardware setup.\n\nTo correct: call cublasCreate() prior to the function call; and check that the hardware, an appropriate version of the driver, and the cuBLAS library are correctly installed.";
    case CUBLAS_STATUS_ALLOC_FAILED:
      return "Resource allocation failed inside the cuBLAS library. This is usually caused by a cudaMalloc() failure.\n\nTo correct: prior to the function call, deallocate previously allocated memory as much as possible.";
    case CUBLAS_STATUS_INVALID_VALUE:
      return "An unsupported value or parameter was passed to the function (a negative vector size, for example).\n\nTo correct: ensure that all the parameters being passed have valid values.";
    case CUBLAS_STATUS_ARCH_MISMATCH:
      return "The function requires a feature absent from the device architecture; usually caused by the lack of support for double precision.\n\nTo correct: compile and run the application on a device with appropriate compute capability, which is 1.3 for double precision.";
    case CUBLAS_STATUS_MAPPING_ERROR:
      return "An access to GPU memory space failed, which is usually caused by a failure to bind a texture.\n\nTo correct: prior to the function call, unbind any previously bound textures.";
    case CUBLAS_STATUS_EXECUTION_FAILED:
      return "The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.\n\nTo correct: check that the hardware, an appropriate version of the driver, and the cuBLAS library are correctly installed.";
    case CUBLAS_STATUS_INTERNAL_ERROR:
      return "An internal cuBLAS operation failed. This error is usually caused by a cudaMemcpyAsync() failure.\n\nTo correct: check that the hardware, an appropriate version of the driver, and the cuBLAS library are correctly installed. Also, check that the memory passed as a parameter to the routine is not being deallocated prior to the routine’s completion.";
    case CUBLAS_STATUS_NOT_SUPPORTED:
      return "The functionality requested is not supported";
    case CUBLAS_STATUS_LICENSE_ERROR:
      return "The functionality requested requires some license and an error was detected when trying to check the current licensing. This error can happen if the license is not present or is expired or if the environment variable NVIDIA_LICENSE_FILE is not set properly.";
    default:
      return "An unknown cublasStatus_t values was encountered";
  }
} EddyCatch
/// Formats a cublasStatus_t value as two lines: the numeric code plus the
/// name from cuBLASGetErrorName, then the text from cuBLASGetErrorString.
std::string EddyCudaHelperFunctions::cublasError2String(const cublasStatus_t& ce) EddyTry
{
  std::ostringstream description;
  description << "cublasStatus_t = " << ce;
  description << ", cublasErrorName = " << cuBLASGetErrorName(ce) << ",";
  description << '\n';
  description << "cublasErrorString = " << cuBLASGetErrorString(ce);
  return description.str();
} EddyCatch
} // End namespace EDDY
/////////////////////////////////////////////////////////////////////
///
/// \file EddyCudaHelperFunctions.h
/// \brief Declarations of some low level helper functions for eddy
/// CUDA code.
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2020.
/// \Copyright (C) 2020 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef EddyCudaHelperFunctions_h
#define EddyCudaHelperFunctions_h
#include "hipblas.h"
#include <string>
#include <sstream>
#include <hip/hip_runtime.h>
namespace EDDY {
/////////////////////////////////////////////////////////////////////
///
/// \brief This class contains a set of static methods that implement
/// various GPU utility functions for the eddy project.
///
/// This variant of the class is declared in terms of the HIP/hipBLAS
/// types (hipError_t, hipblasStatus_t). The method names keep their
/// CUDA/cuBLAS spelling.
/// All members are static, so no instance is needed to call them.
///
/////////////////////////////////////////////////////////////////////
class EddyCudaHelperFunctions
{
public:
/// Does a little song and dance to initialize GPU.
/// \param verbose Defaults to true.
static void InitGpu(bool verbose=true);
/// Returns a formatted string with info about a hipError_t code
static std::string cudaError2String(const hipError_t& ce);
/// Waits for GPU to finish and checks error status.
/// \param msg Caller-supplied text identifying the preceding call.
static void CudaSync(const std::string& msg);
/// Returns name of error associated with cs
static std::string cuBLASGetErrorName(const hipblasStatus_t& cs);
/// Returns explanatory string for error associated with cs
static std::string cuBLASGetErrorString(const hipblasStatus_t& cs);
/// Returns a formatted string with info about a hipblasStatus_t code
static std::string cublasError2String(const hipblasStatus_t& ce);
};
} // End namespace EDDY
#endif // End #ifndef EddyCudaHelperFunctions_h
/////////////////////////////////////////////////////////////////////
///
/// \file EddyCudaHelperFunctions.h
/// \brief Declarations of some low level helper functions for eddy
/// CUDA code.
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2020.
/// \Copyright (C) 2020 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef EddyCudaHelperFunctions_h
#define EddyCudaHelperFunctions_h
#include "cublas_v2.h"
#include <string>
#include <sstream>
#include <cuda.h>
namespace EDDY {
/////////////////////////////////////////////////////////////////////
///
/// \brief This class contains a set of static methods that implement
/// various CUDA utility functions for the eddy project.
///
/// This variant of the class is declared in terms of the CUDA/cuBLAS
/// types (cudaError_t, cublasStatus_t).
/// All members are static, so no instance is needed to call them.
///
/////////////////////////////////////////////////////////////////////
class EddyCudaHelperFunctions
{
public:
/// Does a little song and dance to initialize GPU.
/// \param verbose Defaults to true.
static void InitGpu(bool verbose=true);
/// Returns a formatted string with info about a cudaError_t code
static std::string cudaError2String(const cudaError_t& ce);
/// Waits for GPU to finish and checks error status.
/// \param msg Caller-supplied text identifying the preceding call.
static void CudaSync(const std::string& msg);
/// Returns name of error associated with cs
static std::string cuBLASGetErrorName(const cublasStatus_t& cs);
/// Returns explanatory string for error associated with cs
static std::string cuBLASGetErrorString(const cublasStatus_t& cs);
/// Returns a formatted string with info about a cublasStatus_t code
static std::string cublasError2String(const cublasStatus_t& ce);
};
} // End namespace EDDY
#endif // End #ifndef EddyCudaHelperFunctions_h
/////////////////////////////////////////////////////////////////////
///
/// \file EddyFunctors.h
/// \brief Declarations of functors that I use for the CUDA implementation of Eddy
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef EddyFunctors_h
#define EddyFunctors_h
#include <hip/hip_runtime.h>
#include <thrust/functional.h>
#include <thrust/random.h>
namespace EDDY {
/////////////////////////////////////////////////////////////////////
///
/// \brief Functor that binarises a value: yields T(1) when the value
/// lies strictly between a lower and an upper limit, T(0) otherwise.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class Binarise : public thrust::unary_function<T,T>
{
public:
  /// Lower limit only; the upper limit becomes std::numeric_limits<T>::max()
  Binarise(const T& thr) : _lower(thr), _upper(std::numeric_limits<T>::max()) {}
  /// Explicit lower (ll) and upper (ul) limits, both exclusive
  Binarise(const T& ll, const T& ul) : _lower(ll), _upper(ul) {}
  __host__ __device__ T operator()(const T& x) const
  {
    const bool inside = (x > _lower) && (x < _upper);
    return static_cast<T>(inside);
  }
private:
  const T _lower;
  const T _upper;
  Binarise() : _lower(static_cast<T>(0)), _upper(static_cast<T>(0)) {} // Hidden
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Functor that returns a normally distributed random number
/// with the mean and standard deviation given at construction.
///
/// Every call starts from a default-constructed engine and discards n
/// values before drawing, so the result depends only on n, mu and sigma.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class MakeNormRand : public thrust::unary_function<unsigned int,T>
{
public:
  MakeNormRand(const T& mu, const T& sigma) : _mean(mu), _stddev(sigma) {}
  __host__ __device__ T operator()(const unsigned int n) const
  {
    thrust::default_random_engine engine;
    engine.discard(n); // Skip ahead to position n of the sequence
    thrust::normal_distribution<T> gauss(_mean,_stddev);
    return gauss(engine);
  }
private:
  const T _mean;
  const T _stddev;
  MakeNormRand() : _mean(static_cast<T>(0)), _stddev(static_cast<T>(1)) {} // Hidden
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Functor that multiplies its argument by a fixed scalar.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class MulByScalar : public thrust::unary_function<T,T>
{
public:
  MulByScalar(const T& scalar) : _factor(scalar) {}
  __host__ __device__ T operator()(const T& x) const
  {
    return _factor * x;
  }
private:
  const T _factor;
  MulByScalar() : _factor(static_cast<T>(1)) {} // Hidden
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Functor returning arg1*arg1*arg2 converted to T2.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class MaskedSquare : public thrust::binary_function<T1,T1,T2>
{
public:
  MaskedSquare() {}
  __host__ __device__ T2 operator()(const T1& arg1, const T1& arg2) const
  {
    const auto masked = arg1*arg1*arg2; // Product is formed before the conversion to T2
    return static_cast<T2>(masked);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Functor returning arg1 + arg2*arg2, where the square is
/// converted to T2 before the addition.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class SumSquare : public thrust::binary_function<T2,T1,T2>
{
public:
  SumSquare() {}
  __host__ __device__ T2 operator()(const T2& arg1, const T1& arg2) const
  {
    const T2 squared = static_cast<T2>(arg2*arg2);
    return arg1 + squared;
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Functor returning arg1*arg2 converted to T2.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class Product : public thrust::binary_function<T1,T1,T2>
{
public:
  Product() {}
  __host__ __device__ T2 operator()(const T1& arg1, const T1& arg2) const
  {
    const auto prod = arg1*arg2; // Product is formed before the conversion to T2
    return static_cast<T2>(prod);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning running + addend.
///
/// The second argument (type T1) is cast to T2 before the addition.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class Sum : public thrust::binary_function<T2,T1,T2>
{
public:
  Sum() {}
  /// Adds addend (converted to T2) to running and returns the result.
  __host__ __device__ T2 operator()(const T2& running, const T1& addend) const
  {
    return running + static_cast<T2>(addend);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning the larger of its two arguments.
///
/// The first argument (type T2) is cast to double for the comparison.
/// The winner is converted to T2 on return.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class Max : public thrust::binary_function<T2,T1,T2>
{
public:
  Max() {}
  /// Returns current if it is strictly greater than candidate, else candidate.
  __host__ __device__ T2 operator()(const T2& current, const T1& candidate) const
  {
    const bool keep_current = static_cast<double>(current) > candidate;
    return keep_current ? static_cast<double>(current) : candidate;
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning the larger of the two absolute values.
///
/// The first argument (type T2) is cast to double before fabs is
/// applied; fabs is applied to the second argument as is. The
/// winner is converted to T2 on return.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class MaxAbs : public thrust::binary_function<T2,T1,T2>
{
public:
  MaxAbs() {}
  /// Returns |current| if it is strictly greater than |candidate|, else |candidate|.
  __host__ __device__ T2 operator()(const T2& current, const T1& candidate) const
  {
    const auto abs_current = fabs(static_cast<double>(current));
    const auto abs_candidate = fabs(candidate);
    return (abs_current > abs_candidate) ? abs_current : abs_candidate;
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning accum + scalar*term (SAXPY style).
///
/// The scalar is supplied at construction and is fixed thereafter.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class MulAndAdd : public thrust::binary_function<T,T,T>
{
public:
  /// Stores the scalar that the second argument will be multiplied by.
  MulAndAdd(const T& scalar) : _scalar(scalar) {}
  /// Returns accum plus the stored scalar times term.
  __host__ __device__ T operator()(const T& accum, const T& term) const
  {
    return accum + _scalar * term;
  }
private:
  const T _scalar; // Weight applied to the second argument
  // Default construction is hidden; users must always supply a scalar.
  MulAndAdd() : _scalar(static_cast<T>(1)) {}
};
} // End namespace EDDY
#endif // End #ifndef EddyFunctors_h
/////////////////////////////////////////////////////////////////////
///
/// \file EddyFunctors.h
/// \brief Declarations of functors that I use for the CUDA implementation of Eddy
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef EddyFunctors_h
#define EddyFunctors_h
#include <cuda.h>
#include <limits>
#include <thrust/functional.h>
#include <thrust/random.h>
namespace EDDY {
/////////////////////////////////////////////////////////////////////
///
/// \brief Unary functor that binarises a value against an open interval.
///
/// operator() returns 1 (cast to T) when lower < x < upper and 0
/// otherwise; both limits are exclusive. The single-argument
/// constructor uses std::numeric_limits<T>::max() as the upper limit,
/// so a value equal to that maximum maps to 0.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class Binarise : public thrust::unary_function<T,T>
{
public:
  /// Threshold form: lower limit thr, upper limit the largest finite T.
  Binarise(const T& thr) : _ll(thr), _ul(std::numeric_limits<T>::max()) {}
  /// Range form: explicit lower limit ll and upper limit ul.
  Binarise(const T& ll, const T& ul) : _ll(ll), _ul(ul) {}
  /// Returns 1 if value lies strictly between the limits, otherwise 0.
  __host__ __device__ T operator()(const T& value) const
  {
    const bool inside = (value > _ll && value < _ul);
    return static_cast<T>(inside);
  }
private:
  const T _ll; // Lower limit (exclusive)
  const T _ul; // Upper limit (exclusive)
  // Default construction is hidden; users must always supply limits.
  Binarise() : _ll(static_cast<T>(0)), _ul(static_cast<T>(0)) {}
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Unary functor generating normally distributed random numbers.
///
/// Each call default-constructs a thrust::default_random_engine,
/// discards n states from it and then draws one sample from a
/// normal distribution with the mean and standard deviation given
/// at construction. The argument n thus selects the position in
/// the engine's sequence.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class MakeNormRand : public thrust::unary_function<unsigned int,T>
{
public:
  /// Stores the mean (mu) and standard deviation (sigma) of the distribution.
  MakeNormRand(const T& mu, const T& sigma) : _mu(mu), _sigma(sigma) {}
  /// Returns the sample obtained after advancing a fresh engine by n states.
  __host__ __device__ T operator()(const unsigned int n) const
  {
    thrust::normal_distribution<T> gaussian(_mu,_sigma);
    thrust::default_random_engine engine;
    engine.discard(n);
    return gaussian(engine);
  }
private:
  const T _mu;    // Mean of the distribution
  const T _sigma; // Standard deviation of the distribution
  // Default construction is hidden; users must always supply mu and sigma.
  MakeNormRand() : _mu(static_cast<T>(0)), _sigma(static_cast<T>(1)) {}
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Unary functor that scales its argument by a fixed factor.
///
/// The factor is supplied at construction. operator() returns
/// factor * value and is marked for use on both host and device.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class MulByScalar : public thrust::unary_function<T,T>
{
public:
  /// Stores the factor that every element will be multiplied by.
  MulByScalar(const T& scalar) : _scalar(scalar) {}
  /// Returns the stored factor times value.
  __host__ __device__ T operator()(const T& value) const
  {
    return _scalar * value;
  }
private:
  const T _scalar; // Multiplication factor, fixed for the lifetime of the functor
  // Default construction is hidden; users must always supply a factor.
  MulByScalar() : _scalar(static_cast<T>(1)) {}
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning value*value*weight.
///
/// The product is formed in T1 arithmetic and then cast to T2.
/// Presumably the second argument is a mask value (going by the
/// class name) -- confirm against callers.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class MaskedSquare : public thrust::binary_function<T1,T1,T2>
{
public:
  MaskedSquare() {}
  /// Returns the square of value multiplied by weight, converted to T2.
  __host__ __device__ T2 operator()(const T1& value, const T1& weight) const
  {
    return static_cast<T2>(value * value * weight);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning running + value*value.
///
/// The square is formed in T1 arithmetic, cast to T2 and then added
/// to the first argument (of type T2).
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class SumSquare : public thrust::binary_function<T2,T1,T2>
{
public:
  SumSquare() {}
  /// Adds the square of value to running and returns the result.
  __host__ __device__ T2 operator()(const T2& running, const T1& value) const
  {
    return running + static_cast<T2>(value * value);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning lhs*rhs.
///
/// The product is formed in T1 arithmetic and then cast to T2.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class Product : public thrust::binary_function<T1,T1,T2>
{
public:
  Product() {}
  /// Returns the product of the two arguments, converted to T2.
  __host__ __device__ T2 operator()(const T1& lhs, const T1& rhs) const
  {
    return static_cast<T2>(lhs * rhs);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning running + addend.
///
/// The second argument (type T1) is cast to T2 before the addition.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class Sum : public thrust::binary_function<T2,T1,T2>
{
public:
  Sum() {}
  /// Adds addend (converted to T2) to running and returns the result.
  __host__ __device__ T2 operator()(const T2& running, const T1& addend) const
  {
    return running + static_cast<T2>(addend);
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning the larger of its two arguments.
///
/// The first argument (type T2) is cast to double for the comparison.
/// The winner is converted to T2 on return.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class Max : public thrust::binary_function<T2,T1,T2>
{
public:
  Max() {}
  /// Returns current if it is strictly greater than candidate, else candidate.
  __host__ __device__ T2 operator()(const T2& current, const T1& candidate) const
  {
    const bool keep_current = static_cast<double>(current) > candidate;
    return keep_current ? static_cast<double>(current) : candidate;
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning the larger of the two absolute values.
///
/// The first argument (type T2) is cast to double before fabs is
/// applied; fabs is applied to the second argument as is. The
/// winner is converted to T2 on return.
///
/////////////////////////////////////////////////////////////////////
template<typename T1, typename T2>
class MaxAbs : public thrust::binary_function<T2,T1,T2>
{
public:
  MaxAbs() {}
  /// Returns |current| if it is strictly greater than |candidate|, else |candidate|.
  __host__ __device__ T2 operator()(const T2& current, const T1& candidate) const
  {
    const auto abs_current = fabs(static_cast<double>(current));
    const auto abs_candidate = fabs(candidate);
    return (abs_current > abs_candidate) ? abs_current : abs_candidate;
  }
};
/////////////////////////////////////////////////////////////////////
///
/// \brief Binary functor returning accum + scalar*term (SAXPY style).
///
/// The scalar is supplied at construction and is fixed thereafter.
///
/////////////////////////////////////////////////////////////////////
template<typename T>
class MulAndAdd : public thrust::binary_function<T,T,T>
{
public:
  /// Stores the scalar that the second argument will be multiplied by.
  MulAndAdd(const T& scalar) : _scalar(scalar) {}
  /// Returns accum plus the stored scalar times term.
  __host__ __device__ T operator()(const T& accum, const T& term) const
  {
    return accum + _scalar * term;
  }
private:
  const T _scalar; // Weight applied to the second argument
  // Default construction is hidden; users must always supply a scalar.
  MulAndAdd() : _scalar(static_cast<T>(1)) {}
};
} // End namespace EDDY
#endif // End #ifndef EddyFunctors_h
/////////////////////////////////////////////////////////////////////
///
/// \file EddyGpuUtils.cu
/// \brief Definitions of static class with collection of GPU routines used in the eddy project
///
/// \author Jesper Andersson & Moises Hernandez
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
// Because of a bug in cuda_fp16.hpp, which gets included by hipblas.h, hipblas.h has to
// be included before any include files that set up anything related to the std-lib.
// If not, there will be an ambiguity in cuda_fp16.hpp about whether to use the
// old-style C isinf or the new (since C++11) std::isinf.
#include "hipblas.h"
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <hip/hip_runtime.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#pragma pop
#include "miscmaths/miscmaths.h"
#include "EddyCudaHelperFunctions.h"
#include "EddyInternalGpuUtils.h"
#include "EddyHelperClasses.h"
#include "DiffusionGP.h"
#include "b0Predictor.h"
#include "ECScanClasses.h"
#include "EddyUtils.h"
#include "EddyGpuUtils.h"
#include "EddyKernels.h"
using namespace EDDY;
/// Builds a prediction maker for scans of type st and loads it via
/// EddyInternalGpuUtils::load_prediction_maker.
///
/// For ScanType::DWI a DiffusionGP is created from a covariance matrix
/// and a hyperparameter estimator, both selected from the command line
/// options; for any other scan type a b0Predictor is created.
/// \param clo Command line options steering the choices above.
/// \param st Type of scans to build the predictor for.
/// \param sm Scan manager supplying the scans.
/// \param iter Forwarded to load_prediction_maker.
/// \param fwhm Forwarded to load_prediction_maker.
/// \param mask Output. Initialised from the image of scan 0, set to
///        trilinear interpolation and filled with ones before being
///        handed to load_prediction_maker.
/// \param use_orig Forwarded to load_prediction_maker.
/// \return Shared pointer to the prediction maker.
/// \throws EddyException for an unknown covariance function or an
///         unknown hyperparameter cost-function.
std::shared_ptr<DWIPredictionMaker> EddyGpuUtils::LoadPredictionMaker(// Input
                                                                      const EddyCommandLineOptions& clo,
                                                                      ScanType                      st,
                                                                      const ECScanManager&          sm,
                                                                      unsigned int                  iter,
                                                                      float                         fwhm,
                                                                      // Output
                                                                      NEWIMAGE::volume<float>&      mask,
                                                                      // Optional input
                                                                      bool                          use_orig) EddyTry
{
  std::shared_ptr<DWIPredictionMaker> pmp; // The prediction maker that is built and returned
  if (st != ScanType::DWI) {
    pmp.reset(new b0Predictor); // Simple mean predictor for non-diffusion-weighted data
  }
  else {
    // Covariance (K) matrix
    std::shared_ptr<KMatrix> K;
    const auto cov_type = clo.CovarianceFunction();
    if (cov_type == CovarianceFunctionType::Spherical) {
      K.reset(new SphericalKMatrix(clo.DontCheckShelling()));
    }
    else if (cov_type == CovarianceFunctionType::Exponential) {
      K.reset(new ExponentialKMatrix(clo.DontCheckShelling()));
    }
    else if (cov_type == CovarianceFunctionType::NewSpherical) {
      K.reset(new NewSphericalKMatrix(clo.DontCheckShelling()));
    }
    else throw EddyException("LoadPredictionMaker: Unknown covariance function");

    // Hyperparameter estimator, with a cost-function where one is needed
    std::shared_ptr<HyParCF> hpcf;
    std::shared_ptr<HyParEstimator> hpe;
    if (clo.HyperParFixed()) {
      hpe.reset(new FixedValueHyParEstimator(clo.HyperParValues()));
    }
    else {
      const auto cost_type = clo.HyParCostFunction();
      if (cost_type == HyParCostFunctionType::CC) {
        hpe.reset(new CheapAndCheerfulHyParEstimator(clo.NVoxHp(),clo.InitRand()));
      }
      else {
        if (cost_type == HyParCostFunctionType::MML) hpcf.reset(new MMLHyParCF);
        else if (cost_type == HyParCostFunctionType::CV) hpcf.reset(new CVHyParCF);
        else if (cost_type == HyParCostFunctionType::GPP) hpcf.reset(new GPPHyParCF);
        else throw EddyException("LoadPredictionMaker: Unknown hyperparameter cost-function");
        hpe.reset(new FullMontyHyParEstimator(hpcf,clo.HyParFudgeFactor(),clo.NVoxHp(),clo.InitRand(),clo.VeryVerbose()));
      }
    }
    pmp.reset(new DiffusionGP(K,hpe)); // Gaussian process predictor
  }
  pmp->SetNoOfScans(sm.NScans(st));

  // Start from an all-ones mask with the geometry of the first scan
  mask = sm.Scan(0,ScanType::Any).GetIma();
  EddyUtils::SetTrilinearInterp(mask);
  mask = 1.0;

  EddyCudaHelperFunctions::InitGpu();
  EddyInternalGpuUtils::load_prediction_maker(clo,st,sm,iter,fwhm,use_orig,pmp,mask);
  return(pmp);
} EddyCatch
/// Thin wrapper that forwards all its arguments unchanged to
/// EddyInternalGpuUtils::make_scatter_brain_predictions.
/// \param clo Command line options.
/// \param sm Scan manager.
/// \param hypar Hyperparameter values.
/// \param pred Output, passed by reference to the internal routine.
/// \param vwbvrot Flag forwarded as is.
void EddyGpuUtils::MakeScatterBrainPredictions(// Input
                                               const EddyCommandLineOptions& clo,
                                               const ECScanManager&          sm,
                                               const std::vector<double>&    hypar,
                                               // Output
                                               NEWIMAGE::volume4D<float>&    pred,
                                               // Optional input
                                               bool                          vwbvrot) EddyTry
{
  EddyInternalGpuUtils::make_scatter_brain_predictions(clo, sm, hypar, pred, vwbvrot);
} EddyCatch
/*
void EddyGpuUtils::UpdatePredictionMaker(// Input
const EddyCommandLineOptions& clo,
ScanType st,
const ECScanManager& sm,
const ReplacementManager& rm,
const NEWIMAGE::volume<float>& mask,
// Input/Output
std::shared_ptr<DWIPredictionMaker> pmp)
{
EddyCudaHelperFunctions::InitGpu();
EddyInternalGpuUtils::update_prediction_maker(clo,st,sm,rm,mask,pmp);
return;
}
*/
/// Returns the unwarped version of scan, computed by
/// EddyInternalGpuUtils::get_unwarped_scan without a prediction
/// (an empty CudaVolume is passed in its place).
/// \param scan Scan to unwarp.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param bias Optional field; copied to a CudaVolume when non-null.
/// \param use_orig Forwarded to get_unwarped_scan.
/// \param omask Optional output. When non-null it is overwritten with
///        the mask produced by get_unwarped_scan.
/// \return The unwarped volume.
NEWIMAGE::volume<float> EddyGpuUtils::GetUnwarpedScan(// Input
                                                      const EDDY::ECScan&                               scan,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                      bool                                              use_orig,
                                                      // Optional output
                                                      NEWIMAGE::volume<float>                           *omask) EddyTry
{
  EDDY::CudaVolume susc_gpu;
  if (susc) susc_gpu = *susc;
  EDDY::CudaVolume bias_gpu;
  if (bias) bias_gpu = *bias;
  EDDY::CudaVolume no_pred; // Left empty: this overload has no prediction to supply
  EDDY::CudaVolume unwarped(scan.GetIma(),false);
  if (!omask) {
    // Caller does not want the mask; use a throw-away one
    EDDY::CudaVolume scratch_mask;
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,no_pred,true,use_orig,unwarped,scratch_mask);
  }
  else {
    EDDY::CudaVolume mask_gpu(*omask,false);
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,no_pred,true,use_orig,unwarped,mask_gpu);
    *omask = mask_gpu.GetVolume();
  }
  return(unwarped.GetVolume());
} EddyCatch
/// Returns the unwarped version of scan, computed by
/// EddyInternalGpuUtils::get_unwarped_scan with a prediction supplied.
///
/// A warning is printed to std::cout when the scan is not
/// slice-to-volume, since a prediction is then not meaningful.
/// \param scan Scan to unwarp.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param bias Optional field; copied to a CudaVolume when non-null.
/// \param pred Prediction, copied to a CudaVolume and forwarded.
/// \param use_orig Forwarded to get_unwarped_scan.
/// \param omask Optional output. When non-null it is overwritten with
///        the mask produced by get_unwarped_scan.
/// \return The unwarped volume.
/// \throws EddyException if the slice-to-volume interpolation of the
///         scan is not spline.
NEWIMAGE::volume<float> EddyGpuUtils::GetUnwarpedScan(// Input
                                                      const EDDY::ECScan&                               scan,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                      const NEWIMAGE::volume<float>&                    pred,
                                                      bool                                              use_orig,
                                                      // Optional output
                                                      NEWIMAGE::volume<float>                           *omask) EddyTry
{
  if (!scan.IsSliceToVol()) {
    std::cout << "EddyGpuUtils::GetUnwarpedScan: Warning, it does not make sense to supply pred for volumetric resampling" << std::endl;
  }
  if (scan.GetPolation().GetS2VInterp() != NEWIMAGE::spline) {
    throw EddyException("EddyGpuUtils::GetUnwarpedScan: use of prediction cannot be combined with trilinear interpolation");
  }
  EDDY::CudaVolume susc_gpu;
  if (susc) susc_gpu = *susc;
  EDDY::CudaVolume bias_gpu;
  if (bias) bias_gpu = *bias;
  EDDY::CudaVolume unwarped(scan.GetIma(),false);
  EDDY::CudaVolume pred_gpu = pred;
  if (!omask) {
    // Caller does not want the mask; use a throw-away one
    EDDY::CudaVolume scratch_mask;
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,pred_gpu,true,use_orig,unwarped,scratch_mask);
  }
  else {
    EDDY::CudaVolume mask_gpu(*omask,false);
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,pred_gpu,true,use_orig,unwarped,mask_gpu);
    *omask = mask_gpu.GetVolume();
  }
  return(unwarped.GetVolume());
} EddyCatch
/// Returns the unwarped version of scan, computed by
/// EddyInternalGpuUtils::get_volumetric_unwarped_scan.
/// \param scan Scan to unwarp.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param bias Optional field; copied to a CudaVolume when non-null.
/// \param use_orig Forwarded to get_volumetric_unwarped_scan.
/// \param omask Optional output. When non-null it is overwritten with
///        the mask produced by the internal routine.
/// \param deriv Optional output. When non-null it is overwritten with
///        the 4D volume produced by the internal routine.
/// \return The unwarped volume.
NEWIMAGE::volume<float> EddyGpuUtils::GetVolumetricUnwarpedScan(// Input
                                                                const EDDY::ECScan&                               scan,
                                                                std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                                std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                                bool                                              use_orig,
                                                                // Optional output
                                                                NEWIMAGE::volume<float>                           *omask,
                                                                NEWIMAGE::volume4D<float>                         *deriv) EddyTry
{
  EDDY::CudaVolume cuda_susc;
  if (susc) cuda_susc = *susc;
  EDDY::CudaVolume cuda_bias;
  if (bias) cuda_bias = *bias;
  EDDY::CudaVolume uwscan(scan.GetIma(),false);
  // The internal routine always needs a mask and a derivative volume to
  // write into. Those the caller asked for are sized from the caller's
  // objects and copied back; the others are empty throw-aways.
  if (omask && deriv) {
    EDDY::CudaVolume tmpmask(*omask,false);
    EDDY::CudaVolume4D tmpderiv(*deriv,false);
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
    *omask = tmpmask.GetVolume();
    *deriv = tmpderiv.GetVolume();
  }
  else if (omask) {
    EDDY::CudaVolume tmpmask(*omask,false);
    EDDY::CudaVolume4D tmpderiv;
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
    *omask = tmpmask.GetVolume();
  }
  else if (deriv) {
    EDDY::CudaVolume tmpmask;
    EDDY::CudaVolume4D tmpderiv(*deriv,false);
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
    *deriv = tmpderiv.GetVolume();
  }
  else {
    EDDY::CudaVolume tmpmask;
    EDDY::CudaVolume4D tmpderiv;
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
  }
  return(uwscan.GetVolume());
} EddyCatch
/// Computes root-mean-square movement for every scan of type st.
///
/// For each scan the movement displacement field is obtained from
/// EddyInternalGpuUtils::MovementDisplacementToModelSpace, its squared
/// norm is averaged within the mask from sm.Mask(), and the square
/// root is taken.
/// \param sm Scan manager supplying the scans and the mask.
/// \param st Type of scans to process.
/// \param restricted Forwarded to MovementDisplacementToModelSpace.
/// \return Matrix with one row per scan. Column 0 holds the RMS of the
///         scan's displacement field. Column 1 holds the RMS of the
///         difference to the previous scan's field (0 for the first scan).
arma::mat EddyGpuUtils::GetMovementRMS(const ECScanManager& sm,
                                       ScanType             st,
                                       bool                 restricted) EddyTry
{
  const auto nscans = sm.NScans(st);
  arma::mat rms(nscans,2);
  EDDY::CudaVolume mask = sm.Mask();
  // The mask never changes inside the loop, so its sum is computed once
  // here rather than twice per scan.
  const auto mask_sum = mask.Sum();
  EDDY::CudaVolume4D mov_field;
  EDDY::CudaVolume4D prev_mov_field;
  for (unsigned int s=0; s<nscans; s++) {
    if (s) prev_mov_field = mov_field; // Keep previous field for the scan-to-scan difference
    EddyInternalGpuUtils::MovementDisplacementToModelSpace(sm.Scan(s,st),restricted,mov_field);
    EDDY::CudaVolume4D sqr_mov_field = mov_field * mov_field;
    EDDY::CudaVolume sqr_norm = sqr_mov_field.SumAlongFourthDim();
    double ms = sqr_norm.Sum(mask) / mask_sum;
    rms(s,0) = std::sqrt(ms);
    if (s) {
      EDDY::CudaVolume4D delta_field = mov_field - prev_mov_field;
      delta_field *= delta_field; // Is now squared delta field
      sqr_norm = delta_field.SumAlongFourthDim();
      ms = sqr_norm.Sum(mask) / mask_sum;
      rms(s,1) = std::sqrt(ms);
    }
    else rms(s,1) = 0.0; // No previous scan to compare the first one with
  }
  return(rms);
} EddyCatch
/// Obtains the motion corrected version of scan from
/// EddyInternalGpuUtils::get_motion_corrected_scan.
/// \param scan Scan to correct.
/// \param use_orig Forwarded to get_motion_corrected_scan.
/// \param ovol Output, overwritten with the corrected volume.
/// \param omask Optional output. When non-null, a mask with the header
///        of the scan is initialised to ones, handed to the internal
///        routine and then copied to *omask. When null, an empty mask
///        is handed to the internal routine.
void EddyGpuUtils::GetMotionCorrectedScan(// Input
                                          const EDDY::ECScan&       scan,
                                          bool                      use_orig,
                                          // Output
                                          NEWIMAGE::volume<float>&  ovol,
                                          // Optional output
                                          NEWIMAGE::volume<float>   *omask) EddyTry
{
  EDDY::CudaVolume corrected(scan.GetIma(),false);
  EDDY::CudaVolume mask_gpu;
  if (omask) {
    mask_gpu.SetHdr(corrected);
    mask_gpu = 1.0;
  }
  EddyInternalGpuUtils::get_motion_corrected_scan(scan,use_orig,corrected,mask_gpu);
  ovol = corrected.GetVolume();
  if (omask) *omask = mask_gpu.GetVolume();
} EddyCatch
/// Transforms the model space image mima to the space of scan using
/// EddyInternalGpuUtils::transform_model_to_scan_space.
///
/// The internal routine also receives a mask (initialised to ones),
/// a further volume and an empty 4D volume; whatever it writes to
/// those is discarded here.
/// \param scan Scan whose space the image is transformed into.
/// \param mima Image in model space.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param jacmod Forwarded to transform_model_to_scan_space.
/// \return The transformed image.
NEWIMAGE::volume<float> EddyGpuUtils::TransformModelToScanSpace(const EDDY::ECScan&                             scan,
                                                                const NEWIMAGE::volume<float>&                  mima,
                                                                std::shared_ptr<const NEWIMAGE::volume<float> > susc,
                                                                bool                                            jacmod) EddyTry
{
  EDDY::CudaVolume mima_gpu = mima;
  EDDY::CudaVolume susc_gpu;
  if (susc) susc_gpu = *susc;
  EDDY::CudaVolume scan_space_ima(mima,false); // Receives the result
  EDDY::CudaVolume unused_mask(mima,false);
  unused_mask = 1.0;
  EDDY::CudaVolume unused_jac(mima,false);
  EDDY::CudaVolume4D unused4D;
  EddyInternalGpuUtils::transform_model_to_scan_space(mima_gpu,scan,susc_gpu,jacmod,scan_space_ima,unused_mask,unused_jac,unused4D);
  return(scan_space_ima.GetVolume());
} EddyCatch
/// Returns the partial derivatives computed by
/// EddyInternalGpuUtils::get_partial_derivatives_in_scan_space for
/// all parameters (ParametersType::All).
/// \param scan Scan for which derivatives are wanted.
/// \param mima Image in model space.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \return 4D volume sized from mima and scan.NDerivs().
NEWIMAGE::volume4D<float> EddyGpuUtils::DerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                                const NEWIMAGE::volume<float>&                  mima,
                                                                                std::shared_ptr<const NEWIMAGE::volume<float> > susc) EddyTry
{
  EDDY::CudaVolume model_gpu = mima;
  EDDY::CudaVolume field_gpu;
  if (susc) field_gpu = *susc;
  EDDY::CudaVolume4D partials(mima,scan.NDerivs(),false);
  EddyInternalGpuUtils::get_partial_derivatives_in_scan_space(model_gpu,scan,field_gpu,ParametersType::All,partials);
  return(partials.GetVolume());
} EddyCatch
/// Returns the partial derivatives computed by
/// EddyInternalGpuUtils::get_direct_partial_derivatives_in_scan_space
/// for all parameters (ParametersType::All).
/// \param scan Scan for which derivatives are wanted.
/// \param mima Image in model space.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \return 4D volume sized from mima and scan.NDerivs().
NEWIMAGE::volume4D<float> EddyGpuUtils::DirectDerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                                      const NEWIMAGE::volume<float>&                  mima,
                                                                                      std::shared_ptr<const NEWIMAGE::volume<float> > susc) EddyTry
{
  EDDY::CudaVolume model_gpu = mima;
  EDDY::CudaVolume field_gpu;
  if (susc) field_gpu = *susc;
  EDDY::CudaVolume4D partials(mima,scan.NDerivs(),false);
  EddyInternalGpuUtils::get_direct_partial_derivatives_in_scan_space(model_gpu,scan,field_gpu,ParametersType::All,partials);
  return(partials.GetVolume());
} EddyCatch
/// Returns a smoothed copy of ima. The image is copied to a
/// CudaVolume, smoothed there with CudaVolume::Smooth and copied back.
/// \param ima Image to smooth.
/// \param fwhm Forwarded to CudaVolume::Smooth.
/// \return The smoothed image.
NEWIMAGE::volume<float> EddyGpuUtils::Smooth(const NEWIMAGE::volume<float>& ima,
                                             float                          fwhm) EddyTry
{
  EDDY::CudaVolume smoothed(ima,true);
  smoothed.Smooth(fwhm);
  return(smoothed.GetVolume());
} EddyCatch
/// Initialises the GPU and runs EddyInternalGpuUtils::detect_outliers,
/// passing zero for both of its iter and level arguments.
/// \param clo Command line options.
/// \param st Type of scans to consider.
/// \param pmp Prediction maker.
/// \param mask Mask forwarded to detect_outliers.
/// \param sm Scan manager.
/// \param rm Replacement manager, passed on as input/output.
/// \return Vector with one entry per scan of type st, filled in by
///         detect_outliers.
DiffStatsVector EddyGpuUtils::DetectOutliers(// Input
                                             const EddyCommandLineOptions&             clo,
                                             ScanType                                  st,
                                             const std::shared_ptr<DWIPredictionMaker> pmp,
                                             const NEWIMAGE::volume<float>&            mask,
                                             const ECScanManager&                      sm,
                                             // Input/Output
                                             ReplacementManager&                       rm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  DiffStatsVector stats(sm.NScans(st));
  EddyInternalGpuUtils::detect_outliers(clo, st, pmp, mask, sm, 0, 0, rm, stats);
  return(stats);
} EddyCatch
/// Initialises the GPU and runs EddyInternalGpuUtils::detect_outliers,
/// forwarding the caller's iter and level.
/// \param clo Command line options.
/// \param st Type of scans to consider.
/// \param pmp Prediction maker.
/// \param mask Mask forwarded to detect_outliers.
/// \param sm Scan manager.
/// \param iter For debugging purposes only; forwarded as is.
/// \param level For debugging purposes only; forwarded as is.
/// \param rm Replacement manager, passed on as input/output.
/// \return Vector with one entry per scan of type st, filled in by
///         detect_outliers.
DiffStatsVector EddyGpuUtils::DetectOutliers(// Input
                                             const EddyCommandLineOptions&             clo,
                                             ScanType                                  st,
                                             const std::shared_ptr<DWIPredictionMaker> pmp,
                                             const NEWIMAGE::volume<float>&            mask,
                                             const ECScanManager&                      sm,
                                             // Input for debugging purposes only
                                             unsigned int                              iter,
                                             unsigned int                              level,
                                             // Input/Output
                                             ReplacementManager&                       rm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  DiffStatsVector stats(sm.NScans(st));
  EddyInternalGpuUtils::detect_outliers(clo, st, pmp, mask, sm, iter, level, rm, stats);
  return(stats);
} EddyCatch
/// Initialises the GPU and forwards all arguments unchanged to
/// EddyInternalGpuUtils::replace_outliers.
/// \param clo Command line options.
/// \param st Type of scans to consider.
/// \param pmp Prediction maker.
/// \param mask Mask forwarded to replace_outliers.
/// \param rm Replacement manager (read only here).
/// \param add_noise Flag forwarded as is.
/// \param sm Scan manager, passed on as input/output.
void EddyGpuUtils::ReplaceOutliers(// Input
                                   const EddyCommandLineOptions&             clo,
                                   ScanType                                  st,
                                   const std::shared_ptr<DWIPredictionMaker> pmp,
                                   const NEWIMAGE::volume<float>&            mask,
                                   const ReplacementManager&                 rm,
                                   bool                                      add_noise,
                                   // Input/Output
                                   ECScanManager&                            sm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  EddyInternalGpuUtils::replace_outliers(clo, st, pmp, mask, rm, add_noise, sm);
} EddyCatch
/// Initialises the GPU and calls EddyInternalGpuUtils::param_update
/// for all parameters (ParametersType::All), with zero for its iter
/// and level arguments and a null pointer for its last argument.
/// \param pred Prediction volume.
/// \param susc Optional field, forwarded as is.
/// \param bias Optional field, forwarded as is.
/// \param pmask Mask volume forwarded to param_update.
/// \param fwhm Forwarded to param_update.
/// \param very_verbose Forwarded to param_update.
/// \param scindex Scan index, forwarded to param_update.
/// \param scan Scan passed on as input/output.
/// \return The value returned by param_update.
double EddyGpuUtils::MovAndECParamUpdate(// Input
                                         const NEWIMAGE::volume<float>&                    pred,
                                         std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                         std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                         const NEWIMAGE::volume<float>&                    pmask,
                                         float                                             fwhm,
                                         bool                                              very_verbose,
                                         unsigned int                                      scindex,
                                         // Input/output
                                         EDDY::ECScan&                                     scan) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  // nullptr rather than NULL: type safe, and matches the use of nullptr elsewhere in this file
  return(EddyInternalGpuUtils::param_update(pred,susc,bias,pmask,ParametersType::All,fwhm,very_verbose,scindex,0,0,scan,nullptr));
} EddyCatch
/// Initialises the GPU and calls EddyInternalGpuUtils::param_update
/// for all parameters (ParametersType::All), forwarding the caller's
/// debug arguments and a null pointer for its last argument.
/// \param pred Prediction volume.
/// \param susc Optional field, forwarded as is.
/// \param bias Optional field, forwarded as is.
/// \param pmask Mask volume forwarded to param_update.
/// \param fwhm Forwarded to param_update.
/// \param very_verbose Forwarded to param_update.
/// \param scindex For debug purposes only; forwarded as is.
/// \param iter For debug purposes only; forwarded as is.
/// \param level For debug purposes only; forwarded as is.
/// \param scan Scan passed on as input/output.
/// \return The value returned by param_update.
double EddyGpuUtils::MovAndECParamUpdate(// Input
                                         const NEWIMAGE::volume<float>&                    pred,
                                         std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                         std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                         const NEWIMAGE::volume<float>&                    pmask,
                                         float                                             fwhm,
                                         bool                                              very_verbose,
                                         // These inputs are for debug purposes only
                                         unsigned int                                      scindex,
                                         unsigned int                                      iter,
                                         unsigned int                                      level,
                                         // Input/output
                                         EDDY::ECScan&                                     scan) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  // nullptr rather than NULL: type safe, and matches the use of nullptr elsewhere in this file
  return(EddyInternalGpuUtils::param_update(pred,susc,bias,pmask,ParametersType::All,fwhm,very_verbose,scindex,iter,level,scan,nullptr));
} EddyCatch
/// Initialises the GPU and calls EddyInternalGpuUtils::long_ec_update
/// with zero for iter and level and an empty list of debug indices.
/// \param pred Predictions in model space.
/// \param pmask "Data valid" mask in model space.
/// \param fwhm FWHM for Gaussian smoothing.
/// \param very_verbose Detailed output to screen?
/// \param sm Scans to register to the predictions (input/output).
/// \return The vector returned by long_ec_update.
std::vector<double> EddyGpuUtils::LongECParamUpdate(// Input
                                                    const std::vector<NEWIMAGE::volume<float> >& pred,
                                                    const NEWIMAGE::volume<float>&               pmask,
                                                    float                                        fwhm,
                                                    bool                                         very_verbose,
                                                    // Input/output
                                                    EDDY::ECScanManager&                         sm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  const std::vector<unsigned int> no_debug_index(0); // Empty: no scans selected for debug output
  return(EddyInternalGpuUtils::long_ec_update(pred,pmask,fwhm,very_verbose,0,0,no_debug_index,sm));
} EddyCatch
/// Initialises the GPU and forwards all arguments unchanged to
/// EddyInternalGpuUtils::long_ec_update.
/// \param pred Predictions in model space.
/// \param pmask "Data valid" mask in model space.
/// \param fwhm FWHM for Gaussian smoothing.
/// \param very_verbose Detailed output to screen?
/// \param iter Iteration; for debugging only.
/// \param level Determines how much gets written; for debugging only.
/// \param debug_index Indices of scans to write debug info for.
/// \param sm Scans to register to the predictions (input/output).
/// \return The vector returned by long_ec_update.
std::vector<double> EddyGpuUtils::LongECParamUpdate(// Input
                                                    const std::vector<NEWIMAGE::volume<float> >& pred,
                                                    const NEWIMAGE::volume<float>&               pmask,
                                                    float                                        fwhm,
                                                    bool                                         very_verbose,
                                                    // These input parameters are for debugging only
                                                    unsigned int                                 iter,
                                                    unsigned int                                 level,
                                                    const std::vector<unsigned int>&             debug_index,
                                                    // Input/output
                                                    EDDY::ECScanManager&                         sm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  return(EddyInternalGpuUtils::long_ec_update(pred, pmask, fwhm, very_verbose, iter, level, debug_index, sm));
} EddyCatch
/////////////////////////////////////////////////////////////////////
///
/// \file EddyGpuUtils.cu
/// \brief Definitions of static class with collection of GPU routines used in the eddy project
///
/// \author Jesper Andersson & Moises Hernandez
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
// Because of a bug in cuda_fp16.hpp, which gets included by cublas_v2.h, cublas_v2.h has to
// be included before any include files that set up anything related to the std-lib.
// If not, there will be an ambiguity in cuda_fp16.hpp about whether to use the
// old-style C isinf or the new (since C++11) std::isinf.
#include "cublas_v2.h"
#include <cstdlib>
#include <string>
#include <vector>
#include <cmath>
#include <cuda.h>
#pragma push
#pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap
#pragma diag_suppress = expr_has_no_effect // Supress warnings from boost
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#pragma pop
#include "miscmaths/miscmaths.h"
#include "EddyCudaHelperFunctions.h"
#include "EddyInternalGpuUtils.h"
#include "EddyHelperClasses.h"
#include "DiffusionGP.h"
#include "b0Predictor.h"
#include "ECScanClasses.h"
#include "EddyUtils.h"
#include "EddyGpuUtils.h"
#include "EddyKernels.h"
using namespace EDDY;
/// Builds a prediction maker for scans of type st and loads it via
/// EddyInternalGpuUtils::load_prediction_maker.
///
/// For ScanType::DWI a DiffusionGP is created from a covariance matrix
/// and a hyperparameter estimator, both selected from the command line
/// options; for any other scan type a b0Predictor is created.
/// \param clo Command line options steering the choices above.
/// \param st Type of scans to build the predictor for.
/// \param sm Scan manager supplying the scans.
/// \param iter Forwarded to load_prediction_maker.
/// \param fwhm Forwarded to load_prediction_maker.
/// \param mask Output. Initialised from the image of scan 0, set to
///        trilinear interpolation and filled with ones before being
///        handed to load_prediction_maker.
/// \param use_orig Forwarded to load_prediction_maker.
/// \return Shared pointer to the prediction maker.
/// \throws EddyException for an unknown covariance function or an
///         unknown hyperparameter cost-function.
std::shared_ptr<DWIPredictionMaker> EddyGpuUtils::LoadPredictionMaker(// Input
                                                                      const EddyCommandLineOptions& clo,
                                                                      ScanType                      st,
                                                                      const ECScanManager&          sm,
                                                                      unsigned int                  iter,
                                                                      float                         fwhm,
                                                                      // Output
                                                                      NEWIMAGE::volume<float>&      mask,
                                                                      // Optional input
                                                                      bool                          use_orig) EddyTry
{
  std::shared_ptr<DWIPredictionMaker> pmp; // The prediction maker that is built and returned
  if (st != ScanType::DWI) {
    pmp.reset(new b0Predictor); // Simple mean predictor for non-diffusion-weighted data
  }
  else {
    // Covariance (K) matrix
    std::shared_ptr<KMatrix> K;
    const auto cov_type = clo.CovarianceFunction();
    if (cov_type == CovarianceFunctionType::Spherical) {
      K.reset(new SphericalKMatrix(clo.DontCheckShelling()));
    }
    else if (cov_type == CovarianceFunctionType::Exponential) {
      K.reset(new ExponentialKMatrix(clo.DontCheckShelling()));
    }
    else if (cov_type == CovarianceFunctionType::NewSpherical) {
      K.reset(new NewSphericalKMatrix(clo.DontCheckShelling()));
    }
    else throw EddyException("LoadPredictionMaker: Unknown covariance function");

    // Hyperparameter estimator, with a cost-function where one is needed
    std::shared_ptr<HyParCF> hpcf;
    std::shared_ptr<HyParEstimator> hpe;
    if (clo.HyperParFixed()) {
      hpe.reset(new FixedValueHyParEstimator(clo.HyperParValues()));
    }
    else {
      const auto cost_type = clo.HyParCostFunction();
      if (cost_type == HyParCostFunctionType::CC) {
        hpe.reset(new CheapAndCheerfulHyParEstimator(clo.NVoxHp(),clo.InitRand()));
      }
      else {
        if (cost_type == HyParCostFunctionType::MML) hpcf.reset(new MMLHyParCF);
        else if (cost_type == HyParCostFunctionType::CV) hpcf.reset(new CVHyParCF);
        else if (cost_type == HyParCostFunctionType::GPP) hpcf.reset(new GPPHyParCF);
        else throw EddyException("LoadPredictionMaker: Unknown hyperparameter cost-function");
        hpe.reset(new FullMontyHyParEstimator(hpcf,clo.HyParFudgeFactor(),clo.NVoxHp(),clo.InitRand(),clo.VeryVerbose()));
      }
    }
    pmp.reset(new DiffusionGP(K,hpe)); // Gaussian process predictor
  }
  pmp->SetNoOfScans(sm.NScans(st));

  // Start from an all-ones mask with the geometry of the first scan
  mask = sm.Scan(0,ScanType::Any).GetIma();
  EddyUtils::SetTrilinearInterp(mask);
  mask = 1.0;

  EddyCudaHelperFunctions::InitGpu();
  EddyInternalGpuUtils::load_prediction_maker(clo,st,sm,iter,fwhm,use_orig,pmp,mask);
  return(pmp);
} EddyCatch
/// Thin wrapper that forwards all its arguments unchanged to
/// EddyInternalGpuUtils::make_scatter_brain_predictions.
/// \param clo Command line options.
/// \param sm Scan manager.
/// \param hypar Hyperparameter values.
/// \param pred Output, passed by reference to the internal routine.
/// \param vwbvrot Flag forwarded as is.
void EddyGpuUtils::MakeScatterBrainPredictions(// Input
                                               const EddyCommandLineOptions& clo,
                                               const ECScanManager&          sm,
                                               const std::vector<double>&    hypar,
                                               // Output
                                               NEWIMAGE::volume4D<float>&    pred,
                                               // Optional input
                                               bool                          vwbvrot) EddyTry
{
  EddyInternalGpuUtils::make_scatter_brain_predictions(clo, sm, hypar, pred, vwbvrot);
} EddyCatch
/*
void EddyGpuUtils::UpdatePredictionMaker(// Input
const EddyCommandLineOptions& clo,
ScanType st,
const ECScanManager& sm,
const ReplacementManager& rm,
const NEWIMAGE::volume<float>& mask,
// Input/Output
std::shared_ptr<DWIPredictionMaker> pmp)
{
EddyCudaHelperFunctions::InitGpu();
EddyInternalGpuUtils::update_prediction_maker(clo,st,sm,rm,mask,pmp);
return;
}
*/
/// Returns the unwarped version of scan, computed by
/// EddyInternalGpuUtils::get_unwarped_scan without a prediction
/// (an empty CudaVolume is passed in its place).
/// \param scan Scan to unwarp.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param bias Optional field; copied to a CudaVolume when non-null.
/// \param use_orig Forwarded to get_unwarped_scan.
/// \param omask Optional output. When non-null it is overwritten with
///        the mask produced by get_unwarped_scan.
/// \return The unwarped volume.
NEWIMAGE::volume<float> EddyGpuUtils::GetUnwarpedScan(// Input
                                                      const EDDY::ECScan&                               scan,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                      bool                                              use_orig,
                                                      // Optional output
                                                      NEWIMAGE::volume<float>                           *omask) EddyTry
{
  EDDY::CudaVolume susc_gpu;
  if (susc) susc_gpu = *susc;
  EDDY::CudaVolume bias_gpu;
  if (bias) bias_gpu = *bias;
  EDDY::CudaVolume no_pred; // Left empty: this overload has no prediction to supply
  EDDY::CudaVolume unwarped(scan.GetIma(),false);
  if (!omask) {
    // Caller does not want the mask; use a throw-away one
    EDDY::CudaVolume scratch_mask;
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,no_pred,true,use_orig,unwarped,scratch_mask);
  }
  else {
    EDDY::CudaVolume mask_gpu(*omask,false);
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,no_pred,true,use_orig,unwarped,mask_gpu);
    *omask = mask_gpu.GetVolume();
  }
  return(unwarped.GetVolume());
} EddyCatch
/// Returns the unwarped version of scan, computed by
/// EddyInternalGpuUtils::get_unwarped_scan with a prediction supplied.
///
/// A warning is printed to std::cout when the scan is not
/// slice-to-volume, since a prediction is then not meaningful.
/// \param scan Scan to unwarp.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param bias Optional field; copied to a CudaVolume when non-null.
/// \param pred Prediction, copied to a CudaVolume and forwarded.
/// \param use_orig Forwarded to get_unwarped_scan.
/// \param omask Optional output. When non-null it is overwritten with
///        the mask produced by get_unwarped_scan.
/// \return The unwarped volume.
/// \throws EddyException if the slice-to-volume interpolation of the
///         scan is not spline.
NEWIMAGE::volume<float> EddyGpuUtils::GetUnwarpedScan(// Input
                                                      const EDDY::ECScan&                               scan,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                      std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                      const NEWIMAGE::volume<float>&                    pred,
                                                      bool                                              use_orig,
                                                      // Optional output
                                                      NEWIMAGE::volume<float>                           *omask) EddyTry
{
  if (!scan.IsSliceToVol()) {
    std::cout << "EddyGpuUtils::GetUnwarpedScan: Warning, it does not make sense to supply pred for volumetric resampling" << std::endl;
  }
  if (scan.GetPolation().GetS2VInterp() != NEWIMAGE::spline) {
    throw EddyException("EddyGpuUtils::GetUnwarpedScan: use of prediction cannot be combined with trilinear interpolation");
  }
  EDDY::CudaVolume susc_gpu;
  if (susc) susc_gpu = *susc;
  EDDY::CudaVolume bias_gpu;
  if (bias) bias_gpu = *bias;
  EDDY::CudaVolume unwarped(scan.GetIma(),false);
  EDDY::CudaVolume pred_gpu = pred;
  if (!omask) {
    // Caller does not want the mask; use a throw-away one
    EDDY::CudaVolume scratch_mask;
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,pred_gpu,true,use_orig,unwarped,scratch_mask);
  }
  else {
    EDDY::CudaVolume mask_gpu(*omask,false);
    EddyInternalGpuUtils::get_unwarped_scan(scan,susc_gpu,bias_gpu,pred_gpu,true,use_orig,unwarped,mask_gpu);
    *omask = mask_gpu.GetVolume();
  }
  return(unwarped.GetVolume());
} EddyCatch
/// Returns the unwarped version of scan, computed by
/// EddyInternalGpuUtils::get_volumetric_unwarped_scan.
/// \param scan Scan to unwarp.
/// \param susc Optional field; copied to a CudaVolume when non-null.
/// \param bias Optional field; copied to a CudaVolume when non-null.
/// \param use_orig Forwarded to get_volumetric_unwarped_scan.
/// \param omask Optional output. When non-null it is overwritten with
///        the mask produced by the internal routine.
/// \param deriv Optional output. When non-null it is overwritten with
///        the 4D volume produced by the internal routine.
/// \return The unwarped volume.
NEWIMAGE::volume<float> EddyGpuUtils::GetVolumetricUnwarpedScan(// Input
                                                                const EDDY::ECScan&                               scan,
                                                                std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                                std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                                bool                                              use_orig,
                                                                // Optional output
                                                                NEWIMAGE::volume<float>                           *omask,
                                                                NEWIMAGE::volume4D<float>                         *deriv) EddyTry
{
  EDDY::CudaVolume cuda_susc;
  if (susc) cuda_susc = *susc;
  EDDY::CudaVolume cuda_bias;
  if (bias) cuda_bias = *bias;
  EDDY::CudaVolume uwscan(scan.GetIma(),false);
  // The internal routine always needs a mask and a derivative volume to
  // write into. Those the caller asked for are sized from the caller's
  // objects and copied back; the others are empty throw-aways.
  if (omask && deriv) {
    EDDY::CudaVolume tmpmask(*omask,false);
    EDDY::CudaVolume4D tmpderiv(*deriv,false);
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
    *omask = tmpmask.GetVolume();
    *deriv = tmpderiv.GetVolume();
  }
  else if (omask) {
    EDDY::CudaVolume tmpmask(*omask,false);
    EDDY::CudaVolume4D tmpderiv;
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
    *omask = tmpmask.GetVolume();
  }
  else if (deriv) {
    EDDY::CudaVolume tmpmask;
    EDDY::CudaVolume4D tmpderiv(*deriv,false);
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
    *deriv = tmpderiv.GetVolume();
  }
  else {
    EDDY::CudaVolume tmpmask;
    EDDY::CudaVolume4D tmpderiv;
    EddyInternalGpuUtils::get_volumetric_unwarped_scan(scan,cuda_susc,cuda_bias,true,use_orig,uwscan,tmpmask,tmpderiv);
  }
  return(uwscan.GetVolume());
} EddyCatch
/// Computes root-mean-square movement for every scan of type st.
///
/// For each scan the movement displacement field is obtained from
/// EddyInternalGpuUtils::MovementDisplacementToModelSpace, its squared
/// norm is averaged within the mask from sm.Mask(), and the square
/// root is taken.
/// \param sm Scan manager supplying the scans and the mask.
/// \param st Type of scans to process.
/// \param restricted Forwarded to MovementDisplacementToModelSpace.
/// \return Matrix with one row per scan. Column 0 holds the RMS of the
///         scan's displacement field. Column 1 holds the RMS of the
///         difference to the previous scan's field (0 for the first scan).
arma::mat EddyGpuUtils::GetMovementRMS(const ECScanManager& sm,
                                       ScanType             st,
                                       bool                 restricted) EddyTry
{
  const auto nscans = sm.NScans(st);
  arma::mat rms(nscans,2);
  EDDY::CudaVolume mask = sm.Mask();
  // The mask never changes inside the loop, so its sum is computed once
  // here rather than twice per scan.
  const auto mask_sum = mask.Sum();
  EDDY::CudaVolume4D mov_field;
  EDDY::CudaVolume4D prev_mov_field;
  for (unsigned int s=0; s<nscans; s++) {
    if (s) prev_mov_field = mov_field; // Keep previous field for the scan-to-scan difference
    EddyInternalGpuUtils::MovementDisplacementToModelSpace(sm.Scan(s,st),restricted,mov_field);
    EDDY::CudaVolume4D sqr_mov_field = mov_field * mov_field;
    EDDY::CudaVolume sqr_norm = sqr_mov_field.SumAlongFourthDim();
    double ms = sqr_norm.Sum(mask) / mask_sum;
    rms(s,0) = std::sqrt(ms);
    if (s) {
      EDDY::CudaVolume4D delta_field = mov_field - prev_mov_field;
      delta_field *= delta_field; // Is now squared delta field
      sqr_norm = delta_field.SumAlongFourthDim();
      ms = sqr_norm.Sum(mask) / mask_sum;
      rms(s,1) = std::sqrt(ms);
    }
    else rms(s,1) = 0.0; // No previous scan to compare the first one with
  }
  return(rms);
} EddyCatch
/// Obtains the motion corrected version of scan from
/// EddyInternalGpuUtils::get_motion_corrected_scan.
/// \param scan Scan to correct.
/// \param use_orig Forwarded to get_motion_corrected_scan.
/// \param ovol Output, overwritten with the corrected volume.
/// \param omask Optional output. When non-null, a mask with the header
///        of the scan is initialised to ones, handed to the internal
///        routine and then copied to *omask. When null, an empty mask
///        is handed to the internal routine.
void EddyGpuUtils::GetMotionCorrectedScan(// Input
                                          const EDDY::ECScan&       scan,
                                          bool                      use_orig,
                                          // Output
                                          NEWIMAGE::volume<float>&  ovol,
                                          // Optional output
                                          NEWIMAGE::volume<float>   *omask) EddyTry
{
  EDDY::CudaVolume corrected(scan.GetIma(),false);
  EDDY::CudaVolume mask_gpu;
  if (omask) {
    mask_gpu.SetHdr(corrected);
    mask_gpu = 1.0;
  }
  EddyInternalGpuUtils::get_motion_corrected_scan(scan,use_orig,corrected,mask_gpu);
  ovol = corrected.GetVolume();
  if (omask) *omask = mask_gpu.GetVolume();
} EddyCatch
/// Bridge to EddyInternalGpuUtils::transform_model_to_scan_space.
///
/// \param scan   Scan that is passed on to the GPU routine.
/// \param mima   Image in model space. Also used as the template from
///               which the GPU work volumes are constructed.
/// \param susc   Optional field. Only transferred to the GPU if non-null,
///               otherwise a default constructed CudaVolume is passed on.
/// \param jacmod Passed on unchanged to the GPU routine.
/// \return The transformed image. The mask, Jacobian and 4D volumes that
///         the GPU routine also takes as arguments are discarded.
NEWIMAGE::volume<float> EddyGpuUtils::TransformModelToScanSpace(const EDDY::ECScan&                             scan,
                                                                const NEWIMAGE::volume<float>&                  mima,
                                                                std::shared_ptr<const NEWIMAGE::volume<float> > susc,
                                                                bool                                            jacmod) EddyTry
{
  // Inputs on the GPU
  EDDY::CudaVolume model_gpu = mima;
  EDDY::CudaVolume field_gpu;
  if (susc) field_gpu = *susc;

  // Outputs of the GPU routine, only the first one is of interest here
  EDDY::CudaVolume in_scan_space(mima,false);
  EDDY::CudaVolume valid_mask(mima,false);
  valid_mask = 1.0;
  EDDY::CudaVolume jacobian(mima,false);
  EDDY::CudaVolume4D unused_4D;

  EddyInternalGpuUtils::transform_model_to_scan_space(model_gpu,scan,field_gpu,jacmod,
                                                      in_scan_space,valid_mask,jacobian,unused_4D);
  return in_scan_space.GetVolume();
} EddyCatch
/// Bridge to EddyInternalGpuUtils::get_partial_derivatives_in_scan_space,
/// always called with ParametersType::All.
///
/// \param scan Scan that is passed on to the GPU routine. Its NDerivs()
///             decides the size of the fourth dimension of the output.
/// \param mima Image in model space, also used as template for the output.
/// \param susc Optional field. Only transferred to the GPU if non-null.
/// \return 4D volume with the derivatives as filled in by the GPU routine.
NEWIMAGE::volume4D<float> EddyGpuUtils::DerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                                const NEWIMAGE::volume<float>&                  mima,
                                                                                std::shared_ptr<const NEWIMAGE::volume<float> > susc) EddyTry
{
  EDDY::CudaVolume model_gpu = mima;
  EDDY::CudaVolume field_gpu;
  if (susc) field_gpu = *susc;

  EDDY::CudaVolume4D partials(mima,scan.NDerivs(),false);
  EddyInternalGpuUtils::get_partial_derivatives_in_scan_space(model_gpu,scan,field_gpu,
                                                              ParametersType::All,partials);
  return partials.GetVolume();
} EddyCatch
/// Bridge to EddyInternalGpuUtils::get_direct_partial_derivatives_in_scan_space,
/// always called with ParametersType::All.
///
/// \param scan Scan that is passed on to the GPU routine. Its NDerivs()
///             decides the size of the fourth dimension of the output.
/// \param mima Image in model space, also used as template for the output.
/// \param susc Optional field. Only transferred to the GPU if non-null.
/// \return 4D volume with the derivatives as filled in by the GPU routine.
NEWIMAGE::volume4D<float> EddyGpuUtils::DirectDerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                                      const NEWIMAGE::volume<float>&                  mima,
                                                                                      std::shared_ptr<const NEWIMAGE::volume<float> > susc) EddyTry
{
  EDDY::CudaVolume model_gpu = mima;
  EDDY::CudaVolume field_gpu;
  if (susc) field_gpu = *susc;

  EDDY::CudaVolume4D partials(mima,scan.NDerivs(),false);
  EddyInternalGpuUtils::get_direct_partial_derivatives_in_scan_space(model_gpu,scan,field_gpu,
                                                                     ParametersType::All,partials);
  return partials.GetVolume();
} EddyCatch
/// Smooths an image on the GPU.
///
/// \param ima  Image to smooth. It is left untouched, the smoothing is
///             done on a GPU copy.
/// \param fwhm Passed on unchanged to CudaVolume::Smooth.
/// \return The smoothed image.
NEWIMAGE::volume<float> EddyGpuUtils::Smooth(const NEWIMAGE::volume<float>& ima,
                                             float                          fwhm) EddyTry
{
  EDDY::CudaVolume gpu_ima(ima,true);
  gpu_ima.Smooth(fwhm);
  return gpu_ima.GetVolume();
} EddyCatch
/// Bridge to EddyInternalGpuUtils::detect_outliers without debug output,
/// i.e. the iteration and level arguments are both passed as 0.
///
/// \param clo  Command line options, passed on unchanged.
/// \param st   Scan type. Also decides the size of the returned vector.
/// \param pmp  Prediction maker, passed on unchanged.
/// \param mask Mask, passed on unchanged.
/// \param sm   Scan manager, passed on unchanged.
/// \param rm   Input/output. Replacement manager handed to the GPU routine.
/// \return Vector with sm.NScans(st) elements as filled in by the GPU routine.
DiffStatsVector EddyGpuUtils::DetectOutliers(const EddyCommandLineOptions&             clo,
                                             ScanType                                  st,
                                             const std::shared_ptr<DWIPredictionMaker> pmp,
                                             const NEWIMAGE::volume<float>&            mask,
                                             const ECScanManager&                      sm,
                                             ReplacementManager&                       rm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();

  DiffStatsVector stats(sm.NScans(st));
  EddyInternalGpuUtils::detect_outliers(clo,st,pmp,mask,sm,0,0,rm,stats);
  return stats;
} EddyCatch
/// Bridge to EddyInternalGpuUtils::detect_outliers, debugging version.
///
/// \param clo   Command line options, passed on unchanged.
/// \param st    Scan type. Also decides the size of the returned vector.
/// \param pmp   Prediction maker, passed on unchanged.
/// \param mask  Mask, passed on unchanged.
/// \param sm    Scan manager, passed on unchanged.
/// \param iter  For debugging purposes only, passed on unchanged.
/// \param level For debugging purposes only, passed on unchanged.
/// \param rm    Input/output. Replacement manager handed to the GPU routine.
/// \return Vector with sm.NScans(st) elements as filled in by the GPU routine.
DiffStatsVector EddyGpuUtils::DetectOutliers(const EddyCommandLineOptions&             clo,
                                             ScanType                                  st,
                                             const std::shared_ptr<DWIPredictionMaker> pmp,
                                             const NEWIMAGE::volume<float>&            mask,
                                             const ECScanManager&                      sm,
                                             unsigned int                              iter,
                                             unsigned int                              level,
                                             ReplacementManager&                       rm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();

  DiffStatsVector stats(sm.NScans(st));
  EddyInternalGpuUtils::detect_outliers(clo,st,pmp,mask,sm,iter,level,rm,stats);
  return stats;
} EddyCatch
/// Bridge to EddyInternalGpuUtils::replace_outliers. Initialises the GPU
/// and then passes all arguments on unchanged.
///
/// \param clo       Command line options.
/// \param st        Scan type.
/// \param pmp       Prediction maker.
/// \param mask      Mask.
/// \param rm        Replacement manager.
/// \param add_noise Passed on unchanged to the GPU routine.
/// \param sm        Input/output. Scan manager handed to the GPU routine.
void EddyGpuUtils::ReplaceOutliers(const EddyCommandLineOptions&             clo,
                                   ScanType                                  st,
                                   const std::shared_ptr<DWIPredictionMaker> pmp,
                                   const NEWIMAGE::volume<float>&            mask,
                                   const ReplacementManager&                 rm,
                                   bool                                      add_noise,
                                   ECScanManager&                            sm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();
  EddyInternalGpuUtils::replace_outliers(clo,
                                         st,
                                         pmp,
                                         mask,
                                         rm,
                                         add_noise,
                                         sm);
} EddyCatch
/// Bridge to EddyInternalGpuUtils::param_update without debug output.
/// The call is made with ParametersType::All, with iteration and level
/// both 0 and with NULL as the final argument.
///
/// \param pred         Prediction, passed on unchanged.
/// \param susc         Optional field, passed on unchanged.
/// \param bias         Optional field, passed on unchanged.
/// \param pmask        Mask, passed on unchanged.
/// \param fwhm         Passed on unchanged.
/// \param very_verbose Passed on unchanged.
/// \param scindex      Passed on unchanged.
/// \param scan         Input/output. Scan handed to the GPU routine.
/// \return The value returned by the GPU routine.
double EddyGpuUtils::MovAndECParamUpdate(const NEWIMAGE::volume<float>&                  pred,
                                         std::shared_ptr<const NEWIMAGE::volume<float> > susc,
                                         std::shared_ptr<const NEWIMAGE::volume<float> > bias,
                                         const NEWIMAGE::volume<float>&                  pmask,
                                         float                                           fwhm,
                                         bool                                            very_verbose,
                                         unsigned int                                    scindex,
                                         EDDY::ECScan&                                   scan) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();

  const double result = EddyInternalGpuUtils::param_update(pred,susc,bias,pmask,
                                                           ParametersType::All,
                                                           fwhm,very_verbose,
                                                           scindex,0,0,
                                                           scan,NULL);
  return result;
} EddyCatch
/// Bridge to EddyInternalGpuUtils::param_update, debugging version.
/// The call is made with ParametersType::All and with NULL as the final
/// argument.
///
/// \param pred         Prediction, passed on unchanged.
/// \param susc         Optional field, passed on unchanged.
/// \param bias         Optional field, passed on unchanged.
/// \param pmask        Mask, passed on unchanged.
/// \param fwhm         Passed on unchanged.
/// \param very_verbose Passed on unchanged.
/// \param scindex      For debug purposes only, passed on unchanged.
/// \param iter         For debug purposes only, passed on unchanged.
/// \param level        For debug purposes only, passed on unchanged.
/// \param scan         Input/output. Scan handed to the GPU routine.
/// \return The value returned by the GPU routine.
double EddyGpuUtils::MovAndECParamUpdate(const NEWIMAGE::volume<float>&                  pred,
                                         std::shared_ptr<const NEWIMAGE::volume<float> > susc,
                                         std::shared_ptr<const NEWIMAGE::volume<float> > bias,
                                         const NEWIMAGE::volume<float>&                  pmask,
                                         float                                           fwhm,
                                         bool                                            very_verbose,
                                         unsigned int                                    scindex,
                                         unsigned int                                    iter,
                                         unsigned int                                    level,
                                         EDDY::ECScan&                                   scan) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();

  const double result = EddyInternalGpuUtils::param_update(pred,susc,bias,pmask,
                                                           ParametersType::All,
                                                           fwhm,very_verbose,
                                                           scindex,iter,level,
                                                           scan,NULL);
  return result;
} EddyCatch
/// Bridge to EddyInternalGpuUtils::long_ec_update without debug output,
/// i.e. iteration and level are both passed as 0 and the list of scans
/// to write debug info for is empty.
///
/// \param pred         Predictions in model space.
/// \param pmask        "Data valid" mask in model space.
/// \param fwhm         FWHM for Gaussian smoothing.
/// \param very_verbose Detailed output to screen?
/// \param sm           Input/output. Scans we want to register to predictions.
/// \return The vector returned by the GPU routine.
std::vector<double> EddyGpuUtils::LongECParamUpdate(const std::vector<NEWIMAGE::volume<float> >& pred,
                                                    const NEWIMAGE::volume<float>&               pmask,
                                                    float                                        fwhm,
                                                    bool                                         very_verbose,
                                                    EDDY::ECScanManager&                         sm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();

  const std::vector<unsigned int> no_debug_scans(0);
  std::vector<double> result = EddyInternalGpuUtils::long_ec_update(pred,pmask,fwhm,very_verbose,
                                                                    0,0,no_debug_scans,sm);
  return result;
} EddyCatch
/// Bridge to EddyInternalGpuUtils::long_ec_update, debugging version.
///
/// \param pred         Predictions in model space.
/// \param pmask        "Data valid" mask in model space.
/// \param fwhm         FWHM for Gaussian smoothing.
/// \param very_verbose Detailed output to screen?
/// \param iter         For debugging only. Iteration.
/// \param level        For debugging only. Determines how much gets written.
/// \param debug_index  For debugging only. Indices of scans to write debug info for.
/// \param sm           Input/output. Scans we want to register to predictions.
/// \return The vector returned by the GPU routine.
std::vector<double> EddyGpuUtils::LongECParamUpdate(const std::vector<NEWIMAGE::volume<float> >& pred,
                                                    const NEWIMAGE::volume<float>&               pmask,
                                                    float                                        fwhm,
                                                    bool                                         very_verbose,
                                                    unsigned int                                 iter,
                                                    unsigned int                                 level,
                                                    const std::vector<unsigned int>&             debug_index,
                                                    EDDY::ECScanManager&                         sm) EddyTry
{
  EddyCudaHelperFunctions::InitGpu();

  std::vector<double> result = EddyInternalGpuUtils::long_ec_update(pred,pmask,fwhm,very_verbose,
                                                                    iter,level,debug_index,sm);
  return result;
} EddyCatch
/////////////////////////////////////////////////////////////////////
///
/// \file EddyGpuUtils.h
/// \brief Declarations of static class with collection of GPU routines used in the eddy project
///
/// The routines declared here are "bridges" to the actual GPU
/// routines. The interface to these routines only exposes classes
/// that are part of the "regular" FSL libraries. Hence this file
/// can be safely included by files that know nothing of the GPU
/// and that are compiled by gcc.
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef EddyGpuUtils_h
#define EddyGpuUtils_h
#include <cstdlib>
#include <cstddef>
#include <string>
#include <vector>
#include <cmath>
#include <memory>
#include <hip/hip_runtime.h>
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#include "EddyHelperClasses.h"
#include "DiffusionGP.h"
#include "b0Predictor.h"
#include "ECScanClasses.h"
#include "EddyCommandLineOptions.h"
namespace EDDY {

/////////////////////////////////////////////////////////////////////
///
/// \brief This class contains a set of static methods that implement
/// various utility functions for the eddy project implemented on
/// the GPU.
///
/// All members are static, so the class is only used as a named
/// collection of functions. Optional output pointers all default to
/// nullptr, meaning "not requested".
///
/////////////////////////////////////////////////////////////////////
class EddyGpuUtils
{
public:

  /// Loads prediction maker with images unwarped according to current EC estimates
  static std::shared_ptr<DWIPredictionMaker> LoadPredictionMaker(// Input
                                                                 const EddyCommandLineOptions& clo,
                                                                 ScanType                      st,
                                                                 const ECScanManager&          sm,
                                                                 unsigned int                  iter,
                                                                 float                         fwhm,
                                                                 // Output
                                                                 NEWIMAGE::volume<float>&      mask,
                                                                 // Optional input
                                                                 bool                          use_orig=false);

  /// Makes predictions that are returned in pred.
  /// NOTE(review): the implementation is not part of this header, confirm
  /// the meaning of hypar and vwbvrot against the .cu file.
  static void MakeScatterBrainPredictions(// Input
                                          const EddyCommandLineOptions& clo,
                                          const ECScanManager&          sm,
                                          const std::vector<double>&    hypar,
                                          // Output
                                          NEWIMAGE::volume4D<float>&    pred,
                                          // Optional input
                                          bool                          vwbvrot=false);

  /*
  /// Replaces the scans indicated by rm
  static void UpdatePredictionMaker(// Input
                                    const EddyCommandLineOptions&        clo,
                                    ScanType                             st,
                                    const ECScanManager&                 sm,
                                    const ReplacementManager&            rm,
                                    const NEWIMAGE::volume<float>&       mask,
                                    // Input/Output
                                    std::shared_ptr<DWIPredictionMaker>  pmp);
  */

  /// Returns a scan corrected for motion and distortions
  static NEWIMAGE::volume<float> GetUnwarpedScan(// Input
                                                 const EDDY::ECScan&                               scan,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                 bool                                              use_orig,
                                                 // Optional output
                                                 NEWIMAGE::volume<float>                           *omask=nullptr);

  /// Returns a scan corrected for motion and distortions, helped by the prediction in pred
  static NEWIMAGE::volume<float> GetUnwarpedScan(// Input
                                                 const EDDY::ECScan&                               scan,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                 const NEWIMAGE::volume<float>&                    pred,
                                                 bool                                              use_orig,
                                                 // Optional output
                                                 NEWIMAGE::volume<float>                           *omask=nullptr);

  /// Returns a scan corrected for motion and distortions. Will override slice-to-vol
  static NEWIMAGE::volume<float> GetVolumetricUnwarpedScan(// Input
                                                           const EDDY::ECScan&                               scan,
                                                           std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                           std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                           bool                                              use_orig,
                                                           // Optional output
                                                           NEWIMAGE::volume<float>                           *omask=nullptr,
                                                           NEWIMAGE::volume4D<float>                         *deriv=nullptr);

  /// Calculate the movement RMS for all scans of type st.
  /// Returns a matrix with one row per scan and two columns.
  static arma::mat GetMovementRMS(const ECScanManager& sm,
                                  ScanType             st,
                                  bool                 restricted);

  /// Returns a scan corrected for motion (scanner->model(sort of))
  static void GetMotionCorrectedScan(// Input
                                     const EDDY::ECScan&       scan,
                                     bool                      use_orig,
                                     // Output
                                     NEWIMAGE::volume<float>&  ovol,
                                     // Optional output
                                     NEWIMAGE::volume<float>   *omask=nullptr);

  /// Returns a scan (in model space) warped into observation space
  static NEWIMAGE::volume<float> TransformModelToScanSpace(const EDDY::ECScan&                             scan,
                                                           const NEWIMAGE::volume<float>&                  mima,
                                                           std::shared_ptr<const NEWIMAGE::volume<float> > susc,
                                                           bool                                            jacmod=true);

  /// Returns the derivatives for the model->scan space transform as a 4D volume
  static NEWIMAGE::volume4D<float> DerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                           const NEWIMAGE::volume<float>&                  mima,
                                                                           std::shared_ptr<const NEWIMAGE::volume<float> > susc);

  /// As DerivativesForModelToScanSpaceTransform, but using the "direct" GPU routine
  static NEWIMAGE::volume4D<float> DirectDerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                                 const NEWIMAGE::volume<float>&                  mima,
                                                                                 std::shared_ptr<const NEWIMAGE::volume<float> > susc);

  /// Returns a scan convolved with a Gaussian with fwhm in mm
  static NEWIMAGE::volume<float> Smooth(const NEWIMAGE::volume<float>& ima,
                                        float                          fwhm);

  /// Detects outlier-slices
  static DiffStatsVector DetectOutliers(// Input
                                        const EddyCommandLineOptions&             clo,
                                        ScanType                                  st,
                                        const std::shared_ptr<DWIPredictionMaker> pmp,
                                        const NEWIMAGE::volume<float>&            mask,
                                        const ECScanManager&                      sm,
                                        // Input/Output
                                        ReplacementManager&                       rm);

  /// Detects outlier-slices. Version with additional debugging inputs.
  static DiffStatsVector DetectOutliers(// Input
                                        const EddyCommandLineOptions&             clo,
                                        ScanType                                  st,
                                        const std::shared_ptr<DWIPredictionMaker> pmp,
                                        const NEWIMAGE::volume<float>&            mask,
                                        const ECScanManager&                      sm,
                                        // These inputs are for debugging purposes only
                                        unsigned int                              iter,
                                        unsigned int                              level,
                                        // Input/Output
                                        ReplacementManager&                       rm);

  /// Replaces outlier-slices
  static void ReplaceOutliers(// Input
                              const EddyCommandLineOptions&             clo,
                              ScanType                                  st,
                              const std::shared_ptr<DWIPredictionMaker> pmp,
                              const NEWIMAGE::volume<float>&            mask,
                              const ReplacementManager&                 rm,
                              bool                                      add_noise,
                              // Input/Output
                              ECScanManager&                            sm);

  /// Performs update of movement and EC parameters for one scan.
  static double MovAndECParamUpdate(// Input
                                    const NEWIMAGE::volume<float>&                    pred,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                    const NEWIMAGE::volume<float>&                    pmask,
                                    float                                             fwhm,
                                    bool                                              very_verbose,
                                    unsigned int                                      scindex,
                                    // Input/output
                                    EDDY::ECScan&                                     scan);

  /// Performs update of movement and EC parameters for one scan.
  /// Version with additional debugging inputs.
  static double MovAndECParamUpdate(// Input
                                    const NEWIMAGE::volume<float>&                    pred,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                    const NEWIMAGE::volume<float>&                    pmask,
                                    float                                             fwhm,
                                    bool                                              very_verbose,
                                    // These inputs are for debug purposes only
                                    unsigned int                                      scindex,
                                    unsigned int                                      iter,
                                    unsigned int                                      level,
                                    // Input/output
                                    EDDY::ECScan&                                     scan);

  /// Performs update of long time-constant EC parameters for all scans
  // Does currently not use the bias parameter
  static std::vector<double> LongECParamUpdate(// Input
                                               const std::vector<NEWIMAGE::volume<float> >& pred,         // Predictions in model space
                                               const NEWIMAGE::volume<float>&               pmask,        // "Data valid" mask in model space
                                               float                                        fwhm,         // FWHM for Gaussian smoothing
                                               bool                                         very_verbose, // Detailed output to screen?
                                               // Input/output
                                               EDDY::ECScanManager&                         sm);          // Scans we want to register to predictions

  /// Performs update of long time-constant EC parameters for all scans.
  /// Version with additional debugging inputs.
  // Does currently not use the bias parameter
  static std::vector<double> LongECParamUpdate(// Input
                                               const std::vector<NEWIMAGE::volume<float> >& pred,         // Predictions in model space
                                               const NEWIMAGE::volume<float>&               pmask,        // "Data valid" mask in model space
                                               float                                        fwhm,         // FWHM for Gaussian smoothing
                                               bool                                         very_verbose, // Detailed output to screen?
                                               // These input parameters are for debugging only
                                               unsigned int                                 iter,         // Iteration
                                               unsigned int                                 level,        // Determines how much gets written
                                               const std::vector<unsigned int>&             debug_index,  // Indices of scans to write debug info for
                                               // Input/output
                                               EDDY::ECScanManager&                         sm);          // Scans we want to register to predictions
};

} // End namespace EDDY
#endif // End #ifndef EddyGpuUtils_h
/////////////////////////////////////////////////////////////////////
///
/// \file EddyGpuUtils.h
/// \brief Declarations of static class with collection of GPU routines used in the eddy project
///
/// The routines declared here are "bridges" to the actual GPU
/// routines. The interface to these routines only exposes classes
/// that are part of the "regular" FSL libraries. Hence this file
/// can be safely included by files that know nothing of the GPU
/// and that are compiled by gcc.
///
/// \author Jesper Andersson
/// \version 1.0b, Nov., 2012.
/// \Copyright (C) 2012 University of Oxford
///
/////////////////////////////////////////////////////////////////////
#ifndef EddyGpuUtils_h
#define EddyGpuUtils_h
#include <cstdlib>
#include <cstddef>
#include <string>
#include <vector>
#include <cmath>
#include <memory>
#include <cuda.h>
#include "armawrap/newmat.h"
#include "newimage/newimageall.h"
#include "miscmaths/miscmaths.h"
#include "EddyHelperClasses.h"
#include "DiffusionGP.h"
#include "b0Predictor.h"
#include "ECScanClasses.h"
#include "EddyCommandLineOptions.h"
namespace EDDY {

/////////////////////////////////////////////////////////////////////
///
/// \brief This class contains a set of static methods that implement
/// various utility functions for the eddy project implemented on
/// CUDA GPU.
///
/// All members are static, so the class is only used as a named
/// collection of functions. Optional output pointers all default to
/// nullptr, meaning "not requested".
///
/////////////////////////////////////////////////////////////////////
class EddyGpuUtils
{
public:

  /// Loads prediction maker with images unwarped according to current EC estimates
  static std::shared_ptr<DWIPredictionMaker> LoadPredictionMaker(// Input
                                                                 const EddyCommandLineOptions& clo,
                                                                 ScanType                      st,
                                                                 const ECScanManager&          sm,
                                                                 unsigned int                  iter,
                                                                 float                         fwhm,
                                                                 // Output
                                                                 NEWIMAGE::volume<float>&      mask,
                                                                 // Optional input
                                                                 bool                          use_orig=false);

  /// Makes predictions that are returned in pred.
  /// NOTE(review): the implementation is not part of this header, confirm
  /// the meaning of hypar and vwbvrot against the .cu file.
  static void MakeScatterBrainPredictions(// Input
                                          const EddyCommandLineOptions& clo,
                                          const ECScanManager&          sm,
                                          const std::vector<double>&    hypar,
                                          // Output
                                          NEWIMAGE::volume4D<float>&    pred,
                                          // Optional input
                                          bool                          vwbvrot=false);

  /*
  /// Replaces the scans indicated by rm
  static void UpdatePredictionMaker(// Input
                                    const EddyCommandLineOptions&        clo,
                                    ScanType                             st,
                                    const ECScanManager&                 sm,
                                    const ReplacementManager&            rm,
                                    const NEWIMAGE::volume<float>&       mask,
                                    // Input/Output
                                    std::shared_ptr<DWIPredictionMaker>  pmp);
  */

  /// Returns a scan corrected for motion and distortions
  static NEWIMAGE::volume<float> GetUnwarpedScan(// Input
                                                 const EDDY::ECScan&                               scan,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                 bool                                              use_orig,
                                                 // Optional output
                                                 NEWIMAGE::volume<float>                           *omask=nullptr);

  /// Returns a scan corrected for motion and distortions, helped by the prediction in pred
  static NEWIMAGE::volume<float> GetUnwarpedScan(// Input
                                                 const EDDY::ECScan&                               scan,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                 std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                 const NEWIMAGE::volume<float>&                    pred,
                                                 bool                                              use_orig,
                                                 // Optional output
                                                 NEWIMAGE::volume<float>                           *omask=nullptr);

  /// Returns a scan corrected for motion and distortions. Will override slice-to-vol
  static NEWIMAGE::volume<float> GetVolumetricUnwarpedScan(// Input
                                                           const EDDY::ECScan&                               scan,
                                                           std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                                           std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                                           bool                                              use_orig,
                                                           // Optional output
                                                           NEWIMAGE::volume<float>                           *omask=nullptr,
                                                           NEWIMAGE::volume4D<float>                         *deriv=nullptr);

  /// Calculate the movement RMS for all scans of type st.
  /// Returns a matrix with one row per scan and two columns.
  static arma::mat GetMovementRMS(const ECScanManager& sm,
                                  ScanType             st,
                                  bool                 restricted);

  /// Returns a scan corrected for motion (scanner->model(sort of))
  static void GetMotionCorrectedScan(// Input
                                     const EDDY::ECScan&       scan,
                                     bool                      use_orig,
                                     // Output
                                     NEWIMAGE::volume<float>&  ovol,
                                     // Optional output
                                     NEWIMAGE::volume<float>   *omask=nullptr);

  /// Returns a scan (in model space) warped into observation space
  static NEWIMAGE::volume<float> TransformModelToScanSpace(const EDDY::ECScan&                             scan,
                                                           const NEWIMAGE::volume<float>&                  mima,
                                                           std::shared_ptr<const NEWIMAGE::volume<float> > susc,
                                                           bool                                            jacmod=true);

  /// Returns the derivatives for the model->scan space transform as a 4D volume
  static NEWIMAGE::volume4D<float> DerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                           const NEWIMAGE::volume<float>&                  mima,
                                                                           std::shared_ptr<const NEWIMAGE::volume<float> > susc);

  /// As DerivativesForModelToScanSpaceTransform, but using the "direct" GPU routine
  static NEWIMAGE::volume4D<float> DirectDerivativesForModelToScanSpaceTransform(const EDDY::ECScan&                             scan,
                                                                                 const NEWIMAGE::volume<float>&                  mima,
                                                                                 std::shared_ptr<const NEWIMAGE::volume<float> > susc);

  /// Returns a scan convolved with a Gaussian with fwhm in mm
  static NEWIMAGE::volume<float> Smooth(const NEWIMAGE::volume<float>& ima,
                                        float                          fwhm);

  /// Detects outlier-slices
  static DiffStatsVector DetectOutliers(// Input
                                        const EddyCommandLineOptions&             clo,
                                        ScanType                                  st,
                                        const std::shared_ptr<DWIPredictionMaker> pmp,
                                        const NEWIMAGE::volume<float>&            mask,
                                        const ECScanManager&                      sm,
                                        // Input/Output
                                        ReplacementManager&                       rm);

  /// Detects outlier-slices. Version with additional debugging inputs.
  static DiffStatsVector DetectOutliers(// Input
                                        const EddyCommandLineOptions&             clo,
                                        ScanType                                  st,
                                        const std::shared_ptr<DWIPredictionMaker> pmp,
                                        const NEWIMAGE::volume<float>&            mask,
                                        const ECScanManager&                      sm,
                                        // These inputs are for debugging purposes only
                                        unsigned int                              iter,
                                        unsigned int                              level,
                                        // Input/Output
                                        ReplacementManager&                       rm);

  /// Replaces outlier-slices
  static void ReplaceOutliers(// Input
                              const EddyCommandLineOptions&             clo,
                              ScanType                                  st,
                              const std::shared_ptr<DWIPredictionMaker> pmp,
                              const NEWIMAGE::volume<float>&            mask,
                              const ReplacementManager&                 rm,
                              bool                                      add_noise,
                              // Input/Output
                              ECScanManager&                            sm);

  /// Performs update of movement and EC parameters for one scan.
  static double MovAndECParamUpdate(// Input
                                    const NEWIMAGE::volume<float>&                    pred,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                    const NEWIMAGE::volume<float>&                    pmask,
                                    float                                             fwhm,
                                    bool                                              very_verbose,
                                    unsigned int                                      scindex,
                                    // Input/output
                                    EDDY::ECScan&                                     scan);

  /// Performs update of movement and EC parameters for one scan.
  /// Version with additional debugging inputs.
  static double MovAndECParamUpdate(// Input
                                    const NEWIMAGE::volume<float>&                    pred,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   susc,
                                    std::shared_ptr<const NEWIMAGE::volume<float> >   bias,
                                    const NEWIMAGE::volume<float>&                    pmask,
                                    float                                             fwhm,
                                    bool                                              very_verbose,
                                    // These inputs are for debug purposes only
                                    unsigned int                                      scindex,
                                    unsigned int                                      iter,
                                    unsigned int                                      level,
                                    // Input/output
                                    EDDY::ECScan&                                     scan);

  /// Performs update of long time-constant EC parameters for all scans
  // Does currently not use the bias parameter
  static std::vector<double> LongECParamUpdate(// Input
                                               const std::vector<NEWIMAGE::volume<float> >& pred,         // Predictions in model space
                                               const NEWIMAGE::volume<float>&               pmask,        // "Data valid" mask in model space
                                               float                                        fwhm,         // FWHM for Gaussian smoothing
                                               bool                                         very_verbose, // Detailed output to screen?
                                               // Input/output
                                               EDDY::ECScanManager&                         sm);          // Scans we want to register to predictions

  /// Performs update of long time-constant EC parameters for all scans.
  /// Version with additional debugging inputs.
  // Does currently not use the bias parameter
  static std::vector<double> LongECParamUpdate(// Input
                                               const std::vector<NEWIMAGE::volume<float> >& pred,         // Predictions in model space
                                               const NEWIMAGE::volume<float>&               pmask,        // "Data valid" mask in model space
                                               float                                        fwhm,         // FWHM for Gaussian smoothing
                                               bool                                         very_verbose, // Detailed output to screen?
                                               // These input parameters are for debugging only
                                               unsigned int                                 iter,         // Iteration
                                               unsigned int                                 level,        // Determines how much gets written
                                               const std::vector<unsigned int>&             debug_index,  // Indices of scans to write debug info for
                                               // Input/output
                                               EDDY::ECScanManager&                         sm);          // Scans we want to register to predictions
};

} // End namespace EDDY
#endif // End #ifndef EddyGpuUtils_h
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment