///////////////////////////////////////////////////////////////////// /// /// \file EddyInternalGpuUtils.cu /// \brief Definitions of static class with collection of GPU routines used in the eddy project /// /// \author Jesper Andersson /// \version 1.0b, Nov., 2012. /// \Copyright (C) 2012 University of Oxford /// ///////////////////////////////////////////////////////////////////// // Because of a bug in cuda_fp16.hpp, that gets included by hipblas.h, it has to // be included before any include files that set up anything related to the std-lib. // If not, there will be an ambiguity in cuda_fp16.hpp about wether to use the // old-style C isinf or the new (since C++11) std::isinf. #include "hipblas.h" #include #include #include #include #include #include #include // #include #include #include #include #include #include #include #include #pragma push #pragma diag_suppress = code_is_unreachable // Supress warnings from armawrap #pragma diag_suppress = expr_has_no_effect // Supress warnings from boost #include "armawrap/newmat.h" #include "newimage/newimageall.h" #include "miscmaths/miscmaths.h" #pragma pop #include "utils/FSLProfiler.h" #include "EddyHelperClasses.h" #include "DiffusionGP.h" #include "b0Predictor.h" #include "ECScanClasses.h" #include "EddyUtils.h" #include "EddyCudaHelperFunctions.h" #include "CudaVolume.h" #include "EddyKernels.h" #include "EddyFunctors.h" #include "EddyInternalGpuUtils.h" #include "GpuPredictorChunk.h" #include "StackResampler.h" #include "DerivativeCalculator.h" #include "CublasHandleManager.h" using namespace EDDY; void EddyInternalGpuUtils::load_prediction_maker(// Input const EddyCommandLineOptions& clo, ScanType st, const ECScanManager& sm, unsigned int iter, float fwhm, bool use_orig, // Output std::shared_ptr pmp, NEWIMAGE::volume& mask) EddyTry { static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__)); double total_key = prof.StartEntry("Total"); if (sm.NScans(st)) { EDDY::CudaVolume omask(sm.Scan(0,st).GetIma(),false); omask.SetInterp(NEWIMAGE::trilinear); omask = 1.0; EDDY::CudaVolume tmpmask(omask,false); EDDY::CudaVolume uwscan; EDDY::CudaVolume empty; if (clo.Verbose()) std::cout << "Loading prediction maker"; if (clo.VeryVerbose()) std::cout << std::endl << "Scan:" << std::flush; double load_key = prof.StartEntry("Loading"); for (int s=0; sSetScan(uwscan.GetVolume(),sm.Scan(s,st).GetDiffPara(clo.RotateBVecsDuringEstimation()),s); omask *= tmpmask; } if (clo.VeryVerbose()) std::cout << std::endl << std::flush; mask = omask.GetVolume(); prof.EndEntry(load_key); if (clo.Verbose()) std::cout << std::endl << "Evaluating prediction maker model" << std::endl; double eval_key = prof.StartEntry("Evaluating"); pmp->EvaluateModel(sm.Mask()*mask,fwhm,clo.VeryVerbose()); prof.EndEntry(eval_key); } prof.EndEntry(total_key); return; } EddyCatch /* void EddyInternalGpuUtils::update_prediction_maker(// Input const EddyCommandLineOptions& clo, ScanType st, const ECScanManager& sm, const ReplacementManager& rm, const NEWIMAGE::volume& mask, // Input/Output std::shared_ptr pmp) { EDDY::CudaVolume susc; if (sm.GetSuscHzOffResField()) susc = *(sm.GetSuscHzOffResField()); EDDY::CudaVolume uwscan; EDDY::CudaVolume skrutt; if (clo.Verbose()) std::cout << "Updating prediction maker"; if (clo.VeryVerbose()) std::cout << std::endl << "Scan: "; for (unsigned int s=0; sSetScan(uwscan.GetVolume(),sm.Scan(s,st).GetDiffPara(),s); } } if (clo.Verbose()) std::cout << std::endl << "Evaluating prediction maker model" << std::endl; pmp->EvaluateModel(sm.Mask()*mask,clo.FWHM(),clo.VeryVerbose()); } */ void EddyInternalGpuUtils::get_motion_corrected_scan(// Input const EDDY::ECScan& scan, bool use_orig, // Output EDDY::CudaVolume& oima, // Optional output EDDY::CudaVolume& omask) EddyTry { if (!oima.Size()) oima.SetHdr(scan.GetIma()); else if (oima != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_motion_corrected_scan: scan<->oima mismatch"); if (omask.Size() && omask != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_motion_corrected_scan: scan<->omask mismatch"); EDDY::CudaVolume ima; EDDY::CudaVolume4D skrutt; if (use_orig) ima = scan.GetOriginalIma(); else ima = scan.GetIma(); if (scan.IsSliceToVol()) { std::vector iR = EddyUtils::GetSliceWiseInverseMovementMatrices(scan); EddyInternalGpuUtils::affine_transform(ima,iR,oima,skrutt,omask); } else { // Transform image using inverse RB NEWMAT::Matrix iR = scan.InverseMovementMatrix(); if (omask.Size()) omask = 1.0; EddyInternalGpuUtils::affine_transform(ima,iR,oima,skrutt,omask); } return; } EddyCatch void EddyInternalGpuUtils::get_unwarped_scan(// Input const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, const EDDY::CudaVolume& bias, const EDDY::CudaVolume& pred, bool jacmod, bool use_orig, // Output EDDY::CudaVolume& oima, // Optional output EDDY::CudaVolume& omask) EddyTry { if (!oima.Size()) oima.SetHdr(scan.GetIma()); else if (oima != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_unwarped_scan: scan<->oima mismatch"); if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_unwarped_scan: scan<->susc mismatch"); if (bias.Size() && bias != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_unwarped_scan: scan<->bias mismatch"); if (omask.Size() && omask != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_unwarped_scan: scan<->omask mismatch"); if (pred.Size() && !scan.IsSliceToVol()) throw EDDY::EddyException("EddyInternalGpuUtils::get_unwarped_scan: pred for volumetric does not make sense"); EDDY::CudaVolume ima; if (use_orig) ima = scan.GetOriginalIma(); else ima = scan.GetIma(); if (bias.Size()) { // If we are to correct for receieve bias EDDY::CudaVolume4D idfield(ima,3,false); EDDY::CudaVolume mask1(ima,false); mask1 = 1.0; EDDY::CudaVolume empty; EddyInternalGpuUtils::field_for_model_to_scan_transform(scan,susc,idfield,empty,empty); EDDY::CudaVolume obias; // Biasfield sampled where each voxel in the scan "actually was" NEWMAT::IdentityMatrix I(4); EDDY::CudaVolume4D empty4D; EddyInternalGpuUtils::general_transform(bias,I,idfield,I,obias,empty4D,mask1); ima.DivideWithinMask(obias,mask1); // Correct ima for bias-field } EDDY::CudaVolume4D dfield(ima,3,false); EDDY::CudaVolume4D skrutt; EDDY::CudaVolume jac(ima,false); EDDY::CudaVolume mask2; if (omask.Size()) { mask2.SetHdr(jac); mask2 = 1.0; } EddyInternalGpuUtils::field_for_scan_to_model_transform(scan,susc,dfield,omask,jac); if (scan.IsSliceToVol()) { std::vector iR = EddyUtils::GetSliceWiseInverseMovementMatrices(scan); EddyInternalGpuUtils::general_slice_to_vol_transform(ima,iR,dfield,jac,pred,jacmod,scan.GetPolation(),oima,mask2); } else { NEWMAT::Matrix iR = scan.InverseMovementMatrix(); NEWMAT::IdentityMatrix I(4); EddyInternalGpuUtils::general_transform(ima,iR,dfield,I,oima,skrutt,mask2); if (jacmod) oima *= jac; } if (omask.Size()) { omask *= mask2; omask.SetInterp(NEWIMAGE::trilinear); } } EddyCatch void EddyInternalGpuUtils::get_volumetric_unwarped_scan(// Input const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, const EDDY::CudaVolume& bias, bool jacmod, bool use_orig, // Output EDDY::CudaVolume& oima, // Optional output EDDY::CudaVolume& omask, EDDY::CudaVolume4D& deriv) EddyTry { if (!oima.Size()) oima.SetHdr(scan.GetIma()); else if (oima != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_volumetric_unwarped_scan: scan<->oima mismatch"); if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_volumetric_unwarped_scan: scan<->susc mismatch"); if (bias.Size() && bias != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_volumetric_unwarped_scan: scan<->bias mismatch"); if (omask.Size() && omask != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_volumetric_unwarped_scan: scan<->omask mismatch"); if (deriv.Size() && deriv != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::get_volumetric_unwarped_scan: scan<->deriv mismatch"); EDDY::CudaVolume ima; if (use_orig) ima = scan.GetOriginalIma(); else ima = scan.GetIma(); if (bias.Size()) { // If we are to correct for receieve bias EDDY::CudaVolume4D idfield(ima,3,false); EDDY::CudaVolume mask1(ima,false); mask1 = 1.0; EDDY::CudaVolume empty; EddyInternalGpuUtils::field_for_model_to_scan_transform(scan,susc,idfield,empty,empty); EDDY::CudaVolume obias; // Biasfield sampled where each voxel in the scan "actually was" NEWMAT::IdentityMatrix I(4); EDDY::CudaVolume4D empty4D; EddyInternalGpuUtils::general_transform(bias,I,idfield,I,obias,empty4D,mask1); ima.DivideWithinMask(obias,mask1); // Correct ima for bias-field } EDDY::CudaVolume4D dfield(ima,3,false); EDDY::CudaVolume jac(ima,false); EDDY::CudaVolume mask2; if (omask.Size()) { mask2.SetHdr(jac); mask2 = 1.0; } EddyInternalGpuUtils::field_for_scan_to_model_volumetric_transform(scan,susc,dfield,omask,jac); NEWMAT::Matrix iR = scan.InverseMovementMatrix(); NEWMAT::IdentityMatrix I(4); EddyInternalGpuUtils::general_transform(ima,iR,dfield,I,oima,deriv,mask2); if (jacmod) oima *= jac; if (omask.Size()) { omask *= mask2; omask.SetInterp(NEWIMAGE::trilinear); } } EddyCatch void EddyInternalGpuUtils::detect_outliers(// Input const EddyCommandLineOptions& clo, ScanType st, const std::shared_ptr pmp, const NEWIMAGE::volume& pmask, const ECScanManager& sm, // Input for debugging purposes only unsigned int iter, unsigned int level, // Input/Output ReplacementManager& rm, DiffStatsVector& dsv) EddyTry { static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__)); double total_key = prof.StartEntry("Total"); if (dsv.NScan() != sm.NScans(st)) throw EDDY::EddyException("EddyInternalGpuUtils::detect_outliers: dsv<->sm mismatch"); if (clo.Verbose()) std::cout << "Checking for outliers" << std::endl; // Generate slice-wise stats on difference between observation and prediction for (GpuPredictorChunk c(sm,st); c si = c.Indicies(); EDDY::CudaVolume pios(pmask,false); EDDY::CudaVolume mios(pmask,false); EDDY::CudaVolume mask(pmask,false); EDDY::CudaVolume jac(pmask,false); EDDY::CudaVolume skrutt(pmask,false); EDDY::CudaVolume4D skrutt4D; if (clo.VeryVerbose()) std::cout << "Making predictions for scans: " << c << std::endl; std::vector > cpred = pmp->Predict(si); if (clo.VeryVerbose()) { std::cout << "Checking scan: "; std::cout.flush(); } for (unsigned int i=0; i 2 && clo.DebugIndicies().IsAmongIndicies(sm.GetGlobalIndex(si[i],st))) { // Note that we use a local "level" instead of clo.DebugLevel() NEWIMAGE::volume out = sm.Scan(si[i],st).GetOriginalIma()-pios.GetVolume(); out *= mask.GetVolume(); std::string prefix("EDDY_DEBUG_GPU_"); if (st==ScanType::b0) prefix += "b0"; else if (st==ScanType::DWI) prefix += "DWI"; else if (st==ScanType::fMRI) prefix += "fMRI"; char fname[256]; sprintf(fname,"%s_repol_diff_%02d_%04d",prefix.c_str(),iter,sm.GetGlobalIndex(si[i],st)); NEWIMAGE::write_volume(out,fname); } } } if (clo.VeryVerbose()) std::cout << std::endl; // Detect outliers and update replacement manager rm.Update(dsv); prof.EndEntry(total_key); if (level) { // Note that we use a local "level" instead of clo.DebugLevel() std::string prefix("EDDY_DEBUG_GPU_"); if (st==ScanType::b0) prefix += "b0"; else if (st==ScanType::DWI) prefix += "DWI"; else if (st==ScanType::fMRI) prefix += "fMRI"; char fname[256]; sprintf(fname,"%s_repol_info_%02d.json",prefix.c_str(),iter); rm.WriteDebugInfo(fname,sm.GetDwi2GlobalIndexMapping(),sm.NScans()); } return; } EddyCatch void EddyInternalGpuUtils::replace_outliers(// Input const EddyCommandLineOptions& clo, ScanType st, const std::shared_ptr pmp, const NEWIMAGE::volume& pmask, const ReplacementManager& rm, bool add_noise, // Input/Output ECScanManager& sm) EddyTry { static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__)); double total_key = prof.StartEntry("Total"); // Replace outlier slices with their predictions if (clo.VeryVerbose()) std::cout << "Replacing outliers with predictions" << std::endl; for (unsigned int s=0; s ol = rm.OutliersInScan(s); if (ol.size()) { // If this scan has outlier slices if (clo.VeryVerbose()) { std::cout << "Scan " << sm.GetGlobalIndex(s,st) << " has outlier slices:"; for (unsigned int i=0; iPredict(s,true); // Make prediction EDDY::CudaVolume pios(pred,false); EDDY::CudaVolume mios(pred,false); EDDY::CudaVolume mask(pred,false); EDDY::CudaVolume jac(pred,false); EDDY::CudaVolume skrutt;; EDDY::CudaVolume4D skrutt4D; EDDY::CudaVolume susc; if (sm.HasSuscHzOffResField()) susc = *(sm.GetSuscHzOffResField(s,st)); // Transform prediction into observation space EddyInternalGpuUtils::transform_model_to_scan_space(pred,sm.Scan(s,st),susc,true,pios,mask,jac,skrutt4D); // Transform binary mask into observation space EDDY::CudaVolume pmask_cuda = pmask; pmask_cuda.SetInterp(NEWIMAGE::trilinear); EddyInternalGpuUtils::transform_model_to_scan_space(pmask_cuda,sm.Scan(s,st),susc,false,mios,mask,skrutt,skrutt4D); mios.Binarise(0.9); // Value above (arbitrary) 0.9 implies valid voxels mask *= mios; // Volume and prediction mask falls within FOV if (add_noise) { double vp = pmp->PredictionVariance(s,true); double ve = pmp->ErrorVariance(s); double stdev = std::sqrt(vp+ve) - std::sqrt(vp); EDDY::CudaVolume nvol(pios,false); nvol.MakeNormRand(0.0,stdev); pios += nvol; } sm.Scan(s,st).SetAsOutliers(pios.GetVolume(),mask.GetVolume(),ol); } } prof.EndEntry(total_key); return; } EddyCatch void EddyInternalGpuUtils::field_for_scan_to_model_transform(// Input const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, // Output EDDY::CudaVolume4D& dfield, // Optional output EDDY::CudaVolume& omask, EDDY::CudaVolume& jac) EddyTry { if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->susc mismatch"); if (dfield.Size() && dfield != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->dfield mismatch"); if (omask.Size() && omask != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->omask mismatch"); if (jac.Size() && jac != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->jac mismatch"); // Get EC field EDDY::CudaVolume ec; EddyInternalGpuUtils::get_ec_field(scan,ec); // omask defines where the transformed EC map is valid if (omask.Size()) { omask = 1.0; omask.SetInterp(NEWIMAGE::trilinear); } EDDY::CudaVolume tot(ec,false); tot = 0.0; if (scan.IsSliceToVol()) { // Original code /* std::vector iR = EddyUtils::GetSliceWiseInverseMovementMatrices(scan); EDDY::CudaVolume4D skrutt; EddyInternalGpuUtils::affine_transform(ec,iR,tot,skrutt,omask); */ // New code std::vector iR = EddyUtils::GetSliceWiseInverseMovementMatrices(scan); NEWMAT::Matrix M1 = ec.Ima2WorldMatrix(); std::vector AA(iR.size()); for (unsigned int i=0; isusc mismatch"); if (dfield.Size() && dfield != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->dfield mismatch"); if (omask.Size() && omask != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->omask mismatch"); if (jac.Size() && jac != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_scan_to_model_transform: scan<->jac mismatch"); // Get EC field EDDY::CudaVolume ec; EddyInternalGpuUtils::get_ec_field(scan,ec); // omask defines where the transformed EC map is valid if (omask.Size()) { omask = 1.0; omask.SetInterp(NEWIMAGE::trilinear); } EDDY::CudaVolume tot(ec,false); tot = 0.0; // Get RB matrix NEWMAT::Matrix iR = scan.InverseMovementMatrix(); EDDY::CudaVolume4D skrutt; // Transform EC field using RB EddyInternalGpuUtils::affine_transform(ec,iR,tot,skrutt,omask); // Add transformed EC and susc if (susc.Size()) tot += susc; // Convert Hz-map to displacement field FieldGpuUtils::Hz2VoxelDisplacements(tot,scan.GetAcqPara(),dfield); // Get Jacobian of tot map if (jac.Size()) FieldGpuUtils::GetJacobian(dfield,scan.GetAcqPara(),jac); // Transform dfield from voxels to mm FieldGpuUtils::Voxel2MMDisplacements(dfield); } EddyCatch double EddyInternalGpuUtils::param_update(// Input const NEWIMAGE::volume& pred, // Prediction in model space std::shared_ptr > susc, // Susc-induced off-resonance field std::shared_ptr > bias, // Recieve bias field const NEWIMAGE::volume& pmask, // Pre-defined mask in model space EDDY::ParametersType whichp, // Which parameters to update float fwhm, // FWHM of smoothing bool very_verbose, // These inputs are for debug purposes only unsigned int scindx, unsigned int iter, unsigned int level, // Input/output EDDY::ECScan& scan, // Scan we want to register to pred // Optional output NEWMAT::ColumnVector *rupdate) EddyTry // Vector of updates { static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__)); double total_key = prof.StartEntry("Total"); // Put input images onto the GPU EDDY::CudaVolume pred_gpu(pred); // Transfer susceptibility field to GPU EDDY::CudaVolume susc_gpu; if (susc != nullptr) susc_gpu = *susc; // Transfer bias field to GPU EDDY::CudaVolume bias_gpu; if (bias != nullptr) bias_gpu = *bias; // Transfer binary input mask to GPU EDDY::CudaVolume pmask_gpu(pmask); pmask_gpu.SetInterp(NEWIMAGE::trilinear); // Define zero-size placeholders for use throughout function EDDY::CudaVolume skrutt; EDDY::CudaVolume4D skrutt4D; double deriv_key = prof.StartEntry("Calculating derivatives"); EDDY::DerivativeCalculator dc(pred_gpu,pmask_gpu,scan,susc_gpu,whichp,fwhm,DerivType::Mixed); prof.EndEntry(deriv_key); // Calculate XtX where X is a matrix whose columns are the partial derivatives arma::mat XtX; double XtX_key = prof.StartEntry("Calculating XtX"); if (scan.IsSliceToVol()) XtX = EddyInternalGpuUtils::make_XtX_cuBLAS(dc.Derivatives()); else XtX = EddyInternalGpuUtils::make_XtX(dc.Derivatives(),dc.MaskInScanSpace()); prof.EndEntry(XtX_key); // Calculate difference image between observed and predicted EDDY::CudaVolume dima = dc.PredInScanSpace() - EDDY::CudaVolume(scan.GetIma()); if (fwhm) dima.Smooth(fwhm,dc.MaskInScanSpace()); // Calculate Xty where y is the difference between observed and predicted. X as above. arma::colvec Xty = EddyInternalGpuUtils::make_Xty(dc.Derivatives(),dima,dc.MaskInScanSpace()); // Get derivative and Hessian of regularisation (relevant only for slice-to-vol); arma::colvec lHb = scan.GetRegGrad(whichp); arma::mat H = scan.GetRegHess(whichp); // Calculate mean sum of squares from difference image and add regularisation double masksum = dc.MaskInScanSpace().Sum(); double mss = dima.SumOfSquares(dc.MaskInScanSpace()) / masksum + scan.GetReg(whichp); // Very mild Tikhonov regularisation to select solution with smaller norm arma::mat regm = scan.GetLevenbergLambda() * arma::eye(XtX.n_rows,XtX.n_cols); // Calculate update to parameters arma::colvec update = -arma::solve(XtX/masksum+H+regm,Xty/masksum+lHb,arma::solve_opts::likely_sympd); // Update parameters for (unsigned int i=0; i= mss) && scan.GetLevenbergLambda() < 1.0e6) { if (very_verbose) { std::cout << "EddyInternalGpuUtils::param_update: updates rejected for volume #" << scindx << std::endl; std::cout << "EddyInternalGpuUtils::param_update: Levenberg-lambda = " << scan.GetLevenbergLambda(); std::cout << ", original mss = " << mss << ", after update mss = " << new_mss << std::endl; std::cout.flush(); } // Set back parameters to old values for (unsigned int i=0; i= mss) { // It never decreased if (very_verbose) { std::cout << "EddyInternalGpuUtils::param_update: Final attempt, updates rejected for volume #" << scindx << std::endl; std::cout << "EddyInternalGpuUtils::param_update: Levenberg-lambda = " << scan.GetLevenbergLambda(); std::cout << ", original mss = " << mss << ", after update mss = " << new_mss << std::endl; std::cout.flush(); } // Set back parameters to old values for (unsigned int i=0; i0) { sprintf(fname,"%s_masked_dima_%02d_%04d",bname,iter,scindx); scratch = dima * dc.MaskInScanSpace(); scratch.Write(fname); } if (level>1) { sprintf(fname,"%s_mask_%02d_%04d",bname,iter,scindx); dc.MaskInScanSpace().Write(fname); sprintf(fname,"%s_pios_%02d_%04d",bname,iter,scindx); dc.PredInScanSpace().Write(fname); sprintf(fname,"%s_pred_%02d_%04d",bname,iter,scindx); pred.Write(fname); sprintf(fname,"%s_dima_%02d_%04d",bname,iter,scindx); dima.Write(fname); sprintf(fname,"%s_jac_%02d_%04d",bname,iter,scindx); dc.JacInScanSpace().Write(fname); sprintf(fname,"%s_orig_%02d_%04d",bname,iter,scindx); scratch = scan.GetIma(); scratch.Write(fname); } if (level>2) { sprintf(fname,"%s_mios_%02d_%04d",bname,iter,scindx); dc.MaskInScanSpace().Write(fname); sprintf(fname,"%s_pmask_%02d_%04d",bname,iter,scindx); pmask.Write(fname); sprintf(fname,"%s_derivs_%02d_%04d",bname,iter,scindx); dc.Derivatives().Write(fname); sprintf(fname,"%s_XtX_%02d_%04d.txt",bname,iter,scindx); MISCMATHS::write_ascii_matrix(fname,XtX,20); sprintf(fname,"%s_Xty_%02d_%04d.txt",bname,iter,scindx); MISCMATHS::write_ascii_matrix(fname,Xty,20); sprintf(fname,"%s_update_%02d_%04d.txt",bname,iter,scindx); MISCMATHS::write_ascii_matrix(fname,update,20); } return; } EddyCatch std::vector EddyInternalGpuUtils::long_ec_update(// Input const std::vector >& pred, // Predictions in model space const NEWIMAGE::volume& pmask, // "Data valid" mask in model space float fwhm, // FWHM for Gaussian smoothing bool very_verbose, // Write detailed output to screen? // These input parameters are for debugging only unsigned int iter, // Iteration unsigned int level, // Determines how much gets written const std::vector& debug_index, // Indicies of scans to write debug info for // Input/output EDDY::ECScanManager& sm) EddyTry // Scans we want to register to predictions { static Utilities::FSLProfiler prof("_"+std::string(__FILE__)+"_"+std::string(__func__)); double total_key = prof.StartEntry("Total"); // Allocate matrices used for calculating updates. std::vector XtX(sm.NScans()); std::vector Xty(sm.NScans()); std::vector mss(sm.NScans(),0.0); std::vector updates; double mean_masksum = 0.0; arma::mat joint_XtX; arma::colvec joint_Xty; // Transfer images to the GPU EDDY::CudaVolume susc_gpu; if (sm.HasSuscHzOffResField() && !sm.HasSuscHzOffResDerivField()) susc_gpu = *(sm.GetSuscHzOffResField()); EDDY::CudaVolume pmask_gpu(pmask); // Define zero-size placeholders for use throughout function EDDY::CudaVolume skrutt; EDDY::CudaVolume4D skrutt4D; // Caclulate matrices (XtX and Xty) and mss for all scans for (unsigned int s=0; s(sm.NScans()); // Next calculate updates. This will look a little different depending on which model we should use if (sm.LongTimeConstantECModel().IsIndividual()) { // We should not average matrices unsigned int nd = sm.LongTimeConstantECModel().NDerivs(); updates.resize(sm.NScans(),arma::colvec(nd,arma::fill::zeros)); for (unsigned int s=0; s= mss[s]) && sm.LongTimeConstantECModel().GetLevenbergLambda(s) < 1e6) { if (very_verbose) { // Write info about failed update if output is very verbose std::cout << "EddyInternalGpuUtils::long_ec_update: updates rejected for volume #" << s << std::endl; std::cout << "EddyInternalGpuUtils::long_ec_update: Levenberg-lambda = " << sm.LongTimeConstantECModel().GetLevenbergLambda(s); std::cout << ", original mss = " << mss[s] << ", after update mss = " << new_mss << std::endl; std::cout.flush(); } sm.LongTimeConstantECModel().UpdateParameters(sm.Scan(s),s,-updates[s]); // Set back to old values sm.LongTimeConstantECModel().SetLevenbergLambda(s,10.0*sm.LongTimeConstantECModel().GetLevenbergLambda(s)); regm = sm.LongTimeConstantECModel().GetLevenbergLambda(s) * arma::eye(nd,nd); // Tikhonov/Levenberg regularisation updates[s] = -arma::solve(XtX[s]+regm,Xty[s],arma::solve_opts::likely_sympd); sm.LongTimeConstantECModel().UpdateParameters(sm.Scan(s),s,updates[s]); // Try the new updates new_mss = EddyInternalGpuUtils::calculate_new_mss(pred_gpu,pmask_gpu,susc_gpu,fwhm,sm.Scan(s)); } if (std::isnan(new_mss) || new_mss >= mss[s]) { if (very_verbose) { // Write info about failed update if output is very verbose std::cout << "EddyInternalGpuUtils::long_ec_update: Final attempt, updates rejected for volume #" << s << std::endl; std::cout << "EddyInternalGpuUtils::long_ec_update: Levenberg-lambda = " << sm.LongTimeConstantECModel().GetLevenbergLambda(s); std::cout << ", original mss = " << mss[s] << ", after update mss = " << new_mss << std::endl; std::cout.flush(); } sm.LongTimeConstantECModel().UpdateParameters(sm.Scan(s),s,-updates[s]); // Set back if it never improved } else { // It worked in the end, so let's roll back lambda by one step sm.LongTimeConstantECModel().SetLevenbergLambda(s,0.1*sm.LongTimeConstantECModel().GetLevenbergLambda(s)); } } } } else if (sm.LongTimeConstantECModel().IsJoint()) { unsigned int nd = sm.LongTimeConstantECModel().NDerivs(); updates.resize(1,arma::colvec(nd,arma::fill::zeros)); joint_XtX = std::accumulate(XtX.begin(),XtX.end(),arma::mat(nd,nd,arma::fill::zeros)); joint_Xty = std::accumulate(Xty.begin(),Xty.end(),arma::colvec(nd,arma::fill::zeros)); arma::mat regm = sm.LongTimeConstantECModel().GetLevenbergLambda(0) * arma::eye(nd,nd); // Tikhonov/Levenberg regularisation updates[0] = -arma::solve(joint_XtX+regm,joint_Xty,arma::solve_opts::likely_sympd); sm.LongTimeConstantECModel().UpdateParameters(sm,updates); double total_new_mss = EddyInternalGpuUtils::calculate_new_mss(pred,pmask_gpu,fwhm,sm); if (!std::isnan(total_new_mss) && total_new_mss < std::accumulate(mss.begin(),mss.end(),0.0)) { // Success sm.LongTimeConstantECModel().SetLevenbergLambda(0,0.1*sm.LongTimeConstantECModel().GetLevenbergLambda(0)); } else { // mss didn't increase, increase regularisation until it does. while ((std::isnan(total_new_mss) || total_new_mss >= std::accumulate(mss.begin(),mss.end(),0.0)) && sm.LongTimeConstantECModel().GetLevenbergLambda(0) < 1e6) { if (very_verbose) { // Write info about failed update if output is very verbose std::cout << "EddyInternalGpuUtils::long_ec_update: updates rejected for all volumes" << std::endl; std::cout << "EddyInternalGpuUtils::long_ec_update: Levenberg-lambda = " << sm.LongTimeConstantECModel().GetLevenbergLambda(0); std::cout << ", original mss = " << std::accumulate(mss.begin(),mss.end(),0.0) << ", after update mss = " << total_new_mss << std::endl; std::cout.flush(); } sm.LongTimeConstantECModel().UpdateParameters(sm,EddyUtils::Negate(updates)); // Set back to old values sm.LongTimeConstantECModel().SetLevenbergLambda(0,10.0*sm.LongTimeConstantECModel().GetLevenbergLambda(0)); regm = sm.LongTimeConstantECModel().GetLevenbergLambda(0) * arma::eye(nd,nd); // Tikhonov/Levenberg regularisation updates[0] = -arma::solve(joint_XtX+regm,joint_Xty,arma::solve_opts::likely_sympd); sm.LongTimeConstantECModel().UpdateParameters(sm,updates); total_new_mss = EddyInternalGpuUtils::calculate_new_mss(pred,pmask_gpu,fwhm,sm); } if (std::isnan(total_new_mss) || total_new_mss >= std::accumulate(mss.begin(),mss.end(),0.0)) { // Set back if it never improved if (very_verbose) { // Write info about failed update if output is very verbose std::cout << "EddyInternalGpuUtils::long_ec_update: updates rejected for all volumes" << std::endl; std::cout << "EddyInternalGpuUtils::long_ec_update: Levenberg-lambda = " << sm.LongTimeConstantECModel().GetLevenbergLambda(0); std::cout << ", original mss = " << std::accumulate(mss.begin(),mss.end(),0.0) << ", after update mss = " << total_new_mss << std::endl; std::cout.flush(); } sm.LongTimeConstantECModel().UpdateParameters(sm,EddyUtils::Negate(updates)); // Set back to old values } } } else throw EddyException("EddyInternalGpuUtils::long_ec_update: Invalid long EC model"); prof.EndEntry(total_key); return(mss); } EddyCatch double EddyInternalGpuUtils::calculate_new_mss(// Input const EDDY::CudaVolume& pred_gpu, // Predictions in model space const EDDY::CudaVolume& pmask_gpu, // "Data valid" mask in model space const EDDY::CudaVolume& susc_gpu, // Susc map already on GPU float fwhm, const EDDY::ECScan& scan) EddyTry { // Define zero-size placeholders EDDY::CudaVolume skrutt; EDDY::CudaVolume4D skrutt4D; EDDY::CudaVolume pios(pred_gpu,false); EDDY::CudaVolume jac(pred_gpu,false); EDDY::CudaVolume mask(pred_gpu,false); mask = 1.0; EddyInternalGpuUtils::transform_model_to_scan_space(pred_gpu,scan,susc_gpu,true,pios,mask,jac,skrutt4D); // Transform binary mask into observation space mask = 0.0; EDDY::CudaVolume mios(pmask_gpu,false); EddyInternalGpuUtils::transform_model_to_scan_space(pmask_gpu,scan,susc_gpu,false,mios,mask,skrutt,skrutt4D); mios.Binarise(0.99); // Value above (arbitrary) 0.99 implies valid voxels mask *= mios; // Volume and prediction mask falls within FOV EDDY::CudaVolume ndima = pios-EDDY::CudaVolume(scan.GetIma()); // new difference ima if (fwhm) ndima.Smooth(fwhm,mask); double mss = ndima.SumOfSquares(mask) / mask.Sum() ; return(mss); } EddyCatch double EddyInternalGpuUtils::calculate_new_mss(// Input const std::vector >& pred, // Predictions in model space const EDDY::CudaVolume& pmask_gpu, // "Data valid" mask in model space float fwhm, const EDDY::ECScanManager& sm) EddyTry { CudaVolume susc_gpu = *(sm.GetSuscHzOffResField(0)); // May be valid for all volumes double mss = 0.0; for (unsigned int s=0; s& debug_indx, unsigned int iter, unsigned int level, const EDDY::DerivativeCalculator& dc, const EDDY::CudaVolume& pred, const EDDY::CudaVolume& dima, const EDDY::CudaVolume& pmask, const arma::mat& XtX, const arma::colvec& Xty) EddyTry { if (std::any_of(debug_indx.begin(),debug_indx.end(),[scindx](unsigned int dindx){ return(dindx==scindx); })) { // If scindx among debug_indx char fname[256]; EDDY::CudaVolume scratch; if (level>0) { sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_masked_dima_%02d_%04d",iter,scindx); scratch = dima * dc.MaskInScanSpace(); scratch.Write(fname); } if (level>1) { sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_mask_%02d_%04d",iter,scindx); dc.MaskInScanSpace().Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_pios_%02d_%04d",iter,scindx); dc.PredInScanSpace().Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_pred_%02d_%04d",iter,scindx); pred.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_dima_%02d_%04d",iter,scindx); dima.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_jac_%02d_%04d",iter,scindx); dc.JacInScanSpace().Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_orig_%02d_%04d",iter,scindx); scratch = scan.GetIma(); scratch.Write(fname); } if (level>2) { sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_mios_%02d_%04d",iter,scindx); dc.MaskInScanSpace().Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_pmask_%02d_%04d",iter,scindx); pmask.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_derivs_%02d_%04d",iter,scindx); dc.Derivatives().Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_XtX_%02d_%04d.txt",iter,scindx); MISCMATHS::write_ascii_matrix(fname,XtX,20); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_Xty_%02d_%04d.txt",iter,scindx); MISCMATHS::write_ascii_matrix(fname,Xty,20); } } return; } EddyCatch void EddyInternalGpuUtils::write_debug_info_long_ec_updates(unsigned int iter, const EDDY::LongECModel& lecm, const std::vector& updates) EddyTry { char fname[256]; sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_updates_%02d",iter); std::ofstream file; try { file.open(fname,std::ios::out|std::ios::trunc); switch (lecm.Model()) { case LongECModelType::Individual: file << "Individual weights: Two rows per volume, Previous+Current. One column per MB-group." << std::endl; break; case LongECModelType::Joint: file << "Joint weights: Two rows, Previous+Current. One column per MB-group." << std::endl; break; case LongECModelType::IndividualTimeConstant: file << "Individual time-constants: One row (half-life in seconds) per volume." << std::endl; break; case LongECModelType::JointTimeConstant: file << "Joint time-constant: One row (half-life in seconds)." << std::endl; break; default: throw EddyException("EddyInternalGpuUtils::write_debug_info_long_ec_updates: Invalid LongECModelType"); break; } if (lecm.EstimatesWeights()) { for (unsigned int i=0; i& debug_indx, unsigned int iter, unsigned int level, const EDDY::CudaVolume& dima, const EDDY::CudaVolume& mask, const EDDY::CudaVolume& pios, const EDDY::CudaVolume& jac) EddyTry { if (std::any_of(debug_indx.begin(),debug_indx.end(),[scindx](unsigned int dindx){ return(dindx==scindx); })) { // If scindx among debug_indx if (level > 1) { char fname[256]; EDDY::CudaVolume scratch; sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_new_masked_dima_%02d_%04d",iter,scindx); scratch = dima * mask; scratch.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_new_mask_%02d_%04d",iter,scindx); mask.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_new_pios_%02d_%04d",iter,scindx); pios.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_new_dima_%02d_%04d",iter,scindx); dima.Write(fname); sprintf(fname,"EDDY_DEBUG_LONG_EC_GPU_new_jac_%02d_%04d",iter,scindx); jac.Write(fname); } } } EddyCatch void EddyInternalGpuUtils::transform_model_to_scan_space(// Input const EDDY::CudaVolume& pred, const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, bool jacmod, // Output EDDY::CudaVolume& oima, EDDY::CudaVolume& omask, // Optional output EDDY::CudaVolume& jac, EDDY::CudaVolume4D& grad) EddyTry { // static int cnt=1; // Some input checking if (oima != pred) oima.SetHdr(pred); if (omask != pred) omask.SetHdr(pred); if (jac.Size() && jac!=pred) throw EDDY::EddyException("EddyInternalGpuUtils::transform_model_to_scan_space: jac size mismatch"); if (grad.Size() && grad!=pred) throw EDDY::EddyException("EddyInternalGpuUtils::transform_model_to_scan_space: grad size mismatch"); if (jacmod && !jac.Size()) throw EDDY::EddyException("EddyInternalGpuUtils::transform_model_to_scan_space: jacmod can only be used with valid jac"); EDDY::CudaVolume4D dfield(susc,3,false); EDDY::CudaVolume mask2(omask,false); NEWMAT::IdentityMatrix I(4); if (scan.IsSliceToVol()) { // Get field for the transform EddyInternalGpuUtils::field_for_model_to_scan_transform(scan,susc,dfield,omask,jac); // Get RB matrices, one per slice. std::vector R = EddyUtils::GetSliceWiseForwardMovementMatrices(scan); std::vector II(R.size()); for (unsigned int i=0; i duration = end_get_field-start_get_field; // std::cout << "EddyInternalGpuUtils::field_for_model_to_scan_transform(scan,susc,dfield,omask,jac); took " << duration.count() << " seconds" << std::endl; // Get RB matrix NEWMAT::Matrix R = scan.ForwardMovementMatrix(); // Transform prediction/model EddyInternalGpuUtils::general_transform(pred,I,dfield,R,oima,grad,mask2); // char fname[256]; sprintf(fname,"field_%03d",cnt); dfield.Write(fname); cnt++; } omask *= mask2; omask.SetInterp(NEWIMAGE::trilinear); if (jacmod) oima *= jac; return; } EddyCatch void EddyInternalGpuUtils::field_for_model_to_scan_transform(// Input const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, // Output EDDY::CudaVolume4D& idfield, EDDY::CudaVolume& omask, // Optional output EDDY::CudaVolume& jac) EddyTry { // Some input checking if (idfield != scan.GetIma()) idfield.SetHdr(scan.GetIma(),3); if (omask.Size() && omask != scan.GetIma()) omask.SetHdr(scan.GetIma()); if (susc.Size() && susc != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_model_to_scan_transform: susc size mismatch"); if (jac.Size() && jac != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::field_for_model_to_scan_transform: jac size mismatch"); EDDY::CudaVolume tot(scan.GetIma(),false); // Total (EC and susc) field EDDY::CudaVolume mask(scan.GetIma(),false); mask = 1.0; // Defines where transformed susc field is valid EddyInternalGpuUtils::get_ec_field(scan,tot); if (susc.Size()) { EDDY::CudaVolume tsusc(susc,false); if (scan.IsSliceToVol()) { std::vector R = EddyUtils::GetSliceWiseForwardMovementMatrices(scan); EDDY::CudaVolume4D skrutt; EddyInternalGpuUtils::affine_transform(susc,R,tsusc,skrutt,mask); } else { NEWMAT::Matrix R = scan.ForwardMovementMatrix(); EDDY::CudaVolume4D skrutt; EddyInternalGpuUtils::affine_transform(susc,R,tsusc,skrutt,mask); } tot += tsusc; } // Convert Hz map to displacement field EDDY::CudaVolume4D dfield(tot,3,false); FieldGpuUtils::Hz2VoxelDisplacements(tot,scan.GetAcqPara(),dfield); // Invert displacement field FieldGpuUtils::InvertDisplacementField(dfield,scan.GetAcqPara(),mask,idfield,omask); // Get jacobian of inverted field if (jac.Size()) FieldGpuUtils::GetDiscreteJacobian(idfield,scan.GetAcqPara(),jac); // Transform field to mm displacements FieldGpuUtils::Voxel2MMDisplacements(idfield); } EddyCatch EDDY::CudaImageCoordinates EddyInternalGpuUtils::transform_coordinates_from_model_to_scan_space(// Input const EDDY::CudaVolume& pred, const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, // Output EDDY::CudaImageCoordinates& coord, // Optional Output EDDY::CudaVolume& omask, EDDY::CudaVolume& jac) EddyTry { if (pred != scan.GetIma()) throw EDDY::EddyException("EddyInternalGpuUtils::transform_coordinates_from_model_to_scan_space: pred<->scan mismatch"); if (susc.Size() && pred != susc) throw EDDY::EddyException("EddyInternalGpuUtils::transform_coordinates_from_model_to_scan_space: pred<->susc mismatch"); if (omask.Size() && omask != pred) throw EDDY::EddyException("EddyInternalGpuUtils::transform_coordinates_from_model_to_scan_space: pred<->omask mismatch"); if (jac.Size() && jac != pred) throw EDDY::EddyException("EddyInternalGpuUtils::transform_coordinates_from_model_to_scan_space: pred<->jac mismatch"); // Get total field from scan EDDY::CudaVolume4D dfield; EddyInternalGpuUtils::field_for_model_to_scan_transform(scan,susc,dfield,omask,jac); // Get RB matrix NEWMAT::Matrix R = scan.ForwardMovementMatrix(); // Convert matrices to mimic behaviour of warpfns:general_transform NEWMAT::Matrix A = pred.Ima2WorldMatrix(); NEWMAT::Matrix M = pred.World2ImaMatrix() * R.i(); // Transform coordinates using RB and inverted Tot map if (omask.Size()) { EDDY::CudaVolume mask2(omask,false); coord.Transform(A,dfield,M); pred.ValidMask(coord,mask2); omask *= mask2; omask.SetInterp(NEWIMAGE::trilinear); } else coord.Transform(A,dfield,M); return(coord); } EddyCatch void EddyInternalGpuUtils::get_partial_derivatives_in_scan_space(const EDDY::CudaVolume& pred, const EDDY::ECScan& scan, const EDDY::CudaVolume& susc, EDDY::ParametersType whichp, EDDY::CudaVolume4D& derivs) EddyTry { EDDY::CudaVolume base(pred,false); EDDY::CudaVolume mask(pred,false); EDDY::CudaVolume basejac(pred,false); EDDY::CudaVolume4D grad(pred,3,false); EddyInternalGpuUtils::transform_model_to_scan_space(pred,scan,susc,true,base,mask,basejac,grad); EDDY::CudaImageCoordinates basecoord(pred.Size(0),pred.Size(1),pred.Size(2)); EDDY::CudaVolume skrutt; EddyInternalGpuUtils::transform_coordinates_from_model_to_scan_space(pred,scan,susc,basecoord,skrutt,skrutt); if (derivs != pred || derivs.Size(3) != scan.NDerivs(whichp)) derivs.SetHdr(pred,scan.NDerivs(whichp)); EDDY::CudaVolume jac(pred,false); EDDY::ECScan sc = scan; for (unsigned int i=0; i>>(base.Size(0),base.Size(1),base.Size(2),coord.XPtr(),coord.YPtr(),coord.ZPtr(), grad.GetPtr(0),grad.GetPtr(1),grad.GetPtr(2),base.GetPtr(),jac.GetPtr(), basejac.GetPtr(),dstep,deriv.GetPtr(indx),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::make_deriv"); /* EddyKernels::make_deriv_first_part<<>>(base.Size(0),base.Size(1),base.Size(2),coord.XPtr(),coord.YPtr(),coord.ZPtr(), grad.GetPtr(0),grad.GetPtr(1),grad.GetPtr(2),base.GetPtr(),jac.GetPtr(), basejac.GetPtr(),dstep,deriv.GetPtr(indx),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::make_deriv_first_part"); if (indx == 6) deriv.Write(indx,"derivative_first_part"); EddyKernels::make_deriv_second_part<<>>(base.Size(0),base.Size(1),base.Size(2),coord.XPtr(),coord.YPtr(),coord.ZPtr(), grad.GetPtr(0),grad.GetPtr(1),grad.GetPtr(2),base.GetPtr(),jac.GetPtr(), basejac.GetPtr(),dstep,deriv.GetPtr(indx),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::make_deriv_second_part"); if (indx == 6) { deriv.Write(indx,"derivative_second_part"); exit(0); } */ return; } EddyCatch NEWMAT::Matrix EddyInternalGpuUtils::make_XtX(const EDDY::CudaVolume4D& X, const EDDY::CudaVolume& mask) EddyTry { thrust::device_vector dXtX(X.Size(3)*X.Size(3)); thrust::device_vector masked(X.Size()); for (int i=0; i()); } catch(thrust::system_error &e) { std::cerr << "thrust::system_error thrown in EddyInternalGpuUtils::make_XtX after copy and transform with index: " << i << ", and message: " << e.what() << std::endl; throw; } for (int j=i; j hXtX = dXtX; // Whole matrix in one transfer NEWMAT::Matrix XtX(X.Size(3),X.Size(3)); for (int i=0; i masked(X.Size()); try { thrust::copy(mask.Begin(),mask.End(),masked.begin()); thrust::transform(y.Begin(),y.End(),masked.begin(),masked.begin(),thrust::multiplies()); } catch(thrust::system_error &e) { std::cerr << "thrust::system_error thrown in EddyInternalGpuUtils::make_Xty after copy and transform with message: " << e.what() << std::endl; throw; } for (int i=0; i& hypar, // Output NEWIMAGE::volume4D& pred, // Optional input bool vwbvrot) EddyTry { ScanType st = ScanType::DWI; const NEWIMAGE::volume& timp = sm.Scan(0,st).GetIma(); // Scratch image ref // Get a grid of rotated bvecs for each slice and volume std::vector > bvecs(sm.NScans(st)); for (unsigned int s=0; s dpv = sm.GetDiffParas(st); std::vector grpb; // Vector of b-values for the different shells std::vector grpi; // Vector of indicies (one per dwi) into grpb std::vector > grps; // grpb.size() number of vectors of vectors of indicies into dpv EddyUtils::GetGroups(dpv,grps,grpi,grpb); // Perform a "half-way" slice-to-vol reorientation for all scans. NEWIMAGE::volume4D stacks(timp.xsize(),timp.ysize(),timp.zsize(),sm.NScans(st)); NEWIMAGE::volume4D masks(timp.xsize(),timp.ysize(),timp.zsize(),sm.NScans(st)); NEWIMAGE::volume4D zcoords(timp.xsize(),timp.ysize(),timp.zsize(),sm.NScans(st)); NEWIMAGE::volume4D shellmeans(timp.xsize(),timp.ysize(),timp.zsize(),grpb.size()); NEWIMAGE::volume4D shellcount(timp.xsize(),timp.ysize(),timp.zsize(),grpb.size()); shellmeans = 0.0; shellcount = 0.0; NEWIMAGE::copybasicproperties(timp,stacks); NEWIMAGE::copybasicproperties(timp,masks); NEWIMAGE::copybasicproperties(timp,zcoords); EDDY::CudaVolume ima = sm.Scan(0,st).GetIma(); EDDY::CudaVolume4D dfield(ima,3,false); EDDY::CudaVolume jac(ima,false); EDDY::CudaVolume susc; EDDY::CudaVolume hwima(ima,false); // Stack of slices resampled in x and y only EDDY::CudaVolume zc(ima,false); // z-ccordinates for the stack in hwima EDDY::CudaVolume oima(ima,false); // "Finished" images resampled in all directions EDDY::CudaVolume fieldmask(ima,false); // Mask showing where field is valid EDDY::CudaVolume imamask(ima,false); // Mask showing where image is valid for (unsigned int s=0; s iR = EddyUtils::GetSliceWiseInverseMovementMatrices(sm.Scan(s,st)); EddyInternalGpuUtils::half_way_slice_to_vol_transform(ima,iR,dfield,jac,hwima,zc,oima,imamask); // EddyInternalGpuUtils::half_way_slice_to_vol_transform(ima,iR,dfield,jac,hwima,zc,imamask); stacks[s] = hwima.GetVolume(); masks[s] = (imamask * fieldmask).GetVolume(); zcoords[s] = zc.GetVolume(); shellmeans[grpi[s]] += oima.GetVolume(); shellcount[grpi[s]] += masks[s]; } // Finish the shell mean images for (int k=0; k 0) shellmeans(i,j,k,s) /= shellcount(i,j,k,s); else shellmeans(i,j,k,s) = 0.0; } } } } // Mean correct all the values in stacks based on a linear interpolation in z for (int k=0; k 0.0 && zcoords(i,j,k,s) < (stacks.zsize()-1) && masks(i,j,k,s) != 0.0) { float m1 = shellmeans(i,j,std::floor(zcoords(i,j,k,s)),grpi[s]); float m2 = shellmeans(i,j,std::ceil(zcoords(i,j,k,s)),grpi[s]); if (m1 != 0.0 && m2 != 0.0) { stacks(i,j,k,s) -= m1 * (1.0+std::floor(zcoords(i,j,k,s))-zcoords(i,j,k,s)) + m2 * (1.0+zcoords(i,j,k,s)-std::ceil(zcoords(i,j,k,s))); } else { stacks(i,j,k,s) = 0.0; masks(i,j,k,s) = 0.0; } } else { stacks(i,j,k,s) = 0.0; masks(i,j,k,s) = 0.0; } } } } } // Loop over all z-columns making predictions for all scans Stacks2YVecsAndWgts s2y(stacks.zsize(),stacks.tsize()); if (clo.VeryVerbose()) { std::cout << "EddyInternalGpuUtils::make_scatter_brain_predictions: "; std::cout.flush(); } for (unsigned int j=0; j 20) { pred(i,j,k,sm.GetDwi2GlobalIndexMapping(s)) = (i2k.GetkVector(sm.Scan(s,st).GetDiffPara(true).bVec(),grpi[s])*predvec).AsScalar(); } else pred(i,j,k,sm.GetDwi2GlobalIndexMapping(s)) = 0.0; } } } } if (clo.VeryVerbose()) { std::cout << std::endl; std::cout.flush(); } // Add back shell means for (int k=0; k((acqp.PhaseEncodeVector())(i+1) * acqp.ReadOutTime())); } else thrust::fill(dfield.Begin(i),dfield.End(i),0.0); } } EddyCatch void FieldGpuUtils::Voxel2MMDisplacements(EDDY::CudaVolume4D& dfield) EddyTry { if (dfield.Size(3) != 3) throw EDDY::EddyException("FieldGpuUtils::Voxel2MMDisplacements: dfield.Size(3) must be 3"); for (unsigned int i=0; i(dfield.Vxs(i))); } } EddyCatch void FieldGpuUtils::InvertDisplacementField(// Input const EDDY::CudaVolume4D& dfield, const EDDY::AcqPara& acqp, const EDDY::CudaVolume& inmask, // Output EDDY::CudaVolume4D& idfield, EDDY::CudaVolume& omask) EddyTry { if (inmask != dfield) throw EDDY::EddyException("FieldGpuUtils::InvertDisplacementField: dfield<->inmask mismatch"); if (acqp.PhaseEncodeVector()(1) && acqp.PhaseEncodeVector()(2)) throw EDDY::EddyException("FieldGpuUtils::InvertDisplacementField: Phase encode vector must have exactly one non-zero component"); if (acqp.PhaseEncodeVector()(3)) throw EDDY::EddyException("FieldGpuUtils::InvertDisplacementField: Phase encode in z not allowed."); if (idfield != dfield) idfield.SetHdr(dfield); idfield = 0.0; if (omask != inmask) omask.SetHdr(inmask); omask = 0.0; int tpb = FieldGpuUtils::threads_per_block_invert_field; if (acqp.PhaseEncodeVector()(1)) { int nthreads = dfield.Size(1)*dfield.Size(2); int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb; EddyKernels::invert_displacement_field<<>>(dfield.GetPtr(0),inmask.GetPtr(),dfield.Size(0),dfield.Size(1), dfield.Size(2),0,idfield.GetPtr(0),omask.GetPtr(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field x"); } else if (acqp.PhaseEncodeVector()(2)) { int nthreads = dfield.Size(0)*dfield.Size(2); int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb; EddyKernels::invert_displacement_field<<>>(dfield.GetPtr(1),inmask.GetPtr(),dfield.Size(0),dfield.Size(1), dfield.Size(2),1,idfield.GetPtr(1),omask.GetPtr(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::invert_displacement_field y"); } } EddyCatch void FieldGpuUtils::GetJacobian(// Input const EDDY::CudaVolume4D& dfield, const EDDY::AcqPara& acqp, // Output EDDY::CudaVolume& jac) EddyTry { if (jac != dfield) jac.SetHdr(dfield); unsigned int cnt=0; for (unsigned int i=0; i<3; i++) if ((acqp.PhaseEncodeVector())(i+1)) cnt++; if (cnt != 1) throw EDDY::EddyException("FieldGpuUtils::GetJacobian: Phase encode vector must have exactly one non-zero component"); unsigned int dir=0; for (dir=0; dir<3; dir++) if ((acqp.PhaseEncodeVector())(dir+1)) break; EDDY::CudaImageCoordinates coord(jac.Size(0),jac.Size(1),jac.Size(2),true); EDDY::CudaVolume tmpfield(jac,false); tmpfield.SetInterp(NEWIMAGE::spline); thrust::copy(dfield.Begin(dir),dfield.End(dir),tmpfield.Begin()); EDDY::CudaVolume skrutt(jac,false); EDDY::CudaVolume4D grad(jac,3,false); tmpfield.Sample(coord,skrutt,grad); jac = 1.0; thrust::transform(jac.Begin(),jac.End(),grad.Begin(dir),jac.Begin(),thrust::plus()); return; } EddyCatch void FieldGpuUtils::GetDiscreteJacobian(// Input const EDDY::CudaVolume4D& dfield, const EDDY::AcqPara& acqp, // Output EDDY::CudaVolume& jac) EddyTry { if (jac != dfield) jac.SetHdr(dfield); unsigned int cnt=0; for (unsigned int i=0; i<3; i++) if ((acqp.PhaseEncodeVector())(i+1)) cnt++; if (cnt != 1) throw EDDY::EddyException("FieldGpuUtils::GetDiscreteJacobian: Phase encode vector must have exactly one non-zero component"); unsigned int dir=0; for (dir=0; dir<3; dir++) if ((acqp.PhaseEncodeVector())(dir+1)) break; EDDY::CudaVolume skrutt; dfield.SampleTrilinearDerivOnVoxelCentres(dir,skrutt,jac); return; } EddyCatch void EddyInternalGpuUtils::general_transform(// Input const EDDY::CudaVolume& inima, const NEWMAT::Matrix& A, const EDDY::CudaVolume4D& dfield, const NEWMAT::Matrix& M, // Output EDDY::CudaVolume& oima, // Optional output EDDY::CudaVolume4D& deriv, EDDY::CudaVolume& omask) EddyTry { if (oima != inima) oima.SetHdr(inima); if (dfield != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: ima<->field mismatch"); if (omask.Size() && omask != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: wrong size omask"); if (deriv.Size() && deriv != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: wrong size deriv"); // Convert matrices to mimic behaviour of warpfns:general_transform NEWMAT::Matrix AA = A.i() * inima.Ima2WorldMatrix(); NEWMAT::Matrix MM = oima.World2ImaMatrix() * M.i(); // Get transformed coordinates EDDY::CudaImageCoordinates coord(inima.Size(0),inima.Size(1),inima.Size(2),false); coord.Transform(AA,dfield,MM); // Interpolate image if (deriv.Size()) inima.Sample(coord,oima,deriv); else inima.Sample(coord,oima); // Calculate binary mask with 1 for valid voxels if (omask.Size()) inima.ValidMask(coord,omask); return; } EddyCatch void EddyInternalGpuUtils::general_transform(// Input const EDDY::CudaVolume& inima, const std::vector& A, const EDDY::CudaVolume4D& dfield, const std::vector& M, // Output EDDY::CudaVolume& oima, // Optional output EDDY::CudaVolume4D& deriv, EDDY::CudaVolume& omask) EddyTry { if (oima != inima) oima.SetHdr(inima); if (dfield != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: ima<->field mismatch"); if (omask.Size() && omask != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: wrong size omask"); if (deriv.Size() && deriv != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: wrong size deriv"); if (A.size() != inima.Size(2) || M.size() != inima.Size(2)) throw EDDY::EddyException("EddyInternalGpuUtils::general_transform: mismatched A or M vector"); // Convert matrices to mimic behaviour of warpfns:general_transform std::vector AA(A.size()); for (unsigned int i=0; i MM(M.size()); for (unsigned int i=0; i& A, const EDDY::CudaVolume4D& dfield, const EDDY::CudaVolume& jac, const EDDY::CudaVolume& pred, bool jacmod, const EDDY::PolationPara& pp, // Output EDDY::CudaVolume& oima, // Optional output EDDY::CudaVolume& omask) EddyTry { if (oima != inima) oima.SetHdr(inima); if (omask.Size() && omask != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_slice_to_vol_transform: wrong size omask"); if (A.size() != inima.Size(2)) throw EDDY::EddyException("EddyInternalGpuUtils::general_slice_to_vol_transform: mismatched A vector"); // Check if it might be an affine transform if (dfield.Size()) { // This means it isn't affine if (dfield != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_slice_to_vol_transform: ima<->field mismatch"); if (jacmod && jac != inima) throw EDDY::EddyException("EddyInternalGpuUtils::general_slice_to_vol_transform: ima<->jac mismatch"); } else { // So, affine transform intended if (jacmod) throw EDDY::EddyException("EddyInternalGpuUtils::general_slice_to_vol_transform: Invalid combination of jacmod and affine transform"); } // Convert matrices to mimic behaviour of warpfns:general_transform NEWMAT::Matrix M1 = inima.Ima2WorldMatrix(); std::vector AA(A.size()); for (unsigned int i=0; i& A, const EDDY::CudaVolume4D& dfield, const EDDY::CudaVolume& jac, // Output EDDY::CudaVolume& hwima, EDDY::CudaVolume& zcoordV, // Optional output EDDY::CudaVolume& oima, EDDY::CudaVolume& omask) EddyTry { if (hwima != inima) hwima.SetHdr(inima); if (zcoordV != inima) zcoordV.SetHdr(inima); if (oima.Size() && oima != inima) throw EDDY::EddyException("EddyInternalGpuUtils::half_way_slice_to_vol_transform: wrong size oima"); if (omask.Size() && omask != inima) throw EDDY::EddyException("EddyInternalGpuUtils::half_way_slice_to_vol_transform: wrong size omask"); if (A.size() != inima.Size(2)) throw EDDY::EddyException("EddyInternalGpuUtils::half_way_slice_to_vol_transform: mismatched A vector"); if (dfield != inima) throw EDDY::EddyException("EddyInternalGpuUtils::half_way_slice_to_vol_transform: ima<->field mismatch"); if (jac != inima) throw EDDY::EddyException("EddyInternalGpuUtils::half_way_slice_to_vol_transform: ima<->jac mismatch"); // Convert matrices to mimic behaviour of warpfns:general_transform NEWMAT::Matrix M1 = inima.Ima2WorldMatrix(); std::vector AA(A.size()); for (unsigned int i=0; i rindx; if (pe(0) != 0) rindx.push_back(0); if (pe(1) != 0) rindx.push_back(1); iR = scan.RestrictedInverseMovementMatrix(rindx); } else iR = scan.InverseMovementMatrix(); arma::mat A = mov_field.World2ImaMatrix() * iR.i() * mov_field.Ima2WorldMatrix(); mov_coord.Transform(A); mov_coord -= EDDY::CudaImageCoordinates(mov_field.Size(0),mov_field.Size(1),mov_field.Size(2),true); mov_field.CoordinatesToDisplacementField(mov_coord); return; } EddyCatch void EddyInternalGpuUtils::affine_transform(const EDDY::CudaVolume& inima, const NEWMAT::Matrix& R, EDDY::CudaVolume& oima, EDDY::CudaVolume4D& deriv, EDDY::CudaVolume& omask) EddyTry { if (oima != inima) oima.SetHdr(inima); if (omask.Size() && omask != inima) throw EDDY::EddyException("EddyInternalGpuUtils::affine_transform: inima<->omask mismatch"); if (deriv.Size() && deriv != inima) throw EDDY::EddyException("EddyInternalGpuUtils::affine_transform: inima<->deriv mismatch"); // Convert matrix to mimic behaviour of warpfns:general_transform NEWMAT::Matrix A = oima.World2ImaMatrix() * R.i() * inima.Ima2WorldMatrix(); // Get transformed coordinates EDDY::CudaImageCoordinates coord(inima.Size(0),inima.Size(1),inima.Size(2),false); coord.Transform(A); // Interpolate image if (deriv.Size()) inima.Sample(coord,oima,deriv); else inima.Sample(coord,oima); // Calculate binary mask with 1 for valid voxels if (omask.Size()) inima.ValidMask(coord,omask); return; } EddyCatch void EddyInternalGpuUtils::affine_transform(const EDDY::CudaVolume& inima, const std::vector& R, EDDY::CudaVolume& oima, EDDY::CudaVolume4D& deriv, EDDY::CudaVolume& omask) EddyTry { if (oima != inima) oima.SetHdr(inima); if (omask.Size() && omask != inima) throw EDDY::EddyException("EddyInternalGpuUtils::affine_transform: inima<->omask mismatch"); if (deriv.Size() && deriv != inima) throw EDDY::EddyException("EddyInternalGpuUtils::affine_transform: inima<->deriv mismatch"); if (R.size() != inima.Size(2)) throw EDDY::EddyException("EddyInternalGpuUtils::affine_transform: mismatched R vector"); // Convert matrix to mimic behaviour of warpfns:general_transform std::vector A(R.size()); for (unsigned int i=0; i epp_host(scan.NParam(ParametersType::EC)); for (unsigned int i=0; i epp_dev = epp_host; // EC parameters -> device int tpb = EddyInternalGpuUtils::threads_per_block_ec_field; int nthreads = ecfield.Size(); int nblocks = (nthreads % tpb) ? nthreads / tpb + 1 : nthreads / tpb; if (scan.Model() == ECModelType::NoEC) { ecfield = 0.0; } if (scan.Model() == ECModelType::Linear) { EddyKernels::linear_ec_field<<>>(ecfield.GetPtr(),ecfield.Size(0),ecfield.Size(1),ecfield.Size(2), ecfield.Vxs(0),ecfield.Vxs(1),ecfield.Vxs(2), thrust::raw_pointer_cast(epp_dev.data()),epp_dev.size(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::linear_ec_field"); } else if (scan.Model() == ECModelType::Quadratic) { EddyKernels::quadratic_ec_field<<>>(ecfield.GetPtr(),ecfield.Size(0),ecfield.Size(1),ecfield.Size(2), ecfield.Vxs(0),ecfield.Vxs(1),ecfield.Vxs(2), thrust::raw_pointer_cast(epp_dev.data()),epp_dev.size(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::quadratic_ec_field"); } else if (scan.Model() == ECModelType::Cubic) { EddyKernels::cubic_ec_field<<>>(ecfield.GetPtr(),ecfield.Size(0),ecfield.Size(1),ecfield.Size(2), ecfield.Vxs(0),ecfield.Vxs(1),ecfield.Vxs(2), thrust::raw_pointer_cast(epp_dev.data()),epp_dev.size(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::cubic_ec_field"); } if ((scan.PreviousPointer() == nullptr || std::all_of(scan.GetWeightsForPrevious().begin(),scan.GetWeightsForPrevious().end(),[](double w){ return(w==0.0); })) // If no involvement from previous volume && std::all_of(scan.GetWeightsForCurrent().begin(),scan.GetWeightsForCurrent().end(),[](double w){ return(w==1.0); })) { // If all ones for current field return; // We are done } else { // Construct field from "current" (already calculated) and (possibly zero) "previous" EDDY::CudaVolume prevfield(scan.GetIma(),false); if (scan.PreviousPointer() != nullptr) { epp = scan.PreviousPointer()->GetParams(ParametersType::EC); epp_host.resize(scan.PreviousPointer()->NParam(ParametersType::EC)); for (unsigned int i=0; i device if (scan.PreviousPointer()->Model() == ECModelType::NoEC) { prevfield = 0.0; } if (scan.PreviousPointer()->Model() == ECModelType::Linear) { EddyKernels::linear_ec_field<<>>(prevfield.GetPtr(),prevfield.Size(0),prevfield.Size(1),prevfield.Size(2), prevfield.Vxs(0),prevfield.Vxs(1),prevfield.Vxs(2), thrust::raw_pointer_cast(epp_dev.data()),epp_dev.size(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::linear_ec_field"); } else if (scan.PreviousPointer()->Model() == ECModelType::Quadratic) { EddyKernels::quadratic_ec_field<<>>(prevfield.GetPtr(),prevfield.Size(0),prevfield.Size(1),prevfield.Size(2), prevfield.Vxs(0),prevfield.Vxs(1),prevfield.Vxs(2), thrust::raw_pointer_cast(epp_dev.data()),epp_dev.size(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::quadratic_ec_field"); } else if (scan.PreviousPointer()->Model() == ECModelType::Cubic) { EddyKernels::cubic_ec_field<<>>(prevfield.GetPtr(),prevfield.Size(0),prevfield.Size(1),prevfield.Size(2), prevfield.Vxs(0),prevfield.Vxs(1),prevfield.Vxs(2), thrust::raw_pointer_cast(epp_dev.data()),epp_dev.size(),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::cubic_ec_field"); } } else prevfield = 0.0; // Next add up the two with MB-group dependent weights // First get weights std::vector prev = arma::conv_to >::from(scan.GetWeightsForPrevious()); std::vector curr = arma::conv_to >::from(scan.GetWeightsForCurrent()); thrust::device_vector prev_dev = prev; thrust::device_vector curr_dev = curr; // Next get slices numbers for all multi-band groups. It is collected in a "flat" vector so that mbn[i] gives the // MB-group number for slice i. std::vector mbn(scan.GetMBG().NSlices(),-1); for (unsigned int sl=0; sl(scan.GetMBG().WhichGroupIsSliceIn(sl)); thrust::device_vector mbn_dev = mbn; EddyKernels::weighted_mean_ec_field<<>>(ecfield.GetPtr(),prevfield.GetPtr(),ecfield.Size(0),ecfield.Size(1),ecfield.Size(2), thrust::raw_pointer_cast(mbn_dev.data()),thrust::raw_pointer_cast(curr_dev.data()), thrust::raw_pointer_cast(prev_dev.data()),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::weighted_mean_ec_field"); } return; } EddyCatch // The rest of this file is dead code /* NEWMAT::Matrix EddyInternalGpuUtils::make_XtX_old(const EDDY::CudaVolume4D& X, const EDDY::CudaVolume& mask) EddyTry { NEWMAT::Matrix XtX(X.Size(3),X.Size(3)); for (int i=0; i masked(X.Size()); try { thrust::copy(mask.Begin(),mask.End(),masked.begin()); thrust::transform(X.Begin(i),X.End(i),masked.begin(),masked.begin(),thrust::multiplies()); } catch(thrust::system_error &e) { std::cerr << "thrust::system_error thrown in EddyInternalGpuUtils::make_XtX after copy and transform with index: " << i << ", and message: " << e.what() << std::endl; throw; } for (int j=i; j dXtX(X.Size(3)*X.Size(3)); const float* *ima_ptr_vec; hipMalloc((void ***) &ima_ptr_vec,X.Size(3) * sizeof(float *)); const float* *ima_ptr_vec_host = new const float*[X.Size(3)]; for (unsigned int i=0; i(X.Size(3)); int tpb = static_cast(X.Size(3)); int nthreads = nblocks*tpb; EddyKernels::XtX<<>>(X.Size(3),X.Size(),ima_ptr_vec,mask.GetPtr(),thrust::raw_pointer_cast(dXtX.data()),nthreads); EddyCudaHelperFunctions::CudaSync("EddyKernels::XtX"); thrust::host_vector hXtX = dXtX; NEWMAT::Matrix XtX(X.Size(3),X.Size(3)); for (int c=0; c