use super::core::*;

use crate::{
    cudnn::{result, result::CudnnError, sys},
    driver::{DevicePtr, DevicePtrMut},
};

use std::{marker::PhantomData, sync::Arc};

/// A descriptor of the filters for a conv2d operation. Create with [`Cudnn::create_4d_filter()`]
#[derive(Debug)]
pub struct FilterDescriptor<T> {
    pub(crate) desc: sys::cudnnFilterDescriptor_t,
    #[allow(unused)]
    pub(crate) handle: Arc<Cudnn>,
    pub(crate) marker: PhantomData<T>,
}

impl Cudnn {
    /// Creates a 4d filter descriptor.
    pub fn create_4d_filter<T: CudnnDataType>(
        self: &Arc<Self>,
        format: sys::cudnnTensorFormat_t,
        dims: [std::ffi::c_int; 4],
    ) -> Result<FilterDescriptor<T>, CudnnError> {
        let desc = result::create_filter_descriptor()?;
        let desc = FilterDescriptor {
            desc,
            handle: self.clone(),
            marker: PhantomData,
        };
        unsafe { result::set_filter4d_descriptor(desc.desc, T::DATA_TYPE, format, dims) }?;
        Ok(desc)
    }
}

impl<T> Drop for FilterDescriptor<T> {
    fn drop(&mut self) {
        let desc = std::mem::replace(&mut self.desc, std::ptr::null_mut());
        if !desc.is_null() {
            unsafe { result::destroy_filter_descriptor(desc) }.unwrap()
        }
    }
}

/// A descriptor for a conv2d operation holding stride, padding, and dilation.
#[derive(Debug)]
pub struct Conv2dDescriptor<T> {
    pub(crate) desc: sys::cudnnConvolutionDescriptor_t,
    pub(crate) handle: Arc<Cudnn>,
    pub(crate) marker: PhantomData<T>,
}

impl Cudnn {
    /// Creates a conv2d descriptor.
    /// - `pad` is the padding to apply to the height and width of the image
    /// - `stride` is the kernel strides
    /// - `dilation` is the kernel dilation
    /// - `mode` - `CROSS_CORRELATION` is the standard convolution
    pub fn create_conv2d<T: CudnnDataType>(
        self: &Arc<Self>,
        pad: [std::ffi::c_int; 2],
        stride: [std::ffi::c_int; 2],
        dilation: [std::ffi::c_int; 2],
        mode: sys::cudnnConvolutionMode_t,
    ) -> Result<Conv2dDescriptor<T>, CudnnError> {
        let [pad_h, pad_w] = pad;
        let [stride_h, stride_w] = stride;
        let [dilation_h, dilation_w] = dilation;
        let desc = result::create_convolution_descriptor()?;
        let desc = Conv2dDescriptor {
            desc,
            handle: self.clone(),
            marker: PhantomData,
        };
        unsafe {
            result::set_convolution2d_descriptor(
                desc.desc,
                pad_h,
                pad_w,
                stride_h,
                stride_w,
                dilation_h,
                dilation_w,
                mode,
                T::DATA_TYPE,
            )
        }?;
        Ok(desc)
    }
}

impl<T> Conv2dDescriptor<T> {
    /// Sets the math type for this convolution. Refer to [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetConvolutionMathType)
    /// for more information.
    pub fn set_math_type(&mut self, math_type: sys::cudnnMathType_t) -> Result<(), CudnnError> {
        unsafe { result::set_convolution_math_type(self.desc, math_type) }
    }

    /// Sets the group count for this convolution. Refer to [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetConvolutionGroupCount)
    /// for more information.
    pub fn set_group_count(&mut self, group_count: i32) -> Result<(), CudnnError> {
        unsafe { result::set_convolution_group_count(self.desc, group_count) }
    }
}

impl<T> Drop for Conv2dDescriptor<T> {
    fn drop(&mut self) {
        let desc = std::mem::replace(&mut self.desc, std::ptr::null_mut());
        if !desc.is_null() {
            unsafe { result::destroy_convolution_descriptor(desc) }.unwrap()
        }
    }
}
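// A minimal descriptor-construction sketch, in test form. It assumes a CUDA
// device 0 is present and that `f32` implements `CudnnDataType` (as elsewhere
// in this crate); the shapes and settings below are illustrative only.
#[cfg(test)]
#[test]
fn sketch_descriptor_creation() {
    use crate::driver::CudaDevice;

    let dev = CudaDevice::new(0).unwrap();
    let cudnn = Cudnn::new(dev).unwrap();

    // 4 output channels, 3 input channels, 3x3 kernels, NCHW layout.
    let _filter = cudnn
        .create_4d_filter::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [4, 3, 3, 3])
        .unwrap();

    // 1px padding, unit strides, no dilation; CROSS_CORRELATION is the
    // standard deep-learning convolution.
    let mut conv = cudnn
        .create_conv2d::<f32>(
            [1; 2],
            [1; 2],
            [1; 2],
            sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )
        .unwrap();
    conv.set_math_type(sys::cudnnMathType_t::CUDNN_DEFAULT_MATH)
        .unwrap();
    conv.set_group_count(1).unwrap();
}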
/// The convolution 2d forward operation. Pass in references to descriptors
/// directly, and then call:
/// 1. [`Conv2dForward::pick_algorithm()`] to use cudnn heuristics to select the algorithm
/// 2. [`Conv2dForward::get_workspace_size()`] to get the required workspace size.
/// 3. [`Conv2dForward::launch()`] to execute it.
#[derive(Debug)]
pub struct Conv2dForward<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> {
    /// Conv parameters
    pub conv: &'a Conv2dDescriptor<C>,
    /// Input image descriptor
    pub x: &'a TensorDescriptor<X>,
    /// Filter descriptor
    pub w: &'a FilterDescriptor<X>,
    /// Output image descriptor
    pub y: &'a TensorDescriptor<Y>,
}

impl<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> Conv2dForward<'a, X, C, Y> {
    /// Picks the fastest algorithm from all available cuDNN algorithms based on cudnn heuristics.
    pub fn pick_algorithm(&self) -> Result<sys::cudnnConvolutionFwdAlgo_t, CudnnError> {
        const NUM_ALGOS: usize = 8;
        debug_assert_eq!(
            sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_COUNT as u32,
            NUM_ALGOS as u32
        );
        let mut returned_count = [0; 1];
        let mut perf_results = [Default::default(); NUM_ALGOS];
        unsafe {
            result::get_convolution_forward_algorithm(
                self.conv.handle.handle,
                self.x.desc,
                self.w.desc,
                self.conv.desc,
                self.y.desc,
                NUM_ALGOS as std::ffi::c_int,
                returned_count.as_mut_ptr(),
                perf_results.as_mut_ptr(),
            )
        }?;
        assert!(returned_count[0] > 0);
        perf_results[0].status.result()?;
        Ok(perf_results[0].algo)
    }

    /// Returns the size in **bytes** of the workspace needed to execute the selected algorithm.
    pub fn get_workspace_size(
        &self,
        algo: sys::cudnnConvolutionFwdAlgo_t,
    ) -> Result<usize, CudnnError> {
        unsafe {
            result::get_convolution_forward_workspace_size(
                self.conv.handle.handle,
                self.x.desc,
                self.w.desc,
                self.conv.desc,
                self.y.desc,
                algo,
            )
        }
    }

    /// Launches the operation.
    ///
    /// - `img` is the input image
    /// - `filter` is the convolution kernels
    /// - `y` is the output
    ///
    /// # Safety
    /// The img/filter/y arguments must match the data type/layout specified in the
    /// descriptors in `self`.
    pub unsafe fn launch<Workspace, Img, Filter, Dst>(
        &self,
        algo: sys::cudnnConvolutionFwdAlgo_t,
        workspace: Option<&mut Workspace>,
        (alpha, beta): (Y, Y),
        img: &Img,
        filter: &Filter,
        y: &mut Dst,
    ) -> Result<(), CudnnError>
    where
        Workspace: DevicePtrMut<u8>,
        Img: DevicePtr<X>,
        Filter: DevicePtr<X>,
        Dst: DevicePtrMut<Y>,
    {
        let (num_bytes, workspace_ptr) = match workspace {
            Some(w) => (
                w.num_bytes(),
                *w.device_ptr_mut() as *mut u8 as *mut std::ffi::c_void,
            ),
            None => (0, std::ptr::null_mut()),
        };
        let alpha = alpha.into_scaling_parameter();
        let beta = beta.into_scaling_parameter();
        result::convolution_forward(
            self.conv.handle.handle,
            (&alpha) as *const Y::Scalar as *const std::ffi::c_void,
            self.x.desc,
            *img.device_ptr() as *const X as *const std::ffi::c_void,
            self.w.desc,
            *filter.device_ptr() as *const X as *const std::ffi::c_void,
            self.conv.desc,
            algo,
            workspace_ptr,
            num_bytes,
            (&beta) as *const Y::Scalar as *const std::ffi::c_void,
            self.y.desc,
            *y.device_ptr_mut() as *mut Y as *mut std::ffi::c_void,
        )
    }
}
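// A forward-pass sketch, in test form, walking the three steps named above:
// pick an algorithm, size the workspace, launch. Assumes CUDA device 0 and the
// `CudnnDataType` impl for `f32`; shapes are illustrative (8x8 input, 3x3
// kernel, no padding => 6x6 output).
#[cfg(test)]
#[test]
fn sketch_conv2d_forward() {
    use crate::driver::CudaDevice;

    let dev = CudaDevice::new(0).unwrap();
    let cudnn = Cudnn::new(dev.clone()).unwrap();

    let conv = cudnn
        .create_conv2d::<f32>(
            [0; 2],
            [1; 2],
            [1; 2],
            sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )
        .unwrap();
    let x = cudnn
        .create_4d_tensor::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [1, 3, 8, 8])
        .unwrap();
    let w = cudnn
        .create_4d_filter::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [4, 3, 3, 3])
        .unwrap();
    let y = cudnn
        .create_4d_tensor::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [1, 4, 6, 6])
        .unwrap();

    let op = Conv2dForward {
        conv: &conv,
        x: &x,
        w: &w,
        y: &y,
    };

    let algo = op.pick_algorithm().unwrap();
    let workspace_size = op.get_workspace_size(algo).unwrap();
    // `max(1)` avoids a zero-sized allocation when the algorithm needs no workspace.
    let mut workspace = dev.alloc_zeros::<u8>(workspace_size.max(1)).unwrap();

    let img = dev.alloc_zeros::<f32>(3 * 8 * 8).unwrap();
    let filter = dev.alloc_zeros::<f32>(4 * 3 * 3 * 3).unwrap();
    let mut out = dev.alloc_zeros::<f32>(4 * 6 * 6).unwrap();

    // alpha = 1, beta = 0: overwrite `out` with the convolution result.
    unsafe { op.launch(algo, Some(&mut workspace), (1.0, 0.0), &img, &filter, &mut out) }
        .unwrap();
}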
/// The convolution 2d backward operation for the input image. Pass in references to descriptors
/// directly, and then call:
/// 1. [`Conv2dBackwardData::pick_algorithm()`] to use cudnn heuristics to select the algorithm
/// 2. [`Conv2dBackwardData::get_workspace_size()`] to get the required workspace size.
/// 3. [`Conv2dBackwardData::launch()`] to execute it.
#[derive(Debug)]
pub struct Conv2dBackwardData<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> {
    /// Conv descriptor
    pub conv: &'a Conv2dDescriptor<C>,
    /// Input image descriptor
    pub dx: &'a TensorDescriptor<X>,
    /// Filter descriptor
    pub w: &'a FilterDescriptor<X>,
    /// Output image descriptor
    pub dy: &'a TensorDescriptor<Y>,
}

impl<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> Conv2dBackwardData<'a, X, C, Y> {
    /// Picks the fastest algorithm from all available cuDNN algorithms based on cudnn heuristics.
    pub fn pick_algorithm(&self) -> Result<sys::cudnnConvolutionBwdDataAlgo_t, CudnnError> {
        const NUM_ALGOS: usize = 6;
        debug_assert_eq!(
            sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT as u32,
            NUM_ALGOS as u32
        );
        let mut returned_count = [0; 1];
        let mut perf_results = [Default::default(); NUM_ALGOS];
        unsafe {
            result::get_convolution_backward_data_algorithm(
                self.conv.handle.handle,
                self.w.desc,
                self.dy.desc,
                self.conv.desc,
                self.dx.desc,
                NUM_ALGOS as std::ffi::c_int,
                returned_count.as_mut_ptr(),
                perf_results.as_mut_ptr(),
            )
        }?;
        assert!(returned_count[0] > 0);
        perf_results[0].status.result()?;
        Ok(perf_results[0].algo)
    }

    /// Returns the size in **bytes** of the workspace needed to execute the selected algorithm.
    pub fn get_workspace_size(
        &self,
        algo: sys::cudnnConvolutionBwdDataAlgo_t,
    ) -> Result<usize, CudnnError> {
        unsafe {
            result::get_convolution_backward_data_workspace_size(
                self.conv.handle.handle,
                self.w.desc,
                self.dy.desc,
                self.conv.desc,
                self.dx.desc,
                algo,
            )
        }
    }

    /// Launches the operation.
    ///
    /// - `dx` is the gradient of the input image to populate
    /// - `filter` is the convolution kernels
    /// - `dy` is the gradient of the output image
    ///
    /// # Safety
    /// The arguments must match the data type/layout specified in the
    /// descriptors in `self`.
    pub unsafe fn launch<Workspace, Img, Filter, Dst>(
        &self,
        algo: sys::cudnnConvolutionBwdDataAlgo_t,
        workspace: Option<&mut Workspace>,
        (alpha, beta): (Y, Y),
        dx: &mut Img,
        filter: &Filter,
        dy: &Dst,
    ) -> Result<(), CudnnError>
    where
        Workspace: DevicePtrMut<u8>,
        Img: DevicePtrMut<X>,
        Filter: DevicePtr<X>,
        Dst: DevicePtr<Y>,
    {
        let (num_bytes, workspace_ptr) = match workspace {
            Some(w) => (
                w.num_bytes(),
                *w.device_ptr_mut() as *mut u8 as *mut std::ffi::c_void,
            ),
            None => (0, std::ptr::null_mut()),
        };
        let alpha = alpha.into_scaling_parameter();
        let beta = beta.into_scaling_parameter();
        result::convolution_backward_data(
            self.conv.handle.handle,
            (&alpha) as *const Y::Scalar as *const std::ffi::c_void,
            self.w.desc,
            *filter.device_ptr() as *const X as *const std::ffi::c_void,
            self.dy.desc,
            *dy.device_ptr() as *const Y as *const std::ffi::c_void,
            self.conv.desc,
            algo,
            workspace_ptr,
            num_bytes,
            (&beta) as *const Y::Scalar as *const std::ffi::c_void,
            self.dx.desc,
            *dx.device_ptr_mut() as *mut X as *mut std::ffi::c_void,
        )
    }
}
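// A backward-data sketch, in test form: given filters `w` and the output
// gradient `dy`, fill the input-image gradient `dx`. Same assumptions (CUDA
// device 0, `CudnnDataType` for `f32`) and the same illustrative shapes as the
// forward sketch above.
#[cfg(test)]
#[test]
fn sketch_conv2d_backward_data() {
    use crate::driver::CudaDevice;

    let dev = CudaDevice::new(0).unwrap();
    let cudnn = Cudnn::new(dev.clone()).unwrap();

    let conv = cudnn
        .create_conv2d::<f32>(
            [0; 2],
            [1; 2],
            [1; 2],
            sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )
        .unwrap();
    let dx = cudnn
        .create_4d_tensor::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [1, 3, 8, 8])
        .unwrap();
    let w = cudnn
        .create_4d_filter::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [4, 3, 3, 3])
        .unwrap();
    let dy = cudnn
        .create_4d_tensor::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [1, 4, 6, 6])
        .unwrap();

    let op = Conv2dBackwardData {
        conv: &conv,
        dx: &dx,
        w: &w,
        dy: &dy,
    };

    let algo = op.pick_algorithm().unwrap();
    let workspace_size = op.get_workspace_size(algo).unwrap();
    let mut workspace = dev.alloc_zeros::<u8>(workspace_size.max(1)).unwrap();

    let mut dx_buf = dev.alloc_zeros::<f32>(3 * 8 * 8).unwrap();
    let filter = dev.alloc_zeros::<f32>(4 * 3 * 3 * 3).unwrap();
    let dy_buf = dev.alloc_zeros::<f32>(4 * 6 * 6).unwrap();

    unsafe {
        op.launch(
            algo,
            Some(&mut workspace),
            (1.0, 0.0),
            &mut dx_buf,
            &filter,
            &dy_buf,
        )
    }
    .unwrap();
}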
/// The convolution 2d backward operation for the filters. Pass in references to descriptors
/// directly, and then call:
/// 1. [`Conv2dBackwardFilter::pick_algorithm()`] to use cudnn heuristics to select the algorithm
/// 2. [`Conv2dBackwardFilter::get_workspace_size()`] to get the required workspace size.
/// 3. [`Conv2dBackwardFilter::launch()`] to execute it.
#[derive(Debug)]
pub struct Conv2dBackwardFilter<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> {
    /// Conv descriptor
    pub conv: &'a Conv2dDescriptor<C>,
    /// Input image descriptor
    pub x: &'a TensorDescriptor<X>,
    /// Filter descriptor
    pub dw: &'a FilterDescriptor<X>,
    /// Output image descriptor
    pub dy: &'a TensorDescriptor<Y>,
}

impl<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> Conv2dBackwardFilter<'a, X, C, Y> {
    /// Picks the fastest algorithm from all available cuDNN algorithms based on cudnn heuristics.
    pub fn pick_algorithm(&self) -> Result<sys::cudnnConvolutionBwdFilterAlgo_t, CudnnError> {
        const NUM_ALGOS: usize = 7;
        debug_assert_eq!(
            sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT as u32,
            NUM_ALGOS as u32
        );
        let mut returned_count = [0; 1];
        let mut perf_results = [Default::default(); NUM_ALGOS];
        unsafe {
            result::get_convolution_backward_filter_algorithm(
                self.conv.handle.handle,
                self.x.desc,
                self.dy.desc,
                self.conv.desc,
                self.dw.desc,
                NUM_ALGOS as std::ffi::c_int,
                returned_count.as_mut_ptr(),
                perf_results.as_mut_ptr(),
            )
        }?;
        assert!(returned_count[0] > 0);
        perf_results[0].status.result()?;
        Ok(perf_results[0].algo)
    }

    /// Returns the size in **bytes** of the workspace needed to execute the selected algorithm.
    pub fn get_workspace_size(
        &self,
        algo: sys::cudnnConvolutionBwdFilterAlgo_t,
    ) -> Result<usize, CudnnError> {
        unsafe {
            result::get_convolution_backward_filter_workspace_size(
                self.conv.handle.handle,
                self.x.desc,
                self.dy.desc,
                self.conv.desc,
                self.dw.desc,
                algo,
            )
        }
    }

    /// Launches the operation.
    ///
    /// - `x` is the input image
    /// - `dfilter` is the gradient of the convolution kernels
    /// - `dy` is the gradient of the output image
    ///
    /// # Safety
    /// The arguments must match the data type/layout specified in the
    /// descriptors in `self`.
    pub unsafe fn launch<Workspace, Img, Filter, Dst>(
        &self,
        algo: sys::cudnnConvolutionBwdFilterAlgo_t,
        workspace: Option<&mut Workspace>,
        (alpha, beta): (Y, Y),
        x: &Img,
        dfilter: &mut Filter,
        dy: &Dst,
    ) -> Result<(), CudnnError>
    where
        Workspace: DevicePtrMut<u8>,
        Img: DevicePtr<X>,
        Filter: DevicePtrMut<X>,
        Dst: DevicePtr<Y>,
    {
        let (num_bytes, workspace_ptr) = workspace
            .map(|x| (x.num_bytes(), *x.device_ptr_mut() as *mut std::ffi::c_void))
            .unwrap_or((0, std::ptr::null_mut()));
        let alpha = alpha.into_scaling_parameter();
        let beta = beta.into_scaling_parameter();
        result::convolution_backward_filter(
            self.conv.handle.handle,
            (&alpha) as *const Y::Scalar as *const std::ffi::c_void,
            self.x.desc,
            *x.device_ptr() as *const _,
            self.dy.desc,
            *dy.device_ptr() as *const _,
            self.conv.desc,
            algo,
            workspace_ptr,
            num_bytes,
            (&beta) as *const Y::Scalar as *const std::ffi::c_void,
            self.dw.desc,
            *dfilter.device_ptr_mut() as *mut _,
        )
    }
}
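// A backward-filter sketch, in test form: given the input image `x` and the
// output gradient `dy`, fill the filter gradient `dfilter`. Same assumptions
// and illustrative shapes as the sketches above.
#[cfg(test)]
#[test]
fn sketch_conv2d_backward_filter() {
    use crate::driver::CudaDevice;

    let dev = CudaDevice::new(0).unwrap();
    let cudnn = Cudnn::new(dev.clone()).unwrap();

    let conv = cudnn
        .create_conv2d::<f32>(
            [0; 2],
            [1; 2],
            [1; 2],
            sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )
        .unwrap();
    let x = cudnn
        .create_4d_tensor::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [1, 3, 8, 8])
        .unwrap();
    let dw = cudnn
        .create_4d_filter::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [4, 3, 3, 3])
        .unwrap();
    let dy = cudnn
        .create_4d_tensor::<f32>(sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, [1, 4, 6, 6])
        .unwrap();

    let op = Conv2dBackwardFilter {
        conv: &conv,
        x: &x,
        dw: &dw,
        dy: &dy,
    };

    let algo = op.pick_algorithm().unwrap();
    let workspace_size = op.get_workspace_size(algo).unwrap();
    let mut workspace = dev.alloc_zeros::<u8>(workspace_size.max(1)).unwrap();

    let x_buf = dev.alloc_zeros::<f32>(3 * 8 * 8).unwrap();
    let mut dfilter = dev.alloc_zeros::<f32>(4 * 3 * 3 * 3).unwrap();
    let dy_buf = dev.alloc_zeros::<f32>(4 * 6 * 6).unwrap();

    unsafe {
        op.launch(
            algo,
            Some(&mut workspace),
            (1.0, 0.0),
            &x_buf,
            &mut dfilter,
            &dy_buf,
        )
    }
    .unwrap();
}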