-- Copyright 2016-present, Facebook, Inc. -- All rights reserved. -- -- This source code is licensed under the license found in the -- LICENSE file in the root directory of this source tree. return function (sparseconvnet) local ffi = require 'ffi' local libpath, ok libpath = package.searchpath('libcusparseconvnet', package.cpath) if not libpath then libpath = package.searchpath('libsparseconvnet', package.cpath) end assert(libpath) local F = ffi.load(libpath) --local fc=io.open('header_cpu.h','w') --local fg=io.open('header_gpu.h','w') local cdef = [[ void scn_set_THCState(void *state); ]] ffi.cdef(cdef) if cutorch then F['scn_set_THCState'](cutorch.getState()) end cdef = [[ void scn_ptrCopyA(long *dst, void **src); void scn_ptrCopyB(void **dst, long *src); double scn_ruleBookBits(); void scn_2_drawCurve(void **m, THFloatTensor *features, THFloatTensor *stroke); ]] if fc then fc:write(cdef) end ffi.cdef(cdef) sparseconvnet.ruleBookBits=F['scn_ruleBookBits']() cdef = [[ double scn_DIMENSION_addSampleFromThresholdedTensor( void **m, THFloatTensor *features_, THFloatTensor *tensor_, THLongTensor *offset_, THLongTensor *spatialSize_, float threshold); void scn_DIMENSION_batchAddSample(void **m); void scn_DIMENSION_createMetadataForDenseToSparse( void **m, THLongTensor *spatialSize_, THLongTensor *pad, THLongTensor *nz, long batchSize); void scn_DIMENSION_freeMetadata(void **metadata); void scn_DIMENSION_generateRuleBooks3s2(void **m); void scn_DIMENSION_generateRuleBooks2s2(void **m); void scn_DIMENSION_setInputSpatialSize(void **m, THLongTensor *spatialSize); void scn_DIMENSION_setInputSpatialLocation(void **m, THFloatTensor *features, THLongTensor *location, THFloatTensor *vec, bool overwrite); void scn_DIMENSION_setInputSpatialLocations(void **m, THFloatTensor *features, THLongTensor *locations, THFloatTensor *vecs, bool overwrite); void scn_DIMENSION_getSpatialLocations(void **m, THLongTensor *spatialSize, THLongTensor *locations); ]] for DIMENSION = 1,10 do local def = string.gsub(cdef, 'DIMENSION', DIMENSION) ffi.cdef(def) if fc then def=string.gsub(def,'bool','_Bool') fc:write(def) end end --types CPU float, double; --type GPU half, float, double; int_cpu and int_gpu cdef = [[ void scn_ARCH_REAL_AffineReluTrivialConvolution_updateOutput( THTensor *input_features, THTensor *output_features, THTensor *affineWeight, THTensor *affineBias, THTensor *convWeight); void scn_ARCH_REAL_AffineReluTrivialConvolution_backward( THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, THTensor *affineWeight, THTensor *d_affineWeight, THTensor *affineBias, THTensor *d_affineBias, THTensor *convWeight, THTensor *d_convWeight, bool additiveGrad); // BatchwiseMultiplicativeDropout void scn_ARCH_REAL_BatchwiseMultiplicativeDropout_updateOutput( THTensor *input_features, THTensor *output_features, THTensor *noise, long nPlanes, long input_stride, long output_stride, float alpha); void scn_ARCH_REAL_BatchwiseMultiplicativeDropout_updateGradInput( THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, THTensor *noise, long nPlanes, long input_stride, long output_stride, float alpha); // BatchNormalization void scn_ARCH_REAL_BatchNormalization_updateOutput( THTensor *input_features, THTensor *output_features, THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar, THTensor *weight, THTensor *bias, REAL eps, REAL momentum, bool train, REAL leakiness); void scn_ARCH_REAL_BatchNormalization_backward( THTensor *input_features, THTensor *d_input_features, THTensor *output_features, THTensor *d_output_features, THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar, THTensor *weight, THTensor *bias, THTensor *d_weight, THTensor *d_bias, REAL leakiness); // BatchNormalizationInTensor void scn_ARCH_REAL_BatchNormalizationInTensor_updateOutput( THTensor *input_features, THTensor *output_features, THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar, THTensor *weight, THTensor *bias, REAL eps, REAL momentum, bool train, REAL leakiness); // LeakyReLU void scn_ARCH_REAL_LeakyReLU_updateOutput( THTensor *input_features, THTensor *output_features, float alpha); void scn_ARCH_REAL_LeakyReLU_updateGradInput( THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, float alpha); // NetworkInNetwork double scn_ARCH_REAL_NetworkInNetwork_updateOutput( THTensor *input_features, THTensor *output_features, THTensor *weight, THTensor *bias); void scn_ARCH_REAL_NetworkInNetwork_updateGradInput( THTensor *d_input_features, THTensor *d_output_features, THTensor *weight); void scn_ARCH_REAL_NetworkInNetwork_accGradParameters( THTensor *input_features, THTensor *d_output_features, THTensor *d_weight, THTensor *d_bias); ]] for _,v in ipairs({{'float', 'THFloatTensor'}, {'double','THDoubleTensor'}}) do local def = cdef def = string.gsub(def, 'ARCH', 'cpu') def = string.gsub(def, 'THITensor', 'void') def = string.gsub(def, 'REAL', v[1]) def = string.gsub(def, 'THTensor', v[2]) ffi.cdef(def) if fc then def=string.gsub(def,'bool','_Bool') fc:write(def) end end if sparseconvnet.cutorch then for k,v in ipairs({ {'float', 'THCudaTensor'}, --{'double', 'THCudaDoubleTensor'} }) do local def = cdef def = string.gsub(def, 'ARCH', 'gpu') def = string.gsub(def, 'THITensor', sparseconvnet.ruleBookBits==64 and 'THCudaLongTensor' or 'THCudaIntTensor') def = string.gsub(def, 'REAL', v[1]) def = string.gsub(def, 'THTensor', v[2]) ffi.cdef(def) if fg then def=string.gsub(def,'bool','_Bool') fg:write(def) end end end cdef = [[ // ActivePooling void scn_ARCH_REAL_DIMENSIONActivePooling_updateOutput( THLongTensor *inputSize, void **m, THTensor *input_features, THTensor *output_features, THITensor *rulesBuffer, bool average); void scn_ARCH_REAL_DIMENSIONActivePooling_updateGradInput( THLongTensor *inputSize, void **m, THTensor *d_input_features, THTensor *d_output_features, THITensor *rulesBuffer, bool average); // Average Pooling void scn_ARCH_REAL_DIMENSIONAveragePooling_updateOutput( THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize, THLongTensor *poolStride, void **m, THTensor *input_features, THTensor *output_features, long nFeaturesToDrop, THITensor *rulesBuffer); void scn_ARCH_REAL_DIMENSIONAveragePooling_updateGradInput( THLongTensor * inputSize, THLongTensor * outputSize, THLongTensor * poolSize, THLongTensor * poolStride, void **m, THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, long nFeaturesToDrop, THITensor *rulesBuffer); double scn_ARCH_REAL_DIMENSIONConvolution_updateOutput( THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *filterStride, void **m, THTensor *input_features, THTensor *output_features, THTensor *weight, THTensor *bias, long filterVolume, THITensor *rulesBuffer); void scn_ARCH_REAL_DIMENSIONConvolution_backward( THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *filterStride, void **m, THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, THTensor *weight, THTensor *d_weight, THTensor *d_bias, long filterVolume, THITensor *rulesBuffer); double scn_ARCH_REAL_DIMENSIONDeconvolution_updateOutput( THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *filterStride, void **m, THTensor *input_features, THTensor *output_features, THTensor *weight, THTensor *bias, long filterVolume, THITensor *rulesBuffer); void scn_ARCH_REAL_DIMENSIONDeconvolution_backward( THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *filterStride, void **m, THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, THTensor *weight, THTensor *d_weight, THTensor *d_bias, long filterVolume, THITensor *rulesBuffer); // Max Pooling void scn_ARCH_REAL_DIMENSIONMaxPooling_updateOutput( THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize, THLongTensor *poolStride, void **m, THTensor *input_features, THTensor *output_features, long nFeaturesToDrop, THITensor *rulesBuffer); void scn_ARCH_REAL_DIMENSIONMaxPooling_updateGradInput( THLongTensor * inputSize, THLongTensor * outputSize, THLongTensor * poolSize, THLongTensor * poolStride, void **m, THTensor *input_features, THTensor *d_input_features, THTensor *output_features, THTensor *d_output_features, long nFeaturesToDrop, THITensor *rulesBuffer); // SparseToDense void scn_ARCH_REAL_DIMENSIONSparseToDense_updateOutput( THLongTensor *inputSize, void **m, THTensor *input_features, THTensor *output_features, THITensor *rulesBuffer, long nPlanes); void scn_ARCH_REAL_DIMENSIONSparseToDense_updateGradInput( THLongTensor *inputSize, void **m, THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, THITensor *rulesBuffer); double scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_updateOutput( THLongTensor *inputSize, THLongTensor *filterSize, void **m, THTensor *input_features, THTensor *output_features, THTensor *weight, THTensor *bias, long filterVolume, THITensor *rulesBuffer); void scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_backward( THLongTensor *inputSize, THLongTensor *filterSize, void **m, THTensor *input_features, THTensor *d_input_features, THTensor *d_output_features, THTensor *weight, THTensor *d_weight, THTensor *d_bias, long filterVolume, THITensor *rulesBuffer); ]] for _,v in ipairs({{'float', 'THFloatTensor'}, {'double', 'THDoubleTensor'}}) do for DIMENSION = 1,10 do local def = cdef def = string.gsub(def, 'ARCH', 'cpu') def = string.gsub(def, '_DIMENSION', DIMENSION) def = string.gsub(def, 'THITensor', 'void') def = string.gsub(def, 'REAL', v[1]) def = string.gsub(def, 'THTensor', v[2]) ffi.cdef(def) if fc then def=string.gsub(def,'bool','_Bool') fc:write(def) end end end if sparseconvnet.cutorch then for k,v in ipairs({ {'float', 'THCudaTensor'}, --{'double', 'THCudaDoubleTensor'} }) do for DIMENSION = 1,10 do local def = cdef def = string.gsub(def, 'ARCH', 'gpu') def = string.gsub(def, '_DIMENSION', DIMENSION) def = string.gsub(def, 'THITensor', sparseconvnet.ruleBookBits==64 and 'THCudaLongTensor' or 'THCudaIntTensor') def = string.gsub(def, 'REAL', v[1]) def = string.gsub(def, 'THTensor', v[2]) ffi.cdef(def) if fg then def=string.gsub(def,'bool','_Bool') fg:write(def) end end end end if fc then fc:close() fg:close() end sparseconvnet.C = {} local C = sparseconvnet.C local typeTable={} typeTable['torch.FloatTensor'] = 'cpu_float' typeTable['torch.DoubleTensor'] = 'cpu_double' typeTable['torch.CudaHalfTensor'] = 'gpu_half' --todo typeTable['torch.CudaTensor'] = 'gpu_float' typeTable['torch.CudaDoubleTensor'] = 'gpu_double' function C.fn(name) return F['scn_' .. name] end function C.typedFn(type,name) return F['scn_' .. typeTable[type] .. '_' .. name] end function C.dimensionFn(dimension,name) return F['scn_' .. dimension .. '_' .. name] end function C.dimTypedFn(dimension,type,name) return F['scn_' .. typeTable[type] .. dimension .. name] end function C.copyFfiPtrToLong(dst,src) F['scn_ptrCopyA'](dst:data(), src) end function C.copyLongToFfiPtr(dst,src) F['scn_ptrCopyB'](dst, src:data()) end end