Commit 256a5e3a authored by Zimin Li
Browse files

issue/127: Add arguments to CREATE_ELEMENTWISE_PLATFORM_DESCRIPTOR macros for...

issue/127: Add arguments to CREATE_ELEMENTWISE_PLATFORM_DESCRIPTOR macros for indirecting variable names, change DeviceImpl to use Result for the return type of the create function, change CEIL_DIV
parent 82adff3d
...@@ -8,18 +8,23 @@ ...@@ -8,18 +8,23 @@
/** /**
* @brief Define the process for initializing a Descriptor of an elementwise operation * @brief Define the process for initializing a Descriptor of an elementwise operation
* for its CPU implementation * for its CPU implementation
*
* @param handle The device handle.
* @param dtype The output dtype.
* @param out_desc The output tensor descriptor.
* @param input_desc_vec A vector containing input tensor descriptors.
*/ */
#define CREATE_ELEMENTWISE_CPU_DESCRIPTOR \ #define CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec) \
\ \
auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc); \ auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc_vec); \
CHECK_RESULT(info_result); \ CHECK_RESULT(info_result); \
\ \
*desc_ptr = new Descriptor( \ *desc_ptr = new Descriptor( \
dtype, \ dtype, \
info_result.take(), \ info_result.take(), \
nullptr, \ nullptr, \
0, \ 0, \
handle->device, \ handle->device, \
handle->device_id); handle->device_id);
namespace op::elementwise::cpu { namespace op::elementwise::cpu {
...@@ -41,9 +46,7 @@ public: ...@@ -41,9 +46,7 @@ public:
~DeviceImpl() = default; ~DeviceImpl() = default;
template <typename... Args> template <typename... Args>
static infiniStatus_t create( static utils::Result<DeviceImpl> create(Args &&...args);
DeviceImpl **device_info,
Args &&...args);
/** /**
* @brief Dispatches an elementwise operation with uniform input types. * @brief Dispatches an elementwise operation with uniform input types.
...@@ -98,9 +101,8 @@ public: ...@@ -98,9 +101,8 @@ public:
struct DeviceImpl::Opaque {}; struct DeviceImpl::Opaque {};
template <typename... Args> template <typename... Args>
infiniStatus_t DeviceImpl::create(DeviceImpl **device_info, Args &&...args) { utils::Result<DeviceImpl> DeviceImpl::create(Args &&...args) {
*device_info = new DeviceImpl(nullptr); return utils::Result<DeviceImpl>(nullptr);
return INFINI_STATUS_SUCCESS;
} }
// Perform elementwise operation for different input types // Perform elementwise operation for different input types
......
...@@ -377,11 +377,9 @@ private: ...@@ -377,11 +377,9 @@ private:
}; };
template <typename... Args> template <typename... Args>
infiniStatus_t DeviceImpl::create(DeviceImpl **device_info, utils::Result<DeviceImpl *> DeviceImpl::create(Args &&...args) {
Args &&...args) {
auto opaque = std::make_shared<Opaque>(std::forward<Args>(args)...); auto opaque = std::make_shared<Opaque>(std::forward<Args>(args)...);
*device_info = new DeviceImpl(opaque); return utils::Result<DeviceImpl *>(new DeviceImpl(opaque));
return INFINI_STATUS_SUCCESS;
} }
/* Invoke elementwise operation for different input types */ /* Invoke elementwise operation for different input types */
......
...@@ -18,7 +18,7 @@ public: ...@@ -18,7 +18,7 @@ public:
~DeviceImpl() = default; ~DeviceImpl() = default;
template <typename... Args> template <typename... Args>
static infiniStatus_t create(DeviceImpl **device_info, Args &&...args); static utils::Result<DeviceImpl *> create(Args &&...args);
/** /**
* @brief Launches elementwise operation where all input types are the same. * @brief Launches elementwise operation where all input types are the same.
...@@ -82,23 +82,28 @@ public: ...@@ -82,23 +82,28 @@ public:
/** /**
* @brief Define the process for initializing a Descriptor of an elementwise operation * @brief Define the process for initializing a Descriptor of an elementwise operation
* for its CUDA implementation * for its CUDA implementation
*
* @param handle The device handle.
* @param dtype The output dtype.
* @param out_desc The output tensor descriptor.
* @param input_desc_vec A vector containing input tensor descriptors.
*/ */
#define CREATE_ELEMENTWISE_CUDA_DESCRIPTOR \ #define CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec) \
\ \
auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc); \ auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc_vec); \
CHECK_RESULT(info_result); \ CHECK_RESULT(info_result); \
auto info = info_result.take(); \ auto info = info_result.take(); \
auto workspace_size = info.getMetaMemSize() + info.getInputSize() * sizeof(void *); \ auto workspace_size = info.getMetaMemSize() + info.getInputSize() * sizeof(void *); \
\ \
op::elementwise::cuda::DeviceImpl *device_impl; \ auto device_impl_result = op::elementwise::cuda::DeviceImpl::create(handle->internal()); \
CHECK_STATUS(op::elementwise::cuda::DeviceImpl::create(&device_impl, handle->internal())); \ CHECK_RESULT(device_impl_result); \
\ \
*desc_ptr = new Descriptor( \ *desc_ptr = new Descriptor( \
dtype, \ dtype, \
std::move(info), \ std::move(info), \
device_impl, \ std::move(device_impl_result.take()), \
workspace_size, \ workspace_size, \
handle->device, \ handle->device, \
handle->device_id); handle->device_id);
#endif // __INFINIOP_ELEMENTWISE_CUDA_API_H__ #endif // __INFINIOP_ELEMENTWISE_CUDA_API_H__
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
: InfiniopDescriptor{device_type, device_id}, \ : InfiniopDescriptor{device_type, device_id}, \
_dtype(dtype), \ _dtype(dtype), \
_info(std::move(info)), \ _info(std::move(info)), \
_device_info(device_info), \ _device_info(std::move(device_info)), \
_workspace_size(workspace_size) {} \ _workspace_size(workspace_size) {} \
\ \
public: \ public: \
......
...@@ -8,13 +8,13 @@ infiniStatus_t Descriptor::create( ...@@ -8,13 +8,13 @@ infiniStatus_t Descriptor::create(
infiniopHandle_t handle_, infiniopHandle_t handle_,
Descriptor **desc_ptr, Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc, infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc) { std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::cpu::Handle *>(handle_); auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
auto dtype = out_desc->dtype(); auto dtype = out_desc->dtype();
const auto &up_desc = input_desc.at(0); const auto &up_desc = input_desc_vec.at(0);
const auto &gate_desc = input_desc.at(1); const auto &gate_desc = input_desc_vec.at(1);
const auto &out_shape = out_desc->shape(); const auto &out_shape = out_desc->shape();
const auto &up_shape = up_desc->shape(); const auto &up_shape = up_desc->shape();
const auto &gate_shape = gate_desc->shape(); const auto &gate_shape = gate_desc->shape();
...@@ -24,7 +24,7 @@ infiniStatus_t Descriptor::create( ...@@ -24,7 +24,7 @@ infiniStatus_t Descriptor::create(
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape); CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create CPU elementwise descriptor // create CPU elementwise descriptor
CREATE_ELEMENTWISE_CPU_DESCRIPTOR; CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
......
...@@ -9,13 +9,13 @@ infiniStatus_t Descriptor::create( ...@@ -9,13 +9,13 @@ infiniStatus_t Descriptor::create(
infiniopHandle_t handle_, infiniopHandle_t handle_,
Descriptor **desc_ptr, Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc, infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc) { std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_); auto handle = reinterpret_cast<device::cuda::Handle *>(handle_);
auto dtype = out_desc->dtype(); auto dtype = out_desc->dtype();
const auto &up_desc = input_desc.at(0); const auto &up_desc = input_desc_vec.at(0);
const auto &gate_desc = input_desc.at(1); const auto &gate_desc = input_desc_vec.at(1);
const auto &out_shape = out_desc->shape(); const auto &out_shape = out_desc->shape();
const auto &up_shape = up_desc->shape(); const auto &up_shape = up_desc->shape();
const auto &gate_shape = gate_desc->shape(); const auto &gate_shape = gate_desc->shape();
...@@ -24,7 +24,7 @@ infiniStatus_t Descriptor::create( ...@@ -24,7 +24,7 @@ infiniStatus_t Descriptor::create(
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape); CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create CUDA elementwise descriptor // create CUDA elementwise descriptor
CREATE_ELEMENTWISE_CUDA_DESCRIPTOR CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
......
...@@ -98,6 +98,6 @@ inline std::string infiniDtypeToString(infiniDtype_t dtype) { ...@@ -98,6 +98,6 @@ inline std::string infiniDtypeToString(infiniDtype_t dtype) {
} }
} }
#define CEIL_DIV(x, y) ((x + y - 1) / y) #define CEIL_DIV(x, y) (((x) + (y)-1) / (y))
#endif #endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment