Commit 256a5e3a authored by Zimin Li
Browse files

issue/127: Add arguments to CREATE_ELEMENTWISE_PLATFORM_DESCRIPTOR macros for...

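issue/127: Add explicit arguments to the CREATE_ELEMENTWISE_<PLATFORM>_DESCRIPTOR macros so that they no longer depend on fixed variable names at the call site; change DeviceImpl::create to return a utils::Result instead of a status code plus an out-parameter; parenthesize the arguments of CEIL_DIV.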
parent 82adff3d
......@@ -8,18 +8,23 @@
/**
* @brief Define the process for initializing a Descriptor of an elementwise operation
* for its CPU implementation
*
* @param handle The device handle.
* @param dtype The output dtype.
* @param out_desc The output tensor descriptor.
* @param input_desc_vec A vector containing input tensor descriptors.
*/
#define CREATE_ELEMENTWISE_CPU_DESCRIPTOR \
\
auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc); \
CHECK_RESULT(info_result); \
\
*desc_ptr = new Descriptor( \
dtype, \
info_result.take(), \
nullptr, \
0, \
handle->device, \
// Expands to the statements that build the CPU elementwise Descriptor; it is
// meant to be used inside a Descriptor::create function body.
//
// Besides its four arguments, the expansion relies on names from the expansion
// site: it assigns through `desc_ptr` and declares a local named `info_result`,
// so neither may be redeclared in the same scope. CHECK_RESULT is defined
// elsewhere (presumably it returns from the enclosing function when the result
// holds an error -- confirm against its definition).
//
// The CPU path passes `nullptr` and `0` where the CUDA variant passes its
// device implementation object and workspace size.
//
// `handle` is parenthesized in the expansion so that any pointer-valued
// expression, not only a plain identifier, can be passed safely.
#define CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)         \
                                                                                           \
    auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc_vec); \
    CHECK_RESULT(info_result);                                                             \
                                                                                           \
    *desc_ptr = new Descriptor(                                                            \
        dtype,                                                                             \
        info_result.take(),                                                                \
        nullptr,                                                                           \
        0,                                                                                 \
        (handle)->device,                                                                  \
        (handle)->device_id);
namespace op::elementwise::cpu {
......@@ -41,9 +46,7 @@ public:
~DeviceImpl() = default;
template <typename... Args>
static infiniStatus_t create(
DeviceImpl **device_info,
Args &&...args);
static utils::Result<DeviceImpl> create(Args &&...args);
/**
* @brief Dispatches an elementwise operation with uniform input types.
......@@ -98,9 +101,8 @@ public:
struct DeviceImpl::Opaque {};
template <typename... Args>
infiniStatus_t DeviceImpl::create(DeviceImpl **device_info, Args &&...args) {
*device_info = new DeviceImpl(nullptr);
return INFINI_STATUS_SUCCESS;
utils::Result<DeviceImpl> DeviceImpl::create(Args &&...args) {
return utils::Result<DeviceImpl>(nullptr);
}
// Perform elementwise operation for different input types
......
......@@ -377,11 +377,9 @@ private:
};
template <typename... Args>
infiniStatus_t DeviceImpl::create(DeviceImpl **device_info,
Args &&...args) {
utils::Result<DeviceImpl *> DeviceImpl::create(Args &&...args) {
auto opaque = std::make_shared<Opaque>(std::forward<Args>(args)...);
*device_info = new DeviceImpl(opaque);
return INFINI_STATUS_SUCCESS;
return utils::Result<DeviceImpl *>(new DeviceImpl(opaque));
}
/* Invoke elementwise operation for different input types */
......
......@@ -18,7 +18,7 @@ public:
~DeviceImpl() = default;
template <typename... Args>
static infiniStatus_t create(DeviceImpl **device_info, Args &&...args);
static utils::Result<DeviceImpl *> create(Args &&...args);
/**
* @brief Launches elementwise operation where all input types are the same.
......@@ -82,23 +82,28 @@ public:
/**
* @brief Define the process for initializing a Descriptor of an elementwise operation
* for its CUDA implementation
*
* @param handle The device handle.
* @param dtype The output dtype.
* @param out_desc The output tensor descriptor.
* @param input_desc_vec A vector containing input tensor descriptors.
*/
#define CREATE_ELEMENTWISE_CUDA_DESCRIPTOR \
\
auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc); \
CHECK_RESULT(info_result); \
auto info = info_result.take(); \
auto workspace_size = info.getMetaMemSize() + info.getInputSize() * sizeof(void *); \
\
op::elementwise::cuda::DeviceImpl *device_impl; \
CHECK_STATUS(op::elementwise::cuda::DeviceImpl::create(&device_impl, handle->internal())); \
\
*desc_ptr = new Descriptor( \
dtype, \
std::move(info), \
device_impl, \
workspace_size, \
handle->device, \
// Expands to the statements that build the CUDA elementwise Descriptor; it is
// meant to be used inside a Descriptor::create function body.
//
// Besides its four arguments, the expansion relies on names from the expansion
// site: it assigns through `desc_ptr` and declares the locals `info_result`,
// `info`, `workspace_size` and `device_impl_result`, so none of these may be
// redeclared in the same scope. CHECK_RESULT is defined elsewhere (presumably
// it returns from the enclosing function when the result holds an error --
// confirm against its definition).
//
// The workspace size is the metadata size reported by the ElementwiseInfo plus
// one `void *` slot per input.
//
// `handle` is parenthesized in the expansion so that any pointer-valued
// expression, not only a plain identifier, can be passed safely.
#define CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)            \
                                                                                               \
    auto info_result = op::elementwise::ElementwiseInfo::create(out_desc, input_desc_vec);     \
    CHECK_RESULT(info_result);                                                                 \
    auto info = info_result.take();                                                            \
    auto workspace_size = info.getMetaMemSize() + info.getInputSize() * sizeof(void *);        \
                                                                                               \
    auto device_impl_result = op::elementwise::cuda::DeviceImpl::create((handle)->internal()); \
    CHECK_RESULT(device_impl_result);                                                          \
                                                                                               \
    *desc_ptr = new Descriptor(                                                                \
        dtype,                                                                                 \
        std::move(info),                                                                       \
        std::move(device_impl_result.take()),                                                  \
        workspace_size,                                                                        \
        (handle)->device,                                                                      \
        (handle)->device_id);
#endif // __INFINIOP_ELEMENTWISE_CUDA_API_H__
......@@ -31,7 +31,7 @@
: InfiniopDescriptor{device_type, device_id}, \
_dtype(dtype), \
_info(std::move(info)), \
_device_info(device_info), \
_device_info(std::move(device_info)), \
_workspace_size(workspace_size) {} \
\
public: \
......
......@@ -8,13 +8,13 @@ infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc) {
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &up_desc = input_desc.at(0);
const auto &gate_desc = input_desc.at(1);
const auto &up_desc = input_desc_vec.at(0);
const auto &gate_desc = input_desc_vec.at(1);
const auto &out_shape = out_desc->shape();
const auto &up_shape = up_desc->shape();
const auto &gate_shape = gate_desc->shape();
......@@ -24,7 +24,7 @@ infiniStatus_t Descriptor::create(
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create CPU elementwise descriptor
CREATE_ELEMENTWISE_CPU_DESCRIPTOR;
CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);
return INFINI_STATUS_SUCCESS;
}
......
......@@ -9,13 +9,13 @@ infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc) {
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &up_desc = input_desc.at(0);
const auto &gate_desc = input_desc.at(1);
const auto &up_desc = input_desc_vec.at(0);
const auto &gate_desc = input_desc_vec.at(1);
const auto &out_shape = out_desc->shape();
const auto &up_shape = up_desc->shape();
const auto &gate_shape = gate_desc->shape();
......@@ -24,7 +24,7 @@ infiniStatus_t Descriptor::create(
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create CUDA elementwise descriptor
CREATE_ELEMENTWISE_CUDA_DESCRIPTOR
CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
......
......@@ -98,6 +98,6 @@ inline std::string infiniDtypeToString(infiniDtype_t dtype) {
}
}
#define CEIL_DIV(x, y) ((x + y - 1) / y)
// Integer division of x by y, rounded up (valid for non-negative x and
// positive y with truncating integer division). Both arguments are
// parenthesized so compound expressions expand correctly. Note that `y` is
// evaluated three times, so avoid passing expressions with side effects.
#define CEIL_DIV(x, y) (((x) + (y) - 1) / (y))
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment