Unverified Commit e1c836b8 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Issue/705 - Refactor infinirt multi-device support. (#708)


Co-authored-by: default avatarzhuyue <zhuyue@qiyuanlab.com>
parents e238ace2 35e73b83
......@@ -149,7 +149,7 @@ __device__ void warpLPNormKernel(T const *input, T *output,
}
__syncthreads();
float global_max = max(p_max[threadIdx.y], eps);
float global_max_inv = __fdividef(1.0F, max(p_max[threadIdx.y], eps));
float global_max_inv = __fdividef(1.0F, global_max);
float p_data = 0.0f;
for (int ind = threadIdx.x; ind < dimsize; ind += BLOCK_SIZE_x) {
......@@ -201,7 +201,7 @@ __device__ void warpLPNormStridesKernel(T const *input, T *output, const ptrdiff
}
__syncthreads();
float global_max = max(p_max[threadIdx.y], eps);
float global_max_inv = __fdividef(1.0F, max(p_max[threadIdx.y], eps));
float global_max_inv = __fdividef(1.0F, global_max);
float p_data = 0.0f;
for (int ind = threadIdx.x; ind < dimsize; ind += BLOCK_SIZE_x) {
......
......@@ -4,7 +4,19 @@
#define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess)
// 根据宏定义选择命名空间并实现
#if defined(ENABLE_NVIDIA_API)
namespace infinirt::cuda {
#elif defined(ENABLE_ILUVATAR_API)
namespace infinirt::iluvatar {
#elif defined(ENABLE_QY_API)
namespace infinirt::qy {
#elif defined(ENABLE_HYGON_API)
namespace infinirt::hygon {
#else
namespace infinirt::cuda { // 默认回退
#endif
infiniStatus_t getDeviceCount(int *count) {
CHECK_CUDART(cudaGetDeviceCount(count));
return INFINI_STATUS_SUCCESS;
......@@ -156,4 +168,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
} // namespace infinirt::cuda
}
......@@ -2,12 +2,40 @@
#define __INFINIRT_CUDA_H__
#include "../infinirt_impl.h"
// NVIDIA namespace
namespace infinirt::cuda {
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)
#ifdef ENABLE_NVIDIA_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::cuda
// ILUVATAR namespace
namespace infinirt::iluvatar {
#ifdef ENABLE_ILUVATAR_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::iluvatar
// QY namespace
namespace infinirt::qy {
#ifdef ENABLE_QY_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::qy
// HYGON namespace
namespace infinirt::hygon {
#ifdef ENABLE_HYGON_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::hygon
#endif // __INFINIRT_CUDA_H__
......@@ -23,10 +23,6 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
return INFINI_STATUS_NULL_POINTER;
}
for (size_t i = 0; i < INFINI_DEVICE_TYPE_COUNT; i++) {
if (i == INFINI_DEVICE_ILUVATAR || i == INFINI_DEVICE_HYGON || i == INFINI_DEVICE_QY) {
count_array[i] = 0;
continue;
}
auto status = infinirtGetDeviceCount(static_cast<infiniDevice_t>(i), &count_array[i]);
if (status != INFINI_STATUS_SUCCESS) {
return status;
......@@ -75,13 +71,13 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
_status = infinirt::kunlun::API PARAMS; \
break; \
case INFINI_DEVICE_ILUVATAR: \
_status = infinirt::cuda::API PARAMS; \
_status = infinirt::iluvatar::API PARAMS; \
break; \
case INFINI_DEVICE_QY: \
_status = infinirt::cuda::API PARAMS; \
_status = infinirt::qy::API PARAMS; \
break; \
case INFINI_DEVICE_HYGON: \
_status = infinirt::cuda::API PARAMS; \
_status = infinirt::hygon::API PARAMS; \
break; \
default: \
_status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \
......
......@@ -3,37 +3,38 @@
#include "infinirt.h"
#include <stdint.h>
#define INFINIRT_DEVICE_API(IMPL, COUNT) \
infiniStatus_t getDeviceCount(int *count) COUNT; \
infiniStatus_t setDevice(int device_id) IMPL; \
infiniStatus_t deviceSynchronize() IMPL; \
#define INFINIRT_DEVICE_API(INLINE, IMPL, COUNT) \
INLINE infiniStatus_t getDeviceCount(int *count) COUNT; \
INLINE infiniStatus_t setDevice(int device_id) IMPL; \
INLINE infiniStatus_t deviceSynchronize() IMPL; \
\
infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \
infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \
infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \
INLINE infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \
INLINE infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \
INLINE infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \
INLINE infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \
\
infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \
infiniStatus_t eventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) IMPL; \
infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \
infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \
infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \
infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \
infiniStatus_t eventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) IMPL; \
INLINE infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \
INLINE infiniStatus_t eventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) IMPL; \
INLINE infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \
INLINE infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \
INLINE infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \
INLINE infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \
INLINE infiniStatus_t eventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) IMPL; \
\
infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \
infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \
infiniStatus_t freeDevice(void *ptr) IMPL; \
infiniStatus_t freeHost(void *ptr) IMPL; \
INLINE infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \
INLINE infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \
INLINE infiniStatus_t freeDevice(void *ptr) IMPL; \
INLINE infiniStatus_t freeHost(void *ptr) IMPL; \
\
infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \
infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \
INLINE infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \
INLINE infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \
\
infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL;
INLINE infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \
INLINE infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL;
#define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, )
#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, \
#define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, , )
#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API( \
inline, { return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, \
{*count = 0; return INFINI_STATUS_SUCCESS; })
#endif // __INFINIRT_IMPL_H__
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment