Unverified Commit e1c836b8 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Issue/705 - Refactor infinirt multi-device support. (#708)


Co-authored-by: default avatarzhuyue <zhuyue@qiyuanlab.com>
parents e238ace2 35e73b83
...@@ -149,7 +149,7 @@ __device__ void warpLPNormKernel(T const *input, T *output, ...@@ -149,7 +149,7 @@ __device__ void warpLPNormKernel(T const *input, T *output,
} }
__syncthreads(); __syncthreads();
float global_max = max(p_max[threadIdx.y], eps); float global_max = max(p_max[threadIdx.y], eps);
float global_max_inv = __fdividef(1.0F, max(p_max[threadIdx.y], eps)); float global_max_inv = __fdividef(1.0F, global_max);
float p_data = 0.0f; float p_data = 0.0f;
for (int ind = threadIdx.x; ind < dimsize; ind += BLOCK_SIZE_x) { for (int ind = threadIdx.x; ind < dimsize; ind += BLOCK_SIZE_x) {
...@@ -201,7 +201,7 @@ __device__ void warpLPNormStridesKernel(T const *input, T *output, const ptrdiff ...@@ -201,7 +201,7 @@ __device__ void warpLPNormStridesKernel(T const *input, T *output, const ptrdiff
} }
__syncthreads(); __syncthreads();
float global_max = max(p_max[threadIdx.y], eps); float global_max = max(p_max[threadIdx.y], eps);
float global_max_inv = __fdividef(1.0F, max(p_max[threadIdx.y], eps)); float global_max_inv = __fdividef(1.0F, global_max);
float p_data = 0.0f; float p_data = 0.0f;
for (int ind = threadIdx.x; ind < dimsize; ind += BLOCK_SIZE_x) { for (int ind = threadIdx.x; ind < dimsize; ind += BLOCK_SIZE_x) {
......
...@@ -4,7 +4,19 @@ ...@@ -4,7 +4,19 @@
#define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess) #define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess)
// 根据宏定义选择命名空间并实现
#if defined(ENABLE_NVIDIA_API)
namespace infinirt::cuda { namespace infinirt::cuda {
#elif defined(ENABLE_ILUVATAR_API)
namespace infinirt::iluvatar {
#elif defined(ENABLE_QY_API)
namespace infinirt::qy {
#elif defined(ENABLE_HYGON_API)
namespace infinirt::hygon {
#else
namespace infinirt::cuda { // 默认回退
#endif
infiniStatus_t getDeviceCount(int *count) { infiniStatus_t getDeviceCount(int *count) {
CHECK_CUDART(cudaGetDeviceCount(count)); CHECK_CUDART(cudaGetDeviceCount(count));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
...@@ -156,4 +168,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { ...@@ -156,4 +168,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream)); CHECK_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace infinirt::cuda }
...@@ -2,12 +2,40 @@ ...@@ -2,12 +2,40 @@
#define __INFINIRT_CUDA_H__ #define __INFINIRT_CUDA_H__
#include "../infinirt_impl.h" #include "../infinirt_impl.h"
// NVIDIA namespace
namespace infinirt::cuda { namespace infinirt::cuda {
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) #ifdef ENABLE_NVIDIA_API
INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API_IMPL
#else #else
INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API_NOOP
#endif #endif
} // namespace infinirt::cuda } // namespace infinirt::cuda
// ILUVATAR namespace
namespace infinirt::iluvatar {
#ifdef ENABLE_ILUVATAR_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::iluvatar
// QY namespace
namespace infinirt::qy {
#ifdef ENABLE_QY_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::qy
// HYGON namespace
namespace infinirt::hygon {
#ifdef ENABLE_HYGON_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::hygon
#endif // __INFINIRT_CUDA_H__ #endif // __INFINIRT_CUDA_H__
...@@ -23,10 +23,6 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) { ...@@ -23,10 +23,6 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
return INFINI_STATUS_NULL_POINTER; return INFINI_STATUS_NULL_POINTER;
} }
for (size_t i = 0; i < INFINI_DEVICE_TYPE_COUNT; i++) { for (size_t i = 0; i < INFINI_DEVICE_TYPE_COUNT; i++) {
if (i == INFINI_DEVICE_ILUVATAR || i == INFINI_DEVICE_HYGON || i == INFINI_DEVICE_QY) {
count_array[i] = 0;
continue;
}
auto status = infinirtGetDeviceCount(static_cast<infiniDevice_t>(i), &count_array[i]); auto status = infinirtGetDeviceCount(static_cast<infiniDevice_t>(i), &count_array[i]);
if (status != INFINI_STATUS_SUCCESS) { if (status != INFINI_STATUS_SUCCESS) {
return status; return status;
...@@ -75,13 +71,13 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ ...@@ -75,13 +71,13 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
_status = infinirt::kunlun::API PARAMS; \ _status = infinirt::kunlun::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_ILUVATAR: \ case INFINI_DEVICE_ILUVATAR: \
_status = infinirt::cuda::API PARAMS; \ _status = infinirt::iluvatar::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_QY: \ case INFINI_DEVICE_QY: \
_status = infinirt::cuda::API PARAMS; \ _status = infinirt::qy::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_HYGON: \ case INFINI_DEVICE_HYGON: \
_status = infinirt::cuda::API PARAMS; \ _status = infinirt::hygon::API PARAMS; \
break; \ break; \
default: \ default: \
_status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \ _status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \
......
...@@ -3,37 +3,38 @@ ...@@ -3,37 +3,38 @@
#include "infinirt.h" #include "infinirt.h"
#include <stdint.h> #include <stdint.h>
#define INFINIRT_DEVICE_API(IMPL, COUNT) \ #define INFINIRT_DEVICE_API(INLINE, IMPL, COUNT) \
infiniStatus_t getDeviceCount(int *count) COUNT; \ INLINE infiniStatus_t getDeviceCount(int *count) COUNT; \
infiniStatus_t setDevice(int device_id) IMPL; \ INLINE infiniStatus_t setDevice(int device_id) IMPL; \
infiniStatus_t deviceSynchronize() IMPL; \ INLINE infiniStatus_t deviceSynchronize() IMPL; \
\ \
infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \ INLINE infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \
infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \ INLINE infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \
infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \ INLINE infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \ INLINE infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \
\ \
infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \ INLINE infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \
infiniStatus_t eventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) IMPL; \ INLINE infiniStatus_t eventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) IMPL; \
infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \ INLINE infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \
infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \ INLINE infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \
infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \ INLINE infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \
infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \ INLINE infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \
infiniStatus_t eventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) IMPL; \ INLINE infiniStatus_t eventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) IMPL; \
\ \
infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \ INLINE infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \
infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \ INLINE infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \
infiniStatus_t freeDevice(void *ptr) IMPL; \ INLINE infiniStatus_t freeDevice(void *ptr) IMPL; \
infiniStatus_t freeHost(void *ptr) IMPL; \ INLINE infiniStatus_t freeHost(void *ptr) IMPL; \
\ \
infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \ INLINE infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \
infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \ INLINE infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \
\ \
infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ INLINE infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; INLINE infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL;
#define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, ) #define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, , )
#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, \ #define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API( \
inline, { return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, \
{*count = 0; return INFINI_STATUS_SUCCESS; }) {*count = 0; return INFINI_STATUS_SUCCESS; })
#endif // __INFINIRT_IMPL_H__ #endif // __INFINIRT_IMPL_H__
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment