Commit ceb57c2a authored by YdrMaster
Browse files

issue/291/style: 根据实际情况将 cuda 改为 nvidia


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent d76a2607
......@@ -3,6 +3,6 @@
#include "../conv.h"
DESCRIPTOR(cuda)
DESCRIPTOR(nvidia)
#endif // __GEMM_CUDA_CUH__
......@@ -6,7 +6,7 @@
#include "cpu/conv_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "cuda/conv_cuda.cuh"
#include "nvidia/conv_nvidia.cuh"
#endif
__C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
......@@ -37,7 +37,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda);
CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -61,7 +61,7 @@ infiniopGetConvWorkspaceSize(
GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda);
GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -93,7 +93,7 @@ __C infiniStatus_t infiniopConv(
CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda);
CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
......@@ -114,7 +114,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
DELETE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda);
DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
#include "../../../devices/cuda/cuda_handle.cuh"
#include "gemm_cuda.cuh"
#include "../../../devices/nvidia/nvidia_handle.cuh"
#include "gemm_nvidia.cuh"
namespace op::gemm::cuda {
namespace op::gemm::nvidia {
struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal;
std::shared_ptr<device::nvidia::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -17,7 +17,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<device::cuda::nvidia::Handle *>(handle_);
auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto dtype = c_desc->dtype();
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16);
......@@ -121,4 +121,4 @@ infiniStatus_t Descriptor::calculate(
return INFINI_STATUS_SUCCESS;
}
} // namespace op::gemm::cuda
} // namespace op::gemm::nvidia
......@@ -3,6 +3,6 @@
#include "../gemm.h"
DESCRIPTOR(cuda)
DESCRIPTOR(nvidia)
#endif // __GEMM_CUDA_CUH__
......@@ -6,7 +6,7 @@
#include "cpu/gemm_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "cuda/gemm_cuda.cuh"
#include "nvidia/gemm_nvidia.cuh"
#endif
#ifdef ENABLE_CAMBRICON_API
#include "bang/gemm_bang.h"
......@@ -46,7 +46,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor(
CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda);
CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_CAMBRICON_API
CREATE(INFINI_DEVICE_CAMBRICON, bang);
......@@ -88,7 +88,7 @@ infiniopGetGemmWorkspaceSize(
GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda);
GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_CAMBRICON_API
GET(INFINI_DEVICE_CAMBRICON, bang);
......@@ -137,7 +137,7 @@ __C infiniStatus_t infiniopGemm(
CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda);
CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_CAMBRICON_API
CALCULATE(INFINI_DEVICE_CAMBRICON, bang);
......@@ -176,7 +176,7 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) {
DELETE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda);
DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_CAMBRICON_API
DELETE(INFINI_DEVICE_CAMBRICON, bang);
......
......@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(mul, cpu, cpu)
ELEMENTWISE_DESCRIPTOR(mul, cpu)
namespace op::mul::cpu {
typedef struct MulOp {
......
......@@ -3,6 +3,6 @@
#include "../../../elementwise/metax/elementwise_metax_api.h"
ELEMENTWISE_DESCRIPTOR(mul, metax, metax)
ELEMENTWISE_DESCRIPTOR(mul, metax)
#endif // __MUL_METAX_API_H__
#include "../../../elementwise/cuda/elementwise_cuda.cuh"
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh"
#include "mul_nvidia.cuh"
......@@ -13,7 +13,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_);
auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &a_desc = input_desc_vec.at(0);
......
#ifndef __MUL_CUDA_API_H__
#define __MUL_CUDA_API_H__
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
#include "../../../elementwise/nvidia/elementwise_nvidia_api.cuh"
ELEMENTWISE_DESCRIPTOR(mul, nvidia, cuda)
ELEMENTWISE_DESCRIPTOR(mul, nvidia)
#endif // __MUL_CUDA_API_H__
#include "../../../devices/cuda/cuda_kernel_common.cuh"
#include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "infinicore.h"
#include <cub/device/device_radix_sort.cuh>
#include <cub/device/device_reduce.cuh>
#include <cub/device/device_scan.cuh>
namespace op::random_sample::cuda {
namespace op::random_sample::nvidia {
// ↓↓↓ 重新封装 cub api,减少模板参数,方便调用
......@@ -256,4 +256,4 @@ struct Algo {
}
};
} // namespace op::random_sample::cuda
} // namespace op::random_sample::nvidia
#include "../../../devices/cuda/cuda_handle.cuh"
#include "../../../devices/nvidia/nvidia_handle.cuh"
#include "../info.h"
#include "random_sample_cuda.cuh"
#include "random_sample_kernel.cuh"
#include "random_sample_nvidia.cuh"
namespace op::random_sample::cuda {
namespace op::random_sample::nvidia {
struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal;
std::shared_ptr<device::nvidia::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -18,7 +18,7 @@ infiniStatus_t Descriptor::create(
Descriptor **desc_ptr,
infiniopTensorDescriptor_t result_desc,
infiniopTensorDescriptor_t probs_desc) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_);
auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto result = RandomSampleInfo::create(result_desc, probs_desc);
CHECK_RESULT(result);
......@@ -99,4 +99,4 @@ infiniStatus_t Descriptor::calculate(
return INFINI_STATUS_SUCCESS;
}
} // namespace op::random_sample::cuda
} // namespace op::random_sample::nvidia
......@@ -3,6 +3,6 @@
#include "../random_sample.h"
DESCRIPTOR(cuda)
DESCRIPTOR(nvidia)
#endif // __RANDOM_SAMPLE_CUDA_CUH__
......@@ -6,7 +6,7 @@
#include "cpu/random_sample_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "cuda/random_sample_cuda.cuh"
#include "nvidia/random_sample_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/random_sample_metax.h"
......@@ -36,7 +36,7 @@ infiniopCreateRandomSampleDescriptor(
CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda);
CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax);
......@@ -69,7 +69,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda);
GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax);
......@@ -112,7 +112,7 @@ __C infiniStatus_t infiniopRandomSample(
CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda);
CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax);
......@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
DELETE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda);
DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, metax);
......
#ifndef __REARRANGE_CUDA_KERNEL_H__
#define __REARRANGE_CUDA_KERNEL_H__
#include "../../../devices/cuda/cuda_common.cuh"
#include "../../../devices/nvidia/nvidia_common.cuh"
#define ARRAY_TYPE_STRIDE ptrdiff_t
#define ARRAY_TYPE_SIZE size_t
......
#include "../../../devices/cuda/cuda_common.cuh"
#include "../../../devices/cuda/cuda_kernel_common.cuh"
#include "../../../devices/nvidia/nvidia_common.cuh"
#include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "../../../tensor.h"
#include "rearrange_cuda.cuh"
#include "rearrange_kernel.cuh"
#include "rearrange_nvidia.cuh"
#include <algorithm>
#include <cmath>
#include <memory>
#include <stdint.h>
#include <vector>
namespace op::rearrange::cuda {
namespace op::rearrange::nvidia {
struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal;
std::shared_ptr<device::nvidia::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -49,7 +49,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor(
std::move(*meta),
new Opaque{reinterpret_cast<device::cuda::Handle *>(handle)->internal()},
new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
handle->device, handle->device_id);
return INFINI_STATUS_SUCCESS;
}
......@@ -482,4 +482,4 @@ infiniStatus_t Descriptor::calculate(
return status;
}
} // namespace op::rearrange::cuda
} // namespace op::rearrange::nvidia
......@@ -3,6 +3,6 @@
#include "../rearrange.h"
DESCRIPTOR(cuda)
DESCRIPTOR(nvidia)
#endif // __REARRANGE_CUDA_H__
......@@ -10,7 +10,7 @@
#endif
#ifdef ENABLE_NVIDIA_API
#include "cuda/rearrange_cuda.cuh"
#include "nvidia/rearrange_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/rearrange_metax.h"
......@@ -40,7 +40,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor(
#endif
#ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda);
CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax);
......@@ -73,7 +73,7 @@ __C infiniStatus_t infiniopRearrange(
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda);
CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax);
......@@ -104,7 +104,7 @@ __C infiniStatus_t infiniopDestroyRearrangeDescriptor(
#endif
#ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda);
DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, metax);
......
......@@ -5,7 +5,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(relu, cpu, cpu)
ELEMENTWISE_DESCRIPTOR(relu, cpu)
namespace op::relu::cpu {
typedef struct ReluOp {
......
#include "../../../devices/cuda/cuda_common.cuh"
#include "../../../devices/nvidia/nvidia_common.cuh"
#include "rms_norm_nvidia.cuh"
#include "../../../devices/cuda/cuda_kernel_common.cuh"
#include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include <cub/block/block_reduce.cuh>
#include "../../../reduce/cuda/reduce.cuh"
......@@ -23,7 +23,7 @@ INFINIOP_CUDA_KERNEL rmsnormKernel(
namespace op::rms_norm::nvidia {
struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal;
std::shared_ptr<device::nvidia::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create(
}
*desc_ptr = new Descriptor(
new Opaque{reinterpret_cast<device::cuda::Handle *>(handle)->internal()},
new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
std::move(info),
0,
handle->device, handle->device_id);
......
#include "../../../devices/cuda/cuda_common.cuh"
#include "../../../devices/nvidia/nvidia_common.cuh"
#include "rope_nvidia.cuh"
#include "../../../devices/cuda/cuda_kernel_common.cuh"
#include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "../cuda/kernel.cuh"
......@@ -28,7 +28,7 @@ INFINIOP_CUDA_KERNEL ropeThreadPerItemKernel(
namespace op::rope::nvidia {
struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal;
std::shared_ptr<device::nvidia::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t sin_desc,
infiniopTensorDescriptor_t cos_desc) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_);
auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc);
CHECK_RESULT(info);
......@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor(
info.take(),
0,
new Opaque{reinterpret_cast<device::cuda::Handle *>(handle)->internal()},
new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
handle->device,
handle->device_id);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment