"git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "a0abcb2cf2021ec76925744ac4a618d569c49175"
Commit ceb57c2a authored by YdrMaster
Browse files

issue/291/style: 根据实际情况将 cuda 改为 nvidia


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent d76a2607
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../conv.h" #include "../conv.h"
DESCRIPTOR(cuda) DESCRIPTOR(nvidia)
#endif // __GEMM_CUDA_CUH__ #endif // __GEMM_CUDA_CUH__
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "cpu/conv_cpu.h" #include "cpu/conv_cpu.h"
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
#include "cuda/conv_cuda.cuh" #include "nvidia/conv_nvidia.cuh"
#endif #endif
__C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle, __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
...@@ -37,7 +37,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle ...@@ -37,7 +37,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
CREATE(INFINI_DEVICE_CPU, cpu); CREATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda); CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -61,7 +61,7 @@ infiniopGetConvWorkspaceSize( ...@@ -61,7 +61,7 @@ infiniopGetConvWorkspaceSize(
GET(INFINI_DEVICE_CPU, cpu); GET(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda); GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -93,7 +93,7 @@ __C infiniStatus_t infiniopConv( ...@@ -93,7 +93,7 @@ __C infiniStatus_t infiniopConv(
CALCULATE(INFINI_DEVICE_CPU, cpu); CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda); CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
...@@ -114,7 +114,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) { ...@@ -114,7 +114,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
DELETE(INFINI_DEVICE_CPU, cpu); DELETE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda); DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
#include "../../../devices/cuda/cuda_handle.cuh" #include "../../../devices/nvidia/nvidia_handle.cuh"
#include "gemm_cuda.cuh" #include "gemm_nvidia.cuh"
namespace op::gemm::cuda { namespace op::gemm::nvidia {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal; std::shared_ptr<device::nvidia::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -17,7 +17,7 @@ infiniStatus_t Descriptor::create( ...@@ -17,7 +17,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t c_desc, infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) { infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<device::cuda::nvidia::Handle *>(handle_); auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto dtype = c_desc->dtype(); auto dtype = c_desc->dtype();
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16); CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_BF16);
...@@ -121,4 +121,4 @@ infiniStatus_t Descriptor::calculate( ...@@ -121,4 +121,4 @@ infiniStatus_t Descriptor::calculate(
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace op::gemm::cuda } // namespace op::gemm::nvidia
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../gemm.h" #include "../gemm.h"
DESCRIPTOR(cuda) DESCRIPTOR(nvidia)
#endif // __GEMM_CUDA_CUH__ #endif // __GEMM_CUDA_CUH__
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "cpu/gemm_cpu.h" #include "cpu/gemm_cpu.h"
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
#include "cuda/gemm_cuda.cuh" #include "nvidia/gemm_nvidia.cuh"
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
#include "bang/gemm_bang.h" #include "bang/gemm_bang.h"
...@@ -46,7 +46,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor( ...@@ -46,7 +46,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor(
CREATE(INFINI_DEVICE_CPU, cpu); CREATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda); CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
CREATE(INFINI_DEVICE_CAMBRICON, bang); CREATE(INFINI_DEVICE_CAMBRICON, bang);
...@@ -88,7 +88,7 @@ infiniopGetGemmWorkspaceSize( ...@@ -88,7 +88,7 @@ infiniopGetGemmWorkspaceSize(
GET(INFINI_DEVICE_CPU, cpu); GET(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda); GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
GET(INFINI_DEVICE_CAMBRICON, bang); GET(INFINI_DEVICE_CAMBRICON, bang);
...@@ -137,7 +137,7 @@ __C infiniStatus_t infiniopGemm( ...@@ -137,7 +137,7 @@ __C infiniStatus_t infiniopGemm(
CALCULATE(INFINI_DEVICE_CPU, cpu); CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda); CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
CALCULATE(INFINI_DEVICE_CAMBRICON, bang); CALCULATE(INFINI_DEVICE_CAMBRICON, bang);
...@@ -176,7 +176,7 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) { ...@@ -176,7 +176,7 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) {
DELETE(INFINI_DEVICE_CPU, cpu); DELETE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda); DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
DELETE(INFINI_DEVICE_CAMBRICON, bang); DELETE(INFINI_DEVICE_CAMBRICON, bang);
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(mul, cpu, cpu) ELEMENTWISE_DESCRIPTOR(mul, cpu)
namespace op::mul::cpu { namespace op::mul::cpu {
typedef struct MulOp { typedef struct MulOp {
......
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../../../elementwise/metax/elementwise_metax_api.h" #include "../../../elementwise/metax/elementwise_metax_api.h"
ELEMENTWISE_DESCRIPTOR(mul, metax, metax) ELEMENTWISE_DESCRIPTOR(mul, metax)
#endif // __MUL_METAX_API_H__ #endif // __MUL_METAX_API_H__
#include "../../../elementwise/cuda/elementwise_cuda.cuh" #include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh" #include "../cuda/kernel.cuh"
#include "mul_nvidia.cuh" #include "mul_nvidia.cuh"
...@@ -13,7 +13,7 @@ infiniStatus_t Descriptor::create( ...@@ -13,7 +13,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t out_desc, infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) { std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_); auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto dtype = out_desc->dtype(); auto dtype = out_desc->dtype();
const auto &a_desc = input_desc_vec.at(0); const auto &a_desc = input_desc_vec.at(0);
......
#ifndef __MUL_CUDA_API_H__ #ifndef __MUL_CUDA_API_H__
#define __MUL_CUDA_API_H__ #define __MUL_CUDA_API_H__
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh" #include "../../../elementwise/nvidia/elementwise_nvidia_api.cuh"
ELEMENTWISE_DESCRIPTOR(mul, nvidia, cuda) ELEMENTWISE_DESCRIPTOR(mul, nvidia)
#endif // __MUL_CUDA_API_H__ #endif // __MUL_CUDA_API_H__
#include "../../../devices/cuda/cuda_kernel_common.cuh" #include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "infinicore.h" #include "infinicore.h"
#include <cub/device/device_radix_sort.cuh> #include <cub/device/device_radix_sort.cuh>
#include <cub/device/device_reduce.cuh> #include <cub/device/device_reduce.cuh>
#include <cub/device/device_scan.cuh> #include <cub/device/device_scan.cuh>
namespace op::random_sample::cuda { namespace op::random_sample::nvidia {
// ↓↓↓ 重新封装 cub api,减少模板参数,方便调用 // ↓↓↓ 重新封装 cub api,减少模板参数,方便调用
...@@ -256,4 +256,4 @@ struct Algo { ...@@ -256,4 +256,4 @@ struct Algo {
} }
}; };
} // namespace op::random_sample::cuda } // namespace op::random_sample::nvidia
#include "../../../devices/cuda/cuda_handle.cuh" #include "../../../devices/nvidia/nvidia_handle.cuh"
#include "../info.h" #include "../info.h"
#include "random_sample_cuda.cuh"
#include "random_sample_kernel.cuh" #include "random_sample_kernel.cuh"
#include "random_sample_nvidia.cuh"
namespace op::random_sample::cuda { namespace op::random_sample::nvidia {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal; std::shared_ptr<device::nvidia::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -18,7 +18,7 @@ infiniStatus_t Descriptor::create( ...@@ -18,7 +18,7 @@ infiniStatus_t Descriptor::create(
Descriptor **desc_ptr, Descriptor **desc_ptr,
infiniopTensorDescriptor_t result_desc, infiniopTensorDescriptor_t result_desc,
infiniopTensorDescriptor_t probs_desc) { infiniopTensorDescriptor_t probs_desc) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_); auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto result = RandomSampleInfo::create(result_desc, probs_desc); auto result = RandomSampleInfo::create(result_desc, probs_desc);
CHECK_RESULT(result); CHECK_RESULT(result);
...@@ -99,4 +99,4 @@ infiniStatus_t Descriptor::calculate( ...@@ -99,4 +99,4 @@ infiniStatus_t Descriptor::calculate(
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace op::random_sample::cuda } // namespace op::random_sample::nvidia
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../random_sample.h" #include "../random_sample.h"
DESCRIPTOR(cuda) DESCRIPTOR(nvidia)
#endif // __RANDOM_SAMPLE_CUDA_CUH__ #endif // __RANDOM_SAMPLE_CUDA_CUH__
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "cpu/random_sample_cpu.h" #include "cpu/random_sample_cpu.h"
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
#include "cuda/random_sample_cuda.cuh" #include "nvidia/random_sample_nvidia.cuh"
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
#include "metax/random_sample_metax.h" #include "metax/random_sample_metax.h"
...@@ -36,7 +36,7 @@ infiniopCreateRandomSampleDescriptor( ...@@ -36,7 +36,7 @@ infiniopCreateRandomSampleDescriptor(
CREATE(INFINI_DEVICE_CPU, cpu); CREATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda); CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax); CREATE(INFINI_DEVICE_METAX, metax);
...@@ -69,7 +69,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize( ...@@ -69,7 +69,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
GET(INFINI_DEVICE_CPU, cpu); GET(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda); GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax); GET(INFINI_DEVICE_METAX, metax);
...@@ -112,7 +112,7 @@ __C infiniStatus_t infiniopRandomSample( ...@@ -112,7 +112,7 @@ __C infiniStatus_t infiniopRandomSample(
CALCULATE(INFINI_DEVICE_CPU, cpu); CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda); CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax); CALCULATE(INFINI_DEVICE_METAX, metax);
...@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor( ...@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
DELETE(INFINI_DEVICE_CPU, cpu); DELETE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda); DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, metax); DELETE(INFINI_DEVICE_METAX, metax);
......
#ifndef __REARRANGE_CUDA_KERNEL_H__ #ifndef __REARRANGE_CUDA_KERNEL_H__
#define __REARRANGE_CUDA_KERNEL_H__ #define __REARRANGE_CUDA_KERNEL_H__
#include "../../../devices/cuda/cuda_common.cuh" #include "../../../devices/nvidia/nvidia_common.cuh"
#define ARRAY_TYPE_STRIDE ptrdiff_t #define ARRAY_TYPE_STRIDE ptrdiff_t
#define ARRAY_TYPE_SIZE size_t #define ARRAY_TYPE_SIZE size_t
......
#include "../../../devices/cuda/cuda_common.cuh" #include "../../../devices/nvidia/nvidia_common.cuh"
#include "../../../devices/cuda/cuda_kernel_common.cuh" #include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "../../../tensor.h" #include "../../../tensor.h"
#include "rearrange_cuda.cuh"
#include "rearrange_kernel.cuh" #include "rearrange_kernel.cuh"
#include "rearrange_nvidia.cuh"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <memory> #include <memory>
#include <stdint.h> #include <stdint.h>
#include <vector> #include <vector>
namespace op::rearrange::cuda { namespace op::rearrange::nvidia {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal; std::shared_ptr<device::nvidia::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -49,7 +49,7 @@ infiniStatus_t Descriptor::create( ...@@ -49,7 +49,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
std::move(*meta), std::move(*meta),
new Opaque{reinterpret_cast<device::cuda::Handle *>(handle)->internal()}, new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
handle->device, handle->device_id); handle->device, handle->device_id);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
...@@ -482,4 +482,4 @@ infiniStatus_t Descriptor::calculate( ...@@ -482,4 +482,4 @@ infiniStatus_t Descriptor::calculate(
return status; return status;
} }
} // namespace op::rearrange::cuda } // namespace op::rearrange::nvidia
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../rearrange.h" #include "../rearrange.h"
DESCRIPTOR(cuda) DESCRIPTOR(nvidia)
#endif // __REARRANGE_CUDA_H__ #endif // __REARRANGE_CUDA_H__
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
#include "cuda/rearrange_cuda.cuh" #include "nvidia/rearrange_nvidia.cuh"
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
#include "metax/rearrange_metax.h" #include "metax/rearrange_metax.h"
...@@ -40,7 +40,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor( ...@@ -40,7 +40,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor(
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda); CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax); CREATE(INFINI_DEVICE_METAX, metax);
...@@ -73,7 +73,7 @@ __C infiniStatus_t infiniopRearrange( ...@@ -73,7 +73,7 @@ __C infiniStatus_t infiniopRearrange(
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda); CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax); CALCULATE(INFINI_DEVICE_METAX, metax);
...@@ -104,7 +104,7 @@ __C infiniStatus_t infiniopDestroyRearrangeDescriptor( ...@@ -104,7 +104,7 @@ __C infiniStatus_t infiniopDestroyRearrangeDescriptor(
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda); DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, metax); DELETE(INFINI_DEVICE_METAX, metax);
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(relu, cpu, cpu) ELEMENTWISE_DESCRIPTOR(relu, cpu)
namespace op::relu::cpu { namespace op::relu::cpu {
typedef struct ReluOp { typedef struct ReluOp {
......
#include "../../../devices/cuda/cuda_common.cuh" #include "../../../devices/nvidia/nvidia_common.cuh"
#include "rms_norm_nvidia.cuh" #include "rms_norm_nvidia.cuh"
#include "../../../devices/cuda/cuda_kernel_common.cuh" #include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include <cub/block/block_reduce.cuh> #include <cub/block/block_reduce.cuh>
#include "../../../reduce/cuda/reduce.cuh" #include "../../../reduce/cuda/reduce.cuh"
...@@ -23,7 +23,7 @@ INFINIOP_CUDA_KERNEL rmsnormKernel( ...@@ -23,7 +23,7 @@ INFINIOP_CUDA_KERNEL rmsnormKernel(
namespace op::rms_norm::nvidia { namespace op::rms_norm::nvidia {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal; std::shared_ptr<device::nvidia::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create( ...@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create(
} }
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
new Opaque{reinterpret_cast<device::cuda::Handle *>(handle)->internal()}, new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
std::move(info), std::move(info),
0, 0,
handle->device, handle->device_id); handle->device, handle->device_id);
......
#include "../../../devices/cuda/cuda_common.cuh" #include "../../../devices/nvidia/nvidia_common.cuh"
#include "rope_nvidia.cuh" #include "rope_nvidia.cuh"
#include "../../../devices/cuda/cuda_kernel_common.cuh" #include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "../cuda/kernel.cuh" #include "../cuda/kernel.cuh"
...@@ -28,7 +28,7 @@ INFINIOP_CUDA_KERNEL ropeThreadPerItemKernel( ...@@ -28,7 +28,7 @@ INFINIOP_CUDA_KERNEL ropeThreadPerItemKernel(
namespace op::rope::nvidia { namespace op::rope::nvidia {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::cuda::Handle::Internal> internal; std::shared_ptr<device::nvidia::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create( ...@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t sin_desc, infiniopTensorDescriptor_t sin_desc,
infiniopTensorDescriptor_t cos_desc) { infiniopTensorDescriptor_t cos_desc) {
auto handle = reinterpret_cast<device::cuda::Handle *>(handle_); auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc); auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc);
CHECK_RESULT(info); CHECK_RESULT(info);
...@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create( ...@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
info.take(), info.take(),
0, 0,
new Opaque{reinterpret_cast<device::cuda::Handle *>(handle)->internal()}, new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
handle->device, handle->device,
handle->device_id); handle->device_id);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment