Commit 7e2a4c08 authored by wooway777's avatar wooway777
Browse files

issue/988 - adapt to ali ppu

parent bf0c825d
......@@ -47,6 +47,7 @@ typedef enum {
INFINI_DEVICE_KUNLUN = 7,
INFINI_DEVICE_HYGON = 8,
INFINI_DEVICE_QY = 9,
INFINI_DEVICE_ALI = 10,
INFINI_DEVICE_TYPE_COUNT
} infiniDevice_t;
......
......@@ -22,6 +22,7 @@ public:
KUNLUN = INFINI_DEVICE_KUNLUN,
HYGON = INFINI_DEVICE_HYGON,
QY = INFINI_DEVICE_QY,
ALI = INFINI_DEVICE_ALI,
COUNT = INFINI_DEVICE_TYPE_COUNT,
};
......
......@@ -82,6 +82,7 @@ _TORCH_DEVICE_MAP = {
_infinicore.Device.Type.KUNLUN: "cuda",
_infinicore.Device.Type.HYGON: "cuda",
_infinicore.Device.Type.QY: "cuda",
_infinicore.Device.Type.ALI: "cuda",
}
......
......@@ -12,7 +12,7 @@ void printUsage() {
std::cout << "infiniccl-test --<device>" << std::endl
<< std::endl;
std::cout << " --<device>" << std::endl;
std::cout << " Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon)." << std::endl
std::cout << " Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon|ali)." << std::endl
<< std::endl;
std::cout << "The program will run tests on all visible devices of the specified device type."
<< " Use Environmental Variables such as CUDA_VSIBLE_DEVICES to limit visible device IDs.";
......@@ -46,6 +46,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
else PARSE_DEVICE("--qy", INFINI_DEVICE_QY)
else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
else PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON)
else PARSE_DEVICE("--ali", INFINI_DEVICE_ALI)
else {
printUsage();
}
......
......@@ -4,7 +4,7 @@
#include "../infiniccl_impl.h"
// Windows does not support CUDA
#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)) && defined(ENABLE_CCL) && !defined(_WIN32)
#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API)) && defined(ENABLE_CCL) && !defined(_WIN32)
INFINICCL_DEVICE_API_IMPL(cuda)
#else
INFINICCL_DEVICE_API_NOOP(cuda)
......
......@@ -27,6 +27,7 @@ __C infiniStatus_t infinicclCommInitAll(
COMM_INIT_ALL(INFINI_DEVICE_METAX, metax);
COMM_INIT_ALL(INFINI_DEVICE_MOORE, moore);
COMM_INIT_ALL(INFINI_DEVICE_KUNLUN, kunlun);
COMM_INIT_ALL(INFINI_DEVICE_ALI, cuda);
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
......@@ -53,6 +54,7 @@ __C infiniStatus_t infinicclCommDestroy(infinicclComm_t comm) {
COMM_DESTROY(INFINI_DEVICE_METAX, metax);
COMM_DESTROY(INFINI_DEVICE_MOORE, moore);
COMM_DESTROY(INFINI_DEVICE_KUNLUN, kunlun);
COMM_DESTROY(INFINI_DEVICE_ALI, cuda);
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
......@@ -86,6 +88,7 @@ __C infiniStatus_t infinicclAllReduce(
ALL_REDUCE(INFINI_DEVICE_METAX, metax);
ALL_REDUCE(INFINI_DEVICE_MOORE, moore);
ALL_REDUCE(INFINI_DEVICE_KUNLUN, kunlun);
ALL_REDUCE(INFINI_DEVICE_ALI, cuda);
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
......@@ -66,6 +66,7 @@ xmake build infinicore-test
./infinicore-test --qy
./infinicore-test --kunlun
./infinicore-test --hygon
./infinicore-test --ali
```
### Customize Test Parameters
......
......@@ -42,6 +42,7 @@ void printUsage() {
<< " qy" << std::endl
<< " kunlun" << std::endl
<< " hygon" << std::endl
<< " ali" << std::endl
<< std::endl
<< "Available tests:" << std::endl
<< " basic - Basic memory allocation and deallocation tests" << std::endl
......@@ -84,6 +85,8 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
args.device_type = INFINI_DEVICE_KUNLUN;
} else if (arg == "--hygon") {
args.device_type = INFINI_DEVICE_HYGON;
} else if (arg == "--ali") {
args.device_type = INFINI_DEVICE_ALI;
} else if (arg == "--test") {
if (i + 1 >= argc) {
std::cerr << "Error: --test requires a test name" << std::endl;
......
......@@ -41,6 +41,8 @@ std::string Device::toString(const Type &type) {
return "KUNLUN";
case Type::HYGON:
return "HYGON";
case Type::ALI:
return "ALI";
case Type::COUNT:
return "COUNT";
default:
......
......@@ -45,7 +45,7 @@ Embedding::Embedding(size_t num_embeddings,
Tensor Embedding::forward(const Tensor &indices) const {
// TODO: Implement on-device embedding for all devices, then remove the condition and the classic approach
auto device_type = device_.getType();
if (device_type == Device::Type::NVIDIA || device_type == Device::Type::ILUVATAR || device_type == Device::Type::METAX || device_type == Device::Type::MOORE) {
if (device_type == Device::Type::NVIDIA || device_type == Device::Type::ILUVATAR || device_type == Device::Type::METAX || device_type == Device::Type::MOORE || device_type == Device::Type::ALI) {
// Use op::embedding which supports device-side input and batch dimension
return op::embedding(indices->contiguous()->to(device_), weight_);
}
......
......@@ -30,7 +30,8 @@ void RMSNorm::forward_inplace(Tensor &x, Tensor &residual) const {
|| device_.getType() == Device::Type::NVIDIA
|| device_.getType() == Device::Type::ILUVATAR
|| device_.getType() == Device::Type::METAX
|| device_.getType() == Device::Type::MOORE) {
|| device_.getType() == Device::Type::MOORE
|| device_.getType() == Device::Type::ALI) {
op::add_rms_norm_inplace(x, residual, weight_, static_cast<float>(eps_));
} else {
op::add_(residual, x, residual);
......
......@@ -22,6 +22,7 @@ inline void bind(py::module &m) {
.value("QY", Device::Type::QY)
.value("KUNLUN", Device::Type::KUNLUN)
.value("HYGON", Device::Type::HYGON)
.value("ALI", Device::Type::ALI)
.value("COUNT", Device::Type::COUNT);
device
......
......@@ -22,7 +22,7 @@ void printUsage() {
std::cout << " Path to the test gguf file" << std::endl
<< std::endl;
std::cout << " --<device>[:id]" << std::endl;
std::cout << " (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon) and device ID (optional). CPU by default." << std::endl
std::cout << " (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon|ali) and device ID (optional). CPU by default." << std::endl
<< std::endl;
std::cout << " --warmup <warmups>" << std::endl;
std::cout << " (Optional) Number of warmups to perform before timing. Default to 0." << std::endl
......@@ -80,6 +80,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
PARSE_DEVICE("--qy", INFINI_DEVICE_QY)
PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON)
PARSE_DEVICE("--ali", INFINI_DEVICE_ALI)
else if (arg == "--warmup" && i + 1 < argc) {
args.warmups = std::stoi(argv[++i]);
}
......
......@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/cpu_handle.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API)
#include "nvidia/nvidia_handle.h"
#endif
#ifdef ENABLE_CAMBRICON_API
......@@ -47,6 +47,9 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, iluvatar);
#endif
#ifdef ENABLE_ALI_API
CREATE(INFINI_DEVICE_ALI, ali);
#endif
#ifdef ENABLE_QY_API
CREATE(INFINI_DEVICE_QY, qy);
#endif
......@@ -93,6 +96,9 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
#ifdef ENABLE_ILUVATAR_API
DELETE(INFINI_DEVICE_ILUVATAR, iluvatar);
#endif
#ifdef ENABLE_ALI_API
DELETE(INFINI_DEVICE_ALI, ali);
#endif
#ifdef ENABLE_QY_API
DELETE(INFINI_DEVICE_QY, qy);
#endif
......
......@@ -110,6 +110,18 @@ infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) {
} // namespace iluvatar
namespace ali {
Handle::Handle(int device_id)
: nvidia::Handle(INFINI_DEVICE_ALI, device_id) {}
infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) {
*handle_ptr = new Handle(device_id);
return INFINI_STATUS_SUCCESS;
}
} // namespace ali
namespace qy {
Handle::Handle(int device_id)
......
......@@ -35,6 +35,17 @@ public:
} // namespace iluvatar
namespace ali {
struct Handle : public nvidia::Handle {
Handle(int device_id);
public:
static infiniStatus_t create(InfiniopHandle **handle_ptr, int device_id);
};
} // namespace ali
namespace qy {
struct Handle : public nvidia::Handle {
......
......@@ -53,7 +53,7 @@ exp_(const float val) {
return expf(val);
}
#if !defined(ENABLE_ILUVATAR_API) && !defined(ENABLE_QY_API) && !defined(ENABLE_HYGON_API)
#if !defined(ENABLE_ILUVATAR_API) && !defined(ENABLE_QY_API) && !defined(ENABLE_HYGON_API) && !defined(ENABLE_ALI_API)
__forceinline__ __device__ long double
exp_(const long double val) {
return expl(val);
......
......@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/add_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API)
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_ALI_API)
#include "nvidia/add_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
......@@ -48,6 +48,9 @@ __C infiniStatus_t infiniopCreateAddDescriptor(
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
CREATE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
CREATE(INFINI_DEVICE_QY, nvidia);
#endif
......@@ -88,6 +91,9 @@ __C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, siz
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
GET(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
GET(INFINI_DEVICE_QY, nvidia);
#endif
......@@ -136,6 +142,9 @@ __C infiniStatus_t infiniopAdd(
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
CALCULATE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
CALCULATE(INFINI_DEVICE_QY, nvidia);
#endif
......@@ -178,6 +187,9 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) {
#ifdef ENABLE_ILUVATAR_API
DELETE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
DELETE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
DELETE(INFINI_DEVICE_QY, nvidia);
#endif
......
......@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/add_rms_norm_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API)
#include "nvidia/add_rms_norm_nvidia.cuh"
#endif
#ifdef ENABLE_ASCEND_API
......@@ -59,6 +59,9 @@ __C infiniStatus_t infiniopCreateAddRMSNormDescriptor(
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
CREATE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_MOORE_API
CREATE(INFINI_DEVICE_MOORE, moore);
#endif
......@@ -98,6 +101,9 @@ __C infiniStatus_t infiniopGetAddRMSNormWorkspaceSize(infiniopAddRMSNormDescript
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
GET(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_MOORE_API
GET(INFINI_DEVICE_MOORE, moore);
#endif
......@@ -148,6 +154,9 @@ __C infiniStatus_t infiniopAddRMSNorm(
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
CALCULATE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_MOORE_API
CALCULATE(INFINI_DEVICE_MOORE, moore);
#endif
......@@ -189,6 +198,9 @@ __C infiniStatus_t infiniopDestroyAddRMSNormDescriptor(infiniopAddRMSNormDescrip
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
DESTROY(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_MOORE_API
DESTROY(INFINI_DEVICE_MOORE, moore);
#endif
......
......@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/causal_softmax_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API)
#include "nvidia/causal_softmax_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
......@@ -48,6 +48,9 @@ __C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
CREATE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
CREATE(INFINI_DEVICE_QY, nvidia);
#endif
......@@ -90,6 +93,9 @@ __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDe
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
GET(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
GET(INFINI_DEVICE_QY, nvidia);
#endif
......@@ -137,6 +143,9 @@ __C infiniStatus_t infiniopCausalSoftmax(
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
CALCULATE(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
CALCULATE(INFINI_DEVICE_QY, nvidia);
#endif
......@@ -179,6 +188,9 @@ __C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxD
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_ALI_API
DESTROY(INFINI_DEVICE_ALI, nvidia);
#endif
#ifdef ENABLE_QY_API
DESTROY(INFINI_DEVICE_QY, nvidia);
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment