Unverified Commit 85bc98ac authored by qinyiqun's avatar qinyiqun Committed by GitHub
Browse files

ISSUE/628 适配QY C610 GPU,增加编译选项,适配已有算子。添加bge类模型所需的算子, (#629)



* ISSUE/628 适配QY C610 GPU,增加编译选项,适配已有算子。添加bge类模型所需的算子,包括gelu,layer_norm,lp_norm(支持l1,l2 norm),relu,softmax,tanh。

---------
Co-authored-by: xgqdut2016 <kenan_gewei@163.com>
Co-authored-by: xgqdut2016 <140036308+xgqdut2016@users.noreply.github.com>
parent 7c397dd2
......@@ -100,6 +100,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
| `--use-mc=[y\|n]` | 是否沐曦 GPU 接口实现使用maca SDK | n
| `--moore-gpu=[y\|n]` | 是否编译摩尔线程 GPU 接口实现 | n
| `--iluvatar-gpu=[y\|n]` | 是否编译天数智芯 GPU 接口实现 | n
| `--qy-gpu=[y\|n]` | 是否编译QY GPU 接口实现 | n
| `--hygon-dcu=[y\|n]` | 是否编译海光 DCU 接口实现 | n
| `--kunlun-xpu=[y\|n]` | 是否编译昆仑 XPU 接口实现 | n
| `--ninetoothed=[y\|n]` | 是否编译九齿实现 | n
......@@ -141,6 +142,9 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
# linux系统:--cuda=$CUDA_HOME
xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv
# QY
xmake f --qy-gpu=true --cuda=$CUDA_HOME -cv
# 寒武纪
xmake f --cambricon-mlu=true -cv
......
......@@ -46,6 +46,7 @@ typedef enum {
INFINI_DEVICE_ILUVATAR = 6,
INFINI_DEVICE_KUNLUN = 7,
INFINI_DEVICE_HYGON = 8,
INFINI_DEVICE_QY = 9,
INFINI_DEVICE_TYPE_COUNT
} infiniDevice_t;
......
......@@ -21,6 +21,7 @@ public:
ILUVATAR = INFINI_DEVICE_ILUVATAR,
KUNLUN = INFINI_DEVICE_KUNLUN,
HYGON = INFINI_DEVICE_HYGON,
QY = INFINI_DEVICE_QY,
COUNT = INFINI_DEVICE_TYPE_COUNT,
};
......
......@@ -6,25 +6,30 @@
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/logsoftmax.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/layer_norm.h"
#include "infiniop/ops/logsoftmax.h"
#include "infiniop/ops/lp_norm.h"
#include "infiniop/ops/mul.h"
#include "infiniop/ops/ones.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
#include "infiniop/ops/relu.h"
#include "infiniop/ops/rms_norm.h"
#include "infiniop/ops/rope.h"
#include "infiniop/ops/sigmoid.h"
#include "infiniop/ops/silu.h"
#include "infiniop/ops/softmax.h"
#include "infiniop/ops/softplus.h"
#include "infiniop/ops/sub.h"
#include "infiniop/ops/swiglu.h"
#include "infiniop/ops/tanh.h"
#include "infiniop/ops/topkrouter.h"
#include "infiniop/ops/zeros.h"
#include "infiniop/ops/ones.h"
#include "infiniop/ops/topksoftmax.h"
#include "infiniop/ops/sigmoid.h"
#include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h"
#endif // __INFINIOP_API_H__
#ifndef __INFINIOP_GELU_API_H__
#define __INFINIOP_GELU_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor for the GELU (Gaussian Error Linear Unit) operator.
typedef struct InfiniopDescriptor *infiniopGeluDescriptor_t;
/// @brief Create a GELU operator descriptor.
/// @param handle   Library handle for the target device.
/// @param desc_ptr Receives the newly created descriptor.
/// @param output   Tensor descriptor of the result tensor.
/// @param input    Tensor descriptor of the operand tensor.
// NOTE: fixed parameter-name typo "intput" -> "input" (declaration-only change,
// does not affect the ABI or existing callers).
__C __export infiniStatus_t infiniopCreateGeluDescriptor(infiniopHandle_t handle,
                                                         infiniopGeluDescriptor_t *desc_ptr,
                                                         infiniopTensorDescriptor_t output,
                                                         infiniopTensorDescriptor_t input);
/// @brief Query the scratch-workspace size (in bytes) required by infiniopGelu.
__C __export infiniStatus_t infiniopGetGeluWorkspaceSize(infiniopGeluDescriptor_t desc, size_t *size);
/// @brief Apply GELU element-wise: output = gelu(input).
/// @param workspace      Device scratch buffer of at least the queried size.
/// @param workspace_size Size of @p workspace in bytes.
/// @param stream         Device stream/queue to enqueue the kernel on.
__C __export infiniStatus_t infiniopGelu(infiniopGeluDescriptor_t desc,
                                         void *workspace,
                                         size_t workspace_size,
                                         void *output,
                                         const void *input,
                                         void *stream);
/// @brief Destroy a descriptor created by infiniopCreateGeluDescriptor.
__C __export infiniStatus_t infiniopDestroyGeluDescriptor(infiniopGeluDescriptor_t desc);
#endif
#ifndef __INFINIOP_LAYER_NORM_API_H__
#define __INFINIOP_LAYER_NORM_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor for the LayerNorm operator.
typedef struct InfiniopDescriptor *infiniopLayerNormDescriptor_t;
/// @brief Create a LayerNorm operator descriptor.
/// @param handle                       Library handle for the target device.
/// @param desc_ptr                     Receives the newly created descriptor.
/// @param output_desc                  Descriptor of the normalized output tensor.
/// @param input_standardization_desc   Descriptor of the standardized-input side output.
/// @param input_std_deviation_desc     Descriptor of the per-row std-deviation side output.
/// @param input_desc                   Descriptor of the input tensor.
/// @param weight_desc                  Descriptor of the scale (gamma) tensor.
/// @param bias_desc                    Descriptor of the shift (beta) tensor.
/// @param eps                          Small constant added for numerical stability.
__C __export infiniStatus_t infiniopCreateLayerNormDescriptor(
    infiniopHandle_t handle,
    infiniopLayerNormDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output_desc,
    infiniopTensorDescriptor_t input_standardization_desc,
    infiniopTensorDescriptor_t input_std_deviation_desc,
    infiniopTensorDescriptor_t input_desc,
    infiniopTensorDescriptor_t weight_desc,
    infiniopTensorDescriptor_t bias_desc,
    float eps);
/// @brief Query the scratch-workspace size (in bytes) required by infiniopLayerNorm.
__C __export infiniStatus_t infiniopGetLayerNormWorkspaceSize(infiniopLayerNormDescriptor_t desc, size_t *size);
/// @brief Run LayerNorm; also emits the standardized input and std-deviation buffers.
/// @param workspace      Device scratch buffer of at least the queried size.
/// @param workspace_size Size of @p workspace in bytes.
/// @param stream         Device stream/queue to enqueue the kernel on.
__C __export infiniStatus_t infiniopLayerNorm(infiniopLayerNormDescriptor_t desc,
                                              void *workspace,
                                              size_t workspace_size,
                                              void *output,
                                              void *input_standardization,
                                              void *input_std_deviation,
                                              const void *input,
                                              const void *weight,
                                              const void *bias,
                                              void *stream);
/// @brief Destroy a descriptor created by infiniopCreateLayerNormDescriptor.
__C __export infiniStatus_t infiniopDestroyLayerNormDescriptor(infiniopLayerNormDescriptor_t desc);
#endif
#ifndef __INFINIOP_LP_NORM_API_H__
#define __INFINIOP_LP_NORM_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor for the Lp-norm operator (supports L1 and L2 normalization).
typedef struct InfiniopDescriptor *infiniopLPNormDescriptor_t;
/// @brief Create an Lp-norm operator descriptor.
/// @param handle      Library handle for the target device.
/// @param desc_ptr    Receives the newly created descriptor.
/// @param output_desc Descriptor of the normalized output tensor.
/// @param input_desc  Descriptor of the input tensor.
/// @param axis        Axis along which the norm is computed.
/// @param p           Norm order (e.g. 1 for L1, 2 for L2).
/// @param eps         Small constant added for numerical stability.
__C __export infiniStatus_t infiniopCreateLPNormDescriptor(
    infiniopHandle_t handle,
    infiniopLPNormDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output_desc,
    infiniopTensorDescriptor_t input_desc,
    int axis,
    int p,
    float eps);
/// @brief Query the scratch-workspace size (in bytes) required by infiniopLPNorm.
__C __export infiniStatus_t infiniopGetLPNormWorkspaceSize(infiniopLPNormDescriptor_t desc, size_t *size);
/// @brief Normalize input by its Lp norm along the configured axis.
/// @param workspace      Device scratch buffer of at least the queried size.
/// @param workspace_size Size of @p workspace in bytes.
/// @param stream         Device stream/queue to enqueue the kernel on.
__C __export infiniStatus_t infiniopLPNorm(infiniopLPNormDescriptor_t desc,
                                           void *workspace,
                                           size_t workspace_size,
                                           void *output,
                                           const void *input,
                                           void *stream);
/// @brief Destroy a descriptor created by infiniopCreateLPNormDescriptor.
__C __export infiniStatus_t infiniopDestroyLPNormDescriptor(infiniopLPNormDescriptor_t desc);
#endif
......@@ -10,6 +10,8 @@ __C __export infiniStatus_t infiniopCreateReluDescriptor(infiniopHandle_t handle
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);
__C __export infiniStatus_t infiniopGetReluWorkspaceSize(infiniopReluDescriptor_t desc, size_t *size);
__C __export infiniStatus_t infiniopRelu(infiniopReluDescriptor_t desc,
void *workspace,
size_t workspace_size,
......
#ifndef __INFINIOP_SOFTMAX_API_H__
#define __INFINIOP_SOFTMAX_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor for the Softmax operator.
typedef struct InfiniopDescriptor *infiniopSoftmaxDescriptor_t;
/// @brief Create a Softmax operator descriptor.
/// @param handle   Library handle for the target device.
/// @param desc_ptr Receives the newly created descriptor.
/// @param y_desc   Descriptor of the output tensor.
/// @param x_desc   Descriptor of the input tensor.
/// @param axis     Axis along which softmax is computed.
__C __export infiniStatus_t infiniopCreateSoftmaxDescriptor(
    infiniopHandle_t handle,
    infiniopSoftmaxDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    int axis);
/// @brief Query the scratch-workspace size (in bytes) required by infiniopSoftmax.
__C __export infiniStatus_t infiniopGetSoftmaxWorkspaceSize(infiniopSoftmaxDescriptor_t desc, size_t *size);
/// @brief Compute y = softmax(x) along the configured axis.
/// @param workspace      Device scratch buffer of at least the queried size.
/// @param workspace_size Size of @p workspace in bytes.
/// @param stream         Device stream/queue to enqueue the kernel on.
__C __export infiniStatus_t infiniopSoftmax(
    infiniopSoftmaxDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream);
/// @brief Destroy a descriptor created by infiniopCreateSoftmaxDescriptor.
__C __export infiniStatus_t infiniopDestroySoftmaxDescriptor(infiniopSoftmaxDescriptor_t desc);
#endif
#ifndef __INFINIOP_TANH_API_H__
#define __INFINIOP_TANH_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor for the element-wise Tanh operator.
typedef struct InfiniopDescriptor *infiniopTanhDescriptor_t;
/// @brief Create a Tanh operator descriptor.
/// @param handle   Library handle for the target device.
/// @param desc_ptr Receives the newly created descriptor.
/// @param output   Tensor descriptor of the result tensor.
/// @param input    Tensor descriptor of the operand tensor.
__C __export infiniStatus_t infiniopCreateTanhDescriptor(infiniopHandle_t handle,
                                                         infiniopTanhDescriptor_t *desc_ptr,
                                                         infiniopTensorDescriptor_t output,
                                                         infiniopTensorDescriptor_t input);
/// @brief Query the scratch-workspace size (in bytes) required by infiniopTanh.
__C __export infiniStatus_t infiniopGetTanhWorkspaceSize(infiniopTanhDescriptor_t desc, size_t *size);
/// @brief Apply tanh element-wise: output = tanh(input).
/// @param workspace      Device scratch buffer of at least the queried size.
/// @param workspace_size Size of @p workspace in bytes.
/// @param stream         Device stream/queue to enqueue the kernel on.
__C __export infiniStatus_t infiniopTanh(infiniopTanhDescriptor_t desc,
                                         void *workspace,
                                         size_t workspace_size,
                                         void *output,
                                         const void *input,
                                         void *stream);
/// @brief Destroy a descriptor created by infiniopCreateTanhDescriptor.
__C __export infiniStatus_t infiniopDestroyTanhDescriptor(infiniopTanhDescriptor_t desc);
#endif
......@@ -108,4 +108,5 @@ _TORCH_DEVICE_MAP = {
_infinicore.Device.Type.ILUVATAR: "cuda",
_infinicore.Device.Type.KUNLUN: "cuda",
_infinicore.Device.Type.HYGON: "cuda",
_infinicore.Device.Type.QY: "cuda",
}
......@@ -16,21 +16,29 @@ def run_tests(args):
"attention.py",
"causal_softmax.py",
"clip.py",
"conv.py",
#"dequantize_awq.py",
"gelu.py",
"gemm.py",
#"layer_norm.py",
"logsoftmax.py",
#"lp_norm.py",
"mul.py",
"ones.py",
"random_sample.py",
"rearrange.py",
"relu.py",
"rms_norm.py",
"rope.py",
"sigmoid.py",
#"softmax.py",
"softplus.py",
"sub.py",
"swiglu.py",
"softplus.py",
"ones.py",
"zeros.py",
"sigmoid.py",
"tanh.py",
"topkrouter.py",
"topksoftmax.py",
"zeros.py",
]:
result = subprocess.run(
f"python {test} {args} --debug", text=True, encoding="utf-8", shell=True
......
......@@ -12,7 +12,7 @@ void printUsage() {
std::cout << "infiniccl-test --<device>" << std::endl
<< std::endl;
std::cout << " --<device>" << std::endl;
std::cout << " Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|kunlun|hygon)." << std::endl
std::cout << " Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon)." << std::endl
<< std::endl;
std::cout << "The program will run tests on all visible devices of the specified device type."
<< " Use Environmental Variables such as CUDA_VSIBLE_DEVICES to limit visible device IDs.";
......@@ -43,6 +43,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
else PARSE_DEVICE("--metax", INFINI_DEVICE_METAX)
else PARSE_DEVICE("--moore", INFINI_DEVICE_MOORE)
else PARSE_DEVICE("--iluvatar", INFINI_DEVICE_ILUVATAR)
else PARSE_DEVICE("--qy", INFINI_DEVICE_QY)
else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
else PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON)
else {
......
......@@ -4,7 +4,7 @@
#include "../infiniccl_impl.h"
// Windows does not support CUDA
#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API)) && defined(ENABLE_CCL) && !defined(_WIN32)
#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)) && defined(ENABLE_CCL) && !defined(_WIN32)
INFINICCL_DEVICE_API_IMPL(cuda)
#else
INFINICCL_DEVICE_API_NOOP(cuda)
......
......@@ -20,6 +20,7 @@ __C infiniStatus_t infinicclCommInitAll(
switch (device_type) {
COMM_INIT_ALL(INFINI_DEVICE_NVIDIA, cuda);
COMM_INIT_ALL(INFINI_DEVICE_ILUVATAR, cuda);
COMM_INIT_ALL(INFINI_DEVICE_QY, cuda);
COMM_INIT_ALL(INFINI_DEVICE_HYGON, cuda);
COMM_INIT_ALL(INFINI_DEVICE_ASCEND, ascend);
COMM_INIT_ALL(INFINI_DEVICE_CAMBRICON, cambricon);
......@@ -45,6 +46,7 @@ __C infiniStatus_t infinicclCommDestroy(infinicclComm_t comm) {
switch (comm->device_type) {
COMM_DESTROY(INFINI_DEVICE_NVIDIA, cuda);
COMM_DESTROY(INFINI_DEVICE_ILUVATAR, cuda);
COMM_DESTROY(INFINI_DEVICE_QY, cuda);
COMM_DESTROY(INFINI_DEVICE_HYGON, cuda);
COMM_DESTROY(INFINI_DEVICE_ASCEND, ascend);
COMM_DESTROY(INFINI_DEVICE_CAMBRICON, cambricon);
......@@ -77,6 +79,7 @@ __C infiniStatus_t infinicclAllReduce(
switch (comm->device_type) {
ALL_REDUCE(INFINI_DEVICE_NVIDIA, cuda);
ALL_REDUCE(INFINI_DEVICE_ILUVATAR, cuda);
ALL_REDUCE(INFINI_DEVICE_QY, cuda);
ALL_REDUCE(INFINI_DEVICE_HYGON, cuda);
ALL_REDUCE(INFINI_DEVICE_ASCEND, ascend);
ALL_REDUCE(INFINI_DEVICE_CAMBRICON, cambricon);
......
......@@ -63,6 +63,7 @@ xmake build infinicore-test
./infinicore-test --metax
./infinicore-test --moore
./infinicore-test --iluvatar
./infinicore-test --qy
./infinicore-test --kunlun
./infinicore-test --hygon
```
......
......@@ -39,6 +39,7 @@ void printUsage() {
<< " metax" << std::endl
<< " moore" << std::endl
<< " iluvatar" << std::endl
<< " qy" << std::endl
<< " kunlun" << std::endl
<< " hygon" << std::endl
<< std::endl
......@@ -77,6 +78,8 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
args.device_type = INFINI_DEVICE_MOORE;
} else if (arg == "--iluvatar") {
args.device_type = INFINI_DEVICE_ILUVATAR;
} else if (arg == "--qy") {
args.device_type = INFINI_DEVICE_QY;
} else if (arg == "--kunlun") {
args.device_type = INFINI_DEVICE_KUNLUN;
} else if (arg == "--hygon") {
......
......@@ -35,6 +35,8 @@ std::string Device::toString(const Type &type) {
return "MOORE";
case Type::ILUVATAR:
return "ILUVATAR";
case Type::QY:
return "QY";
case Type::KUNLUN:
return "KUNLUN";
case Type::HYGON:
......
......@@ -19,6 +19,7 @@ inline void bind(py::module &m) {
.value("METAX", Device::Type::METAX)
.value("MOORE", Device::Type::MOORE)
.value("ILUVATAR", Device::Type::ILUVATAR)
.value("QY", Device::Type::QY)
.value("KUNLUN", Device::Type::KUNLUN)
.value("HYGON", Device::Type::HYGON)
.value("COUNT", Device::Type::COUNT);
......
......@@ -22,7 +22,7 @@ void printUsage() {
std::cout << " Path to the test gguf file" << std::endl
<< std::endl;
std::cout << " --<device>[:id]" << std::endl;
std::cout << " (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|kunlun|hygon) and device ID (optional). CPU by default." << std::endl
std::cout << " (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon) and device ID (optional). CPU by default." << std::endl
<< std::endl;
std::cout << " --warmup <warmups>" << std::endl;
std::cout << " (Optional) Number of warmups to perform before timing. Default to 0." << std::endl
......@@ -77,6 +77,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
PARSE_DEVICE("--metax", INFINI_DEVICE_METAX)
PARSE_DEVICE("--moore", INFINI_DEVICE_MOORE)
PARSE_DEVICE("--iluvatar", INFINI_DEVICE_ILUVATAR)
PARSE_DEVICE("--qy", INFINI_DEVICE_QY)
PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON)
else if (arg == "--warmup" && i + 1 < argc) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment