Commit 9b32b4b1 authored by Catheriany's avatar Catheriany
Browse files

Merge remote-tracking branch 'origin/main' into issue/150

parents 15bcbdfc 4799ddbf
...@@ -175,6 +175,10 @@ options: ...@@ -175,6 +175,10 @@ options:
{ {
"clangd.arguments": [ "clangd.arguments": [
"--compile-commands-dir=.vscode" "--compile-commands-dir=.vscode"
] ],
"xmake.additionalConfigArguments": [
// 在这里配置 XMAKE_CONFIG_FLAGS
"--nv-gpu=y"
],
} }
``` ```
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "infiniop/ops/attention.h" #include "infiniop/ops/attention.h"
#include "infiniop/ops/avg_pool.h" #include "infiniop/ops/avg_pool.h"
#include "infiniop/ops/causal_softmax.h" #include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h" #include "infiniop/ops/conv.h"
#include "infiniop/ops/expand.h" #include "infiniop/ops/expand.h"
#include "infiniop/ops/gemm.h" #include "infiniop/ops/gemm.h"
......
#ifndef __INFINIOP_CLIP_API_H__
#define __INFINIOP_CLIP_API_H__
#include "../operator_descriptor.h"
// Opaque handle to a clip operator descriptor.
typedef struct InfiniopDescriptor *infiniopClipDescriptor_t;
// Creates a clip descriptor from the tensor descriptors of the output `y`,
// the input `x`, and the bound tensors `min_val` / `max_val`; the new
// descriptor is returned through `desc_ptr`.
// NOTE(review): presumably computes y = clamp(x, min_val, max_val)
// element-wise - confirm against the operator implementation.
__C __export infiniStatus_t infiniopCreateClipDescriptor(infiniopHandle_t handle,
infiniopClipDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t min_val,
infiniopTensorDescriptor_t max_val);
// Reports through `size` the workspace size the descriptor requires
// (presumably in bytes - TODO confirm).
__C __export infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, size_t *size);
// Runs the clip operator: `y` is the output buffer, `x`, `min_val` and
// `max_val` are read-only inputs, and `workspace` / `workspace_size` describe
// the scratch buffer. `stream` is the execution stream; the tests in this
// project pass nullptr for it.
__C __export infiniStatus_t infiniopClip(infiniopClipDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
const void *min_val,
const void *max_val,
void *stream);
// Destroys a descriptor created by infiniopCreateClipDescriptor.
__C __export infiniStatus_t infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc);
#endif
...@@ -6,10 +6,10 @@ ...@@ -6,10 +6,10 @@
typedef struct InfiniopDescriptor *infiniopMulDescriptor_t; typedef struct InfiniopDescriptor *infiniopMulDescriptor_t;
__C __export infiniStatus_t infiniopCreateMulDescriptor(infiniopHandle_t handle, __C __export infiniStatus_t infiniopCreateMulDescriptor(infiniopHandle_t handle,
infiniopMulDescriptor_t *desc_ptr, infiniopMulDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c, infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a, infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b); infiniopTensorDescriptor_t b);
__C __export infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, size_t *size); __C __export infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, size_t *size);
...@@ -20,7 +20,7 @@ __C __export infiniStatus_t infiniopMul(infiniopMulDescriptor_t desc, ...@@ -20,7 +20,7 @@ __C __export infiniStatus_t infiniopMul(infiniopMulDescriptor_t desc,
const void *a, const void *a,
const void *b, const void *b,
void *stream); void *stream);
__C __export infiniStatus_t infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc); __C __export infiniStatus_t infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc);
#endif #endif
...@@ -18,6 +18,7 @@ def run_tests(args): ...@@ -18,6 +18,7 @@ def run_tests(args):
"rms_norm.py", "rms_norm.py",
"rope.py", "rope.py",
"swiglu.py", "swiglu.py",
"attention.py",
]: ]:
result = subprocess.run( result = subprocess.run(
f"python {test} {args}", text=True, encoding="utf-8", shell=True f"python {test} {args}", text=True, encoding="utf-8", shell=True
......
...@@ -9,6 +9,9 @@ DECLARE_INFINIOP_TEST(gemm) ...@@ -9,6 +9,9 @@ DECLARE_INFINIOP_TEST(gemm)
DECLARE_INFINIOP_TEST(random_sample) DECLARE_INFINIOP_TEST(random_sample)
DECLARE_INFINIOP_TEST(mul) DECLARE_INFINIOP_TEST(mul)
DECLARE_INFINIOP_TEST(rope) DECLARE_INFINIOP_TEST(rope)
DECLARE_INFINIOP_TEST(clip)
DECLARE_INFINIOP_TEST(swiglu)
DECLARE_INFINIOP_TEST(add)
#define REGISTER_INFINIOP_TEST(name) \ #define REGISTER_INFINIOP_TEST(name) \
{ \ { \
...@@ -17,6 +20,7 @@ DECLARE_INFINIOP_TEST(rope) ...@@ -17,6 +20,7 @@ DECLARE_INFINIOP_TEST(rope)
infiniop_test::name::Test::build, \ infiniop_test::name::Test::build, \
infiniop_test::name::Test::attribute_names(), \ infiniop_test::name::Test::attribute_names(), \
infiniop_test::name::Test::tensor_names(), \ infiniop_test::name::Test::tensor_names(), \
infiniop_test::name::Test::output_names(), \
}}, }},
/* /*
...@@ -26,7 +30,10 @@ DECLARE_INFINIOP_TEST(rope) ...@@ -26,7 +30,10 @@ DECLARE_INFINIOP_TEST(rope)
{ \ { \
REGISTER_INFINIOP_TEST(gemm) \ REGISTER_INFINIOP_TEST(gemm) \
REGISTER_INFINIOP_TEST(random_sample) \ REGISTER_INFINIOP_TEST(random_sample) \
REGISTER_INFINIOP_TEST(add) \
REGISTER_INFINIOP_TEST(mul) \ REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(clip) \
REGISTER_INFINIOP_TEST(swiglu) \
REGISTER_INFINIOP_TEST(rope) \ REGISTER_INFINIOP_TEST(rope) \
} }
......
...@@ -58,7 +58,9 @@ private: ...@@ -58,7 +58,9 @@ private:
public: public:
Tensor(const GGUFTensorInfo *info, Tensor(const GGUFTensorInfo *info,
const void *ggml_ptr, const void *ggml_ptr,
const GGUFKeyValue *strides_meta = nullptr); const GGUFKeyValue *shape_meta = nullptr,
const GGUFKeyValue *strides_meta = nullptr,
bool isOutput = false);
Tensor(std::shared_ptr<Memory> memory, size_t offset, Tensor(std::shared_ptr<Memory> memory, size_t offset,
const std::vector<size_t> &shape, const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides, const std::vector<ptrdiff_t> &strides,
......
...@@ -92,6 +92,7 @@ public: ...@@ -92,6 +92,7 @@ public:
\ \
static std::vector<std::string> attribute_names(); \ static std::vector<std::string> attribute_names(); \
static std::vector<std::string> tensor_names(); \ static std::vector<std::string> tensor_names(); \
static std::vector<std::string> output_names(); \
\ \
std::shared_ptr<infiniop_test::Result> run( \ std::shared_ptr<infiniop_test::Result> run( \
infiniopHandle_t handle, infiniDevice_t device, int device_id, \ infiniopHandle_t handle, infiniDevice_t device, int device_id, \
...@@ -121,6 +122,7 @@ struct TestBuilder { ...@@ -121,6 +122,7 @@ struct TestBuilder {
BuilderFunc build; BuilderFunc build;
std::vector<std::string> attribute_names; std::vector<std::string> attribute_names;
std::vector<std::string> tensor_names; std::vector<std::string> tensor_names;
std::vector<std::string> output_names;
}; };
} // namespace infiniop_test } // namespace infiniop_test
......
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::add {
struct Test::Attributes {
std::shared_ptr<Tensor> a;
std::shared_ptr<Tensor> b;
std::shared_ptr<Tensor> c;
std::shared_ptr<Tensor> ans;
};
std::shared_ptr<Test> Test::build(
std::unordered_map<std::string, std::vector<uint8_t>> attributes,
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
double rtol, double atol) {
auto test = std::shared_ptr<Test>(new Test(rtol, atol));
test->_attributes = new Attributes();
if (tensors.find("a") == tensors.end()
|| tensors.find("b") == tensors.end()
|| tensors.find("c") == tensors.end()
|| tensors.find("ans") == tensors.end()) {
throw std::runtime_error("Invalid Test");
}
test->_attributes->a = tensors["a"];
test->_attributes->b = tensors["b"];
test->_attributes->c = tensors["c"];
test->_attributes->ans = tensors["ans"];
return test;
}
std::shared_ptr<infiniop_test::Result> Test::run(
infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
infiniopAddDescriptor_t op_desc;
auto a = _attributes->a->to(device, device_id);
auto b = _attributes->b->to(device, device_id);
auto c = _attributes->c->to(device, device_id);
CHECK_OR(infiniopCreateAddDescriptor(handle, &op_desc,
c->desc(),
a->desc(),
b->desc()),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));
size_t workspace_size;
CHECK_OR(infiniopGetAddWorkspaceSize(op_desc, &workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
void *workspace;
CHECK_OR(infinirtMalloc(&workspace, workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
CHECK_OR(infiniopAdd(op_desc, workspace, workspace_size,
c->data(),
a->data(),
b->data(),
nullptr),
return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));
try {
allClose(c, _attributes->ans, _rtol, _atol);
} catch (const std::exception &e) {
return TEST_FAILED(RESULT_INCORRECT, e.what());
}
double elapsed_time = 0.;
elapsed_time = benchmark(
[=]() {
infiniopAdd(
op_desc, workspace, workspace_size,
c->data(),
a->data(),
b->data(),
nullptr);
},
warm_ups, iterations);
return TEST_PASSED(elapsed_time);
}
std::vector<std::string> Test::attribute_names() {
return {};
}
std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"c"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- a: " << _attributes->a->info() << std::endl;
oss << "- b: " << _attributes->b->info() << std::endl;
oss << "- c: " << _attributes->c->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::add
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::clip {
struct Test::Attributes {
std::shared_ptr<Tensor> x;
std::shared_ptr<Tensor> min_val;
std::shared_ptr<Tensor> max_val;
std::shared_ptr<Tensor> y;
std::shared_ptr<Tensor> ans;
};
std::shared_ptr<Test> Test::build(
std::unordered_map<std::string, std::vector<uint8_t>> attributes,
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
double rtol, double atol) {
auto test = std::shared_ptr<Test>(new Test(rtol, atol));
test->_attributes = new Attributes();
if (tensors.find("x") == tensors.end()
|| tensors.find("min_val") == tensors.end()
|| tensors.find("max_val") == tensors.end()
|| tensors.find("y") == tensors.end()
|| tensors.find("ans") == tensors.end()) {
throw std::runtime_error("Invalid Test");
}
test->_attributes->x = tensors["x"];
test->_attributes->min_val = tensors["min_val"];
test->_attributes->max_val = tensors["max_val"];
test->_attributes->y = tensors["y"];
test->_attributes->ans = tensors["ans"];
return test;
}
std::shared_ptr<infiniop_test::Result> Test::run(
infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
infiniopClipDescriptor_t op_desc;
auto x = _attributes->x->to(device, device_id);
auto min_val = _attributes->min_val->to(device, device_id);
auto max_val = _attributes->max_val->to(device, device_id);
auto y = _attributes->y->to(device, device_id);
CHECK_OR(infiniopCreateClipDescriptor(handle, &op_desc,
y->desc(),
x->desc(),
min_val->desc(),
max_val->desc()),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to create clip descriptor."));
size_t workspace_size;
CHECK_OR(infiniopGetClipWorkspaceSize(op_desc, &workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
void *workspace;
CHECK_OR(infinirtMalloc(&workspace, workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
CHECK_OR(infiniopClip(op_desc, workspace, workspace_size,
y->data(),
x->data(),
min_val->data(),
max_val->data(),
nullptr),
return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));
try {
allClose(y, _attributes->ans, _rtol, _atol);
} catch (const std::exception &e) {
return TEST_FAILED(RESULT_INCORRECT, e.what());
}
double elapsed_time = 0.;
elapsed_time = benchmark(
[=]() {
infiniopClip(
op_desc, workspace, workspace_size,
y->data(),
x->data(),
min_val->data(),
max_val->data(),
nullptr);
},
warm_ups, iterations);
infiniopDestroyClipDescriptor(op_desc);
infinirtFree(workspace);
return TEST_PASSED(elapsed_time);
}
std::vector<std::string> Test::attribute_names() {
return {};
}
std::vector<std::string> Test::tensor_names() {
return {"x", "min_val", "max_val", "y", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"y"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- x: " << _attributes->x->info() << std::endl;
oss << "- min_val: " << _attributes->min_val->info() << std::endl;
oss << "- max_val: " << _attributes->max_val->info() << std::endl;
oss << "- y: " << _attributes->y->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::clip
...@@ -113,6 +113,10 @@ std::vector<std::string> Test::tensor_names() { ...@@ -113,6 +113,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"}; return {"a", "b", "c", "ans"};
} }
std::vector<std::string> Test::output_names() {
return {};
}
std::string Test::toString() const { std::string Test::toString() const {
std::ostringstream oss; std::ostringstream oss;
oss << op_name() << std::endl; oss << op_name() << std::endl;
......
...@@ -87,6 +87,10 @@ std::vector<std::string> Test::tensor_names() { ...@@ -87,6 +87,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"}; return {"a", "b", "c", "ans"};
} }
std::vector<std::string> Test::output_names() {
return {"c"};
}
std::string Test::toString() const { std::string Test::toString() const {
std::ostringstream oss; std::ostringstream oss;
oss << op_name() << std::endl; oss << op_name() << std::endl;
......
...@@ -109,6 +109,10 @@ std::vector<std::string> Test::tensor_names() { ...@@ -109,6 +109,10 @@ std::vector<std::string> Test::tensor_names() {
return {"data", "ans", "result"}; return {"data", "ans", "result"};
} }
std::vector<std::string> Test::output_names() {
return {"result"};
}
std::string Test::toString() const { std::string Test::toString() const {
std::ostringstream oss; std::ostringstream oss;
oss << op_name() << std::endl; oss << op_name() << std::endl;
......
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::swiglu {
struct Test::Attributes {
std::shared_ptr<Tensor> a;
std::shared_ptr<Tensor> b;
std::shared_ptr<Tensor> ans;
std::shared_ptr<Tensor> c;
};
std::shared_ptr<Test> Test::build(
std::unordered_map<std::string, std::vector<uint8_t>> attributes,
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
double rtol, double atol) {
auto test = std::shared_ptr<Test>(new Test(rtol, atol));
test->_attributes = new Attributes();
if (tensors.find("a") == tensors.end()
|| tensors.find("b") == tensors.end()
|| tensors.find("c") == tensors.end()
|| tensors.find("ans") == tensors.end()) {
throw std::runtime_error("Invalid Test");
}
test->_attributes->a = tensors["a"];
test->_attributes->b = tensors["b"];
test->_attributes->c = tensors["c"];
test->_attributes->ans = tensors["ans"];
return test;
}
std::shared_ptr<infiniop_test::Result> Test::run(
infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
infiniopSwiGLUDescriptor_t op_desc;
auto a = _attributes->a->to(device, device_id);
auto b = _attributes->b->to(device, device_id);
auto c = _attributes->c->to(device, device_id);
CHECK_OR(infiniopCreateSwiGLUDescriptor(handle, &op_desc,
c->desc(),
a->desc(),
b->desc()),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));
size_t workspace_size;
CHECK_OR(infiniopGetSwiGLUWorkspaceSize(op_desc, &workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
void *workspace;
CHECK_OR(infinirtMalloc(&workspace, workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
CHECK_OR(infiniopSwiGLU(op_desc, workspace, workspace_size, c->data(), a->data(), b->data(), nullptr),
return TEST_FAILED(OP_CREATION_FAILED, "Failed during execution."));
try {
allClose(c, _attributes->ans, _rtol, _atol);
} catch (const std::exception &e) {
return TEST_FAILED(RESULT_INCORRECT, e.what());
}
double elapsed_time = 0.;
elapsed_time = benchmark(
[=]() {
infiniopSwiGLU(
op_desc,
workspace,
workspace_size,
c->data(),
a->data(),
b->data(),
nullptr);
},
warm_ups, iterations);
return TEST_PASSED(elapsed_time);
}
std::vector<std::string> Test::attribute_names() {
return {};
}
std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"c"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- a: " << _attributes->a->info() << std::endl;
oss << "- b: " << _attributes->b->info() << std::endl;
oss << "- c: " << _attributes->c->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::swiglu
...@@ -98,20 +98,28 @@ void *Tensor::data() const { ...@@ -98,20 +98,28 @@ void *Tensor::data() const {
Tensor::Tensor(const GGUFTensorInfo *info, Tensor::Tensor(const GGUFTensorInfo *info,
const void *ggml_ptr, const void *ggml_ptr,
const GGUFKeyValue *strides_meta) { const GGUFKeyValue *shape_meta,
const GGUFKeyValue *strides_meta,
bool isOutput) {
_ggml_type = info->ggml_type; _ggml_type = info->ggml_type;
_offset = 0; _offset = 0;
size_t ndim = static_cast<size_t>(info->ndim); size_t ndim = static_cast<size_t>(info->ndim);
// `_shape`存储真实的tensor形状(来自shape_meta),`temp_shape`存储用于rearrange和计算内存的tensor形状
_shape = std::vector<size_t>(ndim); _shape = std::vector<size_t>(ndim);
std::vector<size_t> temp_shape(ndim);
_strides = std::vector<ptrdiff_t>(ndim); _strides = std::vector<ptrdiff_t>(ndim);
std::vector<ptrdiff_t> contiguous_strides(ndim); std::vector<ptrdiff_t> contiguous_strides(ndim);
for (size_t i = 0; i < ndim; i++) { for (size_t i = 0; i < ndim; i++) {
_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]); temp_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
if (i == 0) { if (i == 0) {
contiguous_strides[ndim - 1] = (ptrdiff_t)1; contiguous_strides[ndim - 1] = (ptrdiff_t)1;
} else { } else {
contiguous_strides[ndim - 1 - i] = (ptrdiff_t)info->shape[i - 1] * contiguous_strides[ndim - i]; contiguous_strides[ndim - 1 - i] = (ptrdiff_t)info->shape[i - 1] * contiguous_strides[ndim - i];
} }
if (isOutput) {
contiguous_strides[i] = (ptrdiff_t)0;
}
} }
if (strides_meta == nullptr) { if (strides_meta == nullptr) {
...@@ -120,7 +128,6 @@ Tensor::Tensor(const GGUFTensorInfo *info, ...@@ -120,7 +128,6 @@ Tensor::Tensor(const GGUFTensorInfo *info,
} }
} else { } else {
for (size_t i = 0; i < ndim; i++) { for (size_t i = 0; i < ndim; i++) {
_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
if (strides_meta->gguf_type == GGUF_TYPE_INT64) { if (strides_meta->gguf_type == GGUF_TYPE_INT64) {
_strides[i] = (ptrdiff_t)(reinterpret_cast<const int64_t *>( _strides[i] = (ptrdiff_t)(reinterpret_cast<const int64_t *>(
strides_meta->value.data())[ndim - 1 - i]); strides_meta->value.data())[ndim - 1 - i]);
...@@ -133,18 +140,62 @@ Tensor::Tensor(const GGUFTensorInfo *info, ...@@ -133,18 +140,62 @@ Tensor::Tensor(const GGUFTensorInfo *info,
} }
} }
infiniopCreateTensorDescriptor(&_desc, ndim, _shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type)); if (isOutput) {
if (shape_meta == nullptr) {
throw std::runtime_error("Error Creating Tensor: shape_meta cannot be null for output tensor");
}
for (size_t i = 0; i < ndim; i++) {
if (shape_meta->gguf_type == GGUF_TYPE_INT64) {
int64_t val = reinterpret_cast<const int64_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
temp_shape[i] = static_cast<size_t>(val);
} else if (shape_meta->gguf_type == GGUF_TYPE_INT32) {
int32_t val = reinterpret_cast<const int32_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
temp_shape[i] = static_cast<size_t>(val);
} else {
throw std::runtime_error("Error Creating Tensor: Unsupported shape type");
}
}
}
infiniopCreateTensorDescriptor(&_desc, ndim, temp_shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type));
size_t size; size_t size;
calculateTensorMemory(size, _offset, _shape, _strides, ggmlTypeSize(_ggml_type)); calculateTensorMemory(size, _offset, temp_shape, _strides, ggmlTypeSize(_ggml_type));
_memory = std::make_shared<Memory>(size, INFINI_DEVICE_CPU, 0); _memory = std::make_shared<Memory>(size, INFINI_DEVICE_CPU, 0);
utils::rearrange( utils::rearrange(
(char *)_memory->ptr() + _offset, (char *)_memory->ptr() + _offset,
(char *)ggml_ptr + info->data_offset, (char *)ggml_ptr + info->data_offset,
_shape.data(), temp_shape.data(),
_strides.data(), _strides.data(),
contiguous_strides.data(), contiguous_strides.data(),
ndim, ndim,
ggmlTypeSize(_ggml_type)); ggmlTypeSize(_ggml_type));
if (shape_meta == nullptr) {
_shape = temp_shape;
} else {
for (size_t i = 0; i < ndim; i++) {
if (shape_meta->gguf_type == GGUF_TYPE_INT64) {
int64_t val = reinterpret_cast<const int64_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
_shape[i] = static_cast<size_t>(val);
} else if (shape_meta->gguf_type == GGUF_TYPE_INT32) {
int32_t val = reinterpret_cast<const int32_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
_shape[i] = static_cast<size_t>(val);
} else {
throw std::runtime_error("Error Creating Tensor: Unsupported shape type");
}
}
}
} }
Tensor::Tensor(std::shared_ptr<Memory> memory, size_t offset, Tensor::Tensor(std::shared_ptr<Memory> memory, size_t offset,
......
...@@ -90,14 +90,19 @@ std::shared_ptr<Result> runTest(const GGUFFileReader &gguf_reader, ...@@ -90,14 +90,19 @@ std::shared_ptr<Result> runTest(const GGUFFileReader &gguf_reader,
attrs[attr_name] = attr->second->value; attrs[attr_name] = attr->second->value;
} }
} }
for (auto tensor_name : builder.tensor_names) { for (auto tensor_name : builder.tensor_names) {
auto info = tensor_info.find("test." + std::to_string(test_id) + "." + tensor_name); auto info = tensor_info.find("test." + std::to_string(test_id) + "." + tensor_name);
if (info != tensor_info.end()) { if (info != tensor_info.end()) {
auto shape = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".shape");
auto strides = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".strides"); auto strides = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".strides");
bool is_output = std::find(builder.output_names.begin(), builder.output_names.end(), tensor_name) != builder.output_names.end();
tensors[tensor_name] = std::make_shared<Tensor>( tensors[tensor_name] = std::make_shared<Tensor>(
info->second.get(), info->second.get(),
gguf_reader.getGgmlStart(), gguf_reader.getGgmlStart(),
strides != meta.end() ? strides->second.get() : nullptr); shape != meta.end() ? shape->second.get() : nullptr,
strides != meta.end() ? strides->second.get() : nullptr,
is_output);
} }
} }
std::shared_ptr<infiniop_test::base::Test> test; std::shared_ptr<infiniop_test::base::Test> test;
......
...@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16.0) ...@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16.0)
# project information # project information
project(Ascend_C) project(Ascend_C)
set(SOC_VERSION "Ascend910B3" CACHE STRING "system on chip type") set(SOC_VERSION "Ascend910B3" CACHE STRING "system on chip type")
set(ASCEND_CANN_PACKAGE_PATH $ENV{ASCEND_HOME} CACHE PATH "ASCEND CANN package installation directory") set(ASCEND_CANN_PACKAGE_PATH $ENV{ASCEND_TOOLKIT_HOME} CACHE PATH "ASCEND CANN package installation directory")
set(RUN_MODE "npu" CACHE STRING "run mode: npu") set(RUN_MODE "npu" CACHE STRING "run mode: npu")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type Release/Debug (default Debug)" FORCE)
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE)
...@@ -19,10 +19,13 @@ else() ...@@ -19,10 +19,13 @@ else()
endif() endif()
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
include_directories(
${CMAKE_SOURCE_DIR}/../../../../include/infiniop/
)
ascendc_library(ascend_kernels STATIC ascendc_library(ascend_kernels STATIC
../../ops/swiglu/ascend/swiglu_kernel.cpp ../../ops/swiglu/ascend/swiglu_ascend_kernel.cpp
../../ops/rotary_embedding/ascend/rotary_embedding_kernel.cpp ../../ops/rope/ascend/rope_ascend_kernel.cpp
../../ops/random_sample/ascend/random_sample_kernel.cpp # ../../ops/random_sample/ascend/random_sample_kernel.cpp
) )
#ifndef __INFINIOP_ASCEND_KERNEL_COMMON_H__
#define __INFINIOP_ASCEND_KERNEL_COMMON_H__
#include "../../../../include/infinicore.h"
#include "kernel_operator.h"
// Shared compile-time constants for the Ascend kernels.
// NOTE(review): presumably the number of blocks a kernel is launched with - confirm at the launch sites.
constexpr size_t BLOCK_NUM = 8;
// NOTE(review): presumably the queue buffer count (2 suggests double buffering) - confirm in the kernels.
constexpr size_t BUFFER_NUM = 2;
// Byte alignment value; presumably what callers pass to alignTileLen as `byte_align` - TODO confirm.
constexpr size_t BYTE_ALIGN = 32;
// Rounds a tile length (in elements of T) up so that its size in bytes is a
// multiple of `byte_align`, and returns the padded length in elements.
// A length that is already aligned is returned unchanged.
template <typename T>
__aicore__ inline size_t alignTileLen(size_t tile_len, size_t byte_align) {
    const size_t raw_bytes = tile_len * sizeof(T);
    const size_t remainder = raw_bytes % byte_align;
    size_t padded_bytes = raw_bytes;
    if (remainder != 0) {
        // Pad up to the next multiple of byte_align.
        padded_bytes += byte_align - remainder;
    }
    return padded_bytes / sizeof(T);
}
#endif
#include "common_ascend.h" #include "common_ascend.h"
std::vector<int64_t> inferStorageShape(std::vector<int64_t> shape, std::vector<int64_t> strides) { std::vector<int64_t> inferStorageShape(std::vector<int64_t> shape, std::vector<int64_t> strides) {
auto index = std::max_element(strides.begin(), strides.end()); if (shape.size() != strides.size()) {
uint64_t max_stride_index = std::distance(strides.begin(), index); throw std::invalid_argument("Shape and strides must have the same length.");
auto storageShape = std::vector<int64_t>({shape[max_stride_index] * strides[max_stride_index]}); }
int64_t max_offset = 0;
for (size_t i = 0; i < shape.size(); ++i) {
max_offset += (shape[i] - 1) * strides[i];
}
return storageShape; // storage shape is 1D buffer that must cover all accessed elements
return {max_offset + 1};
} }
size_t aclnnTensorDescriptor::numel() const { size_t aclnnTensorDescriptor::numel() const {
...@@ -18,7 +24,7 @@ aclnnTensorDescriptor::aclnnTensorDescriptor(infiniopTensorDescriptor_t desc, vo ...@@ -18,7 +24,7 @@ aclnnTensorDescriptor::aclnnTensorDescriptor(infiniopTensorDescriptor_t desc, vo
this->strides = std::vector<int64_t>(ndim); this->strides = std::vector<int64_t>(ndim);
for (uint64_t i = 0; i < ndim; ++i) { for (uint64_t i = 0; i < ndim; ++i) {
this->shape[i] = static_cast<int64_t>(desc->dim(i)); this->shape[i] = static_cast<int64_t>(desc->dim(i));
this->strides[i] = desc->stride(i); this->strides[i] = static_cast<int64_t>(desc->stride(i));
} }
this->storageShape = inferStorageShape(this->shape, this->strides); this->storageShape = inferStorageShape(this->shape, this->strides);
this->dataType = toAclDataType(desc->dtype()); this->dataType = toAclDataType(desc->dtype());
......
...@@ -16,7 +16,7 @@ typedef XPUStream kunlunStream_t; ...@@ -16,7 +16,7 @@ typedef XPUStream kunlunStream_t;
typedef XPUEvent kunlunEvent_t; typedef XPUEvent kunlunEvent_t;
typedef xdnn::Context *xdnnHandle_t; typedef xdnn::Context *xdnnHandle_t;
#define CHECK_XDNN(API) CHECK_INTERNAL(API, XPU_SUCCESS) #define CHECK_KUNLUN(API) CHECK_INTERNAL(API, XPU_SUCCESS)
namespace device::kunlun { namespace device::kunlun {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment