Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
9b32b4b1
Commit
9b32b4b1
authored
Jun 04, 2025
by
Catheriany
Browse files
Merge remote-tracking branch 'origin/main' into issue/150
parents
15bcbdfc
4799ddbf
Changes
103
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
499 additions
and
24 deletions
+499
-24
README.md
README.md
+5
-1
include/infiniop.h
include/infiniop.h
+1
-0
include/infiniop/ops/clip.h
include/infiniop/ops/clip.h
+28
-0
include/infiniop/ops/mul.h
include/infiniop/ops/mul.h
+5
-5
scripts/python_test.py
scripts/python_test.py
+1
-0
src/infiniop-test/include/ops.hpp
src/infiniop-test/include/ops.hpp
+7
-0
src/infiniop-test/include/tensor.hpp
src/infiniop-test/include/tensor.hpp
+3
-1
src/infiniop-test/include/test.hpp
src/infiniop-test/include/test.hpp
+2
-0
src/infiniop-test/src/ops/add.cpp
src/infiniop-test/src/ops/add.cpp
+109
-0
src/infiniop-test/src/ops/clip.cpp
src/infiniop-test/src/ops/clip.cpp
+120
-0
src/infiniop-test/src/ops/gemm.cpp
src/infiniop-test/src/ops/gemm.cpp
+4
-0
src/infiniop-test/src/ops/mul.cpp
src/infiniop-test/src/ops/mul.cpp
+4
-0
src/infiniop-test/src/ops/random_sample.cpp
src/infiniop-test/src/ops/random_sample.cpp
+4
-0
src/infiniop-test/src/ops/swiglu.cpp
src/infiniop-test/src/ops/swiglu.cpp
+104
-0
src/infiniop-test/src/tensor.cpp
src/infiniop-test/src/tensor.cpp
+57
-6
src/infiniop-test/src/test.cpp
src/infiniop-test/src/test.cpp
+6
-1
src/infiniop/devices/ascend/CMakeLists.txt
src/infiniop/devices/ascend/CMakeLists.txt
+7
-4
src/infiniop/devices/ascend/ascend_kernel_common.h
src/infiniop/devices/ascend/ascend_kernel_common.h
+20
-0
src/infiniop/devices/ascend/common_ascend.cc
src/infiniop/devices/ascend/common_ascend.cc
+11
-5
src/infiniop/devices/kunlun/kunlun_handle.h
src/infiniop/devices/kunlun/kunlun_handle.h
+1
-1
No files found.
README.md
View file @
9b32b4b1
...
...
@@ -175,6 +175,10 @@ options:
{
"clangd.arguments": [
"--compile-commands-dir=.vscode"
]
],
"xmake.additionalConfigArguments": [
// 在这里配置 XMAKE_CONFIG_FLAGS
"--nv-gpu=y"
],
}
```
include/infiniop.h
View file @
9b32b4b1
...
...
@@ -6,6 +6,7 @@
#include "infiniop/ops/attention.h"
#include "infiniop/ops/avg_pool.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/expand.h"
#include "infiniop/ops/gemm.h"
...
...
include/infiniop/ops/clip.h
0 → 100644
View file @
9b32b4b1
#ifndef __INFINIOP_CLIP_API_H__
#define __INFINIOP_CLIP_API_H__
#include "../operator_descriptor.h"
/* Handle type for a clip operator descriptor (pointer to the shared InfiniopDescriptor struct). */
typedef
struct
InfiniopDescriptor
*
infiniopClipDescriptor_t
;
/*
 * Creates a clip operator descriptor and stores it in *desc_ptr.
 *
 * handle   - library handle the descriptor is created with.
 * desc_ptr - receives the new descriptor.
 * y        - tensor descriptor of the output.
 * x        - tensor descriptor of the input.
 * min_val  - tensor descriptor of the lower bound.
 * max_val  - tensor descriptor of the upper bound.
 *
 * Returns an infiniStatus_t status code.
 * NOTE(review): presumably y = clamp(x, min_val, max_val) element-wise;
 * confirm shape/dtype requirements against the operator implementation.
 */
__C
__export
infiniStatus_t
infiniopCreateClipDescriptor
(
infiniopHandle_t
handle
,
infiniopClipDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
x
,
infiniopTensorDescriptor_t
min_val
,
infiniopTensorDescriptor_t
max_val
);
/*
 * Writes the workspace size required by `desc` into *size.
 * NOTE(review): the unit is presumably bytes (the test passes the value
 * straight to infinirtMalloc) - confirm.
 */
__C
__export
infiniStatus_t
infiniopGetClipWorkspaceSize
(
infiniopClipDescriptor_t
desc
,
size_t
*
size
);
/*
 * Runs the clip operator described by `desc`.
 *
 * workspace / workspace_size - scratch buffer and its size.
 * y                          - output data pointer (written).
 * x, min_val, max_val        - input data pointers (read-only).
 * stream                     - execution stream; the test harness passes nullptr.
 *
 * Returns an infiniStatus_t status code.
 */
__C
__export
infiniStatus_t
infiniopClip
(
infiniopClipDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
y
,
const
void
*
x
,
const
void
*
min_val
,
const
void
*
max_val
,
void
*
stream
);
/* Destroys a descriptor previously returned by infiniopCreateClipDescriptor. */
__C
__export
infiniStatus_t
infiniopDestroyClipDescriptor
(
infiniopClipDescriptor_t
desc
);
#endif
include/infiniop/ops/mul.h
View file @
9b32b4b1
scripts/python_test.py
View file @
9b32b4b1
...
...
@@ -18,6 +18,7 @@ def run_tests(args):
"rms_norm.py"
,
"rope.py"
,
"swiglu.py"
,
"attention.py"
,
]:
result
=
subprocess
.
run
(
f
"python
{
test
}
{
args
}
"
,
text
=
True
,
encoding
=
"utf-8"
,
shell
=
True
...
...
src/infiniop-test/include/ops.hpp
View file @
9b32b4b1
...
...
@@ -9,6 +9,9 @@ DECLARE_INFINIOP_TEST(gemm)
DECLARE_INFINIOP_TEST
(
random_sample
)
DECLARE_INFINIOP_TEST
(
mul
)
DECLARE_INFINIOP_TEST
(
rope
)
DECLARE_INFINIOP_TEST
(
clip
)
DECLARE_INFINIOP_TEST
(
swiglu
)
DECLARE_INFINIOP_TEST
(
add
)
#define REGISTER_INFINIOP_TEST(name) \
{ \
...
...
@@ -17,6 +20,7 @@ DECLARE_INFINIOP_TEST(rope)
infiniop_test::name::Test::build, \
infiniop_test::name::Test::attribute_names(), \
infiniop_test::name::Test::tensor_names(), \
infiniop_test::name::Test::output_names(), \
}},
/*
...
...
@@ -26,7 +30,10 @@ DECLARE_INFINIOP_TEST(rope)
{ \
REGISTER_INFINIOP_TEST(gemm) \
REGISTER_INFINIOP_TEST(random_sample) \
REGISTER_INFINIOP_TEST(add) \
REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(clip) \
REGISTER_INFINIOP_TEST(swiglu) \
REGISTER_INFINIOP_TEST(rope) \
}
...
...
src/infiniop-test/include/tensor.hpp
View file @
9b32b4b1
...
...
@@ -58,7 +58,9 @@ private:
public:
Tensor
(
const
GGUFTensorInfo
*
info
,
const
void
*
ggml_ptr
,
const
GGUFKeyValue
*
strides_meta
=
nullptr
);
const
GGUFKeyValue
*
shape_meta
=
nullptr
,
const
GGUFKeyValue
*
strides_meta
=
nullptr
,
bool
isOutput
=
false
);
Tensor
(
std
::
shared_ptr
<
Memory
>
memory
,
size_t
offset
,
const
std
::
vector
<
size_t
>
&
shape
,
const
std
::
vector
<
ptrdiff_t
>
&
strides
,
...
...
src/infiniop-test/include/test.hpp
View file @
9b32b4b1
...
...
@@ -92,6 +92,7 @@ public:
\
static std::vector<std::string> attribute_names(); \
static std::vector<std::string> tensor_names(); \
static std::vector<std::string> output_names(); \
\
std::shared_ptr<infiniop_test::Result> run( \
infiniopHandle_t handle, infiniDevice_t device, int device_id, \
...
...
@@ -121,6 +122,7 @@ struct TestBuilder {
BuilderFunc
build
;
std
::
vector
<
std
::
string
>
attribute_names
;
std
::
vector
<
std
::
string
>
tensor_names
;
std
::
vector
<
std
::
string
>
output_names
;
};
}
// namespace infiniop_test
...
...
src/infiniop-test/src/ops/add.cpp
0 → 100644
View file @
9b32b4b1
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::add {

// Tensors used by one add test case, stored under the names listed in
// tensor_names().
struct Test::Attributes {
    std::shared_ptr<Tensor> a;
    std::shared_ptr<Tensor> b;
    std::shared_ptr<Tensor> c;
    std::shared_ptr<Tensor> ans;
};

// Builds an add test from the loaded tensors.
// Throws std::runtime_error("Invalid Test") when any of "a", "b", "c" or
// "ans" is missing. `attributes` is not used by this operator.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    if (tensors.find("a") == tensors.end()
        || tensors.find("b") == tensors.end()
        || tensors.find("c") == tensors.end()
        || tensors.find("ans") == tensors.end()) {
        throw std::runtime_error("Invalid Test");
    }

    test->_attributes->a = tensors["a"];
    test->_attributes->b = tensors["b"];
    test->_attributes->c = tensors["c"];
    test->_attributes->ans = tensors["ans"];

    return test;
}

// Runs the add operator once, checks `c` against `ans` with allClose, then
// benchmarks the operator.
// Returns a failed Result if descriptor creation, workspace query/allocation,
// execution, or the comparison fails; otherwise a passed Result carrying the
// benchmark time.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id,
    size_t warm_ups, size_t iterations) {
    auto a = _attributes->a->to(device, device_id);
    auto b = _attributes->b->to(device, device_id);
    auto c = _attributes->c->to(device, device_id);

    infiniopAddDescriptor_t op_desc = nullptr;
    void *workspace = nullptr;

    // Releases the descriptor and the workspace on every exit path, including
    // the early returns below. It holds references so that the by-value lambda
    // passed to benchmark() captures only the raw handles, never the guard.
    struct ResourceGuard {
        infiniopAddDescriptor_t &desc;
        void *&buffer;
        ~ResourceGuard() {
            if (desc != nullptr) {
                infiniopDestroyAddDescriptor(desc);
            }
            if (buffer != nullptr) {
                infinirtFree(buffer);
            }
        }
    } guard{op_desc, workspace};

    CHECK_OR(infiniopCreateAddDescriptor(handle, &op_desc,
                                         c->desc(),
                                         a->desc(),
                                         b->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));

    size_t workspace_size;
    CHECK_OR(infiniopGetAddWorkspaceSize(op_desc, &workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));

    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));

    CHECK_OR(infiniopAdd(op_desc, workspace, workspace_size,
                         c->data(),
                         a->data(),
                         b->data(),
                         nullptr),
             return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));

    try {
        allClose(c, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }

    double elapsed_time = 0.;

    elapsed_time = benchmark(
        [=]() {
            infiniopAdd(
                op_desc, workspace, workspace_size,
                c->data(),
                a->data(),
                b->data(),
                nullptr);
        },
        warm_ups, iterations);

    return TEST_PASSED(elapsed_time);
}

// This operator takes no scalar attributes.
std::vector<std::string> Test::attribute_names() {
    return {};
}

// Tensors looked up for each test case.
std::vector<std::string> Test::tensor_names() {
    return {"a", "b", "c", "ans"};
}

// Tensors reported as outputs.
std::vector<std::string> Test::output_names() {
    return {"c"};
}

// Human-readable description of the test case: operator name, tensor info
// and tolerances.
std::string Test::toString() const {
    std::ostringstream oss;
    oss << op_name() << std::endl;
    oss << "- a: " << _attributes->a->info() << std::endl;
    oss << "- b: " << _attributes->b->info() << std::endl;
    oss << "- c: " << _attributes->c->info() << std::endl;
    oss << std::scientific << std::setprecision(2);
    oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
    return oss.str();
}

// _attributes is allocated with `new` in build().
Test::~Test() {
    delete _attributes;
}

} // namespace infiniop_test::add
src/infiniop-test/src/ops/clip.cpp
0 → 100644
View file @
9b32b4b1
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::clip {

// Tensors used by one clip test case, stored under the names listed in
// tensor_names().
struct Test::Attributes {
    std::shared_ptr<Tensor> x;
    std::shared_ptr<Tensor> min_val;
    std::shared_ptr<Tensor> max_val;
    std::shared_ptr<Tensor> y;
    std::shared_ptr<Tensor> ans;
};

// Builds a clip test from the loaded tensors.
// Throws std::runtime_error("Invalid Test") when any of "x", "min_val",
// "max_val", "y" or "ans" is missing. `attributes` is not used by this
// operator.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    if (tensors.find("x") == tensors.end()
        || tensors.find("min_val") == tensors.end()
        || tensors.find("max_val") == tensors.end()
        || tensors.find("y") == tensors.end()
        || tensors.find("ans") == tensors.end()) {
        throw std::runtime_error("Invalid Test");
    }

    test->_attributes->x = tensors["x"];
    test->_attributes->min_val = tensors["min_val"];
    test->_attributes->max_val = tensors["max_val"];
    test->_attributes->y = tensors["y"];
    test->_attributes->ans = tensors["ans"];

    return test;
}

// Runs the clip operator once, checks `y` against `ans` with allClose, then
// benchmarks the operator.
// Returns a failed Result if descriptor creation, workspace query/allocation,
// execution, or the comparison fails; otherwise a passed Result carrying the
// benchmark time.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id,
    size_t warm_ups, size_t iterations) {
    auto x = _attributes->x->to(device, device_id);
    auto min_val = _attributes->min_val->to(device, device_id);
    auto max_val = _attributes->max_val->to(device, device_id);
    auto y = _attributes->y->to(device, device_id);

    infiniopClipDescriptor_t op_desc = nullptr;
    void *workspace = nullptr;

    // Releases the descriptor and the workspace on every exit path; the
    // failure returns below used to skip the cleanup entirely. It holds
    // references so that the by-value lambda passed to benchmark() captures
    // only the raw handles, never the guard.
    struct ResourceGuard {
        infiniopClipDescriptor_t &desc;
        void *&buffer;
        ~ResourceGuard() {
            if (desc != nullptr) {
                infiniopDestroyClipDescriptor(desc);
            }
            if (buffer != nullptr) {
                infinirtFree(buffer);
            }
        }
    } guard{op_desc, workspace};

    CHECK_OR(infiniopCreateClipDescriptor(handle, &op_desc,
                                          y->desc(),
                                          x->desc(),
                                          min_val->desc(),
                                          max_val->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create clip descriptor."));

    size_t workspace_size;
    CHECK_OR(infiniopGetClipWorkspaceSize(op_desc, &workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));

    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));

    CHECK_OR(infiniopClip(op_desc, workspace, workspace_size,
                          y->data(),
                          x->data(),
                          min_val->data(),
                          max_val->data(),
                          nullptr),
             return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));

    try {
        allClose(y, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }

    double elapsed_time = 0.;

    elapsed_time = benchmark(
        [=]() {
            infiniopClip(
                op_desc, workspace, workspace_size,
                y->data(),
                x->data(),
                min_val->data(),
                max_val->data(),
                nullptr);
        },
        warm_ups, iterations);

    // Descriptor and workspace are released by `guard` when it goes out of scope.
    return TEST_PASSED(elapsed_time);
}

// This operator takes no scalar attributes.
std::vector<std::string> Test::attribute_names() {
    return {};
}

// Tensors looked up for each test case.
std::vector<std::string> Test::tensor_names() {
    return {"x", "min_val", "max_val", "y", "ans"};
}

// Tensors reported as outputs.
std::vector<std::string> Test::output_names() {
    return {"y"};
}

// Human-readable description of the test case: operator name, tensor info
// and tolerances.
std::string Test::toString() const {
    std::ostringstream oss;
    oss << op_name() << std::endl;
    oss << "- x: " << _attributes->x->info() << std::endl;
    oss << "- min_val: " << _attributes->min_val->info() << std::endl;
    oss << "- max_val: " << _attributes->max_val->info() << std::endl;
    oss << "- y: " << _attributes->y->info() << std::endl;
    oss << std::scientific << std::setprecision(2);
    oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
    return oss.str();
}

// _attributes is allocated with `new` in build().
Test::~Test() {
    delete _attributes;
}

} // namespace infiniop_test::clip
src/infiniop-test/src/ops/gemm.cpp
View file @
9b32b4b1
...
...
@@ -113,6 +113,10 @@ std::vector<std::string> Test::tensor_names() {
return
{
"a"
,
"b"
,
"c"
,
"ans"
};
}
// Names of the tensors reported as outputs; this test reports none.
std::vector<std::string> Test::output_names() {
    return std::vector<std::string>();
}
std
::
string
Test
::
toString
()
const
{
std
::
ostringstream
oss
;
oss
<<
op_name
()
<<
std
::
endl
;
...
...
src/infiniop-test/src/ops/mul.cpp
View file @
9b32b4b1
...
...
@@ -87,6 +87,10 @@ std::vector<std::string> Test::tensor_names() {
return
{
"a"
,
"b"
,
"c"
,
"ans"
};
}
// Names of the tensors reported as outputs; only "c" for this test.
std::vector<std::string> Test::output_names() {
    std::vector<std::string> outputs;
    outputs.emplace_back("c");
    return outputs;
}
std
::
string
Test
::
toString
()
const
{
std
::
ostringstream
oss
;
oss
<<
op_name
()
<<
std
::
endl
;
...
...
src/infiniop-test/src/ops/random_sample.cpp
View file @
9b32b4b1
...
...
@@ -109,6 +109,10 @@ std::vector<std::string> Test::tensor_names() {
return
{
"data"
,
"ans"
,
"result"
};
}
// Names of the tensors reported as outputs; only "result" for this test.
std::vector<std::string> Test::output_names() {
    std::vector<std::string> outputs;
    outputs.emplace_back("result");
    return outputs;
}
std
::
string
Test
::
toString
()
const
{
std
::
ostringstream
oss
;
oss
<<
op_name
()
<<
std
::
endl
;
...
...
src/infiniop-test/src/ops/swiglu.cpp
0 → 100644
View file @
9b32b4b1
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::swiglu {

// Tensors used by one SwiGLU test case, stored under the names listed in
// tensor_names().
struct Test::Attributes {
    std::shared_ptr<Tensor> a;
    std::shared_ptr<Tensor> b;
    std::shared_ptr<Tensor> ans;
    std::shared_ptr<Tensor> c;
};

// Builds a SwiGLU test from the loaded tensors.
// Throws std::runtime_error("Invalid Test") when any of "a", "b", "c" or
// "ans" is missing. `attributes` is not used by this operator.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    if (tensors.find("a") == tensors.end()
        || tensors.find("b") == tensors.end()
        || tensors.find("c") == tensors.end()
        || tensors.find("ans") == tensors.end()) {
        throw std::runtime_error("Invalid Test");
    }

    test->_attributes->a = tensors["a"];
    test->_attributes->b = tensors["b"];
    test->_attributes->c = tensors["c"];
    test->_attributes->ans = tensors["ans"];

    return test;
}

// Runs the SwiGLU operator once, checks `c` against `ans` with allClose, then
// benchmarks the operator.
// Returns a failed Result if descriptor creation, workspace query/allocation,
// execution, or the comparison fails; otherwise a passed Result carrying the
// benchmark time.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id,
    size_t warm_ups, size_t iterations) {
    auto a = _attributes->a->to(device, device_id);
    auto b = _attributes->b->to(device, device_id);
    auto c = _attributes->c->to(device, device_id);

    infiniopSwiGLUDescriptor_t op_desc = nullptr;
    void *workspace = nullptr;

    // Releases the descriptor and the workspace on every exit path, including
    // the early returns below. It holds references so that the by-value lambda
    // passed to benchmark() captures only the raw handles, never the guard.
    struct ResourceGuard {
        infiniopSwiGLUDescriptor_t &desc;
        void *&buffer;
        ~ResourceGuard() {
            if (desc != nullptr) {
                infiniopDestroySwiGLUDescriptor(desc);
            }
            if (buffer != nullptr) {
                infinirtFree(buffer);
            }
        }
    } guard{op_desc, workspace};

    CHECK_OR(infiniopCreateSwiGLUDescriptor(handle, &op_desc,
                                            c->desc(),
                                            a->desc(),
                                            b->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));

    size_t workspace_size;
    CHECK_OR(infiniopGetSwiGLUWorkspaceSize(op_desc, &workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));

    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));

    // An execution failure is reported as OP_EXECUTION_FAILED, not as a
    // creation failure.
    CHECK_OR(infiniopSwiGLU(op_desc, workspace, workspace_size,
                            c->data(),
                            a->data(),
                            b->data(),
                            nullptr),
             return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));

    try {
        allClose(c, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }

    double elapsed_time = 0.;

    elapsed_time = benchmark(
        [=]() {
            infiniopSwiGLU(
                op_desc, workspace, workspace_size,
                c->data(),
                a->data(),
                b->data(),
                nullptr);
        },
        warm_ups, iterations);

    return TEST_PASSED(elapsed_time);
}

// This operator takes no scalar attributes.
std::vector<std::string> Test::attribute_names() {
    return {};
}

// Tensors looked up for each test case.
std::vector<std::string> Test::tensor_names() {
    return {"a", "b", "c", "ans"};
}

// Tensors reported as outputs.
std::vector<std::string> Test::output_names() {
    return {"c"};
}

// Human-readable description of the test case: operator name, tensor info
// and tolerances.
std::string Test::toString() const {
    std::ostringstream oss;
    oss << op_name() << std::endl;
    oss << "- a: " << _attributes->a->info() << std::endl;
    oss << "- b: " << _attributes->b->info() << std::endl;
    oss << "- c: " << _attributes->c->info() << std::endl;
    oss << std::scientific << std::setprecision(2);
    oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
    return oss.str();
}

// _attributes is allocated with `new` in build().
Test::~Test() {
    delete _attributes;
}

} // namespace infiniop_test::swiglu
src/infiniop-test/src/tensor.cpp
View file @
9b32b4b1
...
...
@@ -98,20 +98,28 @@ void *Tensor::data() const {
Tensor
::
Tensor
(
const
GGUFTensorInfo
*
info
,
const
void
*
ggml_ptr
,
const
GGUFKeyValue
*
strides_meta
)
{
const
GGUFKeyValue
*
shape_meta
,
const
GGUFKeyValue
*
strides_meta
,
bool
isOutput
)
{
_ggml_type
=
info
->
ggml_type
;
_offset
=
0
;
size_t
ndim
=
static_cast
<
size_t
>
(
info
->
ndim
);
// `_shape`存储真实的tensor形状(来自shape_meta),`temp_shape`存储用于rearrange和计算内存的tensor形状
_shape
=
std
::
vector
<
size_t
>
(
ndim
);
std
::
vector
<
size_t
>
temp_shape
(
ndim
);
_strides
=
std
::
vector
<
ptrdiff_t
>
(
ndim
);
std
::
vector
<
ptrdiff_t
>
contiguous_strides
(
ndim
);
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
_shape
[
i
]
=
static_cast
<
size_t
>
(
info
->
shape
[
ndim
-
1
-
i
]);
temp
_shape
[
i
]
=
static_cast
<
size_t
>
(
info
->
shape
[
ndim
-
1
-
i
]);
if
(
i
==
0
)
{
contiguous_strides
[
ndim
-
1
]
=
(
ptrdiff_t
)
1
;
}
else
{
contiguous_strides
[
ndim
-
1
-
i
]
=
(
ptrdiff_t
)
info
->
shape
[
i
-
1
]
*
contiguous_strides
[
ndim
-
i
];
}
if
(
isOutput
)
{
contiguous_strides
[
i
]
=
(
ptrdiff_t
)
0
;
}
}
if
(
strides_meta
==
nullptr
)
{
...
...
@@ -120,7 +128,6 @@ Tensor::Tensor(const GGUFTensorInfo *info,
}
}
else
{
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
_shape
[
i
]
=
static_cast
<
size_t
>
(
info
->
shape
[
ndim
-
1
-
i
]);
if
(
strides_meta
->
gguf_type
==
GGUF_TYPE_INT64
)
{
_strides
[
i
]
=
(
ptrdiff_t
)(
reinterpret_cast
<
const
int64_t
*>
(
strides_meta
->
value
.
data
())[
ndim
-
1
-
i
]);
...
...
@@ -133,18 +140,62 @@ Tensor::Tensor(const GGUFTensorInfo *info,
}
}
infiniopCreateTensorDescriptor
(
&
_desc
,
ndim
,
_shape
.
data
(),
_strides
.
data
(),
ggmlTypeToInfiniType
(
_ggml_type
));
if
(
isOutput
)
{
if
(
shape_meta
==
nullptr
)
{
throw
std
::
runtime_error
(
"Error Creating Tensor: shape_meta cannot be null for output tensor"
);
}
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
if
(
shape_meta
->
gguf_type
==
GGUF_TYPE_INT64
)
{
int64_t
val
=
reinterpret_cast
<
const
int64_t
*>
(
shape_meta
->
value
.
data
())[
i
];
if
(
val
<
0
)
{
throw
std
::
runtime_error
(
"Shape must be non-negative"
);
}
temp_shape
[
i
]
=
static_cast
<
size_t
>
(
val
);
}
else
if
(
shape_meta
->
gguf_type
==
GGUF_TYPE_INT32
)
{
int32_t
val
=
reinterpret_cast
<
const
int32_t
*>
(
shape_meta
->
value
.
data
())[
i
];
if
(
val
<
0
)
{
throw
std
::
runtime_error
(
"Shape must be non-negative"
);
}
temp_shape
[
i
]
=
static_cast
<
size_t
>
(
val
);
}
else
{
throw
std
::
runtime_error
(
"Error Creating Tensor: Unsupported shape type"
);
}
}
}
infiniopCreateTensorDescriptor
(
&
_desc
,
ndim
,
temp_shape
.
data
(),
_strides
.
data
(),
ggmlTypeToInfiniType
(
_ggml_type
));
size_t
size
;
calculateTensorMemory
(
size
,
_offset
,
_shape
,
_strides
,
ggmlTypeSize
(
_ggml_type
));
calculateTensorMemory
(
size
,
_offset
,
temp
_shape
,
_strides
,
ggmlTypeSize
(
_ggml_type
));
_memory
=
std
::
make_shared
<
Memory
>
(
size
,
INFINI_DEVICE_CPU
,
0
);
utils
::
rearrange
(
(
char
*
)
_memory
->
ptr
()
+
_offset
,
(
char
*
)
ggml_ptr
+
info
->
data_offset
,
_shape
.
data
(),
temp
_shape
.
data
(),
_strides
.
data
(),
contiguous_strides
.
data
(),
ndim
,
ggmlTypeSize
(
_ggml_type
));
if
(
shape_meta
==
nullptr
)
{
_shape
=
temp_shape
;
}
else
{
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
if
(
shape_meta
->
gguf_type
==
GGUF_TYPE_INT64
)
{
int64_t
val
=
reinterpret_cast
<
const
int64_t
*>
(
shape_meta
->
value
.
data
())[
i
];
if
(
val
<
0
)
{
throw
std
::
runtime_error
(
"Shape must be non-negative"
);
}
_shape
[
i
]
=
static_cast
<
size_t
>
(
val
);
}
else
if
(
shape_meta
->
gguf_type
==
GGUF_TYPE_INT32
)
{
int32_t
val
=
reinterpret_cast
<
const
int32_t
*>
(
shape_meta
->
value
.
data
())[
i
];
if
(
val
<
0
)
{
throw
std
::
runtime_error
(
"Shape must be non-negative"
);
}
_shape
[
i
]
=
static_cast
<
size_t
>
(
val
);
}
else
{
throw
std
::
runtime_error
(
"Error Creating Tensor: Unsupported shape type"
);
}
}
}
}
Tensor
::
Tensor
(
std
::
shared_ptr
<
Memory
>
memory
,
size_t
offset
,
...
...
src/infiniop-test/src/test.cpp
View file @
9b32b4b1
...
...
@@ -90,14 +90,19 @@ std::shared_ptr<Result> runTest(const GGUFFileReader &gguf_reader,
attrs
[
attr_name
]
=
attr
->
second
->
value
;
}
}
for
(
auto
tensor_name
:
builder
.
tensor_names
)
{
auto
info
=
tensor_info
.
find
(
"test."
+
std
::
to_string
(
test_id
)
+
"."
+
tensor_name
);
if
(
info
!=
tensor_info
.
end
())
{
auto
shape
=
meta
.
find
(
"test."
+
std
::
to_string
(
test_id
)
+
"."
+
tensor_name
+
".shape"
);
auto
strides
=
meta
.
find
(
"test."
+
std
::
to_string
(
test_id
)
+
"."
+
tensor_name
+
".strides"
);
bool
is_output
=
std
::
find
(
builder
.
output_names
.
begin
(),
builder
.
output_names
.
end
(),
tensor_name
)
!=
builder
.
output_names
.
end
();
tensors
[
tensor_name
]
=
std
::
make_shared
<
Tensor
>
(
info
->
second
.
get
(),
gguf_reader
.
getGgmlStart
(),
strides
!=
meta
.
end
()
?
strides
->
second
.
get
()
:
nullptr
);
shape
!=
meta
.
end
()
?
shape
->
second
.
get
()
:
nullptr
,
strides
!=
meta
.
end
()
?
strides
->
second
.
get
()
:
nullptr
,
is_output
);
}
}
std
::
shared_ptr
<
infiniop_test
::
base
::
Test
>
test
;
...
...
src/infiniop/devices/ascend/CMakeLists.txt
View file @
9b32b4b1
...
...
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16.0)
# project information
project
(
Ascend_C
)
set
(
SOC_VERSION
"Ascend910B3"
CACHE STRING
"system on chip type"
)
set
(
ASCEND_CANN_PACKAGE_PATH $ENV{ASCEND_HOME} CACHE PATH
"ASCEND CANN package installation directory"
)
set
(
ASCEND_CANN_PACKAGE_PATH $ENV{ASCEND_
TOOLKIT_
HOME} CACHE PATH
"ASCEND CANN package installation directory"
)
set
(
RUN_MODE
"npu"
CACHE STRING
"run mode: npu"
)
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE STRING
"Build type Release/Debug (default Debug)"
FORCE
)
set
(
CMAKE_INSTALL_PREFIX
"
${
CMAKE_CURRENT_LIST_DIR
}
/out"
CACHE STRING
"path for install()"
FORCE
)
...
...
@@ -19,10 +19,13 @@ else()
endif
()
include
(
${
ASCENDC_CMAKE_DIR
}
/ascendc.cmake
)
include_directories
(
${
CMAKE_SOURCE_DIR
}
/../../../../include/infiniop/
)
ascendc_library
(
ascend_kernels STATIC
../../ops/swiglu/ascend/swiglu_kernel.cpp
../../ops/ro
tary_embedding/ascend/rotary_embedding
_kernel.cpp
../../ops/random_sample/ascend/random_sample_kernel.cpp
../../ops/swiglu/ascend/swiglu_
ascend_
kernel.cpp
../../ops/ro
pe/ascend/rope_ascend
_kernel.cpp
#
../../ops/random_sample/ascend/random_sample_kernel.cpp
)
src/infiniop/devices/ascend/ascend_kernel_common.h
0 → 100644
View file @
9b32b4b1
#ifndef __INFINIOP_ASCEND_KERNEL_COMMON_H__
#define __INFINIOP_ASCEND_KERNEL_COMMON_H__
#include "../../../../include/infinicore.h"
#include "kernel_operator.h"
// Compile-time constants shared by the Ascend kernels.
// NOTE(review): the meanings below are inferred from the names only - confirm
// against the kernels that use them.
// Presumably the number of blocks a kernel is launched with.
constexpr
size_t
BLOCK_NUM
=
8
;
// Presumably the number of buffers per queue (2 would mean double buffering).
constexpr
size_t
BUFFER_NUM
=
2
;
// Presumably the byte alignment passed to alignTileLen as `byte_align`.
constexpr
size_t
BYTE_ALIGN
=
32
;
// Pads a tile of `tile_len` elements of type T so that its byte size is a
// multiple of `byte_align`, and returns the padded length in elements.
//
// The byte size is rounded up to the next multiple of `byte_align`, then
// converted back to an element count with integer division by sizeof(T).
// `byte_align` must be non-zero.
template <typename T>
__aicore__ inline size_t alignTileLen(size_t tile_len, size_t byte_align) {
    const size_t raw_bytes = tile_len * sizeof(T);
    const size_t remainder = raw_bytes % byte_align;

    size_t padded_bytes = raw_bytes;
    if (remainder != 0) {
        padded_bytes += byte_align - remainder;
    }
    return padded_bytes / sizeof(T);
}
#endif
src/infiniop/devices/ascend/common_ascend.cc
View file @
9b32b4b1
#include "common_ascend.h"
// Computes the 1-D storage shape needed to back a strided tensor view.
//
// The result has a single entry: the offset of the farthest element reachable
// through (shape, strides), plus one. For a broadcast dimension (stride 0)
// this is still at least 1, whereas multiplying the largest stride by its
// extent would give 0.
//
// shape   - extent of each dimension.
// strides - stride of each dimension, in elements; must be the same length
//           as `shape`.
//
// Throws std::invalid_argument when the lengths differ. The check runs before
// any element is indexed, so empty or mismatched inputs are never read out of
// bounds. Empty inputs yield {1}.
std::vector<int64_t> inferStorageShape(std::vector<int64_t> shape, std::vector<int64_t> strides) {
    if (shape.size() != strides.size()) {
        throw std::invalid_argument("Shape and strides must have the same length.");
    }

    // Sum over dimensions of (last index) * stride: the largest offset touched.
    int64_t max_offset = 0;
    for (size_t i = 0; i < shape.size(); ++i) {
        max_offset += (shape[i] - 1) * strides[i];
    }

    // storage shape is 1D buffer that must cover all accessed elements
    return {max_offset + 1};
}
size_t
aclnnTensorDescriptor
::
numel
()
const
{
...
...
@@ -18,7 +24,7 @@ aclnnTensorDescriptor::aclnnTensorDescriptor(infiniopTensorDescriptor_t desc, vo
this
->
strides
=
std
::
vector
<
int64_t
>
(
ndim
);
for
(
uint64_t
i
=
0
;
i
<
ndim
;
++
i
)
{
this
->
shape
[
i
]
=
static_cast
<
int64_t
>
(
desc
->
dim
(
i
));
this
->
strides
[
i
]
=
desc
->
stride
(
i
);
this
->
strides
[
i
]
=
static_cast
<
int64_t
>
(
desc
->
stride
(
i
)
)
;
}
this
->
storageShape
=
inferStorageShape
(
this
->
shape
,
this
->
strides
);
this
->
dataType
=
toAclDataType
(
desc
->
dtype
());
...
...
src/infiniop/devices/kunlun/kunlun_handle.h
View file @
9b32b4b1
...
...
@@ -16,7 +16,7 @@ typedef XPUStream kunlunStream_t;
typedef
XPUEvent
kunlunEvent_t
;
typedef
xdnn
::
Context
*
xdnnHandle_t
;
#define CHECK_
XDN
N(API) CHECK_INTERNAL(API, XPU_SUCCESS)
#define CHECK_
KUNLU
N(API) CHECK_INTERNAL(API, XPU_SUCCESS)
namespace
device
::
kunlun
{
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment