Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
0fa8805e
"superbench/benchmarks/micro_benchmarks/gpu_stream.py" did not exist on "682b2c120dd3ebfcac3be72f9f9225c53abe5bbc"
Commit
0fa8805e
authored
Jan 09, 2026
by
PanZezhong
Browse files
issue/810 add common graph op macros
parent
006d530c
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
151 additions
and
106 deletions
+151
-106
include/infinicore/graph/graph.hpp
include/infinicore/graph/graph.hpp
+47
-0
include/infinicore/ops/gemm.hpp
include/infinicore/ops/gemm.hpp
+1
-14
src/infinicore/context/allocators/device_pinned_allocator.cc
src/infinicore/context/allocators/device_pinned_allocator.cc
+6
-0
src/infinicore/context/allocators/host_allocator.cc
src/infinicore/context/allocators/host_allocator.cc
+6
-0
src/infinicore/context/allocators/pinnable_block_allocator.cc
...infinicore/context/allocators/pinnable_block_allocator.cc
+4
-1
src/infinicore/context/allocators/stream_ordered_allocator.cc
...infinicore/context/allocators/stream_ordered_allocator.cc
+6
-0
src/infinicore/ops/gemm/gemm.cc
src/infinicore/ops/gemm/gemm.cc
+3
-28
src/infinicore/ops/gemm/gemm_infiniop.cc
src/infinicore/ops/gemm/gemm_infiniop.cc
+8
-53
src/infinicore/ops/infiniop_impl.hpp
src/infinicore/ops/infiniop_impl.hpp
+50
-0
src/infinicore/ops/linear/linear.cc
src/infinicore/ops/linear/linear.cc
+11
-9
test/infinicore/framework/tensor.py
test/infinicore/framework/tensor.py
+6
-1
xmake.lua
xmake.lua
+3
-0
No files found.
include/infinicore/graph/graph.hpp
View file @
0fa8805e
...
...
@@ -43,3 +43,50 @@ protected:
friend
class
GraphManager
;
};
}
// namespace infinicore::graph
// Declares a graph-operator class named __OP_NAME__ whose constructor,
// execute(), and plan dispatcher all share the argument list given in
// __VA_ARGS__. run_schema / cleanup_schema (and run(), planned_meta_,
// runner_, deleter_) are inherited from graph::GraphOperator.
#define INFINICORE_GRAPH_OP_CLASS(__OP_NAME__, ...) \
    class __OP_NAME__ : public graph::GraphOperator { \
    public: \
        using schema = void (*)(__VA_ARGS__); \
        using plan_schema = void *(*)(__VA_ARGS__); \
        static common::OpDispatcher<plan_schema> &plan_dispatcher(); \
        static common::OpDispatcher<run_schema> &run_dispatcher(); \
        static common::OpDispatcher<cleanup_schema> &cleanup_dispatcher(); \
        __OP_NAME__(__VA_ARGS__); \
        static void execute(__VA_ARGS__); \
    };

// Defines the three per-op dispatcher singletons declared by
// INFINICORE_GRAPH_OP_CLASS (Meyers-singleton pattern: thread-safe
// initialization, one instance per process).
#define INFINICORE_GRAPH_OP_DISPATCHERS_IMPL(__OP_NAME__) \
    common::OpDispatcher<__OP_NAME__::plan_schema> &__OP_NAME__::plan_dispatcher() { \
        static common::OpDispatcher<__OP_NAME__::plan_schema> dispatcher_; \
        return dispatcher_; \
    } \
    common::OpDispatcher<__OP_NAME__::run_schema> &__OP_NAME__::run_dispatcher() { \
        static common::OpDispatcher<__OP_NAME__::run_schema> dispatcher_; \
        return dispatcher_; \
    } \
    common::OpDispatcher<__OP_NAME__::cleanup_schema> &__OP_NAME__::cleanup_dispatcher() { \
        static common::OpDispatcher<__OP_NAME__::cleanup_schema> dispatcher_; \
        return dispatcher_; \
    }

// Resolves the plan/run/cleanup entry points for __DEVICE_TYPE__ and
// runs planning immediately; intended for use inside the op constructor.
// Wrapped in do/while(0) so it behaves as a single statement, and the
// device-type expression is evaluated exactly once.
#define INFINICORE_GRAPH_OP_DISPATCH(__DEVICE_TYPE__, ...) \
    do { \
        const auto device_type__ = (__DEVICE_TYPE__); \
        planned_meta_ = plan_dispatcher().lookup(device_type__)(__VA_ARGS__); \
        runner_ = run_dispatcher().lookup(device_type__); \
        deleter_ = cleanup_dispatcher().lookup(device_type__); \
    } while (0)

// Constructs the op and either records it into the current graph (when
// graph recording is active) or executes it eagerly. do/while(0) keeps
// this a single statement and keeps the temporary shared_ptr out of the
// caller's scope.
#define INFINICORE_GRAPH_OP_RECORD_OR_RUN(__OP_NAME__, ...) \
    do { \
        auto op__ = std::make_shared<__OP_NAME__>(__VA_ARGS__); \
        if (context::isGraphRecording()) { \
            context::addGraphOperator(op__); \
        } else { \
            op__->run(); \
        } \
    } while (0)

// Registers plan/run/cleanup implementations for every device type at
// static-initialization time. The flag name is pasted with the op name
// so several ops can be registered in the same translation unit without
// a symbol collision (a fixed name such as `registered` would clash).
#define INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(__OP_NAME__, __PLAN_F__, __RUN_F__, __CLEANUP_F__) \
    [[maybe_unused]] static const bool registered_##__OP_NAME__ = []() { \
        __OP_NAME__::plan_dispatcher().registerAll(__PLAN_F__, false); \
        __OP_NAME__::run_dispatcher().registerAll(__RUN_F__, false); \
        __OP_NAME__::cleanup_dispatcher().registerAll(__CLEANUP_F__, false); \
        return true; \
    }();
include/infinicore/ops/gemm.hpp
View file @
0fa8805e
...
...
@@ -6,20 +6,7 @@
namespace
infinicore
::
op
{
class
Gemm
:
public
graph
::
GraphOperator
{
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
,
Tensor
,
float
,
float
);
using
plan_schema
=
void
*
(
*
)(
Tensor
,
Tensor
,
Tensor
,
float
,
float
);
Gemm
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
static
void
execute
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
static
common
::
OpDispatcher
<
plan_schema
>
&
plan_dispatcher
();
static
common
::
OpDispatcher
<
run_schema
>
&
run_dispatcher
();
static
common
::
OpDispatcher
<
cleanup_schema
>
&
cleanup_dispatcher
();
};
INFINICORE_GRAPH_OP_CLASS
(
Gemm
,
Tensor
,
Tensor
,
Tensor
,
float
,
float
);
Tensor
gemm
(
Tensor
a
,
Tensor
b
,
float
alpha
=
1.0
f
,
float
beta
=
0.0
f
);
void
gemm_
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
...
...
src/infinicore/context/allocators/device_pinned_allocator.cc
View file @
0fa8805e
...
...
@@ -12,12 +12,18 @@ DevicePinnedHostAllocator::~DevicePinnedHostAllocator() {
}
std
::
byte
*
DevicePinnedHostAllocator
::
allocate
(
size_t
size
)
{
if
(
size
==
0
)
{
return
nullptr
;
}
void
*
ptr
;
INFINICORE_CHECK_ERROR
(
infinirtMallocHost
(
&
ptr
,
size
));
return
(
std
::
byte
*
)
ptr
;
}
void
DevicePinnedHostAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
ptr
==
nullptr
)
{
return
;
}
if
(
owner_
==
context
::
getDevice
())
{
INFINICORE_CHECK_ERROR
(
infinirtFreeHost
(
ptr
));
gc
();
...
...
src/infinicore/context/allocators/host_allocator.cc
View file @
0fa8805e
...
...
@@ -4,10 +4,16 @@
namespace
infinicore
{
std
::
byte
*
HostAllocator
::
allocate
(
size_t
size
)
{
if
(
size
==
0
)
{
return
nullptr
;
}
return
(
std
::
byte
*
)
std
::
malloc
(
size
);
}
void
HostAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
ptr
==
nullptr
)
{
return
;
}
std
::
free
(
ptr
);
}
...
...
src/infinicore/context/allocators/pinnable_block_allocator.cc
View file @
0fa8805e
...
...
@@ -37,6 +37,9 @@ PinnableBlockAllocator::PinnableBlockAllocator(Device device)
// ------------------- allocate -------------------
std
::
byte
*
PinnableBlockAllocator
::
allocate
(
size_t
size
)
{
if
(
size
==
0
)
{
return
nullptr
;
}
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
// Align size to 256 bytes for GPU
...
...
@@ -94,7 +97,7 @@ std::byte *PinnableBlockAllocator::allocate(size_t size) {
// ------------------- deallocate -------------------
void
PinnableBlockAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
!
ptr
)
{
if
(
ptr
==
null
ptr
)
{
return
;
}
...
...
src/infinicore/context/allocators/stream_ordered_allocator.cc
View file @
0fa8805e
...
...
@@ -8,12 +8,18 @@ namespace infinicore {
StreamOrderedAllocator
::
StreamOrderedAllocator
(
Device
device
)
:
MemoryAllocator
(),
device_
(
device
)
{}
std
::
byte
*
StreamOrderedAllocator
::
allocate
(
size_t
size
)
{
if
(
size
==
0
)
{
return
nullptr
;
}
void
*
ptr
=
nullptr
;
INFINICORE_CHECK_ERROR
(
infinirtMallocAsync
(
&
ptr
,
size
,
context
::
getStream
()));
return
(
std
::
byte
*
)
ptr
;
}
void
StreamOrderedAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
ptr
==
nullptr
)
{
return
;
}
INFINICORE_CHECK_ERROR
(
infinirtFreeAsync
(
ptr
,
context
::
getStream
()));
}
}
// namespace infinicore
src/infinicore/ops/gemm/gemm.cc
View file @
0fa8805e
...
...
@@ -3,40 +3,15 @@
#include "../../utils.hpp"
namespace
infinicore
::
op
{
common
::
OpDispatcher
<
Gemm
::
schema
>
&
Gemm
::
dispatcher
()
{
static
common
::
OpDispatcher
<
Gemm
::
schema
>
dispatcher_
;
return
dispatcher_
;
};
common
::
OpDispatcher
<
Gemm
::
plan_schema
>
&
Gemm
::
plan_dispatcher
()
{
static
common
::
OpDispatcher
<
Gemm
::
plan_schema
>
dispatcher_
;
return
dispatcher_
;
}
common
::
OpDispatcher
<
Gemm
::
run_schema
>
&
Gemm
::
run_dispatcher
()
{
static
common
::
OpDispatcher
<
Gemm
::
run_schema
>
dispatcher_
;
return
dispatcher_
;
}
common
::
OpDispatcher
<
Gemm
::
cleanup_schema
>
&
Gemm
::
cleanup_dispatcher
()
{
static
common
::
OpDispatcher
<
Gemm
::
cleanup_schema
>
dispatcher_
;
return
dispatcher_
;
}
INFINICORE_GRAPH_OP_DISPATCHERS_IMPL
(
Gemm
);
Gemm
::
Gemm
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
)
{
INFINICORE_ASSERT_TENSORS_SAME_DEVICE
(
c
,
a
,
b
);
planned_meta_
=
plan_dispatcher
().
lookup
(
c
->
device
().
getType
())(
c
,
a
,
b
,
alpha
,
beta
);
runner_
=
run_dispatcher
().
lookup
(
c
->
device
().
getType
());
deleter_
=
cleanup_dispatcher
().
lookup
(
c
->
device
().
getType
());
INFINICORE_GRAPH_OP_DISPATCH
(
c
->
device
().
getType
(),
c
,
a
,
b
,
alpha
,
beta
);
}
void
Gemm
::
execute
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
)
{
auto
op
=
std
::
make_shared
<
Gemm
>
(
c
,
a
,
b
,
alpha
,
beta
);
if
(
context
::
isGraphRecording
())
{
context
::
addGraphOperator
(
op
);
}
else
{
op
->
run
();
}
INFINICORE_GRAPH_OP_RECORD_OR_RUN
(
Gemm
,
c
,
a
,
b
,
alpha
,
beta
);
}
Tensor
gemm
(
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
)
{
...
...
src/infinicore/ops/gemm/gemm_infiniop.cc
View file @
0fa8805e
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "../infiniop_impl.hpp"
#include "infinicore/ops/gemm.hpp"
#include <infiniop.h>
namespace
infinicore
::
op
::
gemm_impl
::
infiniop
{
// A desc holder to make it a shared pointer that can auto clean-up
struct
Descriptor
{
infiniopGemmDescriptor_t
desc
;
Descriptor
(
infiniopGemmDescriptor_t
desc
)
:
desc
(
desc
)
{}
~
Descriptor
()
{
if
(
desc
!=
nullptr
)
{
infiniopDestroyGemmDescriptor
(
desc
);
desc
=
nullptr
;
}
}
};
thread_local
common
::
OpCache
<
size_t
,
std
::
shared_ptr
<
Descriptor
>>
caches
(
// capacity
100
,
// on evict
[](
std
::
shared_ptr
<
Descriptor
>
&
desc
)
{
desc
=
nullptr
;
});
INFINIOP_CACHABLE_DESCRIPTOR
(
Descriptor
,
Gemm
,
100
);
struct
PlannedMeta
{
std
::
shared_ptr
<
Descriptor
>
descriptor
;
...
...
@@ -33,25 +12,13 @@ struct PlannedMeta {
};
void
*
plan
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
)
{
size_t
seed
=
hash_combine
(
c
,
b
,
a
,
alpha
,
beta
);
auto
device
=
context
::
getDevice
();
auto
&
cache
=
caches
.
getCache
(
device
);
auto
descriptor
=
cache
.
get
(
seed
).
value_or
(
nullptr
);
size_t
seed
=
hash_combine
(
c
,
a
,
b
);
if
(
!
descriptor
)
{
descriptor
=
std
::
make_shared
<
Descriptor
>
(
nullptr
);
INFINICORE_CHECK_ERROR
(
infiniopCreateGemmDescriptor
(
context
::
getInfiniopHandle
(
device
),
&
descriptor
->
desc
,
c
->
desc
(),
a
->
desc
(),
b
->
desc
()));
cache
.
put
(
seed
,
descriptor
);
}
INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE
(
Descriptor
,
descriptor
,
Gemm
,
seed
,
c
->
desc
(),
a
->
desc
(),
b
->
desc
());
size_t
workspace_size
=
0
;
INFINICORE_CHECK_ERROR
(
infiniopGetGemmWorkspaceSize
(
descriptor
->
desc
,
&
workspace_size
));
Tensor
workspace
=
Tensor
::
empty
({
workspace_size
},
DataType
::
U8
,
device
);
INFINIOP_WORKSPACE_TENSOR
(
workspace
,
Gemm
,
descriptor
);
auto
planned
=
new
PlannedMeta
{
descriptor
,
...
...
@@ -77,18 +44,6 @@ void cleanup(void **planned_meta_ptr) {
*
planned_meta_ptr
=
nullptr
;
}
void
calculate
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
)
{
auto
planned
=
plan
(
c
,
a
,
b
,
alpha
,
beta
);
run
(
planned
);
cleanup
(
&
planned
);
}
static
bool
registered
=
[]()
{
Gemm
::
dispatcher
().
registerAll
(
&
calculate
,
false
);
Gemm
::
plan_dispatcher
().
registerAll
(
&
plan
,
false
);
Gemm
::
run_dispatcher
().
registerAll
(
&
run
,
false
);
Gemm
::
cleanup_dispatcher
().
registerAll
(
&
cleanup
,
false
);
return
true
;
}();
INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE
(
Gemm
,
&
plan
,
&
run
,
&
cleanup
);
}
// namespace infinicore::op::gemm_impl::infiniop
src/infinicore/ops/infiniop_impl.hpp
0 → 100644
View file @
0fa8805e
#pragma once
#include "../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
// Defines a RAII holder named __DESC_TYPE__ for an infiniop descriptor of
// op __OP_NAME__, plus a thread_local per-device cache of capacity
// __SIZE__ holding shared_ptr<__DESC_TYPE__>.
//
// BUGFIX: the constructor/destructor previously hardcoded the name
// `Descriptor`, so the macro only compiled when __DESC_TYPE__ was
// literally `Descriptor`; they now paste __DESC_TYPE__. Copying is
// deleted because a copy would double-destroy the raw handle.
//
// NOTE: the cache variable is intentionally named `caches` —
// INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE refers to it by that name.
// Use this macro at most once per namespace.
#define INFINIOP_CACHABLE_DESCRIPTOR(__DESC_TYPE__, __OP_NAME__, __SIZE__) \
    struct __DESC_TYPE__ { \
        infiniop##__OP_NAME__##Descriptor_t desc; \
        explicit __DESC_TYPE__(infiniop##__OP_NAME__##Descriptor_t desc) : desc(desc) {} \
        __DESC_TYPE__(const __DESC_TYPE__ &) = delete; \
        __DESC_TYPE__ &operator=(const __DESC_TYPE__ &) = delete; \
        ~__DESC_TYPE__() { \
            if (desc != nullptr) { \
                infiniopDestroy##__OP_NAME__##Descriptor(desc); \
                desc = nullptr; \
            } \
        } \
    }; \
    \
    thread_local common::OpCache<size_t, std::shared_ptr<__DESC_TYPE__>> \
        caches( \
            __SIZE__, \
            /* on evict: drop the shared_ptr; the holder's dtor frees the handle */ \
            [](std::shared_ptr<__DESC_TYPE__> &desc) { \
                desc = nullptr; \
            });

// Declares `std::shared_ptr<__DESC_TYPE__> __DESC_NAME__` and fills it
// from the per-device `caches` entry for __HASH_KEY__, creating (and
// caching) a fresh descriptor via
// infiniopCreate<__INFINIOP_NAME__>Descriptor(handle, &desc, __VA_ARGS__)
// on a miss. Requires a prior INFINIOP_CACHABLE_DESCRIPTOR in scope.
#define INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE(__DESC_TYPE__, __DESC_NAME__, __INFINIOP_NAME__, __HASH_KEY__, ...) \
    std::shared_ptr<__DESC_TYPE__> __DESC_NAME__; \
    { \
        auto device__ = context::getDevice(); \
        auto &cache__ = caches.getCache(device__); \
        __DESC_NAME__ = cache__.get(__HASH_KEY__).value_or(nullptr); \
        if (!__DESC_NAME__) { \
            __DESC_NAME__ = std::make_shared<__DESC_TYPE__>(nullptr); \
            INFINICORE_CHECK_ERROR(infiniopCreate##__INFINIOP_NAME__##Descriptor( \
                context::getInfiniopHandle(device__), \
                &__DESC_NAME__->desc, \
                __VA_ARGS__)); \
            cache__.put(__HASH_KEY__, __DESC_NAME__); \
        } \
    }

// Declares `Tensor __TENSOR_NAME__` sized to the workspace reported by
// infiniopGet<__INFINIOP_NAME__>WorkspaceSize for __DESC_NAME__, as a U8
// buffer on the current device (possibly zero-length).
#define INFINIOP_WORKSPACE_TENSOR(__TENSOR_NAME__, __INFINIOP_NAME__, __DESC_NAME__) \
    Tensor __TENSOR_NAME__; \
    { \
        auto device__ = context::getDevice(); \
        size_t workspace_size = 0; \
        INFINICORE_CHECK_ERROR(infiniopGet##__INFINIOP_NAME__##WorkspaceSize(__DESC_NAME__->desc, &workspace_size)); \
        __TENSOR_NAME__ = Tensor::empty({workspace_size}, DataType::U8, device__); \
    }
src/infinicore/ops/linear/linear.cc
View file @
0fa8805e
#include "infinicore/ops/linear.hpp"
#include "infinicore/ops/
add
.hpp"
#include "infinicore/ops/
matmul
.hpp"
#include "infinicore/ops/
gemm
.hpp"
#include "infinicore/ops/
rearrange
.hpp"
namespace
infinicore
::
op
{
...
...
@@ -42,16 +42,18 @@ void linear_(Tensor out,
// linear transformation
Tensor
out_view
=
out
->
view
({
N
,
out_features
});
matmul_
(
out_view
,
input
->
view
({
N
,
in_features
}),
weight
->
permute
({
1
,
0
}));
// Add bias
float
alpha
=
1.0
f
;
float
beta
=
0.0
f
;
if
(
bias
.
has_value
())
{
add
_
(
out_view
,
out_view
,
bias
.
value
()
->
as_strided
({
N
,
out_features
},
{
0
,
1
}))
;
rearrange
_
(
out_view
,
bias
.
value
()
->
as_strided
({
N
,
out_features
},
{
0
,
1
}));
beta
=
1.0
f
;
}
gemm_
(
out_view
,
input
->
view
({
N
,
in_features
}),
weight
->
permute
({
1
,
0
}),
alpha
,
beta
);
}
}
// namespace infinicore::op
test/infinicore/framework/tensor.py
View file @
0fa8805e
...
...
@@ -60,7 +60,12 @@ class TensorInitializer:
# Handle real floating-point types
if
mode
==
TensorInitializer
.
RANDOM
:
return
torch
.
rand
(
shape
,
dtype
=
torch_dtype
,
device
=
torch_device_str
)
scale
=
kwargs
.
get
(
"scale"
,
1.0
)
bias
=
kwargs
.
get
(
"bias"
,
0.0
)
return
(
torch
.
rand
(
shape
,
dtype
=
torch_dtype
,
device
=
torch_device_str
)
*
scale
+
bias
)
elif
mode
==
TensorInitializer
.
ZEROS
:
return
torch
.
zeros
(
shape
,
dtype
=
torch_dtype
,
device
=
torch_device_str
)
elif
mode
==
TensorInitializer
.
ONES
:
...
...
xmake.lua
View file @
0fa8805e
...
...
@@ -268,6 +268,9 @@ target("infinirt")
add_deps
(
"infinirt-hygon"
)
end
set_languages
(
"cxx17"
)
if
not
is_plat
(
"windows"
)
then
add_cxflags
(
"-fPIC"
)
end
set_installdir
(
os.getenv
(
"INFINI_ROOT"
)
or
(
os.getenv
(
is_host
(
"windows"
)
and
"HOMEPATH"
or
"HOME"
)
..
"/.infini"
))
add_files
(
"src/infinirt/*.cc"
)
add_installfiles
(
"include/infinirt.h"
,
{
prefixdir
=
"include"
})
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment