Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
8d09630a
Unverified
Commit
8d09630a
authored
Feb 11, 2026
by
gongchensu
Committed by
GitHub
Feb 11, 2026
Browse files
Merge branch 'demo131' into Issue/862
parents
ab52dead
012df56c
Changes
387
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
318 additions
and
41 deletions
+318
-41
.gitmodules
.gitmodules
+4
-0
README.md
README.md
+6
-4
include/infinicore.h
include/infinicore.h
+1
-0
include/infinicore.hpp
include/infinicore.hpp
+1
-0
include/infinicore/common/hash.hpp
include/infinicore/common/hash.hpp
+10
-0
include/infinicore/context/context.hpp
include/infinicore/context/context.hpp
+8
-0
include/infinicore/device.hpp
include/infinicore/device.hpp
+1
-0
include/infinicore/graph/graph.hpp
include/infinicore/graph/graph.hpp
+101
-0
include/infinicore/nn/linear.hpp
include/infinicore/nn/linear.hpp
+22
-0
include/infinicore/nn/rmsnorm.hpp
include/infinicore/nn/rmsnorm.hpp
+19
-4
include/infinicore/nn/rope.hpp
include/infinicore/nn/rope.hpp
+50
-6
include/infinicore/ops.hpp
include/infinicore/ops.hpp
+9
-0
include/infinicore/ops/add.hpp
include/infinicore/ops/add.hpp
+6
-9
include/infinicore/ops/add_rms_norm.hpp
include/infinicore/ops/add_rms_norm.hpp
+18
-0
include/infinicore/ops/causal_softmax.hpp
include/infinicore/ops/causal_softmax.hpp
+6
-8
include/infinicore/ops/dequantize_awq.hpp
include/infinicore/ops/dequantize_awq.hpp
+10
-0
include/infinicore/ops/distributed/allreduce.hpp
include/infinicore/ops/distributed/allreduce.hpp
+24
-0
include/infinicore/ops/embedding.hpp
include/infinicore/ops/embedding.hpp
+6
-2
include/infinicore/ops/flash_attention.hpp
include/infinicore/ops/flash_attention.hpp
+12
-0
include/infinicore/ops/gemm.hpp
include/infinicore/ops/gemm.hpp
+4
-8
No files found.
.gitmodules
View file @
8d09630a
[submodule "third_party/spdlog"]
path = third_party/spdlog
url = https://github.com/gabime/spdlog.git
[submodule "third_party/nlohmann_json"]
path = third_party/nlohmann_json
url = https://github.com/nlohmann/json.git
branch = master
README.md
View file @
8d09630a
...
...
@@ -20,6 +20,7 @@ InfiniCore 是一个跨平台统一编程工具集,为不同芯片平台的功
-
天数智芯 GPU;
-
沐曦 GPU;
-
海光 DCU;
-
阿里 PPU;
-
华为昇腾 NPU;
-
寒武纪 MLU;
-
昆仑芯 XPU;
...
...
@@ -103,6 +104,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
|
`--qy-gpu=[y\|n]`
| 是否编译QY GPU 接口实现 | n
|
`--hygon-dcu=[y\|n]`
| 是否编译海光 DCU 接口实现 | n
|
`--kunlun-xpu=[y\|n]`
| 是否编译昆仑 XPU 接口实现 | n
|
`--ali-ppu=[y\|n]`
| 是否编译阿里 PPU 接口实现 | n
|
`--ninetoothed=[y\|n]`
| 是否编译九齿实现 | n
|
`--ccl=[y\|n]`
| 是否编译 InfiniCCL 通信库接口实现 | n
...
...
@@ -187,9 +189,9 @@ pip install -e .
```
bash
# 测试单算子
python
test
/infinicore/ops/[operator].py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
]
python
test
/infinicore/ops/[operator].py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
|
--ali
]
# 测试全部算子
python
test
/infinicore/run.py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
]
python
test
/infinicore/run.py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--ali
]
```
使用 -h 查看更多参数。
...
...
@@ -198,9 +200,9 @@ python test/infinicore/run.py [--bench | --debug | --verbose] [--cpu | --nvidia
```
shell
# 测试单算子
python
test
/infiniop/[operator].py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
]
python
test
/infiniop/[operator].py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
|
--ali
]
# 测试全部算子
python scripts/python_test.py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
]
python scripts/python_test.py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
|
--ali
]
```
#### 通信库(InfiniCCL)测试
...
...
include/infinicore.h
View file @
8d09630a
...
...
@@ -47,6 +47,7 @@ typedef enum {
INFINI_DEVICE_KUNLUN
=
7
,
INFINI_DEVICE_HYGON
=
8
,
INFINI_DEVICE_QY
=
9
,
INFINI_DEVICE_ALI
=
10
,
INFINI_DEVICE_TYPE_COUNT
}
infiniDevice_t
;
...
...
include/infinicore.hpp
View file @
8d09630a
...
...
@@ -3,4 +3,5 @@
#include "infinicore/device_event.hpp"
#include "infinicore/nn.hpp"
#include "infinicore/ops.hpp"
#include "infinicore/quantization.hpp"
#include "infinicore/tensor.hpp"
include/infinicore/common/hash.hpp
View file @
8d09630a
...
...
@@ -2,6 +2,7 @@
#include "../tensor.hpp"
#include <optional>
#include <type_traits>
namespace
infinicore
{
...
...
@@ -24,6 +25,15 @@ inline void hash_combine(size_t &seed, Tensor tensor) {
}
}
/// Overload of hash_combine for std::optional<T>.
///
/// The presence flag (`opt.has_value()`) is always mixed into `seed` first;
/// the contained value is mixed in afterwards only when the optional is
/// engaged. Both steps delegate to the other hash_combine overloads.
///
/// @param seed Running hash value, updated in place.
/// @param opt  Optional whose state (and value, if any) is folded into `seed`.
template <typename T>
inline void hash_combine(size_t &seed, const std::optional<T> &opt) {
    hash_combine(seed, opt.has_value());
    if (!opt.has_value()) {
        return; // nothing further to mix for an empty optional
    }
    hash_combine(seed, *opt);
}
// Specialization for std::string
inline
void
hash_combine
(
size_t
&
seed
,
const
std
::
string
&
str
)
{
hash_combine
(
seed
,
std
::
hash
<
std
::
string
>
{}(
str
));
...
...
include/infinicore/context/context.hpp
View file @
8d09630a
...
...
@@ -3,6 +3,8 @@
#include "../device.hpp"
#include "../memory.hpp"
#include "../graph/graph.hpp"
#include <infiniop.h>
#include <infinirt.h>
...
...
@@ -40,6 +42,12 @@ void destroyEvent(infinirtEvent_t event);
float
elapsedTime
(
infinirtEvent_t
start
,
infinirtEvent_t
end
);
void
streamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
);
// ---- Graph recording APIs ----

/// Reports whether graph recording is currently active.
/// INFINICORE_GRAPH_OP_RECORD_OR_RUN uses this to choose between handing an
/// operator to addGraphOperator() and running it immediately.
bool isGraphRecording();

/// Begins graph recording.
/// NOTE(review): whether recording is per-thread, per-device or global is not
/// visible from this header — confirm against the implementation.
void startGraphRecording();

/// Passes `op` to the recorder (shared ownership is transferred by value).
/// NOTE(review): behaviour when no recording is active is not shown here.
void addGraphOperator(std::shared_ptr<graph::GraphOperator> op);

/// Ends graph recording and returns a graph::Graph.
/// NOTE(review): presumably the returned graph holds the operators added since
/// startGraphRecording() — confirm.
std::shared_ptr<graph::Graph> stopGraphRecording();
}
// namespace context
}
// namespace infinicore
include/infinicore/device.hpp
View file @
8d09630a
...
...
@@ -22,6 +22,7 @@ public:
KUNLUN
=
INFINI_DEVICE_KUNLUN
,
HYGON
=
INFINI_DEVICE_HYGON
,
QY
=
INFINI_DEVICE_QY
,
ALI
=
INFINI_DEVICE_ALI
,
COUNT
=
INFINI_DEVICE_TYPE_COUNT
,
};
...
...
include/infinicore/graph/graph.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include <memory>
#include <vector>
#include "../tensor.hpp"
namespace
infinicore
::
graph
{
// Forward declarations
class
GraphManager
;
/// Tensor subtype used by the graph layer; it adds no members of its own here
/// and is constructible from any Tensor.
/// NOTE(review): the converting constructor is implicit, so a Tensor silently
/// converts to a GraphTensor — confirm that this is intended.
class GraphTensor : public Tensor {
public:
    GraphTensor(const Tensor &);
};
/// Abstract interface for one operation that can be stored in a Graph.
/// Concrete operators override run(); instances are destroyed through this
/// base type, hence the virtual destructor.
class GraphOperator {
public:
    /// Executes the operator. Const-qualified, so implementations cannot
    /// modify their own (non-mutable) members while running.
    virtual void run() const = 0;

    virtual ~GraphOperator() = default;
};
/// GraphOperator whose behaviour is supplied through plain function pointers.
///
/// Derived classes (see INFINICORE_GRAPH_OP_CLASS / INFINICORE_GRAPH_OP_DISPATCH)
/// fill in the three protected members: an opaque planned state, the function
/// that runs it, and the function that cleans it up.
class DispatchableGraphOperator : public GraphOperator {
public:
    void run() const override;
    ~DispatchableGraphOperator() override;

protected:
    /// Signature of the run callback; receives an opaque pointer
    /// (presumably planned_meta_ — confirm in the out-of-line run()).
    using run_schema = void (*)(void *);
    /// Signature of the cleanup callback; receives a pointer-to-pointer
    /// (presumably &planned_meta_ so it can be reset — confirm).
    using cleanup_schema = void (*)(void **);

    // All three members start as nullptr. This class declares no constructor,
    // so without initializers they would be indeterminate until the derived
    // constructor assigns them; if that constructor throws first, the
    // destructor would otherwise observe garbage pointers.
    // NOTE(review): the out-of-line run()/destructor should tolerate nullptr.
    void *planned_meta_ = nullptr;
    run_schema runner_ = nullptr;
    cleanup_schema deleter_ = nullptr;
};
/// Ordered collection of GraphOperator objects that can be run as a unit.
///
/// Construction, destruction and run() are defined out of line. Mutation
/// (add_operator / instantiate) is protected and exposed to GraphManager via
/// friendship.
class Graph {
public:
    Graph();
    ~Graph(); // out of line: DeviceGraph is incomplete in this header

    /// Runs the graph.
    /// NOTE(review): whether this replays op_list_ in order or launches
    /// device_graph_ is decided in the implementation — not visible here.
    void run() const;

protected:
    /// Adds `op` to the graph (presumably appended to op_list_ — confirm).
    void add_operator(std::shared_ptr<GraphOperator> op);

    /// NOTE(review): presumably builds device_graph_ from op_list_ — confirm.
    void instantiate();

    /// Operators held by this graph, with shared ownership.
    std::vector<std::shared_ptr<GraphOperator>> op_list_;

    friend class GraphManager;

private:
    /// Opaque implementation type, defined in the source file.
    struct DeviceGraph;
    std::unique_ptr<DeviceGraph> device_graph_;
};
}
// namespace infinicore::graph
// Declares the graph operator class `__OP_NAME__`, derived from
// graph::DispatchableGraphOperator. The variadic arguments are the operator's
// parameter types and are used for:
//   - `schema`      : void (*)(params...)
//   - `plan_schema` : void *(*)(params...)
//   - the constructor `__OP_NAME__(params...)` and `static void execute(params...)`.
// It also declares three static accessors returning the plan / run / cleanup
// common::OpDispatcher instances (run_schema and cleanup_schema come from the
// base class). Definitions are expected elsewhere, e.g. via
// INFINICORE_GRAPH_OP_DISPATCHERS_IMPL.
// Must be expanded where `graph::` and `common::` resolve (the visible uses are
// inside namespace infinicore::op).
#define INFINICORE_GRAPH_OP_CLASS(__OP_NAME__, ...) \
class __OP_NAME__ : public graph::DispatchableGraphOperator { \
public: \
using schema = void (*)(__VA_ARGS__); \
using plan_schema = void *(*)(__VA_ARGS__); \
static common::OpDispatcher<plan_schema> &plan_dispatcher(); \
static common::OpDispatcher<run_schema> &run_dispatcher(); \
static common::OpDispatcher<cleanup_schema> &cleanup_dispatcher(); \
__OP_NAME__(__VA_ARGS__); \
static void execute(__VA_ARGS__); \
};
// Defines the three static dispatcher accessors declared by
// INFINICORE_GRAPH_OP_CLASS for `__OP_NAME__`. Each accessor owns one
// function-local static common::OpDispatcher and returns a reference to it, so
// every call for a given operator yields the same dispatcher object.
// Expand once per operator, in a source file.
#define INFINICORE_GRAPH_OP_DISPATCHERS_IMPL(__OP_NAME__) \
common::OpDispatcher<__OP_NAME__::plan_schema> &__OP_NAME__::plan_dispatcher() { \
static common::OpDispatcher<__OP_NAME__::plan_schema> dispatcher_; \
return dispatcher_; \
} \
common::OpDispatcher<__OP_NAME__::run_schema> &__OP_NAME__::run_dispatcher() { \
static common::OpDispatcher<__OP_NAME__::run_schema> dispatcher_; \
return dispatcher_; \
} \
common::OpDispatcher<__OP_NAME__::cleanup_schema> &__OP_NAME__::cleanup_dispatcher() { \
static common::OpDispatcher<__OP_NAME__::cleanup_schema> dispatcher_; \
return dispatcher_; \
}
// Fills the DispatchableGraphOperator members for the given device type:
//   planned_meta_ <- result of calling the looked-up plan function with the
//                    remaining arguments,
//   runner_       <- looked-up run function,
//   deleter_      <- looked-up cleanup function.
// Uses unqualified member and accessor names, so it only makes sense inside a
// member function of a class produced by INFINICORE_GRAPH_OP_CLASS.
// NOTE(review): expands to three separate statements without enclosing braces;
// do not use it as the body of an unbraced `if`/`for`.
#define INFINICORE_GRAPH_OP_DISPATCH(__DEVICE_TYPE__, ...) \
planned_meta_ = plan_dispatcher().lookup(__DEVICE_TYPE__)(__VA_ARGS__); \
runner_ = run_dispatcher().lookup(__DEVICE_TYPE__); \
deleter_ = cleanup_dispatcher().lookup(__DEVICE_TYPE__);
// Constructs a shared `__OP_NAME__` from the given arguments, then either
// hands it to context::addGraphOperator() when context::isGraphRecording() is
// true, or runs it immediately otherwise.
// NOTE(review): the local `___op` is declared in the caller's scope (no
// enclosing block), so expanding this macro twice in one scope redeclares it;
// the identifier also contains a double underscore, which is reserved.
#define INFINICORE_GRAPH_OP_RECORD_OR_RUN(__OP_NAME__, ...) \
auto ___op = std::make_shared<__OP_NAME__>(__VA_ARGS__); \
if (context::isGraphRecording()) { \
context::addGraphOperator(___op); \
} else { \
___op->run(); \
}
// Registers the plan / run / cleanup functions on the operator's three
// dispatchers via registerAll(). The work happens in an immediately-invoked
// lambda whose result initializes a static bool, i.e. during static
// initialization of the translation unit that expands the macro.
// NOTE(review): the meaning of the `false` second argument to registerAll is
// not visible here (presumably "do not override existing entries") — confirm.
// NOTE(review): the variable name `registered` is fixed, so two expansions in
// the same scope collide.
#define INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(__OP_NAME__, __PLAN_F__, __RUN_F__, __CLEANUP_F__) \
static bool registered = []() { \
__OP_NAME__::plan_dispatcher().registerAll(__PLAN_F__, false); \
__OP_NAME__::run_dispatcher().registerAll(__RUN_F__, false); \
__OP_NAME__::cleanup_dispatcher().registerAll(__CLEANUP_F__, false); \
return true; \
}();
include/infinicore/nn/linear.hpp
View file @
8d09630a
#pragma once
#include "../ops.hpp"
#include "../quantization.hpp"
#include "module.hpp"
#include <infiniccl.h>
#include <optional>
namespace
infinicore
::
nn
{
...
...
@@ -11,6 +13,9 @@ public:
BaseLinear
(
size_t
in_features
,
size_t
out_features
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
BaseLinear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
...
...
@@ -27,12 +32,17 @@ public:
// Accessors for parameters
Tensor
weight
()
const
{
return
weight_
;
}
Tensor
bias
()
const
{
return
bias_
;
}
Tensor
weight_scale
()
const
{
return
weight_scale_
;
}
Tensor
weight_zeros
()
const
{
return
weight_zeros_
;
}
protected:
// Parameters
INFINICORE_NN_PARAMETER
(
weight
);
INFINICORE_NN_PARAMETER
(
bias
);
INFINICORE_NN_PARAMETER
(
weight_scale
);
INFINICORE_NN_PARAMETER
(
weight_zeros
);
protected:
// Helper method for common forward computation
Tensor
compute_linear
(
Tensor
&
input
)
const
;
...
...
@@ -41,6 +51,7 @@ protected:
size_t
out_features_
;
bool
has_bias_
;
DataType
dtype_
;
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization_
=
std
::
make_shared
<
infinicore
::
quantization
::
NoneQuantization
>
(
nullptr
);
};
}
// namespace infinicore::nn
...
...
@@ -52,6 +63,9 @@ public:
Linear
(
size_t
in_features
,
size_t
out_features
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
Linear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
...
...
@@ -65,6 +79,10 @@ public:
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
);
ColumnParallelLinear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
);
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
...
...
@@ -82,6 +100,10 @@ public:
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
,
infinicclComm_t
communicator
=
nullptr
);
RowParallelLinear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
,
infinicclComm_t
communicator
=
nullptr
);
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
...
...
include/infinicore/nn/rmsnorm.hpp
View file @
8d09630a
#pragma once
#include "module.hpp"
#include "../ops.hpp"
#include "module.hpp"
namespace
infinicore
::
nn
{
...
...
@@ -57,6 +57,21 @@ public:
*/
Tensor
forward
(
const
Tensor
&
x
)
const
;
/**
 * @brief In-place forward pass: RMSNorm combined with a residual add.
 *
 * Normalization runs over the last dimension, e.g.
 *   [batch, seq_len, hidden_size] -> normalized over hidden_size
 *   [batch, hidden_size]          -> normalized over hidden_size
 *
 * @param x        Input of shape (*, normalized_shape), where * is any number
 *                 of leading dimensions. Overwritten with the normalized output.
 * @param residual Tensor added to the input before normalization.
 *                 Overwritten with the sum of input and residual.
 */
void forward_inplace(Tensor &x, Tensor &residual) const;
// Module information
size_t
normalized_shape
()
const
{
return
normalized_shape_
;
}
double
eps
()
const
{
return
eps_
;
}
...
...
@@ -73,9 +88,9 @@ protected:
INFINICORE_NN_PARAMETER
(
weight
);
private:
size_t
normalized_shape_
;
// Size of the feature dimension
double
eps_
;
// Epsilon for numerical stability
DataType
dtype_
;
// Data type for weight
size_t
normalized_shape_
;
// Size of the feature dimension
double
eps_
;
// Epsilon for numerical stability
DataType
dtype_
;
// Data type for weight
};
}
// namespace infinicore::nn
include/infinicore/nn/rope.hpp
View file @
8d09630a
...
...
@@ -17,6 +17,47 @@ public:
GPT_NEOX
=
1
,
// GPT-NeoX style RoPE algorithm (First half dimensions for sin, second half for cos)
};
/// Kind of position scaling applied by RoPE.
enum class ScalingType {
    DEFAULT = 0, // Default RoPE
    LONGROPE = 1 // Long-RoPE
};

/// Base class for RoPE scaling configurations.
/// Only derived classes can construct it (protected constructor); callers
/// identify the concrete kind through type().
class ScalingConfig {
public:
    virtual ~ScalingConfig() = default;

    /// Scaling kind selected by the derived class.
    ScalingType type() const { return type_; }

protected:
    ScalingType type_ = ScalingType::DEFAULT;

    ScalingConfig(ScalingType type) : type_(type) {}
};

/// Long-RoPE scaling parameters.
///
/// Stores the short/long per-dimension factor tables, the original maximum
/// position count, and a derived attention scaling factor (see factor()).
class LongRopeConfig : public ScalingConfig {
protected:
    std::vector<float> short_factor_;
    std::vector<float> long_factor_;
    size_t original_max_position_embeddings_;
    float factor_;

public:
    /// @param short_factor Factor table returned by short_factor().
    /// @param long_factor  Factor table returned by long_factor().
    /// @param original_max_position_embeddings Original maximum position count.
    /// @param factor Context extension ratio. Values <= 1 mean no extension
    ///        and give an attention factor of exactly 1; larger values give
    ///        sqrt(1 + ln(factor) / ln(original_max_position_embeddings)).
    LongRopeConfig(
        std::vector<float> short_factor,
        std::vector<float> long_factor,
        size_t original_max_position_embeddings,
        float factor = 1.0f)
        : ScalingConfig(ScalingType::LONGROPE),
          short_factor_(short_factor),
          long_factor_(long_factor),
          original_max_position_embeddings_(original_max_position_embeddings),
          // `<=` rather than `==`: a ratio below 1 must not shrink the factor,
          // and a non-positive ratio must not reach std::log and produce NaN.
          factor_(factor <= 1.0f
                      ? 1.0f
                      : static_cast<float>(
                          std::sqrt(1 + std::log(factor) / std::log(original_max_position_embeddings)))) {}

    ~LongRopeConfig() override = default;

    size_t original_max_position_embeddings() const { return original_max_position_embeddings_; }

    const std::vector<float> &short_factor() const { return short_factor_; }

    const std::vector<float> &long_factor() const { return long_factor_; }

    /// Derived attention scaling factor (not the raw constructor argument).
    float factor() const { return factor_; }
};
/**
* @brief Construct a RoPE layer
*
...
...
@@ -26,13 +67,15 @@ public:
* @param algo RoPE algorithm type (default: Algo::GPT_J)
* @param dtype Data type for sin/cos cache (default: DataType::F32)
* @param device Device to create the cache on
* @param scaling RoPE scaling type (default: nullptr)
*/
RoPE
(
size_t
head_dim
,
size_t
max_seq_len
,
double
theta
=
10000.0
,
Algo
algo
=
Algo
::
GPT_J
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
const
Device
&
device
=
Device
(),
std
::
shared_ptr
<
ScalingConfig
>
scaling
=
nullptr
);
/**
* @brief Forward pass: apply RoPE to a tensor
...
...
@@ -88,11 +131,12 @@ protected:
private:
void
initialize_cache
();
size_t
head_dim_
;
// Dimension of each attention head
size_t
max_seq_len_
;
// Maximum sequence length
double
theta_
;
// Base frequency for rotary embeddings
Algo
algo_
;
// RoPE algorithm type
DataType
dtype_
;
// Data type for cache tables
size_t
head_dim_
;
// Dimension of each attention head
size_t
max_seq_len_
;
// Maximum sequence length
double
theta_
;
// Base frequency for rotary embeddings
Algo
algo_
;
// RoPE algorithm type
DataType
dtype_
;
// Data type for cache tables
std
::
shared_ptr
<
ScalingConfig
>
scaling_
;
// RoPE scaling type
};
}
// namespace infinicore::nn
include/infinicore/ops.hpp
View file @
8d09630a
#pragma once
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/attention.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/kv_caching.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
#include "ops/paged_attention_prefill.hpp"
#include "ops/paged_caching.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/silu_and_mul.hpp"
#include "ops/swiglu.hpp"
include/infinicore/ops/add.hpp
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
class
Add
{
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
,
Tensor
);
static
void
execute
(
Tensor
c
,
Tensor
a
,
Tensor
b
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
};
Tensor
add
(
Tensor
a
,
Tensor
b
);
void
add_
(
Tensor
c
,
Tensor
a
,
Tensor
b
);
Tensor
operator
+
(
Tensor
a
,
Tensor
b
);
INFINICORE_GRAPH_OP_CLASS
(
Add
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
);
Tensor
add
(
const
Tensor
&
a
,
const
Tensor
&
b
);
void
add_
(
Tensor
c
,
const
Tensor
&
a
,
const
Tensor
&
b
);
}
// namespace infinicore::op
include/infinicore/ops/add_rms_norm.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <utility>
namespace infinicore::op {

// Graph operator class for the fused add + RMS normalization.
INFINICORE_GRAPH_OP_CLASS(AddRMSNorm, Tensor, Tensor, const Tensor &, const Tensor &, const Tensor &, float);

// Fused Add and RMS Normalization.
// Returns the pair (normalized_result, add_result); add_result can serve as
// the residual for subsequent layers.
std::pair<Tensor, Tensor> add_rms_norm(const Tensor &a, const Tensor &b, const Tensor &weight, float epsilon = 1e-5f);

// Variant taking caller-provided tensors.
// NOTE(review): presumably `out` receives the normalized result and `residual`
// the sum of `a` and `b` — confirm against the implementation.
void add_rms_norm_(Tensor out, Tensor residual, const Tensor &a, const Tensor &b, const Tensor &weight, float epsilon = 1e-5f);

// Fused Add and RMS Normalization (in-place).
// normalized_result will be stored in `input`, add_result in `residual`.
void add_rms_norm_inplace(Tensor input, Tensor residual, const Tensor &weight, float epsilon = 1e-5f);

} // namespace infinicore::op
include/infinicore/ops/causal_softmax.hpp
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
class
CausalSoftmax
{
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
);
static
void
execute
(
Tensor
output
,
Tensor
input
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
};
Tensor
causal_softmax
(
Tensor
input
);
void
causal_softmax_
(
Tensor
output
,
Tensor
input
);
INFINICORE_GRAPH_OP_CLASS
(
CausalSoftmax
,
Tensor
,
const
Tensor
&
);
Tensor
causal_softmax
(
const
Tensor
&
input
);
void
causal_softmax_
(
Tensor
output
,
const
Tensor
&
input
);
}
// namespace infinicore::op
include/infinicore/ops/dequantize_awq.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
namespace infinicore::op {

// Graph operator class for AWQ dequantization.
INFINICORE_GRAPH_OP_CLASS(DequantizeAWQ, Tensor, const Tensor &, const Tensor &, const Tensor &);

// Dequantizes AWQ data.
// NOTE(review): presumably `x` is the output written from the packed values
// `x_packed` using `x_scale` and `x_zeros` — confirm shapes and dtypes against
// the implementation.
void dequantize_awq_(Tensor x, const Tensor &x_packed, const Tensor &x_scale, const Tensor &x_zeros);

} // namespace infinicore::op
include/infinicore/ops/distributed/allreduce.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../../device.hpp"
#include "../../graph/graph.hpp"
#include "../common/op.hpp"
#include <infiniccl.h>
namespace
infinicore
::
op
::
distributed
{
/// Graph operator for an all-reduce over a communicator.
///
/// Unlike the INFINICORE_GRAPH_OP_CLASS operators it derives directly from
/// graph::GraphOperator and keeps its own opaque planned state.
class AllReduce : public graph::GraphOperator {
public:
    /// @param output       Tensor receiving the result.
    /// @param input        Tensor to reduce.
    /// @param op           Reduction operation.
    /// @param communicator Communicator used for the collective.
    AllReduce(Tensor output, const Tensor &input, infinicclReduceOp_t op, infinicclComm_t communicator);

    // `override` matches DispatchableGraphOperator and makes the compiler
    // verify that the base destructor is virtual.
    ~AllReduce() override;

    void run() const override;

    /// Entry point taking the same arguments as the constructor.
    /// NOTE(review): presumably records or runs an AllReduce depending on the
    /// graph-recording state — confirm in the source file.
    static void execute(Tensor output, const Tensor &input, infinicclReduceOp_t op, infinicclComm_t communicator);

private:
    // Opaque state owned by this operator; starts as nullptr so the destructor
    // never sees an indeterminate pointer if construction fails early.
    void *planned_meta_ = nullptr;
};
Tensor
allreduce
(
const
Tensor
&
input
,
infinicclReduceOp_t
op
,
infinicclComm_t
communicator
);
void
allreduce_
(
Tensor
output
,
const
Tensor
&
input
,
infinicclReduceOp_t
op
,
infinicclComm_t
communicator
);
}
// namespace infinicore::op::distributed
include/infinicore/ops/embedding.hpp
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
Tensor
embedding
(
Tensor
input
,
Tensor
weight
);
void
embedding_
(
Tensor
out
,
Tensor
input
,
Tensor
weight
);
INFINICORE_GRAPH_OP_CLASS
(
Embedding
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
);
Tensor
embedding
(
const
Tensor
&
input
,
const
Tensor
&
weight
);
void
embedding_
(
Tensor
out
,
const
Tensor
&
input
,
const
Tensor
&
weight
);
}
// namespace infinicore::op
include/infinicore/ops/flash_attention.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {

// Graph operator class for flash attention.
INFINICORE_GRAPH_OP_CLASS(FlashAttention, Tensor, const Tensor &, const Tensor &, const Tensor &, const Tensor &, float, bool);

// Flash attention returning a new tensor.
// NOTE(review): tensor layouts, the meaning of `total_kv_len`, and whether
// `scale` multiplies the attention logits are not visible from this header —
// confirm against the implementation.
Tensor flash_attention(const Tensor &q, const Tensor &k, const Tensor &v, const Tensor &total_kv_len, float scale, bool is_causal);

// Same computation with a caller-provided `out` tensor.
void flash_attention_(Tensor out, const Tensor &q, const Tensor &k, const Tensor &v, const Tensor &total_kv_len, float scale, bool is_causal);

} // namespace infinicore::op
include/infinicore/ops/gemm.hpp
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
class
Gemm
{
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
,
Tensor
,
float
,
float
);
static
void
execute
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
};
INFINICORE_GRAPH_OP_CLASS
(
Gemm
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
,
float
,
float
);
Tensor
gemm
(
Tensor
a
,
Tensor
b
,
float
alpha
=
1.0
f
,
float
beta
=
0.0
f
);
void
gemm_
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
Tensor
gemm
(
const
Tensor
&
a
,
const
Tensor
&
b
,
float
alpha
=
1.0
f
,
float
beta
=
0.0
f
);
void
gemm_
(
Tensor
c
,
const
Tensor
&
a
,
const
Tensor
&
b
,
float
alpha
,
float
beta
);
}
// namespace infinicore::op
Prev
1
2
3
4
5
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment