Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
8d09630a
"git@developer.sourcefind.cn:jerrrrry/infinilm.git" did not exist on "97870d3e4b3cde534dd4018621d8e601b309f2de"
Unverified
Commit
8d09630a
authored
Feb 11, 2026
by
gongchensu
Committed by
GitHub
Feb 11, 2026
Browse files
Merge branch 'demo131' into Issue/862
parents
ab52dead
012df56c
Changes
387
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
318 additions
and
41 deletions
+318
-41
.gitmodules
.gitmodules
+4
-0
README.md
README.md
+6
-4
include/infinicore.h
include/infinicore.h
+1
-0
include/infinicore.hpp
include/infinicore.hpp
+1
-0
include/infinicore/common/hash.hpp
include/infinicore/common/hash.hpp
+10
-0
include/infinicore/context/context.hpp
include/infinicore/context/context.hpp
+8
-0
include/infinicore/device.hpp
include/infinicore/device.hpp
+1
-0
include/infinicore/graph/graph.hpp
include/infinicore/graph/graph.hpp
+101
-0
include/infinicore/nn/linear.hpp
include/infinicore/nn/linear.hpp
+22
-0
include/infinicore/nn/rmsnorm.hpp
include/infinicore/nn/rmsnorm.hpp
+19
-4
include/infinicore/nn/rope.hpp
include/infinicore/nn/rope.hpp
+50
-6
include/infinicore/ops.hpp
include/infinicore/ops.hpp
+9
-0
include/infinicore/ops/add.hpp
include/infinicore/ops/add.hpp
+6
-9
include/infinicore/ops/add_rms_norm.hpp
include/infinicore/ops/add_rms_norm.hpp
+18
-0
include/infinicore/ops/causal_softmax.hpp
include/infinicore/ops/causal_softmax.hpp
+6
-8
include/infinicore/ops/dequantize_awq.hpp
include/infinicore/ops/dequantize_awq.hpp
+10
-0
include/infinicore/ops/distributed/allreduce.hpp
include/infinicore/ops/distributed/allreduce.hpp
+24
-0
include/infinicore/ops/embedding.hpp
include/infinicore/ops/embedding.hpp
+6
-2
include/infinicore/ops/flash_attention.hpp
include/infinicore/ops/flash_attention.hpp
+12
-0
include/infinicore/ops/gemm.hpp
include/infinicore/ops/gemm.hpp
+4
-8
No files found.
.gitmodules
View file @
8d09630a
[submodule "third_party/spdlog"]
[submodule "third_party/spdlog"]
path = third_party/spdlog
path = third_party/spdlog
url = https://github.com/gabime/spdlog.git
url = https://github.com/gabime/spdlog.git
[submodule "third_party/nlohmann_json"]
path = third_party/nlohmann_json
url = https://github.com/nlohmann/json.git
branch = master
README.md
View file @
8d09630a
...
@@ -20,6 +20,7 @@ InfiniCore 是一个跨平台统一编程工具集,为不同芯片平台的功
...
@@ -20,6 +20,7 @@ InfiniCore 是一个跨平台统一编程工具集,为不同芯片平台的功
-
天数智芯 GPU;
-
天数智芯 GPU;
-
沐曦 GPU;
-
沐曦 GPU;
-
海光 DCU;
-
海光 DCU;
-
阿里 PPU;
-
华为昇腾 NPU;
-
华为昇腾 NPU;
-
寒武纪 MLU;
-
寒武纪 MLU;
-
昆仑芯 XPU;
-
昆仑芯 XPU;
...
@@ -103,6 +104,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
...
@@ -103,6 +104,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
|
`--qy-gpu=[y\|n]`
| 是否编译QY GPU 接口实现 | n
|
`--qy-gpu=[y\|n]`
| 是否编译QY GPU 接口实现 | n
|
`--hygon-dcu=[y\|n]`
| 是否编译海光 DCU 接口实现 | n
|
`--hygon-dcu=[y\|n]`
| 是否编译海光 DCU 接口实现 | n
|
`--kunlun-xpu=[y\|n]`
| 是否编译昆仑 XPU 接口实现 | n
|
`--kunlun-xpu=[y\|n]`
| 是否编译昆仑 XPU 接口实现 | n
|
`--ali-ppu=[y\|n]`
| 是否编译阿里 PPU 接口实现 | n
|
`--ninetoothed=[y\|n]`
| 是否编译九齿实现 | n
|
`--ninetoothed=[y\|n]`
| 是否编译九齿实现 | n
|
`--ccl=[y\|n]`
| 是否编译 InfiniCCL 通信库接口实现 | n
|
`--ccl=[y\|n]`
| 是否编译 InfiniCCL 通信库接口实现 | n
...
@@ -187,9 +189,9 @@ pip install -e .
...
@@ -187,9 +189,9 @@ pip install -e .
```
bash
```
bash
# 测试单算子
# 测试单算子
python
test
/infinicore/ops/[operator].py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
]
python
test
/infinicore/ops/[operator].py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
|
--ali
]
# 测试全部算子
# 测试全部算子
python
test
/infinicore/run.py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
]
python
test
/infinicore/run.py
[
--bench
|
--debug
|
--verbose
]
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--ali
]
```
```
使用 -h 查看更多参数。
使用 -h 查看更多参数。
...
@@ -198,9 +200,9 @@ python test/infinicore/run.py [--bench | --debug | --verbose] [--cpu | --nvidia
...
@@ -198,9 +200,9 @@ python test/infinicore/run.py [--bench | --debug | --verbose] [--cpu | --nvidia
```
shell
```
shell
# 测试单算子
# 测试单算子
python
test
/infiniop/[operator].py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
]
python
test
/infiniop/[operator].py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
|
--ali
]
# 测试全部算子
# 测试全部算子
python scripts/python_test.py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
]
python scripts/python_test.py
[
--cpu
|
--nvidia
|
--cambricon
|
--ascend
|
--iluvatar
|
--metax
|
--moore
|
--kunlun
|
--Hygon
|
--ali
]
```
```
#### 通信库(InfiniCCL)测试
#### 通信库(InfiniCCL)测试
...
...
include/infinicore.h
View file @
8d09630a
...
@@ -47,6 +47,7 @@ typedef enum {
...
@@ -47,6 +47,7 @@ typedef enum {
INFINI_DEVICE_KUNLUN
=
7
,
INFINI_DEVICE_KUNLUN
=
7
,
INFINI_DEVICE_HYGON
=
8
,
INFINI_DEVICE_HYGON
=
8
,
INFINI_DEVICE_QY
=
9
,
INFINI_DEVICE_QY
=
9
,
INFINI_DEVICE_ALI
=
10
,
INFINI_DEVICE_TYPE_COUNT
INFINI_DEVICE_TYPE_COUNT
}
infiniDevice_t
;
}
infiniDevice_t
;
...
...
include/infinicore.hpp
View file @
8d09630a
...
@@ -3,4 +3,5 @@
...
@@ -3,4 +3,5 @@
#include "infinicore/device_event.hpp"
#include "infinicore/device_event.hpp"
#include "infinicore/nn.hpp"
#include "infinicore/nn.hpp"
#include "infinicore/ops.hpp"
#include "infinicore/ops.hpp"
#include "infinicore/quantization.hpp"
#include "infinicore/tensor.hpp"
#include "infinicore/tensor.hpp"
include/infinicore/common/hash.hpp
View file @
8d09630a
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
#include "../tensor.hpp"
#include "../tensor.hpp"
#include <optional>
#include <type_traits>
#include <type_traits>
namespace
infinicore
{
namespace
infinicore
{
...
@@ -24,6 +25,15 @@ inline void hash_combine(size_t &seed, Tensor tensor) {
...
@@ -24,6 +25,15 @@ inline void hash_combine(size_t &seed, Tensor tensor) {
}
}
}
}
// Overload (not a template specialization) for std::optional<T>.
// The engaged/disengaged flag is always mixed into the seed first; the
// contained value is mixed in afterwards only when one is present. Both steps
// are delegated to the other hash_combine overloads declared in this header.
template <typename T>
inline void hash_combine(size_t &seed, const std::optional<T> &opt) {
    hash_combine(seed, opt.has_value());
    if (!opt) {
        // Nothing further to hash for an empty optional.
        return;
    }
    hash_combine(seed, *opt);
}
// Specialization for std::string
// Specialization for std::string
inline
void
hash_combine
(
size_t
&
seed
,
const
std
::
string
&
str
)
{
inline
void
hash_combine
(
size_t
&
seed
,
const
std
::
string
&
str
)
{
hash_combine
(
seed
,
std
::
hash
<
std
::
string
>
{}(
str
));
hash_combine
(
seed
,
std
::
hash
<
std
::
string
>
{}(
str
));
...
...
include/infinicore/context/context.hpp
View file @
8d09630a
...
@@ -3,6 +3,8 @@
...
@@ -3,6 +3,8 @@
#include "../device.hpp"
#include "../device.hpp"
#include "../memory.hpp"
#include "../memory.hpp"
#include "../graph/graph.hpp"
#include <infiniop.h>
#include <infiniop.h>
#include <infinirt.h>
#include <infinirt.h>
...
@@ -40,6 +42,12 @@ void destroyEvent(infinirtEvent_t event);
...
@@ -40,6 +42,12 @@ void destroyEvent(infinirtEvent_t event);
float
elapsedTime
(
infinirtEvent_t
start
,
infinirtEvent_t
end
);
float
elapsedTime
(
infinirtEvent_t
start
,
infinirtEvent_t
end
);
void
streamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
);
void
streamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
);
// Graph recording APIs
bool
isGraphRecording
();
void
startGraphRecording
();
void
addGraphOperator
(
std
::
shared_ptr
<
graph
::
GraphOperator
>
op
);
std
::
shared_ptr
<
graph
::
Graph
>
stopGraphRecording
();
}
// namespace context
}
// namespace context
}
// namespace infinicore
}
// namespace infinicore
include/infinicore/device.hpp
View file @
8d09630a
...
@@ -22,6 +22,7 @@ public:
...
@@ -22,6 +22,7 @@ public:
KUNLUN
=
INFINI_DEVICE_KUNLUN
,
KUNLUN
=
INFINI_DEVICE_KUNLUN
,
HYGON
=
INFINI_DEVICE_HYGON
,
HYGON
=
INFINI_DEVICE_HYGON
,
QY
=
INFINI_DEVICE_QY
,
QY
=
INFINI_DEVICE_QY
,
ALI
=
INFINI_DEVICE_ALI
,
COUNT
=
INFINI_DEVICE_TYPE_COUNT
,
COUNT
=
INFINI_DEVICE_TYPE_COUNT
,
};
};
...
...
include/infinicore/graph/graph.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include <memory>
#include <vector>
#include "../tensor.hpp"
namespace
infinicore
::
graph
{
// Forward declarations
class
GraphManager
;
// Tensor subtype declared in the infinicore::graph namespace. It adds no data
// members or methods in this header beyond a constructor taking a Tensor.
// NOTE(review): the single-argument constructor is not `explicit`, so a Tensor
// converts to GraphTensor implicitly - confirm that this is intended.
class
GraphTensor
:
public
Tensor
{
public:
// Constructs a GraphTensor from an existing Tensor; defined out of line.
GraphTensor
(
const
Tensor
&
);
};
// Abstract interface for a single operation stored in a Graph.
// Concrete operators override run(); instances are held through
// std::shared_ptr<GraphOperator> elsewhere in this header.
class
GraphOperator
{
public:
// Executes the operator. Pure virtual and const: implementations cannot
// modify their own non-mutable members from run().
virtual
void
run
()
const
=
0
;
// Virtual so that operators can be destroyed through a base pointer.
virtual
~
GraphOperator
()
=
default
;
};
/// GraphOperator whose behaviour is supplied through plain function pointers.
///
/// A concrete operator fills in the three protected members from its
/// constructor (INFINICORE_GRAPH_OP_DISPATCH in this header does exactly
/// that). run() and the destructor are defined out of line.
/// NOTE(review): presumably run() calls runner_(planned_meta_) and the
/// destructor calls deleter_(&planned_meta_) - confirm against the
/// implementation file, including how a null deleter_ is handled.
class DispatchableGraphOperator : public GraphOperator {
public:
    void run() const override;
    ~DispatchableGraphOperator() override;

protected:
    /// Signature of the function that executes a planned operator.
    using run_schema = void (*)(void *);
    /// Signature of the function that releases the planned state; it receives
    /// the address of the pointer.
    using cleanup_schema = void (*)(void **);

    // All three members start out null. Without the initializers they hold
    // indeterminate values until a derived constructor assigns them, so if
    // that constructor throws first (e.g. during dispatcher lookup or
    // planning) the base destructor would observe garbage pointers.
    void *planned_meta_ = nullptr;
    run_schema runner_ = nullptr;
    cleanup_schema deleter_ = nullptr;
};
// A recorded sequence of GraphOperator objects plus an opaque device-side
// handle. The mutating members are protected; GraphManager is a friend and can
// therefore call them.
class
Graph
{
public:
// Constructor and destructor are defined out of line. The destructor has to
// be, because device_graph_ is a unique_ptr to a type that is incomplete here.
Graph
();
~
Graph
();
// Runs the graph; const, so it does not modify non-mutable members.
// NOTE(review): presumably replays op_list_ and/or the device graph - confirm
// against the implementation file.
void
run
()
const
;
protected:
// Takes a shared_ptr to an operator by value.
// NOTE(review): presumably appends it to op_list_ - confirm.
void
add_operator
(
std
::
shared_ptr
<
GraphOperator
>
op
);
// NOTE(review): presumably builds device_graph_ from op_list_ - confirm.
void
instantiate
();
// Operators owned (shared) by this graph.
std
::
vector
<
std
::
shared_ptr
<
GraphOperator
>>
op_list_
;
friend
class
GraphManager
;
private:
// Opaque implementation type, only forward-declared in this header.
struct
DeviceGraph
;
std
::
unique_ptr
<
DeviceGraph
>
device_graph_
;
};
}
// namespace infinicore::graph
// Declares an operator class named __OP_NAME__ that derives from
// graph::DispatchableGraphOperator. The variadic arguments are the operator's
// parameter types and are used for:
//   - schema:       void (*)(args...)
//   - plan_schema:  void *(*)(args...)
//   - the constructor __OP_NAME__(args...)
//   - the static entry point execute(args...)
// It also declares three static accessors returning the plan / run / cleanup
// dispatchers; their definitions come from INFINICORE_GRAPH_OP_DISPATCHERS_IMPL.
// The expansion refers to `graph::` and `common::OpDispatcher` unqualified, so
// it must be used where those names resolve; this header itself only includes
// <memory>, <vector> and "../tensor.hpp".
// The expansion already ends with `};`.
#define INFINICORE_GRAPH_OP_CLASS(__OP_NAME__, ...) \
class __OP_NAME__ : public graph::DispatchableGraphOperator { \
public: \
using schema = void (*)(__VA_ARGS__); \
using plan_schema = void *(*)(__VA_ARGS__); \
static common::OpDispatcher<plan_schema> &plan_dispatcher(); \
static common::OpDispatcher<run_schema> &run_dispatcher(); \
static common::OpDispatcher<cleanup_schema> &cleanup_dispatcher(); \
__OP_NAME__(__VA_ARGS__); \
static void execute(__VA_ARGS__); \
};
// Defines the three static dispatcher accessors declared by
// INFINICORE_GRAPH_OP_CLASS for __OP_NAME__. Each accessor returns a reference
// to its own function-local static OpDispatcher, so the dispatcher is created
// on first call. The definitions are not `inline`, so the macro is meant to be
// expanded in exactly one translation unit per operator.
#define INFINICORE_GRAPH_OP_DISPATCHERS_IMPL(__OP_NAME__) \
common::OpDispatcher<__OP_NAME__::plan_schema> &__OP_NAME__::plan_dispatcher() { \
static common::OpDispatcher<__OP_NAME__::plan_schema> dispatcher_; \
return dispatcher_; \
} \
common::OpDispatcher<__OP_NAME__::run_schema> &__OP_NAME__::run_dispatcher() { \
static common::OpDispatcher<__OP_NAME__::run_schema> dispatcher_; \
return dispatcher_; \
} \
common::OpDispatcher<__OP_NAME__::cleanup_schema> &__OP_NAME__::cleanup_dispatcher() { \
static common::OpDispatcher<__OP_NAME__::cleanup_schema> dispatcher_; \
return dispatcher_; \
}
// Fills in the three DispatchableGraphOperator members for one device type:
// the plan function looked up for __DEVICE_TYPE__ is called immediately with
// the variadic arguments and its result stored in planned_meta_; the run and
// cleanup functions are only looked up and stored. All names are unqualified,
// so this must be expanded inside a member function (typically the
// constructor) of a class produced by INFINICORE_GRAPH_OP_CLASS.
// The expansion is three separate statements, not a single one: do not use it
// as the body of an unbraced `if`/`for`.
#define INFINICORE_GRAPH_OP_DISPATCH(__DEVICE_TYPE__, ...) \
planned_meta_ = plan_dispatcher().lookup(__DEVICE_TYPE__)(__VA_ARGS__); \
runner_ = run_dispatcher().lookup(__DEVICE_TYPE__); \
deleter_ = cleanup_dispatcher().lookup(__DEVICE_TYPE__);
// Constructs the operator with std::make_shared<__OP_NAME__>(args...) and then
// either hands it to context::addGraphOperator (when
// context::isGraphRecording() is true) or runs it immediately.
// NOTE(review): the expansion declares a local named `___op` directly in the
// enclosing scope, so two expansions in the same scope would redeclare it, and
// identifiers containing a double underscore are reserved in C++ - consider
// wrapping the body in its own scope with a non-reserved name.
#define INFINICORE_GRAPH_OP_RECORD_OR_RUN(__OP_NAME__, ...) \
auto ___op = std::make_shared<__OP_NAME__>(__VA_ARGS__); \
if (context::isGraphRecording()) { \
context::addGraphOperator(___op); \
} else { \
___op->run(); \
}
// Registers the plan / run / cleanup functions for __OP_NAME__ via
// registerAll on the three dispatchers. The registration happens during the
// initialization of a `static bool registered` variable, through an
// immediately-invoked lambda.
// NOTE(review): the meaning of the trailing `false` argument to registerAll is
// not visible here (possibly "do not override existing entries") - confirm
// against OpDispatcher.
// NOTE(review): the variable name `registered` is fixed, so expanding this
// macro twice in the same scope would be a redefinition.
#define INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(__OP_NAME__, __PLAN_F__, __RUN_F__, __CLEANUP_F__) \
static bool registered = []() { \
__OP_NAME__::plan_dispatcher().registerAll(__PLAN_F__, false); \
__OP_NAME__::run_dispatcher().registerAll(__RUN_F__, false); \
__OP_NAME__::cleanup_dispatcher().registerAll(__CLEANUP_F__, false); \
return true; \
}();
include/infinicore/nn/linear.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "../ops.hpp"
#include "../ops.hpp"
#include "../quantization.hpp"
#include "module.hpp"
#include "module.hpp"
#include <infiniccl.h>
#include <infiniccl.h>
#include <optional>
namespace
infinicore
::
nn
{
namespace
infinicore
::
nn
{
...
@@ -11,6 +13,9 @@ public:
...
@@ -11,6 +13,9 @@ public:
BaseLinear
(
size_t
in_features
,
size_t
out_features
,
bool
bias
=
true
,
BaseLinear
(
size_t
in_features
,
size_t
out_features
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
BaseLinear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
// Forward pass: output = input @ weight.T + bias
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
Tensor
forward
(
Tensor
&
input
)
const
;
...
@@ -27,12 +32,17 @@ public:
...
@@ -27,12 +32,17 @@ public:
// Accessors for parameters
// Accessors for parameters
Tensor
weight
()
const
{
return
weight_
;
}
Tensor
weight
()
const
{
return
weight_
;
}
Tensor
bias
()
const
{
return
bias_
;
}
Tensor
bias
()
const
{
return
bias_
;
}
Tensor
weight_scale
()
const
{
return
weight_scale_
;
}
Tensor
weight_zeros
()
const
{
return
weight_zeros_
;
}
protected:
protected:
// Parameters
// Parameters
INFINICORE_NN_PARAMETER
(
weight
);
INFINICORE_NN_PARAMETER
(
weight
);
INFINICORE_NN_PARAMETER
(
bias
);
INFINICORE_NN_PARAMETER
(
bias
);
INFINICORE_NN_PARAMETER
(
weight_scale
);
INFINICORE_NN_PARAMETER
(
weight_zeros
);
protected:
protected:
// Helper method for common forward computation
// Helper method for common forward computation
Tensor
compute_linear
(
Tensor
&
input
)
const
;
Tensor
compute_linear
(
Tensor
&
input
)
const
;
...
@@ -41,6 +51,7 @@ protected:
...
@@ -41,6 +51,7 @@ protected:
size_t
out_features_
;
size_t
out_features_
;
bool
has_bias_
;
bool
has_bias_
;
DataType
dtype_
;
DataType
dtype_
;
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization_
=
std
::
make_shared
<
infinicore
::
quantization
::
NoneQuantization
>
(
nullptr
);
};
};
}
// namespace infinicore::nn
}
// namespace infinicore::nn
...
@@ -52,6 +63,9 @@ public:
...
@@ -52,6 +63,9 @@ public:
Linear
(
size_t
in_features
,
size_t
out_features
,
bool
bias
=
true
,
Linear
(
size_t
in_features
,
size_t
out_features
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
Linear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
// Forward pass: output = input @ weight.T + bias
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
Tensor
forward
(
Tensor
&
input
)
const
;
...
@@ -65,6 +79,10 @@ public:
...
@@ -65,6 +79,10 @@ public:
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
);
Size
tp_rank
=
0
,
Size
tp_size
=
1
);
ColumnParallelLinear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
);
// Forward pass: output = input @ weight.T + bias
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
Tensor
forward
(
Tensor
&
input
)
const
;
...
@@ -82,6 +100,10 @@ public:
...
@@ -82,6 +100,10 @@ public:
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
,
infinicclComm_t
communicator
=
nullptr
);
Size
tp_rank
=
0
,
Size
tp_size
=
1
,
infinicclComm_t
communicator
=
nullptr
);
RowParallelLinear
(
size_t
in_features
,
size_t
out_features
,
std
::
shared_ptr
<
infinicore
::
quantization
::
BaseQuantization
>
quantization
,
bool
bias
=
true
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
(),
Size
tp_rank
=
0
,
Size
tp_size
=
1
,
infinicclComm_t
communicator
=
nullptr
);
// Forward pass: output = input @ weight.T + bias
// Forward pass: output = input @ weight.T + bias
Tensor
forward
(
Tensor
&
input
)
const
;
Tensor
forward
(
Tensor
&
input
)
const
;
...
...
include/infinicore/nn/rmsnorm.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "module.hpp"
#include "../ops.hpp"
#include "../ops.hpp"
#include "module.hpp"
namespace
infinicore
::
nn
{
namespace
infinicore
::
nn
{
...
@@ -57,6 +57,21 @@ public:
...
@@ -57,6 +57,21 @@ public:
*/
*/
Tensor
forward
(
const
Tensor
&
x
)
const
;
Tensor
forward
(
const
Tensor
&
x
)
const
;
/**
* @brief Forward pass: apply RMSNorm in-place with residual
*
* @param x Input tensor of shape (*, normalized_shape) where * is any number of dimensions.
* Will be modified in-place to the normalized output.
* @param residual Residual tensor to add to input before normalization.
* Will be modified in-place to the sum of input and residual.
*
* The normalization is applied over the last dimension.
* For example:
* Input: [batch, seq_len, hidden_size] -> normalize over hidden_size
* Input: [batch, hidden_size] -> normalize over hidden_size
*/
void
forward_inplace
(
Tensor
&
x
,
Tensor
&
residual
)
const
;
// Module information
// Module information
size_t
normalized_shape
()
const
{
return
normalized_shape_
;
}
size_t
normalized_shape
()
const
{
return
normalized_shape_
;
}
double
eps
()
const
{
return
eps_
;
}
double
eps
()
const
{
return
eps_
;
}
...
@@ -73,9 +88,9 @@ protected:
...
@@ -73,9 +88,9 @@ protected:
INFINICORE_NN_PARAMETER
(
weight
);
INFINICORE_NN_PARAMETER
(
weight
);
private:
private:
size_t
normalized_shape_
;
// Size of the feature dimension
size_t
normalized_shape_
;
// Size of the feature dimension
double
eps_
;
// Epsilon for numerical stability
double
eps_
;
// Epsilon for numerical stability
DataType
dtype_
;
// Data type for weight
DataType
dtype_
;
// Data type for weight
};
};
}
// namespace infinicore::nn
}
// namespace infinicore::nn
include/infinicore/nn/rope.hpp
View file @
8d09630a
...
@@ -17,6 +17,47 @@ public:
...
@@ -17,6 +17,47 @@ public:
GPT_NEOX
=
1
,
// GPT-NeoX style RoPE algorithm (First half dimensions for sin, second half for cos)
GPT_NEOX
=
1
,
// GPT-NeoX style RoPE algorithm (First half dimensions for sin, second half for cos)
};
};
/// Identifies which RoPE scaling scheme a ScalingConfig describes.
enum class ScalingType {
    DEFAULT = 0, // Default RoPE
    LONGROPE = 1 // Long-RoPE
};

/// Polymorphic base for RoPE scaling configurations.
///
/// Only derived classes can construct it (the constructor is protected); the
/// concrete scheme is reported through type().
class ScalingConfig {
public:
    virtual ~ScalingConfig() = default;

    /// @return The scaling scheme this configuration belongs to.
    ScalingType type() const { return type_; }

protected:
    ScalingType type_ = ScalingType::DEFAULT;
    ScalingConfig(ScalingType type) : type_(type) {}
};

/// LongRoPE scaling parameters.
///
/// Stores the short and long rescale factor tables, the original maximum
/// position count and a derived attention factor (see the constructor).
class LongRopeConfig : public ScalingConfig {
protected:
    std::vector<float> short_factor_;
    std::vector<float> long_factor_;
    size_t original_max_position_embeddings_;
    float factor_;

public:
    /// @param short_factor Rescale factors stored as short_factor().
    /// @param long_factor Rescale factors stored as long_factor().
    /// @param original_max_position_embeddings Stored as given and used as
    ///        the logarithm base term of the attention factor.
    /// @param factor Scaling factor. The stored value returned by factor() is
    ///        NOT this argument but the derived attention factor:
    ///        1 when factor <= 1, otherwise
    ///        sqrt(1 + log(factor) / log(original_max_position_embeddings)).
    ///
    /// The guard is `<=` rather than `==`: for factor < 1 the logarithm is
    /// negative, which would shrink the result below 1 or make the square
    /// root NaN, so such inputs are treated like "no scaling".
    LongRopeConfig(
        std::vector<float> short_factor,
        std::vector<float> long_factor,
        size_t original_max_position_embeddings,
        float factor = 1.0f)
        : ScalingConfig(ScalingType::LONGROPE),
          short_factor_(short_factor),
          long_factor_(long_factor),
          original_max_position_embeddings_(original_max_position_embeddings),
          factor_(factor <= 1.0f
                      ? 1.0f
                      : std::sqrt(1 + std::log(factor) / std::log(original_max_position_embeddings))) {}

    ~LongRopeConfig() override = default;

    /// @return The original maximum position count passed to the constructor.
    size_t original_max_position_embeddings() const { return original_max_position_embeddings_; }
    /// @return The short rescale factor table.
    const std::vector<float> &short_factor() const { return short_factor_; }
    /// @return The long rescale factor table.
    const std::vector<float> &long_factor() const { return long_factor_; }
    /// @return The derived attention factor (see the constructor).
    float factor() const { return factor_; }
};
/**
/**
* @brief Construct a RoPE layer
* @brief Construct a RoPE layer
*
*
...
@@ -26,13 +67,15 @@ public:
...
@@ -26,13 +67,15 @@ public:
* @param algo RoPE algorithm type (default: Algo::GPT_J)
* @param algo RoPE algorithm type (default: Algo::GPT_J)
* @param dtype Data type for sin/cos cache (default: DataType::F32)
* @param dtype Data type for sin/cos cache (default: DataType::F32)
* @param device Device to create the cache on
* @param device Device to create the cache on
* @param scaling RoPE scaling type (default: nullptr)
*/
*/
RoPE
(
size_t
head_dim
,
RoPE
(
size_t
head_dim
,
size_t
max_seq_len
,
size_t
max_seq_len
,
double
theta
=
10000.0
,
double
theta
=
10000.0
,
Algo
algo
=
Algo
::
GPT_J
,
Algo
algo
=
Algo
::
GPT_J
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
DataType
&
dtype
=
DataType
::
F32
,
const
Device
&
device
=
Device
());
const
Device
&
device
=
Device
(),
std
::
shared_ptr
<
ScalingConfig
>
scaling
=
nullptr
);
/**
/**
* @brief Forward pass: apply RoPE to a tensor
* @brief Forward pass: apply RoPE to a tensor
...
@@ -88,11 +131,12 @@ protected:
...
@@ -88,11 +131,12 @@ protected:
private:
private:
void
initialize_cache
();
void
initialize_cache
();
size_t
head_dim_
;
// Dimension of each attention head
size_t
head_dim_
;
// Dimension of each attention head
size_t
max_seq_len_
;
// Maximum sequence length
size_t
max_seq_len_
;
// Maximum sequence length
double
theta_
;
// Base frequency for rotary embeddings
double
theta_
;
// Base frequency for rotary embeddings
Algo
algo_
;
// RoPE algorithm type
Algo
algo_
;
// RoPE algorithm type
DataType
dtype_
;
// Data type for cache tables
DataType
dtype_
;
// Data type for cache tables
std
::
shared_ptr
<
ScalingConfig
>
scaling_
;
// RoPE scaling type
};
};
}
// namespace infinicore::nn
}
// namespace infinicore::nn
include/infinicore/ops.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "ops/add.hpp"
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/attention.hpp"
#include "ops/attention.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/kv_caching.hpp"
#include "ops/matmul.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
#include "ops/paged_attention_prefill.hpp"
#include "ops/paged_caching.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/rearrange.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/silu.hpp"
#include "ops/silu_and_mul.hpp"
#include "ops/swiglu.hpp"
#include "ops/swiglu.hpp"
include/infinicore/ops/add.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "../device.hpp"
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
namespace
infinicore
::
op
{
class
Add
{
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
,
Tensor
);
static
void
execute
(
Tensor
c
,
Tensor
a
,
Tensor
b
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
};
Tensor
add
(
Tensor
a
,
Tensor
b
);
INFINICORE_GRAPH_OP_CLASS
(
Add
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
);
void
add_
(
Tensor
c
,
Tensor
a
,
Tensor
b
);
Tensor
operator
+
(
Tensor
a
,
Tensor
b
);
Tensor
add
(
const
Tensor
&
a
,
const
Tensor
&
b
);
void
add_
(
Tensor
c
,
const
Tensor
&
a
,
const
Tensor
&
b
);
}
// namespace infinicore::op
}
// namespace infinicore::op
include/infinicore/ops/add_rms_norm.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <utility>
namespace
infinicore
::
op
{
INFINICORE_GRAPH_OP_CLASS
(
AddRMSNorm
,
Tensor
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
,
const
Tensor
&
,
float
);
// Fused Add and RMS Normalization
// Returns: (normalized_result, add_result)
// The add_result can be used as residual for subsequent layers
std
::
pair
<
Tensor
,
Tensor
>
add_rms_norm
(
const
Tensor
&
a
,
const
Tensor
&
b
,
const
Tensor
&
weight
,
float
epsilon
=
1e-5
f
);
void
add_rms_norm_
(
Tensor
out
,
Tensor
residual
,
const
Tensor
&
a
,
const
Tensor
&
b
,
const
Tensor
&
weight
,
float
epsilon
=
1e-5
f
);
// Fused Add and RMS Normalization (inplace)
// normalized_result will be stored in input, add_result will be stored in residual
void
add_rms_norm_inplace
(
Tensor
input
,
Tensor
residual
,
const
Tensor
&
weight
,
float
epsilon
=
1e-5
f
);
}
// namespace infinicore::op
include/infinicore/ops/causal_softmax.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "../device.hpp"
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
namespace
infinicore
::
op
{
class
CausalSoftmax
{
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
);
static
void
execute
(
Tensor
output
,
Tensor
input
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
};
Tensor
causal_softmax
(
Tensor
input
);
INFINICORE_GRAPH_OP_CLASS
(
CausalSoftmax
,
Tensor
,
const
Tensor
&
);
void
causal_softmax_
(
Tensor
output
,
Tensor
input
);
Tensor
causal_softmax
(
const
Tensor
&
input
);
void
causal_softmax_
(
Tensor
output
,
const
Tensor
&
input
);
}
// namespace infinicore::op
}
// namespace infinicore::op
include/infinicore/ops/dequantize_awq.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
namespace
infinicore
::
op
{
INFINICORE_GRAPH_OP_CLASS
(
DequantizeAWQ
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
,
const
Tensor
&
);
void
dequantize_awq_
(
Tensor
x
,
const
Tensor
&
x_packed
,
const
Tensor
&
x_scale
,
const
Tensor
&
x_zeros
);
}
// namespace infinicore::op
include/infinicore/ops/distributed/allreduce.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../../device.hpp"
#include "../../graph/graph.hpp"
#include "../common/op.hpp"
#include <infiniccl.h>
namespace
infinicore
::
op
::
distributed
{
/// Graph operator wrapping an all-reduce over an InfiniCCL communicator.
///
/// Unlike the macro-generated operators it derives directly from
/// graph::GraphOperator and keeps its own opaque planned state.
/// NOTE(review): the class has a user-declared destructor and a raw
/// `void *` member but does not delete or define its copy operations; if the
/// destructor frees planned_meta_, copying an AllReduce would free it twice -
/// confirm against the implementation file.
class AllReduce : public graph::GraphOperator {
public:
    /// @param output Tensor receiving the reduced result.
    /// @param input Tensor to reduce.
    /// @param op Reduction operation.
    /// @param communicator InfiniCCL communicator to reduce over.
    AllReduce(Tensor output, const Tensor &input, infinicclReduceOp_t op, infinicclComm_t communicator);

    // Marked `override` so the compiler checks it against the virtual base
    // destructor, matching DispatchableGraphOperator.
    ~AllReduce() override;

    void run() const override;

    /// Static entry point taking the same arguments as the constructor.
    static void execute(Tensor output, const Tensor &input, infinicclReduceOp_t op, infinicclComm_t communicator);

private:
    // Null until the constructor assigns it, so a constructor that exits early
    // never leaves an indeterminate pointer behind.
    void *planned_meta_ = nullptr;
};
Tensor
allreduce
(
const
Tensor
&
input
,
infinicclReduceOp_t
op
,
infinicclComm_t
communicator
);
void
allreduce_
(
Tensor
output
,
const
Tensor
&
input
,
infinicclReduceOp_t
op
,
infinicclComm_t
communicator
);
}
// namespace infinicore::op::distributed
include/infinicore/ops/embedding.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
namespace
infinicore
::
op
{
Tensor
embedding
(
Tensor
input
,
Tensor
weight
);
INFINICORE_GRAPH_OP_CLASS
(
Embedding
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
);
void
embedding_
(
Tensor
out
,
Tensor
input
,
Tensor
weight
);
Tensor
embedding
(
const
Tensor
&
input
,
const
Tensor
&
weight
);
void
embedding_
(
Tensor
out
,
const
Tensor
&
input
,
const
Tensor
&
weight
);
}
// namespace infinicore::op
}
// namespace infinicore::op
include/infinicore/ops/flash_attention.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
INFINICORE_GRAPH_OP_CLASS
(
FlashAttention
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
,
const
Tensor
&
,
const
Tensor
&
,
float
,
bool
);
Tensor
flash_attention
(
const
Tensor
&
q
,
const
Tensor
&
k
,
const
Tensor
&
v
,
const
Tensor
&
total_kv_len
,
float
scale
,
bool
is_causal
);
void
flash_attention_
(
Tensor
out
,
const
Tensor
&
q
,
const
Tensor
&
k
,
const
Tensor
&
v
,
const
Tensor
&
total_kv_len
,
float
scale
,
bool
is_causal
);
}
// namespace infinicore::op
include/infinicore/ops/gemm.hpp
View file @
8d09630a
#pragma once
#pragma once
#include "../device.hpp"
#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"
#include "common/op.hpp"
namespace
infinicore
::
op
{
namespace
infinicore
::
op
{
class
Gemm
{
INFINICORE_GRAPH_OP_CLASS
(
Gemm
,
Tensor
,
const
Tensor
&
,
const
Tensor
&
,
float
,
float
);
public:
using
schema
=
void
(
*
)(
Tensor
,
Tensor
,
Tensor
,
float
,
float
);
static
void
execute
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
static
common
::
OpDispatcher
<
schema
>
&
dispatcher
();
};
Tensor
gemm
(
Tensor
a
,
Tensor
b
,
float
alpha
=
1.0
f
,
float
beta
=
0.0
f
);
Tensor
gemm
(
const
Tensor
&
a
,
const
Tensor
&
b
,
float
alpha
=
1.0
f
,
float
beta
=
0.0
f
);
void
gemm_
(
Tensor
c
,
Tensor
a
,
Tensor
b
,
float
alpha
,
float
beta
);
void
gemm_
(
Tensor
c
,
const
Tensor
&
a
,
const
Tensor
&
b
,
float
alpha
,
float
beta
);
}
// namespace infinicore::op
}
// namespace infinicore::op
Prev
1
2
3
4
5
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment