Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
84fbe5b1
Commit
84fbe5b1
authored
Mar 05, 2026
by
wooway777
Committed by
thatPepe
Mar 06, 2026
Browse files
issue/248 - replace __C with __INFINI_C
parent
fcbf7bfc
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
43 additions
and
43 deletions
+43
-43
include/infinicore_infer/cache.h
include/infinicore_infer/cache.h
+3
-3
include/infinicore_infer/models/deepseek.h
include/infinicore_infer/models/deepseek.h
+8
-8
include/infinicore_infer/models/jiuge.h
include/infinicore_infer/models/jiuge.h
+4
-4
include/infinicore_infer/models/jiuge_awq.h
include/infinicore_infer/models/jiuge_awq.h
+5
-5
include/infinicore_infer/weights_loader.h
include/infinicore_infer/weights_loader.h
+2
-2
src/cache_manager/kvcache.cpp
src/cache_manager/kvcache.cpp
+3
-3
src/dataloader/weights_loader.cpp
src/dataloader/weights_loader.cpp
+1
-1
src/models/deepseek_v3/deepseek_v3.cpp
src/models/deepseek_v3/deepseek_v3.cpp
+4
-4
src/models/deepseek_v3/deepseek_v3_cache.cpp
src/models/deepseek_v3/deepseek_v3_cache.cpp
+2
-2
src/models/deepseek_v3/deepseek_v3_weight.cpp
src/models/deepseek_v3/deepseek_v3_weight.cpp
+2
-2
src/models/jiuge/jiuge.cpp
src/models/jiuge/jiuge.cpp
+4
-4
src/models/jiuge_awq/jiuge_awq.cpp
src/models/jiuge_awq/jiuge_awq.cpp
+4
-4
src/models/jiuge_awq/jiuge_awq_weight.cpp
src/models/jiuge_awq/jiuge_awq_weight.cpp
+1
-1
No files found.
include/infinicore_infer/cache.h
View file @
84fbe5b1
...
...
@@ -3,7 +3,7 @@
#include <infinirt.h>
__C
__export
struct
KVCache
*
createKVCache
(
__
INFINI_
C
__export
struct
KVCache
*
createKVCache
(
size_t
nlayers
,
size_t
max_len
,
size_t
nkvh_
,
...
...
@@ -14,8 +14,8 @@ __C __export struct KVCache *createKVCache(
int
*
dev_ids
,
size_t
ndev
);
__C
__export
struct
KVCache
*
duplicateKVCache
(
const
KVCache
*
kv_cache
,
size_t
seq_len
);
__
INFINI_
C
__export
struct
KVCache
*
duplicateKVCache
(
const
KVCache
*
kv_cache
,
size_t
seq_len
);
__C
__export
void
dropKVCache
(
KVCache
*
kv_cache
);
__
INFINI_
C
__export
void
dropKVCache
(
KVCache
*
kv_cache
);
#endif
/* CACHE_H */
include/infinicore_infer/models/deepseek.h
View file @
84fbe5b1
...
...
@@ -103,26 +103,26 @@ typedef struct {
/// @param device 协处理器种类
/// @param ndev 协处理器数量
/// @param dev_ids 协处理器编号,长度为 ndev
__C
__export
struct
DeepSeekV3Model
*
__
INFINI_
C
__export
struct
DeepSeekV3Model
*
createDeepSeekV3Model
(
const
DeepSeekV3Meta
*
,
const
DeepSeekV3Weights
*
);
__C
DeepSeekV3Weights
*
__
INFINI_
C
DeepSeekV3Weights
*
createDeepSeekV3Weights
(
const
DeepSeekV3Meta
*
meta
,
infiniDevice_t
device
,
int
ndev
,
const
int
*
dev_ids
);
__C
__export
DeepSeekV3WeightLoader
*
__
INFINI_
C
__export
DeepSeekV3WeightLoader
*
createDeepSeekV3WeightLoader
();
/// @brief 销毁模型
__C
__export
void
destroyDeepSeekV3Model
(
struct
DeepSeekV3Model
*
);
__
INFINI_
C
__export
void
destroyDeepSeekV3Model
(
struct
DeepSeekV3Model
*
);
__C
__export
struct
DeepSeekV3Cache
*
__
INFINI_
C
__export
struct
DeepSeekV3Cache
*
createDeepSeekV3Cache
(
const
struct
DeepSeekV3Model
*
);
__C
__export
void
__
INFINI_
C
__export
void
dropDeepSeekV3Cache
(
const
struct
DeepSeekV3Model
*
,
struct
DeepSeekV3Cache
*
);
...
...
@@ -137,7 +137,7 @@ dropDeepSeekV3Cache(const struct DeepSeekV3Model *,
/// @param topk 采样 topk(1 表示贪心采样)
/// @param topp 采样 topp
/// @param output 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
inferBatchDeepSeekV3
(
struct
DeepSeekV3Model
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -153,7 +153,7 @@ inferBatchDeepSeekV3(struct DeepSeekV3Model *,
/// @param req_pos 每个请求的起始位置
/// @param kv_caches 每个请求的 KV Cache
/// @param logits 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
forwardBatchDeepSeekV3
(
struct
DeepSeekV3Model
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
include/infinicore_infer/models/jiuge.h
View file @
84fbe5b1
...
...
@@ -54,7 +54,7 @@ typedef struct
/// @param device 协处理器种类
/// @param ndev 协处理器数量
/// @param dev_ids 协处理器编号,长度为 ndev
__C
__export
struct
JiugeModel
*
__
INFINI_
C
__export
struct
JiugeModel
*
createJiugeModel
(
const
JiugeMeta
*
,
const
JiugeWeights
*
,
infiniDevice_t
device
,
...
...
@@ -62,7 +62,7 @@ createJiugeModel(const JiugeMeta *,
const
int
*
dev_ids
);
/// @brief 销毁模型
__C
__export
void
__
INFINI_
C
__export
void
destroyJiugeModel
(
struct
JiugeModel
*
);
/// @brief 批次推理一轮,并采样出新的 token
...
...
@@ -76,7 +76,7 @@ destroyJiugeModel(struct JiugeModel *);
/// @param topk 采样 topk(1 表示贪心采样)
/// @param topp 采样 topp
/// @param output 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
inferBatchJiuge
(
struct
JiugeModel
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -92,7 +92,7 @@ inferBatchJiuge(struct JiugeModel *,
/// @param req_pos 每个请求的起始位置
/// @param kv_caches 每个请求的 KV Cache
/// @param logits 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
forwardBatchJiuge
(
struct
JiugeModel
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
include/infinicore_infer/models/jiuge_awq.h
View file @
84fbe5b1
...
...
@@ -25,7 +25,7 @@ typedef struct
}
JiugeAWQMeta
;
//////////////////// APIs ///////////////////////
__C
__export
struct
ModelWeights
*
__
INFINI_
C
__export
struct
ModelWeights
*
createJiugeAWQWeights
(
const
JiugeAWQMeta
*
,
infiniDevice_t
device
,
int
ndev
,
...
...
@@ -34,12 +34,12 @@ createJiugeAWQWeights(const JiugeAWQMeta *,
/// @param device 协处理器种类
/// @param ndev 协处理器数量
/// @param dev_ids 协处理器编号,长度为 ndev
__C
__export
struct
JiugeAWQModel
*
__
INFINI_
C
__export
struct
JiugeAWQModel
*
createJiugeAWQModel
(
const
JiugeAWQMeta
*
,
const
ModelWeights
*
);
/// @brief 销毁模型
__C
__export
void
__
INFINI_
C
__export
void
destroyJiugeAWQModel
(
struct
JiugeAWQModel
*
);
/// @brief 批次推理一轮,并采样出新的 token
...
...
@@ -53,7 +53,7 @@ destroyJiugeAWQModel(struct JiugeAWQModel *);
/// @param topk 采样 topk(1 表示贪心采样)
/// @param topp 采样 topp
/// @param output 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
inferBatchJiugeAWQ
(
struct
JiugeAWQModel
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -69,7 +69,7 @@ inferBatchJiugeAWQ(struct JiugeAWQModel *,
/// @param req_pos 每个请求的起始位置
/// @param kv_caches 每个请求的 KV Cache
/// @param logits 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
forwardBatchJiugeAWQ
(
struct
JiugeAWQModel
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
include/infinicore_infer/weights_loader.h
View file @
84fbe5b1
...
...
@@ -5,10 +5,10 @@
struct
ModelWeights
;
__C
__export
void
__
INFINI_
C
__export
void
loadModelWeight
(
struct
ModelWeights
*
weights
,
const
char
*
name
,
void
*
data
);
__C
__export
void
__
INFINI_
C
__export
void
loadModelWeightDistributed
(
struct
ModelWeights
*
weights
,
const
char
*
name
,
void
*
data
,
int
*
ranks
,
int
nrank
);
#endif // WEIGHTS_LOADER_H
src/cache_manager/kvcache.cpp
View file @
84fbe5b1
#include "../cache.hpp"
__C
struct
KVCache
*
createKVCache
(
__
INFINI_
C
struct
KVCache
*
createKVCache
(
size_t
nlayers
,
size_t
max_len
,
size_t
nkvh_
,
...
...
@@ -31,7 +31,7 @@ __C struct KVCache *createKVCache(
return
cache
;
}
__C
struct
KVCache
*
duplicateKVCache
(
const
KVCache
*
kv_cache
,
size_t
seq_len
)
{
__
INFINI_
C
struct
KVCache
*
duplicateKVCache
(
const
KVCache
*
kv_cache
,
size_t
seq_len
)
{
auto
ndev
=
kv_cache
->
k
.
size
();
auto
nlayers
=
kv_cache
->
k
[
0
].
size
();
auto
device
=
kv_cache
->
k
[
0
][
0
]
->
deviceType
();
...
...
@@ -65,7 +65,7 @@ __C struct KVCache *duplicateKVCache(const KVCache *kv_cache, size_t seq_len) {
return
new_kv_cache
;
}
__C
void
dropKVCache
(
KVCache
*
kv_cache
)
{
__
INFINI_
C
void
dropKVCache
(
KVCache
*
kv_cache
)
{
auto
ndev
=
kv_cache
->
k
.
size
();
auto
nlayers
=
kv_cache
->
k
[
0
].
size
();
auto
device
=
kv_cache
->
k
[
0
][
0
]
->
deviceType
();
...
...
src/dataloader/weights_loader.cpp
View file @
84fbe5b1
...
...
@@ -78,7 +78,7 @@ std::shared_ptr<Tensor> Loader::get(const std::string &name, int rank) {
}
// namespace infinicore::weights
__C
void
__
INFINI_
C
void
loadModelWeight
(
struct
ModelWeights
*
weights_
,
const
char
*
name
,
void
*
data
)
{
std
::
string
name_str
(
name
);
auto
weights
=
reinterpret_cast
<
infinicore
::
weights
::
Loader
*>
(
weights_
);
...
...
src/models/deepseek_v3/deepseek_v3.cpp
View file @
84fbe5b1
...
...
@@ -431,7 +431,7 @@ void inferDeviceBatch(const DeepSeekV3Meta &meta, DeepSeekV3DeviceResource &rsrc
}
}
__C
void
__
INFINI_
C
void
inferBatchDeepSeekV3
(
struct
DeepSeekV3Model
*
model
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -464,7 +464,7 @@ inferBatchDeepSeekV3(struct DeepSeekV3Model *model,
}
}
__C
void
__
INFINI_
C
void
forwardBatchDeepSeekV3
(
struct
DeepSeekV3Model
*
model
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -563,14 +563,14 @@ DeepSeekV3Model::DeepSeekV3Model(const DeepSeekV3Meta *_meta, const DeepSeekV3We
}
}
__C
struct
DeepSeekV3Model
*
__
INFINI_
C
struct
DeepSeekV3Model
*
createDeepSeekV3Model
(
const
DeepSeekV3Meta
*
_meta
,
const
DeepSeekV3Weights
*
weights
)
{
DeepSeekV3Model
*
model
=
new
DeepSeekV3Model
(
_meta
,
weights
);
return
model
;
}
__C
void
__
INFINI_
C
void
destroyDeepSeekV3Model
(
struct
DeepSeekV3Model
*
model
)
{
auto
ndev
=
model
->
dev_resources
.
size
();
...
...
src/models/deepseek_v3/deepseek_v3_cache.cpp
View file @
84fbe5b1
#include "deepseek_v3_impl.hpp"
__C
struct
DeepSeekV3Cache
*
__
INFINI_
C
struct
DeepSeekV3Cache
*
createDeepSeekV3Cache
(
const
struct
DeepSeekV3Model
*
model
)
{
DeepSeekV3Cache
*
cache
=
new
DeepSeekV3Cache
();
auto
ndev
=
model
->
dev_resources
.
size
();
...
...
@@ -25,7 +25,7 @@ createDeepSeekV3Cache(const struct DeepSeekV3Model *model) {
return
cache
;
}
__C
void
__
INFINI_
C
void
dropDeepSeekV3Cache
(
const
struct
DeepSeekV3Model
*
model
,
struct
DeepSeekV3Cache
*
cache
)
{
auto
ndev
=
model
->
dev_resources
.
size
();
...
...
src/models/deepseek_v3/deepseek_v3_weight.cpp
View file @
84fbe5b1
...
...
@@ -436,7 +436,7 @@ static DeepSeekV3WeightLoader weight_loader = {
.
load_mlp_experts
=
load_mlp_experts
,
};
__C
DeepSeekV3Weights
*
__
INFINI_
C
DeepSeekV3Weights
*
createDeepSeekV3Weights
(
const
DeepSeekV3Meta
*
meta
,
infiniDevice_t
device
,
int
ndev
,
...
...
@@ -445,7 +445,7 @@ createDeepSeekV3Weights(const DeepSeekV3Meta *meta,
return
weights
;
};
__C
DeepSeekV3WeightLoader
*
__
INFINI_
C
DeepSeekV3WeightLoader
*
createDeepSeekV3WeightLoader
()
{
return
&
weight_loader
;
}
src/models/jiuge/jiuge.cpp
View file @
84fbe5b1
...
...
@@ -315,7 +315,7 @@ void inferDeviceBatch(const JiugeMeta &meta, JiugeDeviceResource &rsrc,
}
}
__C
void
__
INFINI_
C
void
inferBatchJiuge
(
struct
JiugeModel
*
model
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -348,7 +348,7 @@ inferBatchJiuge(struct JiugeModel *model,
}
}
__C
void
__
INFINI_
C
void
forwardBatchJiuge
(
struct
JiugeModel
*
model
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -444,7 +444,7 @@ JiugeModel::JiugeModel(const JiugeMeta *_meta, const JiugeWeights *weights, infi
}
}
__C
struct
JiugeModel
*
__
INFINI_
C
struct
JiugeModel
*
createJiugeModel
(
const
JiugeMeta
*
meta
,
const
JiugeWeights
*
weights
,
infiniDevice_t
device
,
...
...
@@ -456,7 +456,7 @@ createJiugeModel(const JiugeMeta *meta,
return
model
;
}
__C
void
destroyJiugeModel
(
struct
JiugeModel
*
model
)
{
__
INFINI_
C
void
destroyJiugeModel
(
struct
JiugeModel
*
model
)
{
auto
ndev
=
model
->
dev_resources
.
size
();
for
(
size_t
idev
=
0
;
idev
<
ndev
;
idev
++
)
{
...
...
src/models/jiuge_awq/jiuge_awq.cpp
View file @
84fbe5b1
...
...
@@ -242,7 +242,7 @@ void inferDeviceBatch(const JiugeAWQMeta *meta, DeviceResource &rsrc,
}
}
__C
void
__
INFINI_
C
void
inferBatchJiugeAWQ
(
struct
JiugeAWQModel
*
model
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -275,7 +275,7 @@ inferBatchJiugeAWQ(struct JiugeAWQModel *model,
}
}
__C
void
__
INFINI_
C
void
forwardBatchJiugeAWQ
(
struct
JiugeAWQModel
*
model
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
...
@@ -372,14 +372,14 @@ JiugeAWQModel::JiugeAWQModel(const JiugeAWQMeta *meta, const ModelWeights *weigh
}
}
__C
struct
JiugeAWQModel
*
__
INFINI_
C
struct
JiugeAWQModel
*
createJiugeAWQModel
(
const
JiugeAWQMeta
*
meta
,
const
ModelWeights
*
weights
)
{
JiugeAWQModel
*
model
=
new
JiugeAWQModel
(
meta
,
weights
);
return
model
;
}
__C
void
destroyJiugeAWQModel
(
struct
JiugeAWQModel
*
model
)
{
__
INFINI_
C
void
destroyJiugeAWQModel
(
struct
JiugeAWQModel
*
model
)
{
auto
ndev
=
model
->
dev_resources
.
size
();
for
(
size_t
idev
=
0
;
idev
<
ndev
;
idev
++
)
{
...
...
src/models/jiuge_awq/jiuge_awq_weight.cpp
View file @
84fbe5b1
...
...
@@ -118,7 +118,7 @@ JiugeAWQWeights::JiugeAWQWeights(
#undef REGISTER_LAYER_QUANT_WEIGHT
}
__C
struct
ModelWeights
*
__
INFINI_
C
struct
ModelWeights
*
createJiugeAWQWeights
(
const
JiugeAWQMeta
*
meta
,
infiniDevice_t
device
,
int
ndev
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment