Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
5540d53a
Commit
5540d53a
authored
May 21, 2025
by
PanZezhong
Browse files
Add workspace allocator
parent
967bcb64
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
179 additions
and
67 deletions
+179
-67
include/infinicore_infer/models/jiuge.h
include/infinicore_infer/models/jiuge.h
+2
-1
scripts/jiuge.py
scripts/jiuge.py
+39
-22
scripts/libinfinicore_infer.py
scripts/libinfinicore_infer.py
+2
-2
src/allocator.hpp
src/allocator.hpp
+26
-0
src/allocator/workspace_allocator.cpp
src/allocator/workspace_allocator.cpp
+41
-0
src/models/jiuge/jiuge.cpp
src/models/jiuge/jiuge.cpp
+28
-22
src/models/jiuge/jiuge_impl.hpp
src/models/jiuge/jiuge_impl.hpp
+4
-0
src/models/jiuge/jiuge_kv_cache.cpp
src/models/jiuge/jiuge_kv_cache.cpp
+2
-2
src/models/jiuge/jiuge_weight.hpp
src/models/jiuge/jiuge_weight.hpp
+34
-18
xmake.lua
xmake.lua
+1
-0
No files found.
include/infinicore_infer/models/jiuge.h
View file @
5540d53a
...
@@ -11,7 +11,7 @@ struct JiugeModel;
...
@@ -11,7 +11,7 @@ struct JiugeModel;
typedef
struct
typedef
struct
{
{
infiniDtype_t
dt_logits
,
dt_norm
,
dt_mat
;
infiniDtype_t
dt_logits
;
size_t
nlayer
,
d
,
nh
,
nkvh
,
dh
,
di
,
dctx
,
dvoc
;
size_t
nlayer
,
d
,
nh
,
nkvh
,
dh
,
di
,
dctx
,
dvoc
;
float
epsilon
,
theta
;
float
epsilon
,
theta
;
uint32_t
end_token
;
uint32_t
end_token
;
...
@@ -20,6 +20,7 @@ typedef struct
...
@@ -20,6 +20,7 @@ typedef struct
typedef
struct
typedef
struct
{
{
size_t
nlayer
;
size_t
nlayer
;
infiniDtype_t
dt_norm
,
dt_mat
;
// [dvoc, d]
// [dvoc, d]
const
void
*
input_embd
;
const
void
*
input_embd
;
// [d]
// [d]
...
...
scripts/jiuge.py
View file @
5540d53a
...
@@ -73,11 +73,15 @@ class LlamaWeightsNaming:
...
@@ -73,11 +73,15 @@ class LlamaWeightsNaming:
class
JiugeMetaFromLlama
(
JiugeMeta
):
class
JiugeMetaFromLlama
(
JiugeMeta
):
def
__init__
(
self
,
config
,
infini_dtype
):
def
__init__
(
self
,
config
,
dtype
=
torch
.
float16
):
if
dtype
==
torch
.
float16
:
dt_
=
DataType
.
INFINI_DTYPE_F16
elif
dtype
==
torch
.
float32
:
dt_
=
DataType
.
INFINI_DTYPE_F32
else
:
dt_
=
DataType
.
INFINI_DTYPE_F16
super
().
__init__
(
super
().
__init__
(
dt_logits
=
infini_dtype
,
dt_logits
=
dt_
,
dt_norm
=
infini_dtype
,
dt_mat
=
infini_dtype
,
nlayer
=
config
.
num_hidden_layers
,
nlayer
=
config
.
num_hidden_layers
,
d
=
config
.
hidden_size
,
d
=
config
.
hidden_size
,
nh
=
config
.
num_attention_heads
,
nh
=
config
.
num_attention_heads
,
...
@@ -94,10 +98,11 @@ class JiugeMetaFromLlama(JiugeMeta):
...
@@ -94,10 +98,11 @@ class JiugeMetaFromLlama(JiugeMeta):
theta
=
config
.
rope_theta
,
theta
=
config
.
rope_theta
,
end_token
=
2
,
end_token
=
2
,
)
)
self
.
torch_dtype_logits
=
dtype
class
JiugeWeightsImpl
(
JiugeWeights
):
class
JiugeWeightsImpl
(
JiugeWeights
):
def
__init__
(
self
,
meta
,
naming
,
state_dict
,
ndev
=
1
):
def
__init__
(
self
,
meta
,
naming
,
state_dict
,
torch_dt_mat
=
torch
.
float16
,
torch_dt_norm
=
torch
.
float32
,
ndev
=
1
):
nlayer
=
meta
.
nlayer
nlayer
=
meta
.
nlayer
nh
=
meta
.
nh
nh
=
meta
.
nh
nkvh
=
meta
.
nkvh
nkvh
=
meta
.
nkvh
...
@@ -108,17 +113,30 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -108,17 +113,30 @@ class JiugeWeightsImpl(JiugeWeights):
assert
nh
%
ndev
==
0
assert
nh
%
ndev
==
0
assert
nkvh
%
ndev
==
0
assert
nkvh
%
ndev
==
0
assert
di
%
ndev
==
0
assert
di
%
ndev
==
0
torch_dt_logits
=
meta
.
torch_dtype_logits
if
torch_dt_mat
==
torch
.
float16
:
self
.
dt_mat
=
DataType
.
INFINI_DTYPE_F16
elif
torch_dt_mat
==
torch
.
float32
:
self
.
dt_mat
=
DataType
.
INFINI_DTYPE_F32
else
:
raise
ValueError
(
"Unsupported proj weight data type"
)
if
torch_dt_norm
==
torch
.
float16
:
self
.
dt_norm
=
DataType
.
INFINI_DTYPE_F16
elif
torch_dt_norm
==
torch
.
float32
:
self
.
dt_norm
=
DataType
.
INFINI_DTYPE_F32
else
:
raise
ValueError
(
"Unsupported norm weight data type"
)
self
.
nlayer
=
nlayer
self
.
nlayer
=
nlayer
self
.
input_embd_tensor
=
state_dict
[
naming
.
input_embd
()]
self
.
input_embd_tensor
=
state_dict
[
naming
.
input_embd
()]
.
to
(
torch_dt_logits
)
self
.
input_embd
=
self
.
input_embd_tensor
.
data_ptr
()
self
.
input_embd
=
self
.
input_embd_tensor
.
data_ptr
()
self
.
output_norm_tensor
=
state_dict
[
naming
.
output_norm
()]
self
.
output_norm_tensor
=
state_dict
[
naming
.
output_norm
()]
.
to
(
torch_dt_norm
)
self
.
output_norm
=
self
.
output_norm_tensor
.
data_ptr
()
self
.
output_norm
=
self
.
output_norm_tensor
.
data_ptr
()
self
.
output_embd_tensor
=
state_dict
[
naming
.
output_embd
()]
self
.
output_embd_tensor
=
state_dict
[
naming
.
output_embd
()]
.
to
(
torch_dt_mat
)
self
.
output_embd
=
self
.
output_embd_tensor
.
data_ptr
()
self
.
output_embd
=
self
.
output_embd_tensor
.
data_ptr
()
self
.
attn_norm_tensors
=
[
self
.
attn_norm_tensors
=
[
state_dict
[
naming
.
attn_norm
(
i
)]
for
i
in
range
(
nlayer
)
state_dict
[
naming
.
attn_norm
(
i
)]
.
to
(
torch_dt_norm
)
for
i
in
range
(
nlayer
)
]
]
self
.
attn_norm_ptrs
=
[
self
.
attn_norm_ptrs
=
[
self
.
attn_norm_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)
self
.
attn_norm_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)
...
@@ -146,7 +164,7 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -146,7 +164,7 @@ class JiugeWeightsImpl(JiugeWeights):
_result
.
append
(
_V
[
_idev
*
_nkvh
:
(
_idev
+
1
)
*
_nkvh
,
:,
:])
_result
.
append
(
_V
[
_idev
*
_nkvh
:
(
_idev
+
1
)
*
_nkvh
,
:,
:])
return
_result
return
_result
self
.
qkv_tensor
=
[
torch
.
concat
(
qkv_slices
(
i
))
for
i
in
range
(
nlayer
)]
self
.
qkv_tensor
=
[
torch
.
concat
(
qkv_slices
(
i
))
.
to
(
torch_dt_mat
)
for
i
in
range
(
nlayer
)]
self
.
qkv_tensor_ptrs
=
[
self
.
qkv_tensor
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)]
self
.
qkv_tensor_ptrs
=
[
self
.
qkv_tensor
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)]
self
.
attn_qkv
=
(
c_void_p
*
nlayer
)(
*
self
.
qkv_tensor_ptrs
)
self
.
attn_qkv
=
(
c_void_p
*
nlayer
)(
*
self
.
qkv_tensor_ptrs
)
...
@@ -172,7 +190,7 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -172,7 +190,7 @@ class JiugeWeightsImpl(JiugeWeights):
return
_result
return
_result
if
naming
.
attn_q_b
(
0
)
in
state_dict
:
if
naming
.
attn_q_b
(
0
)
in
state_dict
:
self
.
qkv_b_tensors
=
[
torch
.
concat
(
qkv_b_slices
(
i
))
for
i
in
range
(
nlayer
)]
self
.
qkv_b_tensors
=
[
torch
.
concat
(
qkv_b_slices
(
i
))
.
to
(
torch_dt_logits
)
for
i
in
range
(
nlayer
)]
self
.
qkv_b_tensor_ptrs
=
[
self
.
qkv_b_tensor_ptrs
=
[
self
.
qkv_b_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)
self
.
qkv_b_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)
]
]
...
@@ -181,7 +199,7 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -181,7 +199,7 @@ class JiugeWeightsImpl(JiugeWeights):
self
.
attn_qkv_b
=
None
self
.
attn_qkv_b
=
None
self
.
attn_o_tensor
=
[
self
.
attn_o_tensor
=
[
state_dict
[
naming
.
attn_o
(
i
)]
state_dict
[
naming
.
attn_o
(
i
)]
.
to
(
torch_dt_mat
)
.
reshape
([
d
,
ndev
,
nh
//
ndev
*
dh
])
.
reshape
([
d
,
ndev
,
nh
//
ndev
*
dh
])
.
transpose
(
0
,
1
)
.
transpose
(
0
,
1
)
.
contiguous
()
.
contiguous
()
...
@@ -190,7 +208,7 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -190,7 +208,7 @@ class JiugeWeightsImpl(JiugeWeights):
self
.
attn_o_ptrs
=
[
self
.
attn_o_tensor
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)]
self
.
attn_o_ptrs
=
[
self
.
attn_o_tensor
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)]
self
.
attn_o
=
(
c_void_p
*
nlayer
)(
*
self
.
attn_o_ptrs
)
self
.
attn_o
=
(
c_void_p
*
nlayer
)(
*
self
.
attn_o_ptrs
)
self
.
ffn_norm_tensors
=
[
state_dict
[
naming
.
ffn_norm
(
i
)]
for
i
in
range
(
nlayer
)]
self
.
ffn_norm_tensors
=
[
state_dict
[
naming
.
ffn_norm
(
i
)]
.
to
(
torch_dt_norm
)
for
i
in
range
(
nlayer
)]
self
.
ffn_norm_ptrs
=
[
self
.
ffn_norm_ptrs
=
[
self
.
ffn_norm_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)
self
.
ffn_norm_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)
]
]
...
@@ -206,12 +224,12 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -206,12 +224,12 @@ class JiugeWeightsImpl(JiugeWeights):
_result
.
append
(
state_dict
[
naming
.
up
(
_i
)][
_start
:
_end
,
:])
_result
.
append
(
state_dict
[
naming
.
up
(
_i
)][
_start
:
_end
,
:])
return
_result
return
_result
self
.
gate_up_tensors
=
[
torch
.
concat
(
gate_up_slices
(
i
))
for
i
in
range
(
nlayer
)]
self
.
gate_up_tensors
=
[
torch
.
concat
(
gate_up_slices
(
i
))
.
to
(
torch_dt_mat
)
for
i
in
range
(
nlayer
)]
self
.
gate_up_ptrs
=
[
self
.
gate_up_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)]
self
.
gate_up_ptrs
=
[
self
.
gate_up_tensors
[
i
].
data_ptr
()
for
i
in
range
(
nlayer
)]
self
.
ffn_gate_up
=
(
c_void_p
*
nlayer
)(
*
self
.
gate_up_ptrs
)
self
.
ffn_gate_up
=
(
c_void_p
*
nlayer
)(
*
self
.
gate_up_ptrs
)
self
.
ffn_down_tensor
=
[
self
.
ffn_down_tensor
=
[
state_dict
[
naming
.
down
(
i
)]
state_dict
[
naming
.
down
(
i
)]
.
to
(
torch_dt_mat
)
.
reshape
([
d
,
ndev
,
di
//
ndev
])
.
reshape
([
d
,
ndev
,
di
//
ndev
])
.
transpose
(
0
,
1
)
.
transpose
(
0
,
1
)
.
contiguous
()
.
contiguous
()
...
@@ -223,23 +241,21 @@ class JiugeWeightsImpl(JiugeWeights):
...
@@ -223,23 +241,21 @@ class JiugeWeightsImpl(JiugeWeights):
class
JiugeForCauslLM
:
class
JiugeForCauslLM
:
def
__init__
(
self
,
model_dir_path
,
device
=
DeviceType
.
DEVICE_TYPE_CPU
,
ndev
=
1
):
def
__init__
(
self
,
model_dir_path
,
device
=
DeviceType
.
DEVICE_TYPE_CPU
,
ndev
=
1
):
def
load_all_safetensors_from_dir
(
dir_path_
:
str
,
torch_type
=
torch
.
float16
):
def
load_all_safetensors_from_dir
(
dir_path_
:
str
):
tensors_
=
{}
tensors_
=
{}
dir_path_
=
Path
(
dir_path_
)
dir_path_
=
Path
(
dir_path_
)
for
file
in
sorted
(
dir_path_
.
glob
(
"*.safetensors"
)):
for
file
in
sorted
(
dir_path_
.
glob
(
"*.safetensors"
)):
data_
=
safetensors
.
safe_open
(
file
,
"pt"
)
data_
=
safetensors
.
safe_open
(
file
,
"pt"
)
for
name_
in
data_
.
keys
():
for
name_
in
data_
.
keys
():
tensors_
[
name_
]
=
data_
.
get_tensor
(
name_
)
.
to
(
torch_type
)
tensors_
[
name_
]
=
data_
.
get_tensor
(
name_
)
return
tensors_
return
tensors_
config
=
transformers
.
AutoConfig
.
from_pretrained
(
config
=
transformers
.
AutoConfig
.
from_pretrained
(
model_dir_path
,
trust_remote_code
=
True
model_dir_path
,
trust_remote_code
=
True
)
)
if
"llama"
==
config
.
model_type
:
if
"llama"
==
config
.
model_type
:
model
=
transformers
.
LlamaForCausalLM
.
from_pretrained
(
model_dir_path
).
to
(
model
=
transformers
.
LlamaForCausalLM
.
from_pretrained
(
model_dir_path
).
half
()
torch
.
float16
self
.
meta
=
JiugeMetaFromLlama
(
model
.
config
)
)
self
.
meta
=
JiugeMetaFromLlama
(
model
.
config
,
DataType
.
INFINI_DTYPE_F16
)
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
model_dir_path
)
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
model_dir_path
)
self
.
weights
=
JiugeWeightsImpl
(
self
.
weights
=
JiugeWeightsImpl
(
self
.
meta
,
LlamaWeightsNaming
(),
model
.
state_dict
(),
ndev
=
ndev
self
.
meta
,
LlamaWeightsNaming
(),
model
.
state_dict
(),
ndev
=
ndev
...
@@ -247,7 +263,7 @@ class JiugeForCauslLM:
...
@@ -247,7 +263,7 @@ class JiugeForCauslLM:
elif
"fm9g"
==
config
.
model_type
:
elif
"fm9g"
==
config
.
model_type
:
state_dict
=
load_all_safetensors_from_dir
(
model_dir_path
)
state_dict
=
load_all_safetensors_from_dir
(
model_dir_path
)
if
LlamaWeightsNaming
.
match
(
state_dict
):
if
LlamaWeightsNaming
.
match
(
state_dict
):
self
.
meta
=
JiugeMetaFromLlama
(
config
,
DataType
.
INFINI_DTYPE_F16
)
self
.
meta
=
JiugeMetaFromLlama
(
config
)
self
.
weights
=
JiugeWeightsImpl
(
self
.
weights
=
JiugeWeightsImpl
(
self
.
meta
,
LlamaWeightsNaming
(),
state_dict
,
ndev
=
ndev
self
.
meta
,
LlamaWeightsNaming
(),
state_dict
,
ndev
=
ndev
)
)
...
@@ -308,6 +324,7 @@ class JiugeForCauslLM:
...
@@ -308,6 +324,7 @@ class JiugeForCauslLM:
break
break
output_content
+=
output_str
output_content
+=
output_str
print
(
output_str
,
end
=
""
,
flush
=
True
)
print
(
output_str
,
end
=
""
,
flush
=
True
)
# print(output_tokens[0])
req_pos
[
0
]
=
req_pos
[
0
]
+
ntok
req_pos
[
0
]
=
req_pos
[
0
]
+
ntok
ntok
=
1
ntok
=
1
tokens
=
(
c_uint
*
ntok
)(
*
output_tokens
)
tokens
=
(
c_uint
*
ntok
)(
*
output_tokens
)
...
...
scripts/libinfinicore_infer.py
View file @
5540d53a
...
@@ -38,8 +38,6 @@ class DeviceType(ctypes.c_int):
...
@@ -38,8 +38,6 @@ class DeviceType(ctypes.c_int):
class
JiugeMeta
(
ctypes
.
Structure
):
class
JiugeMeta
(
ctypes
.
Structure
):
_fields_
=
[
_fields_
=
[
(
"dt_logits"
,
DataType
),
(
"dt_logits"
,
DataType
),
(
"dt_norm"
,
DataType
),
(
"dt_mat"
,
DataType
),
(
"nlayer"
,
c_size_t
),
(
"nlayer"
,
c_size_t
),
(
"d"
,
c_size_t
),
(
"d"
,
c_size_t
),
(
"nh"
,
c_size_t
),
(
"nh"
,
c_size_t
),
...
@@ -58,6 +56,8 @@ class JiugeMeta(ctypes.Structure):
...
@@ -58,6 +56,8 @@ class JiugeMeta(ctypes.Structure):
class
JiugeWeights
(
ctypes
.
Structure
):
class
JiugeWeights
(
ctypes
.
Structure
):
_fields_
=
[
_fields_
=
[
(
"nlayer"
,
c_size_t
),
(
"nlayer"
,
c_size_t
),
(
"dt_norm"
,
DataType
),
(
"dt_mat"
,
DataType
),
(
"input_embd"
,
c_void_p
),
(
"input_embd"
,
c_void_p
),
(
"output_norm"
,
c_void_p
),
(
"output_norm"
,
c_void_p
),
(
"output_embd"
,
c_void_p
),
(
"output_embd"
,
c_void_p
),
...
...
src/allocator.hpp
0 → 100644
View file @
5540d53a
#ifndef ALLOCATOR_HPP
#define ALLOCATOR_HPP
#include "infinicore_infer.h"
// Minimal allocator interface: concrete allocators hand out raw buffers
// via alloc() and take them back via release().
class AllocatorBase {
public:
    // Polymorphic base: deleting a derived allocator through a base
    // pointer must be safe, so the destructor is virtual.
    virtual ~AllocatorBase() = default;
    // Return a buffer of at least `size` bytes. Ownership and reuse
    // semantics are defined by the concrete allocator.
    virtual void *alloc(size_t size) = 0;
    // Hand a previously alloc()-ed buffer back to the allocator.
    virtual void release(void *ptr) = 0;
};
// Grow-only scratch-buffer allocator: keeps a single device buffer and
// reuses it across alloc() calls, reallocating only when a larger size
// is requested. release() is a no-op; memory is freed in the destructor.
class WorkspaceAllocator : public AllocatorBase {
private:
    // In-class initializers: the constructor skips allocation when the
    // initial size is 0, so these must not be left indeterminate.
    void *_memory = nullptr;
    size_t _total_size = 0;
    size_t _used_size = 0;
    size_t _align = 256; // alignment granularity; must be a power of two

public:
    explicit WorkspaceAllocator(size_t initial_size, size_t align = 256);
    ~WorkspaceAllocator();
    // Owns a raw device buffer: copying would double-free it.
    WorkspaceAllocator(const WorkspaceAllocator &) = delete;
    WorkspaceAllocator &operator=(const WorkspaceAllocator &) = delete;
    // Return a workspace of at least `size` bytes; contents are NOT
    // preserved when the buffer has to grow.
    void *alloc(size_t size) override;
    // No-op: the workspace is recycled, not freed per-allocation.
    void release(void *ptr) override;
};
#endif
src/allocator/workspace_allocator.cpp
0 → 100644
View file @
5540d53a
#include "../allocator.hpp"
#include "../utils.hpp"
// Round `size_` up to the next multiple of `align`.
// `align` must be a power of two (true for the 256-byte default),
// otherwise the mask trick below is invalid.
inline size_t aligned_size(size_t size_, size_t align) {
    const size_t mask = align - 1;
    return (size_ + mask) & ~mask;
}
// Allocate `size_` bytes of device memory through the runtime.
// RUN_INFINI is expected to abort on failure; `ptr` is still
// zero-initialized so the return value is well-defined even if a
// build of RUN_INFINI merely reports the error instead of aborting.
inline void *allocate(size_t size_) {
    void *ptr = nullptr;
    RUN_INFINI(infinirtMalloc(&ptr, size_));
    return ptr;
}
// Construct a workspace allocator, optionally pre-allocating
// `initial_size_` bytes (rounded up to `align`).
// `align` must be a power of two.
WorkspaceAllocator::WorkspaceAllocator(size_t initial_size_, size_t align) {
    _align = align;
    // Explicitly zero the bookkeeping: the original only assigned
    // _memory/_total_size when initial_size_ > 0, leaving them
    // indeterminate for the initial_size_ == 0 case (which callers use),
    // and alloc() and the destructor both read them.
    _memory = nullptr;
    _total_size = 0;
    _used_size = 0;
    if (initial_size_ > 0) {
        _total_size = aligned_size(initial_size_, _align);
        _memory = allocate(_total_size);
    }
}
// Return a workspace buffer of at least `new_size` bytes.
// The buffer only ever grows (by a 1.5x factor, aligned up) and is
// reused across calls; previous contents are NOT preserved across a
// grow, which is acceptable for scratch workspaces.
void *WorkspaceAllocator::alloc(size_t new_size) {
    if (new_size > _total_size) {
        // Drop the old, too-small buffer before acquiring a larger one.
        if (_total_size != 0) {
            RUN_INFINI(infinirtFree(_memory));
        }
        // Over-allocate by 50% to amortize future growth requests.
        _total_size = aligned_size(new_size * 3 / 2, _align);
        _memory = allocate(_total_size);
    }
    return _memory;
}
// Intentional no-op: workspace memory is recycled across alloc() calls
// and only returned to the runtime in the destructor.
void WorkspaceAllocator::release(void *ptr) {
}
// Free the workspace buffer, if one was ever allocated.
// NOTE(review): this relies on _memory being nullptr when nothing was
// allocated, but the constructor only assigns it when initial_size > 0 —
// confirm the members are initialized for the zero-size case.
WorkspaceAllocator::~WorkspaceAllocator() {
    if (_memory != nullptr) {
        RUN_INFINI(infinirtFree(_memory));
    }
}
\ No newline at end of file
src/models/jiuge/jiuge.cpp
View file @
5540d53a
...
@@ -31,7 +31,6 @@ void createDeviceResource(DeviceResource *rsrc, const JiugeMeta *meta,
...
@@ -31,7 +31,6 @@ void createDeviceResource(DeviceResource *rsrc, const JiugeMeta *meta,
b_attn_qkv
.
push_back
(
b_attn_qkv
.
push_back
(
getAttnQKVBias
(
meta
,
weights
,
layer
,
idev
,
ndev
));
getAttnQKVBias
(
meta
,
weights
,
layer
,
idev
,
ndev
));
}
}
w_attn_out
.
push_back
(
w_attn_out
.
push_back
(
getAttnO
(
meta
,
weights
,
layer
,
idev
,
ndev
));
getAttnO
(
meta
,
weights
,
layer
,
idev
,
ndev
));
w_ffn_norm
.
push_back
(
w_ffn_norm
.
push_back
(
...
@@ -42,26 +41,29 @@ void createDeviceResource(DeviceResource *rsrc, const JiugeMeta *meta,
...
@@ -42,26 +41,29 @@ void createDeviceResource(DeviceResource *rsrc, const JiugeMeta *meta,
getFFNDown
(
meta
,
weights
,
layer
,
idev
,
ndev
));
getFFNDown
(
meta
,
weights
,
layer
,
idev
,
ndev
));
}
}
*
rsrc
=
DeviceResource
{
device
,
*
rsrc
=
DeviceResource
{
dev_id
,
device
,
handle
,
dev_id
,
getInEmbd
(
meta
,
weights
),
handle
,
getOutNorm
(
meta
,
weights
),
getInEmbd
(
meta
,
weights
),
getOutEmbd
(
meta
,
weights
),
getOutNorm
(
meta
,
weights
),
getSinTable
(
meta
),
getOutEmbd
(
meta
,
weights
),
getCosTable
(
meta
),
getSinTable
(
meta
),
w_attn_norm
,
getCosTable
(
meta
),
w_attn_qkv
,
w_attn_norm
,
b_attn_qkv
,
w_attn_qkv
,
w_attn_out
,
b_attn_qkv
,
w_ffn_norm
,
w_attn_out
,
w_ffn_gate_up
,
w_ffn_norm
,
w_ffn_down
,
w_ffn_gate_up
,
stream
,
w_ffn_down
,
comm
};
stream
,
comm
,
std
::
make_unique
<
WorkspaceAllocator
>
(
0
),
};
}
}
void
inferDeviceBatch
(
const
JiugeMeta
&
meta
,
const
DeviceResource
&
rsrc
,
void
inferDeviceBatch
(
const
JiugeMeta
&
meta
,
DeviceResource
&
rsrc
,
uint32_t
idev
,
uint32_t
ndev
,
uint32_t
idev
,
uint32_t
ndev
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
const
uint32_t
*
req_lens
,
uint32_t
nreq
,
const
uint32_t
*
req_pos
,
...
@@ -75,6 +77,7 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
...
@@ -75,6 +77,7 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
auto
dh
=
meta
.
dh
;
auto
dh
=
meta
.
dh
;
auto
d
=
meta
.
d
;
auto
d
=
meta
.
d
;
auto
dt_logits
=
meta
.
dt_logits
;
auto
dt_logits
=
meta
.
dt_logits
;
// std::cout << "dt_logits: " <<(int)dt_logits << std::endl;
auto
di
=
meta
.
di
/
ndev
;
auto
di
=
meta
.
di
/
ndev
;
auto
dvoc
=
meta
.
dvoc
;
auto
dvoc
=
meta
.
dvoc
;
auto
stream
=
rsrc
.
stream
;
auto
stream
=
rsrc
.
stream
;
...
@@ -215,12 +218,14 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
...
@@ -215,12 +218,14 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
infiniopRandomSampleDescriptor_t
desc_sample
;
infiniopRandomSampleDescriptor_t
desc_sample
;
RUN_INFINI
(
infiniopCreateRandomSampleDescriptor
(
RUN_INFINI
(
infiniopCreateRandomSampleDescriptor
(
rsrc
.
handle
,
&
desc_sample
,
rsrc
.
handle
,
&
desc_sample
,
TensorDesc
::
create
(
INFINI_DTYPE_U
64
,
{},
{})
->
get
(),
TensorDesc
::
create
(
INFINI_DTYPE_U
32
,
{},
{})
->
get
(),
TensorDesc
::
create
(
dt_logits
,
{
dvoc
},
{
1
})
->
get
()));
TensorDesc
::
create
(
dt_logits
,
{
dvoc
},
{
1
})
->
get
()));
RUN_INFINI
(
infiniopGetRandomSampleWorkspaceSize
(
desc_sample
,
&
temp_size
));
RUN_INFINI
(
infiniopGetRandomSampleWorkspaceSize
(
desc_sample
,
&
temp_size
));
workspace_size
=
std
::
max
(
workspace_size
,
temp_size
);
workspace_size
=
std
::
max
(
workspace_size
,
temp_size
);
// Allocate workspace
// Allocate workspace
RUN_INFINI
(
infinirtMallocAsync
(
&
workspace
,
workspace_size
,
stream
));
workspace
=
rsrc
.
workspace_allocator
->
alloc
(
workspace_size
);
// Compute
for
(
uint32_t
layer
=
0
;
layer
<
nlayer
;
layer
++
)
{
for
(
uint32_t
layer
=
0
;
layer
<
nlayer
;
layer
++
)
{
// 1. Attention
// 1. Attention
// rms norm
// rms norm
...
@@ -323,11 +328,13 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
...
@@ -323,11 +328,13 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
for
(
uint32_t
req
=
0
;
req
<
nreq
;
req
++
)
{
for
(
uint32_t
req
=
0
;
req
<
nreq
;
req
++
)
{
auto
seq_len
=
req_lens
[
req
];
auto
seq_len
=
req_lens
[
req
];
float
random_val
=
std
::
uniform_real_distribution
<
float
>
(
0
,
1
)(
gen
);
float
random_val
=
std
::
uniform_real_distribution
<
float
>
(
0
,
1
)(
gen
);
// prob_buf->debug();
RUN_INFINI
(
infiniopRandomSample
(
RUN_INFINI
(
infiniopRandomSample
(
desc_sample
,
workspace
,
workspace_size
,
desc_sample
,
workspace
,
workspace_size
,
result_buf
->
data
(
req
),
result_buf
->
data
(
req
),
prob_buf
->
data
(
req
*
dvoc
),
random_val
,
topp
,
prob_buf
->
data
(
req
*
dvoc
),
random_val
,
topp
,
topk
,
temperature
,
stream
));
topk
,
temperature
,
stream
));
// result_buf->debug();
token_offset
+=
seq_len
;
token_offset
+=
seq_len
;
}
}
RUN_INFINI
(
infinirtStreamSynchronize
(
stream
));
RUN_INFINI
(
infinirtStreamSynchronize
(
stream
));
...
@@ -350,7 +357,6 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
...
@@ -350,7 +357,6 @@ void inferDeviceBatch(const JiugeMeta &meta, const DeviceResource &rsrc,
infiniopDestroyRMSNormDescriptor
(
desc_norm_out
);
infiniopDestroyRMSNormDescriptor
(
desc_norm_out
);
infiniopDestroyGemmDescriptor
(
desc_out_embd
);
infiniopDestroyGemmDescriptor
(
desc_out_embd
);
infiniopDestroyRandomSampleDescriptor
(
desc_sample
);
infiniopDestroyRandomSampleDescriptor
(
desc_sample
);
infinirtFree
(
workspace
);
}
}
__C
void
__C
void
...
...
src/models/jiuge/jiuge_impl.hpp
View file @
5540d53a
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
#include "infinicore_infer.h"
#include "infinicore_infer.h"
#include "../../allocator.hpp"
#include "../../tensor.hpp"
#include "../../tensor.hpp"
#include <condition_variable>
#include <condition_variable>
...
@@ -23,7 +24,10 @@ struct DeviceResource {
...
@@ -23,7 +24,10 @@ struct DeviceResource {
w_ffn_norm
,
w_ffn_gate_up
,
w_ffn_down
;
w_ffn_norm
,
w_ffn_gate_up
,
w_ffn_down
;
// Streams
// Streams
infinirtStream_t
stream
;
infinirtStream_t
stream
;
// Communicator
infinicclComm_t
comm
;
infinicclComm_t
comm
;
std
::
unique_ptr
<
WorkspaceAllocator
>
workspace_allocator
;
};
};
struct
InferState
{
struct
InferState
{
...
...
src/models/jiuge/jiuge_kv_cache.cpp
View file @
5540d53a
...
@@ -12,8 +12,8 @@ __C struct KVCache *createKVCache(const JiugeModel *model) {
...
@@ -12,8 +12,8 @@ __C struct KVCache *createKVCache(const JiugeModel *model) {
auto
kcache
=
std
::
vector
<
std
::
shared_ptr
<
Tensor
>>
();
auto
kcache
=
std
::
vector
<
std
::
shared_ptr
<
Tensor
>>
();
auto
vcache
=
std
::
vector
<
std
::
shared_ptr
<
Tensor
>>
();
auto
vcache
=
std
::
vector
<
std
::
shared_ptr
<
Tensor
>>
();
for
(
unsigned
int
layer
=
0
;
layer
<
model
->
meta
.
nlayer
;
layer
++
)
{
for
(
unsigned
int
layer
=
0
;
layer
<
model
->
meta
.
nlayer
;
layer
++
)
{
kcache
.
push_back
(
std
::
move
(
Tensor
::
buffer
(
model
->
meta
.
dt_
mat
,
shape
)));
kcache
.
push_back
(
std
::
move
(
Tensor
::
buffer
(
model
->
meta
.
dt_
logits
,
shape
)));
vcache
.
push_back
(
std
::
move
(
Tensor
::
buffer
(
model
->
meta
.
dt_
mat
,
shape
)));
vcache
.
push_back
(
std
::
move
(
Tensor
::
buffer
(
model
->
meta
.
dt_
logits
,
shape
)));
}
}
cache
->
k
.
push_back
(
kcache
);
cache
->
k
.
push_back
(
kcache
);
cache
->
v
.
push_back
(
vcache
);
cache
->
v
.
push_back
(
vcache
);
...
...
src/models/jiuge/jiuge_weight.hpp
View file @
5540d53a
...
@@ -15,7 +15,7 @@ inline std::shared_ptr<Tensor> getOutNorm(
...
@@ -15,7 +15,7 @@ inline std::shared_ptr<Tensor> getOutNorm(
JiugeMeta
const
*
meta
,
JiugeMeta
const
*
meta
,
JiugeWeights
const
*
w
)
{
JiugeWeights
const
*
w
)
{
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
d
});
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
d
});
return
Tensor
::
weight
((
char
*
)
w
->
output_norm
,
meta
->
dt_norm
,
shape
);
return
Tensor
::
weight
((
char
*
)
w
->
output_norm
,
w
->
dt_norm
,
shape
);
}
}
inline
std
::
shared_ptr
<
Tensor
>
getOutEmbd
(
inline
std
::
shared_ptr
<
Tensor
>
getOutEmbd
(
...
@@ -31,7 +31,7 @@ inline std::shared_ptr<Tensor> getAttnNorm(
...
@@ -31,7 +31,7 @@ inline std::shared_ptr<Tensor> getAttnNorm(
JiugeWeights
const
*
w
,
JiugeWeights
const
*
w
,
size_t
layer
)
{
size_t
layer
)
{
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
d
});
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
d
});
return
Tensor
::
weight
((
char
*
)(
w
->
attn_norm
[
layer
]),
meta
->
dt_norm
,
shape
);
return
Tensor
::
weight
((
char
*
)(
w
->
attn_norm
[
layer
]),
w
->
dt_norm
,
shape
);
}
}
inline
std
::
shared_ptr
<
Tensor
>
getAttnQKV
(
inline
std
::
shared_ptr
<
Tensor
>
getAttnQKV
(
...
@@ -42,9 +42,9 @@ inline std::shared_ptr<Tensor> getAttnQKV(
...
@@ -42,9 +42,9 @@ inline std::shared_ptr<Tensor> getAttnQKV(
auto
nh
=
meta
->
nh
;
auto
nh
=
meta
->
nh
;
auto
dh
=
meta
->
dh
;
auto
dh
=
meta
->
dh
;
auto
d
=
meta
->
d
;
auto
d
=
meta
->
d
;
size_t
offset
=
idev
*
((
nkvh
*
2
+
nh
)
/
ndev
*
dh
)
*
d
*
dsize
(
meta
->
dt_mat
);
size_t
offset
=
idev
*
((
nkvh
*
2
+
nh
)
/
ndev
*
dh
)
*
d
*
dsize
(
w
->
dt_mat
);
auto
shape
=
std
::
vector
<
size_t
>
({(
nh
+
2
*
nkvh
)
/
ndev
*
dh
,
d
});
auto
shape
=
std
::
vector
<
size_t
>
({(
nh
+
2
*
nkvh
)
/
ndev
*
dh
,
d
});
return
Tensor
::
weight
((
char
*
)(
w
->
attn_qkv
[
layer
])
+
offset
,
meta
->
dt_mat
,
shape
)
return
Tensor
::
weight
((
char
*
)(
w
->
attn_qkv
[
layer
])
+
offset
,
w
->
dt_mat
,
shape
)
->
permute
({
1
,
0
});
->
permute
({
1
,
0
});
}
}
...
@@ -55,9 +55,9 @@ inline std::shared_ptr<Tensor> getAttnQKVBias(
...
@@ -55,9 +55,9 @@ inline std::shared_ptr<Tensor> getAttnQKVBias(
auto
nkvh
=
meta
->
nkvh
;
auto
nkvh
=
meta
->
nkvh
;
auto
nh
=
meta
->
nh
;
auto
nh
=
meta
->
nh
;
auto
dh
=
meta
->
dh
;
auto
dh
=
meta
->
dh
;
size_t
offset
=
idev
*
((
nkvh
*
2
+
nh
)
/
ndev
*
dh
)
*
dsize
(
meta
->
dt_mat
);
size_t
offset
=
idev
*
((
nkvh
*
2
+
nh
)
/
ndev
*
dh
)
*
dsize
(
w
->
dt_mat
);
auto
shape
=
std
::
vector
<
size_t
>
({
1
,
(
nh
+
2
*
nkvh
)
/
ndev
*
dh
});
auto
shape
=
std
::
vector
<
size_t
>
({
1
,
(
nh
+
2
*
nkvh
)
/
ndev
*
dh
});
return
Tensor
::
weight
((
char
*
)(
w
->
attn_qkv_b
[
layer
])
+
offset
,
meta
->
dt_mat
,
shape
);
return
Tensor
::
weight
((
char
*
)(
w
->
attn_qkv_b
[
layer
])
+
offset
,
w
->
dt_mat
,
shape
);
}
}
inline
std
::
shared_ptr
<
Tensor
>
getAttnO
(
JiugeMeta
const
*
meta
,
inline
std
::
shared_ptr
<
Tensor
>
getAttnO
(
JiugeMeta
const
*
meta
,
...
@@ -66,9 +66,9 @@ inline std::shared_ptr<Tensor> getAttnO(JiugeMeta const *meta,
...
@@ -66,9 +66,9 @@ inline std::shared_ptr<Tensor> getAttnO(JiugeMeta const *meta,
auto
nh
=
meta
->
nh
;
auto
nh
=
meta
->
nh
;
auto
dh
=
meta
->
dh
;
auto
dh
=
meta
->
dh
;
auto
d
=
meta
->
d
;
auto
d
=
meta
->
d
;
size_t
offset
=
idev
*
d
*
(
nh
/
ndev
*
dh
)
*
dsize
(
meta
->
dt_mat
);
size_t
offset
=
idev
*
d
*
(
nh
/
ndev
*
dh
)
*
dsize
(
w
->
dt_mat
);
auto
shape
=
std
::
vector
<
size_t
>
({
d
,
nh
/
ndev
*
dh
});
auto
shape
=
std
::
vector
<
size_t
>
({
d
,
nh
/
ndev
*
dh
});
return
Tensor
::
weight
((
char
*
)(
w
->
attn_o
[
layer
])
+
offset
,
meta
->
dt_mat
,
shape
)
return
Tensor
::
weight
((
char
*
)(
w
->
attn_o
[
layer
])
+
offset
,
w
->
dt_mat
,
shape
)
->
permute
({
1
,
0
});
->
permute
({
1
,
0
});
}
}
...
@@ -77,7 +77,7 @@ inline std::shared_ptr<Tensor> getFFNNorm(
...
@@ -77,7 +77,7 @@ inline std::shared_ptr<Tensor> getFFNNorm(
JiugeWeights
const
*
w
,
JiugeWeights
const
*
w
,
size_t
layer
)
{
size_t
layer
)
{
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
d
});
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
d
});
return
Tensor
::
weight
((
char
*
)(
w
->
ffn_norm
[
layer
]),
meta
->
dt_norm
,
shape
);
return
Tensor
::
weight
((
char
*
)(
w
->
ffn_norm
[
layer
]),
w
->
dt_norm
,
shape
);
}
}
inline
std
::
shared_ptr
<
Tensor
>
getFFNGateUp
(
inline
std
::
shared_ptr
<
Tensor
>
getFFNGateUp
(
...
@@ -86,10 +86,10 @@ inline std::shared_ptr<Tensor> getFFNGateUp(
...
@@ -86,10 +86,10 @@ inline std::shared_ptr<Tensor> getFFNGateUp(
size_t
layer
,
size_t
idev
,
size_t
ndev
)
{
size_t
layer
,
size_t
idev
,
size_t
ndev
)
{
auto
di
=
meta
->
di
;
auto
di
=
meta
->
di
;
auto
d
=
meta
->
d
;
auto
d
=
meta
->
d
;
size_t
offset
=
idev
*
(
2
*
di
/
ndev
)
*
d
*
dsize
(
meta
->
dt_mat
);
size_t
offset
=
idev
*
(
2
*
di
/
ndev
)
*
d
*
dsize
(
w
->
dt_mat
);
auto
shape
=
std
::
vector
<
size_t
>
({
2
*
di
/
ndev
,
d
});
auto
shape
=
std
::
vector
<
size_t
>
({
2
*
di
/
ndev
,
d
});
return
Tensor
::
weight
((
char
*
)(
w
->
ffn_gate_up
[
layer
])
+
offset
,
return
Tensor
::
weight
((
char
*
)(
w
->
ffn_gate_up
[
layer
])
+
offset
,
meta
->
dt_mat
,
shape
)
w
->
dt_mat
,
shape
)
->
permute
({
1
,
0
});
->
permute
({
1
,
0
});
}
}
...
@@ -99,21 +99,29 @@ inline std::shared_ptr<Tensor> getFFNDown(
...
@@ -99,21 +99,29 @@ inline std::shared_ptr<Tensor> getFFNDown(
size_t
layer
,
size_t
idev
,
size_t
ndev
)
{
size_t
layer
,
size_t
idev
,
size_t
ndev
)
{
auto
di
=
meta
->
di
;
auto
di
=
meta
->
di
;
auto
d
=
meta
->
d
;
auto
d
=
meta
->
d
;
size_t
offset
=
idev
*
d
*
(
di
/
ndev
)
*
dsize
(
meta
->
dt_mat
);
size_t
offset
=
idev
*
d
*
(
di
/
ndev
)
*
dsize
(
w
->
dt_mat
);
auto
shape
=
std
::
vector
<
size_t
>
({
d
,
di
/
ndev
});
auto
shape
=
std
::
vector
<
size_t
>
({
d
,
di
/
ndev
});
return
Tensor
::
weight
((
char
*
)(
w
->
ffn_down
[
layer
])
+
offset
,
meta
->
dt_mat
,
shape
)
return
Tensor
::
weight
((
char
*
)(
w
->
ffn_down
[
layer
])
+
offset
,
w
->
dt_mat
,
shape
)
->
permute
({
1
,
0
});
->
permute
({
1
,
0
});
}
}
inline
std
::
shared_ptr
<
Tensor
>
getSinTable
(
JiugeMeta
const
*
meta
)
{
inline
std
::
shared_ptr
<
Tensor
>
getSinTable
(
JiugeMeta
const
*
meta
)
{
auto
half_dh
=
meta
->
dh
/
2
;
auto
half_dh
=
meta
->
dh
/
2
;
uint16_t
*
table
=
(
uint16_t
*
)
std
::
malloc
(
meta
->
dctx
*
half_dh
*
sizeof
(
uint16_t
));
auto
unit
=
dsize
(
meta
->
dt_logits
);
void
*
table
=
std
::
malloc
(
meta
->
dctx
*
half_dh
*
unit
);
for
(
size_t
i
=
0
;
i
<
meta
->
dctx
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
meta
->
dctx
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
half_dh
;
j
++
)
{
for
(
size_t
j
=
0
;
j
<
half_dh
;
j
++
)
{
float
_sin
=
std
::
sin
(
float
_sin
=
std
::
sin
(
static_cast
<
float
>
(
i
)
/
std
::
pow
(
meta
->
theta
,
static_cast
<
float
>
(
j
)
/
half_dh
));
static_cast
<
float
>
(
i
)
/
std
::
pow
(
meta
->
theta
,
static_cast
<
float
>
(
j
)
/
half_dh
));
table
[
i
*
half_dh
+
j
]
=
f32_to_f16
(
_sin
);
if
(
meta
->
dt_logits
==
INFINI_DTYPE_F16
)
{
((
uint16_t
*
)
table
)[
i
*
half_dh
+
j
]
=
f32_to_f16
(
_sin
);
}
else
if
(
meta
->
dt_logits
==
INFINI_DTYPE_F32
)
{
((
float
*
)
table
)[
i
*
half_dh
+
j
]
=
_sin
;
}
else
{
std
::
cout
<<
"unsupported data type"
<<
std
::
endl
;
exit
(
1
);
}
}
}
}
}
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
dctx
,
half_dh
});
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
dctx
,
half_dh
});
...
@@ -124,16 +132,24 @@ inline std::shared_ptr<Tensor> getSinTable(JiugeMeta const *meta) {
...
@@ -124,16 +132,24 @@ inline std::shared_ptr<Tensor> getSinTable(JiugeMeta const *meta) {
inline
std
::
shared_ptr
<
Tensor
>
getCosTable
(
JiugeMeta
const
*
meta
)
{
inline
std
::
shared_ptr
<
Tensor
>
getCosTable
(
JiugeMeta
const
*
meta
)
{
auto
half_dh
=
meta
->
dh
/
2
;
auto
half_dh
=
meta
->
dh
/
2
;
uint16_t
*
table
=
(
uint16_t
*
)
std
::
malloc
(
meta
->
dctx
*
half_dh
*
sizeof
(
uint16_t
));
auto
unit
=
dsize
(
meta
->
dt_logits
);
void
*
table
=
std
::
malloc
(
meta
->
dctx
*
half_dh
*
unit
);
for
(
size_t
i
=
0
;
i
<
meta
->
dctx
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
meta
->
dctx
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
half_dh
;
j
++
)
{
for
(
size_t
j
=
0
;
j
<
half_dh
;
j
++
)
{
float
_cos
=
std
::
cos
(
float
_cos
=
std
::
cos
(
static_cast
<
float
>
(
i
)
/
std
::
pow
(
meta
->
theta
,
static_cast
<
float
>
(
j
)
/
half_dh
));
static_cast
<
float
>
(
i
)
/
std
::
pow
(
meta
->
theta
,
static_cast
<
float
>
(
j
)
/
half_dh
));
table
[
i
*
half_dh
+
j
]
=
f32_to_f16
(
_cos
);
if
(
meta
->
dt_logits
==
INFINI_DTYPE_F16
)
{
((
uint16_t
*
)
table
)[
i
*
half_dh
+
j
]
=
f32_to_f16
(
_cos
);
}
else
if
(
meta
->
dt_logits
==
INFINI_DTYPE_F32
)
{
((
float
*
)
table
)[
i
*
half_dh
+
j
]
=
_cos
;
}
else
{
std
::
cout
<<
"unsupported data type"
<<
std
::
endl
;
exit
(
1
);
}
}
}
}
}
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
dctx
,
half_dh
});
auto
shape
=
std
::
vector
<
size_t
>
({
meta
->
dctx
,
half_dh
});
auto
tensor
=
Tensor
::
weight
(
table
,
meta
->
dt_logits
,
shape
);
auto
tensor
=
Tensor
::
weight
(
table
,
meta
->
dt_logits
,
shape
);
std
::
free
(
table
);
std
::
free
(
table
);
return
tensor
;
return
tensor
;
...
...
xmake.lua
View file @
5540d53a
...
@@ -12,6 +12,7 @@ target("infinicore_infer")
...
@@ -12,6 +12,7 @@ target("infinicore_infer")
add_files
(
"src/models/*/*.cpp"
)
add_files
(
"src/models/*/*.cpp"
)
add_files
(
"src/tensor/*.cpp"
)
add_files
(
"src/tensor/*.cpp"
)
add_files
(
"src/allocator/*.cpp"
)
add_includedirs
(
"include"
)
add_includedirs
(
"include"
)
set_installdir
(
INFINI_ROOT
)
set_installdir
(
INFINI_ROOT
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment