Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
8bd0f91c
Commit
8bd0f91c
authored
Sep 03, 2025
by
PanZezhong1725
Browse files
feat: specify distribution policy when register
parent
11c6e423
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
114 additions
and
128 deletions
+114
-128
scripts/jiuge_awq.py
scripts/jiuge_awq.py
+9
-33
scripts/libinfinicore_infer.py
scripts/libinfinicore_infer.py
+0
-14
src/dataloader/weights_loader.cpp
src/dataloader/weights_loader.cpp
+43
-50
src/dataloader/weights_loader.hpp
src/dataloader/weights_loader.hpp
+39
-8
src/models/jiuge_awq/jiuge_awq.cpp
src/models/jiuge_awq/jiuge_awq.cpp
+1
-1
src/models/jiuge_awq/jiuge_awq.hpp
src/models/jiuge_awq/jiuge_awq.hpp
+1
-1
src/models/jiuge_awq/jiuge_awq_weight.cpp
src/models/jiuge_awq/jiuge_awq_weight.cpp
+21
-21
No files found.
scripts/jiuge_awq.py
View file @
8bd0f91c
...
@@ -6,7 +6,6 @@ from libinfinicore_infer import (
...
@@ -6,7 +6,6 @@ from libinfinicore_infer import (
DataType
,
DataType
,
DeviceType
,
DeviceType
,
load_model_weight
,
load_model_weight
,
load_model_weight_distributed
,
create_jiuge_awq_weights
,
create_jiuge_awq_weights
,
create_jiuge_awq_model
,
create_jiuge_awq_model
,
destroy_jiuge_awq_model
,
destroy_jiuge_awq_model
,
...
@@ -196,38 +195,15 @@ class JiugeAWQForCausalLM:
...
@@ -196,38 +195,15 @@ class JiugeAWQForCausalLM:
for
key
in
f
.
keys
():
for
key
in
f
.
keys
():
# print(key)
# print(key)
tensor
=
f
.
get_tensor
(
key
)
tensor
=
f
.
get_tensor
(
key
)
if
"proj"
in
key
and
"bias"
not
in
key
:
if
"o_proj.scales"
in
key
:
if
"o_proj"
in
key
or
"down_proj"
in
key
:
tensor
=
tensor
*
self
.
meta
.
scale_o
tensor
=
(
elif
"down_proj.scales"
in
key
:
tensor
.
reshape
(
tensor
.
shape
[
0
],
self
.
ndev
,
-
1
)
tensor
=
tensor
*
self
.
meta
.
scale_down
.
permute
(
1
,
0
,
2
)
elif
"embed_tokens.weight"
in
key
:
.
contiguous
()
tensor
=
tensor
*
self
.
meta
.
scale_input
)
elif
"lm_head.weight"
in
key
:
if
"o_proj.scales"
in
key
:
tensor
=
tensor
*
self
.
meta
.
scale_output
tensor
=
tensor
*
self
.
meta
.
scale_o
load_model_weight
(
self
.
weights
,
key
,
tensor
.
data_ptr
())
elif
"down_proj.scales"
in
key
:
tensor
=
tensor
*
self
.
meta
.
scale_down
load_model_weight_distributed
(
self
.
weights
,
key
,
tensor
.
data_ptr
(),
self
.
dev_ids
,
self
.
ndev
,
)
else
:
load_model_weight_distributed
(
self
.
weights
,
key
,
tensor
.
data_ptr
(),
self
.
dev_ids
,
self
.
ndev
,
)
else
:
if
"embed_tokens.weight"
in
key
:
tensor
=
tensor
*
self
.
meta
.
scale_input
elif
"lm_head.weight"
in
key
:
tensor
=
tensor
*
self
.
meta
.
scale_output
load_model_weight
(
self
.
weights
,
key
,
tensor
.
data_ptr
())
def
max_context_len
(
self
):
def
max_context_len
(
self
):
return
self
.
meta
.
dctx
return
self
.
meta
.
dctx
...
...
scripts/libinfinicore_infer.py
View file @
8bd0f91c
...
@@ -412,16 +412,6 @@ def __open_library__():
...
@@ -412,16 +412,6 @@ def __open_library__():
]
]
lib
.
loadModelWeight
.
restype
=
None
lib
.
loadModelWeight
.
restype
=
None
# loadModelWeightDistributed
lib
.
loadModelWeightDistributed
.
argtypes
=
[
POINTER
(
ModelWeightsCStruct
),
# struct ModelWeights*
c_char_p
,
# const char* name
c_void_p
,
# void* data
POINTER
(
c_int
),
# int* ranks
c_int
,
# int nrank
]
lib
.
loadModelWeightDistributed
.
restype
=
None
return
lib
return
lib
...
@@ -432,10 +422,6 @@ def load_model_weight(weights, name, data):
...
@@ -432,10 +422,6 @@ def load_model_weight(weights, name, data):
LIB
.
loadModelWeight
(
weights
,
name
.
encode
(
"utf-8"
),
data
)
LIB
.
loadModelWeight
(
weights
,
name
.
encode
(
"utf-8"
),
data
)
def
load_model_weight_distributed
(
weights
,
name
,
data
,
ranks
,
nrank
):
LIB
.
loadModelWeightDistributed
(
weights
,
name
.
encode
(
"utf-8"
),
data
,
ranks
,
nrank
)
create_jiuge_model
=
LIB
.
createJiugeModel
create_jiuge_model
=
LIB
.
createJiugeModel
destroy_jiuge_model
=
LIB
.
destroyJiugeModel
destroy_jiuge_model
=
LIB
.
destroyJiugeModel
create_kv_cache
=
LIB
.
createKVCache
create_kv_cache
=
LIB
.
createKVCache
...
...
src/dataloader/weights_loader.cpp
View file @
8bd0f91c
...
@@ -4,63 +4,65 @@
...
@@ -4,63 +4,65 @@
#include "../utils.hpp"
#include "../utils.hpp"
#include <infinirt.h>
#include <infinirt.h>
#include <numeric>
namespace
infinicore
{
namespace
infinicore
::
weights
{
WeightsLoader
::
WeightsLoader
(
infiniDevice_t
dev
,
const
std
::
vector
<
int
>
&
dev_ids
)
:
_device
(
dev
),
_dev_ids
(
dev_ids
)
{
void
Weight
::
load
(
const
void
*
host_data
,
infinirtStream_t
stream
)
{
if
(
_dist_type
==
DistributionType
::
FULL
)
{
_tensor
->
load
(
host_data
,
stream
);
}
else
if
(
_dist_type
==
DistributionType
::
ROW
||
_tensor
->
ndim
()
==
1
)
{
// 1D column-distributed is same as row-distributed
_tensor
->
load
((
const
char
*
)
host_data
+
_rank
*
_tensor
->
numel
()
*
dsize
(
_tensor
->
dtype
()),
stream
);
}
else
if
(
_dist_type
==
DistributionType
::
COLUMN
&&
_tensor
->
ndim
()
>
1
)
{
// _dist_type == DistributionType::COLUMN
void
*
rearranged_ptr
;
RUN_INFINI
(
infinirtMallocHost
(
&
rearranged_ptr
,
_tensor
->
numel
()
*
dsize
(
_tensor
->
dtype
())));
size_t
row_size
=
_tensor
->
shape
()[
_tensor
->
ndim
()
-
1
]
*
dsize
(
_tensor
->
dtype
());
size_t
host_offset
=
_rank
*
row_size
;
size_t
host_row_size
=
_nrank
*
row_size
;
size_t
rows
=
std
::
accumulate
(
_tensor
->
shape
().
begin
(),
_tensor
->
shape
().
end
()
-
1
,
size_t
(
1
),
std
::
multiplies
<
size_t
>
());
for
(
size_t
row
=
0
;
row
<
rows
;
row
++
)
{
memcpy
((
char
*
)
rearranged_ptr
+
row
*
row_size
,
(
char
*
)
host_data
+
host_offset
+
row
*
host_row_size
,
row_size
);
}
_tensor
->
load
(
rearranged_ptr
,
stream
);
RUN_INFINI
(
infinirtFreeHost
(
rearranged_ptr
));
}
else
{
std
::
cerr
<<
"Unsupported distribution type: "
<<
_dist_type
<<
std
::
endl
;
std
::
abort
();
}
};
Loader
::
Loader
(
infiniDevice_t
dev
,
const
std
::
vector
<
int
>
&
dev_ids
)
:
_device
(
dev
),
_dev_ids
(
dev_ids
)
{
_streams
.
resize
(
_dev_ids
.
size
());
_streams
.
resize
(
_dev_ids
.
size
());
_weights
.
resize
(
_dev_ids
.
size
());
_weights
_maps
.
resize
(
_dev_ids
.
size
());
for
(
int
rank
=
0
;
rank
<
int
(
_dev_ids
.
size
());
rank
++
)
{
for
(
int
rank
=
0
;
rank
<
int
(
_dev_ids
.
size
());
rank
++
)
{
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
_weights
[
rank
]
=
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
Tensor
>>
();
_weights
_maps
[
rank
]
=
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
Weight
>>
();
RUN_INFINI
(
infinirtStreamCreate
(
&
_streams
[
rank
]));
RUN_INFINI
(
infinirtStreamCreate
(
&
_streams
[
rank
]));
}
}
}
}
void
Weights
Loader
::
resigter
(
const
std
::
string
&
name
,
std
::
shared_ptr
<
Tensor
>
tensor
,
int
rank
)
{
void
Loader
::
resigter
(
const
std
::
string
&
name
,
std
::
shared_ptr
<
Tensor
>
tensor
,
int
rank
,
DistributionType
dist_type
)
{
_weights
[
rank
][
name
]
=
tensor
;
_weights
_maps
[
rank
][
name
]
=
std
::
make_shared
<
Weight
>
(
tensor
,
rank
,
_dev_ids
.
size
(),
dist_type
)
;
}
}
void
Weights
Loader
::
load
_weight
(
const
std
::
string
&
name
,
const
void
*
host_data
)
{
void
Loader
::
load
(
const
std
::
string
&
name
,
const
void
*
host_data
)
{
for
(
int
rank
=
0
;
rank
<
int
(
_dev_ids
.
size
());
rank
++
)
{
for
(
int
rank
=
0
;
rank
<
int
(
_dev_ids
.
size
());
rank
++
)
{
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
auto
it
=
_weights
[
rank
].
find
(
name
);
auto
it
=
_weights
_maps
[
rank
].
find
(
name
);
if
(
it
==
_weights
[
rank
].
end
())
{
if
(
it
==
_weights
_maps
[
rank
].
end
())
{
std
::
cerr
<<
"Weight "
<<
name
<<
" not found in rank "
<<
rank
<<
std
::
endl
;
std
::
cerr
<<
"Weight "
<<
name
<<
" not found in rank "
<<
rank
<<
std
::
endl
;
std
::
abort
();
std
::
abort
();
}
}
_weights
[
rank
][
name
]
->
load
(
host_data
,
_streams
[
rank
]);
_weights_maps
[
rank
][
name
]
->
load
(
host_data
,
_streams
[
rank
]);
}
for
(
int
rank
=
int
(
_dev_ids
.
size
()
-
1
);
rank
>=
0
;
rank
--
)
{
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
RUN_INFINI
(
infinirtStreamSynchronize
(
_streams
[
rank
]));
}
}
void
WeightsLoader
::
load_distributed_weight
(
const
std
::
string
&
name
,
const
void
*
host_data
,
const
std
::
vector
<
int
>
&
ranks
)
{
for
(
size_t
i
=
0
;
i
<
ranks
.
size
();
i
++
)
{
int
rank
=
ranks
[
i
];
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
auto
it
=
_weights
[
rank
].
find
(
name
);
if
(
it
==
_weights
[
rank
].
end
())
{
std
::
cerr
<<
"Weight "
<<
name
<<
" not found in rank "
<<
rank
<<
std
::
endl
;
std
::
abort
();
}
_weights
[
rank
][
name
]
->
load
((
char
*
)
host_data
+
i
*
_weights
[
rank
][
name
]
->
numel
()
*
dsize
(
_weights
[
rank
][
name
]
->
dtype
()),
_streams
[
rank
]);
}
}
for
(
int
rank
=
int
(
_dev_ids
.
size
()
-
1
);
rank
>=
0
;
rank
--
)
{
for
(
int
rank
=
int
(
_dev_ids
.
size
()
-
1
);
rank
>=
0
;
rank
--
)
{
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
RUN_INFINI
(
infinirtStreamSynchronize
(
_streams
[
rank
]));
RUN_INFINI
(
infinirtStreamSynchronize
(
_streams
[
rank
]));
}
}
}
}
void
WeightsLoader
::
load_rank_weight
(
const
std
::
string
&
name
,
const
void
*
host_data
,
int
rank
)
{
auto
it
=
_weights
[
rank
].
find
(
name
);
void
Loader
::
finalize
()
{
if
(
it
==
_weights
[
rank
].
end
())
{
std
::
cerr
<<
"Weight "
<<
name
<<
" not found in rank "
<<
rank
<<
std
::
endl
;
std
::
abort
();
}
RUN_INFINI
(
infinirtSetDevice
(
_device
,
_dev_ids
[
rank
]));
_weights
[
rank
][
name
]
->
load
(
host_data
);
}
void
WeightsLoader
::
finalize
()
{
int
dev_id
;
int
dev_id
;
RUN_INFINI
(
infinirtGetDevice
(
nullptr
,
&
dev_id
));
RUN_INFINI
(
infinirtGetDevice
(
nullptr
,
&
dev_id
));
for
(
int
rank
=
0
;
rank
<
int
(
_dev_ids
.
size
());
rank
++
)
{
for
(
int
rank
=
0
;
rank
<
int
(
_dev_ids
.
size
());
rank
++
)
{
...
@@ -70,25 +72,16 @@ void WeightsLoader::finalize() {
...
@@ -70,25 +72,16 @@ void WeightsLoader::finalize() {
}
}
RUN_INFINI
(
infinirtSetDevice
(
_device
,
dev_id
));
RUN_INFINI
(
infinirtSetDevice
(
_device
,
dev_id
));
}
}
std
::
shared_ptr
<
Tensor
>
Weights
Loader
::
get
(
const
std
::
string
&
name
,
int
rank
)
{
std
::
shared_ptr
<
Tensor
>
Loader
::
get
(
const
std
::
string
&
name
,
int
rank
)
{
return
_weights
[
rank
][
name
];
return
_weights
_maps
[
rank
][
name
]
->
tensor
()
;
}
}
}
// namespace infinicore
}
// namespace infinicore
::weights
__C
void
__C
void
loadModelWeight
(
struct
ModelWeights
*
weights_
,
const
char
*
name
,
void
*
data
)
{
loadModelWeight
(
struct
ModelWeights
*
weights_
,
const
char
*
name
,
void
*
data
)
{
std
::
string
name_str
(
name
);
std
::
string
name_str
(
name
);
// std::cout << "Loading weight: " << name_str << std::endl;
// std::cout << "Loading weight: " << name_str << std::endl;
auto
weights
=
reinterpret_cast
<
infinicore
::
WeightsLoader
*>
(
weights_
);
auto
weights
=
reinterpret_cast
<
infinicore
::
weights
::
Loader
*>
(
weights_
);
weights
->
load_weight
(
name_str
,
data
);
weights
->
load
(
name_str
,
data
);
}
__C
void
loadModelWeightDistributed
(
struct
ModelWeights
*
weights_
,
const
char
*
name
,
void
*
data
,
int
*
ranks
,
int
nrank
)
{
std
::
string
name_str
(
name
);
// std::cout << "Loading dist weight: " << name_str << std::endl;
auto
weights
=
reinterpret_cast
<
infinicore
::
WeightsLoader
*>
(
weights_
);
std
::
vector
<
int
>
rank_vec
(
ranks
,
ranks
+
nrank
);
weights
->
load_distributed_weight
(
name_str
,
data
,
rank_vec
);
}
}
src/dataloader/weights_loader.hpp
View file @
8bd0f91c
...
@@ -7,24 +7,55 @@
...
@@ -7,24 +7,55 @@
#include <vector>
#include <vector>
namespace
infinicore
{
namespace
infinicore
{
class
WeightsLoader
{
namespace
weights
{
enum
DistributionType
{
FULL
,
ROW
,
COLUMN
};
class
Weight
{
private:
std
::
shared_ptr
<
Tensor
>
_tensor
;
int
_rank
;
int
_nrank
;
DistributionType
_dist_type
;
public:
Weight
(
std
::
shared_ptr
<
Tensor
>
tensor
,
int
rank
=
0
,
int
nrank
=
1
,
DistributionType
dist_type
=
DistributionType
::
FULL
)
:
_tensor
(
tensor
),
_rank
(
rank
),
_nrank
(
nrank
),
_dist_type
(
dist_type
)
{}
std
::
shared_ptr
<
Tensor
>
tensor
()
const
{
return
_tensor
;
}
int
rank
()
const
{
return
_rank
;
}
int
nrank
()
const
{
return
_nrank
;
}
void
load
(
const
void
*
host_data
,
infinirtStream_t
stream
=
nullptr
);
};
class
Loader
{
protected:
protected:
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
Tensor
>>>
_weights
;
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
Weight
>>>
_weights
_maps
;
infiniDevice_t
_device
;
infiniDevice_t
_device
;
std
::
vector
<
int
>
_dev_ids
;
std
::
vector
<
int
>
_dev_ids
;
std
::
vector
<
infinirtStream_t
>
_streams
;
std
::
vector
<
infinirtStream_t
>
_streams
;
public:
public:
WeightsLoader
(
infiniDevice_t
,
const
std
::
vector
<
int
>
&
dev_ids
);
Loader
(
infiniDevice_t
,
const
std
::
vector
<
int
>
&
dev_ids
);
void
resigter
(
const
std
::
string
&
name
,
std
::
shared_ptr
<
Tensor
>
tensor
,
int
rank
=
0
);
void
load_weight
(
const
std
::
string
&
name
,
const
void
*
host_data
);
/// @brief register a tensor to the loader
void
load_distributed_weight
(
const
std
::
string
&
name
,
const
void
*
host_data
,
const
std
::
vector
<
int
>
&
ranks
);
/// @param name name (aka key) of the tensor
void
load_rank_weight
(
const
std
::
string
&
name
,
const
void
*
host_data
,
int
rank
);
/// @param tensor
/// @param rank the rank of the weight tensor (default 0)
/// @param dist_type either FULL, or distributed by ROW or COLUMN (default FULL)
void
resigter
(
const
std
::
string
&
name
,
std
::
shared_ptr
<
Tensor
>
tensor
,
int
rank
=
0
,
DistributionType
dist_type
=
DistributionType
::
FULL
);
void
load
(
const
std
::
string
&
name
,
const
void
*
host_data
);
void
finalize
();
void
finalize
();
std
::
shared_ptr
<
Tensor
>
get
(
const
std
::
string
&
name
,
int
rank
=
0
);
std
::
shared_ptr
<
Tensor
>
get
(
const
std
::
string
&
name
,
int
rank
=
0
);
const
std
::
vector
<
int
>
&
dev
_i
ds
()
const
{
return
_dev_ids
;
}
const
std
::
vector
<
int
>
&
dev
I
ds
()
const
{
return
_dev_ids
;
}
infiniDevice_t
device
()
const
{
return
_device
;
}
infiniDevice_t
device
()
const
{
return
_device
;
}
};
};
}
// namespace weights
}
// namespace infinicore
}
// namespace infinicore
#endif // WEIGHTS_LOADER_HPP
#endif // WEIGHTS_LOADER_HPP
src/models/jiuge_awq/jiuge_awq.cpp
View file @
8bd0f91c
...
@@ -351,7 +351,7 @@ void launchDevice(const JiugeAWQMeta *meta, std::shared_ptr<JiugeAWQDeviceWeight
...
@@ -351,7 +351,7 @@ void launchDevice(const JiugeAWQMeta *meta, std::shared_ptr<JiugeAWQDeviceWeight
JiugeAWQModel
::
JiugeAWQModel
(
const
JiugeAWQMeta
*
meta
,
const
ModelWeights
*
weights_
)
{
JiugeAWQModel
::
JiugeAWQModel
(
const
JiugeAWQMeta
*
meta
,
const
ModelWeights
*
weights_
)
{
auto
weights
=
(
JiugeAWQWeights
*
)(
weights_
);
auto
weights
=
(
JiugeAWQWeights
*
)(
weights_
);
device
=
weights
->
device
();
device
=
weights
->
device
();
dev_ids
=
weights
->
dev
_i
ds
();
dev_ids
=
weights
->
dev
I
ds
();
int
ndev
=
int
(
dev_ids
.
size
());
int
ndev
=
int
(
dev_ids
.
size
());
dev_resources
=
std
::
vector
<
DeviceResource
>
(
ndev
);
dev_resources
=
std
::
vector
<
DeviceResource
>
(
ndev
);
states
=
std
::
vector
<
InferState
>
(
ndev
);
states
=
std
::
vector
<
InferState
>
(
ndev
);
...
...
src/models/jiuge_awq/jiuge_awq.hpp
View file @
8bd0f91c
...
@@ -19,7 +19,7 @@ struct JiugeAWQDeviceWeight {
...
@@ -19,7 +19,7 @@ struct JiugeAWQDeviceWeight {
std
::
vector
<
std
::
shared_ptr
<
QuantInt4Weight
>>
w_attn_q
,
w_attn_k
,
w_attn_v
,
w_attn_out
,
w_ffn_gate
,
w_ffn_up
,
w_ffn_down
;
std
::
vector
<
std
::
shared_ptr
<
QuantInt4Weight
>>
w_attn_q
,
w_attn_k
,
w_attn_v
,
w_attn_out
,
w_ffn_gate
,
w_ffn_up
,
w_ffn_down
;
};
};
class
JiugeAWQWeights
:
public
infinicore
::
W
eightsLoader
{
class
JiugeAWQWeights
:
public
infinicore
::
w
eights
::
Loader
{
private:
private:
std
::
vector
<
std
::
shared_ptr
<
JiugeAWQDeviceWeight
>>
_device_weights
;
std
::
vector
<
std
::
shared_ptr
<
JiugeAWQDeviceWeight
>>
_device_weights
;
...
...
src/models/jiuge_awq/jiuge_awq_weight.cpp
View file @
8bd0f91c
...
@@ -43,7 +43,7 @@ inline std::shared_ptr<Tensor> getCosTable(size_t dctx, size_t dh, float theta)
...
@@ -43,7 +43,7 @@ inline std::shared_ptr<Tensor> getCosTable(size_t dctx, size_t dh, float theta)
JiugeAWQWeights
::
JiugeAWQWeights
(
JiugeAWQWeights
::
JiugeAWQWeights
(
const
JiugeAWQMeta
*
meta
,
const
JiugeAWQMeta
*
meta
,
infiniDevice_t
device
,
infiniDevice_t
device
,
const
std
::
vector
<
int
>
&
dev_ids
)
:
infinicore
::
W
eightsLoader
(
device
,
dev_ids
)
{
const
std
::
vector
<
int
>
&
dev_ids
)
:
infinicore
::
w
eights
::
Loader
(
device
,
dev_ids
)
{
auto
ndev
=
dev_ids
.
size
();
auto
ndev
=
dev_ids
.
size
();
_device_weights
.
resize
(
ndev
);
_device_weights
.
resize
(
ndev
);
infiniDtype_t
dt_logits
=
meta
->
dt_logits
;
infiniDtype_t
dt_logits
=
meta
->
dt_logits
;
...
@@ -82,35 +82,35 @@ JiugeAWQWeights::JiugeAWQWeights(
...
@@ -82,35 +82,35 @@ JiugeAWQWeights::JiugeAWQWeights(
for
(
size_t
layer
=
0
;
layer
<
nlayer
;
layer
++
)
{
for
(
size_t
layer
=
0
;
layer
<
nlayer
;
layer
++
)
{
#define RIGISTER_LAYER_WEIGHT(W_NAME, W_VAR, W_SHAPE, W_DTYPE
)
\
#define RIGISTER_LAYER_WEIGHT(W_NAME, W_VAR, W_SHAPE, W_DTYPE
, W_DIST_TYPE)
\
auto W_VAR = Tensor::weight(nullptr, W_DTYPE, W_SHAPE); \
auto W_VAR = Tensor::weight(nullptr, W_DTYPE, W_SHAPE);
\
this->resigter(W_NAME, W_VAR, i
);
\
this->resigter(W_NAME, W_VAR, i
, infinicore::weights::DistributionType::W_DIST_TYPE);
\
weight->W_VAR.push_back(W_VAR);
weight->W_VAR.push_back(W_VAR);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".input_layernorm.weight"
,
w_attn_norm
,
{
d
},
dt_norm_w
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".input_layernorm.weight"
,
w_attn_norm
,
{
d
},
dt_norm_w
,
FULL
);
#define REGISTER_LAYER_QUANT_WEIGHT(W_NAME, W_VAR, W_IN, W_OUT
)
\
#define REGISTER_LAYER_QUANT_WEIGHT(W_NAME, W_VAR, W_IN, W_OUT
, W_DIST_TYPE)
\
auto W_VAR = std::make_shared<QuantInt4Weight>(); \
auto W_VAR = std::make_shared<QuantInt4Weight>(); \
W_VAR->w = Tensor::weight(nullptr, INFINI_DTYPE_I32, {W_IN, (W_OUT)*nbit / 32}); \
W_VAR->w = Tensor::weight(nullptr, INFINI_DTYPE_I32, {W_IN, (W_OUT)*nbit / 32}); \
this->resigter(W_NAME + ".qweight", W_VAR->w, i
);
\
this->resigter(W_NAME + ".qweight", W_VAR->w, i
, infinicore::weights::DistributionType::W_DIST_TYPE);
\
W_VAR->s = Tensor::weight(nullptr, INFINI_DTYPE_F16, {(W_IN) / quant_group_size, (W_OUT)}); \
W_VAR->s = Tensor::weight(nullptr, INFINI_DTYPE_F16, {(W_IN) / quant_group_size, (W_OUT)}); \
this->resigter(W_NAME + ".scales", W_VAR->s, i
);
\
this->resigter(W_NAME + ".scales", W_VAR->s, i
, infinicore::weights::DistributionType::W_DIST_TYPE);
\
W_VAR->z = Tensor::weight(nullptr, INFINI_DTYPE_I32, {(W_IN) / quant_group_size, (W_OUT)*nbit / 32}); \
W_VAR->z = Tensor::weight(nullptr, INFINI_DTYPE_I32, {(W_IN) / quant_group_size, (W_OUT)*nbit / 32}); \
this->resigter(W_NAME + ".qzeros", W_VAR->z, i
);
\
this->resigter(W_NAME + ".qzeros", W_VAR->z, i
, infinicore::weights::DistributionType::W_DIST_TYPE);
\
weight->W_VAR.push_back(W_VAR);
weight->W_VAR.push_back(W_VAR);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.q_proj"
,
w_attn_q
,
d
,
nh
*
dh
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.q_proj"
,
w_attn_q
,
d
,
nh
*
dh
,
COLUMN
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.k_proj"
,
w_attn_k
,
d
,
nkvh
*
dh
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.k_proj"
,
w_attn_k
,
d
,
nkvh
*
dh
,
COLUMN
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.v_proj"
,
w_attn_v
,
d
,
nkvh
*
dh
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.v_proj"
,
w_attn_v
,
d
,
nkvh
*
dh
,
COLUMN
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.q_proj.bias"
,
b_attn_q
,
{
nh
*
dh
},
INFINI_DTYPE_F16
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.q_proj.bias"
,
b_attn_q
,
{
nh
*
dh
},
INFINI_DTYPE_F16
,
COLUMN
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.k_proj.bias"
,
b_attn_k
,
{
nkvh
*
dh
},
INFINI_DTYPE_F16
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.k_proj.bias"
,
b_attn_k
,
{
nkvh
*
dh
},
INFINI_DTYPE_F16
,
COLUMN
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.v_proj.bias"
,
b_attn_v
,
{
nkvh
*
dh
},
INFINI_DTYPE_F16
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.v_proj.bias"
,
b_attn_v
,
{
nkvh
*
dh
},
INFINI_DTYPE_F16
,
COLUMN
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.o_proj"
,
w_attn_out
,
nh
*
dh
,
d
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".self_attn.o_proj"
,
w_attn_out
,
nh
*
dh
,
d
,
ROW
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".post_attention_layernorm.weight"
,
w_ffn_norm
,
{
d
},
dt_norm_w
);
RIGISTER_LAYER_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".post_attention_layernorm.weight"
,
w_ffn_norm
,
{
d
},
dt_norm_w
,
FULL
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".mlp.gate_proj"
,
w_ffn_gate
,
d
,
di
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".mlp.gate_proj"
,
w_ffn_gate
,
d
,
di
,
COLUMN
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".mlp.up_proj"
,
w_ffn_up
,
d
,
di
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".mlp.up_proj"
,
w_ffn_up
,
d
,
di
,
COLUMN
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".mlp.down_proj"
,
w_ffn_down
,
di
,
d
);
REGISTER_LAYER_QUANT_WEIGHT
(
"model.layers."
+
std
::
to_string
(
layer
)
+
".mlp.down_proj"
,
w_ffn_down
,
di
,
d
,
ROW
);
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment