Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
b1f6af34
Commit
b1f6af34
authored
Mar 16, 2026
by
PanZezhong
Browse files
issue/263 fix T2-1-4
parent
d1f29df0
Changes
11
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
692 additions
and
446 deletions
+692
-446
include/infinicore_infer/models/qwen3vl.h
include/infinicore_infer/models/qwen3vl.h
+35
-35
scripts/libinfinicore_infer/__init__.py
scripts/libinfinicore_infer/__init__.py
+2
-0
scripts/libinfinicore_infer/qwen3vl.py
scripts/libinfinicore_infer/qwen3vl.py
+22
-20
scripts/qwen3vl.py
scripts/qwen3vl.py
+459
-200
src/allocator.hpp
src/allocator.hpp
+1
-1
src/models/inference_context.cpp
src/models/inference_context.cpp
+12
-12
src/models/inference_context.hpp
src/models/inference_context.hpp
+1
-1
src/models/qwen3vl/qwen3vl.cpp
src/models/qwen3vl/qwen3vl.cpp
+120
-125
src/models/qwen3vl/qwen3vl_cache.cpp
src/models/qwen3vl/qwen3vl_cache.cpp
+4
-4
src/models/qwen3vl/qwen3vl_impl.hpp
src/models/qwen3vl/qwen3vl_impl.hpp
+7
-9
src/models/qwen3vl/qwen3vl_weight.cpp
src/models/qwen3vl/qwen3vl_weight.cpp
+29
-39
No files found.
include/infinicore_infer/models/qwen3vl.h
View file @
b1f6af34
...
...
@@ -46,19 +46,19 @@ typedef struct {
qwen3vl_load_layer_fn
load_attn_qkv_weight
;
qwen3vl_load_layer_fn
load_attn_qkv_bias
;
//block mlp
//
block mlp
qwen3vl_load_layer_fn
load_mlp_linear_fc1_weight
;
qwen3vl_load_layer_fn
load_mlp_linear_fc1_bias
;
qwen3vl_load_layer_fn
load_mlp_linear_fc2_weight
;
qwen3vl_load_layer_fn
load_mlp_linear_fc2_bias
;
//block norm
//
block norm
qwen3vl_load_layer_fn
load_norm1_weight
;
qwen3vl_load_layer_fn
load_norm1_bias
;
qwen3vl_load_layer_fn
load_norm2_weight
;
qwen3vl_load_layer_fn
load_norm2_bias
;
//deepstack_merger
//
deepstack_merger
qwen3vl_load_layer_fn
load_deepstack_merger_linear_fc1_weight
;
qwen3vl_load_layer_fn
load_deepstack_merger_linear_fc1_bias
;
qwen3vl_load_layer_fn
load_deepstack_merger_linear_fc2_weight
;
...
...
@@ -66,7 +66,7 @@ typedef struct {
qwen3vl_load_layer_fn
load_deepstack_merger_norm_weight
;
qwen3vl_load_layer_fn
load_deepstack_merger_norm_bias
;
//merger
//
merger
qwen3vl_load_global_fn
load_merger_linear_fc1_weight
;
qwen3vl_load_global_fn
load_merger_linear_fc1_bias
;
qwen3vl_load_global_fn
load_merger_linear_fc2_weight
;
...
...
@@ -116,7 +116,7 @@ typedef struct {
}
Qwen3vlVisMeta
;
typedef
struct
{
infiniDtype_t
dtype
;
//INFINI_DTYPE_BF16
infiniDtype_t
dtype
;
//
INFINI_DTYPE_BF16
Qwen3vlTextMeta
text_meta
;
Qwen3vlVisMeta
vis_meta
;
...
...
@@ -132,27 +132,27 @@ typedef struct {
/// @param device 协处理器种类
/// @param ndev 协处理器数量
/// @param dev_ids 协处理器编号,长度为 ndev
__C
__export
struct
Qwen3vlModel
*
__
INFINI_
C
__export
struct
Qwen3vlModel
*
createQwen3vlModel
(
const
Qwen3vlMeta
*
,
const
Qwen3vlWeights
*
);
__C
Qwen3vlWeights
*
__
INFINI_
C
Qwen3vlWeights
*
createQwen3vlWeights
(
const
Qwen3vlMeta
*
meta
,
infiniDevice_t
device
,
int
ndev
,
const
int
*
dev_ids
,
bool
transpose_weight
);
__C
__export
Qwen3vlWeightLoader
*
__
INFINI_
C
__export
Qwen3vlWeightLoader
*
createQwen3vlWeightLoader
();
/// @brief 销毁模型
__C
__export
void
destroyQwen3vlModel
(
struct
Qwen3vlModel
*
);
__
INFINI_
C
__export
void
destroyQwen3vlModel
(
struct
Qwen3vlModel
*
);
__C
__export
struct
Qwen3vlCache
*
__
INFINI_
C
__export
struct
Qwen3vlCache
*
createQwen3vlCache
(
const
struct
Qwen3vlModel
*
);
__C
__export
void
__
INFINI_
C
__export
void
dropQwen3vlCache
(
const
struct
Qwen3vlModel
*
,
struct
Qwen3vlCache
*
);
...
...
@@ -167,7 +167,7 @@ dropQwen3vlCache(const struct Qwen3vlModel *,
/// @param topk 采样 topk(1 表示贪心采样)
/// @param topp 采样 topp
/// @param output 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
inferBatchQwen3vl
(
struct
Qwen3vlModel
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
void
*
pixel_values
,
uint32_t
total_patches
,
...
...
@@ -188,7 +188,7 @@ inferBatchQwen3vl(struct Qwen3vlModel *,
/// @param req_pos 每个请求的起始位置
/// @param kv_caches 每个请求的 KV Cache
/// @param logits 输出 token 数组,每个请求一个输出,长度至少为nreq
__C
__export
void
__
INFINI_
C
__export
void
forwardBatchQwen3vl
(
struct
Qwen3vlModel
*
,
const
uint32_t
*
tokens
,
uint32_t
ntok
,
void
*
pixel_values
,
uint32_t
total_patches
,
...
...
scripts/libinfinicore_infer/__init__.py
View file @
b1f6af34
...
...
@@ -6,6 +6,7 @@ from .deepseek_v3 import (
DeepSeekV3MetaCStruct
,
DeepSeekV3WeightsCStruct
,
DeepSeekV3WeightLoaderCStruct
,
DeepSeekV3CacheCStruct
,
)
from
.qwen3vl
import
(
Qwen3vlModel
,
...
...
@@ -33,6 +34,7 @@ __all__ = [
"DeepSeekV3MetaCStruct"
,
"DeepSeekV3WeightsCStruct"
,
"DeepSeekV3WeightLoaderCStruct"
,
"DeepSeekV3CacheCStruct"
,
"Qwen3vlModel"
,
"Qwen3vlMetaCStruct"
,
"TextMetaCStruct"
,
...
...
scripts/libinfinicore_infer/qwen3vl.py
View file @
b1f6af34
...
...
@@ -226,7 +226,9 @@ class Qwen3vlModel(BaseModel):
return
self
.
lib
.
createQwen3vlWeightLoader
()
def
create_weights
(
self
,
meta
,
device_type
,
ndev
,
dev_ids
,
transpose_weight
):
return
self
.
lib
.
createQwen3vlWeights
(
meta
,
device_type
,
ndev
,
dev_ids
,
transpose_weight
)
return
self
.
lib
.
createQwen3vlWeights
(
meta
,
device_type
,
ndev
,
dev_ids
,
transpose_weight
)
def
create_model
(
self
,
meta
,
weights
):
return
self
.
lib
.
createQwen3vlModel
(
meta
,
weights
)
...
...
scripts/qwen3vl.py
View file @
b1f6af34
This diff is collapsed.
Click to expand it.
src/allocator.hpp
View file @
b1f6af34
...
...
@@ -16,7 +16,7 @@ public:
class
MemoryPool
:
public
AllocatorBase
{
public:
static
constexpr
size_t
DEFAULT_ALIGNMENT
=
256
;
static
constexpr
size_t
DEFAULT_ALIGNMENT
=
512
;
explicit
MemoryPool
(
size_t
initialSize
=
0
,
size_t
alignment
=
DEFAULT_ALIGNMENT
);
~
MemoryPool
();
...
...
src/models/inference_context.cpp
View file @
b1f6af34
...
...
@@ -43,9 +43,9 @@ void InferenceContext::conv(std::shared_ptr<Tensor> y,
size_t
n
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
y
,
x
,
w
,
bias
);
// Combine additional parameters into the key for unique identification
hash_combine
(
key
,
std
::
hash
<
void
*>
()(
pads
));
hash_combine
(
key
,
std
::
hash
<
void
*>
()(
strides
));
hash_combine
(
key
,
std
::
hash
<
void
*>
()(
dilations
));
hash_combine
(
key
,
std
::
hash
<
void
*>
()(
pads
));
hash_combine
(
key
,
std
::
hash
<
void
*>
()(
strides
));
hash_combine
(
key
,
std
::
hash
<
void
*>
()(
dilations
));
hash_combine
(
key
,
std
::
hash
<
size_t
>
()(
n
));
infiniopConvDescriptor_t
desc
;
...
...
src/models/inference_context.hpp
View file @
b1f6af34
src/models/qwen3vl/qwen3vl.cpp
View file @
b1f6af34
This diff is collapsed.
Click to expand it.
src/models/qwen3vl/qwen3vl_cache.cpp
View file @
b1f6af34
#include "qwen3vl_impl.hpp"
__C
struct
Qwen3vlCache
*
__
INFINI_
C
struct
Qwen3vlCache
*
createQwen3vlCache
(
const
struct
Qwen3vlModel
*
model
)
{
Qwen3vlCache
*
cache
=
new
Qwen3vlCache
();
auto
ndev
=
model
->
dev_resources
.
size
();
...
...
@@ -27,7 +27,7 @@ createQwen3vlCache(const struct Qwen3vlModel *model) {
//////还有visual deepstack需要cache?
__C
void
__
INFINI_
C
void
dropQwen3vlCache
(
const
struct
Qwen3vlModel
*
model
,
struct
Qwen3vlCache
*
cache
)
{
auto
ndev
=
model
->
dev_resources
.
size
();
...
...
src/models/qwen3vl/qwen3vl_impl.hpp
View file @
b1f6af34
...
...
@@ -45,7 +45,6 @@ struct MergerWeight {
std
::
shared_ptr
<
Tensor
>
norm_weight
,
norm_bias
;
};
struct
Qwen3vlVisualEncoderWeight
{
std
::
shared_ptr
<
Tensor
>
patch_embed_weight
,
patch_embed_bias
,
pos_embed_weight
;
std
::
vector
<
Qwen3vlVisBlockWeight
>
blocks
;
...
...
@@ -53,9 +52,8 @@ struct Qwen3vlVisualEncoderWeight {
std
::
shared_ptr
<
MergerWeight
>
merger
;
};
struct
Qwen3vlDeviceWeights
{
std
::
shared_ptr
<
Tensor
>
sin_table
,
cos_table
;
std
::
shared_ptr
<
Tensor
>
sin_table
,
cos_table
;
std
::
shared_ptr
<
Qwen3vlLanguageModelWeight
>
w_lang
;
std
::
shared_ptr
<
Qwen3vlVisualEncoderWeight
>
w_vis
;
infiniDevice_t
device
;
...
...
src/models/qwen3vl/qwen3vl_weight.cpp
View file @
b1f6af34
...
...
@@ -23,7 +23,7 @@ inline std::shared_ptr<Tensor> getOutEmbd(
}
inline
void
getLayerWeight
(
const
Qwen3vlMeta
*
meta
,
Qwen3vlLayerWeight
&
layer
,
int
ndev
)
{
const
Qwen3vlMeta
*
meta
,
Qwen3vlLayerWeight
&
layer
,
int
ndev
)
{
auto
nkvh
=
meta
->
text_meta
.
num_key_value_heads
;
auto
nh
=
meta
->
text_meta
.
num_attention_heads
;
auto
dh
=
meta
->
text_meta
.
head_dim
;
...
...
@@ -47,11 +47,10 @@ inline void getLayerWeight(
layer
.
mlp_down
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
down_shape
);
}
inline
void
getVisualWeight
(
const
Qwen3vlMeta
*
meta
,
std
::
shared_ptr
<
Qwen3vlVisualEncoderWeight
>
w_vis
)
{
Qwen3vlVisMeta
vis_meta
=
meta
->
vis_meta
;
auto
patch_embed_shape
=
std
::
vector
<
size_t
>
({
vis_meta
.
hidden_size
,
vis_meta
.
in_channels
,
vis_meta
.
temporal_patch_size
,
vis_meta
.
patch_size
,
vis_meta
.
patch_size
});
auto
patch_embed_shape
=
std
::
vector
<
size_t
>
({
vis_meta
.
hidden_size
,
vis_meta
.
in_channels
,
vis_meta
.
temporal_patch_size
,
vis_meta
.
patch_size
,
vis_meta
.
patch_size
});
w_vis
->
patch_embed_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
patch_embed_shape
);
w_vis
->
patch_embed_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
});
w_vis
->
pos_embed_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
num_position_embeddings
,
vis_meta
.
hidden_size
});
...
...
@@ -64,10 +63,10 @@ inline void getVisualWeight(
w_vis
->
merger
->
norm_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
});
w_vis
->
blocks
=
std
::
vector
<
Qwen3vlVisBlockWeight
>
(
vis_meta
.
depth
);
for
(
size_t
i
=
0
;
i
<
vis_meta
.
depth
;
i
++
)
{
w_vis
->
blocks
[
i
].
attn_proj_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
,
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
attn_proj_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
,
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
attn_proj_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
attn_qkv_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
in_channels
*
vis_meta
.
hidden_size
,
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
attn_qkv_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
in_channels
*
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
attn_qkv_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
in_channels
*
vis_meta
.
hidden_size
,
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
attn_qkv_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
in_channels
*
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
mlp_linear_fc1_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
,
vis_meta
.
hidden_size
});
w_vis
->
blocks
[
i
].
mlp_linear_fc1_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
});
w_vis
->
blocks
[
i
].
mlp_linear_fc2_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
,
vis_meta
.
intermediate_size
});
...
...
@@ -78,18 +77,16 @@ inline void getVisualWeight(
w_vis
->
blocks
[
i
].
norm2_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
hidden_size
});
}
w_vis
->
deepstack_mergers
=
std
::
vector
<
DeepstackMergerWeight
>
(
3
);
for
(
size_t
i
=
0
;
i
<
3
;
i
++
){
w_vis
->
deepstack_mergers
[
i
].
linear_fc1_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
,
vis_meta
.
intermediate_size
});
w_vis
->
deepstack_mergers
[
i
].
linear_fc2_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
out_hidden_size
,
vis_meta
.
intermediate_size
});
for
(
size_t
i
=
0
;
i
<
3
;
i
++
)
{
w_vis
->
deepstack_mergers
[
i
].
linear_fc1_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
,
vis_meta
.
intermediate_size
});
w_vis
->
deepstack_mergers
[
i
].
linear_fc2_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
out_hidden_size
,
vis_meta
.
intermediate_size
});
w_vis
->
deepstack_mergers
[
i
].
linear_fc1_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
});
w_vis
->
deepstack_mergers
[
i
].
linear_fc2_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
out_hidden_size
});
w_vis
->
deepstack_mergers
[
i
].
norm_weight
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
});
w_vis
->
deepstack_mergers
[
i
].
norm_bias
=
Tensor
::
weight
(
nullptr
,
meta
->
dtype
,
{
vis_meta
.
intermediate_size
});
}
}
inline
std
::
shared_ptr
<
Tensor
>
getSinTable
(
const
Qwen3vlMeta
*
meta
)
{
auto
half_dh
=
meta
->
text_meta
.
head_dim
/
2
;
auto
unit
=
dsize
(
meta
->
dtype
);
...
...
@@ -172,7 +169,6 @@ Qwen3vlWeights::Qwen3vlWeights(
}
getVisualWeight
(
meta
,
device_weights
[
dev
]
->
w_vis
);
}
}
...
...
@@ -201,8 +197,8 @@ void load_output_embd(Qwen3vlWeights *weights, void *cpu_ptr) {
auto
weight
=
weights
->
device_weights
[
dev
];
RUN_INFINI
(
infinirtSetDevice
(
weight
->
device
,
weight
->
dev_id
));
weight
->
w_lang
->
out_embd
->
load
(
cpu_ptr
,
weight
->
load_stream
);
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
out_embd
->
permute
({
1
,
0
});
//[d,voc]
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
out_embd
->
permute
({
1
,
0
});
//[d,voc]
}
}
}
...
...
@@ -239,9 +235,8 @@ void load_attn_qkv_proj(Qwen3vlWeights *weights, void *cpu_ptr, size_t layer) {
size_t
offset
=
idev
*
((
nkvh
*
2
+
nh
)
/
ndev
*
dh
)
*
d
*
dsize
(
weights
->
meta
->
dtype
);
RUN_INFINI
(
infinirtSetDevice
(
weight
->
device
,
weight
->
dev_id
));
weight
->
w_lang
->
layers
[
layer
].
attn_qkv_proj
->
load
((
char
*
)
cpu_ptr
+
offset
,
weight
->
load_stream
);
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
attn_qkv_proj
=
weight
->
w_lang
->
layers
[
layer
].
attn_qkv_proj
->
permute
({
1
,
0
});
//[d, (nh+2*nkvh)*dh]
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
attn_qkv_proj
=
weight
->
w_lang
->
layers
[
layer
].
attn_qkv_proj
->
permute
({
1
,
0
});
//[d, (nh+2*nkvh)*dh]
}
}
}
...
...
@@ -267,9 +262,8 @@ void load_attn_o_proj(Qwen3vlWeights *weights, void *cpu_ptr, size_t layer) {
size_t
offset
=
idev
*
d
*
(
nh
/
ndev
*
dh
)
*
dsize
(
weights
->
meta
->
dtype
);
RUN_INFINI
(
infinirtSetDevice
(
weight
->
device
,
weight
->
dev_id
));
weight
->
w_lang
->
layers
[
layer
].
attn_o_proj
->
load
((
char
*
)
cpu_ptr
+
offset
,
weight
->
load_stream
);
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
attn_o_proj
=
weight
->
w_lang
->
layers
[
layer
].
attn_o_proj
->
permute
({
1
,
0
});
//[nh/ndev*dh, d]
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
attn_o_proj
=
weight
->
w_lang
->
layers
[
layer
].
attn_o_proj
->
permute
({
1
,
0
});
//[nh/ndev*dh, d]
}
}
}
...
...
@@ -295,9 +289,8 @@ void load_mlp_gate_up(Qwen3vlWeights *weights, void *cpu_ptr, size_t layer) {
size_t
offset
=
idev
*
(
2
*
di
/
ndev
)
*
d
*
dsize
(
weights
->
meta
->
dtype
);
RUN_INFINI
(
infinirtSetDevice
(
weight
->
device
,
weight
->
dev_id
));
weight
->
w_lang
->
layers
[
layer
].
mlp_gate_up
->
load
((
char
*
)
cpu_ptr
+
offset
,
weight
->
load_stream
);
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
mlp_gate_up
=
weight
->
w_lang
->
layers
[
layer
].
mlp_gate_up
->
permute
({
1
,
0
});
//[d, 2*di/ndev]
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
mlp_gate_up
=
weight
->
w_lang
->
layers
[
layer
].
mlp_gate_up
->
permute
({
1
,
0
});
//[d, 2*di/ndev]
}
}
}
...
...
@@ -313,9 +306,8 @@ void load_mlp_down(Qwen3vlWeights *weights, void *cpu_ptr, size_t layer) {
size_t
offset
=
idev
*
d
*
(
di
/
ndev
)
*
dsize
(
weights
->
meta
->
dtype
);
RUN_INFINI
(
infinirtSetDevice
(
weight
->
device
,
weight
->
dev_id
));
weight
->
w_lang
->
layers
[
layer
].
mlp_down
->
load
((
char
*
)
cpu_ptr
+
offset
,
weight
->
load_stream
);
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
mlp_down
=
weight
->
w_lang
->
layers
[
layer
].
mlp_down
->
permute
({
1
,
0
});
//[di/ndev, d]
if
(
weights
->
transpose_weight
)
{
weight
->
w_lang
->
layers
[
layer
].
mlp_down
=
weight
->
w_lang
->
layers
[
layer
].
mlp_down
->
permute
({
1
,
0
});
//[di/ndev, d]
}
}
}
...
...
@@ -569,7 +561,6 @@ void load_merger_norm_bias(Qwen3vlWeights *weights, void *cpu_ptr) {
}
}
static
Qwen3vlWeightLoader
weight_loader
=
{
// Language model loaders
.
lang_loader
=
{
...
...
@@ -614,10 +605,9 @@ static Qwen3vlWeightLoader weight_loader = {
.
load_merger_linear_fc2_bias
=
load_merger_linear_fc2_bias
,
.
load_merger_norm_weight
=
load_merger_norm_weight
,
.
load_merger_norm_bias
=
load_merger_norm_bias
,
}
};
}};
__C
Qwen3vlWeights
*
__
INFINI_
C
Qwen3vlWeights
*
createQwen3vlWeights
(
const
Qwen3vlMeta
*
meta
,
infiniDevice_t
device
,
int
ndev
,
...
...
@@ -640,7 +630,7 @@ createQwen3vlWeights(const Qwen3vlMeta *meta,
return
weights
;
};
__C
Qwen3vlWeightLoader
*
__
INFINI_
C
Qwen3vlWeightLoader
*
createQwen3vlWeightLoader
()
{
return
&
weight_loader
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment