Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
c3f8b79b
Commit
c3f8b79b
authored
Aug 20, 2025
by
wooway777
Browse files
issue/29 - further fixed tensor dimensions on qk_gemm and qk_softmax
parent
9038f7ab
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
2 deletions
+2
-2
src/models/jiuge/jiuge.cpp
src/models/jiuge/jiuge.cpp
+2
-2
No files found.
src/models/jiuge/jiuge.cpp
View file @
c3f8b79b
...
@@ -218,11 +218,11 @@ void inferDeviceBatch(const JiugeMeta &meta, DeviceResource &rsrc,
...
@@ -218,11 +218,11 @@ void inferDeviceBatch(const JiugeMeta &meta, DeviceResource &rsrc,
rearrange
(
kv_caches
[
req
]
->
v
[
idev
][
layer
]
->
slice
(
0
,
past_len
,
seq_len
),
v
);
rearrange
(
kv_caches
[
req
]
->
v
[
idev
][
layer
]
->
slice
(
0
,
past_len
,
seq_len
),
v
);
// qk
// qk
rearrange
(
q_rearrange
->
slice
(
2
,
0
,
seq_len
),
q
);
rearrange
(
q_rearrange
->
slice
(
2
,
0
,
seq_len
),
q
);
auto
qk_gemm
=
qk_buf
->
view
({
nkvh
,
ngroup
*
seq_len
,
total_len
});
auto
qk_gemm
=
qk_buf
->
slice
(
1
,
0
,
seq_len
*
total_len
)
->
view
({
nkvh
,
ngroup
*
seq_len
,
total_len
});
auto
k_gemm
=
kv_caches
[
req
]
->
k
[
idev
][
layer
]
->
slice
(
0
,
0
,
total_len
)
->
permute
({
1
,
2
,
0
});
auto
k_gemm
=
kv_caches
[
req
]
->
k
[
idev
][
layer
]
->
slice
(
0
,
0
,
total_len
)
->
permute
({
1
,
2
,
0
});
linear
(
qk_gemm
,
rearrange_q_buf
->
slice
(
1
,
0
,
ngroup
*
seq_len
),
k_gemm
,
1.
f
/
float
(
sqrt
(
dh
)),
0.
f
,
nullptr
,
nullptr
);
linear
(
qk_gemm
,
rearrange_q_buf
->
slice
(
1
,
0
,
ngroup
*
seq_len
),
k_gemm
,
1.
f
/
float
(
sqrt
(
dh
)),
0.
f
,
nullptr
,
nullptr
);
// softmax
// softmax
auto
qk_softmax
=
qk_buf
->
view
({
nh
,
seq_len
,
total_len
});
auto
qk_softmax
=
qk_buf
->
slice
(
1
,
0
,
seq_len
*
total_len
)
->
view
({
nh
,
seq_len
,
total_len
});
causalSoftmax
(
qk_softmax
,
qk_softmax
);
causalSoftmax
(
qk_softmax
,
qk_softmax
);
auto
v_gemm
=
kv_caches
[
req
]
->
v
[
idev
][
layer
]
->
slice
(
0
,
0
,
total_len
)
->
permute
({
1
,
0
,
2
});
auto
v_gemm
=
kv_caches
[
req
]
->
v
[
idev
][
layer
]
->
slice
(
0
,
0
,
total_len
)
->
permute
({
1
,
0
,
2
});
linear
(
attn_val_buf
->
slice
(
1
,
0
,
ngroup
*
seq_len
),
qk_gemm
,
v_gemm
,
1.
f
,
0.
f
,
nullptr
,
nullptr
);
linear
(
attn_val_buf
->
slice
(
1
,
0
,
ngroup
*
seq_len
),
qk_gemm
,
v_gemm
,
1.
f
,
0.
f
,
nullptr
,
nullptr
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment