issue/21 - Adjusted attn_val_buf Shape

93cd62d5 · wooway777 · 115badb9 · 93cd62d5
Commit 93cd62d5 authored Jul 31, 2025 by wooway777
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

src/models/jiuge/jiuge.cpp +1 -1

No files found.
--- a/src/models/jiuge/jiuge.cpp
+++ b/src/models/jiuge/jiuge.cpp
@@ -181,7 +181,7 @@ void inferDeviceBatch(const JiugeMeta &meta, DeviceResource &rsrc,

    auto qk_buf = Tensor::buffer(dt_logits, {nh, max_qk_size}, rsrc.memory_pool);
    auto rearrange_q_buf = Tensor::buffer(dt_logits, {nkvh, ngroup * max_seq_len, dh}, rsrc.memory_pool);
-    auto attn_val_buf = Tensor::buffer(dt_logits, {nh, max_seq_len, dh}, rsrc.memory_pool);
+    auto attn_val_buf = Tensor::buffer(dt_logits, {nkvh, ngroup * max_seq_len, dh}, rsrc.memory_pool);

    // MLP buffers
    auto gate_buf = gate_up_buf->slice(1, 0, di);