[bug] fix mismatched shape for decoder output tensor (#517)

0d2a151e · akhoroshev · GitHub · 169d088a · 0d2a151e
Unverified commit 0d2a151e, authored Oct 11, 2023 by akhoroshev; committed by GitHub on Oct 11, 2023
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

src/turbomind/models/llama/LlamaV2.cc +1 -1

No files found.
--- a/src/turbomind/models/llama/LlamaV2.cc
+++ b/src/turbomind/models/llama/LlamaV2.cc
@@ -256,7 +256,7 @@ void LlamaV2<T>::contextDecode(T*         deocder_output,
    };
    std::unordered_map<std::string, Tensor> decoder_output_tensors{
-        {"decoder_output", {MEMORY_GPU, dtype, {bsz, max_input_len, hidden_units_}, context_decoder_output_buf}},
+        {"decoder_output", {MEMORY_GPU, dtype, {token_num, hidden_units_}, context_decoder_output_buf}},
        {"key_cache", {MEMORY_GPU, TYPE_UINT64, {bsz}, k_cache_ptr}},
        {"value_cache", {MEMORY_GPU, TYPE_UINT64, {bsz}, v_cache_ptr}},
        {"last_token_hidden_units", {MEMORY_GPU, dtype, {bsz, hidden_units_}, deocder_output}}};