Commit ee59b3f5 authored by wooway777's avatar wooway777
Browse files

issue/214 - update attn and caching logic

parent 67e8d6e9
......@@ -96,7 +96,6 @@ StaticKVCache::update(size_t layer_idx,
if (device.getType() == infinicore::Device::Type::NVIDIA
|| device.getType() == infinicore::Device::Type::ILUVATAR
|| device.getType() == infinicore::Device::Type::METAX
|| device.getType() == infinicore::Device::Type::MOORE
|| device.getType() == infinicore::Device::Type::CAMBRICON) {
infinicore::op::kv_caching_(
k_cache_layer,
......
......@@ -127,8 +127,6 @@ infinicore::Tensor LlamaAttention::forward_(const infinicore::Tensor &hidden_sta
infinicore::Tensor attn_output;
if (q_reshaped->device().getType() == infinicore::Device::Type::NVIDIA
|| q_reshaped->device().getType() == infinicore::Device::Type::METAX
|| q_reshaped->device().getType() == infinicore::Device::Type::MOORE
|| q_reshaped->device().getType() == infinicore::Device::Type::ILUVATAR
|| q_reshaped->device().getType() == infinicore::Device::Type::CAMBRICON) {
attn_output = infinicore::op::flash_attention(q_reshaped, k_total, v_total, total_sequence_lengths.value(), scaling_, true);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment