Commit db561fb6 authored by zhuwenwen
Browse files

update cache_kernels.cu

parent afd0da21
......@@ -371,6 +371,7 @@ __global__ void read_cache_kernel(
value[tgt_value_idx] = fp8::scaled_convert<scalar_t, cache_t, kv_dt>(tgt_value, 1.0);
}
}
}
template <typename scalar_t, typename cache_t, Fp8KVCacheDataType kv_dt>
......@@ -660,6 +661,7 @@ void write_cache_multi_layers(
}
#define CALL_CONCAT_AND_CACHE_MLA(KV_T, CACHE_T, KV_DTYPE) \
vllm::concat_and_cache_mla_kernel<KV_T, CACHE_T, KV_DTYPE> \
<<<grid, block, 0, stream>>>( \
......@@ -707,7 +709,6 @@ void concat_and_cache_mla(
CALL_CONCAT_AND_CACHE_MLA);
}
namespace vllm {
template <typename Tout, typename Tin, Fp8KVCacheDataType kv_dt>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment