Unverified commit 1b914f37 authored by Daniël de Kok, committed by GitHub

flashinfer: reminder to remove contiguous call in the future (#2685)

parent 41c26237
@@ -55,6 +55,7 @@ def paged_attention(
         from text_generation_server.layers.attention.flashinfer import decode_state

         return decode_state.get().forward(
+            # TODO: remove `contiguous` call once https://github.com/flashinfer-ai/flashinfer/pull/553 is merged.
             query.contiguous(),
             paged_kv_cache=(kv_cache.key, kv_cache.value),
             logits_soft_cap=softcap,
@@ -220,6 +221,7 @@ def attention(
         softcap = 0.0

     return prefill_with_paged_kv_state.get().forward(
+        # TODO: remove `contiguous` call once https://github.com/flashinfer-ai/flashinfer/pull/553 is merged.
         query.contiguous(),
         causal=causal,
         paged_kv_cache=(kv_cache.key, kv_cache.value),
...
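For context on the two TODO comments: at the time of this commit the flashinfer kernels evidently required a contiguous query tensor, while attention code often hands them a non-contiguous view (for example, the result of a transpose). Below is a minimal PyTorch sketch of that behavior; the shapes are illustrative assumptions, not taken from text-generation-inference:

```python
import torch

# Hypothetical shapes for illustration: a [seq_len, num_heads, head_dim]
# buffer viewed as [num_heads, seq_len, head_dim].
query = torch.randn(8, 4, 64).transpose(0, 1)

# transpose() returns a strided view, not a copy, so the tensor is no
# longer contiguous in memory.
print(query.is_contiguous())  # False

# .contiguous() materializes a fresh, contiguous copy. This is the call
# (and the extra memory traffic) that the TODOs above want to drop once
# flashinfer-ai/flashinfer#553 lands and the kernels accept strided input.
query = query.contiguous()
print(query.is_contiguous())  # True
```

Note that `.contiguous()` returns the tensor itself when it is already contiguous, so the call only costs anything when a copy is actually made; the TODO is about avoiding that copy for non-contiguous queries.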