"git@developer.sourcefind.cn:chenpangpang/open-webui.git" did not exist on "4aab460905cc02bed60fdaf830c31a173f4c7a30"
Unverified commit ef3e358a authored by youkaichao, committed by GitHub

remove lambda (#1056)

parent 4df62e14
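The change is mechanical: each internal wrapper used to define its own maybe_contiguous lambda, and this commit replaces those copies with a single module-level def maybe_contiguous(x) shared by the whole file. As a minimal sketch of what that helper does (assuming PyTorch is installed; the shapes below are arbitrary and only for illustration):

import torch

def maybe_contiguous(x):
    # Copy tensors whose last dimension is strided; pass contiguous tensors and None through unchanged.
    return x.contiguous() if x is not None and x.stride(-1) != 1 else x

t = torch.randn(8, 64).t()                # transpose leaves the last dimension strided
print(t.stride(-1))                       # 64 -> a contiguous copy will be made
print(maybe_contiguous(t).stride(-1))     # 1
print(maybe_contiguous(None) is None)     # True: None passes through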
@@ -11,6 +11,8 @@ import flash_attn_2_cuda as flash_attn_cuda
 # isort: on

+def maybe_contiguous(x):
+    return x.contiguous() if x is not None and x.stride(-1) != 1 else x

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -46,7 +48,6 @@ def _get_block_size_n(device, head_dim, is_dropout, is_causal):
 def _flash_attn_forward(
     q, k, v, dropout_p, softmax_scale, causal, window_size, softcap, alibi_slopes, return_softmax
 ):
-    maybe_contiguous = lambda x: x.contiguous() if x.stride(-1) != 1 else x
     q, k, v = [maybe_contiguous(x) for x in (q, k, v)]
     out, q, k, v, out_padded, softmax_lse, S_dmask, rng_state = flash_attn_cuda.fwd(
         q,
@@ -85,7 +86,6 @@ def _flash_attn_varlen_forward(
     leftpad_k=None,
     seqused_k=None,
 ):
-    maybe_contiguous = lambda x: x.contiguous() if x.stride(-1) != 1 else x
     q, k, v = [maybe_contiguous(x) for x in (q, k, v)]
     out, q, k, v, out_padded, softmax_lse, S_dmask, rng_state = flash_attn_cuda.varlen_fwd(
         q,
@@ -134,7 +134,6 @@ def _flash_attn_backward(
     deterministic,
     rng_state=None,
 ):
-    maybe_contiguous = lambda x: x.contiguous() if x.stride(-1) != 1 else x
     # dq, dk, dv are allocated by us so they should already be contiguous
     dout, q, k, v, out = [maybe_contiguous(x) for x in (dout, q, k, v, out)]
     (
@@ -189,7 +188,6 @@ def _flash_attn_varlen_backward(
     deterministic,
     rng_state=None,
 ):
-    maybe_contiguous = lambda x: x.contiguous() if x.stride(-1) != 1 else x
     # dq, dk, dv are allocated by us so they should already be contiguous
     dout, q, k, v, out = [maybe_contiguous(x) for x in (dout, q, k, v, out)]
     (
@@ -1253,7 +1251,6 @@ def flash_attn_with_kvcache(
     """
     assert k_cache.stride(-1) == 1, "k_cache must have contiguous last dimension"
     assert v_cache.stride(-1) == 1, "v_cache must have contiguous last dimension"
-    maybe_contiguous = lambda x: x.contiguous() if x is not None and x.stride(-1) != 1 else x
     q, k, v = [maybe_contiguous(x) for x in (q, k, v)]
     if softmax_scale is None:
         softmax_scale = q.shape[-1] ** (-0.5)
......
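One detail visible in the hunks above: the removed per-function lambdas (except the one in flash_attn_with_kvcache) had no None check, while the shared helper keeps the x is not None guard, so all wrappers now treat optional tensors the same way. A hedged sketch of the call pattern the wrappers use, with made-up shapes standing in for q, k, v (assumes PyTorch; the CUDA extension is not needed for this step):

import torch

def maybe_contiguous(x):
    return x.contiguous() if x is not None and x.stride(-1) != 1 else x

q = torch.randn(2, 128, 8, 64)                    # already contiguous: returned as-is
k = torch.randn(2, 128, 64, 8).transpose(-1, -2)  # strided last dimension: copied
v = torch.randn(2, 128, 8, 64)

# The same normalization step the wrappers perform before calling into flash_attn_cuda.
q, k, v = [maybe_contiguous(x) for x in (q, k, v)]
assert all(x.stride(-1) == 1 for x in (q, k, v))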
@@ -11,9 +11,10 @@ import flashattn_hopper_cuda
 # isort: on

+def maybe_contiguous(x):
+    return x.contiguous() if x is not None and x.stride(-1) != 1 else x

 def _flash_attn_forward(q, k, v, softmax_scale, causal):
-    maybe_contiguous = lambda x: x.contiguous() if x.stride(-1) != 1 else x
     q, k, v = [maybe_contiguous(x) for x in (q, k, v)]
     out, q, k, v, out_padded, softmax_lse, S_dmask = flashattn_hopper_cuda.fwd(
         q,
@@ -39,7 +40,6 @@ def _flash_attn_backward(
     softmax_scale,
     causal
 ):
-    maybe_contiguous = lambda x: x.contiguous() if x.stride(-1) != 1 else x
     # dq, dk, dv are allocated by us so they should already be contiguous
     dout, q, k, v, out = [maybe_contiguous(x) for x in (dout, q, k, v, out)]
     dq, dk, dv, softmax_d, = flashattn_hopper_cuda.bwd(
......
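Both files make the same substitution, so the only behavioural question is whether the shared helper matches the lambdas it replaces; for non-None tensors it does, as this quick check sketches (assumes PyTorch; old_lambda and new_fn are illustrative names, not identifiers from the codebase):

import torch

old_lambda = lambda x: x.contiguous() if x.stride(-1) != 1 else x   # removed form
def new_fn(x):                                                       # added form
    return x.contiguous() if x is not None and x.stride(-1) != 1 else x

t = torch.randn(16, 32).t()                        # strided last dimension
assert torch.equal(old_lambda(t), new_fn(t))       # identical values
assert old_lambda(t).stride(-1) == new_fn(t).stride(-1) == 1

u = torch.randn(16, 32)                            # already contiguous
assert old_lambda(u) is u and new_fn(u) is u       # both pass it through untouched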