Unverified Commit 01e59e82 authored by Liangsheng Yin's avatar Liangsheng Yin Committed by GitHub
Browse files

Fix CI break by express-laned PRs. (#11499)

parent 99a0704a
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional, Union
from typing import TYPE_CHECKING, Optional
import numpy as np
import torch
......@@ -10,6 +10,7 @@ import triton.language as tl
from sglang.srt.configs.model_config import AttentionArch
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.radix_attention import AttentionType
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.speculative.spec_info import SpecInput
......
......@@ -728,7 +728,10 @@ class FlashInferAttnBackend(AttentionBackend):
)
else:
causal = True
if layer.is_cross_attention or layer.attn_type == AttentionType.ENCODER_ONLY:
if (
layer.is_cross_attention
or layer.attn_type == AttentionType.ENCODER_ONLY
):
causal = False
if save_kv_cache and layer.attn_type == AttentionType.ENCODER_ONLY:
save_kv_cache = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment