Commit f7f16f29 authored by zhuwenwen
Browse files

add VLLM_USE_FLUX

parent 3ab7f0ef
......@@ -19,6 +19,7 @@ if TYPE_CHECKING:
VLLM_USE_OPT_OP: bool = False
VLLM_USE_TC_PAGED_ATTN: bool = False
VLLM_USE_PA_PRINT_PARAM: bool = False
VLLM_USE_FLUX: bool = False
VLLM_FLASH_ATTN_VERSION: Optional[int] = None
LOCAL_RANK: int = 0
CUDA_VISIBLE_DEVICES: Optional[str] = None
......@@ -254,6 +255,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_USE_PA_PRINT_PARAM":
lambda: (os.environ.get("VLLM_USE_PA_PRINT_PARAM", "False").lower() in
("true", "1")),
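# If set to a nonzero integer (e.g. 1), try to use the flux fused collective
# communication gemm kernels. Unset or 0 leaves the flag off. The value is
# parsed with int(), so non-numeric strings such as "true" are not accepted.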
"VLLM_USE_FLUX":
lambda: bool(int(os.getenv("VLLM_USE_FLUX", "0"))),
# Force vllm to use a specific flash-attention version (2 or 3), only valid
# when using the flash-attention backend.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment