add VLLM_USE_FLUX

f7f16f29 · zhuwenwen · 3ab7f0ef · f7f16f29
Commit f7f16f29 authored Mar 31, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 0 deletions

vllm/envs.py vllm/envs.py +5 -0

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -19,6 +19,7 @@ if TYPE_CHECKING:
    VLLM_USE_OPT_OP: bool = False
    VLLM_USE_TC_PAGED_ATTN: bool = False
    VLLM_USE_PA_PRINT_PARAM: bool = False 
+    VLLM_USE_FLUX: bool = False
    VLLM_FLASH_ATTN_VERSION: Optional[int] = None
    LOCAL_RANK: int = 0
    CUDA_VISIBLE_DEVICES: Optional[str] = None
@@ -254,6 +255,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    "VLLM_USE_PA_PRINT_PARAM":
    lambda: (os.environ.get("VLLM_USE_PA_PRINT_PARAM", "False").lower() in
             ("true", "1")),
+    
+    # Set to 1 to try the Flux fused collective-communication GEMM kernels.
+    "VLLM_USE_FLUX":
+    lambda: bool(int(os.getenv("VLLM_USE_FLUX", "0"))),

    # Force vllm to use a specific flash-attention version (2 or 3), only valid
    # when using the flash-attention backend.