"vllm/vscode:/vscode.git/clone" did not exist on "04bf5a35fa2692aa75e0442791849dd976014ce8"
Commit f7f16f29 authored by zhuwenwen
Browse files

add VLLM_USE_FLUX

parent 3ab7f0ef
...@@ -19,6 +19,7 @@ if TYPE_CHECKING: ...@@ -19,6 +19,7 @@ if TYPE_CHECKING:
VLLM_USE_OPT_OP: bool = False VLLM_USE_OPT_OP: bool = False
VLLM_USE_TC_PAGED_ATTN: bool = False VLLM_USE_TC_PAGED_ATTN: bool = False
VLLM_USE_PA_PRINT_PARAM: bool = False VLLM_USE_PA_PRINT_PARAM: bool = False
VLLM_USE_FLUX: bool = False
VLLM_FLASH_ATTN_VERSION: Optional[int] = None VLLM_FLASH_ATTN_VERSION: Optional[int] = None
LOCAL_RANK: int = 0 LOCAL_RANK: int = 0
CUDA_VISIBLE_DEVICES: Optional[str] = None CUDA_VISIBLE_DEVICES: Optional[str] = None
...@@ -254,6 +255,10 @@ environment_variables: Dict[str, Callable[[], Any]] = { ...@@ -254,6 +255,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_USE_PA_PRINT_PARAM": "VLLM_USE_PA_PRINT_PARAM":
lambda: (os.environ.get("VLLM_USE_PA_PRINT_PARAM", "False").lower() in lambda: (os.environ.get("VLLM_USE_PA_PRINT_PARAM", "False").lower() in
("true", "1")), ("true", "1")),
# If set, try to use the flux fused collective communication gemm kernels.
"VLLM_USE_FLUX":
lambda: bool(int(os.getenv("VLLM_USE_FLUX", "0"))),
# Force vllm to use a specific flash-attention version (2 or 3), only valid # Force vllm to use a specific flash-attention version (2 or 3), only valid
# when using the flash-attention backend. # when using the flash-attention backend.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment