# SPDX-License-Identifier: Apache-2.0 # Adapted from vllm: https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/envs.py import os from typing import TYPE_CHECKING, Any, Callable, Dict, Optional if TYPE_CHECKING: FASTVIDEO_RINGBUFFER_WARNING_INTERVAL: int = 60 FASTVIDEO_NCCL_SO_PATH: Optional[str] = None LD_LIBRARY_PATH: Optional[str] = None LOCAL_RANK: int = 0 CUDA_VISIBLE_DEVICES: Optional[str] = None FASTVIDEO_CACHE_ROOT: str = os.path.expanduser("~/.cache/fastvideo") FASTVIDEO_CONFIG_ROOT: str = os.path.expanduser("~/.config/fastvideo") FASTVIDEO_CONFIGURE_LOGGING: int = 1 FASTVIDEO_LOGGING_LEVEL: str = "INFO" FASTVIDEO_LOGGING_PREFIX: str = "" FASTVIDEO_LOGGING_CONFIG_PATH: Optional[str] = None FASTVIDEO_TRACE_FUNCTION: int = 0 FASTVIDEO_ATTENTION_BACKEND: Optional[str] = None FASTVIDEO_ATTENTION_CONFIG: Optional[str] = None FASTVIDEO_WORKER_MULTIPROC_METHOD: str = "fork" FASTVIDEO_TARGET_DEVICE: str = "cuda" MAX_JOBS: Optional[str] = None NVCC_THREADS: Optional[str] = None CMAKE_BUILD_TYPE: Optional[str] = None VERBOSE: bool = False FASTVIDEO_SERVER_DEV_MODE: bool = False def get_default_cache_root() -> str: return os.getenv( "XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache"), ) def get_default_config_root() -> str: return os.getenv( "XDG_CONFIG_HOME", os.path.join(os.path.expanduser("~"), ".config"), ) def maybe_convert_int(value: Optional[str]) -> Optional[int]: if value is None: return None return int(value) # The begin-* and end* here are used by the documentation generator # to extract the used env vars. # begin-env-vars-definition environment_variables: Dict[str, Callable[[], Any]] = { # ================== Installation Time Env Vars ================== # Target device of FastVideo, supporting [cuda (by default), # rocm, neuron, cpu, openvino] "FASTVIDEO_TARGET_DEVICE": lambda: os.getenv("FASTVIDEO_TARGET_DEVICE", "cuda"), # Maximum number of compilation jobs to run in parallel. # By default this is the number of CPUs "MAX_JOBS": lambda: os.getenv("MAX_JOBS", None), # Number of threads to use for nvcc # By default this is 1. # If set, `MAX_JOBS` will be reduced to avoid oversubscribing the CPU. "NVCC_THREADS": lambda: os.getenv("NVCC_THREADS", None), # If set, fastvideo will use precompiled binaries (*.so) "FASTVIDEO_USE_PRECOMPILED": lambda: bool(os.environ.get("FASTVIDEO_USE_PRECOMPILED")) or bool( os.environ.get("FASTVIDEO_PRECOMPILED_WHEEL_LOCATION")), # CMake build type # If not set, defaults to "Debug" or "RelWithDebInfo" # Available options: "Debug", "Release", "RelWithDebInfo" "CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"), # If set, fastvideo will print verbose logs during installation "VERBOSE": lambda: bool(int(os.getenv('VERBOSE', '0'))), # Root directory for FASTVIDEO configuration files # Defaults to `~/.config/fastvideo` unless `XDG_CONFIG_HOME` is set # Note that this not only affects how fastvideo finds its configuration files # during runtime, but also affects how fastvideo installs its configuration # files during **installation**. "FASTVIDEO_CONFIG_ROOT": lambda: os.path.expanduser( os.getenv( "FASTVIDEO_CONFIG_ROOT", os.path.join(get_default_config_root(), "fastvideo"), )), # ================== Runtime Env Vars ================== # Root directory for FASTVIDEO cache files # Defaults to `~/.cache/fastvideo` unless `XDG_CACHE_HOME` is set "FASTVIDEO_CACHE_ROOT": lambda: os.path.expanduser( os.getenv( "FASTVIDEO_CACHE_ROOT", os.path.join(get_default_cache_root(), "fastvideo"), )), # Interval in seconds to log a warning message when the ring buffer is full "FASTVIDEO_RINGBUFFER_WARNING_INTERVAL": lambda: int(os.environ.get("FASTVIDEO_RINGBUFFER_WARNING_INTERVAL", "60")), # Path to the NCCL library file. It is needed because nccl>=2.19 brought # by PyTorch contains a bug: https://github.com/NVIDIA/nccl/issues/1234 "FASTVIDEO_NCCL_SO_PATH": lambda: os.environ.get("FASTVIDEO_NCCL_SO_PATH", None), # when `FASTVIDEO_NCCL_SO_PATH` is not set, fastvideo will try to find the nccl # library file in the locations specified by `LD_LIBRARY_PATH` "LD_LIBRARY_PATH": lambda: os.environ.get("LD_LIBRARY_PATH", None), # Internal flag to enable Dynamo fullgraph capture "FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE": lambda: bool( os.environ.get("FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"), # local rank of the process in the distributed setting, used to determine # the GPU device id "LOCAL_RANK": lambda: int(os.environ.get("LOCAL_RANK", "0")), # used to control the visible devices in the distributed setting "CUDA_VISIBLE_DEVICES": lambda: os.environ.get("CUDA_VISIBLE_DEVICES", None), # timeout for each iteration in the engine "FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S": lambda: int(os.environ.get("FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S", "60")), # Logging configuration # If set to 0, fastvideo will not configure logging # If set to 1, fastvideo will configure logging using the default configuration # or the configuration file specified by FASTVIDEO_LOGGING_CONFIG_PATH "FASTVIDEO_CONFIGURE_LOGGING": lambda: int(os.getenv("FASTVIDEO_CONFIGURE_LOGGING", "1")), "FASTVIDEO_LOGGING_CONFIG_PATH": lambda: os.getenv("FASTVIDEO_LOGGING_CONFIG_PATH"), # this is used for configuring the default logging level "FASTVIDEO_LOGGING_LEVEL": lambda: os.getenv("FASTVIDEO_LOGGING_LEVEL", "INFO"), # if set, FASTVIDEO_LOGGING_PREFIX will be prepended to all log messages "FASTVIDEO_LOGGING_PREFIX": lambda: os.getenv("FASTVIDEO_LOGGING_PREFIX", ""), # Trace function calls # If set to 1, fastvideo will trace function calls # Useful for debugging "FASTVIDEO_TRACE_FUNCTION": lambda: int(os.getenv("FASTVIDEO_TRACE_FUNCTION", "0")), # Backend for attention computation # Available options: # - "TORCH_SDPA": use torch.nn.MultiheadAttention # - "FLASH_ATTN": use FlashAttention # - "SLIDING_TILE_ATTN" : use Sliding Tile Attention # - "SAGE_ATTN": use Sage Attention "FASTVIDEO_ATTENTION_BACKEND": lambda: os.getenv("FASTVIDEO_ATTENTION_BACKEND", None), # Path to the attention configuration file. Only used for sliding tile # attention for now. "FASTVIDEO_ATTENTION_CONFIG": lambda: (None if os.getenv("FASTVIDEO_ATTENTION_CONFIG", None) is None else os.path.expanduser(os.getenv("FASTVIDEO_ATTENTION_CONFIG", "."))), # Use dedicated multiprocess context for workers. # Both spawn and fork work "FASTVIDEO_WORKER_MULTIPROC_METHOD": lambda: os.getenv("FASTVIDEO_WORKER_MULTIPROC_METHOD", "fork"), # Enables torch profiler if set. Path to the directory where torch profiler # traces are saved. Note that it must be an absolute path. "FASTVIDEO_TORCH_PROFILER_DIR": lambda: (None if os.getenv("FASTVIDEO_TORCH_PROFILER_DIR", None) is None else os. path.expanduser(os.getenv("FASTVIDEO_TORCH_PROFILER_DIR", "."))), # If set, fastvideo will run in development mode, which will enable # some additional endpoints for developing and debugging, # e.g. `/reset_prefix_cache` "FASTVIDEO_SERVER_DEV_MODE": lambda: bool(int(os.getenv("FASTVIDEO_SERVER_DEV_MODE", "0"))), } # end-env-vars-definition def __getattr__(name: str): # lazy evaluation of environment variables if name in environment_variables: return environment_variables[name]() raise AttributeError(f"module {__name__!r} has no attribute {name!r}") def __dir__(): return list(environment_variables.keys())