sync v0.15.1

c721b814 · zhuwenwen · d53fe7e5 · c721b814 · c721b814 · d53fe7e5
Commit c721b814 authored Feb 05, 2026 by zhuwenwen
20 changed files
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -87,7 +87,6 @@ if TYPE_CHECKING:
    VLLM_HTTP_TIMEOUT_KEEP_ALIVE: int = 5  # seconds
    VLLM_PLUGINS: list[str] | None = None
    VLLM_LORA_RESOLVER_CACHE_DIR: str | None = None
-    VLLM_LORA_RESOLVER_HF_REPO_LIST: str | None = None
    # Deprecated env variables for profiling, kept for backward compatibility
    # See also vllm/config/profiler.py and `--profiler-config` argument
    VLLM_TORCH_CUDA_PROFILE: str | None = None
@@ -289,11 +288,16 @@ def use_aot_compile() -> bool:
    from vllm.model_executor.layers.batch_invariant import (
        vllm_is_batch_invariant,
    )
+    from vllm.platforms import current_platform
    from vllm.utils.torch_utils import is_torch_equal_or_newer
    default_value = (
        "1"
-        if is_torch_equal_or_newer("2.10.0.dev") and not disable_compile_cache()
+        if is_torch_equal_or_newer("2.10.0.dev")
+        and not disable_compile_cache()
+        # Disabling AOT_COMPILE for CPU
+        # See: https://github.com/vllm-project/vllm/issues/32033
+        and not current_platform.is_cpu()
        else "0"
    )
@@ -870,13 +874,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_LORA_RESOLVER_CACHE_DIR": lambda: os.getenv(
        "VLLM_LORA_RESOLVER_CACHE_DIR", None
    ),
-    # A remote HF repo(s) containing one or more LoRA adapters, which
-    # may be downloaded and leveraged as needed. Only works if plugins
-    # are enabled and VLLM_ALLOW_RUNTIME_LORA_UPDATING is enabled.
-    # Values should be comma separated.
-    "VLLM_LORA_RESOLVER_HF_REPO_LIST": lambda: os.getenv(
-        "VLLM_LORA_RESOLVER_HF_REPO_LIST", None
-    ),
    # Enables torch CUDA profiling if set to 1.
    # Deprecated, see profiler_config.
    "VLLM_TORCH_CUDA_PROFILE": lambda: os.getenv("VLLM_TORCH_CUDA_PROFILE"),
@@ -884,7 +881,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Deprecated, see profiler_config.
    "VLLM_TORCH_PROFILER_DIR": lambda: os.getenv("VLLM_TORCH_PROFILER_DIR"),
    # Enable torch profiler to record shapes if set to 1.
-    # Deprecated, see profiler_config.
+# Deprecated, see profiler_config.
    "VLLM_TORCH_PROFILER_RECORD_SHAPES": lambda: (
        os.getenv("VLLM_TORCH_PROFILER_RECORD_SHAPES")
    ),

--- a/vllm/logging_utils/__init__.py
+++ b/vllm/logging_utils/__init__.py
--- a/vllm/logging_utils/access_log_filter.py
+++ b/vllm/logging_utils/access_log_filter.py
--- a/vllm/lora/ops/triton_ops/fused_moe_lora_op.py
+++ b/vllm/lora/ops/triton_ops/fused_moe_lora_op.py
--- a/vllm/model_executor/layers/attention/mla_attention.py
+++ b/vllm/model_executor/layers/attention/mla_attention.py
--- a/vllm/model_executor/layers/fused_moe/all2all_utils.py
+++ b/vllm/model_executor/layers/fused_moe/all2all_utils.py
--- a/vllm/model_executor/layers/fused_moe/config.py
+++ b/vllm/model_executor/layers/fused_moe/config.py
--- a/vllm/model_executor/layers/fused_moe/configs/E=128,N=128,device_name=BW200.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=128,N=128,device_name=BW200.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=128,N=128,device_name=BW200_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=128,N=128,device_name=BW200_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=BW200_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=BW200_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=DCU_K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=DCU_K100_AI_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=K100_AI_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=BW200.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=BW200.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=BW200.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=BW200.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=BW200_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=BW200_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=DCU_K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=DCU_K100_AI_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=K100_AI_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=32,N=512,device_name=BW200_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=32,N=512,device_name=BW200_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=32,N=512,device_name=DCU_K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=32,N=512,device_name=DCU_K100_AI_nn.json
--- a/vllm/model_executor/layers/fused_moe/configs/E=32,N=512,device_name=K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=32,N=512,device_name=K100_AI_nn.json