update utils.py

f51086de · zhuwenwen · db94f061 · f51086de
Commit f51086de authored Apr 24, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 27 additions and 30 deletions

vllm/model_executor/model_loader/utils.py vllm/model_executor/model_loader/utils.py +27 -30

No files found.
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -33,52 +33,45 @@ def set_default_torch_dtype(dtype: torch.dtype):
 def is_transformers_impl_compatible(
        arch: str,
-        module: Optional["transformers.PreTrainedModel"] = None) -> bool:
+        module: Optional[transformers.PreTrainedModel] = None) -> bool:
    mod = module or getattr(transformers, arch, None)
    if mod is None:
        return False
+    if hasattr(mod, "supports_backend"):
        return mod.is_backend_compatible()
+    else:
+        return mod._supports_flex_attn
-def resolve_transformers_arch(model_config: ModelConfig,
+def resolve_transformers_fallback(model_config: ModelConfig,
                                  architectures: list[str]):
    for i, arch in enumerate(architectures):
-        if arch == "TransformersForCausalLM":
+        if arch == "TransformersModel":
            continue
-        auto_map: dict[str, str] = getattr(model_config.hf_config, "auto_map",
+        custom_module = None
-                                           None) or dict()
+        auto_map = getattr(model_config.hf_config, "auto_map", None)
-        # Make sure that config class is always initialized before model class,
+        if auto_map is not None and "AutoModel" in auto_map:
-        # otherwise the model class won't be able to access the config class,
+            custom_module = get_class_from_dynamic_module(
-        # the expected auto_map should have correct order like:
+                model_config.hf_config.auto_map["AutoModel"],
-        # "auto_map": {
+                model_config.model)
-        #     "AutoConfig": "<your-repo-name>--<config-name>",
-        #     "AutoModel": "<your-repo-name>--<config-name>",
-        #     "AutoModelFor<Task>": "<your-repo-name>--<config-name>",
-        # },
-        auto_modules = {
-            name: get_class_from_dynamic_module(module, model_config.model)
-            for name, module in sorted(auto_map.items(), key=lambda x: x[0])
-        }
-        custom_model_module = auto_modules.get("AutoModel")
        # TODO(Isotr0py): Further clean up these raises.
        # perhaps handled them in _ModelRegistry._raise_for_unsupported?
        if model_config.model_impl == ModelImpl.TRANSFORMERS:
-            if not is_transformers_impl_compatible(arch, custom_model_module):
+            if not is_transformers_impl_compatible(arch, custom_module):
                raise ValueError(
                    f"The Transformers implementation of {arch} is not "
                    "compatible with vLLM.")
-            architectures[i] = "TransformersForCausalLM"
+            architectures[i] = "TransformersModel"
        if model_config.model_impl == ModelImpl.AUTO:
-            if not is_transformers_impl_compatible(arch, custom_model_module):
+            if not is_transformers_impl_compatible(arch, custom_module):
                raise ValueError(
                    f"{arch} has no vLLM implementation and the Transformers "
-                    "implementation is not compatible with vLLM. Try setting "
+                    "implementation is not compatible with vLLM.")
-                    "VLLM_USE_V1=0.")
            logger.warning(
                "%s has no vLLM implementation, falling back to Transformers "
                "implementation. Some features may not be supported and "
                "performance may not be optimal.", arch)
-            architectures[i] = "TransformersForCausalLM"
+            architectures[i] = "TransformersModel"
    return architectures
@@ -118,6 +111,9 @@ def get_model_architecture(
                os.environ['AWQ_PAD'] = '1'
            else:
                os.environ['AWQ_PAD'] = '0'
+    else:
+        if os.getenv('LLAMA_NN') == '1': 
+            os.environ['LLAMA_NN'] = '1'
        else:
            os.environ['LLAMA_NN'] = '0'
        os.environ['LM_NN'] = '0'
@@ -141,7 +137,8 @@ def get_model_architecture(
                            for arch in architectures)
    if (not is_vllm_supported
            or model_config.model_impl == ModelImpl.TRANSFORMERS):
-        architectures = resolve_transformers_arch(model_config, architectures)
+        architectures = resolve_transformers_fallback(model_config,
+                                                      architectures)
    model_cls, arch = ModelRegistry.resolve_model_cls(architectures)
    if model_config.task == "embed":