Unverified commit 934bebf1, authored by Harry Mellor and committed by GitHub
Browse files

Better errors for Transformers backend missing features (#23759)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 885ca6d3
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
"""Wrapper around `transformers` models""" """Wrapper around `transformers` models"""
from collections.abc import Iterable, Mapping from collections.abc import Iterable, Mapping
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path
from typing import Literal, Optional, Union from typing import Literal, Optional, Union
import regex as re import regex as re
...@@ -60,6 +61,21 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper, ...@@ -60,6 +61,21 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
logger = init_logger(__name__) logger = init_logger(__name__)
def get_feature_request_tip(
    model: str,
    trust_remote_code: bool,
) -> str:
    """Build the hint appended to "feature not supported" error messages.

    The hint always ends with a pointer to the vLLM documentation on writing
    custom models. Unless ``model`` exists as a filesystem path, it is
    preceded by a sentence telling the user where to request the feature:

    * ``trust_remote_code`` is true: a new discussion on the model's
      Hugging Face page (the URL is built from ``model``).
    * otherwise: a new issue on the ``huggingface/transformers`` GitHub
      repository.

    Args:
        model: Model name or path, as given in the model config.
        trust_remote_code: Whether remote code is trusted for this model;
            selects which of the two request destinations is suggested.

    Returns:
        The complete tip text, ready to be embedded in an error message.
    """
    docs_link = ("https://docs.vllm.ai/en/latest/models/"
                 "supported_models.html#writing-custom-models")
    how_to = (f"See {docs_link} for instructions on how to add support "
              "yourself.")

    if trust_remote_code:
        destination = ("a discussion at "
                       f"https://huggingface.co/{model}/discussions/new")
    else:
        destination = ("an issue at https://github.com/huggingface/"
                       "transformers/issues/new/choose")

    # A model that resolves to an existing path gets only the docs pointer
    # (presumably because a local checkpoint has no upstream page on which
    # to file the request -- confirm with the author).
    if Path(model).exists():
        return how_to

    return (f"Please open {destination} to request support for this "
            f"feature. {how_to}")
def vllm_flash_attention_forward( def vllm_flash_attention_forward(
# Transformers args # Transformers args
module: torch.nn.Module, module: torch.nn.Module,
...@@ -480,8 +496,11 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP): ...@@ -480,8 +496,11 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
return return
if not self.model.supports_pp_plan: if not self.model.supports_pp_plan:
tip = get_feature_request_tip(self.model_config.model,
self.model_config.trust_remote_code)
raise ValueError( raise ValueError(
f"{type(self.model)} does not support pipeline parallel yet!") f"{type(self.model)} does not support pipeline parallel. {tip}"
)
module_lists = [] module_lists = []
module_list_idx = None module_list_idx = None
...@@ -535,8 +554,10 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP): ...@@ -535,8 +554,10 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
models_with_tp_plan = filter(supports_tp_plan, pretrained_models) models_with_tp_plan = filter(supports_tp_plan, pretrained_models)
if not any(models_with_tp_plan) and self.tp_size > 1: if not any(models_with_tp_plan) and self.tp_size > 1:
tip = get_feature_request_tip(self.model_config.model,
self.model_config.trust_remote_code)
raise ValueError( raise ValueError(
f"{type(self.model)} does not support tensor parallel yet!") f"{type(self.model)} does not support tensor parallel. {tip}")
def _tensor_parallel(module: nn.Module, def _tensor_parallel(module: nn.Module,
prefix: str = "", prefix: str = "",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment