config.py 5.95 KB
Newer Older
1
import contextlib
2
from pathlib import Path
3
from typing import Any, Dict, Optional, Type, Union
Jasmond L's avatar
Jasmond L committed
4

5
from transformers import GenerationConfig, PretrainedConfig
6
7
from transformers.models.auto.image_processing_auto import (
    get_image_processor_config)
8
9
from transformers.models.auto.modeling_auto import (
    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
10

11
from vllm.envs import VLLM_USE_MODELSCOPE
12
from vllm.logger import init_logger
13
14
# yapf conflicts with isort for this block
# yapf: disable
15
from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
16
                                             EAGLEConfig, ExaoneConfig,
17
18
19
20
21
22
                                             GraniteConfig, InternVLChatConfig,
                                             JAISConfig, MedusaConfig,
                                             MLPSpeculatorConfig, MPTConfig,
                                             NemotronConfig, RWConfig,
                                             UltravoxConfig)
# yapf: enable
23
from vllm.transformers_utils.utils import check_gguf_file
24
25
26
27
28

if VLLM_USE_MODELSCOPE:
    from modelscope import AutoConfig
else:
    from transformers import AutoConfig
29

30
31
logger = init_logger(__name__)

# Config classes provided by vLLM itself, keyed by the `model_type` string
# found in a model's config. `get_config` consults this table before
# falling back to `AutoConfig`.
_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
    "chatglm": ChatGLMConfig,
    "dbrx": DbrxConfig,
    "mpt": MPTConfig,
    # Falcon checkpoints use two different `model_type` spellings.
    "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
    "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
    "jais": JAISConfig,
    "mlp_speculator": MLPSpeculatorConfig,
    "medusa": MedusaConfig,
    "eagle": EAGLEConfig,
    "exaone": ExaoneConfig,
    "internvl_chat": InternVLChatConfig,
    "nemotron": NemotronConfig,
    "ultravox": UltravoxConfig,
    # Granite can be removed from here once we have upgraded to
    # transformers 4.45+
    "granite": GraniteConfig,
}

# Make every registry entry known to AutoConfig as well. A ValueError from
# `register` is deliberately ignored so that one rejected entry does not
# stop the remaining registrations (presumably raised when the name is
# already registered -- confirm against the transformers version in use).
for name, cls in _CONFIG_REGISTRY.items():
    with contextlib.suppress(ValueError):
        AutoConfig.register(name, cls)

55

56
57
58
59
60
61
62
63
64
65
66
def get_config(
    model: Union[str, Path],
    trust_remote_code: bool,
    revision: Optional[str] = None,
    code_revision: Optional[str] = None,
    rope_scaling: Optional[dict] = None,
    rope_theta: Optional[float] = None,
    **kwargs,
) -> PretrainedConfig:
    """Load the config object for ``model``.

    When ``check_gguf_file`` reports ``model`` as a GGUF file, the file name
    is passed on as the ``gguf_file`` keyword and the parent directory is
    used as the model location. The ``model_type`` read from the raw config
    dict selects a class from ``_CONFIG_REGISTRY`` if one is registered;
    otherwise the config is loaded through ``AutoConfig``.

    Args:
        model: Model name or path, or the path of a GGUF file.
        trust_remote_code: Forwarded to ``AutoConfig.from_pretrained``
            (only used when the model type is not in the registry).
        revision: Forwarded to every config loader call.
        code_revision: Forwarded to every config loader call.
        rope_scaling: If not ``None``, written to the loaded config as
            ``rope_scaling``, replacing any existing value.
        rope_theta: If not ``None``, written to the loaded config as
            ``rope_theta``, replacing any existing value.
        **kwargs: Extra keyword arguments forwarded to
            ``PretrainedConfig.get_config_dict`` and to whichever
            ``from_pretrained`` call loads the config.

    Returns:
        The loaded config, with the GGUF architecture mapping and rope
        overrides applied where applicable.

    Raises:
        RuntimeError: If ``AutoConfig`` reports that the config requires
            executing remote code while ``trust_remote_code`` is false, or
            if a GGUF model's ``model_type`` is missing from
            ``MODEL_FOR_CAUSAL_LM_MAPPING_NAMES``.
        ValueError: Any other ``ValueError`` raised by
            ``AutoConfig.from_pretrained`` is re-raised unchanged.
    """
    # Separate model folder from file path for GGUF models
    is_gguf = check_gguf_file(model)
    if is_gguf:
        kwargs["gguf_file"] = Path(model).name
        model = Path(model).parent

    config_dict, _ = PretrainedConfig.get_config_dict(
        model, revision=revision, code_revision=code_revision, **kwargs)

    # Use custom model class if it's in our registry
    model_type = config_dict.get("model_type")
    if model_type in _CONFIG_REGISTRY:
        config_class = _CONFIG_REGISTRY[model_type]
        # Forward **kwargs here as well, so this branch receives the same
        # arguments (notably `gguf_file`) as the `get_config_dict` call
        # above and the AutoConfig branch below.
        config = config_class.from_pretrained(model,
                                              revision=revision,
                                              code_revision=code_revision,
                                              **kwargs)
    else:
        try:
            config = AutoConfig.from_pretrained(
                model,
                trust_remote_code=trust_remote_code,
                revision=revision,
                code_revision=code_revision,
                **kwargs)
        except ValueError as e:
            if (not trust_remote_code
                    and "requires you to execute the configuration file"
                    in str(e)):
                err_msg = (
                    "Failed to load the model config. If the model is a custom "
                    "model not yet available in the HuggingFace transformers "
                    "library, consider setting `trust_remote_code=True` in LLM "
                    "or using the `--trust-remote-code` flag in the CLI.")
                raise RuntimeError(err_msg) from e
            # Unrelated ValueError: propagate with its original traceback.
            raise

    # Special architecture mapping check for GGUF models
    if is_gguf:
        if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
            raise RuntimeError(
                f"Can't get gguf config for {config.model_type}.")
        architecture = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
        config.update({"architectures": [architecture]})

    # Apply caller-supplied rope overrides; `None` means "keep as loaded".
    for key, value in [("rope_scaling", rope_scaling),
                       ("rope_theta", rope_theta)]:
        if value is not None:
            logger.info("Updating %s from %r to %r", key,
                        getattr(config, key, None), value)
            config.update({key: value})

    return config
119
120


121
122
123
124
125
def get_hf_image_processor_config(
    model: Union[str, Path],
    revision: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Return the image processor config for ``model``.

    An empty dict is returned when ``VLLM_USE_MODELSCOPE`` is set. When
    ``check_gguf_file`` reports ``model`` as a GGUF file, the lookup is done
    on the file's parent directory instead. ``revision`` and ``**kwargs``
    are forwarded to ``get_image_processor_config``.
    """
    if VLLM_USE_MODELSCOPE:
        # ModelScope does not provide an interface for image_processor
        return {}

    # Separate model folder from file path for GGUF models
    model_location = Path(model).parent if check_gguf_file(model) else model
    return get_image_processor_config(model_location,
                                      revision=revision,
                                      **kwargs)


135
136
137
138
139
140
141
142
143
144
145
def get_hf_text_config(config: PretrainedConfig):
    """Return the text ("LLM") part of a config.

    Configs without a ``text_config`` attribute are returned unchanged;
    otherwise the nested ``text_config`` is returned.
    """
    if not hasattr(config, "text_config"):
        # Pure text model: nothing to unwrap.
        return config

    text_config = config.text_config
    # Downstream code relies on `num_attention_heads` (among others) being
    # present on the text config, so fail early if transformers ever
    # breaks that assumption.
    assert hasattr(text_config, "num_attention_heads")
    return text_config
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168


def try_get_generation_config(
    model: str,
    trust_remote_code: bool,
    revision: Optional[str] = None,
) -> Optional[GenerationConfig]:
    """Best-effort lookup of a generation config for ``model``.

    First tries ``GenerationConfig.from_pretrained``. If that raises
    ``OSError``, falls back to deriving a generation config from the model
    config loaded via ``get_config``. Returns ``None`` if the fallback
    raises ``OSError`` too.
    """
    try:
        return GenerationConfig.from_pretrained(model, revision=revision)
    except OSError:  # Not found
        # No stored generation config; derive one from the model config.
        try:
            model_config = get_config(model,
                                      trust_remote_code=trust_remote_code,
                                      revision=revision)
            return GenerationConfig.from_model_config(model_config)
        except OSError:  # Not found
            return None