config.py 3.95 KB
Newer Older
1
import contextlib
2
from typing import Dict, Optional, Type
Jasmond L's avatar
Jasmond L committed
3

4
from transformers import GenerationConfig, PretrainedConfig
5

6
from vllm.envs import VLLM_USE_MODELSCOPE
7
from vllm.logger import init_logger
8
from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
9
10
11
12
                                             InternVLChatConfig, JAISConfig,
                                             MedusaConfig, MLPSpeculatorConfig,
                                             MPTConfig, NemotronConfig,
                                             RWConfig)
13
14
15
16
17

if VLLM_USE_MODELSCOPE:
    from modelscope import AutoConfig
else:
    from transformers import AutoConfig
18

19
20
logger = init_logger(__name__)

# Maps a config's `model_type` string to the config class used to load it.
# `get_config` consults this table after the initial AutoConfig load and, on a
# match, reloads the config through the class listed here.
_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
    "chatglm": ChatGLMConfig,
    "dbrx": DbrxConfig,
    "mpt": MPTConfig,
    "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
    "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
    "jais": JAISConfig,
    "mlp_speculator": MLPSpeculatorConfig,
    "medusa": MedusaConfig,
    "internvl_chat": InternVLChatConfig,
    "nemotron": NemotronConfig,
}

# Register every entry with AutoConfig at import time. A ValueError raised by
# `register` is deliberately ignored so one entry cannot abort the import.
# NOTE(review): presumably the ValueError signals a model type AutoConfig
# already knows -- confirm against the transformers documentation.
for name, cls in _CONFIG_REGISTRY.items():
    with contextlib.suppress(ValueError):
        AutoConfig.register(name, cls)

38

Jasmond L's avatar
Jasmond L committed
39
40
def get_config(model: str,
               trust_remote_code: bool,
               revision: Optional[str] = None,
               code_revision: Optional[str] = None,
               rope_scaling: Optional[dict] = None,
               rope_theta: Optional[float] = None) -> PretrainedConfig:
    """Load the config for ``model`` and apply optional RoPE overrides.

    The config is first loaded through ``AutoConfig.from_pretrained``. If its
    ``model_type`` has an entry in ``_CONFIG_REGISTRY``, the config is loaded
    again through the registered config class.

    Args:
        model: Model identifier passed to ``from_pretrained``.
        trust_remote_code: Forwarded to ``AutoConfig.from_pretrained``.
        revision: Forwarded as ``revision`` to ``from_pretrained``.
        code_revision: Forwarded as ``code_revision`` to ``from_pretrained``.
        rope_scaling: When not None, replaces the config's ``rope_scaling``.
        rope_theta: When not None, replaces the config's ``rope_theta``.

    Returns:
        The loaded config with any requested overrides applied.

    Raises:
        RuntimeError: If ``trust_remote_code`` is false and loading failed
            with a ValueError saying the model requires executing its
            configuration file.
        ValueError: Any other ValueError from ``AutoConfig.from_pretrained``.
    """
    try:
        config = AutoConfig.from_pretrained(
            model,
            trust_remote_code=trust_remote_code,
            revision=revision,
            code_revision=code_revision)
    except ValueError as e:
        if (not trust_remote_code and
                "requires you to execute the configuration file" in str(e)):
            err_msg = (
                "Failed to load the model config. If the model is a custom "
                "model not yet available in the HuggingFace transformers "
                "library, consider setting `trust_remote_code=True` in LLM "
                "or using the `--trust-remote-code` flag in the CLI.")
            raise RuntimeError(err_msg) from e
        # Any other failure is propagated unchanged.
        raise

    # Model types listed in the registry are reloaded through their
    # registered config class.
    config_class = _CONFIG_REGISTRY.get(config.model_type)
    if config_class is not None:
        config = config_class.from_pretrained(model,
                                              revision=revision,
                                              code_revision=code_revision)

    # Apply caller-supplied overrides, logging the old and new values.
    for key, value in [("rope_scaling", rope_scaling),
                       ("rope_theta", rope_theta)]:
        if value is not None:
            logger.info("Updating %s from %r to %r", key,
                        getattr(config, key, None), value)
            config.update({key: value})
    return config
74
75
76
77
78
79
80
81
82
83
84
85
86


def get_hf_text_config(config: PretrainedConfig):
    """Get the "sub" config relevant to llm for multi modal models.

    No op for pure text models: a config without a ``text_config`` attribute
    is returned unchanged.

    Args:
        config: The model config to inspect.

    Returns:
        ``config.text_config`` when that attribute exists, otherwise
        ``config`` itself.

    Raises:
        AssertionError: If ``text_config`` exists but has no
            ``num_attention_heads`` attribute.
    """
    if not hasattr(config, "text_config"):
        return config

    text_config = config.text_config
    # The code operates under the assumption that text_config should have
    # `num_attention_heads` (among others). Assert here to fail early
    # if transformers config doesn't align with this assumption.
    assert hasattr(text_config, "num_attention_heads"), (
        "text_config is expected to define `num_attention_heads`")
    return text_config
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109


def try_get_generation_config(
    model: str,
    trust_remote_code: bool,
    revision: Optional[str] = None,
) -> Optional[GenerationConfig]:
    """Best-effort lookup of a generation config for ``model``.

    ``GenerationConfig.from_pretrained`` is tried first. If it raises
    ``OSError``, a generation config is derived from the model config
    returned by ``get_config`` instead.

    Args:
        model: Model identifier passed to the loaders.
        trust_remote_code: Forwarded to ``get_config`` in the fallback path.
        revision: Forwarded as ``revision`` to both loaders.

    Returns:
        The generation config, or None when the fallback path also raises
        ``OSError``.
    """
    try:
        return GenerationConfig.from_pretrained(model, revision=revision)
    except OSError:  # Not found
        # Fall back to building the generation config from the model config.
        # A second OSError (not found) means there is nothing to return.
        with contextlib.suppress(OSError):
            model_config = get_config(
                model,
                trust_remote_code=trust_remote_code,
                revision=revision,
            )
            return GenerationConfig.from_model_config(model_config)
        return None