# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from types import MethodType
from typing import TYPE_CHECKING, Any

import torch
from peft import PeftModel
from transformers import PreTrainedModel, PreTrainedTokenizerBase, is_torch_npu_available
from transformers.integrations import is_deepspeed_zero3_enabled
from transformers.modeling_utils import is_fsdp_enabled

from ..extras import logging
from ..extras.misc import infer_optim_dtype, is_env_enabled
from ..extras.packages import is_transformers_version_greater_than
from .model_utils.attention import configure_attn_implementation, print_attn_implementation
from .model_utils.checkpointing import prepare_model_for_training
from .model_utils.embedding import resize_embedding_layer
from .model_utils.kv_cache import configure_kv_cache
from .model_utils.longlora import configure_longlora
from .model_utils.moe import add_z3_leaf_module, configure_moe
from .model_utils.packing import configure_packing
from .model_utils.quantization import configure_quantization
from .model_utils.rope import configure_rope
from .model_utils.valuehead import prepare_valuehead_model
from .model_utils.visual import autocast_projector_dtype, configure_visual_model


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedTokenizer, ProcessorMixin
    from trl import AutoModelForCausalLMWithValueHead

    from ..hparams import ModelArguments


logger = logging.get_logger(__name__)


def patch_tokenizer(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> None:
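    # some tokenizers (e.g. those loaded with custom remote code) override `_pad`;
    # rebind the base-class implementation so padding behaves consistently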
    if "PreTrainedTokenizerBase" not in str(tokenizer._pad.__func__):
        tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)

    if model_args.model_max_length is not None and tokenizer.model_max_length < model_args.model_max_length:
        tokenizer.model_max_length = model_args.model_max_length  # enlarge the tokenizer max length

    if model_args.new_special_tokens is not None:
        num_added_tokens = tokenizer.add_special_tokens(
            dict(additional_special_tokens=model_args.new_special_tokens),
            replace_additional_special_tokens=False,
        )
        logger.info_rank0("Added {} to special tokens.".format(",".join(model_args.new_special_tokens)))
        if num_added_tokens > 0 and not model_args.resize_vocab:
            model_args.resize_vocab = True
            logger.warning_rank0("New tokens have been added, so `resize_vocab` is set to True.")


def patch_processor(
    processor: "ProcessorMixin",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
) -> None:
    setattr(processor, "tokenizer", tokenizer)
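    # attach the multimodal preprocessing options from `model_args` to the processor
    # so they can be read back from the processor during data preprocessing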
    setattr(processor, "image_max_pixels", model_args.image_max_pixels)
    setattr(processor, "image_min_pixels", model_args.image_min_pixels)
    setattr(processor, "image_do_pan_and_scan", model_args.image_do_pan_and_scan)
    setattr(processor, "video_max_pixels", model_args.video_max_pixels)
    setattr(processor, "video_min_pixels", model_args.video_min_pixels)
    setattr(processor, "video_fps", model_args.video_fps)
    setattr(processor, "video_maxlen", model_args.video_maxlen)
    setattr(processor, "audio_sampling_rate", model_args.audio_sampling_rate)
    setattr(processor, "use_audio_in_video", model_args.use_audio_in_video)


def patch_config(
    config: "PretrainedConfig",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    init_kwargs: dict[str, Any],
    is_trainable: bool,
) -> None:
    if model_args.compute_dtype is None:  # priority: bf16 > fp16 > fp32
        if model_args.infer_dtype != "auto" and not is_trainable:
            model_args.compute_dtype = getattr(torch, model_args.infer_dtype)
        else:
            model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))

    if is_torch_npu_available():
        # avoid JIT compile on NPU devices, see https://zhuanlan.zhihu.com/p/660875458
        torch.npu.set_compile_mode(jit_compile=is_env_enabled("NPU_JIT_COMPILE"))

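    # configure model-specific behavior (attention backend, rope scaling, quantization, moe, packing, kv cache, ...)
    # before the weights are loaded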
    configure_attn_implementation(config, model_args, is_trainable)
    configure_rope(config, model_args, is_trainable)
    configure_longlora(config, model_args, is_trainable)
    configure_quantization(config, tokenizer, model_args, init_kwargs)
    configure_moe(config, model_args, is_trainable)
    configure_visual_model(config)
    configure_packing(model_args, is_trainable)
    configure_kv_cache(config, model_args, is_trainable)

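    # the legacy qwen (v1) remote code reads these config flags to choose flash attention and the compute dtype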
    if getattr(config, "model_type", None) == "qwen":
        setattr(config, "use_flash_attn", model_args.flash_attn == "fa2")
        for dtype_name, dtype in [("fp16", torch.float16), ("bf16", torch.bfloat16), ("fp32", torch.float32)]:
            setattr(config, dtype_name, model_args.compute_dtype == dtype)

    if getattr(config, "model_type", None) == "minicpmo":
        setattr(config, "init_audio", True)
        setattr(config, "init_tts", False)

    # replace the top-k gating method
    if getattr(config, "model_type", None) == "kimi_vl" and is_trainable:
        setattr(config.text_config, "topk_method", "greedy")

    if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
        raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")

    if getattr(config, "model_type", None) == "internlm3" and not is_transformers_version_greater_than("4.47.1"):
        raise RuntimeError("InternLM3 model requires transformers>=4.47.1, please upgrade it.")

    # deepspeed zero3 is not compatible with low_cpu_mem_usage
    init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())

    # do not cast the model's data type when using deepspeed zero3 without qlora
    if not (is_deepspeed_zero3_enabled() and model_args.quantization_bit is None):
        init_kwargs["torch_dtype"] = model_args.compute_dtype

        if init_kwargs["low_cpu_mem_usage"] and not is_fsdp_enabled():  # fsdp does not need device map
            if "device_map" not in init_kwargs and model_args.device_map:
                init_kwargs["device_map"] = model_args.device_map  # device map requires low_cpu_mem_usage=True

            if init_kwargs.get("device_map", None) == "auto":
                init_kwargs["offload_folder"] = model_args.offload_folder


def patch_model(
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    is_trainable: bool,
    add_valuehead: bool,
) -> None:
    gen_config = model.generation_config  # check and fix generation config
    if not gen_config.do_sample and (
        (gen_config.temperature is not None and gen_config.temperature != 1.0)
        or (gen_config.top_p is not None and gen_config.top_p != 1.0)
        or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
    ):
        gen_config.do_sample = True

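    # some remote-code models ship a custom `generate`; rebind the standard implementation,
    # except for minicpm-v/o which rely on their own generate method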
    if getattr(model.config, "model_type", None) not in ["minicpmv", "minicpmo"] and "GenerationMixin" not in str(
        model.generate.__func__
    ):
        model.generate = MethodType(PreTrainedModel.generate, model)

    if add_valuehead:
        prepare_valuehead_model(model)

    if model_args.resize_vocab:
        resize_embedding_layer(model, tokenizer)

    if is_trainable:
        prepare_model_for_training(model, model_args)
        autocast_projector_dtype(model, model_args)
        add_z3_leaf_module(model)

    if not model_args.use_unsloth:
        print_attn_implementation(model.config)

    try:
        model.add_model_tags(["llama-factory"])
    except Exception:
        logger.warning_rank0("Cannot properly tag the model.")


def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None:
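    # AutoModelForCausalLMWithValueHead does not subclass PreTrainedModel, so the helpers below
    # delegate to the wrapped model; its parameters are also excluded when saving the value head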
    def tie_weights(self: "AutoModelForCausalLMWithValueHead") -> None:
        if isinstance(self.pretrained_model, PreTrainedModel):
            self.pretrained_model.tie_weights()

    def get_input_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module:
        if isinstance(self.pretrained_model, PreTrainedModel):
            return self.pretrained_model.get_input_embeddings()

    def get_output_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module:
        if isinstance(self.pretrained_model, PreTrainedModel):
            return self.pretrained_model.get_output_embeddings()

    def create_or_update_model_card(self: "AutoModelForCausalLMWithValueHead", output_dir: str) -> None:
        if isinstance(self.pretrained_model, PeftModel):
            self.pretrained_model.create_or_update_model_card(output_dir)

    ignore_modules = [name for name, _ in model.named_parameters() if "pretrained_model" in name]
    setattr(model, "_keys_to_ignore_on_save", ignore_modules)
    setattr(model, "tie_weights", MethodType(tie_weights, model))
    setattr(model, "get_input_embeddings", MethodType(get_input_embeddings, model))
    setattr(model, "get_output_embeddings", MethodType(get_output_embeddings, model))
    setattr(model, "create_or_update_model_card", MethodType(create_or_update_model_card, model))
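

# A rough usage sketch (hypothetical call site; the actual loader in LLaMA-Factory wires these up):
#
#   config = AutoConfig.from_pretrained(model_args.model_name_or_path)
#   patch_tokenizer(tokenizer, model_args)
#   patch_config(config, tokenizer, model_args, init_kwargs, is_trainable)
#   model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
#   patch_model(model, tokenizer, model_args, is_trainable, add_valuehead)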