# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from dataclasses import asdict, dataclass, field, fields
from typing import Any, Dict, Literal, Optional, Union

import torch
from transformers.training_args import _convert_str_dict
from typing_extensions import Self


@dataclass
class QuantizationArguments:
    r"""
    Arguments pertaining to the quantization method.
    """

    quantization_method: Literal["bitsandbytes", "hqq", "eetq"] = field(
        default="bitsandbytes",
        metadata={"help": "Quantization method to use for on-the-fly quantization."},
    )
    quantization_bit: Optional[int] = field(
        default=None,
        metadata={"help": "The number of bits to quantize the model using on-the-fly quantization."},
    )
    quantization_type: Literal["fp4", "nf4"] = field(
        default="nf4",
        metadata={"help": "Quantization data type to use in bitsandbytes int4 training."},
    )
    double_quantization: bool = field(
        default=True,
        metadata={"help": "Whether or not to use double quantization in bitsandbytes int4 training."},
    )
    quantization_device_map: Optional[Literal["auto"]] = field(
        default=None,
        metadata={"help": "Device map used to infer the 4-bit quantized model, needs bitsandbytes>=0.43.0."},
    )
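
# A hedged sketch of how the bitsandbytes fields above typically map onto
# transformers' `BitsAndBytesConfig` (illustrative only; the actual wiring
# lives in the model loader, not in this module):
#
#     from transformers import BitsAndBytesConfig
#
#     bnb_config = BitsAndBytesConfig(
#         load_in_4bit=True,               # quantization_bit == 4
#         bnb_4bit_quant_type="nf4",       # quantization_type
#         bnb_4bit_use_double_quant=True,  # double_quantization
#     )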


@dataclass
class ProcessorArguments:
    r"""
    Arguments pertaining to the image processor.
    """

    image_resolution: int = field(
        default=512 * 512,
        metadata={"help": "Keeps the number of pixels of image below this resolution."},
    )
    video_resolution: int = field(
        default=128 * 128,
        metadata={"help": "Keeps the number of pixels of video below this resolution."},
    )
    video_fps: float = field(
        default=2.0,
        metadata={"help": "The frames to sample per second for video inputs."},
    )
    video_maxlen: int = field(
        default=64,
        metadata={"help": "The maximum number of sampled frames for video inputs."},
    )
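
# A minimal sketch of how a pixel-count cap such as `image_resolution` is
# commonly enforced (hypothetical helper, not part of this module): an input
# whose width * height exceeds the cap is downscaled by sqrt(cap / pixels),
# preserving the aspect ratio.
#
#     import math
#
#     def fit_resolution(width: int, height: int, cap: int = 512 * 512):
#         if width * height <= cap:
#             return width, height
#         scale = math.sqrt(cap / (width * height))
#         return int(width * scale), int(height * scale)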


@dataclass
class ExportArguments:
    r"""
    Arguments pertaining to the model export.
    """

    export_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the directory to save the exported model."},
    )
    export_size: int = field(
        default=1,
        metadata={"help": "The file shard size (in GB) of the exported model."},
    )
    export_device: Literal["cpu", "auto"] = field(
        default="cpu",
        metadata={"help": "The device used in model export, use `auto` to accelerate exporting."},
    )
    export_quantization_bit: Optional[int] = field(
        default=None,
        metadata={"help": "The number of bits to quantize the exported model."},
    )
    export_quantization_dataset: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the dataset or dataset name to use in quantizing the exported model."},
    )
    export_quantization_nsamples: int = field(
        default=128,
        metadata={"help": "The number of samples used for quantization."},
    )
    export_quantization_maxlen: int = field(
        default=1024,
        metadata={"help": "The maximum length of the model inputs used for quantization."},
    )
    export_legacy_format: bool = field(
        default=False,
        metadata={"help": "Whether or not to save the `.bin` files instead of `.safetensors`."},
    )
    export_hub_model_id: Optional[str] = field(
        default=None,
        metadata={"help": "The name of the repository if push the model to the Hugging Face hub."},
    )
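
# For orientation, a hedged example of how these export fields might appear in
# a training/export YAML config (keys mirror the field names; the exact CLI
# entry point varies by version):
#
#     export_dir: ./exported_model
#     export_size: 2          # 2 GB shards
#     export_device: cpu
#     export_legacy_format: false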


@dataclass
class VllmArguments:
    r"""
    Arguments pertaining to the vLLM worker.
    """

    vllm_maxlen: int = field(
        default=4096,
        metadata={"help": "Maximum sequence (prompt + response) length of the vLLM engine."},
    )
    vllm_gpu_util: float = field(
        default=0.9,
        metadata={"help": "The fraction of GPU memory in (0,1) to be used for the vLLM engine."},
    )
    vllm_enforce_eager: bool = field(
        default=False,
        metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
    )
    vllm_max_lora_rank: int = field(
        default=32,
        metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."},
    )
    vllm_config: Optional[Union[dict, str]] = field(
        default=None,
        metadata={"help": "Config to initialize the vllm engine. Please use JSON strings."},
    )
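
# A hedged sketch of how these fields typically map onto vLLM's public `LLM`
# entry point (illustrative; the real wiring lives in the vLLM worker code):
#
#     from vllm import LLM
#
#     engine = LLM(
#         model="path/to/model",       # hypothetical model path
#         max_model_len=4096,          # vllm_maxlen
#         gpu_memory_utilization=0.9,  # vllm_gpu_util
#         enforce_eager=False,         # vllm_enforce_eager
#         enable_lora=True,            # needed before max_lora_rank applies
#         max_lora_rank=32,            # vllm_max_lora_rank
#     )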


@dataclass
class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments, VllmArguments):
    r"""
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune or infer.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "Path to the model weight or identifier from huggingface.co/models or modelscope.cn/models."
        },
    )
    adapter_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Path to the adapter weight or identifier from huggingface.co/models. "
                "Use commas to separate multiple adapters."
            )
        },
    )
    adapter_folder: Optional[str] = field(
        default=None,
        metadata={"help": "The folder containing the adapter weights to load."},
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."},
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether or not to use one of the fast tokenizer (backed by the tokenizers library)."},
    )
    resize_vocab: bool = field(
        default=False,
        metadata={"help": "Whether or not to resize the tokenizer vocab and the embedding layers."},
    )
    split_special_tokens: bool = field(
        default=False,
        metadata={"help": "Whether or not the special tokens should be split during the tokenization process."},
    )
    new_special_tokens: Optional[str] = field(
        default=None,
        metadata={"help": "Special tokens to be added into the tokenizer. Use commas to separate multiple tokens."},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    low_cpu_mem_usage: bool = field(
        default=True,
        metadata={"help": "Whether or not to use memory-efficient model loading."},
    )
    rope_scaling: Optional[Literal["linear", "dynamic"]] = field(
        default=None,
        metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},
    )
    flash_attn: Literal["auto", "disabled", "sdpa", "fa2"] = field(
        default="auto",
        metadata={"help": "Enable FlashAttention for faster training and inference."},
    )
    shift_attn: bool = field(
        default=False,
        metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."},
    )
    mixture_of_depths: Optional[Literal["convert", "load"]] = field(
        default=None,
        metadata={"help": "Convert the model to mixture-of-depths (MoD) or load the MoD model."},
    )
    use_unsloth: bool = field(
        default=False,
        metadata={"help": "Whether or not to use unsloth's optimization for the LoRA training."},
    )
    use_unsloth_gc: bool = field(
        default=False,
        metadata={"help": "Whether or not to use unsloth's gradient checkpointing."},
    )
    enable_liger_kernel: bool = field(
        default=False,
        metadata={"help": "Whether or not to enable liger kernel for faster training."},
    )
    moe_aux_loss_coef: Optional[float] = field(
        default=None,
        metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."},
    )
    disable_gradient_checkpointing: bool = field(
        default=False,
        metadata={"help": "Whether or not to disable gradient checkpointing."},
    )
    use_reentrant_gc: bool = field(
        default=True,
        metadata={"help": "Whether or not to use reentrant gradient checkpointing."},
    )
    upcast_layernorm: bool = field(
        default=False,
        metadata={"help": "Whether or not to upcast the layernorm weights in fp32."},
    )
    upcast_lmhead_output: bool = field(
        default=False,
        metadata={"help": "Whether or not to upcast the output of lm_head in fp32."},
    )
    train_from_scratch: bool = field(
        default=False,
        metadata={"help": "Whether or not to randomly initialize the model weights."},
    )
    infer_backend: Literal["huggingface", "vllm"] = field(
        default="huggingface",
        metadata={"help": "Backend engine used at inference."},
    )
    offload_folder: str = field(
        default="offload",
        metadata={"help": "Path to offload model weights."},
    )
    use_cache: bool = field(
        default=True,
        metadata={"help": "Whether or not to use KV cache in generation."},
    )
    infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field(
        default="auto",
        metadata={"help": "Data type for model weights and activations at inference."},
    )
    hf_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with Hugging Face Hub."},
    )
    ms_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with ModelScope Hub."},
    )
    om_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with Modelers Hub."},
    )
    print_param_status: bool = field(
        default=False,
        metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
    )
    trust_remote_code: bool = field(
        default=False,
        metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
    )
    compute_dtype: Optional[torch.dtype] = field(
        default=None,
        init=False,
        metadata={"help": "Torch data type for computing model outputs, derived from `fp/bf16`. Do not specify it."},
    )
    device_map: Optional[Union[str, Dict[str, Any]]] = field(
        default=None,
        init=False,
        metadata={"help": "Device map for model placement, derived from training stage. Do not specify it."},
    )
    model_max_length: Optional[int] = field(
        default=None,
        init=False,
        metadata={"help": "The maximum input length for model, derived from `cutoff_len`. Do not specify it."},
    )
    block_diag_attn: bool = field(
        default=False,
        init=False,
        metadata={"help": "Whether use block diag attention or not, derived from `neat_packing`. Do not specify it."},
    )

    def __post_init__(self):
        if self.model_name_or_path is None:
            raise ValueError("Please provide `model_name_or_path`.")

        if self.split_special_tokens and self.use_fast_tokenizer:
            raise ValueError("`split_special_tokens` is only supported for slow tokenizers.")

        if self.adapter_name_or_path is not None:  # support merging multiple lora weights
            self.adapter_name_or_path = [path.strip() for path in self.adapter_name_or_path.split(",")]

        if self.new_special_tokens is not None:  # support multiple special tokens
            self.new_special_tokens = [token.strip() for token in self.new_special_tokens.split(",")]

        if self.export_quantization_bit is not None and self.export_quantization_dataset is None:
            raise ValueError("Quantization dataset is necessary for exporting.")

        if isinstance(self.vllm_config, str) and self.vllm_config.startswith("{"):
            self.vllm_config = _convert_str_dict(json.loads(self.vllm_config))
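        # For example (a typical usage, not mandated here): passing
        # vllm_config='{"enable_chunked_prefill": true}' yields a dict whose
        # string-encoded values ("true", "4096", ...) `_convert_str_dict`
        # coerces to their Python types.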

    @classmethod
    def copyfrom(cls, source: "Self", **kwargs) -> "Self":
        init_args, lazy_args = {}, {}
        for attr in fields(source):
            if attr.init:
                init_args[attr.name] = getattr(source, attr.name)
            else:
                lazy_args[attr.name] = getattr(source, attr.name)

        init_args.update(kwargs)
        result = cls(**init_args)
        for name, value in lazy_args.items():
            setattr(result, name, value)

        return result
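
    # A hedged usage sketch: `copyfrom` clones an instance while overriding
    # selected init fields; derived (init=False) fields such as `compute_dtype`
    # are carried over verbatim, e.g.
    #
    #     infer_args = ModelArguments.copyfrom(model_args, infer_dtype="float16")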

    def to_dict(self) -> Dict[str, Any]:
        args = asdict(self)
        args = {k: f"<{k.upper()}>" if k.endswith("token") else v for k, v in args.items()}
        return args
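
# A minimal usage sketch, assuming transformers' `HfArgumentParser` is used to
# parse these dataclasses (as is common for HF-style training scripts):
#
#     from transformers import HfArgumentParser
#
#     parser = HfArgumentParser(ModelArguments)
#     (model_args,) = parser.parse_args_into_dataclasses()
#     print(model_args.to_dict())  # hub tokens are masked, e.g. "<HF_HUB_TOKEN>"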