parser.py 23.1 KB
Newer Older
chenych's avatar
chenych committed
1
# Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
chenych's avatar
chenych committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
18
19
import os
import sys
luopl's avatar
luopl committed
20
from pathlib import Path
shihm's avatar
uodata  
shihm committed
21
from typing import Any, Optional
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
22
23
24

import torch
import transformers
chenych's avatar
chenych committed
25
from omegaconf import OmegaConf
luopl's avatar
luopl committed
26
from transformers import HfArgumentParser
chenych's avatar
chenych committed
27
from transformers.integrations import is_deepspeed_zero3_enabled
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
28
from transformers.trainer_utils import get_last_checkpoint
chenych's avatar
chenych committed
29
from transformers.training_args import ParallelMode
luopl's avatar
luopl committed
30
from transformers.utils import is_torch_bf16_gpu_available, is_torch_npu_available
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
31

luopl's avatar
luopl committed
32
from ..extras import logging
chenych's avatar
chenych committed
33
from ..extras.constants import CHECKPOINT_NAMES, EngineName
chenych's avatar
chenych committed
34
from ..extras.misc import check_dependencies, check_version, get_current_device, is_env_enabled
shihm's avatar
uodata  
shihm committed
35
from ..extras.packages import is_mcore_adapter_available, is_transformers_version_greater_than
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
36
37
38
39
40
from .data_args import DataArguments
from .evaluation_args import EvaluationArguments
from .finetuning_args import FinetuningArguments
from .generating_args import GeneratingArguments
from .model_args import ModelArguments
luopl's avatar
luopl committed
41
from .training_args import RayArguments, TrainingArguments
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
42
43


luopl's avatar
luopl committed
44
# Module-level logger using the project's rank-aware logging wrapper.
logger = logging.get_logger(__name__)


# Fail fast at import time if required dependency versions are unsatisfied.
check_dependencies()


luopl's avatar
luopl committed
49
# Argument dataclass groups for each entry point. The *_ARGS lists are fed to
# HfArgumentParser; the *_CLS tuple types describe the parsed return values.
_TRAIN_ARGS = [ModelArguments, DataArguments, TrainingArguments, FinetuningArguments, GeneratingArguments]
_TRAIN_CLS = tuple[ModelArguments, DataArguments, TrainingArguments, FinetuningArguments, GeneratingArguments]
_INFER_ARGS = [ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_INFER_CLS = tuple[ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_EVAL_ARGS = [ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
_EVAL_CLS = tuple[ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
55

shihm's avatar
uodata  
shihm committed
56
57
# Megatron-Core adapter (MCA) support is optional: only import the package and
# build the MCA argument groups when it is installed AND USE_MCA is enabled.
if is_mcore_adapter_available() and is_env_enabled("USE_MCA"):
    from mcore_adapter import TrainingArguments as McaTrainingArguments

    _TRAIN_MCA_ARGS = [ModelArguments, DataArguments, McaTrainingArguments, FinetuningArguments, GeneratingArguments]
    _TRAIN_MCA_CLS = tuple[
        ModelArguments, DataArguments, McaTrainingArguments, FinetuningArguments, GeneratingArguments
    ]
else:
    # Placeholders so the module-level names always exist for annotations/callers.
    _TRAIN_MCA_ARGS = []
    _TRAIN_MCA_CLS = tuple()


def read_args(args: dict[str, Any] | list[str] | None = None) -> dict[str, Any] | list[str]:
chenych's avatar
chenych committed
69
    r"""Get arguments from the command line or a config file."""
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
70
    if args is not None:
luopl's avatar
luopl committed
71
        return args
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
72

chenych's avatar
chenych committed
73
74
    if sys.argv[1].endswith(".yaml") or sys.argv[1].endswith(".yml"):
        override_config = OmegaConf.from_cli(sys.argv[2:])
chenych's avatar
chenych committed
75
        dict_config = OmegaConf.load(Path(sys.argv[1]).absolute())
chenych's avatar
chenych committed
76
77
78
        return OmegaConf.to_container(OmegaConf.merge(dict_config, override_config))
    elif sys.argv[1].endswith(".json"):
        override_config = OmegaConf.from_cli(sys.argv[2:])
chenych's avatar
chenych committed
79
        dict_config = OmegaConf.load(Path(sys.argv[1]).absolute())
chenych's avatar
chenych committed
80
        return OmegaConf.to_container(OmegaConf.merge(dict_config, override_config))
luopl's avatar
luopl committed
81
82
    else:
        return sys.argv[1:]
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
83
84


luopl's avatar
luopl committed
85
def _parse_args(
    parser: "HfArgumentParser", args: dict[str, Any] | list[str] | None = None, allow_extra_keys: bool = False
) -> tuple[Any]:
    """Run *parser* over a dict, an argv-style list, or the actual command line.

    Unknown arguments raise ``ValueError`` unless ``allow_extra_keys`` is set.
    """
    resolved = read_args(args)
    if isinstance(resolved, dict):
        return parser.parse_dict(resolved, allow_extra_keys=allow_extra_keys)

    *dataclass_values, leftover = parser.parse_args_into_dataclasses(args=resolved, return_remaining_strings=True)

    if leftover and not allow_extra_keys:
        print(parser.format_help())
        print(f"Got unknown args, potentially deprecated arguments: {leftover}")
        raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {leftover}")

    return tuple(dataclass_values)
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
100
101


luopl's avatar
luopl committed
102
def _set_transformers_logging() -> None:
    """Enable info-level transformers logging unless LLAMAFACTORY_VERBOSITY is raised above INFO."""
    verbosity = os.getenv("LLAMAFACTORY_VERBOSITY", "INFO")
    if verbosity in ("DEBUG", "INFO"):
        hf_logging = transformers.utils.logging
        hf_logging.set_verbosity_info()
        hf_logging.enable_default_handler()
        hf_logging.enable_explicit_format()
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
107
108


chenych's avatar
chenych committed
109
110
111
112
113
114
115
116
def _set_env_vars() -> None:
    """Apply NPU-specific process settings; a no-op on all other devices."""
    if not is_torch_npu_available():
        return

    # avoid JIT compile on NPU devices, see https://zhuanlan.zhihu.com/p/660875458
    torch.npu.set_compile_mode(jit_compile=is_env_enabled("NPU_JIT_COMPILE"))
    # avoid use fork method on NPU devices, see https://github.com/hiyouga/LLaMA-Factory/issues/7447
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


chenych's avatar
chenych committed
117
118
119
120
121
def _verify_model_args(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
122
123
124
125
    if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora":
        raise ValueError("Adapter is only valid for the LoRA method.")

    if model_args.quantization_bit is not None:
shihm's avatar
uodata  
shihm committed
126
127
        if finetuning_args.finetuning_type not in ["lora", "oft"]:
            raise ValueError("Quantization is only compatible with the LoRA or OFT method.")
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
128

chenych's avatar
chenych committed
129
130
131
132
133
134
        if finetuning_args.pissa_init:
            raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA for a quantized model.")

        if model_args.resize_vocab:
            raise ValueError("Cannot resize embedding layers of a quantized model.")

Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
135
136
137
138
139
140
        if model_args.adapter_name_or_path is not None and finetuning_args.create_new_adapter:
            raise ValueError("Cannot create new adapter upon a quantized model.")

        if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
            raise ValueError("Quantized model only accepts a single adapter. Merge them first.")

chenych's avatar
chenych committed
141
    if data_args.template == "yi" and model_args.use_fast_tokenizer:
luopl's avatar
luopl committed
142
        logger.warning_rank0("We should use slow tokenizer for the Yi models. Change `use_fast_tokenizer` to False.")
chenych's avatar
chenych committed
143
144
        model_args.use_fast_tokenizer = False

shihm's avatar
uodata  
shihm committed
145
146
147
148
149
150
151
152
    # Validate advanced training features
    if model_args.fp8 and model_args.quantization_bit is not None:
        raise ValueError("FP8 training is not compatible with quantization. Please disable one of them.")

    if model_args.fp8_enable_fsdp_float8_all_gather and not model_args.fp8:
        logger.warning_rank0("fp8_enable_fsdp_float8_all_gather requires fp8=True. Setting fp8=True.")
        model_args.fp8 = True

Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
153
154
155
156

def _check_extra_dependencies(
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    training_args: Optional["TrainingArguments"] = None,
) -> None:
    """Verify optional third-party packages required by the selected features are installed.

    Checks run in a fixed order; ``mandatory=True`` checks raise when the package
    is missing, while non-mandatory ones only validate the installed version.
    """
    # (enabled, requirement spec, mandatory) triples, evaluated in order.
    feature_checks = [
        (model_args.use_kt, "ktransformers", True),
        (model_args.use_unsloth, "unsloth", True),
        (model_args.enable_liger_kernel, "liger-kernel", True),
        (model_args.mixture_of_depths is not None, "mixture-of-depth>=1.1.6", True),
        (model_args.infer_backend == EngineName.VLLM, "vllm>=0.4.3,<=0.11.0", False),
        (model_args.infer_backend == EngineName.VLLM, "vllm", True),
        (model_args.infer_backend == EngineName.SGLANG, "sglang>=0.4.5", False),
        (model_args.infer_backend == EngineName.SGLANG, "sglang", True),
        (finetuning_args.use_galore, "galore_torch", True),
        (finetuning_args.use_apollo, "apollo_torch", True),
        (finetuning_args.use_badam, "badam>=1.2.1", True),
        (finetuning_args.use_adam_mini, "adam-mini", True),
        (finetuning_args.use_swanlab, "swanlab", True),
        (finetuning_args.plot_loss, "matplotlib", True),
    ]

    if training_args is not None:
        deepspeed_on = bool(training_args.deepspeed)
        # pin deepspeed version < 0.17 because of https://github.com/deepspeedai/DeepSpeed/issues/7347
        feature_checks.append((deepspeed_on, "deepspeed", True))
        feature_checks.append((deepspeed_on, "deepspeed>=0.10.0,<=0.16.9", False))
        generate = training_args.predict_with_generate
        feature_checks.append((generate, "jieba", True))
        feature_checks.append((generate, "nltk", True))
        feature_checks.append((generate, "rouge_chinese", True))

    for enabled, requirement, mandatory in feature_checks:
        if not enabled:
            continue
        if mandatory:
            check_version(requirement, mandatory=True)
        else:
            check_version(requirement)
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
206
207


shihm's avatar
uodata  
shihm committed
208
def _parse_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS:
    """Parse the five training argument groups from *args* or the command line."""
    return _parse_args(
        HfArgumentParser(_TRAIN_ARGS), args, allow_extra_keys=is_env_enabled("ALLOW_EXTRA_ARGS")
    )
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
212
213


shihm's avatar
uodata  
shihm committed
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def _parse_train_mca_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_MCA_CLS:
    """Parse the training argument groups using the Megatron-Core adapter dataclass."""
    allow_extra = is_env_enabled("ALLOW_EXTRA_ARGS")
    parsed = _parse_args(HfArgumentParser(_TRAIN_MCA_ARGS), args, allow_extra_keys=allow_extra)
    model_args, data_args, training_args, finetuning_args, generating_args = parsed

    # Sync MCA-specific flags and neutralize generation settings before returning.
    _configure_mca_training_args(training_args, data_args, finetuning_args)

    return model_args, data_args, training_args, finetuning_args, generating_args


def _configure_mca_training_args(training_args, data_args, finetuning_args) -> None:
    """Patch training args to avoid args checking errors and sync MCA settings."""
    training_args.predict_with_generate = False
    training_args.generation_max_length = data_args.cutoff_len
    training_args.generation_num_beams = 1
    training_args.use_mca = True
    finetuning_args.use_mca = True


def _parse_infer_args(args: dict[str, Any] | list[str] | None = None) -> _INFER_CLS:
    """Parse the four inference argument groups from *args* or the command line."""
    return _parse_args(
        HfArgumentParser(_INFER_ARGS), args, allow_extra_keys=is_env_enabled("ALLOW_EXTRA_ARGS")
    )
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
239
240


shihm's avatar
uodata  
shihm committed
241
def _parse_eval_args(args: dict[str, Any] | list[str] | None = None) -> _EVAL_CLS:
    """Parse the four evaluation argument groups from *args* or the command line."""
    return _parse_args(
        HfArgumentParser(_EVAL_ARGS), args, allow_extra_keys=is_env_enabled("ALLOW_EXTRA_ARGS")
    )
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
245
246


shihm's avatar
uodata  
shihm committed
247
def get_ray_args(args: dict[str, Any] | list[str] | None = None) -> RayArguments:
    """Extract only the Ray-related arguments, tolerating any extra keys."""
    parsed = _parse_args(HfArgumentParser(RayArguments), args, allow_extra_keys=True)
    (ray_args,) = parsed
    return ray_args


shihm's avatar
uodata  
shihm committed
253
254
255
256
257
258
def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS:
    r"""Parse, validate and post-process all argument groups for a training run.

    Args:
        args: A config dict, an argv-style list, or ``None`` to read the command line.

    Returns:
        A tuple of (model_args, data_args, training_args, finetuning_args, generating_args),
        mutated in place where settings need reconciliation.

    Raises:
        ValueError: If an unsupported or inconsistent combination of options is given.
    """
    # Megatron-Core adapter path uses its own TrainingArguments dataclass.
    if is_env_enabled("USE_MCA"):
        model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_mca_args(args)
    else:
        model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)
        finetuning_args.use_mca = False

    # Setup logging
    if training_args.should_log:
        _set_transformers_logging()

    # Check arguments
    # Generation-style prediction and packing options are SFT-only features.
    if finetuning_args.stage != "sft":
        if training_args.predict_with_generate:
            raise ValueError("`predict_with_generate` cannot be set as True except SFT.")

        if data_args.neat_packing:
            raise ValueError("`neat_packing` cannot be set as True except SFT.")

        if data_args.train_on_prompt or data_args.mask_history:
            raise ValueError("`train_on_prompt` or `mask_history` cannot be set as True except SFT.")

    if finetuning_args.stage == "sft" and training_args.do_predict and not training_args.predict_with_generate:
        raise ValueError("Please enable `predict_with_generate` to save model predictions.")

    if finetuning_args.stage in ["rm", "ppo"] and training_args.load_best_model_at_end:
        raise ValueError("RM and PPO stages do not support `load_best_model_at_end`.")

    if finetuning_args.stage == "ppo":
        if not training_args.do_train:
            raise ValueError("PPO training does not support evaluation, use the SFT stage to evaluate models.")

        if model_args.shift_attn:
            raise ValueError("PPO training is incompatible with S^2-Attn.")

        if finetuning_args.reward_model_type == "lora" and model_args.use_kt:
            raise ValueError("KTransformers does not support lora reward model.")

        if finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
            raise ValueError("Unsloth does not support lora reward model.")

        if training_args.report_to and training_args.report_to[0] not in ["wandb", "tensorboard"]:
            raise ValueError("PPO only accepts wandb or tensorboard logger.")

    if not model_args.use_kt and training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
        raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")

    if training_args.deepspeed and training_args.parallel_mode != ParallelMode.DISTRIBUTED:
        raise ValueError("Please use `FORCE_TORCHRUN=1` to launch DeepSpeed training.")

    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")

    if training_args.do_train and data_args.dataset is None:
        raise ValueError("Please specify dataset for training.")

    # Any evaluation/prediction mode needs either an eval dataset or a validation split.
    if (training_args.do_eval or training_args.do_predict or training_args.predict_with_generate) and (
        data_args.eval_dataset is None and data_args.val_size < 1e-6
    ):
        raise ValueError("Please make sure eval_dataset be provided or val_size >1e-6")

    if training_args.predict_with_generate:
        if is_deepspeed_zero3_enabled():
            raise ValueError("`predict_with_generate` is incompatible with DeepSpeed ZeRO-3.")

        if finetuning_args.compute_accuracy:
            raise ValueError("Cannot use `predict_with_generate` and `compute_accuracy` together.")

    if training_args.do_train and model_args.quantization_device_map == "auto":
        raise ValueError("Cannot use device map for quantized models in training.")

    if finetuning_args.pissa_init and is_deepspeed_zero3_enabled():
        raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA in DeepSpeed ZeRO-3.")

    if finetuning_args.pure_bf16:
        if not (is_torch_bf16_gpu_available() or (is_torch_npu_available() and torch.npu.is_bf16_supported())):
            raise ValueError("This device does not support `pure_bf16`.")

        if is_deepspeed_zero3_enabled():
            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")

    # Layer-wise optimizer variants have restricted distributed support.
    if training_args.parallel_mode == ParallelMode.DISTRIBUTED:
        if finetuning_args.use_galore and finetuning_args.galore_layerwise:
            raise ValueError("Distributed training does not support layer-wise GaLore.")

        if finetuning_args.use_apollo and finetuning_args.apollo_layerwise:
            raise ValueError("Distributed training does not support layer-wise APOLLO.")

        if finetuning_args.use_badam:
            if finetuning_args.badam_mode == "ratio":
                raise ValueError("Radio-based BAdam does not yet support distributed training, use layer-wise BAdam.")
            elif not is_deepspeed_zero3_enabled():
                raise ValueError("Layer-wise BAdam only supports DeepSpeed ZeRO-3 training.")

    if training_args.deepspeed is not None and (finetuning_args.use_galore or finetuning_args.use_apollo):
        raise ValueError("GaLore and APOLLO are incompatible with DeepSpeed yet.")

    if model_args.infer_backend != EngineName.HF:
        raise ValueError("vLLM/SGLang backend is only available for API, CLI and Web.")

    if model_args.use_unsloth and is_deepspeed_zero3_enabled():
        raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")

    if model_args.use_kt and is_deepspeed_zero3_enabled():
        raise ValueError("KTransformers is incompatible with DeepSpeed ZeRO-3.")

    if data_args.neat_packing and is_transformers_version_greater_than("4.53.0"):
        raise ValueError("Neat packing is incompatible with transformers>=4.53.0.")

    # Shared validation and dependency checks (also used by the infer/eval entry points).
    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args, training_args)

    if (
        training_args.do_train
        and finetuning_args.finetuning_type == "lora"
        and model_args.quantization_bit is None
        and model_args.resize_vocab
        and finetuning_args.additional_target is None
    ):
        logger.warning_rank0(
            "Remember to add embedding layers to `additional_target` to make the added tokens trainable."
        )

    if training_args.do_train and model_args.quantization_bit is not None and (not model_args.upcast_layernorm):
        logger.warning_rank0("We recommend enable `upcast_layernorm` in quantized training.")

    if training_args.do_train and (not training_args.fp16) and (not training_args.bf16):
        logger.warning_rank0("We recommend enable mixed precision training.")

    if (
        training_args.do_train
        and (finetuning_args.use_galore or finetuning_args.use_apollo)
        and not finetuning_args.pure_bf16
    ):
        logger.warning_rank0(
            "Using GaLore or APOLLO with mixed precision training may significantly increases GPU memory usage."
        )

    if (not training_args.do_train) and model_args.quantization_bit is not None:
        logger.warning_rank0("Evaluating model in 4/8-bit mode may cause lower scores.")

    if (not training_args.do_train) and finetuning_args.stage == "dpo" and finetuning_args.ref_model is None:
        logger.warning_rank0("Specify `ref_model` for computing rewards at evaluation.")

    # Post-process training arguments
    training_args.generation_max_length = training_args.generation_max_length or data_args.cutoff_len
    training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams
    training_args.remove_unused_columns = False  # important for multimodal dataset

    if finetuning_args.finetuning_type == "lora":
        # https://github.com/huggingface/transformers/blob/v4.50.0/src/transformers/trainer.py#L782
        training_args.label_names = training_args.label_names or ["labels"]

    # SwanLab is driven by finetuning_args; drop it from report_to to avoid double reporting.
    if "swanlab" in training_args.report_to and finetuning_args.use_swanlab:
        training_args.report_to.remove("swanlab")

    if (
        training_args.parallel_mode == ParallelMode.DISTRIBUTED
        and training_args.ddp_find_unused_parameters is None
        and finetuning_args.finetuning_type == "lora"
    ):
        logger.info_rank0("Set `ddp_find_unused_parameters` to False in DDP training since LoRA is enabled.")
        training_args.ddp_find_unused_parameters = False

    # Decide whether auto-resume from output_dir is allowed for this stage.
    if finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type in ["full", "freeze"]:
        can_resume_from_checkpoint = False
        if training_args.resume_from_checkpoint is not None:
            logger.warning_rank0("Cannot resume from checkpoint in current stage.")
            training_args.resume_from_checkpoint = None
    else:
        can_resume_from_checkpoint = True

    if (
        training_args.resume_from_checkpoint is None
        and training_args.do_train
        and os.path.isdir(training_args.output_dir)
        and not training_args.overwrite_output_dir
        and can_resume_from_checkpoint
    ):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        # A non-empty output dir without a checkpoint means stale artifacts, not a resume point.
        if last_checkpoint is None and any(
            os.path.isfile(os.path.join(training_args.output_dir, name)) for name in CHECKPOINT_NAMES
        ):
            raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.")

        if last_checkpoint is not None:
            training_args.resume_from_checkpoint = last_checkpoint
            logger.info_rank0(f"Resuming training from {training_args.resume_from_checkpoint}.")
            logger.info_rank0("Change `output_dir` or use `overwrite_output_dir` to avoid.")

    if (
        finetuning_args.stage in ["rm", "ppo"]
        and finetuning_args.finetuning_type == "lora"
        and training_args.resume_from_checkpoint is not None
    ):
        logger.warning_rank0(
            f"Add {training_args.resume_from_checkpoint} to `adapter_name_or_path` to resume training from checkpoint."
        )

    # Post-process model arguments
    if training_args.bf16 or finetuning_args.pure_bf16:
        model_args.compute_dtype = torch.bfloat16
    elif training_args.fp16:
        model_args.compute_dtype = torch.float16

    model_args.device_map = {"": get_current_device()}
    model_args.model_max_length = data_args.cutoff_len
    model_args.block_diag_attn = data_args.neat_packing
    # Default to packing only for pre-training unless the user set it explicitly.
    data_args.packing = data_args.packing if data_args.packing is not None else finetuning_args.stage == "pt"

    # Log on each process the small summary
    logger.info(
        f"Process rank: {training_args.process_index}, "
        f"world size: {training_args.world_size}, device: {training_args.device}, "
        f"distributed training: {training_args.parallel_mode == ParallelMode.DISTRIBUTED}, "
        f"compute dtype: {str(model_args.compute_dtype)}"
    )
    transformers.set_seed(training_args.seed)

    return model_args, data_args, training_args, finetuning_args, generating_args


shihm's avatar
uodata  
shihm committed
476
def get_infer_args(args: dict[str, Any] | list[str] | None = None) -> _INFER_CLS:
    r"""Parse, validate and post-process the argument groups for inference.

    Args:
        args: A config dict, an argv-style list, or ``None`` to read the command line.

    Returns:
        A tuple of (model_args, data_args, finetuning_args, generating_args).

    Raises:
        ValueError: If an option combination unsupported by the chosen backend is given.
    """
    model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)

    # Setup logging
    _set_transformers_logging()

    # Check arguments
    if model_args.infer_backend == "vllm":
        if finetuning_args.stage != "sft":
            raise ValueError("vLLM engine only supports auto-regressive models.")

        if model_args.quantization_bit is not None:
            raise ValueError("vLLM engine does not support bnb quantization (GPTQ and AWQ are supported).")

        if model_args.rope_scaling is not None:
            raise ValueError("vLLM engine does not support RoPE scaling.")

        if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
            raise ValueError("vLLM only accepts a single adapter. Merge them first.")

    # Shared validation and dependency checks (also used by the train/eval entry points).
    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    # Post-process model arguments
    if model_args.export_dir is not None and model_args.export_device == "cpu":
        # CPU export: pin the whole model to CPU instead of auto device placement.
        model_args.device_map = {"": torch.device("cpu")}
        if data_args.cutoff_len != DataArguments().cutoff_len:  # override cutoff_len if it is not default
            model_args.model_max_length = data_args.cutoff_len
    else:
        model_args.device_map = "auto"

    return model_args, data_args, finetuning_args, generating_args


shihm's avatar
uodata  
shihm committed
511
def get_eval_args(args: dict[str, Any] | list[str] | None = None) -> _EVAL_CLS:
    r"""Parse, validate and post-process the argument groups for evaluation.

    Args:
        args: A config dict, an argv-style list, or ``None`` to read the command line.

    Returns:
        A tuple of (model_args, data_args, eval_args, finetuning_args).

    Raises:
        ValueError: If a non-HF inference backend is requested for evaluation.
    """
    model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)

    # Setup logging
    _set_transformers_logging()

    # Check arguments
    if model_args.infer_backend != EngineName.HF:
        raise ValueError("vLLM/SGLang backend is only available for API, CLI and Web.")

    # Shared validation and dependency checks (also used by the train/infer entry points).
    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    model_args.device_map = "auto"

    transformers.set_seed(eval_args.seed)

    return model_args, data_args, eval_args, finetuning_args