# Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
18
19
import os
import sys
luopl's avatar
luopl committed
20
from pathlib import Path
chenych's avatar
chenych committed
21
from typing import Any, Optional, Union
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
22
23
24

import torch
import transformers
chenych's avatar
chenych committed
25
from omegaconf import OmegaConf
luopl's avatar
luopl committed
26
from transformers import HfArgumentParser
chenych's avatar
chenych committed
27
from transformers.integrations import is_deepspeed_zero3_enabled
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
28
from transformers.trainer_utils import get_last_checkpoint
chenych's avatar
chenych committed
29
from transformers.training_args import ParallelMode
luopl's avatar
luopl committed
30
from transformers.utils import is_torch_bf16_gpu_available, is_torch_npu_available
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
31

luopl's avatar
luopl committed
32
from ..extras import logging
chenych's avatar
chenych committed
33
from ..extras.constants import CHECKPOINT_NAMES, EngineName
chenych's avatar
chenych committed
34
from ..extras.misc import check_dependencies, check_version, get_current_device, is_env_enabled
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
35
36
37
38
39
from .data_args import DataArguments
from .evaluation_args import EvaluationArguments
from .finetuning_args import FinetuningArguments
from .generating_args import GeneratingArguments
from .model_args import ModelArguments
luopl's avatar
luopl committed
40
from .training_args import RayArguments, TrainingArguments
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
41
42


luopl's avatar
luopl committed
43
logger = logging.get_logger(__name__)

# NOTE(review): executed at import time; presumably validates installed package versions -- confirm in extras.misc.
check_dependencies()


# For each entry point: the dataclass types handed to `HfArgumentParser` (`_*_ARGS`) and the
# tuple type of the parsed result (`_*_CLS`). The order of each list matches the order in
# which the parsed dataclasses are unpacked by the `get_*_args` functions in this module.
_TRAIN_ARGS = [ModelArguments, DataArguments, TrainingArguments, FinetuningArguments, GeneratingArguments]
_TRAIN_CLS = tuple[ModelArguments, DataArguments, TrainingArguments, FinetuningArguments, GeneratingArguments]
_INFER_ARGS = [ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_INFER_CLS = tuple[ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_EVAL_ARGS = [ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
_EVAL_CLS = tuple[ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
54
55


chenych's avatar
chenych committed
56
57
def read_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> Union[dict[str, Any], list[str]]:
    r"""Get arguments from the command line or a config file.

    Args:
        args: Pre-built arguments. When not ``None`` they are returned unchanged and
            ``sys.argv`` is not consulted.

    Returns:
        ``args`` itself when given. Otherwise, if the first command-line argument ends with
        ``.yaml``, ``.yml`` or ``.json``, the content of that file loaded through OmegaConf and
        merged with the remaining command-line overrides, converted to a plain container.
        Otherwise the command-line arguments after the program name, which is an empty list
        when the program was started without arguments.
    """
    if args is not None:
        return args

    cli_args = sys.argv[1:]
    # Without any command-line argument there is no config path to inspect; returning the
    # empty list lets the argument parser report what is missing instead of an IndexError.
    if not cli_args:
        return cli_args

    config_path, *overrides = cli_args
    if config_path.endswith((".yaml", ".yml", ".json")):
        # Command-line overrides take precedence over values stored in the config file.
        override_config = OmegaConf.from_cli(overrides)
        dict_config = OmegaConf.load(Path(config_path).absolute())
        return OmegaConf.to_container(OmegaConf.merge(dict_config, override_config))

    return cli_args
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
71
72


luopl's avatar
luopl committed
73
def _parse_args(
    parser: "HfArgumentParser", args: Optional[Union[dict[str, Any], list[str]]] = None, allow_extra_keys: bool = False
) -> tuple[Any]:
    r"""Parse arguments with ``parser`` and return the resulting dataclass instances.

    Args:
        parser: The argument parser to use.
        args: A dict of values, a list of command-line strings, or ``None`` to let
            ``read_args`` obtain them.
        allow_extra_keys: Whether arguments unknown to the parser are tolerated.

    Returns:
        The parsed objects, in the order produced by the parser.

    Raises:
        ValueError: If command-line strings are left unconsumed and ``allow_extra_keys`` is false.
    """
    resolved = read_args(args)
    if isinstance(resolved, dict):
        return parser.parse_dict(resolved, allow_extra_keys=allow_extra_keys)

    # The parser appends the list of unconsumed strings as the last element of its result.
    outputs = list(parser.parse_args_into_dataclasses(args=resolved, return_remaining_strings=True))
    leftover = outputs.pop()

    if leftover and not allow_extra_keys:
        print(parser.format_help())
        print(f"Got unknown args, potentially deprecated arguments: {leftover}")
        raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {leftover}")

    return tuple(outputs)
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
88
89


luopl's avatar
luopl committed
90
def _set_transformers_logging() -> None:
chenych's avatar
chenych committed
91
92
93
94
    if os.getenv("LLAMAFACTORY_VERBOSITY", "INFO") in ["DEBUG", "INFO"]:
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
95
96


chenych's avatar
chenych committed
97
98
99
100
101
102
103
104
def _set_env_vars() -> None:
    r"""Apply NPU-specific runtime settings; do nothing when no NPU is available."""
    if not is_torch_npu_available():
        return

    # avoid JIT compile on NPU devices, see https://zhuanlan.zhihu.com/p/660875458
    jit_compile = is_env_enabled("NPU_JIT_COMPILE")
    torch.npu.set_compile_mode(jit_compile=jit_compile)
    # avoid use fork method on NPU devices, see https://github.com/hiyouga/LLaMA-Factory/issues/7447
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


chenych's avatar
chenych committed
105
106
107
108
109
def _verify_model_args(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
110
111
112
113
114
115
116
    if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora":
        raise ValueError("Adapter is only valid for the LoRA method.")

    if model_args.quantization_bit is not None:
        if finetuning_args.finetuning_type != "lora":
            raise ValueError("Quantization is only compatible with the LoRA method.")

chenych's avatar
chenych committed
117
118
119
120
121
122
        if finetuning_args.pissa_init:
            raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA for a quantized model.")

        if model_args.resize_vocab:
            raise ValueError("Cannot resize embedding layers of a quantized model.")

Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
123
124
125
126
127
128
        if model_args.adapter_name_or_path is not None and finetuning_args.create_new_adapter:
            raise ValueError("Cannot create new adapter upon a quantized model.")

        if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
            raise ValueError("Quantized model only accepts a single adapter. Merge them first.")

chenych's avatar
chenych committed
129
    if data_args.template == "yi" and model_args.use_fast_tokenizer:
luopl's avatar
luopl committed
130
        logger.warning_rank0("We should use slow tokenizer for the Yi models. Change `use_fast_tokenizer` to False.")
chenych's avatar
chenych committed
131
132
        model_args.use_fast_tokenizer = False

Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
133
134
135
136

def _check_extra_dependencies(
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    training_args: Optional["TrainingArguments"] = None,
) -> None:
    r"""Run ``check_version`` for every optional package required by the enabled features.

    Args:
        model_args: Model options; selects unsloth, liger-kernel, mixture-of-depth and the
            inference backend checks.
        finetuning_args: Fine-tuning options; selects optimizer, logging and plotting checks.
        training_args: Training options; when given, also selects the DeepSpeed and
            generation-metric checks.
    """
    for flag_name, requirement in (("use_unsloth", "unsloth"), ("enable_liger_kernel", "liger-kernel")):
        if getattr(model_args, flag_name):
            check_version(requirement, mandatory=True)

    if model_args.mixture_of_depths is not None:
        check_version("mixture-of-depth>=1.1.6", mandatory=True)

    # For the inference backends the version range is checked first, then presence is enforced.
    if model_args.infer_backend == EngineName.VLLM:
        check_version("vllm>=0.4.3,<=0.9.1")
        check_version("vllm", mandatory=True)
    elif model_args.infer_backend == EngineName.SGLANG:
        check_version("sglang>=0.4.5")
        check_version("sglang", mandatory=True)

    finetuning_requirements = (
        ("use_galore", "galore_torch"),
        ("use_apollo", "apollo_torch"),
        ("use_badam", "badam>=1.2.1"),
        ("use_adam_mini", "adam-mini"),
        ("use_swanlab", "swanlab"),
        ("plot_loss", "matplotlib"),
    )
    for flag_name, requirement in finetuning_requirements:
        if getattr(finetuning_args, flag_name):
            check_version(requirement, mandatory=True)

    if training_args is None:
        return

    if training_args.deepspeed:
        # pin deepspeed version < 0.17 because of https://github.com/deepspeedai/DeepSpeed/issues/7347
        check_version("deepspeed>=0.10.0,<=0.16.9", mandatory=True)

    if training_args.predict_with_generate:
        for requirement in ("jieba", "nltk", "rouge_chinese"):
            check_version(requirement, mandatory=True)
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
182
183


chenych's avatar
chenych committed
184
def _parse_train_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _TRAIN_CLS:
    r"""Parse the training dataclasses; unknown arguments are tolerated only if ``ALLOW_EXTRA_ARGS`` is enabled."""
    return _parse_args(
        HfArgumentParser(_TRAIN_ARGS),
        args,
        allow_extra_keys=is_env_enabled("ALLOW_EXTRA_ARGS"),
    )
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
188
189


chenych's avatar
chenych committed
190
def _parse_infer_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _INFER_CLS:
    r"""Parse the inference dataclasses; unknown arguments are tolerated only if ``ALLOW_EXTRA_ARGS`` is enabled."""
    return _parse_args(
        HfArgumentParser(_INFER_ARGS),
        args,
        allow_extra_keys=is_env_enabled("ALLOW_EXTRA_ARGS"),
    )
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
194
195


chenych's avatar
chenych committed
196
def _parse_eval_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _EVAL_CLS:
    r"""Parse the evaluation dataclasses; unknown arguments are tolerated only if ``ALLOW_EXTRA_ARGS`` is enabled."""
    return _parse_args(
        HfArgumentParser(_EVAL_ARGS),
        args,
        allow_extra_keys=is_env_enabled("ALLOW_EXTRA_ARGS"),
    )
Rayyyyy's avatar
V0.6.3  
Rayyyyy committed
200
201


chenych's avatar
chenych committed
202
def get_ray_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> RayArguments:
    r"""Parse only the Ray arguments, ignoring every argument the Ray dataclass does not define."""
    parsed = _parse_args(HfArgumentParser(RayArguments), args, allow_extra_keys=True)
    # Exactly one dataclass was registered, so exactly one object is expected back.
    (ray_args,) = parsed
    return ray_args


chenych's avatar
chenych committed
208
def _validate_train_args(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "TrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
    r"""Reject training argument combinations that are not supported.

    Raises:
        ValueError: On the first unsupported combination encountered.
    """
    if finetuning_args.stage != "sft":
        if training_args.predict_with_generate:
            raise ValueError("`predict_with_generate` cannot be set as True except SFT.")

        if data_args.neat_packing:
            raise ValueError("`neat_packing` cannot be set as True except SFT.")

        if data_args.train_on_prompt or data_args.mask_history:
            raise ValueError("`train_on_prompt` or `mask_history` cannot be set as True except SFT.")

    if finetuning_args.stage == "sft" and training_args.do_predict and not training_args.predict_with_generate:
        raise ValueError("Please enable `predict_with_generate` to save model predictions.")

    if finetuning_args.stage in ["rm", "ppo"] and training_args.load_best_model_at_end:
        raise ValueError("RM and PPO stages do not support `load_best_model_at_end`.")

    if finetuning_args.stage == "ppo":
        if not training_args.do_train:
            raise ValueError("PPO training does not support evaluation, use the SFT stage to evaluate models.")

        if model_args.shift_attn:
            raise ValueError("PPO training is incompatible with S^2-Attn.")

        if finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
            raise ValueError("Unsloth does not support lora reward model.")

        if training_args.report_to and training_args.report_to[0] not in ["wandb", "tensorboard"]:
            raise ValueError("PPO only accepts wandb or tensorboard logger.")

    if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
        raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")

    if training_args.deepspeed and training_args.parallel_mode != ParallelMode.DISTRIBUTED:
        raise ValueError("Please use `FORCE_TORCHRUN=1` to launch DeepSpeed training.")

    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")

    if training_args.do_train and data_args.dataset is None:
        raise ValueError("Please specify dataset for training.")

    if (training_args.do_eval or training_args.do_predict) and (
        data_args.eval_dataset is None and data_args.val_size < 1e-6
    ):
        raise ValueError("Please specify dataset for evaluation.")

    if training_args.predict_with_generate:
        if is_deepspeed_zero3_enabled():
            raise ValueError("`predict_with_generate` is incompatible with DeepSpeed ZeRO-3.")

        if data_args.eval_dataset is None:
            raise ValueError("Cannot use `predict_with_generate` if `eval_dataset` is None.")

        if finetuning_args.compute_accuracy:
            raise ValueError("Cannot use `predict_with_generate` and `compute_accuracy` together.")

    if training_args.do_train and model_args.quantization_device_map == "auto":
        raise ValueError("Cannot use device map for quantized models in training.")

    if finetuning_args.pissa_init and is_deepspeed_zero3_enabled():
        raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA in DeepSpeed ZeRO-3.")

    if finetuning_args.pure_bf16:
        if not (is_torch_bf16_gpu_available() or (is_torch_npu_available() and torch.npu.is_bf16_supported())):
            raise ValueError("This device does not support `pure_bf16`.")

        if is_deepspeed_zero3_enabled():
            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")

    if training_args.parallel_mode == ParallelMode.DISTRIBUTED:
        if finetuning_args.use_galore and finetuning_args.galore_layerwise:
            raise ValueError("Distributed training does not support layer-wise GaLore.")

        if finetuning_args.use_apollo and finetuning_args.apollo_layerwise:
            raise ValueError("Distributed training does not support layer-wise APOLLO.")

        if finetuning_args.use_badam:
            if finetuning_args.badam_mode == "ratio":
                raise ValueError("Ratio-based BAdam does not yet support distributed training, use layer-wise BAdam.")
            elif not is_deepspeed_zero3_enabled():
                raise ValueError("Layer-wise BAdam only supports DeepSpeed ZeRO-3 training.")

    if training_args.deepspeed is not None and (finetuning_args.use_galore or finetuning_args.use_apollo):
        raise ValueError("GaLore and APOLLO are incompatible with DeepSpeed yet.")

    if model_args.infer_backend != EngineName.HF:
        raise ValueError("vLLM/SGLang backend is only available for API, CLI and Web.")

    if model_args.use_unsloth and is_deepspeed_zero3_enabled():
        raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")


def _warn_train_args(
    model_args: "ModelArguments",
    training_args: "TrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
    r"""Log non-fatal recommendations about the chosen training configuration."""
    if (
        training_args.do_train
        and finetuning_args.finetuning_type == "lora"
        and model_args.quantization_bit is None
        and model_args.resize_vocab
        and finetuning_args.additional_target is None
    ):
        logger.warning_rank0(
            "Remember to add embedding layers to `additional_target` to make the added tokens trainable."
        )

    if training_args.do_train and model_args.quantization_bit is not None and (not model_args.upcast_layernorm):
        logger.warning_rank0("We recommend enable `upcast_layernorm` in quantized training.")

    if training_args.do_train and (not training_args.fp16) and (not training_args.bf16):
        logger.warning_rank0("We recommend enable mixed precision training.")

    if (
        training_args.do_train
        and (finetuning_args.use_galore or finetuning_args.use_apollo)
        and not finetuning_args.pure_bf16
    ):
        logger.warning_rank0(
            "Using GaLore or APOLLO with mixed precision training may significantly increases GPU memory usage."
        )

    if (not training_args.do_train) and model_args.quantization_bit is not None:
        logger.warning_rank0("Evaluating model in 4/8-bit mode may cause lower scores.")

    if (not training_args.do_train) and finetuning_args.stage == "dpo" and finetuning_args.ref_model is None:
        logger.warning_rank0("Specify `ref_model` for computing rewards at evaluation.")


def _resolve_resume_checkpoint(
    training_args: "TrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
    r"""Settle ``training_args.resume_from_checkpoint`` for the current stage and output directory.

    Raises:
        ValueError: If the output directory holds checkpoint files but no resumable
            checkpoint, and ``overwrite_output_dir`` is not set.
    """
    if finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type in ["full", "freeze"]:
        can_resume_from_checkpoint = False
        if training_args.resume_from_checkpoint is not None:
            logger.warning_rank0("Cannot resume from checkpoint in current stage.")
            training_args.resume_from_checkpoint = None
    else:
        can_resume_from_checkpoint = True

    # Auto-detect the latest checkpoint only when the user neither named one nor asked to overwrite.
    if (
        training_args.resume_from_checkpoint is None
        and training_args.do_train
        and os.path.isdir(training_args.output_dir)
        and not training_args.overwrite_output_dir
        and can_resume_from_checkpoint
    ):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and any(
            os.path.isfile(os.path.join(training_args.output_dir, name)) for name in CHECKPOINT_NAMES
        ):
            raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.")

        if last_checkpoint is not None:
            training_args.resume_from_checkpoint = last_checkpoint
            logger.info_rank0(f"Resuming training from {training_args.resume_from_checkpoint}.")
            logger.info_rank0("Change `output_dir` or use `overwrite_output_dir` to avoid.")

    if (
        finetuning_args.stage in ["rm", "ppo"]
        and finetuning_args.finetuning_type == "lora"
        and training_args.resume_from_checkpoint is not None
    ):
        logger.warning_rank0(
            f"Add {training_args.resume_from_checkpoint} to `adapter_name_or_path` to resume training from checkpoint."
        )


def get_train_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _TRAIN_CLS:
    r"""Parse, validate and post-process the arguments for a training run.

    Args:
        args: A dict of values, a list of command-line strings, or ``None`` to read them
            from the command line or a config file.

    Returns:
        The model, data, training, finetuning and generating argument objects, in that order.

    Raises:
        ValueError: If the arguments describe an unsupported configuration.
    """
    model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)

    # Setup logging
    if training_args.should_log:
        _set_transformers_logging()

    # Check arguments
    _validate_train_args(model_args, data_args, training_args, finetuning_args)
    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args, training_args)
    _warn_train_args(model_args, training_args, finetuning_args)

    # Post-process training arguments
    training_args.generation_max_length = training_args.generation_max_length or data_args.cutoff_len
    training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams
    training_args.remove_unused_columns = False  # important for multimodal dataset

    if finetuning_args.finetuning_type == "lora":
        # https://github.com/huggingface/transformers/blob/v4.50.0/src/transformers/trainer.py#L782
        training_args.label_names = training_args.label_names or ["labels"]

    if "swanlab" in training_args.report_to and finetuning_args.use_swanlab:
        training_args.report_to.remove("swanlab")

    if (
        training_args.parallel_mode == ParallelMode.DISTRIBUTED
        and training_args.ddp_find_unused_parameters is None
        and finetuning_args.finetuning_type == "lora"
    ):
        logger.info_rank0("Set `ddp_find_unused_parameters` to False in DDP training since LoRA is enabled.")
        training_args.ddp_find_unused_parameters = False

    _resolve_resume_checkpoint(training_args, finetuning_args)

    # Post-process model arguments
    if training_args.bf16 or finetuning_args.pure_bf16:
        model_args.compute_dtype = torch.bfloat16
    elif training_args.fp16:
        model_args.compute_dtype = torch.float16

    model_args.device_map = {"": get_current_device()}
    model_args.model_max_length = data_args.cutoff_len
    model_args.block_diag_attn = data_args.neat_packing
    # Packing defaults to on for the pre-training stage when the user did not decide.
    data_args.packing = data_args.packing if data_args.packing is not None else finetuning_args.stage == "pt"

    # Log on each process the small summary
    logger.info(
        f"Process rank: {training_args.process_index}, "
        f"world size: {training_args.world_size}, device: {training_args.device}, "
        f"distributed training: {training_args.parallel_mode == ParallelMode.DISTRIBUTED}, "
        f"compute dtype: {str(model_args.compute_dtype)}"
    )
    transformers.set_seed(training_args.seed)

    return model_args, data_args, training_args, finetuning_args, generating_args


chenych's avatar
chenych committed
421
def get_infer_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _INFER_CLS:
    r"""Parse, validate and post-process the arguments for inference or export.

    Args:
        args: A dict of values, a list of command-line strings, or ``None`` to read them
            from the command line or a config file.

    Returns:
        The model, data, finetuning and generating argument objects, in that order.

    Raises:
        ValueError: If the vLLM backend is combined with an unsupported option, or the
            model arguments fail verification.
    """
    parsed = _parse_infer_args(args)
    model_args, data_args, finetuning_args, generating_args = parsed

    # Setup logging
    _set_transformers_logging()

    # Check arguments
    if model_args.infer_backend == "vllm":
        if finetuning_args.stage != "sft":
            raise ValueError("vLLM engine only supports auto-regressive models.")

        if model_args.quantization_bit is not None:
            raise ValueError("vLLM engine does not support bnb quantization (GPTQ and AWQ are supported).")

        if model_args.rope_scaling is not None:
            raise ValueError("vLLM engine does not support RoPE scaling.")

        adapters = model_args.adapter_name_or_path
        if adapters is not None and len(adapters) != 1:
            raise ValueError("vLLM only accepts a single adapter. Merge them first.")

    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    # Post-process model arguments
    export_on_cpu = model_args.export_dir is not None and model_args.export_device == "cpu"
    if not export_on_cpu:
        model_args.device_map = "auto"
    else:
        model_args.device_map = {"": torch.device("cpu")}
        if data_args.cutoff_len != DataArguments().cutoff_len:  # override cutoff_len if it is not default
            model_args.model_max_length = data_args.cutoff_len

    return model_args, data_args, finetuning_args, generating_args


chenych's avatar
chenych committed
456
def get_eval_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _EVAL_CLS:
    r"""Parse, validate and post-process the arguments for evaluation.

    Args:
        args: A dict of values, a list of command-line strings, or ``None`` to read them
            from the command line or a config file.

    Returns:
        The model, data, evaluation and finetuning argument objects, in that order.

    Raises:
        ValueError: If an inference backend other than ``EngineName.HF`` is selected, or
            the model arguments fail verification.
    """
    parsed = _parse_eval_args(args)
    model_args, data_args, eval_args, finetuning_args = parsed

    # Setup logging
    _set_transformers_logging()

    # Check arguments
    if model_args.infer_backend != EngineName.HF:
        raise ValueError("vLLM/SGLang backend is only available for API, CLI and Web.")

    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    # Post-process model arguments
    model_args.device_map = "auto"
    transformers.set_seed(eval_args.seed)

    return model_args, data_args, eval_args, finetuning_args