export.py 1.79 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Export a GPTModel."""
import functools
import os
import sys
import warnings

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import modelopt.torch.export as mtex
import torch

from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.model_provider import model_provider
from megatron.training import get_args, get_model
from megatron.training.initialize import initialize_megatron
from megatron.training.utils import unwrap_model

warnings.filterwarnings('ignore')


def add_modelopt_export_args(parser):
    """Add additional arguments for ModelOpt hf-like export."""
    group = parser.add_argument_group(title='ModelOpt hf-like export')
    group.add_argument(
        "--pretrained-model-name",
        type=str,
        help="A pretrained model hosted inside a model repo on huggingface.co.",
    )
    group.add_argument("--export-dir", type=str, help="The target export path.")
    add_modelopt_args(parser)
    return parser


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_modelopt_export_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )

    args = get_args()

    model = get_model(functools.partial(model_provider, parallel_output=True), wrap_with_ddp=False)

    if args.load is not None:
        _ = load_modelopt_checkpoint(model)

    unwrapped_model = unwrap_model(model)[0]

    mtex.export_mcore_gpt_to_hf(
        unwrapped_model,
        args.pretrained_model_name,
        dtype=torch.bfloat16,
        export_dir=args.export_dir,
    )