"vscode:/vscode.git/clone" did not exist on "fa3c9e0668f16b9c5946dd621202f47324e71786"
Commit d520d24f authored by silencealiang's avatar silencealiang

Merge branch 'main' into 'main'

Upgrade Megatron to v0.10

See merge request !3
parents 3aca1415 481609bb
.. Lumache documentation master file, created by
   sphinx-quickstart on Tue Aug 15 13:44:10 2023.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.
Megatron Core User Guide
===================================
**Megatron Core** is a Python library that provides the core components required to build your language models.
A reference implementation of Megatron Core can be found in `NeMo <https://github.com/NVIDIA/NeMo/tree/main>`_. It offers a *simple* and
*intuitive* API.

.. toctree::
   :maxdepth: 2
   :caption: User Guide

   user-guide/index

.. toctree::
   :maxdepth: 3
   :caption: API Guide

   api-guide/index
...
User Guide
============

.. mdinclude:: ../../../megatron/core/QuickStart.md
\ No newline at end of file
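
Since this index page is the reader's entry point, a minimal orientation sketch of the API it describes may help. This is a sketch, not the QuickStart itself; it assumes megatron.core is installed and a single GPU is visible, and mirrors the initialization pattern from the included QuickStart.md:

```python
# Minimal sketch, assuming megatron.core is installed and one GPU is visible.
import os
import torch
from megatron.core import parallel_state

def initialize_distributed(tp_size: int = 1, pp_size: int = 1):
    # torch.distributed must be up before Megatron Core's parallel state.
    os.environ.setdefault('MASTER_ADDR', 'localhost')
    os.environ.setdefault('MASTER_PORT', '29500')
    torch.distributed.init_process_group(backend='nccl', world_size=1, rank=0)
    parallel_state.initialize_model_parallel(
        tensor_model_parallel_size=tp_size,
        pipeline_model_parallel_size=pp_size,
    )

if __name__ == '__main__':
    initialize_distributed()
    print(parallel_state.get_tensor_model_parallel_rank())  # 0 on this toy setup
```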
@@ -107,7 +107,7 @@ def get_score(line):
     except UnicodeDecodeError:
         try:
             decoded_text = encoded_text[:20476].decode('utf8')
-        except:
+        except Exception:
             print("Error occurred")
             data['score'] = None
     return json.dumps(data)
@@ -138,7 +138,7 @@ def get_scores(lines):
     except UnicodeDecodeError:
         try:
             decoded_text = encoded_text[:20476].decode('utf8')
-        except:
+        except Exception:
             print("Error occurred")
             data['score'] = None
     all_data.append(json.dumps(data))
...
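For context on these two hunks: the change from a bare `except` to `except Exception` matters because a bare except also traps BaseException subclasses such as SystemExit and KeyboardInterrupt, so a Ctrl-C could be swallowed inside the scoring loop. A standalone sketch of the decode-with-truncation pattern (the function name and byte limit here are illustrative, not from the patched file):

```python
# Why `except Exception` instead of a bare `except`: KeyboardInterrupt and
# SystemExit derive from BaseException and still propagate with this form.
def decode_prefix(encoded_text: bytes, limit: int = 20476):
    """Decode at most `limit` bytes of UTF-8, tolerating a split codepoint."""
    try:
        return encoded_text[:limit].decode('utf8')
    except Exception:  # e.g. UnicodeDecodeError from truncating mid-codepoint
        return None

print(decode_prefix("héllo".encode('utf8'), limit=2))  # b'h\xc3' -> None
```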
@@ -10,18 +10,20 @@ import os
 import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              os.path.pardir, os.path.pardir)))
-from megatron import get_args
-from megatron import get_timers
-from megatron import get_tokenizer
-from megatron import print_rank_0
+from megatron.training import get_args
+from megatron.training import get_timers
+from megatron.training import get_tokenizer
+from megatron.training import print_rank_0
 from megatron.core import mpu
-from megatron.data.blendable_dataset import BlendableDataset
-from megatron.data.gpt_dataset import build_train_valid_test_datasets
-from megatron.model import GPTModel
+from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
+from megatron.core.datasets.blended_megatron_dataset_config import GPTDatasetConfig
+from megatron.core.datasets.gpt_dataset import GPTDataset
+from megatron.core.datasets.utils import get_blend_from_list
+from megatron.legacy.model import GPTModel
 from megatron.core.enums import ModelType
 from megatron.training import pretrain
-from megatron.utils import get_ltor_masks_and_position_ids
-from megatron.utils import average_losses_across_data_parallel_group
+from megatron.training.utils import get_ltor_masks_and_position_ids
+from megatron.training.utils import average_losses_across_data_parallel_group


 def model_provider(pre_process=True, post_process=True):
     """Build the model."""
@@ -101,24 +103,34 @@ def train_valid_test_datasets_provider(train_val_test_num_samples):
     print_rank_0('> building train, validation, and test datasets '
                  'for GPT ...')
-    train_ds, valid_ds1, test_ds = build_train_valid_test_datasets(
-        data_prefix=args.data_path,
-        data_impl=args.data_impl,
-        splits_string=args.split,
-        train_valid_test_num_samples=train_val_test_num_samples,
-        seq_length=args.seq_length,
-        seed=args.seed,
-        skip_warmup=(not args.mmap_warmup))
+    train_ds, _, test_ds = BlendedMegatronDatasetBuilder(
+        GPTDataset,
+        train_val_test_num_samples,
+        lambda: True,
+        GPTDatasetConfig(
+            blend=get_blend_from_list(args.data_path),
+            split=args.split,
+            random_seed=args.seed,
+            sequence_length=args.seq_length,
+            path_to_cache=args.data_cache_path,
+            return_document_ids=False
+        )
+    ).build()
     print_rank_0("> finished creating finetuning GPT datasets ...")

-    _, valid_ds, _ = build_train_valid_test_datasets(
-        data_prefix=args.data_path2,
-        data_impl="mmap",
-        splits_string="98,2,0",
-        train_valid_test_num_samples=train_val_test_num_samples,
-        seq_length=2048,
-        seed=1234,
-        skip_warmup=(not args.mmap_warmup))
+    _, valid_ds, _ = BlendedMegatronDatasetBuilder(
+        GPTDataset,
+        train_val_test_num_samples,
+        lambda: True,
+        GPTDatasetConfig(
+            blend=get_blend_from_list(args.data_path2),
+            split="98,2,0",
+            random_seed=1234,
+            sequence_length=2048,
+            path_to_cache=args.data_cache_path,
+            return_document_ids=False
+        )
+    ).build()
     print_rank_0("> finished creating pretrained GPT datasets ...")

     return train_ds, valid_ds, test_ds
...
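The hunk above is the heart of the v0.10 migration: the removed `build_train_valid_test_datasets` helper is replaced by `BlendedMegatronDatasetBuilder` plus a `GPTDatasetConfig`. A standalone sketch of the same call shape, using a hypothetical preprocessed-data prefix and only the config fields used above (some core releases require additional fields, e.g. a tokenizer, so treat this as a shape reference rather than a drop-in script):

```python
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.blended_megatron_dataset_config import GPTDatasetConfig
from megatron.core.datasets.gpt_dataset import GPTDataset
from megatron.core.datasets.utils import get_blend_from_list

# "/data/my-corpus_text_document" is a placeholder prefix for the .bin/.idx
# files produced by the preprocessing tools; swap in a real path.
config = GPTDatasetConfig(
    blend=get_blend_from_list(["/data/my-corpus_text_document"]),
    split="98,2,0",          # train/valid/test percentages
    random_seed=1234,
    sequence_length=2048,
    path_to_cache=None,      # None: rebuild the sample index instead of caching
    return_document_ids=False,
)

# One dataset per split; `lambda: True` means "build on every rank".
train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder(
    GPTDataset,
    [1000, 100, 0],          # requested sample counts per split
    lambda: True,
    config,
).build()
print(len(train_ds))
```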
@@ -43,7 +43,6 @@ python -m torch.distributed.run $DISTRIBUTED_ARGS \
        --data-path2 ${DATA_BLEND} \
        --vocab-file $VOCAB_FILE \
        --merge-file $MERGE_FILE \
-       --data-impl mmap \
        --split 100,0,0 \
        --distributed-backend nccl \
        --lr-decay-style constant \
...
@@ -9,23 +9,84 @@ import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              os.path.pardir, os.path.pardir)))
 import torch
-from megatron import get_args
-from megatron import get_tokenizer
-from megatron import print_rank_0
-from megatron.checkpointing import load_checkpoint
+from megatron.training import get_args
+from megatron.training import get_tokenizer
+from megatron.training import print_rank_0
+from megatron.training.checkpointing import load_checkpoint
 from megatron.core import mpu
-from megatron.initialize import initialize_megatron
-from megatron.model import GPTModel
+from megatron.training.initialize import initialize_megatron
 from megatron.training import get_model
-from megatron.text_generation import generate_and_post_process
+from megatron.inference.text_generation import generate_and_post_process
+from megatron.training.arguments import core_transformer_config_from_args
+from megatron.core.models.gpt import GPTModel
+from typing import Union
+import megatron.legacy.model
+from megatron.core.transformer.spec_utils import import_module
+from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec, get_gpt_layer_local_spec


-def model_provider(pre_process=True, post_process=True):
-    """Build the model."""
+def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
+    """Builds the model.
+
+    If args.use_legacy_models is True, returns the legacy GPT model; otherwise returns the Megatron Core GPT model.
+
+    Args:
+        pre_process (bool, optional): Set to True to compute embeddings. Defaults to True.
+        post_process (bool, optional): Set to True to compute output logits/loss. Defaults to True.
+
+    Returns:
+        Union[GPTModel, megatron.legacy.model.GPTModel]: The returned model
+    """
+    args = get_args()

     print_rank_0('building GPT model ...')
-    model = GPTModel(num_tokentypes=0, parallel_output=False,
-                     pre_process=pre_process, post_process=post_process)
+    config = core_transformer_config_from_args(args)
+
+    if args.use_legacy_models:
+        model = megatron.legacy.model.GPTModel(
+            config,
+            num_tokentypes=0,
+            parallel_output=False,
+            pre_process=pre_process,
+            post_process=post_process
+        )
+    else:
+        if args.spec is None:
+            if args.transformer_impl == 'local':
+                transformer_layer_spec = get_gpt_layer_local_spec(
+                    num_experts=args.num_experts,
+                    moe_grouped_gemm=args.moe_grouped_gemm
+                )
+            elif args.transformer_impl == 'transformer_engine':
+                transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec(
+                    num_experts=args.num_experts,
+                    moe_grouped_gemm=args.moe_grouped_gemm
+                )
+            else:
+                raise ValueError(f"Invalid transformer_impl {args.transformer_impl}")
+        elif args.spec[0] == 'local':
+            transformer_layer_spec = get_gpt_layer_local_spec(
+                num_experts=args.num_experts,
+                moe_grouped_gemm=args.moe_grouped_gemm
+            )
+        else:
+            transformer_layer_spec = import_module(args.spec)
+
+        model = GPTModel(
+            config=config,
+            transformer_layer_spec=transformer_layer_spec,
+            vocab_size=args.padded_vocab_size,
+            max_sequence_length=args.max_position_embeddings,
+            pre_process=pre_process,
+            post_process=post_process,
+            fp16_lm_cross_entropy=args.fp16_lm_cross_entropy,
+            parallel_output=False,
+            share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
+            position_embedding_type=args.position_embedding_type,
+            rotary_percent=args.rotary_percent
+        )

     return model
...
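To see how the rewritten model_provider is consumed, here is a condensed, hypothetical driver in the spirit of the surrounding text-generation script (argument parsing, server setup, and error handling omitted; the return shape of generate_and_post_process may vary by release):

```python
from megatron.training import get_args, get_model
from megatron.training.initialize import initialize_megatron
from megatron.training.checkpointing import load_checkpoint
from megatron.inference.text_generation import generate_and_post_process

# Reads the usual Megatron CLI flags (--use-legacy-models, --transformer-impl, ...).
initialize_megatron(args_defaults={'no_load_rng': True, 'no_load_optim': True})
args = get_args()

model = get_model(model_provider, wrap_with_ddp=False)
if args.load is not None:
    _ = load_checkpoint(model, None, None)
model = model[0]  # get_model returns a list of model chunks

responses, _, _, _ = generate_and_post_process(
    model, prompts=["Hello, world"], tokens_to_generate=32)
print(responses[0])
```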