Commit aa2c6708 authored by dongcl

patch for megatron core_v0.12.0

parent cf5d3189
@@ -16,6 +16,9 @@ from megatron.core.enums import ModelType
 from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
 from megatron.core.datasets.gpt_dataset import GPTDatasetConfig
 from megatron.core.datasets.gpt_dataset import MockGPTDataset, GPTDataset
+from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import (
+    get_gpt_heterogeneous_layer_spec,
+)
 from megatron.core.rerun_state_machine import get_rerun_state_machine
 import megatron.legacy.model
 from megatron.core.models.gpt import GPTModel
@@ -35,6 +38,7 @@ from megatron.core.models.gpt.gpt_layer_specs import (
     get_gpt_layer_with_transformer_engine_spec,
     get_gpt_mtp_block_spec,
 )
+from megatron.core.transformer.transformer_block import TransformerBlockSubmodules
 from dcu_megatron import megatron_adaptor
@@ -98,6 +102,8 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megat
         if args.num_experts:
             # Define the decoder block spec
             transformer_layer_spec = get_gpt_decoder_block_spec(config, use_transformer_engine=use_te, normalization=args.normalization)
+        elif args.heterogeneous_layers_config_path is not None:
+            transformer_layer_spec = get_gpt_heterogeneous_layer_spec(config, use_te)
         else:
             # Define the decoder layer spec
             if use_te:
...
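For context, the third hunk inserts a new branch into model_provider's layer-spec selection: when --heterogeneous-layers-config-path is set (and the model has no MoE experts, since the check is an elif after args.num_experts), the spec comes from get_gpt_heterogeneous_layer_spec rather than the dense paths. Below is a minimal sketch of that dispatch order. select_layer_spec is a hypothetical helper, not part of Megatron or this patch, and the argument lists in the dense fallback are abbreviated from upstream pretrain_gpt.py; only the branch order and the calls visible in the diff are taken from the source.

# Hypothetical helper illustrating the dispatch order this patch creates
# inside model_provider; not part of Megatron or the dcu_megatron patch.
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_decoder_block_spec,
    get_gpt_layer_local_spec,
    get_gpt_layer_with_transformer_engine_spec,
)
from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import (
    get_gpt_heterogeneous_layer_spec,
)

def select_layer_spec(args, config, use_te):
    # 1. MoE models take the full decoder block spec (first branch in the diff).
    if args.num_experts:
        return get_gpt_decoder_block_spec(
            config, use_transformer_engine=use_te, normalization=args.normalization
        )
    # 2. New in this patch: per-layer ("heterogeneous") specs driven by a
    #    JSON config file, mirroring upstream core_v0.12.0 behavior.
    if args.heterogeneous_layers_config_path is not None:
        return get_gpt_heterogeneous_layer_spec(config, use_te)
    # 3. Dense fallback, with or without Transformer Engine (argument lists
    #    abbreviated; the diff truncates before this branch).
    if use_te:
        return get_gpt_layer_with_transformer_engine_spec(
            args.num_experts, args.moe_grouped_gemm, args.qk_layernorm
        )
    return get_gpt_layer_local_spec(args.num_experts, args.moe_grouped_gemm, args.qk_layernorm)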