Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
evt_fugx1
dcu_megatron
Commits
aa2c6708
Commit
aa2c6708
authored
May 19, 2025
by
dongcl
Browse files
patch for megatron core_v0.12.0
parent
cf5d3189
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
0 deletions
+6
-0
pretrain_gpt.py
pretrain_gpt.py
+6
-0
No files found.
pretrain_gpt.py
View file @
aa2c6708
...
@@ -16,6 +16,9 @@ from megatron.core.enums import ModelType
...
@@ -16,6 +16,9 @@ from megatron.core.enums import ModelType
from
megatron.core.datasets.blended_megatron_dataset_builder
import
BlendedMegatronDatasetBuilder
from
megatron.core.datasets.blended_megatron_dataset_builder
import
BlendedMegatronDatasetBuilder
from
megatron.core.datasets.gpt_dataset
import
GPTDatasetConfig
from
megatron.core.datasets.gpt_dataset
import
GPTDatasetConfig
from
megatron.core.datasets.gpt_dataset
import
MockGPTDataset
,
GPTDataset
from
megatron.core.datasets.gpt_dataset
import
MockGPTDataset
,
GPTDataset
from
megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs
import
(
get_gpt_heterogeneous_layer_spec
,
)
from
megatron.core.rerun_state_machine
import
get_rerun_state_machine
from
megatron.core.rerun_state_machine
import
get_rerun_state_machine
import
megatron.legacy.model
import
megatron.legacy.model
from
megatron.core.models.gpt
import
GPTModel
from
megatron.core.models.gpt
import
GPTModel
...
@@ -35,6 +38,7 @@ from megatron.core.models.gpt.gpt_layer_specs import (
...
@@ -35,6 +38,7 @@ from megatron.core.models.gpt.gpt_layer_specs import (
get_gpt_layer_with_transformer_engine_spec
,
get_gpt_layer_with_transformer_engine_spec
,
get_gpt_mtp_block_spec
,
get_gpt_mtp_block_spec
,
)
)
from
megatron.core.transformer.transformer_block
import
TransformerBlockSubmodules
from
dcu_megatron
import
megatron_adaptor
from
dcu_megatron
import
megatron_adaptor
...
@@ -98,6 +102,8 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megat
...
@@ -98,6 +102,8 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megat
if
args
.
num_experts
:
if
args
.
num_experts
:
# Define the decoder block spec
# Define the decoder block spec
transformer_layer_spec
=
get_gpt_decoder_block_spec
(
config
,
use_transformer_engine
=
use_te
,
normalization
=
args
.
normalization
)
transformer_layer_spec
=
get_gpt_decoder_block_spec
(
config
,
use_transformer_engine
=
use_te
,
normalization
=
args
.
normalization
)
elif
args
.
heterogeneous_layers_config_path
is
not
None
:
transformer_layer_spec
=
get_gpt_heterogeneous_layer_spec
(
config
,
use_te
)
else
:
else
:
# Define the decoder layer spec
# Define the decoder layer spec
if
use_te
:
if
use_te
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment