evt_fugx1 / dcu_megatron · Commits · 425a9899

Commit 425a9899 authored Apr 14, 2025 by dongcl

bug fix

parent 9800dec4
Showing 2 changed files with 5 additions and 5 deletions (+5 −5)

dcu_megatron/adaptor/megatron_adaptor.py    +3 −3
dcu_megatron/legacy/model/transformer.py    +2 −2
dcu_megatron/adaptor/megatron_adaptor.py

@@ -241,15 +241,15 @@ class LegacyAdaptation(MegatronAdaptationABC):
         self.patch_legacy_models()
 
     def patch_legacy_models(self):
-        from ..legacy.model.transformer import ParallelMLP, ParallelAttention
+        from ..legacy.model.transformer import ParallelMLPPatch, ParallelAttentionPatch
         from ..legacy.model.utils import get_norm
 
         # ParallecMLP
         MegatronAdaptation.register('megatron.legacy.model.transformer.ParallelMLP.__init__',
-                                    ParallelMLP.__init__)
+                                    ParallelMLPPatch.__init__)
         MegatronAdaptation.register('megatron.legacy.model.transformer.ParallelAttention.forward',
-                                    ParallelAttention.forward)
+                                    ParallelAttentionPatch.forward)
 
         # rms_norm.RMSNorm
         MegatronAdaptation.register('megatron.legacy.model.rms_norm.RMSNorm.forward',
...
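
The registration calls above only record which upstream attributes should be replaced; the implementation of MegatronAdaptation itself is not part of this commit. As a rough mental model, a register-then-apply monkey-patching mechanism along the following lines would behave the way the diff suggests. This is a minimal sketch only: the registry dict, the apply() method, and the assumption that every dotted path ends in a class attribute are hypothetical, not dcu_megatron's actual code.

import importlib


class MegatronAdaptation:
    """Hypothetical sketch of a dotted-path -> replacement registry.

    Only the register() calls are visible in this commit; the body below is an
    assumption about how such an adaptor could apply its patches.
    """

    _registry = {}

    @classmethod
    def register(cls, dotted_path, replacement):
        # e.g. register('megatron.legacy.model.transformer.ParallelMLP.__init__',
        #               ParallelMLPPatch.__init__)
        cls._registry[dotted_path] = replacement

    @classmethod
    def apply(cls):
        for dotted_path, replacement in cls._registry.items():
            # Assumes every path has the form '<module>.<Class>.<attribute>'.
            module_path, class_name, attr_name = dotted_path.rsplit('.', 2)
            module = importlib.import_module(module_path)
            target_cls = getattr(module, class_name)
            # Overwrite the upstream attribute with the patched callable.
            setattr(target_cls, attr_name, replacement)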
dcu_megatron/legacy/model/transformer.py

@@ -10,7 +10,7 @@ from megatron.legacy.model.utils import (
 )
 
-class ParallelMLP(MegatronModule):
+class ParallelMLPPatch(MegatronModule):
     """MLP.
 
     MLP will take the input with h hidden state, project it to 4*h
...
@@ -74,7 +74,7 @@ class ParallelMLP(MegatronModule):
 )
 
-class ParallelAttention(MegatronModule):
+class ParallelAttentionPatch(MegatronModule):
     """Parallel self-attention layer abstract class.
 
     Self-attention layer takes input with size [s, b, h]
...
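
The net effect of the commit: the local override classes in dcu_megatron/legacy/model/transformer.py are renamed from ParallelMLP and ParallelAttention to ParallelMLPPatch and ParallelAttentionPatch, presumably so they are no longer confused with the upstream Megatron classes of the same names, and the registrations in megatron_adaptor.py are updated to point at the renamed classes. Assuming an apply step like the sketch above, the upstream names keep working but pick up the patched behaviour, roughly as follows (hypothetical usage, not code from this repository):

# Hypothetical check, assuming MegatronAdaptation.apply() has already run.
from megatron.legacy.model.transformer import ParallelMLP, ParallelAttention
from dcu_megatron.legacy.model.transformer import ParallelMLPPatch, ParallelAttentionPatch

# The upstream classes keep their names, but their patched members now come
# from the renamed dcu_megatron override classes.
assert ParallelMLP.__init__ is ParallelMLPPatch.__init__
assert ParallelAttention.forward is ParallelAttentionPatch.forward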