ModelZoo / GPT2_pytorch · Commit eeac1cc4

add fp16 support

Authored Jul 22, 2023 by liangjing
Parent: c138a95c
Pipeline #442 failed
Showing 4 changed files with 14 additions and 9 deletions (+14 -9)
README.md                             +2 -1
megatron/model/fused_layer_norm.py    +3 -2
megatron/model/fused_softmax.py       +3 -1
megatron/model/transformer.py         +6 -5
README.md

@@ -44,6 +44,7 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/vscode-pytorch:1.10.0-centos
 ```
 pip install -r requirements.txt -i http://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
+pip install lightop-0.1-cp37-cp37m-linux_x86_64.whl  # install the optimized operator library
 ```
 ### Training (single node)

@@ -95,7 +96,7 @@ pip install -r requirements.txt -i http://pypi.tuna.tsinghua.edu.cn/simple --t
 ```
 rm megatron/arguments.py
 cp megatron/arguments.py-nodes megatron/arguments.py
-sbatch run-16B.sh  (main parameters are in single-16B.sh)
+sbatch run-16B.sh  (main parameters are in single-16B.sh; training defaults to fp32 precision, run sbatch run-16B-fp16.sh to train in fp16)
 ```
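The run scripts depend on the lightop wheel being installed first. A minimal post-install sanity check, assuming only that the wheel provides the `lightop.op` and `lightop.fusesoftmax` modules imported by the diffs below (the check itself is illustrative, not part of the repo):

```python
# Hypothetical sanity check: confirm the lightop modules that the patched
# Megatron code imports are importable in the current environment.
import importlib

for name in ("lightop.op", "lightop.fusesoftmax"):
    try:
        importlib.import_module(name)
        print(f"{name}: OK")
    except ImportError as exc:
        print(f"{name}: missing ({exc}); install lightop-0.1-cp37-cp37m-linux_x86_64.whl first")
```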
megatron/model/fused_layer_norm.py

@@ -32,7 +32,7 @@ import torch.nn.functional as F

 global fused_mix_prec_layer_norm_cuda
 fused_mix_prec_layer_norm_cuda = None
+from lightop import op

 class FusedLayerNormAffineFunction(torch.autograd.Function):

@@ -108,4 +108,5 @@ class MixedFusedLayerNorm(torch.nn.Module):

         return FusedLayerNormAffineFunction.apply(input, self.weight, self.bias, self.normalized_shape, self.eps)
     else:
-        return F.layer_norm(input, self.normalized_shape, self.weight, self.bias)
+        #return F.layer_norm(input, self.normalized_shape, self.weight, self.bias)
+        return op.layernorm_forward_autograd(input, self.weight, self.bias, self.eps)
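This hunk routes the eager `F.layer_norm` fallback through lightop's fused kernel. A minimal sketch of the same dispatch pattern, assuming only the call signature visible in the diff, `op.layernorm_forward_autograd(input, weight, bias, eps)`; the guarded import and fallback are illustrative additions, not part of the commit:

```python
import torch
import torch.nn.functional as F

try:
    from lightop import op  # optimized operator library from the wheel
    _HAS_LIGHTOP = True
except ImportError:
    _HAS_LIGHTOP = False


def layer_norm_forward(x, weight, bias, normalized_shape, eps=1e-5):
    """Use the fused lightop kernel when available, else plain PyTorch."""
    if _HAS_LIGHTOP:
        # Signature as used in the diff: (input, weight, bias, eps).
        return op.layernorm_forward_autograd(x, weight, bias, eps)
    return F.layer_norm(x, normalized_shape, weight, bias, eps)
```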
megatron/model/fused_softmax.py

@@ -17,6 +17,7 @@ from functools import lru_cache

 import torch
 import torch.nn as nn
 from megatron.enums import AttnMaskType
+from lightop.fusesoftmax import FuseSoftmax

 class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
     """

@@ -221,7 +222,8 @@ class FusedScaleMaskSoftmax(nn.Module):

         mask_output = self.mask_func(input, mask) if mask is not None else input
-        probs = torch.nn.Softmax(dim=-1)(mask_output)
+        #probs = torch.nn.Softmax(dim=-1)(mask_output)
+        probs = FuseSoftmax(dim=-1)(mask_output)

         if self.input_in_float16 and self.softmax_in_fp32:
             if self.input_in_fp16:
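Here the eager `torch.nn.Softmax` is swapped for lightop's `FuseSoftmax`, called with the same `dim=-1` signature. A minimal sketch of the mask-then-softmax path with a fallback, assuming only that `FuseSoftmax(dim=-1)` is a drop-in for `torch.nn.Softmax(dim=-1)` (as the diff uses it); the generic `masked_fill` stands in for `self.mask_func` and is not taken from the commit:

```python
import torch

try:
    # Fused softmax from the optimized operator library (as imported in the diff).
    from lightop.fusesoftmax import FuseSoftmax as _Softmax
except ImportError:
    # Plain PyTorch fallback when lightop is not installed.
    _Softmax = torch.nn.Softmax


def scale_mask_softmax(scores, mask=None):
    """Mask attention scores, then softmax over the last dimension."""
    masked = scores.masked_fill(mask, -10000.0) if mask is not None else scores
    return _Softmax(dim=-1)(masked)
```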
megatron/model/transformer.py

@@ -32,7 +32,7 @@ import deepspeed

 from .glu_activations import GLU_ACTIVATIONS
 from .positional_embeddings import RotaryEmbedding, apply_rotary_pos_emb_torch, apply_rotary_pos_emb
+from lightop import op

 # flags required to enable jit fusion kernels
 torch._C._jit_set_profiling_mode(False)
 torch._C._jit_set_profiling_executor(False)

@@ -407,8 +407,9 @@ class ParallelAttention(MegatronModule):

 def bias_dropout_add(x, bias, residual, prob, training):
     # type: (Tensor, Tensor, Tensor, float, bool) -> Tensor
-    out = torch.nn.functional.dropout(x + bias, p=prob, training=training)
-    out = residual + out
+    #out = torch.nn.functional.dropout(x + bias, p=prob, training=training)
+    #out = residual + out
+    out = op.add_dropout_forward_autograd(x + bias, residual, prob, training)
     return out

@@ -418,13 +419,13 @@ def get_bias_dropout_add(training):

     return _bias_dropout_add

-@torch.jit.script
+# @torch.jit.script
 def bias_dropout_add_fused_train(x, bias, residual, prob):
     # type: (Tensor, Tensor, Tensor, float) -> Tensor
     return bias_dropout_add(x, bias, residual, prob, True)

-@torch.jit.script
+# @torch.jit.script
 def bias_dropout_add_fused_inference(x, bias, residual, prob):
     # type: (Tensor, Tensor, Tensor, float) -> Tensor
     return bias_dropout_add(x, bias, residual, prob, False)
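This file routes the residual bias-dropout-add through lightop's fused kernel and comments out the `@torch.jit.script` decorators, presumably because the custom op cannot be TorchScript-compiled. A minimal eager reference sketch of the pattern the fused call replaces, `residual + dropout(x + bias)`, using only standard PyTorch; it is a comparison aid, not the commit's code path:

```python
import torch
import torch.nn.functional as F


def bias_dropout_add_reference(x, bias, residual, prob, training):
    """Eager reference for the fused op: residual + dropout(x + bias)."""
    out = F.dropout(x + bias, p=prob, training=training)
    return residual + out


# In eval mode dropout is a no-op, so the result must equal residual + x + bias.
x = torch.randn(4, 16)
bias = torch.randn(16)
residual = torch.randn(4, 16)
out = bias_dropout_add_reference(x, bias, residual, prob=0.1, training=False)
assert torch.allclose(out, residual + x + bias)
```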