gaoqiong / flash-attention
Commit 2a2a3c4b authored Jul 23, 2023 by Tri Dao
[LayerNorm] Add test for randomness
parent 767b71cc
Showing 1 changed file with 26 additions and 0 deletions

tests/ops/test_dropout_layer_norm.py
@@ -863,3 +863,29 @@ def test_dropout_layer_norm_parallel_residual_prenorm_training(
    assert (weight1.grad - weight1_ref.grad).abs().max() <= 3 * (weight1_pt.grad - weight1_ref.grad).abs().max() + 3e-5
    if not is_rms_norm:
        assert (bias1.grad - bias1_ref.grad).abs().max() <= 2 * (bias1_pt.grad - bias1_ref.grad).abs().max() + 3e-5


def test_dropout_layer_norm_randomness():
    hidden_size = 256
    dtype = torch.float32
    dropout_p = 0.1
    device = 'cuda'
    # set seed
    torch.random.manual_seed(0)
    batch_size = 8
    seqlen = 512
    x0 = torch.randn(batch_size, seqlen, hidden_size, device=device, dtype=dtype,
                     requires_grad=True)
    res = torch.randn_like(x0, dtype=dtype, requires_grad=True)
    model = DropoutAddLayerNorm(hidden_size, p=dropout_p, device=device, dtype=dtype)
    torch.random.manual_seed(42)
    _, dmask0 = dropout_add_layer_norm(x0, res, model.weight, model.bias, model.p, model.eps,
                                       return_dropout_mask=True)
    # Subsequent call should have a different dropout mask
    _, dmask1 = dropout_add_layer_norm(x0, res, model.weight, model.bias, model.p, model.eps,
                                       return_dropout_mask=True)
    torch.random.manual_seed(42)
    # Resetting the seed, should get the same dropout mask
    _, dmask2 = dropout_add_layer_norm(x0, res, model.weight, model.bias, model.p, model.eps,
                                       return_dropout_mask=True)
    assert not torch.equal(dmask0, dmask1)
    assert torch.equal(dmask0, dmask2)
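The new test checks the seeding contract of the fused dropout_add_layer_norm kernel: consecutive calls draw fresh dropout masks, while resetting torch's global RNG seed reproduces the same mask. The same contract holds for stock PyTorch dropout; below is a minimal sketch of that behavior using plain torch.nn.functional.dropout on CPU (an illustration only, not the fused flash-attn kernel, so it runs without the CUDA extension).

import torch

# Minimal sketch of the seeding behavior the test above relies on,
# using plain torch.nn.functional.dropout instead of the fused kernel.
torch.random.manual_seed(42)
x = torch.ones(8, 512, 256)
m0 = torch.nn.functional.dropout(x, p=0.1)  # first draw from the RNG
m1 = torch.nn.functional.dropout(x, p=0.1)  # RNG has advanced -> different mask
torch.random.manual_seed(42)
m2 = torch.nn.functional.dropout(x, p=0.1)  # seed reset -> same mask as m0
assert not torch.equal(m0, m1)
assert torch.equal(m0, m2)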