OpenDAS / apex · Commits

Commit 0df6c4c3
Update test_fused_layer_norm.py
Authored Jul 29, 2022 by hubertlu-tw
Parent: 8df1b6b8

Showing 1 changed file with 40 additions and 19 deletions.

tests/L0/run_fused_layer_norm/test_fused_layer_norm.py (+40 −19)
-import unittest
-import os
-import random
 import itertools
+import unittest
 
 import torch
 import apex
-from torch.autograd import Variable
 
 
 class TestFusedLayerNorm(unittest.TestCase):
...
@@ -31,20 +30,43 @@ class TestFusedLayerNorm(unittest.TestCase):
             normalized_shape=self.normalized_shape).to(device="cuda", dtype=self.dtype)
 
-    def _test_same_output(self, batch_size):
+    def _check_same_output(self, batch_size, contiguous):
         torch.cuda.manual_seed(42)
-        self.input_ = torch.randn((batch_size, *self.module_cpu_.normalized_shape), device="cpu").requires_grad_(True)
-        self.input_cuda_ = self.input_.cuda().detach().requires_grad_(True)
-        out_cpu_ = self.module_cpu_(self.input_)
+        if contiguous:
+            input_shape = [batch_size] + self.normalized_shape
+            input_ = torch.randn(input_shape, device="cpu").requires_grad_(True)
+            input_cuda_ = input_.to(device="cuda", dtype=self.dtype).detach().requires_grad_(True)
+            self.assertTrue(input_.is_contiguous())
+            self.assertTrue(input_cuda_.is_contiguous())
+        else:
+            input_shape = [batch_size] + self.normalized_shape
+            input_shape = [batch_size * 3] + [self.normalized_shape[0] * 5, self.normalized_shape[1] * 3]
+            input_src_ = torch.randn(input_shape, device="cpu")
+            input_ = input_src_[::3, ::5, ::3].detach().requires_grad_(True)
+            input_cuda_ = input_src_.to(device="cuda", dtype=self.dtype)[::3, ::5, ::3].detach().requires_grad_(True)
+            # make sure that tensors are NOT contiguous.
+            self.assertFalse(input_.is_contiguous())
+            self.assertFalse(input_cuda_.is_contiguous())
+        out_cpu_ = self.module_cpu_(input_)
         gO = torch.rand_like(out_cpu_)
         out_cpu_.backward(gO)
-        out_cuda_ = self.module_cuda_(self.input_cuda_)
-        gO = gO.cuda()
+        out_cuda_ = self.module_cuda_(input_cuda_)
+        gO = gO.to(device="cuda", dtype=self.dtype)
         out_cuda_.backward(gO)
-        assert out_cpu_.is_cuda == False
-        assert out_cuda_.is_cuda == True
-        torch.testing.assert_allclose(out_cpu_, out_cuda_.cpu())
-        torch.testing.assert_allclose(self.input_.grad, self.input_cuda_.grad.cpu())
+        self.assertFalse(out_cpu_.is_cuda)
+        self.assertTrue(out_cuda_.is_cuda)
+        # TODO (mkozuki): `torch.testing.assert_allclose` is deprecated.
+        # Use `torch.testing.assert_close`.
+        # See https://github.com/pytorch/pytorch/issues/61844
+        torch.testing.assert_allclose(out_cpu_.to(device="cuda", dtype=self.dtype), out_cuda_, **self.fwd_thresholds)
+        torch.testing.assert_allclose(input_.grad.to(device="cuda", dtype=self.dtype), input_cuda_.grad, **self.bwd_thresholds)
+
+    def _test_same_output(self, batch_size):
+        for contiguous in (True, False):
+            with self.subTest(contiguous=contiguous):
+                self._check_same_output(batch_size, contiguous)
 
     def test_layer_norm(self):
         self._test_same_output(16)
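The new _check_same_output exercises FusedLayerNorm on both contiguous and non-contiguous inputs, building the non-contiguous case by slicing a larger tensor with a step in every dimension. A minimal standalone sketch of that slicing trick, with concrete shapes chosen here purely for illustration (the test itself derives them from self.normalized_shape):

import torch

# Strided slicing returns a view that shares storage with the source tensor
# but skips elements, so it is not contiguous in memory.
src = torch.randn(48, 160, 48)            # e.g. batch_size * 3, dim0 * 5, dim1 * 3
view = src[::3, ::5, ::3]                 # same pattern as the test above

print(view.shape)                         # torch.Size([16, 32, 16])
print(view.is_contiguous())               # False
print(view.contiguous().is_contiguous())  # True: .contiguous() copies into dense memory

# A kernel that assumes densely packed inputs would read the wrong elements here
# unless it either calls .contiguous() first or honours the tensor's strides,
# which is exactly what the non-contiguous branch of the test guards against.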
...
@@ -205,11 +227,8 @@ def _prep_inputs(batch_size, normalized_shape, dtype):
     native = fused.clone().to(dtype).requires_grad_(True)
     return native, fused
 
-TORCH_MAJOR, TORCH_MINOR = int(torch.__version__.split('.')[0]), int(torch.__version__.split('.')[1])
-if (TORCH_MAJOR <= 1 and TORCH_MINOR < 10):
-    autocast_dtypes = (torch.half,)
-else:
-    autocast_dtypes = (torch.half, torch.bfloat16) if torch.cuda.is_bf16_supported() else (torch.half,)
+autocast_dtypes = (torch.half, torch.bfloat16) if torch.cuda.is_bf16_supported() else (torch.half,)
 
 
 class TestAutocastFusedLayerNorm(unittest.TestCase):
     bf16_fwd_thresholds = dict(rtol=1.6e-2, atol=3e-4)
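This hunk drops the manual PyTorch version probing and keys the autocast dtypes directly off torch.cuda.is_bf16_supported(), so bfloat16 is only tested where the device actually supports it. A small sketch of how such a tuple can drive an autocast region; it needs a CUDA device, and the LayerNorm shape and batch size are invented for the example:

import torch

# Mirror the dtype selection above: add bfloat16 only when the GPU supports it.
autocast_dtypes = (torch.half, torch.bfloat16) if torch.cuda.is_bf16_supported() else (torch.half,)

layer_norm = torch.nn.LayerNorm([32, 16]).cuda()
x = torch.randn(8, 32, 16, device="cuda")

for dtype in autocast_dtypes:
    # Each iteration runs the same module under a different autocast dtype.
    with torch.cuda.amp.autocast(dtype=dtype):
        y = layer_norm(x)
    print(dtype, y.dtype)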
...
@@ -235,6 +254,8 @@ class TestAutocastFusedLayerNorm(unittest.TestCase):
         expected.backward(g_native)
         actual.backward(g_fused)
 
+        tols = {'rtol': None, 'atol': None} if dtype == torch.half else TestAutocastFusedLayerNorm.bf16_bwd_thresholds
+        torch.testing.assert_allclose(native_x.grad, fused_x.grad, **tols)
+
     def test_autocast(self):
         for (dtype, elementwise_affine) in itertools.product(autocast_dtypes, (True, False)):
...
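Two threads run through these hunks: comparisons now pass explicit rtol/atol thresholds (or None to fall back to dtype-based defaults), and the TODO notes that torch.testing.assert_allclose is deprecated in favour of torch.testing.assert_close. A sketch of what that migration looks like; the tensors and noise level are invented, and the thresholds are borrowed from bf16_fwd_thresholds above:

import torch

expected = torch.randn(16, 32, 16)
actual = expected + 1e-7 * torch.randn_like(expected)

# Old style, as still used in the test above (deprecated upstream):
torch.testing.assert_allclose(actual, expected, rtol=1.6e-2, atol=3e-4)

# Replacement suggested by the TODO: same idea, explicit tolerances...
torch.testing.assert_close(actual, expected, rtol=1.6e-2, atol=3e-4)

# ...or rtol=None/atol=None to pick defaults based on the input dtype,
# which is what the {'rtol': None, 'atol': None} branch above relies on.
torch.testing.assert_close(actual, expected, rtol=None, atol=None)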