OpenDAS / dgl · Commits

Commit ce378327 (unverified)
Authored Dec 29, 2022 by Xin Yao; committed by GitHub on Dec 29, 2022
Parent: 37bd0925

fix bf16 tests (#5089)
Showing 2 changed files, with 57 additions and 34 deletions:

- docs/source/guide/mixed_precision.rst (+5, -1)
- tests/compute/test_sparse.py (+52, -33)
docs/source/guide/mixed_precision.rst
...
@@ -9,10 +9,14 @@ consumption. This feature requires DGL 0.9+.
 Message-Passing with Half Precision
 -----------------------------------
-DGL allows message-passing on ``float16 (fp16)`` / ``bfloat16 (bf16)`` (requires CUDA >= 11.0)
+DGL allows message-passing on ``float16 (fp16)`` / ``bfloat16 (bf16)``
 features for both UDFs (User Defined Functions) and built-in functions
 (e.g., ``dgl.function.sum``, ``dgl.function.copy_u``).
+
+.. note:: Please check bfloat16 support via ``torch.cuda.is_bf16_supported()`` before using it.
+   Typically it requires CUDA >= 11.0 and GPU compute capability >= 8.0.
+
 The following example shows how to use DGL's message-passing APIs on half-precision
 features:
...
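Since the guide's own example is elided above, here is a minimal sketch (not the documentation's example) of the pattern the added note prescribes: guard on ``torch.cuda.is_bf16_supported()``, then run built-in message passing on bf16 features. The graph and feature sizes are arbitrary, and ``dgl.rand_graph`` is just a convenient way to get a toy graph.

import torch

import dgl
import dgl.function as fn

# Guard from the note above: bf16 typically needs CUDA >= 11.0 and a GPU
# with compute capability >= 8.0 (e.g. Ampere or newer).
assert torch.cuda.is_bf16_supported(), "bfloat16 is not supported on this GPU"

g = dgl.rand_graph(30, 100).to("cuda")  # toy graph: 30 nodes, 100 edges
g.ndata["h"] = torch.rand(30, 16, device="cuda").to(torch.bfloat16)

# Built-in functions (here copy_u + sum) accept bf16 features directly.
g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h_sum"))
print(g.ndata["h_sum"].dtype)  # torch.bfloat16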
tests/compute/test_sparse.py
...
@@ -2,16 +2,15 @@ import random
 import unittest

 import backend as F
-import dgl
 import numpy as np
 import pytest
 import torch
-from dgl.ops import edge_softmax, gsddmm, gspmm, segment_reduce
-from dgl.utils import version
 from test_utils import parametrize_idtype
 from test_utils.graph_cases import get_cases
+
+import dgl
+from dgl.ops import edge_softmax, gsddmm, gspmm, segment_reduce

 random.seed(42)
 np.random.seed(42)
...
@@ -177,30 +176,32 @@ def test_spmm(idtype, g, shp, msg, reducer):
 @unittest.skipIf(
     dgl.backend.backend_name != "pytorch",
-    reason="Only support PyTorch for now."
+    reason="Only support PyTorch for now.",
 )
 @unittest.skipIf(
     F._default_context_str == "cpu",
-    reason="Don't support half precision on CPU."
+    reason="Don't support half precision on CPU.",
 )
 @parametrize_idtype
 @pytest.mark.parametrize(
     "dtype, rtol, atol",
-    [(torch.float16, 1e-3, 0.5), (torch.bfloat16, 4e-3, 2.)]
+    [(torch.float16, 1e-3, 0.5), (torch.bfloat16, 4e-3, 2.0)],
 )
 def test_half_spmm(idtype, dtype, rtol, atol):
-    if version.parse(torch.version.cuda) < version.parse("11.0") \
-            and dtype == torch.bfloat16:
-        pytest.skip("BF16 requires CUDA >= 11.0.")
+    if dtype == torch.bfloat16 and not torch.cuda.is_bf16_supported():
+        pytest.skip("BF16 is not supported.")
     # make sure the spmm result is < 512 to match the rtol/atol we set.
-    g = dgl.graph((torch.arange(900), torch.tensor([0] * 900)),
-                  idtype=idtype, device=F.ctx())
+    g = dgl.graph(
+        (torch.arange(900), torch.tensor([0] * 900)),
+        idtype=idtype,
+        device=F.ctx(),
+    )
     feat_fp32 = torch.rand((g.num_src_nodes(), 32)).to(0)
     feat_half = feat_fp32.to(dtype)
     # test SpMMCSR
-    g = g.formats(['csc'])
+    g = g.formats(["csc"])
     res_fp32 = dgl.ops.copy_u_sum(g, feat_fp32)[0]
     res_half = dgl.ops.copy_u_sum(g, feat_half)[0].float()
     assert torch.allclose(res_fp32, res_half, rtol=rtol, atol=atol)
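The substance of the fix is visible in the skip guard: the old test parsed ``torch.version.cuda``, which only proves the CUDA toolkit is new enough, while ``torch.cuda.is_bf16_supported()`` also checks the GPU's compute capability. A standalone sketch of the distinction; ``bf16_usable`` is a hypothetical helper (not part of this diff), and ``packaging`` stands in for the removed ``dgl.utils.version`` import.

import torch
from packaging import version  # stand-in for dgl.utils.version in the old test


def bf16_usable() -> bool:
    """Hypothetical helper contrasting the two checks; not part of the diff."""
    if not torch.cuda.is_available():
        return False
    # Old guard: only proves the CUDA *toolkit* is new enough.
    toolkit_ok = version.parse(torch.version.cuda) >= version.parse("11.0")
    # New guard: also requires the *device* to have bf16 support, so e.g. a
    # V100 (sm_70) running CUDA 11 is correctly reported as unsupported.
    device_ok = torch.cuda.is_bf16_supported()
    return toolkit_ok and device_ok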
...
@@ -364,20 +365,25 @@ def test_segment_reduce(reducer):
 @pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
 @pytest.mark.parametrize(
     "dtype, tol",
-    [(torch.float16, 1e-2), (torch.bfloat16, 1e-2),
-     (torch.float32, 3e-3), (torch.float64, 1e-4)],
+    [
+        (torch.float16, 1e-2),
+        (torch.bfloat16, 1e-2),
+        (torch.float32, 3e-3),
+        (torch.float64, 1e-4),
+    ],
 )
 def test_segment_mm(idtype, feat_size, dtype, tol):
-    if F._default_context_str == "cpu" and dtype in (torch.float16, torch.bfloat16):
+    if F._default_context_str == "cpu" and dtype in (
+        torch.float16,
+        torch.bfloat16,
+    ):
         pytest.skip("Only support float32 and float64 on CPU.")
-    if F._default_context_str == "gpu" \
-            and version.parse(torch.version.cuda) < version.parse("11.0") \
-            and dtype == torch.bfloat16:
-        pytest.skip("BF16 requires CUDA >= 11.0.")
+    if (
+        F._default_context_str == "gpu"
+        and dtype == torch.bfloat16
+        and not torch.cuda.is_bf16_supported()
+    ):
+        pytest.skip("BF16 is not supported.")
     dev = F.ctx()
     # input
     a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype)
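For context on what this test exercises: ``dgl.ops.segment_mm`` multiplies consecutive row segments of ``a`` by per-segment weight matrices from ``b``. A minimal usage sketch, assuming DGL 0.9+ on a CUDA device; the shapes are illustrative only.

import torch

import dgl

a = torch.rand(100, 8, device="cuda")    # 100 rows of features
b = torch.rand(2, 8, 9, device="cuda")   # one (8, 9) weight matrix per segment
seglen_a = torch.tensor([60, 40])        # rows 0..59 use b[0], rows 60..99 use b[1]
c = dgl.ops.segment_mm(a, b, seglen_a)   # result shape: (100, 9)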
...
@@ -419,22 +425,35 @@ def test_segment_mm(idtype, feat_size, dtype, tol):
 @pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
 @pytest.mark.parametrize(
     "dtype, tol",
-    [(torch.float16, 1e-2), (torch.bfloat16, 2e-2),
-     (torch.float32, 3e-3), (torch.float64, 1e-4)]
+    [
+        (torch.float16, 1e-2),
+        (torch.bfloat16, 2e-2),
+        (torch.float32, 3e-3),
+        (torch.float64, 1e-4),
+    ],
 )
 def test_gather_mm_idx_b(feat_size, dtype, tol):
-    if F._default_context_str == "cpu" and dtype in (torch.float16, torch.bfloat16):
+    if F._default_context_str == "cpu" and dtype in (
+        torch.float16,
+        torch.bfloat16,
+    ):
         pytest.skip("Only support float32 and float64 on CPU.")
-    if F._default_context_str == "gpu" \
-            and version.parse(torch.version.cuda) < version.parse("11.0") \
-            and dtype == torch.bfloat16:
-        pytest.skip("BF16 requires CUDA >= 11.0.")
+    if (
+        F._default_context_str == "gpu"
+        and dtype == torch.bfloat16
+        and not torch.cuda.is_bf16_supported()
+    ):
+        pytest.skip("BF16 is not supported.")
     dev = F.ctx()
     # input
     a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype)
     a.requires_grad_()
-    b = torch.tensor(np.random.rand(10, feat_size, feat_size + 1)).to(dev).to(dtype)
+    b = (
+        torch.tensor(np.random.rand(10, feat_size, feat_size + 1))
+        .to(dev)
+        .to(dtype)
+    )
     b.requires_grad_()
     idx = torch.tensor(np.random.randint(0, 10, 100)).to(dev).long()
     dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev).to(dtype)
...
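Similarly, ``dgl.ops.gather_mm`` with ``idx_b`` multiplies each row ``a[i]`` by the gathered weight matrix ``b[idx[i]]``. A minimal usage sketch under the same assumptions (DGL 0.9+, CUDA device, illustrative shapes):

import torch

import dgl

a = torch.rand(100, 8, device="cuda")              # one feature row per item
b = torch.rand(10, 8, 9, device="cuda")            # 10 candidate weight matrices
idx = torch.randint(0, 10, (100,), device="cuda")  # which matrix each row uses
c = dgl.ops.gather_mm(a, b, idx_b=idx)             # result shape: (100, 9)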