Unverified commit e0e8736f authored by ndickson-nvidia, committed by GitHub

[Feature] Added floating-point conversion functions to dgl.transforms.functional (#3890)

* Added half_(), float_(), and double_() functions to DGLHeteroGraph, HeteroNodeDataView, and HeteroEdgeDataView, for converting floating-point tensor data to float16, float32, or float64 precision

* Extracted private functions for the floating-point type conversion, to reduce code duplication

* Added a test for the floating-point data conversion functions half_(), float_(), and double_()

* Moved the half_(), float_(), and double_() functions from HeteroNodeDataView and HeteroEdgeDataView to the Frame class

* Updated test_float_cast() to use dgl.heterograph instead of dgl.graph

* Added to CONTRIBUTORS.md

* Changed data type conversion to be deferred until the data is accessed, to avoid redundant conversions of data that isn't used

* Addressed issues flagged by the linter

* Worked around a bug in the old version of MXNet that's currently used for DGL testing

* Only defer Column data type co...
parent 1f2e6960
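
For context, here is a minimal usage sketch of the conversion functions this commit adds, assuming the PyTorch backend. The calls go through dgl.transforms.functional exactly as in the new test below; the graph, feature names, and shapes are made up for illustration.

import dgl
import torch

# Small heterograph with one floating-point and one integer node feature.
g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
})
g.nodes['user'].data['feat'] = torch.randn(3, 4)          # float32
g.nodes['user'].data['label'] = torch.tensor([0, 1, 0])   # int64

# Cast every floating-point node/edge feature to float16; integer data is untouched.
g_half = dgl.transforms.functional.to_half(g)
assert g_half.nodes['user'].data['feat'].dtype == torch.float16
assert g_half.nodes['user'].data['label'].dtype == torch.int64

# to_float() and to_double() behave the same way for float32 and float64.
g_single = dgl.transforms.functional.to_float(g_half)
g_double = dgl.transforms.functional.to_double(g)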
@@ -62,3 +62,4 @@ Contributors
* [Abdurrahman Yasar](https://github.com/ayasar70) from Nvidia
* [Shaked Brody](https://github.com/shakedbr) from Technion
* [Jiahui Liu](https://github.com/paoxiaode) from Nvidia
* [Neil Dickson](https://github.com/ndickson-nvidia) from Nvidia
@@ -182,11 +182,12 @@ class Column(TensorStorage):
index : Tensor
Index tensor
"""
def __init__(self, storage, scheme=None, index=None, device=None):
def __init__(self, storage, scheme=None, index=None, device=None, deferred_dtype=None):
super().__init__(storage)
self.scheme = scheme if scheme else infer_scheme(storage)
self.index = index
self.device = device
self.deferred_dtype = deferred_dtype
self.pinned_by_dgl = False
def __len__(self):
@@ -230,6 +231,11 @@ class Column(TensorStorage):
if self.device is not None:
self.storage = F.copy_to(self.storage, self.device[0], **self.device[1])
self.device = None
# convert data to the right type
if self.deferred_dtype is not None:
self.storage = F.astype(self.storage, self.deferred_dtype)
self.deferred_dtype = None
return self.storage
@data.setter
@@ -258,6 +264,49 @@ class Column(TensorStorage):
col.device = (device, kwargs)
return col
@property
def dtype(self):
""" Return the effective data type of this Column """
if self.deferred_dtype is not None:
return self.deferred_dtype
return self.storage.dtype
def astype(self, new_dtype):
""" Return a new column such that when its data is requested,
it will be converted to new_dtype.
Parameters
----------
new_dtype : Framework-specific type object
The type to convert the data to.
Returns
-------
Column
A new column
"""
col = self.clone()
if col.dtype != new_dtype:
# If there is already a pending conversion, ensure that the pending
# conversion and transfer/sampling are done before this new conversion.
if col.deferred_dtype is not None:
_ = col.data
if (col.device is None) and (col.index is None):
# Do the conversion immediately if no device transfer or index
# sampling is pending. The assumption is that this is most
# likely to be the desired behaviour, such as converting an
# entire graph's feature data to float16 (half) before transfer
# to device when training, or converting back to float32 (float)
# after fetching the data to a device.
col.storage = F.astype(col.storage, new_dtype)
else:
# Defer the conversion if there is a pending transfer or sampling.
# This is so that feature data that never gets accessed on the
# device never needs to be transferred or sampled or converted.
col.deferred_dtype = new_dtype
return col
def __getitem__(self, rowids):
"""Return the feature data given the rowids.
@@ -329,7 +378,7 @@ class Column(TensorStorage):
def clone(self):
"""Return a shallow copy of this column."""
return Column(self.storage, self.scheme, self.index, self.device)
return Column(self.storage, self.scheme, self.index, self.device, self.deferred_dtype)
def deepclone(self):
"""Return a deepcopy of this column.
@@ -358,13 +407,13 @@ class Column(TensorStorage):
Sub-column
"""
if self.index is None:
return Column(self.storage, self.scheme, rowids, self.device)
return Column(self.storage, self.scheme, rowids, self.device, self.deferred_dtype)
else:
index = self.index
if not isinstance(index, _LazyIndex):
index = _LazyIndex(self.index)
index = index.slice(rowids)
return Column(self.storage, self.scheme, index, self.device)
return Column(self.storage, self.scheme, index, self.device, self.deferred_dtype)
@staticmethod
def create(data):
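
To make the deferred conversion concrete, here is an illustrative sketch against the internal Column API shown above, assuming the PyTorch backend. dgl.frame.Column, Column.create(), and subcolumn() are internal helpers rather than a stable public interface, so treat this as a reading aid, not supported usage.

import dgl.backend as F
from dgl.frame import Column

col = Column.create(F.tensor([1.0, 2.0, 3.0]))   # float32 storage
sub = col.subcolumn(F.tensor([0, 2]))            # index selection is pending, no gather yet

half = sub.astype(F.float16)                     # conversion is deferred because an
assert half.deferred_dtype == F.float16          # index selection is still pending
assert half.dtype == F.float16                   # dtype already reports the pending type

data = half.data                                 # gather and cast both happen here
assert data.dtype == F.float16 and len(data) == 2
assert half.deferred_dtype is None               # the pending conversion has been applied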
@@ -792,3 +841,32 @@ class Frame(MutableMapping):
if necessary."""
for column in self._columns.values():
column.unpin_memory_()
def _astype_float(self, new_type):
assert new_type in [F.float64, F.float32, F.float16], \
"'new_type' must be floating-point type: %s" % str(new_type)
newframe = self.clone()
new_columns = {}
for name, column in self._columns.items():
dtype = column.dtype
if dtype != new_type and dtype in [F.float64, F.float32, F.float16]:
new_columns[name] = column.astype(new_type)
else:
new_columns[name] = column
newframe._columns = new_columns
return newframe
def half(self):
""" Return a new frame with all floating-point columns converted
to half-precision (float16) """
return self._astype_float(F.float16)
def float(self):
""" Return a new frame with all floating-point columns converted
to single-precision (float32) """
return self._astype_float(F.float32)
def double(self):
""" Return a new frame with all floating-point columns converted
to double-precision (float64) """
return self._astype_float(F.float64)
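
A similarly hedged sketch of the new Frame methods in isolation, assuming the PyTorch backend (Frame is also internal; the dict-style constructor used here is an assumption based on the current frame.py). Only floating-point columns are converted, and the source frame is left untouched because astype() clones the column.

import dgl.backend as F
from dgl.frame import Frame

frame = Frame({
    'feat':  F.tensor([[1.0, 2.0], [3.0, 4.0]]),   # float32 -> converted
    'label': F.tensor([0, 1]),                      # int64   -> left as-is
})

half_frame = frame.half()
assert half_frame['feat'].data.dtype == F.float16
assert half_frame['label'].data.dtype == F.int64

# half(), float(), and double() return shallow clones; the original keeps float32.
assert frame['feat'].data.dtype == F.float32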
@@ -17,6 +17,7 @@
from collections.abc import Iterable, Mapping
from collections import defaultdict
import copy
import numpy as np
import scipy.sparse as sparse
import scipy.sparse.linalg
@@ -3530,4 +3531,55 @@ def laplacian_pe(g, k):
return PE
def to_half(g):
r"""Cast this graph to use float16 (half-precision) for any
floating-point edge and node feature data.
A shallow copy is returned so that the original graph is not modified.
Feature tensors that are not floating-point will not be modified.
Returns
-------
DGLHeteroGraph
Clone of graph with the feature data converted to float16.
"""
ret = copy.copy(g)
ret._edge_frames = [frame.half() for frame in ret._edge_frames]
ret._node_frames = [frame.half() for frame in ret._node_frames]
return ret
def to_float(g):
r"""Cast this graph to use float32 (single-precision) for any
floating-point edge and node feature data.
A shallow copy is returned so that the original graph is not modified.
Feature tensors that are not floating-point will not be modified.
Returns
-------
DGLHeteroGraph
Clone of graph with the feature data converted to float32.
"""
ret = copy.copy(g)
ret._edge_frames = [frame.float() for frame in ret._edge_frames]
ret._node_frames = [frame.float() for frame in ret._node_frames]
return ret
def to_double(g):
r"""Cast this graph to use float64 (double-precision) for any
floating-point edge and node feature data.
A shallow copy is returned so that the original graph is not modified.
Feature tensors that are not floating-point will not be modified.
Returns
-------
DGLHeteroGraph
Clone of graph with the feature data converted to float64.
"""
ret = copy.copy(g)
ret._edge_frames = [frame.double() for frame in ret._edge_frames]
ret._node_frames = [frame.double() for frame in ret._node_frames]
return ret
_init_api("dgl.transform", __name__)
@@ -1924,6 +1924,65 @@ def test_dtype_cast(idtype):
assert g_cast.idtype == F.int32
test_utils.check_graph_equal(g, g_cast, check_idtype=False)
def test_float_cast():
for t in [F.float16, F.float32, F.float64]:
idtype = F.int32
g = dgl.heterograph({
('user', 'follows', 'user'): (F.tensor([0, 1, 1, 2, 2, 3], dtype=idtype),
F.tensor([0, 0, 1, 1, 2, 2], dtype=idtype)),
('user', 'plays', 'game'): (F.tensor([0, 1, 1], dtype=idtype),
F.tensor([0, 0, 1], dtype=idtype))},
idtype=idtype, device=F.ctx())
uvalues = [1, 2, 3, 4]
gvalues = [5, 6]
fvalues = [7, 8, 9, 10, 11, 12]
pvalues = [13, 14, 15]
dataNamesTypes = [
('a',F.float16),
('b',F.float32),
('c',F.float64),
('d',F.int32),
('e',F.int64)]
for name,type in dataNamesTypes:
g.nodes['user'].data[name] = F.copy_to(F.tensor(uvalues, dtype=type), ctx=F.ctx())
for name,type in dataNamesTypes:
g.nodes['game'].data[name] = F.copy_to(F.tensor(gvalues, dtype=type), ctx=F.ctx())
for name,type in dataNamesTypes:
g.edges['follows'].data[name] = F.copy_to(F.tensor(fvalues, dtype=type), ctx=F.ctx())
for name,type in dataNamesTypes:
g.edges['plays'].data[name] = F.copy_to(F.tensor(pvalues, dtype=type), ctx=F.ctx())
if t == F.float16:
g = dgl.transforms.functional.to_half(g)
if t == F.float32:
g = dgl.transforms.functional.to_float(g)
if t == F.float64:
g = dgl.transforms.functional.to_double(g)
for name,origType in dataNamesTypes:
# integer tensors shouldn't be converted
reqType = t if (origType in [F.float16,F.float32,F.float64]) else origType
values = g.nodes['user'].data[name]
assert values.dtype == reqType
assert len(values) == len(uvalues)
assert F.allclose(values, F.tensor(uvalues), 0, 0)
values = g.nodes['game'].data[name]
assert values.dtype == reqType
assert len(values) == len(gvalues)
assert F.allclose(values, F.tensor(gvalues), 0, 0)
values = g.edges['follows'].data[name]
assert values.dtype == reqType
assert len(values) == len(fvalues)
assert F.allclose(values, F.tensor(fvalues), 0, 0)
values = g.edges['plays'].data[name]
assert values.dtype == reqType
assert len(values) == len(pvalues)
assert F.allclose(values, F.tensor(pvalues), 0, 0)
@parametrize_dtype
def test_format(idtype):
# single relation
@@ -2865,6 +2924,7 @@ if __name__ == '__main__':
# test_isolated_ntype()
# test_bipartite()
# test_dtype_cast()
# test_float_cast()
# test_reverse("int32")
# test_format()
#test_add_edges(F.int32)