Unverified commit b1840f49 authored by Mufei Li, committed by GitHub

[Dataset] Change the Data Type of the Node Features in GINDataset/TUDataset from Float64 to Float32 (#2592)

* Update

* Update

* Update

* update
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Tong He <hetong007@gmail.com>
parent d460efee
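
With this change, the node features returned by GINDataset and (Legacy)TUDataset are float32 out of the box, so the explicit casts removed in the diffs below (`.float()` in PyTorch, `.astype('float32')` in MXNet) are no longer needed. A minimal sketch of the new behavior, assuming the PyTorch backend; the dataset name 'MUTAG' is only an illustrative choice:

    import torch
    from dgl.data import GINDataset

    # 'MUTAG' is one of the GIN benchmark datasets; it is downloaded on first use.
    dataset = GINDataset(name='MUTAG', self_loop=False)
    g, label = dataset[0]

    # Node features now come back as float32, so no .float() cast is needed
    # before feeding them to a model.
    assert g.ndata['attr'].dtype == torch.float32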
@@ -217,7 +217,7 @@ updating the model.
 opt = torch.optim.Adam(model.parameters())
 for epoch in range(20):
     for batched_graph, labels in dataloader:
-        feats = batched_graph.ndata['attr'].float()
+        feats = batched_graph.ndata['attr']
         logits = model(batched_graph, feats)
         loss = F.cross_entropy(logits, labels)
         opt.zero_grad()
......
@@ -165,7 +165,7 @@ DGL provides built-in graph readout functions, e.g. :func:`dgl.readout_nodes`
 opt = torch.optim.Adam(model.parameters())
 for epoch in range(20):
     for batched_graph, labels in dataloader:
-        feats = batched_graph.ndata['attr'].float()
+        feats = batched_graph.ndata['attr']
         logits = model(batched_graph, feats)
         loss = F.cross_entropy(logits, labels)
         opt.zero_grad()
......
@@ -21,7 +21,7 @@ def train(args, net, trainloader, trainer, criterion, epoch):
     for pos, (graphs, labels) in zip(bar, trainloader):
         # batch graphs will be shipped to device in forward part of model
         labels = labels.as_in_context(args.device)
-        feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
+        feat = graphs.ndata['attr'].as_in_context(args.device)
         with mx.autograd.record():
             graphs = graphs.to(args.device)
@@ -52,7 +52,7 @@ def eval_net(args, net, dataloader, criterion):
     for data in dataloader:
         graphs, labels = data
         labels = labels.as_in_context(args.device)
-        feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
+        feat = graphs.ndata['attr'].as_in_context(args.device)
         total += len(labels)
         graphs = graphs.to(args.device)
......
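
For the MXNet example above, the effect of the change is that the `astype('float32')` cast becomes redundant and only the device transfer remains. A small self-contained sketch; the constant array merely stands in for `graphs.ndata['attr']`:

    import numpy as np
    import mxnet as mx

    # Stand-in for graphs.ndata['attr'], which the dataset now yields as float32.
    feat = mx.nd.ones((4, 7), dtype='float32')

    # Before: feat = feat.astype('float32').as_in_context(mx.cpu())
    # After:  the cast is a no-op, so only the context move is kept.
    feat = feat.as_in_context(mx.cpu())
    assert feat.dtype == np.float32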
@@ -6,7 +6,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
-from dgl.data.gindt import GINDataset
+from dgl.data import GINDataset
 from dataloader import GINDataLoader
 from parser import Parser
 from gin import GIN
@@ -22,11 +22,9 @@ def train(args, net, trainloader, optimizer, criterion, epoch):
     for pos, (graphs, labels) in zip(bar, trainloader):
         # batch graphs will be shipped to device in forward part of model
-        for key in graphs.node_attr_schemes().keys():
-            graphs.ndata[key] = graphs.ndata[key].float()
         labels = labels.to(args.device)
-        feat = graphs.ndata.pop('attr').to(args.device)
         graphs = graphs.to(args.device)
+        feat = graphs.ndata.pop('attr')
         outputs = net(graphs, feat)
         loss = criterion(outputs, labels)
@@ -55,11 +53,9 @@ def eval_net(args, net, dataloader, criterion):
     for data in dataloader:
         graphs, labels = data
-        for key in graphs.node_attr_schemes().keys():
-            graphs.ndata[key] = graphs.ndata[key].float()
-        feat = graphs.ndata.pop('attr').to(args.device)
         graphs = graphs.to(args.device)
         labels = labels.to(args.device)
+        feat = graphs.ndata.pop('attr')
         total += len(labels)
         outputs = net(graphs, feat)
         _, predicted = torch.max(outputs.data, 1)
......
@@ -48,8 +48,6 @@ def train(model:torch.nn.Module, optimizer, trainloader,
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs,
@@ -74,8 +72,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
......
@@ -4,7 +4,7 @@ from time import time
 from datetime import datetime
 import dgl
-from dgl.data import LegacyTUDataset, TUDataset
+from dgl.data import LegacyTUDataset
 from torch.utils.data import random_split
 import torch
 from torch import Tensor
@@ -48,8 +48,6 @@ def train(model:torch.nn.Module, optimizer, trainloader,
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs,
@@ -74,8 +72,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
@@ -98,8 +94,6 @@ def validate(model:torch.nn.Module, loader, device,
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs, batch_graphs.ndata["feat"])
......
@@ -90,8 +90,6 @@ def train(model:torch.nn.Module, optimizer, trainloader, device):
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs, batch_graphs.ndata["feat"])
@@ -113,8 +111,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs, batch_graphs.ndata["feat"])
......
@@ -82,8 +82,6 @@ def train(model:torch.nn.Module, optimizer, trainloader, device):
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs)
@@ -105,8 +103,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs)
......
@@ -182,7 +182,7 @@ optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
 for epoch in range(20):
     for batched_graph, labels in train_dataloader:
-        pred = model(batched_graph, batched_graph.ndata['attr'].float())
+        pred = model(batched_graph, batched_graph.ndata['attr'])
         loss = F.cross_entropy(pred, labels)
         optimizer.zero_grad()
         loss.backward()
@@ -191,7 +191,7 @@ for epoch in range(20):
 num_correct = 0
 num_tests = 0
 for batched_graph, labels in test_dataloader:
-    pred = model(batched_graph, batched_graph.ndata['attr'].float())
+    pred = model(batched_graph, batched_graph.ndata['attr'])
     num_correct += (pred.argmax(1) == labels).sum().item()
     num_tests += len(labels)
......
@@ -49,7 +49,7 @@ class GINDataset(DGLBuiltinDataset):
     >>> g, label = data[128]
     >>> g
     Graph(num_nodes=13, num_edges=26,
-          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float64)}
+          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)}
           edata_schemes={})
     >>> label
     tensor(1)
@@ -61,7 +61,7 @@ class GINDataset(DGLBuiltinDataset):
     >>> batched_labels = torch.tensor(labels)
     >>> batched_graphs
     Graph(num_nodes=330, num_edges=748,
-          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float64)}
+          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)}
           edata_schemes={})
     """
@@ -175,7 +175,6 @@ class GINDataset(DGLBuiltinDataset):
                 if tmp == len(nrow):
                     # no node attributes
                     nrow = [int(w) for w in nrow]
-                    nattr = None
                 elif tmp > len(nrow):
                     nrow = [int(w) for w in nrow[:tmp]]
                     nattr = [float(w) for w in nrow[tmp:]]
@@ -208,10 +207,8 @@ class GINDataset(DGLBuiltinDataset):
             if nattrs != []:
                 nattrs = np.stack(nattrs)
-                g.ndata['attr'] = F.tensor(nattrs)
+                g.ndata['attr'] = F.tensor(nattrs, F.float32)
                 self.nattrs_flag = True
-            else:
-                nattrs = None
             g.ndata['label'] = F.tensor(nlabels)
             if len(self.nlabel_dict) > 1:
@@ -230,7 +227,6 @@ class GINDataset(DGLBuiltinDataset):
         if not self.nattrs_flag:
             if self.verbose:
                 print('there are no node features in this dataset!')
-            label2idx = {}
             # generate node attr by node degree
             if self.degree_as_nlabel:
                 if self.verbose:
@@ -265,7 +261,7 @@ class GINDataset(DGLBuiltinDataset):
                     g.number_of_nodes(), len(label2idx)))
             attr[range(g.number_of_nodes()), [label2idx[nl]
                  for nl in F.asnumpy(g.ndata['label']).tolist()]] = 1
-            g.ndata['attr'] = F.tensor(attr)
+            g.ndata['attr'] = F.tensor(attr, F.float32)
         # after load, get the #classes and #dim
         self.gclasses = len(self.glabel_dict)
......
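
The gindt.py changes above rely on DGL's backend shim: `F.tensor(data, dtype)` builds a framework tensor of the requested dtype, which is what turns the float64 NumPy attribute matrix into float32 features. A rough sketch of the equivalent operation, assuming the PyTorch backend:

    import numpy as np
    import torch

    # Stacking rows of Python floats yields a float64 NumPy array ...
    nattrs = np.stack([[1.0, 0.0], [0.0, 1.0]])
    assert nattrs.dtype == np.float64

    # ... so the dataset now requests float32 explicitly. Under the PyTorch
    # backend, F.tensor(nattrs, F.float32) behaves roughly like:
    attr = torch.as_tensor(nattrs, dtype=torch.float32)
    assert attr.dtype == torch.float32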
from __future__ import absolute_import
import numpy as np
import os
import random
from .dgl_dataset import DGLBuiltinDataset
from .utils import loadtxt, save_graphs, load_graphs, save_info, load_info
from .. import backend as F
from ..utils import retry_method_with_fix
from ..convert import graph as dgl_graph
class LegacyTUDataset(DGLBuiltinDataset):
@@ -48,7 +45,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
     >>> g, label = data[1024]
     >>> g
     Graph(num_nodes=88, num_edges=410,
-          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
+          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
           edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
     >>> label
     tensor(1)
@@ -60,7 +57,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
     >>> batched_labels = torch.tensor(labels)
     >>> batched_graphs
     Graph(num_nodes=9539, num_edges=47382,
-          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
+          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
           edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
 
     Notes
@@ -121,7 +118,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
             g.ndata['node_label'] = F.tensor(DS_node_labels)
             one_hot_node_labels = self._to_onehot(DS_node_labels)
             for idxs, g in zip(node_idx_list, self.graph_lists):
-                g.ndata['feat'] = F.tensor(one_hot_node_labels[idxs, :])
+                g.ndata['feat'] = F.tensor(one_hot_node_labels[idxs, :], F.float32)
             self.data_mode = "node_label"
         except IOError:
             print("No Node Label Data")
@@ -132,14 +129,15 @@ class LegacyTUDataset(DGLBuiltinDataset):
             if DS_node_attr.ndim == 1:
                 DS_node_attr = np.expand_dims(DS_node_attr, -1)
             for idxs, g in zip(node_idx_list, self.graph_lists):
-                g.ndata['feat'] = F.tensor(DS_node_attr[idxs, :])
+                g.ndata['feat'] = F.tensor(DS_node_attr[idxs, :], F.float32)
             self.data_mode = "node_attr"
         except IOError:
             print("No Node Attribute Data")
 
         if 'feat' not in g.ndata.keys():
             for idxs, g in zip(node_idx_list, self.graph_lists):
-                g.ndata['feat'] = np.ones((g.number_of_nodes(), self.hidden_size))
+                g.ndata['feat'] = F.ones((g.number_of_nodes(), self.hidden_size),
+                                         F.float32, F.cpu())
             self.data_mode = "constant"
 
         if self.verbose:
             print("Use Constant one as Feature with hidden size {}".format(self.hidden_size))
......
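
Beyond the dtype, the last hunk also changes the container of the constant fallback feature: it used to be a float64 NumPy array assigned into `ndata`, whereas `F.ones(shape, dtype, ctx)` produces a float32 backend tensor on the CPU. A quick end-to-end check, assuming the PyTorch backend; 'ENZYMES' is only an illustrative TU dataset name:

    import torch
    from dgl.data import LegacyTUDataset

    # 'ENZYMES' is a TU benchmark with node attributes; downloaded on first use.
    dataset = LegacyTUDataset(name='ENZYMES')
    g, label = dataset[0]

    # Whether 'feat' comes from node labels, node attributes, or the constant
    # fallback, it is now a float32 tensor.
    assert g.ndata['feat'].dtype == torch.float32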