Unverified commit b1840f49 authored by Mufei Li, committed by GitHub

[Dataset] Change the Data Type of the Node Features in GINDataset/TUDataset from Float64 to Float32 (#2592)

* Update

* Update

* Update

* update
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Tong He <hetong007@gmail.com>
parent d460efee
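The diff below removes the per-batch float casts (`.float()` in the PyTorch examples, `.astype('float32')` in the MXNet one) because the datasets now return float32 node features directly, matching the default parameter dtype of `torch.nn` modules. A minimal sketch of the user-visible effect ('MUTAG' is just an example dataset name; the data is downloaded on first use):

import torch
from dgl.data import GINDataset

# Node features are now created as float32 at load time, so they can be
# fed to a model without a per-batch cast.
data = GINDataset(name='MUTAG', self_loop=True)
g, label = data[0]
assert g.ndata['attr'].dtype == torch.float32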
@@ -217,7 +217,7 @@ updating the model.
 opt = torch.optim.Adam(model.parameters())
 for epoch in range(20):
     for batched_graph, labels in dataloader:
-        feats = batched_graph.ndata['attr'].float()
+        feats = batched_graph.ndata['attr']
         logits = model(batched_graph, feats)
         loss = F.cross_entropy(logits, labels)
         opt.zero_grad()
...
@@ -165,7 +165,7 @@ Common graph readout functions are built into DGL; for example, :func:`dgl.readout_nodes` implements
 opt = torch.optim.Adam(model.parameters())
 for epoch in range(20):
     for batched_graph, labels in dataloader:
-        feats = batched_graph.ndata['attr'].float()
+        feats = batched_graph.ndata['attr']
         logits = model(batched_graph, feats)
         loss = F.cross_entropy(logits, labels)
         opt.zero_grad()
...
@@ -21,7 +21,7 @@ def train(args, net, trainloader, trainer, criterion, epoch):
 for pos, (graphs, labels) in zip(bar, trainloader):
     # batch graphs will be shipped to device in forward part of model
     labels = labels.as_in_context(args.device)
-    feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
+    feat = graphs.ndata['attr'].as_in_context(args.device)
     with mx.autograd.record():
         graphs = graphs.to(args.device)
@@ -52,7 +52,7 @@ def eval_net(args, net, dataloader, criterion):
 for data in dataloader:
     graphs, labels = data
     labels = labels.as_in_context(args.device)
-    feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
+    feat = graphs.ndata['attr'].as_in_context(args.device)
     total += len(labels)
     graphs = graphs.to(args.device)
...
@@ -6,7 +6,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
-from dgl.data.gindt import GINDataset
+from dgl.data import GINDataset
 from dataloader import GINDataLoader
 from parser import Parser
 from gin import GIN
@@ -22,11 +22,9 @@ def train(args, net, trainloader, optimizer, criterion, epoch):
 for pos, (graphs, labels) in zip(bar, trainloader):
     # batch graphs will be shipped to device in forward part of model
-    for key in graphs.node_attr_schemes().keys():
-        graphs.ndata[key] = graphs.ndata[key].float()
     labels = labels.to(args.device)
-    feat = graphs.ndata.pop('attr').to(args.device)
     graphs = graphs.to(args.device)
+    feat = graphs.ndata.pop('attr')
     outputs = net(graphs, feat)
     loss = criterion(outputs, labels)
@@ -55,11 +53,9 @@ def eval_net(args, net, dataloader, criterion):
 for data in dataloader:
     graphs, labels = data
-    for key in graphs.node_attr_schemes().keys():
-        graphs.ndata[key] = graphs.ndata[key].float()
-    feat = graphs.ndata.pop('attr').to(args.device)
     graphs = graphs.to(args.device)
     labels = labels.to(args.device)
+    feat = graphs.ndata.pop('attr')
     total += len(labels)
     outputs = net(graphs, feat)
     _, predicted = torch.max(outputs.data, 1)
...
@@ -48,8 +48,6 @@ def train(model:torch.nn.Module, optimizer, trainloader,
 for batch in trainloader:
     optimizer.zero_grad()
     batch_graphs, batch_labels = batch
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out, l1, l2 = model(batch_graphs,
@@ -74,8 +72,6 @@ def test(model:torch.nn.Module, loader, device):
 for batch in loader:
     batch_graphs, batch_labels = batch
     num_graphs += batch_labels.size(0)
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
...
@@ -4,7 +4,7 @@ from time import time
 from datetime import datetime
 import dgl
-from dgl.data import LegacyTUDataset, TUDataset
+from dgl.data import LegacyTUDataset
 from torch.utils.data import random_split
 import torch
 from torch import Tensor
@@ -48,8 +48,6 @@ def train(model:torch.nn.Module, optimizer, trainloader,
 for batch in trainloader:
     optimizer.zero_grad()
     batch_graphs, batch_labels = batch
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out, l1, l2 = model(batch_graphs,
@@ -74,8 +72,6 @@ def test(model:torch.nn.Module, loader, device):
 for batch in loader:
     batch_graphs, batch_labels = batch
     num_graphs += batch_labels.size(0)
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
@@ -98,8 +94,6 @@ def validate(model:torch.nn.Module, loader, device,
 for batch in loader:
     batch_graphs, batch_labels = batch
     num_graphs += batch_labels.size(0)
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out, l1, l2 = model(batch_graphs, batch_graphs.ndata["feat"])
...
@@ -90,8 +90,6 @@ def train(model:torch.nn.Module, optimizer, trainloader, device):
 for batch in trainloader:
     optimizer.zero_grad()
     batch_graphs, batch_labels = batch
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out = model(batch_graphs, batch_graphs.ndata["feat"])
@@ -113,8 +111,6 @@ def test(model:torch.nn.Module, loader, device):
 for batch in loader:
     batch_graphs, batch_labels = batch
     num_graphs += batch_labels.size(0)
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out = model(batch_graphs, batch_graphs.ndata["feat"])
...
@@ -82,8 +82,6 @@ def train(model:torch.nn.Module, optimizer, trainloader, device):
 for batch in trainloader:
     optimizer.zero_grad()
     batch_graphs, batch_labels = batch
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out = model(batch_graphs)
@@ -105,8 +103,6 @@ def test(model:torch.nn.Module, loader, device):
 for batch in loader:
     batch_graphs, batch_labels = batch
     num_graphs += batch_labels.size(0)
-    for (key, value) in batch_graphs.ndata.items():
-        batch_graphs.ndata[key] = value.float()
     batch_graphs = batch_graphs.to(device)
     batch_labels = batch_labels.long().to(device)
     out = model(batch_graphs)
...
@@ -182,7 +182,7 @@ optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
 for epoch in range(20):
     for batched_graph, labels in train_dataloader:
-        pred = model(batched_graph, batched_graph.ndata['attr'].float())
+        pred = model(batched_graph, batched_graph.ndata['attr'])
         loss = F.cross_entropy(pred, labels)
         optimizer.zero_grad()
         loss.backward()
@@ -191,7 +191,7 @@ for epoch in range(20):
 num_correct = 0
 num_tests = 0
 for batched_graph, labels in test_dataloader:
-    pred = model(batched_graph, batched_graph.ndata['attr'].float())
+    pred = model(batched_graph, batched_graph.ndata['attr'])
     num_correct += (pred.argmax(1) == labels).sum().item()
     num_tests += len(labels)
...
@@ -49,7 +49,7 @@ class GINDataset(DGLBuiltinDataset):
     >>> g, label = data[128]
     >>> g
     Graph(num_nodes=13, num_edges=26,
-          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float64)}
+          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)}
           edata_schemes={})
     >>> label
     tensor(1)
@@ -61,7 +61,7 @@ class GINDataset(DGLBuiltinDataset):
     >>> batched_labels = torch.tensor(labels)
     >>> batched_graphs
     Graph(num_nodes=330, num_edges=748,
-          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float64)}
+          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)}
           edata_schemes={})
     """
@@ -175,7 +175,6 @@ class GINDataset(DGLBuiltinDataset):
     if tmp == len(nrow):
         # no node attributes
         nrow = [int(w) for w in nrow]
-        nattr = None
     elif tmp > len(nrow):
         nrow = [int(w) for w in nrow[:tmp]]
         nattr = [float(w) for w in nrow[tmp:]]
@@ -208,10 +207,8 @@ class GINDataset(DGLBuiltinDataset):
     if nattrs != []:
         nattrs = np.stack(nattrs)
-        g.ndata['attr'] = F.tensor(nattrs)
+        g.ndata['attr'] = F.tensor(nattrs, F.float32)
         self.nattrs_flag = True
-    else:
-        nattrs = None
     g.ndata['label'] = F.tensor(nlabels)
     if len(self.nlabel_dict) > 1:
@@ -230,7 +227,6 @@ class GINDataset(DGLBuiltinDataset):
     if not self.nattrs_flag:
         if self.verbose:
             print('there are no node features in this dataset!')
-        label2idx = {}
         # generate node attr by node degree
         if self.degree_as_nlabel:
             if self.verbose:
@@ -265,7 +261,7 @@ class GINDataset(DGLBuiltinDataset):
                 g.number_of_nodes(), len(label2idx)))
         attr[range(g.number_of_nodes()), [label2idx[nl]
             for nl in F.asnumpy(g.ndata['label']).tolist()]] = 1
-        g.ndata['attr'] = F.tensor(attr)
+        g.ndata['attr'] = F.tensor(attr, F.float32)
     # after load, get the #classes and #dim
     self.gclasses = len(self.glabel_dict)
...
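Note how the hunks above pin the dtype at tensor-creation time through DGL's framework-agnostic backend shim instead of casting afterwards, which also avoids materializing a transient float64 copy of large feature matrices. A hedged sketch of the pattern (`dgl.backend` is an internal namespace and may change between releases):

import numpy as np
import dgl.backend as F

nattrs = np.stack([np.ones(7), np.zeros(7)])  # NumPy defaults to float64
attr = F.tensor(nattrs, F.float32)            # converted once, stored as float32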
 from __future__ import absolute_import
 import numpy as np
 import os
-import random
 from .dgl_dataset import DGLBuiltinDataset
 from .utils import loadtxt, save_graphs, load_graphs, save_info, load_info
 from .. import backend as F
-from ..utils import retry_method_with_fix
 from ..convert import graph as dgl_graph

 class LegacyTUDataset(DGLBuiltinDataset):
@@ -48,7 +45,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
     >>> g, label = data[1024]
     >>> g
     Graph(num_nodes=88, num_edges=410,
-          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
+          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
           edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
     >>> label
     tensor(1)
@@ -60,7 +57,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
     >>> batched_labels = torch.tensor(labels)
     >>> batched_graphs
     Graph(num_nodes=9539, num_edges=47382,
-          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
+          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
           edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
     Notes
@@ -121,7 +118,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
         g.ndata['node_label'] = F.tensor(DS_node_labels)
     one_hot_node_labels = self._to_onehot(DS_node_labels)
     for idxs, g in zip(node_idx_list, self.graph_lists):
-        g.ndata['feat'] = F.tensor(one_hot_node_labels[idxs, :])
+        g.ndata['feat'] = F.tensor(one_hot_node_labels[idxs, :], F.float32)
     self.data_mode = "node_label"
 except IOError:
     print("No Node Label Data")
@@ -132,14 +129,15 @@ class LegacyTUDataset(DGLBuiltinDataset):
     if DS_node_attr.ndim == 1:
         DS_node_attr = np.expand_dims(DS_node_attr, -1)
     for idxs, g in zip(node_idx_list, self.graph_lists):
-        g.ndata['feat'] = F.tensor(DS_node_attr[idxs, :])
+        g.ndata['feat'] = F.tensor(DS_node_attr[idxs, :], F.float32)
     self.data_mode = "node_attr"
 except IOError:
     print("No Node Attribute Data")

 if 'feat' not in g.ndata.keys():
     for idxs, g in zip(node_idx_list, self.graph_lists):
-        g.ndata['feat'] = np.ones((g.number_of_nodes(), self.hidden_size))
+        g.ndata['feat'] = F.ones((g.number_of_nodes(), self.hidden_size),
+                                 F.float32, F.cpu())
     self.data_mode = "constant"
     if self.verbose:
         print("Use Constant one as Feature with hidden size {}".format(self.hidden_size))
...
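After this patch, every feature path in LegacyTUDataset (one-hot "node_label" features, raw "node_attr" features, and the constant fallback built with `F.ones`) yields float32 tensors. A quick sanity check, sketched under the assumption that 'ENZYMES' is used as the example dataset:

import torch
from dgl.data import LegacyTUDataset

data = LegacyTUDataset(name='ENZYMES')
g, _ = data[0]
# Whichever feature source was used, the dtype is now consistent.
print(data.data_mode)
assert g.ndata['feat'].dtype == torch.float32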