Unverified commit b1840f49 authored by Mufei Li, committed by GitHub

[Dataset] Change the Data Type of the Node Features in GINDataset/TUDataset from Float64 to Float32 (#2592)

* Update

* Update

* Update

* update
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Tong He <hetong007@gmail.com>
parent d460efee
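
With this change, the node features returned by GINDataset and (Legacy)TUDataset are float32 out of the box, so the explicit casts removed in the diffs below (`.float()` in PyTorch, `.astype('float32')` in MXNet) are no longer needed. A minimal sketch of the new behavior, assuming the PyTorch backend; the dataset name 'MUTAG' is only an illustrative choice:

    import torch
    from dgl.data import GINDataset

    # 'MUTAG' is one of the GIN benchmark datasets; it is downloaded on first use.
    dataset = GINDataset(name='MUTAG', self_loop=False)
    g, label = dataset[0]

    # Node features now come back as float32, so no .float() cast is needed
    # before feeding them to a model.
    assert g.ndata['attr'].dtype == torch.float32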
@@ -217,7 +217,7 @@ updating the model.
 opt = torch.optim.Adam(model.parameters())
 for epoch in range(20):
     for batched_graph, labels in dataloader:
-        feats = batched_graph.ndata['attr'].float()
+        feats = batched_graph.ndata['attr']
         logits = model(batched_graph, feats)
         loss = F.cross_entropy(logits, labels)
         opt.zero_grad()
......
@@ -165,7 +165,7 @@ DGL provides built-in graph readout functions, e.g. :func:`dgl.readout_nodes`
 opt = torch.optim.Adam(model.parameters())
 for epoch in range(20):
     for batched_graph, labels in dataloader:
-        feats = batched_graph.ndata['attr'].float()
+        feats = batched_graph.ndata['attr']
         logits = model(batched_graph, feats)
         loss = F.cross_entropy(logits, labels)
         opt.zero_grad()
......
@@ -21,7 +21,7 @@ def train(args, net, trainloader, trainer, criterion, epoch):
     for pos, (graphs, labels) in zip(bar, trainloader):
         # batch graphs will be shipped to device in forward part of model
         labels = labels.as_in_context(args.device)
-        feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
+        feat = graphs.ndata['attr'].as_in_context(args.device)
         with mx.autograd.record():
             graphs = graphs.to(args.device)
@@ -52,7 +52,7 @@ def eval_net(args, net, dataloader, criterion):
     for data in dataloader:
         graphs, labels = data
         labels = labels.as_in_context(args.device)
-        feat = graphs.ndata['attr'].astype('float32').as_in_context(args.device)
+        feat = graphs.ndata['attr'].as_in_context(args.device)
         total += len(labels)
         graphs = graphs.to(args.device)
......
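
For the MXNet example above, the effect of the change is that the `astype('float32')` cast becomes redundant and only the device transfer remains. A small self-contained sketch; the constant array merely stands in for `graphs.ndata['attr']`:

    import numpy as np
    import mxnet as mx

    # Stand-in for graphs.ndata['attr'], which the dataset now yields as float32.
    feat = mx.nd.ones((4, 7), dtype='float32')

    # Before: feat = feat.astype('float32').as_in_context(mx.cpu())
    # After:  the cast is a no-op, so only the context move is kept.
    feat = feat.as_in_context(mx.cpu())
    assert feat.dtype == np.float32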
@@ -6,7 +6,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
-from dgl.data.gindt import GINDataset
+from dgl.data import GINDataset
 from dataloader import GINDataLoader
 from parser import Parser
 from gin import GIN
@@ -22,11 +22,9 @@ def train(args, net, trainloader, optimizer, criterion, epoch):
     for pos, (graphs, labels) in zip(bar, trainloader):
         # batch graphs will be shipped to device in forward part of model
-        for key in graphs.node_attr_schemes().keys():
-            graphs.ndata[key] = graphs.ndata[key].float()
         labels = labels.to(args.device)
-        feat = graphs.ndata.pop('attr').to(args.device)
         graphs = graphs.to(args.device)
+        feat = graphs.ndata.pop('attr')
         outputs = net(graphs, feat)
         loss = criterion(outputs, labels)
@@ -55,11 +53,9 @@ def eval_net(args, net, dataloader, criterion):
     for data in dataloader:
         graphs, labels = data
-        for key in graphs.node_attr_schemes().keys():
-            graphs.ndata[key] = graphs.ndata[key].float()
-        feat = graphs.ndata.pop('attr').to(args.device)
         graphs = graphs.to(args.device)
         labels = labels.to(args.device)
+        feat = graphs.ndata.pop('attr')
         total += len(labels)
         outputs = net(graphs, feat)
         _, predicted = torch.max(outputs.data, 1)
......
@@ -48,8 +48,6 @@ def train(model:torch.nn.Module, optimizer, trainloader,
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs,
@@ -74,8 +72,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
......
@@ -4,7 +4,7 @@ from time import time
 from datetime import datetime
 import dgl
-from dgl.data import LegacyTUDataset, TUDataset
+from dgl.data import LegacyTUDataset
 from torch.utils.data import random_split
 import torch
 from torch import Tensor
@@ -48,8 +48,6 @@ def train(model:torch.nn.Module, optimizer, trainloader,
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs,
@@ -74,8 +72,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, _, _ = model(batch_graphs, batch_graphs.ndata["feat"])
@@ -98,8 +94,6 @@ def validate(model:torch.nn.Module, loader, device,
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out, l1, l2 = model(batch_graphs, batch_graphs.ndata["feat"])
......
@@ -90,8 +90,6 @@ def train(model:torch.nn.Module, optimizer, trainloader, device):
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs, batch_graphs.ndata["feat"])
@@ -113,8 +111,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs, batch_graphs.ndata["feat"])
......
@@ -82,8 +82,6 @@ def train(model:torch.nn.Module, optimizer, trainloader, device):
     for batch in trainloader:
        optimizer.zero_grad()
        batch_graphs, batch_labels = batch
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs)
@@ -105,8 +103,6 @@ def test(model:torch.nn.Module, loader, device):
     for batch in loader:
        batch_graphs, batch_labels = batch
        num_graphs += batch_labels.size(0)
-       for (key, value) in batch_graphs.ndata.items():
-           batch_graphs.ndata[key] = value.float()
        batch_graphs = batch_graphs.to(device)
        batch_labels = batch_labels.long().to(device)
        out = model(batch_graphs)
......
@@ -182,7 +182,7 @@ optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
 for epoch in range(20):
     for batched_graph, labels in train_dataloader:
-        pred = model(batched_graph, batched_graph.ndata['attr'].float())
+        pred = model(batched_graph, batched_graph.ndata['attr'])
         loss = F.cross_entropy(pred, labels)
         optimizer.zero_grad()
         loss.backward()
@@ -191,7 +191,7 @@ for epoch in range(20):
 num_correct = 0
 num_tests = 0
 for batched_graph, labels in test_dataloader:
-    pred = model(batched_graph, batched_graph.ndata['attr'].float())
+    pred = model(batched_graph, batched_graph.ndata['attr'])
     num_correct += (pred.argmax(1) == labels).sum().item()
     num_tests += len(labels)
......
@@ -49,7 +49,7 @@ class GINDataset(DGLBuiltinDataset):
     >>> g, label = data[128]
     >>> g
     Graph(num_nodes=13, num_edges=26,
-          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float64)}
+          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)}
           edata_schemes={})
     >>> label
     tensor(1)
@@ -61,7 +61,7 @@ class GINDataset(DGLBuiltinDataset):
     >>> batched_labels = torch.tensor(labels)
     >>> batched_graphs
     Graph(num_nodes=330, num_edges=748,
-          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float64)}
+          ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)}
           edata_schemes={})
     """
@@ -175,7 +175,6 @@ class GINDataset(DGLBuiltinDataset):
                 if tmp == len(nrow):
                     # no node attributes
                     nrow = [int(w) for w in nrow]
-                    nattr = None
                 elif tmp > len(nrow):
                     nrow = [int(w) for w in nrow[:tmp]]
                     nattr = [float(w) for w in nrow[tmp:]]
@@ -208,10 +207,8 @@ class GINDataset(DGLBuiltinDataset):
             if nattrs != []:
                 nattrs = np.stack(nattrs)
-                g.ndata['attr'] = F.tensor(nattrs)
+                g.ndata['attr'] = F.tensor(nattrs, F.float32)
                 self.nattrs_flag = True
-            else:
-                nattrs = None
             g.ndata['label'] = F.tensor(nlabels)
             if len(self.nlabel_dict) > 1:
@@ -230,7 +227,6 @@ class GINDataset(DGLBuiltinDataset):
         if not self.nattrs_flag:
             if self.verbose:
                 print('there are no node features in this dataset!')
-            label2idx = {}
             # generate node attr by node degree
             if self.degree_as_nlabel:
                 if self.verbose:
@@ -265,7 +261,7 @@ class GINDataset(DGLBuiltinDataset):
                     g.number_of_nodes(), len(label2idx)))
             attr[range(g.number_of_nodes()), [label2idx[nl]
                  for nl in F.asnumpy(g.ndata['label']).tolist()]] = 1
-            g.ndata['attr'] = F.tensor(attr)
+            g.ndata['attr'] = F.tensor(attr, F.float32)
         # after load, get the #classes and #dim
         self.gclasses = len(self.glabel_dict)
......
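
The gindt.py changes above rely on DGL's backend shim: `F.tensor(data, dtype)` builds a framework tensor of the requested dtype, which is what turns the float64 NumPy attribute matrix into float32 features. A rough sketch of the equivalent operation, assuming the PyTorch backend:

    import numpy as np
    import torch

    # Stacking rows of Python floats yields a float64 NumPy array ...
    nattrs = np.stack([[1.0, 0.0], [0.0, 1.0]])
    assert nattrs.dtype == np.float64

    # ... so the dataset now requests float32 explicitly. Under the PyTorch
    # backend, F.tensor(nattrs, F.float32) behaves roughly like:
    attr = torch.as_tensor(nattrs, dtype=torch.float32)
    assert attr.dtype == torch.float32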
from __future__ import absolute_import
import numpy as np
import os
import random
from .dgl_dataset import DGLBuiltinDataset
from .utils import loadtxt, save_graphs, load_graphs, save_info, load_info
from .. import backend as F
from ..utils import retry_method_with_fix
from ..convert import graph as dgl_graph
class LegacyTUDataset(DGLBuiltinDataset):
@@ -48,7 +45,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
     >>> g, label = data[1024]
     >>> g
     Graph(num_nodes=88, num_edges=410,
-          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
+          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
           edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
     >>> label
     tensor(1)
@@ -60,7 +57,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
     >>> batched_labels = torch.tensor(labels)
     >>> batched_graphs
     Graph(num_nodes=9539, num_edges=47382,
-          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
+          ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
           edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
 
     Notes
@@ -121,7 +118,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
             g.ndata['node_label'] = F.tensor(DS_node_labels)
             one_hot_node_labels = self._to_onehot(DS_node_labels)
             for idxs, g in zip(node_idx_list, self.graph_lists):
-                g.ndata['feat'] = F.tensor(one_hot_node_labels[idxs, :])
+                g.ndata['feat'] = F.tensor(one_hot_node_labels[idxs, :], F.float32)
             self.data_mode = "node_label"
         except IOError:
             print("No Node Label Data")
@@ -132,14 +129,15 @@ class LegacyTUDataset(DGLBuiltinDataset):
             if DS_node_attr.ndim == 1:
                 DS_node_attr = np.expand_dims(DS_node_attr, -1)
             for idxs, g in zip(node_idx_list, self.graph_lists):
-                g.ndata['feat'] = F.tensor(DS_node_attr[idxs, :])
+                g.ndata['feat'] = F.tensor(DS_node_attr[idxs, :], F.float32)
             self.data_mode = "node_attr"
         except IOError:
             print("No Node Attribute Data")
 
         if 'feat' not in g.ndata.keys():
             for idxs, g in zip(node_idx_list, self.graph_lists):
-                g.ndata['feat'] = np.ones((g.number_of_nodes(), self.hidden_size))
+                g.ndata['feat'] = F.ones((g.number_of_nodes(), self.hidden_size),
+                                         F.float32, F.cpu())
             self.data_mode = "constant"
 
         if self.verbose:
             print("Use Constant one as Feature with hidden size {}".format(self.hidden_size))
......
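
Beyond the dtype, the last hunk also changes the container of the constant fallback feature: it used to be a float64 NumPy array assigned into `ndata`, whereas `F.ones(shape, dtype, ctx)` produces a float32 backend tensor on the CPU. A quick end-to-end check, assuming the PyTorch backend; 'ENZYMES' is only an illustrative TU dataset name:

    import torch
    from dgl.data import LegacyTUDataset

    # 'ENZYMES' is a TU benchmark with node attributes; downloaded on first use.
    dataset = LegacyTUDataset(name='ENZYMES')
    g, label = dataset[0]

    # Whether 'feat' comes from node labels, node attributes, or the constant
    # fallback, it is now a float32 tensor.
    assert g.ndata['feat'].dtype == torch.float32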