Unverified commit 9d417346, authored by Rhett Ying and committed by GitHub
Browse files

[GraphBolt] update to_dgl() in examples (#6763)

parent a5e5f11a
...@@ -153,7 +153,6 @@ def evaluate(rank, model, dataloader, num_classes, device): ...@@ -153,7 +153,6 @@ def evaluate(rank, model, dataloader, num_classes, device):
for step, data in ( for step, data in (
tqdm.tqdm(enumerate(dataloader)) if rank == 0 else enumerate(dataloader) tqdm.tqdm(enumerate(dataloader)) if rank == 0 else enumerate(dataloader)
): ):
data = data.to_dgl()
blocks = data.blocks blocks = data.blocks
x = data.node_features["feat"] x = data.node_features["feat"]
y.append(data.labels) y.append(data.labels)
...@@ -206,9 +205,6 @@ def train( ...@@ -206,9 +205,6 @@ def train(
if rank == 0 if rank == 0
else enumerate(train_dataloader) else enumerate(train_dataloader)
): ):
# Convert data to DGL format.
data = data.to_dgl()
# The input features are from the source nodes in the first # The input features are from the source nodes in the first
# layer's computation graph. # layer's computation graph.
x = data.node_features["feat"] x = data.node_features["feat"]
......
...@@ -93,7 +93,6 @@ class SAGE(LightningModule): ...@@ -93,7 +93,6 @@ class SAGE(LightningModule):
) )
def training_step(self, batch, batch_idx): def training_step(self, batch, batch_idx):
batch = batch.to_dgl()
blocks = [block.to("cuda") for block in batch.blocks] blocks = [block.to("cuda") for block in batch.blocks]
x = batch.node_features["feat"] x = batch.node_features["feat"]
y = batch.labels.to("cuda") y = batch.labels.to("cuda")
...@@ -111,7 +110,6 @@ class SAGE(LightningModule): ...@@ -111,7 +110,6 @@ class SAGE(LightningModule):
return loss return loss
def validation_step(self, batch, batch_idx): def validation_step(self, batch, batch_idx):
batch = batch.to_dgl()
blocks = [block.to("cuda") for block in batch.blocks] blocks = [block.to("cuda") for block in batch.blocks]
x = batch.node_features["feat"] x = batch.node_features["feat"]
y = batch.labels.to("cuda") y = batch.labels.to("cuda")
......
...@@ -101,7 +101,6 @@ class SAGE(nn.Module): ...@@ -101,7 +101,6 @@ class SAGE(nn.Module):
) )
feature = feature.to(device) feature = feature.to(device)
for step, data in tqdm.tqdm(enumerate(dataloader)): for step, data in tqdm.tqdm(enumerate(dataloader)):
data = data.to_dgl()
x = feature[data.input_nodes] x = feature[data.input_nodes]
hidden_x = layer(data.blocks[0], x) # len(blocks) = 1 hidden_x = layer(data.blocks[0], x) # len(blocks) = 1
if not is_last_layer: if not is_last_layer:
...@@ -237,20 +236,6 @@ def create_dataloader(args, graph, features, itemset, is_train=True): ...@@ -237,20 +236,6 @@ def create_dataloader(args, graph, features, itemset, is_train=True):
return dataloader return dataloader
def to_binary_link_dgl_computing_pack(data: gb.DGLMiniBatch):
    """Assemble the node pairs and binary labels used for link prediction.

    Reads ``data.positive_node_pairs`` and ``data.negative_node_pairs``, each
    unpacked as a ``(src, dst)`` pair of tensors. Positive entries are placed
    before negative entries in every returned tensor.

    Returns
    -------
    tuple
        ``((src, dst), labels)`` where ``src``/``dst`` are the concatenated
        source/destination tensors and ``labels`` is a float tensor holding
        1 for each positive pair followed by 0 for each negative pair.
    """
    src_pos, dst_pos = data.positive_node_pairs
    src_neg, dst_neg = data.negative_node_pairs

    # Positives first, then negatives, along the leading dimension.
    src_all = torch.cat((src_pos, src_neg), dim=0)
    dst_all = torch.cat((dst_pos, dst_neg), dim=0)

    # Labels take their shape from the source tensors, in the same order as
    # the concatenation above.
    labels = torch.cat(
        [torch.ones_like(src_pos), torch.zeros_like(src_neg)], dim=0
    )
    return (src_all, dst_all), labels.float()
@torch.no_grad() @torch.no_grad()
def compute_mrr(args, model, evaluator, node_emb, src, dst, neg_dst): def compute_mrr(args, model, evaluator, node_emb, src, dst, neg_dst):
"""Compute the Mean Reciprocal Rank (MRR) for given source and destination """Compute the Mean Reciprocal Rank (MRR) for given source and destination
...@@ -324,11 +309,8 @@ def train(args, model, graph, features, train_set): ...@@ -324,11 +309,8 @@ def train(args, model, graph, features, train_set):
total_loss = 0 total_loss = 0
start_epoch_time = time.time() start_epoch_time = time.time()
for step, data in enumerate(dataloader): for step, data in enumerate(dataloader):
# Convert data to DGL format. # Get node pairs with labels for loss calculation.
data = data.to_dgl() compacted_pairs, labels = data.node_pairs_with_labels
# Unpack MiniBatch.
compacted_pairs, labels = to_binary_link_dgl_computing_pack(data)
node_feature = data.node_features["feat"] node_feature = data.node_features["feat"]
# Convert sampled subgraphs to DGL blocks. # Convert sampled subgraphs to DGL blocks.
blocks = data.blocks blocks = data.blocks
......
...@@ -202,7 +202,6 @@ class SAGE(nn.Module): ...@@ -202,7 +202,6 @@ class SAGE(nn.Module):
feature = feature.to(device) feature = feature.to(device)
for step, data in tqdm(enumerate(dataloader)): for step, data in tqdm(enumerate(dataloader)):
data = data.to_dgl()
x = feature[data.input_nodes] x = feature[data.input_nodes]
hidden_x = layer(data.blocks[0], x) # len(blocks) = 1 hidden_x = layer(data.blocks[0], x) # len(blocks) = 1
if not is_last_layer: if not is_last_layer:
...@@ -261,7 +260,6 @@ def evaluate(args, model, graph, features, itemset, num_classes): ...@@ -261,7 +260,6 @@ def evaluate(args, model, graph, features, itemset, num_classes):
) )
for step, data in tqdm(enumerate(dataloader)): for step, data in tqdm(enumerate(dataloader)):
data = data.to_dgl()
x = data.node_features["feat"] x = data.node_features["feat"]
y.append(data.labels) y.append(data.labels)
y_hats.append(model(data.blocks, x)) y_hats.append(model(data.blocks, x))
...@@ -292,9 +290,6 @@ def train(args, graph, features, train_set, valid_set, num_classes, model): ...@@ -292,9 +290,6 @@ def train(args, graph, features, train_set, valid_set, num_classes, model):
model.train() model.train()
total_loss = 0 total_loss = 0
for step, data in enumerate(dataloader): for step, data in enumerate(dataloader):
# Convert data to DGL format.
data = data.to_dgl()
# The input features from the source nodes in the first layer's # The input features from the source nodes in the first layer's
# computation graph. # computation graph.
x = data.node_features["feat"] x = data.node_features["feat"]
......
...@@ -76,20 +76,6 @@ class GraphSAGE(nn.Module): ...@@ -76,20 +76,6 @@ class GraphSAGE(nn.Module):
return hidden_x return hidden_x
def to_binary_link_dgl_computing_pack(data: gb.MiniBatch):
    """Assemble the node pairs and binary labels used for link prediction.

    Reads ``data.positive_node_pairs`` and ``data.negative_node_pairs``, each
    unpacked as a ``(src, dst)`` pair of tensors. Positive entries are placed
    before negative entries in every returned tensor.

    Returns
    -------
    tuple
        ``((src, dst), labels)`` where ``src``/``dst`` are the concatenated
        source/destination tensors and ``labels`` holds 1 for each positive
        pair followed by 0 for each negative pair. The labels keep the dtype
        of the source tensors; no float cast is applied here.
    """
    src_pos, dst_pos = data.positive_node_pairs
    src_neg, dst_neg = data.negative_node_pairs

    # Positives first, then negatives, along the leading dimension.
    src_all = torch.cat((src_pos, src_neg), dim=0)
    dst_all = torch.cat((dst_pos, dst_neg), dim=0)

    # Labels take their shape and dtype from the source tensors, in the same
    # order as the concatenation above.
    labels = torch.cat(
        [torch.ones_like(src_pos), torch.zeros_like(src_neg)], dim=0
    )
    return (src_all, dst_all), labels
@torch.no_grad() @torch.no_grad()
def evaluate(model, dataset, device): def evaluate(model, dataset, device):
model.eval() model.eval()
......
...@@ -176,7 +176,7 @@ def rel_graph_embed(graph, embed_size): ...@@ -176,7 +176,7 @@ def rel_graph_embed(graph, embed_size):
for the "paper" node type. for the "paper" node type.
""" """
node_num = {} node_num = {}
node_type_to_id = graph.metadata.node_type_to_id node_type_to_id = graph.node_type_to_id
node_type_offset = graph.node_type_offset node_type_offset = graph.node_type_offset
for ntype, ntype_id in node_type_to_id.items(): for ntype, ntype_id in node_type_to_id.items():
# Skip the "paper" node type. # Skip the "paper" node type.
...@@ -328,12 +328,12 @@ class EntityClassify(nn.Module): ...@@ -328,12 +328,12 @@ class EntityClassify(nn.Module):
# Generate and sort a list of unique edge types from the input graph. # Generate and sort a list of unique edge types from the input graph.
# eg. ['writes', 'cites'] # eg. ['writes', 'cites']
etypes = list(graph.metadata.edge_type_to_id.keys()) etypes = list(graph.edge_type_to_id.keys())
etypes = [gb.etype_str_to_tuple(etype)[1] for etype in etypes] etypes = [gb.etype_str_to_tuple(etype)[1] for etype in etypes]
self.relation_names = etypes self.relation_names = etypes
self.relation_names.sort() self.relation_names.sort()
self.dropout = 0.5 self.dropout = 0.5
ntypes = list(graph.metadata.node_type_to_id.keys()) ntypes = list(graph.node_type_to_id.keys())
self.layers = nn.ModuleList() self.layers = nn.ModuleList()
# First layer: transform input features to hidden features. Use ReLU # First layer: transform input features to hidden features. Use ReLU
...@@ -487,9 +487,6 @@ def evaluate( ...@@ -487,9 +487,6 @@ def evaluate(
y_true = list() y_true = list()
for data in tqdm(data_loader, desc="Inference"): for data in tqdm(data_loader, desc="Inference"):
# Convert data to DGL format for computing.
data = data.to_dgl()
blocks = [block.to(device) for block in data.blocks] blocks = [block.to(device) for block in data.blocks]
node_features = extract_node_features( node_features = extract_node_features(
name, blocks[0], data, node_embed, device name, blocks[0], data, node_embed, device
...@@ -558,9 +555,6 @@ def run( ...@@ -558,9 +555,6 @@ def run(
total_loss = 0 total_loss = 0
for data in tqdm(data_loader, desc=f"Training~Epoch {epoch:02d}"): for data in tqdm(data_loader, desc=f"Training~Epoch {epoch:02d}"):
# Convert data to DGL format for computing.
data = data.to_dgl()
# Convert MiniBatch to DGL Blocks. # Convert MiniBatch to DGL Blocks.
blocks = [block.to(device) for block in data.blocks] blocks = [block.to(device) for block in data.blocks]
......
...@@ -118,7 +118,6 @@ def create_dataloader( ...@@ -118,7 +118,6 @@ def create_dataloader(
) )
datapipe = datapipe.sample_neighbor(graph, [10, 10, 10]) datapipe = datapipe.sample_neighbor(graph, [10, 10, 10])
datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"]) datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
datapipe = datapipe.to_dgl()
datapipe = datapipe.copy_to(device) datapipe = datapipe.copy_to(device)
dataloader = gb.DataLoader(datapipe, num_workers=0) dataloader = gb.DataLoader(datapipe, num_workers=0)
return dataloader return dataloader
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment