"...pytorch/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "6b99f328802376d97b5c992cdf8bc8b5e3790216"
Unverified commit dbedce51 authored by Rhett Ying, committed by GitHub

[doc] fix doc build issue (#6646)

parent c9b26dda
@@ -200,13 +200,13 @@ def evaluate(model, dataloader, device):
 ###############################################################################
-# Define the main function for each process.
+# Define the run function for each process.
 #
 from torch.optim import Adam

-def main(rank, world_size, dataset, seed=0):
+def run(rank, world_size, dataset, seed=0):
     init_process_group(world_size, rank)
     if torch.cuda.is_available():
         device = torch.device("cuda:{:d}".format(rank))

@@ -255,12 +255,16 @@ def main(rank, world_size, dataset, seed=0):
 import torch.multiprocessing as mp
 from dgl.data import GINDataset

-if __name__ == "__main__":
+def main():
     if not torch.cuda.is_available():
         print("No GPU found!")
-        exit(0)
+        return
     num_gpus = torch.cuda.device_count()
-    procs = []
     dataset = GINDataset(name="IMDBBINARY", self_loop=False)
-    mp.spawn(main, args=(num_gpus, dataset), nprocs=num_gpus)
+    mp.spawn(run, args=(num_gpus, dataset), nprocs=num_gpus)
+
+if __name__ == "__main__":
+    main()
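For reference, here is the new entry point of the GIN tutorial reassembled from the two hunks above. Hiding the spawn behind main() keeps the module importable without side effects, which is presumably what the doc build needs; the torch import is assumed from the surrounding file, and run is the function renamed in the first hunk:

import torch
import torch.multiprocessing as mp
from dgl.data import GINDataset

def main():
    # Bail out with return instead of exit(0), so importing and running the
    # script from the doc build does not kill the host process.
    if not torch.cuda.is_available():
        print("No GPU found!")
        return
    num_gpus = torch.cuda.device_count()
    dataset = GINDataset(name="IMDBBINARY", self_loop=False)
    # Spawn one worker per GPU; each receives its rank plus (world_size,
    # dataset), matching run(rank, world_size, dataset, seed=0) defined above.
    mp.spawn(run, args=(num_gpus, dataset), nprocs=num_gpus)

if __name__ == "__main__":
    main()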
@@ -101,7 +101,6 @@ class SAGE(nn.Module):
 def create_dataloader(
-    args,
     graph,
     features,
     itemset,

@@ -112,18 +111,16 @@ def create_dataloader(
 ):
     datapipe = gb.DistributedItemSampler(
         item_set=itemset,
-        batch_size=args.batch_size,
+        batch_size=1024,
         drop_last=drop_last,
         shuffle=shuffle,
         drop_uneven_inputs=drop_uneven_inputs,
     )
-    datapipe = datapipe.sample_neighbor(graph, args.fanout)
+    datapipe = datapipe.sample_neighbor(graph, [10, 10, 10])
     datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
     datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
-    dataloader = gb.MultiProcessDataLoader(
-        datapipe, num_workers=args.num_workers
-    )
+    dataloader = gb.MultiProcessDataLoader(datapipe, num_workers=0)
     return dataloader
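Reassembled, the refactored create_dataloader hardcodes the hyperparameters that previously came from args. The parameters hidden by the hunk break (device, drop_last, shuffle, drop_uneven_inputs) are assumptions inferred from the names used in the body, not part of the diff:

def create_dataloader(
    graph,
    features,
    itemset,
    device,              # assumed: elided by the hunk break, used by copy_to()
    drop_last,           # assumed: elided by the hunk break
    shuffle,             # assumed: elided by the hunk break
    drop_uneven_inputs,  # assumed: elided by the hunk break
):
    # Partition the item set across ranks and form batches of 1024.
    datapipe = gb.DistributedItemSampler(
        item_set=itemset,
        batch_size=1024,
        drop_last=drop_last,
        shuffle=shuffle,
        drop_uneven_inputs=drop_uneven_inputs,
    )
    # Sample a three-hop neighborhood with fanout 10 at each hop.
    datapipe = datapipe.sample_neighbor(graph, [10, 10, 10])
    # Fetch the "feat" node feature for the sampled subgraphs.
    datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
    # Convert to DGL mini-batches and move them to the training device.
    datapipe = datapipe.to_dgl()
    datapipe = datapipe.copy_to(device)
    dataloader = gb.MultiProcessDataLoader(datapipe, num_workers=0)
    return dataloader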
@@ -136,12 +133,11 @@ def create_dataloader(
 @torch.no_grad()
-def evaluate(rank, args, model, graph, features, itemset, num_classes, device):
+def evaluate(rank, model, graph, features, itemset, num_classes, device):
     model.eval()
     y = []
     y_hats = []
     dataloader = create_dataloader(
-        args,
         graph,
         features,
         itemset,

@@ -186,7 +182,6 @@ def evaluate(rank, args, model, graph, features, itemset, num_classes, device):
 def train(
     world_size,
     rank,
-    args,
     graph,
     features,
     train_set,

@@ -195,10 +190,9 @@ def train(
     model,
     device,
 ):
-    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

     # Create training data loader.
     dataloader = create_dataloader(
-        args,
         graph,
         features,
         train_set,

@@ -208,7 +202,7 @@ def train(
         drop_uneven_inputs=False,
     )

-    for epoch in range(args.epochs):
+    for epoch in range(5):
         epoch_start = time.time()
         model.train()

@@ -246,7 +240,6 @@ def train(
         acc = (
             evaluate(
                 rank,
-                args,
                 model,
                 graph,
                 features,

@@ -289,7 +282,7 @@ def train(
 #
-def run(rank, world_size, args, devices, dataset):
+def run(rank, world_size, devices, dataset):
     # Set up multiprocessing environment.
     device = devices[rank]
     torch.cuda.set_device(device)

@@ -304,7 +297,6 @@ def run(rank, world_size, args, devices, dataset):
     features = dataset.feature
     train_set = dataset.tasks[0].train_set
     valid_set = dataset.tasks[0].validation_set
-    args.fanout = list(map(int, args.fanout.split(",")))
     num_classes = dataset.tasks[0].metadata["num_classes"]
     in_size = features.size("node", None, "feat")[0]

@@ -321,7 +313,6 @@ def run(rank, world_size, args, devices, dataset):
     train(
         world_size,
         rank,
-        args,
         graph,
         features,
         train_set,

@@ -338,7 +329,6 @@ def run(rank, world_size, args, devices, dataset):
     test_acc = (
         evaluate(
             rank,
-            args,
             model,
             graph,
             features,

@@ -363,20 +353,14 @@ def run(rank, world_size, args, devices, dataset):
 #
-if __name__ == "__main__":
+def main():
     if not torch.cuda.is_available():
         print("No GPU found!")
-        exit(0)
+        return

-    args = {
-        "epochs": 5,
-        "lr": 0.01,
-        "batch_size": 1024,
-        "fanout": "10,10,10",
-        "num_workers": 0,
-    }
-    devices = torch.arange(torch.cuda.device_count())
+    devices = [
+        torch.device(f"cuda:{i}") for i in range(torch.cuda.device_count())
+    ]
     world_size = len(devices)
     print(f"Training with {world_size} gpus.")

@@ -390,7 +374,11 @@ if __name__ == "__main__":
     mp.set_sharing_strategy("file_system")
     mp.spawn(
         run,
-        args=(world_size, args, devices, dataset),
+        args=(world_size, devices, dataset),
         nprocs=world_size,
         join=True,
     )
+
+if __name__ == "__main__":
+    main()
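Finally, the new module-level entry point reassembled from the last two hunks; the dataset loading between them is not shown in the diff, so it is left as a placeholder here:

def main():
    if not torch.cuda.is_available():
        print("No GPU found!")
        return

    # Enumerate one torch.device per visible GPU instead of a raw index
    # tensor, so each spawned process can call torch.cuda.set_device(device).
    devices = [
        torch.device(f"cuda:{i}") for i in range(torch.cuda.device_count())
    ]
    world_size = len(devices)
    print(f"Training with {world_size} gpus.")

    # ... dataset loading elided between the two hunks above ...

    mp.set_sharing_strategy("file_system")
    mp.spawn(
        run,
        args=(world_size, devices, dataset),
        nprocs=world_size,
        join=True,
    )

if __name__ == "__main__":
    main()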