Unverified commit d9caeaaa authored by Xinyu Yao, committed by GitHub

[GraphBolt] Update dataset names exposed to users in examples. (#7280)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent e9661d3b
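The user-facing effect of this commit: examples now refer to builtin datasets by their plain OGB names, and BuiltinDataset appends the `-seeds` suffix internally (see the BuiltinDataset hunk below). A minimal sketch of the intended usage after the change, assuming DGL 2.2+ with GraphBolt available:

import dgl.graphbolt as gb

# Users pass the plain name; __init__ appends "-seeds" internally
# before resolving the download/on-disk path.
dataset = gb.BuiltinDataset("ogbn-products").load()

# Names that already contain "seeds" are left untouched, so the
# older suffixed form keeps working.
legacy = gb.BuiltinDataset("ogbn-products-seeds").load()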
@@ -205,7 +205,7 @@ if __name__ == "__main__":
     )
     args = parser.parse_args()
-    dataset = gb.BuiltinDataset("ogbn-products-seeds").load()
+    dataset = gb.BuiltinDataset("ogbn-products").load()
     datamodule = DataModule(
         dataset,
         [10, 10, 10],
@@ -385,7 +385,7 @@ def main(args):
     # Load and preprocess dataset.
     print("Loading data")
-    dataset = gb.BuiltinDataset("ogbl-citation2-seeds").load()
+    dataset = gb.BuiltinDataset("ogbl-citation2").load()
     # Move the dataset to the selected storage.
     if args.storage_device == "pinned":
@@ -364,12 +364,8 @@ def parse_args():
     parser.add_argument(
         "--dataset",
         type=str,
-        default="ogbn-products-seeds",
-        choices=[
-            "ogbn-arxiv-seeds",
-            "ogbn-products-seeds",
-            "ogbn-papers100M-seeds",
-        ],
+        default="ogbn-products",
+        choices=["ogbn-arxiv", "ogbn-products", "ogbn-papers100M"],
         help="The dataset we can use for node classification example. Currently"
         " ogbn-products, ogbn-arxiv, ogbn-papers100M datasets are supported.",
     )
@@ -208,9 +208,8 @@ def main():
     parser.add_argument(
         "--dataset",
         type=str,
-        default="ogbn-products-seeds",
-        help='Name of the dataset to use (e.g., "ogbn-products-seeds",'
-        + ' "ogbn-arxiv-seeds")',
+        default="ogbn-products",
+        help='Name of the dataset to use (e.g., "ogbn-products", "ogbn-arxiv")',
     )
     parser.add_argument(
         "--epochs", type=int, default=10, help="Number of training epochs."
@@ -324,12 +324,8 @@ def parse_args():
     parser.add_argument(
         "--dataset",
         type=str,
-        default="ogbn-products-seeds",
-        choices=[
-            "ogbn-arxiv-seeds",
-            "ogbn-products-seeds",
-            "ogbn-papers100M-seeds",
-        ],
+        default="ogbn-products",
+        choices=["ogbn-arxiv", "ogbn-products", "ogbn-papers100M"],
         help="The dataset we can use for node classification example. Currently"
         " ogbn-products, ogbn-arxiv, ogbn-papers100M datasets are supported.",
     )
@@ -158,7 +158,7 @@ if __name__ == "__main__":
     # Load and preprocess dataset.
     print("Loading data...")
-    dataset = gb.BuiltinDataset("cora-seeds").load()
+    dataset = gb.BuiltinDataset("cora").load()
     # If a CUDA device is selected, we pin the graph and the features so that
     # the GPU can access them.
@@ -117,7 +117,7 @@ if __name__ == "__main__":
     # Load and preprocess dataset.
    print("Loading data...")
-    dataset = gb.BuiltinDataset("cora-seeds").load()
+    dataset = gb.BuiltinDataset("cora").load()
     # If a CUDA device is selected, we pin the graph and the features so that
     # the GPU can access them.
@@ -153,7 +153,7 @@ def extract_embed(node_embed, input_nodes):
 def extract_node_features(name, block, data, node_embed, device):
     """Extract the node features from embedding layer or raw features."""
-    if name == "ogbn-mag-seeds":
+    if name == "ogbn-mag":
         input_nodes = {
             k: v.to(device) for k, v in block.srcdata[dgl.NID].items()
         }
@@ -419,8 +419,8 @@ def evaluate(
     model.eval()
     category = "paper"
     # An evaluator for the dataset.
-    if name == "ogbn-mag-seeds":
-        evaluator = Evaluator(name="ogbn-mag")
+    if name == "ogbn-mag":
+        evaluator = Evaluator(name=name)
     else:
         evaluator = MAG240MEvaluator()
@@ -578,7 +578,7 @@ def main(args):
     # `institution` are generated in advance and stored in the feature store.
     # For `ogbn-mag`, we generate the features on the fly.
     embed_layer = None
-    if args.dataset == "ogbn-mag-seeds":
+    if args.dataset == "ogbn-mag":
         # Create the embedding layer and move it to the appropriate device.
         embed_layer = rel_graph_embed(g, feat_size).to(device)
         print(
@@ -652,9 +652,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--dataset",
         type=str,
-        default="ogbn-mag-seeds",
-        choices=["ogbn-mag-seeds", "ogb-lsc-mag240m"],
-        help="Dataset name. Possible values: ogbn-mag-seeds, ogb-lsc-mag240m",
+        default="ogbn-mag",
+        choices=["ogbn-mag", "ogb-lsc-mag240m"],
+        help="Dataset name. Possible values: ogbn-mag, ogb-lsc-mag240m",
     )
     parser.add_argument("--num_epochs", type=int, default=3)
     parser.add_argument("--num_workers", type=int, default=0)
@@ -242,7 +242,7 @@ if __name__ == "__main__":
     # Load and preprocess dataset.
     print("Loading data")
     device = torch.device("cpu" if args.mode == "cpu" else "cuda")
-    dataset = gb.BuiltinDataset("ogbn-products-seeds").load()
+    dataset = gb.BuiltinDataset("ogbn-products").load()
     g = dataset.graph
     features = dataset.feature
@@ -1015,6 +1015,11 @@ class BuiltinDataset(OnDiskDataset):
     _all_datasets = _datasets + _large_datasets

     def __init__(self, name: str, root: str = "datasets") -> OnDiskDataset:
+        # For users on DGL 2.2 or later, we prefer the datasets with the
+        # `seeds` suffix. This hack should be removed once the datasets with
+        # the `seeds` suffix have fully replaced the previous ones.
+        if "seeds" not in name:
+            name += "-seeds"
         dataset_dir = os.path.join(root, name)
         if not os.path.exists(dataset_dir):
             if name not in self._all_datasets:
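Since the check above is a substring test rather than a suffix test, its behavior is easy to sketch in isolation. A hypothetical standalone helper (`resolve_builtin_name` is not a GraphBolt API, only an illustration of the logic in `__init__`):

def resolve_builtin_name(name: str) -> str:
    # Mirror of the hack in BuiltinDataset.__init__: append "-seeds"
    # unless the name already mentions "seeds" anywhere.
    if "seeds" not in name:
        name += "-seeds"
    return name

assert resolve_builtin_name("ogbn-products") == "ogbn-products-seeds"
assert resolve_builtin_name("ogbn-products-seeds") == "ogbn-products-seeds"
# Substring, not suffix: a name merely containing "seeds" is kept as is.
assert resolve_builtin_name("seeds-of-change") == "seeds-of-change"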
@@ -2481,8 +2481,10 @@ def test_BuiltinDataset():
     with tempfile.TemporaryDirectory() as test_dir:
         # Case 1: download from DGL S3 storage.
         dataset_name = "test-dataset-231207"
-        # Add dataset to the builtin dataset list for testing only.
-        gb.BuiltinDataset._all_datasets.append(dataset_name)
+        # Add the dataset to the builtin dataset list for testing only. Since
+        # the `seeds` suffix is appended to dataset names when downloading, we
+        # append the `-seeds` suffix to the dataset name here as well.
+        gb.BuiltinDataset._all_datasets.append(dataset_name + "-seeds")
         dataset = gb.BuiltinDataset(name=dataset_name, root=test_dir).load()
         assert dataset.graph is not None
         assert dataset.feature is not None
@@ -2499,7 +2501,7 @@ def test_BuiltinDataset():
         dataset = None
         # Case 3: dataset is not available.
-        dataset_name = "fake_name"
+        dataset_name = "fake_name-seeds"
         with pytest.raises(
             RuntimeError,
             match=rf"Dataset {dataset_name} is not available.*",
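The switch to the suffixed name in the match pattern follows from the `__init__` hack: the name embedded in the error message is the normalized, suffixed one. A hedged sketch of the failure path this test exercises (error wording assumed from the test's `match` pattern):

import pytest
import dgl.graphbolt as gb

# "fake_name" is normalized to "fake_name-seeds" before the
# availability check, so the RuntimeError names the suffixed form.
with pytest.raises(
    RuntimeError, match=r"Dataset fake_name-seeds is not available"
):
    gb.BuiltinDataset(name="fake_name")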