Unverified commit 31e4a89b, authored by Mufei Li and committed by GitHub
Browse files

[DGL-Go] Inference for Node Prediction Pipeline (full & ns) (#4095)

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update
parent 69226588
...@@ -3,8 +3,9 @@ import torch ...@@ -3,8 +3,9 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import dgl import dgl
from dgl.data import AsNodePredDataset import os
from dgl.data import AsNodePredDataset
{{ data_import_code }} {{ data_import_code }}
{{ model_code }} {{ model_code }}
...@@ -13,7 +14,7 @@ from dgl.data import AsNodePredDataset ...@@ -13,7 +14,7 @@ from dgl.data import AsNodePredDataset
class EarlyStopping: class EarlyStopping:
def __init__(self, def __init__(self,
patience: int = -1, patience: int = -1,
checkpoint_path: str = 'checkpoint.pt'): checkpoint_path: str = 'checkpoint.pth'):
self.patience = patience self.patience = patience
self.checkpoint_path = checkpoint_path self.checkpoint_path = checkpoint_path
self.counter = 0 self.counter = 0
...@@ -42,6 +43,9 @@ class EarlyStopping: ...@@ -42,6 +43,9 @@ class EarlyStopping:
def load_checkpoint(self, model): def load_checkpoint(self, model):
model.load_state_dict(torch.load(self.checkpoint_path)) model.load_state_dict(torch.load(self.checkpoint_path))
def close(self):
os.remove(self.checkpoint_path)
{% endif %} {% endif %}
...@@ -134,39 +138,50 @@ def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn): ...@@ -134,39 +138,50 @@ def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn):
{% if user_cfg.early_stop %} {% if user_cfg.early_stop %}
stopper.load_checkpoint(model) stopper.load_checkpoint(model)
stopper.close()
{% endif %} {% endif %}
model.eval() model.eval()
with torch.no_grad(): with torch.no_grad():
test_acc = evaluate(model, test_g, test_nfeat, test_labels, test_nid, cfg["eval_device"]) test_acc = evaluate(model, test_g, test_nfeat, test_labels, test_nid, cfg["eval_device"])
return test_acc return test_acc
def main(): def main(run, cfg, data):
{{ user_cfg_str }}
device = cfg['device'] device = cfg['device']
pipeline_cfg = cfg["general_pipeline"] pipeline_cfg = cfg["general_pipeline"]
model = {{ model_class_name }}(**cfg["model"])
model = model.to(device)
loss = torch.nn.{{ user_cfg.general_pipeline.loss }}()
optimizer = torch.optim.{{ user_cfg.general_pipeline.optimizer.name }}(model.parameters(), **pipeline_cfg["optimizer"])
test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss)
cpt_path = os.path.join(pipeline_cfg["save_path"], 'run_{}.pth'.format(run))
torch.save({'cfg': cfg, 'model': model.state_dict()}, cpt_path)
print('Saved training checkpoint to {}'.format(cpt_path))
return test_acc
if __name__ == '__main__':
{{ user_cfg_str }}
if not torch.cuda.is_available():
cfg['device'] = 'cpu'
# load data # load data
data = AsNodePredDataset({{data_initialize_code}}) data = AsNodePredDataset({{data_initialize_code}})
# create model
model_cfg = cfg["model"] model_cfg = cfg["model"]
cfg["model"]["data_info"] = { cfg["model"]["data_info"] = {
"in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1], "in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1],
"out_size": data.num_classes, "out_size": data.num_classes,
"num_nodes": data[0].num_nodes() "num_nodes": data[0].num_nodes()
} }
model = {{ model_class_name }}(**cfg["model"])
model = model.to(device)
loss = torch.nn.{{ user_cfg.general_pipeline.loss }}()
optimizer = torch.optim.{{ user_cfg.general_pipeline.optimizer.name }}(model.parameters(), **pipeline_cfg["optimizer"])
test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss)
torch.save(model.state_dict(), pipeline_cfg["save_path"])
return test_acc
if __name__ == '__main__': os.makedirs(cfg['general_pipeline']["save_path"])
all_acc = [] all_acc = []
num_runs = {{ user_cfg.general_pipeline.num_runs }} num_runs = {{ user_cfg.general_pipeline.num_runs }}
for run in range(num_runs): for run in range(num_runs):
print(f'Run experiment #{run}') print(f'Run experiment #{run}')
test_acc = main() test_acc = main(run, cfg, data)
print("Test Accuracy {:.4f}".format(test_acc)) print("Test Accuracy {:.4f}".format(test_acc))
all_acc.append(test_acc) all_acc.append(test_acc)
avg_acc = np.round(np.mean(all_acc), 6) avg_acc = np.round(np.mean(all_acc), 6)
......
...@@ -3,7 +3,7 @@ import torch ...@@ -3,7 +3,7 @@ import torch
class EarlyStopping: class EarlyStopping:
def __init__(self, def __init__(self,
patience: int = -1, patience: int = -1,
checkpoint_path: str = 'checkpoint.pt'): checkpoint_path: str = 'checkpoint.pth'):
self.patience = patience self.patience = patience
self.checkpoint_path = checkpoint_path self.checkpoint_path = checkpoint_path
self.counter = 0 self.counter = 0
......
...@@ -14,7 +14,7 @@ from .base_model import DGLBaseModel ...@@ -14,7 +14,7 @@ from .base_model import DGLBaseModel
class PipelineConfig(DGLBaseModel): class PipelineConfig(DGLBaseModel):
node_embed_size: Optional[int] = -1 node_embed_size: Optional[int] = -1
early_stop: Optional[dict] early_stop: Optional[dict]
num_epochs: int = 200 num_epochs: int = 200
...@@ -25,5 +25,5 @@ class PipelineConfig(DGLBaseModel): ...@@ -25,5 +25,5 @@ class PipelineConfig(DGLBaseModel):
class UserConfig(DGLBaseModel): class UserConfig(DGLBaseModel):
version: Optional[str] = "0.0.1" version: Optional[str] = "0.0.1"
pipeline_name: PipelineFactory.get_pipeline_enum() pipeline_name: PipelineFactory.get_pipeline_enum()
pipeline_mode: str
device: str = "cpu" device: str = "cpu"
# general_pipeline: PipelineConfig = PipelineConfig()
\ No newline at end of file
...@@ -265,6 +265,23 @@ class PipelineFactory: ...@@ -265,6 +265,23 @@ class PipelineFactory:
"PipelineName", {k: k for k, v in cls.registry.items()}) "PipelineName", {k: k for k, v in cls.registry.items()})
return enum_class return enum_class
class ApplyPipelineFactory:
"""The factory class for creating executors for inference"""
registry: Dict[str, PipelineBase] = {}
""" Internal registry for available executors """
@classmethod
def register(cls, name: str) -> Callable:
def inner_wrapper(wrapped_class) -> Callable:
if name in cls.registry:
logger.warning(
'Executor %s already exists. Will replace it', name)
cls.registry[name] = wrapped_class()
return wrapped_class
return inner_wrapper
model_dir = Path(__file__).parent.parent / "model" model_dir = Path(__file__).parent.parent / "model"
......
version: 0.0.1 version: 0.0.1
pipeline_name: graphpred pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0 device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
data: data:
name: ogbg-molhiv name: ogbg-molhiv
...@@ -26,4 +27,4 @@ general_pipeline: ...@@ -26,4 +27,4 @@ general_pipeline:
loss: BCEWithLogitsLoss loss: BCEWithLogitsLoss
metric: roc_auc_score metric: roc_auc_score
num_epochs: 100 # Number of training epochs num_epochs: 100 # Number of training epochs
save_path: model.pth # Path to save the model save_path: "results" # Directory to save the experiment results
version: 0.0.1 version: 0.0.1
pipeline_name: graphpred pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0 device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
data: data:
name: ogbg-molhiv name: ogbg-molhiv
...@@ -33,4 +34,4 @@ general_pipeline: ...@@ -33,4 +34,4 @@ general_pipeline:
loss: BCEWithLogitsLoss loss: BCEWithLogitsLoss
metric: roc_auc_score metric: roc_auc_score
num_epochs: 200 # Number of training epochs num_epochs: 200 # Number of training epochs
save_path: model.pth # Path to save the model save_path: "results" # Directory to save the experiment results
version: 0.0.1 version: 0.0.1
pipeline_name: graphpred pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0 device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
data: data:
name: ogbg-molpcba name: ogbg-molpcba
...@@ -26,4 +27,4 @@ general_pipeline: ...@@ -26,4 +27,4 @@ general_pipeline:
loss: BCEWithLogitsLoss loss: BCEWithLogitsLoss
metric: average_precision_score metric: average_precision_score
num_epochs: 100 # Number of training epochs num_epochs: 100 # Number of training epochs
save_path: model.pth # Path to save the model save_path: "results" # Directory to save the experiment results
version: 0.0.1 version: 0.0.1
pipeline_name: linkpred pipeline_name: linkpred
pipeline_mode: train
device: cpu device: cpu
data: data:
name: ogbl-citation2 name: ogbl-citation2
...@@ -15,7 +16,7 @@ node_model: ...@@ -15,7 +16,7 @@ node_model:
aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
edge_model: edge_model:
name: ele name: ele
hidden_size: 64 # Hidden size. hidden_size: 64 # Hidden size.
num_layers: 2 # Number of hidden layers. num_layers: 2 # Number of hidden layers.
bias: true # Whether to use bias in the linear layer. bias: true # Whether to use bias in the linear layer.
neg_sampler: neg_sampler:
...@@ -31,5 +32,5 @@ general_pipeline: ...@@ -31,5 +32,5 @@ general_pipeline:
name: Adam name: Adam
lr: 0.005 lr: 0.005
loss: BCELoss loss: BCELoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 1 # Number of experiments to run num_runs: 1 # Number of experiments to run
version: 0.0.1 version: 0.0.1
pipeline_name: linkpred pipeline_name: linkpred
pipeline_mode: train
device: cpu device: cpu
data: data:
name: ogbl-collab name: ogbl-collab
...@@ -15,7 +16,7 @@ node_model: ...@@ -15,7 +16,7 @@ node_model:
aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
edge_model: edge_model:
name: ele name: ele
hidden_size: 64 # Hidden size. hidden_size: 64 # Hidden size.
num_layers: 2 # Number of hidden layers. num_layers: 2 # Number of hidden layers.
bias: true # Whether to use bias in the linear layer. bias: true # Whether to use bias in the linear layer.
neg_sampler: neg_sampler:
...@@ -31,5 +32,5 @@ general_pipeline: ...@@ -31,5 +32,5 @@ general_pipeline:
name: Adam name: Adam
lr: 0.005 lr: 0.005
loss: BCELoss loss: BCELoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 1 # Number of experiments to run num_runs: 1 # Number of experiments to run
version: 0.0.1 version: 0.0.1
pipeline_name: linkpred pipeline_name: linkpred
pipeline_mode: train
device: cuda device: cuda
data: data:
name: cora name: cora
...@@ -15,7 +16,7 @@ node_model: ...@@ -15,7 +16,7 @@ node_model:
aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
edge_model: edge_model:
name: ele name: ele
hidden_size: 64 # Hidden size. hidden_size: 64 # Hidden size.
num_layers: 2 # Number of hidden layers. num_layers: 2 # Number of hidden layers.
bias: true # Whether to use bias in the linear layer. bias: true # Whether to use bias in the linear layer.
neg_sampler: neg_sampler:
...@@ -31,5 +32,5 @@ general_pipeline: ...@@ -31,5 +32,5 @@ general_pipeline:
name: Adam name: Adam
lr: 0.005 lr: 0.005
loss: BCELoss loss: BCELoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 1 # Number of experiments to run num_runs: 1 # Number of experiments to run
# Accuracy across 5 runs: 0.593288 ± 0.006103 # Accuracy across 5 runs: 0.593288 ± 0.006103
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred-ns pipeline_name: nodepred-ns
pipeline_mode: train
device: 'cuda:0' device: 'cuda:0'
eval_device: 'cpu' eval_device: 'cpu'
data: data:
...@@ -31,5 +32,5 @@ general_pipeline: ...@@ -31,5 +32,5 @@ general_pipeline:
lr: 0.005 lr: 0.005
weight_decay: 0.0 weight_decay: 0.0
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 5 num_runs: 5
# Accuracy across 1 runs: 0.796911 # Accuracy across 1 runs: 0.796911
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred-ns pipeline_name: nodepred-ns
pipeline_mode: train
device: cuda device: cuda
eval_device: cpu eval_device: cpu
data: data:
...@@ -35,5 +36,5 @@ general_pipeline: ...@@ -35,5 +36,5 @@ general_pipeline:
lr: 0.005 lr: 0.005
weight_decay: 0.0 weight_decay: 0.0
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 5 # Number of experiments to run num_runs: 5 # Number of experiments to run
# Accuracy across 10 runs: 0.7097 ± 0.006914 # Accuracy across 10 runs: 0.7097 ± 0.006914
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: citeseer name: citeseer
...@@ -28,5 +29,5 @@ general_pipeline: ...@@ -28,5 +29,5 @@ general_pipeline:
lr: 0.005 lr: 0.005
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.6852 ± 0.008875 # Accuracy across 10 runs: 0.6852 ± 0.008875
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: citeseer name: citeseer
...@@ -24,5 +25,5 @@ general_pipeline: ...@@ -24,5 +25,5 @@ general_pipeline:
lr: 0.01 lr: 0.01
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.6994 ± 0.004005 # Accuracy across 10 runs: 0.6994 ± 0.004005
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: citeseer name: citeseer
...@@ -23,5 +24,5 @@ general_pipeline: ...@@ -23,5 +24,5 @@ general_pipeline:
lr: 0.01 lr: 0.01
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.8208 ± 0.00663 # Accuracy across 10 runs: 0.8208 ± 0.00663
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: cora name: cora
...@@ -28,5 +29,5 @@ general_pipeline: ...@@ -28,5 +29,5 @@ general_pipeline:
lr: 0.005 lr: 0.005
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.802 ± 0.005329 # Accuracy across 10 runs: 0.802 ± 0.005329
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: cora name: cora
...@@ -24,5 +25,5 @@ general_pipeline: ...@@ -24,5 +25,5 @@ general_pipeline:
lr: 0.01 lr: 0.01
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.8163 ± 0.006856 # Accuracy across 10 runs: 0.8163 ± 0.006856
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: cora name: cora
...@@ -23,5 +24,5 @@ general_pipeline: ...@@ -23,5 +24,5 @@ general_pipeline:
lr: 0.01 lr: 0.01
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.7788 ± 0.002227 # Accuracy across 10 runs: 0.7788 ± 0.002227
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: pubmed name: pubmed
...@@ -28,5 +29,5 @@ general_pipeline: ...@@ -28,5 +29,5 @@ general_pipeline:
lr: 0.005 lr: 0.005
weight_decay: 0.001 weight_decay: 0.001
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.7826 ± 0.004317 # Accuracy across 10 runs: 0.7826 ± 0.004317
version: 0.0.1 version: 0.0.1
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train
device: cuda:0 device: cuda:0
data: data:
name: pubmed name: pubmed
...@@ -24,5 +25,5 @@ general_pipeline: ...@@ -24,5 +25,5 @@ general_pipeline:
lr: 0.01 lr: 0.01
weight_decay: 0.0005 weight_decay: 0.0005
loss: CrossEntropyLoss loss: CrossEntropyLoss
save_path: "model.pth" save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run num_runs: 10 # Number of experiments to run
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment