Unverified Commit 31e4a89b authored by Mufei Li's avatar Mufei Li Committed by GitHub
Browse files

[DGL-Go] Inference for Node Prediction Pipeline (full & ns) (#4095)

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update
parent 69226588
......@@ -3,8 +3,9 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.data import AsNodePredDataset
import os
from dgl.data import AsNodePredDataset
{{ data_import_code }}
{{ model_code }}
......@@ -13,7 +14,7 @@ from dgl.data import AsNodePredDataset
class EarlyStopping:
def __init__(self,
patience: int = -1,
checkpoint_path: str = 'checkpoint.pt'):
checkpoint_path: str = 'checkpoint.pth'):
self.patience = patience
self.checkpoint_path = checkpoint_path
self.counter = 0
......@@ -42,6 +43,9 @@ class EarlyStopping:
def load_checkpoint(self, model):
    """Restore *model*'s parameters from the early-stopping checkpoint file.

    Loads the state dict previously written to ``self.checkpoint_path``
    and applies it to ``model`` in place.
    """
    model.load_state_dict(torch.load(self.checkpoint_path))
def close(self):
    """Remove the temporary checkpoint file once training has finished.

    NOTE(review): raises ``FileNotFoundError`` if no checkpoint was ever
    saved — callers appear to invoke this only after ``load_checkpoint``.
    """
    os.remove(self.checkpoint_path)
{% endif %}
......@@ -134,39 +138,50 @@ def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn):
{% if user_cfg.early_stop %}
stopper.load_checkpoint(model)
stopper.close()
{% endif %}
model.eval()
with torch.no_grad():
test_acc = evaluate(model, test_g, test_nfeat, test_labels, test_nid, cfg["eval_device"])
return test_acc
def main():
{{ user_cfg_str }}
def main(run, cfg, data):
device = cfg['device']
pipeline_cfg = cfg["general_pipeline"]
model = {{ model_class_name }}(**cfg["model"])
model = model.to(device)
loss = torch.nn.{{ user_cfg.general_pipeline.loss }}()
optimizer = torch.optim.{{ user_cfg.general_pipeline.optimizer.name }}(model.parameters(), **pipeline_cfg["optimizer"])
test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss)
cpt_path = os.path.join(pipeline_cfg["save_path"], 'run_{}.pth'.format(run))
torch.save({'cfg': cfg, 'model': model.state_dict()}, cpt_path)
print('Saved training checkpoint to {}'.format(cpt_path))
return test_acc
if __name__ == '__main__':
{{ user_cfg_str }}
if not torch.cuda.is_available():
cfg['device'] = 'cpu'
# load data
data = AsNodePredDataset({{data_initialize_code}})
# create model
model_cfg = cfg["model"]
cfg["model"]["data_info"] = {
"in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1],
"out_size": data.num_classes,
"num_nodes": data[0].num_nodes()
}
model = {{ model_class_name }}(**cfg["model"])
model = model.to(device)
loss = torch.nn.{{ user_cfg.general_pipeline.loss }}()
optimizer = torch.optim.{{ user_cfg.general_pipeline.optimizer.name }}(model.parameters(), **pipeline_cfg["optimizer"])
test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss)
torch.save(model.state_dict(), pipeline_cfg["save_path"])
return test_acc
if __name__ == '__main__':
os.makedirs(cfg['general_pipeline']["save_path"])
all_acc = []
num_runs = {{ user_cfg.general_pipeline.num_runs }}
for run in range(num_runs):
print(f'Run experiment #{run}')
test_acc = main()
test_acc = main(run, cfg, data)
print("Test Accuracy {:.4f}".format(test_acc))
all_acc.append(test_acc)
avg_acc = np.round(np.mean(all_acc), 6)
......
......@@ -3,7 +3,7 @@ import torch
class EarlyStopping:
def __init__(self,
patience: int = -1,
checkpoint_path: str = 'checkpoint.pt'):
checkpoint_path: str = 'checkpoint.pth'):
self.patience = patience
self.checkpoint_path = checkpoint_path
self.counter = 0
......
......@@ -14,7 +14,7 @@ from .base_model import DGLBaseModel
class PipelineConfig(DGLBaseModel):
class PipelineConfig(DGLBaseModel):
node_embed_size: Optional[int] = -1
early_stop: Optional[dict]
num_epochs: int = 200
......@@ -25,5 +25,5 @@ class PipelineConfig(DGLBaseModel):
class UserConfig(DGLBaseModel):
version: Optional[str] = "0.0.1"
pipeline_name: PipelineFactory.get_pipeline_enum()
pipeline_mode: str
device: str = "cpu"
# general_pipeline: PipelineConfig = PipelineConfig()
\ No newline at end of file
......@@ -265,6 +265,23 @@ class PipelineFactory:
"PipelineName", {k: k for k, v in cls.registry.items()})
return enum_class
class ApplyPipelineFactory:
    """Factory that registers and instantiates executors used for inference."""

    # Internal registry mapping a pipeline name to its executor instance.
    registry: Dict[str, PipelineBase] = {}

    @classmethod
    def register(cls, name: str) -> Callable:
        """Return a class decorator that stores an executor under *name*.

        The decorated class is instantiated immediately and kept in
        ``cls.registry``; the class itself is returned unchanged so the
        decorator is transparent to its user.
        """
        def _add_to_registry(pipeline_cls) -> Callable:
            # Re-registering an existing name replaces the old executor,
            # but we warn so silent collisions are visible in logs.
            if name in cls.registry:
                logger.warning(
                    'Executor %s already exists. Will replace it', name)
            cls.registry[name] = pipeline_cls()
            return pipeline_cls
        return _add_to_registry
model_dir = Path(__file__).parent.parent / "model"
......
version: 0.0.1
pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
data:
name: ogbg-molhiv
......@@ -26,4 +27,4 @@ general_pipeline:
loss: BCEWithLogitsLoss
metric: roc_auc_score
num_epochs: 100 # Number of training epochs
save_path: model.pth # Path to save the model
save_path: "results" # Directory to save the experiment results
version: 0.0.1
pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
data:
name: ogbg-molhiv
......@@ -33,4 +34,4 @@ general_pipeline:
loss: BCEWithLogitsLoss
metric: roc_auc_score
num_epochs: 200 # Number of training epochs
save_path: model.pth # Path to save the model
save_path: "results" # Directory to save the experiment results
version: 0.0.1
pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
data:
name: ogbg-molpcba
......@@ -26,4 +27,4 @@ general_pipeline:
loss: BCEWithLogitsLoss
metric: average_precision_score
num_epochs: 100 # Number of training epochs
save_path: model.pth # Path to save the model
save_path: "results" # Directory to save the experiment results
version: 0.0.1
pipeline_name: linkpred
pipeline_mode: train
device: cpu
data:
name: ogbl-citation2
......@@ -15,7 +16,7 @@ node_model:
aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
edge_model:
name: ele
hidden_size: 64 # Hidden size.
hidden_size: 64 # Hidden size.
num_layers: 2 # Number of hidden layers.
bias: true # Whether to use bias in the linear layer.
neg_sampler:
......@@ -31,5 +32,5 @@ general_pipeline:
name: Adam
lr: 0.005
loss: BCELoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 1 # Number of experiments to run
version: 0.0.1
pipeline_name: linkpred
pipeline_mode: train
device: cpu
data:
name: ogbl-collab
......@@ -15,7 +16,7 @@ node_model:
aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
edge_model:
name: ele
hidden_size: 64 # Hidden size.
hidden_size: 64 # Hidden size.
num_layers: 2 # Number of hidden layers.
bias: true # Whether to use bias in the linear layer.
neg_sampler:
......@@ -31,5 +32,5 @@ general_pipeline:
name: Adam
lr: 0.005
loss: BCELoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 1 # Number of experiments to run
version: 0.0.1
pipeline_name: linkpred
pipeline_mode: train
device: cuda
data:
name: cora
......@@ -15,7 +16,7 @@ node_model:
aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
edge_model:
name: ele
hidden_size: 64 # Hidden size.
hidden_size: 64 # Hidden size.
num_layers: 2 # Number of hidden layers.
bias: true # Whether to use bias in the linear layer.
neg_sampler:
......@@ -31,5 +32,5 @@ general_pipeline:
name: Adam
lr: 0.005
loss: BCELoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 1 # Number of experiments to run
# Accuracy across 5 runs: 0.593288 ± 0.006103
version: 0.0.1
pipeline_name: nodepred-ns
pipeline_mode: train
device: 'cuda:0'
eval_device: 'cpu'
data:
......@@ -31,5 +32,5 @@ general_pipeline:
lr: 0.005
weight_decay: 0.0
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 5
# Accuracy across 1 runs: 0.796911
version: 0.0.1
pipeline_name: nodepred-ns
pipeline_mode: train
device: cuda
eval_device: cpu
data:
......@@ -35,5 +36,5 @@ general_pipeline:
lr: 0.005
weight_decay: 0.0
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 5 # Number of experiments to run
# Accuracy across 10 runs: 0.7097 ± 0.006914
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: citeseer
......@@ -28,5 +29,5 @@ general_pipeline:
lr: 0.005
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.6852 ± 0.008875
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: citeseer
......@@ -24,5 +25,5 @@ general_pipeline:
lr: 0.01
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.6994 ± 0.004005
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: citeseer
......@@ -23,5 +24,5 @@ general_pipeline:
lr: 0.01
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.8208 ± 0.00663
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: cora
......@@ -28,5 +29,5 @@ general_pipeline:
lr: 0.005
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.802 ± 0.005329
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: cora
......@@ -24,5 +25,5 @@ general_pipeline:
lr: 0.01
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.8163 ± 0.006856
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: cora
......@@ -23,5 +24,5 @@ general_pipeline:
lr: 0.01
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.7788 ± 0.002227
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: pubmed
......@@ -28,5 +29,5 @@ general_pipeline:
lr: 0.005
weight_decay: 0.001
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
# Accuracy across 10 runs: 0.7826 ± 0.004317
version: 0.0.1
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
data:
name: pubmed
......@@ -24,5 +25,5 @@ general_pipeline:
lr: 0.01
weight_decay: 0.0005
loss: CrossEntropyLoss
save_path: "model.pth"
save_path: "results" # Directory to save the experiment results
num_runs: 10 # Number of experiments to run
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment