Unverified commit 704bcaf6 authored by Hongzhi (Steve), Chen, committed by GitHub
parent 6bc82161
"""QM9 dataset for graph property prediction (regression)."""
import os

import numpy as np
import scipy.sparse as sp
import torch
from tqdm import trange

import dgl
from dgl.convert import graph as dgl_graph
from dgl.data import QM9Dataset
from dgl.data.utils import load_graphs, save_graphs
class QM9(QM9Dataset):
......
@@ -106,7 +106,6 @@ class QM9(QM9Dataset):
force_reload=False,
verbose=False,
):
self.edge_funcs = edge_funcs
self._keys = [
"mu",
......
import os
import ssl

import numpy as np
import torch
from six.moves import urllib
from torch.utils.data import DataLoader, Dataset

import dgl
def download_file(dataset):
print("Start Downloading data: {}".format(dataset))
......
import numpy as np
import scipy.sparse as sparse
import torch
import torch.nn as nn

import dgl
import dgl.function as fn
from dgl.base import DGLError
......
import numpy as np
import torch
import torch.nn as nn

import dgl
import dgl.function as fn
import dgl.nn as dglnn
from dgl.base import DGLError
from dgl.nn.functional import edge_softmax
......
import numpy as np
import scipy.sparse as sparse
import torch
import torch.nn as nn

import dgl
import dgl.function as fn
import dgl.nn as dglnn
from dgl.base import DGLError
from dgl.nn.functional import edge_softmax
......
import argparse
from functools import partial
import dgl
import numpy as np
import torch
import torch.nn as nn
from dataloading import (
METR_LAGraphDataset,
METR_LATestDataset,
METR_LATrainDataset,
METR_LAValidDataset,
PEMS_BAYGraphDataset,
PEMS_BAYTestDataset,
PEMS_BAYTrainDataset,
PEMS_BAYValidDataset,
)
from dcrnn import DiffConv
from gaan import GatedGAT
from model import GraphRNN
from torch.utils.data import DataLoader
from utils import get_learning_rate, masked_mae_loss, NormalizationLayer
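# batch counter kept in a one-element list so nested training closures can update it in place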
batch_cnt = [0]
......
import dgl
import numpy as np
import scipy.sparse as sparse
import torch
import torch.nn as nn
class NormalizationLayer(nn.Module):
def __init__(self, mean, std):
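# keep the dataset mean/std so inputs can be standardized and predictions mapped back to the original scale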
......
import math
from itertools import product
import numpy as np
import pandas as pd
import torch
import dgl
from dgl.data import DGLDataset
......
@@ -84,7 +84,6 @@ class EEGGraphDataset(DGLDataset):
def get_geodesic_distance(
self, montage_sensor1_idx, montage_sensor2_idx, coords_1010
):
# get the reference sensor in the 10-10 system for the current montage pair in 10-20 system
ref_sensor1 = self.ref_names[montage_sensor1_idx]
ref_sensor2 = self.ref_names[montage_sensor2_idx]
......
import torch.nn as nn
import torch.nn.functional as function
from torch.nn import BatchNorm1d
from dgl.nn import GraphConv, SumPooling
class EEGGraphConvNet(nn.Module):
......
......
@@ -4,6 +4,8 @@ import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from dgl.dataloading import GraphDataLoader
from EEGGraphDataset import EEGGraphDataset
from joblib import load
from sklearn import preprocessing
@@ -11,8 +13,6 @@ from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from torch.utils.data import WeightedRandomSampler
if __name__ == "__main__":
# argparse commandline args
parser = argparse.ArgumentParser(
......
import dgl
import torch as th
import torch.optim as optim
import utils
......
@@ -6,8 +7,6 @@ from sampler import Sampler
from sklearn import metrics
from torch.utils.data import DataLoader
def train(args, train_g, sku_info, num_skus, num_brands, num_shops, num_cates):
sampler = Sampler(
......
import dgl
import numpy as np
import torch as th
class Sampler:
def __init__(
......
......
@@ -2,12 +2,12 @@ import argparse
import random
from datetime import datetime
import dgl
import networkx as nx
import numpy as np
import torch as th
def init_args():
# TODO: change args
......
import os
import dgl
import numpy
import pandas
import torch
def process_raw_data(raw_dir, processed_dir):
r"""
......
import torch
import torch.nn as nn
from torch.nn import init
from torch.nn.parameter import Parameter
from dgl.nn.pytorch import GraphConv
class MatGRUCell(torch.nn.Module):
......
import argparse
import time
import dgl
import torch
import torch.nn.functional as F
from dataset import EllipticDataset
from model import EvolveGCNH, EvolveGCNO
from utils import Measure
def train(args, device):
elliptic_dataset = EllipticDataset(
......
import os
import numpy as np
import scipy.io as sio
import torch as th
import dgl
from dgl.data import DGLBuiltinDataset
from dgl.data.utils import _get_dgl_url, load_graphs, save_graphs
......
import argparse
import dgl
import torch as th
import torch.nn.functional as F
import torch.optim as optim
from dataloader import GASDataset
from model_sampling import GAS
from sklearn.metrics import f1_score, precision_recall_curve, roc_auc_score
def evaluate(model, loss_fn, dataloader, device="cpu"):
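# evaluate on mini-batches and report batch-averaged recall@precision, F1, AUC and loss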
loss = 0
f1 = 0
auc = 0
......
@@ -17,21 +18,28 @@ def evaluate(model, loss_fn, dataloader, device='cpu'):
for input_nodes, edge_subgraph, blocks in dataloader:
blocks = [b.to(device) for b in blocks]
edge_subgraph = edge_subgraph.to(device)
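# node features for the "u" and "v" node types, plus edge features for the forward/backward directions, all read from the first block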
u_feat = blocks[0].srcdata["feat"]["u"]
v_feat = blocks[0].srcdata["feat"]["v"]
f_feat = blocks[0].edges["forward"].data["feat"]
b_feat = blocks[0].edges["backward"].data["feat"]
labels = edge_subgraph.edges["forward"].data["label"].long()
logits = model(edge_subgraph, blocks, f_feat, b_feat, u_feat, v_feat)
loss += loss_fn(logits, labels).item()
f1 += f1_score(labels.cpu(), logits.argmax(dim=1).cpu())
auc += roc_auc_score(labels.cpu(), logits[:, 1].detach().cpu())
pre, re, _ = precision_recall_curve(
labels.cpu(), logits[:, 1].detach().cpu()
)
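# recall@precision: the best recall achieved at any threshold whose precision exceeds args.precision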
rap += re[pre > args.precision].max()
num_blocks += 1
return (
rap / num_blocks,
f1 / num_blocks,
auc / num_blocks,
loss / num_blocks,
)
def main(args):
......
@@ -45,49 +53,62 @@ def main(args):
tr_eid_dict = {}
val_eid_dict = {}
test_eid_dict = {}
tr_eid_dict["forward"] = (
graph.edges["forward"].data["train_mask"].nonzero().squeeze()
)
val_eid_dict["forward"] = (
graph.edges["forward"].data["val_mask"].nonzero().squeeze()
)
test_eid_dict["forward"] = (
graph.edges["forward"].data["test_mask"].nonzero().squeeze()
)
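# wrap the node-wise sampler so the loaders yield (input_nodes, edge_subgraph, blocks) for each batch of seed edges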
sampler = dgl.dataloading.as_edge_prediction_sampler(sampler)
tr_loader = dgl.dataloading.DataLoader(
graph,
tr_eid_dict,
sampler,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
num_workers=args.num_workers,
)
val_loader = dgl.dataloading.DataLoader(
graph,
val_eid_dict,
sampler,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
num_workers=args.num_workers,
)
test_loader = dgl.dataloading.DataLoader(
graph,
test_eid_dict,
sampler,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
num_workers=args.num_workers,
)
# check cuda
if args.gpu >= 0 and th.cuda.is_available():
device = "cuda:{}".format(args.gpu)
else:
device = "cpu"
# binary classification
num_classes = dataset.num_classes
# Extract edge and node feature dimensions
e_feats = graph.edges["forward"].data["feat"].shape[-1]
u_feats = graph.nodes["u"].data["feat"].shape[-1]
v_feats = graph.nodes["v"].data["feat"].shape[-1]
# Step 2: Create model =================================================================== #
model = GAS(
e_in_dim=e_feats,
u_in_dim=u_feats,
v_in_dim=v_feats,
e_hid_dim=args.e_hid_dim,
......
@@ -96,13 +117,16 @@
out_dim=num_classes,
num_layers=args.num_layers,
dropout=args.dropout,
activation=F.relu,
)
model = model.to(device)
# Step 3: Create training components ===================================================== #
loss_fn = th.nn.CrossEntropyLoss()
optimizer = optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay
)
# Step 4: training epochs =============================================================== #
for epoch in range(args.max_epoch):
......
@@ -115,19 +139,23 @@
for input_nodes, edge_subgraph, blocks in tr_loader:
blocks = [b.to(device) for b in blocks]
edge_subgraph = edge_subgraph.to(device)
u_feat = blocks[0].srcdata["feat"]["u"]
v_feat = blocks[0].srcdata["feat"]["v"]
f_feat = blocks[0].edges["forward"].data["feat"]
b_feat = blocks[0].edges["backward"].data["feat"]
labels = edge_subgraph.edges["forward"].data["label"].long()
logits = model(
edge_subgraph, blocks, f_feat, b_feat, u_feat, v_feat
)
# compute loss
batch_loss = loss_fn(logits, labels)
tr_loss += batch_loss.item()
tr_f1 += f1_score(labels.cpu(), logits.argmax(dim=1).cpu())
tr_auc += roc_auc_score(labels.cpu(), logits[:, 1].detach().cpu())
tr_pre, tr_re, _ = precision_recall_curve(
labels.cpu(), logits[:, 1].detach().cpu()
)
tr_rap += tr_re[tr_pre > args.precision].max()
tr_blocks += 1
......
@@ -138,36 +166,100 @@
# validation
model.eval()
val_rap, val_f1, val_auc, val_loss = evaluate(
model, loss_fn, val_loader, device
)
# Print out performance
print("In epoch {}, Train R@P: {:.4f} | Train F1: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
"Valid R@P: {:.4f} | Valid F1: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".
format(epoch, tr_rap / tr_blocks, tr_f1 / tr_blocks, tr_auc / tr_blocks , tr_loss / tr_blocks,
val_rap, val_f1, val_auc, val_loss))
print(
"In epoch {}, Train R@P: {:.4f} | Train F1: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
"Valid R@P: {:.4f} | Valid F1: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
epoch,
tr_rap / tr_blocks,
tr_f1 / tr_blocks,
tr_auc / tr_blocks,
tr_loss / tr_blocks,
val_rap,
val_f1,
val_auc,
val_loss,
)
)
# Test with mini batch after all epoch
model.eval()
test_rap, test_f1, test_auc, test_loss = evaluate(
model, loss_fn, test_loader, device
)
print(
"Test R@P: {:.4f} | Test F1: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
test_rap, test_f1, test_auc, test_loss
)
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN-based Anti-Spam Model")
parser.add_argument(
"--dataset", type=str, default="pol", help="'pol', or 'gos'"
)
parser.add_argument(
"--gpu", type=int, default=-1, help="GPU Index. Default: -1, using CPU."
)
parser.add_argument(
"--e_hid_dim",
type=int,
default=128,
help="Hidden layer dimension for edges",
)
parser.add_argument(
"--u_hid_dim",
type=int,
default=128,
help="Hidden layer dimension for source nodes",
)
parser.add_argument(
"--v_hid_dim",
type=int,
default=128,
help="Hidden layer dimension for destination nodes",
)
parser.add_argument(
"--num_layers", type=int, default=2, help="Number of GCN layers"
)
parser.add_argument(
"--max_epoch",
type=int,
default=100,
help="The max number of epochs. Default: 100",
)
parser.add_argument(
"--lr", type=float, default=0.001, help="Learning rate. Default: 1e-3"
)
parser.add_argument(
"--dropout", type=float, default=0.0, help="Dropout rate. Default: 0.0"
)
parser.add_argument(
"--batch_size",
type=int,
default=64,
help="Size of mini-batches. Default: 64",
)
parser.add_argument(
"--num_workers", type=int, default=4, help="Number of node dataloader"
)
parser.add_argument(
"--weight_decay",
type=float,
default=5e-4,
help="Weight Decay. Default: 0.0005",
)
parser.add_argument(
"--precision",
type=float,
default=0.9,
help="The value p in recall@p precision. Default: 0.9",
)
args = parser.parse_args()
print(args)
......
import torch as th
import torch.nn as nn
import dgl.function as fn
from dgl.nn.functional import edge_softmax
......
import torch as th
import torch.nn as nn
import dgl.function as fn
from dgl.nn.functional import edge_softmax
......