"...python/git@developer.sourcefind.cn:change/sglang.git" did not exist on "d40846d456ecc930c04538778ed11f67cc793c23"
Unverified commit 704bcaf6, authored by Hongzhi (Steve) Chen, committed by GitHub
parent 6bc82161
"""QM9 dataset for graph property prediction (regression).""" """QM9 dataset for graph property prediction (regression)."""
import os import os
import dgl
import numpy as np import numpy as np
import scipy.sparse as sp import scipy.sparse as sp
import torch import torch
from tqdm import trange
import dgl
from dgl.convert import graph as dgl_graph from dgl.convert import graph as dgl_graph
from dgl.data import QM9Dataset from dgl.data import QM9Dataset
from dgl.data.utils import load_graphs, save_graphs from dgl.data.utils import load_graphs, save_graphs
from tqdm import trange
class QM9(QM9Dataset): class QM9(QM9Dataset):
...@@ -106,7 +106,6 @@ class QM9(QM9Dataset): ...@@ -106,7 +106,6 @@ class QM9(QM9Dataset):
force_reload=False, force_reload=False,
verbose=False, verbose=False,
): ):
self.edge_funcs = edge_funcs self.edge_funcs = edge_funcs
self._keys = [ self._keys = [
"mu", "mu",
......
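For reference, the upstream dgl.data.QM9Dataset that this wrapper extends can be used on its own; a minimal sketch against the public API (label_keys and cutoff are real parameters of that class; the edge_funcs argument above is specific to this example's subclass):

from dgl.data import QM9Dataset

# Load QM9 with the dipole moment "mu" as the regression target.
data = QM9Dataset(label_keys=["mu"], cutoff=5.0)
g, label = data[0]  # a DGLGraph and its target tensor
print(g.num_nodes(), label)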
import os
import ssl

import dgl
import numpy as np
import torch
from six.moves import urllib
from torch.utils.data import DataLoader, Dataset


def download_file(dataset):
    print("Start Downloading data: {}".format(dataset))
...
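The body of download_file is elided by the diff. A plausible sketch of the pattern the ssl and six.moves.urllib imports usually support, with a placeholder URL rather than the script's real one (assumed body, for illustration only):

def download_file(dataset):
    print("Start Downloading data: {}".format(dataset))
    url = "https://example.com/{}.zip".format(dataset)  # placeholder URL
    # Some hosts fail certificate verification; example scripts often fall
    # back to an unverified SSL context (unsafe in production code).
    context = ssl._create_unverified_context()
    with urllib.request.urlopen(url, context=context) as resp:
        with open("{}.zip".format(dataset), "wb") as f:
            f.write(resp.read())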
import dgl
import dgl.function as fn
import numpy as np
import scipy.sparse as sparse
import torch
import torch.nn as nn
from dgl.base import DGLError
...
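This file pairs dgl.function's built-in message primitives with a DGLError guard. For readers new to that API, a self-contained illustration of message passing with fn (a toy snippet, not code from this file):

import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 0, 1], [1, 2, 2]))  # 3 nodes, 3 edges
g.ndata["h"] = torch.ones(3, 4)
# Copy each source node's "h" onto its out-edges, then sum per destination.
g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h_sum"))
print(g.ndata["h_sum"])  # node 2 receives two messages, node 1 one, node 0 none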
import dgl
import dgl.function as fn
import dgl.nn as dglnn
import numpy as np
import torch
import torch.nn as nn
from dgl.base import DGLError
from dgl.nn.functional import edge_softmax
...
import dgl
import dgl.function as fn
import dgl.nn as dglnn
import numpy as np
import scipy.sparse as sparse
import torch
import torch.nn as nn
from dgl.base import DGLError
from dgl.nn.functional import edge_softmax
...
import argparse
from functools import partial

import dgl
import numpy as np
import torch
import torch.nn as nn
from dataloading import (
    METR_LAGraphDataset,
    METR_LATestDataset,
    METR_LATrainDataset,
    METR_LAValidDataset,
    PEMS_BAYGraphDataset,
    PEMS_BAYTestDataset,
    PEMS_BAYTrainDataset,
    PEMS_BAYValidDataset,
)
from dcrnn import DiffConv
from gaan import GatedGAT
from model import GraphRNN
from torch.utils.data import DataLoader
from utils import get_learning_rate, masked_mae_loss, NormalizationLayer

batch_cnt = [0]
...
import dgl
import numpy as np
import scipy.sparse as sparse
import torch
import torch.nn as nn


class NormalizationLayer(nn.Module):
    def __init__(self, mean, std):
...
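Only the constructor of NormalizationLayer is visible here. A sketch of what a (mean, std) layer with this interface typically implements (the class name below is a hypothetical stand-in and the body is assumed, not this file's verbatim code):

class ZScoreNormalization(nn.Module):  # hypothetical stand-in name
    def __init__(self, mean, std):
        super().__init__()
        self.mean = mean
        self.std = std

    def normalize(self, x):
        # Map raw inputs to roughly zero mean / unit variance.
        return (x - self.mean) / self.std

    def denormalize(self, x):
        # Invert the transform to report metrics in original units.
        return x * self.std + self.mean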
import math
from itertools import product

import dgl
import numpy as np
import pandas as pd
import torch
from dgl.data import DGLDataset
@@ -84,7 +84,6 @@ class EEGGraphDataset(DGLDataset):
    def get_geodesic_distance(
        self, montage_sensor1_idx, montage_sensor2_idx, coords_1010
    ):
        # get the reference sensor in the 10-10 system for the current montage pair in 10-20 system
        ref_sensor1 = self.ref_names[montage_sensor1_idx]
        ref_sensor2 = self.ref_names[montage_sensor2_idx]
...
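For intuition about get_geodesic_distance: once both montage sensors are mapped to 10-10 reference positions, a geodesic (great-circle) distance between two sensor coordinates on an approximately spherical head can be computed as follows (a generic formula using the np import above, not this file's exact implementation):

def great_circle_distance(p1, p2):  # hypothetical helper
    # p1, p2: 3-D sensor coordinates on a roughly spherical surface.
    r1, r2 = np.linalg.norm(p1), np.linalg.norm(p2)
    cos_angle = np.dot(p1, p2) / (r1 * r2)
    angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))  # clip for safety
    return angle * (r1 + r2) / 2.0  # arc length at the mean radius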
import torch.nn as nn
import torch.nn.functional as function
from dgl.nn import GraphConv, SumPooling
from torch.nn import BatchNorm1d


class EEGGraphConvNet(nn.Module):
...
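The class body is elided. A minimal sketch of a graph classifier shaped the way these imports suggest (GraphConv layers, BatchNorm1d, sum pooling); the name and layer sizes below are illustrative, not the example's real ones:

class TinyEEGGraphConvNet(nn.Module):  # hypothetical stand-in
    def __init__(self, in_feats=6, hidden=32, num_classes=2):
        super().__init__()
        self.conv1 = GraphConv(in_feats, hidden)
        self.bn1 = BatchNorm1d(hidden)
        self.conv2 = GraphConv(hidden, hidden)
        self.pool = SumPooling()
        self.fc = nn.Linear(hidden, num_classes)

    def forward(self, g, x):
        h = function.leaky_relu(self.bn1(self.conv1(g, x)))
        h = function.leaky_relu(self.conv2(g, h))
        hg = self.pool(g, h)  # one vector per graph in the batch
        return self.fc(hg)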
@@ -4,6 +4,8 @@ import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from dgl.dataloading import GraphDataLoader
from EEGGraphDataset import EEGGraphDataset
from joblib import load
from sklearn import preprocessing
@@ -11,8 +13,6 @@ from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from torch.utils.data import WeightedRandomSampler

if __name__ == "__main__":
    # argparse commandline args
    parser = argparse.ArgumentParser(
...
import dgl
import torch as th
import torch.optim as optim
import utils
@@ -6,8 +7,6 @@ from sampler import Sampler
from sklearn import metrics
from torch.utils.data import DataLoader


def train(args, train_g, sku_info, num_skus, num_brands, num_shops, num_cates):
    sampler = Sampler(
...
import dgl
import numpy as np
import torch as th


class Sampler:
    def __init__(
...
@@ -2,12 +2,12 @@ import argparse
import random
from datetime import datetime

import dgl
import networkx as nx
import numpy as np
import torch as th


def init_args():
    # TODO: change args
...
import os

import dgl
import numpy
import pandas
import torch


def process_raw_data(raw_dir, processed_dir):
    r"""
...
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv
from torch.nn import init
from torch.nn.parameter import Parameter


class MatGRUCell(torch.nn.Module):
...
import argparse
import time

import dgl
import torch
import torch.nn.functional as F
from dataset import EllipticDataset
from model import EvolveGCNH, EvolveGCNO
from utils import Measure


def train(args, device):
    elliptic_dataset = EllipticDataset(
...
import os

import dgl
import numpy as np
import scipy.io as sio
import torch as th
from dgl.data import DGLBuiltinDataset
from dgl.data.utils import _get_dgl_url, load_graphs, save_graphs
...
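save_graphs and load_graphs are DGL's standard caching hooks for datasets. A generic sketch of the pattern a DGLBuiltinDataset subclass follows (class and file names below are illustrative, not this example's):

class CachedMatDataset(DGLBuiltinDataset):  # hypothetical name
    def save(self):
        # Cache processed graphs so raw .mat files are parsed only once.
        save_graphs(os.path.join(self.save_path, "data.bin"), self._graphs)

    def load(self):
        self._graphs, _ = load_graphs(os.path.join(self.save_path, "data.bin"))

    def has_cache(self):
        return os.path.exists(os.path.join(self.save_path, "data.bin"))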
import argparse

import dgl
import torch as th
import torch.nn.functional as F
import torch.optim as optim
from dataloader import GASDataset
from model_sampling import GAS
from sklearn.metrics import f1_score, precision_recall_curve, roc_auc_score

def evaluate(model, loss_fn, dataloader, device="cpu"):
    loss = 0
    f1 = 0
    auc = 0
@@ -17,21 +18,28 @@ def evaluate(model, loss_fn, dataloader, device='cpu'):
    for input_nodes, edge_subgraph, blocks in dataloader:
        blocks = [b.to(device) for b in blocks]
        edge_subgraph = edge_subgraph.to(device)
        u_feat = blocks[0].srcdata["feat"]["u"]
        v_feat = blocks[0].srcdata["feat"]["v"]
        f_feat = blocks[0].edges["forward"].data["feat"]
        b_feat = blocks[0].edges["backward"].data["feat"]
        labels = edge_subgraph.edges["forward"].data["label"].long()
        logits = model(edge_subgraph, blocks, f_feat, b_feat, u_feat, v_feat)
        loss += loss_fn(logits, labels).item()
        f1 += f1_score(labels.cpu(), logits.argmax(dim=1).cpu())
        auc += roc_auc_score(labels.cpu(), logits[:, 1].detach().cpu())
        pre, re, _ = precision_recall_curve(
            labels.cpu(), logits[:, 1].detach().cpu()
        )
        rap += re[pre > args.precision].max()
        num_blocks += 1

    return (
        rap / num_blocks,
        f1 / num_blocks,
        auc / num_blocks,
        loss / num_blocks,
    )


def main(args):
@@ -45,64 +53,80 @@ def main(args):
    tr_eid_dict = {}
    val_eid_dict = {}
    test_eid_dict = {}
    tr_eid_dict["forward"] = (
        graph.edges["forward"].data["train_mask"].nonzero().squeeze()
    )
    val_eid_dict["forward"] = (
        graph.edges["forward"].data["val_mask"].nonzero().squeeze()
    )
    test_eid_dict["forward"] = (
        graph.edges["forward"].data["test_mask"].nonzero().squeeze()
    )

    sampler = dgl.dataloading.as_edge_prediction_sampler(sampler)
    tr_loader = dgl.dataloading.DataLoader(
        graph,
        tr_eid_dict,
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )
    val_loader = dgl.dataloading.DataLoader(
        graph,
        val_eid_dict,
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )
    test_loader = dgl.dataloading.DataLoader(
        graph,
        test_eid_dict,
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    # binary classification
    num_classes = dataset.num_classes

    # Extract node features
    e_feats = graph.edges["forward"].data["feat"].shape[-1]
    u_feats = graph.nodes["u"].data["feat"].shape[-1]
    v_feats = graph.nodes["v"].data["feat"].shape[-1]

    # Step 2: Create model =================================================================== #
    model = GAS(
        e_in_dim=e_feats,
        u_in_dim=u_feats,
        v_in_dim=v_feats,
        e_hid_dim=args.e_hid_dim,
        u_hid_dim=args.u_hid_dim,
        v_hid_dim=args.v_hid_dim,
        out_dim=num_classes,
        num_layers=args.num_layers,
        dropout=args.dropout,
        activation=F.relu,
    )

    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    loss_fn = th.nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
@@ -115,19 +139,23 @@ def main(args):
        for input_nodes, edge_subgraph, blocks in tr_loader:
            blocks = [b.to(device) for b in blocks]
            edge_subgraph = edge_subgraph.to(device)
            u_feat = blocks[0].srcdata["feat"]["u"]
            v_feat = blocks[0].srcdata["feat"]["v"]
            f_feat = blocks[0].edges["forward"].data["feat"]
            b_feat = blocks[0].edges["backward"].data["feat"]
            labels = edge_subgraph.edges["forward"].data["label"].long()
            logits = model(
                edge_subgraph, blocks, f_feat, b_feat, u_feat, v_feat
            )

            # compute loss
            batch_loss = loss_fn(logits, labels)
            tr_loss += batch_loss.item()
            tr_f1 += f1_score(labels.cpu(), logits.argmax(dim=1).cpu())
            tr_auc += roc_auc_score(labels.cpu(), logits[:, 1].detach().cpu())
            tr_pre, tr_re, _ = precision_recall_curve(
                labels.cpu(), logits[:, 1].detach().cpu()
            )
            tr_rap += tr_re[tr_pre > args.precision].max()
            tr_blocks += 1
@@ -138,36 +166,100 @@ def main(args):
        # validation
        model.eval()
        val_rap, val_f1, val_auc, val_loss = evaluate(
            model, loss_fn, val_loader, device
        )

        # Print out performance
        print(
            "In epoch {}, Train R@P: {:.4f} | Train F1: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
            "Valid R@P: {:.4f} | Valid F1: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
                epoch,
                tr_rap / tr_blocks,
                tr_f1 / tr_blocks,
                tr_auc / tr_blocks,
                tr_loss / tr_blocks,
                val_rap,
                val_f1,
                val_auc,
                val_loss,
            )
        )

    # Test with mini batch after all epoch
    model.eval()
    test_rap, test_f1, test_auc, test_loss = evaluate(
        model, loss_fn, test_loader, device
    )
    print(
        "Test R@P: {:.4f} | Test F1: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
            test_rap, test_f1, test_auc, test_loss
        )
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GCN-based Anti-Spam Model")
    parser.add_argument(
        "--dataset", type=str, default="pol", help="'pol', or 'gos'"
    )
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU Index. Default: -1, using CPU."
    )
    parser.add_argument(
        "--e_hid_dim",
        type=int,
        default=128,
        help="Hidden layer dimension for edges",
    )
    parser.add_argument(
        "--u_hid_dim",
        type=int,
        default=128,
        help="Hidden layer dimension for source nodes",
    )
    parser.add_argument(
        "--v_hid_dim",
        type=int,
        default=128,
        help="Hidden layer dimension for destination nodes",
    )
    parser.add_argument(
        "--num_layers", type=int, default=2, help="Number of GCN layers"
    )
    parser.add_argument(
        "--max_epoch",
        type=int,
        default=100,
        help="The max number of epochs. Default: 100",
    )
    parser.add_argument(
        "--lr", type=float, default=0.001, help="Learning rate. Default: 1e-3"
    )
    parser.add_argument(
        "--dropout", type=float, default=0.0, help="Dropout rate. Default: 0.0"
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=64,
        help="Size of mini-batches. Default: 64",
    )
    parser.add_argument(
        "--num_workers", type=int, default=4, help="Number of node dataloader"
    )
    parser.add_argument(
        "--weight_decay",
        type=float,
        default=5e-4,
        help="Weight Decay. Default: 0.0005",
    )
    parser.add_argument(
        "--precision",
        type=float,
        default=0.9,
        help="The value p in recall@p precision. Default: 0.9",
    )

    args = parser.parse_args()
    print(args)
...
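Given the flags defined above, a typical invocation of the training script might look like the following (the script name is assumed from the model_sampling import; dataset choices are 'pol' or 'gos' per the --dataset help string):

python main_sampling.py --dataset pol --gpu 0 --max_epoch 100 --batch_size 64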
import dgl.function as fn
import torch as th
import torch.nn as nn
from dgl.nn.functional import edge_softmax
...
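edge_softmax normalizes edge scores over each node's incoming edges, which is the attention-weight step these imports set up. A minimal usage sketch (toy graph, not this model's code):

import dgl
import torch as th
from dgl.nn.functional import edge_softmax

g = dgl.graph(([0, 1, 2], [2, 2, 2]))  # all three edges point at node 2
scores = th.ones(3, 1)
print(edge_softmax(g, scores))  # equal logits -> each edge weighted 1/3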
import dgl.function as fn
import torch as th
import torch.nn as nn
from dgl.nn.functional import edge_softmax
...