Unverified Commit d16fb3ff authored by Zihao Ye's avatar Zihao Ye Committed by GitHub
Browse files

[Example] GCMC in PyTorch (#1101)

* upd

* fix

* upd

* upd

* upd

* fix

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd
parent 1552090a
# Graph Convolutional Matrix Completion
Paper link: [https://arxiv.org/abs/1706.02263](https://arxiv.org/abs/1706.02263)
Author's code: [https://github.com/riannevdberg/gc-mc](https://github.com/riannevdberg/gc-mc)
The implementation does not handle side-channel features or mini-epoching, and thus achieves
slightly worse performance when using node features.
Credit: Jiani Zhang ([@jennyzhang0215](https://github.com/jennyzhang0215))
## Dependencies
* PyTorch 1.2+
* pandas
* torchtext 0.4+
## Data
Supported datasets: ml-100k, ml-1m, ml-10m
## How to run
ml-100k, no feature
```bash
python train.py --data_name=ml-100k --use_one_hot_fea --gcn_agg_accum=stack
```
Results: RMSE=0.9088 (0.910 reported)
Speed: 0.0195s/epoch (vanilla implementation: 0.1008s/epoch)
ml-100k, with feature
```bash
python train.py --data_name=ml-100k --gcn_agg_accum=stack
```
Results: RMSE=0.9448 (0.905 reported)
ml-1m, no feature
```bash
python train.py --data_name=ml-1m --gcn_agg_accum=sum --use_one_hot_fea
```
Results: RMSE=0.8377 (0.832 reported)
Speed: 0.0557s/epoch (vanilla implementation: 1.538s/epoch)
ml-10m, no feature
```bash
python train.py --data_name=ml-10m --gcn_agg_accum=stack --gcn_dropout=0.3 \
--train_lr=0.001 --train_min_lr=0.0001 --train_max_iter=15000 \
--use_one_hot_fea --gen_r_num_basis_func=4
```
Results: RMSE=0.7800 (0.777 reported)
Speed: 0.9207s/epoch (vanilla implementation: OOM)
Testbed: EC2 p3.2xlarge instance(Amazon Linux 2)
\ No newline at end of file
This diff is collapsed.
"""NN modules"""
import torch as th
import torch.nn as nn
import dgl.function as fn
from utils import get_activation
class GCMCLayer(nn.Module):
    r"""GCMC graph convolution layer.

    .. math::
        z_j^{(l+1)} = \sigma_{agg}\left[\mathrm{agg}\left(
        \sum_{j\in\mathcal{N}_1}\frac{1}{c_{ij}}W_1h_j, \ldots,
        \sum_{j\in\mathcal{N}_R}\frac{1}{c_{ij}}W_Rh_j
        \right)\right]

    After that, apply an extra output projection:

    .. math::
        h_j^{(l+1)} = \sigma_{out}W_oz_j^{(l+1)}

    The equation is applied to both user nodes and movie nodes and the parameters
    are not shared unless ``share_user_item_param`` is true.

    Parameters
    ----------
    rating_vals : list of int or float
        Possible rating values.
    user_in_units : int
        Size of user input feature
    movie_in_units : int
        Size of movie input feature
    msg_units : int
        Size of message :math:`W_rh_j`
    out_units : int
        Size of the final output user and movie features
    dropout_rate : float, optional
        Dropout rate (Default: 0.0)
    agg : str, optional
        Function to aggregate messages of different ratings.
        Could be any of the supported cross type reducers:
        "sum", "max", "min", "mean", "stack".
        (Default: "stack")
    agg_act : callable, str, optional
        Activation function :math:`\sigma_{agg}`. (Default: None)
    out_act : callable, str, optional
        Activation function :math:`\sigma_{out}`. (Default: None)
    share_user_item_param : bool, optional
        If true, user node and movie node share the same set of parameters.
        Requires ``user_in_units`` and ``movie_in_units`` to be the same.
        (Default: False)
    """
    def __init__(self,
                 rating_vals,
                 user_in_units,
                 movie_in_units,
                 msg_units,
                 out_units,
                 dropout_rate=0.0,
                 agg='stack',  # or 'sum'
                 agg_act=None,
                 out_act=None,
                 share_user_item_param=False):
        super(GCMCLayer, self).__init__()
        self.rating_vals = rating_vals
        self.agg = agg
        self.share_user_item_param = share_user_item_param
        # Output projections (the W_o above). With shared parameters the
        # user and movie projections are literally the same module.
        self.ufc = nn.Linear(msg_units, out_units)
        if share_user_item_param:
            self.ifc = self.ufc
        else:
            self.ifc = nn.Linear(msg_units, out_units)
        # NOTE: ufc/ifc are created *before* msg_units is reduced below,
        # because "stack" concatenates the per-rating messages back up to
        # the full msg_units size.
        if agg == 'stack':
            # divide the original msg unit size by number of ratings to keep
            # the dimensionality
            assert msg_units % len(rating_vals) == 0
            msg_units = msg_units // len(rating_vals)
        self.dropout = nn.Dropout(dropout_rate)
        # One W_r per rating value, plus a 'rev-*' weight for the reverse
        # (movie->user) edge type of each rating.
        self.W_r = nn.ParameterDict()
        for rating in rating_vals:
            # PyTorch parameter name can't contain "."
            rating = str(rating).replace('.', '_')
            if share_user_item_param and user_in_units == movie_in_units:
                self.W_r[rating] = nn.Parameter(th.randn(user_in_units, msg_units))
                self.W_r['rev-%s' % rating] = self.W_r[rating]
            else:
                # NOTE(review): if share_user_item_param is True but the two
                # input sizes differ, this silently falls back to unshared
                # weights rather than raising — confirm this is intended.
                self.W_r[rating] = nn.Parameter(th.randn(user_in_units, msg_units))
                self.W_r['rev-%s' % rating] = nn.Parameter(th.randn(movie_in_units, msg_units))
        self.agg_act = get_activation(agg_act)
        self.out_act = get_activation(out_act)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialize every parameter with more than one dimension."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, graph, ufeat=None, ifeat=None):
        """Forward function

        Normalizer constant :math:`c_{ij}` is stored as two node data "ci"
        and "cj".

        Parameters
        ----------
        graph : DGLHeteroGraph
            User-movie rating graph. It should contain two node types: "user"
            and "movie" and many edge types each for one rating value.
        ufeat : torch.Tensor, optional
            User features. If None, using an identity matrix.
        ifeat : torch.Tensor, optional
            Movie features. If None, using an identity matrix.

        Returns
        -------
        new_ufeat : torch.Tensor
            New user features
        new_ifeat : torch.Tensor
            New movie features
        """
        num_u = graph.number_of_nodes('user')
        num_i = graph.number_of_nodes('movie')
        funcs = {}
        for i, rating in enumerate(self.rating_vals):
            rating = str(rating)
            # W_r * x (identity shortcut when the feature is None)
            x_u = dot_or_identity(ufeat, self.W_r[rating.replace('.', '_')])
            x_i = dot_or_identity(ifeat, self.W_r['rev-%s' % rating.replace('.', '_')])
            # left norm and dropout
            x_u = x_u * self.dropout(graph.nodes['user'].data['cj'])
            x_i = x_i * self.dropout(graph.nodes['movie'].data['cj'])
            graph.nodes['user'].data['h%d' % i] = x_u
            graph.nodes['movie'].data['h%d' % i] = x_i
            # Per-rating message function: copy the transformed source
            # feature and sum at the destination, in both edge directions.
            funcs[rating] = (fn.copy_u('h%d' % i, 'm'), fn.sum('m', 'h'))
            funcs['rev-%s' % rating] = (fn.copy_u('h%d' % i, 'm'), fn.sum('m', 'h'))
        # message passing; per-rating results are combined with self.agg
        # (the cross-type reducer, e.g. "stack" or "sum")
        graph.multi_update_all(funcs, self.agg)
        ufeat = graph.nodes['user'].data.pop('h').view(num_u, -1)
        ifeat = graph.nodes['movie'].data.pop('h').view(num_i, -1)
        # right norm
        ufeat = ufeat * graph.nodes['user'].data['ci']
        ifeat = ifeat * graph.nodes['movie'].data['ci']
        # fc and non-linear
        ufeat = self.agg_act(ufeat)
        ifeat = self.agg_act(ifeat)
        ufeat = self.dropout(ufeat)
        ifeat = self.dropout(ifeat)
        ufeat = self.ufc(ufeat)
        ifeat = self.ifc(ifeat)
        return self.out_act(ufeat), self.out_act(ifeat)
class BiDecoder(nn.Module):
    r"""Bilinear decoder.

    .. math::
        p(M_{ij}=r) = \text{softmax}(u_i^TQ_rv_j)

    The trainable parameter :math:`Q_r` is further decomposed to a linear
    combination of basis weight matrices :math:`P_s`:

    .. math::
        Q_r = \sum_{s=1}^{b} a_{rs}P_s

    Parameters
    ----------
    rating_vals : list of int or float
        Possible rating values.
    in_units : int
        Size of input user and movie features
    num_basis_functions : int, optional
        Number of basis. (Default: 2)
    dropout_rate : float, optional
        Dropout rate (Default: 0.0)
    """
    def __init__(self,
                 rating_vals,
                 in_units,
                 num_basis_functions=2,
                 dropout_rate=0.0):
        super(BiDecoder, self).__init__()
        self.rating_vals = rating_vals
        self._num_basis_functions = num_basis_functions
        self.dropout = nn.Dropout(dropout_rate)
        # Basis matrices P_s, one per basis function.
        self.Ps = nn.ParameterList()
        for i in range(num_basis_functions):
            self.Ps.append(nn.Parameter(th.randn(in_units, in_units)))
        # Linear map holding the coefficients a_{rs}: combines the per-basis
        # scores into one logit per rating value.
        self.rate_out = nn.Linear(self._num_basis_functions, len(rating_vals), bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialize every parameter with more than one dimension."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, graph, ufeat, ifeat):
        """Forward function.

        Parameters
        ----------
        graph : DGLHeteroGraph
            "Flattened" user-movie graph with only one edge type.
        ufeat : th.Tensor
            User embeddings. Shape: (|V_u|, D)
        ifeat : th.Tensor
            Movie embeddings. Shape: (|V_m|, D)

        Returns
        -------
        th.Tensor
            Predicting scores for each user-movie edge.
        """
        # local_var() so the 'h' node data written below does not leak into
        # the caller's graph.
        graph = graph.local_var()
        ufeat = self.dropout(ufeat)
        ifeat = self.dropout(ifeat)
        graph.nodes['movie'].data['h'] = ifeat
        basis_out = []
        for i in range(self._num_basis_functions):
            # u_i^T P_s v_j for every edge, via a per-edge dot product.
            graph.nodes['user'].data['h'] = ufeat @ self.Ps[i]
            graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
            basis_out.append(graph.edata['sr'].unsqueeze(1))
        out = th.cat(basis_out, dim=1)
        out = self.rate_out(out)
        return out
def dot_or_identity(A, B):
    """Return ``A @ B``, or ``B`` itself when ``A`` is None.

    A ``None`` left operand stands in for an identity matrix, which lets
    callers skip the matmul entirely when features are one-hot/absent.
    """
    return B if A is None else A @ B
"""Training script"""
import os, time
import argparse
import logging
import random
import string
import numpy as np
import torch as th
import torch.nn as nn
from data import MovieLens
from model import GCMCLayer, BiDecoder
from utils import get_activation, get_optimizer, torch_total_param_num, torch_net_info, MetricLogger
class Net(nn.Module):
    """End-to-end GCMC model: a GCMCLayer encoder followed by a BiDecoder.

    All hyper-parameters are read from the ``args`` namespace produced by
    ``config()`` (plus the dataset-derived fields filled in by ``train``).
    """
    def __init__(self, args):
        super(Net, self).__init__()
        self._act = get_activation(args.model_activation)
        # Graph-convolution encoder producing user/movie embeddings.
        self.encoder = GCMCLayer(args.rating_vals,
                                 args.src_in_units,
                                 args.dst_in_units,
                                 args.gcn_agg_units,
                                 args.gcn_out_units,
                                 dropout_rate=args.gcn_dropout,
                                 agg=args.gcn_agg_accum,
                                 agg_act=self._act,
                                 share_user_item_param=args.share_param)
        # Bilinear decoder scoring each candidate rating per edge.
        self.decoder = BiDecoder(args.rating_vals,
                                 in_units=args.gcn_out_units,
                                 num_basis_functions=args.gen_r_num_basis_func)

    def forward(self, enc_graph, dec_graph, ufeat, ifeat):
        """Encode node features on ``enc_graph``, then score the edges of
        ``dec_graph``; returns per-edge rating logits."""
        user_out, movie_out = self.encoder(enc_graph, ufeat, ifeat)
        return self.decoder(dec_graph, user_out, movie_out)
def evaluate(args, net, dataset, segment='valid'):
    """Compute RMSE of ``net`` on one dataset segment.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``device``.
    net : Net
        Trained (or in-training) model; switched to eval mode here.
    dataset : MovieLens
        Provides the segment graphs, ground truths, and node features.
    segment : str
        Either ``'valid'`` or ``'test'``; anything else raises
        NotImplementedError.

    Returns
    -------
    float
        Root-mean-squared error between expected ratings and ground truth.
    """
    rating_support = th.FloatTensor(dataset.possible_rating_values).to(args.device)
    if segment == "valid":
        truths = dataset.valid_truths
        enc_graph = dataset.valid_enc_graph
        dec_graph = dataset.valid_dec_graph
    elif segment == "test":
        truths = dataset.test_truths
        enc_graph = dataset.test_enc_graph
        dec_graph = dataset.test_dec_graph
    else:
        raise NotImplementedError
    # Evaluate RMSE
    net.eval()
    with th.no_grad():
        logits = net(enc_graph, dec_graph,
                     dataset.user_feature, dataset.movie_feature)
    # Expected rating under the softmax distribution over rating values.
    expected = (th.softmax(logits, dim=1) * rating_support.view(1, -1)).sum(dim=1)
    mse = ((expected - truths) ** 2.).mean().item()
    return np.sqrt(mse)
def train(args):
    """Train GCMC on the MovieLens dataset described by ``args``.

    Runs full-graph training with periodic validation, learning-rate decay
    on validation plateau, and early stopping. Train/valid/test RMSE are
    written as CSV files under ``args.save_dir``.

    Fixes over the previous revision:
    * the LR-decay branch wrote the *old* ``learning_rate`` back into the
      optimizer (the decay never took effect);
    * ``logging_str`` could be referenced before assignment when
      ``train_valid_interval`` fired on an iteration where
      ``train_log_interval`` did not;
    * the CSV loss used denominator ``iter_idx+1`` while the printed string
      used ``iter_idx``;
    * ``np.average(dur)`` on an empty list and a potentially-unbound
      ``best_test_rmse`` are guarded.
    """
    print(args)
    dataset = MovieLens(args.data_name, args.device, use_one_hot_fea=args.use_one_hot_fea, symm=args.gcn_agg_norm_symm,
                        test_ratio=args.data_test_ratio, valid_ratio=args.data_valid_ratio)
    print("Loading data finished ...\n")
    args.src_in_units = dataset.user_feature_shape[1]
    args.dst_in_units = dataset.movie_feature_shape[1]
    args.rating_vals = dataset.possible_rating_values
    ### build the net
    net = Net(args=args)
    net = net.to(args.device)
    nd_possible_rating_values = th.FloatTensor(dataset.possible_rating_values).to(args.device)
    rating_loss_net = nn.CrossEntropyLoss()
    learning_rate = args.train_lr
    optimizer = get_optimizer(args.train_optimizer)(net.parameters(), lr=learning_rate)
    print("Loading network finished ...\n")
    ### prepare training data
    train_gt_labels = dataset.train_labels
    train_gt_ratings = dataset.train_truths
    ### prepare the loggers
    train_loss_logger = MetricLogger(['iter', 'loss', 'rmse'], ['%d', '%.4f', '%.4f'],
                                     os.path.join(args.save_dir, 'train_loss%d.csv' % args.save_id))
    valid_loss_logger = MetricLogger(['iter', 'rmse'], ['%d', '%.4f'],
                                     os.path.join(args.save_dir, 'valid_loss%d.csv' % args.save_id))
    test_loss_logger = MetricLogger(['iter', 'rmse'], ['%d', '%.4f'],
                                    os.path.join(args.save_dir, 'test_loss%d.csv' % args.save_id))
    ### declare the loss information
    best_valid_rmse = np.inf
    best_test_rmse = np.inf  # fix: defined even if validation never improves
    no_better_valid = 0
    best_iter = -1
    count_rmse = 0
    count_num = 0
    count_loss = 0
    print("Start training ...")
    dur = []
    for iter_idx in range(1, args.train_max_iter):
        # skip the first few iterations when timing to avoid warm-up noise
        if iter_idx > 3:
            t0 = time.time()
        net.train()
        pred_ratings = net(dataset.train_enc_graph, dataset.train_dec_graph,
                           dataset.user_feature, dataset.movie_feature)
        loss = rating_loss_net(pred_ratings, train_gt_labels).mean()
        count_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), args.train_grad_clip)
        optimizer.step()
        if iter_idx > 3:
            dur.append(time.time() - t0)
        if iter_idx == 1:
            print("Total #Param of net: %d" % (torch_total_param_num(net)))
            print(torch_net_info(net, save_path=os.path.join(args.save_dir, 'net%d.txt' % args.save_id)))
        # expected rating under the predicted distribution -> train RMSE
        real_pred_ratings = (th.softmax(pred_ratings, dim=1) *
                             nd_possible_rating_values.view(1, -1)).sum(dim=1)
        rmse = ((real_pred_ratings - train_gt_ratings) ** 2).sum()
        count_rmse += rmse.item()
        count_num += pred_ratings.shape[0]
        # fix: always bind logging_str so the validation branch below cannot
        # raise NameError when the two intervals are out of phase
        logging_str = ""
        if iter_idx % args.train_log_interval == 0:
            # fix: same denominator (iter_idx) for CSV and printed loss;
            # count_loss has been accumulated exactly iter_idx times
            train_loss_logger.log(iter=iter_idx,
                                  loss=count_loss/iter_idx, rmse=count_rmse/count_num)
            logging_str = "Iter={}, loss={:.4f}, rmse={:.4f}, time={:.4f}".format(
                iter_idx, count_loss/iter_idx, count_rmse/count_num,
                np.average(dur) if dur else 0.)  # fix: dur is empty before iter 4
            count_rmse = 0
            count_num = 0
        if iter_idx % args.train_valid_interval == 0:
            valid_rmse = evaluate(args=args, net=net, dataset=dataset, segment='valid')
            valid_loss_logger.log(iter=iter_idx, rmse=valid_rmse)
            logging_str += ',\tVal RMSE={:.4f}'.format(valid_rmse)
            if valid_rmse < best_valid_rmse:
                best_valid_rmse = valid_rmse
                no_better_valid = 0
                best_iter = iter_idx
                test_rmse = evaluate(args=args, net=net, dataset=dataset, segment='test')
                best_test_rmse = test_rmse
                test_loss_logger.log(iter=iter_idx, rmse=test_rmse)
                logging_str += ', Test RMSE={:.4f}'.format(test_rmse)
            else:
                no_better_valid += 1
                if no_better_valid > args.train_early_stopping_patience\
                   and learning_rate <= args.train_min_lr:
                    logging.info("Early stopping threshold reached. Stop training.")
                    break
                if no_better_valid > args.train_decay_patience:
                    new_lr = max(learning_rate * args.train_lr_decay_factor, args.train_min_lr)
                    if new_lr < learning_rate:
                        # fix: actually adopt the decayed rate; the previous
                        # code wrote the old learning_rate into the optimizer
                        learning_rate = new_lr
                        logging.info("\tChange the LR to %g" % new_lr)
                        for p in optimizer.param_groups:
                            p['lr'] = learning_rate
                        no_better_valid = 0
        if iter_idx % args.train_log_interval == 0:
            print(logging_str)
    print('Best Iter Idx={}, Best Valid RMSE={:.4f}, Best Test RMSE={:.4f}'.format(
        best_iter, best_valid_rmse, best_test_rmse))
    train_loss_logger.close()
    valid_loss_logger.close()
    test_loss_logger.close()
def config():
    """Parse command-line arguments and prepare the save directory.

    Returns
    -------
    argparse.Namespace
        Parsed arguments; ``device`` is converted to a ``torch.device``,
        and ``save_dir``/``save_id`` are filled with defaults if omitted.

    Fix: ``--gcn_agg_norm_symm`` previously used ``type=bool``, which makes
    argparse treat ANY non-empty string (including "False") as True. It now
    uses an explicit string-to-bool parser; the default is unchanged.
    """
    def _str2bool(v):
        # Explicit boolean parsing; bool('False') is True, which is why
        # argparse's type=bool is broken for flags with values.
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(description='GCMC')
    parser.add_argument('--seed', default=123, type=int)
    parser.add_argument('--device', default='0', type=int,
                        help='Running device. E.g `--device 0`, if using cpu, set `--device -1`')
    parser.add_argument('--save_dir', type=str, help='The saving directory')
    parser.add_argument('--save_id', type=int, help='The saving log id')
    parser.add_argument('--silent', action='store_true')
    parser.add_argument('--data_name', default='ml-1m', type=str,
                        help='The dataset name: ml-100k, ml-1m, ml-10m')
    parser.add_argument('--data_test_ratio', type=float, default=0.1)  ## for ml-100k the test ratio is 0.2
    parser.add_argument('--data_valid_ratio', type=float, default=0.1)
    parser.add_argument('--use_one_hot_fea', action='store_true', default=False)
    parser.add_argument('--model_activation', type=str, default="leaky")
    parser.add_argument('--gcn_dropout', type=float, default=0.7)
    parser.add_argument('--gcn_agg_norm_symm', type=_str2bool, default=True)
    parser.add_argument('--gcn_agg_units', type=int, default=500)
    parser.add_argument('--gcn_agg_accum', type=str, default="sum")
    parser.add_argument('--gcn_out_units', type=int, default=75)
    parser.add_argument('--gen_r_num_basis_func', type=int, default=2)
    parser.add_argument('--train_max_iter', type=int, default=2000)
    parser.add_argument('--train_log_interval', type=int, default=1)
    parser.add_argument('--train_valid_interval', type=int, default=1)
    parser.add_argument('--train_optimizer', type=str, default="adam")
    parser.add_argument('--train_grad_clip', type=float, default=1.0)
    parser.add_argument('--train_lr', type=float, default=0.01)
    parser.add_argument('--train_min_lr', type=float, default=0.001)
    parser.add_argument('--train_lr_decay_factor', type=float, default=0.5)
    parser.add_argument('--train_decay_patience', type=int, default=50)
    parser.add_argument('--train_early_stopping_patience', type=int, default=100)
    parser.add_argument('--share_param', default=False, action='store_true')
    args = parser.parse_args()
    args.device = th.device(args.device) if args.device >= 0 else th.device('cpu')
    ### configure save_dir to save all the info
    if args.save_dir is None:
        # random 2-character suffix to keep parallel runs from colliding
        args.save_dir = args.data_name + "_" + ''.join(random.choices(string.ascii_uppercase + string.digits, k=2))
    if args.save_id is None:
        args.save_id = np.random.randint(20)
    args.save_dir = os.path.join("log", args.save_dir)
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    return args
if __name__ == '__main__':
    # Parse arguments, seed every RNG for reproducibility, then train.
    args = config()
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    if th.cuda.is_available():
        th.cuda.manual_seed_all(args.seed)
    train(args)
import csv
import re
import torch as th
import numpy as np
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict
class MetricLogger(object):
    """Append-only CSV metric logger.

    Writes a header row on construction, then one row per ``log`` call,
    flushing after every write so partial runs still leave usable logs.

    Parameters
    ----------
    attr_names : list of str
        Column names; ``log`` must be called with exactly these keywords.
    parse_formats : list of str
        printf-style format for each column, aligned with ``attr_names``.
    save_path : str
        Destination CSV file path (truncated if it exists).
    """
    def __init__(self, attr_names, parse_formats, save_path):
        self._attr_format_dict = OrderedDict(zip(attr_names, parse_formats))
        # fix: newline='' is required by the csv module; without it the
        # writer emits an extra blank line between rows on Windows.
        self._file = open(save_path, 'w', newline='')
        self._csv = csv.writer(self._file)
        self._csv.writerow(attr_names)
        self._file.flush()

    def log(self, **kwargs):
        """Format the given attributes in column order and append one row.

        Raises KeyError if a declared attribute is missing from ``kwargs``.
        """
        self._csv.writerow([parse_format % kwargs[attr_name]
                            for attr_name, parse_format in self._attr_format_dict.items()])
        self._file.flush()

    def close(self):
        """Close the underlying file; the logger is unusable afterwards."""
        self._file.close()
def torch_total_param_num(net):
    """Return the total number of scalar parameters in ``net``."""
    total = 0
    for param in net.parameters():
        total += np.prod(param.shape)
    return total
def torch_net_info(net, save_path=None):
    """Build a human-readable parameter summary of ``net``.

    The summary lists the total parameter count, each named parameter with
    its shape and size, and the module's ``repr``. If ``save_path`` is
    given, the summary is also written to that file.

    Returns
    -------
    str
        The summary text.
    """
    parts = ['Total Param Number: {}\n'.format(torch_total_param_num(net)),
             'Params:\n']
    for name, param in net.named_parameters():
        parts.append('\t{}: {}, {}\n'.format(name, param.shape, np.prod(param.shape)))
    parts.append(str(net))
    info_str = ''.join(parts)
    if save_path is not None:
        with open(save_path, 'w') as f:
            f.write(info_str)
    return info_str
def get_activation(act):
    """Get the activation based on the act string

    Parameters
    ----------
    act: str or callable function
        ``None`` yields the identity; a known string yields the matching
        torch module; any other value is assumed callable and returned
        unchanged.

    Returns
    -------
    ret: callable function

    Raises
    ------
    NotImplementedError
        If ``act`` is a string with no known mapping.
    """
    if act is None:
        return lambda x: x
    if not isinstance(act, str):
        return act
    # Factories (not instances) so each call yields a fresh module.
    factories = {
        'leaky': lambda: nn.LeakyReLU(0.1),
        'relu': nn.ReLU,
        'tanh': nn.Tanh,
        'sigmoid': nn.Sigmoid,
        'softsign': nn.Softsign,
    }
    if act not in factories:
        raise NotImplementedError
    return factories[act]()
def get_optimizer(opt):
    """Map an optimizer name ('sgd' or 'adam') to its ``torch.optim`` class.

    Raises NotImplementedError for any other name.
    """
    optimizers = {'sgd': optim.SGD, 'adam': optim.Adam}
    try:
        return optimizers[opt]
    except KeyError:
        raise NotImplementedError
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment