Unverified Commit ffe58983 authored by xiang song (charlie.song), committed by GitHub

[Optimization][KG] Several optimizations on DGL-KG (#1233)

* Several optimizations on DGL-KG:
1. Sort positive edges for sampling, which reduces random memory
   access during positive sampling
2. Asynchronous node embedding update
3. Balanced relation partitioning, which gives each partition a
   balanced number of edges. When no relation crosses partitions,
   relation embeddings can be pinned in GPU memory (see the sketch
   after this list)
4. Tunable neg_sample_size instead of a fixed neg_sample_size
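
A minimal sketch of the balanced relation partition idea in item 3, assuming per-relation edge counts are known up front; rel_counts, num_parts, and the greedy largest-first strategy are illustrative, not the exact code in this commit:

import heapq

def balanced_rel_partition(rel_counts, num_parts):
    """Greedily assign each relation (with all of its edges) to the
    currently lightest partition, so edge counts stay balanced and no
    relation is split across partitions.

    rel_counts : dict mapping relation_id -> number of edges.
    """
    heap = [(0, p) for p in range(num_parts)]   # (edge_count, part_id)
    heapq.heapify(heap)
    assignment = {}
    for rel, cnt in sorted(rel_counts.items(), key=lambda kv: -kv[1]):
        load, part = heapq.heappop(heap)
        assignment[rel] = part
        heapq.heappush(heap, (load + cnt, part))
    return assignment

For example, balanced_rel_partition({0: 90, 1: 50, 2: 40}, 2) puts relation 0 alone in one partition and relations 1 and 2 (90 edges total) in the other.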

* Fix test

* Fix test and eval.py

* Now TransR is OK

* Fix single GPU with mix_cpu_gpu

* Add app tests

* Fix test script

* fix mxnet

* Fix sample

* Add docstrings

* Fix

* Default value for num_workers

* Upd

* upd
parent f103bbf9
# To reproduce the results reported in the README, run the model with the following commands:

# for FB15k
# DistMult 1GPU
DGLBACKEND=pytorch python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 \
--valid --test -adv --mix_cpu_gpu --eval_interval 100000 --gpu 0 \
--num_worker=8 --max_step 40000
# DistMult 8GPU
DGLBACKEND=pytorch python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 \
--valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 --gpu 0 1 2 3 4 5 6 7 \
--num_worker=4 --max_step 10000 --rel_part --async_update
# ComplEx 1GPU
DGLBACKEND=pytorch python3 train.py --model ComplEx --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.1 --regularization_coef 2.00E-06 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 \
--gpu 0 --num_worker=8 --max_step 32000
# ComplEx 8GPU
DGLBACKEND=pytorch python3 train.py --model ComplEx --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.1 --regularization_coef 2.00E-06 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 \
--gpu 0 1 2 3 4 5 6 7 --num_worker=4 --max_step 4000 --rel_part --async_update
# TransE_l1 1GPU
DGLBACKEND=pytorch python3 train.py --model TransE_l1 --dataset FB15k --batch_size 1024 \
--neg_sample_size 64 --regularization_coef 1e-07 --hidden_dim 400 --gamma 16.0 --lr 0.01 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 \
--gpu 0 --num_worker=8 --max_step 48000
# TransE_l1 8GPU
DGLBACKEND=pytorch python3 train.py --model TransE_l1 --dataset FB15k --batch_size 1024 \
--neg_sample_size 64 --regularization_coef 1e-07 --hidden_dim 400 --gamma 16.0 --lr 0.01 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 \
--gpu 0 1 2 3 4 5 6 7 --num_worker=4 --max_step 6000 --rel_part --async_update
# TransE_l2 1GPU
DGLBACKEND=pytorch python3 train.py --model TransE_l2 --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --hidden_dim 2000 --gamma 12.0 --lr 0.1 --max_step 30000 \
--batch_size_eval 16 --gpu 0 --valid --test -adv --regularization_coef=2e-7
# RESCAL 1GPU
DGLBACKEND=pytorch python3 train.py --model RESCAL --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --hidden_dim 500 --gamma 24.0 --lr 0.03 --max_step 30000 \
--batch_size_eval 16 --gpu 0 --valid --test -adv
# TransR 1GPU
DGLBACKEND=pytorch python3 train.py --model TransR --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --regularization_coef 5e-8 --hidden_dim 200 --gamma 8.0 --lr 0.015 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 \
--gpu 0 --num_worker=8 --max_step 32000
# TransR 8GPU
DGLBACKEND=pytorch python3 train.py --model TransR --dataset FB15k --batch_size 1024 \
--neg_sample_size 256 --regularization_coef 5e-8 --hidden_dim 200 --gamma 8.0 --lr 0.015 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 \
--gpu 0 1 2 3 4 5 6 7 --num_worker=4 --max_step 4000 --rel_part --async_update
# RotatE 1GPU
DGLBACKEND=pytorch python3 train.py --model RotatE --dataset FB15k --batch_size 2048 \
--neg_sample_size 256 --regularization_coef 1e-07 --hidden_dim 200 --gamma 12.0 --lr 0.009 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 -de \
--max_step 40000 --gpu 0 --num_worker=4
# RotatE 8GPU
DGLBACKEND=pytorch python3 train.py --model RotatE --dataset FB15k --batch_size 2048 \
--neg_sample_size 256 --regularization_coef 1e-07 --hidden_dim 200 --gamma 12.0 --lr 0.009 \
--batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 -de \
--max_step 5000 --num_proc 8 --gpu 0 1 2 3 4 5 6 7 --num_worker=4 \
--rel_part --async_update

# for wn18
DGLBACKEND=pytorch python3 train.py --model TransE_l1 --dataset wn18 --batch_size 1024 \
--neg_sample_size 512 --hidden_dim 500 --gamma 12.0 --adversarial_temperature 0.5 \
--lr 0.01 --max_step 40000 --batch_size_eval 16 --gpu 0 --valid --test -adv \
......
@@ -15,7 +15,7 @@ if backend.lower() == 'mxnet':
else:
    import torch.multiprocessing as mp
    from train_pytorch import load_model_from_checkpoint
    from train_pytorch import test, test_mp

class ArgParser(argparse.ArgumentParser):
    def __init__(self):
@@ -100,7 +100,8 @@ def main(args):
    args.train = False
    args.valid = False
    args.test = True
    args.strict_rel_part = False
    args.async_update = False
    args.batch_size_eval = args.batch_size

    logger = get_logger(args)
@@ -172,28 +173,33 @@ def main(args):
        queue = mp.Queue(args.num_proc)
        procs = []
        for i in range(args.num_proc):
            proc = mp.Process(target=test_mp, args=(args,
                                                    model,
                                                    [test_sampler_heads[i], test_sampler_tails[i]],
                                                    i,
                                                    'Test',
                                                    queue))
            procs.append(proc)
            proc.start()

        total_metrics = {}
        metrics = {}
        logs = []
        for i in range(args.num_proc):
            log = queue.get()
            logs = logs + log

        for metric in logs[0].keys():
            metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
        for k, v in metrics.items():
            print('Test average {} at [{}/{}]: {}'.format(k, args.step, args.max_step, v))

        for proc in procs:
            proc.join()
    else:
        test(args, model, [test_sampler_head, test_sampler_tail])
    print('Test takes {:.3f} seconds'.format(time.time() - start))

if __name__ == '__main__':
    args = ArgParser().parse_args()
    main(args)
......
"""
Graph Embedding Model
1. TransE
2. TransR
3. RESCAL
4. DistMult
5. ComplEx
6. RotatE
"""
import os
import numpy as np
import dgl.backend as F
@@ -23,6 +32,30 @@
else:
    from .pytorch.score_fun import *
class KEModel(object):
    """DGL Knowledge Embedding Model.

    Parameters
    ----------
    args :
        Global configs.
    model_name : str
        Which KG model to use, including 'TransE_l1', 'TransE_l2', 'TransR',
        'RESCAL', 'DistMult', 'ComplEx', 'RotatE'.
    n_entities : int
        Number of entities.
    n_relations : int
        Number of relations.
    hidden_dim : int
        Dimension size of the embedding.
    gamma : float
        Gamma for the score function.
    double_entity_emb : bool
        If True, the entity embedding size will be 2 * hidden_dim.
        Default: False
    double_relation_emb : bool
        If True, the relation embedding size will be 2 * hidden_dim.
        Default: False
    """
    def __init__(self, args, model_name, n_entities, n_relations, hidden_dim, gamma,
                 double_entity_emb=False, double_relation_emb=False):
        super(KEModel, self).__init__()
@@ -47,15 +80,24 @@ class KEModel(object):
        rel_dim = relation_dim
        self.rel_dim = rel_dim
        self.entity_dim = entity_dim
        self.strict_rel_part = args.strict_rel_part
        if not self.strict_rel_part:
            self.relation_emb = ExternalEmbedding(args, n_relations, rel_dim,
                                                  F.cpu() if args.mix_cpu_gpu else device)
        else:
            self.global_relation_emb = ExternalEmbedding(args, n_relations, rel_dim, F.cpu())

        if model_name == 'TransE' or model_name == 'TransE_l2':
            self.score_func = TransEScore(gamma, 'l2')
        elif model_name == 'TransE_l1':
            self.score_func = TransEScore(gamma, 'l1')
        elif model_name == 'TransR':
            projection_emb = ExternalEmbedding(args,
                                               n_relations,
                                               entity_dim * relation_dim,
                                               F.cpu() if args.mix_cpu_gpu else device)
            self.score_func = TransRScore(gamma, projection_emb, relation_dim, entity_dim)
        elif model_name == 'DistMult':
            self.score_func = DistMultScore()
@@ -66,6 +108,7 @@ class KEModel(object):
        elif model_name == 'RotatE':
            self.score_func = RotatEScore(gamma, self.emb_init)

        self.model_name = model_name
        self.head_neg_score = self.score_func.create_neg(True)
        self.tail_neg_score = self.score_func.create_neg(False)
        self.head_neg_prepare = self.score_func.create_neg_prepare(True)
@@ -74,31 +117,101 @@ class KEModel(object):
        self.reset_parameters()

    def share_memory(self):
        """Use torch.tensor.share_memory_() to allow cross-process access to the embeddings.
        """
        self.entity_emb.share_memory()
        if self.strict_rel_part:
            self.global_relation_emb.share_memory()
        else:
            self.relation_emb.share_memory()
        if self.model_name == 'TransR':
            self.score_func.share_memory()

    def save_emb(self, path, dataset):
        """Save the model.

        Parameters
        ----------
        path : str
            Directory to save the model to.
        dataset : str
            Dataset name used as the prefix of the saved embeddings.
        """
        self.entity_emb.save(path, dataset+'_'+self.model_name+'_entity')
        if self.strict_rel_part:
            self.global_relation_emb.save(path, dataset+'_'+self.model_name+'_relation')
        else:
            self.relation_emb.save(path, dataset+'_'+self.model_name+'_relation')
        self.score_func.save(path, dataset+'_'+self.model_name)

    def load_emb(self, path, dataset):
        """Load the model.

        Parameters
        ----------
        path : str
            Directory to load the model from.
        dataset : str
            Dataset name used as the prefix of the saved embeddings.
        """
        self.entity_emb.load(path, dataset+'_'+self.model_name+'_entity')
        self.relation_emb.load(path, dataset+'_'+self.model_name+'_relation')
        self.score_func.load(path, dataset+'_'+self.model_name)

    def reset_parameters(self):
        """Re-initialize the model.
        """
        self.entity_emb.init(self.emb_init)
        self.score_func.reset_parameters()
        if not self.strict_rel_part:
            self.relation_emb.init(self.emb_init)

    def predict_score(self, g):
        """Predict the positive score.

        Parameters
        ----------
        g : DGLGraph
            Graph holding positive edges.

        Returns
        -------
        tensor
            The positive score.
        """
        self.score_func(g)
        return g.edata['score']

    def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False,
                          neg_deg_sample=False):
        """Calculate the negative score.

        Parameters
        ----------
        pos_g : DGLGraph
            Graph holding positive edges.
        neg_g : DGLGraph
            Graph holding negative edges.
        to_device : func
            Function to move data onto a device.
        gpu_id : int
            Which GPU to move data to.
        trace : bool
            If True, trace the computation. This is required in training.
            If False, do not trace the computation.
            Default: False
        neg_deg_sample : bool
            If True, use the head and tail nodes of the positive edges to
            construct negative edges.
            Default: False

        Returns
        -------
        tensor
            The negative score.
        """
        num_chunks = neg_g.num_chunks
        chunk_size = neg_g.chunk_size
        neg_sample_size = neg_g.neg_sample_size
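        # Negative edges come grouped in num_chunks chunks: each chunk of
        # chunk_size positive edges shares one set of neg_sample_size
        # negative nodes, so the negative scores form a
        # (num_chunks, chunk_size, neg_sample_size) tensor.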
@@ -160,6 +273,19 @@ class KEModel(object):
        return neg_score

    def forward_test(self, pos_g, neg_g, logs, gpu_id=-1):
        """Do the forward pass and generate ranking results.

        Parameters
        ----------
        pos_g : DGLGraph
            Graph holding positive edges.
        neg_g : DGLGraph
            Graph holding negative edges.
        logs : list
            Where to put the ranking results.
        gpu_id : int
            Which GPU to use for the calculation; if -1 is given, the CPU is used.
        """
        pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, False)
        pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, False)
@@ -183,7 +309,7 @@ class KEModel(object):
        # To compute the rank of a positive edge among all negative edges,
        # we need to know how many negative edges have higher scores than
        # the positive edge.
        rankings = F.sum(neg_scores >= pos_scores, dim=1) + 1
        rankings = F.asnumpy(rankings)
        for i in range(batch_size):
            ranking = rankings[i]
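            # For example, if a positive edge scores 0.9 and its negatives
            # score [0.95, 0.7, 0.9], two negatives score >= 0.9, so the
            # positive edge is ranked 3.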
@@ -197,6 +323,24 @@ class KEModel(object):
    # @profile
    def forward(self, pos_g, neg_g, gpu_id=-1):
        """Do the forward pass.

        Parameters
        ----------
        pos_g : DGLGraph
            Graph holding positive edges.
        neg_g : DGLGraph
            Graph holding negative edges.
        gpu_id : int
            Which GPU to use for the calculation; if -1 is given, the CPU is used.

        Returns
        -------
        tensor
            Loss value.
        dict
            Loss info.
        """
        pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, True)
        pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, True)
@@ -248,7 +392,63 @@ class KEModel(object):
        return loss, log

    def update(self, gpu_id=-1):
        """Update the embeddings in the model.

        gpu_id : int
            Which GPU to use for the calculation; if -1 is given, the CPU is used.
        """
        self.entity_emb.update(gpu_id)
        self.relation_emb.update(gpu_id)
        self.score_func.update(gpu_id)

    def prepare_relation(self, device=None):
        """Prepare relation embeddings in multi-process multi-GPU training mode.

        device : th.device
            Which device (GPU) to put the relation embeddings on.
        """
        self.relation_emb = ExternalEmbedding(self.args, self.n_relations, self.rel_dim, device)
        self.relation_emb.init(self.emb_init)
        if self.model_name == 'TransR':
            local_projection_emb = ExternalEmbedding(self.args, self.n_relations,
                                                     self.entity_dim * self.rel_dim, device)
            self.score_func.prepare_local_emb(local_projection_emb)
            self.score_func.reset_parameters()

    def writeback_relation(self, rank=0, rel_parts=None):
        """Write back the relation embeddings of a specific process to the
        global relation embedding. Used in multi-process multi-GPU training mode.

        rank : int
            Process id.
        rel_parts : list of tensors
            List of tensors storing the edge types of each partition.
        """
        idx = rel_parts[rank]
        self.global_relation_emb.emb[idx] = F.copy_to(self.relation_emb.emb, F.cpu())[idx]
        if self.model_name == 'TransR':
            self.score_func.writeback_local_emb(idx)

    def load_relation(self, device=None):
        """Sync global relation embeddings into local relation embeddings.
        Used in multi-process multi-GPU training mode.

        device : th.device
            Which device (GPU) to put the relation embeddings on.
        """
        self.relation_emb = ExternalEmbedding(self.args, self.n_relations, self.rel_dim, device)
        self.relation_emb.emb = F.copy_to(self.global_relation_emb.emb, device)
        if self.model_name == 'TransR':
            local_projection_emb = ExternalEmbedding(self.args, self.n_relations,
                                                     self.entity_dim * self.rel_dim, device)
            self.score_func.load_local_emb(local_projection_emb)

    def create_async_update(self):
        """Set up asynchronous update for the entity embedding.
        """
        self.entity_emb.create_async_update()

    def finish_async_update(self):
        """Terminate the asynchronous update for the entity embedding.
        """
        self.entity_emb.finish_async_update()
@@ -21,6 +21,9 @@ def batched_l1_dist(a, b):
    return res

class TransEScore(nn.Block):
    """TransE score function
    Paper link: https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data
    """
    def __init__(self, gamma, dist_func='l2'):
        super(TransEScore, self).__init__()
        self.gamma = gamma
@@ -81,6 +84,9 @@ class TransEScore(nn.Block):
        return fn

class TransRScore(nn.Block):
    """TransR score function
    Paper link: https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/download/9571/9523
    """
    def __init__(self, gamma, projection_emb, relation_dim, entity_dim):
        super(TransRScore, self).__init__()
        self.gamma = gamma
@@ -180,6 +186,18 @@ class TransRScore(nn.Block):
    def load(self, path, name):
        self.projection_emb.load(path, name+'projection')

    def prepare_local_emb(self, projection_emb):
        self.global_projection_emb = self.projection_emb
        self.projection_emb = projection_emb

    def writeback_local_emb(self, idx):
        self.global_projection_emb.emb[idx] = self.projection_emb.emb.as_in_context(mx.cpu())[idx]

    def load_local_emb(self, projection_emb):
        context = projection_emb.emb.context
        projection_emb.emb = self.projection_emb.emb.as_in_context(context)
        self.projection_emb = projection_emb

    def create_neg(self, neg_head):
        gamma = self.gamma
        if neg_head:
@@ -200,6 +218,9 @@ class TransRScore(nn.Block):
        return fn

class DistMultScore(nn.Block):
    """DistMult score function
    Paper link: https://arxiv.org/abs/1412.6575
    """
    def __init__(self):
        super(DistMultScore, self).__init__()
@@ -253,6 +274,9 @@ class DistMultScore(nn.Block):
        return fn

class ComplExScore(nn.Block):
    """ComplEx score function
    Paper link: https://arxiv.org/abs/1606.06357
    """
    def __init__(self):
        super(ComplExScore, self).__init__()
@@ -321,6 +345,9 @@ class ComplExScore(nn.Block):
        return fn

class RESCALScore(nn.Block):
    """RESCAL score function
    Paper link: http://www.icml-2011.org/papers/438_icmlpaper.pdf
    """
    def __init__(self, relation_dim, entity_dim):
        super(RESCALScore, self).__init__()
        self.relation_dim = relation_dim
@@ -384,6 +411,9 @@ class RESCALScore(nn.Block):
        return fn

class RotatEScore(nn.Block):
    """RotatE score function
    Paper link: https://arxiv.org/abs/1902.10197
    """
    def __init__(self, gamma, emb_init, eps=1e-10):
        super(RotatEScore, self).__init__()
        self.gamma = gamma
......
"""
KG Sparse embedding
"""
import os
import numpy as np
import mxnet as mx
@@ -22,6 +25,20 @@ reshape = lambda arr, x, y: arr.reshape(x, y)
cuda = lambda arr, gpu: arr.as_in_context(mx.gpu(gpu))

class ExternalEmbedding:
    """Sparse embedding for knowledge graphs.

    It is used to store both entity embeddings and relation embeddings.

    Parameters
    ----------
    args :
        Global configs.
    num : int
        Number of embeddings.
    dim : int
        Embedding dimension size.
    ctx : mx.ctx
        Device context to store the embedding.
    """
    def __init__(self, args, num, dim, ctx):
        self.gpu = args.gpu
        self.args = args
@@ -32,6 +49,13 @@ class ExternalEmbedding:
        self.state_step = 0

    def init(self, emb_init):
        """Initialize the embeddings.

        Parameters
        ----------
        emb_init : float
            The initial embeddings are drawn uniformly from [-emb_init, emb_init].
        """
        nd.random.uniform(-emb_init, emb_init,
                          shape=self.emb.shape, dtype=self.emb.dtype,
                          ctx=self.emb.context, out=self.emb)
@@ -41,6 +65,19 @@ class ExternalEmbedding:
        pass

    def __call__(self, idx, gpu_id=-1, trace=True):
        """Return the sliced tensor.

        Parameters
        ----------
        idx : nd.NDArray
            Slicing index.
        gpu_id : int
            Which GPU to put the sliced data on.
        trace : bool
            If True, trace the computation. This is required in training.
            If False, do not trace the computation.
            Default: True
        """
        if self.emb.context != idx.context:
            idx = idx.as_in_context(self.emb.context)
        data = nd.take(self.emb, idx)
@@ -52,6 +89,15 @@ class ExternalEmbedding:
        return data

    def update(self, gpu_id=-1):
        """Update embeddings in a sparse manner.

        Sparse embeddings are updated in mini-batches. We maintain gradient
        states for each embedding so they can be updated separately.

        Parameters
        ----------
        gpu_id : int
            Which GPU to use for the calculation; if -1 is given, the CPU is used.
        """
        self.state_step += 1
        for idx, data in self.trace:
            grad = data.grad
@@ -82,13 +128,33 @@ class ExternalEmbedding:
        self.trace = []

    def curr_emb(self):
        """Return the embeddings in the trace.
        """
        data = [data for _, data in self.trace]
        return nd.concat(*data, dim=0)

    def save(self, path, name):
        """Save embeddings.

        Parameters
        ----------
        path : str
            Directory to save the embedding.
        name : str
            Embedding name.
        """
        emb_fname = os.path.join(path, name+'.npy')
        np.save(emb_fname, self.emb.asnumpy())

    def load(self, path, name):
        """Load embeddings.

        Parameters
        ----------
        path : str
            Directory to load the embedding from.
        name : str
            Embedding name.
        """
        emb_fname = os.path.join(path, name+'.npy')
        self.emb = nd.array(np.load(emb_fname))
@@ -19,6 +19,9 @@ def batched_l1_dist(a, b):
    return res

class TransEScore(nn.Module):
    """TransE score function
    Paper link: https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data
    """
    def __init__(self, gamma, dist_func='l2'):
        super(TransEScore, self).__init__()
        self.gamma = gamma
@@ -79,6 +82,9 @@ class TransEScore(nn.Module):
        return fn

class TransRScore(nn.Module):
    """TransR score function
    Paper link: https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/download/9571/9523
    """
    def __init__(self, gamma, projection_emb, relation_dim, entity_dim):
        super(TransRScore, self).__init__()
        self.gamma = gamma
@@ -141,12 +147,27 @@ class TransRScore(nn.Module):
    def update(self, gpu_id=-1):
        self.projection_emb.update(gpu_id)

    def save(self, path, name):
        self.projection_emb.save(path, name+'projection')

    def load(self, path, name):
        self.projection_emb.load(path, name+'projection')

    def prepare_local_emb(self, projection_emb):
        self.global_projection_emb = self.projection_emb
        self.projection_emb = projection_emb

    def writeback_local_emb(self, idx):
        self.global_projection_emb.emb[idx] = self.projection_emb.emb.cpu()[idx]

    def load_local_emb(self, projection_emb):
        device = projection_emb.emb.device
        projection_emb.emb = self.projection_emb.emb.to(device)
        self.projection_emb = projection_emb

    def share_memory(self):
        self.projection_emb.share_memory()

    def create_neg(self, neg_head):
        gamma = self.gamma
        if neg_head:
@@ -167,6 +188,9 @@ class TransRScore(nn.Module):
        return fn

class DistMultScore(nn.Module):
    """DistMult score function
    Paper link: https://arxiv.org/abs/1412.6575
    """
    def __init__(self):
        super(DistMultScore, self).__init__()
@@ -220,6 +244,9 @@ class DistMultScore(nn.Module):
        return fn

class ComplExScore(nn.Module):
    """ComplEx score function
    Paper link: https://arxiv.org/abs/1606.06357
    """
    def __init__(self):
        super(ComplExScore, self).__init__()
@@ -291,6 +318,9 @@ class ComplExScore(nn.Module):
        return fn

class RESCALScore(nn.Module):
    """RESCAL score function
    Paper link: http://www.icml-2011.org/papers/438_icmlpaper.pdf
    """
    def __init__(self, relation_dim, entity_dim):
        super(RESCALScore, self).__init__()
        self.relation_dim = relation_dim
@@ -354,6 +384,9 @@ class RESCALScore(nn.Module):
        return fn

class RotatEScore(nn.Module):
    """RotatE score function
    Paper link: https://arxiv.org/abs/1902.10197
    """
    def __init__(self, gamma, emb_init):
        super(RotatEScore, self).__init__()
        self.gamma = gamma
......
""" """
Knowledge Graph Embedding Models. KG Sparse embedding
1. TransE
2. DistMult
3. ComplEx
4. RotatE
5. pRotatE
6. TransH
7. TransR
8. TransD
9. RESCAL
""" """
import os
import numpy as np
@@ -18,6 +9,12 @@
import torch.nn as nn
import torch.nn.functional as functional
import torch.nn.init as INIT
import torch.multiprocessing as mp
from torch.multiprocessing import Queue
from _thread import start_new_thread
import traceback
from functools import wraps

from .. import *

logsigmoid = functional.logsigmoid
@@ -26,14 +23,97 @@ def get_device(args):
    return th.device('cpu') if args.gpu[0] < 0 else th.device('cuda:' + str(args.gpu[0]))

norm = lambda x, p: x.norm(p=p)**p
get_scalar = lambda x: x.detach().item()
reshape = lambda arr, x, y: arr.view(x, y)
cuda = lambda arr, gpu: arr.cuda(gpu)
def thread_wrapped_func(func):
    """Wrapped func for torch.multiprocessing.Process.

    With this wrapper we can use OMP threads in subprocesses;
    otherwise, OMP_NUM_THREADS=1 is mandatory.

    How to use:
    @thread_wrapped_func
    def func_to_wrap(args ...):
    """
    @wraps(func)
    def decorated_function(*args, **kwargs):
        queue = Queue()
        def _queue_result():
            exception, trace, res = None, None, None
            try:
                res = func(*args, **kwargs)
            except Exception as e:
                exception = e
                trace = traceback.format_exc()
            queue.put((res, exception, trace))

        start_new_thread(_queue_result, ())
        result, exception, trace = queue.get()
        if exception is None:
            return result
        else:
            assert isinstance(exception, Exception)
            raise exception.__class__(trace)
    return decorated_function
@thread_wrapped_func
def async_update(args, emb, queue):
    """Asynchronous embedding update for entity embeddings.

    How it works:
    1. The trainer process pushes embedding-update requests into the queue.
    2. The async_update process pulls requests from the queue, computes the
       gradient state and the gradient, and writes them into the entity
       embeddings.

    Parameters
    ----------
    args :
        Global configs.
    emb : ExternalEmbedding
        The entity embeddings.
    queue:
        The request queue.
    """
    th.set_num_threads(args.num_thread)
    while True:
        (grad_indices, grad_values, gpu_id) = queue.get()
        clr = emb.args.lr
        if grad_indices is None:
            return
        with th.no_grad():
            grad_sum = (grad_values * grad_values).mean(1)
            device = emb.state_sum.device
            if device != grad_indices.device:
                grad_indices = grad_indices.to(device)
            if device != grad_sum.device:
                grad_sum = grad_sum.to(device)
            emb.state_sum.index_add_(0, grad_indices, grad_sum)
            std = emb.state_sum[grad_indices]  # _sparse_mask
            if gpu_id >= 0:
                std = std.cuda(gpu_id)
            std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
            tmp = (-clr * grad_values / std_values)
            if tmp.device != device:
                tmp = tmp.to(device)
            emb.emb.index_add_(0, grad_indices, tmp)
class ExternalEmbedding:
    """Sparse embedding for knowledge graphs.

    It is used to store both entity embeddings and relation embeddings.

    Parameters
    ----------
    args :
        Global configs.
    num : int
        Number of embeddings.
    dim : int
        Embedding dimension size.
    device : th.device
        Device to store the embedding.
    """
    def __init__(self, args, num, dim, device):
        self.gpu = args.gpu
        self.args = args
@@ -42,16 +122,42 @@ class ExternalEmbedding:
        self.emb = th.empty(num, dim, dtype=th.float32, device=device)
        self.state_sum = self.emb.new().resize_(self.emb.size(0)).zero_()
        self.state_step = 0
        # queue used by asynchronous update
        self.async_q = None
        # asynchronous update process
        self.async_p = None

    def init(self, emb_init):
        """Initialize the embeddings.

        Parameters
        ----------
        emb_init : float
            The initial embeddings are drawn uniformly from [-emb_init, emb_init].
        """
        INIT.uniform_(self.emb, -emb_init, emb_init)
        INIT.zeros_(self.state_sum)

    def share_memory(self):
        """Use torch.tensor.share_memory_() to allow cross-process tensor access.
        """
        self.emb.share_memory_()
        self.state_sum.share_memory_()

    def __call__(self, idx, gpu_id=-1, trace=True):
        """Return the sliced tensor.

        Parameters
        ----------
        idx : th.tensor
            Slicing index.
        gpu_id : int
            Which GPU to put the sliced data on.
        trace : bool
            If True, trace the computation. This is required in training.
            If False, do not trace the computation.
            Default: True
        """
        s = self.emb[idx]
        if gpu_id >= 0:
            s = s.cuda(gpu_id)
@@ -65,6 +171,15 @@ class ExternalEmbedding:
        return data
    def update(self, gpu_id=-1):
        """Update embeddings in a sparse manner.

        Sparse embeddings are updated in mini-batches. We maintain gradient
        states for each embedding so they can be updated separately.

        Parameters
        ----------
        gpu_id : int
            Which GPU to use for the calculation; if -1 is given, the CPU is used.
        """
        self.state_step += 1
        with th.no_grad():
            for idx, data in self.trace:
                grad = data.grad
@@ -76,33 +191,70 @@ class ExternalEmbedding:
                # the update is non-linear so indices must be unique
                grad_indices = idx
                grad_values = grad
                if self.async_q is not None:
                    grad_indices.share_memory_()
                    grad_values.share_memory_()
                    self.async_q.put((grad_indices, grad_values, gpu_id))
                else:
                    grad_sum = (grad_values * grad_values).mean(1)
                    device = self.state_sum.device
                    if device != grad_indices.device:
                        grad_indices = grad_indices.to(device)
                    if device != grad_sum.device:
                        grad_sum = grad_sum.to(device)
                    self.state_sum.index_add_(0, grad_indices, grad_sum)
                    std = self.state_sum[grad_indices]  # _sparse_mask
                    if gpu_id >= 0:
                        std = std.cuda(gpu_id)
                    std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
                    tmp = (-clr * grad_values / std_values)
                    if tmp.device != device:
                        tmp = tmp.to(device)
                    # TODO(zhengda) the overhead is here.
                    self.emb.index_add_(0, grad_indices, tmp)
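        # Note: the else-branch above is row-wise Adagrad; per touched row i,
        # G[i] += mean(g[i]^2) and emb[i] -= lr * g[i] / (sqrt(G[i]) + 1e-10),
        # while the async branch ships (indices, gradients) to a separate
        # process that applies the same rule.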
        self.trace = []
    def create_async_update(self):
        """Set up the asynchronous update subprocess.
        """
        self.async_q = Queue(1)
        self.async_p = mp.Process(target=async_update, args=(self.args, self, self.async_q))
        self.async_p.start()

    def finish_async_update(self):
        """Notify the asynchronous update subprocess to quit.
        """
        self.async_q.put((None, None, None))
        self.async_p.join()

    def curr_emb(self):
        """Return the embeddings in the trace.
        """
        data = [data for _, data in self.trace]
        return th.cat(data, 0)
    def save(self, path, name):
        """Save embeddings.

        Parameters
        ----------
        path : str
            Directory to save the embedding.
        name : str
            Embedding name.
        """
        file_name = os.path.join(path, name+'.npy')
        np.save(file_name, self.emb.cpu().detach().numpy())

    def load(self, path, name):
        """Load embeddings.

        Parameters
        ----------
        path : str
            Directory to load the embedding from.
        name : str
            Embedding name.
        """
        file_name = os.path.join(path, name+'.npy')
        self.emb = th.Tensor(np.load(file_name))
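
A minimal usage sketch of the asynchronous update path; the sizes are illustrative, and ExternalEmbedding only reads args.gpu, args.lr and args.num_thread here:

emb = ExternalEmbedding(args, num=10000, dim=400, device=th.device('cpu'))
emb.init(emb_init=1.0)
emb.share_memory()           # share storage before any subprocess is forked
emb.create_async_update()    # spawn the async_update worker
data = emb(idx, gpu_id=-1, trace=True)   # slice rows for a mini-batch
# ... compute a loss from data and call loss.backward() ...
emb.update(gpu_id=-1)        # gradients are pushed onto the async queue
emb.finish_async_update()    # flush the queue and join the worker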
@@ -144,7 +144,13 @@ def check_score_func(func_name):
                                 return_false_neg=False)
    for pos_g, neg_g in sampler:
        neg_g = create_neg_subgraph(pos_g,
                                    neg_g,
                                    neg_sample_size,
                                    neg_sample_size,
                                    True,
                                    True,
                                    g.number_of_nodes())
        pos_g.copy_from_parent()
        neg_g.copy_from_parent()
        score1 = F.reshape(model.predict_score(neg_g), (batch_size, -1))
......
@@ -15,8 +15,8 @@ if backend.lower() == 'mxnet':
else:
    import torch.multiprocessing as mp
    from train_pytorch import load_model
    from train_pytorch import train, train_mp
    from train_pytorch import test, test_mp

class ArgParser(argparse.ArgumentParser):
    def __init__(self):
@@ -98,7 +98,7 @@ class ArgParser(argparse.ArgumentParser):
                          help='set value > 0.0 if regularization is used')
        self.add_argument('-rn', '--regularization_norm', type=int, default=3,
                          help='norm used in regularization')
        self.add_argument('--num_worker', type=int, default=32,
                          help='number of workers used for loading data')
        self.add_argument('--non_uni_weight', action='store_true',
                          help='if use uniform weight when computing loss')
@@ -112,6 +112,12 @@ class ArgParser(argparse.ArgumentParser):
                          help='number of process used')
        self.add_argument('--rel_part', action='store_true',
                          help='enable relation partitioning')
        self.add_argument('--nomp_thread_per_process', type=int, default=-1,
                          help='num of omp threads used per process in multi-process training')
        self.add_argument('--async_update', action='store_true',
                          help='allow async_update on node embedding')
        self.add_argument('--force_sync_interval', type=int, default=-1,
                          help='We force a synchronization between processes every x steps')
def get_logger(args):
@@ -162,50 +168,70 @@ def run(args, logger):
    if args.neg_chunk_size_test < 0:
        args.neg_chunk_size_test = args.neg_sample_size_test

    num_workers = args.num_worker
    train_data = TrainDataset(dataset, args, ranks=args.num_proc)
    args.strict_rel_part = args.mix_cpu_gpu and (train_data.cross_part == False)

    # Automatically set the number of OMP threads for each process if it is not provided.
    # The value for GPU is evaluated on an AWS p3.16xlarge;
    # the value for CPU is evaluated on an AWS x1.32xlarge.
    if args.nomp_thread_per_process == -1:
        if len(args.gpu) > 0:
            # GPU training
            args.num_thread = 4
        else:
            # CPU training
            args.num_thread = mp.cpu_count() // args.num_proc + 1
    else:
        args.num_thread = args.nomp_thread_per_process

    if args.num_proc > 1:
        train_samplers = []
        for i in range(args.num_proc):
            train_sampler_head = train_data.create_sampler(args.batch_size,
                                                           args.neg_sample_size,
                                                           args.neg_chunk_size,
                                                           mode='head',
                                                           num_workers=num_workers,
                                                           shuffle=True,
                                                           exclude_positive=False,
                                                           rank=i)
            train_sampler_tail = train_data.create_sampler(args.batch_size,
                                                           args.neg_sample_size,
                                                           args.neg_chunk_size,
                                                           mode='tail',
                                                           num_workers=num_workers,
                                                           shuffle=True,
                                                           exclude_positive=False,
                                                           rank=i)
            train_samplers.append(NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
                                                                  args.neg_chunk_size, args.neg_sample_size,
                                                                  True, n_entities))
    else:
        train_sampler_head = train_data.create_sampler(args.batch_size,
                                                       args.neg_sample_size,
                                                       args.neg_chunk_size,
                                                       mode='head',
                                                       num_workers=num_workers,
                                                       shuffle=True,
                                                       exclude_positive=False)
        train_sampler_tail = train_data.create_sampler(args.batch_size,
                                                       args.neg_sample_size,
                                                       args.neg_chunk_size,
                                                       mode='tail',
                                                       num_workers=num_workers,
                                                       shuffle=True,
                                                       exclude_positive=False)
        train_sampler = NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
                                                        args.neg_chunk_size, args.neg_sample_size,
                                                        True, n_entities)

    # For multiprocessing evaluation, we don't need to sample multiple batches at a time
    # in each process.
    if args.num_proc > 1:
        num_workers = 1
    if args.valid or args.test:
        args.num_test_proc = args.num_proc if args.num_proc < len(args.gpu) else len(args.gpu)
        eval_dataset = EvalDataset(dataset, args)
    if args.valid:
        # Here we want to use the regular negative sampler because we need to ensure that
@@ -248,24 +274,25 @@ def run(args, logger):
    if args.test:
        # Here we want to use the regular negative sampler because we need to ensure that
        # all positive edges are excluded.
        # We use at most num_gpu processes in the test stage to save GPU memory.
        if args.num_test_proc > 1:
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_test_proc):
                test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                                args.neg_sample_size_test,
                                                                args.neg_chunk_size_test,
                                                                args.eval_filter,
                                                                mode='chunk-head',
                                                                num_workers=num_workers,
                                                                rank=i, ranks=args.num_test_proc)
                test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                                args.neg_sample_size_test,
                                                                args.neg_chunk_size_test,
                                                                args.eval_filter,
                                                                mode='chunk-tail',
                                                                num_workers=num_workers,
                                                                rank=i, ranks=args.num_test_proc)
                test_sampler_heads.append(test_sampler_head)
                test_sampler_tails.append(test_sampler_tail)
        else:
@@ -290,24 +317,31 @@ def run(args, logger):
    # load model
    model = load_model(logger, args, n_entities, n_relations)

    if args.num_proc > 1 or args.async_update:
        model.share_memory()

    # train
    start = time.time()
    rel_parts = train_data.rel_parts if args.strict_rel_part else None
    if args.num_proc > 1:
        procs = []
        barrier = mp.Barrier(args.num_proc)
        for i in range(args.num_proc):
            valid_sampler = [valid_sampler_heads[i], valid_sampler_tails[i]] if args.valid else None
            proc = mp.Process(target=train_mp, args=(args,
                                                     model,
                                                     train_samplers[i],
                                                     valid_sampler,
                                                     i,
                                                     rel_parts,
                                                     barrier))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
    else:
        valid_samplers = [valid_sampler_head, valid_sampler_tail] if args.valid else None
        train(args, model, train_sampler, valid_samplers, rel_parts=rel_parts)
    print('training takes {} seconds'.format(time.time() - start))

    if args.save_emb is not None:
@@ -318,23 +352,28 @@ def run(args, logger):
    # test
    if args.test:
        start = time.time()
        if args.num_test_proc > 1:
            queue = mp.Queue(args.num_test_proc)
            procs = []
            for i in range(args.num_test_proc):
                proc = mp.Process(target=test_mp, args=(args,
                                                        model,
                                                        [test_sampler_heads[i], test_sampler_tails[i]],
                                                        i,
                                                        'Test',
                                                        queue))
                procs.append(proc)
                proc.start()

            total_metrics = {}
            metrics = {}
            logs = []
            for i in range(args.num_test_proc):
                log = queue.get()
                logs = logs + log

            for metric in logs[0].keys():
                metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
            for k, v in metrics.items():
                print('Test average {} at [{}/{}]: {}'.format(k, args.step, args.max_step, v))
......
@@ -24,8 +24,8 @@ def load_model_from_checkpoint(logger, args, n_entities, n_relations, ckpt_path):
    model.load_emb(ckpt_path, args.dataset)
    return model

def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, barrier=None):
    assert args.num_proc <= 1, "MXNet KGE does not support multi-process now"
    assert args.rel_part == False, "No need for relation partition in single process for MXNet KGE"
    logs = []
@@ -37,6 +37,9 @@
    else:
        gpu_id = -1

    if args.strict_rel_part:
        model.prepare_relation(mx.gpu(gpu_id))

    start = time.time()
    for step in range(args.init_step, args.max_step):
        pos_g, neg_g = next(train_sampler)
@@ -59,11 +62,14 @@
            start = time.time()
            test(args, model, valid_samplers, mode='Valid')
            print('test:', time.time() - start)

    if args.strict_rel_part:
        model.writeback_relation(rank, rel_parts)
    # clear cache
    logs = []

def test(args, model, test_samplers, rank=0, mode='Test', queue=None):
    assert args.num_proc <= 1, "MXNet KGE does not support multi-process now"
    logs = []

    if len(args.gpu) > 0:
@@ -71,6 +77,9 @@
    else:
        gpu_id = -1

    if args.strict_rel_part:
        model.load_relation(mx.gpu(gpu_id))

    for sampler in test_samplers:
        #print('Number of tests: ' + len(sampler))
        count = 0
......
@@ -3,42 +3,18 @@
from models import KEModel

from torch.utils.data import DataLoader
import torch.optim as optim
import torch as th

from distutils.version import LooseVersion
TH_VERSION = LooseVersion(th.__version__)
if TH_VERSION.version[0] == 1 and TH_VERSION.version[1] < 2:
    raise Exception("DGL-ke has to work with Pytorch version >= 1.2")
from models.pytorch.tensor_models import thread_wrapped_func

import os
import logging
import time
from functools import wraps

def load_model(logger, args, n_entities, n_relations, ckpt=None):
    model = KEModel(args, args.model_name, n_entities, n_relations,
                    args.hidden_dim, args.gamma,
@@ -53,10 +29,7 @@
    model.load_emb(ckpt_path, args.dataset)
    return model

def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, barrier=None):
    logs = []
    for arg in vars(args):
        logging.info('{:20}:{}'.format(arg, getattr(args, arg)))
...@@ -66,6 +39,11 @@ def train(args, model, train_sampler, rank=0, rel_parts=None, valid_samplers=Non ...@@ -66,6 +39,11 @@ def train(args, model, train_sampler, rank=0, rel_parts=None, valid_samplers=Non
else: else:
gpu_id = -1 gpu_id = -1
if args.async_update:
model.create_async_update()
if args.strict_rel_part:
model.prepare_relation(th.device('cuda:' + str(gpu_id)))
start = time.time() start = time.time()
sample_time = 0 sample_time = 0
update_time = 0 update_time = 0
...@@ -90,52 +68,78 @@ def train(args, model, train_sampler, rank=0, rel_parts=None, valid_samplers=Non ...@@ -90,52 +68,78 @@ def train(args, model, train_sampler, rank=0, rel_parts=None, valid_samplers=Non
update_time += time.time() - start1 update_time += time.time() - start1
logs.append(log) logs.append(log)
if step % args.log_interval == 0: # force synchronize embedding across processes every X steps
if args.force_sync_interval > 0 and \
(step + 1) % args.force_sync_interval == 0:
barrier.wait()
if (step + 1) % args.log_interval == 0:
for k in logs[0].keys(): for k in logs[0].keys():
v = sum(l[k] for l in logs) / len(logs) v = sum(l[k] for l in logs) / len(logs)
print('[Train]({}/{}) average {}: {}'.format(step, args.max_step, k, v)) print('[{}][Train]({}/{}) average {}: {}'.format(rank, (step + 1), args.max_step, k, v))
logs = [] logs = []
print('[Train] {} steps take {:.3f} seconds'.format(args.log_interval, print('[{}][Train] {} steps take {:.3f} seconds'.format(rank, args.log_interval,
time.time() - start)) time.time() - start))
print('sample: {:.3f}, forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format( print('[{}]sample: {:.3f}, forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format(
sample_time, forward_time, backward_time, update_time)) rank, sample_time, forward_time, backward_time, update_time))
sample_time = 0 sample_time = 0
update_time = 0 update_time = 0
forward_time = 0 forward_time = 0
backward_time = 0 backward_time = 0
start = time.time() start = time.time()
if args.valid and step % args.eval_interval == 0 and step > 1 and valid_samplers is not None: if args.valid and (step + 1) % args.eval_interval == 0 and step > 1 and valid_samplers is not None:
start = time.time() valid_start = time.time()
test(args, model, valid_samplers, mode='Valid') if args.strict_rel_part:
print('test:', time.time() - start) model.writeback_relation(rank, rel_parts)
# forced sync for validation
if barrier is not None:
barrier.wait()
test(args, model, valid_samplers, rank, mode='Valid')
print('test:', time.time() - valid_start)
if barrier is not None:
barrier.wait()
print('train {} takes {:.3f} seconds'.format(rank, time.time() - start))
if args.async_update:
model.finish_async_update()
if args.strict_rel_part:
model.writeback_relation(rank, rel_parts)
@thread_wrapped_func
def test(args, model, test_samplers, rank=0, mode='Test', queue=None): def test(args, model, test_samplers, rank=0, mode='Test', queue=None):
if args.num_proc > 1:
th.set_num_threads(4)
if len(args.gpu) > 0: if len(args.gpu) > 0:
gpu_id = args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0] gpu_id = args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0]
else: else:
gpu_id = -1 gpu_id = -1
if args.strict_rel_part:
model.load_relation(th.device('cuda:' + str(gpu_id)))
with th.no_grad(): with th.no_grad():
logs = [] logs = []
for sampler in test_samplers: for sampler in test_samplers:
count = 0 count = 0
for pos_g, neg_g in sampler: for pos_g, neg_g in sampler:
with th.no_grad(): model.forward_test(pos_g, neg_g, logs, gpu_id)
model.forward_test(pos_g, neg_g, logs, gpu_id)
metrics = {} metrics = {}
if len(logs) > 0: if len(logs) > 0:
for metric in logs[0].keys(): for metric in logs[0].keys():
metrics[metric] = sum([log[metric] for log in logs]) / len(logs) metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
if queue is not None: if queue is not None:
queue.put(metrics) queue.put(logs)
else: else:
for k, v in metrics.items(): for k, v in metrics.items():
print('{} average {} at [{}/{}]: {}'.format(mode, k, args.step, args.max_step, v)) print('[{}]{} average {} at [{}/{}]: {}'.format(rank, mode, k, args.step, args.max_step, v))
test_samplers[0] = test_samplers[0].reset() test_samplers[0] = test_samplers[0].reset()
test_samplers[1] = test_samplers[1].reset() test_samplers[1] = test_samplers[1].reset()
@thread_wrapped_func
def train_mp(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, barrier=None):
if args.num_proc > 1:
th.set_num_threads(args.num_thread)
train(args, model, train_sampler, valid_samplers, rank, rel_parts, barrier)
@thread_wrapped_func
def test_mp(args, model, test_samplers, rank=0, mode='Test', queue=None):
test(args, model, test_samplers, rank, mode, queue)
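The new train_mp/test_mp entry points keep the single-process train/test functions free of the thread_wrapped_func decorator: the wrapper (now imported from models.pytorch.tensor_models) runs the body in a fresh native thread so a forked worker does not deadlock on OpenMP state inherited from the parent, and set_num_threads caps each worker's CPU thread count. A minimal launch sketch, assuming a torch.multiprocessing Barrier shared by all workers; the sampler and model construction that train.py does is elided:

import torch.multiprocessing as mp

def launch(args, model, train_samplers, rel_parts):
    # hypothetical driver; in the real train.py the samplers, model and
    # rel_parts come from dataset-preparation code not shown in this diff
    barrier = mp.Barrier(args.num_proc)     # backs --force_sync_interval
    procs = []
    for rank in range(args.num_proc):
        p = mp.Process(target=train_mp,
                       args=(args, model, train_samplers[rank],
                             None, rank, rel_parts, barrier))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()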
@@ -1227,13 +1227,7 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
   neg_subg.graph = GraphPtr(new ImmutableGraph(neg_coo));
   neg_subg.induced_vertices = induced_neg_vid;
   neg_subg.induced_edges = induced_neg_eid;
-  // If we didn't sample all nodes to form negative edges, some of the nodes
-  // in the vector might be redundant.
-  if (neg_sample_size < num_tot_nodes) {
-    std::sort(neg_vids.begin(), neg_vids.end());
-    auto it = std::unique(neg_vids.begin(), neg_vids.end());
-    neg_vids.resize(it - neg_vids.begin());
-  }
   if (IsNegativeHeadMode(neg_mode)) {
     neg_subg.head_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map));
     neg_subg.tail_nid = aten::VecToIdArray(local_pos_vids);
......
@@ -228,12 +228,10 @@ def check_head_tail(g):
     lsrc = np.unique(F.asnumpy(lsrc))
     head_nid = np.unique(F.asnumpy(g.head_nid))
-    assert len(head_nid) == len(g.head_nid)
     np.testing.assert_equal(lsrc, head_nid)
     ldst = np.unique(F.asnumpy(ldst))
     tail_nid = np.unique(F.asnumpy(g.tail_nid))
-    assert len(tail_nid) == len(g.tail_nid)
     np.testing.assert_equal(tail_nid, ldst)
......
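The two assertions removed above are the test-side consequence of the sampler change: once the per-batch sort/std::unique pass is gone, head_nid and tail_nid may legitimately contain repeated node ids, so only set equality with the edge endpoints still holds. A small self-contained illustration with made-up ids:

import numpy as np

head_nid = np.array([3, 7, 3, 1])    # negative heads may now repeat
lsrc = np.array([1, 3, 7, 7])        # source endpoints of the sampled edges

# the dropped check: ids were previously guaranteed unique
print(len(np.unique(head_nid)) == len(head_nid))    # False after this change

# the check the test keeps: both sides cover the same set of node ids
np.testing.assert_equal(np.unique(lsrc), np.unique(head_nid))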
@@ -60,12 +60,45 @@ elif [ "$2" == "gpu" ]; then
     python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
         --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
         --batch_size_eval 16 --gpu 0 --valid --test -adv --mix_cpu_gpu --eval_percent 0.01 \
-        --save_emb DistMult_FB15k_emb --data_path /data/kg || fail "run mix CPU/GPU DistMult"
+        --save_emb DistMult_FB15k_emb --data_path /data/kg || fail "run mix with async CPU/GPU DistMult"
     # verify saving training result
     python3 eval.py --model_name DistMult --dataset FB15k --hidden_dim 100 \
         --gamma 500.0 --batch_size 16 --gpu 0 --model_path DistMult_FB15k_emb/ \
         --eval_percent 0.01 --data_path /data/kg || fail "eval DistMult on $2"
+    if [ "$1" == "pytorch" ]; then
+        # verify mixed CPU GPU training with async_update
+        python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
+            --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
+            --batch_size_eval 16 --gpu 0 --valid --test -adv --mix_cpu_gpu --eval_percent 0.01 \
+            --async_update --data_path /data/kg || fail "run mix CPU/GPU DistMult"
+        # verify multiprocess mixed CPU GPU training with random partition and async_update
+        python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
+            --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
+            --batch_size_eval 16 --num_proc 2 --gpu 0 --valid --test -adv --mix_cpu_gpu \
+            --eval_percent 0.01 --async_update --force_sync_interval 100 \
+            --data_path /data/kg || fail "run multiprocess async CPU/GPU DistMult"
+        # verify multiprocess mixed CPU GPU training with relation partition and async_update
+        python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
+            --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
+            --batch_size_eval 16 --num_proc 2 --gpu 0 --valid --test -adv --mix_cpu_gpu \
+            --eval_percent 0.01 --rel_part --async_update --force_sync_interval 100 \
+            --data_path /data/kg || fail "run multiprocess rel_part async CPU/GPU DistMult"
+        # multiprocess training for TransR
+        python3 train.py --model TransR --dataset FB15k --batch_size 128 \
+            --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
+            --batch_size_eval 16 --num_proc 2 --gpu 0 --valid --test -adv --eval_interval 30 \
+            --eval_percent 0.01 --data_path /data/kg --mix_cpu_gpu --rel_part --async_update \
+            --save_emb TransR_FB15k_emb || fail "run multiprocess TransR on $2"
+        python3 eval.py --model_name TransR --dataset FB15k --hidden_dim 100 \
+            --gamma 500.0 --batch_size 16 --num_proc 2 --gpu 0 --model_path TransR_FB15k_emb/ \
+            --eval_percent 0.01 --mix_cpu_gpu --data_path /data/kg || fail "eval multiprocess TransR on $2"
+    fi
 fi
 popd > /dev/null
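For reference, the --async_update flag exercised by these tests is the asynchronous node-embedding update from this PR: the trainer pushes sparse gradients to a background updater instead of applying them inline, and --force_sync_interval periodically re-synchronizes the workers at a barrier. The real implementation lives in the model/tensor code outside this diff; below is only a schematic sketch of the pattern, with invented names:

import threading
import queue
import torch as th

class AsyncSparseUpdater(object):
    """Schematic only: apply sparse SGD rows in a background thread."""
    def __init__(self, emb, lr):
        self.emb, self.lr = emb, lr     # emb: shared CPU embedding tensor
        self.q = queue.Queue()
        self.t = threading.Thread(target=self._loop, daemon=True)
        self.t.start()

    def _loop(self):
        while True:
            item = self.q.get()
            if item is None:            # shutdown sentinel
                return
            idx, grad = item
            self.emb.index_add_(0, idx, -self.lr * grad)

    def push(self, idx, grad):
        # trainer returns immediately; the update lands in the background
        self.q.put((idx.cpu(), grad.cpu()))

    def finish(self):
        self.q.put(None)
        self.t.join()

emb = th.zeros(100, 16)
upd = AsyncSparseUpdater(emb, lr=0.1)
upd.push(th.tensor([0, 3]), th.ones(2, 16))
upd.finish()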