Unverified Commit 7ee72b66 authored by Hao Xiong, committed by GitHub

[Example] Experimental results on ogbl-citation & ogbl-ppa (#1711)



* ogb-deepwalk

* update readme

* update readme

* update readme

* update readme

* ogbl-ddi

* readme

* citation&ppa
Co-authored-by: xiang song (charlie.song) <classicxsong@gmail.com>
parent f9fd3107
@@ -5,13 +5,12 @@ python3 load_dataset.py --name ogbl-collab
```
## Evaluation
For evaluation we follow the mlp.py script provided by ogb [here](https://github.com/snap-stanford/ogb/blob/master/examples/linkproppred/collab/mlp.py).
## Used config
ogbl-collab
```
python3 deepwalk.py --data_file ogbl-collab-net.txt --save_in_pt --output_emb_file embedding.pt --num_walks 50 --window_size 20 --walk_length 40 --lr 0.1 --negative 1 --neg_weight 1 --lap_norm 0.005 --mix --adam --gpus 0 1 2 3 --num_threads 4 --print_interval 2000 --print_loss --batch_size 32
cd ./ogb/examples/linkproppred/collab/
cp embedding_pt_file_path ./
python3 mlp.py --device 0 --runs 10 --use_node_embedding
@@ -25,6 +24,19 @@ cp embedding_pt_file_path ./
python3 mlp.py --device 0 --runs 5
```
ogbl-ppa
```
python3 deepwalk.py --data_file ogbl-ppa-net.txt --save_in_pt --output_emb_file ppa-embedding.pt --negative 1 --neg_weight 1 --batch_size 64 --print_interval 2000 --print_loss --window_size 2 --num_walks 30 --walk_length 80 --lr 0.1 --lap_norm 0.02 --adam --mix --gpus 0 1 --use_context_weight --num_threads 4
cp embedding_pt_file_path ./
python3 mlp.py --device 2 --runs 10
```
ogbl-citation
```
python3 deepwalk.py --data_file ogbl-citation-net.txt --save_in_pt --output_emb_file embedding.pt --window_size 2 --num_walks 10 --negative 1 --neg_weight 1 --walk_length 80 --batch_size 128 --print_loss --print_interval 1000 --mix --adam --gpus 0 1 2 3 --use_context_weight --num_threads 4 --lap_norm 0.05 --lr 0.1
cp embedding_pt_file_path ./
python3 mlp.py --device 2 --runs 5 --use_node_embedding
```
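For reference, --save_in_pt makes deepwalk.py write the embedding with torch.save as a dense tensor whose row i is node i's vector (see the save_embedding_pt diff below). A quick sanity check before copying the file into the ogb example directory:
```
import torch

# Load the tensor written by save_embedding_pt via torch.save.
emb = torch.load('embedding.pt')
print(emb.shape)   # expect (num_nodes, embedding_dim)
print(emb[0][:5])  # first few dimensions of node 0's vector
```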
## Score
ogbl-collab
@@ -61,3 +73,27 @@ ogbl-collab
<br>&emsp;&emsp;Final Train: 52.28 ± 1.21
<br>&emsp;&emsp;Final Test: 29.13 ± 3.46
<br>ogbl-ppa
<br>Hits@10
<br>&emsp;Highest Train: 3.58 ± 0.90
<br>&emsp;Highest Valid: 2.88 ± 0.76
<br>&emsp;&emsp;Final Train: 3.58 ± 0.90
<br>&emsp;&emsp;Final Test: 1.45 ± 0.65
<br>Hits@50
<br>&emsp;Highest Train: 18.21 ± 2.29
<br>&emsp;Highest Valid: 15.75 ± 2.10
<br>&emsp;&emsp;Final Train: 18.21 ± 2.29
<br>&emsp;&emsp;Final Test: 11.70 ± 0.97
<br>Hits@100
<br>&emsp;Highest Train: 31.16 ± 2.23
<br>&emsp;Highest Valid: 27.52 ± 2.07
<br>&emsp;&emsp;Final Train: 31.16 ± 2.23
<br>&emsp;&emsp;Final Test: 23.02 ± 1.63
<br>ogbl-citation
<br>MRR
<br>&emsp;Highest Train: 0.8796 ± 0.0007
<br>&emsp;Highest Valid: 0.8141 ± 0.0007
<br>&emsp;&emsp;Final Train: 0.8793 ± 0.0008
<br>&emsp;&emsp;Final Test: 0.8159 ± 0.0006
@@ -4,8 +4,10 @@ from ogb.linkproppred import PygLinkPropPredDataset
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--name', type=str,
                    choices=['ogbl-collab', 'ogbl-ddi', 'ogbl-ppa', 'ogbl-citation'],
                    default='ogbl-collab',
                    help="name of datasets by ogb")
args = parser.parse_args()
name = args.name
@@ -19,7 +21,6 @@ try:
except:
    weighted = False
with open(name + "-net.txt", "w") as f:
    for i in range(data.edge_index.shape[1]):
        if weighted:
...
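The body of this loop is collapsed in the diff view. For orientation, here is a hypothetical sketch of the write it plausibly performs: deepwalk.py reads the result as a plain-text edge list via --data_file, with the weighted branch emitting a third column. The field order and the edge_weight attribute are assumptions, not taken from the diff:
```
# Hypothetical sketch of the collapsed loop, for orientation only;
# the real code may differ. Assumes `data`, `name`, `weighted` as above.
with open(name + "-net.txt", "w") as f:
    for i in range(data.edge_index.shape[1]):
        src = data.edge_index[0, i].item()
        dst = data.edge_index[1, i].item()
        if weighted:
            # assumption: weighted datasets carry a per-edge weight tensor
            f.write("{} {} {}\n".format(src, dst, data.edge_weight[i].item()))
        else:
            f.write("{} {}\n".format(src, dst))
```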
@@ -481,7 +481,7 @@ class SkipGramModel(nn.Module):
        return torch.sum(score), torch.sum(neg_score)

    def save_embedding(self, dataset, file_name):
        """ Write embedding to local file. Only used when node ids are numbers.

        Parameter
        ---------
@@ -494,14 +494,19 @@ class SkipGramModel(nn.Module):
        np.save(file_name, embedding)

    def save_embedding_pt(self, dataset, file_name):
        """ For ogb leaderboard.
        """
        max_node_id = max(dataset.node2id.keys())
        if max_node_id + 1 != self.emb_size:
            print("WARNING: The node ids are not serial.")

        embedding = torch.zeros(max_node_id + 1, self.emb_dimension)
        index = torch.LongTensor(list(map(lambda id: dataset.id2node[id], list(range(self.emb_size)))))
        embedding.index_add_(0, index, self.u_embeddings.weight.cpu().data)
        if self.norm:
            embedding /= torch.sqrt(torch.sum(embedding.mul(embedding), 1) + 1e-6).unsqueeze(1)
        torch.save(embedding, file_name)

    def save_embedding_txt(self, dataset, file_name):
...
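The rewritten save_embedding_pt drops the hard assert in favor of a warning and scatters the trained rows into a dense table of max_node_id + 1 rows, so row i holds node i's vector even when node ids are not contiguous. A self-contained toy run of the same index_add_ pattern (sizes and values are illustrative only):
```
import torch

# Three trained rows for the non-contiguous node ids 0, 2 and 5,
# so the dense table needs max_node_id + 1 = 6 rows.
trained = torch.tensor([[1., 1.], [2., 2.], [3., 3.]])  # stand-in for u_embeddings
id2node = {0: 0, 1: 2, 2: 5}                            # training row -> original node id

dense = torch.zeros(6, 2)
index = torch.LongTensor([id2node[i] for i in range(len(id2node))])
dense.index_add_(0, index, trained)  # adds trained[i] into row id2node[i]
print(dense)  # rows 0, 2 and 5 hold the trained vectors; the rest stay zero
```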