Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
edfbee2c
Unverified
Commit
edfbee2c
authored
Aug 18, 2020
by
Quan (Andy) Gan
Committed by
GitHub
Aug 18, 2020
Browse files
[Feature] Make sample_neighbors copy features on demand (#2042)
* fix * fix * lint * fix * test * fix * fix
parent
09ec6020
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
81 additions
and
8 deletions
+81
-8
examples/pytorch/graphsage/train_sampling.py
examples/pytorch/graphsage/train_sampling.py
+3
-1
python/dgl/distributed/graph_services.py
python/dgl/distributed/graph_services.py
+1
-1
python/dgl/sampling/neighbor.py
python/dgl/sampling/neighbor.py
+77
-6
No files found.
examples/pytorch/graphsage/train_sampling.py
View file @
edfbee2c
...
@@ -158,8 +158,10 @@ def run(args, device, data):
...
@@ -158,8 +158,10 @@ def run(args, device, data):
tic_step
=
time
.
time
()
tic_step
=
time
.
time
()
for
step
,
(
input_nodes
,
seeds
,
blocks
)
in
enumerate
(
dataloader
):
for
step
,
(
input_nodes
,
seeds
,
blocks
)
in
enumerate
(
dataloader
):
# Load the input features as well as output labels
# Load the input features as well as output labels
batch_inputs
,
batch_labels
=
load_subtensor
(
train_g
,
seeds
,
input_nodes
,
device
)
#
batch_inputs, batch_labels = load_subtensor(train_g, seeds, input_nodes, device)
blocks
=
[
block
.
int
().
to
(
device
)
for
block
in
blocks
]
blocks
=
[
block
.
int
().
to
(
device
)
for
block
in
blocks
]
batch_inputs
=
blocks
[
0
].
srcdata
[
'features'
]
batch_labels
=
blocks
[
-
1
].
dstdata
[
'labels'
]
# Compute loss and prediction
# Compute loss and prediction
batch_pred
=
model
(
blocks
,
batch_inputs
)
batch_pred
=
model
(
blocks
,
batch_inputs
)
...
...
python/dgl/distributed/graph_services.py
View file @
edfbee2c
...
@@ -56,7 +56,7 @@ def _sample_neighbors(local_g, partition_book, seed_nodes, fan_out, edge_dir, pr
...
@@ -56,7 +56,7 @@ def _sample_neighbors(local_g, partition_book, seed_nodes, fan_out, edge_dir, pr
local_ids
=
F
.
astype
(
local_ids
,
local_g
.
idtype
)
local_ids
=
F
.
astype
(
local_ids
,
local_g
.
idtype
)
# local_ids = self.seed_nodes
# local_ids = self.seed_nodes
sampled_graph
=
local_sample_neighbors
(
sampled_graph
=
local_sample_neighbors
(
local_g
,
local_ids
,
fan_out
,
edge_dir
,
prob
,
replace
)
local_g
,
local_ids
,
fan_out
,
edge_dir
,
prob
,
replace
,
_dist_training
=
True
)
global_nid_mapping
=
local_g
.
ndata
[
NID
]
global_nid_mapping
=
local_g
.
ndata
[
NID
]
src
,
dst
=
sampled_graph
.
edges
()
src
,
dst
=
sampled_graph
.
edges
()
global_src
,
global_dst
=
global_nid_mapping
[
src
],
global_nid_mapping
[
dst
]
global_src
,
global_dst
=
global_nid_mapping
[
src
],
global_nid_mapping
[
dst
]
...
...
python/dgl/sampling/neighbor.py
View file @
edfbee2c
...
@@ -11,7 +11,8 @@ __all__ = [
...
@@ -11,7 +11,8 @@ __all__ = [
'sample_neighbors'
,
'sample_neighbors'
,
'select_topk'
]
'select_topk'
]
def
sample_neighbors
(
g
,
nodes
,
fanout
,
edge_dir
=
'in'
,
prob
=
None
,
replace
=
False
):
def
sample_neighbors
(
g
,
nodes
,
fanout
,
edge_dir
=
'in'
,
prob
=
None
,
replace
=
False
,
copy_ndata
=
True
,
copy_edata
=
True
,
_dist_training
=
False
):
"""Sample neighboring edges of the given nodes and return the induced subgraph.
"""Sample neighboring edges of the given nodes and return the induced subgraph.
For each node, a number of inbound (or outbound when ``edge_dir == 'out'``) edges
For each node, a number of inbound (or outbound when ``edge_dir == 'out'``) edges
...
@@ -53,12 +54,35 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
...
@@ -53,12 +54,35 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
to sum up to one). Otherwise, the result will be undefined.
to sum up to one). Otherwise, the result will be undefined.
replace : bool, optional
replace : bool, optional
If True, sample with replacement.
If True, sample with replacement.
copy_ndata : bool, optional
If True, the node features of the new graph are copied from
the original graph. If False, the new graph will not have any
node features.
(Default: True)
copy_edata : bool, optional
If True, the edge features of the new graph are copied from
the original graph. If False, the new graph will not have any
edge features.
(Default: True)
_dist_training : bool, optional
Internal argument. Do not use.
(Default: False)
Returns
Returns
-------
-------
DGLGraph
DGLGraph
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
Notes
-----
If :attr:`copy_ndata` or :attr:`copy_edata` is True, the same tensors are used as
the node or edge features of both the original graph and the new graph.
As a result, users should avoid performing in-place operations
on the node or edge features of the new graph to avoid feature corruption.
Examples
Examples
--------
--------
Assume that you have the following graph
Assume that you have the following graph
...
@@ -130,11 +154,30 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
...
@@ -130,11 +154,30 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
edge_dir
,
prob_arrays
,
replace
)
edge_dir
,
prob_arrays
,
replace
)
induced_edges
=
subgidx
.
induced_edges
induced_edges
=
subgidx
.
induced_edges
ret
=
DGLHeteroGraph
(
subgidx
.
graph
,
g
.
ntypes
,
g
.
etypes
)
ret
=
DGLHeteroGraph
(
subgidx
.
graph
,
g
.
ntypes
,
g
.
etypes
)
for
i
,
etype
in
enumerate
(
ret
.
canonical_etypes
):
ret
.
edges
[
etype
].
data
[
EID
]
=
induced_edges
[
i
]
# handle features
# TODO(BarclayII): DGL distributed training fails with bus errors, freezes, or
# other incomprehensible errors when features are copied lazily.
# So in the distributed training context, we fall back to the old behavior
# where we only set the edge IDs.
if
not
_dist_training
:
if
copy_ndata
:
print
(
g
,
type
(
g
))
node_frames
=
utils
.
extract_node_subframes
(
g
,
None
)
utils
.
set_new_frames
(
ret
,
node_frames
=
node_frames
)
if
copy_edata
:
print
(
g
,
type
(
g
))
edge_frames
=
utils
.
extract_edge_subframes
(
g
,
induced_edges
)
utils
.
set_new_frames
(
ret
,
edge_frames
=
edge_frames
)
else
:
for
i
,
etype
in
enumerate
(
ret
.
canonical_etypes
):
ret
.
edges
[
etype
].
data
[
EID
]
=
induced_edges
[
i
]
return
ret
return
ret
def
select_topk
(
g
,
k
,
weight
,
nodes
=
None
,
edge_dir
=
'in'
,
ascending
=
False
):
def
select_topk
(
g
,
k
,
weight
,
nodes
=
None
,
edge_dir
=
'in'
,
ascending
=
False
,
copy_ndata
=
True
,
copy_edata
=
True
):
"""Select the neighboring edges with k-largest (or k-smallest) weights of the given
"""Select the neighboring edges with k-largest (or k-smallest) weights of the given
nodes and return the induced subgraph.
nodes and return the induced subgraph.
...
@@ -176,12 +219,31 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
...
@@ -176,12 +219,31 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
ascending : bool, optional
ascending : bool, optional
If True, DGL will return edges with k-smallest weights instead of
If True, DGL will return edges with k-smallest weights instead of
k-largest weights.
k-largest weights.
copy_ndata : bool, optional
If True, the node features of the new graph are copied from
the original graph. If False, the new graph will not have any
node features.
(Default: True)
copy_edata : bool, optional
If True, the edge features of the new graph are copied from
the original graph. If False, the new graph will not have any
edge features.
(Default: True)
Returns
Returns
-------
-------
DGLGraph
DGLGraph
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
Notes
-----
If :attr:`copy_ndata` or :attr:`copy_edata` is True, the same tensors are used as
the node or edge features of both the original graph and the new graph.
As a result, users should avoid performing in-place operations
on the node or edge features of the new graph to avoid feature corruption.
Examples
Examples
--------
--------
>>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))
>>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))
...
@@ -231,8 +293,17 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
...
@@ -231,8 +293,17 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
g
.
_graph
,
nodes_all_types
,
k_array
,
edge_dir
,
weight_arrays
,
bool
(
ascending
))
g
.
_graph
,
nodes_all_types
,
k_array
,
edge_dir
,
weight_arrays
,
bool
(
ascending
))
induced_edges
=
subgidx
.
induced_edges
induced_edges
=
subgidx
.
induced_edges
ret
=
DGLHeteroGraph
(
subgidx
.
graph
,
g
.
ntypes
,
g
.
etypes
)
ret
=
DGLHeteroGraph
(
subgidx
.
graph
,
g
.
ntypes
,
g
.
etypes
)
for
i
,
etype
in
enumerate
(
ret
.
canonical_etypes
):
ret
.
edges
[
etype
].
data
[
EID
]
=
induced_edges
[
i
]
# handle features
if
copy_ndata
:
print
(
g
,
type
(
g
))
node_frames
=
utils
.
extract_node_subframes
(
g
,
None
)
utils
.
set_new_frames
(
ret
,
node_frames
=
node_frames
)
if
copy_edata
:
print
(
g
,
type
(
g
))
edge_frames
=
utils
.
extract_edge_subframes
(
g
,
induced_edges
)
utils
.
set_new_frames
(
ret
,
edge_frames
=
edge_frames
)
return
ret
return
ret
_init_api
(
'dgl.sampling.neighbor'
,
__name__
)
_init_api
(
'dgl.sampling.neighbor'
,
__name__
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment