"...api/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "ae82a3eb34afe2167e7013c871a738846b33a94e"
Unverified commit d93a9759, authored by Quan (Andy) Gan and committed by GitHub
Browse files

revert (#2672)

parent ff64bd0d
......@@ -316,9 +316,6 @@ class NodeCollator(Collator):
The node set to compute outputs.
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
return_eids : bool, default False
Whether to additionally return the indices of the input ``nids`` array sampled in the
minibatch.
Examples
--------
......@@ -334,24 +331,20 @@ class NodeCollator(Collator):
>>> for input_nodes, output_nodes, blocks in dataloader:
... train_on(input_nodes, output_nodes, blocks)
"""
def __init__(self, g, nids, block_sampler, return_indices=False):
def __init__(self, g, nids, block_sampler):
self.g = g
self._is_distributed = isinstance(g, DistGraph)
if not isinstance(nids, Mapping):
assert len(g.ntypes) == 1, \
"nids should be a dict of node type and ids for graph with multiple node types"
self.block_sampler = block_sampler
self.return_indices = return_indices
if isinstance(nids, Mapping):
self.nids = _prepare_tensor_dict(g, nids, 'nids', self._is_distributed)
dataset = {k: F.arange(0, len(v), F.dtype(v), F.context(v))
for k, v in self.nids.items()} if return_indices else self.nids
self._dataset = utils.FlattenedDict(dataset)
self._dataset = utils.FlattenedDict(self.nids)
else:
self.nids = _prepare_tensor(g, nids, 'nids', self._is_distributed)
self._dataset = F.arange(0, len(nids), F.dtype(nids), F.context(nids)) \
if return_indices else nids
self._dataset = self.nids
@property
def dataset(self):
......@@ -367,9 +360,6 @@ class NodeCollator(Collator):
Either a list of node IDs (for homogeneous graphs), or a list of node type-ID
pairs (for heterogeneous graphs).
If ``return_indices`` is True, represents the indices to the seed node
array(s) instead.
Returns
-------
input_nodes : Tensor or dict[ntype, Tensor]
......@@ -382,10 +372,6 @@ class NodeCollator(Collator):
If the original graph has multiple node types, return a dictionary of
node type names and node ID tensors. Otherwise, return a single tensor.
indices : Tensor or dict[ntype, Tensor], optional
The indices of the sampled nodes in the ``nids`` member.
Only returned if ``return_indices`` is True.
blocks : list[DGLGraph]
The list of blocks necessary for computing the representation.
"""
......@@ -396,20 +382,11 @@ class NodeCollator(Collator):
else:
items = _prepare_tensor(self.g, items, 'items', self._is_distributed)
if isinstance(items, dict):
sample_items = {k: F.gather_row(self.nids[k], v) for k, v in items.items()} \
if self.return_indices else items
else:
sample_items = F.gather_row(self.nids, items) if self.return_indices else items
blocks = self.block_sampler.sample_blocks(self.g, sample_items)
blocks = self.block_sampler.sample_blocks(self.g, items)
output_nodes = blocks[-1].dstdata[NID]
input_nodes = blocks[0].srcdata[NID]
if not self.return_indices:
return input_nodes, output_nodes, blocks
else:
return input_nodes, output_nodes, items, blocks
return input_nodes, output_nodes, blocks
class EdgeCollator(Collator):
"""DGL collator to combine edges and their computation dependencies within a minibatch for
......@@ -489,9 +466,6 @@ class EdgeCollator(Collator):
A set of builtin negative samplers are provided in
:ref:`the negative sampling module <api-dataloading-negative-sampling>`.
return_eids : bool, default False
Whether to additionally return the indices of the input ``eids`` array sampled in the
minibatch.
Examples
--------
......@@ -577,18 +551,16 @@ class EdgeCollator(Collator):
... collator.dataset, collate_fn=collator.collate,
... batch_size=1024, shuffle=True, drop_last=False, num_workers=4)
>>> for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader:
... train_on(input_nodse, pair_graph, neg_pair_graph, blocks)
... train_on(input_nodes, pair_graph, neg_pair_graph, blocks)
"""
def __init__(self, g, eids, block_sampler, g_sampling=None, exclude=None,
reverse_eids=None, reverse_etypes=None, negative_sampler=None,
return_indices=False):
reverse_eids=None, reverse_etypes=None, negative_sampler=None):
self.g = g
self._is_distributed = isinstance(g, DistGraph)
if not isinstance(eids, Mapping):
assert len(g.etypes) == 1, \
"eids should be a dict of etype and ids for graph with multiple etypes"
self.block_sampler = block_sampler
self.return_indices = return_indices
# One may wish to iterate over the edges in one graph while perform sampling in
# another graph. This may be the case for iterating over validation and test
......@@ -608,13 +580,10 @@ class EdgeCollator(Collator):
if isinstance(eids, Mapping):
self.eids = _prepare_tensor_dict(g, eids, 'eids', self._is_distributed)
dataset = {k: F.arange(0, len(v), F.dtype(v), F.context(v))
for k, v in self.eids.items()} if return_indices else self.eids
self._dataset = utils.FlattenedDict(dataset)
self._dataset = utils.FlattenedDict(self.eids)
else:
self.eids = _prepare_tensor(g, eids, 'eids', self._is_distributed)
self._dataset = F.arange(0, len(eids), F.dtype(eids), F.context(eids)) \
if return_indices else eids
self._dataset = self.eids
@property
def dataset(self):
......@@ -628,19 +597,13 @@ class EdgeCollator(Collator):
else:
items = _prepare_tensor(self.g_sampling, items, 'items', self._is_distributed)
if isinstance(items, dict):
sample_items = {k: F.gather_row(self.eids[k], v) for k, v in items.items()} \
if self.return_indices else items
else:
sample_items = F.gather_row(self.eids, items) if self.return_indices else items
pair_graph = self.g.edge_subgraph(sample_items)
pair_graph = self.g.edge_subgraph(items)
seed_nodes = pair_graph.ndata[NID]
exclude_eids = _find_exclude_eids(
self.g,
self.exclude,
sample_items,
items,
reverse_eid_map=self.reverse_eids,
reverse_etype_map=self.reverse_etypes)
......@@ -648,10 +611,7 @@ class EdgeCollator(Collator):
self.g_sampling, seed_nodes, exclude_eids=exclude_eids)
input_nodes = blocks[0].srcdata[NID]
if not self.return_indices:
return input_nodes, pair_graph, blocks
else:
return input_nodes, pair_graph, items, blocks
return input_nodes, pair_graph, blocks
def _collate_with_negative_sampling(self, items):
if isinstance(items[0], tuple):
......@@ -661,16 +621,10 @@ class EdgeCollator(Collator):
else:
items = _prepare_tensor(self.g_sampling, items, 'items', self._is_distributed)
if isinstance(items, dict):
sample_items = {k: F.gather_row(self.eids[k], v) for k, v in items.items()} \
if self.return_indices else items
else:
sample_items = F.gather_row(self.eids, items) if self.return_indices else items
pair_graph = self.g.edge_subgraph(sample_items, preserve_nodes=True)
pair_graph = self.g.edge_subgraph(items, preserve_nodes=True)
induced_edges = pair_graph.edata[EID]
neg_srcdst = self.negative_sampler(self.g, sample_items)
neg_srcdst = self.negative_sampler(self.g, items)
if not isinstance(neg_srcdst, Mapping):
assert len(self.g.etypes) == 1, \
'graph has multiple or no edge types; '\
......@@ -692,7 +646,7 @@ class EdgeCollator(Collator):
exclude_eids = _find_exclude_eids(
self.g,
self.exclude,
sample_items,
items,
reverse_eid_map=self.reverse_eids,
reverse_etype_map=self.reverse_etypes)
......@@ -700,10 +654,7 @@ class EdgeCollator(Collator):
self.g_sampling, seed_nodes, exclude_eids=exclude_eids)
input_nodes = blocks[0].srcdata[NID]
if not self.return_indices:
return input_nodes, pair_graph, neg_pair_graph, blocks
else:
return input_nodes, pair_graph, neg_pair_graph, items, blocks
return input_nodes, pair_graph, neg_pair_graph, blocks
def collate(self, items):
"""Combines the sampled edges into a minibatch for edge classification, edge
......@@ -715,9 +666,6 @@ class EdgeCollator(Collator):
Either a list of edge IDs (for homogeneous graphs), or a list of edge type-ID
pairs (for heterogeneous graphs).
If ``return_indices`` is True, represents the indices to the seed edge
array(s) instead.
Returns
-------
Either ``(input_nodes, pair_graph, blocks)``, or
......@@ -741,10 +689,6 @@ class EdgeCollator(Collator):
Note that the metagraph of this graph will be identical to that of the original
graph.
items : Tensor or dict[ntype, Tensor]
The indices of the sampled edges in the ``eids`` member.
Only returned if ``return_indices`` is True.
blocks : list[DGLGraph]
The list of blocks necessary for computing the representation of the edges.
"""
......
......@@ -126,13 +126,13 @@ class _NodeCollator(NodeCollator):
class _EdgeCollator(EdgeCollator):
def collate(self, items):
if self.negative_sampler is None:
# input_nodes, pair_graph, [items], blocks
# input_nodes, pair_graph, blocks
result = super().collate(items)
_pop_subgraph_storage(result[1], self.g)
_pop_blocks_storage(result[-1], self.g_sampling)
return result
else:
# input_nodes, pair_graph, neg_pair_graph, [items], blocks
# input_nodes, pair_graph, neg_pair_graph, blocks
result = super().collate(items)
_pop_subgraph_storage(result[1], self.g)
_pop_subgraph_storage(result[2], self.g)
......@@ -156,13 +156,11 @@ class _NodeDataLoaderIter:
self.iter_ = iter(node_dataloader.dataloader)
def __next__(self):
# input_nodes, output_nodes, [items], blocks
# input_nodes, output_nodes, blocks
result_ = next(self.iter_)
_restore_blocks_storage(result_[-1], self.node_dataloader.collator.g)
result = []
for data in result_:
result.append(_to_device(data, self.device))
result = [_to_device(data, self.device) for data in result_]
return result
class _EdgeDataLoaderIter:
......@@ -175,15 +173,13 @@ class _EdgeDataLoaderIter:
result_ = next(self.iter_)
if self.edge_dataloader.collator.negative_sampler is not None:
# input_nodes, pair_graph, neg_pair_graph, [items], blocks
# Otherwise, input_nodes, pair_graph, [items], blocks
# input_nodes, pair_graph, neg_pair_graph, blocks
# Otherwise, input_nodes, pair_graph, blocks
_restore_subgraph_storage(result_[2], self.edge_dataloader.collator.g)
_restore_subgraph_storage(result_[1], self.edge_dataloader.collator.g)
_restore_blocks_storage(result_[-1], self.edge_dataloader.collator.g_sampling)
result = []
for data in result_:
result.append(_to_device(data, self.device))
result = [_to_device(data, self.device) for data in result_]
return result
class NodeDataLoader:
......
......@@ -10,36 +10,16 @@ def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
for item in dl:
if mode == 'node':
input_nodes, output_nodes, items, blocks = item
input_nodes, output_nodes, blocks = item
elif mode == 'edge':
input_nodes, pair_graph, items, blocks = item
input_nodes, pair_graph, blocks = item
output_nodes = pair_graph.ndata[dgl.NID]
elif mode == 'link':
input_nodes, pair_graph, neg_graph, items, blocks = item
input_nodes, pair_graph, neg_graph, blocks = item
output_nodes = pair_graph.ndata[dgl.NID]
for ntype in pair_graph.ntypes:
assert F.array_equal(pair_graph.nodes[ntype].data[dgl.NID], neg_graph.nodes[ntype].data[dgl.NID])
# TODO: check if items match output nodes/edges
if mode == 'node':
if len(g.ntypes) > 1:
for ntype in g.ntypes:
if ntype not in items:
assert len(output_nodes[ntype]) == 0
else:
assert F.array_equal(output_nodes[ntype], F.gather_row(collator.nids[ntype], items[ntype]))
else:
assert F.array_equal(output_nodes, F.gather_row(collator.nids, items))
else:
if len(g.etypes) > 1:
for etype, eids in collator.eids.items():
if etype not in items:
assert pair_graph.num_edges(etype=etype) == 0
else:
assert F.array_equal(pair_graph.edges[etype].data[dgl.EID], F.gather_row(eids, items[etype]))
else:
assert F.array_equal(pair_graph.edata[dgl.EID], F.gather_row(collator.eids, items))
if len(g.ntypes) > 1:
for ntype in g.ntypes:
assert F.array_equal(input_nodes[ntype], blocks[0].srcnodes[ntype].data[dgl.NID])
......@@ -130,34 +110,31 @@ def test_neighbor_sampler_dataloader():
for seeds, sampler in product(
[F.tensor([0, 1, 2, 3, 5], dtype=F.int64), F.tensor([4, 5], dtype=F.int64)],
[g_sampler1, g_sampler2]):
collators.append(dgl.dataloading.NodeCollator(g, seeds, sampler, return_indices=True))
collators.append(dgl.dataloading.NodeCollator(g, seeds, sampler))
graphs.append(g)
nids.append({'user': seeds})
modes.append('node')
collators.append(dgl.dataloading.EdgeCollator(g, seeds, sampler, return_indices=True))
collators.append(dgl.dataloading.EdgeCollator(g, seeds, sampler))
graphs.append(g)
nids.append({'follow': seeds})
modes.append('edge')
collators.append(dgl.dataloading.EdgeCollator(
g, seeds, sampler, exclude='reverse_id', reverse_eids=reverse_eids,
return_indices=True))
g, seeds, sampler, exclude='reverse_id', reverse_eids=reverse_eids))
graphs.append(g)
nids.append({'follow': seeds})
modes.append('edge')
collators.append(dgl.dataloading.EdgeCollator(
g, seeds, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(2),
return_indices=True))
g, seeds, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(2)))
graphs.append(g)
nids.append({'follow': seeds})
modes.append('link')
collators.append(dgl.dataloading.EdgeCollator(
g, seeds, sampler, exclude='reverse_id', reverse_eids=reverse_eids,
negative_sampler=dgl.dataloading.negative_sampler.Uniform(2),
return_indices=True))
negative_sampler=dgl.dataloading.negative_sampler.Uniform(2)))
graphs.append(g)
nids.append({'follow': seeds})
modes.append('link')
......@@ -166,7 +143,7 @@ def test_neighbor_sampler_dataloader():
[{'user': F.tensor([0, 1, 3, 5], dtype=F.int64), 'game': F.tensor([0, 1, 2], dtype=F.int64)},
{'user': F.tensor([4, 5], dtype=F.int64), 'game': F.tensor([0, 1, 2], dtype=F.int64)}],
[hg_sampler1, hg_sampler2]):
collators.append(dgl.dataloading.NodeCollator(hg, seeds, sampler, return_indices=True))
collators.append(dgl.dataloading.NodeCollator(hg, seeds, sampler))
graphs.append(hg)
nids.append(seeds)
modes.append('node')
......@@ -175,29 +152,26 @@ def test_neighbor_sampler_dataloader():
[{'follow': F.tensor([0, 1, 3, 5], dtype=F.int64), 'play': F.tensor([1, 3], dtype=F.int64)},
{'follow': F.tensor([4, 5], dtype=F.int64), 'play': F.tensor([1, 3], dtype=F.int64)}],
[hg_sampler1, hg_sampler2]):
collators.append(dgl.dataloading.EdgeCollator(hg, seeds, sampler, return_indices=True))
collators.append(dgl.dataloading.EdgeCollator(hg, seeds, sampler))
graphs.append(hg)
nids.append(seeds)
modes.append('edge')
collators.append(dgl.dataloading.EdgeCollator(
hg, seeds, sampler, exclude='reverse_types', reverse_etypes=reverse_etypes,
return_indices=True))
hg, seeds, sampler, exclude='reverse_types', reverse_etypes=reverse_etypes))
graphs.append(hg)
nids.append(seeds)
modes.append('edge')
collators.append(dgl.dataloading.EdgeCollator(
hg, seeds, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(2),
return_indices=True))
hg, seeds, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(2)))
graphs.append(hg)
nids.append(seeds)
modes.append('link')
collators.append(dgl.dataloading.EdgeCollator(
hg, seeds, sampler, exclude='reverse_types', reverse_etypes=reverse_etypes,
negative_sampler=dgl.dataloading.negative_sampler.Uniform(2),
return_indices=True))
negative_sampler=dgl.dataloading.negative_sampler.Uniform(2)))
graphs.append(hg)
nids.append(seeds)
modes.append('link')
......@@ -232,7 +206,6 @@ def test_node_dataloader():
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
# return_indices = False
dataloader = dgl.dataloading.NodeDataLoader(
g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes())
for input_nodes, output_nodes, blocks in dataloader:
......@@ -240,15 +213,6 @@ def test_node_dataloader():
_check_device(output_nodes)
_check_device(blocks)
# return_indices = True
dataloader = dgl.dataloading.NodeDataLoader(
g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes(), return_indices=True)
for input_nodes, output_nodes, items, blocks in dataloader:
_check_device(input_nodes)
_check_device(output_nodes)
_check_device(items)
_check_device(blocks)
g2 = dgl.heterograph({
('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
......@@ -259,7 +223,6 @@ def test_node_dataloader():
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
# return_indices = False
dataloader = dgl.dataloading.NodeDataLoader(
g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
sampler, device=F.ctx(), batch_size=batch_size)
......@@ -268,16 +231,6 @@ def test_node_dataloader():
_check_device(output_nodes)
_check_device(blocks)
# return_indices = True
dataloader = dgl.dataloading.NodeDataLoader(
g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
sampler, device=F.ctx(), batch_size=batch_size, return_indices=True)
for input_nodes, output_nodes, items, blocks in dataloader:
_check_device(input_nodes)
_check_device(output_nodes)
_check_device(items)
_check_device(blocks)
def test_edge_dataloader():
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)
......@@ -285,7 +238,7 @@ def test_edge_dataloader():
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
# return_indices = False & no negative sampler
# no negative sampler
dataloader = dgl.dataloading.EdgeDataLoader(
g1, g1.edges(form='eid'), sampler, device=F.ctx(), batch_size=g1.num_edges())
for input_nodes, pos_pair_graph, blocks in dataloader:
......@@ -293,7 +246,7 @@ def test_edge_dataloader():
_check_device(pos_pair_graph)
_check_device(blocks)
# return_indices = False & negative sampler
# negative sampler
dataloader = dgl.dataloading.EdgeDataLoader(
g1, g1.edges(form='eid'), sampler, device=F.ctx(),
negative_sampler=neg_sampler, batch_size=g1.num_edges())
......@@ -313,26 +266,24 @@ def test_edge_dataloader():
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes)
# return_indices = True & no negative sampler
# no negative sampler
dataloader = dgl.dataloading.EdgeDataLoader(
g2, {ety: g2.edges(form='eid', etype=ety) for ety in g2.canonical_etypes},
sampler, device=F.ctx(), batch_size=batch_size, return_indices=True)
for input_nodes, pos_pair_graph, items, blocks in dataloader:
sampler, device=F.ctx(), batch_size=batch_size)
for input_nodes, pos_pair_graph, blocks in dataloader:
_check_device(input_nodes)
_check_device(pos_pair_graph)
_check_device(items)
_check_device(blocks)
# return_indices = True & negative sampler
# negative sampler
dataloader = dgl.dataloading.EdgeDataLoader(
g2, {ety: g2.edges(form='eid', etype=ety) for ety in g2.canonical_etypes},
sampler, device=F.ctx(), negative_sampler=neg_sampler,
batch_size=batch_size, return_indices=True)
for input_nodes, pos_pair_graph, neg_pair_graph, items, blocks in dataloader:
batch_size=batch_size)
for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader:
_check_device(input_nodes)
_check_device(pos_pair_graph)
_check_device(neg_pair_graph)
_check_device(items)
_check_device(blocks)
if __name__ == '__main__':
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment