Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
5854ef5e
Unverified
Commit
5854ef5e
authored
Mar 07, 2023
by
peizhou001
Committed by
GitHub
Mar 07, 2023
Browse files
[Enhancement]Speed up ToBlockCPU with concurrent id hash map (#5297)
parent
cce31e9a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
74 additions
and
89 deletions
+74
-89
src/graph/transform/to_block.cc
src/graph/transform/to_block.cc
+74
-89
No files found.
src/graph/transform/to_block.cc
View file @
5854ef5e
...
@@ -34,7 +34,7 @@
...
@@ -34,7 +34,7 @@
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "../../array/cpu/
array_utils
.h"
#include "../../array/cpu/
concurrent_id_hash_map
.h"
namespace
dgl
{
namespace
dgl
{
...
@@ -45,104 +45,89 @@ namespace transform {
...
@@ -45,104 +45,89 @@ namespace transform {
namespace
{
namespace
{
// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCPU.
template
<
typename
IdType
>
template
<
typename
IdType
>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
ToBlockCPU
(
struct
CPUIdsMapper
{
HeteroGraphPtr
graph
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
std
::
tuple
<
std
::
vector
<
IdArray
>
,
std
::
vector
<
IdArray
>>
operator
()(
bool
include_rhs_in_lhs
,
std
::
vector
<
IdArray
>
*
const
lhs_nodes_ptr
)
{
const
HeteroGraphPtr
&
graph
,
bool
include_rhs_in_lhs
,
int64_t
num_ntypes
,
std
::
vector
<
IdArray
>
&
lhs_nodes
=
*
lhs_nodes_ptr
;
const
DGLContext
&
ctx
,
const
std
::
vector
<
int64_t
>
&
max_nodes_per_type
,
const
bool
generate_lhs_nodes
=
lhs_nodes
.
empty
();
const
std
::
vector
<
EdgeArray
>
&
edge_arrays
,
const
std
::
vector
<
IdArray
>
&
src_nodes
,
const
int64_t
num_etypes
=
graph
->
NumEdgeTypes
();
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
const
int64_t
num_ntypes
=
graph
->
NumVertexTypes
();
std
::
vector
<
IdArray
>
*
const
lhs_nodes_ptr
,
std
::
vector
<
EdgeArray
>
edge_arrays
(
num_etypes
);
std
::
vector
<
int64_t
>
*
const
num_nodes_per_type_ptr
)
{
std
::
vector
<
IdArray
>
&
lhs_nodes
=
*
lhs_nodes_ptr
;
CHECK
(
rhs_nodes
.
size
()
==
static_cast
<
size_t
>
(
num_ntypes
))
std
::
vector
<
int64_t
>
&
num_nodes_per_type
=
*
num_nodes_per_type_ptr
;
<<
"rhs_nodes not given for every node type"
;
const
bool
generate_lhs_nodes
=
lhs_nodes
.
empty
();
const
std
::
vector
<
IdHashMap
<
IdType
>>
rhs_node_mappings
(
if
(
generate_lhs_nodes
)
{
rhs_nodes
.
begin
(),
rhs_nodes
.
end
());
lhs_nodes
.
reserve
(
num_ntypes
);
std
::
vector
<
IdHashMap
<
IdType
>>
lhs_node_mappings
;
}
if
(
generate_lhs_nodes
)
{
// build lhs_node_mappings -- if we don't have them already
if
(
include_rhs_in_lhs
)
lhs_node_mappings
=
rhs_node_mappings
;
// copy
else
lhs_node_mappings
.
resize
(
num_ntypes
);
}
else
{
lhs_node_mappings
=
std
::
vector
<
IdHashMap
<
IdType
>>
(
lhs_nodes
.
begin
(),
lhs_nodes
.
end
());
}
for
(
int64_t
etype
=
0
;
etype
<
num_etypes
;
++
etype
)
{
std
::
vector
<
ConcurrentIdHashMap
<
IdType
>>
lhs_nodes_map
(
num_ntypes
);
const
auto
src_dst_types
=
graph
->
GetEndpointTypes
(
etype
);
std
::
vector
<
ConcurrentIdHashMap
<
IdType
>>
rhs_nodes_map
(
num_ntypes
);
const
dgl_type_t
srctype
=
src_dst_types
.
first
;
for
(
int64_t
ntype
=
0
;
ntype
<
num_ntypes
;
++
ntype
)
{
const
dgl_type_t
dsttype
=
src_dst_types
.
second
;
IdArray
unique_ids
=
if
(
!
aten
::
IsNullArray
(
rhs_nodes
[
dsttype
]))
{
aten
::
NullArray
(
DGLDataTypeTraits
<
IdType
>::
dtype
,
ctx
);
const
EdgeArray
&
edges
=
graph
->
Edges
(
etype
);
if
(
!
aten
::
IsNullArray
(
src_nodes
[
ntype
]))
{
auto
num_seeds
=
include_rhs_in_lhs
?
rhs_nodes
[
ntype
]
->
shape
[
0
]
:
0
;
unique_ids
=
lhs_nodes_map
[
ntype
].
Init
(
src_nodes
[
ntype
],
num_seeds
);
}
if
(
generate_lhs_nodes
)
{
if
(
generate_lhs_nodes
)
{
lhs_node_mappings
[
srctype
].
Update
(
edges
.
src
);
num_nodes_per_type
[
ntype
]
=
unique_ids
->
shape
[
0
];
lhs_nodes
.
emplace_back
(
unique_ids
);
}
}
edge_arrays
[
etype
]
=
edges
;
}
}
}
std
::
vector
<
int64_t
>
num_nodes_per_type
;
num_nodes_per_type
.
reserve
(
2
*
num_ntypes
);
const
auto
meta_graph
=
graph
->
meta_graph
();
// Skip rhs mapping construction to save efforts when rhs is already
const
EdgeArray
etypes
=
meta_graph
->
Edges
(
"eid"
);
// contained in lhs.
const
IdArray
new_dst
=
Add
(
etypes
.
dst
,
num_ntypes
);
if
(
!
include_rhs_in_lhs
)
{
const
auto
new_meta_graph
=
for
(
int64_t
ntype
=
0
;
ntype
<
num_ntypes
;
++
ntype
)
{
ImmutableGraph
::
CreateFromCOO
(
num_ntypes
*
2
,
etypes
.
src
,
new_dst
);
if
(
!
aten
::
IsNullArray
(
rhs_nodes
[
ntype
]))
{
rhs_nodes_map
[
ntype
].
Init
(
for
(
int64_t
ntype
=
0
;
ntype
<
num_ntypes
;
++
ntype
)
rhs_nodes
[
ntype
],
rhs_nodes
[
ntype
]
->
shape
[
0
]);
num_nodes_per_type
.
push_back
(
lhs_node_mappings
[
ntype
].
Size
());
}
for
(
int64_t
ntype
=
0
;
ntype
<
num_ntypes
;
++
ntype
)
}
num_nodes_per_type
.
push_back
(
rhs_node_mappings
[
ntype
].
Size
());
}
std
::
vector
<
HeteroGraphPtr
>
rel_graphs
;
// Map node numberings from global to local, and build pointer for CSR.
std
::
vector
<
IdArray
>
induced_edges
;
std
::
vector
<
IdArray
>
new_lhs
;
for
(
int64_t
etype
=
0
;
etype
<
num_etypes
;
++
etype
)
{
std
::
vector
<
IdArray
>
new_rhs
;
const
auto
src_dst_types
=
graph
->
GetEndpointTypes
(
etype
);
new_lhs
.
reserve
(
edge_arrays
.
size
());
const
dgl_type_t
srctype
=
src_dst_types
.
first
;
new_rhs
.
reserve
(
edge_arrays
.
size
());
const
dgl_type_t
dsttype
=
src_dst_types
.
second
;
const
int64_t
num_etypes
=
static_cast
<
int64_t
>
(
edge_arrays
.
size
());
const
IdHashMap
<
IdType
>
&
lhs_map
=
lhs_node_mappings
[
srctype
];
for
(
int64_t
etype
=
0
;
etype
<
num_etypes
;
++
etype
)
{
const
IdHashMap
<
IdType
>
&
rhs_map
=
rhs_node_mappings
[
dsttype
];
const
EdgeArray
&
edges
=
edge_arrays
[
etype
];
if
(
rhs_map
.
Size
()
==
0
)
{
if
(
edges
.
id
.
defined
()
&&
!
aten
::
IsNullArray
(
edges
.
src
))
{
// No rhs nodes are given for this edge type. Create an empty graph.
const
auto
src_dst_types
=
graph
->
GetEndpointTypes
(
etype
);
rel_graphs
.
push_back
(
CreateFromCOO
(
const
int
src_type
=
src_dst_types
.
first
;
2
,
lhs_map
.
Size
(),
rhs_map
.
Size
(),
aten
::
NullArray
(),
const
int
dst_type
=
src_dst_types
.
second
;
aten
::
NullArray
()));
new_lhs
.
emplace_back
(
lhs_nodes_map
[
src_type
].
MapIds
(
edges
.
src
));
induced_edges
.
push_back
(
aten
::
NullArray
());
if
(
include_rhs_in_lhs
)
{
}
else
{
new_rhs
.
emplace_back
(
lhs_nodes_map
[
dst_type
].
MapIds
(
edges
.
dst
));
IdArray
new_src
=
lhs_map
.
Map
(
edge_arrays
[
etype
].
src
,
-
1
);
}
else
{
IdArray
new_dst
=
rhs_map
.
Map
(
edge_arrays
[
etype
].
dst
,
-
1
);
new_rhs
.
emplace_back
(
rhs_nodes_map
[
dst_type
].
MapIds
(
edges
.
dst
));
// Check whether there are unmapped IDs and raise error.
}
for
(
int64_t
i
=
0
;
i
<
new_dst
->
shape
[
0
];
++
i
)
}
else
{
CHECK_NE
(
new_dst
.
Ptr
<
IdType
>
()[
i
],
-
1
)
new_lhs
.
emplace_back
(
<<
"Node "
<<
edge_arrays
[
etype
].
dst
.
Ptr
<
IdType
>
()[
i
]
aten
::
NullArray
(
DGLDataTypeTraits
<
IdType
>::
dtype
,
ctx
));
<<
" does not exist"
new_rhs
.
emplace_back
(
<<
" in `rhs_nodes`. Argument `rhs_nodes` must contain all the edge"
aten
::
NullArray
(
DGLDataTypeTraits
<
IdType
>::
dtype
,
ctx
));
<<
" destination nodes."
;
}
rel_graphs
.
push_back
(
CreateFromCOO
(
2
,
lhs_map
.
Size
(),
rhs_map
.
Size
(),
new_src
,
new_dst
));
induced_edges
.
push_back
(
edge_arrays
[
etype
].
id
);
}
}
return
std
::
tuple
<
std
::
vector
<
IdArray
>
,
std
::
vector
<
IdArray
>>
(
std
::
move
(
new_lhs
),
std
::
move
(
new_rhs
));
}
}
};
const
HeteroGraphPtr
new_graph
=
// Since partial specialization is not allowed for functions, use this as an
CreateHeteroGraph
(
new_meta_graph
,
rel_graphs
,
num_nodes_per_type
);
// intermediate for ToBlock where XPU = kDGLCPU.
template
<
typename
IdType
>
if
(
generate_lhs_nodes
)
{
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
ToBlockCPU
(
CHECK_EQ
(
lhs_nodes
.
size
(),
0
)
<<
"InteralError: lhs_nodes should be empty "
HeteroGraphPtr
graph
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
"when generating it."
;
bool
include_rhs_in_lhs
,
std
::
vector
<
IdArray
>
*
const
lhs_nodes_ptr
)
{
for
(
const
IdHashMap
<
IdType
>
&
lhs_map
:
lhs_node_mappings
)
return
dgl
::
transform
::
ProcessToBlock
<
IdType
>
(
lhs_nodes
.
push_back
(
lhs_map
.
Values
());
graph
,
rhs_nodes
,
include_rhs_in_lhs
,
lhs_nodes_ptr
,
}
CPUIdsMapper
<
IdType
>
());
return
std
::
make_tuple
(
new_graph
,
induced_edges
);
}
}
}
// namespace
}
// namespace
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment