Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
f118ea95
Unverified
Commit
f118ea95
authored
Nov 25, 2022
by
Hongzhi (Steve), Chen
Committed by
GitHub
Nov 25, 2022
Browse files
black (#4951)
Co-authored-by:
Steve
<
ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal
>
parent
c59000ac
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
1061 additions
and
530 deletions
+1061
-530
examples/pytorch/graph_matching/ged.py
examples/pytorch/graph_matching/ged.py
+1061
-530
No files found.
examples/pytorch/graph_matching/ged.py
View file @
f118ea95
...
...
@@ -2,16 +2,24 @@ import dgl
import
numpy
as
np
from
heapq
import
heappush
,
heappop
,
heapify
,
nsmallest
from
copy
import
deepcopy
# We use lapjv implementation (https://github.com/src-d/lapjv) to solve assignment problem, because of its scalability
# Also see https://github.com/berhane/LAP-solvers for benchmarking of LAP solvers
from
lapjv
import
lapjv
EPSILON
=
0.0000001
;
EPSILON
=
0.0000001
def
validate_cost_functions
(
G1
,
G2
,
node_substitution_cost
=
None
,
edge_substitution_cost
=
None
,
G1_node_deletion_cost
=
None
,
G1_edge_deletion_cost
=
None
,
G2_node_insertion_cost
=
None
,
G2_edge_insertion_cost
=
None
):
def
validate_cost_functions
(
G1
,
G2
,
node_substitution_cost
=
None
,
edge_substitution_cost
=
None
,
G1_node_deletion_cost
=
None
,
G1_edge_deletion_cost
=
None
,
G2_node_insertion_cost
=
None
,
G2_edge_insertion_cost
=
None
,
):
"""Validates cost functions (substitution, insertion, deletion) and initializes them with default=0 for substitution
and default=1 for insertion/deletion
if the provided ones are None.
...
...
@@ -28,43 +36,59 @@ def validate_cost_functions(G1, G2,
# if any cost matrix is None, initialize it with default costs
if
node_substitution_cost
is
None
:
node_substitution_cost
=
np
.
zeros
((
num_G1_nodes
,
num_G2_nodes
),
dtype
=
float
)
node_substitution_cost
=
np
.
zeros
(
(
num_G1_nodes
,
num_G2_nodes
),
dtype
=
float
)
else
:
assert
node_substitution_cost
.
shape
==
(
num_G1_nodes
,
num_G2_nodes
)
;
assert
node_substitution_cost
.
shape
==
(
num_G1_nodes
,
num_G2_nodes
)
if
edge_substitution_cost
is
None
:
edge_substitution_cost
=
np
.
zeros
((
num_G1_edges
,
num_G2_edges
),
dtype
=
float
)
edge_substitution_cost
=
np
.
zeros
(
(
num_G1_edges
,
num_G2_edges
),
dtype
=
float
)
else
:
assert
edge_substitution_cost
.
shape
==
(
num_G1_edges
,
num_G2_edges
)
;
assert
edge_substitution_cost
.
shape
==
(
num_G1_edges
,
num_G2_edges
)
if
G1_node_deletion_cost
is
None
:
G1_node_deletion_cost
=
np
.
ones
(
num_G1_nodes
,
dtype
=
float
)
else
:
assert
G1_node_deletion_cost
.
shape
[
0
]
==
num_G1_nodes
;
assert
G1_node_deletion_cost
.
shape
[
0
]
==
num_G1_nodes
if
G1_edge_deletion_cost
is
None
:
G1_edge_deletion_cost
=
np
.
ones
(
num_G1_edges
,
dtype
=
float
)
else
:
assert
G1_edge_deletion_cost
.
shape
[
0
]
==
num_G1_edges
;
assert
G1_edge_deletion_cost
.
shape
[
0
]
==
num_G1_edges
if
G2_node_insertion_cost
is
None
:
G2_node_insertion_cost
=
np
.
ones
(
num_G2_nodes
,
dtype
=
float
)
else
:
assert
G2_node_insertion_cost
.
shape
[
0
]
==
num_G2_nodes
;
assert
G2_node_insertion_cost
.
shape
[
0
]
==
num_G2_nodes
if
G2_edge_insertion_cost
is
None
:
G2_edge_insertion_cost
=
np
.
ones
(
num_G2_edges
,
dtype
=
float
)
else
:
assert
G2_edge_insertion_cost
.
shape
[
0
]
==
num_G2_edges
;
return
node_substitution_cost
,
edge_substitution_cost
,
\
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
\
G2_node_insertion_cost
,
G2_edge_insertion_cost
;
def
construct_cost_functions
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
):
assert
G2_edge_insertion_cost
.
shape
[
0
]
==
num_G2_edges
return
(
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
)
def
construct_cost_functions
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
):
"""Constructs cost matrices for LAP solution
...
...
@@ -77,318 +101,499 @@ def construct_cost_functions(G1, G2,
num_G1_edges
=
G1
.
number_of_edges
()
num_G2_edges
=
G2
.
number_of_edges
()
# cost matrix of node mappings
cost_upper_bound
=
node_substitution_cost
.
sum
()
+
G1_node_deletion_cost
.
sum
()
+
G2_node_insertion_cost
.
sum
()
+
1
C_node
=
np
.
zeros
((
num_G1_nodes
+
num_G2_nodes
,
num_G1_nodes
+
num_G2_nodes
),
dtype
=
float
)
C_node
[
0
:
num_G1_nodes
,
0
:
num_G2_nodes
]
=
node_substitution_cost
;
C_node
[
0
:
num_G1_nodes
,
num_G2_nodes
:
num_G2_nodes
+
num_G1_nodes
]
=
np
.
array
([
G1_node_deletion_cost
[
i
]
if
i
==
j
\
else
cost_upper_bound
\
for
i
in
range
(
num_G1_nodes
)
\
for
j
in
range
(
num_G1_nodes
)]).
reshape
(
num_G1_nodes
,
num_G1_nodes
);
C_node
[
num_G1_nodes
:
num_G1_nodes
+
num_G2_nodes
,
0
:
num_G2_nodes
]
=
np
.
array
([
G2_node_insertion_cost
[
i
]
if
i
==
j
\
else
cost_upper_bound
\
for
i
in
range
(
num_G2_nodes
)
\
for
j
in
range
(
num_G2_nodes
)]).
reshape
(
num_G2_nodes
,
num_G2_nodes
);
cost_upper_bound
=
(
node_substitution_cost
.
sum
()
+
G1_node_deletion_cost
.
sum
()
+
G2_node_insertion_cost
.
sum
()
+
1
)
C_node
=
np
.
zeros
(
(
num_G1_nodes
+
num_G2_nodes
,
num_G1_nodes
+
num_G2_nodes
),
dtype
=
float
)
C_node
[
0
:
num_G1_nodes
,
0
:
num_G2_nodes
]
=
node_substitution_cost
C_node
[
0
:
num_G1_nodes
,
num_G2_nodes
:
num_G2_nodes
+
num_G1_nodes
]
=
np
.
array
(
[
G1_node_deletion_cost
[
i
]
if
i
==
j
else
cost_upper_bound
for
i
in
range
(
num_G1_nodes
)
for
j
in
range
(
num_G1_nodes
)
]
).
reshape
(
num_G1_nodes
,
num_G1_nodes
)
C_node
[
num_G1_nodes
:
num_G1_nodes
+
num_G2_nodes
,
0
:
num_G2_nodes
]
=
np
.
array
(
[
G2_node_insertion_cost
[
i
]
if
i
==
j
else
cost_upper_bound
for
i
in
range
(
num_G2_nodes
)
for
j
in
range
(
num_G2_nodes
)
]
).
reshape
(
num_G2_nodes
,
num_G2_nodes
)
# cost matrix of edge mappings
cost_upper_bound
=
edge_substitution_cost
.
sum
()
+
G1_edge_deletion_cost
.
sum
()
+
G2_edge_insertion_cost
.
sum
()
+
1
C_edge
=
np
.
zeros
((
num_G1_edges
+
num_G2_edges
,
num_G1_edges
+
num_G2_edges
),
dtype
=
float
)
cost_upper_bound
=
(
edge_substitution_cost
.
sum
()
+
G1_edge_deletion_cost
.
sum
()
+
G2_edge_insertion_cost
.
sum
()
+
1
)
C_edge
=
np
.
zeros
(
(
num_G1_edges
+
num_G2_edges
,
num_G1_edges
+
num_G2_edges
),
dtype
=
float
)
C_edge
[
0
:
num_G1_edges
,
0
:
num_G2_edges
]
=
edge_substitution_cost
C_edge
[
0
:
num_G1_edges
,
num_G2_edges
:
num_G2_edges
+
num_G1_edges
]
=
np
.
array
(
[
G1_edge_deletion_cost
[
i
]
if
i
==
j
else
cost_upper_bound
for
i
in
range
(
num_G1_edges
)
for
j
in
range
(
num_G1_edges
)
]
).
reshape
(
num_G1_edges
,
num_G1_edges
)
C_edge
[
num_G1_edges
:
num_G1_edges
+
num_G2_edges
,
0
:
num_G2_edges
]
=
np
.
array
(
[
G2_edge_insertion_cost
[
i
]
if
i
==
j
else
cost_upper_bound
for
i
in
range
(
num_G2_edges
)
for
j
in
range
(
num_G2_edges
)
]
).
reshape
(
num_G2_edges
,
num_G2_edges
)
return
C_node
,
C_edge
C_edge
[
0
:
num_G1_edges
,
0
:
num_G2_edges
]
=
edge_substitution_cost
;
C_edge
[
0
:
num_G1_edges
,
num_G2_edges
:
num_G2_edges
+
num_G1_edges
]
=
np
.
array
([
G1_edge_deletion_cost
[
i
]
if
i
==
j
\
else
cost_upper_bound
\
for
i
in
range
(
num_G1_edges
)
\
for
j
in
range
(
num_G1_edges
)]).
reshape
(
num_G1_edges
,
num_G1_edges
);
C_edge
[
num_G1_edges
:
num_G1_edges
+
num_G2_edges
,
0
:
num_G2_edges
]
=
np
.
array
([
G2_edge_insertion_cost
[
i
]
if
i
==
j
\
else
cost_upper_bound
\
for
i
in
range
(
num_G2_edges
)
\
for
j
in
range
(
num_G2_edges
)]).
reshape
(
num_G2_edges
,
num_G2_edges
);
return
C_node
,
C_edge
;
def
get_edges_to_match
(
G
,
node_id
,
matched_nodes
):
# Find the edges in G with one end-point as node_id and other in matched_nodes or node_id
incident_edges
=
np
.
array
([],
dtype
=
int
)
index
=
np
.
array
([],
dtype
=
int
)
direction
=
np
.
array
([],
dtype
=
int
)
if
G
.
has_edge_between
(
node_id
,
node_id
):
self_edge_ids
=
G
.
edge_id
(
node_id
,
node_id
,
return_array
=
True
).
numpy
()
;
incident_edges
=
np
.
concatenate
((
incident_edges
,
self_edge_ids
))
;
index
=
np
.
concatenate
((
index
,
[
-
1
]
*
len
(
self_edge_ids
)))
;
direction
=
np
.
concatenate
((
direction
,
[
0
]
*
len
(
self_edge_ids
)))
;
self_edge_ids
=
G
.
edge_id
(
node_id
,
node_id
,
return_array
=
True
).
numpy
()
incident_edges
=
np
.
concatenate
((
incident_edges
,
self_edge_ids
))
index
=
np
.
concatenate
((
index
,
[
-
1
]
*
len
(
self_edge_ids
)))
direction
=
np
.
concatenate
((
direction
,
[
0
]
*
len
(
self_edge_ids
)))
# Find predecessors
src
,
_
,
eid
=
G
.
in_edges
([
node_id
],
'all'
);
eid
=
eid
.
numpy
();
src
=
src
.
numpy
();
filtered_indices
=
[(
i
,
matched_nodes
.
index
(
src
[
i
]))
for
i
in
range
(
len
(
src
))
if
src
[
i
]
in
matched_nodes
];
matched_index
=
np
.
array
([
_
[
1
]
for
_
in
filtered_indices
],
dtype
=
int
);
eid_index
=
np
.
array
([
_
[
0
]
for
_
in
filtered_indices
],
dtype
=
int
);
index
=
np
.
concatenate
((
index
,
matched_index
));
incident_edges
=
np
.
concatenate
((
incident_edges
,
eid
[
eid_index
]));
direction
=
np
.
concatenate
((
direction
,
np
.
array
([
-
1
]
*
len
(
filtered_indices
),
dtype
=
int
)));
src
,
_
,
eid
=
G
.
in_edges
([
node_id
],
"all"
)
eid
=
eid
.
numpy
()
src
=
src
.
numpy
()
filtered_indices
=
[
(
i
,
matched_nodes
.
index
(
src
[
i
]))
for
i
in
range
(
len
(
src
))
if
src
[
i
]
in
matched_nodes
]
matched_index
=
np
.
array
([
_
[
1
]
for
_
in
filtered_indices
],
dtype
=
int
)
eid_index
=
np
.
array
([
_
[
0
]
for
_
in
filtered_indices
],
dtype
=
int
)
index
=
np
.
concatenate
((
index
,
matched_index
))
incident_edges
=
np
.
concatenate
((
incident_edges
,
eid
[
eid_index
]))
direction
=
np
.
concatenate
(
(
direction
,
np
.
array
([
-
1
]
*
len
(
filtered_indices
),
dtype
=
int
))
)
# Find successors
_
,
dst
,
eid
=
G
.
out_edges
([
node_id
],
'all'
);
eid
=
eid
.
numpy
();
dst
=
dst
.
numpy
();
filtered_indices
=
[(
i
,
matched_nodes
.
index
(
dst
[
i
]))
for
i
in
range
(
len
(
dst
))
if
dst
[
i
]
in
matched_nodes
]
matched_index
=
np
.
array
([
_
[
1
]
for
_
in
filtered_indices
],
dtype
=
int
);
eid_index
=
np
.
array
([
_
[
0
]
for
_
in
filtered_indices
],
dtype
=
int
);
index
=
np
.
concatenate
((
index
,
matched_index
));
incident_edges
=
np
.
concatenate
((
incident_edges
,
eid
[
eid_index
]));
direction
=
np
.
concatenate
((
direction
,
np
.
array
([
1
]
*
len
(
filtered_indices
),
dtype
=
int
)));
return
incident_edges
,
index
,
direction
;
_
,
dst
,
eid
=
G
.
out_edges
([
node_id
],
"all"
)
eid
=
eid
.
numpy
()
dst
=
dst
.
numpy
()
filtered_indices
=
[
(
i
,
matched_nodes
.
index
(
dst
[
i
]))
for
i
in
range
(
len
(
dst
))
if
dst
[
i
]
in
matched_nodes
]
matched_index
=
np
.
array
([
_
[
1
]
for
_
in
filtered_indices
],
dtype
=
int
)
eid_index
=
np
.
array
([
_
[
0
]
for
_
in
filtered_indices
],
dtype
=
int
)
index
=
np
.
concatenate
((
index
,
matched_index
))
incident_edges
=
np
.
concatenate
((
incident_edges
,
eid
[
eid_index
]))
direction
=
np
.
concatenate
(
(
direction
,
np
.
array
([
1
]
*
len
(
filtered_indices
),
dtype
=
int
))
)
return
incident_edges
,
index
,
direction
def
subset_cost_matrix
(
cost_matrix
,
row_ids
,
col_ids
,
num_rows
,
num_cols
):
# Extract thr subset of cost matrix corresponding to rows/cols in arrays row_ids/col_ids
# Note that the shape of cost_matrix is (num_rows+num_cols) * (num_rows+num_cols)
extended_row_ids
=
np
.
concatenate
((
row_ids
,
np
.
array
([
k
+
num_rows
for
k
in
col_ids
])));
extended_col_ids
=
np
.
concatenate
((
col_ids
,
np
.
array
([
k
+
num_cols
for
k
in
row_ids
])));
extended_row_ids
=
np
.
concatenate
(
(
row_ids
,
np
.
array
([
k
+
num_rows
for
k
in
col_ids
]))
)
extended_col_ids
=
np
.
concatenate
(
(
col_ids
,
np
.
array
([
k
+
num_cols
for
k
in
row_ids
]))
)
return
cost_matrix
[
extended_row_ids
,
:][:,
extended_col_ids
]
class
search_tree_node
:
def
__init__
(
self
,
G1
,
G2
,
parent_matched_cost
,
parent_matched_nodes
,
parent_matched_edges
,
node_G1
,
node_G2
,
\
parent_unprocessed_nodes_G1
,
parent_unprocessed_nodes_G2
,
parent_unprocessed_edges_G1
,
parent_unprocessed_edges_G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
):
self
.
matched_cost
=
parent_matched_cost
;
self
.
future_approximate_cost
=
0.0
;
self
.
matched_nodes
=
deepcopy
(
parent_matched_nodes
);
self
.
matched_nodes
[
0
].
append
(
node_G1
);
self
.
matched_nodes
[
1
].
append
(
node_G2
);
self
.
matched_edges
=
deepcopy
(
parent_matched_edges
);
self
.
unprocessed_nodes_G1
=
[
_
for
_
in
parent_unprocessed_nodes_G1
if
_
!=
node_G1
];
self
.
unprocessed_nodes_G2
=
[
_
for
_
in
parent_unprocessed_nodes_G2
if
_
!=
node_G2
];
class
search_tree_node
:
def
__init__
(
self
,
G1
,
G2
,
parent_matched_cost
,
parent_matched_nodes
,
parent_matched_edges
,
node_G1
,
node_G2
,
parent_unprocessed_nodes_G1
,
parent_unprocessed_nodes_G2
,
parent_unprocessed_edges_G1
,
parent_unprocessed_edges_G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
):
self
.
matched_cost
=
parent_matched_cost
self
.
future_approximate_cost
=
0.0
self
.
matched_nodes
=
deepcopy
(
parent_matched_nodes
)
self
.
matched_nodes
[
0
].
append
(
node_G1
)
self
.
matched_nodes
[
1
].
append
(
node_G2
)
self
.
matched_edges
=
deepcopy
(
parent_matched_edges
)
self
.
unprocessed_nodes_G1
=
[
_
for
_
in
parent_unprocessed_nodes_G1
if
_
!=
node_G1
]
self
.
unprocessed_nodes_G2
=
[
_
for
_
in
parent_unprocessed_nodes_G2
if
_
!=
node_G2
]
# Add the cost of matching nodes at this tree-node to the matched cost
if
node_G1
is
not
None
and
node_G2
is
not
None
:
# Substitute node_G1 with node_G2
self
.
matched_cost
+=
cost_matrix_nodes
[
node_G1
,
node_G2
];
if
(
node_G1
is
not
None
and
node_G2
is
not
None
):
# Substitute node_G1 with node_G2
self
.
matched_cost
+=
cost_matrix_nodes
[
node_G1
,
node_G2
]
elif
node_G1
is
not
None
:
# Delete node_G1
self
.
matched_cost
+=
cost_matrix_nodes
[
node_G1
,
node_G1
+
G2
.
number_of_nodes
()];
self
.
matched_cost
+=
cost_matrix_nodes
[
node_G1
,
node_G1
+
G2
.
number_of_nodes
()
]
elif
node_G2
is
not
None
:
# Insert node_G2
self
.
matched_cost
+=
cost_matrix_nodes
[
node_G2
+
G1
.
number_of_nodes
(),
node_G2
];
self
.
matched_cost
+=
cost_matrix_nodes
[
node_G2
+
G1
.
number_of_nodes
(),
node_G2
]
# Add the cost of matching edges at this tree-node to the matched cost
incident_edges_G1
=
[];
if
node_G1
is
not
None
:
# Find the edges with one end-point as node_G1 and other in matched nodes or node_G1
incident_edges_G1
,
index_G1
,
direction_G1
=
get_edges_to_match
(
G1
,
node_G1
,
parent_matched_nodes
[
0
])
incident_edges_G2
=
np
.
array
([]);
if
node_G2
is
not
None
:
# Find the edges with one end-point as node_G2 and other in matched nodes or node_G2
incident_edges_G2
,
index_G2
,
direction_G2
=
get_edges_to_match
(
G2
,
node_G2
,
parent_matched_nodes
[
1
])
if
len
(
incident_edges_G1
)
>
0
and
len
(
incident_edges_G2
)
>
0
:
# Consider substituting
matched_edges_cost_matrix
=
subset_cost_matrix
(
cost_matrix_edges
,
incident_edges_G1
,
incident_edges_G2
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
())
max_sum
=
matched_edges_cost_matrix
.
sum
();
incident_edges_G1
=
[]
if
(
node_G1
is
not
None
):
# Find the edges with one end-point as node_G1 and other in matched nodes or node_G1
incident_edges_G1
,
index_G1
,
direction_G1
=
get_edges_to_match
(
G1
,
node_G1
,
parent_matched_nodes
[
0
]
)
incident_edges_G2
=
np
.
array
([])
if
(
node_G2
is
not
None
):
# Find the edges with one end-point as node_G2 and other in matched nodes or node_G2
incident_edges_G2
,
index_G2
,
direction_G2
=
get_edges_to_match
(
G2
,
node_G2
,
parent_matched_nodes
[
1
]
)
if
(
len
(
incident_edges_G1
)
>
0
and
len
(
incident_edges_G2
)
>
0
):
# Consider substituting
matched_edges_cost_matrix
=
subset_cost_matrix
(
cost_matrix_edges
,
incident_edges_G1
,
incident_edges_G2
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
(),
)
max_sum
=
matched_edges_cost_matrix
.
sum
()
# take care of impossible assignments by assigning maximum cost
for
i
in
range
(
len
(
incident_edges_G1
)):
for
j
in
range
(
len
(
incident_edges_G2
)):
# both edges need to have same direction and the other end nodes are matched
if
direction_G1
[
i
]
==
direction_G2
[
j
]
and
index_G1
[
i
]
==
index_G2
[
j
]:
continue
;
if
(
direction_G1
[
i
]
==
direction_G2
[
j
]
and
index_G1
[
i
]
==
index_G2
[
j
]
):
continue
else
:
matched_edges_cost_matrix
[
i
,
j
]
=
max_sum
;
matched_edges_cost_matrix
[
i
,
j
]
=
max_sum
# Match the edges as per the LAP solution
row_ind
,
col_ind
,
_
=
lapjv
(
matched_edges_cost_matrix
)
;
row_ind
,
col_ind
,
_
=
lapjv
(
matched_edges_cost_matrix
)
lap_cost
=
0.00
for
i
in
range
(
len
(
row_ind
)):
lap_cost
+=
matched_edges_cost_matrix
[
i
,
row_ind
[
i
]]
;
lap_cost
+=
matched_edges_cost_matrix
[
i
,
row_ind
[
i
]]
#Update matched edges
#
Update matched edges
for
i
in
range
(
len
(
row_ind
)):
if
i
<
len
(
incident_edges_G1
):
self
.
matched_edges
[
0
].
append
(
incident_edges_G1
[
i
])
;
self
.
matched_edges
[
0
].
append
(
incident_edges_G1
[
i
])
if
row_ind
[
i
]
<
len
(
incident_edges_G2
):
self
.
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]]);
self
.
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]]
)
else
:
self
.
matched_edges
[
1
].
append
(
None
)
;
self
.
matched_edges
[
1
].
append
(
None
)
elif
row_ind
[
i
]
<
len
(
incident_edges_G2
):
self
.
matched_edges
[
0
].
append
(
None
)
;
self
.
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]])
;
self
.
matched_cost
+=
lap_cost
;
self
.
matched_edges
[
0
].
append
(
None
)
self
.
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]])
self
.
matched_cost
+=
lap_cost
elif
len
(
incident_edges_G1
)
>
0
:
#
only deletion possible
edge_deletion_cost
=
0.0
;
elif
len
(
incident_edges_G1
)
>
0
:
#
only deletion possible
edge_deletion_cost
=
0.0
for
edge
in
incident_edges_G1
:
edge_deletion_cost
+=
cost_matrix_edges
[
edge
,
G2
.
number_of_edges
()
+
edge
];
#Update matched edges
edge_deletion_cost
+=
cost_matrix_edges
[
edge
,
G2
.
number_of_edges
()
+
edge
]
# Update matched edges
for
edge
in
incident_edges_G1
:
self
.
matched_edges
[
0
].
append
(
edge
)
;
self
.
matched_edges
[
1
].
append
(
None
)
;
self
.
matched_edges
[
0
].
append
(
edge
)
self
.
matched_edges
[
1
].
append
(
None
)
#Update matched edges
#
Update matched edges
self
.
matched_cost
+=
edge_deletion_cost
;
self
.
matched_cost
+=
edge_deletion_cost
elif
len
(
incident_edges_G2
)
>
0
:
#
only insertion possible
edge_insertion_cost
=
0.0
;
elif
len
(
incident_edges_G2
)
>
0
:
#
only insertion possible
edge_insertion_cost
=
0.0
for
edge
in
incident_edges_G2
:
edge_insertion_cost
+=
cost_matrix_edges
[
G1
.
number_of_edges
()
+
edge
,
edge
];
#Update matched edges
edge_insertion_cost
+=
cost_matrix_edges
[
G1
.
number_of_edges
()
+
edge
,
edge
]
# Update matched edges
for
edge
in
incident_edges_G2
:
self
.
matched_edges
[
0
].
append
(
None
);
self
.
matched_edges
[
1
].
append
(
edge
);
self
.
matched_cost
+=
edge_insertion_cost
;
self
.
matched_edges
[
0
].
append
(
None
)
self
.
matched_edges
[
1
].
append
(
edge
)
self
.
matched_cost
+=
edge_insertion_cost
# Add the cost of matching of unprocessed nodes to the future approximate cost
if
len
(
self
.
unprocessed_nodes_G1
)
>
0
and
len
(
self
.
unprocessed_nodes_G2
)
>
0
:
# Consider substituting
unmatched_nodes_cost_matrix
=
subset_cost_matrix
(
cost_matrix_nodes
,
self
.
unprocessed_nodes_G1
,
self
.
unprocessed_nodes_G2
,
G1
.
number_of_nodes
(),
G2
.
number_of_nodes
())
if
(
len
(
self
.
unprocessed_nodes_G1
)
>
0
and
len
(
self
.
unprocessed_nodes_G2
)
>
0
):
# Consider substituting
unmatched_nodes_cost_matrix
=
subset_cost_matrix
(
cost_matrix_nodes
,
self
.
unprocessed_nodes_G1
,
self
.
unprocessed_nodes_G2
,
G1
.
number_of_nodes
(),
G2
.
number_of_nodes
(),
)
# Match the edges as per the LAP solution
row_ind
,
col_ind
,
_
=
lapjv
(
unmatched_nodes_cost_matrix
)
;
row_ind
,
col_ind
,
_
=
lapjv
(
unmatched_nodes_cost_matrix
)
lap_cost
=
0.00
for
i
in
range
(
len
(
row_ind
)):
lap_cost
+=
unmatched_nodes_cost_matrix
[
i
,
row_ind
[
i
]]
;
lap_cost
+=
unmatched_nodes_cost_matrix
[
i
,
row_ind
[
i
]]
self
.
future_approximate_cost
+=
lap_cost
;
self
.
future_approximate_cost
+=
lap_cost
elif
len
(
self
.
unprocessed_nodes_G1
)
>
0
:
# only deletion possible
node_deletion_cost
=
0.0
;
node_deletion_cost
=
0.0
for
node
in
self
.
unprocessed_nodes_G1
:
node_deletion_cost
+=
cost_matrix_nodes
[
node
,
G2
.
number_of_nodes
()
+
node
];
node_deletion_cost
+=
cost_matrix_nodes
[
node
,
G2
.
number_of_nodes
()
+
node
]
self
.
future_approximate_cost
+=
node_deletion_cost
;
self
.
future_approximate_cost
+=
node_deletion_cost
elif
len
(
self
.
unprocessed_nodes_G2
)
>
0
:
# only insertion possible
node_insertion_cost
=
0.0
;
node_insertion_cost
=
0.0
for
node
in
self
.
unprocessed_nodes_G2
:
node_insertion_cost
+=
cost_matrix_nodes
[
G1
.
number_of_nodes
()
+
node
,
node
];
self
.
future_approximate_cost
+=
node_insertion_cost
;
node_insertion_cost
+=
cost_matrix_nodes
[
G1
.
number_of_nodes
()
+
node
,
node
]
self
.
future_approximate_cost
+=
node_insertion_cost
# Add the cost of LAP matching of unprocessed edges to the future approximate cost
self
.
unprocessed_edges_G1
=
[
_
for
_
in
parent_unprocessed_edges_G1
if
_
not
in
incident_edges_G1
];
self
.
unprocessed_edges_G2
=
[
_
for
_
in
parent_unprocessed_edges_G2
if
_
not
in
incident_edges_G2
];
if
len
(
self
.
unprocessed_edges_G1
)
>
0
and
len
(
self
.
unprocessed_edges_G2
)
>
0
:
# Consider substituting
unmatched_edges_cost_matrix
=
subset_cost_matrix
(
cost_matrix_edges
,
self
.
unprocessed_edges_G1
,
self
.
unprocessed_edges_G2
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
())
self
.
unprocessed_edges_G1
=
[
_
for
_
in
parent_unprocessed_edges_G1
if
_
not
in
incident_edges_G1
]
self
.
unprocessed_edges_G2
=
[
_
for
_
in
parent_unprocessed_edges_G2
if
_
not
in
incident_edges_G2
]
if
(
len
(
self
.
unprocessed_edges_G1
)
>
0
and
len
(
self
.
unprocessed_edges_G2
)
>
0
):
# Consider substituting
unmatched_edges_cost_matrix
=
subset_cost_matrix
(
cost_matrix_edges
,
self
.
unprocessed_edges_G1
,
self
.
unprocessed_edges_G2
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
(),
)
# Match the edges as per the LAP solution
row_ind
,
col_ind
,
_
=
lapjv
(
unmatched_edges_cost_matrix
)
;
row_ind
,
col_ind
,
_
=
lapjv
(
unmatched_edges_cost_matrix
)
lap_cost
=
0.00
for
i
in
range
(
len
(
row_ind
)):
lap_cost
+=
unmatched_edges_cost_matrix
[
i
,
row_ind
[
i
]]
;
lap_cost
+=
unmatched_edges_cost_matrix
[
i
,
row_ind
[
i
]]
self
.
future_approximate_cost
+=
lap_cost
;
self
.
future_approximate_cost
+=
lap_cost
elif
len
(
self
.
unprocessed_edges_G1
)
>
0
:
# only deletion possible
edge_deletion_cost
=
0.0
;
edge_deletion_cost
=
0.0
for
edge
in
self
.
unprocessed_edges_G1
:
edge_deletion_cost
+=
cost_matrix_edges
[
edge
,
G2
.
number_of_edges
()
+
edge
];
edge_deletion_cost
+=
cost_matrix_edges
[
edge
,
G2
.
number_of_edges
()
+
edge
]
self
.
future_approximate_cost
+=
edge_deletion_cost
;
self
.
future_approximate_cost
+=
edge_deletion_cost
elif
len
(
self
.
unprocessed_edges_G2
)
>
0
:
# only insertion possible
edge_insertion_cost
=
0.0
;
edge_insertion_cost
=
0.0
for
edge
in
self
.
unprocessed_edges_G2
:
edge_insertion_cost
+=
cost_matrix_edges
[
G1
.
number_of_edges
()
+
edge
,
edge
];
edge_insertion_cost
+=
cost_matrix_edges
[
G1
.
number_of_edges
()
+
edge
,
edge
]
self
.
future_approximate_cost
+=
edge_insertion_cost
;
self
.
future_approximate_cost
+=
edge_insertion_cost
# For heap insertion order
def
__lt__
(
self
,
other
):
if
abs
((
self
.
matched_cost
+
self
.
future_approximate_cost
)
-
(
other
.
matched_cost
+
other
.
future_approximate_cost
)
)
>
EPSILON
:
return
(
self
.
matched_cost
+
self
.
future_approximate_cost
)
<
(
other
.
matched_cost
+
other
.
future_approximate_cost
);
if
(
abs
(
(
self
.
matched_cost
+
self
.
future_approximate_cost
)
-
(
other
.
matched_cost
+
other
.
future_approximate_cost
)
)
>
EPSILON
):
return
(
self
.
matched_cost
+
self
.
future_approximate_cost
)
<
(
other
.
matched_cost
+
other
.
future_approximate_cost
)
elif
abs
(
self
.
matched_cost
-
other
.
matched_cost
)
>
EPSILON
:
return
other
.
matched_cost
<
self
.
matched_cost
;
#matched cost is closer to reality
return
other
.
matched_cost
<
self
.
matched_cost
# matched cost is closer to reality
else
:
return
(
len
(
self
.
unprocessed_nodes_G1
)
+
len
(
self
.
unprocessed_nodes_G2
)
+
\
len
(
self
.
unprocessed_edges_G1
)
+
len
(
self
.
unprocessed_edges_G2
))
<
\
(
len
(
other
.
unprocessed_nodes_G1
)
+
len
(
other
.
unprocessed_nodes_G2
)
+
\
len
(
other
.
unprocessed_edges_G1
)
+
len
(
other
.
unprocessed_edges_G2
));
def
edit_cost_from_node_matching
(
G1
,
G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
node_matching
):
matched_cost
=
0.0
;
return
(
len
(
self
.
unprocessed_nodes_G1
)
+
len
(
self
.
unprocessed_nodes_G2
)
+
len
(
self
.
unprocessed_edges_G1
)
+
len
(
self
.
unprocessed_edges_G2
)
)
<
(
len
(
other
.
unprocessed_nodes_G1
)
+
len
(
other
.
unprocessed_nodes_G2
)
+
len
(
other
.
unprocessed_edges_G1
)
+
len
(
other
.
unprocessed_edges_G2
)
)
def
edit_cost_from_node_matching
(
G1
,
G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
node_matching
):
matched_cost
=
0.0
matched_nodes
=
([],
[])
matched_edges
=
([],
[])
# Add the cost of matching nodes
for
i
in
range
(
G1
.
number_of_nodes
()):
matched_cost
+=
cost_matrix_nodes
[
i
,
node_matching
[
i
]]
matched_nodes
[
0
].
append
(
i
)
;
matched_nodes
[
0
].
append
(
i
)
if
node_matching
[
i
]
<
G2
.
number_of_nodes
():
matched_nodes
[
1
].
append
(
node_matching
[
i
])
;
matched_nodes
[
1
].
append
(
node_matching
[
i
])
else
:
matched_nodes
[
1
].
append
(
None
)
;
matched_nodes
[
1
].
append
(
None
)
for
i
in
range
(
G1
.
number_of_nodes
(),
len
(
node_matching
)):
matched_cost
+=
cost_matrix_nodes
[
i
,
node_matching
[
i
]]
if
node_matching
[
i
]
<
G2
.
number_of_nodes
():
matched_nodes
[
0
].
append
(
None
)
;
matched_nodes
[
1
].
append
(
node_matching
[
i
])
;
matched_nodes
[
0
].
append
(
None
)
matched_nodes
[
1
].
append
(
node_matching
[
i
])
for
i
in
range
(
len
(
matched_nodes
[
0
])):
# Add the cost of matching edges
incident_edges_G1
=
[];
if
matched_nodes
[
0
][
i
]
is
not
None
:
# Find the edges with one end-point as node_G1 and other in matched nodes or node_G1
incident_edges_G1
,
index_G1
,
direction_G1
=
get_edges_to_match
(
G1
,
matched_nodes
[
0
][
i
],
matched_nodes
[
0
][:
i
])
incident_edges_G2
=
np
.
array
([]);
if
matched_nodes
[
1
][
i
]
is
not
None
:
# Find the edges with one end-point as node_G2 and other in matched nodes or node_G2
incident_edges_G2
,
index_G2
,
direction_G2
=
get_edges_to_match
(
G2
,
matched_nodes
[
1
][
i
],
matched_nodes
[
1
][:
i
])
if
len
(
incident_edges_G1
)
>
0
and
len
(
incident_edges_G2
)
>
0
:
# Consider substituting
matched_edges_cost_matrix
=
subset_cost_matrix
(
cost_matrix_edges
,
incident_edges_G1
,
incident_edges_G2
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
())
max_sum
=
matched_edges_cost_matrix
.
sum
();
incident_edges_G1
=
[]
if
(
matched_nodes
[
0
][
i
]
is
not
None
):
# Find the edges with one end-point as node_G1 and other in matched nodes or node_G1
incident_edges_G1
,
index_G1
,
direction_G1
=
get_edges_to_match
(
G1
,
matched_nodes
[
0
][
i
],
matched_nodes
[
0
][:
i
]
)
incident_edges_G2
=
np
.
array
([])
if
(
matched_nodes
[
1
][
i
]
is
not
None
):
# Find the edges with one end-point as node_G2 and other in matched nodes or node_G2
incident_edges_G2
,
index_G2
,
direction_G2
=
get_edges_to_match
(
G2
,
matched_nodes
[
1
][
i
],
matched_nodes
[
1
][:
i
]
)
if
(
len
(
incident_edges_G1
)
>
0
and
len
(
incident_edges_G2
)
>
0
):
# Consider substituting
matched_edges_cost_matrix
=
subset_cost_matrix
(
cost_matrix_edges
,
incident_edges_G1
,
incident_edges_G2
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
(),
)
max_sum
=
matched_edges_cost_matrix
.
sum
()
# take care of impossible assignments by assigning maximum cost
for
i
in
range
(
len
(
incident_edges_G1
)):
for
j
in
range
(
len
(
incident_edges_G2
)):
# both edges need to have same direction and the other end nodes are matched
if
direction_G1
[
i
]
==
direction_G2
[
j
]
and
index_G1
[
i
]
==
index_G2
[
j
]:
continue
;
if
(
direction_G1
[
i
]
==
direction_G2
[
j
]
and
index_G1
[
i
]
==
index_G2
[
j
]
):
continue
else
:
matched_edges_cost_matrix
[
i
,
j
]
=
max_sum
;
matched_edges_cost_matrix
[
i
,
j
]
=
max_sum
# Match the edges as per the LAP solution
row_ind
,
col_ind
,
_
=
lapjv
(
matched_edges_cost_matrix
)
;
row_ind
,
col_ind
,
_
=
lapjv
(
matched_edges_cost_matrix
)
lap_cost
=
0.00
for
i
in
range
(
len
(
row_ind
)):
lap_cost
+=
matched_edges_cost_matrix
[
i
,
row_ind
[
i
]]
;
lap_cost
+=
matched_edges_cost_matrix
[
i
,
row_ind
[
i
]]
#Update matched edges
#
Update matched edges
for
i
in
range
(
len
(
row_ind
)):
if
i
<
len
(
incident_edges_G1
):
matched_edges
[
0
].
append
(
incident_edges_G1
[
i
])
;
matched_edges
[
0
].
append
(
incident_edges_G1
[
i
])
if
row_ind
[
i
]
<
len
(
incident_edges_G2
):
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]])
;
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]])
else
:
matched_edges
[
1
].
append
(
None
)
;
matched_edges
[
1
].
append
(
None
)
elif
row_ind
[
i
]
<
len
(
incident_edges_G2
):
matched_edges
[
0
].
append
(
None
)
;
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]])
;
matched_cost
+=
lap_cost
;
matched_edges
[
0
].
append
(
None
)
matched_edges
[
1
].
append
(
incident_edges_G2
[
row_ind
[
i
]])
matched_cost
+=
lap_cost
elif
len
(
incident_edges_G1
)
>
0
:
#
only deletion possible
edge_deletion_cost
=
0.0
;
elif
len
(
incident_edges_G1
)
>
0
:
#
only deletion possible
edge_deletion_cost
=
0.0
for
edge
in
incident_edges_G1
:
edge_deletion_cost
+=
cost_matrix_edges
[
edge
,
G2
.
number_of_edges
()
+
edge
];
#Update matched edges
edge_deletion_cost
+=
cost_matrix_edges
[
edge
,
G2
.
number_of_edges
()
+
edge
]
# Update matched edges
for
edge
in
incident_edges_G1
:
matched_edges
[
0
].
append
(
edge
)
;
matched_edges
[
1
].
append
(
None
)
;
matched_edges
[
0
].
append
(
edge
)
matched_edges
[
1
].
append
(
None
)
#Update matched edges
#
Update matched edges
matched_cost
+=
edge_deletion_cost
;
matched_cost
+=
edge_deletion_cost
elif
len
(
incident_edges_G2
)
>
0
:
#
only insertion possible
edge_insertion_cost
=
0.0
;
elif
len
(
incident_edges_G2
)
>
0
:
#
only insertion possible
edge_insertion_cost
=
0.0
for
edge
in
incident_edges_G2
:
edge_insertion_cost
+=
cost_matrix_edges
[
G1
.
number_of_edges
()
+
edge
,
edge
];
#Update matched edges
edge_insertion_cost
+=
cost_matrix_edges
[
G1
.
number_of_edges
()
+
edge
,
edge
]
# Update matched edges
for
edge
in
incident_edges_G2
:
matched_edges
[
0
].
append
(
None
);
matched_edges
[
1
].
append
(
edge
);
matched_edges
[
0
].
append
(
None
)
matched_edges
[
1
].
append
(
edge
)
matched_cost
+=
edge_insertion_cost
matched_cost
+=
edge_insertion_cost
;
return
(
matched_cost
,
matched_nodes
,
matched_edges
)
return
(
matched_cost
,
matched_nodes
,
matched_edges
);
def
contextual_cost_matrix_construction
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
):
def
contextual_cost_matrix_construction
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
):
# Calculates approximate GED using linear assignment on the nodes with bipartite algorithm
# cost matrix of node mappings
...
...
@@ -398,89 +603,174 @@ def contextual_cost_matrix_construction(G1, G2,
num_G1_edges
=
G1
.
number_of_edges
()
num_G2_edges
=
G2
.
number_of_edges
()
cost_upper_bound
=
2
*
(
node_substitution_cost
.
sum
()
+
G1_node_deletion_cost
.
sum
()
+
G2_node_insertion_cost
.
sum
()
+
1
)
cost_matrix
=
np
.
zeros
((
num_G1_nodes
+
num_G2_nodes
,
num_G1_nodes
+
num_G2_nodes
),
dtype
=
float
)
cost_matrix
[
0
:
num_G1_nodes
,
0
:
num_G2_nodes
]
=
node_substitution_cost
;
cost_matrix
[
0
:
num_G1_nodes
,
num_G2_nodes
:
num_G2_nodes
+
num_G1_nodes
]
=
np
.
array
([
G1_node_deletion_cost
[
i
]
if
i
==
j
\
else
cost_upper_bound
\
for
i
in
range
(
num_G1_nodes
)
\
for
j
in
range
(
num_G1_nodes
)]).
reshape
(
num_G1_nodes
,
num_G1_nodes
);
cost_matrix
[
num_G1_nodes
:
num_G1_nodes
+
num_G2_nodes
,
0
:
num_G2_nodes
]
=
np
.
array
([
G2_node_insertion_cost
[
i
]
if
i
==
j
\
else
cost_upper_bound
\
for
i
in
range
(
num_G2_nodes
)
\
for
j
in
range
(
num_G2_nodes
)]).
reshape
(
num_G2_nodes
,
num_G2_nodes
);
self_edge_list_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
;
self_edge_list_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
;
incoming_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
;
incoming_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
;
outgoing_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
;
outgoing_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
;
cost_upper_bound
=
2
*
(
node_substitution_cost
.
sum
()
+
G1_node_deletion_cost
.
sum
()
+
G2_node_insertion_cost
.
sum
()
+
1
)
cost_matrix
=
np
.
zeros
(
(
num_G1_nodes
+
num_G2_nodes
,
num_G1_nodes
+
num_G2_nodes
),
dtype
=
float
)
cost_matrix
[
0
:
num_G1_nodes
,
0
:
num_G2_nodes
]
=
node_substitution_cost
cost_matrix
[
0
:
num_G1_nodes
,
num_G2_nodes
:
num_G2_nodes
+
num_G1_nodes
]
=
np
.
array
(
[
G1_node_deletion_cost
[
i
]
if
i
==
j
else
cost_upper_bound
for
i
in
range
(
num_G1_nodes
)
for
j
in
range
(
num_G1_nodes
)
]
).
reshape
(
num_G1_nodes
,
num_G1_nodes
)
cost_matrix
[
num_G1_nodes
:
num_G1_nodes
+
num_G2_nodes
,
0
:
num_G2_nodes
]
=
np
.
array
(
[
G2_node_insertion_cost
[
i
]
if
i
==
j
else
cost_upper_bound
for
i
in
range
(
num_G2_nodes
)
for
j
in
range
(
num_G2_nodes
)
]
).
reshape
(
num_G2_nodes
,
num_G2_nodes
)
self_edge_list_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
self_edge_list_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
incoming_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
incoming_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
outgoing_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
outgoing_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
for
i
in
range
(
num_G1_nodes
):
if
G1
.
has_edge_between
(
i
,
i
):
self_edge_list_G1
[
i
]
=
sorted
(
G1
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
());
incoming_edges_G1
[
i
]
=
G1
.
in_edges
([
i
],
'eid'
).
numpy
();
incoming_edges_G1
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G1
[
i
],
self_edge_list_G1
[
i
]);
outgoing_edges_G1
[
i
]
=
G1
.
out_edges
([
i
],
'eid'
).
numpy
();
outgoing_edges_G1
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G1
[
i
],
self_edge_list_G1
[
i
]);
self_edge_list_G1
[
i
]
=
sorted
(
G1
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
()
)
incoming_edges_G1
[
i
]
=
G1
.
in_edges
([
i
],
"eid"
).
numpy
()
incoming_edges_G1
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G1
[
i
],
self_edge_list_G1
[
i
]
)
outgoing_edges_G1
[
i
]
=
G1
.
out_edges
([
i
],
"eid"
).
numpy
()
outgoing_edges_G1
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G1
[
i
],
self_edge_list_G1
[
i
]
)
for
i
in
range
(
num_G2_nodes
):
if
G2
.
has_edge_between
(
i
,
i
):
self_edge_list_G2
[
i
]
=
sorted
(
G2
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
());
incoming_edges_G2
[
i
]
=
G2
.
in_edges
([
i
],
'eid'
).
numpy
();
incoming_edges_G2
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G2
[
i
],
self_edge_list_G2
[
i
]);
outgoing_edges_G2
[
i
]
=
G2
.
out_edges
([
i
],
'eid'
).
numpy
();
outgoing_edges_G2
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G2
[
i
],
self_edge_list_G2
[
i
]);
selected_deletion_G1
=
[
G1_edge_deletion_cost
[
np
.
concatenate
((
self_edge_list_G1
[
i
],
incoming_edges_G1
[
i
],
outgoing_edges_G1
[
i
]))]
for
i
in
range
(
G1
.
number_of_nodes
())];
selected_insertion_G2
=
[
G2_edge_insertion_cost
[
np
.
concatenate
((
self_edge_list_G2
[
i
],
incoming_edges_G2
[
i
],
outgoing_edges_G2
[
i
]))]
for
i
in
range
(
G2
.
number_of_nodes
())];
self_edge_list_G2
[
i
]
=
sorted
(
G2
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
()
)
incoming_edges_G2
[
i
]
=
G2
.
in_edges
([
i
],
"eid"
).
numpy
()
incoming_edges_G2
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G2
[
i
],
self_edge_list_G2
[
i
]
)
outgoing_edges_G2
[
i
]
=
G2
.
out_edges
([
i
],
"eid"
).
numpy
()
outgoing_edges_G2
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G2
[
i
],
self_edge_list_G2
[
i
]
)
selected_deletion_G1
=
[
G1_edge_deletion_cost
[
np
.
concatenate
(
(
self_edge_list_G1
[
i
],
incoming_edges_G1
[
i
],
outgoing_edges_G1
[
i
],
)
)
]
for
i
in
range
(
G1
.
number_of_nodes
())
]
selected_insertion_G2
=
[
G2_edge_insertion_cost
[
np
.
concatenate
(
(
self_edge_list_G2
[
i
],
incoming_edges_G2
[
i
],
outgoing_edges_G2
[
i
],
)
)
]
for
i
in
range
(
G2
.
number_of_nodes
())
]
# Add the cost of edge edition which are dependent of a node (see this as the cost associated with a substructure)
for
i
in
range
(
num_G1_nodes
):
for
j
in
range
(
num_G2_nodes
):
m
=
len
(
self_edge_list_G1
[
i
])
+
len
(
incoming_edges_G1
[
i
])
+
len
(
outgoing_edges_G1
[
i
]);
n
=
len
(
self_edge_list_G2
[
j
])
+
len
(
incoming_edges_G2
[
j
])
+
len
(
outgoing_edges_G2
[
j
]);
matrix_dim
=
m
+
n
;
m
=
(
len
(
self_edge_list_G1
[
i
])
+
len
(
incoming_edges_G1
[
i
])
+
len
(
outgoing_edges_G1
[
i
])
)
n
=
(
len
(
self_edge_list_G2
[
j
])
+
len
(
incoming_edges_G2
[
j
])
+
len
(
outgoing_edges_G2
[
j
])
)
matrix_dim
=
m
+
n
if
matrix_dim
==
0
:
continue
;
temp_edge_cost_matrix
=
np
.
empty
((
matrix_dim
,
matrix_dim
));
temp_edge_cost_matrix
.
fill
(
cost_upper_bound
);
temp_edge_cost_matrix
[:
len
(
self_edge_list_G1
[
i
]),:
len
(
self_edge_list_G2
[
j
])]
=
edge_substitution_cost
[
self_edge_list_G1
[
i
],:][:,
self_edge_list_G2
[
j
]];
temp_edge_cost_matrix
[
len
(
self_edge_list_G1
[
i
]):
len
(
self_edge_list_G1
[
i
])
+
len
(
incoming_edges_G1
[
i
]),
len
(
self_edge_list_G2
[
j
]):
len
(
self_edge_list_G2
[
j
])
+
len
(
incoming_edges_G2
[
j
])]
=
edge_substitution_cost
[
incoming_edges_G1
[
i
],:][:,
incoming_edges_G2
[
j
]];
temp_edge_cost_matrix
[
len
(
self_edge_list_G1
[
i
])
+
len
(
incoming_edges_G1
[
i
]):
m
,
len
(
self_edge_list_G2
[
j
])
+
len
(
incoming_edges_G2
[
j
]):
n
]
=
edge_substitution_cost
[
outgoing_edges_G1
[
i
],:][:,
outgoing_edges_G2
[
j
]];
np
.
fill_diagonal
(
temp_edge_cost_matrix
[:
m
,
n
:],
selected_deletion_G1
[
i
]);
np
.
fill_diagonal
(
temp_edge_cost_matrix
[
m
:,
:
n
],
selected_insertion_G2
[
j
]);
temp_edge_cost_matrix
[
m
:,
n
:].
fill
(
0
);
row_ind
,
col_ind
,
_
=
lapjv
(
temp_edge_cost_matrix
);
continue
temp_edge_cost_matrix
=
np
.
empty
((
matrix_dim
,
matrix_dim
))
temp_edge_cost_matrix
.
fill
(
cost_upper_bound
)
temp_edge_cost_matrix
[
:
len
(
self_edge_list_G1
[
i
]),
:
len
(
self_edge_list_G2
[
j
])
]
=
edge_substitution_cost
[
self_edge_list_G1
[
i
],
:][
:,
self_edge_list_G2
[
j
]
]
temp_edge_cost_matrix
[
len
(
self_edge_list_G1
[
i
])
:
len
(
self_edge_list_G1
[
i
])
+
len
(
incoming_edges_G1
[
i
]),
len
(
self_edge_list_G2
[
j
])
:
len
(
self_edge_list_G2
[
j
])
+
len
(
incoming_edges_G2
[
j
]),
]
=
edge_substitution_cost
[
incoming_edges_G1
[
i
],
:][
:,
incoming_edges_G2
[
j
]
]
temp_edge_cost_matrix
[
len
(
self_edge_list_G1
[
i
])
+
len
(
incoming_edges_G1
[
i
])
:
m
,
len
(
self_edge_list_G2
[
j
])
+
len
(
incoming_edges_G2
[
j
])
:
n
,
]
=
edge_substitution_cost
[
outgoing_edges_G1
[
i
],
:][
:,
outgoing_edges_G2
[
j
]
]
np
.
fill_diagonal
(
temp_edge_cost_matrix
[:
m
,
n
:],
selected_deletion_G1
[
i
]
)
np
.
fill_diagonal
(
temp_edge_cost_matrix
[
m
:,
:
n
],
selected_insertion_G2
[
j
]
)
temp_edge_cost_matrix
[
m
:,
n
:].
fill
(
0
)
row_ind
,
col_ind
,
_
=
lapjv
(
temp_edge_cost_matrix
)
lap_cost
=
0.00
for
k
in
range
(
len
(
row_ind
)):
lap_cost
+=
temp_edge_cost_matrix
[
k
,
row_ind
[
k
]]
;
lap_cost
+=
temp_edge_cost_matrix
[
k
,
row_ind
[
k
]]
cost_matrix
[
i
,
j
]
+=
lap_cost
;
cost_matrix
[
i
,
j
]
+=
lap_cost
for
i
in
range
(
num_G1_nodes
):
cost_matrix
[
i
,
num_G2_nodes
+
i
]
+=
selected_deletion_G1
[
i
].
sum
()
cost_matrix
[
i
,
num_G2_nodes
+
i
]
+=
selected_deletion_G1
[
i
].
sum
()
for
i
in
range
(
num_G2_nodes
):
cost_matrix
[
num_G1_nodes
+
i
,
i
]
+=
selected_insertion_G2
[
i
].
sum
()
cost_matrix
[
num_G1_nodes
+
i
,
i
]
+=
selected_insertion_G2
[
i
].
sum
()
return
cost_matrix
;
return
cost_matrix
def
hausdorff_matching
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
):
def
hausdorff_matching
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
):
# Calculates approximate GED using hausdorff_matching
# cost matrix of node mappings
...
...
@@ -490,44 +780,104 @@ def hausdorff_matching(G1, G2,
num_G1_edges
=
G1
.
number_of_edges
()
num_G2_edges
=
G2
.
number_of_edges
()
self_edge_list_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
;
self_edge_list_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
;
incoming_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
;
incoming_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
;
outgoing_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
;
outgoing_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
;
self_edge_list_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
self_edge_list_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
incoming_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
incoming_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
outgoing_edges_G1
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G1_nodes
outgoing_edges_G2
=
[
np
.
array
([],
dtype
=
int
)]
*
num_G2_nodes
for
i
in
range
(
num_G1_nodes
):
if
G1
.
has_edge_between
(
i
,
i
):
self_edge_list_G1
[
i
]
=
sorted
(
G1
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
());
incoming_edges_G1
[
i
]
=
G1
.
in_edges
([
i
],
'eid'
).
numpy
();
incoming_edges_G1
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G1
[
i
],
self_edge_list_G1
[
i
]);
outgoing_edges_G1
[
i
]
=
G1
.
out_edges
([
i
],
'eid'
).
numpy
();
outgoing_edges_G1
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G1
[
i
],
self_edge_list_G1
[
i
]);
self_edge_list_G1
[
i
]
=
sorted
(
G1
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
()
)
incoming_edges_G1
[
i
]
=
G1
.
in_edges
([
i
],
"eid"
).
numpy
()
incoming_edges_G1
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G1
[
i
],
self_edge_list_G1
[
i
]
)
outgoing_edges_G1
[
i
]
=
G1
.
out_edges
([
i
],
"eid"
).
numpy
()
outgoing_edges_G1
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G1
[
i
],
self_edge_list_G1
[
i
]
)
for
i
in
range
(
num_G2_nodes
):
if
G2
.
has_edge_between
(
i
,
i
):
self_edge_list_G2
[
i
]
=
sorted
(
G2
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
());
incoming_edges_G2
[
i
]
=
G2
.
in_edges
([
i
],
'eid'
).
numpy
();
incoming_edges_G2
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G2
[
i
],
self_edge_list_G2
[
i
]);
outgoing_edges_G2
[
i
]
=
G2
.
out_edges
([
i
],
'eid'
).
numpy
();
outgoing_edges_G2
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G2
[
i
],
self_edge_list_G2
[
i
]);
selected_deletion_self_G1
=
[
G1_edge_deletion_cost
[
self_edge_list_G1
[
i
]]
for
i
in
range
(
G1
.
number_of_nodes
())];
selected_insertion_self_G2
=
[
G2_edge_insertion_cost
[
self_edge_list_G2
[
i
]]
for
i
in
range
(
G2
.
number_of_nodes
())];
selected_deletion_incoming_G1
=
[
G1_edge_deletion_cost
[
incoming_edges_G1
[
i
]]
for
i
in
range
(
G1
.
number_of_nodes
())];
selected_insertion_incoming_G2
=
[
G2_edge_insertion_cost
[
incoming_edges_G2
[
i
]]
for
i
in
range
(
G2
.
number_of_nodes
())];
selected_deletion_outgoing_G1
=
[
G1_edge_deletion_cost
[
outgoing_edges_G1
[
i
]]
for
i
in
range
(
G1
.
number_of_nodes
())];
selected_insertion_outgoing_G2
=
[
G2_edge_insertion_cost
[
outgoing_edges_G2
[
i
]]
for
i
in
range
(
G2
.
number_of_nodes
())];
selected_deletion_G1
=
[
G1_edge_deletion_cost
[
np
.
concatenate
((
self_edge_list_G1
[
i
],
incoming_edges_G1
[
i
],
outgoing_edges_G1
[
i
]))]
for
i
in
range
(
G1
.
number_of_nodes
())];
selected_insertion_G2
=
[
G2_edge_insertion_cost
[
np
.
concatenate
((
self_edge_list_G2
[
i
],
incoming_edges_G2
[
i
],
outgoing_edges_G2
[
i
]))]
for
i
in
range
(
G2
.
number_of_nodes
())];
cost_G1
=
np
.
array
([(
G1_node_deletion_cost
[
i
]
+
selected_deletion_G1
[
i
].
sum
()
/
2
)
for
i
in
range
(
num_G1_nodes
)])
cost_G2
=
np
.
array
([(
G2_node_insertion_cost
[
i
]
+
selected_insertion_G2
[
i
].
sum
()
/
2
)
for
i
in
range
(
num_G2_nodes
)])
self_edge_list_G2
[
i
]
=
sorted
(
G2
.
edge_id
(
i
,
i
,
return_array
=
True
).
numpy
()
)
incoming_edges_G2
[
i
]
=
G2
.
in_edges
([
i
],
"eid"
).
numpy
()
incoming_edges_G2
[
i
]
=
np
.
setdiff1d
(
incoming_edges_G2
[
i
],
self_edge_list_G2
[
i
]
)
outgoing_edges_G2
[
i
]
=
G2
.
out_edges
([
i
],
"eid"
).
numpy
()
outgoing_edges_G2
[
i
]
=
np
.
setdiff1d
(
outgoing_edges_G2
[
i
],
self_edge_list_G2
[
i
]
)
selected_deletion_self_G1
=
[
G1_edge_deletion_cost
[
self_edge_list_G1
[
i
]]
for
i
in
range
(
G1
.
number_of_nodes
())
]
selected_insertion_self_G2
=
[
G2_edge_insertion_cost
[
self_edge_list_G2
[
i
]]
for
i
in
range
(
G2
.
number_of_nodes
())
]
selected_deletion_incoming_G1
=
[
G1_edge_deletion_cost
[
incoming_edges_G1
[
i
]]
for
i
in
range
(
G1
.
number_of_nodes
())
]
selected_insertion_incoming_G2
=
[
G2_edge_insertion_cost
[
incoming_edges_G2
[
i
]]
for
i
in
range
(
G2
.
number_of_nodes
())
]
selected_deletion_outgoing_G1
=
[
G1_edge_deletion_cost
[
outgoing_edges_G1
[
i
]]
for
i
in
range
(
G1
.
number_of_nodes
())
]
selected_insertion_outgoing_G2
=
[
G2_edge_insertion_cost
[
outgoing_edges_G2
[
i
]]
for
i
in
range
(
G2
.
number_of_nodes
())
]
selected_deletion_G1
=
[
G1_edge_deletion_cost
[
np
.
concatenate
(
(
self_edge_list_G1
[
i
],
incoming_edges_G1
[
i
],
outgoing_edges_G1
[
i
],
)
)
]
for
i
in
range
(
G1
.
number_of_nodes
())
]
selected_insertion_G2
=
[
G2_edge_insertion_cost
[
np
.
concatenate
(
(
self_edge_list_G2
[
i
],
incoming_edges_G2
[
i
],
outgoing_edges_G2
[
i
],
)
)
]
for
i
in
range
(
G2
.
number_of_nodes
())
]
cost_G1
=
np
.
array
(
[
(
G1_node_deletion_cost
[
i
]
+
selected_deletion_G1
[
i
].
sum
()
/
2
)
for
i
in
range
(
num_G1_nodes
)
]
)
cost_G2
=
np
.
array
(
[
(
G2_node_insertion_cost
[
i
]
+
selected_insertion_G2
[
i
].
sum
()
/
2
)
for
i
in
range
(
num_G2_nodes
)
]
)
for
i
in
range
(
num_G1_nodes
):
for
j
in
range
(
num_G2_nodes
):
...
...
@@ -538,140 +888,274 @@ def hausdorff_matching(G1, G2,
c1_outgoing
=
deepcopy
(
selected_deletion_outgoing_G1
[
i
])
c2_outgoing
=
deepcopy
(
selected_insertion_outgoing_G2
[
j
])
for
k
,
a
in
enumerate
(
self_edge_list_G1
[
i
]):
for
l
,
b
in
enumerate
(
self_edge_list_G2
[
j
]):
c1_self
[
k
]
=
min
(
c1_self
[
k
],
edge_substitution_cost
[
a
,
b
]
/
2
);
c2_self
[
l
]
=
min
(
c2_self
[
l
],
edge_substitution_cost
[
a
,
b
]
/
2
);
for
k
,
a
in
enumerate
(
incoming_edges_G1
[
i
]):
for
l
,
b
in
enumerate
(
incoming_edges_G2
[
j
]):
c1_incoming
[
k
]
=
min
(
c1_incoming
[
k
],
edge_substitution_cost
[
a
,
b
]
/
2
);
c2_incoming
[
l
]
=
min
(
c2_incoming
[
l
],
edge_substitution_cost
[
a
,
b
]
/
2
);
for
k
,
a
in
enumerate
(
outgoing_edges_G1
[
i
]):
for
l
,
b
in
enumerate
(
outgoing_edges_G2
[
j
]):
c1_outgoing
[
k
]
=
min
(
c1_outgoing
[
k
],
edge_substitution_cost
[
a
,
b
]
/
2
);
c2_outgoing
[
l
]
=
min
(
c2_outgoing
[
l
],
edge_substitution_cost
[
a
,
b
]
/
2
);
edge_hausdorff_lower_bound
=
0.0
;
if
len
(
selected_deletion_G1
[
i
])
>
len
(
selected_insertion_G2
[
j
]):
idx
=
np
.
argpartition
(
selected_deletion_G1
[
i
],
(
len
(
selected_deletion_G1
[
i
])
-
len
(
selected_insertion_G2
[
j
])));
edge_hausdorff_lower_bound
=
selected_deletion_G1
[
i
][
idx
[:(
len
(
selected_deletion_G1
[
i
])
-
len
(
selected_insertion_G2
[
j
]))]].
sum
();
elif
len
(
selected_deletion_G1
[
i
])
<
len
(
selected_insertion_G2
[
j
]):
idx
=
np
.
argpartition
(
selected_insertion_G2
[
j
],
(
len
(
selected_insertion_G2
[
j
])
-
len
(
selected_deletion_G1
[
i
])));
edge_hausdorff_lower_bound
=
selected_insertion_G2
[
j
][
idx
[:(
len
(
selected_insertion_G2
[
j
])
-
len
(
selected_deletion_G1
[
i
]))]].
sum
();
sc_cost
=
0.5
*
(
node_substitution_cost
[
i
,
j
]
+
0.5
*
max
(
c1_self
.
sum
()
+
c2_self
.
sum
()
+
\
c1_incoming
.
sum
()
+
c2_incoming
.
sum
()
+
\
c1_outgoing
.
sum
()
+
c2_outgoing
.
sum
(),
\
edge_hausdorff_lower_bound
));
for
k
,
a
in
enumerate
(
self_edge_list_G1
[
i
]):
for
l
,
b
in
enumerate
(
self_edge_list_G2
[
j
]):
c1_self
[
k
]
=
min
(
c1_self
[
k
],
edge_substitution_cost
[
a
,
b
]
/
2
)
c2_self
[
l
]
=
min
(
c2_self
[
l
],
edge_substitution_cost
[
a
,
b
]
/
2
)
for
k
,
a
in
enumerate
(
incoming_edges_G1
[
i
]):
for
l
,
b
in
enumerate
(
incoming_edges_G2
[
j
]):
c1_incoming
[
k
]
=
min
(
c1_incoming
[
k
],
edge_substitution_cost
[
a
,
b
]
/
2
)
c2_incoming
[
l
]
=
min
(
c2_incoming
[
l
],
edge_substitution_cost
[
a
,
b
]
/
2
)
for
k
,
a
in
enumerate
(
outgoing_edges_G1
[
i
]):
for
l
,
b
in
enumerate
(
outgoing_edges_G2
[
j
]):
c1_outgoing
[
k
]
=
min
(
c1_outgoing
[
k
],
edge_substitution_cost
[
a
,
b
]
/
2
)
c2_outgoing
[
l
]
=
min
(
c2_outgoing
[
l
],
edge_substitution_cost
[
a
,
b
]
/
2
)
edge_hausdorff_lower_bound
=
0.0
if
len
(
selected_deletion_G1
[
i
])
>
len
(
selected_insertion_G2
[
j
]):
idx
=
np
.
argpartition
(
selected_deletion_G1
[
i
],
(
len
(
selected_deletion_G1
[
i
])
-
len
(
selected_insertion_G2
[
j
])
),
)
edge_hausdorff_lower_bound
=
selected_deletion_G1
[
i
][
idx
[
:
(
len
(
selected_deletion_G1
[
i
])
-
len
(
selected_insertion_G2
[
j
])
)
]
].
sum
()
elif
len
(
selected_deletion_G1
[
i
])
<
len
(
selected_insertion_G2
[
j
]):
idx
=
np
.
argpartition
(
selected_insertion_G2
[
j
],
(
len
(
selected_insertion_G2
[
j
])
-
len
(
selected_deletion_G1
[
i
])
),
)
edge_hausdorff_lower_bound
=
selected_insertion_G2
[
j
][
idx
[
:
(
len
(
selected_insertion_G2
[
j
])
-
len
(
selected_deletion_G1
[
i
])
)
]
].
sum
()
sc_cost
=
0.5
*
(
node_substitution_cost
[
i
,
j
]
+
0.5
*
max
(
c1_self
.
sum
()
+
c2_self
.
sum
()
+
c1_incoming
.
sum
()
+
c2_incoming
.
sum
()
+
c1_outgoing
.
sum
()
+
c2_outgoing
.
sum
(),
edge_hausdorff_lower_bound
,
)
)
if
cost_G1
[
i
]
>
sc_cost
:
cost_G1
[
i
]
=
sc_cost
;
cost_G1
[
i
]
=
sc_cost
if
cost_G2
[
j
]
>
sc_cost
:
cost_G2
[
j
]
=
sc_cost
;
cost_G2
[
j
]
=
sc_cost
graph_hausdorff_lower_bound
=
0.0
;
graph_hausdorff_lower_bound
=
0.0
if
num_G1_nodes
>
num_G2_nodes
:
idx
=
np
.
argpartition
(
G1_node_deletion_cost
,
(
num_G1_nodes
-
num_G2_nodes
));
graph_hausdorff_lower_bound
=
G1_node_deletion_cost
[
idx
[:(
num_G1_nodes
-
num_G2_nodes
)]].
sum
();
idx
=
np
.
argpartition
(
G1_node_deletion_cost
,
(
num_G1_nodes
-
num_G2_nodes
)
)
graph_hausdorff_lower_bound
=
G1_node_deletion_cost
[
idx
[:
(
num_G1_nodes
-
num_G2_nodes
)]
].
sum
()
elif
num_G1_nodes
<
num_G2_nodes
:
idx
=
np
.
argpartition
(
G2_node_insertion_cost
,
(
num_G2_nodes
-
num_G1_nodes
));
graph_hausdorff_lower_bound
=
G2_node_insertion_cost
[
idx
[:(
num_G2_nodes
-
num_G1_nodes
)]].
sum
();
idx
=
np
.
argpartition
(
G2_node_insertion_cost
,
(
num_G2_nodes
-
num_G1_nodes
)
)
graph_hausdorff_lower_bound
=
G2_node_insertion_cost
[
idx
[:
(
num_G2_nodes
-
num_G1_nodes
)]
].
sum
()
graph_hausdorff_cost
=
max
(
graph_hausdorff_lower_bound
,
cost_G1
.
sum
()
+
cost_G2
.
sum
());
return
graph_hausdorff_cost
;
graph_hausdorff_cost
=
max
(
graph_hausdorff_lower_bound
,
cost_G1
.
sum
()
+
cost_G2
.
sum
()
)
return
graph_hausdorff_cost
def
a_star_search
(
G1
,
G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
max_beam_size
):
# A-star traversal
open_list
=
[]
;
open_list
=
[]
# Create first nodes in the A-star search tree, matching node 0 of G1 with all possibilities (each node of G2, and deletion)
matched_cost
=
0.0
;
matched_nodes
=
([],
[]);
# No nodes matched in the beginning
matched_edges
=
([],
[]);
# No edges matched in the beginning
unprocessed_nodes_G1
=
[
i
for
i
in
range
(
G1
.
number_of_nodes
())]
# No nodes matched in the beginning
unprocessed_nodes_G2
=
[
i
for
i
in
range
(
G2
.
number_of_nodes
())]
# No nodes matched in the beginning
unprocessed_edges_G1
=
[
i
for
i
in
range
(
G1
.
number_of_edges
())]
# No edges matched in the beginning
unprocessed_edges_G2
=
[
i
for
i
in
range
(
G2
.
number_of_edges
())]
# No edges matched in the beginning
matched_cost
=
0.0
matched_nodes
=
([],
[])
# No nodes matched in the beginning
matched_edges
=
([],
[])
# No edges matched in the beginning
unprocessed_nodes_G1
=
[
i
for
i
in
range
(
G1
.
number_of_nodes
())
]
# No nodes matched in the beginning
unprocessed_nodes_G2
=
[
i
for
i
in
range
(
G2
.
number_of_nodes
())
]
# No nodes matched in the beginning
unprocessed_edges_G1
=
[
i
for
i
in
range
(
G1
.
number_of_edges
())
]
# No edges matched in the beginning
unprocessed_edges_G2
=
[
i
for
i
in
range
(
G2
.
number_of_edges
())
]
# No edges matched in the beginning
for
i
in
range
(
len
(
unprocessed_nodes_G2
)):
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
unprocessed_nodes_G2
[
i
],
\
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
);
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
unprocessed_nodes_G2
[
i
],
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
)
# Insert into open-list, implemented as a heap
heappush
(
open_list
,
tree_node
)
# Consider node deletion
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
None
,
\
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
);
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
None
,
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
)
# Insert into open-list, implemented as a heap
heappush
(
open_list
,
tree_node
)
while
len
(
open_list
)
>
0
:
# TODO: Create a node that processes multi node insertion deletion in one search node,
# as opposed in multiple search nodes here
parent_tree_node
=
heappop
(
open_list
)
;
matched_cost
=
parent_tree_node
.
matched_cost
;
matched_nodes
=
parent_tree_node
.
matched_nodes
;
matched_edges
=
parent_tree_node
.
matched_edges
;
unprocessed_nodes_G1
=
parent_tree_node
.
unprocessed_nodes_G1
;
unprocessed_nodes_G2
=
parent_tree_node
.
unprocessed_nodes_G2
;
unprocessed_edges_G1
=
parent_tree_node
.
unprocessed_edges_G1
;
unprocessed_edges_G2
=
parent_tree_node
.
unprocessed_edges_G2
;
parent_tree_node
=
heappop
(
open_list
)
matched_cost
=
parent_tree_node
.
matched_cost
matched_nodes
=
parent_tree_node
.
matched_nodes
matched_edges
=
parent_tree_node
.
matched_edges
unprocessed_nodes_G1
=
parent_tree_node
.
unprocessed_nodes_G1
unprocessed_nodes_G2
=
parent_tree_node
.
unprocessed_nodes_G2
unprocessed_edges_G1
=
parent_tree_node
.
unprocessed_edges_G1
unprocessed_edges_G2
=
parent_tree_node
.
unprocessed_edges_G2
if
len
(
unprocessed_nodes_G1
)
==
0
and
len
(
unprocessed_nodes_G2
)
==
0
:
return
(
matched_cost
,
matched_nodes
,
matched_edges
)
;
return
(
matched_cost
,
matched_nodes
,
matched_edges
)
elif
len
(
unprocessed_nodes_G1
)
>
0
:
for
i
in
range
(
len
(
unprocessed_nodes_G2
)):
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
unprocessed_nodes_G2
[
i
],
\
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
);
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
unprocessed_nodes_G2
[
i
],
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
)
# Insert into open-list, implemented as a heap
heappush
(
open_list
,
tree_node
)
# Consider node deletion
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
None
,
\
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
);
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
unprocessed_nodes_G1
[
0
],
None
,
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
)
# Insert into open-list, implemented as a heap
heappush
(
open_list
,
tree_node
)
elif
len
(
unprocessed_nodes_G2
)
>
0
:
for
i
in
range
(
len
(
unprocessed_nodes_G2
)):
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
None
,
unprocessed_nodes_G2
[
i
],
\
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
);
tree_node
=
search_tree_node
(
G1
,
G2
,
matched_cost
,
matched_nodes
,
matched_edges
,
None
,
unprocessed_nodes_G2
[
i
],
unprocessed_nodes_G1
,
unprocessed_nodes_G2
,
unprocessed_edges_G1
,
unprocessed_edges_G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
)
# Insert into open-list, implemented as a heap
heappush
(
open_list
,
tree_node
)
# Retain the top-k elements in open-list iff algorithm is beam
if
max_beam_size
>
0
and
len
(
open_list
)
>
max_beam_size
:
open_list
=
nsmallest
(
max_beam_size
,
open_list
);
heapify
(
open_list
);
open_list
=
nsmallest
(
max_beam_size
,
open_list
)
heapify
(
open_list
)
return
None
return
None
;
def
get_sorted_mapping
(
mapping_tuple
,
len1
,
len2
):
# Get sorted mapping of nodes/edges
result_0
=
[
None
]
*
len1
;
result_1
=
[
None
]
*
len2
;
result_0
=
[
None
]
*
len1
result_1
=
[
None
]
*
len2
for
i
in
range
(
len
(
mapping_tuple
[
0
])):
if
mapping_tuple
[
0
][
i
]
is
not
None
and
mapping_tuple
[
1
][
i
]
is
not
None
:
result_0
[
mapping_tuple
[
0
][
i
]]
=
mapping_tuple
[
1
][
i
];
result_1
[
mapping_tuple
[
1
][
i
]]
=
mapping_tuple
[
0
][
i
];
return
(
result_0
,
result_1
);
def
graph_edit_distance
(
G1
,
G2
,
node_substitution_cost
=
None
,
edge_substitution_cost
=
None
,
G1_node_deletion_cost
=
None
,
G2_node_insertion_cost
=
None
,
G1_edge_deletion_cost
=
None
,
G2_edge_insertion_cost
=
None
,
algorithm
=
'bipartite'
,
max_beam_size
=
100
):
result_0
[
mapping_tuple
[
0
][
i
]]
=
mapping_tuple
[
1
][
i
]
result_1
[
mapping_tuple
[
1
][
i
]]
=
mapping_tuple
[
0
][
i
]
return
(
result_0
,
result_1
)
def
graph_edit_distance
(
G1
,
G2
,
node_substitution_cost
=
None
,
edge_substitution_cost
=
None
,
G1_node_deletion_cost
=
None
,
G2_node_insertion_cost
=
None
,
G1_edge_deletion_cost
=
None
,
G2_edge_insertion_cost
=
None
,
algorithm
=
"bipartite"
,
max_beam_size
=
100
,
):
"""Returns GED (graph edit distance) between DGLGraphs G1 and G2.
...
...
@@ -752,52 +1236,99 @@ def graph_edit_distance(G1, G2,
"""
# Handle corner cases
if
G1
is
None
and
G2
is
None
:
return
(
0.0
,
([],
[]),
([],
[]))
;
return
(
0.0
,
([],
[]),
([],
[]))
elif
G1
is
None
:
edit_cost
=
0.0
;
edit_cost
=
0.0
# Validate
if
algorithm
!=
"beam"
:
max_beam_size
=
-
1
;
node_substitution_cost
,
edge_substitution_cost
,
\
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
\
G2_node_insertion_cost
,
G2_edge_insertion_cost
=
validate_cost_functions
(
G1
,
G2
,
\
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
);
max_beam_size
=
-
1
(
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
)
=
validate_cost_functions
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
)
# cost matrices for LAP solution
cost_matrix_nodes
,
cost_matrix_edges
=
construct_cost_functions
(
G1
,
G2
,
\
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
);
cost_matrix_nodes
,
cost_matrix_edges
=
construct_cost_functions
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
)
if
algorithm
==
"astar"
or
algorithm
==
"beam"
:
(
matched_cost
,
matched_nodes
,
matched_edges
)
=
a_star_search
(
G1
,
G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
,
max_beam_size
);
return
(
matched_cost
,
get_sorted_mapping
(
matched_nodes
,
G1
.
number_of_nodes
(),
G2
.
number_of_nodes
()),
get_sorted_mapping
(
matched_edges
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
()));
(
matched_cost
,
matched_nodes
,
matched_edges
)
=
a_star_search
(
G1
,
G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
max_beam_size
)
return
(
matched_cost
,
get_sorted_mapping
(
matched_nodes
,
G1
.
number_of_nodes
(),
G2
.
number_of_nodes
()
),
get_sorted_mapping
(
matched_edges
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
()
),
)
elif
algorithm
==
"hausdorff"
:
hausdorff_cost
=
hausdorff_matching
(
G1
,
G2
,
\
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
);
return
(
hausdorff_cost
,
None
,
None
);
hausdorff_cost
=
hausdorff_matching
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
)
return
(
hausdorff_cost
,
None
,
None
)
else
:
cost_matrix
=
contextual_cost_matrix_construction
(
G1
,
G2
,
\
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
);
cost_matrix
=
contextual_cost_matrix_construction
(
G1
,
G2
,
node_substitution_cost
,
edge_substitution_cost
,
G1_node_deletion_cost
,
G1_edge_deletion_cost
,
G2_node_insertion_cost
,
G2_edge_insertion_cost
,
)
# Match the nodes as per the LAP solution
row_ind
,
col_ind
,
_
=
lapjv
(
cost_matrix
);
(
matched_cost
,
matched_nodes
,
matched_edges
)
=
edit_cost_from_node_matching
(
G1
,
G2
,
\
cost_matrix_nodes
,
cost_matrix_edges
,
row_ind
);
return
(
matched_cost
,
get_sorted_mapping
(
matched_nodes
,
G1
.
number_of_nodes
(),
G2
.
number_of_nodes
()),
get_sorted_mapping
(
matched_edges
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
()));
row_ind
,
col_ind
,
_
=
lapjv
(
cost_matrix
)
(
matched_cost
,
matched_nodes
,
matched_edges
,
)
=
edit_cost_from_node_matching
(
G1
,
G2
,
cost_matrix_nodes
,
cost_matrix_edges
,
row_ind
)
return
(
matched_cost
,
get_sorted_mapping
(
matched_nodes
,
G1
.
number_of_nodes
(),
G2
.
number_of_nodes
()
),
get_sorted_mapping
(
matched_edges
,
G1
.
number_of_edges
(),
G2
.
number_of_edges
()
),
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment