"vscode:/vscode.git/clone" did not exist on "d89f825d8cd70832f41e190837e532787ff795f4"
Unverified Commit f118ea95 authored by Hongzhi (Steve), Chen's avatar Hongzhi (Steve), Chen Committed by GitHub
Browse files
parent c59000ac
...@@ -2,16 +2,24 @@ import dgl ...@@ -2,16 +2,24 @@ import dgl
import numpy as np import numpy as np
from heapq import heappush, heappop, heapify, nsmallest from heapq import heappush, heappop, heapify, nsmallest
from copy import deepcopy from copy import deepcopy
# We use lapjv implementation (https://github.com/src-d/lapjv) to solve assignment problem, because of its scalability # We use lapjv implementation (https://github.com/src-d/lapjv) to solve assignment problem, because of its scalability
# Also see https://github.com/berhane/LAP-solvers for benchmarking of LAP solvers # Also see https://github.com/berhane/LAP-solvers for benchmarking of LAP solvers
from lapjv import lapjv from lapjv import lapjv
EPSILON = 0.0000001; EPSILON = 0.0000001
def validate_cost_functions(G1, G2, def validate_cost_functions(
node_substitution_cost=None, edge_substitution_cost=None, G1,
G1_node_deletion_cost=None, G1_edge_deletion_cost=None, G2,
G2_node_insertion_cost=None, G2_edge_insertion_cost=None): node_substitution_cost=None,
edge_substitution_cost=None,
G1_node_deletion_cost=None,
G1_edge_deletion_cost=None,
G2_node_insertion_cost=None,
G2_edge_insertion_cost=None,
):
"""Validates cost functions (substitution, insertion, deletion) and initializes them with default=0 for substitution """Validates cost functions (substitution, insertion, deletion) and initializes them with default=0 for substitution
and default=1 for insertion/deletion and default=1 for insertion/deletion
if the provided ones are None. if the provided ones are None.
...@@ -28,43 +36,59 @@ def validate_cost_functions(G1, G2, ...@@ -28,43 +36,59 @@ def validate_cost_functions(G1, G2,
# if any cost matrix is None, initialize it with default costs # if any cost matrix is None, initialize it with default costs
if node_substitution_cost is None: if node_substitution_cost is None:
node_substitution_cost = np.zeros((num_G1_nodes, num_G2_nodes), dtype=float) node_substitution_cost = np.zeros(
(num_G1_nodes, num_G2_nodes), dtype=float
)
else: else:
assert node_substitution_cost.shape == (num_G1_nodes, num_G2_nodes); assert node_substitution_cost.shape == (num_G1_nodes, num_G2_nodes)
if edge_substitution_cost is None: if edge_substitution_cost is None:
edge_substitution_cost = np.zeros((num_G1_edges, num_G2_edges), dtype=float) edge_substitution_cost = np.zeros(
(num_G1_edges, num_G2_edges), dtype=float
)
else: else:
assert edge_substitution_cost.shape == (num_G1_edges, num_G2_edges); assert edge_substitution_cost.shape == (num_G1_edges, num_G2_edges)
if G1_node_deletion_cost is None: if G1_node_deletion_cost is None:
G1_node_deletion_cost = np.ones(num_G1_nodes, dtype=float) G1_node_deletion_cost = np.ones(num_G1_nodes, dtype=float)
else: else:
assert G1_node_deletion_cost.shape[0] == num_G1_nodes; assert G1_node_deletion_cost.shape[0] == num_G1_nodes
if G1_edge_deletion_cost is None: if G1_edge_deletion_cost is None:
G1_edge_deletion_cost = np.ones(num_G1_edges, dtype=float) G1_edge_deletion_cost = np.ones(num_G1_edges, dtype=float)
else: else:
assert G1_edge_deletion_cost.shape[0] == num_G1_edges; assert G1_edge_deletion_cost.shape[0] == num_G1_edges
if G2_node_insertion_cost is None: if G2_node_insertion_cost is None:
G2_node_insertion_cost = np.ones(num_G2_nodes, dtype=float) G2_node_insertion_cost = np.ones(num_G2_nodes, dtype=float)
else: else:
assert G2_node_insertion_cost.shape[0] == num_G2_nodes; assert G2_node_insertion_cost.shape[0] == num_G2_nodes
if G2_edge_insertion_cost is None: if G2_edge_insertion_cost is None:
G2_edge_insertion_cost = np.ones(num_G2_edges, dtype=float) G2_edge_insertion_cost = np.ones(num_G2_edges, dtype=float)
else: else:
assert G2_edge_insertion_cost.shape[0] == num_G2_edges; assert G2_edge_insertion_cost.shape[0] == num_G2_edges
return node_substitution_cost, edge_substitution_cost, \ return (
G1_node_deletion_cost, G1_edge_deletion_cost, \ node_substitution_cost,
G2_node_insertion_cost, G2_edge_insertion_cost; edge_substitution_cost,
G1_node_deletion_cost,
def construct_cost_functions(G1, G2, G1_edge_deletion_cost,
node_substitution_cost, edge_substitution_cost, G2_node_insertion_cost,
G1_node_deletion_cost, G1_edge_deletion_cost, G2_edge_insertion_cost,
G2_node_insertion_cost, G2_edge_insertion_cost): )
def construct_cost_functions(
G1,
G2,
node_substitution_cost,
edge_substitution_cost,
G1_node_deletion_cost,
G1_edge_deletion_cost,
G2_node_insertion_cost,
G2_edge_insertion_cost,
):
"""Constructs cost matrices for LAP solution """Constructs cost matrices for LAP solution
...@@ -77,318 +101,499 @@ def construct_cost_functions(G1, G2, ...@@ -77,318 +101,499 @@ def construct_cost_functions(G1, G2,
num_G1_edges = G1.number_of_edges() num_G1_edges = G1.number_of_edges()
num_G2_edges = G2.number_of_edges() num_G2_edges = G2.number_of_edges()
# cost matrix of node mappings # cost matrix of node mappings
cost_upper_bound = node_substitution_cost.sum() + G1_node_deletion_cost.sum() + G2_node_insertion_cost.sum() + 1 cost_upper_bound = (
C_node = np.zeros((num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float) node_substitution_cost.sum()
+ G1_node_deletion_cost.sum()
+ G2_node_insertion_cost.sum()
C_node[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost; + 1
C_node[0:num_G1_nodes, num_G2_nodes:num_G2_nodes + num_G1_nodes] = np.array([G1_node_deletion_cost[i] if i == j \ )
else cost_upper_bound\ C_node = np.zeros(
for i in range(num_G1_nodes) \ (num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float
for j in range(num_G1_nodes)]).reshape(num_G1_nodes, num_G1_nodes); )
C_node[num_G1_nodes:num_G1_nodes + num_G2_nodes, 0:num_G2_nodes] = np.array([G2_node_insertion_cost[i] if i == j \
else cost_upper_bound\ C_node[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost
for i in range(num_G2_nodes) \ C_node[
for j in range(num_G2_nodes)]).reshape(num_G2_nodes, num_G2_nodes); 0:num_G1_nodes, num_G2_nodes : num_G2_nodes + num_G1_nodes
] = np.array(
[
G1_node_deletion_cost[i] if i == j else cost_upper_bound
for i in range(num_G1_nodes)
for j in range(num_G1_nodes)
]
).reshape(
num_G1_nodes, num_G1_nodes
)
C_node[
num_G1_nodes : num_G1_nodes + num_G2_nodes, 0:num_G2_nodes
] = np.array(
[
G2_node_insertion_cost[i] if i == j else cost_upper_bound
for i in range(num_G2_nodes)
for j in range(num_G2_nodes)
]
).reshape(
num_G2_nodes, num_G2_nodes
)
# cost matrix of edge mappings # cost matrix of edge mappings
cost_upper_bound = edge_substitution_cost.sum() + G1_edge_deletion_cost.sum() + G2_edge_insertion_cost.sum() + 1 cost_upper_bound = (
C_edge = np.zeros((num_G1_edges + num_G2_edges, num_G1_edges + num_G2_edges), dtype=float) edge_substitution_cost.sum()
+ G1_edge_deletion_cost.sum()
+ G2_edge_insertion_cost.sum()
+ 1
)
C_edge = np.zeros(
(num_G1_edges + num_G2_edges, num_G1_edges + num_G2_edges), dtype=float
)
C_edge[0:num_G1_edges, 0:num_G2_edges] = edge_substitution_cost
C_edge[
0:num_G1_edges, num_G2_edges : num_G2_edges + num_G1_edges
] = np.array(
[
G1_edge_deletion_cost[i] if i == j else cost_upper_bound
for i in range(num_G1_edges)
for j in range(num_G1_edges)
]
).reshape(
num_G1_edges, num_G1_edges
)
C_edge[
num_G1_edges : num_G1_edges + num_G2_edges, 0:num_G2_edges
] = np.array(
[
G2_edge_insertion_cost[i] if i == j else cost_upper_bound
for i in range(num_G2_edges)
for j in range(num_G2_edges)
]
).reshape(
num_G2_edges, num_G2_edges
)
return C_node, C_edge
C_edge[0:num_G1_edges, 0:num_G2_edges] = edge_substitution_cost;
C_edge[0:num_G1_edges, num_G2_edges:num_G2_edges + num_G1_edges] = np.array([G1_edge_deletion_cost[i] if i == j \
else cost_upper_bound\
for i in range(num_G1_edges) \
for j in range(num_G1_edges)]).reshape(num_G1_edges, num_G1_edges);
C_edge[num_G1_edges:num_G1_edges + num_G2_edges, 0:num_G2_edges] = np.array([G2_edge_insertion_cost[i] if i == j \
else cost_upper_bound\
for i in range(num_G2_edges) \
for j in range(num_G2_edges)]).reshape(num_G2_edges, num_G2_edges);
return C_node, C_edge;
def get_edges_to_match(G, node_id, matched_nodes):
    """Collect the edges of ``G`` that link ``node_id`` to itself or to matched nodes.

    Parameters
    ----------
    G : graph object
        Must provide ``has_edge_between``, ``edge_id``, ``in_edges`` and
        ``out_edges``; the arrays they return are converted with ``.numpy()``.
    node_id : int
        The node whose incident edges are collected.
    matched_nodes : list
        Nodes processed so far, in processing order. Entries may be ``None``
        (a position with no node of this graph); such entries never match.

    Returns
    -------
    incident_edges : numpy.ndarray
        Edge ids: self-loops first, then incoming edges, then outgoing edges.
    index : numpy.ndarray
        For each edge, the position in ``matched_nodes`` of its other
        end-point, or ``-1`` for a self-loop.
    direction : numpy.ndarray
        ``0`` for a self-loop, ``-1`` for an incoming edge, ``1`` for an
        outgoing edge.
    """
    # Position of the first occurrence of every matched node, built once so
    # that each edge costs a dict lookup instead of two scans of the list.
    first_position = {}
    for position, node in enumerate(matched_nodes):
        if node is not None:
            first_position.setdefault(node, position)

    def _towards_matched(endpoints, edge_ids):
        # Keep only the edges whose far end-point is already matched.
        endpoints = endpoints.numpy().tolist()
        edge_ids = edge_ids.numpy()
        hits = [
            (k, first_position[node])
            for k, node in enumerate(endpoints)
            if node in first_position
        ]
        keep = np.array([k for k, _position in hits], dtype=int)
        positions = np.array([position for _k, position in hits], dtype=int)
        return edge_ids[keep], positions

    incident_edges = np.array([], dtype=int)
    index = np.array([], dtype=int)
    direction = np.array([], dtype=int)

    # Self-loops on node_id
    # NOTE(review): recent DGL releases appear to expose this query as
    # `edge_ids` -- confirm against the DGL version in use.
    if G.has_edge_between(node_id, node_id):
        self_edge_ids = G.edge_id(node_id, node_id, return_array=True).numpy()
        incident_edges = np.concatenate((incident_edges, self_edge_ids))
        index = np.concatenate((index, [-1] * len(self_edge_ids)))
        direction = np.concatenate((direction, [0] * len(self_edge_ids)))

    # Edges coming from matched predecessors
    src, _, eid = G.in_edges([node_id], "all")
    edges, positions = _towards_matched(src, eid)
    incident_edges = np.concatenate((incident_edges, edges))
    index = np.concatenate((index, positions))
    direction = np.concatenate(
        (direction, np.full(len(positions), -1, dtype=int))
    )

    # Edges going to matched successors
    _, dst, eid = G.out_edges([node_id], "all")
    edges, positions = _towards_matched(dst, eid)
    incident_edges = np.concatenate((incident_edges, edges))
    index = np.concatenate((index, positions))
    direction = np.concatenate(
        (direction, np.full(len(positions), 1, dtype=int))
    )

    return incident_edges, index, direction
def subset_cost_matrix(cost_matrix, row_ids, col_ids, num_rows, num_cols):
    """Extract the sub-problem of ``cost_matrix`` restricted to the given ids.

    ``cost_matrix`` has shape ``(num_rows + num_cols, num_rows + num_cols)``.
    Besides the requested rows and columns, the result keeps row
    ``num_rows + k`` for every ``k`` in ``col_ids`` and column
    ``num_cols + k`` for every ``k`` in ``row_ids``, so it is square with
    side ``len(row_ids) + len(col_ids)``.

    Parameters
    ----------
    cost_matrix : numpy.ndarray
        Full square cost matrix.
    row_ids, col_ids : sequence of int
        Ids to keep; either may be empty.
    num_rows, num_cols : int
        Offsets at which the extra rows / columns start.

    Returns
    -------
    numpy.ndarray
        A new array (integer-array indexing copies), so callers may modify it
        without touching ``cost_matrix``.
    """
    # Force an integer dtype: an empty Python list would otherwise become a
    # float64 array and make the concatenated index array unusable.
    row_ids = np.asarray(row_ids, dtype=int)
    col_ids = np.asarray(col_ids, dtype=int)

    extended_row_ids = np.concatenate((row_ids, col_ids + num_rows))
    extended_col_ids = np.concatenate((col_ids, row_ids + num_cols))

    return cost_matrix[extended_row_ids, :][:, extended_col_ids]
class search_tree_node:
    """One node of the search tree over partial node matchings of G1 and G2.

    A tree node extends its parent's partial matching by one pair
    ``(node_G1, node_G2)``; either member may be ``None`` (deletion of
    ``node_G1`` or insertion of ``node_G2``).

    Attributes
    ----------
    matched_cost
        Parent cost plus the cost of this node pair and of the edges matched
        at this step.
    future_approximate_cost
        Assignment-based estimate of the cost of the nodes and edges that are
        still unprocessed.
    matched_nodes, matched_edges
        Pairs of parallel lists (G1 side, G2 side); ``None`` marks an
        insertion or deletion.
    unprocessed_nodes_G1, unprocessed_nodes_G2,
    unprocessed_edges_G1, unprocessed_edges_G2
        What remains to be processed below this tree node.
    """

    def __init__(
        self,
        G1,
        G2,
        parent_matched_cost,
        parent_matched_nodes,
        parent_matched_edges,
        node_G1,
        node_G2,
        parent_unprocessed_nodes_G1,
        parent_unprocessed_nodes_G2,
        parent_unprocessed_edges_G1,
        parent_unprocessed_edges_G2,
        cost_matrix_nodes,
        cost_matrix_edges,
    ):
        num_G1_nodes = G1.number_of_nodes()
        num_G2_nodes = G2.number_of_nodes()
        num_G1_edges = G1.number_of_edges()
        num_G2_edges = G2.number_of_edges()

        self.matched_cost = parent_matched_cost
        self.future_approximate_cost = 0.0

        # Copy the parent's state so sibling tree nodes do not share lists
        self.matched_nodes = deepcopy(parent_matched_nodes)
        self.matched_nodes[0].append(node_G1)
        self.matched_nodes[1].append(node_G2)
        self.matched_edges = deepcopy(parent_matched_edges)

        self.unprocessed_nodes_G1 = [
            node for node in parent_unprocessed_nodes_G1 if node != node_G1
        ]
        self.unprocessed_nodes_G2 = [
            node for node in parent_unprocessed_nodes_G2 if node != node_G2
        ]

        # Add the cost of matching nodes at this tree-node to the matched cost
        if node_G1 is not None and node_G2 is not None:
            # Substitute node_G1 with node_G2
            self.matched_cost += cost_matrix_nodes[node_G1, node_G2]
        elif node_G1 is not None:
            # Delete node_G1
            self.matched_cost += cost_matrix_nodes[
                node_G1, node_G1 + num_G2_nodes
            ]
        elif node_G2 is not None:
            # Insert node_G2
            self.matched_cost += cost_matrix_nodes[
                node_G2 + num_G1_nodes, node_G2
            ]

        # Add the cost of matching edges at this tree-node to the matched cost
        (
            edge_cost,
            incident_edges_G1,
            incident_edges_G2,
            edge_pairs,
        ) = self._match_incident_edges(
            G1,
            G2,
            node_G1,
            node_G2,
            parent_matched_nodes,
            cost_matrix_edges,
            num_G1_edges,
            num_G2_edges,
        )
        for edge_G1, edge_G2 in edge_pairs:
            self.matched_edges[0].append(edge_G1)
            self.matched_edges[1].append(edge_G2)
        self.matched_cost += edge_cost

        # Add the cost of matching of unprocessed nodes to the future approximate cost
        self.future_approximate_cost += self._estimate_remaining_cost(
            cost_matrix_nodes,
            self.unprocessed_nodes_G1,
            self.unprocessed_nodes_G2,
            num_G1_nodes,
            num_G2_nodes,
        )

        # Add the cost of LAP matching of unprocessed edges to the future approximate cost
        self.unprocessed_edges_G1 = [
            edge
            for edge in parent_unprocessed_edges_G1
            if edge not in incident_edges_G1
        ]
        self.unprocessed_edges_G2 = [
            edge
            for edge in parent_unprocessed_edges_G2
            if edge not in incident_edges_G2
        ]
        self.future_approximate_cost += self._estimate_remaining_cost(
            cost_matrix_edges,
            self.unprocessed_edges_G1,
            self.unprocessed_edges_G2,
            num_G1_edges,
            num_G2_edges,
        )

    @staticmethod
    def _match_incident_edges(
        G1,
        G2,
        node_G1,
        node_G2,
        matched_nodes,
        cost_matrix_edges,
        num_G1_edges,
        num_G2_edges,
    ):
        """Match the edges joining the new node pair to earlier matched nodes.

        Returns ``(cost, incident_edges_G1, incident_edges_G2, pairs)`` where
        ``pairs`` lists ``(edge_G1, edge_G2)`` tuples with ``None`` standing
        for an inserted / deleted edge.
        """
        # Edges with one end-point as node_G1 and other in matched nodes or node_G1
        incident_edges_G1 = []
        if node_G1 is not None:
            incident_edges_G1, index_G1, direction_G1 = get_edges_to_match(
                G1, node_G1, matched_nodes[0]
            )
        # Edges with one end-point as node_G2 and other in matched nodes or node_G2
        incident_edges_G2 = np.array([])
        if node_G2 is not None:
            incident_edges_G2, index_G2, direction_G2 = get_edges_to_match(
                G2, node_G2, matched_nodes[1]
            )

        count_G1 = len(incident_edges_G1)
        count_G2 = len(incident_edges_G2)
        cost = 0.0
        pairs = []

        if count_G1 > 0 and count_G2 > 0:
            # Consider substituting
            sub_matrix = subset_cost_matrix(
                cost_matrix_edges,
                incident_edges_G1,
                incident_edges_G2,
                num_G1_edges,
                num_G2_edges,
            )
            max_sum = sub_matrix.sum()
            # Take care of impossible assignments by assigning maximum cost:
            # both edges need to have the same direction and their other
            # end-points must sit at the same position of the matching.
            compatible = (direction_G1[:, None] == direction_G2[None, :]) & (
                index_G1[:, None] == index_G2[None, :]
            )
            substitution_block = sub_matrix[:count_G1, :count_G2]  # a view
            substitution_block[~compatible] = max_sum

            # Match the edges as per the LAP solution; the first array
            # returned by lapjv is read as "column assigned to each row".
            assignment, _, _ = lapjv(sub_matrix)
            for row, col in enumerate(assignment):
                cost += sub_matrix[row, col]
            for row, col in enumerate(assignment):
                if row < count_G1:
                    if col < count_G2:
                        pairs.append(
                            (incident_edges_G1[row], incident_edges_G2[col])
                        )
                    else:
                        pairs.append((incident_edges_G1[row], None))
                elif col < count_G2:
                    pairs.append((None, incident_edges_G2[col]))
        elif count_G1 > 0:
            # only deletion possible
            for edge in incident_edges_G1:
                cost += cost_matrix_edges[edge, num_G2_edges + edge]
                pairs.append((edge, None))
        elif count_G2 > 0:
            # only insertion possible
            for edge in incident_edges_G2:
                cost += cost_matrix_edges[num_G1_edges + edge, edge]
                pairs.append((None, edge))

        return cost, incident_edges_G1, incident_edges_G2, pairs

    @staticmethod
    def _estimate_remaining_cost(cost_matrix, ids_G1, ids_G2, size_G1, size_G2):
        """Cost of assigning the unprocessed items of G1 to those of G2.

        Used for both nodes and edges: solves a LAP when both sides are
        non-empty, otherwise sums the deletion (G1 only) or insertion
        (G2 only) costs. Returns ``0.0`` when nothing is left.
        """
        cost = 0.0
        if len(ids_G1) > 0 and len(ids_G2) > 0:
            # Consider substituting
            sub_matrix = subset_cost_matrix(
                cost_matrix, ids_G1, ids_G2, size_G1, size_G2
            )
            assignment, _, _ = lapjv(sub_matrix)
            for row, col in enumerate(assignment):
                cost += sub_matrix[row, col]
        elif len(ids_G1) > 0:
            # only deletion possible
            for item in ids_G1:
                cost += cost_matrix[item, size_G2 + item]
        elif len(ids_G2) > 0:
            # only insertion possible
            for item in ids_G2:
                cost += cost_matrix[size_G1 + item, item]
        return cost

    # For heap insertion order
    def __lt__(self, other):
        """Order tree nodes for the heap.

        Compares total (matched + future) cost first; on a tie within
        EPSILON the node with the larger matched cost comes first, since the
        matched cost is closer to reality; on a further tie the node with
        fewer unprocessed nodes and edges comes first.
        """

        def remaining(node):
            return (
                len(node.unprocessed_nodes_G1)
                + len(node.unprocessed_nodes_G2)
                + len(node.unprocessed_edges_G1)
                + len(node.unprocessed_edges_G2)
            )

        own_total = self.matched_cost + self.future_approximate_cost
        other_total = other.matched_cost + other.future_approximate_cost
        if abs(own_total - other_total) > EPSILON:
            return own_total < other_total
        if abs(self.matched_cost - other.matched_cost) > EPSILON:
            return other.matched_cost < self.matched_cost
        return remaining(self) < remaining(other)
def _match_incident_edges(
    G1, G2, node_G1, node_G2, earlier_G1, earlier_G2, cost_matrix_edges
):
    """Match the edges joining one node pair to the pairs placed before it.

    ``earlier_G1`` / ``earlier_G2`` are the G1 / G2 sides of the matching up
    to (not including) this pair. Returns ``(cost, pairs)`` where ``pairs``
    lists ``(edge_G1, edge_G2)`` tuples, ``None`` marking an inserted or
    deleted edge.
    """
    # Edges with one end-point as node_G1 and other in matched nodes or node_G1
    incident_edges_G1 = []
    if node_G1 is not None:
        incident_edges_G1, index_G1, direction_G1 = get_edges_to_match(
            G1, node_G1, earlier_G1
        )
    # Edges with one end-point as node_G2 and other in matched nodes or node_G2
    incident_edges_G2 = np.array([])
    if node_G2 is not None:
        incident_edges_G2, index_G2, direction_G2 = get_edges_to_match(
            G2, node_G2, earlier_G2
        )

    count_G1 = len(incident_edges_G1)
    count_G2 = len(incident_edges_G2)
    cost = 0.0
    pairs = []

    if count_G1 > 0 and count_G2 > 0:
        # Consider substituting
        sub_matrix = subset_cost_matrix(
            cost_matrix_edges,
            incident_edges_G1,
            incident_edges_G2,
            G1.number_of_edges(),
            G2.number_of_edges(),
        )
        max_sum = sub_matrix.sum()
        # Take care of impossible assignments by assigning maximum cost:
        # both edges need to have the same direction and their other
        # end-points must sit at the same position of the matching.
        for row in range(count_G1):
            for col in range(count_G2):
                same_direction = direction_G1[row] == direction_G2[col]
                same_endpoint = index_G1[row] == index_G2[col]
                if not (same_direction and same_endpoint):
                    sub_matrix[row, col] = max_sum

        # Match the edges as per the LAP solution; the first array returned
        # by lapjv is read as "column assigned to each row".
        assignment, _, _ = lapjv(sub_matrix)
        for row, col in enumerate(assignment):
            cost += sub_matrix[row, col]
        for row, col in enumerate(assignment):
            if row < count_G1:
                if col < count_G2:
                    pairs.append(
                        (incident_edges_G1[row], incident_edges_G2[col])
                    )
                else:
                    pairs.append((incident_edges_G1[row], None))
            elif col < count_G2:
                pairs.append((None, incident_edges_G2[col]))
    elif count_G1 > 0:
        # only deletion possible
        offset = G2.number_of_edges()
        for edge in incident_edges_G1:
            cost += cost_matrix_edges[edge, offset + edge]
            pairs.append((edge, None))
    elif count_G2 > 0:
        # only insertion possible
        offset = G1.number_of_edges()
        for edge in incident_edges_G2:
            cost += cost_matrix_edges[offset + edge, edge]
            pairs.append((None, edge))

    return cost, pairs


def edit_cost_from_node_matching(
    G1, G2, cost_matrix_nodes, cost_matrix_edges, node_matching
):
    """Compute the edit cost implied by a complete node assignment.

    Parameters
    ----------
    G1, G2 : graph objects
        Must provide ``number_of_nodes`` / ``number_of_edges`` and whatever
        ``get_edges_to_match`` requires.
    cost_matrix_nodes, cost_matrix_edges : numpy.ndarray
        Node and edge cost matrices, indexed as ``[row, column]``.
    node_matching : sequence of int
        ``node_matching[i]`` is the column of ``cost_matrix_nodes`` assigned
        to row ``i``. Rows below ``G1.number_of_nodes()`` are G1 nodes;
        columns below ``G2.number_of_nodes()`` are G2 nodes.

    Returns
    -------
    tuple
        ``(matched_cost, matched_nodes, matched_edges)``; the last two are
        pairs of parallel lists (G1 side, G2 side) in which ``None`` marks an
        insertion or deletion.
    """
    num_G1_nodes = G1.number_of_nodes()
    num_G2_nodes = G2.number_of_nodes()

    matched_cost = 0.0
    matched_nodes = ([], [])
    matched_edges = ([], [])

    # Add the cost of matching nodes: rows of G1 nodes are substituted or deleted
    for row in range(num_G1_nodes):
        col = node_matching[row]
        matched_cost += cost_matrix_nodes[row, col]
        matched_nodes[0].append(row)
        if col < num_G2_nodes:
            matched_nodes[1].append(col)
        else:
            matched_nodes[1].append(None)

    # Remaining rows: one assigned to a G2 node is an insertion; the others
    # only contribute their cost.
    for row in range(num_G1_nodes, len(node_matching)):
        col = node_matching[row]
        matched_cost += cost_matrix_nodes[row, col]
        if col < num_G2_nodes:
            matched_nodes[0].append(None)
            matched_nodes[1].append(col)

    # Add the cost of matching edges, one node pair at a time, each against
    # the pairs placed before it.
    for position in range(len(matched_nodes[0])):
        edge_cost, edge_pairs = _match_incident_edges(
            G1,
            G2,
            matched_nodes[0][position],
            matched_nodes[1][position],
            matched_nodes[0][:position],
            matched_nodes[1][:position],
            cost_matrix_edges,
        )
        for edge_G1, edge_G2 in edge_pairs:
            matched_edges[0].append(edge_G1)
            matched_edges[1].append(edge_G2)
        matched_cost += edge_cost

    return (matched_cost, matched_nodes, matched_edges)
def contextual_cost_matrix_construction(G1, G2, def contextual_cost_matrix_construction(
node_substitution_cost, edge_substitution_cost, G1,
G1_node_deletion_cost, G1_edge_deletion_cost, G2,
G2_node_insertion_cost, G2_edge_insertion_cost): node_substitution_cost,
edge_substitution_cost,
G1_node_deletion_cost,
G1_edge_deletion_cost,
G2_node_insertion_cost,
G2_edge_insertion_cost,
):
# Calculates approximate GED using linear assignment on the nodes with bipartite algorithm # Calculates approximate GED using linear assignment on the nodes with bipartite algorithm
# cost matrix of node mappings # cost matrix of node mappings
...@@ -398,89 +603,174 @@ def contextual_cost_matrix_construction(G1, G2, ...@@ -398,89 +603,174 @@ def contextual_cost_matrix_construction(G1, G2,
num_G1_edges = G1.number_of_edges() num_G1_edges = G1.number_of_edges()
num_G2_edges = G2.number_of_edges() num_G2_edges = G2.number_of_edges()
cost_upper_bound = 2*(node_substitution_cost.sum() + G1_node_deletion_cost.sum() + G2_node_insertion_cost.sum() + 1) cost_upper_bound = 2 * (
cost_matrix = np.zeros((num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float) node_substitution_cost.sum()
+ G1_node_deletion_cost.sum()
+ G2_node_insertion_cost.sum()
cost_matrix[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost; + 1
cost_matrix[0:num_G1_nodes, num_G2_nodes:num_G2_nodes + num_G1_nodes] = np.array([G1_node_deletion_cost[i] if i == j \ )
else cost_upper_bound\ cost_matrix = np.zeros(
for i in range(num_G1_nodes) \ (num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float
for j in range(num_G1_nodes)]).reshape(num_G1_nodes, num_G1_nodes); )
cost_matrix[num_G1_nodes:num_G1_nodes + num_G2_nodes, 0:num_G2_nodes] = np.array([G2_node_insertion_cost[i] if i == j \
else cost_upper_bound\ cost_matrix[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost
for i in range(num_G2_nodes) \ cost_matrix[
for j in range(num_G2_nodes)]).reshape(num_G2_nodes, num_G2_nodes); 0:num_G1_nodes, num_G2_nodes : num_G2_nodes + num_G1_nodes
] = np.array(
[
self_edge_list_G1 = [np.array([], dtype=int)]*num_G1_nodes; G1_node_deletion_cost[i] if i == j else cost_upper_bound
self_edge_list_G2 = [np.array([], dtype=int)]*num_G2_nodes; for i in range(num_G1_nodes)
incoming_edges_G1 = [np.array([], dtype=int)]*num_G1_nodes; for j in range(num_G1_nodes)
incoming_edges_G2 = [np.array([], dtype=int)]*num_G2_nodes; ]
outgoing_edges_G1 = [np.array([], dtype=int)]*num_G1_nodes; ).reshape(
outgoing_edges_G2 = [np.array([], dtype=int)]*num_G2_nodes; num_G1_nodes, num_G1_nodes
)
cost_matrix[
num_G1_nodes : num_G1_nodes + num_G2_nodes, 0:num_G2_nodes
] = np.array(
[
G2_node_insertion_cost[i] if i == j else cost_upper_bound
for i in range(num_G2_nodes)
for j in range(num_G2_nodes)
]
).reshape(
num_G2_nodes, num_G2_nodes
)
self_edge_list_G1 = [np.array([], dtype=int)] * num_G1_nodes
self_edge_list_G2 = [np.array([], dtype=int)] * num_G2_nodes
incoming_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes
incoming_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes
outgoing_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes
outgoing_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes
for i in range(num_G1_nodes): for i in range(num_G1_nodes):
if G1.has_edge_between(i, i): if G1.has_edge_between(i, i):
self_edge_list_G1[i] = sorted(G1.edge_id(i, i, return_array=True).numpy()); self_edge_list_G1[i] = sorted(
incoming_edges_G1[i] = G1.in_edges([i], 'eid').numpy(); G1.edge_id(i, i, return_array=True).numpy()
incoming_edges_G1[i] = np.setdiff1d(incoming_edges_G1[i], self_edge_list_G1[i]); )
outgoing_edges_G1[i] = G1.out_edges([i], 'eid').numpy(); incoming_edges_G1[i] = G1.in_edges([i], "eid").numpy()
outgoing_edges_G1[i] = np.setdiff1d(outgoing_edges_G1[i], self_edge_list_G1[i]); incoming_edges_G1[i] = np.setdiff1d(
incoming_edges_G1[i], self_edge_list_G1[i]
)
outgoing_edges_G1[i] = G1.out_edges([i], "eid").numpy()
outgoing_edges_G1[i] = np.setdiff1d(
outgoing_edges_G1[i], self_edge_list_G1[i]
)
for i in range(num_G2_nodes): for i in range(num_G2_nodes):
if G2.has_edge_between(i, i): if G2.has_edge_between(i, i):
self_edge_list_G2[i] = sorted(G2.edge_id(i, i, return_array=True).numpy()); self_edge_list_G2[i] = sorted(
incoming_edges_G2[i] = G2.in_edges([i], 'eid').numpy(); G2.edge_id(i, i, return_array=True).numpy()
incoming_edges_G2[i] = np.setdiff1d(incoming_edges_G2[i], self_edge_list_G2[i]); )
outgoing_edges_G2[i] = G2.out_edges([i], 'eid').numpy(); incoming_edges_G2[i] = G2.in_edges([i], "eid").numpy()
outgoing_edges_G2[i] = np.setdiff1d(outgoing_edges_G2[i], self_edge_list_G2[i]); incoming_edges_G2[i] = np.setdiff1d(
incoming_edges_G2[i], self_edge_list_G2[i]
selected_deletion_G1 = [G1_edge_deletion_cost[np.concatenate((self_edge_list_G1[i], incoming_edges_G1[i], outgoing_edges_G1[i]))] for i in range(G1.number_of_nodes())]; )
selected_insertion_G2 = [G2_edge_insertion_cost[np.concatenate((self_edge_list_G2[i], incoming_edges_G2[i], outgoing_edges_G2[i]))] for i in range(G2.number_of_nodes())]; outgoing_edges_G2[i] = G2.out_edges([i], "eid").numpy()
outgoing_edges_G2[i] = np.setdiff1d(
outgoing_edges_G2[i], self_edge_list_G2[i]
)
selected_deletion_G1 = [
G1_edge_deletion_cost[
np.concatenate(
(
self_edge_list_G1[i],
incoming_edges_G1[i],
outgoing_edges_G1[i],
)
)
]
for i in range(G1.number_of_nodes())
]
selected_insertion_G2 = [
G2_edge_insertion_cost[
np.concatenate(
(
self_edge_list_G2[i],
incoming_edges_G2[i],
outgoing_edges_G2[i],
)
)
]
for i in range(G2.number_of_nodes())
]
# Add the cost of edge edition which are dependent of a node (see this as the cost associated with a substructure) # Add the cost of edge edition which are dependent of a node (see this as the cost associated with a substructure)
for i in range(num_G1_nodes): for i in range(num_G1_nodes):
for j in range(num_G2_nodes): for j in range(num_G2_nodes):
m = len(self_edge_list_G1[i])+len(incoming_edges_G1[i])+len(outgoing_edges_G1[i]); m = (
n = len(self_edge_list_G2[j])+len(incoming_edges_G2[j])+len(outgoing_edges_G2[j]); len(self_edge_list_G1[i])
+ len(incoming_edges_G1[i])
matrix_dim = m + n; + len(outgoing_edges_G1[i])
)
n = (
len(self_edge_list_G2[j])
+ len(incoming_edges_G2[j])
+ len(outgoing_edges_G2[j])
)
matrix_dim = m + n
if matrix_dim == 0: if matrix_dim == 0:
continue; continue
temp_edge_cost_matrix = np.empty((matrix_dim, matrix_dim)); temp_edge_cost_matrix = np.empty((matrix_dim, matrix_dim))
temp_edge_cost_matrix.fill(cost_upper_bound); temp_edge_cost_matrix.fill(cost_upper_bound)
temp_edge_cost_matrix[:len(self_edge_list_G1[i]),:len(self_edge_list_G2[j])] = edge_substitution_cost[self_edge_list_G1[i],:][:,self_edge_list_G2[j]]; temp_edge_cost_matrix[
temp_edge_cost_matrix[len(self_edge_list_G1[i]):len(self_edge_list_G1[i])+len(incoming_edges_G1[i]),len(self_edge_list_G2[j]):len(self_edge_list_G2[j])+len(incoming_edges_G2[j])] = edge_substitution_cost[incoming_edges_G1[i],:][:, incoming_edges_G2[j]]; : len(self_edge_list_G1[i]), : len(self_edge_list_G2[j])
temp_edge_cost_matrix[len(self_edge_list_G1[i])+len(incoming_edges_G1[i]):m,len(self_edge_list_G2[j])+len(incoming_edges_G2[j]):n] = edge_substitution_cost[outgoing_edges_G1[i],:][:, outgoing_edges_G2[j]]; ] = edge_substitution_cost[self_edge_list_G1[i], :][
:, self_edge_list_G2[j]
np.fill_diagonal(temp_edge_cost_matrix[:m, n:], selected_deletion_G1[i]); ]
np.fill_diagonal(temp_edge_cost_matrix[m:, :n], selected_insertion_G2[j]); temp_edge_cost_matrix[
len(self_edge_list_G1[i]) : len(self_edge_list_G1[i])
temp_edge_cost_matrix[m:, n:].fill(0); + len(incoming_edges_G1[i]),
row_ind, col_ind, _ = lapjv(temp_edge_cost_matrix); len(self_edge_list_G2[j]) : len(self_edge_list_G2[j])
+ len(incoming_edges_G2[j]),
] = edge_substitution_cost[incoming_edges_G1[i], :][
:, incoming_edges_G2[j]
]
temp_edge_cost_matrix[
len(self_edge_list_G1[i]) + len(incoming_edges_G1[i]) : m,
len(self_edge_list_G2[j]) + len(incoming_edges_G2[j]) : n,
] = edge_substitution_cost[outgoing_edges_G1[i], :][
:, outgoing_edges_G2[j]
]
np.fill_diagonal(
temp_edge_cost_matrix[:m, n:], selected_deletion_G1[i]
)
np.fill_diagonal(
temp_edge_cost_matrix[m:, :n], selected_insertion_G2[j]
)
temp_edge_cost_matrix[m:, n:].fill(0)
row_ind, col_ind, _ = lapjv(temp_edge_cost_matrix)
lap_cost = 0.00 lap_cost = 0.00
for k in range(len(row_ind)): for k in range(len(row_ind)):
lap_cost += temp_edge_cost_matrix[k, row_ind[k]]; lap_cost += temp_edge_cost_matrix[k, row_ind[k]]
cost_matrix[i,j] += lap_cost; cost_matrix[i, j] += lap_cost
for i in range(num_G1_nodes): for i in range(num_G1_nodes):
cost_matrix[i,num_G2_nodes+i] += selected_deletion_G1[i].sum() cost_matrix[i, num_G2_nodes + i] += selected_deletion_G1[i].sum()
for i in range(num_G2_nodes): for i in range(num_G2_nodes):
cost_matrix[num_G1_nodes+i,i] += selected_insertion_G2[i].sum() cost_matrix[num_G1_nodes + i, i] += selected_insertion_G2[i].sum()
return cost_matrix; return cost_matrix
def hausdorff_matching(G1, G2, def hausdorff_matching(
node_substitution_cost, edge_substitution_cost, G1,
G1_node_deletion_cost, G1_edge_deletion_cost, G2,
G2_node_insertion_cost, G2_edge_insertion_cost): node_substitution_cost,
edge_substitution_cost,
G1_node_deletion_cost,
G1_edge_deletion_cost,
G2_node_insertion_cost,
G2_edge_insertion_cost,
):
# Calculates approximate GED using hausdorff_matching # Calculates approximate GED using hausdorff_matching
# cost matrix of node mappings # cost matrix of node mappings
...@@ -490,44 +780,104 @@ def hausdorff_matching(G1, G2, ...@@ -490,44 +780,104 @@ def hausdorff_matching(G1, G2,
num_G1_edges = G1.number_of_edges() num_G1_edges = G1.number_of_edges()
num_G2_edges = G2.number_of_edges() num_G2_edges = G2.number_of_edges()
self_edge_list_G1 = [np.array([], dtype=int)]*num_G1_nodes; self_edge_list_G1 = [np.array([], dtype=int)] * num_G1_nodes
self_edge_list_G2 = [np.array([], dtype=int)]*num_G2_nodes; self_edge_list_G2 = [np.array([], dtype=int)] * num_G2_nodes
incoming_edges_G1 = [np.array([], dtype=int)]*num_G1_nodes; incoming_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes
incoming_edges_G2 = [np.array([], dtype=int)]*num_G2_nodes; incoming_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes
outgoing_edges_G1 = [np.array([], dtype=int)]*num_G1_nodes; outgoing_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes
outgoing_edges_G2 = [np.array([], dtype=int)]*num_G2_nodes; outgoing_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes
for i in range(num_G1_nodes): for i in range(num_G1_nodes):
if G1.has_edge_between(i, i): if G1.has_edge_between(i, i):
self_edge_list_G1[i] = sorted(G1.edge_id(i, i, return_array=True).numpy()); self_edge_list_G1[i] = sorted(
incoming_edges_G1[i] = G1.in_edges([i], 'eid').numpy(); G1.edge_id(i, i, return_array=True).numpy()
incoming_edges_G1[i] = np.setdiff1d(incoming_edges_G1[i], self_edge_list_G1[i]); )
outgoing_edges_G1[i] = G1.out_edges([i], 'eid').numpy(); incoming_edges_G1[i] = G1.in_edges([i], "eid").numpy()
outgoing_edges_G1[i] = np.setdiff1d(outgoing_edges_G1[i], self_edge_list_G1[i]); incoming_edges_G1[i] = np.setdiff1d(
incoming_edges_G1[i], self_edge_list_G1[i]
)
outgoing_edges_G1[i] = G1.out_edges([i], "eid").numpy()
outgoing_edges_G1[i] = np.setdiff1d(
outgoing_edges_G1[i], self_edge_list_G1[i]
)
for i in range(num_G2_nodes): for i in range(num_G2_nodes):
if G2.has_edge_between(i, i): if G2.has_edge_between(i, i):
self_edge_list_G2[i] = sorted(G2.edge_id(i, i, return_array=True).numpy()); self_edge_list_G2[i] = sorted(
incoming_edges_G2[i] = G2.in_edges([i], 'eid').numpy(); G2.edge_id(i, i, return_array=True).numpy()
incoming_edges_G2[i] = np.setdiff1d(incoming_edges_G2[i], self_edge_list_G2[i]); )
outgoing_edges_G2[i] = G2.out_edges([i], 'eid').numpy(); incoming_edges_G2[i] = G2.in_edges([i], "eid").numpy()
outgoing_edges_G2[i] = np.setdiff1d(outgoing_edges_G2[i], self_edge_list_G2[i]); incoming_edges_G2[i] = np.setdiff1d(
incoming_edges_G2[i], self_edge_list_G2[i]
)
outgoing_edges_G2[i] = G2.out_edges([i], "eid").numpy()
selected_deletion_self_G1 = [G1_edge_deletion_cost[self_edge_list_G1[i]] for i in range(G1.number_of_nodes())]; outgoing_edges_G2[i] = np.setdiff1d(
selected_insertion_self_G2 = [G2_edge_insertion_cost[self_edge_list_G2[i]] for i in range(G2.number_of_nodes())]; outgoing_edges_G2[i], self_edge_list_G2[i]
)
selected_deletion_incoming_G1 = [G1_edge_deletion_cost[incoming_edges_G1[i]] for i in range(G1.number_of_nodes())];
selected_insertion_incoming_G2 = [G2_edge_insertion_cost[incoming_edges_G2[i]] for i in range(G2.number_of_nodes())]; selected_deletion_self_G1 = [
G1_edge_deletion_cost[self_edge_list_G1[i]]
selected_deletion_outgoing_G1 = [G1_edge_deletion_cost[outgoing_edges_G1[i]] for i in range(G1.number_of_nodes())]; for i in range(G1.number_of_nodes())
selected_insertion_outgoing_G2 = [G2_edge_insertion_cost[outgoing_edges_G2[i]] for i in range(G2.number_of_nodes())]; ]
selected_insertion_self_G2 = [
selected_deletion_G1 = [G1_edge_deletion_cost[np.concatenate((self_edge_list_G1[i], incoming_edges_G1[i], outgoing_edges_G1[i]))] for i in range(G1.number_of_nodes())]; G2_edge_insertion_cost[self_edge_list_G2[i]]
selected_insertion_G2 = [G2_edge_insertion_cost[np.concatenate((self_edge_list_G2[i], incoming_edges_G2[i], outgoing_edges_G2[i]))] for i in range(G2.number_of_nodes())]; for i in range(G2.number_of_nodes())
]
cost_G1 = np.array([(G1_node_deletion_cost[i] + selected_deletion_G1[i].sum()/2) for i in range(num_G1_nodes)])
cost_G2 = np.array([(G2_node_insertion_cost[i] + selected_insertion_G2[i].sum()/2) for i in range(num_G2_nodes)]) selected_deletion_incoming_G1 = [
G1_edge_deletion_cost[incoming_edges_G1[i]]
for i in range(G1.number_of_nodes())
]
selected_insertion_incoming_G2 = [
G2_edge_insertion_cost[incoming_edges_G2[i]]
for i in range(G2.number_of_nodes())
]
selected_deletion_outgoing_G1 = [
G1_edge_deletion_cost[outgoing_edges_G1[i]]
for i in range(G1.number_of_nodes())
]
selected_insertion_outgoing_G2 = [
G2_edge_insertion_cost[outgoing_edges_G2[i]]
for i in range(G2.number_of_nodes())
]
selected_deletion_G1 = [
G1_edge_deletion_cost[
np.concatenate(
(
self_edge_list_G1[i],
incoming_edges_G1[i],
outgoing_edges_G1[i],
)
)
]
for i in range(G1.number_of_nodes())
]
selected_insertion_G2 = [
G2_edge_insertion_cost[
np.concatenate(
(
self_edge_list_G2[i],
incoming_edges_G2[i],
outgoing_edges_G2[i],
)
)
]
for i in range(G2.number_of_nodes())
]
cost_G1 = np.array(
[
(G1_node_deletion_cost[i] + selected_deletion_G1[i].sum() / 2)
for i in range(num_G1_nodes)
]
)
cost_G2 = np.array(
[
(G2_node_insertion_cost[i] + selected_insertion_G2[i].sum() / 2)
for i in range(num_G2_nodes)
]
)
for i in range(num_G1_nodes): for i in range(num_G1_nodes):
for j in range(num_G2_nodes): for j in range(num_G2_nodes):
...@@ -538,140 +888,274 @@ def hausdorff_matching(G1, G2, ...@@ -538,140 +888,274 @@ def hausdorff_matching(G1, G2,
c1_outgoing = deepcopy(selected_deletion_outgoing_G1[i]) c1_outgoing = deepcopy(selected_deletion_outgoing_G1[i])
c2_outgoing = deepcopy(selected_insertion_outgoing_G2[j]) c2_outgoing = deepcopy(selected_insertion_outgoing_G2[j])
for k, a in enumerate(self_edge_list_G1[i]):
for k,a in enumerate(self_edge_list_G1[i]): for l, b in enumerate(self_edge_list_G2[j]):
for l,b in enumerate(self_edge_list_G2[j]): c1_self[k] = min(
c1_self[k] = min(c1_self[k], edge_substitution_cost[a,b]/2); c1_self[k], edge_substitution_cost[a, b] / 2
c2_self[l] = min(c2_self[l], edge_substitution_cost[a,b]/2); )
c2_self[l] = min(
for k,a in enumerate(incoming_edges_G1[i]): c2_self[l], edge_substitution_cost[a, b] / 2
for l,b in enumerate(incoming_edges_G2[j]): )
c1_incoming[k] = min(c1_incoming[k], edge_substitution_cost[a,b]/2);
c2_incoming[l] = min(c2_incoming[l], edge_substitution_cost[a,b]/2); for k, a in enumerate(incoming_edges_G1[i]):
for l, b in enumerate(incoming_edges_G2[j]):
for k,a in enumerate(outgoing_edges_G1[i]): c1_incoming[k] = min(
for l,b in enumerate(outgoing_edges_G2[j]): c1_incoming[k], edge_substitution_cost[a, b] / 2
c1_outgoing[k] = min(c1_outgoing[k], edge_substitution_cost[a,b]/2); )
c2_outgoing[l] = min(c2_outgoing[l], edge_substitution_cost[a,b]/2); c2_incoming[l] = min(
c2_incoming[l], edge_substitution_cost[a, b] / 2
edge_hausdorff_lower_bound = 0.0; )
if len(selected_deletion_G1[i])>len(selected_insertion_G2[j]): for k, a in enumerate(outgoing_edges_G1[i]):
idx = np.argpartition(selected_deletion_G1[i], (len(selected_deletion_G1[i])-len(selected_insertion_G2[j]))); for l, b in enumerate(outgoing_edges_G2[j]):
edge_hausdorff_lower_bound = selected_deletion_G1[i][idx[:(len(selected_deletion_G1[i])-len(selected_insertion_G2[j]))]].sum(); c1_outgoing[k] = min(
elif len(selected_deletion_G1[i])<len(selected_insertion_G2[j]): c1_outgoing[k], edge_substitution_cost[a, b] / 2
idx = np.argpartition(selected_insertion_G2[j], (len(selected_insertion_G2[j])-len(selected_deletion_G1[i]))); )
edge_hausdorff_lower_bound = selected_insertion_G2[j][idx[:(len(selected_insertion_G2[j])-len(selected_deletion_G1[i]))]].sum(); c2_outgoing[l] = min(
c2_outgoing[l], edge_substitution_cost[a, b] / 2
sc_cost = 0.5*(node_substitution_cost[i,j]+0.5*max(c1_self.sum() + c2_self.sum() + \ )
c1_incoming.sum() + c2_incoming.sum() + \
c1_outgoing.sum() + c2_outgoing.sum(), \ edge_hausdorff_lower_bound = 0.0
edge_hausdorff_lower_bound));
if len(selected_deletion_G1[i]) > len(selected_insertion_G2[j]):
idx = np.argpartition(
selected_deletion_G1[i],
(
len(selected_deletion_G1[i])
- len(selected_insertion_G2[j])
),
)
edge_hausdorff_lower_bound = selected_deletion_G1[i][
idx[
: (
len(selected_deletion_G1[i])
- len(selected_insertion_G2[j])
)
]
].sum()
elif len(selected_deletion_G1[i]) < len(selected_insertion_G2[j]):
idx = np.argpartition(
selected_insertion_G2[j],
(
len(selected_insertion_G2[j])
- len(selected_deletion_G1[i])
),
)
edge_hausdorff_lower_bound = selected_insertion_G2[j][
idx[
: (
len(selected_insertion_G2[j])
- len(selected_deletion_G1[i])
)
]
].sum()
sc_cost = 0.5 * (
node_substitution_cost[i, j]
+ 0.5
* max(
c1_self.sum()
+ c2_self.sum()
+ c1_incoming.sum()
+ c2_incoming.sum()
+ c1_outgoing.sum()
+ c2_outgoing.sum(),
edge_hausdorff_lower_bound,
)
)
if cost_G1[i] > sc_cost: if cost_G1[i] > sc_cost:
cost_G1[i] = sc_cost; cost_G1[i] = sc_cost
if cost_G2[j] > sc_cost: if cost_G2[j] > sc_cost:
cost_G2[j] = sc_cost; cost_G2[j] = sc_cost
graph_hausdorff_lower_bound = 0.0
graph_hausdorff_lower_bound = 0.0;
if num_G1_nodes > num_G2_nodes: if num_G1_nodes > num_G2_nodes:
idx = np.argpartition(G1_node_deletion_cost, (num_G1_nodes - num_G2_nodes)); idx = np.argpartition(
graph_hausdorff_lower_bound = G1_node_deletion_cost[idx[:(num_G1_nodes - num_G2_nodes)]].sum(); G1_node_deletion_cost, (num_G1_nodes - num_G2_nodes)
)
graph_hausdorff_lower_bound = G1_node_deletion_cost[
idx[: (num_G1_nodes - num_G2_nodes)]
].sum()
elif num_G1_nodes < num_G2_nodes: elif num_G1_nodes < num_G2_nodes:
idx = np.argpartition(G2_node_insertion_cost, (num_G2_nodes - num_G1_nodes)); idx = np.argpartition(
graph_hausdorff_lower_bound = G2_node_insertion_cost[idx[:(num_G2_nodes - num_G1_nodes)]].sum(); G2_node_insertion_cost, (num_G2_nodes - num_G1_nodes)
)
graph_hausdorff_lower_bound = G2_node_insertion_cost[
idx[: (num_G2_nodes - num_G1_nodes)]
].sum()
graph_hausdorff_cost = max(graph_hausdorff_lower_bound, cost_G1.sum() + cost_G2.sum()); graph_hausdorff_cost = max(
return graph_hausdorff_cost; graph_hausdorff_lower_bound, cost_G1.sum() + cost_G2.sum()
)
return graph_hausdorff_cost
def _push_successors(
    open_list,
    G1,
    G2,
    matched_cost,
    matched_nodes,
    matched_edges,
    unprocessed_nodes_G1,
    unprocessed_nodes_G2,
    unprocessed_edges_G1,
    unprocessed_edges_G2,
    cost_matrix_nodes,
    cost_matrix_edges,
):
    """Push every successor of one partial matching onto the heap ``open_list``.

    While G1 still has unprocessed nodes, its first unprocessed node is paired
    with each unprocessed node of G2 (substitution) and then with ``None``
    (deletion). Once G1 is exhausted, each remaining node of G2 is paired with
    ``None`` on the G1 side (insertion).
    """

    def push(node_G1, node_G2):
        heappush(
            open_list,
            search_tree_node(
                G1,
                G2,
                matched_cost,
                matched_nodes,
                matched_edges,
                node_G1,
                node_G2,
                unprocessed_nodes_G1,
                unprocessed_nodes_G2,
                unprocessed_edges_G1,
                unprocessed_edges_G2,
                cost_matrix_nodes,
                cost_matrix_edges,
            ),
        )

    if len(unprocessed_nodes_G1) > 0:
        node_G1 = unprocessed_nodes_G1[0]
        for node_G2 in unprocessed_nodes_G2:
            push(node_G1, node_G2)
        # Consider node deletion
        push(node_G1, None)
    else:
        # Only insertions of G2 nodes remain possible
        for node_G2 in unprocessed_nodes_G2:
            push(None, node_G2)


def a_star_search(G1, G2, cost_matrix_nodes, cost_matrix_edges, max_beam_size):
    """Best-first (A-star / beam) search over partial node matchings.

    Parameters
    ----------
    G1, G2 :
        Graphs exposing ``number_of_nodes()`` and ``number_of_edges()``; they
        are forwarded unchanged to ``search_tree_node``.
    cost_matrix_nodes, cost_matrix_edges :
        Cost matrices forwarded unchanged to ``search_tree_node``.
    max_beam_size : int
        When positive, the open list is truncated to its ``max_beam_size``
        smallest entries after every expansion inside the main loop (beam
        search). Non-positive values disable truncation.

    Returns
    -------
    tuple or None
        ``(matched_cost, matched_nodes, matched_edges)`` of the first state
        popped from the open list that has no unprocessed nodes left in either
        graph, or ``None`` if the open list runs empty first.

    Notes
    -----
    The pop order is whatever ordering ``search_tree_node`` instances define
    for comparison (that class lives elsewhere in this file).
    """
    open_list = []

    # No nodes/edges are matched in the beginning
    root_nodes_G1 = list(range(G1.number_of_nodes()))
    root_nodes_G2 = list(range(G2.number_of_nodes()))
    root_edges_G1 = list(range(G1.number_of_edges()))
    root_edges_G2 = list(range(G2.number_of_edges()))

    # Nothing to match at all: the empty matching is already complete.
    # (Previously this case, and any empty G1, crashed on unprocessed_nodes_G1[0].)
    if len(root_nodes_G1) == 0 and len(root_nodes_G2) == 0:
        return (0.0, ([], []), ([], []))

    # Create the first nodes of the search tree. The root expansion is
    # deliberately not beam-pruned, matching the behaviour of the main loop
    # where pruning only happens after a popped state has been expanded.
    _push_successors(
        open_list,
        G1,
        G2,
        0.0,
        ([], []),
        ([], []),
        root_nodes_G1,
        root_nodes_G2,
        root_edges_G1,
        root_edges_G2,
        cost_matrix_nodes,
        cost_matrix_edges,
    )

    while len(open_list) > 0:
        # TODO: Create a node that processes multi node insertion deletion in one search node,
        # as opposed in multiple search nodes here
        parent_tree_node = heappop(open_list)
        unprocessed_nodes_G1 = parent_tree_node.unprocessed_nodes_G1
        unprocessed_nodes_G2 = parent_tree_node.unprocessed_nodes_G2

        if len(unprocessed_nodes_G1) == 0 and len(unprocessed_nodes_G2) == 0:
            return (
                parent_tree_node.matched_cost,
                parent_tree_node.matched_nodes,
                parent_tree_node.matched_edges,
            )

        _push_successors(
            open_list,
            G1,
            G2,
            parent_tree_node.matched_cost,
            parent_tree_node.matched_nodes,
            parent_tree_node.matched_edges,
            unprocessed_nodes_G1,
            unprocessed_nodes_G2,
            parent_tree_node.unprocessed_edges_G1,
            parent_tree_node.unprocessed_edges_G2,
            cost_matrix_nodes,
            cost_matrix_edges,
        )

        # Retain the top-k elements in open-list iff algorithm is beam
        if max_beam_size > 0 and len(open_list) > max_beam_size:
            open_list = nsmallest(max_beam_size, open_list)
            heapify(open_list)

    return None
def get_sorted_mapping(mapping_tuple, len1, len2):
    """Turn a pair of aligned match lists into two index-addressed lookup lists.

    ``mapping_tuple[0][k]`` and ``mapping_tuple[1][k]`` form the k-th matched
    pair. The first returned list has ``len1`` slots and stores, at the
    position of each first-side element, its partner from the second side; the
    second returned list has ``len2`` slots and stores the reverse direction.
    Pairs in which either side is ``None`` are skipped, so their slots stay
    ``None``.
    """
    forward = [None] * len1
    backward = [None] * len2
    for position, src in enumerate(mapping_tuple[0]):
        if src is None:
            continue
        # The second list is only consulted once the first entry is known to be set
        dst = mapping_tuple[1][position]
        if dst is None:
            continue
        forward[src] = dst
        backward[dst] = src
    return (forward, backward)
def graph_edit_distance(
    G1,
    G2,
    node_substitution_cost=None,
    edge_substitution_cost=None,
    G1_node_deletion_cost=None,
    G2_node_insertion_cost=None,
    G1_edge_deletion_cost=None,
    G2_edge_insertion_cost=None,
    algorithm="bipartite",
    max_beam_size=100,
):
    """Returns GED (graph edit distance) between DGLGraphs G1 and G2.

    Parameters
    ----------
    G1, G2 : DGLGraph
        The two graphs to compare. If both are ``None`` the result is
        ``(0.0, ([], []), ([], []))``.
    node_substitution_cost, edge_substitution_cost
        Substitution costs, passed through ``validate_cost_functions``.
    G1_node_deletion_cost, G1_edge_deletion_cost
        Deletion costs for G1, passed through ``validate_cost_functions``.
    G2_node_insertion_cost, G2_edge_insertion_cost
        Insertion costs for G2, passed through ``validate_cost_functions``.
        Per that function's documentation, a cost left as ``None`` is
        initialized to 0 for substitution and 1 for insertion/deletion.
    algorithm : str
        ``"astar"`` or ``"beam"`` run ``a_star_search``; ``"hausdorff"`` runs
        ``hausdorff_matching``; any other value (default ``"bipartite"``)
        solves a linear assignment problem with ``lapjv``.
    max_beam_size : int
        Only used when ``algorithm == "beam"``; for every other algorithm it
        is replaced by ``-1`` before the search is called.

    Returns
    -------
    tuple
        ``(cost, node_mapping, edge_mapping)``. For ``"hausdorff"`` both
        mappings are ``None``. Otherwise each mapping is the pair of lists
        produced by ``get_sorted_mapping``, sized by the node/edge counts of
        G1 and G2.
    """
    # Handle corner cases
    if G1 is None and G2 is None:
        return (0.0, ([], []), ([], []))
    # NOTE(review): the case where exactly one graph is None has no dedicated
    # handling; it falls through to the validation below, as before.

    # Validate
    if algorithm != "beam":
        # A non-positive beam size disables beam pruning in the search.
        max_beam_size = -1

    (
        node_substitution_cost,
        edge_substitution_cost,
        G1_node_deletion_cost,
        G1_edge_deletion_cost,
        G2_node_insertion_cost,
        G2_edge_insertion_cost,
    ) = validate_cost_functions(
        G1,
        G2,
        node_substitution_cost,
        edge_substitution_cost,
        G1_node_deletion_cost,
        G1_edge_deletion_cost,
        G2_node_insertion_cost,
        G2_edge_insertion_cost,
    )

    # The helpers below all take the validated costs in this same order.
    validated_costs = (
        node_substitution_cost,
        edge_substitution_cost,
        G1_node_deletion_cost,
        G1_edge_deletion_cost,
        G2_node_insertion_cost,
        G2_edge_insertion_cost,
    )

    # cost matrices for LAP solution
    cost_matrix_nodes, cost_matrix_edges = construct_cost_functions(
        G1, G2, *validated_costs
    )

    if algorithm == "hausdorff":
        hausdorff_cost = hausdorff_matching(G1, G2, *validated_costs)
        return (hausdorff_cost, None, None)

    if algorithm == "astar" or algorithm == "beam":
        (matched_cost, matched_nodes, matched_edges) = a_star_search(
            G1, G2, cost_matrix_nodes, cost_matrix_edges, max_beam_size
        )
    else:
        cost_matrix = contextual_cost_matrix_construction(
            G1, G2, *validated_costs
        )
        # Match the nodes as per the LAP solution
        row_ind, _, _ = lapjv(cost_matrix)
        (
            matched_cost,
            matched_nodes,
            matched_edges,
        ) = edit_cost_from_node_matching(
            G1, G2, cost_matrix_nodes, cost_matrix_edges, row_ind
        )

    # Both matching-based algorithms report their result in the same shape.
    return (
        matched_cost,
        get_sorted_mapping(
            matched_nodes, G1.number_of_nodes(), G2.number_of_nodes()
        ),
        get_sorted_mapping(
            matched_edges, G1.number_of_edges(), G2.number_of_edges()
        ),
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment