Unverified Commit f118ea95 authored by Hongzhi (Steve), Chen's avatar Hongzhi (Steve), Chen Committed by GitHub
Browse files
parent c59000ac
......@@ -2,16 +2,24 @@ import dgl
import numpy as np
from heapq import heappush, heappop, heapify, nsmallest
from copy import deepcopy
# We use lapjv implementation (https://github.com/src-d/lapjv) to solve assignment problem, because of its scalability
# Also see https://github.com/berhane/LAP-solvers for benchmarking of LAP solvers
from lapjv import lapjv
# Absolute tolerance under which two accumulated edit costs are treated as
# equal when search-tree nodes are ordered.
EPSILON = 0.0000001
def validate_cost_functions(G1, G2,
node_substitution_cost=None, edge_substitution_cost=None,
G1_node_deletion_cost=None, G1_edge_deletion_cost=None,
G2_node_insertion_cost=None, G2_edge_insertion_cost=None):
def validate_cost_functions(
G1,
G2,
node_substitution_cost=None,
edge_substitution_cost=None,
G1_node_deletion_cost=None,
G1_edge_deletion_cost=None,
G2_node_insertion_cost=None,
G2_edge_insertion_cost=None,
):
"""Validates cost functions (substitution, insertion, deletion) and initializes them with default=0 for substitution
and default=1 for insertion/deletion
if the provided ones are None.
......@@ -28,43 +36,59 @@ def validate_cost_functions(G1, G2,
# if any cost matrix is None, initialize it with default costs
if node_substitution_cost is None:
node_substitution_cost = np.zeros((num_G1_nodes, num_G2_nodes), dtype=float)
node_substitution_cost = np.zeros(
(num_G1_nodes, num_G2_nodes), dtype=float
)
else:
assert node_substitution_cost.shape == (num_G1_nodes, num_G2_nodes);
assert node_substitution_cost.shape == (num_G1_nodes, num_G2_nodes)
if edge_substitution_cost is None:
edge_substitution_cost = np.zeros((num_G1_edges, num_G2_edges), dtype=float)
edge_substitution_cost = np.zeros(
(num_G1_edges, num_G2_edges), dtype=float
)
else:
assert edge_substitution_cost.shape == (num_G1_edges, num_G2_edges);
assert edge_substitution_cost.shape == (num_G1_edges, num_G2_edges)
if G1_node_deletion_cost is None:
G1_node_deletion_cost = np.ones(num_G1_nodes, dtype=float)
else:
assert G1_node_deletion_cost.shape[0] == num_G1_nodes;
assert G1_node_deletion_cost.shape[0] == num_G1_nodes
if G1_edge_deletion_cost is None:
G1_edge_deletion_cost = np.ones(num_G1_edges, dtype=float)
else:
assert G1_edge_deletion_cost.shape[0] == num_G1_edges;
assert G1_edge_deletion_cost.shape[0] == num_G1_edges
if G2_node_insertion_cost is None:
G2_node_insertion_cost = np.ones(num_G2_nodes, dtype=float)
else:
assert G2_node_insertion_cost.shape[0] == num_G2_nodes;
assert G2_node_insertion_cost.shape[0] == num_G2_nodes
if G2_edge_insertion_cost is None:
G2_edge_insertion_cost = np.ones(num_G2_edges, dtype=float)
else:
assert G2_edge_insertion_cost.shape[0] == num_G2_edges;
return node_substitution_cost, edge_substitution_cost, \
G1_node_deletion_cost, G1_edge_deletion_cost, \
G2_node_insertion_cost, G2_edge_insertion_cost;
def construct_cost_functions(G1, G2,
node_substitution_cost, edge_substitution_cost,
G1_node_deletion_cost, G1_edge_deletion_cost,
G2_node_insertion_cost, G2_edge_insertion_cost):
assert G2_edge_insertion_cost.shape[0] == num_G2_edges
return (
node_substitution_cost,
edge_substitution_cost,
G1_node_deletion_cost,
G1_edge_deletion_cost,
G2_node_insertion_cost,
G2_edge_insertion_cost,
)
def construct_cost_functions(
G1,
G2,
node_substitution_cost,
edge_substitution_cost,
G1_node_deletion_cost,
G1_edge_deletion_cost,
G2_node_insertion_cost,
G2_edge_insertion_cost,
):
"""Constructs cost matrices for LAP solution
......@@ -77,318 +101,499 @@ def construct_cost_functions(G1, G2,
num_G1_edges = G1.number_of_edges()
num_G2_edges = G2.number_of_edges()
# cost matrix of node mappings
cost_upper_bound = node_substitution_cost.sum() + G1_node_deletion_cost.sum() + G2_node_insertion_cost.sum() + 1
C_node = np.zeros((num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float)
C_node[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost;
C_node[0:num_G1_nodes, num_G2_nodes:num_G2_nodes + num_G1_nodes] = np.array([G1_node_deletion_cost[i] if i == j \
else cost_upper_bound\
for i in range(num_G1_nodes) \
for j in range(num_G1_nodes)]).reshape(num_G1_nodes, num_G1_nodes);
C_node[num_G1_nodes:num_G1_nodes + num_G2_nodes, 0:num_G2_nodes] = np.array([G2_node_insertion_cost[i] if i == j \
else cost_upper_bound\
for i in range(num_G2_nodes) \
for j in range(num_G2_nodes)]).reshape(num_G2_nodes, num_G2_nodes);
cost_upper_bound = (
node_substitution_cost.sum()
+ G1_node_deletion_cost.sum()
+ G2_node_insertion_cost.sum()
+ 1
)
C_node = np.zeros(
(num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float
)
C_node[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost
C_node[
0:num_G1_nodes, num_G2_nodes : num_G2_nodes + num_G1_nodes
] = np.array(
[
G1_node_deletion_cost[i] if i == j else cost_upper_bound
for i in range(num_G1_nodes)
for j in range(num_G1_nodes)
]
).reshape(
num_G1_nodes, num_G1_nodes
)
C_node[
num_G1_nodes : num_G1_nodes + num_G2_nodes, 0:num_G2_nodes
] = np.array(
[
G2_node_insertion_cost[i] if i == j else cost_upper_bound
for i in range(num_G2_nodes)
for j in range(num_G2_nodes)
]
).reshape(
num_G2_nodes, num_G2_nodes
)
# cost matrix of edge mappings
cost_upper_bound = edge_substitution_cost.sum() + G1_edge_deletion_cost.sum() + G2_edge_insertion_cost.sum() + 1
C_edge = np.zeros((num_G1_edges + num_G2_edges, num_G1_edges + num_G2_edges), dtype=float)
cost_upper_bound = (
edge_substitution_cost.sum()
+ G1_edge_deletion_cost.sum()
+ G2_edge_insertion_cost.sum()
+ 1
)
C_edge = np.zeros(
(num_G1_edges + num_G2_edges, num_G1_edges + num_G2_edges), dtype=float
)
C_edge[0:num_G1_edges, 0:num_G2_edges] = edge_substitution_cost
C_edge[
0:num_G1_edges, num_G2_edges : num_G2_edges + num_G1_edges
] = np.array(
[
G1_edge_deletion_cost[i] if i == j else cost_upper_bound
for i in range(num_G1_edges)
for j in range(num_G1_edges)
]
).reshape(
num_G1_edges, num_G1_edges
)
C_edge[
num_G1_edges : num_G1_edges + num_G2_edges, 0:num_G2_edges
] = np.array(
[
G2_edge_insertion_cost[i] if i == j else cost_upper_bound
for i in range(num_G2_edges)
for j in range(num_G2_edges)
]
).reshape(
num_G2_edges, num_G2_edges
)
return C_node, C_edge
C_edge[0:num_G1_edges, 0:num_G2_edges] = edge_substitution_cost;
C_edge[0:num_G1_edges, num_G2_edges:num_G2_edges + num_G1_edges] = np.array([G1_edge_deletion_cost[i] if i == j \
else cost_upper_bound\
for i in range(num_G1_edges) \
for j in range(num_G1_edges)]).reshape(num_G1_edges, num_G1_edges);
C_edge[num_G1_edges:num_G1_edges + num_G2_edges, 0:num_G2_edges] = np.array([G2_edge_insertion_cost[i] if i == j \
else cost_upper_bound\
for i in range(num_G2_edges) \
for j in range(num_G2_edges)]).reshape(num_G2_edges, num_G2_edges);
return C_node, C_edge;
def get_edges_to_match(G, node_id, matched_nodes):
    """Collect the edges of ``G`` that can be decided once ``node_id`` is placed.

    These are the self-loops on ``node_id`` plus every edge that joins
    ``node_id`` to a node already listed in ``matched_nodes``.

    Parameters
    ----------
    G : graph
        Must provide ``has_edge_between``, ``edge_id``, ``in_edges`` and
        ``out_edges`` whose results expose ``.numpy()`` (presumably a
        DGLGraph -- confirm against the callers).
    node_id : int
        The node whose incident edges are collected.
    matched_nodes : list
        Previously processed nodes of ``G`` in processing order. Entries that
        are not node ids (e.g. ``None``) never match an end-point.

    Returns
    -------
    incident_edges : numpy.ndarray of int
        Ids of the selected edges: self-loops first, then incoming edges,
        then outgoing edges.
    index : numpy.ndarray of int
        For each edge, the position in ``matched_nodes`` of its other
        end-point, or ``-1`` for a self-loop.
    direction : numpy.ndarray of int
        ``0`` for a self-loop, ``-1`` for an incoming edge and ``1`` for an
        outgoing edge.
    """
    # First position of every matched node, built once so that each neighbour
    # costs a single hash lookup instead of two linear scans of the list.
    position = {}
    for pos, node in enumerate(matched_nodes):
        position.setdefault(node, pos)

    def _edges_to_matched(endpoints, edge_ids, sign):
        # Keep only the edges whose other end-point has already been matched.
        endpoints = endpoints.numpy()
        edge_ids = edge_ids.numpy()
        hits = [
            (k, position[node])
            for k, node in enumerate(endpoints.tolist())
            if node in position
        ]
        keep = np.array([k for k, _ in hits], dtype=int)
        where = np.array([p for _, p in hits], dtype=int)
        return edge_ids[keep], where, np.full(len(hits), sign, dtype=int)

    incident_edges = np.array([], dtype=int)
    index = np.array([], dtype=int)
    direction = np.array([], dtype=int)

    # Self-loops have no "other" end-point, hence index -1 and direction 0.
    if G.has_edge_between(node_id, node_id):
        self_edge_ids = G.edge_id(node_id, node_id, return_array=True).numpy()
        incident_edges = np.concatenate((incident_edges, self_edge_ids))
        index = np.concatenate((index, [-1] * len(self_edge_ids)))
        direction = np.concatenate((direction, [0] * len(self_edge_ids)))

    # Edges arriving from already matched predecessors.
    src, _, eid = G.in_edges([node_id], "all")
    edges, where, sign = _edges_to_matched(src, eid, -1)
    incident_edges = np.concatenate((incident_edges, edges))
    index = np.concatenate((index, where))
    direction = np.concatenate((direction, sign))

    # Edges leaving towards already matched successors.
    _, dst, eid = G.out_edges([node_id], "all")
    edges, where, sign = _edges_to_matched(dst, eid, 1)
    incident_edges = np.concatenate((incident_edges, edges))
    index = np.concatenate((index, where))
    direction = np.concatenate((direction, sign))

    return incident_edges, index, direction
def subset_cost_matrix(cost_matrix, row_ids, col_ids, num_rows, num_cols):
    """Extract the sub-problem of ``cost_matrix`` for the given rows/columns.

    ``cost_matrix`` has shape ``(num_rows + num_cols, num_rows + num_cols)``.
    Besides the requested rows and columns, the result also keeps the
    auxiliary row ``k + num_rows`` for every selected column ``k`` and the
    auxiliary column ``k + num_cols`` for every selected row ``k``, so the
    returned matrix is square with side ``len(row_ids) + len(col_ids)``.

    Parameters
    ----------
    cost_matrix : numpy.ndarray
        Full square cost matrix.
    row_ids, col_ids : sequence of int
        Row / column ids to keep. Either may be empty.
    num_rows, num_cols : int
        Number of real (non-auxiliary) rows / columns of ``cost_matrix``.

    Returns
    -------
    numpy.ndarray
        A new array (not a view), safe for the caller to modify.
    """
    # Force an integer dtype: an empty list or a default ``np.array([])`` would
    # otherwise turn the concatenated index into float64, which cannot index.
    row_ids = np.asarray(row_ids, dtype=int)
    col_ids = np.asarray(col_ids, dtype=int)
    extended_row_ids = np.concatenate((row_ids, col_ids + num_rows))
    extended_col_ids = np.concatenate((col_ids, row_ids + num_cols))
    return cost_matrix[np.ix_(extended_row_ids, extended_col_ids)]
class search_tree_node:
    """One state of the graph-edit-distance search tree.

    A node extends its parent's partial matching by one node operation:
    substituting ``node_G1`` with ``node_G2``, deleting ``node_G1``
    (``node_G2 is None``) or inserting ``node_G2`` (``node_G1 is None``).

    Attributes
    ----------
    matched_cost : float
        Cost of all node and edge operations fixed so far.
    future_approximate_cost : float
        LAP-based estimate of the cost of the still unprocessed nodes and
        edges.
    matched_nodes, matched_edges : pair of lists
        Parallel lists ``(G1 items, G2 items)``; ``None`` marks an insertion
        or a deletion.
    unprocessed_nodes_G1, unprocessed_nodes_G2 : list
        Nodes not yet covered by the matching.
    unprocessed_edges_G1, unprocessed_edges_G2 : list
        Edges not yet covered by the matching.
    """

    def __init__(
        self,
        G1,
        G2,
        parent_matched_cost,
        parent_matched_nodes,
        parent_matched_edges,
        node_G1,
        node_G2,
        parent_unprocessed_nodes_G1,
        parent_unprocessed_nodes_G2,
        parent_unprocessed_edges_G1,
        parent_unprocessed_edges_G2,
        cost_matrix_nodes,
        cost_matrix_edges,
    ):
        num_nodes_G1 = G1.number_of_nodes()
        num_nodes_G2 = G2.number_of_nodes()
        num_edges_G1 = G1.number_of_edges()
        num_edges_G2 = G2.number_of_edges()

        self.matched_cost = parent_matched_cost
        self.future_approximate_cost = 0.0
        # Deep copies keep the parent's state untouched by this extension.
        self.matched_nodes = deepcopy(parent_matched_nodes)
        self.matched_nodes[0].append(node_G1)
        self.matched_nodes[1].append(node_G2)
        self.matched_edges = deepcopy(parent_matched_edges)
        self.unprocessed_nodes_G1 = [
            node for node in parent_unprocessed_nodes_G1 if node != node_G1
        ]
        self.unprocessed_nodes_G2 = [
            node for node in parent_unprocessed_nodes_G2 if node != node_G2
        ]

        # Cost of the node operation performed at this tree node.
        if node_G1 is not None and node_G2 is not None:  # substitution
            self.matched_cost += cost_matrix_nodes[node_G1, node_G2]
        elif node_G1 is not None:  # deletion of node_G1
            self.matched_cost += cost_matrix_nodes[
                node_G1, node_G1 + num_nodes_G2
            ]
        elif node_G2 is not None:  # insertion of node_G2
            self.matched_cost += cost_matrix_nodes[
                node_G2 + num_nodes_G1, node_G2
            ]

        # Edges joining the new node to itself or to nodes the parent had
        # already matched can be decided now.
        incident_edges_G1 = np.array([], dtype=int)
        index_G1 = direction_G1 = None
        if node_G1 is not None:
            incident_edges_G1, index_G1, direction_G1 = get_edges_to_match(
                G1, node_G1, parent_matched_nodes[0]
            )
        incident_edges_G2 = np.array([], dtype=int)
        index_G2 = direction_G2 = None
        if node_G2 is not None:
            incident_edges_G2, index_G2, direction_G2 = get_edges_to_match(
                G2, node_G2, parent_matched_nodes[1]
            )
        self.matched_cost += self._match_incident_edges(
            cost_matrix_edges,
            incident_edges_G1,
            index_G1,
            direction_G1,
            incident_edges_G2,
            index_G2,
            direction_G2,
            num_edges_G1,
            num_edges_G2,
        )

        # Estimated cost of the nodes that are still unprocessed.
        self.future_approximate_cost += self._future_cost(
            cost_matrix_nodes,
            self.unprocessed_nodes_G1,
            self.unprocessed_nodes_G2,
            num_nodes_G1,
            num_nodes_G2,
        )

        # Estimated cost of the edges that are still unprocessed.
        self.unprocessed_edges_G1 = [
            edge
            for edge in parent_unprocessed_edges_G1
            if edge not in incident_edges_G1
        ]
        self.unprocessed_edges_G2 = [
            edge
            for edge in parent_unprocessed_edges_G2
            if edge not in incident_edges_G2
        ]
        self.future_approximate_cost += self._future_cost(
            cost_matrix_edges,
            self.unprocessed_edges_G1,
            self.unprocessed_edges_G2,
            num_edges_G1,
            num_edges_G2,
        )

    @staticmethod
    def _solve_assignment(cost_matrix):
        """Solve the LAP; return (column chosen for each row, total cost)."""
        row_ind, _, _ = lapjv(cost_matrix)
        total = 0.0
        for row, col in enumerate(row_ind):
            total += cost_matrix[row, col]
        return row_ind, total

    @staticmethod
    def _future_cost(cost_matrix, items_G1, items_G2, size_G1, size_G2):
        """Estimate the cost of the unprocessed items of both graphs.

        Uses a LAP over the sub-matrix when both sides have items left;
        otherwise sums the deletion (G1 only) or insertion (G2 only) costs.
        """
        cost = 0.0
        if len(items_G1) > 0 and len(items_G2) > 0:
            sub_matrix = subset_cost_matrix(
                cost_matrix, items_G1, items_G2, size_G1, size_G2
            )
            _, cost = search_tree_node._solve_assignment(sub_matrix)
        elif len(items_G1) > 0:  # only deletion possible
            for item in items_G1:
                cost += cost_matrix[item, size_G2 + item]
        elif len(items_G2) > 0:  # only insertion possible
            for item in items_G2:
                cost += cost_matrix[size_G1 + item, item]
        return cost

    def _match_incident_edges(
        self,
        cost_matrix_edges,
        edges_G1,
        index_G1,
        direction_G1,
        edges_G2,
        index_G2,
        direction_G2,
        num_edges_G1,
        num_edges_G2,
    ):
        """Match the newly decidable edges, record them and return their cost."""
        cost = 0.0
        count_G1 = len(edges_G1)
        count_G2 = len(edges_G2)
        if count_G1 > 0 and count_G2 > 0:  # substitution is an option
            sub_matrix = subset_cost_matrix(
                cost_matrix_edges,
                edges_G1,
                edges_G2,
                num_edges_G1,
                num_edges_G2,
            )
            max_sum = sub_matrix.sum()
            # Two edges may only be substituted when they have the same
            # direction and their other end-points were matched together;
            # every other pairing is priced out with the maximum cost.
            impossible = (direction_G1[:, None] != direction_G2[None, :]) | (
                index_G1[:, None] != index_G2[None, :]
            )
            sub_matrix[:count_G1, :count_G2][impossible] = max_sum

            assignment, cost = self._solve_assignment(sub_matrix)
            for row, col in enumerate(assignment):
                if row < count_G1:
                    self.matched_edges[0].append(edges_G1[row])
                    self.matched_edges[1].append(
                        edges_G2[col] if col < count_G2 else None
                    )
                elif col < count_G2:
                    self.matched_edges[0].append(None)
                    self.matched_edges[1].append(edges_G2[col])
        elif count_G1 > 0:  # only deletion possible
            for edge in edges_G1:
                cost += cost_matrix_edges[edge, num_edges_G2 + edge]
            for edge in edges_G1:
                self.matched_edges[0].append(edge)
                self.matched_edges[1].append(None)
        elif count_G2 > 0:  # only insertion possible
            for edge in edges_G2:
                cost += cost_matrix_edges[num_edges_G1 + edge, edge]
            for edge in edges_G2:
                self.matched_edges[0].append(None)
                self.matched_edges[1].append(edge)
        return cost

    def _num_unprocessed(self):
        """Total number of nodes and edges not yet covered by the matching."""
        return (
            len(self.unprocessed_nodes_G1)
            + len(self.unprocessed_nodes_G2)
            + len(self.unprocessed_edges_G1)
            + len(self.unprocessed_edges_G2)
        )

    # For heap insertion order
    def __lt__(self, other):
        """Order nodes by total cost, then matched cost, then remaining work."""
        own_total = self.matched_cost + self.future_approximate_cost
        other_total = other.matched_cost + other.future_approximate_cost
        if abs(own_total - other_total) > EPSILON:
            return own_total < other_total
        if abs(self.matched_cost - other.matched_cost) > EPSILON:
            # Equal totals: prefer the larger matched cost, which is closer
            # to reality than the estimate.
            return other.matched_cost < self.matched_cost
        return self._num_unprocessed() < other._num_unprocessed()
def edit_cost_from_node_matching(
    G1, G2, cost_matrix_nodes, cost_matrix_edges, node_matching
):
    """Compute the edit cost implied by a complete node assignment.

    Parameters
    ----------
    G1, G2 : graph
        Graphs providing ``number_of_nodes`` / ``number_of_edges`` and the
        edge queries used by ``get_edges_to_match``.
    cost_matrix_nodes, cost_matrix_edges : numpy.ndarray
        Extended node / edge cost matrices; columns past the G2 count hold
        deletions and rows past the G1 count hold insertions.
    node_matching : sequence of int
        ``node_matching[i]`` is the column of ``cost_matrix_nodes`` assigned
        to row ``i``.

    Returns
    -------
    tuple
        ``(matched_cost, matched_nodes, matched_edges)``. The last two are
        pairs of parallel lists ``(G1 items, G2 items)`` in which ``None``
        marks an insertion or a deletion.
    """
    num_nodes_G1 = G1.number_of_nodes()
    num_nodes_G2 = G2.number_of_nodes()
    num_edges_G1 = G1.number_of_edges()
    num_edges_G2 = G2.number_of_edges()

    matched_cost = 0.0
    matched_nodes = ([], [])
    matched_edges = ([], [])

    # Rows of real G1 nodes: substituted, or deleted when the assigned column
    # lies past the G2 nodes.
    for row in range(num_nodes_G1):
        col = node_matching[row]
        matched_cost += cost_matrix_nodes[row, col]
        matched_nodes[0].append(row)
        matched_nodes[1].append(col if col < num_nodes_G2 else None)

    # Auxiliary rows: a real G2 column here means that node is inserted.
    for row in range(num_nodes_G1, len(node_matching)):
        col = node_matching[row]
        matched_cost += cost_matrix_nodes[row, col]
        if col < num_nodes_G2:
            matched_nodes[0].append(None)
            matched_nodes[1].append(col)

    # Replay the node pairs in order; at each step only the edges towards
    # earlier pairs (and self-loops) are matched.
    for pos in range(len(matched_nodes[0])):
        node_G1 = matched_nodes[0][pos]
        node_G2 = matched_nodes[1][pos]

        incident_edges_G1 = np.array([], dtype=int)
        if node_G1 is not None:
            incident_edges_G1, index_G1, direction_G1 = get_edges_to_match(
                G1, node_G1, matched_nodes[0][:pos]
            )
        incident_edges_G2 = np.array([], dtype=int)
        if node_G2 is not None:
            incident_edges_G2, index_G2, direction_G2 = get_edges_to_match(
                G2, node_G2, matched_nodes[1][:pos]
            )
        count_G1 = len(incident_edges_G1)
        count_G2 = len(incident_edges_G2)

        if count_G1 > 0 and count_G2 > 0:  # substitution is an option
            matched_edges_cost_matrix = subset_cost_matrix(
                cost_matrix_edges,
                incident_edges_G1,
                incident_edges_G2,
                num_edges_G1,
                num_edges_G2,
            )
            max_sum = matched_edges_cost_matrix.sum()
            # Price out impossible pairings: both edges must have the same
            # direction and their other end-points must be matched together.
            for a in range(count_G1):
                for b in range(count_G2):
                    if (
                        direction_G1[a] != direction_G2[b]
                        or index_G1[a] != index_G2[b]
                    ):
                        matched_edges_cost_matrix[a, b] = max_sum

            # Match the edges as per the LAP solution.
            row_ind, _, _ = lapjv(matched_edges_cost_matrix)
            lap_cost = 0.0
            for row, col in enumerate(row_ind):
                lap_cost += matched_edges_cost_matrix[row, col]

            for row, col in enumerate(row_ind):
                if row < count_G1:
                    matched_edges[0].append(incident_edges_G1[row])
                    matched_edges[1].append(
                        incident_edges_G2[col] if col < count_G2 else None
                    )
                elif col < count_G2:
                    matched_edges[0].append(None)
                    matched_edges[1].append(incident_edges_G2[col])
            matched_cost += lap_cost

        elif count_G1 > 0:  # only deletion possible
            edge_deletion_cost = 0.0
            for edge in incident_edges_G1:
                edge_deletion_cost += cost_matrix_edges[
                    edge, num_edges_G2 + edge
                ]
            for edge in incident_edges_G1:
                matched_edges[0].append(edge)
                matched_edges[1].append(None)
            matched_cost += edge_deletion_cost

        elif count_G2 > 0:  # only insertion possible
            edge_insertion_cost = 0.0
            for edge in incident_edges_G2:
                edge_insertion_cost += cost_matrix_edges[
                    num_edges_G1 + edge, edge
                ]
            for edge in incident_edges_G2:
                matched_edges[0].append(None)
                matched_edges[1].append(edge)
            matched_cost += edge_insertion_cost

    return (matched_cost, matched_nodes, matched_edges)
def contextual_cost_matrix_construction(
    G1,
    G2,
    node_substitution_cost,
    edge_substitution_cost,
    G1_node_deletion_cost,
    G1_edge_deletion_cost,
    G2_node_insertion_cost,
    G2_edge_insertion_cost,
):
    """Build the node cost matrix for the bipartite (linear assignment) GED.

    With n1 / n2 the node counts of G1 / G2, the returned matrix is square
    with side n1 + n2 and is laid out as four blocks:

    * rows < n1, cols < n2: ``node_substitution_cost[i, j]`` plus the cost of
      the best assignment (solved with ``lapjv``) between the edges incident
      to node i of G1 and the edges incident to node j of G2;
    * rows < n1, cols >= n2: deletion of G1 node i together with all of its
      incident edges on the diagonal, ``cost_upper_bound`` elsewhere;
    * rows >= n1, cols < n2: insertion of G2 node j together with all of its
      incident edges on the diagonal, ``cost_upper_bound`` elsewhere;
    * rows >= n1, cols >= n2: zeros.

    Parameters
    ----------
    G1, G2 : graph objects
        Must provide ``number_of_nodes``, ``has_edge_between``, ``edge_id``,
        ``in_edges`` and ``out_edges`` as used below.
    node_substitution_cost : numpy array indexed [G1 node, G2 node]
    edge_substitution_cost : numpy array indexed [G1 edge id, G2 edge id]
    G1_node_deletion_cost, G2_node_insertion_cost : numpy arrays indexed by node
    G1_edge_deletion_cost, G2_edge_insertion_cost : numpy arrays indexed by edge id

    Returns
    -------
    numpy.ndarray
        The (n1 + n2) x (n1 + n2) float cost matrix described above.
    """
    # NOTE(review): the excerpt this block was restored from elides the lines
    # defining the node counts; they are reconstructed from how the names are
    # used below -- confirm against the full file.
    num_G1_nodes = G1.number_of_nodes()
    num_G2_nodes = G2.number_of_nodes()

    # Larger than any combination of node edit costs, so a forbidden pairing
    # is never preferred by the assignment solver.
    cost_upper_bound = 2 * (
        node_substitution_cost.sum()
        + G1_node_deletion_cost.sum()
        + G2_node_insertion_cost.sum()
        + 1
    )

    def _diagonal_block(diagonal_costs, size):
        # size x size block: per-node cost on the diagonal, upper bound elsewhere.
        block = np.full((size, size), cost_upper_bound, dtype=float)
        positions = np.arange(size)
        block[positions, positions] = diagonal_costs[:size]
        return block

    def _incident_edge_ids(graph):
        # Per node: ids of self loops, of incoming edges and of outgoing
        # edges, with the self loops removed from the latter two.
        self_loops, incoming, outgoing = [], [], []
        for node in range(graph.number_of_nodes()):
            if graph.has_edge_between(node, node):
                loops = sorted(
                    graph.edge_id(node, node, return_array=True).numpy()
                )
            else:
                loops = np.array([], dtype=int)
            self_loops.append(loops)
            incoming.append(
                np.setdiff1d(graph.in_edges([node], "eid").numpy(), loops)
            )
            outgoing.append(
                np.setdiff1d(graph.out_edges([node], "eid").numpy(), loops)
            )
        return self_loops, incoming, outgoing

    side = num_G1_nodes + num_G2_nodes
    cost_matrix = np.zeros((side, side), dtype=float)
    cost_matrix[:num_G1_nodes, :num_G2_nodes] = node_substitution_cost
    cost_matrix[:num_G1_nodes, num_G2_nodes:] = _diagonal_block(
        G1_node_deletion_cost, num_G1_nodes
    )
    cost_matrix[num_G1_nodes:, :num_G2_nodes] = _diagonal_block(
        G2_node_insertion_cost, num_G2_nodes
    )

    self_G1, in_G1, out_G1 = _incident_edge_ids(G1)
    self_G2, in_G2, out_G2 = _incident_edge_ids(G2)

    # Deletion / insertion costs of every edge touching each node, ordered as
    # self loops, incoming, outgoing (the same order used for the blocks of
    # the per-pair edge matrix below).
    selected_deletion_G1 = [
        G1_edge_deletion_cost[
            np.concatenate((self_G1[i], in_G1[i], out_G1[i]))
        ]
        for i in range(num_G1_nodes)
    ]
    selected_insertion_G2 = [
        G2_edge_insertion_cost[
            np.concatenate((self_G2[j], in_G2[j], out_G2[j]))
        ]
        for j in range(num_G2_nodes)
    ]

    # Add the cost of the edge edits implied by each node substitution (the
    # cost associated with the node's local substructure).
    for i in range(num_G1_nodes):
        s1 = len(self_G1[i])
        p1 = len(in_G1[i])
        m = s1 + p1 + len(out_G1[i])
        for j in range(num_G2_nodes):
            s2 = len(self_G2[j])
            p2 = len(in_G2[j])
            n = s2 + p2 + len(out_G2[j])
            if m + n == 0:
                continue

            edge_costs = np.full((m + n, m + n), cost_upper_bound, dtype=float)
            # Edges may only be substituted by edges of the same kind:
            # self loop <-> self loop, incoming <-> incoming, outgoing <-> outgoing.
            edge_costs[:s1, :s2] = edge_substitution_cost[self_G1[i], :][
                :, self_G2[j]
            ]
            edge_costs[s1 : s1 + p1, s2 : s2 + p2] = edge_substitution_cost[
                in_G1[i], :
            ][:, in_G2[j]]
            edge_costs[s1 + p1 : m, s2 + p2 : n] = edge_substitution_cost[
                out_G1[i], :
            ][:, out_G2[j]]
            np.fill_diagonal(edge_costs[:m, n:], selected_deletion_G1[i])
            np.fill_diagonal(edge_costs[m:, :n], selected_insertion_G2[j])
            edge_costs[m:, n:] = 0

            row_ind, _, _ = lapjv(edge_costs)
            # row_ind[k] is the column assigned to row k.
            cost_matrix[i, j] += sum(
                (edge_costs[k, col] for k, col in enumerate(row_ind)), 0.0
            )

    # Deleting / inserting a node also deletes / inserts all its incident edges.
    for i in range(num_G1_nodes):
        cost_matrix[i, num_G2_nodes + i] += selected_deletion_G1[i].sum()
    for j in range(num_G2_nodes):
        cost_matrix[num_G1_nodes + j, j] += selected_insertion_G2[j].sum()

    return cost_matrix
def hausdorff_matching(
    G1,
    G2,
    node_substitution_cost,
    edge_substitution_cost,
    G1_node_deletion_cost,
    G1_edge_deletion_cost,
    G2_node_insertion_cost,
    G2_edge_insertion_cost,
):
    """Compute the approximate GED between G1 and G2 with Hausdorff matching.

    Every node starts with the cost of removing (G1) or inserting (G2) it
    together with half the cost of its incident edges.  Each node pair (i, j)
    then offers a substitution cost built from the node substitution cost and
    the cheapest per-edge edits; a node keeps the smallest cost seen.  The
    result is the sum of the per-node costs, floored by the cost of the
    cheapest node deletions/insertions forced by the difference in node count.

    Parameters
    ----------
    G1, G2 : graph objects
        Must provide ``number_of_nodes``, ``has_edge_between``, ``edge_id``,
        ``in_edges`` and ``out_edges`` as used below.
    node_substitution_cost : numpy array indexed [G1 node, G2 node]
    edge_substitution_cost : numpy array indexed [G1 edge id, G2 edge id]
    G1_node_deletion_cost, G2_node_insertion_cost : numpy arrays indexed by node
    G1_edge_deletion_cost, G2_edge_insertion_cost : numpy arrays indexed by edge id

    Returns
    -------
    float
        The Hausdorff edit cost.
    """
    num_G1_nodes = G1.number_of_nodes()
    num_G2_nodes = G2.number_of_nodes()

    def _incident_edge_ids(graph):
        # Per node: ids of self loops, of incoming edges and of outgoing
        # edges, with the self loops removed from the latter two.
        self_loops, incoming, outgoing = [], [], []
        for node in range(graph.number_of_nodes()):
            if graph.has_edge_between(node, node):
                loops = sorted(
                    graph.edge_id(node, node, return_array=True).numpy()
                )
            else:
                loops = np.array([], dtype=int)
            self_loops.append(loops)
            incoming.append(
                np.setdiff1d(graph.in_edges([node], "eid").numpy(), loops)
            )
            outgoing.append(
                np.setdiff1d(graph.out_edges([node], "eid").numpy(), loops)
            )
        return self_loops, incoming, outgoing

    def _sum_of_smallest(values, count):
        # Sum of the `count` smallest entries (count >= 1).  `kth` is a
        # zero-based position that must be < len(values), so partition at
        # count - 1; partitioning at `count` fails when count == len(values).
        return np.partition(values, count - 1)[:count].sum()

    # Index 0: self loops, 1: incoming edges, 2: outgoing edges.
    edges_G1 = _incident_edge_ids(G1)
    edges_G2 = _incident_edge_ids(G2)

    deletion_by_kind = [
        [G1_edge_deletion_cost[ids] for ids in per_node]
        for per_node in edges_G1
    ]
    insertion_by_kind = [
        [G2_edge_insertion_cost[ids] for ids in per_node]
        for per_node in edges_G2
    ]

    selected_deletion_G1 = [
        G1_edge_deletion_cost[
            np.concatenate((edges_G1[0][i], edges_G1[1][i], edges_G1[2][i]))
        ]
        for i in range(num_G1_nodes)
    ]
    selected_insertion_G2 = [
        G2_edge_insertion_cost[
            np.concatenate((edges_G2[0][j], edges_G2[1][j], edges_G2[2][j]))
        ]
        for j in range(num_G2_nodes)
    ]

    # Baseline: remove / insert the node and pay half of each incident edge.
    cost_G1 = np.array(
        [
            G1_node_deletion_cost[i] + selected_deletion_G1[i].sum() / 2
            for i in range(num_G1_nodes)
        ]
    )
    cost_G2 = np.array(
        [
            G2_node_insertion_cost[j] + selected_insertion_G2[j].sum() / 2
            for j in range(num_G2_nodes)
        ]
    )

    for i in range(num_G1_nodes):
        for j in range(num_G2_nodes):
            # NOTE(review): the excerpt this block was restored from elides
            # the initialisation of the self-loop and incoming-edge cost
            # copies; all three kinds are initialised here the way the
            # visible outgoing-edge copies are -- confirm against the full file.
            edge_edit_cost = 0.0
            for kind in range(3):
                c1 = deletion_by_kind[kind][i].copy()
                c2 = insertion_by_kind[kind][j].copy()
                # Each edge takes the cheaper of its own deletion/insertion
                # and half the cheapest substitution with a same-kind edge.
                for k, a in enumerate(edges_G1[kind][i]):
                    for l, b in enumerate(edges_G2[kind][j]):
                        half_substitution = edge_substitution_cost[a, b] / 2
                        c1[k] = min(c1[k], half_substitution)
                        c2[l] = min(c2[l], half_substitution)
                edge_edit_cost += c1.sum()
                edge_edit_cost += c2.sum()

            # Surplus edges on either side must at least be deleted/inserted.
            edge_hausdorff_lower_bound = 0.0
            surplus = len(selected_deletion_G1[i]) - len(
                selected_insertion_G2[j]
            )
            if surplus > 0:
                edge_hausdorff_lower_bound = _sum_of_smallest(
                    selected_deletion_G1[i], surplus
                )
            elif surplus < 0:
                edge_hausdorff_lower_bound = _sum_of_smallest(
                    selected_insertion_G2[j], -surplus
                )

            sc_cost = 0.5 * (
                node_substitution_cost[i, j]
                + 0.5 * max(edge_edit_cost, edge_hausdorff_lower_bound)
            )

            if cost_G1[i] > sc_cost:
                cost_G1[i] = sc_cost
            if cost_G2[j] > sc_cost:
                cost_G2[j] = sc_cost

    # Surplus nodes on either side must at least be deleted/inserted.
    graph_hausdorff_lower_bound = 0.0
    if num_G1_nodes > num_G2_nodes:
        graph_hausdorff_lower_bound = _sum_of_smallest(
            G1_node_deletion_cost, num_G1_nodes - num_G2_nodes
        )
    elif num_G1_nodes < num_G2_nodes:
        graph_hausdorff_lower_bound = _sum_of_smallest(
            G2_node_insertion_cost, num_G2_nodes - num_G1_nodes
        )

    graph_hausdorff_cost = max(
        graph_hausdorff_lower_bound, cost_G1.sum() + cost_G2.sum()
    )
    return graph_hausdorff_cost
def _push_successor_states(
    open_list, G1, G2, state, cost_matrix_nodes, cost_matrix_edges
):
    # Push onto the heap `open_list` one search node per way of extending
    # `state`: the first unprocessed G1 node substituted by each unprocessed
    # G2 node and then deleted, or -- once G1 is exhausted -- each remaining
    # G2 node inserted.
    (
        matched_cost,
        matched_nodes,
        matched_edges,
        unprocessed_nodes_G1,
        unprocessed_nodes_G2,
        unprocessed_edges_G1,
        unprocessed_edges_G2,
    ) = state

    if len(unprocessed_nodes_G1) > 0:
        u = unprocessed_nodes_G1[0]
        candidates = [(u, v) for v in unprocessed_nodes_G2]
        candidates.append((u, None))  # node deletion
    else:
        candidates = [(None, v) for v in unprocessed_nodes_G2]  # insertion

    for u, v in candidates:
        tree_node = search_tree_node(
            G1,
            G2,
            matched_cost,
            matched_nodes,
            matched_edges,
            u,
            v,
            unprocessed_nodes_G1,
            unprocessed_nodes_G2,
            unprocessed_edges_G1,
            unprocessed_edges_G2,
            cost_matrix_nodes,
            cost_matrix_edges,
        )
        # Insert into open-list, implemented as a heap
        heappush(open_list, tree_node)


def a_star_search(G1, G2, cost_matrix_nodes, cost_matrix_edges, max_beam_size):
    """Search for a minimum-cost edit path with A* (optionally beam-limited).

    Search states are ``search_tree_node`` objects kept in a heap, so the
    cheapest state according to their ordering is expanded first.  A state
    whose two lists of unprocessed nodes are both empty is a complete
    matching and is returned as soon as it is popped.

    Parameters
    ----------
    G1, G2 : graph objects providing ``number_of_nodes`` / ``number_of_edges``
    cost_matrix_nodes, cost_matrix_edges
        Cost matrices forwarded unchanged to ``search_tree_node``.
    max_beam_size : int
        When positive, only the ``max_beam_size`` cheapest open states are
        kept after each expansion (beam search); otherwise nothing is pruned.

    Returns
    -------
    tuple or None
        ``(matched_cost, matched_nodes, matched_edges)`` of the first complete
        state popped, or ``None`` if the open list runs empty first.
    """
    # Nothing is matched in the beginning; everything is unprocessed.
    root_state = (
        0.0,
        ([], []),
        ([], []),
        list(range(G1.number_of_nodes())),
        list(range(G2.number_of_nodes())),
        list(range(G1.number_of_edges())),
        list(range(G2.number_of_edges())),
    )

    # Two empty graphs are already matched; there is no state to expand.
    if len(root_state[3]) == 0 and len(root_state[4]) == 0:
        return (0.0, ([], []), ([], []))

    open_list = []
    _push_successor_states(
        open_list, G1, G2, root_state, cost_matrix_nodes, cost_matrix_edges
    )

    while len(open_list) > 0:
        # TODO: Create a node that processes multi node insertion deletion in
        # one search node, as opposed in multiple search nodes here
        parent_tree_node = heappop(open_list)
        if (
            len(parent_tree_node.unprocessed_nodes_G1) == 0
            and len(parent_tree_node.unprocessed_nodes_G2) == 0
        ):
            return (
                parent_tree_node.matched_cost,
                parent_tree_node.matched_nodes,
                parent_tree_node.matched_edges,
            )

        parent_state = (
            parent_tree_node.matched_cost,
            parent_tree_node.matched_nodes,
            parent_tree_node.matched_edges,
            parent_tree_node.unprocessed_nodes_G1,
            parent_tree_node.unprocessed_nodes_G2,
            parent_tree_node.unprocessed_edges_G1,
            parent_tree_node.unprocessed_edges_G2,
        )
        _push_successor_states(
            open_list,
            G1,
            G2,
            parent_state,
            cost_matrix_nodes,
            cost_matrix_edges,
        )

        # Retain the top-k elements in open-list iff algorithm is beam
        if max_beam_size > 0 and len(open_list) > max_beam_size:
            open_list = nsmallest(max_beam_size, open_list)
            heapify(open_list)

    return None
def get_sorted_mapping(mapping_tuple, len1, len2):
    """Turn a list of matched pairs into two index-ordered lookup lists.

    Parameters
    ----------
    mapping_tuple : tuple of two sequences
        ``mapping_tuple[0][k]`` and ``mapping_tuple[1][k]`` form the k-th
        matched pair; either side may be ``None`` (deletion / insertion).
    len1, len2 : int
        Number of elements on the first and on the second side.

    Returns
    -------
    tuple of two lists
        ``result_0[a]`` is the second-side element matched to ``a`` and
        ``result_1[b]`` the first-side element matched to ``b``; entries
        without a partner stay ``None``.
    """
    result_0 = [None] * len1
    result_1 = [None] * len2
    for first, second in zip(mapping_tuple[0], mapping_tuple[1]):
        # Pairs with a None side are deletions/insertions, not matches.
        if first is not None and second is not None:
            result_0[first] = second
            result_1[second] = first
    return (result_0, result_1)
def graph_edit_distance(
    G1,
    G2,
    node_substitution_cost=None,
    edge_substitution_cost=None,
    G1_node_deletion_cost=None,
    G2_node_insertion_cost=None,
    G1_edge_deletion_cost=None,
    G2_edge_insertion_cost=None,
    algorithm="bipartite",
    max_beam_size=100,
):
    """Returns GED (graph edit distance) between DGLGraphs G1 and G2.

    Parameters
    ----------
    G1, G2 : DGLGraph
        The graphs to compare.
    node_substitution_cost, edge_substitution_cost : numpy array or None
    G1_node_deletion_cost, G2_node_insertion_cost : numpy array or None
    G1_edge_deletion_cost, G2_edge_insertion_cost : numpy array or None
        Edit costs; each is passed through ``validate_cost_functions``, which
        supplies defaults for the ones left as ``None``.
    algorithm : str
        ``"astar"`` or ``"beam"`` run ``a_star_search`` (only ``"beam"``
        prunes, using ``max_beam_size``); ``"hausdorff"`` runs
        ``hausdorff_matching``; any other value, including the default
        ``"bipartite"``, runs the linear-assignment approximation.
    max_beam_size : int
        Number of open search states kept by the ``"beam"`` algorithm;
        ignored by every other algorithm.

    Returns
    -------
    tuple
        ``(cost, node_mapping, edge_mapping)``.  Each mapping is the pair of
        lists produced by ``get_sorted_mapping``; both mappings are ``None``
        for the ``"hausdorff"`` algorithm, which only yields a cost.
    """
    # Handle corner cases
    if G1 is None and G2 is None:
        return (0.0, ([], []), ([], []))
    # NOTE(review): exactly one graph being None is not handled specially (the
    # original only performed a dead `edit_cost = 0.0` store for G1 is None);
    # presumably the validation below then fails on the None graph -- confirm
    # and decide on the intended result.

    # Validate
    if algorithm != "beam":
        max_beam_size = -1

    (
        node_substitution_cost,
        edge_substitution_cost,
        G1_node_deletion_cost,
        G1_edge_deletion_cost,
        G2_node_insertion_cost,
        G2_edge_insertion_cost,
    ) = validate_cost_functions(
        G1,
        G2,
        node_substitution_cost,
        edge_substitution_cost,
        G1_node_deletion_cost,
        G1_edge_deletion_cost,
        G2_node_insertion_cost,
        G2_edge_insertion_cost,
    )

    # cost matrices for LAP solution
    cost_matrix_nodes, cost_matrix_edges = construct_cost_functions(
        G1,
        G2,
        node_substitution_cost,
        edge_substitution_cost,
        G1_node_deletion_cost,
        G1_edge_deletion_cost,
        G2_node_insertion_cost,
        G2_edge_insertion_cost,
    )

    if algorithm == "hausdorff":
        hausdorff_cost = hausdorff_matching(
            G1,
            G2,
            node_substitution_cost,
            edge_substitution_cost,
            G1_node_deletion_cost,
            G1_edge_deletion_cost,
            G2_node_insertion_cost,
            G2_edge_insertion_cost,
        )
        return (hausdorff_cost, None, None)

    if algorithm in ("astar", "beam"):
        (matched_cost, matched_nodes, matched_edges) = a_star_search(
            G1, G2, cost_matrix_nodes, cost_matrix_edges, max_beam_size
        )
    else:
        cost_matrix = contextual_cost_matrix_construction(
            G1,
            G2,
            node_substitution_cost,
            edge_substitution_cost,
            G1_node_deletion_cost,
            G1_edge_deletion_cost,
            G2_node_insertion_cost,
            G2_edge_insertion_cost,
        )
        # Match the nodes as per the LAP solution
        row_ind, _, _ = lapjv(cost_matrix)
        (
            matched_cost,
            matched_nodes,
            matched_edges,
        ) = edit_cost_from_node_matching(
            G1, G2, cost_matrix_nodes, cost_matrix_edges, row_ind
        )

    return (
        matched_cost,
        get_sorted_mapping(
            matched_nodes, G1.number_of_nodes(), G2.number_of_nodes()
        ),
        get_sorted_mapping(
            matched_edges, G1.number_of_edges(), G2.number_of_edges()
        ),
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment