Unverified Commit 3ccd973c authored by kylasa's avatar kylasa Committed by GitHub
Browse files

code changes for bug fixes identified during mag_lsc dataset (#4187)

* code changes for bug fixes identified during mag_lsc dataset

1. Changed the call from torch.Tensor() to torch.from_numpy() to address memory corruption issues when creating large tensors. The tricky part is that torch.Tensor() works correctly for small tensors, so the bug only surfaces at scale.
2. Changed the dgl.graph() function call to include the 'num_nodes' argument, to explicitly specify the total number of nodes in a graph partition.

* Update convert_partition.py

Moving the changes in the "create_metadata_json" function to the "multiple-file-format" support, where this change is more appropriate, since multi-machine testing was done with these code changes.

* Addressing review comments.

Removed the trailing space at the end of the line, as suggested.
parent 3fe5eea7
...@@ -9,6 +9,7 @@ import pyarrow ...@@ -9,6 +9,7 @@ import pyarrow
import pandas as pd import pandas as pd
import constants import constants
from pyarrow import csv from pyarrow import csv
from utils import read_json
def create_dgl_object(graph_name, num_parts, \ def create_dgl_object(graph_name, num_parts, \
schema, part_id, node_data, \ schema, part_id, node_data, \
...@@ -129,7 +130,8 @@ def create_dgl_object(graph_name, num_parts, \ ...@@ -129,7 +130,8 @@ def create_dgl_object(graph_name, num_parts, \
assert len(uniq_ids) == len(idx) assert len(uniq_ids) == len(idx)
# We get the edge list with their node IDs mapped to a contiguous ID range. # We get the edge list with their node IDs mapped to a contiguous ID range.
part_local_src_id, part_local_dst_id = np.split(inverse_idx[:len(shuffle_global_src_id) * 2], 2) part_local_src_id, part_local_dst_id = np.split(inverse_idx[:len(shuffle_global_src_id) * 2], 2)
compact_g = dgl.graph((part_local_src_id, part_local_dst_id))
compact_g = dgl.graph(data=(part_local_src_id, part_local_dst_id), num_nodes=len(idx))
compact_g.edata['orig_id'] = th.as_tensor(global_edge_id) compact_g.edata['orig_id'] = th.as_tensor(global_edge_id)
compact_g.edata[dgl.ETYPE] = th.as_tensor(etype_ids) compact_g.edata[dgl.ETYPE] = th.as_tensor(etype_ids)
compact_g.edata['inner_edge'] = th.ones( compact_g.edata['inner_edge'] = th.ones(
...@@ -232,6 +234,6 @@ def create_metadata_json(graph_name, num_nodes, num_edges, num_parts, node_map_v ...@@ -232,6 +234,6 @@ def create_metadata_json(graph_name, num_nodes, num_edges, num_parts, node_map_v
edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl")
part_graph_file = os.path.join(part_dir, "graph.dgl") part_graph_file = os.path.join(part_dir, "graph.dgl")
part_metadata['part-{}'.format(part_id)] = {'node_feats': node_feat_file, part_metadata['part-{}'.format(part_id)] = {'node_feats': node_feat_file,
'edge_feats': edge_feat_file, 'edge_feats': edge_feat_file,
'part_graph': part_graph_file} 'part_graph': part_graph_file}
return part_metadata return part_metadata
...@@ -50,7 +50,7 @@ def get_shuffle_global_nids(rank, world_size, global_nids_ranks, node_data): ...@@ -50,7 +50,7 @@ def get_shuffle_global_nids(rank, world_size, global_nids_ranks, node_data):
#form the outgoing message #form the outgoing message
send_nodes = [] send_nodes = []
for i in range(world_size): for i in range(world_size):
send_nodes.append(torch.Tensor(global_nids_ranks[i]).type(dtype=torch.int64)) send_nodes.append(torch.from_numpy(global_nids_ranks[i]).type(dtype=torch.int64))
#send-recieve messages #send-recieve messages
alltoallv_cpu(rank, world_size, recv_nodes, send_nodes) alltoallv_cpu(rank, world_size, recv_nodes, send_nodes)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment