Unverified Commit 1c4bfb62 authored by Quan (Andy) Gan, committed by GitHub

[Performance] Replace np.array with np.asarray (#1301)



* replace np.array with np.asarray

* fix
Co-authored-by: Minjie Wang <minjie.wang@nyu.edu>
parent 5dd35580
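A note on the rationale: `np.array` copies its input by default, while `np.asarray` returns an existing `ndarray` untouched when no dtype conversion is needed, so call sites that may already receive NumPy arrays skip a redundant allocation. A minimal sketch of the difference (illustrative only, not part of the commit):

```python
import numpy as np

x = np.arange(1_000_000, dtype=np.int64)

a = np.array(x)    # copies: allocates a fresh buffer on every call
b = np.asarray(x)  # no copy: returns x itself, since the dtype already matches

print(np.shares_memory(a, x))  # False
print(b is x)                  # True

# When a conversion is required, both functions must allocate,
# so the two behave identically on that path:
c = np.asarray(x, dtype=np.float32)
print(np.shares_memory(c, x))  # False
```

For inputs that are plain Python lists, both calls build a new array, so those hunks are behavior-preserving; the saving comes from the paths that may already hold an `ndarray`.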
@@ -250,7 +250,7 @@ class NDArrayBase(_NDArrayBase):
         if not isinstance(source_array, np.ndarray):
             try:
-                source_array = np.array(source_array, dtype=self.dtype)
+                source_array = np.asarray(source_array, dtype=self.dtype)
             except:
                 raise TypeError('array must be an array_like data,' +
                                 'type %s is not supported' % str(type(source_array)))
@@ -541,7 +541,7 @@ def _reduce_grad(grad, shape):
     num_to_squeeze = len(grad_shape) - len(in_shape)
     # pad in_shape
     in_shape = (1,) * num_to_squeeze + in_shape
-    reduce_idx = np.nonzero(np.array(grad_shape) - np.array(in_shape))[0]
+    reduce_idx = np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))[0]
     reduce_idx += 1 # skip batch dim
     grad = grad.sum(axis=tuple(reduce_idx), keepdims=True)
     return grad.reshape(shape)
@@ -365,7 +365,7 @@ def zerocopy_from_dlpack(dlpack_tensor):
 def zerocopy_to_numpy(input):
     # NOTE: not zerocopy
-    return np.array(memoryview(input))
+    return np.asarray(memoryview(input))

 def zerocopy_from_numpy(np_array):
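One side effect worth flagging here: unlike `np.array`, `np.asarray` over a `memoryview` wraps the underlying buffer as a view rather than copying it, so if `input` exposes the buffer protocol this call may now actually be zero-copy and the `NOTE: not zerocopy` comment may be stale. A quick check (a sketch, independent of DGL):

```python
import numpy as np

buf = memoryview(bytearray(b"\x01\x02\x03\x04"))

a = np.array(buf)    # copies the buffer into a new array
b = np.asarray(buf)  # wraps the buffer; no copy

print(np.shares_memory(a, buf))  # False
print(np.shares_memory(b, buf))  # True
```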
@@ -548,8 +548,7 @@ def _reduce_grad(grad, shape):
     num_to_squeeze = len(grad_shape) - len(in_shape)
     # pad inshape
     in_shape = (1,) * num_to_squeeze + in_shape
-    reduce_idx = np.array(np.nonzero(
-        np.array(grad_shape) - np.array(in_shape)))
+    reduce_idx = np.asarray(np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape)))
     reduce_idx += 1 # skip batch dim
     reduce_idx_tensor = tf.constant(tuple(
         reduce_idx.flatten().tolist()))
@@ -184,9 +184,9 @@ class RGCNLinkDataset(object):
         test_path = os.path.join(self.dir, 'test.txt')
         entity_dict = _read_dictionary(entity_path)
         relation_dict = _read_dictionary(relation_path)
-        self.train = np.array(_read_triplets_as_list(train_path, entity_dict, relation_dict))
-        self.valid = np.array(_read_triplets_as_list(valid_path, entity_dict, relation_dict))
-        self.test = np.array(_read_triplets_as_list(test_path, entity_dict, relation_dict))
+        self.train = np.asarray(_read_triplets_as_list(train_path, entity_dict, relation_dict))
+        self.valid = np.asarray(_read_triplets_as_list(valid_path, entity_dict, relation_dict))
+        self.test = np.asarray(_read_triplets_as_list(test_path, entity_dict, relation_dict))
         self.num_nodes = len(entity_dict)
         print("# entities: {}".format(self.num_nodes))
         self.num_rels = len(relation_dict)
@@ -417,10 +417,10 @@ def _load_data(dataset_str='aifb', dataset_path=None):
         # sort indices by destination
         edge_list = sorted(edge_list, key=lambda x: (x[1], x[0], x[2]))
-        edge_list = np.array(edge_list, dtype=np.int)
+        edge_list = np.asarray(edge_list, dtype=np.int)
         print('Number of edges: ', len(edge_list))
-        np.savez(edge_file, edges=edge_list, n=np.array(num_node), nrel=np.array(num_rel))
+        np.savez(edge_file, edges=edge_list, n=np.asarray(num_node), nrel=np.asarray(num_rel))
         nodes_u_dict = {np.unicode(to_unicode(key)): val for key, val in
                         nodes_dict.items()}
@@ -209,7 +209,7 @@ def metapath_random_walk(hg, etypes, seeds, num_traces):
         raise ValueError('beginning and ending node type mismatch')
     if len(seeds) == 0:
         return []
-    etype_array = ndarray.array(np.array([hg.get_etype_id(et) for et in etypes], dtype='int64'))
+    etype_array = ndarray.array(np.asarray([hg.get_etype_id(et) for et in etypes], dtype='int64'))
     seed_array = utils.toindex(seeds).todgltensor()
     traces = _CAPI_DGLMetapathRandomWalk(hg._graph, etype_array, seed_array, num_traces)
     return _split_traces(traces)
@@ -604,7 +604,7 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, metagraph
         etype_id = etypes_invmap[etype]
         dsttype_id = ntypes_invmap[dsttype]
         canonical_etids.append((srctype_id, etype_id, dsttype_id))
-    canonical_etids = np.array(canonical_etids)
+    canonical_etids = np.asarray(canonical_etids)
     etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
     edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))]
@@ -74,11 +74,11 @@ def alchemy_nodes(mol):
                                            Chem.rdchem.HybridizationType.SP2,
                                            Chem.rdchem.HybridizationType.SP3])
         h_u.append(num_h)
-        atom_feats_dict['n_feat'].append(F.tensor(np.array(h_u).astype(np.float32)))
+        atom_feats_dict['n_feat'].append(F.tensor(np.asarray(h_u, dtype=np.float32)))

     atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
-    atom_feats_dict['node_type'] = F.tensor(np.array(
-        atom_feats_dict['node_type']).astype(np.int64))
+    atom_feats_dict['node_type'] = F.tensor(
+        np.asarray(atom_feats_dict['node_type'], dtype=np.int64))

     return atom_feats_dict
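Several hunks in this file also collapse `np.array(x).astype(t)` into `np.asarray(x, dtype=t)`. Beyond the copy-avoidance above, this saves an intermediate array: the `.astype` form materializes one array and then copies it into the target dtype, whereas passing `dtype=` builds the converted array in a single step. A hedged sketch (`h_u` here is a stand-in list, not the real feature data):

```python
import numpy as np

h_u = [1.0, 0.0, 3.0]

# two allocations: a float64 array from the list, then a float32 copy
v1 = np.array(h_u).astype(np.float32)

# one allocation: the list is converted straight to float32
v2 = np.asarray(h_u, dtype=np.float32)

assert v1.dtype == v2.dtype == np.float32
assert np.array_equal(v1, v2)
```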
@@ -126,9 +126,9 @@ def alchemy_edges(mol, self_loop=False):
                 np.linalg.norm(geom[u] - geom[v]))
     bond_feats_dict['e_feat'] = F.tensor(
-        np.array(bond_feats_dict['e_feat']).astype(np.float32))
+        np.asarray(bond_feats_dict['e_feat'], dtype=np.float32))
     bond_feats_dict['distance'] = F.tensor(
-        np.array(bond_feats_dict['distance']).astype(np.float32)).reshape(-1 , 1)
+        np.asarray(bond_feats_dict['distance'], dtype=np.float32)).reshape(-1, 1)

     return bond_feats_dict
@@ -239,7 +239,7 @@ class TencentAlchemyDataset(object):
             smiles = Chem.MolToSmiles(mol)
             self.smiles.append(smiles)
             self.graphs.append(graph)
-            label = F.tensor(np.array(label[1].tolist()).astype(np.float32))
+            label = F.tensor(np.asarray(label[1].tolist(), dtype=np.float32))
             self.labels.append(label)
         save_graphs(osp.join(self.file_dir, "%s_graphs.bin" % self.mode), self.graphs,
@@ -290,7 +290,7 @@ class TencentAlchemyDataset(object):
         std : int or float
             Default to be None.
         """
-        labels = np.array([i.numpy() for i in self.labels])
+        labels = np.asarray([i.numpy() for i in self.labels])
         if mean is None:
             mean = np.mean(labels, axis=0)
         if std is None:
@@ -124,7 +124,7 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     ligand_graph = graph((ligand_srcs, ligand_dsts),
                          'ligand_atom', 'ligand', num_ligand_atoms)
     ligand_graph.edata['distance'] = F.reshape(F.zerocopy_from_numpy(
-        np.array(ligand_dists).astype(np.float32)), (-1, 1))
+        np.asarray(ligand_dists, dtype=np.float32)), (-1, 1))

     # Construct graph for atoms in the protein
     protein_srcs, protein_dsts, protein_dists = k_nearest_neighbors(
@@ -132,7 +132,7 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     protein_graph = graph((protein_srcs, protein_dsts),
                           'protein_atom', 'protein', num_protein_atoms)
     protein_graph.edata['distance'] = F.reshape(F.zerocopy_from_numpy(
-        np.array(protein_dists).astype(np.float32)), (-1, 1))
+        np.asarray(protein_dists, dtype=np.float32)), (-1, 1))

     # Construct 4 graphs for complex representation, including the connection within
     # protein atoms, the connection within ligand atoms and the connection between
@@ -140,9 +140,9 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     complex_srcs, complex_dsts, complex_dists = k_nearest_neighbors(
         np.concatenate([ligand_coordinates, protein_coordinates]),
         neighbor_cutoff, max_num_neighbors)
-    complex_srcs = np.array(complex_srcs)
-    complex_dsts = np.array(complex_dsts)
-    complex_dists = np.array(complex_dists)
+    complex_srcs = np.asarray(complex_srcs)
+    complex_dsts = np.asarray(complex_dsts)
+    complex_dists = np.asarray(complex_dists)
     offset = num_ligand_atoms

     # ('ligand_atom', 'complex', 'ligand_atom')
@@ -206,11 +206,11 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     )

     # Get atomic numbers for all atoms left and set node features
-    ligand_atomic_numbers = np.array(get_atomic_numbers(ligand_mol, ligand_atom_indices_left))
+    ligand_atomic_numbers = np.asarray(get_atomic_numbers(ligand_mol, ligand_atom_indices_left))
     # zero padding
     ligand_atomic_numbers = np.concatenate([
         ligand_atomic_numbers, np.zeros(num_ligand_atoms - len(ligand_atom_indices_left))])
-    protein_atomic_numbers = np.array(get_atomic_numbers(protein_mol, protein_atom_indices_left))
+    protein_atomic_numbers = np.asarray(get_atomic_numbers(protein_mol, protein_atom_indices_left))
     # zero padding
     protein_atomic_numbers = np.concatenate([
         protein_atomic_numbers, np.zeros(num_protein_atoms - len(protein_atom_indices_left))])
@@ -108,7 +108,7 @@ def indices_split(dataset, frac_train, frac_val, frac_test, indices):
     list of length 3
         Subsets for training, validation and test, which are all :class:`Subset` instances.
     """
-    frac_list = np.array([frac_train, frac_val, frac_test])
+    frac_list = np.asarray([frac_train, frac_val, frac_test])
     assert np.allclose(np.sum(frac_list), 1.), \
         'Expect frac_list sum to 1, got {:.4f}'.format(np.sum(frac_list))
     num_data = len(dataset)
@@ -136,12 +136,12 @@ class CitationGraphDataset(object):
 def _preprocess_features(features):
     """Row-normalize feature matrix and convert to tuple representation"""
-    rowsum = np.array(features.sum(1))
+    rowsum = np.asarray(features.sum(1))
     r_inv = np.power(rowsum, -1).flatten()
     r_inv[np.isinf(r_inv)] = 0.
     r_mat_inv = sp.diags(r_inv)
     features = r_mat_inv.dot(features)
-    return np.array(features.todense())
+    return np.asarray(features.todense())

 def _parse_index_file(filename):
     """Parse index file."""
@@ -329,12 +329,12 @@ class CoraBinary(object):
             for line in f.readlines():
                 if line.startswith('graph'):
                     if len(cur) != 0:
-                        self.labels.append(np.array(cur))
+                        self.labels.append(np.asarray(cur))
                         cur = []
                 else:
                     cur.append(int(line.strip()))
             if len(cur) != 0:
-                self.labels.append(np.array(cur))
+                self.labels.append(np.asarray(cur))
         # sanity check
         assert len(self.graphs) == len(self.pmpds)
         assert len(self.graphs) == len(self.labels)
@@ -376,11 +376,11 @@ class CoraDataset(object):
         self.num_labels = labels.shape[1]

         # build graph
-        idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
+        idx = np.asarray(idx_features_labels[:, 0], dtype=np.int32)
         idx_map = {j: i for i, j in enumerate(idx)}
         edges_unordered = np.genfromtxt("{}/cora/cora.cites".format(self.dir),
                                         dtype=np.int32)
-        edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
+        edges = np.asarray(list(map(idx_map.get, edges_unordered.flatten())),
                          dtype=np.int32).reshape(edges_unordered.shape)
         adj = sp.coo_matrix((np.ones(edges.shape[0]),
                              (edges[:, 0], edges[:, 1])),
@@ -392,7 +392,7 @@ class CoraDataset(object):
         self.graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph())

         features = _normalize(features)
-        self.features = np.array(features.todense())
+        self.features = np.asarray(features.todense())
         self.labels = np.where(labels)[1]

         self.train_mask = _sample_mask(range(140), labels.shape[0])
@@ -416,7 +416,7 @@ class CoraDataset(object):
 def _normalize(mx):
     """Row-normalize sparse matrix"""
-    rowsum = np.array(mx.sum(1))
+    rowsum = np.asarray(mx.sum(1))
     r_inv = np.power(rowsum, -1).flatten()
     r_inv[np.isinf(r_inv)] = 0.
     r_mat_inv = sp.diags(r_inv)
@@ -427,7 +427,7 @@ def _encode_onehot(labels):
     classes = list(sorted(set(labels)))
     classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                     enumerate(classes)}
-    labels_onehot = np.array(list(map(classes_dict.get, labels)),
+    labels_onehot = np.asarray(list(map(classes_dict.get, labels)),
                              dtype=np.int32)
     return labels_onehot
@@ -194,7 +194,7 @@ class GINDataset(object):
                 else:
                     nattrs = None

-                g.ndata['label'] = np.array(nlabels)
+                g.ndata['label'] = np.asarray(nlabels)
                 if len(self.nlabel_dict) > 1:
                     self.nlabels_flag = True
@@ -17,7 +17,7 @@ class KarateClub(object):
     def __init__(self):
         kG = nx.karate_club_graph()
-        self.label = np.array(
+        self.label = np.asarray(
             [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]).astype(np.int64)
         g = DGLGraph(kG)
         g.ndata['label'] = self.label
@@ -175,10 +175,10 @@ class RDFGraphDataset:
             dst.append(dst_id)
             etid.append(relclsid)
-        src = np.array(src)
-        dst = np.array(dst)
-        ntid = np.array(ntid)
-        etid = np.array(etid)
+        src = np.asarray(src)
+        dst = np.asarray(dst)
+        ntid = np.asarray(ntid)
+        etid = np.asarray(etid)
         ntypes = list(ent_classes.keys())
         etypes = list(rel_classes.keys())
@@ -78,7 +78,7 @@ class SST(object):
                 for line in pf.readlines():
                     sp = line.split(' ')
                     if sp[0].lower() in self.vocab:
-                        glove_emb[sp[0].lower()] = np.array([float(x) for x in sp[1:]])
+                        glove_emb[sp[0].lower()] = np.asarray([float(x) for x in sp[1:]])
         files = ['{}.txt'.format(self.mode)]
         corpus = BracketParseCorpusReader('{}/sst'.format(self.dir), files)
         sents = corpus.parsed_sents(files[0])
@@ -65,7 +65,7 @@ def split_dataset(dataset, frac_list=None, shuffle=False, random_state=None):
     from itertools import accumulate
     if frac_list is None:
         frac_list = [0.8, 0.1, 0.1]
-    frac_list = np.array(frac_list)
+    frac_list = np.asarray(frac_list)
     assert np.allclose(np.sum(frac_list), 1.), \
         'Expect frac_list sum to 1, got {:.4f}'.format(np.sum(frac_list))
     num_data = len(dataset)
@@ -1172,8 +1172,8 @@ def from_edge_list(elist, is_multigraph, readonly):
         src, dst = elist
     else:
         src, dst = zip(*elist)
-    src = np.array(src)
-    dst = np.array(dst)
+    src = np.asarray(src)
+    dst = np.asarray(dst)
     src_ids = utils.toindex(src)
     dst_ids = utils.toindex(dst)
     num_nodes = max(src.max(), dst.max()) + 1
@@ -63,7 +63,7 @@ class Index(object):
             self._slice_data = slice(data.start, data.stop)
         else:
             try:
-                data = np.array(data).astype(np.int64)
+                data = np.asarray(data, dtype=np.int64)
             except Exception: # pylint: disable=broad-except
                 raise DGLError('Error index data: %s' % str(data))
             if data.ndim == 0: # scalar array
@@ -517,5 +517,5 @@ def make_invmap(array, use_numpy=True):
     else:
         uniques = list(set(array))
     invmap = {x: i for i, x in enumerate(uniques)}
-    remapped = np.array([invmap[x] for x in array])
+    remapped = np.asarray([invmap[x] for x in array])
     return uniques, invmap, remapped