Unverified Commit 1c4bfb62 authored by Quan (Andy) Gan, committed by GitHub

[Performance] Replace np.array with np.asarray (#1301)



* replace np.array with np.asarray

* fix
Co-authored-by: Minjie Wang <minjie.wang@nyu.edu>
parent 5dd35580
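A note on the rationale: `np.array` copies its input by default, while `np.asarray` returns an existing `ndarray` untouched when no dtype conversion is needed, so call sites that may already receive NumPy arrays skip a redundant allocation. A minimal sketch of the difference (illustrative only, not part of the commit):

```python
import numpy as np

x = np.arange(1_000_000, dtype=np.int64)

a = np.array(x)    # copies: allocates a fresh buffer on every call
b = np.asarray(x)  # no copy: returns x itself, since the dtype already matches

print(np.shares_memory(a, x))  # False
print(b is x)                  # True

# When a conversion is required, both functions must allocate,
# so the two behave identically on that path:
c = np.asarray(x, dtype=np.float32)
print(np.shares_memory(c, x))  # False
```

For inputs that are plain Python lists, both calls build a new array, so those hunks are behavior-preserving; the saving comes from the paths that may already hold an `ndarray`.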
@@ -250,7 +250,7 @@ class NDArrayBase(_NDArrayBase):
         if not isinstance(source_array, np.ndarray):
             try:
-                source_array = np.array(source_array, dtype=self.dtype)
+                source_array = np.asarray(source_array, dtype=self.dtype)
             except:
                 raise TypeError('array must be an array_like data,' +
                                 'type %s is not supported' % str(type(source_array)))
@@ -541,7 +541,7 @@ def _reduce_grad(grad, shape):
     num_to_squeeze = len(grad_shape) - len(in_shape)
     # pad in_shape
     in_shape = (1,) * num_to_squeeze + in_shape
-    reduce_idx = np.nonzero(np.array(grad_shape) - np.array(in_shape))[0]
+    reduce_idx = np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))[0]
     reduce_idx += 1 # skip batch dim
     grad = grad.sum(axis=tuple(reduce_idx), keepdims=True)
     return grad.reshape(shape)
@@ -365,7 +365,7 @@ def zerocopy_from_dlpack(dlpack_tensor):
 def zerocopy_to_numpy(input):
     # NOTE: not zerocopy
-    return np.array(memoryview(input))
+    return np.asarray(memoryview(input))

 def zerocopy_from_numpy(np_array):
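One side effect worth flagging here: unlike `np.array`, `np.asarray` over a `memoryview` wraps the underlying buffer as a view rather than copying it, so if `input` exposes the buffer protocol this call may now actually be zero-copy and the `NOTE: not zerocopy` comment may be stale. A quick check (a sketch, independent of DGL):

```python
import numpy as np

buf = memoryview(bytearray(b"\x01\x02\x03\x04"))

a = np.array(buf)    # copies the buffer into a new array
b = np.asarray(buf)  # wraps the buffer; no copy

print(np.shares_memory(a, buf))  # False
print(np.shares_memory(b, buf))  # True
```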
@@ -548,8 +548,7 @@ def _reduce_grad(grad, shape):
     num_to_squeeze = len(grad_shape) - len(in_shape)
     # pad inshape
     in_shape = (1,) * num_to_squeeze + in_shape
-    reduce_idx = np.array(np.nonzero(
-        np.array(grad_shape) - np.array(in_shape)))
+    reduce_idx = np.asarray(np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape)))
     reduce_idx += 1 # skip batch dim
     reduce_idx_tensor = tf.constant(tuple(
         reduce_idx.flatten().tolist()))
@@ -184,9 +184,9 @@ class RGCNLinkDataset(object):
         test_path = os.path.join(self.dir, 'test.txt')
         entity_dict = _read_dictionary(entity_path)
         relation_dict = _read_dictionary(relation_path)
-        self.train = np.array(_read_triplets_as_list(train_path, entity_dict, relation_dict))
-        self.valid = np.array(_read_triplets_as_list(valid_path, entity_dict, relation_dict))
-        self.test = np.array(_read_triplets_as_list(test_path, entity_dict, relation_dict))
+        self.train = np.asarray(_read_triplets_as_list(train_path, entity_dict, relation_dict))
+        self.valid = np.asarray(_read_triplets_as_list(valid_path, entity_dict, relation_dict))
+        self.test = np.asarray(_read_triplets_as_list(test_path, entity_dict, relation_dict))
         self.num_nodes = len(entity_dict)
         print("# entities: {}".format(self.num_nodes))
         self.num_rels = len(relation_dict)
@@ -417,10 +417,10 @@ def _load_data(dataset_str='aifb', dataset_path=None):
         # sort indices by destination
         edge_list = sorted(edge_list, key=lambda x: (x[1], x[0], x[2]))
-        edge_list = np.array(edge_list, dtype=np.int)
+        edge_list = np.asarray(edge_list, dtype=np.int)
         print('Number of edges: ', len(edge_list))
-        np.savez(edge_file, edges=edge_list, n=np.array(num_node), nrel=np.array(num_rel))
+        np.savez(edge_file, edges=edge_list, n=np.asarray(num_node), nrel=np.asarray(num_rel))
         nodes_u_dict = {np.unicode(to_unicode(key)): val for key, val in
                         nodes_dict.items()}
@@ -209,7 +209,7 @@ def metapath_random_walk(hg, etypes, seeds, num_traces):
         raise ValueError('beginning and ending node type mismatch')
     if len(seeds) == 0:
         return []
-    etype_array = ndarray.array(np.array([hg.get_etype_id(et) for et in etypes], dtype='int64'))
+    etype_array = ndarray.array(np.asarray([hg.get_etype_id(et) for et in etypes], dtype='int64'))
     seed_array = utils.toindex(seeds).todgltensor()
     traces = _CAPI_DGLMetapathRandomWalk(hg._graph, etype_array, seed_array, num_traces)
     return _split_traces(traces)
@@ -604,7 +604,7 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, metagraph
         etype_id = etypes_invmap[etype]
         dsttype_id = ntypes_invmap[dsttype]
         canonical_etids.append((srctype_id, etype_id, dsttype_id))
-    canonical_etids = np.array(canonical_etids)
+    canonical_etids = np.asarray(canonical_etids)
     etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
     edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))]
@@ -74,11 +74,11 @@ def alchemy_nodes(mol):
                                            Chem.rdchem.HybridizationType.SP2,
                                            Chem.rdchem.HybridizationType.SP3])
         h_u.append(num_h)
-        atom_feats_dict['n_feat'].append(F.tensor(np.array(h_u).astype(np.float32)))
+        atom_feats_dict['n_feat'].append(F.tensor(np.asarray(h_u, dtype=np.float32)))

     atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
-    atom_feats_dict['node_type'] = F.tensor(np.array(
-        atom_feats_dict['node_type']).astype(np.int64))
+    atom_feats_dict['node_type'] = F.tensor(
+        np.asarray(atom_feats_dict['node_type'], dtype=np.int64))

     return atom_feats_dict
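Several hunks in this file also collapse `np.array(x).astype(t)` into `np.asarray(x, dtype=t)`. Beyond the copy-avoidance above, this saves an intermediate array: the `.astype` form materializes one array and then copies it into the target dtype, whereas passing `dtype=` builds the converted array in a single step. A hedged sketch (`h_u` here is a stand-in list, not the real feature data):

```python
import numpy as np

h_u = [1.0, 0.0, 3.0]

# two allocations: a float64 array from the list, then a float32 copy
v1 = np.array(h_u).astype(np.float32)

# one allocation: the list is converted straight to float32
v2 = np.asarray(h_u, dtype=np.float32)

assert v1.dtype == v2.dtype == np.float32
assert np.array_equal(v1, v2)
```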
@@ -126,9 +126,9 @@ def alchemy_edges(mol, self_loop=False):
                 np.linalg.norm(geom[u] - geom[v]))
     bond_feats_dict['e_feat'] = F.tensor(
-        np.array(bond_feats_dict['e_feat']).astype(np.float32))
+        np.asarray(bond_feats_dict['e_feat'], dtype=np.float32))
     bond_feats_dict['distance'] = F.tensor(
-        np.array(bond_feats_dict['distance']).astype(np.float32)).reshape(-1 , 1)
+        np.asarray(bond_feats_dict['distance'], dtype=np.float32)).reshape(-1, 1)

     return bond_feats_dict
@@ -239,7 +239,7 @@ class TencentAlchemyDataset(object):
             smiles = Chem.MolToSmiles(mol)
             self.smiles.append(smiles)
             self.graphs.append(graph)
-            label = F.tensor(np.array(label[1].tolist()).astype(np.float32))
+            label = F.tensor(np.asarray(label[1].tolist(), dtype=np.float32))
             self.labels.append(label)
         save_graphs(osp.join(self.file_dir, "%s_graphs.bin" % self.mode), self.graphs,
@@ -290,7 +290,7 @@ class TencentAlchemyDataset(object):
         std : int or float
             Default to be None.
         """
-        labels = np.array([i.numpy() for i in self.labels])
+        labels = np.asarray([i.numpy() for i in self.labels])
         if mean is None:
             mean = np.mean(labels, axis=0)
         if std is None:
@@ -124,7 +124,7 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     ligand_graph = graph((ligand_srcs, ligand_dsts),
                          'ligand_atom', 'ligand', num_ligand_atoms)
     ligand_graph.edata['distance'] = F.reshape(F.zerocopy_from_numpy(
-        np.array(ligand_dists).astype(np.float32)), (-1, 1))
+        np.asarray(ligand_dists, dtype=np.float32)), (-1, 1))

     # Construct graph for atoms in the protein
     protein_srcs, protein_dsts, protein_dists = k_nearest_neighbors(
@@ -132,7 +132,7 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     protein_graph = graph((protein_srcs, protein_dsts),
                           'protein_atom', 'protein', num_protein_atoms)
     protein_graph.edata['distance'] = F.reshape(F.zerocopy_from_numpy(
-        np.array(protein_dists).astype(np.float32)), (-1, 1))
+        np.asarray(protein_dists, dtype=np.float32)), (-1, 1))

     # Construct 4 graphs for complex representation, including the connection within
     # protein atoms, the connection within ligand atoms and the connection between
@@ -140,9 +140,9 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     complex_srcs, complex_dsts, complex_dists = k_nearest_neighbors(
         np.concatenate([ligand_coordinates, protein_coordinates]),
         neighbor_cutoff, max_num_neighbors)
-    complex_srcs = np.array(complex_srcs)
-    complex_dsts = np.array(complex_dsts)
-    complex_dists = np.array(complex_dists)
+    complex_srcs = np.asarray(complex_srcs)
+    complex_dsts = np.asarray(complex_dsts)
+    complex_dists = np.asarray(complex_dists)
     offset = num_ligand_atoms

     # ('ligand_atom', 'complex', 'ligand_atom')
@@ -206,11 +206,11 @@ def ACNN_graph_construction_and_featurization(ligand_mol,
     )

     # Get atomic numbers for all atoms left and set node features
-    ligand_atomic_numbers = np.array(get_atomic_numbers(ligand_mol, ligand_atom_indices_left))
+    ligand_atomic_numbers = np.asarray(get_atomic_numbers(ligand_mol, ligand_atom_indices_left))
     # zero padding
     ligand_atomic_numbers = np.concatenate([
         ligand_atomic_numbers, np.zeros(num_ligand_atoms - len(ligand_atom_indices_left))])
-    protein_atomic_numbers = np.array(get_atomic_numbers(protein_mol, protein_atom_indices_left))
+    protein_atomic_numbers = np.asarray(get_atomic_numbers(protein_mol, protein_atom_indices_left))
     # zero padding
     protein_atomic_numbers = np.concatenate([
         protein_atomic_numbers, np.zeros(num_protein_atoms - len(protein_atom_indices_left))])
@@ -108,7 +108,7 @@ def indices_split(dataset, frac_train, frac_val, frac_test, indices):
     list of length 3
         Subsets for training, validation and test, which are all :class:`Subset` instances.
     """
-    frac_list = np.array([frac_train, frac_val, frac_test])
+    frac_list = np.asarray([frac_train, frac_val, frac_test])
     assert np.allclose(np.sum(frac_list), 1.), \
         'Expect frac_list sum to 1, got {:.4f}'.format(np.sum(frac_list))
     num_data = len(dataset)
@@ -136,12 +136,12 @@ class CitationGraphDataset(object):
 def _preprocess_features(features):
     """Row-normalize feature matrix and convert to tuple representation"""
-    rowsum = np.array(features.sum(1))
+    rowsum = np.asarray(features.sum(1))
     r_inv = np.power(rowsum, -1).flatten()
     r_inv[np.isinf(r_inv)] = 0.
     r_mat_inv = sp.diags(r_inv)
     features = r_mat_inv.dot(features)
-    return np.array(features.todense())
+    return np.asarray(features.todense())

 def _parse_index_file(filename):
     """Parse index file."""
@@ -329,12 +329,12 @@ class CoraBinary(object):
             for line in f.readlines():
                 if line.startswith('graph'):
                     if len(cur) != 0:
-                        self.labels.append(np.array(cur))
+                        self.labels.append(np.asarray(cur))
                         cur = []
                 else:
                     cur.append(int(line.strip()))
             if len(cur) != 0:
-                self.labels.append(np.array(cur))
+                self.labels.append(np.asarray(cur))
         # sanity check
         assert len(self.graphs) == len(self.pmpds)
         assert len(self.graphs) == len(self.labels)
@@ -376,11 +376,11 @@ class CoraDataset(object):
         self.num_labels = labels.shape[1]

         # build graph
-        idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
+        idx = np.asarray(idx_features_labels[:, 0], dtype=np.int32)
         idx_map = {j: i for i, j in enumerate(idx)}
         edges_unordered = np.genfromtxt("{}/cora/cora.cites".format(self.dir),
                                         dtype=np.int32)
-        edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
+        edges = np.asarray(list(map(idx_map.get, edges_unordered.flatten())),
                          dtype=np.int32).reshape(edges_unordered.shape)
         adj = sp.coo_matrix((np.ones(edges.shape[0]),
                              (edges[:, 0], edges[:, 1])),
@@ -392,7 +392,7 @@ class CoraDataset(object):
         self.graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph())

         features = _normalize(features)
-        self.features = np.array(features.todense())
+        self.features = np.asarray(features.todense())
         self.labels = np.where(labels)[1]

         self.train_mask = _sample_mask(range(140), labels.shape[0])
@@ -416,7 +416,7 @@ class CoraDataset(object):
 def _normalize(mx):
     """Row-normalize sparse matrix"""
-    rowsum = np.array(mx.sum(1))
+    rowsum = np.asarray(mx.sum(1))
     r_inv = np.power(rowsum, -1).flatten()
     r_inv[np.isinf(r_inv)] = 0.
     r_mat_inv = sp.diags(r_inv)
@@ -427,7 +427,7 @@ def _encode_onehot(labels):
     classes = list(sorted(set(labels)))
     classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                     enumerate(classes)}
-    labels_onehot = np.array(list(map(classes_dict.get, labels)),
+    labels_onehot = np.asarray(list(map(classes_dict.get, labels)),
                              dtype=np.int32)
     return labels_onehot
@@ -194,7 +194,7 @@ class GINDataset(object):
                 else:
                     nattrs = None

-                g.ndata['label'] = np.array(nlabels)
+                g.ndata['label'] = np.asarray(nlabels)
                 if len(self.nlabel_dict) > 1:
                     self.nlabels_flag = True
@@ -17,7 +17,7 @@ class KarateClub(object):
     def __init__(self):
         kG = nx.karate_club_graph()
-        self.label = np.array(
+        self.label = np.asarray(
             [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]).astype(np.int64)
         g = DGLGraph(kG)
         g.ndata['label'] = self.label
@@ -175,10 +175,10 @@ class RDFGraphDataset:
             dst.append(dst_id)
             etid.append(relclsid)
-        src = np.array(src)
-        dst = np.array(dst)
-        ntid = np.array(ntid)
-        etid = np.array(etid)
+        src = np.asarray(src)
+        dst = np.asarray(dst)
+        ntid = np.asarray(ntid)
+        etid = np.asarray(etid)
         ntypes = list(ent_classes.keys())
         etypes = list(rel_classes.keys())
@@ -78,7 +78,7 @@ class SST(object):
                 for line in pf.readlines():
                     sp = line.split(' ')
                     if sp[0].lower() in self.vocab:
-                        glove_emb[sp[0].lower()] = np.array([float(x) for x in sp[1:]])
+                        glove_emb[sp[0].lower()] = np.asarray([float(x) for x in sp[1:]])
         files = ['{}.txt'.format(self.mode)]
         corpus = BracketParseCorpusReader('{}/sst'.format(self.dir), files)
         sents = corpus.parsed_sents(files[0])
@@ -65,7 +65,7 @@ def split_dataset(dataset, frac_list=None, shuffle=False, random_state=None):
     from itertools import accumulate
     if frac_list is None:
         frac_list = [0.8, 0.1, 0.1]
-    frac_list = np.array(frac_list)
+    frac_list = np.asarray(frac_list)
     assert np.allclose(np.sum(frac_list), 1.), \
         'Expect frac_list sum to 1, got {:.4f}'.format(np.sum(frac_list))
     num_data = len(dataset)
@@ -1172,8 +1172,8 @@ def from_edge_list(elist, is_multigraph, readonly):
         src, dst = elist
     else:
         src, dst = zip(*elist)
-    src = np.array(src)
-    dst = np.array(dst)
+    src = np.asarray(src)
+    dst = np.asarray(dst)
     src_ids = utils.toindex(src)
     dst_ids = utils.toindex(dst)
     num_nodes = max(src.max(), dst.max()) + 1
@@ -63,7 +63,7 @@ class Index(object):
             self._slice_data = slice(data.start, data.stop)
         else:
             try:
-                data = np.array(data).astype(np.int64)
+                data = np.asarray(data, dtype=np.int64)
             except Exception: # pylint: disable=broad-except
                 raise DGLError('Error index data: %s' % str(data))
             if data.ndim == 0: # scalar array
@@ -517,5 +517,5 @@ def make_invmap(array, use_numpy=True):
     else:
         uniques = list(set(array))
     invmap = {x: i for i, x in enumerate(uniques)}
-    remapped = np.array([invmap[x] for x in array])
+    remapped = np.asarray([invmap[x] for x in array])
     return uniques, invmap, remapped