Unverified commit bb542066, authored by Da Zheng, committed by GitHub
Browse files

remove pyinstrument. (#2772)


Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
parent edf64463
...@@ -4,7 +4,6 @@ This is an example of training GraphSage in a distributed fashion. Before traini ...@@ -4,7 +4,6 @@ This is an example of training GraphSage in a distributed fashion. Before traini
```bash ```bash
sudo pip3 install ogb sudo pip3 install ogb
sudo pip3 install pyinstrument
``` ```
To train GraphSage, it has five steps: To train GraphSage, it has five steps:
......
...@@ -20,7 +20,6 @@ import torch.nn.functional as F ...@@ -20,7 +20,6 @@ import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import torch.multiprocessing as mp import torch.multiprocessing as mp
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from pyinstrument import Profiler
def load_subtensor(g, seeds, input_nodes, device): def load_subtensor(g, seeds, input_nodes, device):
""" """
...@@ -186,9 +185,6 @@ def run(args, device, data): ...@@ -186,9 +185,6 @@ def run(args, device, data):
# Training loop # Training loop
iter_tput = [] iter_tput = []
profiler = Profiler()
if args.close_profiler == False:
profiler.start()
epoch = 0 epoch = 0
for epoch in range(args.num_epochs): for epoch in range(args.num_epochs):
tic = time.time() tic = time.time()
...@@ -253,9 +249,6 @@ def run(args, device, data): ...@@ -253,9 +249,6 @@ def run(args, device, data):
g.ndata['labels'], val_nid, test_nid, args.batch_size_eval, device) g.ndata['labels'], val_nid, test_nid, args.batch_size_eval, device)
print('Part {}, Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}'.format(g.rank(), val_acc, test_acc, print('Part {}, Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}'.format(g.rank(), val_acc, test_acc,
time.time() - start)) time.time() - start))
if args.close_profiler == False:
profiler.stop()
print(profiler.output_text(unicode=True, color=True))
def main(args): def main(args):
dgl.distributed.initialize(args.ip_config, args.num_servers, num_workers=args.num_workers) dgl.distributed.initialize(args.ip_config, args.num_servers, num_workers=args.num_workers)
...@@ -313,7 +306,6 @@ if __name__ == '__main__': ...@@ -313,7 +306,6 @@ if __name__ == '__main__':
help="Number of sampling processes. Use 0 for no extra process.") help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process') parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode') parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
parser.add_argument('--close_profiler', action='store_true', help='Close pyinstrument profiler')
args = parser.parse_args() args = parser.parse_args()
assert args.num_workers == int(os.environ.get('DGL_NUM_SAMPLER')), \ assert args.num_workers == int(os.environ.get('DGL_NUM_SAMPLER')), \
'The num_workers should be the same value with DGL_NUM_SAMPLER.' 'The num_workers should be the same value with DGL_NUM_SAMPLER.'
......
...@@ -21,7 +21,6 @@ import torch.nn.functional as F ...@@ -21,7 +21,6 @@ import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import torch.multiprocessing as mp import torch.multiprocessing as mp
from dgl.distributed import DistDataLoader from dgl.distributed import DistDataLoader
#from pyinstrument import Profiler
class SAGE(nn.Module): class SAGE(nn.Module):
def __init__(self, def __init__(self,
...@@ -328,8 +327,6 @@ def run(args, device, data): ...@@ -328,8 +327,6 @@ def run(args, device, data):
optimizer = optim.Adam(model.parameters(), lr=args.lr) optimizer = optim.Adam(model.parameters(), lr=args.lr)
# Training loop # Training loop
#profiler = Profiler()
#profiler.start()
epoch = 0 epoch = 0
for epoch in range(args.num_epochs): for epoch in range(args.num_epochs):
sample_time = 0 sample_time = 0
......
...@@ -5,7 +5,7 @@ This is an example of training RGCN node classification in a distributed fashion ...@@ -5,7 +5,7 @@ This is an example of training RGCN node classification in a distributed fashion
Before training, install python libs by pip: Before training, install python libs by pip:
```bash ```bash
pip3 install ogb pyinstrument pyarrow pip3 install ogb pyarrow
``` ```
To train RGCN, it has four steps: To train RGCN, it has four steps:
......
...@@ -29,7 +29,6 @@ from dgl.nn import RelGraphConv ...@@ -29,7 +29,6 @@ from dgl.nn import RelGraphConv
import tqdm import tqdm
from ogb.nodeproppred import DglNodePropPredDataset from ogb.nodeproppred import DglNodePropPredDataset
from pyinstrument import Profiler
class EntityClassify(nn.Module): class EntityClassify(nn.Module):
""" Entity classification class for RGCN """ Entity classification class for RGCN
......
...@@ -4,8 +4,6 @@ import torch as th ...@@ -4,8 +4,6 @@ import torch as th
import numpy as np import numpy as np
from ogb.nodeproppred import DglNodePropPredDataset from ogb.nodeproppred import DglNodePropPredDataset
from pyinstrument import Profiler
# Load OGB-MAG. # Load OGB-MAG.
dataset = DglNodePropPredDataset(name='ogbn-mag') dataset = DglNodePropPredDataset(name='ogbn-mag')
hg_orig, labels = dataset[0] hg_orig, labels = dataset[0]
......
...@@ -3,7 +3,6 @@ import json ...@@ -3,7 +3,6 @@ import json
import torch as th import torch as th
import numpy as np import numpy as np
from ogb.nodeproppred import DglNodePropPredDataset from ogb.nodeproppred import DglNodePropPredDataset
from pyinstrument import Profiler
# Load OGB-MAG. # Load OGB-MAG.
dataset = DglNodePropPredDataset(name='ogbn-mag') dataset = DglNodePropPredDataset(name='ogbn-mag')
...@@ -21,9 +20,6 @@ print(hg) ...@@ -21,9 +20,6 @@ print(hg)
# subg_nodes[ntype] = np.random.choice(hg.number_of_nodes(ntype), int(hg.number_of_nodes(ntype) / 5), replace=False) # subg_nodes[ntype] = np.random.choice(hg.number_of_nodes(ntype), int(hg.number_of_nodes(ntype) / 5), replace=False)
#hg = dgl.compact_graphs(dgl.node_subgraph(hg, subg_nodes)) #hg = dgl.compact_graphs(dgl.node_subgraph(hg, subg_nodes))
profiler = Profiler()
profiler.start()
# OGB-MAG is stored in heterogeneous format. We need to convert it into homogeneous format. # OGB-MAG is stored in heterogeneous format. We need to convert it into homogeneous format.
g = dgl.to_homogeneous(hg) g = dgl.to_homogeneous(hg)
g.ndata['orig_id'] = g.ndata[dgl.NID] g.ndata['orig_id'] = g.ndata[dgl.NID]
...@@ -85,6 +81,3 @@ for etype in hg.etypes: ...@@ -85,6 +81,3 @@ for etype in hg.etypes:
eid_ranges[etype] = [int(eid[0]), int(eid[-1] + 1)] eid_ranges[etype] = [int(eid[0]), int(eid[-1] + 1)]
with open('mag.json', 'w') as outfile: with open('mag.json', 'w') as outfile:
json.dump({'nid': nid_ranges, 'eid': eid_ranges}, outfile, indent=4) json.dump({'nid': nid_ranges, 'eid': eid_ranges}, outfile, indent=4)
profiler.stop()
print(profiler.output_text(unicode=True, color=True))
...@@ -7,7 +7,6 @@ import dgl ...@@ -7,7 +7,6 @@ import dgl
import torch as th import torch as th
import pyarrow import pyarrow
from pyarrow import csv from pyarrow import csv
from pyinstrument import Profiler
parser = argparse.ArgumentParser(description='Construct graph partitions') parser = argparse.ArgumentParser(description='Construct graph partitions')
parser.add_argument('--input-dir', required=True, type=str, parser.add_argument('--input-dir', required=True, type=str,
...@@ -58,9 +57,6 @@ etype_offset_np = np.array([e[1] for e in etypes]) ...@@ -58,9 +57,6 @@ etype_offset_np = np.array([e[1] for e in etypes])
etypes = [e[0] for e in etypes] etypes = [e[0] for e in etypes]
etypes_map = {e:i for i, e in enumerate(etypes)} etypes_map = {e:i for i, e in enumerate(etypes)}
profiler = Profiler()
profiler.start()
def read_feats(file_name): def read_feats(file_name):
attrs = csv.read_csv(file_name, read_options=pyarrow.csv.ReadOptions(autogenerate_column_names=True), attrs = csv.read_csv(file_name, read_options=pyarrow.csv.ReadOptions(autogenerate_column_names=True),
parse_options=pyarrow.csv.ParseOptions(delimiter=' ')) parse_options=pyarrow.csv.ParseOptions(delimiter=' '))
...@@ -227,6 +223,3 @@ for part_id in range(num_parts): ...@@ -227,6 +223,3 @@ for part_id in range(num_parts):
'part_graph': part_graph_file} 'part_graph': part_graph_file}
with open('{}/{}.json'.format(output_dir, graph_name), 'w') as outfile: with open('{}/{}.json'.format(output_dir, graph_name), 'w') as outfile:
json.dump(part_metadata, outfile, sort_keys=True, indent=4) json.dump(part_metadata, outfile, sort_keys=True, indent=4)
profiler.stop()
print(profiler.output_text(unicode=True, color=True))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment