Unverified Commit 7a41c126 authored by Mufei Li, committed by GitHub

[Doc] Change random.py to random_partition.py in guide on distributed partition pipeline (#4438)



* Update distributed-preprocessing.rst

* Update
Co-authored-by: Ubuntu <ubuntu@ip-172-31-9-26.ap-northeast-1.compute.internal>
parent ad7be8be
distributed-preprocessing.rst
@@ -291,13 +291,13 @@ Step.1 Graph Partitioning
 This step reads the chunked graph data and calculates which partition each node
 should belong to. The results are saved in a set of *partition assignment files*.
 For example, to randomly partition MAG240M-LSC to two parts, run the
-``partition_algo/random.py`` script in the ``tools`` folder:
+``partition_algo/random_partition.py`` script in the ``tools`` folder:

 .. code-block:: bash

-    python /my/repo/dgl/tools/partition_algo/random.py \
-        --metadata /mydata/MAG240M-LSC_chunked/metadata.json \
-        --output_path /mydata/MAG240M-LSC_2parts/ \
+    python /my/repo/dgl/tools/partition_algo/random_partition.py \
+        --in_dir /mydata/MAG240M-LSC_chunked \
+        --out_dir /mydata/MAG240M-LSC_2parts \
         --num_partitions 2

 , which outputs files as follows:
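For context, here is a minimal sketch of driving the renamed script programmatically with the new flag names, mirroring the guide's example. The use of subprocess and the /mydata placeholder paths are illustrative assumptions, not part of this change:

    import subprocess

    # Run the renamed partitioning script with the new flag names.
    # The paths below are the placeholder paths from the guide example.
    subprocess.run(
        [
            "python", "/my/repo/dgl/tools/partition_algo/random_partition.py",
            "--in_dir", "/mydata/MAG240M-LSC_chunked",   # must contain metadata.json
            "--out_dir", "/mydata/MAG240M-LSC_2parts",   # receives the assignment files
            "--num_partitions", "2",
        ],
        check=True,  # raise CalledProcessError if the script exits non-zero
    )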
@@ -155,7 +155,7 @@ def test_part_pipeline():
     in_dir = os.path.join(root_dir, 'chunked-data')
     output_dir = os.path.join(root_dir, '2parts')
     os.system('python tools/partition_algo/random_partition.py '\
-        '--metadata {}/metadata.json --output_path {} --num_partitions {}'.format(
+        '--in_dir {} --out_dir {} --num_partitions {}'.format(
         in_dir, output_dir, num_chunks))
     for ntype in ['author', 'institution', 'paper']:
         fname = os.path.join(output_dir, '{}.txt'.format(ntype))
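The test body is cut off at the end of this hunk. A plausible continuation of the verification loop, assuming each <ntype>.txt holds one integer partition id per node (an assumption; the actual assertions are not shown here):

    import os

    output_dir = '/tmp/2parts'   # stands in for os.path.join(root_dir, '2parts')
    num_chunks = 2               # number of partitions requested above

    for ntype in ['author', 'institution', 'paper']:
        fname = os.path.join(output_dir, '{}.txt'.format(ntype))
        assert os.path.isfile(fname), 'missing ' + fname
        with open(fname) as f:
            part_ids = [int(line) for line in f]
        # Each node of this type should map to a valid partition index.
        assert all(0 <= p < num_chunks for p in part_ids)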
tools/partition_algo/random_partition.py
@@ -35,15 +35,14 @@ def random_partition(metadata, num_parts, output_path):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--metadata', type=str, help='input metadata file of the chunked graph format')
+        '--in_dir', type=str, help='input directory that contains the metadata file')
     parser.add_argument(
-        '--output_path', type=str, help='output directory')
+        '--out_dir', type=str, help='output directory')
     parser.add_argument(
         '--num_partitions', type=int, help='number of partitions')
     logging.basicConfig(level='INFO')
     args = parser.parse_args()
-    with open(args.metadata) as f:
+    with open(os.path.join(args.in_dir, 'metadata.json')) as f:
         metadata = json.load(f)
-    output_path = args.output_path
     num_parts = args.num_partitions
-    random_partition(metadata, num_parts, output_path)
+    random_partition(metadata, num_parts, args.out_dir)
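This hunk only renames the CLI flags; random_partition itself is unchanged and not shown in the diff. For orientation, a minimal sketch of a function with this signature that produces the per-node-type <ntype>.txt files the test checks for. The metadata keys 'node_type' and 'num_nodes_per_chunk' and their layout are assumptions about the chunked-graph format, not taken from this diff:

    import os

    import numpy as np

    def random_partition(metadata, num_parts, output_path):
        # Sketch only: assign every node a uniformly random partition id in
        # [0, num_parts) and write one '<ntype>.txt' file per node type,
        # one id per line.
        os.makedirs(output_path, exist_ok=True)
        node_types = metadata['node_type']            # assumed metadata schema
        counts = metadata['num_nodes_per_chunk']      # assumed metadata schema
        for ntype, chunk_sizes in zip(node_types, counts):
            num_nodes = sum(chunk_sizes)
            parts = np.random.randint(0, num_parts, size=num_nodes)
            with open(os.path.join(output_path, ntype + '.txt'), 'w') as f:
                f.writelines('{}\n'.format(p) for p in parts)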