Unverified commit 7a41c126, authored by Mufei Li, committed by GitHub
Browse files

[Doc] Change random.py to random_partition.py in guide on distributed partition pipeline (#4438)



* Update distributed-preprocessing.rst

* Update
Co-authored-by: Ubuntu <ubuntu@ip-172-31-9-26.ap-northeast-1.compute.internal>
parent ad7be8be
......@@ -291,13 +291,13 @@ Step.1 Graph Partitioning
This step reads the chunked graph data and calculates which partition each node
should belong to. The results are saved in a set of *partition assignment files*.
For example, to randomly partition MAG240M-LSC to two parts, run the
``partition_algo/random.py`` script in the ``tools`` folder:
``partition_algo/random_partition.py`` script in the ``tools`` folder:
.. code-block:: bash
python /my/repo/dgl/tools/partition_algo/random.py
--metadata /mydata/MAG240M-LSC_chunked/metadata.json
--output_path /mydata/MAG240M-LSC_2parts/
python /my/repo/dgl/tools/partition_algo/random_partition.py
--in_dir /mydata/MAG240M-LSC_chunked
--out_dir /mydata/MAG240M-LSC_2parts
--num_partitions 2
, which outputs files as follows:
......
......@@ -155,7 +155,7 @@ def test_part_pipeline():
in_dir = os.path.join(root_dir, 'chunked-data')
output_dir = os.path.join(root_dir, '2parts')
os.system('python tools/partition_algo/random_partition.py '\
'--metadata {}/metadata.json --output_path {} --num_partitions {}'.format(
'--in_dir {} --out_dir {} --num_partitions {}'.format(
in_dir, output_dir, num_chunks))
for ntype in ['author', 'institution', 'paper']:
fname = os.path.join(output_dir, '{}.txt'.format(ntype))
......
......@@ -35,15 +35,14 @@ def random_partition(metadata, num_parts, output_path):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--metadata', type=str, help='input metadata file of the chunked graph format')
'--in_dir', type=str, help='input directory that contains the metadata file')
parser.add_argument(
'--output_path', type=str, help='output directory')
'--out_dir', type=str, help='output directory')
parser.add_argument(
'--num_partitions', type=int, help='number of partitions')
logging.basicConfig(level='INFO')
args = parser.parse_args()
with open(args.metadata) as f:
with open(os.path.join(args.in_dir, 'metadata.json')) as f:
metadata = json.load(f)
output_path = args.output_path
num_parts = args.num_partitions
random_partition(metadata, num_parts, output_path)
random_partition(metadata, num_parts, args.out_dir)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment