1.sh 1.39 KB
Newer Older
1
#!/bin/bash
lishen's avatar
lishen committed
2

lijian6's avatar
lijian6 committed
3
# rocSHMEM
4
# rocSHMEM tuning knob passed to child processes through the environment.
# NOTE(review): presumably the queue-pair count for the default context on
# the GDA backend — confirm against the rocSHMEM documentation.
ROCSHMEM_GDA_NUM_QPS_DEFAULT_CTX=288
export ROCSHMEM_GDA_NUM_QPS_DEFAULT_CTX
5
# rocSHMEM setting passed to child processes through the environment.
# NOTE(review): presumably the upper bound on rocSHMEM contexts — confirm.
ROCSHMEM_MAX_NUM_CONTEXTS=60
export ROCSHMEM_MAX_NUM_CONTEXTS
lishen's avatar
lishen committed
6
# Comma-separated list of the eight devices mlx5_2 .. mlx5_9.
# NOTE(review): presumably restricts which verbs (IB) devices rocSHMEM may
# use — confirm against the rocSHMEM documentation.
ROCSHMEM_ALLOWED_IBV_DEVICES="mlx5_2,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_9"
export ROCSHMEM_ALLOWED_IBV_DEVICES
7
# rocSHMEM heap size passed to child processes through the environment.
# NOTE(review): presumably a byte count (about 3.48 GiB if so) — confirm the
# unit and that this value is intentional.
ROCSHMEM_HEAP_SIZE=3737418240
export ROCSHMEM_HEAP_SIZE
lishen's avatar
lishen committed
8
# Point rocSHMEM at a topology file named topo.config.
# The path is relative, so the script is presumably meant to be run from the
# directory that contains topo.config — confirm.
ROCSHMEM_TOPO_FILE_FORCE="./topo.config"
export ROCSHMEM_TOPO_FILE_FORCE
9
# For use on NMZ
10
11
# export ROCSHMEM_DISABLE_HDP_FLUSH=1
# export ROCSHMEM_GDR_DISABLE_XDP=1
lijian6's avatar
lijian6 committed
12

13
14
15
16
# # duSHMEM
# export LD_LIBRARY_PATH=/opt/dtk/dushmem/lib:$LD_LIBRARY_PATH
# export DEEP_EP_DEVICE_TO_HCA_MAPPING=0:mlx5_2:1,1:mlx5_3:1,2:mlx5_4:1,3:mlx5_5:1,4:mlx5_6:1,5:mlx5_7:1,6:mlx5_8:1,7:mlx5_9:1
# export NVSHMEM_SYMMETRIC_SIZE=10737418240
lijian6's avatar
lijian6 committed
17
18
19

# common
# Make GPU indices 0 through 7 visible to the HIP runtime in child processes.
HIP_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
export HIP_VISIBLE_DEVICES
lishen's avatar
lishen committed
20
# Put the parent of the current working directory first on Python's module
# search path (presumably so the test scripts can import the package that
# lives one level up — confirm).
# Any PYTHONPATH already set by the caller is kept after the new entry rather
# than being overwritten. Assignment and export are separate statements so
# the exit status of $(pwd) is not masked by `export`.
PYTHONPATH="$(pwd)/../${PYTHONPATH:+:${PYTHONPATH}}"
export PYTHONPATH
lijian6's avatar
lijian6 committed
21
22

# test
23
# Run ./test_internode.py through torchrun as node rank 0 of a 2-node job,
# one process per node, with master address 10.16.1.37 and master port 1234.
torchrun \
  --nproc-per-node=1 \
  --nnodes=2 \
  --node-rank=0 \
  --master-addr="10.16.1.37" \
  --master-port=1234 \
  ./test_internode.py
lishen's avatar
lishen committed
24
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 ./test_internode.py --test-ll-compatibility
25
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 ./test_low_latency.py # --pressure-test
26
27
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 ./test_low_latency.py --use-logfmt
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 ./test_low_latency.py --enable-dispatch-ll-layered --enable-combine-overlap