1.sh 1.21 KB
Newer Older
1
# Clear out leftovers from a previous run: SIGKILL every process whose full
# command line matches /usr/bin/python (pgrep -f matches against the whole
# command line, not just the process name).
# -r stops xargs from invoking `kill -9` with no PIDs when pgrep finds nothing,
# which would otherwise print a usage error on every clean start.
pgrep -f /usr/bin/python | xargs -r kill -9
lishen's avatar
lishen committed
2

lijian6's avatar
lijian6 committed
3
4
5
6
7
8
# Open MPI MCA parameters (supplied as OMPI_MCA_* environment variables):
# pml and osc are set to ucx, and coll_hcoll_enable is set to 0.
OMPI_MCA_pml=ucx
OMPI_MCA_osc=ucx
OMPI_MCA_coll_hcoll_enable=0
# rmaps_base_mapping_policy is set to "slot:numa".
OMPI_MCA_rmaps_base_mapping_policy="slot:numa"
# UCX transport list.
UCX_TLS=rc,rocm
# Currently disabled:
# export ROCSHMEM_UNIQUEID_WITH_MPI=1
export OMPI_MCA_pml OMPI_MCA_osc OMPI_MCA_coll_hcoll_enable
export OMPI_MCA_rmaps_base_mapping_policy UCX_TLS
9
# rocSHMEM GDA setting; presumably the queue-pair count for the default
# context (inferred from the variable name -- confirm against rocSHMEM docs).
ROCSHMEM_GDA_NUM_QPS_DEFAULT_CTX=288
export ROCSHMEM_GDA_NUM_QPS_DEFAULT_CTX
10
# Upper bound on rocSHMEM contexts (meaning inferred from the variable name).
ROCSHMEM_MAX_NUM_CONTEXTS=48
export ROCSHMEM_MAX_NUM_CONTEXTS
lijian6's avatar
lijian6 committed
11
# UCX ROCm IPC signal-pool size limit (meaning inferred from the variable
# name -- confirm against the UCX configuration reference).
UCX_ROCM_IPC_SIGPOOL_MAX_ELEMS=16384
export UCX_ROCM_IPC_SIGPOOL_MAX_ELEMS
12
# Network devices handed to UCX: mlx5_2 through mlx5_9, each given as
# <device>:1.
UCX_NET_DEVICES=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export UCX_NET_DEVICES
lishen's avatar
lishen committed
13
# IB verbs devices rocSHMEM is allowed to use: the same mlx5_2..mlx5_9 set
# that UCX_NET_DEVICES names, here without the ":1" suffix.
ROCSHMEM_ALLOWED_IBV_DEVICES=mlx5_2,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_9
export ROCSHMEM_ALLOWED_IBV_DEVICES
lijian6's avatar
lijian6 committed
14
# Expose GPU indices 0 through 7 to HIP.
HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export HIP_VISIBLE_DEVICES
15
# Candidate ROCSHMEM_HEAP_SIZE values (pick one): 536870912 (512 * 1024^2), 805306368 (768 * 1024^2), 10737418240 (10 * 1024^3)
lijian6's avatar
lijian6 committed
16
# rocSHMEM heap size: 10737418240 = 10 * 1024^3 (10 GiB, assuming the unit
# is bytes).
ROCSHMEM_HEAP_SIZE=10737418240
export ROCSHMEM_HEAP_SIZE
lishen's avatar
lishen committed
17
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 tests/test_internode.py
lishen's avatar
lishen committed
18
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 tests/test_low_latency.py
19
# torchrun --nproc-per-node=1 --nnodes=2 --node-rank=0 --master-addr="10.16.1.37" --master-port=1234 tests/test_low_latency_new.py --pressure-test
lijian6's avatar
lijian6 committed
20
# Start this node's part of the two-node run: one process per node, node
# rank 0, master at 10.16.1.37:1234, running tests/test_internode.py with
# --test-ll-compatibility.
torchrun \
  --nproc-per-node=1 \
  --nnodes=2 \
  --node-rank=0 \
  --master-addr="10.16.1.37" \
  --master-port=1234 \
  tests/test_internode.py --test-ll-compatibility