Unverified commit 5b549c85, authored by Guangguan Wang and committed by GitHub
Browse files

Minor patches for deepep (#318)



* Add arg --pressure-test for test_low_latency.py

Add arg --pressure-test for test_low_latency.py
Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>

* Export NVSHMEM_QP_DEPTH

Export NVSHMEM_QP_DEPTH
Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>

---------
Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
parent f9c06bb0
......@@ -85,7 +85,7 @@ class Buffer:
os.environ['NVSHMEM_IB_ENABLE_IBGDA'] = '1'
os.environ['NVSHMEM_IBGDA_NUM_RC_PER_PE'] = f'{num_qps_per_rank}'
# Make sure QP depth is always larger than the number of in-flight WRs, so that we can skip WQ slot check
os.environ['NVSHMEM_QP_DEPTH'] = '1024'
os.environ['NVSHMEM_QP_DEPTH'] = os.environ.get('NVSHMEM_QP_DEPTH', '1024')
# Reduce gpu memory usage
# 6 default teams + 1 extra team
......
......@@ -164,7 +164,7 @@ def test_loop(local_rank: int, num_local_ranks: int, args: argparse.Namespace):
test_main(num_tokens, hidden, num_experts, num_topk, rank, num_ranks, group, buffer,
use_logfmt=args.use_logfmt, seed=1)
do_pressure_test = False
do_pressure_test = args.pressure_test
for seed in range(int(1e9) if do_pressure_test else 0):
if local_rank == 0:
print(f'Testing with seed {seed} ...', flush=True)
......@@ -198,6 +198,8 @@ if __name__ == '__main__':
help='Whether to disable NVLink for testing')
parser.add_argument('--use-logfmt', action='store_true',
help='Whether to test LogFMT combine')
parser.add_argument("--pressure-test", action='store_true',
help='Whether to do pressure test')
args = parser.parse_args()
num_processes = args.num_processes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment