Commit b1864da3 authored by yuguo
Browse files

[DCU] add NVTE_TP_OVERLAP_AGGREGATE

parent fdb21575
......@@ -329,7 +329,7 @@ def initialize_ub(
"cga_size": 1 if method == "ring_exchange" else 2,
"set_sm_margin": not method == "ring_exchange",
"num_splits": tp_size if method == "ring_exchange" else 4,
"aggregate": False,
"aggregate": bool(int(os.getenv("NVTE_TP_OVERLAP_AGGREGATE", "0"))),
"atomic_gemm": False,
"use_ce": True,
"fp8_buf": name in layers_all_gather_overlap,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment