Commit 83fab71e authored by silencealiang
Browse files

disable torch._dynamo for reduce_from_tensor_model_parallel_region

parent eb4333f0
......@@ -167,6 +167,10 @@ class CoreAdaptation(MegatronAdaptationABC):
MegatronAdaptation.register('megatron.core.tensor_parallel.mappings.reduce_scatter_to_sequence_parallel_region',
torch._dynamo.disable,
apply_wrapper=True)
# reduce_from_tensor_model_parallel_region
MegatronAdaptation.register('megatron.core.tensor_parallel.mappings.reduce_from_tensor_model_parallel_region',
torch._dynamo.disable,
apply_wrapper=True)
# flux
if int(os.getenv("USE_FLUX_OVERLAP", "0")):
......
......@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -158,37 +158,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
......@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in
[0])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment