Commit 83fab71e authored by silencealiang
Browse files

disable torch._dynamo for reduce_from_tensor_model_parallel_region

parent eb4333f0
...@@ -167,6 +167,10 @@ class CoreAdaptation(MegatronAdaptationABC): ...@@ -167,6 +167,10 @@ class CoreAdaptation(MegatronAdaptationABC):
MegatronAdaptation.register('megatron.core.tensor_parallel.mappings.reduce_scatter_to_sequence_parallel_region', MegatronAdaptation.register('megatron.core.tensor_parallel.mappings.reduce_scatter_to_sequence_parallel_region',
torch._dynamo.disable, torch._dynamo.disable,
apply_wrapper=True) apply_wrapper=True)
# reduce_from_tensor_model_parallel_region
MegatronAdaptation.register('megatron.core.tensor_parallel.mappings.reduce_from_tensor_model_parallel_region',
torch._dynamo.disable,
apply_wrapper=True)
# flux # flux
if int(os.getenv("USE_FLUX_OVERLAP", "0")): if int(os.getenv("USE_FLUX_OVERLAP", "0")):
......
...@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac esac
\ No newline at end of file
...@@ -158,37 +158,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -158,37 +158,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac esac
\ No newline at end of file
...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
[0]) 0) numactl --cpunodebind=0 --membind=0 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 1) numactl --cpunodebind=1 --membind=1 ${APP} ;;
numactl --cpunodebind=0 --membind=0 ${APP} 2) numactl --cpunodebind=2 --membind=2 ${APP} ;;
;; 3) numactl --cpunodebind=3 --membind=3 ${APP} ;;
[1]) 4) numactl --cpunodebind=4 --membind=4 ${APP} ;;
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 5) numactl --cpunodebind=5 --membind=5 ${APP} ;;
numactl --cpunodebind=1 --membind=1 ${APP} 6) numactl --cpunodebind=6 --membind=6 ${APP} ;;
;; 7) numactl --cpunodebind=7 --membind=7 ${APP} ;;
[2]) esac
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ No newline at end of file
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment