Commit 535c37b6 authored by silencealiang
Browse files

update model parameters format

parent 83fab71e
...@@ -96,7 +96,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks): ...@@ -96,7 +96,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks):
print("> initializing torch distributed ...", flush=True) print("> initializing torch distributed ...", flush=True)
# Manually set the device ids. # Manually set the device ids.
if device_count > 0: if device_count > 0:
torch.cuda.set_device(args.local_rank) torch.cuda.set_device(args.local_rank % device_count)
device_id = torch.device(f'cuda:{args.local_rank}') device_id = torch.device(f'cuda:{args.local_rank}')
else: else:
device_id = None device_id = None
......
...@@ -429,14 +429,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -429,14 +429,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -429,14 +429,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -429,14 +429,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -429,14 +429,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -429,14 +429,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -164,14 +164,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -164,14 +164,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -164,14 +164,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -164,14 +164,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -158,14 +158,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -158,14 +158,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then ...@@ -167,14 +167,29 @@ elif [[ $profiling == "hip" ]]; then
fi fi
#for hygon cpu #for hygon cpu
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
case ${LOCAL_RANK} in case ${LOCAL_RANK} in
0) numactl --cpunodebind=0 --membind=0 ${APP} ;; 0)
1) numactl --cpunodebind=1 --membind=1 ${APP} ;; export HIP_VISIBLE_DEVICES=0
2) numactl --cpunodebind=2 --membind=2 ${APP} ;; numactl --cpunodebind=0 --membind=0 ${APP} ;;
3) numactl --cpunodebind=3 --membind=3 ${APP} ;; 1)
4) numactl --cpunodebind=4 --membind=4 ${APP} ;; export HIP_VISIBLE_DEVICES=1
5) numactl --cpunodebind=5 --membind=5 ${APP} ;; numactl --cpunodebind=1 --membind=1 ${APP} ;;
6) numactl --cpunodebind=6 --membind=6 ${APP} ;; 2)
7) numactl --cpunodebind=7 --membind=7 ${APP} ;; export HIP_VISIBLE_DEVICES=2
numactl --cpunodebind=2 --membind=2 ${APP} ;;
3)
export HIP_VISIBLE_DEVICES=3
numactl --cpunodebind=3 --membind=3 ${APP} ;;
4)
export HIP_VISIBLE_DEVICES=4
numactl --cpunodebind=4 --membind=4 ${APP} ;;
5)
export HIP_VISIBLE_DEVICES=5
numactl --cpunodebind=5 --membind=5 ${APP} ;;
6)
export HIP_VISIBLE_DEVICES=6
numactl --cpunodebind=6 --membind=6 ${APP} ;;
7)
export HIP_VISIBLE_DEVICES=7
numactl --cpunodebind=7 --membind=7 ${APP} ;;
esac esac
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment