Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
Baichuan_pytorch
Commits
70d83fd3
Commit
70d83fd3
authored
Nov 09, 2023
by
qianyj
Browse files
update
parent
7b47409b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
76 deletions
+146
-76
multi_node/env.sh
multi_node/env.sh
+50
-0
multi_node/run-13b-sft-single.sh
multi_node/run-13b-sft-single.sh
+41
-29
multi_node/run-13b-sft.sh
multi_node/run-13b-sft.sh
+4
-10
multi_node/run-7b-sft-lora-single.sh
multi_node/run-7b-sft-lora-single.sh
+46
-27
multi_node/run-7b-sft-lora.sh
multi_node/run-7b-sft-lora.sh
+5
-10
No files found.
multi_node/env.sh
0 → 100644
View file @
70d83fd3
#!/bin/bash
#
# env.sh - shared environment for the multi-node training scripts.
#
# Exports the DTK/ROCm toolchain locations (rooted at ROCM_PATH), the
# compiler/linker/loader search paths, and the MIOpen / RCCL(NCCL) runtime
# settings. The run-*.sh scripts in this directory load it with
# `source env.sh`, so everything here lands in the caller's shell.
#
# Deliberately no `set -e` / `set -u`: this file is sourced, and shell options
# set here would leak into the sourcing script.

# --- Toolchain root ---------------------------------------------------------
export ROCM_PATH=/opt/dtk-23.04
export ROCM_SOURCE_DIR="${ROCM_PATH}"
echo "$ROCM_PATH"
export HIP_PATH="${ROCM_PATH}/hip"
export AMDGPU_TARGETS="gfx900;gfx906"

# --- Search paths derived from ROCM_PATH ------------------------------------
export PATH="${ROCM_PATH}/bin:${ROCM_PATH}/llvm/bin:${ROCM_PATH}/hcc/bin:${ROCM_PATH}/hip/bin:${PATH}"
export LD_LIBRARY_PATH="${ROCM_PATH}/lib:${ROCM_PATH}/lib64:${LD_LIBRARY_PATH}"
export LD_LIBRARY_PATH="${ROCM_PATH}/hip/lib:${ROCM_PATH}/llvm/lib:${ROCM_PATH}/opencl/lib/x86_64:${LD_LIBRARY_PATH}"

# The OpenCL include entry used to be the root-level "/opencl/include", the
# only entry without the ${ROCM_PATH} prefix; it now matches its siblings.
export C_INCLUDE_PATH="${ROCM_PATH}/include:${ROCM_PATH}/hip/include/hip:${ROCM_PATH}/llvm/include:${ROCM_PATH}/opencl/include:${ROCM_PATH}/include/rocrand:${ROCM_PATH}/include/hiprand"
export CPLUS_INCLUDE_PATH="${ROCM_PATH}/include:${ROCM_PATH}/hip/include/hip:${ROCM_PATH}/llvm/include:${ROCM_PATH}/opencl/include:${ROCM_PATH}/include/rocrand:${ROCM_PATH}/include/hiprand"

export PATH="${ROCM_PATH}/miopen/bin:${ROCM_PATH}/rocblas/bin:${ROCM_PATH}/hipsparse/bin:${PATH}"
export LD_LIBRARY_PATH="${ROCM_PATH}/miopen/lib:${ROCM_PATH}/rocblas/lib:${LD_LIBRARY_PATH}"
export MIOPEN_SYSTEM_DB_PATH="${ROCM_PATH}/miopen/share/miopen/db/"

export LD_LIBRARY_PATH="/usr/lib64:${LD_LIBRARY_PATH}"
# Only append the previous value when there is one, so an unset LIBRARY_PATH
# does not leave a trailing ':' (an empty path component).
export LIBRARY_PATH="/usr/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"

# --- RCCL (exposed under both the RCCL_ and NCCL_ names) --------------------
export RCCL_PATH="${ROCM_PATH}/rccl"
export NCCL_PATH="${ROCM_PATH}/rccl"
export LD_LIBRARY_PATH="${RCCL_PATH}/lib:${LD_LIBRARY_PATH}"

# --- Runtime settings -------------------------------------------------------
# MIOPEN_FIND_MODE and HSA_FORCE_FINE_GRAIN_PCIE were exported twice with the
# same values; each is now set once.
export MIOPEN_FIND_MODE=3
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_P2P_LEVEL=5
export NCCL_GDR_FLUSH_DISABLE=1
export NCCL_NET_GDR_LEVEL=SYS
export RCCL_NCHANNELS=2
export NCCL_IB_HCA=mlx5
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx

export HIP_CLANG_PATH="${ROCM_PATH}/llvm/bin"
export HSA_PATH="${ROCM_PATH}/hsa"
export AOMP="${ROCM_PATH}/llvm"

# NOTE(review): the next two assignments REPLACE LD_LIBRARY_PATH and PATH with
# fixed lists. They discard both the incremental prepends above and whatever
# the caller had inherited (the PATH list even names /root/anaconda3). They are
# kept byte-for-byte to preserve current behaviour; confirm whether prepending
# was intended instead.
export LD_LIBRARY_PATH=/opt/dtk-23.04/rccl/lib:/usr/lib64:/opt/dtk-23.04/miopen/lib:/opt/dtk-23.04/rocblas/lib:/opt/dtk-23.04/hip/lib:/opt/dtk-23.04/llvm/lib:/opt/dtk-23.04/opencl/lib/x86_64:/opt/dtk-23.04/lib:/opt/dtk-23.04/lib64:/opt/dtk-23.04/rccl/lib:/usr/lib64:/opt/dtk-23.04/miopen/lib:/opt/dtk-23.04/rocblas/lib:/opt/dtk-23.04/hip/lib:/opt/dtk-23.04/llvm/lib:/opt/dtk-23.04/opencl/lib/x86_64:/opt/dtk-23.04/lib:/opt/dtk-23.04/lib64:/opt/dtk-23.04/roctracer/lib:/opt/dtk-23.04/rocthrust/lib:/opt/dtk-23.04/rocsparse/lib:/opt/dtk-23.04/rocsolver/lib:/opt/dtk-23.04/rocrand/lib:/opt/dtk-23.04/rocprofiler/lib:/opt/dtk-23.04/rocprim/lib:/opt/dtk-23.04/dtk-23.04_smi/lib:/opt/dtk-23.04/rocfft/lib:/opt/dtk-23.04/rocblas/lib:/opt/dtk-23.04/rocalution/lib:/opt/dtk-23.04/rccl/lib:/opt/dtk-23.04/opencl/lib:/opt/dtk-23.04/oam/lib:/opt/dtk-23.04/migraphx/lib:/opt/dtk-23.04/miopengemm/lib:/opt/dtk-23.04/miopen/lib:/opt/dtk-23.04/llvm/lib-debug/src/openmp/libomptarget/plugins/remote/lib:/opt/dtk-23.04/llvm/lib/clang/14.0.0/lib:/opt/dtk-23.04/llvm/lib:/opt/dtk-23.04/hsa/lib:/opt/dtk-23.04/hipsparse/lib:/opt/dtk-23.04/hipsolver/lib:/opt/dtk-23.04/hiprand/lib:/opt/dtk-23.04/hipfft/lib:/opt/dtk-23.04/hipcub/lib:/opt/dtk-23.04/hipblas-clients/lib:/opt/dtk-23.04/hipblas/lib:/opt/dtk-23.04/hip/lib:/opt/dtk-23.04/lib:/opt/dtk-23.04/lib64:/opt/mpi/lib:/usr/local/lib/:/usr/local/lib64/:/usr/lib64/
export PATH=/opt/dtk-23.04/miopen/bin:/opt/dtk-23.04/rocblas/bin:/opt/dtk-23.04/hipsparse/bin:/opt/dtk-23.04/bin:/opt/dtk-23.04/llvm/bin:/opt/dtk-23.04/hcc/bin:/opt/dtk-23.04/hip/bin:/opt/dtk-23.04/miopen/bin:/opt/dtk-23.04/rocblas/bin:/opt/dtk-23.04/hipsparse/bin:/opt/dtk-23.04/bin:/opt/dtk-23.04/llvm/bin:/opt/dtk-23.04/hcc/bin:/opt/dtk-23.04/hip/bin:/opt/dtk-23.04/libexec/rocprofiler:/opt/dtk-23.04/libexec/dtk-23.04_smi:/opt/dtk-23.04/rocprofiler/bin:/opt/dtk-23.04/opencl/bin:/opt/dtk-23.04/miopen/bin:/opt/dtk-23.04/llvm/lib/clang/14.0.0/bin:/opt/dtk-23.04/llvm/bin:/opt/dtk-23.04/hip/bin:/opt/dtk-23.04/bin:/opt/mpi/bin:/root/anaconda3/bin:/root/anaconda3/condabin:/usr/lib64/qt-3.3/bin:/root/perl5/bin:/opt/dtk-23.04/bin:/opt/dtk-23.04/hip/bin:/opt/dtk-23.04/llvm/bin:/opt/dtk-23.04/llvm/lib/clang/14.0.0/bin:/opt/dtk-23.04/miopen/bin:/opt/dtk-23.04/opencl/bin:/opt/dtk-23.04/rocprofiler/bin:/opt/dtk-23.04/libexec/dtk-23.04_smi:/opt/dtk-23.04/libexec/rocprofiler:/opt/rh/devtoolset-7/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin

export ROCM_ROOT="${ROCM_PATH}"
export ROCBLAS_TENSILE_LIBPATH="${ROCM_PATH}/lib/rocblas/library"
export HIP_ROCCLR_HOME="${ROCM_PATH}/hip"
export HIP_LIB_PATH="${ROCM_PATH}/hip/lib"
export DEVICE_LIB_PATH="${ROCM_PATH}/amdgcn/bitcode"
multi_node/run-13b-sft-single.sh
View file @
70d83fd3
#!/bin/bash
export
HSA_FORCE_FINE_GRAIN_PCIE
=
1
export
MIOPEN_FIND_MODE
=
3
export
MIOPEN_COMPILE_PARALLEL_LEVEL
=
1
export
NCCL_PLUGIN_P2P
=
ucx
export
RCCL_NCHANNELS
=
2
export
NCCL_SOCKET_IFNAME
=
ib0
export
NCCL_P2P_LEVEL
=
5
source
env.sh
GPUS
=
$1
lrank
=
$OMPI_COMM_WORLD_LOCAL_RANK
echo
"LRANK===============================
$lrank
"
RANK
=
$OMPI_COMM_WORLD_RANK
WORLD_SIZE
=
$OMPI_COMM_WORLD_SIZE
export
NCCL_IB_HCA
=
mlx5_0
#0号网卡
string
=
""
for
((
i
=
0
;
i<
$GPUS
;
i++
))
;
do
string
=
"
$string$i
,"
done
string
=
${
string
%
","
}
export
HIP_VISIBLE_DEVICES
=
$string
# echo "$HIP_VISIBLE_DEVICES"
APP
=
"python3 ../src/train_bash.py --stage sft
\
...
...
@@ -35,30 +31,46 @@ APP="python3 ../src/train_bash.py --stage sft \
--fp16
\
--deepspeed deepspeed.json
"
case
${
lrank
}
in
local_rank
=
$OMPI_COMM_WORLD_LOCAL_RANK
case
${
l
ocal_
rank
}
in
[
0]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_0:1
export
UCX_IB_PCI_BW
=
mlx5_0:50Gbs
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_1:1
export
UCX_IB_PCI_BW
=
mlx5_1:50Gbs
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
2]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_2:1
export
UCX_IB_PCI_BW
=
mlx5_2:50Gbs
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
3]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_3:1
export
UCX_IB_PCI_BW
=
mlx5_3:50Gbs
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
4]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
5]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
6]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
7]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
esac
multi_node/run-13b-sft.sh
View file @
70d83fd3
# Multi-node launcher: starts one MPI rank per GPU on every host listed in
# ./hostfile and runs the per-rank worker script under mpirun.
#
# Expects, relative to the current directory: env.sh, hostfile, and the worker
# script named on the mpirun line.

# Raise the per-user process limit before spawning the ranks.
ulimit -u 200000

export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export NCCL_DEBUG=INFO
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5

# Loaded after the exports above, so any variable env.sh also sets takes the
# env.sh value.
source env.sh

echo "START TIME: $(date)"

hostfile=./hostfile
# Ranks started per host; also passed to the worker script as its argument.
gpus_per_node=8

# Total ranks = number of distinct hostfile lines * ranks per host.
np=$(sort -u -- "$hostfile" | wc -l)
np=$(( np * gpus_per_node ))

# Print which mpirun will be used, for the job log.
command -v mpirun

# NOTE(review): the worker is referenced as a bare "mpi_single.sh"; confirm it
# is resolvable (cwd/PATH) on every host and is the intended 13B worker script.
mpirun -np "$np" --allow-run-as-root --hostfile "$hostfile" --bind-to none \
  --mca btl_tcp_if_include enp97s0f1 mpi_single.sh "$gpus_per_node"

echo "END TIME: $(date)"
...
...
multi_node/run-7b-sft-lora-single.sh
View file @
70d83fd3
#!/bin/bash
export
MIOPEN_FIND_MODE
=
3
export
GPU_MAX_HW_QUEUES
=
16
lrank
=
$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank
=
$OMPI_COMM_WORLD_RANK
comm_size
=
$OMPI_COMM_WORLD_SIZE
export
LOCAL_RANK
=
$OMPI_COMM_WORLD_LOCAL_RANK
export
RANK
=
$comm_rank
export
WORLD_SIZE
=
$comm_size
export
NCCL_IB_HCA
=
mlx5
export
NCCL_SOCKET_IFNAME
=
ib0
export
HIP_DIRECT_DISPATCH
=
0
source
env.sh
GPUS
=
$1
string
=
""
for
((
i
=
0
;
i<
$GPUS
;
i++
))
;
do
string
=
"
$string$i
,"
done
string
=
${
string
%
","
}
export
HIP_VISIBLE_DEVICES
=
$string
# echo "$HIP_VISIBLE_DEVICES"\
APP
=
"python3 ../src/train_bash.py --stage sft
\
...
...
@@ -39,30 +40,48 @@ APP="python3 ../src/train_bash.py --stage sft \
--fp16
\
--deepspeed deepspeed.json
"
case
${
lrank
}
in
local_rank
=
$OMPI_COMM_WORLD_LOCAL_RANK
echo
$local_rank
case
${
local_rank
}
in
[
0]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_0:1
export
UCX_IB_PCI_BW
=
mlx5_0:50Gbs
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_1:1
export
UCX_IB_PCI_BW
=
mlx5_1:50Gbs
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
2]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_2:1
export
UCX_IB_PCI_BW
=
mlx5_2:50Gbs
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
3]
)
export
HIP_VISIBLE_DEVICES
=
0,1,2,3
export
UCX_NET_DEVICES
=
mlx5_3:1
export
UCX_IB_PCI_BW
=
mlx5_3:50Gbs
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
4]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
5]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
6]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
7]
)
export
HIP_VISIBLE_DEVICES
=
$string
echo
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
esac
multi_node/run-7b-sft-lora.sh
View file @
70d83fd3
# Multi-node launcher: starts one MPI rank per GPU on every host listed in
# ./hostfile and runs the per-rank worker script under mpirun.
#
# Expects, relative to the current directory: env.sh, hostfile, and the worker
# script named on the mpirun line.

# Raise the per-user process limit before spawning the ranks.
ulimit -u 200000

export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export NCCL_DEBUG=INFO
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5

# Loaded after the exports above, so any variable env.sh also sets takes the
# env.sh value.
source env.sh

echo "START TIME: $(date)"

hostfile=./hostfile
# Ranks started per host; also passed to the worker script as its argument.
gpus_per_node=8

# Total ranks = number of distinct hostfile lines * ranks per host.
np=$(sort -u -- "$hostfile" | wc -l)
np=$(( np * gpus_per_node ))

# Print which mpirun will be used, for the job log.
command -v mpirun

# The worker is addressed by absolute path (launch directory + script name).
# NOTE(review): the name "run-7b-single-lora.sh" differs from the
# "run-7b-sft-lora-single.sh" worker seen alongside this launcher - confirm
# which file name is correct.
mpirun -np "$np" --allow-run-as-root --hostfile "$hostfile" --bind-to none \
  --mca btl_tcp_if_include enp97s0f1 "$(pwd)/run-7b-single-lora.sh" "$gpus_per_node"

echo "END TIME: $(date)"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment