evt_fugx1 / dcu_megatron · Commits

Commit 250c7fb0
Authored May 22, 2025 by wxj

update model parameters format

Parent: c788823b
Showing 14 changed files with 24 additions and 32 deletions (+24 -32)
examples/deepseek_v3/run_deepseekv3_671B.sh                +2 -4
examples/deepseek_v3/train_deepseekv3_671B_128nodes.sh     +1 -1
examples/deepseek_v3/train_deepseekv3_671B_1nodes.sh       +1 -1
examples/deepseek_v3/train_deepseekv3_671B_4nodes.sh       +1 -1
examples/gpt3/run_gpt_567B.sh                              +2 -4
examples/gpt3/train_gpt_567B_128nodes.sh                   +1 -1
examples/gpt3/train_gpt_567B_1nodes.sh                     +1 -1
examples/mixtral/run_mixtral_8x22B.sh                      +2 -4
examples/mixtral/run_mixtral_8x7B.sh                       +2 -4
examples/mixtral/train_mixtral_8x22B_1nodes.sh             +1 -1
examples/mixtral/train_mixtral_8x22B_8nodes.sh             +1 -1
examples/mixtral/train_mixtral_8x7B_1nodes.sh              +1 -1
examples/mixtral/train_mixtral_8x7B_4nodes.sh              +1 -1
requirements/launch_with_binding.sh                        +7 -7
examples/deepseek_v3/run_deepseekv3_671B.sh

@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS=""       # how many gpus to use
DTK_ENV=""    # where env.sh of dtk
NCCL_ENV=""   # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT=""   # mpi port to use
HOST=""       # hostname
PORT=""       # port id
DATA_PATH=""  # path to mmap_deepseekv3_datasets_text_document
...
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_deepseekv3_671B \
    --allow-run-as-root \
    --bind-to none \
    --mca plm_rsh_no_tree_spawn 1 \
    --mca plm_rsh_args "-p ${MPI_PORT}" \
    bash -c "source ${DTK_ENV} && \
    source ${NCCL_ENV} && \
    ./train_deepseekv3_671B_$((${GPUS}/8))nodes.sh \
    ${HOST} \
    ${PORT} \
...
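The node count in the launcher is derived from the GPU count: ./train_deepseekv3_671B_$((${GPUS}/8))nodes.sh assumes 8 GPUs per node and appends the result to the training script name. A minimal sketch of that arithmetic, with an illustrative GPUS value (the shipped script leaves GPUS empty for the user to fill in):

#!/bin/bash
# Illustration only: how the launcher derives the per-node-count training script name.
GPUS=32                                   # hypothetical value for this sketch
NODES=$(( GPUS / 8 ))                     # the launchers assume 8 GPUs per node
echo "./train_deepseekv3_671B_${NODES}nodes.sh"   # -> ./train_deepseekv3_671B_4nodes.sh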
examples/deepseek_v3/train_deepseekv3_671B_128nodes.sh

@@ -29,7 +29,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
export MP_PP0_LAYERS=5    # whether to enable depends on the actual setup
### BASE CONFIG ###
...
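The one-line change in each train_*.sh file comments out the GROUPED_GEMM_BatchLinear export. The commit does not show how the flag is consumed downstream; as a hedged sketch, the observable effect of commenting the export out is simply that the variable is no longer set in the environment the training process inherits:

#!/bin/bash
# Illustration only: the observable effect of commenting out the export.
export GROUPED_GEMM_BatchLinear=1
printenv GROUPED_GEMM_BatchLinear                    # prints: 1
unset GROUPED_GEMM_BatchLinear                       # same effect as leaving the export commented out
printenv GROUPED_GEMM_BatchLinear || echo "unset"    # prints: unset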
examples/deepseek_v3/train_deepseekv3_671B_1nodes.sh

@@ -29,7 +29,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
#export MP_PP0_LAYERS=2    # whether to enable depends on the actual setup
### BASE CONFIG ###
...
examples/deepseek_v3/train_deepseekv3_671B_4nodes.sh

@@ -29,7 +29,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
export MP_PP0_LAYERS=2    # whether to enable depends on the actual setup
### BASE CONFIG ###
...
examples/gpt3/run_gpt_567B.sh

@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS=""       # how many gpus to use
DTK_ENV=""    # where env.sh of dtk
NCCL_ENV=""   # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT=""   # mpi port to use
HOST=""       # hostname
PORT=""       # port id
DATA_PATH=""  # path to redpajama_text_document
...
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_gpt_567B \
    --allow-run-as-root \
    --bind-to none \
    --mca plm_rsh_no_tree_spawn 1 \
    --mca plm_rsh_args "-p ${MPI_PORT}" \
    bash -c "source ${DTK_ENV} && \
    source ${NCCL_ENV} && \
    ./train_gpt_567B_$((${GPUS}/8))nodes.sh \
    ${HOST} \
    ${PORT} \
...
examples/gpt3/train_gpt_567B_128nodes.sh

@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
    --rank ${RANK}
...
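DISTRIBUTED_ARGS=( ... ) opens a bash array; the hunk is truncated here, but an array like this is normally expanded later with "${DISTRIBUTED_ARGS[@]}" so that each element is passed as a separate argument. A hedged sketch of that pattern, using a placeholder command (my_launcher is hypothetical, not from the repository):

#!/bin/bash
# Illustration only: building and expanding an argument array as these scripts do.
RANK=0                       # hypothetical value
DISTRIBUTED_ARGS=(
    --rank ${RANK}
)
# Quoted [@] expansion keeps every element as its own argument:
echo my_launcher "${DISTRIBUTED_ARGS[@]}"    # -> my_launcher --rank 0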
examples/gpt3/train_gpt_567B_1nodes.sh

@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
    --rank ${RANK}
...
examples/mixtral/run_mixtral_8x22B.sh

@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS=""       # how many gpus to use
DTK_ENV=""    # where env.sh of dtk
NCCL_ENV=""   # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT=""   # mpi port to use
HOST=""       # hostname
PORT=""       # port id
DATA_PATH=""  # path to my-mixtral_text_document
...
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_mixtral_8x22B \
    --allow-run-as-root \
    --bind-to none \
    --mca plm_rsh_no_tree_spawn 1 \
    --mca plm_rsh_args "-p ${MPI_PORT}" \
    bash -c "source ${DTK_ENV} && \
    source ${NCCL_ENV} && \
    ./train_mixtral_8x22B_$((${GPUS}/8))nodes.sh \
    ${HOST} \
    ${PORT} \
...
examples/mixtral/run_mixtral_8x7B.sh

@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS=""       # how many gpus to use
DTK_ENV=""    # where env.sh of dtk
NCCL_ENV=""   # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT=""   # mpi port to use
HOST=""       # hostname
PORT=""       # port id
DATA_PATH=""  # path to my-mixtral_text_document
...
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_mixtral_8x7B \
    --allow-run-as-root \
    --bind-to none \
    --mca plm_rsh_no_tree_spawn 1 \
    --mca plm_rsh_args "-p ${MPI_PORT}" \
    bash -c "source ${DTK_ENV} && \
    source ${NCCL_ENV} && \
    ./train_mixtral_8x7B_$((${GPUS}/8))nodes.sh \
    ${HOST} \
    ${PORT} \
...
examples/mixtral/train_mixtral_8x22B_1nodes.sh

@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
    --rank ${RANK}
...
examples/mixtral/train_mixtral_8x22B_8nodes.sh

@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
    --rank ${RANK}
...
examples/mixtral/train_mixtral_8x7B_1nodes.sh

@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
    --rank ${RANK}
...
examples/mixtral/train_mixtral_8x7B_4nodes.sh

@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
- export GROUPED_GEMM_BatchLinear=1
+ # export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
    --rank ${RANK}
...
requirements/launch_with_binding.sh

#!/bin/bash
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
"${@:2}"
# LOCAL_RANK=$1
# shift
LOCAL_RANK=$1
shift
numa_map=(0 1 2 3 4 5 6 7)
NUMA_ID=${numa_map[$LOCAL_RANK]}
numactl --cpunodebind=${NUMA_ID} --membind=${NUMA_ID} "$@"
# numa_map=(0 1 2 3 4 5 6 7)
# NUMA_ID=${numa_map[$LOCAL_RANK]}
# numactl --cpunodebind=${NUMA_ID} --membind=${NUMA_ID} "$@"
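A hedged usage sketch of the wrapper: its first argument is taken as the process's local rank, used to pick a NUMA node from numa_map, and the remaining arguments are run under numactl with CPU and memory bound to that node. The rank value and trailing command below are illustrative only, not taken from the commit:

# Illustration only: invoking the binding wrapper by hand for local rank 3.
bash requirements/launch_with_binding.sh 3 hostname
# roughly equivalent to:
#   numactl --cpunodebind=3 --membind=3 hostname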