Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
evt_fugx1
dcu_megatron
Commits
83fab71e
Commit
83fab71e
authored
May 15, 2025
by
silencealiang
Browse files
disable torch._dynamo for reduce_from_tensor_model_parallel_region
parent
eb4333f0
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
102 additions
and
328 deletions
+102
-328
dcu_megatron/adaptor/megatron_adaptor.py
dcu_megatron/adaptor/megatron_adaptor.py
+4
-0
examples/deepseek_v3/train_deepseekv3_671B_128nodes.sh
examples/deepseek_v3/train_deepseekv3_671B_128nodes.sh
+10
-33
examples/deepseek_v3/train_deepseekv3_671B_1nodes.sh
examples/deepseek_v3/train_deepseekv3_671B_1nodes.sh
+10
-33
examples/deepseek_v3/train_deepseekv3_671B_4nodes.sh
examples/deepseek_v3/train_deepseekv3_671B_4nodes.sh
+10
-33
examples/gpt3/train_gpt_567B_128nodes.sh
examples/gpt3/train_gpt_567B_128nodes.sh
+10
-33
examples/gpt3/train_gpt_567B_1nodes.sh
examples/gpt3/train_gpt_567B_1nodes.sh
+9
-32
examples/llama/train_llama2_7b_1nodes.sh
examples/llama/train_llama2_7b_1nodes.sh
+9
-32
examples/mixtral/train_mixtral_8x22B_1nodes.sh
examples/mixtral/train_mixtral_8x22B_1nodes.sh
+10
-33
examples/mixtral/train_mixtral_8x22B_8nodes.sh
examples/mixtral/train_mixtral_8x22B_8nodes.sh
+10
-33
examples/mixtral/train_mixtral_8x7B_1nodes.sh
examples/mixtral/train_mixtral_8x7B_1nodes.sh
+10
-33
examples/mixtral/train_mixtral_8x7B_4nodes.sh
examples/mixtral/train_mixtral_8x7B_4nodes.sh
+10
-33
No files found.
dcu_megatron/adaptor/megatron_adaptor.py
View file @
83fab71e
...
...
@@ -167,6 +167,10 @@ class CoreAdaptation(MegatronAdaptationABC):
MegatronAdaptation
.
register
(
'megatron.core.tensor_parallel.mappings.reduce_scatter_to_sequence_parallel_region'
,
torch
.
_dynamo
.
disable
,
apply_wrapper
=
True
)
# reduce_from_tensor_model_parallel_region
MegatronAdaptation
.
register
(
'megatron.core.tensor_parallel.mappings.reduce_from_tensor_model_parallel_region'
,
torch
.
_dynamo
.
disable
,
apply_wrapper
=
True
)
# flux
if
int
(
os
.
getenv
(
"USE_FLUX_OVERLAP"
,
"0"
)):
...
...
examples/deepseek_v3/train_deepseekv3_671B_128nodes.sh
View file @
83fab71e
...
...
@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/deepseek_v3/train_deepseekv3_671B_1nodes.sh
View file @
83fab71e
...
...
@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/deepseek_v3/train_deepseekv3_671B_4nodes.sh
View file @
83fab71e
...
...
@@ -429,37 +429,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/gpt3/train_gpt_567B_128nodes.sh
View file @
83fab71e
...
...
@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/gpt3/train_gpt_567B_1nodes.sh
View file @
83fab71e
...
...
@@ -164,37 +164,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/llama/train_llama2_7b_1nodes.sh
View file @
83fab71e
...
...
@@ -158,37 +158,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/mixtral/train_mixtral_8x22B_1nodes.sh
View file @
83fab71e
...
...
@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/mixtral/train_mixtral_8x22B_8nodes.sh
View file @
83fab71e
...
...
@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/mixtral/train_mixtral_8x7B_1nodes.sh
View file @
83fab71e
...
...
@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
examples/mixtral/train_mixtral_8x7B_4nodes.sh
View file @
83fab71e
...
...
@@ -167,37 +167,14 @@ elif [[ $profiling == "hip" ]]; then
fi
#for hygon cpu
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
case
${
LOCAL_RANK
}
in
[
0]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
[
1]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
[
2]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
[
3]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
[
4]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
[
5]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
[
6]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
[
7]
)
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
0
)
numactl
--cpunodebind
=
0
--membind
=
0
${
APP
}
;;
1
)
numactl
--cpunodebind
=
1
--membind
=
1
${
APP
}
;;
2
)
numactl
--cpunodebind
=
2
--membind
=
2
${
APP
}
;;
3
)
numactl
--cpunodebind
=
3
--membind
=
3
${
APP
}
;;
4
)
numactl
--cpunodebind
=
4
--membind
=
4
${
APP
}
;;
5
)
numactl
--cpunodebind
=
5
--membind
=
5
${
APP
}
;;
6
)
numactl
--cpunodebind
=
6
--membind
=
6
${
APP
}
;;
7
)
numactl
--cpunodebind
=
7
--membind
=
7
${
APP
}
;;
esac
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment