dcuai / dlexamples · Commits

Commit ac26d1fb, authored Jan 13, 2023 by sunxx1

Merge branch 'hepj-test' into 'main'

Hepj test. See merge request dcutoolkit/deeplearing/dlexamples_new!48

Parents: 0016b0a7, 7d366e11

Showing 18 changed files with 684 additions and 5 deletions (+684 −5)
- PyTorch/NLP/BERT/2node-run-pre/2nodes_single_process_pre1.sh (+61 −0)
- PyTorch/NLP/BERT/2node-run-pre/2nodes_single_process_pre2.sh (+64 −0)
- PyTorch/NLP/BERT/2node-run-pre/run_bert_pre1_4dcus.sh (+32 −0)
- PyTorch/NLP/BERT/2node-run-pre/run_bert_pre2_4dcus.sh (+32 −0)
- PyTorch/NLP/BERT/2node-run-squad/2nodes_single_process.sh (+57 −0)
- PyTorch/NLP/BERT/2node-run-squad/run_bert_squad_4dcus.sh (+41 −0)
- PyTorch/NLP/BERT/README.md (+14 −1)
- PyTorch/NLP/Conformer-main/2node-run-comformer/2nodes_single_process.sh (+58 −0)
- PyTorch/NLP/Conformer-main/2node-run-comformer/run_conformer_4dcus.sh (+42 −0)
- PyTorch/NLP/Conformer-main/README.md (+8 −1)
- PyTorch/NLP/Vision_Transformer/2node-run-vit/run-vit-finetune.sh (+30 −0)
- PyTorch/NLP/Vision_Transformer/2node-run-vit/run-vit-pre.sh (+30 −0)
- PyTorch/NLP/Vision_Transformer/2node-run-vit/single_finetune-4.sh (+55 −0)
- PyTorch/NLP/Vision_Transformer/2node-run-vit/single_pre-4.sh (+53 −0)
- PyTorch/NLP/Vision_Transformer/README.md (+14 −0)
- PyTorch/NLP/new-Transformer/2node-run/2nodes_single_process.sh (+44 −0)
- PyTorch/NLP/new-Transformer/2node-run/run_transformer_4dcus.sh (+34 −0)
- PyTorch/NLP/new-Transformer/README.md (+15 −3)
PyTorch/NLP/BERT/2node-run-pre/2nodes_single_process_pre1.sh · new file (100644)

```
#!/bin/bash
# Per-rank launch script: one copy runs per MPI process (one per DCU).
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0
export HIP_LAUNCH_BLOCKING=1

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

export PATH_PHRASE1=/public/DL_DATA/wikicorpus_en/lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/wikicorpus_en/training

APP="python3 ${HOME}/torch/bert-pretrain/run_pretraining_v4.py \
  --input_dir=${PATH_PHRASE1} \
  --output_dir=${HOME}/outdir/torch/pre_wiki/phrase1 \
  --config_file=${HOME}/model/uncased_L-24_H-1024_A-16/bert_config.json \
  --bert_model=bert-large-uncased \
  --train_batch_size=16 \
  --max_seq_length=128 \
  --max_predictions_per_seq=20 \
  --max_steps=100000 \
  --warmup_proportion=0.0 \
  --num_steps_per_checkpoint=20000 \
  --learning_rate=4.0e-4 \
  --seed=12439 \
  --gradient_accumulation_steps=1 \
  --allreduce_post_accumulation \
  --gpus_per_node 2 \
  --do_train \
  --local_rank ${comm_rank} \
  --world_size ${comm_size} \
  --dist_url tcp://${1}:34567 \
  --json-summary ${HOME}/outdir/torch/pre_wiki/phrase1/dllogger.json"

# Pin each local rank to its own DCU, InfiniBand HCA, and NUMA node.
case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=1
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=2
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
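Every per-rank script in this commit repeats the same four-way `case ${lrank}` block, with only the index changing: local rank N is pinned to DCU N, HCA mlx5_N, and NUMA node N. Under the assumption that this rank-to-index symmetry holds on the target nodes, the whole block collapses to the following equivalent sketch (illustrative, not part of the commit):

```
# Condensed equivalent of the case block above (sketch): bind each
# local rank to the DCU, IB HCA, and NUMA node with the same index.
export HIP_VISIBLE_DEVICES=${lrank}
export UCX_NET_DEVICES=mlx5_${lrank}:1
export UCX_IB_PCI_BW=mlx5_${lrank}:50Gbs
echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=${lrank} --membind=${lrank} ${APP}
NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=${lrank} --membind=${lrank} ${APP}
```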
PyTorch/NLP/BERT/2node-run-pre/2nodes_single_process_pre2.sh · new file (100644)

```
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0
export HIP_LAUNCH_BLOCKING=1

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

export PATH_PHRASE2=/public/DL_DATA/wikicorpus_en/lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5_shard_1472_test_split_10/wikicorpus_en/training

APP="python3 ${HOME}/torch/bert-pretrain/run_pretraining_v4.py \
  --input_dir=${PATH_PHRASE2} \
  --output_dir=${HOME}/outdir/torch/pre_wiki/phrase2 \
  --config_file=${HOME}/model/uncased_L-24_H-1024_A-16/bert_config.json \
  --bert_model=bert-large-uncased \
  --train_batch_size=2 \
  --max_seq_length=512 \
  --max_predictions_per_seq=80 \
  --max_steps=400000 \
  --warmup_proportion=0.128 \
  --num_steps_per_checkpoint=20000 \
  --learning_rate=4.0e-3 \
  --seed=12439 \
  --gradient_accumulation_steps=1 \
  --allreduce_post_accumulation \
  --gpus_per_node 2 \
  --do_train \
  --phase2 \
  --phase1_end_step=0 \
  --local_rank ${comm_rank} \
  --world_size ${comm_size} \
  --dist_url tcp://${1}:34567 \
  --json-summary ${HOME}/outdir/torch/pre_wiki4/phrase2/dllogger.json"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=1
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=2
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
PyTorch/NLP/BERT/2node-run-pre/run_bert_pre1_4dcus.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
HOME_PATH=/work/home/hepj
WORK_PATH=${HOME_PATH}/torch/bert-pretrain/2node-run
source ~/env22.10.sh
which python3

# Dump the allocated node names, then build an OpenMPI hostfile with
# 4 slots (one per DCU) per node, counting nodes as we go.
hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu

# The first node in the list is the rendezvous host; it is passed to the
# per-rank script as $1 and becomes tcp://<host>:34567 there.
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

export NCCL_DEBUG=INFO
export HSA_USERPTR_FOR_PAGED_MEM=0
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process_pre1.sh $dist_url
```
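The loop above emits one `slots=4` entry per allocated node, so for the requested `-N 2` allocation the generated `hostfile-$SLURM_JOB_ID` would look like this (hostnames are illustrative):

```
node001 slots=4
node002 slots=4
```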
PyTorch/NLP/BERT/2node-run-pre/run_bert_pre2_4dcus.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
HOME_PATH=/work/home/hepj
WORK_PATH=${HOME_PATH}/torch/bert-pretrain/2node-run
source ~/env22.10.sh
which python3

hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

export NCCL_DEBUG=INFO
export HSA_USERPTR_FOR_PAGED_MEM=0
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process_pre2.sh $dist_url
```
PyTorch/NLP/BERT/2node-run-squad/2nodes_single_process.sh · new file (100644)

```
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0
export HIP_LAUNCH_BLOCKING=1

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

APP="python3 /work/home/hepj/torch/bert-squad/run_squad_v4.py \
  --train_file ${HOME}/data/sq1.1/train-v1.1.json \
  --predict_file ${HOME}/data/sq1.1/dev-v1.1.json \
  --init_checkpoint ${HOME}/model/pytorch_bert/model.ckpt-28252.pt \
  --vocab_file ${HOME}/model/pytorch_bert/vocab.txt \
  --output_dir ${HOME}/outdir/torch/SQUAD4 \
  --config_file ${HOME}/model/pytorch_bert/bert_config.json \
  --json-summary ${HOME}/outdir/torch/SQUAD4/results.json \
  --bert_model bert-large-uncased \
  --do_train \
  --do_predict \
  --train_batch_size 4 \
  --predict_batch_size 4 \
  --gpus_per_node 2 \
  --local_rank ${comm_rank} \
  --world_size ${comm_size} \
  --use_env \
  --dist_url tcp://${1}:34567"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=1
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=2
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
PyTorch/NLP/BERT/2node-run-squad/run_bert_squad_4dcus.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
HOME_PATH=/work/home/hepj
WORK_PATH=${HOME_PATH}/torch/bert-squad/2node-run
source ~/env22.10.sh
which python3
#export NCCL_GRAPH_DUMP_FILE=graph.xml
#export NCCL_GRAPH_FILE=test.xml
#export NCCL_NET_GDR_LEVEL=5

hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

export NCCL_DEBUG=INFO
export HSA_USERPTR_FOR_PAGED_MEM=0
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/single_process.sh $dist_url
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/single_process_ddp.sh $dist_url
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process.sh $dist_url
#hipprof mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/hipprof_single.sh $dist_url
#hipprof mpirun -np 4 --hostfile hostfile-18261131 hipprof_single.sh j17r3n01
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process.sh $dist_url
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID hipprof ${WORK_PATH}/2nodes_single_process.sh $dist_url
```
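All launchers in this commit are submitted the same way; a typical run, with the partition and paths assumed from the script above, is:

```
cd 2node-run-squad
sbatch run_bert_squad_4dcus.sh
# with set -x, the expanded commands and the training output are written
# to slurm-<jobid>.out in the submission directory
```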
PyTorch/NLP/BERT/README.md · modified

...
@@ -108,6 +108,14 @@ python3 tf_to_torch/convert_tf_checkpoint.py --tf_checkpoint ~/NLP/cks/bs64k_32k
./bert_squad4_fp16.sh  # half precision (edit the APP settings in single_squad4_fp16.sh to match your own paths)
```
```
# Multi-node, multi-card
cd 2node-run-squad
sbatch run_bert_squad_4dcus.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; for fp16, add --fp16 and --amp to APP in the corresponding single-process script; results are saved to the matching slurm output file)
```
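The fp16 switch the README describes is just two extra flags on the `APP` string in the matching single-process script. One hedged way to make that edit without touching the long definition itself is to append to the variable right after it is defined:

```
# Sketch (assumed edit, not in the commit): enable half precision by
# appending the flags the README names to the existing APP definition.
APP="${APP} --fp16 --amp"
```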
## 4. **PHRASE test**
### 1. Parameter description
...
@@ -142,6 +150,9 @@ python3 tf_to_torch/convert_tf_checkpoint.py --tf_checkpoint ~/NLP/cks/bs64k_32k
# Multi-card
./bert_pre1_4.sh       # single precision (edit the APP settings in single_pre1_4.sh to match your own paths)
./bert_pre1_4_fp16.sh  # half precision (edit the APP settings in single_pre1_4_fp16.sh to match your own paths)
# Multi-node, multi-card
cd 2node-run-pre
sbatch run_bert_pre1_4dcus.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; for fp16, add --fp16 and --amp to APP in the corresponding single-process script; results are saved to the matching slurm output file)
```
### 3. PHRASE2
...
@@ -153,6 +164,8 @@ python3 tf_to_torch/convert_tf_checkpoint.py --tf_checkpoint ~/NLP/cks/bs64k_32k
# Multi-card
./bert_pre2_4.sh       # single precision (edit the APP settings in single_pre2_4.sh to match your own paths)
./bert_pre2_4_fp16.sh  # half precision (edit the APP settings in single_pre2_4_fp16.sh to match your own paths)
# Multi-node, multi-card
cd 2node-run-pre
sbatch run_bert_pre2_4dcus.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; for fp16, add --fp16 and --amp to APP in the corresponding single-process script; results are saved to the matching slurm output file)
```
PyTorch/NLP/Conformer-main/2node-run-comformer/2nodes_single_process.sh · new file (100644)

```
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0
export HIP_LAUNCH_BLOCKING=1

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE
export RANK=$OMPI_COMM_WORLD_RANK
export WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
export HIP_VISIBLE_DEVICES=0,1,2,3

#pyenv activate torch-dtk22.04.2
source ~/env22.04.2.sh

APP="python /work/home/hepj/torch/Conformer-main/main.py \
  --model Conformer_small_patch16 \
  --data-set IMNET \
  --batch-size 64 \
  --world_size 4 \
  --lr 0.001 \
  --local_rank ${comm_rank} \
  --dist_url tcp://${1}:9999 \
  --data-path /public/DL_DATA/ImageNet-pytorch \
  --output_dir /work/home/hepj/torch/Conformer-main/out_dir \
  --epochs 1"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=1
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=2
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
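Unlike the BERT scripts, this one also exports `RANK`, `WORLD_SIZE`, and `LOCAL_RANK` from the OpenMPI variables; these are the environment names torch.distributed reads under the env:// convention, so each MPI process can set up the process group without a separate launcher like torchrun. A minimal sanity check of what each rank sees (a sketch, not part of the commit):

```
# Print the distributed-environment variables visible to this MPI process.
python3 - <<'EOF'
import os
for k in ("RANK", "WORLD_SIZE", "LOCAL_RANK"):
    print(k, "=", os.environ.get(k))
EOF
```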
PyTorch/NLP/Conformer-main/2node-run-comformer/run_conformer_4dcus.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
HOME_PATH=/work/home/hepj
WORK_PATH=${HOME_PATH}/torch/Conformer-main/2node-run
source ~/env22.10.sh
which python3
#export NCCL_GRAPH_DUMP_FILE=graph.xml
#export NCCL_GRAPH_FILE=test.xml
#export NCCL_NET_GDR_LEVEL=5

hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

#export NCCL_DEBUG=INFO
#export HSA_USERPTR_FOR_PAGED_MEM=0
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/single_process.sh $dist_url
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/single_process_ddp.sh $dist_url
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process.sh $dist_url
#hipprof mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/hipprof_single.sh $dist_url
#hipprof mpirun -np 4 --hostfile hostfile-18261131 hipprof_single.sh j17r3n01
#mpirun -np 1 --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process.sh $dist_url
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process.sh $dist_url
#mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID hipprof ${WORK_PATH}/2nodes_single_process.sh $dist_url
```
PyTorch/NLP/Conformer-main/README.md · modified

...
@@ -39,7 +39,7 @@ import collections.abc as container_abcs
/public/software/apps/DeepLearning/Data/ImageNet-pytorch
## Single card
```
# Launch
...
@@ -59,3 +59,10 @@ (--nnodes in the sh script is the number of machines, --nproc_per_node the number of cards per machine,)
./run4.sh
```
## Multi-node, multi-card
```
cd 2node-run-comformer
sbatch run_conformer_4dcus.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; results are saved to the matching slurm output file)
```
PyTorch/NLP/Vision_Transformer/2node-run-vit/run-vit-finetune.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
WORK_PATH=/work/home/hepj/torch/Vision_Transformer/2node-run
source ~/env22.10.sh
which python3

hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

# export NCCL_DEBUG=INFO
# export HSA_USERPTR_FOR_PAGED_MEM=0
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/single_finetune-4.sh $dist_url
```
PyTorch/NLP/Vision_Transformer/2node-run-vit/run-vit-pre.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
WORK_PATH=/work/home/hepj/torch/mae-main/2node-run
source ~/env22.04.2.sh
which python3

hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

export NCCL_DEBUG=INFO
export HSA_USERPTR_FOR_PAGED_MEM=0
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/single_pre-4.sh $dist_url
```
PyTorch/NLP/Vision_Transformer/2node-run-vit/single_finetune-4.sh · new file (100644)

```
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

source ~/env22.10.sh
export PRETRAIN_CHKPT=/work/home/hepj/model/VIT/mae_pretrain_vit_base.pth
#mae_finetuned_vit_base.pth
export IMAGENET_DIR=/public/DL_DATA/ImageNet-pytorch
export HIP_VISIBLE_DEVICES=0,1,2,3

APP="python /work/home/hepj/torch/mae-main/main_finetune.py \
  --batch_size 32 \
  --dist_on_itp \
  --dist_url tcp://${1}:34567 \
  --local_rank ${comm_rank} \
  --model vit_base_patch16 \
  --finetune ${PRETRAIN_CHKPT} \
  --epochs 1 \
  --blr 5e-4 --layer_decay 0.65 --weight_decay 0.05 \
  --drop_path 0.1 --mixup 0.8 --cutmix 1.0 --reprob 0.25 --dist_eval \
  --data_path ${IMAGENET_DIR}"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo numactl --cpunodebind=0 --membind=0 ${APP}
  numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=1
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo numactl --cpunodebind=1 --membind=1 ${APP}
  numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=2
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo numactl --cpunodebind=2 --membind=2 ${APP}
  numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo numactl --cpunodebind=3 --membind=3 ${APP}
  numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
PyTorch/NLP/Vision_Transformer/2node-run-vit/single_pre-4.sh · new file (100644)

```
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

pyenv activate torch-dtk22.04.2
source ~/env22.04.2.sh
export PRETRAIN_CHKPT=/work/home/hepj/model/VIT/mae_pretrain_vit_base.pth
#mae_finetuned_vit_base.pth
export IMAGENET_DIR=/public/DL_DATA/ImageNet-pytorch
export HIP_VISIBLE_DEVICES=0,1,2,3

APP="python /work/home/hepj/torch/mae-main/main_pretrain.py \
  --epochs 1 \
  --dist_on_itp \
  --dist_url tcp://${1}:34567 \
  --local_rank ${comm_rank} \
  --model mae_vit_base_patch16 \
  --batch_size 64 \
  --data_path ${IMAGENET_DIR}"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=1
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=2
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  NCCL_SOCKET_IFNAME=eno1 numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
PyTorch/NLP/Vision_Transformer/README.md · modified

...
@@ -88,6 +88,13 @@ OMP_NUM_THREADS=1 python3 -m torch.distributed.launch --nproc_per_node=4 main_
```
## Multi-node, multi-card
```
cd 2node-run-vit
sbatch run-vit-pre.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; results are saved to the matching slurm output file)
```
# Fine-tuning task
...
@@ -135,6 +142,13 @@ OMP_NUM_THREADS=1 python3 -m torch.distributed.launch --nproc_per_node=4 main_fi
--dist_eval --data_path ${IMAGENET_DIR}
```
## Multi-node, multi-card
```
cd 2node-run-vit
sbatch run-vit-finetune.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; results are saved to the matching slurm output file)
```
# Result verification
The model used for verification is mae_finetuned_vit_xxx.pth; download address:
...
PyTorch/NLP/new-Transformer/2node-run/2nodes_single_process.sh · new file (100644)

```
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=eno1
export HSA_USERPTR_FOR_PAGED_MEM=0
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=1

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

#TOKENS=4096
TOKENS=2560
export DATA_PATH=~/data/wmt14_en_de_joined_dict

APP="python3 /work/home/hepj/torch/TransFormer/train.py $DATA_PATH \
  --save-dir 2node-outdir --arch transformer_wmt_en_de \
  --share-decoder-input-output-embed \
  --optimizer adam --adam-betas (0.9,0.98) --clip-norm 0.0 \
  --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000 \
  --dropout 0.3 --weight-decay 0.0001 \
  --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
  --max-tokens $TOKENS \
  --eval-bleu --eval-bleu-args {\"beam\":5,\"max_len_a\":1.2,\"max_len_b\":10} \
  --eval-bleu-detok moses --eval-bleu-remove-bpe --eval-bleu-print-samples \
  --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \
  --distributed-rank ${comm_rank} --distributed-world-size ${comm_size} \
  --device-id ${lrank} --local_rank ${lrank} \
  --distributed-init-method tcp://${1}:34567 --distributed-no-spawn --max-epoch 1"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
  NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
  NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
  NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
  NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac
```
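The escaped JSON in `--eval-bleu-args {\"beam\":5,...}` only survives because `${APP}` is expanded unquoted and the argument happens to contain no spaces. A more robust pattern, shown here as a hedged alternative rather than what the commit does, is to build the command as a bash array so the JSON stays a single argument however it is formatted:

```
# Sketch: the array form keeps the quoted JSON intact even with spaces in it.
CMD=(python3 /work/home/hepj/torch/TransFormer/train.py "$DATA_PATH"
     --max-tokens "$TOKENS"
     --eval-bleu --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}')
numactl --cpunodebind="${lrank}" --membind="${lrank}" "${CMD[@]}"
```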
PyTorch/NLP/new-Transformer/2node-run/run_transformer_4dcus.sh · new file (100644)

```
#!/usr/bin/env bash
#SBATCH -J 2node-test
#SBATCH -p wzhdtest
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4

set -x
HOME_PATH=/work/home/hepj
WORK_PATH=${HOME_PATH}/torch/TransFormer/2node-run
source ~/env22.10.sh
which python3
#export NCCL_GRAPH_DUMP_FILE=graph.xml
#export NCCL_GRAPH_FILE=test.xml
#export NCCL_NET_GDR_LEVEL=5

hostfile=./$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
    echo ${i} slots=4 >> `pwd`/hostfile-$SLURM_JOB_ID
    ((num_node=${num_node}+1))
done
num_dcu=$((${num_node}*4))
echo $num_dcu
nodename=$(cat $hostfile | sed -n "1p")
echo $nodename
dist_url=`echo $nodename | awk '{print $1}'`

#export NCCL_DEBUG=INFO
#export HSA_USERPTR_FOR_PAGED_MEM=0
mpirun -np ${num_dcu} --hostfile hostfile-$SLURM_JOB_ID ${WORK_PATH}/2nodes_single_process.sh $dist_url
```
PyTorch/NLP/new-Transformer/README.md · modified

...
@@ -390,9 +390,21 @@ sbatch fp16_ run_transformer_4dcus.sh
- Set the network to test via --arch, e.g. transformer_wmt_en_de;
- The mpirun command in run_transformer_4dcus.sh above trains on 4 DCU accelerator cards.
#### 3.5. Multi-node, multi-card
```
cd 2node-run
# fp32
sbatch run_transformer_4dcus.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; results are saved to the matching slurm output file)
# fp16
sbatch run_transformer_4dcus_fp16.sh  (adjust #SBATCH -p and #SBATCH -J to your setup; results are saved to the matching slurm output file)
```
#### 3.6. Notes on known issues
##### 3.6.1. format error
The error message is as follows:
...
@@ -414,7 +426,7 @@ self._verbose += f"ref_len = {slef.ref_len:.0f}"
##### 3.6.2 JSON parsing error
The error message is as follows:
...