Commit 0a3a611b authored by liangjj's avatar liangjj
Browse files

update

parent 557ae9c4
...@@ -5,29 +5,38 @@ ...@@ -5,29 +5,38 @@
# 运行 # 运行
## 单卡 ## 单卡
export PYTHONPATH=/path/to/tensorflow/model:$PYTHONPATH export PYTHONPATH=/path/to/tensorflow/model:$PYTHONPATH
export HIP_VISIBLE_DEVICES=0 export HIP_VISIBLE_DEVICES=0
#without xla
python3 models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=128 --num_gpus=1 --use_synthetic_data=false python3 models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=128 --num_gpus=1 --use_synthetic_data=false
#with xla
TF_XLA_FLAGS="--tf_xla_auto_jit=2" python3 models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=128 --num_gpus=1 --use_synthetic_data=false
## 单机多卡 ## 单机多卡
export PYTHONPATH=/path/to/tensorflow/model:$PYTHONPATH export PYTHONPATH=/path/to/tensorflow/model:$PYTHONPATH
export HIP_VISIBLE_DEVICES=0,1,2,3 export HIP_VISIBLE_DEVICES=0,1,2,3
#without xla
python3 models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=512 --num_gpus=4 --use_synthetic_data=false python3 models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=512 --num_gpus=4 --use_synthetic_data=false
#with xla
TF_XLA_FLAGS="--tf_xla_auto_jit=2" python3 models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=512 --num_gpus=4 --use_synthetic_data=false
## 分布式多卡 ## 分布式多卡
# sed指令只需要执行一次,添加支持多卡运行的代码 # sed指令只需要执行一次,添加支持多卡运行的代码
sed -i '101 r configfile' models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py sed -i '101 r configfile' models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py
export PYTHONPATH=/path/to/tensorflow/model:$PYTHONPATH export PYTHONPATH=/path/to/tensorflow/model:$PYTHONPATH
#without xla
mpirun -np ${num_gpu} --hostfile hostfile -mca btl self,tcp --bind-to none scripts-run/single_process.sh mpirun -np ${num_gpu} --hostfile hostfile -mca btl self,tcp --bind-to none scripts-run/single_process.sh
#with xla
mpirun -np ${num_gpu} --hostfile hostfile -mca btl self,tcp --bind-to none scripts-run/single_process_xla.sh
### 测试说明 ### 测试说明
多卡测试时需要修改部分代码,具体可参考https://tensorflow.google.cn/guide/migrate/multi_worker_cpu_gpu_training?hl=en 多卡测试时需要修改部分代码,具体可参考https://tensorflow.google.cn/guide/migrate/multi_worker_cpu_gpu_training?hl=en
hostfile格式参考: hostfile格式参考:
node1 slots=4 node1 slots=4
......
#!/bin/bash #!/bin/bash
lrank=$OMPI_COMM_WORLD_LOCAL_RANK lrank=$OMPI_COMM_WORLD_LOCAL_RANK
d_rank=$OMPI_COMM_WORLD_RANK drank=$OMPI_COMM_WORLD_RANK
APP="python3 ../models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --num_gpus=1 --skip_eval=true --batch_size=512 --train_epochs=90 --use_synthetic_data=false --distribution_strategy=multi_worker_mirrored --all_reduce_alg=nccl --dtype=fp32 --data_dir=${data_dir} --task_index=${drank} " APP="python3 ./models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --num_gpus=1 --skip_eval=true --batch_size=512 --train_epochs=90 --use_synthetic_data=false --distribution_strategy=multi_worker_mirrored --all_reduce_alg=nccl --dtype=fp32 --data_dir=${data_dir} --task_index=${drank} "
case ${lrank} in case ${lrank} in
[0]) [0])
export HIP_VISIBLE_DEVICES=0 export HIP_VISIBLE_DEVICES=0
......
#!/bin/bash
# Per-process launcher for distributed multi-node ResNet training with XLA
# auto-clustering enabled. Started with one process per rank, e.g.:
#   mpirun -np ${num_gpu} --hostfile hostfile -mca btl self,tcp \
#          --bind-to none scripts-run/single_process_xla.sh
# Each local rank is pinned to one GPU and its matching NUMA node.

lrank=$OMPI_COMM_WORLD_LOCAL_RANK   # rank within this node -> GPU / NUMA index
drank=$OMPI_COMM_WORLD_RANK         # global rank -> TensorFlow task_index

# NOTE(review): ${data_dir} must be exported by the caller before mpirun,
# otherwise --data_dir expands to empty — confirm the launch environment.
APP="python3 ./models-master/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --num_gpus=1 --skip_eval=true --batch_size=512 --train_epochs=90 --use_synthetic_data=false --distribution_strategy=multi_worker_mirrored --all_reduce_alg=nccl --dtype=fp32 --data_dir=${data_dir} --task_index=${drank}"

# The original four case arms were identical except for the hard-coded rank
# number; collapse them into a single [0-3] arm driven by ${lrank}.
# Local ranks outside 0-3 launch nothing, exactly as before.
case ${lrank} in
  [0-3])
    export HIP_VISIBLE_DEVICES=${lrank}
    export UCX_NET_DEVICES=mlx5_0:1
    export UCX_IB_PCI_BW=mlx5_0:50Gbs
    # ${APP} is intentionally unquoted: it is a full command line that must
    # word-split into the program and its arguments.
    TF_XLA_FLAGS="--tf_xla_auto_jit=2" numactl --cpunodebind=${lrank} --membind=${lrank} ${APP}
    ;;
esac
...@@ -104,18 +104,32 @@ https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoi ...@@ -104,18 +104,32 @@ https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoi
## 单卡训练 ## 单卡训练
``` ```
#without_xla
export HIP_VISIBLE_DEVICES=0 export HIP_VISIBLE_DEVICES=0
python3 scripts/benchmark_training.py --gpus 1 --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights python3 scripts/benchmark_training.py --gpus 1 --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
#with xla
export HIP_VISIBLE_DEVICES=0
python3 scripts/benchmark_training_xla.py --gpus 1 --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
``` ```
## 多卡训练 ## 多卡训练
``` ```
#without xla
export HIP_VISIBLE_DEVICES=0,1 export HIP_VISIBLE_DEVICES=0,1
python3 scripts/benchmark_training.py --gpus 2 --batch_size 4 --model_dir save_model_2dcu --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights python3 scripts/benchmark_training.py --gpus 2 --batch_size 4 --model_dir save_model_2dcu --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
#with xla
export HIP_VISIBLE_DEVICES=0,1
python3 scripts/benchmark_training_xla.py --gpus 2 --batch_size 4 --model_dir save_model_2dcu --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
``` ```
## 推理 ## 推理
``` ```
#without xla
python3 scripts/benchmark_inference.py --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights python3 scripts/benchmark_inference.py --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
#with xla
python3 scripts/benchmark_inference_xla.py --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
``` ```
# 参考资料 # 参考资料
......
...@@ -201,11 +201,11 @@ class InputReader(object): ...@@ -201,11 +201,11 @@ class InputReader(object):
data_options.experimental_optimization.map_fusion = True data_options.experimental_optimization.map_fusion = True
data_options.experimental_optimization.map_parallelization = True data_options.experimental_optimization.map_parallelization = True
map_vectorization_options = tf.data.experimental.MapVectorizationOptions() #map_vectorization_options = tf.data.experimental.MapVectorizationOptions()
map_vectorization_options.enabled = True #map_vectorization_options.enabled = True
map_vectorization_options.use_choose_fastest = True #map_vectorization_options.use_choose_fastest = True
data_options.experimental_optimization.map_vectorization = map_vectorization_options #data_options.experimental_optimization.map_vectorization = map_vectorization_options
data_options.experimental_optimization.noop_elimination = True data_options.experimental_optimization.noop_elimination = True
data_options.experimental_optimization.parallel_batch = True data_options.experimental_optimization.parallel_batch = True
......
...@@ -43,7 +43,6 @@ def main(): ...@@ -43,7 +43,6 @@ def main():
f' --eval_samples 1200' f' --eval_samples 1200'
f' --use_batched_nms' f' --use_batched_nms'
f' --nouse_custom_box_proposals_op' f' --nouse_custom_box_proposals_op'
f' --xla'
f' --eval_batch_size {flags.batch_size}' f' --eval_batch_size {flags.batch_size}'
f' {"--amp" if flags.amp else ""}' f' {"--amp" if flags.amp else ""}'
) )
......
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Scripts that simplifies running evaluation benchmark """
import argparse
import os
import shutil
import subprocess
def main():
    """Build and launch the Mask R-CNN evaluation benchmark command.

    Parses the CLI flags, assembles the ``mask_rcnn_main.py`` eval command
    line, prints it framed by separator lines, runs it through the shell,
    and returns the child process's exit status.

    Returns:
        int: exit code of the benchmark subprocess (0 on success).
    """
    # CLI flags
    parser = argparse.ArgumentParser(description="MaskRCNN evaluation benchmark")
    parser.add_argument('--batch_size', type=int, required=True)
    parser.add_argument('--amp', action='store_true')
    parser.add_argument('--data_dir', type=str, default='/data')
    parser.add_argument('--model_dir', type=str, default='/tmp/model')
    parser.add_argument('--weights_dir', type=str, default='/model')
    flags = parser.parse_args()

    # mask_rcnn_main.py is expected one directory above this script.
    main_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../mask_rcnn_main.py'))

    # build command
    cmd = (
        f'python {main_path}'
        f' --mode eval'
        f' --model_dir "{flags.model_dir}"'
        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
        f' --validation_file_pattern "{os.path.join(flags.data_dir, "val*.tfrecord")}"'
        f' --val_json_file "{os.path.join(flags.data_dir, "annotations/instances_val2017.json")}"'
        f' --num_steps_per_eval 200'
        f' --eval_samples 1200'
        f' --use_batched_nms'
        f' --nouse_custom_box_proposals_op'
        f' --xla'
        f' --eval_batch_size {flags.batch_size}'
        f' {"--amp" if flags.amp else ""}'
    )

    # print command
    line = '-' * shutil.get_terminal_size()[0]
    print(line, cmd, line, sep='\n')

    # run model; propagate the child's status instead of silently discarding
    # it (previously the script always exited 0 even if the benchmark failed)
    return subprocess.call(cmd, shell=True)


if __name__ == '__main__':
    # Exit with the subprocess's status so CI/scripts can detect failures.
    raise SystemExit(main())
...@@ -45,7 +45,6 @@ def main(): ...@@ -45,7 +45,6 @@ def main():
f' --use_batched_nms' f' --use_batched_nms'
f' --noeval_after_training' f' --noeval_after_training'
f' --nouse_custom_box_proposals_op' f' --nouse_custom_box_proposals_op'
f' --xla'
f' --train_batch_size {flags.batch_size}' f' --train_batch_size {flags.batch_size}'
f' {"--amp" if flags.amp else ""}' f' {"--amp" if flags.amp else ""}'
) )
......
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Scripts that simplifies running training benchmark """
import argparse
import os
import shutil
import subprocess
def main():
    """CLI entry point: assemble and run the Mask R-CNN training benchmark.

    Parses the benchmark flags, builds the ``horovodrun`` training command
    for ``mask_rcnn_main.py``, echoes it between separator lines, and
    executes it through the shell.
    """
    # Parse benchmark flags.
    arg_parser = argparse.ArgumentParser(description="MaskRCNN train benchmark")
    arg_parser.add_argument('--gpus', type=int, required=True)
    arg_parser.add_argument('--batch_size', type=int, required=True)
    arg_parser.add_argument('--amp', action='store_true')
    arg_parser.add_argument('--data_dir', type=str, default='/data')
    arg_parser.add_argument('--model_dir', type=str, default='/tmp/model')
    arg_parser.add_argument('--weights_dir', type=str, default='/model')
    opts = arg_parser.parse_args()

    # mask_rcnn_main.py lives one directory above this script.
    script = os.path.abspath(os.path.join(os.path.dirname(__file__), '../mask_rcnn_main.py'))
    checkpoint = os.path.join(opts.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")
    train_glob = os.path.join(opts.data_dir, "train*.tfrecord")

    # Assemble the command from its pieces; the joined result is identical
    # to the original concatenated f-string.
    pieces = [
        f'horovodrun -np {opts.gpus} ',
        f'python {script}',
        ' --mode train',
        f' --model_dir "{opts.model_dir}"',
        f' --checkpoint "{checkpoint}"',
        f' --training_file_pattern "{train_glob}"',
        ' --init_learning_rate 0.04',
        ' --total_steps 200',
        ' --use_batched_nms',
        ' --noeval_after_training',
        ' --nouse_custom_box_proposals_op',
        ' --xla',
        f' --train_batch_size {opts.batch_size}',
        f' {"--amp" if opts.amp else ""}',
    ]
    cmd = ''.join(pieces)

    # Echo the command, framed by a terminal-wide separator.
    separator = '-' * shutil.get_terminal_size()[0]
    print(separator, cmd, separator, sep='\n')

    # Launch the training run.
    subprocess.call(cmd, shell=True)


if __name__ == '__main__':
    main()
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -49,8 +49,11 @@ export LD_LIBRARY_PATH=/public/software/compiler/rocm/dtk-21.10.1/roctracer/lib ...@@ -49,8 +49,11 @@ export LD_LIBRARY_PATH=/public/software/compiler/rocm/dtk-21.10.1/roctracer/lib
export HSA_FORCE_FINE_GRAIN_PCIE=1   export HSA_FORCE_FINE_GRAIN_PCIE=1  
export MIOPEN_FIND_MODE=3   export MIOPEN_FIND_MODE=3  
export HIP_VISIBLE_DEVICES=0   export HIP_VISIBLE_DEVICES=0  
cd /public/home/libodi/work1/ssd-tf2-master   cd /public/home/libodi/work1/ssd-tf2-master
#without xla
python3 train.py --dtype=fp32 python3 train.py --dtype=fp32
#with xla
TF_XLA_FLAGS="--tf_xla_auto_jit=2" python3 train.py --dtype=fp32
``` ```
使用ssd-tf2-master/voc_annotation.py自动生成训练集和验证集,其中训练集5717 张、验证集5823张。**具体为,修改voc_annotation.py里面的annotation_mode=2,运行voc_annotation.py生成根目录下的2007_train.txt和2007_val.txt。** 使用ssd-tf2-master/voc_annotation.py自动生成训练集和验证集,其中训练集5717 张、验证集5823张。**具体为,修改voc_annotation.py里面的annotation_mode=2,运行voc_annotation.py生成根目录下的2007_train.txt和2007_val.txt。**
......
...@@ -65,7 +65,7 @@ if __name__ == "__main__": ...@@ -65,7 +65,7 @@ if __name__ == "__main__":
# 网络一般不从0开始训练,至少会使用主干部分的权值,有些论文提到可以不用预训练,主要原因是他们 数据集较大 且 调参能力优秀。 # 网络一般不从0开始训练,至少会使用主干部分的权值,有些论文提到可以不用预训练,主要原因是他们 数据集较大 且 调参能力优秀。
# 如果一定要训练网络的主干部分,可以了解imagenet数据集,首先训练分类模型,分类模型的 主干部分 和该模型通用,基于此进行训练。 # 如果一定要训练网络的主干部分,可以了解imagenet数据集,首先训练分类模型,分类模型的 主干部分 和该模型通用,基于此进行训练。
#----------------------------------------------------------------------------------------------------------------------------# #----------------------------------------------------------------------------------------------------------------------------#
model_path = '' model_path = 'model_data/ssd_weights.h5'
#------------------------------------------------------# #------------------------------------------------------#
# 输入的shape大小 # 输入的shape大小
#------------------------------------------------------# #------------------------------------------------------#
...@@ -116,8 +116,8 @@ if __name__ == "__main__": ...@@ -116,8 +116,8 @@ if __name__ == "__main__":
#----------------------------------------------------# #----------------------------------------------------#
# 获得图片路径和标签 # 获得图片路径和标签
#----------------------------------------------------# #----------------------------------------------------#
train_annotation_path = '2012_train.txt' train_annotation_path = '2007_train.txt'
val_annotation_path = '2012_val.txt' val_annotation_path = '2007_val.txt'
#----------------------------------------------------# #----------------------------------------------------#
# 获取classes和anchor # 获取classes和anchor
......
...@@ -32,7 +32,7 @@ train_percent = 0.9 ...@@ -32,7 +32,7 @@ train_percent = 0.9
#-------------------------------------------------------# #-------------------------------------------------------#
VOCdevkit_path = 'VOCdevkit' VOCdevkit_path = 'VOCdevkit'
VOCdevkit_sets = [('2012', 'train'), ('2012', 'val')] VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')]
classes, _ = get_classes(classes_path) classes, _ = get_classes(classes_path)
def convert_annotation(year, image_id, list_file): def convert_annotation(year, image_id, list_file):
...@@ -56,8 +56,8 @@ if __name__ == "__main__": ...@@ -56,8 +56,8 @@ if __name__ == "__main__":
random.seed(0) random.seed(0)
if annotation_mode == 0 or annotation_mode == 1: if annotation_mode == 0 or annotation_mode == 1:
print("Generate txt in ImageSets.") print("Generate txt in ImageSets.")
xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2012/Annotations') xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
saveBasePath = os.path.join(VOCdevkit_path, 'VOC2012/ImageSets/Main') saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main')
temp_xml = os.listdir(xmlfilepath) temp_xml = os.listdir(xmlfilepath)
total_xml = [] total_xml = []
for xml in temp_xml: for xml in temp_xml:
...@@ -106,4 +106,4 @@ if __name__ == "__main__": ...@@ -106,4 +106,4 @@ if __name__ == "__main__":
convert_annotation(year, image_id, list_file) convert_annotation(year, image_id, list_file)
list_file.write('\n') list_file.write('\n')
list_file.close() list_file.close()
print("Generate 2012_train.txt and 2012_val.txt for train done.") print("Generate 2007_train.txt and 2007_val.txt for train done.")
...@@ -94,7 +94,7 @@ python tools/visualize_dataset.py --classes=./data/voc2012.names ...@@ -94,7 +94,7 @@ python tools/visualize_dataset.py --classes=./data/voc2012.names
``` ```
export HIP_VISIBLE_DEVICES=0 export HIP_VISIBLE_DEVICES=0
export PYTHONPATH=/public/home/zhenyi/miniconda3/envs/tf2.7.0-dtk21.10-build/bin/ export PYTHONPATH=/public/home/zhenyi/miniconda3/envs/tf2.7.0-dtk21.10-build/bin/
#without xla
python train.py \ python train.py \
--dataset ./data/voc2012_train.tfrecord \ --dataset ./data/voc2012_train.tfrecord \
--val_dataset ./data/voc2012_val.tfrecord \ --val_dataset ./data/voc2012_val.tfrecord \
...@@ -105,5 +105,16 @@ python train.py \ ...@@ -105,5 +105,16 @@ python train.py \
--epochs 10 \ --epochs 10 \
--weights ./checkpoints/yolov3.tf \ --weights ./checkpoints/yolov3.tf \
--weights_num_classes 80 --weights_num_classes 80
#with xla
TF_XLA_FLAGS="--tf_xla_auto_jit=2" python train.py \
--dataset ./data/voc2012_train.tfrecord \
--val_dataset ./data/voc2012_val.tfrecord \
--classes ./data/voc2012.names \
--num_classes 20 \
--mode fit --transfer darknet \
--batch_size 16 \
--epochs 10 \
--weights ./checkpoints/yolov3.tf \
--weights_num_classes 80
``` ```
...@@ -17,8 +17,16 @@ ...@@ -17,8 +17,16 @@
- `export MIOPEN_FIND_MODE=3` - `export MIOPEN_FIND_MODE=3`
## 测试运行 ## 测试运行
#without xla
` python3 main.py ` ` python3 main.py `
#with xla
TF_XLA_FLAGS="--tf_xla_auto_jit=2" python3 main.py
## 参数说明 ## 参数说明
main.py文件内: main.py文件内:
- trainGenerator的第一个参数为batch_size - trainGenerator的第一个参数为batch_size
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment