update README.md and delete train.sh

a1c98d9e · shantf · 63098752 · a1c98d9e · 63098752 · 63098752
Commit a1c98d9e authored Sep 28, 2024 by shantf
5 changed files
--- a/README.md
+++ b/README.md
@@ -22,8 +22,8 @@ H-DETR引入一对多匹配分支，将原始的一对一匹配分支与一个
 -v 路径、docker_name和imageID根据实际情况修改

 ```bash
-docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04.1-py38-latest
-docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=80G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
+docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=80G --privileged=true --device=/dev/kfd --device=/dev/dri/ -v /opt/hyhal:/opt/hyhal:ro --group-add video --name docker_name imageID bash

 cd /your_code_path/hdetr_pytorch
 pip install -r requirements.txt
@@ -39,7 +39,7 @@ pip install mmdet==2.26.0 （对应mmcv 1.7.1）
 cd ./docker

 docker build --no-cache -t hdetr:latest .
-docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=80G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
+docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=80G --privileged=true --device=/dev/kfd --device=/dev/dri/ -v /opt/hyhal:/opt/hyhal:ro --group-add video --name docker_name imageID bash

 cd /your_code_path/hdetr_pytorch
 pip install -r requirements.txt
@@ -53,9 +53,9 @@ pip install mmdet==2.26.0 （对应mmcv 1.7.1）
 关于本项目DCU显卡所需的特殊深度学习库可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。
 ```bash
 DTK软件栈: dtk24.04.1
-python: python3.8
-torch: 1.13.1
-torchvision: 0.14.1
+python: python3.10
+torch: 2.1.0
+torchvision: 0.16.0
 ```
 `Tips:以上dtk软件栈、python、torch等DCU相关工具版本需要严格一一对应`

@@ -116,13 +116,9 @@ cd ../../
 + 如果有预训练模型，修改config中的`--pretrained_backbone_path`为保存的预训练模型地址，预训练模型可从[#预训练权重](#预训练权重)中下载；
 + 当前训练默认backbone为`R50`，如果使用backbone为`swin`，可前往[Swin-Transformer](https://github.com/microsoft/Swin-Transformer)选择对应的预训练模型后再进行训练步骤；
 + 如果out_of_memory，设置`--batch_size`大小，当前默认为2。
+4. 如果想要使用单卡训练，将train_multi.sh中的卡数设置为1，即可启动单卡训练。

-### 单机单卡
-```bash
-bash ./train.sh
-```
-
-### 单机多卡
+### 单机单卡/多卡
 ```bash
 bash ./train_multi.sh
 ```

--- a/configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
+++ b/configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/decay0.05_drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
-#!/usr/bin/env bash
-
-set -x
-
-EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
-PY_ARGS=${@:1}
-
-python -u main.py \
-    --output_dir ${EXP_DIR} \
-    --with_box_refine \
-    --two_stage \
-    --dim_feedforward 2048 \
-    --epochs 36 \
-    --lr_drop 30 \
-    --num_queries_one2one 900 \
-    --num_queries_one2many 1500 \
-    --k_one2many 6 \
-    --lambda_one2many 1.0 \
-    --dropout 0.0 \
-    --mixed_selection \
-    --look_forward_twice \
-    --backbone swin_large \
-    --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
-    --drop_path_rate 0.5 \
-    --weight_decay 0.05 \
-    ${PY_ARGS}
--- a/configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
+++ b/configs/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
-#!/usr/bin/env bash
-
-set -x
-
-EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
-PY_ARGS=${@:1}
-
-python -u main.py \
-    --output_dir ${EXP_DIR} \
-    --with_box_refine \
-    --two_stage \
-    --dim_feedforward 2048 \
-    --epochs 36 \
-    --lr_drop 30 \
-    --num_queries_one2one 900 \
-    --num_queries_one2many 1500 \
-    --k_one2many 6 \
-    --lambda_one2many 1.0 \
-    --dropout 0.0 \
-    --mixed_selection \
-    --look_forward_twice \
-    --backbone swin_large \
-    --pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
-    --drop_path_rate 0.5 \
-    ${PY_ARGS}
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
-FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04.1-py38-latest
\ No newline at end of file
+FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
\ No newline at end of file
--- a/train.sh
+++ b/train.sh
-#!/usr/bin/env bash
-set -x
-
-export HIP_VISIBLE_DEVICES=1 # 自行修改为训练的卡号
-export USE_MIOPEN_BATCHNORM=1
-
-echo "Training start ..."
-# coco_path是训练数据集地址，数据是coco format
-sh <config path> --coco_path <coco path>
-# example
-# sh ./configs/two_stage/deformable-detr-hybrid-branch/12eps/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh
-
-echo "Training finished."