Commit 8f8cf840 authored by Rayyyyy's avatar Rayyyyy
Browse files

update multi_nodes

parent 7f0600e4
...@@ -178,10 +178,10 @@ python -m pip install -e detectron2 ...@@ -178,10 +178,10 @@ python -m pip install -e detectron2
``` ```
## 训练 ## 训练
下载预训练模型 [MAE ViT-Large model ](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pth), 修改 `$Painter_ROOT/train_painter_vit_large.sh` 中finetune参数地址. 下载预训练模型 [MAE ViT-Large model ](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pth), 修改 `$Painter_ROOT/train.sh` 中finetune参数地址.
### 单机多卡 ### 单机多卡
本项目默认参数是单机4卡 (total_bsz = 1x4x32 = 128), 如需使用其他的卡数, 请修改 train_painter_vit_large.sh 中对应参数. 本项目默认参数是单机4卡 (total_bsz = 1x4x32 = 128), 如需使用其他的卡数, 请修改 train.sh 中对应参数.
```bash ```bash
bash train.sh bash train.sh
``` ```
......
...@@ -8,13 +8,13 @@ np=$(($np*4)) ...@@ -8,13 +8,13 @@ np=$(($np*4))
echo $np echo $np
nodename=$(cat $hostfile |sed -n "1p") nodename=$(cat $hostfile |sed -n "1p")
dist_url=`echo $nodename | awk '{print $1}'` export dist_url=`echo $nodename | awk '{print $1}'`
which mpirun which mpirun
# 添加pythonlib环境, 用户需修改为自己的环境变量地址 # 添加pythonlib环境, 用户需修改为自己的环境变量地址
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/path/of/conda/envs/{env_name}/lib export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/path/of/conda/envs/{env_name}/lib
export PYTHON=python3 export PYTHON=python3
export NPROC_PER_NODE=4
# -np 显卡数量 # -np 显卡数量
# -x 将变量传递到single_process.sh脚本中 # -x 将变量传递到single_process.sh脚本中
......
...@@ -16,7 +16,7 @@ export NCCL_CROSS_NIC=1 ...@@ -16,7 +16,7 @@ export NCCL_CROSS_NIC=1
export RCCL_NCHANNELS=4 export RCCL_NCHANNELS=4
export MASTER_ADDR=$dist_url export MASTER_ADDR=$dist_url
export MASTER_PORT=4321 export MASTER_PORT=4321
export RANK=$OMPI_COMM_WORLD_RANK export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
lrank=$OMPI_COMM_WORLD_LOCAL_RANK lrank=$OMPI_COMM_WORLD_LOCAL_RANK
......
...@@ -6,6 +6,8 @@ export USE_MIOPEN_BATCHNORM=1 ...@@ -6,6 +6,8 @@ export USE_MIOPEN_BATCHNORM=1
DATA_PATH=/home/datasets DATA_PATH=/home/datasets
name=painter_vit_large name=painter_vit_large
python -m torch.distributed.launch --nproc_per_node=4 \ python -m torch.distributed.launch --nproc_per_node=4 \
--nnodes=${WORLD_SIZE} --node_rank=$RANK \
--master_addr=$MASTER_ADDR --master_port=12358 \
--use_env main_train.py \ --use_env main_train.py \
--batch_size 2 \ --batch_size 2 \
--accum_iter 16 \ --accum_iter 16 \
......
#!/bin/bash
# Launches Painter ViT-Large training through torch.distributed.launch with
# 8 processes per node.
#
# Environment (all optional for a single-node run):
#   WORLD_SIZE   forwarded as --nnodes; defaults to 1 when unset or empty.
#   RANK         forwarded as --node_rank; defaults to 0 on a single node.
#   MASTER_ADDR  forwarded as --master_addr; defaults to 127.0.0.1 on a
#                single node.
# When WORLD_SIZE is anything other than 1, RANK and MASTER_ADDR must be
# provided by the caller; the script aborts with a message otherwise instead
# of handing empty values to the launcher.

# Runtime switches exported to the training processes.
# NOTE(review): these look like ROCm/MIOpen settings - confirm against the
# target platform's documentation.
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1

DATA_PATH=/home/datasets
name=painter_vit_large

# Resolve the node topology. Unset variables used to expand to empty strings
# (e.g. "--nnodes="), which the launcher cannot parse.
nnodes=${WORLD_SIZE:-1}
if [[ "$nnodes" == "1" ]]; then
  node_rank=${RANK:-0}
  master_addr=${MASTER_ADDR:-127.0.0.1}
else
  node_rank=${RANK:?RANK must be set when WORLD_SIZE is not 1}
  master_addr=${MASTER_ADDR:?MASTER_ADDR must be set when WORLD_SIZE is not 1}
fi

# Options consumed by torch.distributed.launch itself.
launch_args=(
  --nproc_per_node=8
  --nnodes="$nnodes" --node_rank="$node_rank"
  --master_addr="$master_addr" --master_port=12358
  --use_env
)

# Options forwarded to main_train.py.
train_args=(
  --batch_size 2
  --accum_iter 16
  --model painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1
  --num_mask_patches 784
  --max_mask_patches_per_block 392
  --epochs 15
  --warmup_epochs 1
  --lr 1e-3
  --clip_grad 3
  --layer_decay 0.8
  --drop_path 0.1
  --input_size 896 448
  --save_freq 1
  --data_path "$DATA_PATH/"
  --json_path
  "$DATA_PATH/nyu_depth_v2/nyuv2_sync_image_depth.json"
  "$DATA_PATH/ade20k/ade20k_training_image_semantic.json"
  "$DATA_PATH/coco/pano_ca_inst/coco_train_image_panoptic_inst.json"
  "$DATA_PATH/coco/pano_sem_seg/coco_train2017_image_panoptic_sem_seg.json"
  "$DATA_PATH/coco_pose/coco_pose_256x192_train.json"
  "$DATA_PATH/denoise/denoise_ssid_train.json"
  "$DATA_PATH/derain/derain_train.json"
  "$DATA_PATH/light_enhance/enhance_lol_train.json"
  --val_json_path
  "$DATA_PATH/nyu_depth_v2/nyuv2_test_image_depth.json"
  "$DATA_PATH/ade20k/ade20k_validation_image_semantic.json"
  "$DATA_PATH/coco/pano_ca_inst/coco_val_image_panoptic_inst.json"
  "$DATA_PATH/coco/pano_sem_seg/coco_val2017_image_panoptic_sem_seg.json"
  "$DATA_PATH/coco_pose/coco_pose_256x192_val.json"
  "$DATA_PATH/denoise/denoise_ssid_val.json"
  "$DATA_PATH/derain/derain_test_rain100h.json"
  "$DATA_PATH/light_enhance/enhance_lol_val.json"
  --output_dir "models/$name"
  --log_dir "models/$name/logs"
  # Replace with the real location of the downloaded MAE checkpoint.
  --finetune path/to/mae_pretrain_vit_large.pth
  # Uncomment to enable the optional flag that was disabled in the original:
  # --log_wandb
)

python -m torch.distributed.launch "${launch_args[@]}" main_train.py "${train_args[@]}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment