Commit 8f8cf840 authored by Rayyyyy's avatar Rayyyyy
Browse files

update multi_nodes

parent 7f0600e4
......@@ -178,10 +178,10 @@ python -m pip install -e detectron2
```
## 训练
下载预训练模型 [MAE ViT-Large model ](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pth), 修改 `$Painter_ROOT/train_painter_vit_large.sh` 中finetune参数地址.
下载预训练模型 [MAE ViT-Large model](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pth), 并将 `$Painter_ROOT/train.sh` 中 `--finetune` 参数修改为该模型文件的本地路径.
### 单机多卡
本项目默认参数是单机4卡 (total_bsz = 1x4x32 = 128), 如需使用其他的卡数, 请修改 train_painter_vit_large.sh 中对应参数.
本项目默认参数是单机4卡 (total_bsz = 节点数 x 卡数 x 每卡等效batch = 1x4x32 = 128, 其中 32 = batch_size 2 x accum_iter 16), 如需使用其他的卡数, 请修改 `train.sh` 中对应参数.
```bash
bash train.sh
```
......
......@@ -8,13 +8,13 @@ np=$(($np*4))
# Print the process count held in np.
echo $np
# Take the first line of the file named by $hostfile.
nodename=$(cat $hostfile |sed -n "1p")
# Keep only the first whitespace-separated field of that line as dist_url.
# NOTE(review): the two assignments below look like the old and new sides of
# the same diff line; the second one additionally exports dist_url.
dist_url=`echo $nodename | awk '{print $1}'`
export dist_url=`echo $nodename | awk '{print $1}'`
# Print the location of the mpirun executable found on PATH.
which mpirun
# Add the Python library directory; users must change this to the path of their own environment.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/path/of/conda/envs/{env_name}/lib
# Exported for later commands; the consumers are not visible in this excerpt.
export PYTHON=python3
export NPROC_PER_NODE=4
# -np: number of GPUs
# -x: pass the variable through to the single_process.sh script
......
......@@ -16,7 +16,7 @@ export NCCL_CROSS_NIC=1
# Exported for child processes; presumably an RCCL tuning knob — confirm
# against the RCCL documentation.
export RCCL_NCHANNELS=4
# Rendezvous address comes from dist_url, which must already be present in
# this process's environment (it is not assigned in this excerpt).
export MASTER_ADDR=$dist_url
# NOTE(review): the training command shown elsewhere in this commit passes
# --master_port=12358 explicitly; confirm which port is actually used.
export MASTER_PORT=4321
# Copy the OMPI_COMM_WORLD_* values (presumably provided by Open MPI's mpirun
# for each launched process — confirm) into RANK / LOCAL_RANK.
export RANK=$OMPI_COMM_WORLD_RANK
export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
# Unexported shell-local copy of the local rank; its use is outside this excerpt.
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
......
......@@ -6,6 +6,8 @@ export USE_MIOPEN_BATCHNORM=1
DATA_PATH=/home/datasets
name=painter_vit_large
python -m torch.distributed.launch --nproc_per_node=4 \
--nnodes=${WORLD_SIZE} --node_rank=$RANK \
--master_addr=$MASTER_ADDR --master_port=12358 \
--use_env main_train.py \
--batch_size 2 \
--accum_iter 16 \
......
#!/bin/bash
# Launch Painter ViT-Large training through torch.distributed.launch.
#
# Required environment variables (read here, never set by this script):
#   WORLD_SIZE   - forwarded to the launcher as --nnodes
#   RANK         - forwarded to the launcher as --node_rank
#   MASTER_ADDR  - forwarded to the launcher as --master_addr
#
# Before running, edit DATA_PATH and the --finetune checkpoint path below.
set -euo pipefail

# Fail early with a readable message instead of handing empty
# --nnodes= / --node_rank= / --master_addr= values to the launcher.
: "${WORLD_SIZE:?WORLD_SIZE must be set (forwarded as --nnodes)}"
: "${RANK:?RANK must be set (forwarded as --node_rank)}"
: "${MASTER_ADDR:?MASTER_ADDR must be set (forwarded as --master_addr)}"

# Exported for the training processes; presumably ROCm / MIOpen runtime
# toggles — confirm against the platform documentation.
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1

# Root directory that holds every dataset annotation file listed below.
DATA_PATH=/home/datasets
# Run name; selects the output and log directories under models/.
name=painter_vit_large

# Annotation files passed to --json_path.
train_json=(
  "${DATA_PATH}/nyu_depth_v2/nyuv2_sync_image_depth.json"
  "${DATA_PATH}/ade20k/ade20k_training_image_semantic.json"
  "${DATA_PATH}/coco/pano_ca_inst/coco_train_image_panoptic_inst.json"
  "${DATA_PATH}/coco/pano_sem_seg/coco_train2017_image_panoptic_sem_seg.json"
  "${DATA_PATH}/coco_pose/coco_pose_256x192_train.json"
  "${DATA_PATH}/denoise/denoise_ssid_train.json"
  "${DATA_PATH}/derain/derain_train.json"
  "${DATA_PATH}/light_enhance/enhance_lol_train.json"
)

# Annotation files passed to --val_json_path.
val_json=(
  "${DATA_PATH}/nyu_depth_v2/nyuv2_test_image_depth.json"
  "${DATA_PATH}/ade20k/ade20k_validation_image_semantic.json"
  "${DATA_PATH}/coco/pano_ca_inst/coco_val_image_panoptic_inst.json"
  "${DATA_PATH}/coco/pano_sem_seg/coco_val2017_image_panoptic_sem_seg.json"
  "${DATA_PATH}/coco_pose/coco_pose_256x192_val.json"
  "${DATA_PATH}/denoise/denoise_ssid_val.json"
  "${DATA_PATH}/derain/derain_test_rain100h.json"
  "${DATA_PATH}/light_enhance/enhance_lol_val.json"
)

# Options consumed by torch.distributed.launch itself (8 processes per node,
# fixed rendezvous port 12358).
launcher_args=(
  --nproc_per_node=8
  --nnodes="${WORLD_SIZE}"
  --node_rank="${RANK}"
  --master_addr="${MASTER_ADDR}"
  --master_port=12358
  --use_env
)

# Options forwarded to main_train.py. Arrays keep each argument a separate,
# correctly quoted word and avoid backslash continuations running into the
# commented-out flag at the end.
train_args=(
  --batch_size 2
  --accum_iter 16
  --model painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1
  --num_mask_patches 784
  --max_mask_patches_per_block 392
  --epochs 15
  --warmup_epochs 1
  --lr 1e-3
  --clip_grad 3
  --layer_decay 0.8
  --drop_path 0.1
  --input_size 896 448
  --save_freq 1
  --data_path "${DATA_PATH}/"
  --json_path "${train_json[@]}"
  --val_json_path "${val_json[@]}"
  --output_dir "models/${name}"
  --log_dir "models/${name}/logs"
  --finetune path/to/mae_pretrain_vit_large.pth
  # --log_wandb
)

python -m torch.distributed.launch "${launcher_args[@]}" \
  main_train.py "${train_args[@]}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment