#!/bin/bash
export HIP_VISIBLE_DEVICES=0,1,2,3 # set to the GPU IDs and number of GPUs used for training
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
DATA_PATH=/home/datasets
name=painter_vit_large
python -m torch.distributed.launch --nproc_per_node=4 \
--use_env main_train.py \
--batch_size 2 \
--accum_iter 16 \
--model painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1 \
--num_mask_patches 784 \
--max_mask_patches_per_block 392 \
--epochs 15 \
--warmup_epochs 1 \
--lr 1e-3 \
--clip_grad 3 \
--layer_decay 0.8 \
--drop_path 0.1 \
--input_size 896 448 \
--save_freq 1 \
--data_path $DATA_PATH/ \
--json_path \
$DATA_PATH/nyu_depth_v2/nyuv2_sync_image_depth.json \
$DATA_PATH/ade20k/ade20k_training_image_semantic.json \
$DATA_PATH/coco/pano_ca_inst/coco_train_image_panoptic_inst.json \
$DATA_PATH/coco/pano_sem_seg/coco_train2017_image_panoptic_sem_seg.json \
$DATA_PATH/coco_pose/coco_pose_256x192_train.json \
$DATA_PATH/denoise/denoise_ssid_train.json \
$DATA_PATH/derain/derain_train.json \
$DATA_PATH/light_enhance/enhance_lol_train.json \
--val_json_path \
$DATA_PATH/nyu_depth_v2/nyuv2_test_image_depth.json \
$DATA_PATH/ade20k/ade20k_validation_image_semantic.json \
$DATA_PATH/coco/pano_ca_inst/coco_val_image_panoptic_inst.json \
$DATA_PATH/coco/pano_sem_seg/coco_val2017_image_panoptic_sem_seg.json \
$DATA_PATH/coco_pose/coco_pose_256x192_val.json \
$DATA_PATH/denoise/denoise_ssid_val.json \
$DATA_PATH/derain/derain_test_rain100h.json \
$DATA_PATH/light_enhance/enhance_lol_val.json \
--output_dir models/$name \
--log_dir models/$name/logs \
--finetune path/to/mae_pretrain_vit_large.pth \
# --log_wandb \
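# Note: effective global batch size = batch_size x accum_iter x nproc_per_node
#       = 2 x 16 x 4 = 128. torch.distributed.launch is deprecated in recent
#       PyTorch releases; torchrun is the drop-in replacement.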
#!/bin/bash
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
DATA_PATH=/home/datasets
name=painter_vit_large
python -m torch.distributed.launch --nproc_per_node=8 \
--nnodes=${WORLD_SIZE} --node_rank=$RANK \
--master_addr=$MASTER_ADDR --master_port=12358 \
--use_env main_train.py \
--batch_size 2 \
--accum_iter 16 \
--model painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1 \
--num_mask_patches 784 \
--max_mask_patches_per_block 392 \
--epochs 15 \
--warmup_epochs 1 \
--lr 1e-3 \
--clip_grad 3 \
--layer_decay 0.8 \
--drop_path 0.1 \
--input_size 896 448 \
--save_freq 1 \
--data_path $DATA_PATH/ \
--json_path \
$DATA_PATH/nyu_depth_v2/nyuv2_sync_image_depth.json \
$DATA_PATH/ade20k/ade20k_training_image_semantic.json \
$DATA_PATH/coco/pano_ca_inst/coco_train_image_panoptic_inst.json \
$DATA_PATH/coco/pano_sem_seg/coco_train2017_image_panoptic_sem_seg.json \
$DATA_PATH/coco_pose/coco_pose_256x192_train.json \
$DATA_PATH/denoise/denoise_ssid_train.json \
$DATA_PATH/derain/derain_train.json \
$DATA_PATH/light_enhance/enhance_lol_train.json \
--val_json_path \
$DATA_PATH/nyu_depth_v2/nyuv2_test_image_depth.json \
$DATA_PATH/ade20k/ade20k_validation_image_semantic.json \
$DATA_PATH/coco/pano_ca_inst/coco_val_image_panoptic_inst.json \
$DATA_PATH/coco/pano_sem_seg/coco_val2017_image_panoptic_sem_seg.json \
$DATA_PATH/coco_pose/coco_pose_256x192_val.json \
$DATA_PATH/denoise/denoise_ssid_val.json \
$DATA_PATH/derain/derain_test_rain100h.json \
$DATA_PATH/light_enhance/enhance_lol_val.json \
--output_dir models/$name \
--log_dir models/$name/logs \
--finetune path/to/mae_pretrain_vit_large.pth \
# --log_wandb \
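# Note: each node contributes batch_size x accum_iter x nproc_per_node
#       = 2 x 16 x 8 = 256 to the effective batch, so the global effective
#       batch size is 256 x number of nodes (WORLD_SIZE here counts nodes).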
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import math
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
class RandomResizedCrop(transforms.RandomResizedCrop):
"""
RandomResizedCrop for matching TF/TPU implementation: no for-loop is used.
This may lead to results different with torchvision's version.
Following BYOL's TF code:
https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206
"""
@staticmethod
def get_params(img, scale, ratio):
        try:
            width, height = F.get_image_size(img)  # torchvision >= 0.10
        except AttributeError:  # older torchvision only has the private helper
            width, height = F._get_image_size(img)
area = height * width
target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
log_ratio = torch.log(torch.tensor(ratio))
aspect_ratio = torch.exp(
torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
).item()
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
w = min(w, width)
h = min(h, height)
i = torch.randint(0, height - h + 1, size=(1,)).item()
j = torch.randint(0, width - w + 1, size=(1,)).item()
return i, j, h, w
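

# Minimal usage sketch, assuming a 224-pixel crop and a dummy PIL image: the
# subclass is a drop-in replacement for torchvision's RandomResizedCrop and
# composes like any other transform.
if __name__ == "__main__":
    from PIL import Image

    transform = transforms.Compose([
        RandomResizedCrop(224, scale=(0.2, 1.0)),
        transforms.ToTensor(),
    ])
    img = Image.new("RGB", (640, 480))
    print(transform(img).shape)  # torch.Size([3, 224, 224])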
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# DeiT: https://github.com/facebookresearch/deit
# --------------------------------------------------------
import os
import PIL
from torchvision import datasets, transforms
from timm.data import create_transform
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
def build_dataset(is_train, args):
transform = build_transform(is_train, args)
root = os.path.join(args.data_path, 'train' if is_train else 'val')
dataset = datasets.ImageFolder(root, transform=transform)
print(dataset)
return dataset
def build_transform(is_train, args):
mean = IMAGENET_DEFAULT_MEAN
std = IMAGENET_DEFAULT_STD
# train transform
if is_train:
# this should always dispatch to transforms_imagenet_train
transform = create_transform(
input_size=args.input_size,
is_training=True,
color_jitter=args.color_jitter,
auto_augment=args.aa,
interpolation='bicubic',
re_prob=args.reprob,
re_mode=args.remode,
re_count=args.recount,
mean=mean,
std=std,
)
return transform
# eval transform
t = []
if args.input_size <= 224:
crop_pct = 224 / 256
else:
crop_pct = 1.0
size = int(args.input_size / crop_pct)
t.append(
transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images
)
t.append(transforms.CenterCrop(args.input_size))
t.append(transforms.ToTensor())
t.append(transforms.Normalize(mean, std))
return transforms.Compose(t)
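

# Minimal usage sketch: the eval branch of build_transform only reads
# args.input_size, so a SimpleNamespace with an assumed value is enough here
# (the train branch additionally needs color_jitter, aa, reprob, remode and
# recount).
if __name__ == "__main__":
    from types import SimpleNamespace

    args = SimpleNamespace(input_size=224)
    eval_transform = build_transform(is_train=False, args=args)
    print(eval_transform)  # Resize(256) -> CenterCrop(224) -> ToTensor -> Normalize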
import os
import glob
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.distributed as dist
class DatasetTest(Dataset):
"""
define dataset for ddp
"""
def __init__(self, img_src_dir, input_size, ext_list=('*.png', '*.jpg'), ):
super(DatasetTest, self).__init__()
self.img_src_dir = img_src_dir
self.input_size = input_size
img_path_list = []
for ext in ext_list:
img_path_tmp = glob.glob(os.path.join(img_src_dir, ext))
img_path_list.extend(img_path_tmp)
self.img_path_list = img_path_list
def __len__(self):
return len(self.img_path_list)
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        img = Image.open(img_path).convert("RGB")
        size_org = img.size  # (width, height) before resizing
        img = img.resize((self.input_size, self.input_size))
        img = np.array(img) / 255.  # HWC float array in [0, 1]
        return img, img_path, size_org
def collate_fn(batch):
    # identity collate: keep (img, img_path, size_org) tuples as a plain
    # list, since the original image sizes differ across the batch
    return batch
# batch = list(zip(*batch))
# return tuple(batch)
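
# Minimal usage sketch (directory, input size and worker count are assumed
# placeholders): under DDP each rank reads a disjoint shard of the images via
# DistributedSampler, and the identity collate_fn keeps the
# (img, img_path, size_org) tuples un-stacked.
def build_test_loader(img_src_dir, input_size=448, batch_size=1):
    dataset = DatasetTest(img_src_dir, input_size)
    sampler = (torch.utils.data.DistributedSampler(dataset, shuffle=False)
               if dist.is_available() and dist.is_initialized() else None)
    return torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=sampler,
        collate_fn=collate_fn, num_workers=2,
    )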
def setup_for_distributed(is_master):
"""
This function disables printing when not in master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
if is_master or force:
builtin_print(*args, **kwargs)
__builtin__.print = print
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ and 'LOCAL_RANK' in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
        # SLURM does not export WORLD_SIZE; derive it from the task count so
        # init_process_group below receives a valid world_size (assumes one
        # process per SLURM task).
        args.world_size = int(os.environ['SLURM_NTASKS'])
else:
print('Not using distributed mode')
args.distributed = False
return args
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)
return args
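
# Minimal usage sketch: the launcher (torch.distributed.launch / torchrun)
# exports RANK, WORLD_SIZE and LOCAL_RANK, which init_distributed_mode reads;
# without a launcher it falls back to non-distributed mode. The --dist_url
# default below mirrors the conventional env:// rendezvous.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--dist_url", default="env://")
    args = parser.parse_args()
    args = init_distributed_mode(args)
    print(f"rank {get_rank()} / world size {get_world_size()}, "
          f"main process: {is_main_process()}")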