Commit 3d92aebb authored by bailuo

add preprocessing

parent fcc0bcf3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Some parts are taken from https://github.com/Liusifei/UVC
"""
import os
import glob
import argparse
import numpy as np
from tqdm import tqdm
import cv2
import torch
import utils
import vision_transformer as vits
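# Run one frame through the ViT backbone and return its per-patch features
# (the [CLS] token is discarded; the output has shape (h*w, dim)).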
def extract_feature(model, frame, return_h_w=False):
"""Extract one frame feature everytime."""
out = model.get_intermediate_layers(frame.unsqueeze(0).cuda(), n=1)[0]
out = out[:, 1:, :] # we discard the [CLS] token
h, w = int(frame.shape[1] / model.patch_embed.patch_size), int(frame.shape[2] / model.patch_embed.patch_size)
dim = out.shape[-1]
out = out[0].reshape(h, w, dim)
out = out.reshape(-1, dim)
if return_h_w:
return out, h, w
return out
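# Load a frame from disk, resize it so the shorter side matches scale_size[0]
# (the longer side keeps the aspect ratio, rounded down to a multiple of 64),
# convert BGR -> RGB, and return a normalized float tensor in CHW order.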
def read_frame(frame_dir, scale_size=[480]):
"""
Read a single frame and preprocess it.
"""
img = cv2.imread(frame_dir)
ori_h, ori_w, _ = img.shape
if len(scale_size) == 1:
if (ori_h > ori_w):
tw = scale_size[0]
th = (tw * ori_h) / ori_w
th = int((th // 64) * 64)
else:
th = scale_size[0]
tw = (th * ori_w) / ori_h
tw = int((tw // 64) * 64)
else:
th, tw = scale_size
img = cv2.resize(img, (tw, th))
img = img.astype(np.float32)
img = img / 255.0
img = img[:, :, ::-1]
img = np.transpose(img.copy(), (2, 0, 1))
img = torch.from_numpy(img).float()
img = color_normalize(img)
return img, ori_h, ori_w
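# Normalize each RGB channel in place with (approximately) the ImageNet mean and std.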
def color_normalize(x, mean=[0.485, 0.456, 0.406], std=[0.228, 0.224, 0.225]):
for t, m, s in zip(x, mean, std):
t.sub_(m)
t.div_(s)
return x
if __name__ == '__main__':
parser = argparse.ArgumentParser('Extract DINO features for preprocessing')
parser.add_argument('--pretrained_weights', default='.',
type=str, help="Path to pretrained weights to evaluate.")
parser.add_argument('--arch', default='vit_small', type=str,
choices=['vit_tiny', 'vit_small', 'vit_base'], help='Architecture (support only ViT atm).')
parser.add_argument('--patch_size', default=16, type=int, help='Patch resolution of the model.')
parser.add_argument("--checkpoint_key", default="teacher", type=str,
help='Key to use in the checkpoint (example: "teacher")')
parser.add_argument('--output_dir', default=".", help='Path where to save segmentations')
parser.add_argument('--data_path', default='/path/to/davis/', type=str)
parser.add_argument("--n_last_frames", type=int, default=7, help="number of preceeding frames")
parser.add_argument("--size_mask_neighborhood", default=12, type=int,
help="We restrict the set of source nodes considered to a spatial neighborhood of the query node")
parser.add_argument("--topk", type=int, default=5, help="accumulate label from top k neighbors")
parser.add_argument("--bs", type=int, default=6, help="Batch size, try to reduce if OOM")
parser.add_argument('--data_dir', type=str, default='', help='dataset dir')
args = parser.parse_args()
print("git:\n {}\n".format(utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
# building network
model = vits.__dict__[args.arch](patch_size=args.patch_size, num_classes=0)
print(f"Model {args.arch} {args.patch_size}x{args.patch_size} built.")
model.cuda()
utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
for param in model.parameters():
param.requires_grad = False
model.eval()
scene_dir = args.data_dir
frame_list = sorted(glob.glob(os.path.join(scene_dir, 'color', '*')))
save_dir = os.path.join(scene_dir, 'features', 'dino')
print('computing dino features for {}...'.format(scene_dir))
os.makedirs(save_dir, exist_ok=True)
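# Extract a (h, w, dim) DINO feature map for every frame and save it as a .npy file.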
for frame_path in tqdm(frame_list):
frame, ori_h, ori_w = read_frame(frame_path)
frame_feat, h, w = extract_feature(model, frame, return_h_w=True) # (h*w) x dim
frame_feat = frame_feat.reshape(h, w, -1)
frame_feat = frame_feat.cpu().numpy()
frame_name = os.path.basename(frame_path)
np.save(os.path.join(save_dir, frame_name + '.npy'), frame_feat)
print('computing dino features for {} is done \n'.format(scene_dir))
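# Note: each saved feature map can later be reloaded with e.g.
#   feat = np.load(os.path.join(save_dir, frame_name + '.npy'))  # shape (h, w, dim)
# (frame_name here refers to the same basename used when saving above).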
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A script to run multinode training with submitit.
Almost copy-paste from https://github.com/facebookresearch/deit/blob/main/run_with_submitit.py
"""
import argparse
import os
import uuid
from pathlib import Path
import main_dino
import submitit
def parse_args():
parser = argparse.ArgumentParser("Submitit for DINO", parents=[main_dino.get_args_parser()])
parser.add_argument("--ngpus", default=8, type=int, help="Number of gpus to request on each node")
parser.add_argument("--nodes", default=2, type=int, help="Number of nodes to request")
parser.add_argument("--timeout", default=2800, type=int, help="Duration of the job")
parser.add_argument("--partition", default="learnfair", type=str, help="Partition where to submit")
parser.add_argument("--use_volta32", action='store_true', help="Big models? Use this")
parser.add_argument('--comment', default="", type=str,
help='Comment to pass to scheduler, e.g. priority message')
return parser.parse_args()
def get_shared_folder() -> Path:
user = os.getenv("USER")
if Path("/checkpoint/").is_dir():
p = Path(f"/checkpoint/{user}/experiments")
p.mkdir(exist_ok=True)
return p
raise RuntimeError("No shared folder available")
def get_init_file():
# Init file must not exist, but its parent dir must exist.
os.makedirs(str(get_shared_folder()), exist_ok=True)
init_file = get_shared_folder() / f"{uuid.uuid4().hex}_init"
if init_file.exists():
os.remove(str(init_file))
return init_file
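# Picklable wrapper around the DINO training entry point so that submitit can
# submit it to SLURM and requeue it after preemption or timeout.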
class Trainer(object):
def __init__(self, args):
self.args = args
def __call__(self):
import main_dino
self._setup_gpu_args()
main_dino.train_dino(self.args)
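# Called by submitit when the job is preempted or times out: create a fresh
# rendezvous file and resubmit a copy of this trainer.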
def checkpoint(self):
import os
import submitit
self.args.dist_url = get_init_file().as_uri()
print("Requeuing ", self.args)
empty_trainer = type(self)(self.args)
return submitit.helpers.DelayedSubmission(empty_trainer)
def _setup_gpu_args(self):
import submitit
from pathlib import Path
job_env = submitit.JobEnvironment()
self.args.output_dir = Path(str(self.args.output_dir).replace("%j", str(job_env.job_id)))
self.args.gpu = job_env.local_rank
self.args.rank = job_env.global_rank
self.args.world_size = job_env.num_tasks
print(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}")
def main():
args = parse_args()
if args.output_dir == "":
args.output_dir = get_shared_folder() / "%j"
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
executor = submitit.AutoExecutor(folder=args.output_dir, slurm_max_num_timeout=30)
num_gpus_per_node = args.ngpus
nodes = args.nodes
timeout_min = args.timeout
partition = args.partition
kwargs = {}
if args.use_volta32:
kwargs['slurm_constraint'] = 'volta32gb'
if args.comment:
kwargs['slurm_comment'] = args.comment
executor.update_parameters(
mem_gb=40 * num_gpus_per_node,
gpus_per_node=num_gpus_per_node,
tasks_per_node=num_gpus_per_node, # one task per GPU
cpus_per_task=10,
nodes=nodes,
timeout_min=timeout_min, # max is 60 * 72
# Below are cluster dependent parameters
slurm_partition=partition,
slurm_signal_delay_s=120,
**kwargs
)
executor.update_parameters(name="dino")
args.dist_url = get_init_file().as_uri()
trainer = Trainer(args)
job = executor.submit(trainer)
print(f"Submitted job_id: {job.job_id}")
print(f"Logs and checkpoints will be saved at: {args.output_dir}")
if __name__ == "__main__":
main()