Commit 3d92aebb authored by bailuo

add preprocessing

parent fcc0bcf3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Some parts are taken from https://github.com/Liusifei/UVC
"""
import os
import glob
import argparse
import numpy as np
from tqdm import tqdm
import cv2
import torch
import utils
import vision_transformer as vits
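# Run one frame through the ViT backbone and return its per-patch features
# (the [CLS] token is discarded; the output has shape (h*w, dim)).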
def extract_feature(model, frame, return_h_w=False):
"""Extract one frame feature everytime."""
out = model.get_intermediate_layers(frame.unsqueeze(0).cuda(), n=1)[0]
out = out[:, 1:, :] # we discard the [CLS] token
h, w = int(frame.shape[1] / model.patch_embed.patch_size), int(frame.shape[2] / model.patch_embed.patch_size)
dim = out.shape[-1]
out = out[0].reshape(h, w, dim)
out = out.reshape(-1, dim)
if return_h_w:
return out, h, w
return out
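# Load a frame from disk, resize it so the shorter side matches scale_size[0]
# (the longer side keeps the aspect ratio, rounded down to a multiple of 64),
# convert BGR -> RGB, and return a normalized float tensor in CHW order.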
def read_frame(frame_dir, scale_size=[480]):
"""
Read a single frame and preprocess it.
"""
img = cv2.imread(frame_dir)
ori_h, ori_w, _ = img.shape
if len(scale_size) == 1:
if (ori_h > ori_w):
tw = scale_size[0]
th = (tw * ori_h) / ori_w
th = int((th // 64) * 64)
else:
th = scale_size[0]
tw = (th * ori_w) / ori_h
tw = int((tw // 64) * 64)
else:
th, tw = scale_size
img = cv2.resize(img, (tw, th))
img = img.astype(np.float32)
img = img / 255.0
img = img[:, :, ::-1]
img = np.transpose(img.copy(), (2, 0, 1))
img = torch.from_numpy(img).float()
img = color_normalize(img)
return img, ori_h, ori_w
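# Normalize each RGB channel in place with (approximately) the ImageNet mean and std.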
def color_normalize(x, mean=[0.485, 0.456, 0.406], std=[0.228, 0.224, 0.225]):
for t, m, s in zip(x, mean, std):
t.sub_(m)
t.div_(s)
return x
if __name__ == '__main__':
parser = argparse.ArgumentParser('Extract DINO features for preprocessing')
parser.add_argument('--pretrained_weights', default='.',
type=str, help="Path to pretrained weights to evaluate.")
parser.add_argument('--arch', default='vit_small', type=str,
choices=['vit_tiny', 'vit_small', 'vit_base'], help='Architecture (support only ViT atm).')
parser.add_argument('--patch_size', default=16, type=int, help='Patch resolution of the model.')
parser.add_argument("--checkpoint_key", default="teacher", type=str,
help='Key to use in the checkpoint (example: "teacher")')
parser.add_argument('--output_dir', default=".", help='Path where to save segmentations')
parser.add_argument('--data_path', default='/path/to/davis/', type=str)
parser.add_argument("--n_last_frames", type=int, default=7, help="number of preceeding frames")
parser.add_argument("--size_mask_neighborhood", default=12, type=int,
help="We restrict the set of source nodes considered to a spatial neighborhood of the query node")
parser.add_argument("--topk", type=int, default=5, help="accumulate label from top k neighbors")
parser.add_argument("--bs", type=int, default=6, help="Batch size, try to reduce if OOM")
parser.add_argument('--data_dir', type=str, default='', help='dataset dir')
args = parser.parse_args()
print("git:\n {}\n".format(utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
# building network
model = vits.__dict__[args.arch](patch_size=args.patch_size, num_classes=0)
print(f"Model {args.arch} {args.patch_size}x{args.patch_size} built.")
model.cuda()
utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
for param in model.parameters():
param.requires_grad = False
model.eval()
scene_dir = args.data_dir
frame_list = sorted(glob.glob(os.path.join(scene_dir, 'color', '*')))
save_dir = os.path.join(scene_dir, 'features', 'dino')
print('computing dino features for {}...'.format(scene_dir))
os.makedirs(save_dir, exist_ok=True)
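# Extract a (h, w, dim) DINO feature map for every frame and save it as a .npy file.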
for frame_path in tqdm(frame_list):
frame, ori_h, ori_w = read_frame(frame_path)
frame_feat, h, w = extract_feature(model, frame, return_h_w=True) # (h*w) x dim
frame_feat = frame_feat.reshape(h, w, -1)
frame_feat = frame_feat.cpu().numpy()
frame_name = os.path.basename(frame_path)
np.save(os.path.join(save_dir, frame_name + '.npy'), frame_feat)
print('computing dino features for {} is done \n'.format(scene_dir))
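# Note: each saved feature map can later be reloaded with e.g.
#   feat = np.load(os.path.join(save_dir, frame_name + '.npy'))  # shape (h, w, dim)
# (frame_name here refers to the same basename used when saving above).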
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A script to run multinode training with submitit.
Almost copy-paste from https://github.com/facebookresearch/deit/blob/main/run_with_submitit.py
"""
import argparse
import os
import uuid
from pathlib import Path
import main_dino
import submitit
def parse_args():
parser = argparse.ArgumentParser("Submitit for DINO", parents=[main_dino.get_args_parser()])
parser.add_argument("--ngpus", default=8, type=int, help="Number of gpus to request on each node")
parser.add_argument("--nodes", default=2, type=int, help="Number of nodes to request")
parser.add_argument("--timeout", default=2800, type=int, help="Duration of the job")
parser.add_argument("--partition", default="learnfair", type=str, help="Partition where to submit")
parser.add_argument("--use_volta32", action='store_true', help="Big models? Use this")
parser.add_argument('--comment', default="", type=str,
help='Comment to pass to scheduler, e.g. priority message')
return parser.parse_args()
def get_shared_folder() -> Path:
user = os.getenv("USER")
if Path("/checkpoint/").is_dir():
p = Path(f"/checkpoint/{user}/experiments")
p.mkdir(exist_ok=True)
return p
raise RuntimeError("No shared folder available")
def get_init_file():
# Init file must not exist, but its parent dir must exist.
os.makedirs(str(get_shared_folder()), exist_ok=True)
init_file = get_shared_folder() / f"{uuid.uuid4().hex}_init"
if init_file.exists():
os.remove(str(init_file))
return init_file
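# Picklable wrapper around the DINO training entry point so that submitit can
# submit it to SLURM and requeue it after preemption or timeout.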
class Trainer(object):
def __init__(self, args):
self.args = args
def __call__(self):
import main_dino
self._setup_gpu_args()
main_dino.train_dino(self.args)
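# Called by submitit when the job is preempted or times out: create a fresh
# rendezvous file and resubmit a copy of this trainer.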
def checkpoint(self):
import os
import submitit
self.args.dist_url = get_init_file().as_uri()
print("Requeuing ", self.args)
empty_trainer = type(self)(self.args)
return submitit.helpers.DelayedSubmission(empty_trainer)
def _setup_gpu_args(self):
import submitit
from pathlib import Path
job_env = submitit.JobEnvironment()
self.args.output_dir = Path(str(self.args.output_dir).replace("%j", str(job_env.job_id)))
self.args.gpu = job_env.local_rank
self.args.rank = job_env.global_rank
self.args.world_size = job_env.num_tasks
print(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}")
def main():
args = parse_args()
if args.output_dir == "":
args.output_dir = get_shared_folder() / "%j"
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
executor = submitit.AutoExecutor(folder=args.output_dir, slurm_max_num_timeout=30)
num_gpus_per_node = args.ngpus
nodes = args.nodes
timeout_min = args.timeout
partition = args.partition
kwargs = {}
if args.use_volta32:
kwargs['slurm_constraint'] = 'volta32gb'
if args.comment:
kwargs['slurm_comment'] = args.comment
executor.update_parameters(
mem_gb=40 * num_gpus_per_node,
gpus_per_node=num_gpus_per_node,
tasks_per_node=num_gpus_per_node, # one task per GPU
cpus_per_task=10,
nodes=nodes,
timeout_min=timeout_min, # max is 60 * 72
# Below are cluster dependent parameters
slurm_partition=partition,
slurm_signal_delay_s=120,
**kwargs
)
executor.update_parameters(name="dino")
args.dist_url = get_init_file().as_uri()
trainer = Trainer(args)
job = executor.submit(trainer)
print(f"Submitted job_id: {job.job_id}")
print(f"Logs and checkpoints will be saved at: {args.output_dir}")
if __name__ == "__main__":
main()