Commit 754fbc04 authored by bailuo

init

parent 7aa1ab82
import numpy as np
from torch.utils.data import Dataset, Sampler, IterableDataset
from torch.utils.data import DistributedSampler, WeightedRandomSampler
import bisect
import warnings
from typing import (
Iterable,
List,
Optional,
TypeVar,
)
from operator import itemgetter
import torch
from .raft import RAFTExhaustiveDataset
T_co = TypeVar('T_co', covariant=True)
T = TypeVar('T')
dataset_dict = {
'flow': RAFTExhaustiveDataset,
}
class DatasetFromSampler(Dataset):
"""Dataset to create indexes from `Sampler`.
Args:
sampler: PyTorch sampler
"""
def __init__(self, sampler: Sampler):
"""Initialisation for DatasetFromSampler."""
self.sampler = sampler
self.sampler_list = None
def __getitem__(self, index: int):
"""Gets element of the dataset.
Args:
index: index of the element in the dataset
Returns:
Single element by index
"""
if self.sampler_list is None:
self.sampler_list = list(self.sampler)
return self.sampler_list[index]
def __len__(self) -> int:
"""
Returns:
int: length of the dataset
"""
return len(self.sampler)
class DistributedSamplerWrapper(DistributedSampler):
"""
Wrapper over `Sampler` for distributed training.
Allows you to use any sampler in distributed mode.
It is especially useful in conjunction with
`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSamplerWrapper instance as a DataLoader
sampler, and load a subset of subsampled data of the original dataset
that is exclusive to it.
.. note::
Sampler is assumed to be of constant size.
"""
def __init__(
self,
sampler,
num_replicas: Optional[int] = None,
rank: Optional[int] = None,
shuffle: bool = True,
):
"""
Args:
sampler: Sampler used for subsampling
num_replicas (int, optional): Number of processes participating in
distributed training
rank (int, optional): Rank of the current process
within ``num_replicas``
shuffle (bool, optional): If true (default),
sampler will shuffle the indices
"""
super(DistributedSamplerWrapper, self).__init__(
DatasetFromSampler(sampler),
num_replicas=num_replicas,
rank=rank,
shuffle=shuffle,
)
self.sampler = sampler
def __iter__(self):
self.dataset = DatasetFromSampler(self.sampler)
indexes_of_indexes = super().__iter__()
subsampler_indexes = self.dataset
return iter(itemgetter(*indexes_of_indexes)(subsampler_indexes))
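# Illustrative usage sketch: wrapping a weighted sampler for DDP training, as
# described in the docstring above. Assumes torch.distributed has already been
# initialised so that DistributedSampler can infer the rank and world size;
# `dataset`, `weights` and `num_epochs` are placeholder arguments.
def _example_distributed_sampler_wrapper(dataset, weights, num_epochs=1):
    base_sampler = WeightedRandomSampler(weights, num_samples=len(weights))
    sampler = DistributedSamplerWrapper(base_sampler)
    loader = torch.utils.data.DataLoader(dataset, batch_size=8, sampler=sampler)
    for epoch in range(num_epochs):
        sampler.set_epoch(epoch)  # reshuffle the wrapped indices every epoch
        for batch in loader:
            pass  # training step would go here
    return loader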
class ConcatDataset(Dataset[T_co]):
r"""Dataset as a concatenation of multiple datasets.
This class is useful to assemble different existing datasets.
Args:
datasets (sequence): List of datasets to be concatenated
"""
datasets: List[Dataset[T_co]]
cumulative_sizes: List[int]
@staticmethod
def cumsum(sequence):
r, s = [], 0
for e in sequence:
l = len(e)
r.append(l + s)
s += l
return r
def __init__(self, datasets: Iterable[Dataset]) -> None:
super(ConcatDataset, self).__init__()
self.datasets = list(datasets)
assert len(self.datasets) > 0, 'datasets should not be an empty iterable'
for d in self.datasets:
assert not isinstance(d, IterableDataset), "ConcatDataset does not support IterableDataset"
self.cumulative_sizes = self.cumsum(self.datasets)
def increase_max_interval_by(self, increment):
for dataset in self.datasets:
curr_max_interval = dataset.max_interval.value
dataset.max_interval.value = min(curr_max_interval + increment, dataset.num_imgs - 1)
def set_max_interval(self, max_interval):
for dataset in self.datasets:
dataset.max_interval.value = min(max_interval, dataset.num_imgs - 1)
def __len__(self):
return self.cumulative_sizes[-1]
def __getitem__(self, idx):
if idx < 0:
if -idx > len(self):
raise ValueError("absolute value of index should not exceed dataset length")
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
sample_idx = idx
else:
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
return self.datasets[dataset_idx][sample_idx]
@property
def cummulative_sizes(self):
warnings.warn("cummulative_sizes attribute is renamed to "
"cumulative_sizes", DeprecationWarning, stacklevel=2)
return self.cumulative_sizes
def get_training_dataset(args, max_interval):
if '+' not in args.dataset_types:
train_dataset = dataset_dict[args.dataset_types](args, max_interval=max_interval)
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if args.distributed else None
else:
dataset_types = args.dataset_types.split('+')
weights = args.dataset_weights
assert len(dataset_types) == len(weights)
assert np.abs(np.sum(weights) - 1.) < 1e-6
train_datasets = []
train_weights_samples = []
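# each dataset type contributes a total sampling probability of `weight`, split
# evenly across its samples, so small datasets are not drowned out by large ones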
for dataset_type, weight in zip(dataset_types, weights):
train_dataset = dataset_dict[dataset_type](args, max_interval=max_interval)
train_datasets.append(train_dataset)
num_samples = len(train_dataset)
weight_each_sample = weight / num_samples
train_weights_samples.extend([weight_each_sample]*num_samples)
train_dataset = ConcatDataset(train_datasets)
train_weights = torch.from_numpy(np.array(train_weights_samples))
sampler = WeightedRandomSampler(train_weights, len(train_weights))
train_sampler = DistributedSamplerWrapper(sampler) if args.distributed else sampler
return train_dataset, train_sampler
import os
import glob
import json
import imageio
import numpy as np
import cv2
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
import multiprocessing as mp
from util import normalize_coords, gen_grid_np
def get_sample_weights(flow_stats):
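# flow_stats[k][j] is presumably the count of valid flow correspondences from
# frame k to frame j; normalising per reference frame k turns these counts into
# per-target-frame sampling probabilities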
sample_weights = {}
for k in flow_stats.keys():
sample_weights[k] = {}
total_num = np.array(list(flow_stats[k].values())).sum()
for j in flow_stats[k].keys():
sample_weights[k][j] = 1. * flow_stats[k][j] / total_num
return sample_weights
class RAFTExhaustiveDataset(Dataset):
def __init__(self, args, max_interval=None):
self.args = args
self.seq_dir = args.data_dir
self.seq_name = os.path.basename(self.seq_dir.rstrip('/'))
self.img_dir = os.path.join(self.seq_dir, 'color')
self.flow_dir = os.path.join(self.seq_dir, 'raft_exhaustive')
img_names = sorted(os.listdir(self.img_dir))
self.num_imgs = min(self.args.num_imgs, len(img_names))
self.img_names = img_names[:self.num_imgs]
h, w, _ = imageio.imread(os.path.join(self.img_dir, img_names[0])).shape
self.h, self.w = h, w
max_interval = self.num_imgs - 1 if not max_interval else max_interval
self.max_interval = mp.Value('i', max_interval)
self.num_pts = self.args.num_pts
self.grid = gen_grid_np(self.h, self.w)
flow_stats = json.load(open(os.path.join(self.seq_dir, 'flow_stats.json')))
self.sample_weights = get_sample_weights(flow_stats)
def __len__(self):
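# return a very large virtual length so an "epoch" never exhausts the dataset;
# the actual frame-pair sampling happens in __getitem__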
return self.num_imgs * 100000
def set_max_interval(self, max_interval):
self.max_interval.value = min(max_interval, self.num_imgs - 1)
def increase_max_interval_by(self, increment):
curr_max_interval = self.max_interval.value
self.max_interval.value = min(curr_max_interval + increment, self.num_imgs - 1)
def __getitem__(self, idx):
cached_flow_pred_dir = os.path.join('out', '{}_{}'.format(self.args.expname, self.seq_name), 'flow')
cached_flow_pred_files = sorted(glob.glob(os.path.join(cached_flow_pred_dir, '*')))
flow_error_file = os.path.join(os.path.dirname(cached_flow_pred_dir), 'flow_error.txt')
if os.path.exists(flow_error_file):
flow_error = np.loadtxt(flow_error_file)
id1_sample_weights = flow_error / np.sum(flow_error)
id1 = np.random.choice(self.num_imgs, p=id1_sample_weights)
else:
id1 = idx % self.num_imgs
img_name1 = self.img_names[id1]
max_interval = min(self.max_interval.value, self.num_imgs - 1)
img2_candidates = sorted(list(self.sample_weights[img_name1].keys()))
img2_candidates = img2_candidates[max(id1 - max_interval, 0):min(id1 + max_interval, self.num_imgs - 1)]
# sample more often from i-1 and i+1
id2s = np.array([self.img_names.index(n) for n in img2_candidates])
sample_weights = np.array([self.sample_weights[img_name1][i] for i in img2_candidates])
sample_weights /= np.sum(sample_weights)
sample_weights[np.abs(id2s - id1) <= 1] = 0.5
sample_weights /= np.sum(sample_weights)
img_name2 = np.random.choice(img2_candidates, p=sample_weights)
id2 = self.img_names.index(img_name2)
frame_interval = abs(id1 - id2)
# read image, flow and confidence
img1 = imageio.imread(os.path.join(self.img_dir, img_name1)) / 255.
img2 = imageio.imread(os.path.join(self.img_dir, img_name2)) / 255.
flow_file = os.path.join(self.flow_dir, '{}_{}.npy'.format(img_name1, img_name2))
flow = np.load(flow_file)
mask_file = flow_file.replace('raft_exhaustive', 'raft_masks').replace('.npy', '.png')
masks = imageio.imread(mask_file) / 255.
coord1 = self.grid
coord2 = self.grid + flow
cycle_consistency_mask = masks[..., 0] > 0
occlusion_mask = masks[..., 1] > 0
if frame_interval == 1:
mask = np.ones_like(cycle_consistency_mask)
else:
mask = cycle_consistency_mask | occlusion_mask
if mask.sum() == 0:
invalid = True
mask = np.ones_like(cycle_consistency_mask)
else:
invalid = False
if len(cached_flow_pred_files) > 0 and self.args.use_error_map:
cached_flow_pred_file = cached_flow_pred_files[id1]
assert img_name1 + '_' in cached_flow_pred_file
sup_flow_file = os.path.join(self.flow_dir, os.path.basename(cached_flow_pred_file))
pred_flow = np.load(cached_flow_pred_file)
sup_flow = np.load(sup_flow_file)
error_map = np.linalg.norm(pred_flow - sup_flow, axis=-1)
error_map = cv2.GaussianBlur(error_map, (5, 5), 0)
error_selected = error_map[mask]
prob = error_selected / np.sum(error_selected)
select_ids_error = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts), p=prob)
select_ids_random = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts))
select_ids = np.random.choice(np.concatenate([select_ids_error, select_ids_random]), self.num_pts,
replace=False)
else:
if self.args.use_count_map:
count_map = imageio.imread(os.path.join(self.seq_dir, 'count_maps', img_name1.replace('.jpg', '.png')))
pixel_sample_weight = 1 / np.sqrt(count_map + 1.)
pixel_sample_weight = pixel_sample_weight[mask]
pixel_sample_weight /= pixel_sample_weight.sum()
select_ids = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts),
p=pixel_sample_weight)
else:
select_ids = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts))
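# weight each pair by a cosine falloff in frame interval: ~1 for adjacent
# frames, decaying towards 0 as the interval approaches max_interval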
pair_weight = np.cos((frame_interval - 1.) / max_interval * np.pi / 2)
pts1 = torch.from_numpy(coord1[mask][select_ids]).float()
pts2 = torch.from_numpy(coord2[mask][select_ids]).float()
pts2_normed = normalize_coords(pts2, self.h, self.w)[None, None]
covisible_mask = torch.from_numpy(cycle_consistency_mask[mask][select_ids]).float()[..., None]
weights = torch.ones_like(covisible_mask) * pair_weight
gt_rgb1 = torch.from_numpy(img1[mask][select_ids]).float()
gt_rgb2 = F.grid_sample(torch.from_numpy(img2).float().permute(2, 0, 1)[None], pts2_normed,
align_corners=True).squeeze().T
if invalid:
weights = torch.zeros_like(weights)
if np.random.choice([0, 1]):
id1, id2, pts1, pts2, gt_rgb1, gt_rgb2 = id2, id1, pts2, pts1, gt_rgb2, gt_rgb1
weights[covisible_mask == 0.] = 0
data = {'ids1': id1,
'ids2': id2,
'pts1': pts1, # [n_pts, 2]
'pts2': pts2, # [n_pts, 2]
'gt_rgb1': gt_rgb1, # [n_pts, 3]
'gt_rgb2': gt_rgb2,
'weights': weights, # [n_pts, 1]
'covisible_mask': covisible_mask, # [n_pts, 1]
}
return data
import os
import shutil
import sys
import subprocess
import argparse
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, required=True, help='dataset dir')
parser.add_argument('--model', default='models/raft-things.pth', help="restore checkpoint")
parser.add_argument('--cycle_th', type=float, default=3., help='threshold for cycle consistency error')
parser.add_argument('--chain', action='store_true', help='if chaining cycle consistent flows (optional)')
args = parser.parse_args()
root = '/your_code_path/omnimotion/preprocessing/'
for files in os.listdir(args.data_dir):
data_dir_ = os.path.join(args.data_dir, files)
# compute raft optical flows between all pairs
os.chdir(root + 'RAFT')
subprocess.run(['python', 'exhaustive_raft.py', '--data_dir', data_dir_, '--model', args.model])
# compute dino feature maps
os.chdir(root + 'dino')
subprocess.run(['python', 'extract_dino_features.py', '--data_dir', data_dir_])
# filtering
os.chdir(root + 'RAFT')
subprocess.run(['python', 'filter_raft.py', '--data_dir', data_dir_, '--cycle_th', str(args.cycle_th)])
# chaining (optional)
subprocess.run(['python', 'chain_raft.py', '--data_dir', data_dir_])
# Model code
modelCode=698
# Model name
modelName=omnimotion_pytorch
# Model description
modelDescription=A method for dense, long-range motion estimation in video sequences that tracks moving objects pixel by pixel.
# Application scenarios
appScenario=manufacturing,e-commerce,healthcare,education
# Framework type
frameType=pytorch
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import sys
sys.path.append('../')
from util import sigma2alpha
class MFNBase(nn.Module):
"""
Multiplicative filter network base class.
Expects the child class to define the 'filters' attribute, which should be
a nn.ModuleList of n_layers+1 filters with output equal to hidden_size.
"""
def __init__(
self, hidden_size, out_size, n_layers, weight_scale, bias=True, output_act=False
):
super().__init__()
self.linear = nn.ModuleList(
[nn.Linear(hidden_size, hidden_size, bias) for _ in range(n_layers)]
)
self.output_linear = nn.Linear(hidden_size, out_size)
self.output_act = output_act
for lin in self.linear:
lin.weight.data.uniform_(
-np.sqrt(weight_scale / hidden_size),
np.sqrt(weight_scale / hidden_size),
)
return
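# forward implements the multiplicative filter network recurrence:
# z_1 = g_1(x), z_i = g_i(x) * (W_{i-1} z_{i-1} + b_{i-1}), output = W_out z_{n+1},
# where the filters g_i are defined by the child class (Fourier or Gabor)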
def forward(self, x):
out = self.filters[0](x)
for i in range(1, len(self.filters)):
out = self.filters[i](x) * self.linear[i - 1](out)
out = self.output_linear(out)
if self.output_act:
out = torch.sin(out)
return out
class FourierLayer(nn.Module):
"""
Sine filter as used in FourierNet.
"""
def __init__(self, in_features, out_features, weight_scale):
super().__init__()
self.linear = nn.Linear(in_features, out_features)
self.linear.weight.data *= weight_scale # gamma
self.linear.bias.data.uniform_(-np.pi, np.pi)
return
def forward(self, x):
return torch.sin(self.linear(x))
class FourierNet(MFNBase):
def __init__(
self,
in_size,
hidden_size,
out_size,
n_layers=3,
input_scale=256.0,
weight_scale=1.0,
bias=True,
output_act=False,
):
super().__init__(
hidden_size, out_size, n_layers, weight_scale, bias, output_act
)
self.filters = nn.ModuleList(
[
FourierLayer(in_size, hidden_size, input_scale / np.sqrt(n_layers + 1))
for _ in range(n_layers + 1)
]
)
class GaborLayer(nn.Module):
"""
Gabor-like filter as used in GaborNet.
"""
def __init__(self, in_features, out_features, weight_scale, alpha=1.0, beta=1.0):
super().__init__()
self.linear = nn.Linear(in_features, out_features)
self.mu = nn.Parameter(2 * torch.rand(out_features, in_features) - 1)
self.gamma = nn.Parameter(
torch.distributions.gamma.Gamma(alpha, beta).sample((out_features,))
)
self.linear.weight.data *= weight_scale * torch.sqrt(self.gamma[:, None])
self.linear.bias.data.uniform_(-np.pi, np.pi)
return
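# forward: a sine filter modulated by a Gaussian envelope; D expands the squared
# distance ||x - mu||^2, so the output is sin(Wx + b) * exp(-0.5 * gamma * ||x - mu||^2)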
def forward(self, x):
D = (
(x ** 2).sum(-1)[..., None]
+ (self.mu ** 2).sum(-1)[None, :]
- 2 * x @ self.mu.T
)
return torch.sin(self.linear(x)) * torch.exp(-0.5 * D * self.gamma[None, :])
class GaborNet(MFNBase):
def __init__(
self,
in_size,
hidden_size,
out_size,
n_layers=3,
input_scale=256.0,
weight_scale=1.0,
alpha=6.0,
beta=1.0,
bias=True,
output_act=False,
):
super().__init__(
hidden_size, out_size, n_layers, weight_scale, bias, output_act
)
self.filters = nn.ModuleList(
[
GaborLayer(
in_size,
hidden_size,
input_scale / np.sqrt(n_layers + 1),
alpha / (n_layers + 1),
beta,
)
for _ in range(n_layers + 1)
]
)
def gradient(self, x):
# only for the color mlp
x.requires_grad_(True)
y = self.forward(x)[..., -1:]
y = F.softplus(y - 1.)
y = sigma2alpha(y)
d_output = torch.ones_like(y, requires_grad=False, device=y.device)
gradients = torch.autograd.grad(
outputs=y,
inputs=x,
grad_outputs=d_output,
create_graph=True,
retain_graph=True,
only_inputs=True)[0]
return gradients.unsqueeze(1)
import numpy as np
import torch
from torch import masked_select, nn
import torch.nn.functional as F
from torch.utils.checkpoint import checkpoint
import networks.pe_relu
class CouplingLayer(nn.Module):
def __init__(self, map_st, projection, mask):
super().__init__()
self.map_st = map_st
self.projection = projection
self.mask = mask
def forward(self, F, y):
y1 = y * self.mask
F_y1 = torch.cat([F, self.projection(y[..., self.mask.squeeze().bool()])], dim=-1)
st = self.map_st(F_y1)
s, t = torch.split(st, split_size_or_sections=1, dim=-1)
s = torch.clamp(s, min=-8, max=8)
x = y1 + (1 - self.mask) * ((y - t) * torch.exp(-s))
ldj = (-s).sum(-1)
return x, ldj
def inverse(self, F, x):
x1 = x * self.mask
F_x1 = torch.cat([F, self.projection(x[..., self.mask.squeeze().bool()])], dim=-1)
st = self.map_st(F_x1)
s, t = torch.split(st, split_size_or_sections=1, dim=-1)
s = torch.clamp(s, min=-8, max=8)
y = x1 + (1 - self.mask) * (x * torch.exp(s) + t)
ldj = s.sum(-1)
return y, ldj
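# Minimal round-trip sanity check (illustrative sketch, not a test from this
# repo): verifies that CouplingLayer.inverse undoes CouplingLayer.forward for a
# toy configuration. Shapes follow the conventions used by NVPSimplified below:
# 3 input dims, two conditioned on (mask == 1), one transformed; `feature_dims`
# and `proj_dims` are arbitrary illustrative values.
def _example_coupling_round_trip():
    feature_dims, proj_dims = 8, 16
    mask = torch.tensor([1., 1., 0.])[None, None, None]       # keep x/y, transform z
    proj = ProjectionLayer(2, proj_dims)                       # projects the 2 kept dims
    map_st = MLP(proj_dims + feature_dims, 2, [32], bn=None)   # predicts (s, t)
    layer = CouplingLayer(map_st, proj, mask)
    feat = torch.randn(1, 4, 5, feature_dims)                  # [n_imgs, n_pts, n_samples, feature_dims]
    y = torch.randn(1, 4, 5, 3)
    x, _ = layer(feat, y)
    y_rec, _ = layer.inverse(feat, x)
    assert torch.allclose(y, y_rec, atol=1e-5)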
class MLP(nn.Module):
def __init__(self, c_in, c_out, c_hiddens, act=nn.LeakyReLU, bn=nn.BatchNorm1d):
super().__init__()
layers = []
d_in = c_in
for d_out in c_hiddens:
layers.append(nn.Linear(d_in, d_out))
if bn is not None:
layers.append(bn(d_out))
layers.append(act())
d_in = d_out
layers.append(nn.Linear(d_in, c_out))
self.mlp = nn.Sequential(*layers)
self.c_out = c_out
def forward(self, x):
# x: B,...,C_in
input_shape = x.shape
C = input_shape[-1]
_x = x.reshape(-1, C) # X, C_in
y = self.mlp(_x) # X, C_out
y = y.reshape(*input_shape[:-1], self.c_out)
return y
def apply_homography_xy1(mat, xy1):
"""
:param mat (*, 3, 3) (# * dims must match uv dims)
:param xy1 (*, H, W, 3)
:returns warped coordinates (*, H, W, 2)
"""
out_h = torch.matmul(mat, xy1[..., None])
return out_h[..., :2, 0] / (out_h[..., 2:, 0] + 1e-8)
def apply_homography(mat, uv):
"""
:param mat (*, 3, 3) (# * dims must match uv dims)
:param uv (*, H, W, 2)
:returns warped coordinates (*, H, W, 2)
"""
uv_h = torch.cat([uv, torch.ones_like(uv[..., :1])], dim=-1) # (..., 3)
return apply_homography_xy1(mat, uv_h)
class NVPSimplified(nn.Module):
def __init__(
self,
n_layers,
feature_dims,
hidden_size,
proj_dims,
code_proj_hidden_size=[],
proj_type="simple",
pe_freq=4,
normalization=True,
affine=False,
activation=nn.LeakyReLU,
device='cuda',
):
super().__init__()
self._checkpoint = False
self.affine = affine
# make layers
input_dims = 3
normalization = nn.BatchNorm1d if normalization else None
self.layers1 = nn.ModuleList()
self.layers2 = nn.ModuleList()
self.code_projectors = nn.ModuleList()
self.layer_idx = [i for i in range(n_layers)]
i = 0
mask_selection = []
while i < n_layers:
mask_selection.append(torch.randperm(input_dims))
i += input_dims
mask_selection = torch.cat(mask_selection)
if isinstance(hidden_size, int):
hidden_size = [hidden_size]
for i in self.layer_idx:
# get mask
mask2 = torch.zeros(input_dims, device=device)
mask2[mask_selection[i]] = 1
mask1 = 1 - mask2
# get transformation
map_st = nn.Sequential(
MLP(
proj_dims + feature_dims,
2,
hidden_size,
bn=normalization,
act=activation,
)
)
proj = get_projection_layer(proj_dims=proj_dims, type=proj_type, pe_freq=pe_freq)
self.layers1.append(CouplingLayer(map_st, proj, mask1[None, None, None]))
# get code projector
if len(code_proj_hidden_size) == 0:
code_proj_hidden_size = [feature_dims]
self.code_projectors.append(
MLP(
feature_dims,
feature_dims,
code_proj_hidden_size,
bn=normalization,
act=activation,
)
)
if self.affine:
# this mlp takes time and depth as input and produces an affine transformation for x and y
self.affine_mlp = networks.pe_relu.MLP(input_dim=2,
hidden_size=256,
n_layers=2,
skip_layers=[],
use_pe=True,
pe_dims=[1],
pe_freq=pe_freq,
output_dim=5).to(device)
def _expand_features(self, F, x):
_, N, K, _ = x.shape
return F[:, None, None, :].expand(-1, N, K, -1)
def _call(self, func, *args, **kwargs):
if self._checkpoint:
return checkpoint(func, *args, **kwargs)
else:
return func(*args, **kwargs)
def invert_affine(self, a, b, c, d, tx, ty, zeros, ones):
determinant = a * d - b * c
inverse_determinant = 1.0 / determinant
inverted_a = d * inverse_determinant
inverted_b = -b * inverse_determinant
inverted_c = -c * inverse_determinant
inverted_d = a * inverse_determinant
inverted_tx = (b * ty - d * tx) * inverse_determinant
inverted_ty = (c * tx - a * ty) * inverse_determinant
return torch.cat([inverted_a, inverted_b, inverted_tx,
inverted_c, inverted_d, inverted_ty,
zeros, zeros, ones], dim=-1).reshape(*a.shape[:-1], 3, 3)
def get_affine(self, theta, inverse=False):
"""
expands the 5 parameters into 3x3 affine transformation matrix
:param theta (..., 5)
:returns mat (..., 3, 3)
"""
angle = theta[..., 0:1]
scale1 = torch.exp(theta[..., 1:2])
scale2 = torch.exp(theta[..., 3:4])
cos = torch.cos(angle)
sin = torch.sin(angle)
a = cos * scale1
b = -sin * scale1
c = sin * scale2
d = cos * scale2
tx = theta[..., 2:3]
ty = theta[..., 4:5]
zeros = torch.zeros_like(a)
ones = torch.ones_like(a)
if inverse:
return self.invert_affine(a, b, c, d, tx, ty, zeros, ones)
else:
return torch.cat([a, b, tx, c, d, ty, zeros, zeros, ones], dim=-1).reshape(*theta.shape[:-1], 3, 3)
def _affine_input(self, t, x, inverse=False):
depth = x[..., -1] # [n_imgs, n_pts, n_samples]
net_in = torch.stack([t[..., None].repeat(1, *x.shape[1:3]), depth], dim=-1)
affine = self.get_affine(self.affine_mlp(net_in), inverse=inverse) # [n_imgs, n_pts, n_samples, 3, 3]
xy = x[..., :2]
xy = apply_homography(affine, xy)
x = torch.cat([xy, depth[..., None]], dim=-1)
return x
def forward(self, t, feat, x):
y = x
if self.affine:
y = self._affine_input(t, y)
for i in self.layer_idx:
feat_i = self.code_projectors[i](feat)
feat_i = self._expand_features(feat_i, y)
l1 = self.layers1[i]
y, _ = self._call(l1, feat_i, y)
return y
def inverse(self, t, feat, y):
x = y
for i in reversed(self.layer_idx):
feat_i = self.code_projectors[i](feat)
feat_i = self._expand_features(feat_i, x)
l1 = self.layers1[i]
x, _ = self._call(l1.inverse, feat_i, x)
if self.affine:
x = self._affine_input(t, x, inverse=True)
return x
class BaseProjectionLayer(nn.Module):
@property
def proj_dims(self):
raise NotImplementedError()
def forward(self, x):
raise NotImplementedError()
class IdentityProjection(BaseProjectionLayer):
def __init__(self, input_dims):
super().__init__()
self._input_dims = input_dims
@property
def proj_dims(self):
return self._input_dims
def forward(self, x):
return x
class ProjectionLayer(BaseProjectionLayer):
def __init__(self, input_dims, proj_dims):
super().__init__()
self._proj_dims = proj_dims
self.proj = nn.Sequential(
nn.Linear(input_dims, 2 * proj_dims), nn.ReLU(), nn.Linear(2 * proj_dims, proj_dims)
)
@property
def proj_dims(self):
return self._proj_dims
def forward(self, x):
return self.proj(x)
class FixedPositionalEncoding(ProjectionLayer):
def __init__(self, input_dims, frequency, proj_dims):
super().__init__(input_dims, proj_dims)
ll = frequency
self.sigma = np.pi * torch.pow(2, torch.linspace(0, ll - 1, ll, device='cuda')).view(1, -1)
self.proj = nn.Sequential(
nn.Linear(input_dims + input_dims * ll * 2, proj_dims), nn.LeakyReLU()
)
@property
def proj_dims(self):
return self._proj_dims * 3
def forward(self, x):
encoded = torch.cat(
[
torch.sin(x[:, :, :, :, None] * self.sigma[None, None, None]),
torch.cos(x[:, :, :, :, None] * self.sigma[None, None, None]),
],
dim=-1,
).view(x.shape[0], x.shape[1], x.shape[2], -1)
x = torch.cat([x, encoded], dim=-1)
return self.proj(x)
class GaussianRandomFourierFeatures(ProjectionLayer):
def __init__(self, input_dims, proj_dims, gamma=1.0):
super().__init__(input_dims, proj_dims)
self._two_pi = 2 * np.pi
self._gamma = gamma
ll = proj_dims // 2
self.register_buffer("B", torch.randn(3, ll))
def forward(self, x):
xB = x.matmul(self.B * self._two_pi * self._gamma)
return torch.cat([torch.cos(xB), torch.sin(xB)], dim=-1)
class GaborLayer(nn.Module):
def __init__(self, input_dims, proj_dims, alpha=1., beta=1.0, weight_scale=128):
super().__init__()
self.linear = nn.Linear(input_dims, proj_dims)
self.mu = nn.Parameter(2 * torch.rand(proj_dims, input_dims) - 1)
self.gamma = nn.Parameter(
torch.distributions.gamma.Gamma(alpha, beta).sample((proj_dims,))
)
self.linear.weight.data *= weight_scale * torch.sqrt(self.gamma[:, None])
self.linear.bias.data.uniform_(-np.pi, np.pi)
self.linear2 = nn.Linear(input_dims, proj_dims)
self.linear2.weight.data.uniform_(
-np.sqrt(weight_scale / proj_dims),
np.sqrt(weight_scale / proj_dims)
)
def forward(self, x):
D = (
(x ** 2).sum(-1)[..., None]
+ (self.mu ** 2).sum(-1)[None, :]
- 2 * x @ self.mu.T
)
return torch.sin(self.linear(x)) * torch.exp(-0.5 * D * self.gamma[None, :]) * self.linear2(x)
def get_projection_layer(**kwargs):
type = kwargs["type"]
if type == "identity":
return IdentityProjection(3)
elif type == "simple":
return ProjectionLayer(2, kwargs.get("proj_dims", 128))
elif type == "fixed_positional_encoding":
return FixedPositionalEncoding(2, kwargs.get("pe_freq", 4), kwargs.get("proj_dims", 128))
elif type == "gaussianrff":
return GaussianRandomFourierFeatures(
3, kwargs.get("proj_dims", 10), kwargs.get("gamma", 1.0)
)
elif type == 'gabor':
return GaborLayer(3, kwargs.get("proj_dims", 128))
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
def positionalEncoding_vec(in_tensor, b):
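# for an input of shape (..., C) and len(b) frequencies, returns shape
# (..., 2 * C * len(b)): sin/cos features of each input channel at each frequency in b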
original_shape = in_tensor.shape
in_tensor_flatten = in_tensor.reshape(torch.prod(torch.tensor(original_shape[:-1])), -1)
proj = torch.einsum('ij, k -> ijk', in_tensor_flatten, b) # shape (batch, in_tensor.size(1), freqNum)
mapped_coords = torch.cat((torch.sin(proj), torch.cos(proj)), dim=1) # shape (batch, 2*in_tensor.size(1), freqNum)
output = mapped_coords.transpose(2, 1).contiguous().view(mapped_coords.size(0), -1)
output = output.reshape(original_shape[:-1] + (-1,))
return output
class MLPf(nn.Module):
def __init__(self,
input_dim,
output_dim,
hidden_dim=256,
skip_layers=[4, 6],
num_layers=8,
use_pe=False,
pe_freq=10,
device='cuda',
):
super(MLPf, self).__init__()
if use_pe:
encoding_dimensions = 2 * 2 * pe_freq + input_dim # only encode the pixel locations not latent codes
self.b = torch.tensor([(2 ** j) * np.pi for j in range(pe_freq)], requires_grad=False).to(device)
else:
encoding_dimensions = input_dim
self.hidden = nn.ModuleList()
for i in range(num_layers):
if i == 0:
input_dims = encoding_dimensions
elif i in skip_layers:
input_dims = hidden_dim + encoding_dimensions
else:
input_dims = hidden_dim
if i == num_layers - 1:
# last layer
self.hidden.append(nn.Linear(input_dims, output_dim, bias=True))
else:
self.hidden.append(nn.Linear(input_dims, hidden_dim, bias=True))
self.skip_layers = skip_layers
self.num_layers = num_layers
self.use_pe = use_pe
self.pe_freq = pe_freq
def forward(self, x):
if self.use_pe:
coord = x[..., :2]
pos = positionalEncoding_vec(coord, self.b)
x = torch.cat([pos, x], dim=-1)
input = x
for i, layer in enumerate(self.hidden):
if i > 0:
x = F.relu(x)
if i in self.skip_layers:
x = torch.cat((x, input), -1)
x = layer(x)
return x
class MLPb(nn.Module):
def __init__(self,
input_dim,
output_dim=3,
hidden_dim=256,
skip_layers=[4, 6],
num_layers=8,
use_pe=False,
pe_freq=10,
device='cuda',
):
super(MLPb, self).__init__()
if use_pe:
encoding_dimensions = 2 * input_dim * pe_freq
self.b = torch.tensor([(2 ** j) * np.pi for j in range(pe_freq)], requires_grad=False).to(device)
else:
encoding_dimensions = input_dim
self.hidden = nn.ModuleList()
for i in range(num_layers):
if i == 0:
input_dims = encoding_dimensions
elif i in skip_layers:
input_dims = hidden_dim + encoding_dimensions
else:
input_dims = hidden_dim
if i == num_layers - 1:
# last layer
self.hidden.append(nn.Linear(input_dims, output_dim, bias=True))
else:
self.hidden.append(nn.Linear(input_dims, hidden_dim, bias=True))
self.skip_layers = skip_layers
self.num_layers = num_layers
self.use_pe = use_pe
self.pe_freq = pe_freq
def forward(self, x):
if self.use_pe:
pos = positionalEncoding_vec(x, self.b)
x = pos
input = x
for i, layer in enumerate(self.hidden):
if i > 0:
x = F.relu(x)
if i in self.skip_layers:
x = torch.cat((x, input), -1)
x = layer(x)
return x
class GaussianActivation(nn.Module):
def __init__(self, a=1., trainable=True):
super().__init__()
self.register_parameter('a', nn.Parameter(a*torch.ones(1), trainable))
def forward(self, x):
return torch.exp(-x**2/(2*self.a**2))
class MLP(nn.Module):
def __init__(self,
input_dim,
output_dim,
hidden_dim=256,
skip_layers=[4],
num_layers=8,
act='relu',
use_pe=False,
pe_freq=10,
pe_dims=None,
device='cuda',
act_trainable=False,
**kwargs):
super(MLP, self).__init__()
self.pe_dims = pe_dims
if use_pe:
if pe_dims is None:
encoding_dimensions = 2 * input_dim * pe_freq + input_dim
else:
encoding_dimensions = 2 * len(pe_dims) * pe_freq + input_dim
self.b = torch.tensor([(2 ** j) * np.pi for j in range(pe_freq)], requires_grad=False).to(device)
else:
encoding_dimensions = input_dim
self.hidden = nn.ModuleList()
for i in range(num_layers):
if i == 0:
input_dims = encoding_dimensions
elif i in skip_layers:
input_dims = hidden_dim + encoding_dimensions
else:
input_dims = hidden_dim
if act == 'relu':
act_ = nn.ReLU(True)
elif act == 'elu':
act_ = nn.ELU(True)
elif act == 'leakyrelu':
act_ = nn.LeakyReLU(True)
elif act == 'gaussian':
act_ = GaussianActivation(a=kwargs['a'], trainable=act_trainable)
else:
raise Exception('unknown activation function!')
if i == num_layers - 1:
# last layer
self.hidden.append(nn.Linear(input_dims, output_dim, bias=True))
else:
self.hidden.append(nn.Sequential(nn.Linear(input_dims, hidden_dim, bias=True), act_))
self.skip_layers = skip_layers
self.num_layers = num_layers
self.use_pe = use_pe
self.pe_freq = pe_freq
def forward(self, x):
if self.use_pe:
coord = x[..., self.pe_dims] if self.pe_dims is not None else x
pos = positionalEncoding_vec(coord, self.b)
x = torch.cat([pos, x], dim=-1)
input = x
for i, layer in enumerate(self.hidden):
if i in self.skip_layers:
x = torch.cat((x, input), -1)
x = layer(x)
return x
RAFT @ 3fa0bb0a
Subproject commit 3fa0bb0a9c633ea0a9bb8a79c576b6785d4e6a02
# Data processing
This README file contains instructions to compute and process RAFT optical flows for optimizing OmniMotion.
## Data format
The input video data should be organized in the following format:
```
├──sequence_name/
├──color/
├──00000.jpg
├──00001.jpg
.....
├──mask/ (optional; only used for visualization purposes)
├──00000.png
├──00001.png
.....
```
If you want to run on [DAVIS](https://davischallenge.org/index.html) video sequences, you can run `python get_davis.py <out_dir>`
which will download the original dataset and organize it in our format for processing. Alternatively, you can
download some of our processed sequences [here](https://omnimotion.cs.cornell.edu/dataset/) to skip processing and directly start training.
If you want to train on your own video sequence, we recommend starting with
shorter sequences (< 60 frames) and lower resolutions (<= 480p) to manage the computational cost.
You may use `ffmpeg` to extract frames from the video.
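For example, assuming the input video is `video.mp4` (adjust paths and the quality flag to your needs):
```
ffmpeg -i video.mp4 -qscale:v 2 -start_number 0 sequence_name/color/%05d.jpg
```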
## Preparation
The commands below move files to the correct locations and download pretrained models (this only needs to be done once).
```
cd preprocessing/
mv exhaustive_raft.py filter_raft.py chain_raft.py RAFT/;
cd RAFT; ./download_models.sh; cd ../
mv extract_dino_features.py dino/
```
## Computing and processing flow
Run the following command to process the input video sequence. Please use an absolute path for the sequence directory.
```
conda activate omnimotion
python main_processing.py --data_dir <sequence directory> --chain
```
The processing contains several steps:
- computing all pairwise optical flows using `exhaustive_raft.py`
- computing dino features for each frame using `extract_dino_features.py`
- filtering flows using cycle consistency and appearance consistency checks with `filter_raft.py`
- (optional) chaining only cycle-consistent flows to create denser correspondences using `chain_raft.py`.
We found this to be helpful for handling sequences with rapid motion and large displacements.
For simple motion, this may be skipped by omitting `--chain` to save processing time.
After processing, the folder should look like the following:
```
├──sequence_name/
├──color/
├──mask/ (optional; only used for visualization purposes)
├──count_maps/
├──features/
├──raft_exhaustive/
├──raft_masks/
├──flow_stats.json
```
## Discussion
This processing pipeline is designed to filter and process RAFT optical flow for training our method.
Our method can also take as input correspondences from other methods, e.g., [TAPIR](https://deepmind-tapir.github.io/) and
[CoTracker](https://co-tracker.github.io/).
If you want to use different correspondences as input supervision, note that their error patterns might be different from
those of RAFT optical flow, and you may need to devise new filtering methods that are effective for the specific correspondences
you are working with.
dino @ 7c446df5
Subproject commit 7c446df5b9f45747937fb0d72314eb9f7b66930a