Commit 754fbc04 authored by bailuo

init

parent 7aa1ab82
import numpy as np
from torch.utils.data import Dataset, Sampler, IterableDataset
from torch.utils.data import DistributedSampler, WeightedRandomSampler
import bisect
import warnings
from typing import (
Iterable,
List,
Optional,
TypeVar,
)
from operator import itemgetter
import torch
from .raft import RAFTExhaustiveDataset
T_co = TypeVar('T_co', covariant=True)
T = TypeVar('T')
dataset_dict = {
'flow': RAFTExhaustiveDataset,
}
class DatasetFromSampler(Dataset):
"""Dataset to create indexes from `Sampler`.
Args:
sampler: PyTorch sampler
"""
def __init__(self, sampler: Sampler):
"""Initialisation for DatasetFromSampler."""
self.sampler = sampler
self.sampler_list = None
def __getitem__(self, index: int):
"""Gets element of the dataset.
Args:
index: index of the element in the dataset
Returns:
Single element by index
"""
if self.sampler_list is None:
self.sampler_list = list(self.sampler)
return self.sampler_list[index]
def __len__(self) -> int:
"""
Returns:
int: length of the dataset
"""
return len(self.sampler)
class DistributedSamplerWrapper(DistributedSampler):
"""
Wrapper over `Sampler` for distributed training.
Allows you to use any sampler in distributed mode.
It is especially useful in conjunction with
`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSamplerWrapper instance as a DataLoader
sampler, and load a subset of subsampled data of the original dataset
that is exclusive to it.
.. note::
Sampler is assumed to be of constant size.
"""
def __init__(
self,
sampler,
num_replicas: Optional[int] = None,
rank: Optional[int] = None,
shuffle: bool = True,
):
"""
Args:
sampler: Sampler used for subsampling
num_replicas (int, optional): Number of processes participating in
distributed training
rank (int, optional): Rank of the current process
within ``num_replicas``
shuffle (bool, optional): If true (default),
sampler will shuffle the indices
"""
super(DistributedSamplerWrapper, self).__init__(
DatasetFromSampler(sampler),
num_replicas=num_replicas,
rank=rank,
shuffle=shuffle,
)
self.sampler = sampler
def __iter__(self):
self.dataset = DatasetFromSampler(self.sampler)
indexes_of_indexes = super().__iter__()
subsampler_indexes = self.dataset
return iter(itemgetter(*indexes_of_indexes)(subsampler_indexes))
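# Illustrative usage sketch: wrapping a weighted sampler for DDP training, as
# described in the docstring above. Assumes torch.distributed has already been
# initialised so that DistributedSampler can infer the rank and world size;
# `dataset`, `weights` and `num_epochs` are placeholder arguments.
def _example_distributed_sampler_wrapper(dataset, weights, num_epochs=1):
    base_sampler = WeightedRandomSampler(weights, num_samples=len(weights))
    sampler = DistributedSamplerWrapper(base_sampler)
    loader = torch.utils.data.DataLoader(dataset, batch_size=8, sampler=sampler)
    for epoch in range(num_epochs):
        sampler.set_epoch(epoch)  # reshuffle the wrapped indices every epoch
        for batch in loader:
            pass  # training step would go here
    return loader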
class ConcatDataset(Dataset[T_co]):
r"""Dataset as a concatenation of multiple datasets.
This class is useful to assemble different existing datasets.
Args:
datasets (sequence): List of datasets to be concatenated
"""
datasets: List[Dataset[T_co]]
cumulative_sizes: List[int]
@staticmethod
def cumsum(sequence):
r, s = [], 0
for e in sequence:
l = len(e)
r.append(l + s)
s += l
return r
def __init__(self, datasets: Iterable[Dataset]) -> None:
super(ConcatDataset, self).__init__()
self.datasets = list(datasets)
assert len(self.datasets) > 0, 'datasets should not be an empty iterable'
for d in self.datasets:
assert not isinstance(d, IterableDataset), "ConcatDataset does not support IterableDataset"
self.cumulative_sizes = self.cumsum(self.datasets)
def increase_max_interval_by(self, increment):
for dataset in self.datasets:
curr_max_interval = dataset.max_interval.value
dataset.max_interval.value = min(curr_max_interval + increment, dataset.num_imgs - 1)
def set_max_interval(self, max_interval):
for dataset in self.datasets:
dataset.max_interval.value = min(max_interval, dataset.num_imgs - 1)
def __len__(self):
return self.cumulative_sizes[-1]
def __getitem__(self, idx):
if idx < 0:
if -idx > len(self):
raise ValueError("absolute value of index should not exceed dataset length")
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
sample_idx = idx
else:
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
return self.datasets[dataset_idx][sample_idx]
@property
def cummulative_sizes(self):
warnings.warn("cummulative_sizes attribute is renamed to "
"cumulative_sizes", DeprecationWarning, stacklevel=2)
return self.cumulative_sizes
def get_training_dataset(args, max_interval):
if '+' not in args.dataset_types:
train_dataset = dataset_dict[args.dataset_types](args, max_interval=max_interval)
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if args.distributed else None
else:
dataset_types = args.dataset_types.split('+')
weights = args.dataset_weights
assert len(dataset_types) == len(weights)
assert np.abs(np.sum(weights) - 1.) < 1e-6
train_datasets = []
train_weights_samples = []
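# each dataset type contributes a total sampling probability of `weight`, split
# evenly across its samples, so small datasets are not drowned out by large ones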
for dataset_type, weight in zip(dataset_types, weights):
train_dataset = dataset_dict[dataset_type](args, max_interval=max_interval)
train_datasets.append(train_dataset)
num_samples = len(train_dataset)
weight_each_sample = weight / num_samples
train_weights_samples.extend([weight_each_sample]*num_samples)
train_dataset = ConcatDataset(train_datasets)
train_weights = torch.from_numpy(np.array(train_weights_samples))
sampler = WeightedRandomSampler(train_weights, len(train_weights))
train_sampler = DistributedSamplerWrapper(sampler) if args.distributed else sampler
return train_dataset, train_sampler
import os
import glob
import json
import imageio
import numpy as np
import cv2
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
import multiprocessing as mp
from util import normalize_coords, gen_grid_np
def get_sample_weights(flow_stats):
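# flow_stats[k][j] is presumably the count of valid flow correspondences from
# frame k to frame j; normalising per reference frame k turns these counts into
# per-target-frame sampling probabilities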
sample_weights = {}
for k in flow_stats.keys():
sample_weights[k] = {}
total_num = np.array(list(flow_stats[k].values())).sum()
for j in flow_stats[k].keys():
sample_weights[k][j] = 1. * flow_stats[k][j] / total_num
return sample_weights
class RAFTExhaustiveDataset(Dataset):
def __init__(self, args, max_interval=None):
self.args = args
self.seq_dir = args.data_dir
self.seq_name = os.path.basename(self.seq_dir.rstrip('/'))
self.img_dir = os.path.join(self.seq_dir, 'color')
self.flow_dir = os.path.join(self.seq_dir, 'raft_exhaustive')
img_names = sorted(os.listdir(self.img_dir))
self.num_imgs = min(self.args.num_imgs, len(img_names))
self.img_names = img_names[:self.num_imgs]
h, w, _ = imageio.imread(os.path.join(self.img_dir, img_names[0])).shape
self.h, self.w = h, w
max_interval = self.num_imgs - 1 if not max_interval else max_interval
self.max_interval = mp.Value('i', max_interval)
self.num_pts = self.args.num_pts
self.grid = gen_grid_np(self.h, self.w)
flow_stats = json.load(open(os.path.join(self.seq_dir, 'flow_stats.json')))
self.sample_weights = get_sample_weights(flow_stats)
def __len__(self):
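# return a very large virtual length so an "epoch" never exhausts the dataset;
# the actual frame-pair sampling happens in __getitem__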
return self.num_imgs * 100000
def set_max_interval(self, max_interval):
self.max_interval.value = min(max_interval, self.num_imgs - 1)
def increase_max_interval_by(self, increment):
curr_max_interval = self.max_interval.value
self.max_interval.value = min(curr_max_interval + increment, self.num_imgs - 1)
def __getitem__(self, idx):
cached_flow_pred_dir = os.path.join('out', '{}_{}'.format(self.args.expname, self.seq_name), 'flow')
cached_flow_pred_files = sorted(glob.glob(os.path.join(cached_flow_pred_dir, '*')))
flow_error_file = os.path.join(os.path.dirname(cached_flow_pred_dir), 'flow_error.txt')
if os.path.exists(flow_error_file):
flow_error = np.loadtxt(flow_error_file)
id1_sample_weights = flow_error / np.sum(flow_error)
id1 = np.random.choice(self.num_imgs, p=id1_sample_weights)
else:
id1 = idx % self.num_imgs
img_name1 = self.img_names[id1]
max_interval = min(self.max_interval.value, self.num_imgs - 1)
img2_candidates = sorted(list(self.sample_weights[img_name1].keys()))
img2_candidates = img2_candidates[max(id1 - max_interval, 0):min(id1 + max_interval, self.num_imgs - 1)]
# sample more often from i-1 and i+1
id2s = np.array([self.img_names.index(n) for n in img2_candidates])
sample_weights = np.array([self.sample_weights[img_name1][i] for i in img2_candidates])
sample_weights /= np.sum(sample_weights)
sample_weights[np.abs(id2s - id1) <= 1] = 0.5
sample_weights /= np.sum(sample_weights)
img_name2 = np.random.choice(img2_candidates, p=sample_weights)
id2 = self.img_names.index(img_name2)
frame_interval = abs(id1 - id2)
# read image, flow and confidence
img1 = imageio.imread(os.path.join(self.img_dir, img_name1)) / 255.
img2 = imageio.imread(os.path.join(self.img_dir, img_name2)) / 255.
flow_file = os.path.join(self.flow_dir, '{}_{}.npy'.format(img_name1, img_name2))
flow = np.load(flow_file)
mask_file = flow_file.replace('raft_exhaustive', 'raft_masks').replace('.npy', '.png')
masks = imageio.imread(mask_file) / 255.
coord1 = self.grid
coord2 = self.grid + flow
cycle_consistency_mask = masks[..., 0] > 0
occlusion_mask = masks[..., 1] > 0
if frame_interval == 1:
mask = np.ones_like(cycle_consistency_mask)
else:
mask = cycle_consistency_mask | occlusion_mask
if mask.sum() == 0:
invalid = True
mask = np.ones_like(cycle_consistency_mask)
else:
invalid = False
if len(cached_flow_pred_files) > 0 and self.args.use_error_map:
cached_flow_pred_file = cached_flow_pred_files[id1]
assert img_name1 + '_' in cached_flow_pred_file
sup_flow_file = os.path.join(self.flow_dir, os.path.basename(cached_flow_pred_file))
pred_flow = np.load(cached_flow_pred_file)
sup_flow = np.load(sup_flow_file)
error_map = np.linalg.norm(pred_flow - sup_flow, axis=-1)
error_map = cv2.GaussianBlur(error_map, (5, 5), 0)
error_selected = error_map[mask]
prob = error_selected / np.sum(error_selected)
select_ids_error = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts), p=prob)
select_ids_random = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts))
select_ids = np.random.choice(np.concatenate([select_ids_error, select_ids_random]), self.num_pts,
replace=False)
else:
if self.args.use_count_map:
count_map = imageio.imread(os.path.join(self.seq_dir, 'count_maps', img_name1.replace('.jpg', '.png')))
pixel_sample_weight = 1 / np.sqrt(count_map + 1.)
pixel_sample_weight = pixel_sample_weight[mask]
pixel_sample_weight /= pixel_sample_weight.sum()
select_ids = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts),
p=pixel_sample_weight)
else:
select_ids = np.random.choice(mask.sum(), self.num_pts, replace=(mask.sum() < self.num_pts))
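# weight each pair by a cosine falloff in frame interval: ~1 for adjacent
# frames, decaying towards 0 as the interval approaches max_interval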
pair_weight = np.cos((frame_interval - 1.) / max_interval * np.pi / 2)
pts1 = torch.from_numpy(coord1[mask][select_ids]).float()
pts2 = torch.from_numpy(coord2[mask][select_ids]).float()
pts2_normed = normalize_coords(pts2, self.h, self.w)[None, None]
covisible_mask = torch.from_numpy(cycle_consistency_mask[mask][select_ids]).float()[..., None]
weights = torch.ones_like(covisible_mask) * pair_weight
gt_rgb1 = torch.from_numpy(img1[mask][select_ids]).float()
gt_rgb2 = F.grid_sample(torch.from_numpy(img2).float().permute(2, 0, 1)[None], pts2_normed,
align_corners=True).squeeze().T
if invalid:
weights = torch.zeros_like(weights)
if np.random.choice([0, 1]):
id1, id2, pts1, pts2, gt_rgb1, gt_rgb2 = id2, id1, pts2, pts1, gt_rgb2, gt_rgb1
weights[covisible_mask == 0.] = 0
data = {'ids1': id1,
'ids2': id2,
'pts1': pts1, # [n_pts, 2]
'pts2': pts2, # [n_pts, 2]
'gt_rgb1': gt_rgb1, # [n_pts, 3]
'gt_rgb2': gt_rgb2,
'weights': weights, # [n_pts, 1]
'covisible_mask': covisible_mask, # [n_pts, 1]
}
return data
import os
import shutil
import sys
import subprocess
import argparse
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, required=True, help='dataset dir')
parser.add_argument('--model', default='models/raft-things.pth', help="restore checkpoint")
parser.add_argument('--cycle_th', type=float, default=3., help='threshold for cycle consistency error')
parser.add_argument('--chain', action='store_true', help='if chaining cycle consistent flows (optional)')
args = parser.parse_args()
root = '/your_code_path/omnimotion/preprocessing/'
for files in os.listdir(args.data_dir):
data_dir_ = os.path.join(args.data_dir, files)
# compute raft optical flows between all pairs
os.chdir(root + 'RAFT')
subprocess.run(['python', 'exhaustive_raft.py', '--data_dir', data_dir_, '--model', args.model])
# compute dino feature maps
os.chdir(root + 'dino')
subprocess.run(['python', 'extract_dino_features.py', '--data_dir', data_dir_])
# filtering
os.chdir(root + 'RAFT')
subprocess.run(['python', 'filter_raft.py', '--data_dir', data_dir_, '--cycle_th', str(args.cycle_th)])
# chaining (optional)
subprocess.run(['python', 'chain_raft.py', '--data_dir', data_dir_])
# Model code
modelCode=698
# Model name
modelName=omnimotion_pytorch
# Model description
modelDescription=A method for dense, long-range motion estimation in video sequences that tracks moving objects pixel by pixel.
# Application scenarios
appScenario=manufacturing,e-commerce,healthcare,education
# Framework type
frameType=pytorch
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import sys
sys.path.append('../')
from util import sigma2alpha
class MFNBase(nn.Module):
"""
Multiplicative filter network base class.
Expects the child class to define the 'filters' attribute, which should be
a nn.ModuleList of n_layers+1 filters with output equal to hidden_size.
"""
def __init__(
self, hidden_size, out_size, n_layers, weight_scale, bias=True, output_act=False
):
super().__init__()
self.linear = nn.ModuleList(
[nn.Linear(hidden_size, hidden_size, bias) for _ in range(n_layers)]
)
self.output_linear = nn.Linear(hidden_size, out_size)
self.output_act = output_act
for lin in self.linear:
lin.weight.data.uniform_(
-np.sqrt(weight_scale / hidden_size),
np.sqrt(weight_scale / hidden_size),
)
return
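# forward implements the multiplicative filter network recurrence:
# z_1 = g_1(x), z_i = g_i(x) * (W_{i-1} z_{i-1} + b_{i-1}), output = W_out z_{n+1},
# where the filters g_i are defined by the child class (Fourier or Gabor)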
def forward(self, x):
out = self.filters[0](x)
for i in range(1, len(self.filters)):
out = self.filters[i](x) * self.linear[i - 1](out)
out = self.output_linear(out)
if self.output_act:
out = torch.sin(out)
return out
class FourierLayer(nn.Module):
"""
Sine filter as used in FourierNet.
"""
def __init__(self, in_features, out_features, weight_scale):
super().__init__()
self.linear = nn.Linear(in_features, out_features)
self.linear.weight.data *= weight_scale # gamma
self.linear.bias.data.uniform_(-np.pi, np.pi)
return
def forward(self, x):
return torch.sin(self.linear(x))
class FourierNet(MFNBase):
def __init__(
self,
in_size,
hidden_size,
out_size,
n_layers=3,
input_scale=256.0,
weight_scale=1.0,
bias=True,
output_act=False,
):
super().__init__(
hidden_size, out_size, n_layers, weight_scale, bias, output_act
)
self.filters = nn.ModuleList(
[
FourierLayer(in_size, hidden_size, input_scale / np.sqrt(n_layers + 1))
for _ in range(n_layers + 1)
]
)
class GaborLayer(nn.Module):
"""
Gabor-like filter as used in GaborNet.
"""
def __init__(self, in_features, out_features, weight_scale, alpha=1.0, beta=1.0):
super().__init__()
self.linear = nn.Linear(in_features, out_features)
self.mu = nn.Parameter(2 * torch.rand(out_features, in_features) - 1)
self.gamma = nn.Parameter(
torch.distributions.gamma.Gamma(alpha, beta).sample((out_features,))
)
self.linear.weight.data *= weight_scale * torch.sqrt(self.gamma[:, None])
self.linear.bias.data.uniform_(-np.pi, np.pi)
return
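# forward: a sine filter modulated by a Gaussian envelope; D expands the squared
# distance ||x - mu||^2, so the output is sin(Wx + b) * exp(-0.5 * gamma * ||x - mu||^2)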
def forward(self, x):
D = (
(x ** 2).sum(-1)[..., None]
+ (self.mu ** 2).sum(-1)[None, :]
- 2 * x @ self.mu.T
)
return torch.sin(self.linear(x)) * torch.exp(-0.5 * D * self.gamma[None, :])
class GaborNet(MFNBase):
def __init__(
self,
in_size,
hidden_size,
out_size,
n_layers=3,
input_scale=256.0,
weight_scale=1.0,
alpha=6.0,
beta=1.0,
bias=True,
output_act=False,
):
super().__init__(
hidden_size, out_size, n_layers, weight_scale, bias, output_act
)
self.filters = nn.ModuleList(
[
GaborLayer(
in_size,
hidden_size,
input_scale / np.sqrt(n_layers + 1),
alpha / (n_layers + 1),
beta,
)
for _ in range(n_layers + 1)
]
)
def gradient(self, x):
# only for the color mlp
x.requires_grad_(True)
y = self.forward(x)[..., -1:]
y = F.softplus(y - 1.)
y = sigma2alpha(y)
d_output = torch.ones_like(y, requires_grad=False, device=y.device)
gradients = torch.autograd.grad(
outputs=y,
inputs=x,
grad_outputs=d_output,
create_graph=True,
retain_graph=True,
only_inputs=True)[0]
return gradients.unsqueeze(1)
import numpy as np
import torch
from torch import masked_select, nn
import torch.nn.functional as F
from torch.utils.checkpoint import checkpoint
import networks.pe_relu
class CouplingLayer(nn.Module):
def __init__(self, map_st, projection, mask):
super().__init__()
self.map_st = map_st
self.projection = projection
self.mask = mask
def forward(self, F, y):
y1 = y * self.mask
F_y1 = torch.cat([F, self.projection(y[..., self.mask.squeeze().bool()])], dim=-1)
st = self.map_st(F_y1)
s, t = torch.split(st, split_size_or_sections=1, dim=-1)
s = torch.clamp(s, min=-8, max=8)
x = y1 + (1 - self.mask) * ((y - t) * torch.exp(-s))
ldj = (-s).sum(-1)
return x, ldj
def inverse(self, F, x):
x1 = x * self.mask
F_x1 = torch.cat([F, self.projection(x[..., self.mask.squeeze().bool()])], dim=-1)
st = self.map_st(F_x1)
s, t = torch.split(st, split_size_or_sections=1, dim=-1)
s = torch.clamp(s, min=-8, max=8)
y = x1 + (1 - self.mask) * (x * torch.exp(s) + t)
ldj = s.sum(-1)
return y, ldj
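# Minimal round-trip sanity check (illustrative sketch, not a test from this
# repo): verifies that CouplingLayer.inverse undoes CouplingLayer.forward for a
# toy configuration. Shapes follow the conventions used by NVPSimplified below:
# 3 input dims, two conditioned on (mask == 1), one transformed; `feature_dims`
# and `proj_dims` are arbitrary illustrative values.
def _example_coupling_round_trip():
    feature_dims, proj_dims = 8, 16
    mask = torch.tensor([1., 1., 0.])[None, None, None]       # keep x/y, transform z
    proj = ProjectionLayer(2, proj_dims)                       # projects the 2 kept dims
    map_st = MLP(proj_dims + feature_dims, 2, [32], bn=None)   # predicts (s, t)
    layer = CouplingLayer(map_st, proj, mask)
    feat = torch.randn(1, 4, 5, feature_dims)                  # [n_imgs, n_pts, n_samples, feature_dims]
    y = torch.randn(1, 4, 5, 3)
    x, _ = layer(feat, y)
    y_rec, _ = layer.inverse(feat, x)
    assert torch.allclose(y, y_rec, atol=1e-5)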
class MLP(nn.Module):
def __init__(self, c_in, c_out, c_hiddens, act=nn.LeakyReLU, bn=nn.BatchNorm1d):
super().__init__()
layers = []
d_in = c_in
for d_out in c_hiddens:
layers.append(nn.Linear(d_in, d_out))
if bn is not None:
layers.append(bn(d_out))
layers.append(act())
d_in = d_out
layers.append(nn.Linear(d_in, c_out))
self.mlp = nn.Sequential(*layers)
self.c_out = c_out
def forward(self, x):
# x: B,...,C_in
input_shape = x.shape
C = input_shape[-1]
_x = x.reshape(-1, C) # X, C_in
y = self.mlp(_x) # X, C_out
y = y.reshape(*input_shape[:-1], self.c_out)
return y
def apply_homography_xy1(mat, xy1):
"""
:param mat (*, 3, 3) (# * dims must match uv dims)
:param xy1 (*, H, W, 3)
:returns warped coordinates (*, H, W, 2)
"""
out_h = torch.matmul(mat, xy1[..., None])
return out_h[..., :2, 0] / (out_h[..., 2:, 0] + 1e-8)
def apply_homography(mat, uv):
"""
:param mat (*, 3, 3) (# * dims must match uv dims)
:param uv (*, H, W, 2)
:returns warped coordinates (*, H, W, 2)
"""
uv_h = torch.cat([uv, torch.ones_like(uv[..., :1])], dim=-1) # (..., 3)
return apply_homography_xy1(mat, uv_h)
class NVPSimplified(nn.Module):
def __init__(
self,
n_layers,
feature_dims,
hidden_size,
proj_dims,
code_proj_hidden_size=[],
proj_type="simple",
pe_freq=4,
normalization=True,
affine=False,
activation=nn.LeakyReLU,
device='cuda',
):
super().__init__()
self._checkpoint = False
self.affine = affine
# make layers
input_dims = 3
normalization = nn.BatchNorm1d if normalization else None
self.layers1 = nn.ModuleList()
self.layers2 = nn.ModuleList()
self.code_projectors = nn.ModuleList()
self.layer_idx = [i for i in range(n_layers)]
i = 0
mask_selection = []
while i < n_layers:
mask_selection.append(torch.randperm(input_dims))
i += input_dims
mask_selection = torch.cat(mask_selection)
if isinstance(hidden_size, int):
hidden_size = [hidden_size]
for i in self.layer_idx:
# get mask
mask2 = torch.zeros(input_dims, device=device)
mask2[mask_selection[i]] = 1
mask1 = 1 - mask2
# get transformation
map_st = nn.Sequential(
MLP(
proj_dims + feature_dims,
2,
hidden_size,
bn=normalization,
act=activation,
)
)
proj = get_projection_layer(proj_dims=proj_dims, type=proj_type, pe_freq=pe_freq)
self.layers1.append(CouplingLayer(map_st, proj, mask1[None, None, None]))
# get code projector
if len(code_proj_hidden_size) == 0:
code_proj_hidden_size = [feature_dims]
self.code_projectors.append(
MLP(
feature_dims,
feature_dims,
code_proj_hidden_size,
bn=normalization,
act=activation,
)
)
if self.affine:
# this mlp takes time and depth as input and produces an affine transformation for x and y
self.affine_mlp = networks.pe_relu.MLP(input_dim=2,
hidden_size=256,
n_layers=2,
skip_layers=[],
use_pe=True,
pe_dims=[1],
pe_freq=pe_freq,
output_dim=5).to(device)
def _expand_features(self, F, x):
_, N, K, _ = x.shape
return F[:, None, None, :].expand(-1, N, K, -1)
def _call(self, func, *args, **kwargs):
if self._checkpoint:
return checkpoint(func, *args, **kwargs)
else:
return func(*args, **kwargs)
def invert_affine(self, a, b, c, d, tx, ty, zeros, ones):
determinant = a * d - b * c
inverse_determinant = 1.0 / determinant
inverted_a = d * inverse_determinant
inverted_b = -b * inverse_determinant
inverted_c = -c * inverse_determinant
inverted_d = a * inverse_determinant
inverted_tx = (b * ty - d * tx) * inverse_determinant
inverted_ty = (c * tx - a * ty) * inverse_determinant
return torch.cat([inverted_a, inverted_b, inverted_tx,
inverted_c, inverted_d, inverted_ty,
zeros, zeros, ones], dim=-1).reshape(*a.shape[:-1], 3, 3)
def get_affine(self, theta, inverse=False):
"""
expands the 5 parameters into 3x3 affine transformation matrix
:param theta (..., 5)
:returns mat (..., 3, 3)
"""
angle = theta[..., 0:1]
scale1 = torch.exp(theta[..., 1:2])
scale2 = torch.exp(theta[..., 3:4])
cos = torch.cos(angle)
sin = torch.sin(angle)
a = cos * scale1
b = -sin * scale1
c = sin * scale2
d = cos * scale2
tx = theta[..., 2:3]
ty = theta[..., 4:5]
zeros = torch.zeros_like(a)
ones = torch.ones_like(a)
if inverse:
return self.invert_affine(a, b, c, d, tx, ty, zeros, ones)
else:
return torch.cat([a, b, tx, c, d, ty, zeros, zeros, ones], dim=-1).reshape(*theta.shape[:-1], 3, 3)
def _affine_input(self, t, x, inverse=False):
depth = x[..., -1] # [n_imgs, n_pts, n_samples]
net_in = torch.stack([t[..., None].repeat(1, *x.shape[1:3]), depth], dim=-1)
affine = self.get_affine(self.affine_mlp(net_in), inverse=inverse) # [n_imgs, n_pts, n_samples, 3, 3]
xy = x[..., :2]
xy = apply_homography(affine, xy)
x = torch.cat([xy, depth[..., None]], dim=-1)
return x
def forward(self, t, feat, x):
y = x
if self.affine:
y = self._affine_input(t, y)
for i in self.layer_idx:
feat_i = self.code_projectors[i](feat)
feat_i = self._expand_features(feat_i, y)
l1 = self.layers1[i]
y, _ = self._call(l1, feat_i, y)
return y
def inverse(self, t, feat, y):
x = y
for i in reversed(self.layer_idx):
feat_i = self.code_projectors[i](feat)
feat_i = self._expand_features(feat_i, x)
l1 = self.layers1[i]
x, _ = self._call(l1.inverse, feat_i, x)
if self.affine:
x = self._affine_input(t, x, inverse=True)
return x
class BaseProjectionLayer(nn.Module):
@property
def proj_dims(self):
raise NotImplementedError()
def forward(self, x):
raise NotImplementedError()
class IdentityProjection(BaseProjectionLayer):
def __init__(self, input_dims):
super().__init__()
self._input_dims = input_dims
@property
def proj_dims(self):
return self._input_dims
def forward(self, x):
return x
class ProjectionLayer(BaseProjectionLayer):
def __init__(self, input_dims, proj_dims):
super().__init__()
self._proj_dims = proj_dims
self.proj = nn.Sequential(
nn.Linear(input_dims, 2 * proj_dims), nn.ReLU(), nn.Linear(2 * proj_dims, proj_dims)
)
@property
def proj_dims(self):
return self._proj_dims
def forward(self, x):
return self.proj(x)
class FixedPositionalEncoding(ProjectionLayer):
def __init__(self, input_dims, frequency, proj_dims):
super().__init__(input_dims, proj_dims)
ll = frequency
self.sigma = np.pi * torch.pow(2, torch.linspace(0, ll - 1, ll, device='cuda')).view(1, -1)
self.proj = nn.Sequential(
nn.Linear(input_dims + input_dims * ll * 2, proj_dims), nn.LeakyReLU()
)
@property
def proj_dims(self):
return self._proj_dims * 3
def forward(self, x):
encoded = torch.cat(
[
torch.sin(x[:, :, :, :, None] * self.sigma[None, None, None]),
torch.cos(x[:, :, :, :, None] * self.sigma[None, None, None]),
],
dim=-1,
).view(x.shape[0], x.shape[1], x.shape[2], -1)
x = torch.cat([x, encoded], dim=-1)
return self.proj(x)
class GaussianRandomFourierFeatures(ProjectionLayer):
def __init__(self, input_dims, proj_dims, gamma=1.0):
super().__init__(input_dims, proj_dims)
self._two_pi = 2 * np.pi
self._gamma = gamma
ll = proj_dims // 2
self.register_buffer("B", torch.randn(3, ll))
def forward(self, x):
xB = x.matmul(self.B * self._two_pi * self._gamma)
return torch.cat([torch.cos(xB), torch.sin(xB)], dim=-1)
class GaborLayer(nn.Module):
def __init__(self, input_dims, proj_dims, alpha=1., beta=1.0, weight_scale=128):
super().__init__()
self.linear = nn.Linear(input_dims, proj_dims)
self.mu = nn.Parameter(2 * torch.rand(proj_dims, input_dims) - 1)
self.gamma = nn.Parameter(
torch.distributions.gamma.Gamma(alpha, beta).sample((proj_dims,))
)
self.linear.weight.data *= weight_scale * torch.sqrt(self.gamma[:, None])
self.linear.bias.data.uniform_(-np.pi, np.pi)
self.linear2 = nn.Linear(input_dims, proj_dims)
self.linear2.weight.data.uniform_(
-np.sqrt(weight_scale / proj_dims),
np.sqrt(weight_scale / proj_dims)
)
def forward(self, x):
D = (
(x ** 2).sum(-1)[..., None]
+ (self.mu ** 2).sum(-1)[None, :]
- 2 * x @ self.mu.T
)
return torch.sin(self.linear(x)) * torch.exp(-0.5 * D * self.gamma[None, :]) * self.linear2(x)
def get_projection_layer(**kwargs):
type = kwargs["type"]
if type == "identity":
return IdentityProjection(3)
elif type == "simple":
return ProjectionLayer(2, kwargs.get("proj_dims", 128))
elif type == "fixed_positional_encoding":
return FixedPositionalEncoding(2, kwargs.get("pe_freq", 4), kwargs.get("proj_dims", 128))
elif type == "gaussianrff":
return GaussianRandomFourierFeatures(
3, kwargs.get("proj_dims", 10), kwargs.get("gamma", 1.0)
)
elif type == 'gabor':
return GaborLayer(3, kwargs.get("proj_dims", 128))
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
def positionalEncoding_vec(in_tensor, b):
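# for an input of shape (..., C) and len(b) frequencies, returns shape
# (..., 2 * C * len(b)): sin/cos features of each input channel at each frequency in b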
original_shape = in_tensor.shape
in_tensor_flatten = in_tensor.reshape(torch.prod(torch.tensor(original_shape[:-1])), -1)
proj = torch.einsum('ij, k -> ijk', in_tensor_flatten, b) # shape (batch, in_tensor.size(1), freqNum)
mapped_coords = torch.cat((torch.sin(proj), torch.cos(proj)), dim=1) # shape (batch, 2*in_tensor.size(1), freqNum)
output = mapped_coords.transpose(2, 1).contiguous().view(mapped_coords.size(0), -1)
output = output.reshape(original_shape[:-1] + (-1,))
return output
class MLPf(nn.Module):
def __init__(self,
input_dim,
output_dim,
hidden_dim=256,
skip_layers=[4, 6],
num_layers=8,
use_pe=False,
pe_freq=10,
device='cuda',
):
super(MLPf, self).__init__()
if use_pe:
encoding_dimensions = 2 * 2 * pe_freq + input_dim # only encode the pixel locations not latent codes
self.b = torch.tensor([(2 ** j) * np.pi for j in range(pe_freq)], requires_grad=False).to(device)
else:
encoding_dimensions = input_dim
self.hidden = nn.ModuleList()
for i in range(num_layers):
if i == 0:
input_dims = encoding_dimensions
elif i in skip_layers:
input_dims = hidden_dim + encoding_dimensions
else:
input_dims = hidden_dim
if i == num_layers - 1:
# last layer
self.hidden.append(nn.Linear(input_dims, output_dim, bias=True))
else:
self.hidden.append(nn.Linear(input_dims, hidden_dim, bias=True))
self.skip_layers = skip_layers
self.num_layers = num_layers
self.use_pe = use_pe
self.pe_freq = pe_freq
def forward(self, x):
if self.use_pe:
coord = x[..., :2]
pos = positionalEncoding_vec(coord, self.b)
x = torch.cat([pos, x], dim=-1)
input = x
for i, layer in enumerate(self.hidden):
if i > 0:
x = F.relu(x)
if i in self.skip_layers:
x = torch.cat((x, input), -1)
x = layer(x)
return x
class MLPb(nn.Module):
def __init__(self,
input_dim,
output_dim=3,
hidden_dim=256,
skip_layers=[4, 6],
num_layers=8,
use_pe=False,
pe_freq=10,
device='cuda',
):
super(MLPb, self).__init__()
if use_pe:
encoding_dimensions = 2 * input_dim * pe_freq
self.b = torch.tensor([(2 ** j) * np.pi for j in range(pe_freq)], requires_grad=False).to(device)
else:
encoding_dimensions = input_dim
self.hidden = nn.ModuleList()
for i in range(num_layers):
if i == 0:
input_dims = encoding_dimensions
elif i in skip_layers:
input_dims = hidden_dim + encoding_dimensions
else:
input_dims = hidden_dim
if i == num_layers - 1:
# last layer
self.hidden.append(nn.Linear(input_dims, output_dim, bias=True))
else:
self.hidden.append(nn.Linear(input_dims, hidden_dim, bias=True))
self.skip_layers = skip_layers
self.num_layers = num_layers
self.use_pe = use_pe
self.pe_freq = pe_freq
def forward(self, x):
if self.use_pe:
pos = positionalEncoding_vec(x, self.b)
x = pos
input = x
for i, layer in enumerate(self.hidden):
if i > 0:
x = F.relu(x)
if i in self.skip_layers:
x = torch.cat((x, input), -1)
x = layer(x)
return x
class GaussianActivation(nn.Module):
def __init__(self, a=1., trainable=True):
super().__init__()
self.register_parameter('a', nn.Parameter(a*torch.ones(1), trainable))
def forward(self, x):
return torch.exp(-x**2/(2*self.a**2))
class MLP(nn.Module):
def __init__(self,
input_dim,
output_dim,
hidden_dim=256,
skip_layers=[4],
num_layers=8,
act='relu',
use_pe=False,
pe_freq=10,
pe_dims=None,
device='cuda',
act_trainable=False,
**kwargs):
super(MLP, self).__init__()
self.pe_dims = pe_dims
if use_pe:
if pe_dims is None:
encoding_dimensions = 2 * input_dim * pe_freq + input_dim
else:
encoding_dimensions = 2 * len(pe_dims) * pe_freq + input_dim
self.b = torch.tensor([(2 ** j) * np.pi for j in range(pe_freq)], requires_grad=False).to(device)
else:
encoding_dimensions = input_dim
self.hidden = nn.ModuleList()
for i in range(num_layers):
if i == 0:
input_dims = encoding_dimensions
elif i in skip_layers:
input_dims = hidden_dim + encoding_dimensions
else:
input_dims = hidden_dim
if act == 'relu':
act_ = nn.ReLU(True)
elif act == 'elu':
act_ = nn.ELU(True)
elif act == 'leakyrelu':
act_ = nn.LeakyReLU(True)
elif act == 'gaussian':
act_ = GaussianActivation(a=kwargs['a'], trainable=act_trainable)
else:
raise Exception('unknown activation function!')
if i == num_layers - 1:
# last layer
self.hidden.append(nn.Linear(input_dims, output_dim, bias=True))
else:
self.hidden.append(nn.Sequential(nn.Linear(input_dims, hidden_dim, bias=True), act_))
self.skip_layers = skip_layers
self.num_layers = num_layers
self.use_pe = use_pe
self.pe_freq = pe_freq
def forward(self, x):
if self.use_pe:
coord = x[..., self.pe_dims] if self.pe_dims is not None else x
pos = positionalEncoding_vec(coord, self.b)
x = torch.cat([pos, x], dim=-1)
input = x
for i, layer in enumerate(self.hidden):
if i in self.skip_layers:
x = torch.cat((x, input), -1)
x = layer(x)
return x
RAFT @ 3fa0bb0a
Subproject commit 3fa0bb0a9c633ea0a9bb8a79c576b6785d4e6a02
# Data processing
This README file contains instructions to compute and process RAFT optical flows for optimizing OmniMotion.
## Data format
The input video data should be organized in the following format:
```
├──sequence_name/
├──color/
├──00000.jpg
├──00001.jpg
.....
├──mask/ (optional; only used for visualization purposes)
├──00000.png
├──00001.png
.....
```
If you want to run on [DAVIS](https://davischallenge.org/index.html) video sequences, you can run `python get_davis.py <out_dir>`
which will download the original dataset and organize it in our format for processing. Alternatively, you can
download some of our processed sequences [here](https://omnimotion.cs.cornell.edu/dataset/) to skip processing and directly start training.
If you want to train on your own video sequence, we recommend starting with
shorter sequences (< 60 frames) and lower resolutions (<= 480p) to manage the computational cost.
You may use `ffmpeg` to extract frames from the video.
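For example, assuming the input video is `video.mp4` (adjust paths and the quality flag to your needs):
```
ffmpeg -i video.mp4 -qscale:v 2 -start_number 0 sequence_name/color/%05d.jpg
```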
## Preparation
The commands below move files to the correct locations and download pretrained models (this only needs to be done once).
```
cd preprocessing/
mv exhaustive_raft.py filter_raft.py chain_raft.py RAFT/;
cd RAFT; ./download_models.sh; cd ../
mv extract_dino_features.py dino/
```
## Computing and processing flow
Run the following command to process the input video sequence. Please use an absolute path for the sequence directory.
```
conda activate omnimotion
python main_processing.py --data_dir <sequence directory> --chain
```
The processing contains several steps:
- computing all pairwise optical flows using `exhaustive_raft.py`
- computing dino features for each frame using `extract_dino_features.py`
- filtering flows using cycle consistency and appearance consistency checks with `filter_raft.py`
- (optional) chaining only cycle-consistent flows to create denser correspondences using `chain_raft.py`.
We found this to be helpful for handling sequences with rapid motion and large displacements.
For simple motion, this may be skipped by omitting `--chain` to save processing time.
After processing, the folder should look like the following:
```
├──sequence_name/
├──color/
├──mask/ (optional; only used for visualization purposes)
├──count_maps/
├──features/
├──raft_exhaustive/
├──raft_masks/
├──flow_stats.json
```
## Discussion
This processing pipeline is designed to filter and process RAFT optical flow for training our method.
Our method can also take as input correspondences from other methods, e.g., [TAPIR](https://deepmind-tapir.github.io/) and
[CoTracker](https://co-tracker.github.io/).
If you want to use different correspondences as input supervision, note that their error patterns might be different from
those of RAFT optical flow, and you may need to devise new filtering methods that are effective for the specific correspondences
you are working with.
dino @ 7c446df5
Subproject commit 7c446df5b9f45747937fb0d72314eb9f7b66930a