add preprocessing

3d92aebb · bailuo · fcc0bcf3 · 3d92aebb · 3d92aebb · 3d92aebb
Commit 3d92aebb authored Jul 16, 2024 by bailuo
20 changed files
--- a/preprocessing/RAFT/core/utils/__pycache__/__init__.cpython-310.pyc
+++ b/preprocessing/RAFT/core/utils/__pycache__/__init__.cpython-310.pyc
--- a/preprocessing/RAFT/core/utils/__pycache__/utils.cpython-310.pyc
+++ b/preprocessing/RAFT/core/utils/__pycache__/utils.cpython-310.pyc
--- a/preprocessing/RAFT/core/utils/augmentor.py
+++ b/preprocessing/RAFT/core/utils/augmentor.py
+import numpy as np
+import random
+import math
+from PIL import Image
+
+import cv2
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)
+
+import torch
+from torchvision.transforms import ColorJitter
+import torch.nn.functional as F
+
+
+class FlowAugmentor:
+    """ Augmentation for an image pair with a dense flow field.
+
+    Applied in order: photometric jitter, occlusion patches on the second
+    image, then a spatial transform (random rescale/stretch, flips, crop).
+    Images are assumed to be HxWx3 uint8 arrays and flow HxWx2; the PIL
+    conversion, the mean-color reshape and the per-axis flow scaling rely
+    on that layout.
+    """
+    def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True):
+        """
+        Args:
+            crop_size: (height, width) of the output crop.
+            min_scale, max_scale: range of the log2 scale factor sampled
+                for the random resize.
+            do_flip: enable the random horizontal / vertical flips.
+        """
+
+        # spatial augmentation params
+        self.crop_size = crop_size
+        self.min_scale = min_scale
+        self.max_scale = max_scale
+        self.spatial_aug_prob = 0.8
+        self.stretch_prob = 0.8
+        self.max_stretch = 0.2
+
+        # flip augmentation params
+        self.do_flip = do_flip
+        self.h_flip_prob = 0.5
+        self.v_flip_prob = 0.1
+
+        # photometric augmentation params
+        self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5/3.14)
+        self.asymmetric_color_aug_prob = 0.2
+        self.eraser_aug_prob = 0.5
+
+    def color_transform(self, img1, img2):
+        """ Photometric augmentation """
+
+        # asymmetric: each image gets its own independent jitter call
+        if np.random.rand() < self.asymmetric_color_aug_prob:
+            img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8)
+            img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8)
+
+        # symmetric: stack both images so a single jitter call covers both
+        else:
+            image_stack = np.concatenate([img1, img2], axis=0)
+            image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
+            img1, img2 = np.split(image_stack, 2, axis=0)
+
+        return img1, img2
+
+    def eraser_transform(self, img1, img2, bounds=(50, 100)):
+        """ Occlusion augmentation
+
+        With probability eraser_aug_prob, paints one or two rectangles of
+        img2 with its mean color (img2 is modified in place). `bounds` is
+        the [low, high) range of the rectangle side lengths in pixels; it
+        is an immutable tuple so the default cannot be shared and mutated
+        across calls.
+        """
+
+        ht, wd = img1.shape[:2]
+        if np.random.rand() < self.eraser_aug_prob:
+            mean_color = np.mean(img2.reshape(-1, 3), axis=0)
+            for _ in range(np.random.randint(1, 3)):
+                x0 = np.random.randint(0, wd)
+                y0 = np.random.randint(0, ht)
+                dx = np.random.randint(bounds[0], bounds[1])
+                dy = np.random.randint(bounds[0], bounds[1])
+                # slices running past the border are truncated by numpy
+                img2[y0:y0+dy, x0:x0+dx, :] = mean_color
+
+        return img1, img2
+
+    def spatial_transform(self, img1, img2, flow):
+        """ Random rescale / stretch, optional flips and a random crop
+
+        Returns the transformed (img1, img2, flow), each cropped to
+        crop_size.
+        """
+        # randomly sample scale
+        ht, wd = img1.shape[:2]
+        # smallest scale that still leaves crop_size plus 8 pixels of slack
+        min_scale = np.maximum(
+            (self.crop_size[0] + 8) / float(ht), 
+            (self.crop_size[1] + 8) / float(wd))
+
+        scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
+        scale_x = scale
+        scale_y = scale
+        if np.random.rand() < self.stretch_prob:
+            scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
+            scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
+        
+        scale_x = np.clip(scale_x, min_scale, None)
+        scale_y = np.clip(scale_y, min_scale, None)
+
+        if np.random.rand() < self.spatial_aug_prob:
+            # rescale the images
+            img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            # displacements scale with the image axes
+            flow = flow * [scale_x, scale_y]
+
+        if self.do_flip:
+            if np.random.rand() < self.h_flip_prob: # h-flip
+                img1 = img1[:, ::-1]
+                img2 = img2[:, ::-1]
+                # mirroring the x axis negates the horizontal component
+                flow = flow[:, ::-1] * [-1.0, 1.0]
+
+            if np.random.rand() < self.v_flip_prob: # v-flip
+                img1 = img1[::-1, :]
+                img2 = img2[::-1, :]
+                flow = flow[::-1, :] * [1.0, -1.0]
+
+        # randint's upper bound is exclusive: the +1 makes the last valid
+        # offset reachable and keeps an image that is exactly crop_size
+        # (possible when the rescale above is skipped) from raising.
+        y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + 1)
+        x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1] + 1)
+        
+        img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+
+        return img1, img2, flow
+
+    def __call__(self, img1, img2, flow):
+        """ Run the full augmentation and return contiguous arrays """
+        img1, img2 = self.color_transform(img1, img2)
+        img1, img2 = self.eraser_transform(img1, img2)
+        img1, img2, flow = self.spatial_transform(img1, img2, flow)
+
+        # flips and crops produce strided views; make them contiguous
+        img1 = np.ascontiguousarray(img1)
+        img2 = np.ascontiguousarray(img2)
+        flow = np.ascontiguousarray(flow)
+
+        return img1, img2, flow
+
+class SparseFlowAugmentor:
+    """ Augmentation for an image pair with a sparse flow field and validity mask.
+
+    Same pipeline as the dense case (color jitter, occlusion patches,
+    rescale, optional horizontal flip, crop), except that the flow is
+    resized by scattering the valid samples instead of interpolating.
+    Images are assumed to be HxWx3 uint8 arrays, flow HxWx2 and valid HxW.
+    """
+    def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False):
+        """
+        Args:
+            crop_size: (height, width) of the output crop.
+            min_scale, max_scale: range of the log2 scale factor sampled
+                for the random resize.
+            do_flip: enable the random horizontal flip.
+        """
+        # spatial augmentation params
+        self.crop_size = crop_size
+        self.min_scale = min_scale
+        self.max_scale = max_scale
+        self.spatial_aug_prob = 0.8
+        self.stretch_prob = 0.8
+        self.max_stretch = 0.2
+
+        # flip augmentation params
+        self.do_flip = do_flip
+        self.h_flip_prob = 0.5
+        self.v_flip_prob = 0.1
+
+        # photometric augmentation params
+        self.photo_aug = ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3/3.14)
+        self.asymmetric_color_aug_prob = 0.2
+        self.eraser_aug_prob = 0.5
+        
+    def color_transform(self, img1, img2):
+        """ Photometric augmentation, one jitter call shared by both images """
+        image_stack = np.concatenate([img1, img2], axis=0)
+        image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
+        img1, img2 = np.split(image_stack, 2, axis=0)
+        return img1, img2
+
+    def eraser_transform(self, img1, img2):
+        """ Occlusion augmentation: paint mean-color rectangles into img2 (in place) """
+        ht, wd = img1.shape[:2]
+        if np.random.rand() < self.eraser_aug_prob:
+            mean_color = np.mean(img2.reshape(-1, 3), axis=0)
+            for _ in range(np.random.randint(1, 3)):
+                x0 = np.random.randint(0, wd)
+                y0 = np.random.randint(0, ht)
+                dx = np.random.randint(50, 100)
+                dy = np.random.randint(50, 100)
+                img2[y0:y0+dy, x0:x0+dx, :] = mean_color
+
+        return img1, img2
+
+    def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0):
+        """ Resize a sparse flow map by re-scattering its valid samples
+
+        Every pixel with valid >= 1 is moved to its scaled, rounded
+        position and its flow vector is scaled by (fx, fy). Returns
+        (flow_img, valid_img) of size round(ht*fy) x round(wd*fx), where
+        flow_img is float32 and valid_img is an int32 0/1 mask.
+        """
+        ht, wd = flow.shape[:2]
+        coords = np.meshgrid(np.arange(wd), np.arange(ht))
+        coords = np.stack(coords, axis=-1)
+
+        coords = coords.reshape(-1, 2).astype(np.float32)
+        flow = flow.reshape(-1, 2).astype(np.float32)
+        valid = valid.reshape(-1).astype(np.float32)
+
+        coords0 = coords[valid>=1]
+        flow0 = flow[valid>=1]
+
+        ht1 = int(round(ht * fy))
+        wd1 = int(round(wd * fx))
+
+        coords1 = coords0 * [fx, fy]
+        flow1 = flow0 * [fx, fy]
+
+        xx = np.round(coords1[:,0]).astype(np.int32)
+        yy = np.round(coords1[:,1]).astype(np.int32)
+
+        # index 0 is a legal target: use >= so samples that land in the
+        # first row / column are not discarded
+        v = (xx >= 0) & (xx < wd1) & (yy >= 0) & (yy < ht1)
+        xx = xx[v]
+        yy = yy[v]
+        flow1 = flow1[v]
+
+        flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32)
+        valid_img = np.zeros([ht1, wd1], dtype=np.int32)
+
+        flow_img[yy, xx] = flow1
+        valid_img[yy, xx] = 1
+
+        return flow_img, valid_img
+
+    def spatial_transform(self, img1, img2, flow, valid):
+        """ Random uniform rescale, optional horizontal flip and a random crop """
+        # randomly sample scale
+
+        ht, wd = img1.shape[:2]
+        # smallest scale that still leaves crop_size plus one pixel
+        min_scale = np.maximum(
+            (self.crop_size[0] + 1) / float(ht), 
+            (self.crop_size[1] + 1) / float(wd))
+
+        scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
+        scale_x = np.clip(scale, min_scale, None)
+        scale_y = np.clip(scale, min_scale, None)
+
+        if np.random.rand() < self.spatial_aug_prob:
+            # rescale the images
+            img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            flow, valid = self.resize_sparse_flow_map(flow, valid, fx=scale_x, fy=scale_y)
+
+        if self.do_flip:
+            # use the configured probability rather than a literal 0.5
+            if np.random.rand() < self.h_flip_prob: # h-flip
+                img1 = img1[:, ::-1]
+                img2 = img2[:, ::-1]
+                flow = flow[:, ::-1] * [-1.0, 1.0]
+                valid = valid[:, ::-1]
+
+        margin_y = 20
+        margin_x = 50
+
+        # offsets are sampled from a range widened by the margins and then
+        # clipped back into the image, so border crops are drawn more often
+        y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y)
+        x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x)
+
+        y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0])
+        x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1])
+
+        img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        return img1, img2, flow, valid
+
+
+    def __call__(self, img1, img2, flow, valid):
+        """ Run the full augmentation and return contiguous arrays """
+        img1, img2 = self.color_transform(img1, img2)
+        img1, img2 = self.eraser_transform(img1, img2)
+        img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid)
+
+        img1 = np.ascontiguousarray(img1)
+        img2 = np.ascontiguousarray(img2)
+        flow = np.ascontiguousarray(flow)
+        valid = np.ascontiguousarray(valid)
+
+        return img1, img2, flow, valid
--- a/preprocessing/RAFT/core/utils/flow_viz.py
+++ b/preprocessing/RAFT/core/utils/flow_viz.py
+# Flow visualization code used from https://github.com/tomrunia/OpticalFlow_Visualization
+
+
+# MIT License
+#
+# Copyright (c) 2018 Tom Runia
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to conditions.
+#
+# Author: Tom Runia
+# Date Created: 2018-08-03
+
+import numpy as np
+
+def make_colorwheel():
+    """
+    Builds the 55-entry color wheel used to color-code optical flow, as in
+        Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
+        URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
+
+    The wheel is a sequence of six segments (red-yellow, yellow-green,
+    green-cyan, cyan-blue, blue-magenta, magenta-red). In each segment one
+    channel is held at 255 while another ramps linearly up or down.
+
+    Returns:
+        np.ndarray: Color wheel of shape [55, 3] with values in [0, 255]
+    """
+    # (length, saturated channel, ramping channel, ramp is falling)
+    segments = (
+        (15, 0, 1, False),  # RY
+        (6, 1, 0, True),    # YG
+        (4, 1, 2, False),   # GC
+        (11, 2, 1, True),   # CB
+        (13, 2, 0, False),  # BM
+        (6, 0, 2, True),    # MR
+    )
+
+    total = sum(length for length, _, _, _ in segments)
+    wheel = np.zeros((total, 3))
+
+    start = 0
+    for length, full_ch, ramp_ch, falling in segments:
+        stop = start + length
+        ramp = np.floor(255*np.arange(0, length)/length)
+        wheel[start:stop, full_ch] = 255
+        wheel[start:stop, ramp_ch] = 255 - ramp if falling else ramp
+        start = stop
+    return wheel
+
+
+def flow_uv_to_colors(u, v, convert_to_bgr=False):
+    """
+    Colors normalized flow components with the flow color wheel: the flow
+    direction selects the hue and the magnitude controls the saturation.
+
+    Args:
+        u (np.ndarray): Input horizontal flow of shape [H,W]
+        v (np.ndarray): Input vertical flow of shape [H,W]
+        convert_to_bgr (bool, optional): Write channels in BGR order. Defaults to False.
+
+    Returns:
+        np.ndarray: uint8 flow visualization image of shape [H,W,3]
+    """
+    wheel = make_colorwheel()  # shape [55x3]
+    n_colors = wheel.shape[0]
+
+    magnitude = np.sqrt(np.square(u) + np.square(v))
+    angle = np.arctan2(-v, -u)/np.pi
+
+    # fractional position on the wheel and its two neighbouring entries
+    position = (angle+1) / 2*(n_colors-1)
+    lower = np.floor(position).astype(np.int32)
+    upper = lower + 1
+    upper[upper == n_colors] = 0  # wrap around the wheel
+    weight = position - lower
+
+    in_range = (magnitude <= 1)
+    image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
+    for channel, column in enumerate(wheel.T):
+        near = column[lower] / 255.0
+        far = column[upper] / 255.0
+        shade = (1-weight)*near + weight*far
+        # fade toward white as the magnitude goes to zero
+        shade[in_range] = 1 - magnitude[in_range] * (1-shade[in_range])
+        shade[~in_range] = shade[~in_range] * 0.75   # out of range
+        # 2-channel reverses the order => BGR instead of RGB
+        target = 2-channel if convert_to_bgr else channel
+        image[:,:,target] = np.floor(255 * shade)
+    return image
+
+
+def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
+    """
+    Converts a flow field of shape [H,W,2] into a color-coded image.
+
+    The flow is normalized by its largest magnitude before coloring, so
+    the colors are relative to the strongest motion in this field.
+
+    Args:
+        flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
+        clip_flow (float, optional): Clip each flow component to
+            [-clip_flow, clip_flow]. Defaults to None (no clipping).
+        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
+
+    Returns:
+        np.ndarray: Flow visualization image of shape [H,W,3]
+    """
+    assert flow_uv.ndim == 3, 'input flow must have three dimensions'
+    assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'
+    if clip_flow is not None:
+        # flow components are signed: clip symmetrically, a lower bound of
+        # 0 would erase every negative (leftward / upward) displacement
+        flow_uv = np.clip(flow_uv, -clip_flow, clip_flow)
+    u = flow_uv[:,:,0]
+    v = flow_uv[:,:,1]
+    rad = np.sqrt(np.square(u) + np.square(v))
+    rad_max = np.max(rad)
+    epsilon = 1e-5  # avoids division by zero for an all-zero flow
+    u = u / (rad_max + epsilon)
+    v = v / (rad_max + epsilon)
+    return flow_uv_to_colors(u, v, convert_to_bgr)
\ No newline at end of file
--- a/preprocessing/RAFT/core/utils/frame_utils.py
+++ b/preprocessing/RAFT/core/utils/frame_utils.py
+import numpy as np
+from PIL import Image
+from os.path import *
+import re
+
+import cv2
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)
+
+TAG_CHAR = np.array([202021.25], np.float32)
+
+def readFlow(fn):
+    """ Read .flo file in Middlebury format
+
+    Returns the flow as a float32 array of shape (h, w, 2), or None (after
+    printing a message) when the header is missing or invalid.
+    """
+    # Code adapted from:
+    # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
+
+    # WARNING: this will work on little-endian architectures (eg Intel x86) only!
+    with open(fn, 'rb') as f:
+        magic = np.fromfile(f, np.float32, count=1)
+        # compare a scalar, not the array: an empty file yields an empty
+        # array whose truth value is ambiguous
+        if magic.size != 1 or magic[0] != 202021.25:
+            print('Magic number incorrect. Invalid .flo file')
+            return None
+
+        # width then height, stored as two consecutive int32 values
+        dims = np.fromfile(f, np.int32, count=2)
+        if dims.size != 2:
+            print('Truncated header. Invalid .flo file')
+            return None
+        w, h = int(dims[0]), int(dims[1])
+
+        data = np.fromfile(f, np.float32, count=2*w*h)
+        # Reshape data into 3D array (rows, columns, bands)
+        # The reshape here is for visualization, the original code is (w,h,2)
+        return np.resize(data, (h, w, 2))
+
+def readPFM(file):
+    """ Read a PFM image file
+
+    Returns a float array of shape (height, width, 3) for a color file
+    ('PF' header) or (height, width) for a grayscale file ('Pf' header),
+    flipped vertically relative to the order stored in the file. The scale
+    value in the header is only used for its sign (endianness); the data
+    is not rescaled.
+
+    Raises:
+        Exception: if the type tag or the dimension line is malformed.
+    """
+    # the context manager closes the handle on every path, including the
+    # header errors raised below
+    with open(file, 'rb') as stream:
+        header = stream.readline().rstrip()
+        if header == b'PF':
+            color = True
+        elif header == b'Pf':
+            color = False
+        else:
+            raise Exception('Not a PFM file.')
+
+        dim_match = re.match(rb'^(\d+)\s(\d+)\s$', stream.readline())
+        if dim_match:
+            width, height = map(int, dim_match.groups())
+        else:
+            raise Exception('Malformed PFM header.')
+
+        scale = float(stream.readline().rstrip())
+        if scale < 0: # little-endian
+            endian = '<'
+        else:
+            endian = '>' # big-endian
+
+        data = np.fromfile(stream, endian + 'f')
+
+    shape = (height, width, 3) if color else (height, width)
+
+    data = np.reshape(data, shape)
+    data = np.flipud(data)
+    return data
+
+def writeFlow(filename,uv,v=None):
+    """ Write optical flow to file.
+    
+    If v is None, uv is assumed to contain both u and v channels,
+    stacked in depth.
+    Original code by Deqing Sun, adapted from Daniel Scharstein.
+
+    The file holds the tag, the width and height as int32, then the
+    float32 flow with u and v interleaved per pixel.
+    """
+    nBands = 2
+
+    if v is None:
+        assert(uv.ndim == 3)
+        assert(uv.shape[2] == 2)
+        u = uv[:,:,0]
+        v = uv[:,:,1]
+    else:
+        u = uv
+
+    assert(u.shape == v.shape)
+    height,width = u.shape
+
+    # arrange into matrix form: even columns hold u, odd columns hold v
+    tmp = np.zeros((height, width*nBands))
+    tmp[:, 0::2] = u
+    tmp[:, 1::2] = v
+
+    # the context manager closes the file even if a write fails
+    with open(filename,'wb') as f:
+        # write the header
+        f.write(TAG_CHAR)
+        np.array(width).astype(np.int32).tofile(f)
+        np.array(height).astype(np.int32).tofile(f)
+        tmp.astype(np.float32).tofile(f)
+
+
+def readFlowKITTI(filename):
+    """ Read a KITTI flow image and return (flow, valid)
+
+    After reversing the channel order of the decoded image, the first two
+    channels hold the encoded flow and the third the validity mask. Flow
+    is decoded as (value - 2**15) / 64.
+    """
+    raw = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)
+    channels = raw[:,:,::-1].astype(np.float32)
+    valid = channels[:, :, 2]
+    flow = (channels[:, :, :2] - 2**15) / 64.0
+    return flow, valid
+
+def readDispKITTI(filename):
+    """ Read a KITTI disparity image as a horizontal-only flow field
+
+    The stored values are divided by 256 to get the disparity. The result
+    is returned as flow (-disparity, 0) together with a boolean mask that
+    is True where the disparity is positive.
+    """
+    disparity = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0
+    horizontal = -disparity
+    vertical = np.zeros_like(disparity)
+    flow = np.stack([horizontal, vertical], -1)
+    valid = disparity > 0.0
+    return flow, valid
+
+
+def writeFlowKITTI(filename, uv):
+    """ Write a flow field as a KITTI-encoded 16-bit image
+
+    Flow is encoded as 64 * value + 2**15 and a third channel of ones
+    marks every pixel as valid. The channel order is reversed before
+    writing.
+    """
+    encoded = 64.0 * uv + 2**15
+    all_valid = np.ones([encoded.shape[0], encoded.shape[1], 1])
+    packed = np.concatenate([encoded, all_valid], axis=-1).astype(np.uint16)
+    cv2.imwrite(filename, packed[..., ::-1])
+    
+
+def read_gen(file_name, pil=False):
+    """ Load a file with a reader chosen from its extension
+
+    Images are returned as PIL images, '.bin' / '.raw' files go through
+    np.load, and '.flo' / '.pfm' files are returned as float32 arrays (for
+    a 3-channel PFM the last channel is dropped). Unknown extensions give
+    an empty list. The `pil` argument is not used.
+    """
+    ext = splitext(file_name)[-1]
+
+    if ext in ('.png', '.jpeg', '.ppm', '.jpg'):
+        return Image.open(file_name)
+
+    if ext in ('.bin', '.raw'):
+        return np.load(file_name)
+
+    if ext == '.flo':
+        return readFlow(file_name).astype(np.float32)
+
+    if ext == '.pfm':
+        flow = readPFM(file_name).astype(np.float32)
+        return flow if len(flow.shape) == 2 else flow[:, :, :-1]
+
+    return []
\ No newline at end of file
--- a/preprocessing/RAFT/core/utils/utils.py
+++ b/preprocessing/RAFT/core/utils/utils.py
+import torch
+import torch.nn.functional as F
+import numpy as np
+from scipy import interpolate
+
+
+class InputPadder:
+    """ Pads images so that height and width are multiples of 8
+
+    In 'sintel' mode the padding is split between both sides of each
+    axis; in any other mode all vertical padding goes to the bottom.
+    """
+    def __init__(self, dims, mode='sintel'):
+        self.ht, self.wd = dims[-2:]
+        # amount needed to reach the next multiple of 8 (0 if already one)
+        extra_ht = (-self.ht) % 8
+        extra_wd = (-self.wd) % 8
+        left = extra_wd // 2
+        right = extra_wd - left
+        if mode == 'sintel':
+            top = extra_ht // 2
+            bottom = extra_ht - top
+        else:
+            top, bottom = 0, extra_ht
+        # order expected by F.pad: left, right, top, bottom
+        self._pad = [left, right, top, bottom]
+
+    def pad(self, *inputs):
+        """ Return the inputs padded by edge replication, as a list """
+        padded = []
+        for tensor in inputs:
+            padded.append(F.pad(tensor, self._pad, mode='replicate'))
+        return padded
+
+    def unpad(self,x):
+        """ Crop the padding off the last two dimensions of x """
+        left, right, top, bottom = self._pad
+        ht, wd = x.shape[-2:]
+        return x[..., top:ht-bottom, left:wd-right]
+
+def forward_interpolate(flow):
+    """ Forward-warp a flow field onto the pixel grid
+
+    Each flow vector is moved to the position it points to. The vectors
+    that land strictly inside the frame are then resampled on the regular
+    grid with nearest-neighbour interpolation. Takes a tensor of shape
+    [2, H, W] and returns a float CPU tensor of the same shape.
+    """
+    flow_np = flow.detach().cpu().numpy()
+    dx, dy = flow_np[0], flow_np[1]
+
+    ht, wd = dx.shape
+    grid_x, grid_y = np.meshgrid(np.arange(wd), np.arange(ht))
+
+    # where every pixel ends up after applying its own displacement
+    target_x = (grid_x + dx).reshape(-1)
+    target_y = (grid_y + dy).reshape(-1)
+
+    inside = (target_x > 0) & (target_x < wd) & (target_y > 0) & (target_y < ht)
+    points = (target_x[inside], target_y[inside])
+
+    resampled = []
+    for component in (dx, dy):
+        values = component.reshape(-1)[inside]
+        resampled.append(interpolate.griddata(
+            points, values, (grid_x, grid_y), method='nearest', fill_value=0))
+
+    return torch.from_numpy(np.stack(resampled, axis=0)).float()
+
+
+def bilinear_sampler(img, coords, mode='bilinear', mask=False):
+    """ Wrapper for grid_sample, uses pixel coordinates
+
+    Args:
+        img: tensor sampled by F.grid_sample; its last two dimensions are
+            taken as height and width.
+        coords: sampling positions in pixels, with (x, y) in the last
+            dimension.
+        mode: interpolation mode forwarded to F.grid_sample.
+        mask: if True, also return a float mask that is 1 where the
+            position lies strictly inside the image.
+    """
+    H, W = img.shape[-2:]
+    xgrid, ygrid = coords.split([1,1], dim=-1)
+    # map pixel coordinates to the [-1, 1] range used by grid_sample
+    xgrid = 2*xgrid/(W-1) - 1
+    ygrid = 2*ygrid/(H-1) - 1
+
+    grid = torch.cat([xgrid, ygrid], dim=-1)
+    # forward `mode`; it used to be accepted but silently ignored
+    img = F.grid_sample(img, grid, mode=mode, align_corners=True)
+
+    if mask:
+        mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
+        return img, mask.float()
+
+    return img
+
+
+def coords_grid(batch, ht, wd, device):
+    """ Pixel coordinate grid of shape [batch, 2, ht, wd]; channel 0 is x, channel 1 is y """
+    rows = torch.arange(ht, device=device)
+    cols = torch.arange(wd, device=device)
+    yy, xx = torch.meshgrid(rows, cols)
+    grid = torch.stack((xx, yy), dim=0).float()
+    return grid[None].repeat(batch, 1, 1, 1)
+
+
+def upflow8(flow, mode='bilinear'):
+    """ Upsample a [N, C, H, W] flow field 8x and scale its values by 8 to match """
+    height, width = flow.shape[2], flow.shape[3]
+    upsampled = F.interpolate(flow, size=(8 * height, 8 * width), mode=mode, align_corners=True)
+    return 8 * upsampled
--- a/preprocessing/RAFT/demo-frames/frame_0016.png
+++ b/preprocessing/RAFT/demo-frames/frame_0016.png
--- a/preprocessing/RAFT/demo-frames/frame_0017.png
+++ b/preprocessing/RAFT/demo-frames/frame_0017.png
--- a/preprocessing/RAFT/demo-frames/frame_0018.png
+++ b/preprocessing/RAFT/demo-frames/frame_0018.png
--- a/preprocessing/RAFT/demo-frames/frame_0019.png
+++ b/preprocessing/RAFT/demo-frames/frame_0019.png
--- a/preprocessing/RAFT/demo-frames/frame_0020.png
+++ b/preprocessing/RAFT/demo-frames/frame_0020.png
--- a/preprocessing/RAFT/demo-frames/frame_0021.png
+++ b/preprocessing/RAFT/demo-frames/frame_0021.png
--- a/preprocessing/RAFT/demo-frames/frame_0022.png
+++ b/preprocessing/RAFT/demo-frames/frame_0022.png
--- a/preprocessing/RAFT/demo-frames/frame_0023.png
+++ b/preprocessing/RAFT/demo-frames/frame_0023.png
--- a/preprocessing/RAFT/demo-frames/frame_0024.png
+++ b/preprocessing/RAFT/demo-frames/frame_0024.png
--- a/preprocessing/RAFT/demo-frames/frame_0025.png
+++ b/preprocessing/RAFT/demo-frames/frame_0025.png
--- a/preprocessing/RAFT/demo.py
+++ b/preprocessing/RAFT/demo.py
+import sys
+sys.path.append('core')
+
+import argparse
+import os
+import cv2
+import glob
+import numpy as np
+import torch
+from PIL import Image
+
+from raft import RAFT
+from utils import flow_viz
+from utils.utils import InputPadder
+
+
+
+DEVICE = 'cuda'
+
+def load_image(imfile):
+    """ Load an image file as a float tensor of shape [1, 3, H, W] on DEVICE """
+    # Force 3-channel RGB: grayscale / palette files decode to a 2-D array
+    # (the permute below would fail) and RGBA files carry a 4th channel.
+    img = np.array(Image.open(imfile).convert('RGB')).astype(np.uint8)
+    # HWC -> CHW, then add the batch dimension
+    img = torch.from_numpy(img).permute(2, 0, 1).float()
+    return img[None].to(DEVICE)
+
+
+def viz(img, flo):
+    """ Show an image and its color-coded flow, stacked vertically, in an OpenCV window """
+    # take the first batch element and move the channels last
+    img = img[0].permute(1,2,0).cpu().numpy()
+    flo = flo[0].permute(1,2,0).cpu().numpy()
+    
+    # map flow to rgb image
+    flo = flow_viz.flow_to_image(flo)
+    img_flo = np.concatenate([img, flo], axis=0)
+
+    # import matplotlib.pyplot as plt
+    # plt.imshow(img_flo / 255.0)
+    # plt.show()
+
+    # reverse the channel order (presumably RGB -> BGR for OpenCV) and
+    # scale to [0, 1]; waitKey() then waits for a key press
+    cv2.imshow('image', img_flo[:, :, [2,1,0]]/255.0)
+    cv2.waitKey()
+
+
+def demo(args):
+    """ Run RAFT on consecutive image pairs from args.path and display each flow """
+    # the checkpoint is loaded through a DataParallel wrapper, then the
+    # wrapped module is taken out for inference
+    model = torch.nn.DataParallel(RAFT(args))
+    model.load_state_dict(torch.load(args.model))
+
+    model = model.module
+    model.to(DEVICE)
+    model.eval()
+
+    with torch.no_grad():
+        images = glob.glob(os.path.join(args.path, '*.png')) + \
+                 glob.glob(os.path.join(args.path, '*.jpg'))
+        
+        # sorted by file name; each file is paired with the next one
+        images = sorted(images)
+        for imfile1, imfile2 in zip(images[:-1], images[1:]):
+            image1 = load_image(imfile1)
+            image2 = load_image(imfile2)
+
+            padder = InputPadder(image1.shape)
+            image1, image2 = padder.pad(image1, image2)
+
+            flow_low, flow_up = model(image1, image2, iters=20, test_mode=True)
+            # note: both the image and the flow shown here are still padded
+            viz(image1, flow_up)
+
+
+if __name__ == '__main__':
+    # command-line entry point; the parsed namespace is passed whole to
+    # demo(), which also hands it to the RAFT constructor
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', help="restore checkpoint")
+    parser.add_argument('--path', help="dataset for evaluation")
+    parser.add_argument('--small', action='store_true', help='use small model')
+    parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
+    parser.add_argument('--alternate_corr', action='store_true', help='use efficent correlation implementation')
+    args = parser.parse_args()
+
+    demo(args)
--- a/preprocessing/RAFT/download_models.sh
+++ b/preprocessing/RAFT/download_models.sh
+#!/bin/bash
+# Download the models archive and extract it in the current directory.
+wget https://dl.dropboxusercontent.com/s/4j4z58wuv8o0mfz/models.zip
+unzip models.zip
--- a/preprocessing/RAFT/evaluate.py
+++ b/preprocessing/RAFT/evaluate.py
+import sys
+sys.path.append('core')
+
+from PIL import Image
+import argparse
+import os
+import time
+import numpy as np
+import torch
+import torch.nn.functional as F
+import matplotlib.pyplot as plt
+
+import datasets
+from utils import flow_viz
+from utils import frame_utils
+
+from raft import RAFT
+from utils.utils import InputPadder, forward_interpolate
+
+
+@torch.no_grad()
+def create_sintel_submission(model, iters=32, warm_start=False, output_path='sintel_submission'):
+    """ Create submission for the Sintel leaderboard
+
+    Writes one .flo file per test sample to
+    <output_path>/<dstype>/<sequence>/frame%04d.flo for the 'clean' and
+    'final' passes. With warm_start, the low-resolution flow of the
+    previous pair is forward-interpolated and used to initialize the next
+    pair of the same sequence.
+    """
+    model.eval()
+    for dstype in ['clean', 'final']:
+        test_dataset = datasets.MpiSintel(split='test', aug_params=None, dstype=dstype)
+        
+        flow_prev, sequence_prev = None, None
+        for test_id in range(len(test_dataset)):
+            image1, image2, (sequence, frame) = test_dataset[test_id]
+            # never carry a warm start across a sequence boundary
+            if sequence != sequence_prev:
+                flow_prev = None
+            
+            padder = InputPadder(image1.shape)
+            image1, image2 = padder.pad(image1[None].cuda(), image2[None].cuda())
+
+            flow_low, flow_pr = model(image1, image2, iters=iters, flow_init=flow_prev, test_mode=True)
+            # drop the padding and convert to an HxWx2 numpy array
+            flow = padder.unpad(flow_pr[0]).permute(1, 2, 0).cpu().numpy()
+
+            if warm_start:
+                flow_prev = forward_interpolate(flow_low[0])[None].cuda()
+            
+            output_dir = os.path.join(output_path, dstype, sequence)
+            # output file names are numbered from frame+1
+            output_file = os.path.join(output_dir, 'frame%04d.flo' % (frame+1))
+
+            if not os.path.exists(output_dir):
+                os.makedirs(output_dir)
+
+            frame_utils.writeFlow(output_file, flow)
+            sequence_prev = sequence
+
+
+@torch.no_grad()
+def create_kitti_submission(model, iters=24, output_path='kitti_submission'):
+    """ Create submission for the KITTI leaderboard
+
+    Writes one KITTI-encoded flow file per test sample to
+    <output_path>/<frame_id>.
+    """
+    model.eval()
+    test_dataset = datasets.KITTI(split='testing', aug_params=None)
+
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    for test_id in range(len(test_dataset)):
+        image1, image2, (frame_id, ) = test_dataset[test_id]
+        padder = InputPadder(image1.shape, mode='kitti')
+        image1, image2 = padder.pad(image1[None].cuda(), image2[None].cuda())
+
+        _, flow_pr = model(image1, image2, iters=iters, test_mode=True)
+        # drop the padding and convert to an HxWx2 numpy array
+        flow = padder.unpad(flow_pr[0]).permute(1, 2, 0).cpu().numpy()
+
+        output_filename = os.path.join(output_path, frame_id)
+        frame_utils.writeFlowKITTI(output_filename, flow)
+
+
+@torch.no_grad()
+def validate_chairs(model, iters=24):
+    """ Perform evaluation on the FlyingChairs (validation) split
+
+    Returns {'chairs': epe}, the end-point error averaged over all pixels
+    of all validation samples.
+    """
+    model.eval()
+    epe_list = []
+
+    val_dataset = datasets.FlyingChairs(split='validation')
+    for val_id in range(len(val_dataset)):
+        image1, image2, flow_gt, _ = val_dataset[val_id]
+        image1 = image1[None].cuda()
+        image2 = image2[None].cuda()
+
+        # no padding is applied for this dataset
+        _, flow_pr = model(image1, image2, iters=iters, test_mode=True)
+        # per-pixel end-point error: L2 norm over the flow channel
+        epe = torch.sum((flow_pr[0].cpu() - flow_gt)**2, dim=0).sqrt()
+        epe_list.append(epe.view(-1).numpy())
+
+    epe = np.mean(np.concatenate(epe_list))
+    print("Validation Chairs EPE: %f" % epe)
+    return {'chairs': epe}
+
+
+@torch.no_grad()
+def validate_sintel(model, iters=32):
+    """ Perform validation using the Sintel (train) split
+
+    For the 'clean' and 'final' passes, prints the mean end-point error
+    and the fraction of pixels with an error below 1, 3 and 5 pixels.
+    Returns a dict mapping each pass name to its mean end-point error.
+    """
+    model.eval()
+    results = {}
+    for dstype in ['clean', 'final']:
+        val_dataset = datasets.MpiSintel(split='training', dstype=dstype)
+        epe_list = []
+
+        for val_id in range(len(val_dataset)):
+            image1, image2, flow_gt, _ = val_dataset[val_id]
+            image1 = image1[None].cuda()
+            image2 = image2[None].cuda()
+
+            padder = InputPadder(image1.shape)
+            image1, image2 = padder.pad(image1, image2)
+
+            flow_low, flow_pr = model(image1, image2, iters=iters, test_mode=True)
+            flow = padder.unpad(flow_pr[0]).cpu()
+
+            # per-pixel end-point error: L2 norm over the flow channel
+            epe = torch.sum((flow - flow_gt)**2, dim=0).sqrt()
+            epe_list.append(epe.view(-1).numpy())
+
+        epe_all = np.concatenate(epe_list)
+        epe = np.mean(epe_all)
+        px1 = np.mean(epe_all<1)
+        px3 = np.mean(epe_all<3)
+        px5 = np.mean(epe_all<5)
+
+        print("Validation (%s) EPE: %f, 1px: %f, 3px: %f, 5px: %f" % (dstype, epe, px1, px3, px5))
+        # store the per-pixel mean computed above; np.mean over the list
+        # of per-image arrays only works when all images share one size
+        results[dstype] = epe
+
+    return results
+
+
+@torch.no_grad()
+def validate_kitti(model, iters=24):
+    """ Perform validation using the KITTI-2015 (train) split
+
+    Returns {'kitti-epe': ..., 'kitti-f1': ...}. The first is the mean of
+    the per-image end-point errors over valid pixels. The second is the
+    percentage of valid pixels whose error exceeds both 3 pixels and 5%
+    of the ground-truth flow magnitude.
+    """
+    model.eval()
+    val_dataset = datasets.KITTI(split='training')
+
+    out_list, epe_list = [], []
+    for val_id in range(len(val_dataset)):
+        image1, image2, flow_gt, valid_gt = val_dataset[val_id]
+        image1 = image1[None].cuda()
+        image2 = image2[None].cuda()
+
+        padder = InputPadder(image1.shape, mode='kitti')
+        image1, image2 = padder.pad(image1, image2)
+
+        flow_low, flow_pr = model(image1, image2, iters=iters, test_mode=True)
+        flow = padder.unpad(flow_pr[0]).cpu()
+
+        # per-pixel end-point error and ground-truth flow magnitude
+        epe = torch.sum((flow - flow_gt)**2, dim=0).sqrt()
+        mag = torch.sum(flow_gt**2, dim=0).sqrt()
+
+        epe = epe.view(-1)
+        mag = mag.view(-1)
+        val = valid_gt.view(-1) >= 0.5
+
+        # outlier: absolute error above 3px and relative error above 5%
+        out = ((epe > 3.0) & ((epe/mag) > 0.05)).float()
+        # epe is averaged per image, outliers are pooled over all pixels
+        epe_list.append(epe[val].mean().item())
+        out_list.append(out[val].cpu().numpy())
+
+    epe_list = np.array(epe_list)
+    out_list = np.concatenate(out_list)
+
+    epe = np.mean(epe_list)
+    f1 = 100 * np.mean(out_list)
+
+    print("Validation KITTI: %f, %f" % (epe, f1))
+    return {'kitti-epe': epe, 'kitti-f1': f1}
+
+
+if __name__ == '__main__':
+    # command-line entry point: load a checkpoint and run the validation
+    # routine selected by --dataset ('chairs', 'sintel' or 'kitti'); any
+    # other value runs nothing
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', help="restore checkpoint")
+    parser.add_argument('--dataset', help="dataset for evaluation")
+    parser.add_argument('--small', action='store_true', help='use small model')
+    parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
+    parser.add_argument('--alternate_corr', action='store_true', help='use efficent correlation implementation')
+    args = parser.parse_args()
+
+    # the checkpoint is loaded through a DataParallel wrapper; the
+    # validation functions receive the wrapped module
+    model = torch.nn.DataParallel(RAFT(args))
+    model.load_state_dict(torch.load(args.model))
+
+    model.cuda()
+    model.eval()
+
+    # create_sintel_submission(model.module, warm_start=True)
+    # create_kitti_submission(model.module)
+
+    with torch.no_grad():
+        if args.dataset == 'chairs':
+            validate_chairs(model.module)
+
+        elif args.dataset == 'sintel':
+            validate_sintel(model.module)
+
+        elif args.dataset == 'kitti':
+            validate_kitti(model.module)
+
+
--- a/preprocessing/RAFT/exhaustive_raft.py
+++ b/preprocessing/RAFT/exhaustive_raft.py
+"""
+This script computes all pairwise RAFT optical flow fields.
+For each source frame, the flow computed for the previous target frame is
+used to initialize the flow to the next target frame.
+"""
+
+import sys
+
+sys.path.append('core')
+
+import argparse
+import os
+import glob
+import numpy as np
+import torch
+from PIL import Image
+from tqdm import tqdm
+
+from raft import RAFT
+from utils.utils import InputPadder
+import warnings
+
+warnings.filterwarnings("ignore")
+
+DEVICE = 'cuda'
+
+
+def load_image(imfile):
+    """ Load an image file as a float tensor of shape [1, 3, H, W] on DEVICE """
+    # Force 3-channel RGB: grayscale / palette files decode to a 2-D array
+    # (the permute below would fail) and RGBA files carry a 4th channel.
+    img = np.array(Image.open(imfile).convert('RGB')).astype(np.uint8)
+    # HWC -> CHW, then add the batch dimension
+    img = torch.from_numpy(img).permute(2, 0, 1).float()
+    return img[None].to(DEVICE)
+
+
+def _save_flows_from_frame(model, img_files, src, targets, flow_out_dir, pbar):
+    """ Compute and save the flow from frame `src` to every frame index in `targets` """
+    imfile1 = img_files[src]
+    # The source frame is the same for the whole chain, so it is loaded
+    # and padded once instead of once per pair.
+    # NOTE(review): assumes the model does not modify its inputs in place - confirm.
+    image1 = load_image(imfile1)
+    padder = InputPadder(image1.shape)
+    image1 = padder.pad(image1)[0]
+
+    flow_low_prev = None
+    for j in targets:
+        imfile2 = img_files[j]
+        image2 = padder.pad(load_image(imfile2))[0]
+
+        # warm start from the low-resolution flow to the previous target
+        flow_low, flow_up = model(image1, image2, iters=20, test_mode=True, flow_init=flow_low_prev)
+        flow_up = padder.unpad(flow_up)
+
+        flow_up_np = flow_up.squeeze().permute(1, 2, 0).cpu().numpy()
+        save_file = os.path.join(flow_out_dir,
+                                 '{}_{}.npy'.format(os.path.basename(imfile1), os.path.basename(imfile2)))
+        np.save(save_file, flow_up_np)
+        flow_low_prev = flow_low
+        pbar.update(1)
+
+
+def run_exhaustive_flow(args):
+    """ Compute and save RAFT flow for every ordered pair of frames
+
+    Frames are the sorted files in <args.data_dir>/color. The flow from
+    frame A to frame B is saved as an HxWx2 array in
+    <args.data_dir>/raft_exhaustive/<A>_<B>.npy, where A and B are the
+    file base names. For each source frame the targets are visited in
+    order of increasing distance, and each flow is initialized from the
+    previous one.
+    """
+    model = torch.nn.DataParallel(RAFT(args))
+    model.load_state_dict(torch.load(args.model))
+
+    model = model.module
+    model.to(DEVICE)
+    model.eval()
+
+    data_dir = args.data_dir
+    print('computing all pairwise optical flows for {}...'.format(data_dir))
+
+    flow_out_dir = os.path.join(data_dir, 'raft_exhaustive')
+    os.makedirs(flow_out_dir, exist_ok=True)
+
+    img_files = sorted(glob.glob(os.path.join(data_dir, 'color', '*')))
+    num_imgs = len(img_files)
+    pbar = tqdm(total=num_imgs * (num_imgs - 1))
+    with torch.no_grad():
+        # forward in time: i -> i+1, i+2, ...
+        for i in range(num_imgs - 1):
+            _save_flows_from_frame(model, img_files, i, range(i + 1, num_imgs),
+                                   flow_out_dir, pbar)
+
+        # backward in time: i -> i-1, i-2, ...
+        for i in range(num_imgs - 1, 0, -1):
+            _save_flows_from_frame(model, img_files, i, range(i - 1, -1, -1),
+                                   flow_out_dir, pbar)
+        pbar.close()
+        print('computing all pairwise optical flows for {} is done \n'.format(data_dir))
+
+
+if __name__ == '__main__':
+    # command-line entry point; the parsed namespace is passed whole to
+    # run_exhaustive_flow(), which also hands it to the RAFT constructor
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', default='models/raft-things.pth', help="restore checkpoint")
+    parser.add_argument('--small', action='store_true', help='use small model')
+    parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
+    parser.add_argument('--alternate_corr', action='store_true', help='use efficent correlation implementation')
+    parser.add_argument('--data_dir', type=str, default='', help='dataset dir')
+    args = parser.parse_args()
+
+    run_exhaustive_flow(args)
+
+