Commit 1f95925c authored by Samuli Laine's avatar Samuli Laine
Browse files

Initial commit

parents
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import argparse
import numpy as np
import torch
import os
import sys
import pathlib
import imageio
import util
import nvdiffrast.torch as dr
#----------------------------------------------------------------------------
# Environment map and Phong BRDF learning.
#----------------------------------------------------------------------------
def fit_env_phong(max_iter = 1000,
                  log_interval = 10,
                  display_interval = None,
                  display_res = 1024,
                  res = 1024,
                  lr_base = 1e-2,
                  lr_ramp = 1.0,
                  out_dir = None,
                  log_fn = None,
                  mp4save_interval = None,
                  mp4save_fn = None):
    """Jointly learn a cube environment map and Phong BRDF parameters.

    Each iteration renders a reference image of the mesh from a random
    viewpoint with a random light, using the known environment map and Phong
    parameters, then renders the same view with the learned variables and
    minimizes the L2 pixel difference with Adam.

    Args:
        max_iter:         Number of optimization iterations.
        log_interval:     Print/append log every N iterations (falsy disables).
        display_interval: Show a preview window every N iterations (falsy disables).
        display_res:      Preview window size in pixels.
        res:              Render resolution (square).
        lr_base:          Initial learning rate.
        lr_ramp:          Total learning-rate decay factor over the run (1.0 = constant).
        out_dir:          Output directory; None disables all file output.
        log_fn:           Log file name inside out_dir (None disables the text log).
        mp4save_interval: Write a video frame every N iterations (0 disables).
        mp4save_fn:       Output video file name inside out_dir.
    """
    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        mp4save_interval = None

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    env = np.stack(env)[:, ::-1].copy()  # Scale to [0, 1] and reverse the second axis of the stacked faces.
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Move all the stuff to GPU.
    pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
    pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
    normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
    env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

    # Target Phong parameters used for the reference renders.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0
    phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = torch.ones_like(env) * .5
    env_var.requires_grad_()
    phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
    phong_var_raw.requires_grad_()
    phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')  # The raw exponent variable is scaled up by 10x.

    # Render.
    ang = 0.0
    imgloss_avg, phong_avg = [], []
    glctx = dr.RasterizeGLContext()
    zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
    one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

    # Adam optimizer for environment map and phong with a learning rate ramp.
    optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    for it in range(max_iter + 1):
        phong_var = phong_var_raw * phong_var_mul

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        a_mvc = a_mvp
        r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
        a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

        # Solve camera positions from the inverse modelview matrices.
        a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
        r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

        # Random light direction.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8
        lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

        def render_refl(ldir, cpos, mvp):
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :] # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True) # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5 # Normalize.
            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5 # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True) # L dot R.

            # Return reflection vectors, their screen-space derivatives, the
            # Phong dot product, and a background mask (last rast channel == 0).
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        # Render the reflections.
        refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

        # Reference color. No need for AA because we are not learning geometry.
        color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp # Phong.
        color = torch.where(mask, one_tensor, color) # White background.

        # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
        color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3] # Phong.
        color_opt = torch.where(mask, one_tensor, color_opt) # White background.

        # Compute loss and train.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Collect losses.
        imgloss_avg.append(loss.detach().cpu().numpy())
        phong_avg.append(phong_var.detach().cpu().numpy())

        # Print/save log. Running averages are reset after each report.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

        if display_image or save_mp4:
            # Fixed light direction for the smooth display animation.
            lightdir = np.asarray([.8, -1., .5, 0.0])
            lightdir = np.matmul(a_mvc, lightdir)[:3]
            lightdir /= np.linalg.norm(lightdir)
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
            refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
            color_opt = torch.where(mask, one_tensor, color_opt)
            result_image = color_opt.detach()[0].cpu().numpy()

            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_mp4:
                writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------
def main():
    """Command-line entry point: parse arguments and run the fitting loop."""
    parser = argparse.ArgumentParser(description='Environment map fitting example')
    parser.add_argument('--outdir', help='Specify output directory', default='')
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--mp4save-interval', type=int, default=10)
    parser.add_argument('--max-iter', type=int, default=5000)
    args = parser.parse_args()

    # Set up logging.
    if args.outdir:
        out_dir = f'{args.outdir}/env_phong'
        print (f'Saving results under {out_dir}')
    else:
        out_dir = None
        print ('No output directory specified, not saving log or images')

    # Run.
    fit_env_phong(
        max_iter=args.max_iter,
        log_interval=100,
        display_interval=args.display_interval,
        out_dir=out_dir,
        log_fn='log.txt',  # Fix: log_fn was never passed, so the text log was silently skipped even with --outdir (cf. the pose-fitting sample).
        mp4save_interval=args.mp4save_interval,
        mp4save_fn='progress.mp4'
    )

    # Done.
    print("Done.")
#----------------------------------------------------------------------------
if __name__ == "__main__":
main()
#----------------------------------------------------------------------------
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import argparse
import os
import pathlib
import sys
import numpy as np
import torch
import imageio
import util
import nvdiffrast.torch as dr
#----------------------------------------------------------------------------
# Quaternion math.
#----------------------------------------------------------------------------
def q_unit():
    """Return the identity quaternion [1, 0, 0, 0] as float32."""
    return np.asarray([1.0, 0.0, 0.0, 0.0], np.float32)
def q_rnd():
    """Return a random unit quaternion, uniformly distributed over rotations."""
    u, v, w = np.random.uniform(0.0, 1.0, size=[3])
    a = 2.0 * np.pi * v
    b = 2.0 * np.pi * w
    su = (1.0 - u) ** 0.5
    cu = u ** 0.5
    return np.asarray([su * np.sin(a), su * np.cos(a), cu * np.sin(b), cu * np.cos(b)], np.float32)
# Quaternions of the 24 rotations in the octahedral symmetric group S_4.
_r2 = 0.5**0.5
_q_S4 = [[ 1.0, 0.0, 0.0, 0.0], [ 0.0, 1.0, 0.0, 0.0], [ 0.0, 0.0, 1.0, 0.0], [ 0.0, 0.0, 0.0, 1.0],
         [-0.5, 0.5, 0.5, 0.5], [-0.5,-0.5,-0.5, 0.5], [ 0.5,-0.5, 0.5, 0.5], [ 0.5, 0.5,-0.5, 0.5],
         [ 0.5, 0.5, 0.5, 0.5], [-0.5, 0.5,-0.5, 0.5], [ 0.5,-0.5,-0.5, 0.5], [-0.5,-0.5, 0.5, 0.5],
         [ _r2,-_r2, 0.0, 0.0], [ _r2, _r2, 0.0, 0.0], [ 0.0, 0.0, _r2, _r2], [ 0.0, 0.0,-_r2, _r2],
         [ 0.0, _r2, _r2, 0.0], [ _r2, 0.0, 0.0,-_r2], [ _r2, 0.0, 0.0, _r2], [ 0.0,-_r2, _r2, 0.0],
         [ _r2, 0.0, _r2, 0.0], [ 0.0, _r2, 0.0, _r2], [ _r2, 0.0,-_r2, 0.0], [ 0.0,-_r2, 0.0, _r2]]

def q_rnd_S4():
    """Return a uniformly chosen element of the 24-entry S_4 quaternion table."""
    choice = np.random.randint(24)
    return np.asarray(_q_S4[choice], np.float32)
def q_slerp(p, q, t):
    """Spherical linear interpolation from quaternion p to q at parameter t."""
    d = np.dot(p, q)
    if d < 0.0:
        # Antipodal representation: flip q to interpolate along the shorter arc.
        q = -q
        d = -d
    if d > 0.999:
        # Nearly identical orientations: normalized lerp avoids dividing by
        # the sine of a vanishing angle.
        a = p + t * (q - p)
        return a / np.linalg.norm(a)
    theta = np.arccos(d)
    s1 = np.sin(theta * t) / np.sin(theta)
    s0 = np.cos(theta * t) - d * s1
    return s0 * p + s1 * q
def q_scale(q, scl):
    """Scale the rotation q by factor scl (slerp from the identity quaternion)."""
    identity = q_unit()
    return q_slerp(identity, q, scl)
def q_mul(p, q):
    """Hamilton product of quaternions p and q (scalar-first layout)."""
    ps, pv = p[0], p[1:]
    qs, qv = q[0], q[1:]
    out_s = ps * qs - np.dot(pv, qv)
    out_v = ps * qv + qs * pv + np.cross(pv, qv)
    return np.asarray([out_s, out_v[0], out_v[1], out_v[2]], np.float32)
def q_angle_deg(p, q):
    """Angular difference between rotations p and q (torch tensors), in degrees."""
    pn = p.detach().cpu().numpy()
    qn = q.detach().cpu().numpy()
    # |dot| ignores the quaternion double-cover sign; clamping guards arccos
    # against values slightly above 1 from rounding.
    c = min(np.abs(np.dot(pn, qn)), 1.0)
    return np.degrees(2.0 * np.arccos(c))
def q_mul_torch(p, q):
    """Differentiable Hamilton product of quaternions p and q (scalar-first)."""
    pw, px, py, pz = p[0], p[1], p[2], p[3]
    qw, qx, qy, qz = q[0], q[1], q[2], q[3]
    return torch.stack([
        pw*qw - px*qx - py*qy - pz*qz,
        pw*qx + px*qw + py*qz - pz*qy,
        pw*qy + py*qw + pz*qx - px*qz,
        pw*qz + pz*qw + px*qy - py*qx,
    ])
# Convert quaternion to 4x4 rotation matrix.
def q_to_mtx(q):
    """Convert quaternion q to a homogeneous 4x4 rotation matrix on CUDA.

    NOTE(review): the element pattern matches the standard formula for an
    (x, y, z, w) component order (q[3] acting as the scalar part), while
    q_mul_torch above treats q[0] as the scalar. Both the target and the
    optimized pose pass through this same mapping, so the fitting is
    self-consistent — but confirm the intended convention before reusing
    these helpers elsewhere.
    """
    # Rows of the 3x3 rotation block.
    r0 = torch.stack([1.0-2.0*q[1]**2 - 2.0*q[2]**2, 2.0*q[0]*q[1] - 2.0*q[2]*q[3], 2.0*q[0]*q[2] + 2.0*q[1]*q[3]])
    r1 = torch.stack([2.0*q[0]*q[1] + 2.0*q[2]*q[3], 1.0 - 2.0*q[0]**2 - 2.0*q[2]**2, 2.0*q[1]*q[2] - 2.0*q[0]*q[3]])
    r2 = torch.stack([2.0*q[0]*q[2] - 2.0*q[1]*q[3], 2.0*q[1]*q[2] + 2.0*q[0]*q[3], 1.0 - 2.0*q[0]**2 - 2.0*q[1]**2])
    rr = torch.transpose(torch.stack([r0, r1, r2]), 1, 0)
    rr = torch.cat([rr, torch.tensor([[0], [0], [0]], dtype=torch.float32).cuda()], dim=1) # Pad right column.
    rr = torch.cat([rr, torch.tensor([[0, 0, 0, 1]], dtype=torch.float32).cuda()], dim=0) # Pad bottom row.
    return rr
# Transform vertex positions to clip space.
def transform_pos(mtx, pos):
    """Transform vertex positions (V, 3) to clip space with the 4x4 matrix mtx.

    Accepts mtx as a numpy array or a torch tensor; returns a (1, V, 4)
    CUDA tensor with a leading minibatch dimension.
    """
    t_mtx = torch.from_numpy(mtx).cuda() if isinstance(mtx, np.ndarray) else mtx
    # (x,y,z) -> (x,y,z,1): append the homogeneous coordinate.
    posw = torch.cat([pos, torch.ones([pos.shape[0], 1]).cuda()], axis=1)
    return torch.matmul(posw, t_mtx.t())[None, ...]
def render(glctx, mtx, pos, pos_idx, col, col_idx, resolution: int):
    """Rasterize the mesh under transform mtx into an antialiased color image.

    Args:
        glctx:      nvdiffrast OpenGL rasterizer context.
        mtx:        4x4 modelview-projection matrix (numpy or torch).
        pos:        Vertex positions (V, 3) on CUDA.
        pos_idx:    Triangle index buffer.
        col:        Per-vertex colors.
        col_idx:    Index buffer for the color attribute.
        resolution: Output image is resolution x resolution pixels.

    Returns:
        (1, resolution, resolution, C) CUDA tensor.
    """
    pos_clip = transform_pos(mtx, pos)
    rast_out, _ = dr.rasterize(glctx, pos_clip, pos_idx, resolution=[resolution, resolution])
    color , _ = dr.interpolate(col[None, ...], rast_out, col_idx)
    # Antialiasing makes silhouette edges differentiable w.r.t. vertex positions.
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)
    return color
#----------------------------------------------------------------------------
# Cube pose fitter.
#----------------------------------------------------------------------------
def fit_pose(max_iter = 10000,
             repeats = 1,
             log_interval = 10,
             display_interval = None,
             display_res = 512,
             lr_base = 0.01,
             lr_falloff = 1.0,
             nr_base = 1.0,
             nr_falloff = 1e-4,
             grad_phase_start = 0.5,
             resolution = 256,
             out_dir = None,
             log_fn = None,
             mp4save_interval = None,
             mp4save_fn = None):
    """Fit a cube's rotation (unit quaternion) to match a reference rendering.

    Two-phase optimization per repeat: an initial stochastic search phase
    injects decaying quaternion noise (plus random octahedral-symmetry jumps)
    and keeps the best pose found; once the iteration fraction reaches
    grad_phase_start, a gradient phase refines the pose with Adam on an
    image-space loss.

    Args:
        max_iter:         Iterations per repeat.
        repeats:          Number of independent fitting runs.
        log_interval:     Print/append log every N iterations (falsy disables).
        display_interval: Show a preview window every N iterations (falsy disables).
        display_res:      Preview window size.
        lr_base:          Initial learning rate.
        lr_falloff:       Total learning-rate decay factor over the run.
        nr_base:          Initial noise rate for the search phase.
        nr_falloff:       Total noise-rate decay factor over the run.
        grad_phase_start: Fraction of iterations at which gradients take over.
        resolution:       Render resolution (square).
        out_dir:          Output directory; None disables all file output.
        log_fn:           Log file name inside out_dir (None disables).
        mp4save_interval: Write a video frame every N iterations (0 disables).
        mp4save_fn:       Output video file name inside out_dir.
    """
    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        mp4save_interval = None

    # Load the cube mesh.
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/cube_p.npz') as f:
        pos_idx, pos, col_idx, col = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1). Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors on the GPU.
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_col = torch.from_numpy(col.astype(np.float32)).cuda()

    glctx = dr.RasterizeGLContext()

    for rep in range(repeats):
        # Random target pose and a normalized random initial guess.
        pose_target = torch.tensor(q_rnd(), device='cuda')
        pose_init = q_rnd()
        pose_opt = torch.tensor(pose_init / np.sum(pose_init**2)**0.5, dtype=torch.float32, device='cuda', requires_grad=True)

        loss_best = np.inf
        pose_best = pose_opt.detach().clone()

        # Modelview + projection matrix.
        mvp = torch.tensor(np.matmul(util.projection(x=0.4), util.translate(0, 0, -3.5)).astype(np.float32), device='cuda')

        # Adam optimizer for the pose with a learning rate ramp.
        optimizer = torch.optim.Adam([pose_opt], betas=(0.9, 0.999), lr=lr_base)

        # Render.
        for it in range(max_iter + 1):
            # Set learning rate and noise rate by exponential falloff.
            itf = 1.0 * it / max_iter
            nr = nr_base * nr_falloff**itf
            lr = lr_base * lr_falloff**itf
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            # Noise input: no noise during the gradient phase; during the
            # search phase, a scaled random rotation composed with a random
            # octahedral symmetry element.
            if itf >= grad_phase_start:
                noise = q_unit()
            else:
                noise = q_scale(q_rnd(), nr)
                noise = q_mul(noise, q_rnd_S4()) # Orientation noise.

            # Render reference and candidate images.
            color = render(glctx, torch.matmul(mvp, q_to_mtx(pose_target)), vtx_pos, pos_idx, vtx_col, col_idx, resolution)
            pose_total_opt = q_mul_torch(pose_opt, noise)
            mtx_total_opt = torch.matmul(mvp, q_to_mtx(pose_total_opt))
            color_opt = render(glctx, mtx_total_opt, vtx_pos, pos_idx, vtx_col, col_idx, resolution)

            # Image-space loss: saturating transform of the per-pixel max-channel squared error.
            diff = (color_opt - color)**2 # L2 norm.
            diff = torch.tanh(5.0 * torch.max(diff, dim=-1)[0])
            loss = torch.mean(diff)

            # Measure image-space loss and update best found pose.
            loss_val = float(loss)
            if (loss_val < loss_best) and (loss_val > 0.0):
                pose_best = pose_total_opt.detach().clone()
                loss_best = loss_val
                if itf < grad_phase_start:
                    with torch.no_grad(): pose_opt[:] = pose_best

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                err = q_angle_deg(pose_opt, pose_target)
                ebest = q_angle_deg(pose_best, pose_target)
                s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (rep, it, err, ebest, loss_val, loss_best, lr, nr)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Run gradient training step.
            if itf >= grad_phase_start:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Keep the pose a unit quaternion.
            with torch.no_grad():
                pose_opt /= torch.sum(pose_opt**2)**0.5

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                # (Fix: removed an unused duplicate GPU->CPU copy of the reference image.)
                img_ref = color[0].detach().cpu().numpy()
                img_opt = color_opt[0].detach().cpu().numpy()
                img_best = render(glctx, torch.matmul(mvp, q_to_mtx(pose_best)), vtx_pos, pos_idx, vtx_col, col_idx, resolution)[0].detach().cpu().numpy()
                result_image = np.concatenate([img_ref, img_best, img_opt], axis=1)

                if display_image:
                    util.display_image(result_image, size=display_res, title='(%d) %d / %d' % (rep, it, max_iter))
                if save_mp4:
                    writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
#----------------------------------------------------------------------------
def main():
    """Parse command-line arguments and launch the cube pose fitting run."""
    parser = argparse.ArgumentParser(description='Cube pose fitting example')
    parser.add_argument('--outdir', help='Specify output directory', default='')
    parser.add_argument('--display-interval', type=int, default=0)
    parser.add_argument('--mp4save-interval', type=int, default=10)
    parser.add_argument('--max-iter', type=int, default=1000)
    parser.add_argument('--repeats', type=int, default=1)
    args = parser.parse_args()

    # Resolve the output directory; an empty --outdir disables saving.
    out_dir = f'{args.outdir}/pose' if args.outdir else None
    if out_dir:
        print(f'Saving results under {out_dir}')
    else:
        print('No output directory specified, not saving log or images')

    # Run the fitter.
    fit_pose(
        max_iter=args.max_iter,
        repeats=args.repeats,
        log_interval=100,
        display_interval=args.display_interval,
        out_dir=out_dir,
        log_fn='log.txt',
        mp4save_interval=args.mp4save_interval,
        mp4save_fn='progress.mp4'
    )

    print("Done.")
#----------------------------------------------------------------------------
if __name__ == "__main__":
main()
#----------------------------------------------------------------------------
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import imageio
import numpy as np
import torch
import nvdiffrast.torch as dr
def tensor(*args, **kwargs):
    """Shorthand for torch.tensor that always places the result on the CUDA device."""
    return torch.tensor(*args, device='cuda', **kwargs)
# Minimal nvdiffrast example: rasterize one triangle and save it as a PNG.
# Clip-space vertex positions (1, 3, 4), per-vertex RGB colors (1, 3, 3),
# and the triangle's index buffer (1, 3).
pos = tensor([[[-0.8, -0.8, 0, 1], [0.8, -0.8, 0, 1], [-0.8, 0.8, 0, 1]]], dtype=torch.float32)
col = tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=torch.float32)
tri = tensor([[0, 1, 2]], dtype=torch.int32)

# Rasterize at 256x256 and interpolate the vertex colors over the coverage.
glctx = dr.RasterizeGLContext()
rast, _ = dr.rasterize(glctx, pos, tri, resolution=[256, 256])
out, _ = dr.interpolate(col, rast, tri)

img = out.cpu().numpy()[0, ::-1, :, :] # Flip vertically.
img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8) # Quantize to np.uint8

print("Saving to 'tri.png'.")
imageio.imsave('tri.png', img)
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import numpy as np
import torch
#----------------------------------------------------------------------------
# Projection and transformation matrix helpers.
#----------------------------------------------------------------------------
def projection(x=0.1, n=1.0, f=50.0):
    """OpenGL-style perspective projection matrix as a float32 (4, 4) array.

    x is the frustum half-extent at the near plane; n and f are the near and
    far clip distances.
    """
    m = [[n/x,    0,            0,              0],
         [  0, n/-x,            0,              0],
         [  0,    0, -(f+n)/(f-n), -(2*f*n)/(f-n)],
         [  0,    0,           -1,              0]]
    return np.asarray(m, dtype=np.float32)
def translate(x, y, z):
    """4x4 homogeneous translation matrix by (x, y, z), float32."""
    m = np.eye(4, dtype=np.float32)
    m[0, 3], m[1, 3], m[2, 3] = x, y, z
    return m
def rotate_x(a):
    """4x4 rotation matrix about the x-axis by angle a (radians), float32."""
    s, c = np.sin(a), np.cos(a)
    m = np.eye(4, dtype=np.float32)
    m[1, 1], m[1, 2] = c, s
    m[2, 1], m[2, 2] = -s, c
    return m
def rotate_y(a):
    """4x4 rotation matrix about the y-axis by angle a (radians), float32."""
    s, c = np.sin(a), np.cos(a)
    m = np.eye(4, dtype=np.float32)
    m[0, 0], m[0, 2] = c, s
    m[2, 0], m[2, 2] = -s, c
    return m
def random_rotation_translation(t):
    """Random rigid transform: random rotation plus translation in [-t, t]^3.

    Returns a (4, 4) homogeneous matrix (float64).
    """
    basis = np.random.normal(size=[3, 3])
    # Orthogonalize: rebuild rows 1 and 2 with cross products, then normalize
    # each row to unit length.
    basis[1] = np.cross(basis[0], basis[2])
    basis[2] = np.cross(basis[0], basis[1])
    basis = basis / np.linalg.norm(basis, axis=1, keepdims=True)
    out = np.pad(basis, [[0, 1], [0, 1]], mode='constant')
    out[3, 3] = 1.0
    out[:3, 3] = np.random.uniform(-t, t, size=[3])
    return out
#----------------------------------------------------------------------------
# Bilinear downsample by 2x.
#----------------------------------------------------------------------------
def bilinear_downsample(x):
    """Downsample an NHWC image tensor by 2x using a 4x4 bilinear kernel."""
    # Separable [1, 3, 3, 1]/8 tap in each dimension; the outer product gives
    # the same 4x4 kernel (summing to 1) as the explicit table.
    k1d = torch.tensor([1.0, 3.0, 3.0, 1.0], dtype=torch.float32, device=x.device) / 8.0
    kernel = torch.outer(k1d, k1d)
    channels = x.shape[-1]
    w = kernel.expand(channels, 1, 4, 4)
    # Depthwise convolution in NCHW, then back to NHWC.
    nchw = x.permute(0, 3, 1, 2)
    y = torch.nn.functional.conv2d(nchw, w, padding=1, stride=2, groups=channels)
    return y.permute(0, 2, 3, 1)
#----------------------------------------------------------------------------
# Image display function using OpenGL.
#----------------------------------------------------------------------------
# Singleton glfw window reused across display_image() calls.
_glfw_window = None
def display_image(image, zoom=None, size=None, title=None): # HWC
    """Display an image array in a persistent OpenGL/glfw window.

    Args:
        image: numpy array in HWC layout; uint8 or float32, 1/2/3 channels.
        zoom:  Integer nearest-neighbor magnification factor.
        size:  Target window height from which zoom is derived (mutually
               exclusive with zoom).
        title: Window title string.

    Returns:
        False once the user has requested the window to close, else True.
    """
    # Import OpenGL and glfw lazily so the module imports without them installed.
    import OpenGL.GL as gl
    import glfw

    # Zoom image if requested.
    image = np.asarray(image)
    if size is not None:
        assert zoom is None
        zoom = max(1, size // image.shape[0])
    if zoom is not None:
        image = image.repeat(zoom, axis=0).repeat(zoom, axis=1)
    height, width, channels = image.shape

    # Initialize window on first use; afterwards just retitle/resize it.
    if title is None:
        title = 'Debug window'
    global _glfw_window
    if _glfw_window is None:
        glfw.init()
        _glfw_window = glfw.create_window(width, height, title, None, None)
        glfw.make_context_current(_glfw_window)
        glfw.show_window(_glfw_window)
        glfw.swap_interval(0)  # No vsync, so display never throttles the caller.
    else:
        glfw.make_context_current(_glfw_window)
        glfw.set_window_title(_glfw_window, title)
        glfw.set_window_size(_glfw_window, width, height)

    # Update window.
    glfw.poll_events()
    gl.glClearColor(0, 0, 0, 1)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glWindowPos2f(0, 0)
    gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)  # Rows may not be 4-byte aligned.
    gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
    gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
    gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])  # glDrawPixels expects bottom-up rows.
    glfw.swap_buffers(_glfw_window)
    if glfw.window_should_close(_glfw_window):
        return False
    return True
#----------------------------------------------------------------------------
# Image save helper.
#----------------------------------------------------------------------------
def save_image(fn, x):
    """Save a float image x (values in [0, 1], HWC layout) to file fn as 8-bit."""
    import imageio
    quantized = np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8)
    imageio.imsave(fn, quantized)
#----------------------------------------------------------------------------
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import nvdiffrast
import setuptools
import os
# The PyPI long description is taken verbatim from the repository README.
with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name="nvdiffrast",
    version=nvdiffrast.__version__,
    author="Samuli Laine",
    author_email="slaine@nvidia.com",
    description="nvdiffrast - modular primitives for high-performance differentiable rendering",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/NVlabs/nvdiffrast",
    packages=setuptools.find_packages(),
    # Ship the native CUDA/C++ sources and headers with the package.
    # NOTE(review): presumably these are compiled at runtime by the torch
    # extension loader — confirm against the package's build machinery.
    package_data={
        'nvdiffrast': [
            'common/*.h',
            'common/*.inl',
            'common/*.cu',
            'common/*.cpp',
            'lib/*.h',
            'torch/*.h',
            'torch/*.inl',
            'torch/*.cpp',
            'tensorflow/*.cu',
        ] + (['lib/*.lib'] if os.name == 'nt' else [])  # Windows-only import library.
    },
    include_package_data=True,
    install_requires=['numpy'], # note: can't require torch here as it will install torch even for a TensorFlow container
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment