import os

try:
    import esim_py
    print("Installation successful!")
except ImportError:
    print("esim_py not found, importing binaries. These do not correspond to source files in this repo")
    import sys
    binaries_folder = os.path.join(os.path.dirname(__file__), "..", "bin")
    sys.path.append(binaries_folder)
    import esim_py
    print("Import of binaries successful!")
# esim_torch
This package exposes Python bindings for ESIM with GPU support.
Test your installation with
```bash
cd esim_torch/
python test.py
```
which should create a plot.
The currently supported functions are listed in the example below:
```python
import esim_torch

# constructor
esim = esim_torch.ESIM(
    contrast_threshold_neg,  # contrast threshold for negative events
    contrast_threshold_pos,  # contrast threshold for positive events
    refractory_period_ns     # refractory period in nanoseconds
)

# event generation
events = esim.forward(
    log_images,    # torch tensor with type float32, shape T x H x W
    timestamps_ns  # torch tensor with type int64, shape T
)

# reset the internal state of the simulator
esim.reset()

# events can also be generated in a for loop
# to keep memory requirements low
for log_image, timestamp_ns in zip(log_images, timestamps_ns):
    sub_events = esim.forward(log_image, timestamp_ns)

    # for the first image, no events are generated, so this needs to be skipped
    if sub_events is None:
        continue

    # do something with the events
    some_function(sub_events)
```
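Here, `log_images` and `timestamps_ns` are log-intensity frames (`float32`) and nanosecond timestamps (`int64`) on the GPU. A minimal preprocessing sketch that produces them, mirroring the test and event-generation scripts below (the paths are placeholders):
```python
import glob
import cv2
import numpy as np
import torch

# load grayscale frames and per-frame timestamps in seconds (placeholder paths)
image_files = sorted(glob.glob("imgs/*.png"))
images = np.stack([cv2.imread(f, cv2.IMREAD_GRAYSCALE) for f in image_files])
timestamps_s = np.genfromtxt("timestamps.txt")

# ESIM operates on log intensity; add a small epsilon to avoid log(0)
log_images = torch.from_numpy(np.log(images.astype("float32") / 255 + 1e-4)).cuda()
timestamps_ns = torch.from_numpy((timestamps_s * 1e9).astype("int64")).cuda()
```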
import argparse
import os
import esim_torch
import numpy as np
import glob
import cv2
import tqdm
import torch
def is_valid_dir(subdirs, files):
    return len(subdirs) == 1 and len(files) == 1 and "timestamps.txt" in files and "imgs" in subdirs
def process_dir(outdir, indir, args):
    print(f"Processing folder {indir}... Generating events in {outdir}")
    os.makedirs(outdir, exist_ok=True)

    # constructor
    esim = esim_torch.ESIM(args.contrast_threshold_negative,
                           args.contrast_threshold_positive,
                           args.refractory_period_ns)

    timestamps = np.genfromtxt(os.path.join(indir, "timestamps.txt"), dtype="float64")
    timestamps_ns = (timestamps * 1e9).astype("int64")
    timestamps_ns = torch.from_numpy(timestamps_ns).cuda()

    image_files = sorted(glob.glob(os.path.join(indir, "imgs", "*.png")))

    pbar = tqdm.tqdm(total=len(image_files)-1)
    num_events = 0
    counter = 0
    for image_file, timestamp_ns in zip(image_files, timestamps_ns):
        image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
        log_image = np.log(image.astype("float32") / 255 + 1e-5)
        log_image = torch.from_numpy(log_image).cuda()

        sub_events = esim.forward(log_image, timestamp_ns)

        # for the first image, no events are generated, so this needs to be skipped
        if sub_events is None:
            continue

        sub_events = {k: v.cpu() for k, v in sub_events.items()}
        num_events += len(sub_events['t'])

        # save the events generated between this frame pair as an .npz file
        np.savez(os.path.join(outdir, "%010d.npz" % counter), **sub_events)
        pbar.set_description(f"Num events generated: {num_events}")
        pbar.update(1)
        counter += 1
if __name__ == "__main__":
    parser = argparse.ArgumentParser("""Generate events from a high frequency video stream""")
    parser.add_argument("--contrast_threshold_negative", "-cn", type=float, default=0.2)
    parser.add_argument("--contrast_threshold_positive", "-cp", type=float, default=0.2)
    parser.add_argument("--refractory_period_ns", "-rp", type=int, default=0)
    parser.add_argument("--input_dir", "-i", default="", required=True)
    parser.add_argument("--output_dir", "-o", default="", required=True)
    args = parser.parse_args()

    print(f"Generating events with cn={args.contrast_threshold_negative}, cp={args.contrast_threshold_positive} and rp={args.refractory_period_ns}")

    for path, subdirs, files in os.walk(args.input_dir):
        if is_valid_dir(subdirs, files):
            output_folder = os.path.join(args.output_dir, os.path.relpath(path, args.input_dir))
            process_dir(output_folder, path, args)
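Each generated `.npz` file stores the event arrays under the keys `x`, `y`, `t`, `p` (see `initialized_forward` below). A small sketch for loading one of these files back, with a placeholder file name:
```python
import numpy as np

# load one chunk of events written by the script above (placeholder file name)
events = np.load("0000000000.npz")
x, y, t, p = events["x"], events["y"], events["t"], events["p"]
print(f"{len(t)} events between t={t.min()} ns and t={t.max()} ns")
```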
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
    name='esim_torch',
    package_dir={'': 'src'},
    packages=['esim_torch'],
    ext_modules=[
        CUDAExtension(name='esim_cuda',
            sources=[
                'src/esim_torch/esim_cuda_kernel.cu',
            ],
            # extra_compile_args={
            #     'cxx': ['-g'],
            #     'nvcc': ['-arch=sm_60', '-O3', '-use_fast_math']
            # }
        )
    ],
    cmdclass={
        'build_ext': BuildExtension
    })
from .esim_torch import EventSimulator_torch as ESIM
#include <torch/extension.h>
#include <vector>
// CUDA forward declarations
std::vector<torch::Tensor> esim_forward(
const torch::Tensor& images,
const torch::Tensor& timestamps,
const torch::Tensor& init_reference_values,
const torch::Tensor& reference_values_over_time,
const torch::Tensor& offsets,
torch::Tensor& events,
torch::Tensor& timestamps_last_event,
float contrast_threshold_negative,
float contrast_threshold_positive,
int64_t refractory_period);
std::vector<torch::Tensor> esim_forward_count_events(
const torch::Tensor& images,
const torch::Tensor& timestamps,
const torch::Tensor& init_reference_values,
torch::Tensor& reference_values_over_time,
torch::Tensor& event_counts,
torch::Tensor& timestamps_last_event,
float contrast_threshold_negative,
float contrast_threshold_positive);
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &esim_forward, "ESIM forward (CUDA)");
m.def("forward_count_events", &esim_forward_count_events, "ESIM forward count events (CUDA)");
}
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_DEVICE(x, y) AT_ASSERTM(x.device().index() == y.device().index(), #x " and " #y " must be in same CUDA device")
/*
Precompute, for every pixel, the reference values over time and the total number of
triggered events, so that the second pass can write events into preallocated memory.
*/
template <typename scalar_t>
__global__ void count_events_cuda_forward_kernel(
const scalar_t* __restrict__ imgs,
const scalar_t* __restrict__ init_refs,
scalar_t* __restrict__ refs_over_time,
int64_t* __restrict__ count_ev,
int T, int H, int W, float ct_neg, float ct_pos)
{
// linear index
const int linIdx = blockIdx.x * blockDim.x + threadIdx.x;
// check that thread is not out of valid range
if (linIdx >= H * W)
return;
scalar_t ref = init_refs[linIdx];
int tot_num_events = 0;
for (int t=0; t<T-1; t++)
{
int tidx = (t+1) * H * W + linIdx;
int tidx_min_1 = t * H * W + linIdx;
scalar_t i0 = imgs[tidx_min_1];
scalar_t i1 = imgs[tidx];
int num_events, polarity;
// process events leading up to i1.
polarity = (i1 >= ref) ? 1 : -1;
float ct = (i1 >= ref) ? ct_pos : ct_neg;
num_events = std::abs(i1 - ref) / ct;
tot_num_events += num_events;
ref += polarity * ct * num_events;
// store the reference value reached after this frame pair;
// it is reused by the second pass (esim_cuda_forward_kernel) below.
refs_over_time[tidx_min_1] = ref;
}
count_ev[linIdx] = tot_num_events;
}
template <typename scalar_t>
__global__ void esim_cuda_forward_kernel(
const scalar_t* __restrict__ imgs,
const int64_t* __restrict__ ts,
const scalar_t* __restrict__ init_ref,
const scalar_t* __restrict__ refs_over_time,
const int64_t* __restrict__ offsets,
int64_t* __restrict__ ev,
int64_t* __restrict__ t_last_ev,
int T, int H, int W, float ct_neg, float ct_pos, int64_t t_ref
)
{
// linear index
const int linIdx = blockIdx.x * blockDim.x + threadIdx.x;
// check that thread is not out of valid range
if (linIdx >= H * W)
return;
int x = linIdx % W;
int y = linIdx / W;
scalar_t ref0 = init_ref[linIdx];
int64_t offset = offsets[linIdx];
for (int t=0; t<T-1; t++) {
// offset_t stores the offset at t.
scalar_t i0 = imgs[linIdx+(t)*H*W]; // shifts forward one timestamp
scalar_t i1 = imgs[linIdx+(t+1)*H*W]; // shifts forward one timestamp
int64_t t0 = ts[t];
int64_t t1 = ts[t+1];
if (t > 0) {
ref0 = refs_over_time[linIdx+(t-1)*H*W];
}
int polarity = (i1 >= ref0) ? 1 : -1;
float ct = (i1 >= ref0) ? ct_pos : ct_neg;
int64_t num_events = std::abs(i1 - ref0) / ct;
int64_t t_prev = t_last_ev[linIdx];
for (int evIdx=0; evIdx<num_events; evIdx++)
{
scalar_t r = (ref0 + (evIdx+1) * polarity * ct - i0) / (i1 - i0);
int64_t timestamp = t0 + (t1-t0)*r;
int64_t delta_t = timestamp - t_prev;
if (delta_t > t_ref || t_prev == 0) {
int64_t idx = 4 * (offset + evIdx);
ev[idx + 0] = x;
ev[idx + 1] = y;
ev[idx + 2] = timestamp;
ev[idx + 3] = polarity;
t_last_ev[linIdx] = timestamp;
t_prev = timestamp;
}
}
offset += num_events;
}
}
std::vector<torch::Tensor> esim_forward_count_events(
const torch::Tensor& imgs, // T x H x W
const torch::Tensor& init_refs, // H x W
torch::Tensor& refs_over_time, // T-1 x H x W
torch::Tensor& count_ev, // H x W
float ct_neg,
float ct_pos)
{
CHECK_INPUT(imgs);
CHECK_INPUT(count_ev);
CHECK_INPUT(init_refs);
CHECK_INPUT(refs_over_time);
CHECK_DEVICE(imgs, count_ev);
CHECK_DEVICE(imgs, init_refs);
CHECK_DEVICE(imgs, refs_over_time);
//cudaSetDevice(imgs.device().index());
unsigned T = imgs.size(0);
unsigned H = imgs.size(1);
unsigned W = imgs.size(2);
//unsigned MAX_NUM_EVENTS = ev.size(1);
unsigned threads = 256;
dim3 blocks((H * W + threads - 1) / threads, 1);
count_events_cuda_forward_kernel<float><<<blocks, threads>>>(
imgs.data<float>(),
init_refs.data<float>(),
refs_over_time.data<float>(),
count_ev.data<int64_t>(),
T, H, W, ct_neg, ct_pos
);
return {refs_over_time, count_ev};
}
torch::Tensor esim_forward(
const torch::Tensor& imgs, // T x H x W
const torch::Tensor& ts, // T
const torch::Tensor& init_refs, // H x W
const torch::Tensor& refs_over_time, // T-1 x H x W
const torch::Tensor& offsets, // H x W
torch::Tensor& ev, // N x 4, x y t p
torch::Tensor& t_last_ev, // H x W
float ct_neg,
float ct_pos,
int64_t dt_ref
)
{
CHECK_INPUT(imgs);
CHECK_INPUT(ts);
CHECK_INPUT(ev);
CHECK_INPUT(offsets);
CHECK_INPUT(refs_over_time);
CHECK_INPUT(init_refs);
CHECK_DEVICE(imgs, ts);
CHECK_DEVICE(imgs, ev);
CHECK_DEVICE(imgs, offsets);
CHECK_DEVICE(imgs, init_refs);
CHECK_DEVICE(imgs, refs_over_time);
CHECK_DEVICE(imgs, t_last_ev);
//cudaSetDevice(imgs.device().index());
unsigned T = imgs.size(0);
unsigned H = imgs.size(1);
unsigned W = imgs.size(2);
unsigned threads = 256;
dim3 blocks((H * W + threads - 1) / threads, 1);
esim_cuda_forward_kernel<float><<<blocks, threads>>>(
imgs.data<float>(),
ts.data<int64_t>(),
init_refs.data<float>(),
refs_over_time.data<float>(),
offsets.data<int64_t>(),
ev.data<int64_t>(),
t_last_ev.data<int64_t>(),
T, H, W, ct_neg, ct_pos, dt_ref
);
return ev;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &esim_forward, "ESIM forward (CUDA)");
m.def("forward_count_events", &esim_forward_count_events, "ESIM forward count events (CUDA)");
}
import torch
import esim_cuda
class EventSimulator_torch(torch.nn.Module):
    def __init__(self, contrast_threshold_neg=0.2, contrast_threshold_pos=0.2, refractory_period_ns=0):
        super().__init__()
        self.contrast_threshold_neg = contrast_threshold_neg
        self.contrast_threshold_pos = contrast_threshold_pos
        self.refractory_period_ns = int(refractory_period_ns)
        self.initial_reference_values = None
        self.timestamps_last_event = None
        self.last_image = None
        self.last_time = None

    def _check_inputs(self, images, timestamps):
        assert timestamps.dtype == torch.int64, timestamps.dtype
        assert images.dtype == torch.float32, images.dtype

    def reset(self):
        self.initial_reference_values = None
        self.last_image = None
        self.last_time = None

    def forward(self, images, timestamps):
        if len(images.shape) == 2:
            images = images.unsqueeze(0)
        if len(timestamps.shape) == 0:
            timestamps = timestamps.unsqueeze(0)

        self._check_inputs(images, timestamps)

        if self.initial_reference_values is None:
            self.initial_reference_values = images[0].clone()
            self.timestamps_last_event = torch.zeros_like(self.initial_reference_values).long()

        if self.last_image is not None:
            images = torch.cat([self.last_image, images], 0)
            timestamps = torch.cat([self.last_time, timestamps], 0)

        # a single image cannot generate events, so buffer it for the next call
        if len(images) == 1:
            self.last_image = images[-1:]
            self.last_time = timestamps[-1:]
            return None

        events = self.initialized_forward(images, timestamps)

        self.last_image = images[-1:]
        self.last_time = timestamps[-1:]

        return events

    def initialized_forward(self, images, timestamps):
        T, H, W = images.shape

        # first pass: count events per pixel and track the reference values over time
        reference_values_over_time = torch.zeros((T-1, H, W),
                                                 device=images.device,
                                                 dtype=images.dtype)
        event_counts = torch.zeros_like(images[0]).long()
        reference_values_over_time, event_counts = esim_cuda.forward_count_events(images,
                                                                                  self.initial_reference_values,
                                                                                  reference_values_over_time,
                                                                                  event_counts,
                                                                                  self.contrast_threshold_neg,
                                                                                  self.contrast_threshold_pos)

        # compute the offsets for each event group
        cumsum = event_counts.view(-1).cumsum(dim=0)
        total_num_events = cumsum[-1]
        offsets = cumsum.view(H, W) - event_counts

        # second pass: compute the events on the GPU
        events = torch.zeros((total_num_events, 4), device=cumsum.device, dtype=cumsum.dtype)
        events = esim_cuda.forward(images,
                                   timestamps,
                                   self.initial_reference_values,
                                   reference_values_over_time,
                                   offsets,
                                   events,
                                   self.timestamps_last_event,
                                   self.contrast_threshold_neg,
                                   self.contrast_threshold_pos,
                                   self.refractory_period_ns)

        if len(events) == 0:
            return None

        # sort by timestamp and discard placeholder entries with t == 0
        events = events[events[:, 2].argsort()]
        events = events[events[:, 2] > 0]

        self.initial_reference_values = reference_values_over_time[-1]

        return dict(zip(['x', 'y', 't', 'p'], events.T))
import torch
import matplotlib.pyplot as plt
import numpy as np
import glob
import cv2
import esim_torch
if __name__ == "__main__":
    esim = esim_torch.ESIM(contrast_threshold_neg=0.2,
                           contrast_threshold_pos=0.2,
                           refractory_period_ns=0)

    print("Loading images")
    image_files = sorted(glob.glob("../esim_py/tests/data/images/images/*.png"))
    images = np.stack([cv2.imread(f, cv2.IMREAD_GRAYSCALE) for f in image_files])
    timestamps_s = np.genfromtxt("../esim_py/tests/data/images/timestamps.txt")
    timestamps_ns = (timestamps_s * 1e9).astype("int64")
    log_images = np.log(images.astype("float32") / 255 + 1e-4)

    # generate torch tensors
    print("Loading data to GPU")
    device = "cuda:0"
    log_images = torch.from_numpy(log_images).to(device)
    timestamps_ns = torch.from_numpy(timestamps_ns).to(device)

    # generate events with GPU support
    print("Generating events")
    events = esim.forward(log_images, timestamps_ns)

    # render events
    image = images[0]
    print("Plotting")
    first_few_events = {k: v[:10000].cpu().numpy() for k, v in events.items()}

    image_color = np.stack([image, image, image], -1)
    image_color[first_few_events['y'], first_few_events['x'], :] = 0
    image_color[first_few_events['y'], first_few_events['x'], first_few_events['p']] = 255

    plt.imshow(image_color)
    plt.show()
import torch
import matplotlib.pyplot as plt
import numpy as np
import esim_torch
def increasing_sin_wave(t):
    return (400 * np.sin((t-t[0])*20*np.pi)*(t-t[0])+150).astype("uint8").reshape((-1, 1, 1))

if __name__ == "__main__":
    c = 0.2
    refractory_period_ns = 5e6
    esim = esim_torch.ESIM(contrast_threshold_neg=c,
                           contrast_threshold_pos=c,
                           refractory_period_ns=refractory_period_ns)

    print("Loading images")
    timestamps_s = np.genfromtxt("../esim_py/tests/data/images/timestamps.txt")
    images = increasing_sin_wave(timestamps_s)
    timestamps_ns = (timestamps_s * 1e9).astype("int64")
    log_images = np.log(images.astype("float32") / 255 + 1e-4)

    # generate torch tensors
    print("Loading data to GPU")
    device = "cuda:0"
    log_images = torch.from_numpy(log_images).to(device)
    timestamps_ns = torch.from_numpy(timestamps_ns).to(device)

    # generate events with GPU support
    print("Generating events")
    events = esim.forward(log_images, timestamps_ns)

    # render events
    image = images[0]
    print("Plotting")
    event_timestamps = events['t']
    event_polarities = events['p']

    i0 = log_images[0].cpu().numpy().ravel()

    fig, ax = plt.subplots(ncols=2)
    timestamps_ns = timestamps_ns.cpu().numpy()
    log_images = log_images.cpu().numpy().ravel()
    ax[0].plot(timestamps_ns, log_images)
    ax[0].plot(timestamps_ns, images.ravel())
    ax[0].set_ylim([np.log(1e-1), np.log(1 + 1e-4)])
    ax[0].set_ylabel("Log Intensity")
    ax[0].set_xlabel("Time [ns]")

    ax[1].set_ylabel("Time since last event [ns]")
    ax[1].set_xlabel("Timestamp of event [ns]")
    ax[1].set_xlim([0, 3e8])

    # draw the contrast threshold crossings around the initial log intensity
    for i in range(-10, 3):
        ax[0].plot([0, timestamps_ns[-1]], [i0+i*c, i0+i*c], c='g')

    event_timestamps = event_timestamps.cpu().numpy()
    for i, (t, p) in enumerate(zip(event_timestamps, event_polarities)):
        color = "r" if p == -1 else "b"
        ax[0].plot([t, t], [-3, 0], c=color)
        if i > 0:
            ax[1].scatter([t], [t-event_timestamps[i-1]], c=color)

    ax[1].plot([0, 3e8], [refractory_period_ns, refractory_period_ns])
    plt.show()
absl-py==0.12.0
apache-beam==2.34.0
asttokens==2.0.5
astunparse==1.6.3
attrs==21.4.0
avro-python3==1.9.2.1
backcall==0.2.0
cachetools==4.2.4
certifi==2021.10.8
charset-normalizer==2.0.12
clang==5.0
crcmod==1.7
cycler==0.11.0
decorator==5.1.1
dill==0.3.1.1
docopt==0.6.2
executing==0.8.3
fastavro==1.4.10
flatbuffers==1.12
fonttools==4.31.2
future==0.18.2
gast==0.4.0
gin-config==0.5.0
google-api-core==1.31.5
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-cloud-bigquery-storage==1.1.0
google-pasta==0.2.0
googleapis-common-protos==1.56.0
grpcio==1.44.0
h5py==3.1.0
hdfs==2.7.0
httplib2==0.19.1
idna==3.3
imageio==2.15.0
importlib-metadata==4.11.3
ipython==8.2.0
jedi==0.18.1
keras==2.6.0
Keras-Preprocessing==1.1.2
kiwisolver==1.4.2
Markdown==3.3.6
matplotlib==3.5.1
matplotlib-inline==0.1.3
mediapy==1.0.3
natsort==8.1.0
networkx==2.7.1
numpy==1.19.5
oauth2client==4.1.3
oauthlib==3.2.0
opencv-python==4.5.5.64
opt-einsum==3.3.0
orjson==3.6.7
packaging==21.3
parameterized==0.8.1
parso==0.8.3
pexpect==4.8.0
pickleshare==0.7.5
Pillow==9.0.1
promise==2.3
prompt-toolkit==3.0.28
protobuf==3.19.4
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==5.0.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pydot==1.4.2
Pygments==2.11.2
pymongo==3.12.3
pyparsing==2.4.7
python-dateutil==2.8.2
pytz==2022.1
PyWavelets==1.3.0
requests==2.27.1
requests-oauthlib==1.3.1
rsa==4.8
scikit-image==0.19.1
scikit-video==1.1.11
scipy==1.8.0
six==1.15.0
stack-data==0.2.0
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
tensorflow==2.6.2
tensorflow-addons==0.15.0
tensorflow-datasets==4.4.0
tensorflow-estimator==2.6.0
tensorflow-metadata==1.7.0
termcolor==1.1.0
tifffile==2022.3.25
tqdm==4.63.1
traitlets==5.1.1
typeguard==2.13.3
typing-extensions==3.7.4.3
urllib3==1.26.9
wcwidth==0.2.5
Werkzeug==2.1.0
wrapt==1.12.1
zipp==3.7.0
# Adaptive Upsampling
## Generate Upsampled Video or Image Sequences
You can use our example directory to experiment
```bash
device=cpu
# device=0
CUDA_VISIBLE_DEVICES=$device python upsample.py --input_dir=../example/original --output_dir=../example/upsampled
```
The **expected input structure** is as follows:
```
input_dir
├── seq0
│   ├── fps.txt
│   └── imgs
│       ├── 00000001.png
│       ├── 00000002.png
│       ├── 00000003.png
│       └── ....png
├── seq1
│   └── video.mp4
└── dirname_does_not_matter
    ├── fps.txt
    └── filename_does_not_matter.mov
```
- The number of sequences (subfolders of the input directory) is unlimited.
- The `fps.txt` file
  - must specify the frames per second in the first line. The rest of the file should be empty (see example directory).
  - is required for sequences with image files (such as seq0).
  - is **optional** for sequences with a video file. If `fps.txt` is missing, the frames per second are inferred from the metadata of the video file.
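For example, a sequence recorded at 25 frames per second uses an `fps.txt` that contains nothing but the line
```
25
```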
The **resulting output structure** is as follows:
```
output_dir
├── seq0
│   ├── imgs
│   │   ├── 00000001.png
│   │   ├── 00000002.png
│   │   ├── 00000003.png
│   │   └── ....png
│   └── timestamps.txt
├── seq1
│   ├── imgs
│   │   ├── 00000001.png
│   │   ├── 00000002.png
│   │   ├── 00000003.png
│   │   └── ....png
│   └── timestamps.txt
└── dirname_does_not_matter
    ├── imgs
    │   ├── 00000001.png
    │   ├── 00000002.png
    │   ├── 00000003.png
    │   └── ....png
    └── timestamps.txt
```
The resulting image directories can later be used to generate events. The `timestamps.txt` file contains the timestamp of each image in seconds.
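Since the upsampled output places a `timestamps.txt` next to each `imgs` folder, it matches the input layout expected by the event generation script shown earlier. A sketch of that follow-up call, assuming the script is saved as `generate_events.py` (script name and paths are placeholders):
```bash
python generate_events.py --input_dir=../example/upsampled --output_dir=../example/events -cp 0.2 -cn 0.2 -rp 0
```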
## Remarks
- Use a GPU whenever possible to speed up the upsampling procedure.
- Upsampling increases the storage requirements significantly. Try a small sample first to get an impression.
- Downsample (height and width) your images and videos to save storage space and processing time (see the ffmpeg one-liner after this list).
- Why store the upsampling result as images:
  - Images support random access from a dataloader. A video file, in contrast, can typically only be accessed sequentially if we want to avoid loading the whole video into RAM.
  - The same sequence can be accessed by multiple processes (e.g. PyTorch `num_workers` > 1).
  - There is a well-established C++ interface for loading images. This is useful to generate events on the fly (needed for contrast threshold randomization) in C++ code without loading data in Python first.

  If there is a need to store the resulting sequences in a different format, raise an issue (feature request) on this GitHub repository.
- Be aware that upsampling videos might fail due to a [bug in scikit-video](https://github.com/scikit-video/scikit-video/issues/60).
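A possible way to downsample a video with ffmpeg before upsampling, as suggested above (file names are placeholders):
```bash
# halve width and height; adjust the scale expression as needed
ffmpeg -i video.mp4 -vf "scale=iw/2:ih/2" video_downsampled.mp4
```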
### Generating Video Files from Images
If you want to convert an ordered sequence of images (here png files) into video format you can use the following command (you may have to deactivate the current conda environment):
```bash
frame_rate=25
img_dirpath="example/original/seq0/imgs"
img_suffix=".png"
output_file="video.mp4"
ffmpeg -framerate $frame_rate -pattern_type glob -i "$img_dirpath/*$img_suffix" -c:v libx265 -x265-params lossless=1 $output_file
```
### Generating Images from a Video File
If you want to convert a video file to a sequence of images:
```bash
input_file="video.mp4"
output_dirpath="your_path_to_specify"
ffmpeg -i $input_file "$output_dirpath/%08d.png"
```
import argparse
import os
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Must be set before importing torch.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
from utils import Upsampler
def get_flags():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir", required=True, help='Path to input directory. See README.md for expected structure of the directory.')
    parser.add_argument("--output_dir", required=True, help='Path to non-existing output directory. This script will generate the directory.')
    args = parser.parse_args()
    return args

def main():
    flags = get_flags()
    upsampler = Upsampler(input_dir=flags.input_dir, output_dir=flags.output_dir)
    upsampler.upsample()

if __name__ == '__main__':
    main()
from .dataset import Sequence
from .upsampler import Upsampler
from .utils import get_sequence_or_none
mean = [0.429, 0.431, 0.397]
std = [1, 1, 1]
fps_filename = 'fps.txt'
imgs_dirname = 'imgs'
# TODO(magehrig): Use https://github.com/ahupp/python-magic instead.
video_formats = {'.webm', '.mp4', '.m4p', '.m4v', '.avi', '.avchd', '.ogg', '.mov', '.ogv', '.vob', '.f4v', '.mkv', '.svi', '.m2v', '.mpg', '.mp2', '.mpeg', '.mpe', '.mpv', '.amv', '.wmv', '.flv', '.mts', '.m2ts', '.ts', '.qt', '.3gp', '.3g2', '.f4p', '.f4a', '.f4b'}
img_formats = {'.png', '.jpg', '.jpeg', '.bmp', '.pbm', '.pgm', '.ppm', '.pnm', '.webp', '.tiff', '.tif'}
import os
from pathlib import Path
from typing import Union
from fractions import Fraction
from PIL import Image
import skvideo.io
import numpy as np
from .const import mean, std, img_formats
class Sequence:
    def __init__(self):
        pass

    def __iter__(self):
        return self

    def __next__(self):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

class ImageSequence(Sequence):
    def __init__(self, imgs_dirpath: str, fps: float):
        super().__init__()
        self.fps = fps

        assert os.path.isdir(imgs_dirpath)
        self.imgs_dirpath = imgs_dirpath

        self.file_names = [f for f in os.listdir(imgs_dirpath) if self._is_img_file(f)]
        assert self.file_names
        self.file_names.sort()

    @classmethod
    def _is_img_file(cls, path: str):
        return Path(path).suffix.lower() in img_formats

    def __next__(self):
        for idx in range(0, len(self.file_names) - 1):
            file_paths = self._get_path_from_name([self.file_names[idx], self.file_names[idx + 1]])
            imgs = [self._pil_loader(f) for f in file_paths]
            times_sec = [idx/self.fps, (idx + 1)/self.fps]
            yield imgs, times_sec

    def __len__(self):
        return len(self.file_names) - 1

    @staticmethod
    def _pil_loader(path):
        with open(path, 'rb') as f:
            img = Image.open(f)
            img = img.convert('RGB')
            # center-crop to the largest size divisible by 32 and scale to [0, 1]
            w_orig, h_orig = img.size
            w, h = w_orig//32*32, h_orig//32*32
            left = (w_orig - w)//2
            upper = (h_orig - h)//2
            right = left + w
            lower = upper + h
            img = img.crop((left, upper, right, lower))
            return np.array(img).astype("float32") / 255

    def _get_path_from_name(self, file_names: Union[list, str]) -> Union[list, str]:
        if isinstance(file_names, list):
            return [os.path.join(self.imgs_dirpath, f) for f in file_names]
        return os.path.join(self.imgs_dirpath, file_names)

class VideoSequence(Sequence):
    def __init__(self, video_filepath: str, fps: float=None):
        super().__init__()
        metadata = skvideo.io.ffprobe(os.path.abspath(video_filepath))
        self.fps = fps
        if self.fps is None:
            self.fps = float(Fraction(metadata['video']['@avg_frame_rate']))
            assert self.fps > 0, 'Could not retrieve fps from video metadata. fps: {}'.format(self.fps)
            print('Using video metadata: Got fps of {} frames/sec'.format(self.fps))

        # Length is number of frames - 1 (because we return pairs).
        self.len = int(metadata['video']['@nb_frames']) - 1
        self.videogen = skvideo.io.vreader(os.path.abspath(video_filepath))
        self.last_frame = None

    def __next__(self):
        for idx, frame in enumerate(self.videogen):
            # center-crop to the largest size divisible by 32 and scale to [0, 1]
            h_orig, w_orig, _ = frame.shape
            w, h = w_orig//32*32, h_orig//32*32
            left = (w_orig - w)//2
            upper = (h_orig - h)//2
            right = left + w
            lower = upper + h
            frame = frame[upper:lower, left:right].astype("float32") / 255
            assert frame.shape[:2] == (h, w)

            if self.last_frame is None:
                self.last_frame = frame
                continue

            last_frame_copy = self.last_frame.copy()
            self.last_frame = frame
            imgs = [last_frame_copy, frame]
            times_sec = [(idx - 1)/self.fps, idx/self.fps]
            yield imgs, times_sec

    def __len__(self):
        return self.len
# Copyright 2022 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A wrapper class for running a frame interpolation TF2 saved model.
Usage:
model_path='/tmp/saved_model/'
it = Interpolator(model_path)
result_batch = it.interpolate(image_batch_0, image_batch_1, batch_dt)
Where image_batch_0 and image_batch_1 are numpy tensors with TF standard
(B,H,W,C) layout, and batch_dt is the sub-frame time in range [0,1] with (B,) layout.
"""
from typing import Optional
import numpy as np
import tensorflow as tf
def _pad_to_align(x, align):
    """Pad image batch x so width and height divide by align.

    Args:
      x: Image batch to align.
      align: Number to align to.

    Returns:
      1) An image padded so width % align == 0 and height % align == 0.
      2) A bounding box that can be fed readily to tf.image.crop_to_bounding_box
         to undo the padding.
    """
    # Input checking.
    assert np.ndim(x) == 4
    assert align > 0, 'align must be a positive number.'

    height, width = x.shape[-3:-1]
    height_to_pad = (align - height % align) if height % align != 0 else 0
    width_to_pad = (align - width % align) if width % align != 0 else 0

    bbox_to_pad = {
        'offset_height': height_to_pad // 2,
        'offset_width': width_to_pad // 2,
        'target_height': height + height_to_pad,
        'target_width': width + width_to_pad
    }
    padded_x = tf.image.pad_to_bounding_box(x, **bbox_to_pad)
    bbox_to_crop = {
        'offset_height': height_to_pad // 2,
        'offset_width': width_to_pad // 2,
        'target_height': height,
        'target_width': width
    }
    return padded_x, bbox_to_crop
class Interpolator:
    """A class for generating interpolated frames between two input frames.

    Uses the TF2 saved model format.
    """

    def __init__(self, model_path: str,
                 align: Optional[int] = None) -> None:
        """Loads a saved model.

        Args:
          model_path: Path to the saved model.
          align: If >1, pad the input size so it divides by this before inference.
        """
        self._model = tf.compat.v2.saved_model.load(model_path)
        self._align = align

    def interpolate(self, x0: np.ndarray, x1: np.ndarray,
                    dt: np.ndarray) -> np.ndarray:
        """Generates an interpolated frame between two given batches of frames.

        All input tensors should be np.float32 datatype.

        Args:
          x0: First image batch. Dimensions: (batch_size, height, width, channels)
          x1: Second image batch. Dimensions: (batch_size, height, width, channels)
          dt: Sub-frame time. Range [0,1]. Dimensions: (batch_size,)

        Returns:
          The interpolated image batch with dimensions (batch_size, height, width,
          channels), together with the forward and backward flow of the first
          pyramid level.
        """
        if self._align is not None:
            x0, bbox_to_crop = _pad_to_align(x0, self._align)
            x1, _ = _pad_to_align(x1, self._align)

        inputs = {'x0': x0, 'x1': x1, 'time': dt[..., np.newaxis]}
        result = self._model(inputs, training=False)
        forward_flow = result['forward_flow_pyramid'][0].numpy()
        backward_flow = result['backward_flow_pyramid'][0].numpy()
        image = result['image']

        if self._align is not None:
            image = tf.image.crop_to_bounding_box(image, **bbox_to_crop)
        return image.numpy(), forward_flow, backward_flow
# Taken from https://github.com/avinashpaliwal/Super-SloMo/blob/bbf0375958d66dab48143166a5b80cd26a406458/model.py
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class down(nn.Module):
    """
    A class for creating neural network blocks containing layers:

    Average Pooling --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet-like NN architecture.

    ...

    Methods
    -------
    forward(x)
        Returns output tensor after passing input `x` to the neural network
        block.
    """

    def __init__(self, inChannels, outChannels, filterSize):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the first convolutional layer.
            outChannels : int
                number of output channels for the first convolutional layer.
                This is also used as input and output channels for the
                second convolutional layer.
            filterSize : int
                filter size for the convolution filter. input N would create
                a N x N filter.
        """
        super(down, self).__init__()
        # Initialize convolutional layers.
        self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize, stride=1, padding=int((filterSize - 1) / 2))
        self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize, stride=1, padding=int((filterSize - 1) / 2))

    def forward(self, x):
        """
        Returns output tensor after passing input `x` to the neural network
        block.

        Parameters
        ----------
            x : tensor
                input to the NN block.

        Returns
        -------
            tensor
                output of the NN block.
        """
        # Average pooling with kernel size 2 (2 x 2).
        x = F.avg_pool2d(x, 2)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv2(x), negative_slope=0.1)
        return x
class up(nn.Module):
    """
    A class for creating neural network blocks containing layers:

    Bilinear interpolation --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet-like NN architecture.

    ...

    Methods
    -------
    forward(x, skpCn)
        Returns output tensor after passing input `x` to the neural network
        block.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the first convolutional layer.
            outChannels : int
                number of output channels for the first convolutional layer.
                This is also used for setting input and output channels for
                the second convolutional layer.
        """
        super(up, self).__init__()
        # Initialize convolutional layers.
        self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1)
        # (2 * outChannels) is used for accommodating skip connection.
        self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)

    def forward(self, x, skpCn):
        """
        Returns output tensor after passing inputs `x` and `skpCn` to the neural
        network block.

        Parameters
        ----------
            x : tensor
                input to the NN block.
            skpCn : tensor
                skip connection input to the NN block.

        Returns
        -------
            tensor
                output of the NN block.
        """
        # Bilinear interpolation with scaling 2.
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        # Convolution + Leaky ReLU on (`x`, `skpCn`)
        x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)
        return x
class UNet(nn.Module):
    """
    A class for creating the UNet-like architecture as specified by the
    Super SloMo paper.

    ...

    Methods
    -------
    forward(x)
        Returns output tensor after passing input `x` to the neural network
        block.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the UNet.
            outChannels : int
                number of output channels for the UNet.
        """
        super(UNet, self).__init__()
        # Initialize neural network blocks.
        self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3)
        self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3)
        self.down1 = down(32, 64, 5)
        self.down2 = down(64, 128, 3)
        self.down3 = down(128, 256, 3)
        self.down4 = down(256, 512, 3)
        self.down5 = down(512, 512, 3)
        self.up1 = up(512, 512)
        self.up2 = up(512, 256)
        self.up3 = up(256, 128)
        self.up4 = up(128, 64)
        self.up5 = up(64, 32)
        self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)

    def forward(self, x):
        """
        Returns output tensor after passing input `x` to the neural network.

        Parameters
        ----------
            x : tensor
                input to the UNet.

        Returns
        -------
            tensor
                output of the UNet.
        """
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        s1 = F.leaky_relu(self.conv2(x), negative_slope=0.1)
        s2 = self.down1(s1)
        s3 = self.down2(s2)
        s4 = self.down3(s3)
        s5 = self.down4(s4)
        x = self.down5(s5)
        x = self.up1(x, s5)
        x = self.up2(x, s4)
        x = self.up3(x, s3)
        x = self.up4(x, s2)
        x = self.up5(x, s1)
        x = F.leaky_relu(self.conv3(x), negative_slope=0.1)
        return x
class backWarp(nn.Module):
    """
    A class for creating a backwarping object.

    This is used for backwarping to an image:

    Given optical flow from frame I0 to I1 --> F_0_1 and frame I1,
    it generates I0 <-- backwarp(F_0_1, I1).

    ...

    Methods
    -------
    forward(img, flow)
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
    """

    def __init__(self, W, H, device):
        """
        Parameters
        ----------
            W : int
                width of the image.
            H : int
                height of the image.
            device : device
                computation device (cpu/cuda).
        """
        super(backWarp, self).__init__()
        # create a grid
        gridX, gridY = np.meshgrid(np.arange(W), np.arange(H))
        self.W = W
        self.H = H
        self.gridX = torch.tensor(gridX, requires_grad=False, device=device)
        self.gridY = torch.tensor(gridY, requires_grad=False, device=device)

    def forward(self, img, flow):
        """
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.

        I0 = backwarp(I1, F_0_1)

        Parameters
        ----------
            img : tensor
                frame I1.
            flow : tensor
                optical flow from I0 and I1: F_0_1.

        Returns
        -------
            tensor
                frame I0.
        """
        # Extract horizontal and vertical flows.
        u = flow[:, 0, :, :]
        v = flow[:, 1, :, :]
        x = self.gridX.unsqueeze(0).expand_as(u).float() + u
        y = self.gridY.unsqueeze(0).expand_as(v).float() + v
        # Normalize the sampling grid to the range -1 to 1.
        x = 2*(x/self.W - 0.5)
        y = 2*(y/self.H - 0.5)
        # stacking X and Y
        grid = torch.stack((x, y), dim=3)
        # Sample pixels using bilinear interpolation.
        imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
        return imgOut
import os
import shutil
import cv2
import numpy as np
from tqdm import tqdm
from . import Sequence
from .const import imgs_dirname
from .interpolator import Interpolator
from .utils import get_sequence_or_none
class Upsampler:
    _timestamps_filename = 'timestamps.txt'

    def __init__(self, input_dir: str, output_dir: str):
        assert os.path.isdir(input_dir), 'The input directory must exist'
        assert not os.path.exists(output_dir), 'The output directory must not exist'
        self._prepare_output_dir(input_dir, output_dir)

        self.src_dir = input_dir
        self.dest_dir = output_dir

        path = os.path.join(os.path.dirname(__file__), "../../pretrained_models/film_net/Style/saved_model")
        self.interpolator = Interpolator(path, None)

    def upsample(self):
        sequence_counter = 0
        for src_absdirpath, dirnames, filenames in os.walk(self.src_dir):
            sequence = get_sequence_or_none(src_absdirpath)
            if sequence is None:
                continue
            sequence_counter += 1
            print('Processing sequence number {}: {}'.format(sequence_counter, src_absdirpath))

            reldirpath = os.path.relpath(src_absdirpath, self.src_dir)
            dest_imgs_dir = os.path.join(self.dest_dir, reldirpath, imgs_dirname)
            dest_timestamps_filepath = os.path.join(self.dest_dir, reldirpath, self._timestamps_filename)

            self.upsample_sequence(sequence, dest_imgs_dir, dest_timestamps_filepath)

    def upsample_sequence(self, sequence: Sequence, dest_imgs_dir: str, dest_timestamps_filepath: str):
        os.makedirs(dest_imgs_dir, exist_ok=True)

        timestamps_list = list()
        idx = 0
        for img_pair, time_pair in tqdm(next(sequence), total=len(sequence), desc=type(sequence).__name__):
            I0 = img_pair[0][None]
            I1 = img_pair[1][None]
            t0, t1 = time_pair

            total_frames, total_timestamps = self._upsample_adaptive(I0, I1, t0, t1)
            total_frames = [I0[0]] + total_frames
            timestamps = [t0] + total_timestamps

            # the recursive bisection does not return frames in temporal order, so sort them
            sorted_indices = np.argsort(timestamps)
            total_frames = [total_frames[j] for j in sorted_indices]
            timestamps = [timestamps[j] for j in sorted_indices]

            timestamps_list += timestamps
            for frame in total_frames:
                self._write_img(frame, idx, dest_imgs_dir)
                idx += 1

        timestamps_list.append(t1)
        self._write_img(I1[0, ...], idx, dest_imgs_dir)

        self._write_timestamps(timestamps_list, dest_timestamps_filepath)

    def _upsample_adaptive(self, I0, I1, t0, t1, num_bisections=-1):
        if num_bisections == 0:
            return [], []

        dt = self.batch_dt = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)
        image, F_0_1, F_1_0 = self.interpolator.interpolate(I0, I1, dt)

        # choose the number of bisections from the maximum optical flow magnitude
        # between the two input frames
        if num_bisections < 0:
            flow_mag_0_1_max = ((F_0_1 ** 2).sum(-1) ** .5).max()
            flow_mag_1_0_max = ((F_1_0 ** 2).sum(-1) ** .5).max()
            num_bisections = int(np.ceil(np.log(max([flow_mag_0_1_max, flow_mag_1_0_max]))/np.log(2)))
            if num_bisections == 0:
                return [image[0]], [(t0 + t1) / 2]

        left_images, left_timestamps = self._upsample_adaptive(I0, image, t0, (t0+t1)/2, num_bisections=num_bisections-1)
        right_images, right_timestamps = self._upsample_adaptive(image, I1, (t0+t1)/2, t1, num_bisections=num_bisections-1)

        timestamps = left_timestamps + [(t0+t1)/2] + right_timestamps
        images = left_images + [image[0]] + right_images
        return images, timestamps

    def _prepare_output_dir(self, src_dir: str, dest_dir: str):
        # Copy the directory structure, but not the files.
        def ignore_files(directory, files):
            return [f for f in files if os.path.isfile(os.path.join(directory, f))]
        shutil.copytree(src_dir, dest_dir, ignore=ignore_files)

    @staticmethod
    def _write_img(img: np.ndarray, idx: int, imgs_dir: str):
        assert os.path.isdir(imgs_dir)
        img = np.clip(img * 255, 0, 255).astype("uint8")
        path = os.path.join(imgs_dir, "%08d.png" % idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(path, img)

    @staticmethod
    def _write_timestamps(timestamps: list, timestamps_filename: str):
        with open(timestamps_filename, 'w') as t_file:
            t_file.writelines([str(t) + '\n' for t in timestamps])
import os
from pathlib import Path
from typing import Union
from .const import fps_filename, imgs_dirname, video_formats
from .dataset import Sequence, ImageSequence, VideoSequence
def is_video_file(filepath: str) -> bool:
    return Path(filepath).suffix.lower() in video_formats

def get_fps_file(dirpath: str) -> Union[None, str]:
    fps_file = os.path.join(dirpath, fps_filename)
    if os.path.isfile(fps_file):
        return fps_file
    return None

def get_imgs_directory(dirpath: str) -> Union[None, str]:
    imgs_dir = os.path.join(dirpath, imgs_dirname)
    if os.path.isdir(imgs_dir):
        return imgs_dir
    return None

def get_video_file(dirpath: str) -> Union[None, str]:
    filenames = [f for f in os.listdir(dirpath) if is_video_file(f)]
    if len(filenames) == 0:
        return None
    assert len(filenames) == 1
    filepath = os.path.join(dirpath, filenames[0])
    return filepath

def fps_from_file(fps_file) -> float:
    assert os.path.isfile(fps_file)
    with open(fps_file, 'r') as f:
        fps = float(f.readline().strip())
    assert fps > 0, 'Expected fps to be larger than 0. Instead got fps={}'.format(fps)
    return fps

def get_sequence_or_none(dirpath: str) -> Union[None, Sequence]:
    fps_file = get_fps_file(dirpath)
    if fps_file:
        # Must be a sequence (either ImageSequence or VideoSequence).
        fps = fps_from_file(fps_file)
        imgs_dir = get_imgs_directory(dirpath)
        if imgs_dir:
            return ImageSequence(imgs_dir, fps)
        video_file = get_video_file(dirpath)
        assert video_file is not None
        return VideoSequence(video_file, fps)
    # Without fps.txt, this can still be a VideoSequence if there is a video file,
    # but then the fps must come from the video metadata.
    video_file = get_video_file(dirpath)
    if video_file is not None:
        return VideoSequence(video_file)
    return None