"deploy/vscode:/vscode.git/clone" did not exist on "ae4167dc32fe4e35dffc90b48a80ee07cd1dc00f"
Unverified commit 8cfd677c authored by Joey Ballentine, committed by GitHub

Replace chainner_models with Spandrel package (#2146)

* Replace chainner_models with Spandrel

* Update to latest spandrel

* Use spandrel_foss instead

* update spandrel to new FOSS-compliant version
Parent: ffc4b7c3
# pylint: skip-file
import math
import re
import numpy as np
import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint
from einops import rearrange
from einops.layers.torch import Rearrange
from torch import Tensor
from torch.nn import functional as F
from .timm.drop import DropPath
from .timm.weight_init import trunc_normal_
def img2windows(img, H_sp, W_sp):
"""
Input: Image (B, C, H, W)
Output: Window Partition (B', N, C)
"""
B, C, H, W = img.shape
img_reshape = img.view(B, C, H // H_sp, H_sp, W // W_sp, W_sp)
img_perm = (
img_reshape.permute(0, 2, 4, 3, 5, 1).contiguous().reshape(-1, H_sp * W_sp, C)
)
return img_perm
def windows2img(img_splits_hw, H_sp, W_sp, H, W):
"""
Input: Window Partition (B', N, C)
Output: Image (B, H, W, C)
"""
B = int(img_splits_hw.shape[0] / (H * W / H_sp / W_sp))
img = img_splits_hw.view(B, H // H_sp, W // W_sp, H_sp, W_sp, -1)
img = img.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
return img
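# Illustrative sketch (not part of the original module): partitioning an image
# into 8x8 windows with img2windows and merging it back with windows2img is an
# exact round trip. The tensor sizes here are arbitrary.
def _demo_window_roundtrip():
    x = torch.randn(2, 16, 32, 32)  # (B, C, H, W)
    windows = img2windows(x, 8, 8)  # (B * 4 * 4, 8 * 8, C) = (32, 64, 16)
    restored = windows2img(windows, 8, 8, 32, 32)  # (B, H, W, C)
    assert torch.equal(restored.permute(0, 3, 1, 2), x)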
class SpatialGate(nn.Module):
"""Spatial-Gate.
Args:
dim (int): Half of input channels.
"""
def __init__(self, dim):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.conv = nn.Conv2d(
dim, dim, kernel_size=3, stride=1, padding=1, groups=dim
) # DW Conv
def forward(self, x, H, W):
# Split
x1, x2 = x.chunk(2, dim=-1)
B, N, C = x.shape
x2 = (
self.conv(self.norm(x2).transpose(1, 2).contiguous().view(B, C // 2, H, W))
.flatten(2)
.transpose(-1, -2)
.contiguous()
)
return x1 * x2
class SGFN(nn.Module):
"""Spatial-Gate Feed-Forward Network.
Args:
in_features (int): Number of input channels.
hidden_features (int | None): Number of hidden channels. Default: None
out_features (int | None): Number of output channels. Default: None
act_layer (nn.Module): Activation layer. Default: nn.GELU
drop (float): Dropout rate. Default: 0.0
"""
def __init__(
self,
in_features,
hidden_features=None,
out_features=None,
act_layer=nn.GELU,
drop=0.0,
):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.act = act_layer()
self.sg = SpatialGate(hidden_features // 2)
self.fc2 = nn.Linear(hidden_features // 2, out_features)
self.drop = nn.Dropout(drop)
def forward(self, x, H, W):
"""
Input: x: (B, H*W, C), H, W
Output: x: (B, H*W, C)
"""
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.sg(x, H, W)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
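# Illustrative sketch (not part of the original module): SGFN preserves the
# token shape, halving the hidden width inside the spatial gate. Sizes here
# are arbitrary.
def _demo_sgfn():
    ffn = SGFN(in_features=32, hidden_features=64)
    x = torch.randn(2, 8 * 8, 32)  # (B, H*W, C)
    assert ffn(x, 8, 8).shape == (2, 64, 32)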
class DynamicPosBias(nn.Module):
# The implementation builds on Crossformer code https://github.com/cheerss/CrossFormer/blob/main/models/crossformer.py
"""Dynamic Relative Position Bias.
Args:
dim (int): Number of input channels.
num_heads (int): Number of attention heads.
        residual (bool): If True, use a residual strategy to connect the convolutions.
"""
def __init__(self, dim, num_heads, residual):
super().__init__()
self.residual = residual
self.num_heads = num_heads
self.pos_dim = dim // 4
self.pos_proj = nn.Linear(2, self.pos_dim)
self.pos1 = nn.Sequential(
nn.LayerNorm(self.pos_dim),
nn.ReLU(inplace=True),
nn.Linear(self.pos_dim, self.pos_dim),
)
self.pos2 = nn.Sequential(
nn.LayerNorm(self.pos_dim),
nn.ReLU(inplace=True),
nn.Linear(self.pos_dim, self.pos_dim),
)
self.pos3 = nn.Sequential(
nn.LayerNorm(self.pos_dim),
nn.ReLU(inplace=True),
nn.Linear(self.pos_dim, self.num_heads),
)
def forward(self, biases):
if self.residual:
pos = self.pos_proj(biases) # 2Gh-1 * 2Gw-1, heads
pos = pos + self.pos1(pos)
pos = pos + self.pos2(pos)
pos = self.pos3(pos)
else:
pos = self.pos3(self.pos2(self.pos1(self.pos_proj(biases))))
return pos
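# Illustrative sketch (not part of the original module): DynamicPosBias maps a
# table of 2D relative offsets to one bias value per attention head. For an
# 8x8 window there are (2*8-1)**2 = 225 possible offsets.
def _demo_dynamic_pos_bias():
    dpb = DynamicPosBias(dim=32, num_heads=4, residual=False)
    biases = torch.randn(225, 2)  # (num_offsets, 2)
    assert dpb(biases).shape == (225, 4)  # (num_offsets, num_heads)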
class Spatial_Attention(nn.Module):
"""Spatial Window Self-Attention.
    It supports rectangular windows (including square windows).
Args:
dim (int): Number of input channels.
        idx (int): The index of the window. (0/1)
split_size (tuple(int)): Height and Width of spatial window.
dim_out (int | None): The dimension of the attention output. Default: None
num_heads (int): Number of attention heads. Default: 6
attn_drop (float): Dropout ratio of attention weight. Default: 0.0
proj_drop (float): Dropout ratio of output. Default: 0.0
qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set
        position_bias (bool): Whether to use dynamic relative position bias. Default: True
"""
def __init__(
self,
dim,
idx,
split_size=[8, 8],
dim_out=None,
num_heads=6,
attn_drop=0.0,
proj_drop=0.0,
qk_scale=None,
position_bias=True,
):
super().__init__()
self.dim = dim
self.dim_out = dim_out or dim
self.split_size = split_size
self.num_heads = num_heads
self.idx = idx
self.position_bias = position_bias
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
if idx == 0:
H_sp, W_sp = self.split_size[0], self.split_size[1]
elif idx == 1:
W_sp, H_sp = self.split_size[0], self.split_size[1]
        else:
            raise ValueError(f"ERROR MODE {idx}")
self.H_sp = H_sp
self.W_sp = W_sp
if self.position_bias:
self.pos = DynamicPosBias(self.dim // 4, self.num_heads, residual=False)
# generate mother-set
position_bias_h = torch.arange(1 - self.H_sp, self.H_sp)
position_bias_w = torch.arange(1 - self.W_sp, self.W_sp)
biases = torch.stack(torch.meshgrid([position_bias_h, position_bias_w]))
biases = biases.flatten(1).transpose(0, 1).contiguous().float()
self.register_buffer("rpe_biases", biases)
# get pair-wise relative position index for each token inside the window
coords_h = torch.arange(self.H_sp)
coords_w = torch.arange(self.W_sp)
coords = torch.stack(torch.meshgrid([coords_h, coords_w]))
coords_flatten = torch.flatten(coords, 1)
relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
relative_coords = relative_coords.permute(1, 2, 0).contiguous()
relative_coords[:, :, 0] += self.H_sp - 1
relative_coords[:, :, 1] += self.W_sp - 1
relative_coords[:, :, 0] *= 2 * self.W_sp - 1
relative_position_index = relative_coords.sum(-1)
self.register_buffer("relative_position_index", relative_position_index)
self.attn_drop = nn.Dropout(attn_drop)
def im2win(self, x, H, W):
B, N, C = x.shape
x = x.transpose(-2, -1).contiguous().view(B, C, H, W)
x = img2windows(x, self.H_sp, self.W_sp)
x = (
x.reshape(-1, self.H_sp * self.W_sp, self.num_heads, C // self.num_heads)
.permute(0, 2, 1, 3)
.contiguous()
)
return x
def forward(self, qkv, H, W, mask=None):
"""
        Input: qkv: (3, B, L, C), H, W, mask: (nW, N, N), N is the window size
Output: x (B, H, W, C)
"""
q, k, v = qkv[0], qkv[1], qkv[2]
B, L, C = q.shape
assert L == H * W, "flatten img_tokens has wrong size"
# partition the q,k,v, image to window
q = self.im2win(q, H, W)
k = self.im2win(k, H, W)
v = self.im2win(v, H, W)
q = q * self.scale
attn = q @ k.transpose(-2, -1) # B head N C @ B head C N --> B head N N
# calculate drpe
if self.position_bias:
pos = self.pos(self.rpe_biases)
# select position bias
relative_position_bias = pos[self.relative_position_index.view(-1)].view(
self.H_sp * self.W_sp, self.H_sp * self.W_sp, -1
)
relative_position_bias = relative_position_bias.permute(
2, 0, 1
).contiguous()
attn = attn + relative_position_bias.unsqueeze(0)
N = attn.shape[3]
# use mask for shift window
if mask is not None:
nW = mask.shape[0]
attn = attn.view(B, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(
0
)
attn = attn.view(-1, self.num_heads, N, N)
attn = nn.functional.softmax(attn, dim=-1, dtype=attn.dtype)
attn = self.attn_drop(attn)
x = attn @ v
x = x.transpose(1, 2).reshape(
-1, self.H_sp * self.W_sp, C
) # B head N N @ B head N C
# merge the window, window to image
x = windows2img(x, self.H_sp, self.W_sp, H, W) # B H' W' C
return x
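# Illustrative sketch (not part of the original module): Spatial_Attention
# consumes a stacked qkv tensor of shape (3, B, H*W, C) and returns the
# attended feature map in (B, H, W, C) layout. Sizes here are arbitrary.
def _demo_spatial_attention():
    attn = Spatial_Attention(dim=32, idx=0, split_size=[8, 8], num_heads=4)
    B, H, W = 2, 16, 16
    qkv = torch.randn(3, B, H * W, 32)
    assert attn(qkv, H, W).shape == (B, H, W, 32)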
class Adaptive_Spatial_Attention(nn.Module):
# The implementation builds on CAT code https://github.com/Zhengchen1999/CAT
"""Adaptive Spatial Self-Attention
Args:
dim (int): Number of input channels.
num_heads (int): Number of attention heads. Default: 6
split_size (tuple(int)): Height and Width of spatial window.
shift_size (tuple(int)): Shift size for spatial window.
qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set.
drop (float): Dropout rate. Default: 0.0
attn_drop (float): Attention dropout rate. Default: 0.0
        rg_idx (int): The index of the Residual Group (RG)
        b_idx (int): The index of the Block in each RG
"""
def __init__(
self,
dim,
num_heads,
reso=64,
split_size=[8, 8],
shift_size=[1, 2],
qkv_bias=False,
qk_scale=None,
drop=0.0,
attn_drop=0.0,
rg_idx=0,
b_idx=0,
):
super().__init__()
self.dim = dim
self.num_heads = num_heads
self.split_size = split_size
self.shift_size = shift_size
self.b_idx = b_idx
self.rg_idx = rg_idx
self.patches_resolution = reso
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        assert (
            0 <= self.shift_size[0] < self.split_size[0]
        ), "shift_size must be in the range [0, split_size[0])"
        assert (
            0 <= self.shift_size[1] < self.split_size[1]
        ), "shift_size must be in the range [0, split_size[1])"
self.branch_num = 2
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(drop)
self.attns = nn.ModuleList(
[
Spatial_Attention(
dim // 2,
idx=i,
split_size=split_size,
num_heads=num_heads // 2,
dim_out=dim // 2,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop,
position_bias=True,
)
for i in range(self.branch_num)
]
)
if (self.rg_idx % 2 == 0 and self.b_idx > 0 and (self.b_idx - 2) % 4 == 0) or (
self.rg_idx % 2 != 0 and self.b_idx % 4 == 0
):
attn_mask = self.calculate_mask(
self.patches_resolution, self.patches_resolution
)
self.register_buffer("attn_mask_0", attn_mask[0])
self.register_buffer("attn_mask_1", attn_mask[1])
else:
attn_mask = None
self.register_buffer("attn_mask_0", None)
self.register_buffer("attn_mask_1", None)
self.dwconv = nn.Sequential(
nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim),
nn.BatchNorm2d(dim),
nn.GELU(),
)
self.channel_interaction = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(dim, dim // 8, kernel_size=1),
nn.BatchNorm2d(dim // 8),
nn.GELU(),
nn.Conv2d(dim // 8, dim, kernel_size=1),
)
self.spatial_interaction = nn.Sequential(
nn.Conv2d(dim, dim // 16, kernel_size=1),
nn.BatchNorm2d(dim // 16),
nn.GELU(),
nn.Conv2d(dim // 16, 1, kernel_size=1),
)
def calculate_mask(self, H, W):
# The implementation builds on Swin Transformer code https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
# calculate attention mask for shift window
img_mask_0 = torch.zeros((1, H, W, 1)) # 1 H W 1 idx=0
img_mask_1 = torch.zeros((1, H, W, 1)) # 1 H W 1 idx=1
h_slices_0 = (
slice(0, -self.split_size[0]),
slice(-self.split_size[0], -self.shift_size[0]),
slice(-self.shift_size[0], None),
)
w_slices_0 = (
slice(0, -self.split_size[1]),
slice(-self.split_size[1], -self.shift_size[1]),
slice(-self.shift_size[1], None),
)
h_slices_1 = (
slice(0, -self.split_size[1]),
slice(-self.split_size[1], -self.shift_size[1]),
slice(-self.shift_size[1], None),
)
w_slices_1 = (
slice(0, -self.split_size[0]),
slice(-self.split_size[0], -self.shift_size[0]),
slice(-self.shift_size[0], None),
)
cnt = 0
for h in h_slices_0:
for w in w_slices_0:
img_mask_0[:, h, w, :] = cnt
cnt += 1
cnt = 0
for h in h_slices_1:
for w in w_slices_1:
img_mask_1[:, h, w, :] = cnt
cnt += 1
# calculate mask for window-0
img_mask_0 = img_mask_0.view(
1,
H // self.split_size[0],
self.split_size[0],
W // self.split_size[1],
self.split_size[1],
1,
)
img_mask_0 = (
img_mask_0.permute(0, 1, 3, 2, 4, 5)
.contiguous()
.view(-1, self.split_size[0], self.split_size[1], 1)
) # nW, sw[0], sw[1], 1
mask_windows_0 = img_mask_0.view(-1, self.split_size[0] * self.split_size[1])
attn_mask_0 = mask_windows_0.unsqueeze(1) - mask_windows_0.unsqueeze(2)
attn_mask_0 = attn_mask_0.masked_fill(
attn_mask_0 != 0, float(-100.0)
).masked_fill(attn_mask_0 == 0, float(0.0))
# calculate mask for window-1
img_mask_1 = img_mask_1.view(
1,
H // self.split_size[1],
self.split_size[1],
W // self.split_size[0],
self.split_size[0],
1,
)
img_mask_1 = (
img_mask_1.permute(0, 1, 3, 2, 4, 5)
.contiguous()
.view(-1, self.split_size[1], self.split_size[0], 1)
) # nW, sw[1], sw[0], 1
mask_windows_1 = img_mask_1.view(-1, self.split_size[1] * self.split_size[0])
attn_mask_1 = mask_windows_1.unsqueeze(1) - mask_windows_1.unsqueeze(2)
attn_mask_1 = attn_mask_1.masked_fill(
attn_mask_1 != 0, float(-100.0)
).masked_fill(attn_mask_1 == 0, float(0.0))
return attn_mask_0, attn_mask_1
def forward(self, x, H, W):
"""
Input: x: (B, H*W, C), H, W
Output: x: (B, H*W, C)
"""
B, L, C = x.shape
assert L == H * W, "flatten img_tokens has wrong size"
qkv = self.qkv(x).reshape(B, -1, 3, C).permute(2, 0, 1, 3) # 3, B, HW, C
# V without partition
v = qkv[2].transpose(-2, -1).contiguous().view(B, C, H, W)
# image padding
max_split_size = max(self.split_size[0], self.split_size[1])
pad_l = pad_t = 0
pad_r = (max_split_size - W % max_split_size) % max_split_size
pad_b = (max_split_size - H % max_split_size) % max_split_size
qkv = qkv.reshape(3 * B, H, W, C).permute(0, 3, 1, 2) # 3B C H W
qkv = (
F.pad(qkv, (pad_l, pad_r, pad_t, pad_b))
.reshape(3, B, C, -1)
.transpose(-2, -1)
) # l r t b
_H = pad_b + H
_W = pad_r + W
_L = _H * _W
# window-0 and window-1 on split channels [C/2, C/2]; for square windows (e.g., 8x8), window-0 and window-1 can be merged
# shift in block: (0, 4, 8, ...), (2, 6, 10, ...), (0, 4, 8, ...), (2, 6, 10, ...), ...
if (self.rg_idx % 2 == 0 and self.b_idx > 0 and (self.b_idx - 2) % 4 == 0) or (
self.rg_idx % 2 != 0 and self.b_idx % 4 == 0
):
qkv = qkv.view(3, B, _H, _W, C)
qkv_0 = torch.roll(
qkv[:, :, :, :, : C // 2],
shifts=(-self.shift_size[0], -self.shift_size[1]),
dims=(2, 3),
)
qkv_0 = qkv_0.view(3, B, _L, C // 2)
qkv_1 = torch.roll(
qkv[:, :, :, :, C // 2 :],
shifts=(-self.shift_size[1], -self.shift_size[0]),
dims=(2, 3),
)
qkv_1 = qkv_1.view(3, B, _L, C // 2)
if self.patches_resolution != _H or self.patches_resolution != _W:
mask_tmp = self.calculate_mask(_H, _W)
x1_shift = self.attns[0](qkv_0, _H, _W, mask=mask_tmp[0].to(x.device))
x2_shift = self.attns[1](qkv_1, _H, _W, mask=mask_tmp[1].to(x.device))
else:
x1_shift = self.attns[0](qkv_0, _H, _W, mask=self.attn_mask_0)
x2_shift = self.attns[1](qkv_1, _H, _W, mask=self.attn_mask_1)
x1 = torch.roll(
x1_shift, shifts=(self.shift_size[0], self.shift_size[1]), dims=(1, 2)
)
x2 = torch.roll(
x2_shift, shifts=(self.shift_size[1], self.shift_size[0]), dims=(1, 2)
)
x1 = x1[:, :H, :W, :].reshape(B, L, C // 2)
x2 = x2[:, :H, :W, :].reshape(B, L, C // 2)
# attention output
attened_x = torch.cat([x1, x2], dim=2)
else:
x1 = self.attns[0](qkv[:, :, :, : C // 2], _H, _W)[:, :H, :W, :].reshape(
B, L, C // 2
)
x2 = self.attns[1](qkv[:, :, :, C // 2 :], _H, _W)[:, :H, :W, :].reshape(
B, L, C // 2
)
# attention output
attened_x = torch.cat([x1, x2], dim=2)
# convolution output
conv_x = self.dwconv(v)
# Adaptive Interaction Module (AIM)
# C-Map (before sigmoid)
channel_map = (
self.channel_interaction(conv_x)
.permute(0, 2, 3, 1)
.contiguous()
.view(B, 1, C)
)
# S-Map (before sigmoid)
attention_reshape = attened_x.transpose(-2, -1).contiguous().view(B, C, H, W)
spatial_map = self.spatial_interaction(attention_reshape)
# C-I
attened_x = attened_x * torch.sigmoid(channel_map)
# S-I
conv_x = torch.sigmoid(spatial_map) * conv_x
conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(B, L, C)
x = attened_x + conv_x
x = self.proj(x)
x = self.proj_drop(x)
return x
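# Illustrative sketch (not part of the original module): the adaptive spatial
# branch runs two rectangle-window attentions on split channels plus a
# depth-wise conv branch, then fuses them through the AIM maps. With the
# default rg_idx=0 and b_idx=0 the non-shifted path is taken. Sizes here are
# arbitrary.
def _demo_adaptive_spatial_attention():
    attn = Adaptive_Spatial_Attention(dim=32, num_heads=4, reso=8, split_size=[2, 4])
    x = torch.randn(1, 8 * 8, 32)  # (B, H*W, C)
    assert attn(x, 8, 8).shape == (1, 64, 32)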
class Adaptive_Channel_Attention(nn.Module):
# The implementation builds on XCiT code https://github.com/facebookresearch/xcit
"""Adaptive Channel Self-Attention
Args:
dim (int): Number of input channels.
num_heads (int): Number of attention heads. Default: 6
qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set.
attn_drop (float): Attention dropout rate. Default: 0.0
drop_path (float): Stochastic depth rate. Default: 0.0
"""
def __init__(
self,
dim,
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop=0.0,
proj_drop=0.0,
):
super().__init__()
self.num_heads = num_heads
self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1))
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.dwconv = nn.Sequential(
nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim),
nn.BatchNorm2d(dim),
nn.GELU(),
)
self.channel_interaction = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(dim, dim // 8, kernel_size=1),
nn.BatchNorm2d(dim // 8),
nn.GELU(),
nn.Conv2d(dim // 8, dim, kernel_size=1),
)
self.spatial_interaction = nn.Sequential(
nn.Conv2d(dim, dim // 16, kernel_size=1),
nn.BatchNorm2d(dim // 16),
nn.GELU(),
nn.Conv2d(dim // 16, 1, kernel_size=1),
)
def forward(self, x, H, W):
"""
Input: x: (B, H*W, C), H, W
Output: x: (B, H*W, C)
"""
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
qkv = qkv.permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2]
q = q.transpose(-2, -1)
k = k.transpose(-2, -1)
v = v.transpose(-2, -1)
v_ = v.reshape(B, C, N).contiguous().view(B, C, H, W)
q = torch.nn.functional.normalize(q, dim=-1)
k = torch.nn.functional.normalize(k, dim=-1)
attn = (q @ k.transpose(-2, -1)) * self.temperature
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
# attention output
attened_x = (attn @ v).permute(0, 3, 1, 2).reshape(B, N, C)
# convolution output
conv_x = self.dwconv(v_)
# Adaptive Interaction Module (AIM)
# C-Map (before sigmoid)
attention_reshape = attened_x.transpose(-2, -1).contiguous().view(B, C, H, W)
channel_map = self.channel_interaction(attention_reshape)
# S-Map (before sigmoid)
spatial_map = (
self.spatial_interaction(conv_x)
.permute(0, 2, 3, 1)
.contiguous()
.view(B, N, 1)
)
# S-I
attened_x = attened_x * torch.sigmoid(spatial_map)
# C-I
conv_x = conv_x * torch.sigmoid(channel_map)
conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(B, N, C)
x = attened_x + conv_x
x = self.proj(x)
x = self.proj_drop(x)
return x
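# Illustrative sketch (not part of the original module): the channel branch
# attends across channels (transposed attention), so the sequence length only
# enters through the conv branch. Sizes here are arbitrary.
def _demo_adaptive_channel_attention():
    attn = Adaptive_Channel_Attention(dim=32, num_heads=4)
    x = torch.randn(2, 8 * 8, 32)  # (B, H*W, C)
    assert attn(x, 8, 8).shape == (2, 64, 32)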
class DATB(nn.Module):
def __init__(
self,
dim,
num_heads,
reso=64,
split_size=[2, 4],
shift_size=[1, 2],
expansion_factor=4.0,
qkv_bias=False,
qk_scale=None,
drop=0.0,
attn_drop=0.0,
drop_path=0.0,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
rg_idx=0,
b_idx=0,
):
super().__init__()
self.norm1 = norm_layer(dim)
if b_idx % 2 == 0:
# DSTB
self.attn = Adaptive_Spatial_Attention(
dim,
num_heads=num_heads,
reso=reso,
split_size=split_size,
shift_size=shift_size,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop,
attn_drop=attn_drop,
rg_idx=rg_idx,
b_idx=b_idx,
)
else:
# DCTB
self.attn = Adaptive_Channel_Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop,
)
self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
ffn_hidden_dim = int(dim * expansion_factor)
self.ffn = SGFN(
in_features=dim,
hidden_features=ffn_hidden_dim,
out_features=dim,
act_layer=act_layer,
)
self.norm2 = norm_layer(dim)
def forward(self, x, x_size):
"""
Input: x: (B, H*W, C), x_size: (H, W)
Output: x: (B, H*W, C)
"""
H, W = x_size
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
x = x + self.drop_path(self.ffn(self.norm2(x), H, W))
return x
class ResidualGroup(nn.Module):
"""ResidualGroup
Args:
dim (int): Number of input channels.
reso (int): Input resolution.
num_heads (int): Number of attention heads.
split_size (tuple(int)): Height and Width of spatial window.
expansion_factor (float): Ratio of ffn hidden dim to embedding dim.
qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. Default: None
drop (float): Dropout rate. Default: 0
        attn_drop (float): Attention dropout rate. Default: 0
        drop_paths (list(float) | None): Stochastic depth rates, one per block.
act_layer (nn.Module): Activation layer. Default: nn.GELU
norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm
depth (int): Number of dual aggregation Transformer blocks in residual group.
use_chk (bool): Whether to use checkpointing to save memory.
resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
"""
def __init__(
self,
dim,
reso,
num_heads,
split_size=[2, 4],
expansion_factor=4.0,
qkv_bias=False,
qk_scale=None,
drop=0.0,
attn_drop=0.0,
drop_paths=None,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
depth=2,
use_chk=False,
resi_connection="1conv",
rg_idx=0,
):
super().__init__()
self.use_chk = use_chk
self.reso = reso
self.blocks = nn.ModuleList(
[
DATB(
dim=dim,
num_heads=num_heads,
reso=reso,
split_size=split_size,
shift_size=[split_size[0] // 2, split_size[1] // 2],
expansion_factor=expansion_factor,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop,
attn_drop=attn_drop,
drop_path=drop_paths[i],
act_layer=act_layer,
norm_layer=norm_layer,
rg_idx=rg_idx,
b_idx=i,
)
for i in range(depth)
]
)
if resi_connection == "1conv":
self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
elif resi_connection == "3conv":
self.conv = nn.Sequential(
nn.Conv2d(dim, dim // 4, 3, 1, 1),
nn.LeakyReLU(negative_slope=0.2, inplace=True),
nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
nn.LeakyReLU(negative_slope=0.2, inplace=True),
nn.Conv2d(dim // 4, dim, 3, 1, 1),
)
def forward(self, x, x_size):
"""
Input: x: (B, H*W, C), x_size: (H, W)
Output: x: (B, H*W, C)
"""
H, W = x_size
res = x
for blk in self.blocks:
if self.use_chk:
x = checkpoint.checkpoint(blk, x, x_size)
else:
x = blk(x, x_size)
x = rearrange(x, "b (h w) c -> b c h w", h=H, w=W)
x = self.conv(x)
x = rearrange(x, "b c h w -> b (h w) c")
x = res + x
return x
class Upsample(nn.Sequential):
"""Upsample module.
Args:
scale (int): Scale factor. Supported scales: 2^n and 3.
num_feat (int): Channel number of intermediate features.
"""
def __init__(self, scale, num_feat):
m = []
if (scale & (scale - 1)) == 0: # scale = 2^n
for _ in range(int(math.log(scale, 2))):
m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
m.append(nn.PixelShuffle(2))
elif scale == 3:
m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
m.append(nn.PixelShuffle(3))
else:
raise ValueError(
f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
)
super(Upsample, self).__init__(*m)
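# Illustrative sketch (not part of the original module): a 4x Upsample is two
# (conv -> PixelShuffle(2)) stages, each trading 4*num_feat channels for a 2x
# larger spatial grid. Sizes here are arbitrary.
def _demo_upsample():
    up = Upsample(scale=4, num_feat=16)
    x = torch.randn(1, 16, 8, 8)
    assert up(x).shape == (1, 16, 32, 32)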
class UpsampleOneStep(nn.Sequential):
"""UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
Used in lightweight SR to save parameters.
Args:
scale (int): Scale factor. Supported scales: 2^n and 3.
num_feat (int): Channel number of intermediate features.
"""
def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
self.num_feat = num_feat
self.input_resolution = input_resolution
m = []
m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1))
m.append(nn.PixelShuffle(scale))
super(UpsampleOneStep, self).__init__(*m)
def flops(self):
h, w = self.input_resolution
flops = h * w * self.num_feat * 3 * 9
return flops
class DAT(nn.Module):
"""Dual Aggregation Transformer
Args:
img_size (int): Input image size. Default: 64
in_chans (int): Number of input image channels. Default: 3
embed_dim (int): Patch embedding dimension. Default: 180
depths (tuple(int)): Depth of each residual group (number of DATB in each RG).
split_size (tuple(int)): Height and Width of spatial window.
num_heads (tuple(int)): Number of attention heads in different residual groups.
expansion_factor (float): Ratio of ffn hidden dim to embedding dim. Default: 4
qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. Default: None
drop_rate (float): Dropout rate. Default: 0
attn_drop_rate (float): Attention dropout rate. Default: 0
drop_path_rate (float): Stochastic depth rate. Default: 0.1
act_layer (nn.Module): Activation layer. Default: nn.GELU
norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm
use_chk (bool): Whether to use checkpointing to save memory.
upscale: Upscale factor. 2/3/4 for image SR
img_range: Image range. 1. or 255.
resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
"""
def __init__(self, state_dict):
super().__init__()
# defaults
img_size = 64
in_chans = 3
embed_dim = 180
split_size = [2, 4]
depth = [2, 2, 2, 2]
num_heads = [2, 2, 2, 2]
expansion_factor = 4.0
qkv_bias = True
qk_scale = None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
act_layer = nn.GELU
norm_layer = nn.LayerNorm
use_chk = False
upscale = 2
img_range = 1.0
resi_connection = "1conv"
upsampler = "pixelshuffle"
self.model_arch = "DAT"
self.sub_type = "SR"
self.state = state_dict
state_keys = state_dict.keys()
if "conv_before_upsample.0.weight" in state_keys:
if "conv_up1.weight" in state_keys:
upsampler = "nearest+conv"
else:
upsampler = "pixelshuffle"
                self.supports_fp16 = False
elif "upsample.0.weight" in state_keys:
upsampler = "pixelshuffledirect"
else:
upsampler = ""
        # conv_before_upsample.0 is Conv2d(embed_dim, num_feat), so shape[0] is num_feat
        num_feat = (
            state_dict["conv_before_upsample.0.weight"].shape[0]
            if "conv_before_upsample.0.weight" in state_keys
            else 64
        )
num_in_ch = state_dict["conv_first.weight"].shape[1]
in_chans = num_in_ch
if "conv_last.weight" in state_keys:
num_out_ch = state_dict["conv_last.weight"].shape[0]
else:
num_out_ch = num_in_ch
upscale = 1
if upsampler == "nearest+conv":
upsample_keys = [
x for x in state_keys if "conv_up" in x and "bias" not in x
]
            for _ in upsample_keys:
                upscale *= 2
elif upsampler == "pixelshuffle":
upsample_keys = [
x
for x in state_keys
if "upsample" in x and "conv" not in x and "bias" not in x
]
for upsample_key in upsample_keys:
shape = state_dict[upsample_key].shape[0]
upscale *= math.sqrt(shape // num_feat)
upscale = int(upscale)
elif upsampler == "pixelshuffledirect":
upscale = int(
math.sqrt(state_dict["upsample.0.bias"].shape[0] // num_out_ch)
)
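            # e.g. a 4x pixelshuffledirect RGB model stores upsample.0.bias with
            # (4**2) * 3 = 48 entries, so int(math.sqrt(48 // 3)) recovers upscale = 4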
max_layer_num = 0
max_block_num = 0
for key in state_keys:
result = re.match(r"layers.(\d*).blocks.(\d*).norm1.weight", key)
if result:
layer_num, block_num = result.groups()
max_layer_num = max(max_layer_num, int(layer_num))
max_block_num = max(max_block_num, int(block_num))
depth = [max_block_num + 1 for _ in range(max_layer_num + 1)]
if "layers.0.blocks.1.attn.temperature" in state_keys:
num_heads_num = state_dict["layers.0.blocks.1.attn.temperature"].shape[0]
num_heads = [num_heads_num for _ in range(max_layer_num + 1)]
else:
num_heads = depth
embed_dim = state_dict["conv_first.weight"].shape[0]
expansion_factor = float(
state_dict["layers.0.blocks.0.ffn.fc1.weight"].shape[0] / embed_dim
)
# TODO: could actually count the layers, but this should do
if "layers.0.conv.4.weight" in state_keys:
resi_connection = "3conv"
else:
resi_connection = "1conv"
if "layers.0.blocks.2.attn.attn_mask_0" in state_keys:
            attn_mask_0_x, attn_mask_0_y, _ = state_dict[
"layers.0.blocks.2.attn.attn_mask_0"
].shape
img_size = int(math.sqrt(attn_mask_0_x * attn_mask_0_y))
if "layers.0.blocks.0.attn.attns.0.rpe_biases" in state_keys:
split_sizes = (
state_dict["layers.0.blocks.0.attn.attns.0.rpe_biases"][-1] + 1
)
split_size = [int(x) for x in split_sizes]
self.in_nc = num_in_ch
self.out_nc = num_out_ch
self.num_feat = num_feat
self.embed_dim = embed_dim
self.num_heads = num_heads
self.depth = depth
self.scale = upscale
self.upsampler = upsampler
self.img_size = img_size
self.img_range = img_range
self.expansion_factor = expansion_factor
self.resi_connection = resi_connection
self.split_size = split_size
self.supports_fp16 = False # Too much weirdness to support this at the moment
self.supports_bfp16 = True
self.min_size_restriction = 16
num_in_ch = in_chans
num_out_ch = in_chans
num_feat = 64
self.img_range = img_range
if in_chans == 3:
rgb_mean = (0.4488, 0.4371, 0.4040)
self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
else:
self.mean = torch.zeros(1, 1, 1, 1)
self.upscale = upscale
self.upsampler = upsampler
# ------------------------- 1, Shallow Feature Extraction ------------------------- #
self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
# ------------------------- 2, Deep Feature Extraction ------------------------- #
self.num_layers = len(depth)
self.use_chk = use_chk
self.num_features = (
self.embed_dim
) = embed_dim # num_features for consistency with other models
heads = num_heads
self.before_RG = nn.Sequential(
Rearrange("b c h w -> b (h w) c"), nn.LayerNorm(embed_dim)
)
curr_dim = embed_dim
dpr = [
x.item() for x in torch.linspace(0, drop_path_rate, np.sum(depth))
] # stochastic depth decay rule
self.layers = nn.ModuleList()
for i in range(self.num_layers):
layer = ResidualGroup(
dim=embed_dim,
num_heads=heads[i],
reso=img_size,
split_size=split_size,
expansion_factor=expansion_factor,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_paths=dpr[sum(depth[:i]) : sum(depth[: i + 1])],
act_layer=act_layer,
norm_layer=norm_layer,
depth=depth[i],
use_chk=use_chk,
resi_connection=resi_connection,
rg_idx=i,
)
self.layers.append(layer)
self.norm = norm_layer(curr_dim)
# build the last conv layer in deep feature extraction
if resi_connection == "1conv":
self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
elif resi_connection == "3conv":
# to save parameters and memory
self.conv_after_body = nn.Sequential(
nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
nn.LeakyReLU(negative_slope=0.2, inplace=True),
nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
nn.LeakyReLU(negative_slope=0.2, inplace=True),
nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1),
)
# ------------------------- 3, Reconstruction ------------------------- #
if self.upsampler == "pixelshuffle":
# for classical SR
self.conv_before_upsample = nn.Sequential(
nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
)
self.upsample = Upsample(upscale, num_feat)
self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
elif self.upsampler == "pixelshuffledirect":
# for lightweight SR (to save parameters)
self.upsample = UpsampleOneStep(
upscale, embed_dim, num_out_ch, (img_size, img_size)
)
self.apply(self._init_weights)
self.load_state_dict(state_dict, strict=True)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight, std=0.02)
if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(
m, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm, nn.InstanceNorm2d)
):
nn.init.constant_(m.bias, 0)
nn.init.constant_(m.weight, 1.0)
def forward_features(self, x):
_, _, H, W = x.shape
x_size = [H, W]
x = self.before_RG(x)
for layer in self.layers:
x = layer(x, x_size)
x = self.norm(x)
x = rearrange(x, "b (h w) c -> b c h w", h=H, w=W)
return x
def forward(self, x):
"""
Input: x: (B, C, H, W)
"""
self.mean = self.mean.type_as(x)
x = (x - self.mean) * self.img_range
if self.upsampler == "pixelshuffle":
# for image SR
x = self.conv_first(x)
x = self.conv_after_body(self.forward_features(x)) + x
x = self.conv_before_upsample(x)
x = self.conv_last(self.upsample(x))
elif self.upsampler == "pixelshuffledirect":
# for lightweight SR
x = self.conv_first(x)
x = self.conv_after_body(self.forward_features(x)) + x
x = self.upsample(x)
x = x / self.img_range + self.mean
return x
# pylint: skip-file
# HAT from https://github.com/XPixelGroup/HAT/blob/main/hat/archs/hat_arch.py
import math
import re
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
from .timm.helpers import to_2tuple
from .timm.weight_init import trunc_normal_
def drop_path(x, drop_prob: float = 0.0, training: bool = False):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py
"""
if drop_prob == 0.0 or not training:
return x
keep_prob = 1 - drop_prob
shape = (x.shape[0],) + (1,) * (
x.ndim - 1
) # work with diff dim tensors, not just 2D ConvNets
random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
random_tensor.floor_() # binarize
output = x.div(keep_prob) * random_tensor
return output
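# Illustrative sketch (not part of the original file): with drop_prob=0.5 each
# sample in the batch is either zeroed or rescaled by 1/keep_prob = 2, so the
# expected value of the output matches the input.
def _demo_drop_path():
    x = torch.ones(4, 8, 8)
    out = drop_path(x, drop_prob=0.5, training=True)
    assert set(out.unique().tolist()) <= {0.0, 2.0}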
class DropPath(nn.Module):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py
"""
def __init__(self, drop_prob=None):
super(DropPath, self).__init__()
self.drop_prob = drop_prob
def forward(self, x):
return drop_path(x, self.drop_prob, self.training) # type: ignore
class ChannelAttention(nn.Module):
"""Channel attention used in RCAN.
Args:
num_feat (int): Channel number of intermediate features.
squeeze_factor (int): Channel squeeze factor. Default: 16.
"""
def __init__(self, num_feat, squeeze_factor=16):
super(ChannelAttention, self).__init__()
self.attention = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(num_feat, num_feat // squeeze_factor, 1, padding=0),
nn.ReLU(inplace=True),
nn.Conv2d(num_feat // squeeze_factor, num_feat, 1, padding=0),
nn.Sigmoid(),
)
def forward(self, x):
y = self.attention(x)
return x * y
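# Illustrative sketch (not part of the original file): ChannelAttention is a
# squeeze-and-excitation gate, so it only rescales channels and preserves the
# input shape. Sizes here are arbitrary.
def _demo_channel_attention():
    ca = ChannelAttention(num_feat=64, squeeze_factor=16)
    x = torch.randn(1, 64, 24, 24)
    assert ca(x).shape == x.shape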
class CAB(nn.Module):
def __init__(self, num_feat, compress_ratio=3, squeeze_factor=30):
super(CAB, self).__init__()
self.cab = nn.Sequential(
nn.Conv2d(num_feat, num_feat // compress_ratio, 3, 1, 1),
nn.GELU(),
nn.Conv2d(num_feat // compress_ratio, num_feat, 3, 1, 1),
ChannelAttention(num_feat, squeeze_factor),
)
def forward(self, x):
return self.cab(x)
class Mlp(nn.Module):
def __init__(
self,
in_features,
hidden_features=None,
out_features=None,
act_layer=nn.GELU,
drop=0.0,
):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
def window_partition(x, window_size):
"""
Args:
x: (b, h, w, c)
window_size (int): window size
Returns:
windows: (num_windows*b, window_size, window_size, c)
"""
b, h, w, c = x.shape
x = x.view(b, h // window_size, window_size, w // window_size, window_size, c)
windows = (
x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, c)
)
return windows
def window_reverse(windows, window_size, h, w):
"""
Args:
windows: (num_windows*b, window_size, window_size, c)
window_size (int): Window size
h (int): Height of image
w (int): Width of image
Returns:
x: (b, h, w, c)
"""
b = int(windows.shape[0] / (h * w / window_size / window_size))
x = windows.view(
b, h // window_size, w // window_size, window_size, window_size, -1
)
x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(b, h, w, -1)
return x
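# Illustrative sketch (not part of the original file): window_partition and
# window_reverse are exact inverses when h and w are multiples of the window
# size. Sizes here are arbitrary.
def _demo_hat_window_roundtrip():
    x = torch.randn(2, 28, 28, 64)  # (b, h, w, c)
    windows = window_partition(x, 7)  # (2 * 4 * 4, 7, 7, 64)
    assert torch.equal(window_reverse(windows, 7, 28, 28), x)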
class WindowAttention(nn.Module):
r"""Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both shifted and non-shifted windows.
Args:
dim (int): Number of input channels.
window_size (tuple[int]): The height and width of the window.
num_heads (int): Number of attention heads.
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
proj_drop (float, optional): Dropout ratio of output. Default: 0.0
"""
def __init__(
self,
dim,
window_size,
num_heads,
qkv_bias=True,
qk_scale=None,
attn_drop=0.0,
proj_drop=0.0,
):
super().__init__()
self.dim = dim
self.window_size = window_size # Wh, Ww
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
# define a parameter table of relative position bias
self.relative_position_bias_table = nn.Parameter( # type: ignore
torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)
) # 2*Wh-1 * 2*Ww-1, nH
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
trunc_normal_(self.relative_position_bias_table, std=0.02)
self.softmax = nn.Softmax(dim=-1)
def forward(self, x, rpi, mask=None):
"""
Args:
x: input features with shape of (num_windows*b, n, c)
mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
"""
b_, n, c = x.shape
qkv = (
self.qkv(x)
.reshape(b_, n, 3, self.num_heads, c // self.num_heads)
.permute(2, 0, 3, 1, 4)
)
q, k, v = (
qkv[0],
qkv[1],
qkv[2],
) # make torchscript happy (cannot use tensor as tuple)
q = q * self.scale
attn = q @ k.transpose(-2, -1)
relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view(
self.window_size[0] * self.window_size[1],
self.window_size[0] * self.window_size[1],
-1,
) # Wh*Ww,Wh*Ww,nH
relative_position_bias = relative_position_bias.permute(
2, 0, 1
).contiguous() # nH, Wh*Ww, Wh*Ww
attn = attn + relative_position_bias.unsqueeze(0)
if mask is not None:
nw = mask.shape[0]
attn = attn.view(b_ // nw, nw, self.num_heads, n, n) + mask.unsqueeze(
1
).unsqueeze(0)
attn = attn.view(-1, self.num_heads, n, n)
attn = self.softmax(attn)
else:
attn = self.softmax(attn)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(b_, n, c)
x = self.proj(x)
x = self.proj_drop(x)
return x
class HAB(nn.Module):
r"""Hybrid Attention Block.
Args:
dim (int): Number of input channels.
input_resolution (tuple[int]): Input resolution.
num_heads (int): Number of attention heads.
window_size (int): Window size.
shift_size (int): Shift size for SW-MSA.
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
drop (float, optional): Dropout rate. Default: 0.0
attn_drop (float, optional): Attention dropout rate. Default: 0.0
drop_path (float, optional): Stochastic depth rate. Default: 0.0
act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
"""
def __init__(
self,
dim,
input_resolution,
num_heads,
window_size=7,
shift_size=0,
compress_ratio=3,
squeeze_factor=30,
conv_scale=0.01,
mlp_ratio=4.0,
qkv_bias=True,
qk_scale=None,
drop=0.0,
attn_drop=0.0,
drop_path=0.0,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
):
super().__init__()
self.dim = dim
self.input_resolution = input_resolution
self.num_heads = num_heads
self.window_size = window_size
self.shift_size = shift_size
self.mlp_ratio = mlp_ratio
if min(self.input_resolution) <= self.window_size:
# if window size is larger than input resolution, we don't partition windows
self.shift_size = 0
self.window_size = min(self.input_resolution)
        assert (
            0 <= self.shift_size < self.window_size
        ), "shift_size must be in the range [0, window_size)"
self.norm1 = norm_layer(dim)
self.attn = WindowAttention(
dim,
window_size=to_2tuple(self.window_size),
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop,
)
self.conv_scale = conv_scale
self.conv_block = CAB(
num_feat=dim, compress_ratio=compress_ratio, squeeze_factor=squeeze_factor
)
self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(
in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop,
)
def forward(self, x, x_size, rpi_sa, attn_mask):
h, w = x_size
b, _, c = x.shape
# assert seq_len == h * w, "input feature has wrong size"
shortcut = x
x = self.norm1(x)
x = x.view(b, h, w, c)
# Conv_X
conv_x = self.conv_block(x.permute(0, 3, 1, 2))
conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(b, h * w, c)
# cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(
                x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)
            )
        else:
            shifted_x = x
            attn_mask = None
# partition windows
x_windows = window_partition(
shifted_x, self.window_size
) # nw*b, window_size, window_size, c
x_windows = x_windows.view(
-1, self.window_size * self.window_size, c
) # nw*b, window_size*window_size, c
        # W-MSA/SW-MSA (compatible with testing on images whose shapes are multiples of the window size)
attn_windows = self.attn(x_windows, rpi=rpi_sa, mask=attn_mask)
# merge windows
attn_windows = attn_windows.view(-1, self.window_size, self.window_size, c)
shifted_x = window_reverse(attn_windows, self.window_size, h, w) # b h' w' c
# reverse cyclic shift
if self.shift_size > 0:
attn_x = torch.roll(
shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)
)
else:
attn_x = shifted_x
attn_x = attn_x.view(b, h * w, c)
# FFN
x = shortcut + self.drop_path(attn_x) + conv_x * self.conv_scale
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
class PatchMerging(nn.Module):
r"""Patch Merging Layer.
Args:
input_resolution (tuple[int]): Resolution of input feature.
dim (int): Number of input channels.
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
"""
def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
super().__init__()
self.input_resolution = input_resolution
self.dim = dim
self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
self.norm = norm_layer(4 * dim)
def forward(self, x):
"""
x: b, h*w, c
"""
h, w = self.input_resolution
b, seq_len, c = x.shape
assert seq_len == h * w, "input feature has wrong size"
assert h % 2 == 0 and w % 2 == 0, f"x size ({h}*{w}) are not even."
x = x.view(b, h, w, c)
x0 = x[:, 0::2, 0::2, :] # b h/2 w/2 c
x1 = x[:, 1::2, 0::2, :] # b h/2 w/2 c
x2 = x[:, 0::2, 1::2, :] # b h/2 w/2 c
x3 = x[:, 1::2, 1::2, :] # b h/2 w/2 c
x = torch.cat([x0, x1, x2, x3], -1) # b h/2 w/2 4*c
x = x.view(b, -1, 4 * c) # b h/2*w/2 4*c
x = self.norm(x)
x = self.reduction(x)
return x
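# Illustrative sketch (not part of the original file): PatchMerging halves the
# spatial resolution and doubles the channel count. Sizes here are arbitrary.
def _demo_patch_merging():
    pm = PatchMerging(input_resolution=(8, 8), dim=32)
    x = torch.randn(2, 8 * 8, 32)  # (b, h*w, c)
    assert pm(x).shape == (2, 16, 64)  # (b, h/2 * w/2, 2*c)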
class OCAB(nn.Module):
# overlapping cross-attention block
def __init__(
self,
dim,
input_resolution,
window_size,
overlap_ratio,
num_heads,
qkv_bias=True,
qk_scale=None,
mlp_ratio=2,
norm_layer=nn.LayerNorm,
):
super().__init__()
self.dim = dim
self.input_resolution = input_resolution
self.window_size = window_size
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
self.overlap_win_size = int(window_size * overlap_ratio) + window_size
self.norm1 = norm_layer(dim)
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.unfold = nn.Unfold(
kernel_size=(self.overlap_win_size, self.overlap_win_size),
stride=window_size,
padding=(self.overlap_win_size - window_size) // 2,
)
# define a parameter table of relative position bias
self.relative_position_bias_table = nn.Parameter( # type: ignore
torch.zeros(
(window_size + self.overlap_win_size - 1)
* (window_size + self.overlap_win_size - 1),
num_heads,
)
        )  # (window_size + overlap_win_size - 1) * (window_size + overlap_win_size - 1), nH
trunc_normal_(self.relative_position_bias_table, std=0.02)
self.softmax = nn.Softmax(dim=-1)
self.proj = nn.Linear(dim, dim)
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(
in_features=dim, hidden_features=mlp_hidden_dim, act_layer=nn.GELU
)
def forward(self, x, x_size, rpi):
h, w = x_size
b, _, c = x.shape
shortcut = x
x = self.norm1(x)
x = x.view(b, h, w, c)
qkv = self.qkv(x).reshape(b, h, w, 3, c).permute(3, 0, 4, 1, 2) # 3, b, c, h, w
q = qkv[0].permute(0, 2, 3, 1) # b, h, w, c
kv = torch.cat((qkv[1], qkv[2]), dim=1) # b, 2*c, h, w
# partition windows
q_windows = window_partition(
q, self.window_size
) # nw*b, window_size, window_size, c
q_windows = q_windows.view(
-1, self.window_size * self.window_size, c
) # nw*b, window_size*window_size, c
        kv_windows = self.unfold(kv)  # b, 2*c*ow*ow, nw
kv_windows = rearrange(
kv_windows,
"b (nc ch owh oww) nw -> nc (b nw) (owh oww) ch",
nc=2,
ch=c,
owh=self.overlap_win_size,
oww=self.overlap_win_size,
).contiguous() # 2, nw*b, ow*ow, c
# Do the above rearrangement without the rearrange function
# kv_windows = kv_windows.view(
# 2, b, self.overlap_win_size, self.overlap_win_size, c, -1
# )
# kv_windows = kv_windows.permute(0, 5, 1, 2, 3, 4).contiguous()
# kv_windows = kv_windows.view(
# 2, -1, self.overlap_win_size * self.overlap_win_size, c
# )
k_windows, v_windows = kv_windows[0], kv_windows[1] # nw*b, ow*ow, c
b_, nq, _ = q_windows.shape
_, n, _ = k_windows.shape
d = self.dim // self.num_heads
q = q_windows.reshape(b_, nq, self.num_heads, d).permute(
0, 2, 1, 3
) # nw*b, nH, nq, d
k = k_windows.reshape(b_, n, self.num_heads, d).permute(
0, 2, 1, 3
) # nw*b, nH, n, d
v = v_windows.reshape(b_, n, self.num_heads, d).permute(
0, 2, 1, 3
) # nw*b, nH, n, d
q = q * self.scale
attn = q @ k.transpose(-2, -1)
relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view(
self.window_size * self.window_size,
self.overlap_win_size * self.overlap_win_size,
-1,
) # ws*ws, wse*wse, nH
relative_position_bias = relative_position_bias.permute(
2, 0, 1
).contiguous() # nH, ws*ws, wse*wse
attn = attn + relative_position_bias.unsqueeze(0)
attn = self.softmax(attn)
attn_windows = (attn @ v).transpose(1, 2).reshape(b_, nq, self.dim)
# merge windows
attn_windows = attn_windows.view(
-1, self.window_size, self.window_size, self.dim
)
x = window_reverse(attn_windows, self.window_size, h, w) # b h w c
x = x.view(b, h * w, self.dim)
x = self.proj(x) + shortcut
x = x + self.mlp(self.norm2(x))
return x
class AttenBlocks(nn.Module):
"""A series of attention blocks for one RHAG.
Args:
dim (int): Number of input channels.
input_resolution (tuple[int]): Input resolution.
depth (int): Number of blocks.
num_heads (int): Number of attention heads.
window_size (int): Local window size.
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
drop (float, optional): Dropout rate. Default: 0.0
attn_drop (float, optional): Attention dropout rate. Default: 0.0
drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
"""
def __init__(
self,
dim,
input_resolution,
depth,
num_heads,
window_size,
compress_ratio,
squeeze_factor,
conv_scale,
overlap_ratio,
mlp_ratio=4.0,
qkv_bias=True,
qk_scale=None,
drop=0.0,
attn_drop=0.0,
drop_path=0.0,
norm_layer=nn.LayerNorm,
downsample=None,
use_checkpoint=False,
):
super().__init__()
self.dim = dim
self.input_resolution = input_resolution
self.depth = depth
self.use_checkpoint = use_checkpoint
# build blocks
self.blocks = nn.ModuleList(
[
HAB(
dim=dim,
input_resolution=input_resolution,
num_heads=num_heads,
window_size=window_size,
shift_size=0 if (i % 2 == 0) else window_size // 2,
compress_ratio=compress_ratio,
squeeze_factor=squeeze_factor,
conv_scale=conv_scale,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop,
attn_drop=attn_drop,
drop_path=drop_path[i]
if isinstance(drop_path, list)
else drop_path,
norm_layer=norm_layer,
)
for i in range(depth)
]
)
# OCAB
self.overlap_attn = OCAB(
dim=dim,
input_resolution=input_resolution,
window_size=window_size,
overlap_ratio=overlap_ratio,
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
mlp_ratio=mlp_ratio, # type: ignore
norm_layer=norm_layer,
)
# patch merging layer
if downsample is not None:
self.downsample = downsample(
input_resolution, dim=dim, norm_layer=norm_layer
)
else:
self.downsample = None
def forward(self, x, x_size, params):
for blk in self.blocks:
x = blk(x, x_size, params["rpi_sa"], params["attn_mask"])
x = self.overlap_attn(x, x_size, params["rpi_oca"])
if self.downsample is not None:
x = self.downsample(x)
return x
class RHAG(nn.Module):
"""Residual Hybrid Attention Group (RHAG).
Args:
dim (int): Number of input channels.
input_resolution (tuple[int]): Input resolution.
depth (int): Number of blocks.
num_heads (int): Number of attention heads.
window_size (int): Local window size.
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
drop (float, optional): Dropout rate. Default: 0.0
attn_drop (float, optional): Attention dropout rate. Default: 0.0
drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
img_size: Input image size.
patch_size: Patch size.
resi_connection: The convolutional block before residual connection.
"""
def __init__(
self,
dim,
input_resolution,
depth,
num_heads,
window_size,
compress_ratio,
squeeze_factor,
conv_scale,
overlap_ratio,
mlp_ratio=4.0,
qkv_bias=True,
qk_scale=None,
drop=0.0,
attn_drop=0.0,
drop_path=0.0,
norm_layer=nn.LayerNorm,
downsample=None,
use_checkpoint=False,
img_size=224,
patch_size=4,
resi_connection="1conv",
):
super(RHAG, self).__init__()
self.dim = dim
self.input_resolution = input_resolution
self.residual_group = AttenBlocks(
dim=dim,
input_resolution=input_resolution,
depth=depth,
num_heads=num_heads,
window_size=window_size,
compress_ratio=compress_ratio,
squeeze_factor=squeeze_factor,
conv_scale=conv_scale,
overlap_ratio=overlap_ratio,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop,
attn_drop=attn_drop,
drop_path=drop_path,
norm_layer=norm_layer,
downsample=downsample,
use_checkpoint=use_checkpoint,
)
if resi_connection == "1conv":
self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
elif resi_connection == "identity":
self.conv = nn.Identity()
self.patch_embed = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=0,
embed_dim=dim,
norm_layer=None,
)
self.patch_unembed = PatchUnEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=0,
embed_dim=dim,
norm_layer=None,
)
def forward(self, x, x_size, params):
return (
self.patch_embed(
self.conv(
self.patch_unembed(self.residual_group(x, x_size, params), x_size)
)
)
+ x
)
class PatchEmbed(nn.Module):
r"""Image to Patch Embedding
Args:
img_size (int): Image size. Default: 224.
patch_size (int): Patch token size. Default: 4.
in_chans (int): Number of input image channels. Default: 3.
embed_dim (int): Number of linear projection output channels. Default: 96.
norm_layer (nn.Module, optional): Normalization layer. Default: None
"""
def __init__(
self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
):
super().__init__()
img_size = to_2tuple(img_size)
patch_size = to_2tuple(patch_size)
patches_resolution = [
img_size[0] // patch_size[0], # type: ignore
img_size[1] // patch_size[1], # type: ignore
]
self.img_size = img_size
self.patch_size = patch_size
self.patches_resolution = patches_resolution
self.num_patches = patches_resolution[0] * patches_resolution[1]
self.in_chans = in_chans
self.embed_dim = embed_dim
if norm_layer is not None:
self.norm = norm_layer(embed_dim)
else:
self.norm = None
def forward(self, x):
x = x.flatten(2).transpose(1, 2) # b Ph*Pw c
if self.norm is not None:
x = self.norm(x)
return x
class PatchUnEmbed(nn.Module):
r"""Image to Patch Unembedding
Args:
img_size (int): Image size. Default: 224.
patch_size (int): Patch token size. Default: 4.
in_chans (int): Number of input image channels. Default: 3.
embed_dim (int): Number of linear projection output channels. Default: 96.
norm_layer (nn.Module, optional): Normalization layer. Default: None
"""
def __init__(
self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
):
super().__init__()
img_size = to_2tuple(img_size)
patch_size = to_2tuple(patch_size)
patches_resolution = [
img_size[0] // patch_size[0], # type: ignore
img_size[1] // patch_size[1], # type: ignore
]
self.img_size = img_size
self.patch_size = patch_size
self.patches_resolution = patches_resolution
self.num_patches = patches_resolution[0] * patches_resolution[1]
self.in_chans = in_chans
self.embed_dim = embed_dim
def forward(self, x, x_size):
x = (
x.transpose(1, 2)
.contiguous()
.view(x.shape[0], self.embed_dim, x_size[0], x_size[1])
) # b Ph*Pw c
return x
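# Illustrative sketch (not part of the original file): with patch_size=1,
# PatchEmbed and PatchUnEmbed are exact inverses (flatten to tokens and back).
# Sizes here are arbitrary.
def _demo_patch_embed_roundtrip():
    embed = PatchEmbed(img_size=64, patch_size=1, in_chans=96, embed_dim=96)
    unembed = PatchUnEmbed(img_size=64, patch_size=1, in_chans=96, embed_dim=96)
    x = torch.randn(2, 96, 64, 64)
    assert torch.equal(unembed(embed(x), (64, 64)), x)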
class Upsample(nn.Sequential):
"""Upsample module.
Args:
scale (int): Scale factor. Supported scales: 2^n and 3.
num_feat (int): Channel number of intermediate features.
"""
def __init__(self, scale, num_feat):
m = []
if (scale & (scale - 1)) == 0: # scale = 2^n
for _ in range(int(math.log(scale, 2))):
m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
m.append(nn.PixelShuffle(2))
elif scale == 3:
m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
m.append(nn.PixelShuffle(3))
else:
raise ValueError(
f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
)
super(Upsample, self).__init__(*m)
class HAT(nn.Module):
r"""Hybrid Attention Transformer
    A PyTorch implementation of: `Activating More Pixels in Image Super-Resolution Transformer`.
    Some code is based on SwinIR.
Args:
img_size (int | tuple(int)): Input image size. Default 64
patch_size (int | tuple(int)): Patch size. Default: 1
in_chans (int): Number of input image channels. Default: 3
embed_dim (int): Patch embedding dimension. Default: 96
depths (tuple(int)): Depth of each Swin Transformer layer.
num_heads (tuple(int)): Number of attention heads in different layers.
window_size (int): Window size. Default: 7
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
drop_rate (float): Dropout rate. Default: 0
attn_drop_rate (float): Attention dropout rate. Default: 0
drop_path_rate (float): Stochastic depth rate. Default: 0.1
norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
patch_norm (bool): If True, add normalization after patch embedding. Default: True
use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
img_range: Image range. 1. or 255.
        upsampler: The reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
"""
def __init__(
self,
state_dict,
**kwargs,
):
super(HAT, self).__init__()
# Defaults
img_size = 64
patch_size = 1
in_chans = 3
embed_dim = 96
depths = (6, 6, 6, 6)
num_heads = (6, 6, 6, 6)
window_size = 7
compress_ratio = 3
squeeze_factor = 30
conv_scale = 0.01
overlap_ratio = 0.5
mlp_ratio = 4.0
qkv_bias = True
qk_scale = None
drop_rate = 0.0
attn_drop_rate = 0.0
drop_path_rate = 0.1
norm_layer = nn.LayerNorm
ape = False
patch_norm = True
use_checkpoint = False
upscale = 2
img_range = 1.0
upsampler = ""
resi_connection = "1conv"
self.state = state_dict
self.model_arch = "HAT"
self.sub_type = "SR"
self.supports_fp16 = False
self.support_bf16 = True
self.min_size_restriction = 16
state_keys = list(state_dict.keys())
num_feat = state_dict["conv_last.weight"].shape[1]
in_chans = state_dict["conv_first.weight"].shape[1]
num_out_ch = state_dict["conv_last.weight"].shape[0]
embed_dim = state_dict["conv_first.weight"].shape[0]
if "conv_before_upsample.0.weight" in state_keys:
if "conv_up1.weight" in state_keys:
upsampler = "nearest+conv"
else:
upsampler = "pixelshuffle"
                self.supports_fp16 = False
elif "upsample.0.weight" in state_keys:
upsampler = "pixelshuffledirect"
else:
upsampler = ""
upscale = 1
if upsampler == "nearest+conv":
upsample_keys = [
x for x in state_keys if "conv_up" in x and "bias" not in x
]
            for _ in upsample_keys:
                upscale *= 2
elif upsampler == "pixelshuffle":
upsample_keys = [
x
for x in state_keys
if "upsample" in x and "conv" not in x and "bias" not in x
]
for upsample_key in upsample_keys:
shape = self.state[upsample_key].shape[0]
upscale *= math.sqrt(shape // num_feat)
upscale = int(upscale)
elif upsampler == "pixelshuffledirect":
upscale = int(
math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch)
)
max_layer_num = 0
max_block_num = 0
for key in state_keys:
result = re.match(
r"layers.(\d*).residual_group.blocks.(\d*).conv_block.cab.0.weight", key
)
if result:
layer_num, block_num = result.groups()
max_layer_num = max(max_layer_num, int(layer_num))
max_block_num = max(max_block_num, int(block_num))
depths = [max_block_num + 1 for _ in range(max_layer_num + 1)]
if (
"layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
in state_keys
):
num_heads_num = self.state[
"layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
].shape[-1]
num_heads = [num_heads_num for _ in range(max_layer_num + 1)]
else:
num_heads = depths
mlp_ratio = float(
self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0]
/ embed_dim
)
# TODO: could actually count the layers, but this should do
if "layers.0.conv.4.weight" in state_keys:
resi_connection = "3conv"
else:
resi_connection = "1conv"
window_size = int(math.sqrt(self.state["relative_position_index_SA"].shape[0]))
# Not sure if this is needed or used at all anywhere in HAT's config
if "layers.0.residual_group.blocks.1.attn_mask" in state_keys:
img_size = int(
math.sqrt(
self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0]
)
* window_size
)
self.window_size = window_size
self.shift_size = window_size // 2
self.overlap_ratio = overlap_ratio
self.in_nc = in_chans
self.out_nc = num_out_ch
self.num_feat = num_feat
self.embed_dim = embed_dim
self.num_heads = num_heads
self.depths = depths
self.mlp_ratio = mlp_ratio
self.scale = upscale
self.upsampler = upsampler
self.img_size = img_size
self.img_range = img_range
self.resi_connection = resi_connection
num_in_ch = in_chans
# num_out_ch = in_chans
# num_feat = 64
if in_chans == 3:
rgb_mean = (0.4488, 0.4371, 0.4040)
self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
else:
self.mean = torch.zeros(1, 1, 1, 1)
self.upscale = upscale
self.upsampler = upsampler
# relative position index
relative_position_index_SA = self.calculate_rpi_sa()
relative_position_index_OCA = self.calculate_rpi_oca()
self.register_buffer("relative_position_index_SA", relative_position_index_SA)
self.register_buffer("relative_position_index_OCA", relative_position_index_OCA)
# ------------------------- 1, shallow feature extraction ------------------------- #
self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
# ------------------------- 2, deep feature extraction ------------------------- #
self.num_layers = len(depths)
self.embed_dim = embed_dim
self.ape = ape
self.patch_norm = patch_norm
self.num_features = embed_dim
self.mlp_ratio = mlp_ratio
# split image into non-overlapping patches
self.patch_embed = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=embed_dim,
embed_dim=embed_dim,
norm_layer=norm_layer if self.patch_norm else None,
)
num_patches = self.patch_embed.num_patches
patches_resolution = self.patch_embed.patches_resolution
self.patches_resolution = patches_resolution
# merge non-overlapping patches into image
self.patch_unembed = PatchUnEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=embed_dim,
embed_dim=embed_dim,
norm_layer=norm_layer if self.patch_norm else None,
)
# absolute position embedding
if self.ape:
self.absolute_pos_embed = nn.Parameter( # type: ignore[arg-type]
torch.zeros(1, num_patches, embed_dim)
)
trunc_normal_(self.absolute_pos_embed, std=0.02)
self.pos_drop = nn.Dropout(p=drop_rate)
# stochastic depth
dpr = [
x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
] # stochastic depth decay rule
# build Residual Hybrid Attention Groups (RHAG)
self.layers = nn.ModuleList()
for i_layer in range(self.num_layers):
layer = RHAG(
dim=embed_dim,
input_resolution=(patches_resolution[0], patches_resolution[1]),
depth=depths[i_layer],
num_heads=num_heads[i_layer],
window_size=window_size,
compress_ratio=compress_ratio,
squeeze_factor=squeeze_factor,
conv_scale=conv_scale,
overlap_ratio=overlap_ratio,
mlp_ratio=self.mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[
sum(depths[:i_layer]) : sum(depths[: i_layer + 1]) # type: ignore
], # no impact on SR results
norm_layer=norm_layer,
downsample=None,
use_checkpoint=use_checkpoint,
img_size=img_size,
patch_size=patch_size,
resi_connection=resi_connection,
)
self.layers.append(layer)
self.norm = norm_layer(self.num_features)
# build the last conv layer in deep feature extraction
        if resi_connection == "1conv":
            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
        elif resi_connection == "identity":
            self.conv_after_body = nn.Identity()
        else:
            # "3conv" can be detected above but upstream HAT defines no such
            # branch; fail loudly instead of leaving conv_after_body unset.
            raise ValueError(f"unsupported resi_connection: {resi_connection}")
# ------------------------- 3, high quality image reconstruction ------------------------- #
if self.upsampler == "pixelshuffle":
# for classical SR
self.conv_before_upsample = nn.Sequential(
nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
)
self.upsample = Upsample(upscale, num_feat)
self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
self.apply(self._init_weights)
self.load_state_dict(self.state, strict=False)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight, std=0.02)
if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.LayerNorm):
nn.init.constant_(m.bias, 0)
nn.init.constant_(m.weight, 1.0)
def calculate_rpi_sa(self):
# calculate relative position index for SA
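        # Each ordered pair of tokens (i, j) in a window gets an index that
        # encodes their coordinate offset: row/col deltas in [-(W-1), W-1] are
        # shifted to [0, 2W-2] and combined into one index in [0, (2W-1)^2 - 1],
        # which later selects a learned relative position bias.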
coords_h = torch.arange(self.window_size)
coords_w = torch.arange(self.window_size)
coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
relative_coords = (
coords_flatten[:, :, None] - coords_flatten[:, None, :]
) # 2, Wh*Ww, Wh*Ww
relative_coords = relative_coords.permute(
1, 2, 0
).contiguous() # Wh*Ww, Wh*Ww, 2
relative_coords[:, :, 0] += self.window_size - 1 # shift to start from 0
relative_coords[:, :, 1] += self.window_size - 1
relative_coords[:, :, 0] *= 2 * self.window_size - 1
relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
return relative_position_index
def calculate_rpi_oca(self):
# calculate relative position index for OCA
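        # Same construction as calculate_rpi_sa, except queries come from the
        # original window (ws x ws) while keys/values come from the enlarged
        # overlapping window (wse x wse), so the offsets span a wider range.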
window_size_ori = self.window_size
window_size_ext = self.window_size + int(self.overlap_ratio * self.window_size)
coords_h = torch.arange(window_size_ori)
coords_w = torch.arange(window_size_ori)
coords_ori = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, ws, ws
coords_ori_flatten = torch.flatten(coords_ori, 1) # 2, ws*ws
coords_h = torch.arange(window_size_ext)
coords_w = torch.arange(window_size_ext)
coords_ext = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, wse, wse
coords_ext_flatten = torch.flatten(coords_ext, 1) # 2, wse*wse
relative_coords = (
coords_ext_flatten[:, None, :] - coords_ori_flatten[:, :, None]
) # 2, ws*ws, wse*wse
relative_coords = relative_coords.permute(
1, 2, 0
).contiguous() # ws*ws, wse*wse, 2
relative_coords[:, :, 0] += (
window_size_ori - window_size_ext + 1
) # shift to start from 0
relative_coords[:, :, 1] += window_size_ori - window_size_ext + 1
relative_coords[:, :, 0] *= window_size_ori + window_size_ext - 1
relative_position_index = relative_coords.sum(-1)
return relative_position_index
def calculate_mask(self, x_size):
# calculate attention mask for SW-MSA
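        # Shifted-window attention: label the 3x3 grid of shifted regions
        # 0..8, then block attention between tokens with different labels by
        # adding a large negative bias (-100) to those logits before softmax.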
h, w = x_size
img_mask = torch.zeros((1, h, w, 1)) # 1 h w 1
h_slices = (
slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None),
)
w_slices = (
slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None),
)
cnt = 0
for h in h_slices:
for w in w_slices:
img_mask[:, h, w, :] = cnt
cnt += 1
mask_windows = window_partition(
img_mask, self.window_size
) # nw, window_size, window_size, 1
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(
attn_mask == 0, float(0.0)
)
return attn_mask
@torch.jit.ignore # type: ignore
def no_weight_decay(self):
return {"absolute_pos_embed"}
@torch.jit.ignore # type: ignore
def no_weight_decay_keywords(self):
return {"relative_position_bias_table"}
def check_image_size(self, x):
_, _, h, w = x.size()
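        # Reflect-pad H and W up to multiples of window_size so windows tile
        # the feature map exactly; forward() crops the output back afterwards.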
mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")
return x
def forward_features(self, x):
x_size = (x.shape[2], x.shape[3])
# Calculate attention mask and relative position index in advance to speed up inference.
        # The original code is very time-consuming for large window size.
attn_mask = self.calculate_mask(x_size).to(x.device)
params = {
"attn_mask": attn_mask,
"rpi_sa": self.relative_position_index_SA,
"rpi_oca": self.relative_position_index_OCA,
}
x = self.patch_embed(x)
if self.ape:
x = x + self.absolute_pos_embed
x = self.pos_drop(x)
for layer in self.layers:
x = layer(x, x_size, params)
x = self.norm(x) # b seq_len c
x = self.patch_unembed(x, x_size)
return x
def forward(self, x):
H, W = x.shape[2:]
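        # Normalize: subtract the channel mean and scale by img_range; the
        # inverse transform runs after reconstruction, and the final slice
        # crops away the padding added by check_image_size.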
self.mean = self.mean.type_as(x)
x = (x - self.mean) * self.img_range
x = self.check_image_size(x)
if self.upsampler == "pixelshuffle":
# for classical SR
x = self.conv_first(x)
x = self.conv_after_body(self.forward_features(x)) + x
x = self.conv_before_upsample(x)
x = self.conv_last(self.upsample(x))
x = x / self.img_range + self.mean
return x[:, :, : H * self.upscale, : W * self.upscale]
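# --------------------------------------------------------------------------- #
# Usage sketch (illustrative only; the checkpoint path and the assumption that
# the file stores a bare state dict are hypothetical):
#
#     import torch
#
#     state_dict = torch.load("HAT_SRx4.pth", map_location="cpu")
#     model = HAT(state_dict).eval()
#     with torch.no_grad():
#         lr = torch.rand(1, model.in_nc, 64, 64)
#         sr = model(lr)  # (1, out_nc, 64 * model.scale, 64 * model.scale)
# --------------------------------------------------------------------------- #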
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
MIT License
Copyright (c) 2022 Xiangyu Chen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
BSD 3-Clause License
Copyright (c) 2021, Xintao Wang
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright 2022 Kai Zhang (cskaizhang@gmail.com, https://cszn.github.io/). All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Copyright 2018-2022 BasicSR Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Creative Commons Legal Code
CC0 1.0 Universal
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [2021] [SwinIR Authors]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [2021] Samsung Research
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# pylint: skip-file
"""
Model adapted from advimman's lama project: https://github.com/advimman/lama
"""
# Fast Fourier Convolution NeurIPS 2020
# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py
# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms.functional import InterpolationMode, rotate
class LearnableSpatialTransformWrapper(nn.Module):
def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True):
super().__init__()
self.impl = impl
self.angle = torch.rand(1) * angle_init_range
if train_angle:
self.angle = nn.Parameter(self.angle, requires_grad=True)
self.pad_coef = pad_coef
def forward(self, x):
if torch.is_tensor(x):
return self.inverse_transform(self.impl(self.transform(x)), x)
elif isinstance(x, tuple):
x_trans = tuple(self.transform(elem) for elem in x)
y_trans = self.impl(x_trans)
return tuple(
self.inverse_transform(elem, orig_x) for elem, orig_x in zip(y_trans, x)
)
else:
raise ValueError(f"Unexpected input type {type(x)}")
def transform(self, x):
height, width = x.shape[2:]
pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef)
x_padded = F.pad(x, [pad_w, pad_w, pad_h, pad_h], mode="reflect")
x_padded_rotated = rotate(
x_padded, self.angle.to(x_padded), InterpolationMode.BILINEAR, fill=0
)
return x_padded_rotated
def inverse_transform(self, y_padded_rotated, orig_x):
height, width = orig_x.shape[2:]
pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef)
y_padded = rotate(
y_padded_rotated,
-self.angle.to(y_padded_rotated),
InterpolationMode.BILINEAR,
fill=0,
)
y_height, y_width = y_padded.shape[2:]
y = y_padded[:, :, pad_h : y_height - pad_h, pad_w : y_width - pad_w]
return y
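# Note (added for clarity): transform() reflect-pads by pad_coef and rotates by
# the learned angle; inverse_transform() rotates back and crops, so spatial
# shape is preserved for any shape-preserving `impl`. In FFCResNetGenerator
# below, the wrapper is only instantiated when spatial_transform_layers is set.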
class SELayer(nn.Module):
def __init__(self, channel, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction, bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel, bias=False),
nn.Sigmoid(),
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
res = x * y.expand_as(x)
return res
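# Shape sketch (added; a minimal check, not part of the upstream code):
# squeeze-and-excitation rescales each channel by a learned scalar, so the
# output shape always matches the input shape.
def _demo_se_layer():
    se = SELayer(channel=32)
    x = torch.randn(2, 32, 8, 8)
    assert se(x).shape == x.shape  # channel-wise reweighting only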
class FourierUnit(nn.Module):
def __init__(
self,
in_channels,
out_channels,
groups=1,
spatial_scale_factor=None,
spatial_scale_mode="bilinear",
spectral_pos_encoding=False,
use_se=False,
se_kwargs=None,
ffc3d=False,
fft_norm="ortho",
):
# bn_layer not used
super(FourierUnit, self).__init__()
self.groups = groups
self.conv_layer = torch.nn.Conv2d(
in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0),
out_channels=out_channels * 2,
kernel_size=1,
stride=1,
padding=0,
groups=self.groups,
bias=False,
)
self.bn = torch.nn.BatchNorm2d(out_channels * 2)
self.relu = torch.nn.ReLU(inplace=True)
# squeeze and excitation block
self.use_se = use_se
if use_se:
if se_kwargs is None:
se_kwargs = {}
self.se = SELayer(self.conv_layer.in_channels, **se_kwargs)
self.spatial_scale_factor = spatial_scale_factor
self.spatial_scale_mode = spatial_scale_mode
self.spectral_pos_encoding = spectral_pos_encoding
self.ffc3d = ffc3d
self.fft_norm = fft_norm
def forward(self, x):
        # half only works on gpu anyway
        half_check = x.type() == "torch.cuda.HalfTensor"
batch = x.shape[0]
if self.spatial_scale_factor is not None:
orig_size = x.shape[-2:]
x = F.interpolate(
x,
scale_factor=self.spatial_scale_factor,
mode=self.spatial_scale_mode,
align_corners=False,
)
# (batch, c, h, w/2+1, 2)
fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1)
        if half_check:
ffted = torch.fft.rfftn(
x.float(), dim=fft_dim, norm=self.fft_norm
) # .type(torch.cuda.HalfTensor)
else:
ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm)
ffted = torch.stack((ffted.real, ffted.imag), dim=-1)
ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1)
ffted = ffted.view(
(
batch,
-1,
)
+ ffted.size()[3:]
)
if self.spectral_pos_encoding:
height, width = ffted.shape[-2:]
coords_vert = (
torch.linspace(0, 1, height)[None, None, :, None]
.expand(batch, 1, height, width)
.to(ffted)
)
coords_hor = (
torch.linspace(0, 1, width)[None, None, None, :]
.expand(batch, 1, height, width)
.to(ffted)
)
ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1)
if self.use_se:
ffted = self.se(ffted)
        if half_check:
ffted = self.conv_layer(ffted.half()) # (batch, c*2, h, w/2+1)
else:
ffted = self.conv_layer(
ffted
) # .type(torch.cuda.FloatTensor) # (batch, c*2, h, w/2+1)
ffted = self.relu(self.bn(ffted))
# forcing to be always float
ffted = ffted.float()
ffted = (
ffted.view(
(
batch,
-1,
2,
)
+ ffted.size()[2:]
)
.permute(0, 1, 3, 4, 2)
.contiguous()
) # (batch,c, t, h, w/2+1, 2)
ffted = torch.complex(ffted[..., 0], ffted[..., 1])
ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:]
output = torch.fft.irfftn(
ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm
)
        if half_check:
output = output.half()
if self.spatial_scale_factor is not None:
output = F.interpolate(
output,
size=orig_size,
mode=self.spatial_scale_mode,
align_corners=False,
)
return output
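# Shape sketch (added; illustrative, not part of the upstream code): for a
# (B, C, H, W) input, rfftn yields (B, C, H, W//2 + 1) complex values, stored
# as 2*C real channels so a plain 1x1 Conv2d can mix them in the frequency
# domain before irfftn restores the spatial size.
def _demo_fourier_unit_shapes():
    fu = FourierUnit(in_channels=8, out_channels=8)
    x = torch.randn(2, 8, 16, 16)
    assert fu(x).shape == (2, 8, 16, 16)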
class SpectralTransform(nn.Module):
def __init__(
self,
in_channels,
out_channels,
stride=1,
groups=1,
enable_lfu=True,
separable_fu=False,
**fu_kwargs,
):
# bn_layer not used
super(SpectralTransform, self).__init__()
self.enable_lfu = enable_lfu
if stride == 2:
self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
else:
self.downsample = nn.Identity()
self.stride = stride
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels, out_channels // 2, kernel_size=1, groups=groups, bias=False
),
nn.BatchNorm2d(out_channels // 2),
nn.ReLU(inplace=True),
)
fu_class = FourierUnit
self.fu = fu_class(out_channels // 2, out_channels // 2, groups, **fu_kwargs)
if self.enable_lfu:
self.lfu = fu_class(out_channels // 2, out_channels // 2, groups)
self.conv2 = torch.nn.Conv2d(
out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False
)
def forward(self, x):
x = self.downsample(x)
x = self.conv1(x)
output = self.fu(x)
if self.enable_lfu:
_, c, h, _ = x.shape
split_no = 2
split_s = h // split_no
xs = torch.cat(
torch.split(x[:, : c // 4], split_s, dim=-2), dim=1
).contiguous()
xs = torch.cat(torch.split(xs, split_s, dim=-1), dim=1).contiguous()
xs = self.lfu(xs)
xs = xs.repeat(1, 1, split_no, split_no).contiguous()
else:
xs = 0
output = self.conv2(x + output + xs)
return output
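# LFU sketch (added; illustrative): the local Fourier unit takes the first
# quarter of the channels, folds the 2x2 grid of spatial quadrants onto the
# channel axis, applies a FourierUnit, and tiles the result back, so the
# residual sum keeps the (B, out_channels // 2, H, W) shape. The split above
# assumes square inputs (h == w).
def _demo_spectral_transform():
    st = SpectralTransform(in_channels=8, out_channels=8, enable_lfu=True)
    x = torch.randn(1, 8, 16, 16)
    assert st(x).shape == (1, 8, 16, 16)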
class FFC(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
ratio_gin,
ratio_gout,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=False,
enable_lfu=True,
padding_type="reflect",
gated=False,
**spectral_kwargs,
):
super(FFC, self).__init__()
assert stride == 1 or stride == 2, "Stride should be 1 or 2."
self.stride = stride
in_cg = int(in_channels * ratio_gin)
in_cl = in_channels - in_cg
out_cg = int(out_channels * ratio_gout)
out_cl = out_channels - out_cg
# groups_g = 1 if groups == 1 else int(groups * ratio_gout)
# groups_l = 1 if groups == 1 else groups - groups_g
self.ratio_gin = ratio_gin
self.ratio_gout = ratio_gout
self.global_in_num = in_cg
module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d
self.convl2l = module(
in_cl,
out_cl,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
padding_mode=padding_type,
)
module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d
self.convl2g = module(
in_cl,
out_cg,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
padding_mode=padding_type,
)
module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d
self.convg2l = module(
in_cg,
out_cl,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
padding_mode=padding_type,
)
module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform
self.convg2g = module(
in_cg,
out_cg,
stride,
1 if groups == 1 else groups // 2,
enable_lfu,
**spectral_kwargs,
)
self.gated = gated
module = (
nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d
)
self.gate = module(in_channels, 2, 1)
def forward(self, x):
x_l, x_g = x if type(x) is tuple else (x, 0)
out_xl, out_xg = 0, 0
if self.gated:
total_input_parts = [x_l]
if torch.is_tensor(x_g):
total_input_parts.append(x_g)
total_input = torch.cat(total_input_parts, dim=1)
gates = torch.sigmoid(self.gate(total_input))
g2l_gate, l2g_gate = gates.chunk(2, dim=1)
else:
g2l_gate, l2g_gate = 1, 1
if self.ratio_gout != 1:
out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate
if self.ratio_gout != 0:
out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g)
return out_xl, out_xg
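# Channel-split sketch (added; illustrative): ratio_gin / ratio_gout control
# how many channels take the global (spectral) path versus the local (conv)
# path. With a ratio of 0.75, a 16-channel feature travels as a
# (4-channel local, 12-channel global) tuple.
def _demo_ffc_channel_split():
    ffc = FFC(
        16, 16, kernel_size=3, ratio_gin=0.75, ratio_gout=0.75,
        padding=1, enable_lfu=False,
    )
    x_l = torch.randn(1, 4, 16, 16)   # 16 * (1 - 0.75) local channels
    x_g = torch.randn(1, 12, 16, 16)  # 16 * 0.75 global channels
    out_l, out_g = ffc((x_l, x_g))
    assert out_l.shape[1] == 4 and out_g.shape[1] == 12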
class FFC_BN_ACT(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
ratio_gin,
ratio_gout,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=False,
norm_layer=nn.BatchNorm2d,
activation_layer=nn.Identity,
padding_type="reflect",
enable_lfu=True,
**kwargs,
):
super(FFC_BN_ACT, self).__init__()
self.ffc = FFC(
in_channels,
out_channels,
kernel_size,
ratio_gin,
ratio_gout,
stride,
padding,
dilation,
groups,
bias,
enable_lfu,
padding_type=padding_type,
**kwargs,
)
lnorm = nn.Identity if ratio_gout == 1 else norm_layer
gnorm = nn.Identity if ratio_gout == 0 else norm_layer
global_channels = int(out_channels * ratio_gout)
self.bn_l = lnorm(out_channels - global_channels)
self.bn_g = gnorm(global_channels)
lact = nn.Identity if ratio_gout == 1 else activation_layer
gact = nn.Identity if ratio_gout == 0 else activation_layer
self.act_l = lact(inplace=True)
self.act_g = gact(inplace=True)
def forward(self, x):
x_l, x_g = self.ffc(x)
x_l = self.act_l(self.bn_l(x_l))
x_g = self.act_g(self.bn_g(x_g))
return x_l, x_g
class FFCResnetBlock(nn.Module):
def __init__(
self,
dim,
padding_type,
norm_layer,
activation_layer=nn.ReLU,
dilation=1,
spatial_transform_kwargs=None,
inline=False,
**conv_kwargs,
):
super().__init__()
self.conv1 = FFC_BN_ACT(
dim,
dim,
kernel_size=3,
padding=dilation,
dilation=dilation,
norm_layer=norm_layer,
activation_layer=activation_layer,
padding_type=padding_type,
**conv_kwargs,
)
self.conv2 = FFC_BN_ACT(
dim,
dim,
kernel_size=3,
padding=dilation,
dilation=dilation,
norm_layer=norm_layer,
activation_layer=activation_layer,
padding_type=padding_type,
**conv_kwargs,
)
if spatial_transform_kwargs is not None:
self.conv1 = LearnableSpatialTransformWrapper(
self.conv1, **spatial_transform_kwargs
)
self.conv2 = LearnableSpatialTransformWrapper(
self.conv2, **spatial_transform_kwargs
)
self.inline = inline
def forward(self, x):
if self.inline:
x_l, x_g = (
x[:, : -self.conv1.ffc.global_in_num],
x[:, -self.conv1.ffc.global_in_num :],
)
else:
x_l, x_g = x if type(x) is tuple else (x, 0)
id_l, id_g = x_l, x_g
x_l, x_g = self.conv1((x_l, x_g))
x_l, x_g = self.conv2((x_l, x_g))
x_l, x_g = id_l + x_l, id_g + x_g
out = x_l, x_g
if self.inline:
out = torch.cat(out, dim=1)
return out
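# Inline-mode sketch (added; illustrative): with inline=True the block accepts
# a single concatenated tensor, splits it into (local, global) parts using
# global_in_num, and re-concatenates on the way out; FFCResNetGenerator uses
# this for the optional out_ffc block.
def _demo_ffc_resblock_inline():
    block = FFCResnetBlock(
        16, padding_type="reflect", norm_layer=nn.BatchNorm2d,
        inline=True, ratio_gin=0.75, ratio_gout=0.75, enable_lfu=False,
    )
    y = block(torch.randn(1, 16, 16, 16))
    assert y.shape == (1, 16, 16, 16)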
class ConcatTupleLayer(nn.Module):
def forward(self, x):
assert isinstance(x, tuple)
x_l, x_g = x
assert torch.is_tensor(x_l) or torch.is_tensor(x_g)
if not torch.is_tensor(x_g):
return x_l
return torch.cat(x, dim=1)
class FFCResNetGenerator(nn.Module):
def __init__(
self,
input_nc,
output_nc,
ngf=64,
n_downsampling=3,
n_blocks=18,
norm_layer=nn.BatchNorm2d,
padding_type="reflect",
activation_layer=nn.ReLU,
up_norm_layer=nn.BatchNorm2d,
up_activation=nn.ReLU(True),
init_conv_kwargs={},
downsample_conv_kwargs={},
resnet_conv_kwargs={},
spatial_transform_layers=None,
spatial_transform_kwargs={},
max_features=1024,
out_ffc=False,
out_ffc_kwargs={},
):
assert n_blocks >= 0
super().__init__()
"""
init_conv_kwargs = {'ratio_gin': 0, 'ratio_gout': 0, 'enable_lfu': False}
downsample_conv_kwargs = {'ratio_gin': '${generator.init_conv_kwargs.ratio_gout}', 'ratio_gout': '${generator.downsample_conv_kwargs.ratio_gin}', 'enable_lfu': False}
resnet_conv_kwargs = {'ratio_gin': 0.75, 'ratio_gout': '${generator.resnet_conv_kwargs.ratio_gin}', 'enable_lfu': False}
spatial_transform_kwargs = {}
out_ffc_kwargs = {}
"""
"""
print(input_nc, output_nc, ngf, n_downsampling, n_blocks, norm_layer,
padding_type, activation_layer,
up_norm_layer, up_activation,
spatial_transform_layers,
add_out_act, max_features, out_ffc, file=sys.stderr)
4 3 64 3 18 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
reflect <class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.batchnorm.BatchNorm2d'>
ReLU(inplace=True)
None sigmoid 1024 False
"""
init_conv_kwargs = {"ratio_gin": 0, "ratio_gout": 0, "enable_lfu": False}
downsample_conv_kwargs = {"ratio_gin": 0, "ratio_gout": 0, "enable_lfu": False}
resnet_conv_kwargs = {
"ratio_gin": 0.75,
"ratio_gout": 0.75,
"enable_lfu": False,
}
spatial_transform_kwargs = {}
out_ffc_kwargs = {}
model = [
nn.ReflectionPad2d(3),
FFC_BN_ACT(
input_nc,
ngf,
kernel_size=7,
padding=0,
norm_layer=norm_layer,
activation_layer=activation_layer,
**init_conv_kwargs,
),
]
### downsample
for i in range(n_downsampling):
mult = 2**i
if i == n_downsampling - 1:
cur_conv_kwargs = dict(downsample_conv_kwargs)
cur_conv_kwargs["ratio_gout"] = resnet_conv_kwargs.get("ratio_gin", 0)
else:
cur_conv_kwargs = downsample_conv_kwargs
model += [
FFC_BN_ACT(
min(max_features, ngf * mult),
min(max_features, ngf * mult * 2),
kernel_size=3,
stride=2,
padding=1,
norm_layer=norm_layer,
activation_layer=activation_layer,
**cur_conv_kwargs,
)
]
mult = 2**n_downsampling
feats_num_bottleneck = min(max_features, ngf * mult)
### resnet blocks
for i in range(n_blocks):
cur_resblock = FFCResnetBlock(
feats_num_bottleneck,
padding_type=padding_type,
activation_layer=activation_layer,
norm_layer=norm_layer,
**resnet_conv_kwargs,
)
if spatial_transform_layers is not None and i in spatial_transform_layers:
cur_resblock = LearnableSpatialTransformWrapper(
cur_resblock, **spatial_transform_kwargs
)
model += [cur_resblock]
model += [ConcatTupleLayer()]
### upsample
for i in range(n_downsampling):
mult = 2 ** (n_downsampling - i)
model += [
nn.ConvTranspose2d(
min(max_features, ngf * mult),
min(max_features, int(ngf * mult / 2)),
kernel_size=3,
stride=2,
padding=1,
output_padding=1,
),
up_norm_layer(min(max_features, int(ngf * mult / 2))),
up_activation,
]
if out_ffc:
model += [
FFCResnetBlock(
ngf,
padding_type=padding_type,
activation_layer=activation_layer,
norm_layer=norm_layer,
inline=True,
**out_ffc_kwargs,
)
]
model += [
nn.ReflectionPad2d(3),
nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
]
model.append(nn.Sigmoid())
self.model = nn.Sequential(*model)
def forward(self, image, mask):
return self.model(torch.cat([image, mask], dim=1))
class LaMa(nn.Module):
def __init__(self, state_dict) -> None:
super(LaMa, self).__init__()
self.model_arch = "LaMa"
self.sub_type = "Inpaint"
self.in_nc = 4
self.out_nc = 3
self.scale = 1
self.min_size = None
self.pad_mod = 8
self.pad_to_square = False
self.model = FFCResNetGenerator(self.in_nc, self.out_nc)
self.state = {
k.replace("generator.model", "model.model"): v
for k, v in state_dict.items()
}
self.supports_fp16 = False
self.support_bf16 = True
self.load_state_dict(self.state, strict=False)
def forward(self, img, mask):
masked_img = img * (1 - mask)
inpainted_mask = mask * self.model.forward(masked_img, mask)
result = inpainted_mask + (1 - mask) * img
return result
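# Hedged inference sketch (added; tensor sizes and the helper name are
# illustrative, and `state_dict` is assumed to be a pretrained LaMa
# checkpoint): LaMa takes an RGB image and a binary mask (1 = region to fill),
# both with spatial size divisible by pad_mod (8); pixels outside the mask are
# copied straight from the input.
def _demo_lama_inpaint(state_dict):
    model = LaMa(state_dict).eval()
    img = torch.rand(1, 3, 256, 256)
    mask = torch.zeros(1, 1, 256, 256)
    mask[..., 96:160, 96:160] = 1.0  # square hole to inpaint
    with torch.no_grad():
        out = model(img, mask)
    return out  # (1, 3, 256, 256); values in [0, 1] from the final Sigmoid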
import math
import torch.nn as nn
class CA_layer(nn.Module):
def __init__(self, channel, reduction=16):
super(CA_layer, self).__init__()
# global average pooling
self.gap = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False),
nn.GELU(),
nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False),
# nn.Sigmoid()
)
def forward(self, x):
y = self.fc(self.gap(x))
return x * y.expand_as(x)
class Simple_CA_layer(nn.Module):
def __init__(self, channel):
super(Simple_CA_layer, self).__init__()
self.gap = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(
in_channels=channel,
out_channels=channel,
kernel_size=1,
padding=0,
stride=1,
groups=1,
bias=True,
)
def forward(self, x):
return x * self.fc(self.gap(x))
class ECA_layer(nn.Module):
"""Constructs a ECA module.
Args:
channel: Number of channels of the input feature map
k_size: Adaptive selection of kernel size
"""
def __init__(self, channel):
super(ECA_layer, self).__init__()
b = 1
gamma = 2
k_size = int(abs(math.log(channel, 2) + b) / gamma)
k_size = k_size if k_size % 2 else k_size + 1
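        # e.g. channel=64: int(abs(log2(64) + 1) / 2) = 3 (odd, kept);
        # channel=256: int(9 / 2) = 4, bumped to the next odd size, 5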
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.conv = nn.Conv1d(
1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False
)
# self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x: input features with shape [b, c, h, w]
# b, c, h, w = x.size()
# feature descriptor on the global spatial information
y = self.avg_pool(x)
# Two different branches of ECA module
y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
# Multi-scale information fusion
# y = self.sigmoid(y)
return x * y.expand_as(x)
class ECA_MaxPool_layer(nn.Module):
"""Constructs a ECA module.
Args:
channel: Number of channels of the input feature map
k_size: Adaptive selection of kernel size
"""
def __init__(self, channel):
super(ECA_MaxPool_layer, self).__init__()
b = 1
gamma = 2
k_size = int(abs(math.log(channel, 2) + b) / gamma)
k_size = k_size if k_size % 2 else k_size + 1
self.max_pool = nn.AdaptiveMaxPool2d(1)
self.conv = nn.Conv1d(
1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False
)
# self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x: input features with shape [b, c, h, w]
# b, c, h, w = x.size()
# feature descriptor on the global spatial information
y = self.max_pool(x)
# Two different branches of ECA module
y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
# Multi-scale information fusion
# y = self.sigmoid(y)
return x * y.expand_as(x)
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################
# File: OSA.py
# Created Date: Tuesday April 28th 2022
# Author: Chen Xuanhong
# Email: chenxuanhongzju@outlook.com
# Last Modified: Sunday, 23rd April 2023 3:07:42 pm
# Modified By: Chen Xuanhong
# Copyright (c) 2020 Shanghai Jiao Tong University
#############################################################
import torch
import torch.nn.functional as F
from einops import rearrange, repeat
from einops.layers.torch import Rearrange, Reduce
from torch import einsum, nn
from .layernorm import LayerNorm2d
# helpers
def exists(val):
return val is not None
def default(val, d):
return val if exists(val) else d
def cast_tuple(val, length=1):
return val if isinstance(val, tuple) else ((val,) * length)
# helper classes
class PreNormResidual(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.fn = fn
def forward(self, x):
return self.fn(self.norm(x)) + x
class Conv_PreNormResidual(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.norm = LayerNorm2d(dim)
self.fn = fn
def forward(self, x):
return self.fn(self.norm(x)) + x
class FeedForward(nn.Module):
def __init__(self, dim, mult=2, dropout=0.0):
super().__init__()
inner_dim = int(dim * mult)
self.net = nn.Sequential(
nn.Linear(dim, inner_dim),
nn.GELU(),
nn.Dropout(dropout),
nn.Linear(inner_dim, dim),
nn.Dropout(dropout),
)
def forward(self, x):
return self.net(x)
class Conv_FeedForward(nn.Module):
def __init__(self, dim, mult=2, dropout=0.0):
super().__init__()
inner_dim = int(dim * mult)
self.net = nn.Sequential(
nn.Conv2d(dim, inner_dim, 1, 1, 0),
nn.GELU(),
nn.Dropout(dropout),
nn.Conv2d(inner_dim, dim, 1, 1, 0),
nn.Dropout(dropout),
)
def forward(self, x):
return self.net(x)
class Gated_Conv_FeedForward(nn.Module):
def __init__(self, dim, mult=1, bias=False, dropout=0.0):
super().__init__()
hidden_features = int(dim * mult)
self.project_in = nn.Conv2d(dim, hidden_features * 2, kernel_size=1, bias=bias)
self.dwconv = nn.Conv2d(
hidden_features * 2,
hidden_features * 2,
kernel_size=3,
stride=1,
padding=1,
groups=hidden_features * 2,
bias=bias,
)
self.project_out = nn.Conv2d(hidden_features, dim, kernel_size=1, bias=bias)
def forward(self, x):
x = self.project_in(x)
x1, x2 = self.dwconv(x).chunk(2, dim=1)
x = F.gelu(x1) * x2
x = self.project_out(x)
return x
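# Hypothetical usage sketch (an addition, not from the original file): the
# gated FFN doubles the channels, splits the depthwise-conv output in two,
# and gates one half with GELU of the other, so the shape is preserved.
def _demo_gated_conv_ffn():
    ffn = Gated_Conv_FeedForward(dim=64)
    x = torch.randn(1, 64, 16, 16)
    assert ffn(x).shape == x.shape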
# MBConv
class SqueezeExcitation(nn.Module):
def __init__(self, dim, shrinkage_rate=0.25):
super().__init__()
hidden_dim = int(dim * shrinkage_rate)
self.gate = nn.Sequential(
Reduce("b c h w -> b c", "mean"),
nn.Linear(dim, hidden_dim, bias=False),
nn.SiLU(),
nn.Linear(hidden_dim, dim, bias=False),
nn.Sigmoid(),
Rearrange("b c -> b c 1 1"),
)
def forward(self, x):
return x * self.gate(x)
class MBConvResidual(nn.Module):
def __init__(self, fn, dropout=0.0):
super().__init__()
self.fn = fn
self.dropsample = Dropsample(dropout)
def forward(self, x):
out = self.fn(x)
out = self.dropsample(out)
return out + x
class Dropsample(nn.Module):
def __init__(self, prob=0):
super().__init__()
self.prob = prob
def forward(self, x):
device = x.device
if self.prob == 0.0 or (not self.training):
return x
# per-sample Bernoulli keep mask, broadcast over C, H and W
keep_mask = torch.empty((x.shape[0], 1, 1, 1), device=device).uniform_() > self.prob
return x * keep_mask / (1 - self.prob)
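# Hypothetical usage sketch (an addition, not from the original file):
# Dropsample is stochastic depth. In training it zeroes whole samples with
# probability `prob` and rescales survivors by 1/(1 - prob), keeping the
# expected activation unchanged; in eval mode it is the identity.
def _demo_dropsample():
    torch.manual_seed(0)
    ds = Dropsample(prob=0.5)
    x = torch.ones(1000, 1, 1, 1)
    ds.train()
    assert abs(float(ds(x).mean()) - 1.0) < 0.2  # survivors are scaled by 2
    ds.eval()
    assert torch.equal(ds(x), x)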
def MBConv(
dim_in, dim_out, *, downsample, expansion_rate=4, shrinkage_rate=0.25, dropout=0.0
):
hidden_dim = int(expansion_rate * dim_out)
stride = 2 if downsample else 1
net = nn.Sequential(
nn.Conv2d(dim_in, hidden_dim, 1),
# nn.BatchNorm2d(hidden_dim),
nn.GELU(),
nn.Conv2d(
hidden_dim, hidden_dim, 3, stride=stride, padding=1, groups=hidden_dim
),
# nn.BatchNorm2d(hidden_dim),
nn.GELU(),
SqueezeExcitation(hidden_dim, shrinkage_rate=shrinkage_rate),
nn.Conv2d(hidden_dim, dim_out, 1),
# nn.BatchNorm2d(dim_out)
)
if dim_in == dim_out and not downsample:
net = MBConvResidual(net, dropout=dropout)
return net
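# Hypothetical shape check (an addition, not from the original file): with
# dim_in == dim_out and downsample=False, MBConv is wrapped in MBConvResidual
# and preserves the shape; downsample=True halves H and W via the stride-2
# depthwise conv.
def _demo_mbconv():
    x = torch.randn(2, 64, 32, 32)
    assert MBConv(64, 64, downsample=False)(x).shape == (2, 64, 32, 32)
    assert MBConv(64, 128, downsample=True)(x).shape == (2, 128, 16, 16)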
# attention related classes
class Attention(nn.Module):
def __init__(
self,
dim,
dim_head=32,
dropout=0.0,
window_size=7,
with_pe=True,
):
super().__init__()
assert (
dim % dim_head
) == 0, "dimension should be divisible by dimension per head"
self.heads = dim // dim_head
self.scale = dim_head**-0.5
self.with_pe = with_pe
self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
self.attend = nn.Sequential(nn.Softmax(dim=-1), nn.Dropout(dropout))
self.to_out = nn.Sequential(
nn.Linear(dim, dim, bias=False), nn.Dropout(dropout)
)
# relative positional bias
if self.with_pe:
self.rel_pos_bias = nn.Embedding((2 * window_size - 1) ** 2, self.heads)
pos = torch.arange(window_size)
grid = torch.stack(torch.meshgrid(pos, pos, indexing="ij"))
grid = rearrange(grid, "c i j -> (i j) c")
rel_pos = rearrange(grid, "i ... -> i 1 ...") - rearrange(
grid, "j ... -> 1 j ..."
)
rel_pos += window_size - 1
rel_pos_indices = (rel_pos * torch.tensor([2 * window_size - 1, 1])).sum(
dim=-1
)
self.register_buffer("rel_pos_indices", rel_pos_indices, persistent=False)
def forward(self, x):
batch, height, width, window_height, window_width, _, device, h = (
*x.shape,
x.device,
self.heads,
)
# flatten
x = rearrange(x, "b x y w1 w2 d -> (b x y) (w1 w2) d")
# project for queries, keys, values
q, k, v = self.to_qkv(x).chunk(3, dim=-1)
# split heads
q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v))
# scale
q = q * self.scale
# sim
sim = einsum("b h i d, b h j d -> b h i j", q, k)
# add positional bias
if self.with_pe:
bias = self.rel_pos_bias(self.rel_pos_indices)
sim = sim + rearrange(bias, "i j h -> h i j")
# attention
attn = self.attend(sim)
# aggregate
out = einsum("b h i j, b h j d -> b h i d", attn, v)
# merge heads
out = rearrange(
out, "b h (w1 w2) d -> b w1 w2 (h d)", w1=window_height, w2=window_width
)
# combine heads out
out = self.to_out(out)
return rearrange(out, "(b x y) ... -> b x y ...", x=height, y=width)
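# Hypothetical shape check (an addition, not from the original file):
# Attention consumes pre-windowed input of shape
# (batch, n_win_h, n_win_w, win_h, win_w, dim); with with_pe=True the window
# edges must equal window_size so the relative-position table lines up.
def _demo_window_attention():
    attn = Attention(dim=64, dim_head=16, window_size=7, with_pe=True)
    x = torch.randn(2, 4, 4, 7, 7, 64)
    assert attn(x).shape == x.shape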
class Block_Attention(nn.Module):
def __init__(
self,
dim,
dim_head=32,
bias=False,
dropout=0.0,
window_size=7,
with_pe=True,
):
super().__init__()
assert (
dim % dim_head
) == 0, "dimension should be divisible by dimension per head"
self.heads = dim // dim_head
self.ps = window_size
self.scale = dim_head**-0.5
self.with_pe = with_pe
self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias)
self.qkv_dwconv = nn.Conv2d(
dim * 3,
dim * 3,
kernel_size=3,
stride=1,
padding=1,
groups=dim * 3,
bias=bias,
)
self.attend = nn.Sequential(nn.Softmax(dim=-1), nn.Dropout(dropout))
self.to_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
def forward(self, x):
# project for queries, keys, values
b, c, h, w = x.shape
qkv = self.qkv_dwconv(self.qkv(x))
q, k, v = qkv.chunk(3, dim=1)
# split heads
q, k, v = map(
lambda t: rearrange(
t,
"b (h d) (x w1) (y w2) -> (b x y) h (w1 w2) d",
h=self.heads,
w1=self.ps,
w2=self.ps,
),
(q, k, v),
)
# scale
q = q * self.scale
# sim
sim = einsum("b h i d, b h j d -> b h i j", q, k)
# attention
attn = self.attend(sim)
# aggregate
out = einsum("b h i j, b h j d -> b h i d", attn, v)
# merge heads
out = rearrange(
out,
"(b x y) head (w1 w2) d -> b (head d) (x w1) (y w2)",
x=h // self.ps,
y=w // self.ps,
head=self.heads,
w1=self.ps,
w2=self.ps,
)
out = self.to_out(out)
return out
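# Hypothetical shape check (an addition, not from the original file):
# Block_Attention windows a (B, C, H, W) tensor internally, so H and W must
# be multiples of window_size and the output shape matches the input.
def _demo_block_attention():
    ba = Block_Attention(dim=64, dim_head=16, window_size=8)
    x = torch.randn(1, 64, 32, 32)
    assert ba(x).shape == x.shape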
class Channel_Attention(nn.Module):
def __init__(self, dim, heads, bias=False, dropout=0.0, window_size=7):
super(Channel_Attention, self).__init__()
self.heads = heads
self.temperature = nn.Parameter(torch.ones(heads, 1, 1))
self.ps = window_size
self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias)
self.qkv_dwconv = nn.Conv2d(
dim * 3,
dim * 3,
kernel_size=3,
stride=1,
padding=1,
groups=dim * 3,
bias=bias,
)
self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
def forward(self, x):
b, c, h, w = x.shape
qkv = self.qkv_dwconv(self.qkv(x))
qkv = qkv.chunk(3, dim=1)
q, k, v = map(
lambda t: rearrange(
t,
"b (head d) (h ph) (w pw) -> b (h w) head d (ph pw)",
ph=self.ps,
pw=self.ps,
head=self.heads,
),
qkv,
)
q = F.normalize(q, dim=-1)
k = F.normalize(k, dim=-1)
attn = (q @ k.transpose(-2, -1)) * self.temperature
attn = attn.softmax(dim=-1)
out = attn @ v
out = rearrange(
out,
"b (h w) head d (ph pw) -> b (head d) (h ph) (w pw)",
h=h // self.ps,
w=w // self.ps,
ph=self.ps,
pw=self.ps,
head=self.heads,
)
out = self.project_out(out)
return out
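# Hypothetical shape check (an addition, not from the original file):
# Channel_Attention is a transposed attention: per window, similarity is
# computed between channel slices rather than spatial positions, with a
# learnable temperature in place of the usual 1/sqrt(d) scale.
def _demo_channel_attention():
    ca = Channel_Attention(dim=64, heads=4, window_size=8)
    x = torch.randn(1, 64, 32, 32)
    assert ca(x).shape == x.shape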
class Channel_Attention_grid(nn.Module):
def __init__(self, dim, heads, bias=False, dropout=0.0, window_size=7):
super(Channel_Attention_grid, self).__init__()
self.heads = heads
self.temperature = nn.Parameter(torch.ones(heads, 1, 1))
self.ps = window_size
self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias)
self.qkv_dwconv = nn.Conv2d(
dim * 3,
dim * 3,
kernel_size=3,
stride=1,
padding=1,
groups=dim * 3,
bias=bias,
)
self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
def forward(self, x):
b, c, h, w = x.shape
qkv = self.qkv_dwconv(self.qkv(x))
qkv = qkv.chunk(3, dim=1)
q, k, v = map(
lambda t: rearrange(
t,
"b (head d) (h ph) (w pw) -> b (ph pw) head d (h w)",
ph=self.ps,
pw=self.ps,
head=self.heads,
),
qkv,
)
q = F.normalize(q, dim=-1)
k = F.normalize(k, dim=-1)
attn = (q @ k.transpose(-2, -1)) * self.temperature
attn = attn.softmax(dim=-1)
out = attn @ v
out = rearrange(
out,
"b (ph pw) head d (h w) -> b (head d) (h ph) (w pw)",
h=h // self.ps,
w=w // self.ps,
ph=self.ps,
pw=self.ps,
head=self.heads,
)
out = self.project_out(out)
return out
class OSA_Block(nn.Module):
def __init__(
self,
channel_num=64,
bias=True,
ffn_bias=True,
window_size=8,
with_pe=False,
dropout=0.0,
):
super(OSA_Block, self).__init__()
w = window_size
self.layer = nn.Sequential(
MBConv(
channel_num,
channel_num,
downsample=False,
expansion_rate=1,
shrinkage_rate=0.25,
),
Rearrange(
"b d (x w1) (y w2) -> b x y w1 w2 d", w1=w, w2=w
), # block-like attention
PreNormResidual(
channel_num,
Attention(
dim=channel_num,
dim_head=channel_num // 4,
dropout=dropout,
window_size=window_size,
with_pe=with_pe,
),
),
Rearrange("b x y w1 w2 d -> b d (x w1) (y w2)"),
Conv_PreNormResidual(
channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
),
# channel-like attention
Conv_PreNormResidual(
channel_num,
Channel_Attention(
dim=channel_num, heads=4, dropout=dropout, window_size=window_size
),
),
Conv_PreNormResidual(
channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
),
Rearrange(
"b d (w1 x) (w2 y) -> b x y w1 w2 d", w1=w, w2=w
), # grid-like attention
PreNormResidual(
channel_num,
Attention(
dim=channel_num,
dim_head=channel_num // 4,
dropout=dropout,
window_size=window_size,
with_pe=with_pe,
),
),
Rearrange("b x y w1 w2 d -> b d (w1 x) (w2 y)"),
Conv_PreNormResidual(
channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
),
# channel-like attention
Conv_PreNormResidual(
channel_num,
Channel_Attention_grid(
dim=channel_num, heads=4, dropout=dropout, window_size=window_size
),
),
Conv_PreNormResidual(
channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
),
)
def forward(self, x):
out = self.layer(x)
return out
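# Hypothetical smoke test (an addition, not from the original file):
# OSA_Block chains MBConv, block attention, grid attention, and two
# channel-attention variants, each followed by a gated conv FFN. H and W
# must be multiples of window_size; the shape is preserved.
def _demo_osa_block():
    block = OSA_Block(channel_num=64, window_size=8)
    x = torch.randn(1, 64, 32, 32)
    assert block(x).shape == x.shape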
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################
# File: OSAG.py
# Created Date: Tuesday April 28th 2022
# Author: Chen Xuanhong
# Email: chenxuanhongzju@outlook.com
# Last Modified: Sunday, 23rd April 2023 3:08:49 pm
# Modified By: Chen Xuanhong
# Copyright (c) 2020 Shanghai Jiao Tong University
#############################################################
import torch.nn as nn
from .esa import ESA
from .OSA import OSA_Block
class OSAG(nn.Module):
def __init__(
self,
channel_num=64,
bias=True,
block_num=4,
ffn_bias=False,
window_size=0,
pe=False,
):
super(OSAG, self).__init__()
# print("window_size: %d" % (window_size))
# print("with_pe", pe)
# print("ffn_bias: %d" % (ffn_bias))
# block_script_name = kwargs.get("block_script_name", "OSA")
# block_class_name = kwargs.get("block_class_name", "OSA_Block")
# script_name = "." + block_script_name
# package = __import__(script_name, fromlist=True)
block_class = OSA_Block # getattr(package, block_class_name)
group_list = []
for _ in range(block_num):
temp_res = block_class(
channel_num,
bias,
ffn_bias=ffn_bias,
window_size=window_size,
with_pe=pe,
)
group_list.append(temp_res)
group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias))
self.residual_layer = nn.Sequential(*group_list)
esa_channel = max(channel_num // 4, 16)
self.esa = ESA(esa_channel, channel_num)
def forward(self, x):
out = self.residual_layer(x)
out = out + x
return self.esa(out)
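# Hypothetical smoke test (an addition, not from the original file): OSAG
# stacks block_num OSA_Blocks plus a 1x1 conv, adds the input back as a
# residual, and gates the sum with ESA. `import torch` is local because this
# module only imports torch.nn.
def _demo_osag():
    import torch
    group = OSAG(channel_num=64, block_num=1, window_size=8)
    x = torch.randn(1, 64, 32, 32)
    assert group(x).shape == x.shape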
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################
# File: OmniSR.py
# Created Date: Tuesday April 28th 2022
# Author: Chen Xuanhong
# Email: chenxuanhongzju@outlook.com
# Last Modified: Sunday, 23rd April 2023 3:06:36 pm
# Modified By: Chen Xuanhong
# Copyright (c) 2020 Shanghai Jiao Tong University
#############################################################
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from .OSAG import OSAG
from .pixelshuffle import pixelshuffle_block
class OmniSR(nn.Module):
def __init__(
self,
state_dict,
**kwargs,
):
super(OmniSR, self).__init__()
self.state = state_dict
bias = True # Fine to assume this for now
block_num = 1 # Fine to assume this for now
ffn_bias = True
pe = True
num_feat = state_dict["input.weight"].shape[0] or 64
num_in_ch = state_dict["input.weight"].shape[1] or 3
num_out_ch = num_in_ch # assume output channels match input; the pixelshuffle weights alone cannot disambiguate this
pixelshuffle_shape = state_dict["up.0.weight"].shape[0]
up_scale = math.sqrt(pixelshuffle_shape / num_out_ch)
if up_scale - int(up_scale) > 0:
print(
"out_nc is probably different than in_nc, scale calculation might be wrong"
)
up_scale = int(up_scale)
res_num = 0
for key in state_dict.keys():
if "residual_layer" in key:
temp_res_num = int(key.split(".")[1])
if temp_res_num > res_num:
res_num = temp_res_num
res_num = res_num + 1 # indices are zero-based, so the block count is max index + 1
residual_layer = []
self.res_num = res_num
if (
"residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight"
in state_dict.keys()
):
rel_pos_bias_weight = state_dict[
"residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight"
].shape[0]
self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2)
else:
self.window_size = 8
self.up_scale = up_scale
for _ in range(res_num):
temp_res = OSAG(
channel_num=num_feat,
bias=bias,
block_num=block_num,
ffn_bias=ffn_bias,
window_size=self.window_size,
pe=pe,
)
residual_layer.append(temp_res)
self.residual_layer = nn.Sequential(*residual_layer)
self.input = nn.Conv2d(
in_channels=num_in_ch,
out_channels=num_feat,
kernel_size=3,
stride=1,
padding=1,
bias=bias,
)
self.output = nn.Conv2d(
in_channels=num_feat,
out_channels=num_feat,
kernel_size=3,
stride=1,
padding=1,
bias=bias,
)
self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias)
# self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias)
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
# m.weight.data.normal_(0, sqrt(2. / n))
# chaiNNer specific stuff
self.model_arch = "OmniSR"
self.sub_type = "SR"
self.in_nc = num_in_ch
self.out_nc = num_out_ch
self.num_feat = num_feat
self.scale = up_scale
self.supports_fp16 = True # TODO: Test this
self.supports_bfp16 = True
self.min_size_restriction = 16
self.load_state_dict(state_dict, strict=False)
def check_image_size(self, x):
_, _, h, w = x.size()
# import pdb; pdb.set_trace()
mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
# x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0)
return x
def forward(self, x):
H, W = x.shape[2:]
x = self.check_image_size(x)
residual = self.input(x)
out = self.residual_layer(residual)
# origin
out = torch.add(self.output(out), residual)
out = self.up(out)
out = out[:, :, : H * self.up_scale, : W * self.up_scale]
return out
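# Hypothetical usage sketch (an addition, not from the original file):
# OmniSR infers scale, channel counts, and window size from the checkpoint,
# so construction only needs the raw state dict. "model.pth" is a
# placeholder path, not a file shipped with this code.
def _demo_omnisr(path="model.pth"):
    state = torch.load(path, map_location="cpu")
    model = OmniSR(state).eval()
    lr = torch.randn(1, model.in_nc, 24, 24)
    with torch.no_grad():
        sr = model(lr)
    assert sr.shape[2:] == (24 * model.scale, 24 * model.scale)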
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################
# File: esa.py
# Created Date: Tuesday April 28th 2022
# Author: Chen Xuanhong
# Email: chenxuanhongzju@outlook.com
# Last Modified: Thursday, 20th April 2023 9:28:06 am
# Modified By: Chen Xuanhong
# Copyright (c) 2020 Shanghai Jiao Tong University
#############################################################
import torch
import torch.nn as nn
import torch.nn.functional as F
from .layernorm import LayerNorm2d
def moment(x, dim=(2, 3), k=2):
assert len(x.size()) == 4
mean = torch.mean(x, dim=dim).unsqueeze(-1).unsqueeze(-1)
mk = (1 / (x.size(2) * x.size(3))) * torch.sum(torch.pow(x - mean, k), dim=dim)
return mk
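# Hypothetical numeric check (an addition, not from the original file): for
# k=2, moment() is the biased per-channel spatial variance, matching
# Tensor.var over the spatial dims with unbiased=False.
def _demo_moment():
    x = torch.randn(2, 3, 8, 8)
    assert torch.allclose(moment(x, k=2), x.var(dim=(2, 3), unbiased=False), atol=1e-6)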
class ESA(nn.Module):
"""
Modification of Enhanced Spatial Attention (ESA), which is proposed by
`Residual Feature Aggregation Network for Image Super-Resolution`
Note: `conv_max` and `conv3_` are NOT used here, so the corresponding code
has been removed.
"""
def __init__(self, esa_channels, n_feats, conv=nn.Conv2d):
super(ESA, self).__init__()
f = esa_channels
self.conv1 = conv(n_feats, f, kernel_size=1)
self.conv_f = conv(f, f, kernel_size=1)
self.conv2 = conv(f, f, kernel_size=3, stride=2, padding=0)
self.conv3 = conv(f, f, kernel_size=3, padding=1)
self.conv4 = conv(f, n_feats, kernel_size=1)
self.sigmoid = nn.Sigmoid()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
c1_ = self.conv1(x)
c1 = self.conv2(c1_)
v_max = F.max_pool2d(c1, kernel_size=7, stride=3)
c3 = self.conv3(v_max)
c3 = F.interpolate(
c3, (x.size(2), x.size(3)), mode="bilinear", align_corners=False
)
cf = self.conv_f(c1_)
c4 = self.conv4(c3 + cf)
m = self.sigmoid(c4)
return x * m
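# Hypothetical shape check (an addition, not from the original file): ESA
# downsamples with a stride-2 conv and a 7x7/stride-3 max-pool, then
# interpolates the sigmoid mask back to the input resolution, so the output
# shape equals the input shape (inputs should not be tiny).
def _demo_esa():
    esa = ESA(16, 64)
    x = torch.randn(1, 64, 32, 32)
    assert esa(x).shape == x.shape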
class LK_ESA(nn.Module):
def __init__(
self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
):
super(LK_ESA, self).__init__()
f = esa_channels
self.conv1 = conv(n_feats, f, kernel_size=1)
self.conv_f = conv(f, f, kernel_size=1)
kernel_size = 17
padding = kernel_size // 2
self.vec_conv = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(1, kernel_size),
padding=(0, padding),
groups=2,
bias=bias,
)
self.vec_conv3x1 = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(1, 3),
padding=(0, 1),
groups=2,
bias=bias,
)
self.hor_conv = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(kernel_size, 1),
padding=(padding, 0),
groups=2,
bias=bias,
)
self.hor_conv1x3 = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(3, 1),
padding=(1, 0),
groups=2,
bias=bias,
)
self.conv4 = conv(f, n_feats, kernel_size=1)
self.sigmoid = nn.Sigmoid()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
c1_ = self.conv1(x)
res = self.vec_conv(c1_) + self.vec_conv3x1(c1_)
res = self.hor_conv(res) + self.hor_conv1x3(res)
cf = self.conv_f(c1_)
c4 = self.conv4(res + cf)
m = self.sigmoid(c4)
return x * m
class LK_ESA_LN(nn.Module):
def __init__(
self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
):
super(LK_ESA_LN, self).__init__()
f = esa_channels
self.conv1 = conv(n_feats, f, kernel_size=1)
self.conv_f = conv(f, f, kernel_size=1)
kernel_size = 17
padding = kernel_size // 2
self.norm = LayerNorm2d(n_feats)
self.vec_conv = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(1, kernel_size),
padding=(0, padding),
groups=2,
bias=bias,
)
self.vec_conv3x1 = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(1, 3),
padding=(0, 1),
groups=2,
bias=bias,
)
self.hor_conv = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(kernel_size, 1),
padding=(padding, 0),
groups=2,
bias=bias,
)
self.hor_conv1x3 = nn.Conv2d(
in_channels=f * kernel_expand,
out_channels=f * kernel_expand,
kernel_size=(3, 1),
padding=(1, 0),
groups=2,
bias=bias,
)
self.conv4 = conv(f, n_feats, kernel_size=1)
self.sigmoid = nn.Sigmoid()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
c1_ = self.norm(x)
c1_ = self.conv1(c1_)
res = self.vec_conv(c1_) + self.vec_conv3x1(c1_)
res = self.hor_conv(res) + self.hor_conv1x3(res)
cf = self.conv_f(c1_)
c4 = self.conv4(res + cf)
m = self.sigmoid(c4)
return x * m
class AdaGuidedFilter(nn.Module):
def __init__(
self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
):
super(AdaGuidedFilter, self).__init__()
self.gap = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(
in_channels=n_feats,
out_channels=1,
kernel_size=1,
padding=0,
stride=1,
groups=1,
bias=True,
)
self.r = 5
def box_filter(self, x, r):
channel = x.shape[1]
kernel_size = 2 * r + 1
weight = 1.0 / (kernel_size**2)
box_kernel = weight * torch.ones(
(channel, 1, kernel_size, kernel_size), dtype=torch.float32, device=x.device
)
output = F.conv2d(x, weight=box_kernel, stride=1, padding=r, groups=channel)
return output
def forward(self, x):
_, _, H, W = x.shape
N = self.box_filter(
torch.ones((1, 1, H, W), dtype=x.dtype, device=x.device), self.r
)
# epsilon = self.fc(self.gap(x))
# epsilon = torch.pow(epsilon, 2)
epsilon = 1e-2
mean_x = self.box_filter(x, self.r) / N
var_x = self.box_filter(x * x, self.r) / N - mean_x * mean_x
A = var_x / (var_x + epsilon)
b = (1 - A) * mean_x
m = A * x + b
# mean_A = self.box_filter(A, self.r) / N
# mean_b = self.box_filter(b, self.r) / N
# m = mean_A * x + mean_b
return x * m
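# Hypothetical numeric check (an addition, not from the original file): on a
# flat region var_x is ~0, so A ~ 0 and b ~ mean_x; the guided-filter
# estimate m collapses to the local mean and the gate returns x * mean_x.
def _demo_ada_guided_filter():
    agf = AdaGuidedFilter(16, 64)
    x = torch.full((1, 64, 16, 16), 2.0)
    assert torch.allclose(agf(x), torch.full((1, 64, 16, 16), 4.0), atol=1e-3)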
class AdaConvGuidedFilter(nn.Module):
def __init__(
self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
):
super(AdaConvGuidedFilter, self).__init__()
f = esa_channels
self.conv_f = conv(f, f, kernel_size=1)
kernel_size = 17
padding = kernel_size // 2
self.vec_conv = nn.Conv2d(
in_channels=f,
out_channels=f,
kernel_size=(1, kernel_size),
padding=(0, padding),
groups=f,
bias=bias,
)
self.hor_conv = nn.Conv2d(
in_channels=f,
out_channels=f,
kernel_size=(kernel_size, 1),
padding=(padding, 0),
groups=f,
bias=bias,
)
self.gap = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(
in_channels=f,
out_channels=f,
kernel_size=1,
padding=0,
stride=1,
groups=1,
bias=True,
)
def forward(self, x):
y = self.vec_conv(x)
y = self.hor_conv(y)
sigma = torch.pow(y, 2)
epsilon = self.fc(self.gap(y))
weight = sigma / (sigma + epsilon)
m = weight * x + (1 - weight)
return x * m