Commit edf89af3 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

small fixes

parent 36f7d1db
......@@ -17,6 +17,7 @@ double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
output_features.copy_(bias);
else
output_features.zero_();
if (nActive)
output_features.addmm_(input_features, weight);
return nActive * input_nPlanes * output_nPlanes;
}
......@@ -26,8 +27,10 @@ void cpu_NetworkInNetwork_updateGradInput(
/*float*/ at::Tensor d_output_features,
/*float*/ at::Tensor weight) {
d_input_features.resize_({(int)d_output_features.size(0), weight.size(0)});
int nActive = d_output_features.size(0);
d_input_features.resize_({nActive, weight.size(0)});
d_input_features.zero_();
if (nActive)
at::mm_out(d_input_features, d_output_features, weight.t());
}
template <typename T>
......@@ -38,5 +41,6 @@ void cpu_NetworkInNetwork_accGradParameters(
auto nActive = input_features.size(0);
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
if (nActive)
at::mm_out(d_weight, input_features.t(), d_output_features);
}
......@@ -14,12 +14,13 @@ double cuda_NetworkInNetwork_updateOutput(
auto nActive = input_features.size(0);
auto input_nPlanes = weight.size(0);
auto output_nPlanes = weight.size(1);
output_features.resize_({nActive, input_nPlanes});
output_features.resize_({nActive, output_nPlanes});
if (bias.numel())
output_features.copy_(bias);
else
output_features.zero_();
output_features.addmm(input_features, weight);
if (nActive)
output_features.addmm_(input_features, weight);
return nActive * input_nPlanes * output_nPlanes;
}
......@@ -28,8 +29,11 @@ void cuda_NetworkInNetwork_updateGradInput(
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight) {
d_input_features.resize_({(int)d_output_features.size(0), weight.size(0)});
int nActive = d_output_features.size(0);
d_input_features.resize_({nActive, weight.size(0)});
d_input_features.zero_();
if (nActive)
at::mm_out(d_input_features, d_output_features, weight.t());
}
......@@ -41,5 +45,6 @@ void cuda_NetworkInNetwork_accGradParameters(
auto nActive = input_features.size(0);
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
if (nActive)
at::mm_out(d_weight, input_features.t(), d_output_features);
}
......@@ -6,7 +6,7 @@
forward_pass_multiplyAdd_count = 0
forward_pass_hidden_states = 0
from .activations import Tanh, Sigmoid, ReLU, ELU, BatchNormELU
from .activations import Tanh, Sigmoid, ReLU, ELU, SELU, BatchNormELU
from .averagePooling import AveragePooling
from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
from .classificationTrainValidate import ClassificationTrainValidate
......
......@@ -48,5 +48,13 @@ class ELU(Module):
output.spatial_size = input.spatial_size
return output
class SELU(Module):
    """Apply the SELU activation elementwise to a SparseConvNetTensor's features.

    Only the feature tensor is transformed; the sparsity structure
    (metadata and spatial_size) is forwarded unchanged, matching the
    other activation modules in this file (ELU, ReLU, ...).
    """

    def forward(self, input):
        result = SparseConvNetTensor()
        result.features = F.selu(input.features)
        # Pass the spatial bookkeeping through untouched.
        result.metadata = input.metadata
        result.spatial_size = input.spatial_size
        return result
def BatchNormELU(nPlanes, eps=1e-4, momentum=0.9):
    """Factory: a Sequential of BatchNormalization(nPlanes, eps, momentum)
    followed by an ELU activation."""
    module = sparseconvnet.Sequential()
    module = module.add(BatchNormalization(nPlanes, eps, momentum))
    module = module.add(ELU())
    return module
......@@ -55,7 +55,7 @@ class InputBatch(SparseConvNetTensor):
self.metadata.setInputSpatialLocations(
self.features, locations.contiguous(), vectors.contiguous(), overwrite)
def set_locations_(self, locations, vector, overwrite=False):
def set_locations_(self, locations, vectors, overwrite=False):
    """In-place variant of set_locations: forwards locations/vectors to the
    metadata as-is (unlike set_locations, it does not call .contiguous() on
    them — that is the caller's responsibility)."""
    self.metadata.setInputSpatialLocations(
        self.features,
        locations,
        vectors,
        overwrite)
......
......@@ -39,12 +39,11 @@ class NetworkInNetworkFunction(Function):
weight,\
bias = ctx.saved_tensors
grad_input = grad_output.new()
grad_weight = grad_output.new().resize_as_(weight).zero_()
grad_bias = torch.zeros_like(bias)
grad_weight = torch.zeros_like(weight)
if bias is None:
grad_bias = None
else:
grad_bias = grad_output.new().resize_as_(bias)
grad_bias = torch.zeros_like(bias)
sparseconvnet_SCN.NetworkInNetwork_updateGradInput(
grad_input,
grad_output,
......
import torch
import torch
from torch.nn.functional import normalize
from torch.nn.parameter import Parameter
class SpectralNorm(object):
    """Forward pre-hook object that rescales a module's weight by an estimate
    of its largest singular value (spectral norm), obtained by power iteration.

    Adapted from torch.nn.utils.spectral_norm (see the `spectral_norm`
    helper below, which attaches an instance of this class to a module).

    NOTE(review): indentation was reconstructed from the upstream PyTorch
    implementation; confirm against the original file.
    """

    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
        # name: attribute name of the parameter to normalize.
        # dim:  dimension treated as the "row"/output axis when the weight is
        #       flattened to a 2-D matrix for the power iteration.
        # eps:  numerical-stability epsilon passed to normalize().
        self.name = name
        self.dim = dim
        if n_power_iterations <= 0:
            raise ValueError('Expected n_power_iterations to be positive, but '
                             'got n_power_iterations={}'.format(n_power_iterations))
        self.n_power_iterations = n_power_iterations
        self.eps = eps

    def compute_weight(self, module):
        """Return (weight / sigma, u): the spectrally-normalized weight and the
        refreshed estimate of the first left singular vector."""
        weight = getattr(module, self.name + '_orig')
        u = getattr(module, self.name + '_u')
        weight_mat = weight
        if self.dim != 0:
            # permute dim to front
            weight_mat = weight_mat.permute(self.dim,
                                            *[d for d in range(weight_mat.dim()) if d != self.dim])
        height = weight_mat.size(0)
        weight_mat = weight_mat.reshape(height, -1)
        with torch.no_grad():
            for _ in range(self.n_power_iterations):
                # Spectral norm of weight equals to `u^T W v`, where `u` and `v`
                # are the first left and right singular vectors.
                # This power iteration produces approximations of `u` and `v`.
                v = normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
                u = normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
        # sigma is computed outside no_grad so that the division below
        # participates in autograd through `weight`.
        sigma = torch.dot(u, torch.matmul(weight_mat, v))
        weight = weight / sigma
        return weight, u

    def remove(self, module):
        """Undo `apply`: drop the hook's buffers and restore `name` as a plain
        nn.Parameter holding the current (normalized) weight."""
        weight = getattr(module, self.name)
        delattr(module, self.name)
        delattr(module, self.name + '_u')
        delattr(module, self.name + '_orig')
        module.register_parameter(self.name, torch.nn.Parameter(weight))

    def __call__(self, module, inputs):
        # Runs as a forward pre-hook. In training mode, recompute the
        # normalized weight (and refresh `u`) before every forward pass;
        # in eval mode, reuse the stored weight but detach it in place,
        # preserving its requires_grad flag.
        if module.training:
            weight, u = self.compute_weight(module)
            setattr(module, self.name, weight)
            setattr(module, self.name + '_u', u)
        else:
            r_g = getattr(module, self.name + '_orig').requires_grad
            getattr(module, self.name).detach_().requires_grad_(r_g)

    @staticmethod
    def apply(module, name, n_power_iterations, dim, eps):
        """Attach a SpectralNorm hook to `module` for parameter `name` and
        return the hook object.

        Replaces the parameter with a `name + '_orig'` parameter plus
        `name` / `name + '_u'` buffers, then registers the hook.
        """
        fn = SpectralNorm(name, n_power_iterations, dim, eps)
        weight = module._parameters[name]
        height = weight.size(dim)
        # Random initial estimate of the first left singular vector.
        u = normalize(weight.new_empty(height).normal_(0, 1), dim=0, eps=fn.eps)
        delattr(module, fn.name)
        module.register_parameter(fn.name + "_orig", weight)
        # We still need to assign weight back as fn.name because all sorts of
        # things may assume that it exists, e.g., when initializing weights.
        # However, we can't directly assign as it could be an nn.Parameter and
        # gets added as a parameter. Instead, we register weight.data as a
        # buffer, which will cause weight to be included in the state dict
        # and also supports nn.init due to shared storage.
        module.register_buffer(fn.name, weight.data)
        module.register_buffer(fn.name + "_u", u)
        module.register_forward_pre_hook(fn)
        return fn
def spectral_norm(module, n_power_iterations=1, eps=1e-12):
    """Apply spectral normalization to `module`'s 'weight' parameter.

    Based on:
    https://github.com/pytorch/pytorch/blob/master/torch/nn/utils/spectral_norm.py

    Args:
        module: module holding a 'weight' parameter to normalize.
        n_power_iterations: power iterations performed per training forward pass.
        eps: numerical-stability epsilon for vector normalization.

    Returns:
        The same `module`, with a SpectralNorm forward pre-hook registered.
    """
    # NOTE(review): dim=1 presumably matches a (in_planes, out_planes) weight
    # layout so that dim 1 is the output axis — confirm against the layers
    # this is applied to.
    dim = 1
    # Fix: the previous code first called
    #   torch.nn.utils.SpectralNorm.apply(module, name, ...)
    # which raised NameError (`name` undefined) and would have applied the
    # hook twice. Use only the local SpectralNorm implementation.
    SpectralNorm.apply(module, 'weight', n_power_iterations, dim, eps)
    return module
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment