Commit edf89af3 authored by Benjamin Thomas Graham

small fixes

parent 36f7d1db
@@ -17,7 +17,8 @@ double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
     output_features.copy_(bias);
   else
     output_features.zero_();
-  output_features.addmm_(input_features, weight);
+  if (nActive)
+    output_features.addmm_(input_features, weight);
   return nActive * input_nPlanes * output_nPlanes;
 }
 template <typename T>
@@ -26,9 +27,11 @@ void cpu_NetworkInNetwork_updateGradInput(
     /*float*/ at::Tensor d_output_features,
     /*float*/ at::Tensor weight) {
-  d_input_features.resize_({(int)d_output_features.size(0), weight.size(0)});
+  int nActive = d_output_features.size(0);
+  d_input_features.resize_({nActive, weight.size(0)});
   d_input_features.zero_();
-  at::mm_out(d_input_features, d_output_features, weight.t());
+  if (nActive)
+    at::mm_out(d_input_features, d_output_features, weight.t());
 }
 template <typename T>
 void cpu_NetworkInNetwork_accGradParameters(
@@ -38,5 +41,6 @@ void cpu_NetworkInNetwork_accGradParameters(
   auto nActive = input_features.size(0);
   if (nActive and d_bias.numel())
     at::sum_out(d_bias, d_output_features, {0}, false);
-  at::mm_out(d_weight, input_features.t(), d_output_features);
+  if (nActive)
+    at::mm_out(d_weight, input_features.t(), d_output_features);
 }
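The recurring change in these CPU hunks guards each GEMM behind `if (nActive)`, so nothing is launched when the batch contains no active spatial sites (zero-row operands were not handled reliably by these ATen calls). A minimal Python re-creation of the guarded forward pass, for illustration only; the function name and Python form are not the library's actual code path:

```python
import torch

def update_output(input_features, weight, bias=None):
    # Mirrors the C++ above: nActive rows of features, one GEMM, optional bias.
    nActive = input_features.size(0)
    output_features = input_features.new_zeros(nActive, weight.size(1))
    if bias is not None:
        output_features += bias                         # broadcast bias over rows
    if nActive:                                         # the commit's guard:
        output_features.addmm_(input_features, weight)  # skip the GEMM when empty
    return output_features

x = torch.randn(0, 4)                  # an empty batch of active sites
w = torch.randn(4, 3)
print(update_output(x, w).shape)       # torch.Size([0, 3]), no GEMM launched
```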
@@ -14,12 +14,13 @@ double cuda_NetworkInNetwork_updateOutput(
   auto nActive = input_features.size(0);
   auto input_nPlanes = weight.size(0);
   auto output_nPlanes = weight.size(1);
-  output_features.resize_({nActive, input_nPlanes});
+  output_features.resize_({nActive, output_nPlanes});
   if (bias.numel())
     output_features.copy_(bias);
   else
     output_features.zero_();
-  output_features.addmm(input_features, weight);
+  if (nActive)
+    output_features.addmm_(input_features, weight);
   return nActive * input_nPlanes * output_nPlanes;
 }
@@ -28,9 +29,12 @@ void cuda_NetworkInNetwork_updateGradInput(
     /*cuda float*/ at::Tensor d_input_features,
     /*cuda float*/ at::Tensor d_output_features,
     /*cuda float*/ at::Tensor weight) {
-  d_input_features.resize_({(int)d_output_features.size(0), weight.size(0)});
+  int nActive = d_output_features.size(0);
+  d_input_features.resize_({nActive, weight.size(0)});
   d_input_features.zero_();
-  at::mm_out(d_input_features, d_output_features, weight.t());
+  if (nActive)
+    at::mm_out(d_input_features, d_output_features, weight.t());
 }
 template <typename T>
@@ -41,5 +45,6 @@ void cuda_NetworkInNetwork_accGradParameters(
   auto nActive = input_features.size(0);
   if (nActive and d_bias.numel())
     at::sum_out(d_bias, d_output_features, {0}, false);
-  at::mm_out(d_weight, input_features.t(), d_output_features);
+  if (nActive)
+    at::mm_out(d_weight, input_features.t(), d_output_features);
 }
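Besides the same `if (nActive)` guards, the CUDA hunks fix two real bugs in `updateOutput`: the output was resized to `input_nPlanes` columns instead of `output_nPlanes`, and the out-of-place `addmm` computed the product but threw the result away; `addmm_` accumulates in place. A plain-PyTorch sketch of why the missing underscore mattered:

```python
import torch

out = torch.zeros(2, 3)
a = torch.ones(2, 4)
b = torch.ones(4, 3)

out.addmm(a, b)    # out-of-place: returns a new tensor, `out` is untouched
print(out.sum())   # tensor(0.)  -- the GEMM result was silently dropped
out.addmm_(a, b)   # in-place: accumulates a @ b into `out`
print(out.sum())   # tensor(24.)
```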
@@ -6,7 +6,7 @@
 forward_pass_multiplyAdd_count = 0
 forward_pass_hidden_states = 0
-from .activations import Tanh, Sigmoid, ReLU, ELU, BatchNormELU
+from .activations import Tanh, Sigmoid, ReLU, ELU, SELU, BatchNormELU
 from .averagePooling import AveragePooling
 from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
 from .classificationTrainValidate import ClassificationTrainValidate
@@ -48,5 +48,13 @@ class ELU(Module):
         output.spatial_size = input.spatial_size
         return output
+
+class SELU(Module):
+    def forward(self, input):
+        output = SparseConvNetTensor()
+        output.features = F.selu(input.features)
+        output.metadata = input.metadata
+        output.spatial_size = input.spatial_size
+        return output
 def BatchNormELU(nPlanes, eps=1e-4, momentum=0.9):
     return sparseconvnet.Sequential().add(BatchNormalization(nPlanes,eps,momentum)).add(ELU())
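The new `SELU` module is stateless and elementwise: it applies `F.selu` to `input.features` and passes `metadata` and `spatial_size` through untouched. A quick plain-PyTorch check of the feature transform (the tensor below is a stand-in for `input.features`, not data from the commit):

```python
import torch
import torch.nn.functional as F

features = torch.randn(5, 8)          # (nActive, nPlanes) stand-in
out = F.selu(features)
assert out.shape == features.shape    # elementwise, so sparse metadata is unaffected
```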
@@ -55,7 +55,7 @@ class InputBatch(SparseConvNetTensor):
         self.metadata.setInputSpatialLocations(
             self.features, locations.contiguous(), vectors.contiguous(), overwrite)
-    def set_locations_(self, locations, vector, overwrite=False):
+    def set_locations_(self, locations, vectors, overwrite=False):
         self.metadata.setInputSpatialLocations(
             self.features, locations, vectors, overwrite)
@@ -39,12 +39,11 @@ class NetworkInNetworkFunction(Function):
         weight,\
         bias = ctx.saved_tensors
         grad_input = grad_output.new()
-        grad_weight = grad_output.new().resize_as_(weight).zero_()
-        grad_bias = torch.zeros_like(bias)
+        grad_weight = torch.zeros_like(weight)
         if bias is None:
             grad_bias = None
         else:
-            grad_bias = grad_output.new().resize_as_(bias)
+            grad_bias = torch.zeros_like(bias)
         sparseconvnet_SCN.NetworkInNetwork_updateGradInput(
             grad_input,
             grad_output,
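The point of this hunk: `grad_output.new().resize_as_(...)` allocates uninitialized memory, which yields garbage gradients whenever the backward kernel writes nothing (exactly the empty-batch case the other hunks now skip), while `torch.zeros_like` is well defined regardless. A comparison sketch, with `new_empty` standing in for the old pattern:

```python
import torch

bias = torch.randn(16)
stale = bias.new_empty(bias.shape)    # old style: contents are arbitrary
grad_bias = torch.zeros_like(bias)    # new style: guaranteed zeros
print(grad_bias.sum())                # tensor(0.), even if no kernel writes to it
```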
 import torch
+import torch
+from torch.nn.functional import normalize
+from torch.nn.parameter import Parameter
+
+class SpectralNorm(object):
+    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
+        self.name = name
+        self.dim = dim
+        if n_power_iterations <= 0:
+            raise ValueError('Expected n_power_iterations to be positive, but '
+                             'got n_power_iterations={}'.format(n_power_iterations))
+        self.n_power_iterations = n_power_iterations
+        self.eps = eps
+
+    def compute_weight(self, module):
+        weight = getattr(module, self.name + '_orig')
+        u = getattr(module, self.name + '_u')
+        weight_mat = weight
+        if self.dim != 0:
+            # permute dim to front
+            weight_mat = weight_mat.permute(self.dim,
+                *[d for d in range(weight_mat.dim()) if d != self.dim])
+        height = weight_mat.size(0)
+        weight_mat = weight_mat.reshape(height, -1)
+        with torch.no_grad():
+            for _ in range(self.n_power_iterations):
+                # Spectral norm of weight equals to `u^T W v`, where `u` and `v`
+                # are the first left and right singular vectors.
+                # This power iteration produces approximations of `u` and `v`.
+                v = normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
+                u = normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
+        sigma = torch.dot(u, torch.matmul(weight_mat, v))
+        weight = weight / sigma
+        return weight, u
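`compute_weight` runs power iteration to approximate the largest singular value `sigma` of the flattened weight matrix, then returns `weight / sigma`. The iteration can be checked in isolation against an SVD; this sketch assumes a recent PyTorch with `torch.linalg.svdvals` and is not part of the commit:

```python
import torch
from torch.nn.functional import normalize

torch.manual_seed(0)
W = torch.randn(32, 64)
u = normalize(torch.randn(32), dim=0)
for _ in range(5):
    v = normalize(W.t() @ u, dim=0)   # approximate right singular vector
    u = normalize(W @ v, dim=0)       # approximate left singular vector
sigma = torch.dot(u, W @ v)
print(sigma.item(), torch.linalg.svdvals(W)[0].item())  # the two values closely agree
```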
+    def remove(self, module):
+        weight = getattr(module, self.name)
+        delattr(module, self.name)
+        delattr(module, self.name + '_u')
+        delattr(module, self.name + '_orig')
+        module.register_parameter(self.name, torch.nn.Parameter(weight))
+
+    def __call__(self, module, inputs):
+        if module.training:
+            weight, u = self.compute_weight(module)
+            setattr(module, self.name, weight)
+            setattr(module, self.name + '_u', u)
+        else:
+            r_g = getattr(module, self.name + '_orig').requires_grad
+            getattr(module, self.name).detach_().requires_grad_(r_g)
+
+    @staticmethod
+    def apply(module, name, n_power_iterations, dim, eps):
+        fn = SpectralNorm(name, n_power_iterations, dim, eps)
+        weight = module._parameters[name]
+        height = weight.size(dim)
+        u = normalize(weight.new_empty(height).normal_(0, 1), dim=0, eps=fn.eps)
+        delattr(module, fn.name)
+        module.register_parameter(fn.name + "_orig", weight)
+        # We still need to assign weight back as fn.name because all sorts of
+        # things may assume that it exists, e.g., when initializing weights.
+        # However, we can't directly assign as it could be an nn.Parameter and
+        # gets added as a parameter. Instead, we register weight.data as a
+        # buffer, which will cause weight to be included in the state dict
+        # and also supports nn.init due to shared storage.
+        module.register_buffer(fn.name, weight.data)
+        module.register_buffer(fn.name + "_u", u)
+        module.register_forward_pre_hook(fn)
+        return fn
 def spectral_norm(module, n_power_iterations=1, eps=1e-12):
     """
     https://github.com/pytorch/pytorch/blob/master/torch/nn/utils/spectral_norm.py
     """
     dim=1
-    torch.nn.utils.SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
+    #torch.nn.utils.
+    SpectralNorm.apply(module, 'weight', n_power_iterations, dim, eps)
     return module
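The fix replaces a call into `torch.nn.utils` that passed an undefined `name` with a direct call to the vendored class, hard-coding `'weight'`. Note `dim=1` here, unlike the `dim=0` default of PyTorch's own `spectral_norm`; this matches the NetworkInNetwork weights above, which put output planes on dimension 1. Hypothetical usage with the definitions above in scope, on a plain linear layer for illustration:

```python
import torch

layer = torch.nn.Linear(8, 4)
layer = spectral_norm(layer)     # registers weight_orig, weight_u and a pre-forward hook
y = layer(torch.randn(2, 8))     # the hook rescales weight by 1/sigma before forward
```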