Commit edf89af3 authored by Benjamin Thomas Graham

small fixes

parent 36f7d1db
@@ -17,7 +17,8 @@ double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
     output_features.copy_(bias);
   else
     output_features.zero_();
-  output_features.addmm_(input_features, weight);
+  if (nActive)
+    output_features.addmm_(input_features, weight);
   return nActive * input_nPlanes * output_nPlanes;
 }
 template <typename T>
@@ -26,9 +27,11 @@ void cpu_NetworkInNetwork_updateGradInput(
     /*float*/ at::Tensor d_output_features,
     /*float*/ at::Tensor weight) {
-  d_input_features.resize_({(int)d_output_features.size(0), weight.size(0)});
+  int nActive = d_output_features.size(0);
+  d_input_features.resize_({nActive, weight.size(0)});
   d_input_features.zero_();
-  at::mm_out(d_input_features, d_output_features, weight.t());
+  if (nActive)
+    at::mm_out(d_input_features, d_output_features, weight.t());
 }
 template <typename T>
 void cpu_NetworkInNetwork_accGradParameters(
@@ -38,5 +41,6 @@ void cpu_NetworkInNetwork_accGradParameters(
   auto nActive = input_features.size(0);
   if (nActive and d_bias.numel())
     at::sum_out(d_bias, d_output_features, {0}, false);
-  at::mm_out(d_weight, input_features.t(), d_output_features);
+  if (nActive)
+    at::mm_out(d_weight, input_features.t(), d_output_features);
 }
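The recurring change in these CPU hunks guards each GEMM behind `if (nActive)`, so nothing is launched when the batch contains no active spatial sites (zero-row operands were not handled reliably by these ATen calls). A minimal Python re-creation of the guarded forward pass, for illustration only; the function name and Python form are not the library's actual code path:

```python
import torch

def update_output(input_features, weight, bias=None):
    # Mirrors the C++ above: nActive rows of features, one GEMM, optional bias.
    nActive = input_features.size(0)
    output_features = input_features.new_zeros(nActive, weight.size(1))
    if bias is not None:
        output_features += bias                         # broadcast bias over rows
    if nActive:                                         # the commit's guard:
        output_features.addmm_(input_features, weight)  # skip the GEMM when empty
    return output_features

x = torch.randn(0, 4)                  # an empty batch of active sites
w = torch.randn(4, 3)
print(update_output(x, w).shape)       # torch.Size([0, 3]), no GEMM launched
```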
@@ -14,12 +14,13 @@ double cuda_NetworkInNetwork_updateOutput(
   auto nActive = input_features.size(0);
   auto input_nPlanes = weight.size(0);
   auto output_nPlanes = weight.size(1);
-  output_features.resize_({nActive, input_nPlanes});
+  output_features.resize_({nActive, output_nPlanes});
   if (bias.numel())
     output_features.copy_(bias);
   else
     output_features.zero_();
-  output_features.addmm(input_features, weight);
+  if (nActive)
+    output_features.addmm_(input_features, weight);
   return nActive * input_nPlanes * output_nPlanes;
 }
@@ -28,9 +29,12 @@ void cuda_NetworkInNetwork_updateGradInput(
     /*cuda float*/ at::Tensor d_input_features,
     /*cuda float*/ at::Tensor d_output_features,
     /*cuda float*/ at::Tensor weight) {
-  d_input_features.resize_({(int)d_output_features.size(0), weight.size(0)});
+  int nActive = d_output_features.size(0);
+  d_input_features.resize_({nActive, weight.size(0)});
   d_input_features.zero_();
-  at::mm_out(d_input_features, d_output_features, weight.t());
+  if (nActive)
+    at::mm_out(d_input_features, d_output_features, weight.t());
 }
 template <typename T>
@@ -41,5 +45,6 @@ void cuda_NetworkInNetwork_accGradParameters(
   auto nActive = input_features.size(0);
   if (nActive and d_bias.numel())
     at::sum_out(d_bias, d_output_features, {0}, false);
-  at::mm_out(d_weight, input_features.t(), d_output_features);
+  if (nActive)
+    at::mm_out(d_weight, input_features.t(), d_output_features);
 }
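Besides the same `if (nActive)` guards, the CUDA hunks fix two real bugs in `updateOutput`: the output was resized to `input_nPlanes` columns instead of `output_nPlanes`, and the out-of-place `addmm` computed the product but threw the result away; `addmm_` accumulates in place. A plain-PyTorch sketch of why the missing underscore mattered:

```python
import torch

out = torch.zeros(2, 3)
a = torch.ones(2, 4)
b = torch.ones(4, 3)

out.addmm(a, b)    # out-of-place: returns a new tensor, `out` is untouched
print(out.sum())   # tensor(0.)  -- the GEMM result was silently dropped
out.addmm_(a, b)   # in-place: accumulates a @ b into `out`
print(out.sum())   # tensor(24.)
```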
@@ -6,7 +6,7 @@
 forward_pass_multiplyAdd_count = 0
 forward_pass_hidden_states = 0
-from .activations import Tanh, Sigmoid, ReLU, ELU, BatchNormELU
+from .activations import Tanh, Sigmoid, ReLU, ELU, SELU, BatchNormELU
 from .averagePooling import AveragePooling
 from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
 from .classificationTrainValidate import ClassificationTrainValidate
@@ -48,5 +48,13 @@ class ELU(Module):
         output.spatial_size = input.spatial_size
         return output
+
+class SELU(Module):
+    def forward(self, input):
+        output = SparseConvNetTensor()
+        output.features = F.selu(input.features)
+        output.metadata = input.metadata
+        output.spatial_size = input.spatial_size
+        return output
 def BatchNormELU(nPlanes, eps=1e-4, momentum=0.9):
     return sparseconvnet.Sequential().add(BatchNormalization(nPlanes,eps,momentum)).add(ELU())
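The new `SELU` module is stateless and elementwise: it applies `F.selu` to `input.features` and passes `metadata` and `spatial_size` through untouched. A quick plain-PyTorch check of the feature transform (the tensor below is a stand-in for `input.features`, not data from the commit):

```python
import torch
import torch.nn.functional as F

features = torch.randn(5, 8)          # (nActive, nPlanes) stand-in
out = F.selu(features)
assert out.shape == features.shape    # elementwise, so sparse metadata is unaffected
```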
@@ -55,7 +55,7 @@ class InputBatch(SparseConvNetTensor):
         self.metadata.setInputSpatialLocations(
             self.features, locations.contiguous(), vectors.contiguous(), overwrite)
-    def set_locations_(self, locations, vector, overwrite=False):
+    def set_locations_(self, locations, vectors, overwrite=False):
         self.metadata.setInputSpatialLocations(
             self.features, locations, vectors, overwrite)
@@ -39,12 +39,11 @@ class NetworkInNetworkFunction(Function):
         weight,\
         bias = ctx.saved_tensors
         grad_input = grad_output.new()
-        grad_weight = grad_output.new().resize_as_(weight).zero_()
-        grad_bias = torch.zeros_like(bias)
+        grad_weight = torch.zeros_like(weight)
         if bias is None:
             grad_bias = None
         else:
-            grad_bias = grad_output.new().resize_as_(bias)
+            grad_bias = torch.zeros_like(bias)
         sparseconvnet_SCN.NetworkInNetwork_updateGradInput(
             grad_input,
             grad_output,
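The point of this hunk: `grad_output.new().resize_as_(...)` allocates uninitialized memory, which yields garbage gradients whenever the backward kernel writes nothing (exactly the empty-batch case the other hunks now skip), while `torch.zeros_like` is well defined regardless. A comparison sketch, with `new_empty` standing in for the old pattern:

```python
import torch

bias = torch.randn(16)
stale = bias.new_empty(bias.shape)    # old style: contents are arbitrary
grad_bias = torch.zeros_like(bias)    # new style: guaranteed zeros
print(grad_bias.sum())                # tensor(0.), even if no kernel writes to it
```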
 import torch
+import torch
+from torch.nn.functional import normalize
+from torch.nn.parameter import Parameter
+
+class SpectralNorm(object):
+    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
+        self.name = name
+        self.dim = dim
+        if n_power_iterations <= 0:
+            raise ValueError('Expected n_power_iterations to be positive, but '
+                             'got n_power_iterations={}'.format(n_power_iterations))
+        self.n_power_iterations = n_power_iterations
+        self.eps = eps
+
+    def compute_weight(self, module):
+        weight = getattr(module, self.name + '_orig')
+        u = getattr(module, self.name + '_u')
+        weight_mat = weight
+        if self.dim != 0:
+            # permute dim to front
+            weight_mat = weight_mat.permute(self.dim,
+                *[d for d in range(weight_mat.dim()) if d != self.dim])
+        height = weight_mat.size(0)
+        weight_mat = weight_mat.reshape(height, -1)
+        with torch.no_grad():
+            for _ in range(self.n_power_iterations):
+                # Spectral norm of weight equals to `u^T W v`, where `u` and `v`
+                # are the first left and right singular vectors.
+                # This power iteration produces approximations of `u` and `v`.
+                v = normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
+                u = normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
+        sigma = torch.dot(u, torch.matmul(weight_mat, v))
+        weight = weight / sigma
+        return weight, u
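`compute_weight` runs power iteration to approximate the largest singular value `sigma` of the flattened weight matrix, then returns `weight / sigma`. The iteration can be checked in isolation against an SVD; this sketch assumes a recent PyTorch with `torch.linalg.svdvals` and is not part of the commit:

```python
import torch
from torch.nn.functional import normalize

torch.manual_seed(0)
W = torch.randn(32, 64)
u = normalize(torch.randn(32), dim=0)
for _ in range(5):
    v = normalize(W.t() @ u, dim=0)   # approximate right singular vector
    u = normalize(W @ v, dim=0)       # approximate left singular vector
sigma = torch.dot(u, W @ v)
print(sigma.item(), torch.linalg.svdvals(W)[0].item())  # the two values closely agree
```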
+    def remove(self, module):
+        weight = getattr(module, self.name)
+        delattr(module, self.name)
+        delattr(module, self.name + '_u')
+        delattr(module, self.name + '_orig')
+        module.register_parameter(self.name, torch.nn.Parameter(weight))
+
+    def __call__(self, module, inputs):
+        if module.training:
+            weight, u = self.compute_weight(module)
+            setattr(module, self.name, weight)
+            setattr(module, self.name + '_u', u)
+        else:
+            r_g = getattr(module, self.name + '_orig').requires_grad
+            getattr(module, self.name).detach_().requires_grad_(r_g)
+
+    @staticmethod
+    def apply(module, name, n_power_iterations, dim, eps):
+        fn = SpectralNorm(name, n_power_iterations, dim, eps)
+        weight = module._parameters[name]
+        height = weight.size(dim)
+        u = normalize(weight.new_empty(height).normal_(0, 1), dim=0, eps=fn.eps)
+        delattr(module, fn.name)
+        module.register_parameter(fn.name + "_orig", weight)
+        # We still need to assign weight back as fn.name because all sorts of
+        # things may assume that it exists, e.g., when initializing weights.
+        # However, we can't directly assign as it could be an nn.Parameter and
+        # gets added as a parameter. Instead, we register weight.data as a
+        # buffer, which will cause weight to be included in the state dict
+        # and also supports nn.init due to shared storage.
+        module.register_buffer(fn.name, weight.data)
+        module.register_buffer(fn.name + "_u", u)
+        module.register_forward_pre_hook(fn)
+        return fn
 def spectral_norm(module, n_power_iterations=1, eps=1e-12):
     """
     https://github.com/pytorch/pytorch/blob/master/torch/nn/utils/spectral_norm.py
     """
     dim=1
-    torch.nn.utils.SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
+    #torch.nn.utils.
+    SpectralNorm.apply(module, 'weight', n_power_iterations, dim, eps)
     return module
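The fix replaces a call into `torch.nn.utils` that passed an undefined `name` with a direct call to the vendored class, hard-coding `'weight'`. Note `dim=1` here, unlike the `dim=0` default of PyTorch's own `spectral_norm`; this matches the NetworkInNetwork weights above, which put output planes on dimension 1. Hypothetical usage with the definitions above in scope, on a plain linear layer for illustration:

```python
import torch

layer = torch.nn.Linear(8, 4)
layer = spectral_norm(layer)     # registers weight_orig, weight_u and a pre-forward hook
y = layer(torch.randn(2, 8))     # the hook rescales weight by 1/sigma before forward
```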