Commit 8e89c34b authored by Daniel Povey

Some automated name changes

parent 371e2657
from .learned_nonlin import learned_nonlin
from .mutual_information import mutual_information
@@ -12,59 +12,59 @@ def _resolve(name):
try:
import torch_learned_nonlin_cpu
import torch_mutual_information_cpu
except ImportError:
if VERBOSE:
print('Falling back to JIT compiling torch_learned_nonlin_cpu')
torch_learned_nonlin_cpu = load(
name='torch_learned_nonlin_cpu',
print('Falling back to JIT compiling torch_mutual_information_cpu')
torch_mutual_information_cpu = load(
name='torch_mutual_information_cpu',
sources=[
_resolve('learned_nonlin_cpu.cpp'),
_resolve('mutual_information_cpu.cpp'),
],
verbose=VERBOSE,
)
try:
import torch_learned_nonlin_cuda
import torch_mutual_information_cuda
except ImportError:
if VERBOSE:
print('Falling back to JIT compiling torch_learned_nonlin_cuda')
torch_learned_nonlin_cuda = None
print('Falling back to JIT compiling torch_mutual_information_cuda')
torch_mutual_information_cuda = None
if torch.cuda.is_available():
torch_learned_nonlin_cuda = load(
name='torch_learned_nonlin_cuda',
torch_mutual_information_cuda = load(
name='torch_mutual_information_cuda',
sources=[
_resolve('learned_nonlin_cuda.cpp'),
_resolve('learned_nonlin_cuda_kernel.cu'),
_resolve('mutual_information_cuda.cpp'),
_resolve('mutual_information_cuda_kernel.cu'),
],
verbose=VERBOSE,
)
def _learned_nonlin_forward_dispatcher(input: torch.Tensor,
def _mutual_information_forward_dispatcher(input: torch.Tensor,
params: torch.Tensor) -> torch.Tensor:
if input.is_cuda:
if torch_learned_nonlin_cuda is None:
if torch_mutual_information_cuda is None:
raise EnvironmentError(f'Failed to load native CUDA module')
return torch_learned_nonlin_cuda.learned_nonlin_cuda(
return torch_mutual_information_cuda.mutual_information_cuda(
input, params.contiguous())
else:
return torch_learned_nonlin_cpu.learned_nonlin_cpu(
return torch_mutual_information_cpu.mutual_information_cpu(
input, params)
def _learned_nonlin_backward_dispatcher(input: torch.Tensor,
def _mutual_information_backward_dispatcher(input: torch.Tensor,
params: torch.Tensor,
grad_output) -> Tuple[torch.Tensor, torch.Tensor]:
if input.is_cuda:
if torch_learned_nonlin_cuda is None:
if torch_mutual_information_cuda is None:
raise EnvironmentError(f'Failed to load native CUDA module')
return tuple(torch_learned_nonlin_cuda.learned_nonlin_backward_cuda(
return tuple(torch_mutual_information_cuda.mutual_information_backward_cuda(
input, params,
grad_output))
else:
return tuple(torch_learned_nonlin_cpu.learned_nonlin_backward_cpu(
return tuple(torch_mutual_information_cpu.mutual_information_backward_cpu(
input, params, grad_output))
@@ -115,7 +115,7 @@ class LearnedNonlinFunction(torch.autograd.Function):
ctx.dim = dim
ctx.save_for_backward(input, params)
output = _learned_nonlin_forward_dispatcher(_reshape_as_3dim(input, dim),
output = _mutual_information_forward_dispatcher(_reshape_as_3dim(input, dim),
params)
return output
@@ -127,12 +127,12 @@ class LearnedNonlinFunction(torch.autograd.Function):
# input, so that if this reshaping results in a copy it is not retained
# (this saves memory at the expense of a little extra work in such
# situations).
grad_input, grad_params = _learned_nonlin_backward_dispatcher(
grad_input, grad_params = _mutual_information_backward_dispatcher(
_reshape_as_3dim(input, ctx.dim), params, grad_output)
return grad_input.reshape(input.shape), grad_params, None
def learned_nonlin(input, params, dim):
def mutual_information(input, params, dim):
"""Learned nonlinearity.
Args:
input: The input, to be transformed pointwise; may be of any shape.
......
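For orientation while reading this rename, a minimal usage sketch of the renamed Python entry point follows. The shapes and values are illustrative assumptions only; `params` is assumed to have shape (C, N + 1), matching the 2-dimensional check in the C++ code below, and the tests further down suggest the CUDA backward currently needs 4 <= N <= 16.

import torch
from torch_mutual_information import mutual_information

# Hypothetical shapes for illustration; `dim` selects the channel dimension.
x = torch.randn(2, 4, 10)        # (B, C, T) with C = 4
params = torch.randn(4, 5)       # (C, N + 1) with N = 4
y = mutual_information(x, params, dim=1)
# The transform is pointwise, so the output keeps the input's shape.
assert y.shape == x.shape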
@@ -2,9 +2,9 @@
// forward of learned_nonlin. See """... """ comment of `learned_nonlin` in
// learned_nonlin.py for documentation of the behavior of this function.
torch::Tensor learned_nonlin_cpu(torch::Tensor input,
// forward of mutual_information. See """... """ comment of `mutual_information` in
// mutual_information.py for documentation of the behavior of this function.
torch::Tensor mutual_information_cpu(torch::Tensor input,
torch::Tensor params) {
TORCH_CHECK(input.dim() == 3, "input must be 3-dimensional");
TORCH_CHECK(params.dim() == 2, "params must be 2-dimensional.");
@@ -29,7 +29,7 @@ torch::Tensor learned_nonlin_cpu(torch::Tensor input,
torch::Tensor y_vals = torch::empty({C, N}, opts),
output = torch::empty({B, C, T}, opts);
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "learned_nonlin_cpu_loop", ([&] {
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "mutual_information_cpu_loop", ([&] {
auto params_a = params.accessor<scalar_t, 2>(),
y_vals_a = y_vals.accessor<scalar_t, 2>();
for (int c = 0; c < C; c++) {
@@ -74,9 +74,9 @@ torch::Tensor learned_nonlin_cpu(torch::Tensor input,
}
// backward of learned_nonlin. Returns (input_grad, params_grad)
// backward of mutual_information. Returns (input_grad, params_grad)
std::vector<torch::Tensor> learned_nonlin_backward_cpu(torch::Tensor input,
std::vector<torch::Tensor> mutual_information_backward_cpu(torch::Tensor input,
torch::Tensor params,
torch::Tensor output_grad) {
TORCH_CHECK(input.dim() == 3, "input must be 3-dimensional");
@@ -107,7 +107,7 @@ std::vector<torch::Tensor> learned_nonlin_backward_cpu(torch::Tensor input,
params_grad = torch::zeros({C, N + 1}, opts),
input_grad = torch::zeros({B, C, T}, opts);
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "learned_nonlin_backward_cpu_loop", ([&] {
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "mutual_information_backward_cpu_loop", ([&] {
auto params_a = params.accessor<scalar_t, 2>(),
params_grad_a = params_grad.accessor<scalar_t, 2>(),
y_vals_a = y_vals.accessor<scalar_t, 2>(),
@@ -186,6 +186,6 @@ std::vector<torch::Tensor> learned_nonlin_backward_cpu(torch::Tensor input,
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("learned_nonlin_cpu", &learned_nonlin_cpu, "Integrated convolution forward function (CPU)");
m.def("learned_nonlin_backward_cpu", &learned_nonlin_backward_cpu, "Integrated convolution backward function (CPU)");
m.def("mutual_information_cpu", &mutual_information_cpu, "Integrated convolution forward function (CPU)");
m.def("mutual_information_backward_cpu", &mutual_information_backward_cpu, "Integrated convolution backward function (CPU)");
}
#include <torch/extension.h>
// forward of learned_nonlin. """... """ comment of `learned_nonlin`
// in learned_nonlin.py documents the behavior of this function.
torch::Tensor learned_nonlin_cuda(torch::Tensor input,
// forward of mutual_information. """... """ comment of `mutual_information`
// in mutual_information.py documents the behavior of this function.
torch::Tensor mutual_information_cuda(torch::Tensor input,
torch::Tensor params);
// backward of learned_nonlin; returns (grad_input, grad_params).
std::vector<torch::Tensor> learned_nonlin_backward_cuda(torch::Tensor input,
// backward of mutual_information; returns (grad_input, grad_params).
std::vector<torch::Tensor> mutual_information_backward_cuda(torch::Tensor input,
torch::Tensor params,
torch::Tensor grad_output);
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("learned_nonlin_cuda", &learned_nonlin_cuda, "Learned nonlinearity forward function (CUDA)");
m.def("learned_nonlin_backward_cuda", &learned_nonlin_backward_cuda, "Learned nonlinearity backward function (CUDA)");
m.def("mutual_information_cuda", &mutual_information_cuda, "Learned nonlinearity forward function (CUDA)");
m.def("mutual_information_backward_cuda", &mutual_information_backward_cuda, "Learned nonlinearity backward function (CUDA)");
}
@@ -43,7 +43,7 @@ __forceinline__ __device__ scalar_t tiled_warp_reduce_sum(int threads_per_tile,
/*
Forward of learned_nonlin. Each thread group handles a single channel (channel
Forward of mutual_information. Each thread group handles a single channel (channel
c = blockIdx.x); the gridDim is (C, nb, 1) where 1 <= nb <= B (nb relates to the
image within the batch).
@@ -81,7 +81,7 @@ __forceinline__ __device__ scalar_t tiled_warp_reduce_sum(int threads_per_tile,
1 <= gridDim.y <= B, where B is the batch size
gridDim.z == 1
When we invoke this kernel, we'll invoke it as:
learned_nonlin_kernel<<<gridDim, blockDim, bytesShared, stream>>>
mutual_information_kernel<<<gridDim, blockDim, bytesShared, stream>>>
where bytesShared is the number of bytes needed in `extern_buf`:
bytesShared = sizeof(shared_t) * (2N + 3)
We also require N + 1 <= THREADS_PER_BLOCK.
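As a quick sanity check of the sizing described above, here is a tiny sketch of the shared-memory arithmetic; the concrete values of N, the element size, and THREADS_PER_BLOCK are assumptions for illustration, not taken from the source.

# bytesShared = sizeof(shared_t) * (2N + 3), per the comment above.
N = 8                    # hypothetical N
sizeof_scalar = 4        # assuming scalar_t == float
bytes_shared = sizeof_scalar * (2 * N + 3)   # 4 * 19 = 76 bytes
THREADS_PER_BLOCK = 256  # assumed block size
assert N + 1 <= THREADS_PER_BLOCK            # constraint stated above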
@@ -90,7 +90,7 @@ extern __shared__ int extern_buf[];
template <typename scalar_t>
__global__
void learned_nonlin_kernel(
void mutual_information_kernel(
torch::PackedTensorAccessor32<scalar_t, 3> input, // B, C, T, i.e. batch, channels, time
torch::PackedTensorAccessor32<scalar_t, 2> params, // C, N + 1
torch::PackedTensorAccessor32<scalar_t, 3> output,
@@ -212,7 +212,7 @@ __forceinline__ __device__ scalar_t strided_reduce_sum(int N,
}
/*
Backward of learned_nonlin. Each thread group handles a single channel (channel
Backward of mutual_information. Each thread group handles a single channel (channel
c = blockIdx.x); the gridDim is (C, nb, 1) where 1 <= nb <= B (nb relates to the
image within the batch).
@@ -253,7 +253,7 @@ __forceinline__ __device__ scalar_t strided_reduce_sum(int N,
1 <= gridDim.y <= B, where B is the batch size
gridDim.z == 1
When we invoke this kernel, we'll invoke it as:
learned_nonlin_backward_kernel<<<gridDim, blockDim, bytesShared, stream>>>
mutual_information_backward_kernel<<<gridDim, blockDim, bytesShared, stream>>>
where bytesShared is the number of bytes needed in `extern_buf`:
bytesShared = sizeof(shared_t) * (2N + 3)
@@ -272,7 +272,7 @@ __forceinline__ __device__ scalar_t strided_reduce_sum(int N,
*/
template <typename scalar_t>
__global__
void learned_nonlin_backward_kernel(
void mutual_information_backward_kernel(
torch::PackedTensorAccessor32<scalar_t, 3> input, // B, C, T, i.e. batch, channels, time
torch::PackedTensorAccessor32<scalar_t, 2> params, // C, N + 1
torch::PackedTensorAccessor32<scalar_t, 3> output_grad, // B, C, T
@@ -548,7 +548,7 @@ void learned_nonlin_backward_kernel(
torch::Tensor learned_nonlin_cuda(torch::Tensor input,
torch::Tensor mutual_information_cuda(torch::Tensor input,
torch::Tensor params) {
TORCH_CHECK(input.dim() == 3, "input must be 3-dimensional");
@@ -611,8 +611,8 @@ torch::Tensor learned_nonlin_cuda(torch::Tensor input,
dim3 gridDim(C, grid_dim_y, 1);
// blockDim is scalar, just THREADS_PER_BLOCK.
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "learned_nonlin_kernel", ([&] {
learned_nonlin_kernel<scalar_t><<<gridDim, THREADS_PER_BLOCK, sizeof(scalar_t) * shared_mem_numel, at::cuda::getCurrentCUDAStream()>>>(
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "mutual_information_kernel", ([&] {
mutual_information_kernel<scalar_t><<<gridDim, THREADS_PER_BLOCK, sizeof(scalar_t) * shared_mem_numel, at::cuda::getCurrentCUDAStream()>>>(
input.packed_accessor32<scalar_t, 3>(),
params.packed_accessor32<scalar_t, 2>(),
output.packed_accessor32<scalar_t, 3>(),
@@ -623,7 +623,7 @@ torch::Tensor learned_nonlin_cuda(torch::Tensor input,
std::vector<torch::Tensor> learned_nonlin_backward_cuda(torch::Tensor input,
std::vector<torch::Tensor> mutual_information_backward_cuda(torch::Tensor input,
torch::Tensor params,
torch::Tensor output_grad) {
TORCH_CHECK(input.dim() == 3, "input must be 3-dimensional");
@@ -701,8 +701,8 @@ std::vector<torch::Tensor> learned_nonlin_backward_cuda(torch::Tensor input,
dim3 gridDim(C, grid_dim_y, 1);
// blockDim is scalar, just THREADS_PER_BLOCK.
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "learned_nonlin_backward_kernel", ([&] {
learned_nonlin_backward_kernel<scalar_t><<<gridDim, THREADS_PER_BLOCK, sizeof(scalar_t) * shared_mem_numel, at::cuda::getCurrentCUDAStream()>>>(
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "mutual_information_backward_kernel", ([&] {
mutual_information_backward_kernel<scalar_t><<<gridDim, THREADS_PER_BLOCK, sizeof(scalar_t) * shared_mem_numel, at::cuda::getCurrentCUDAStream()>>>(
input.packed_accessor32<scalar_t, 3>(),
params.packed_accessor32<scalar_t, 2>(),
output_grad.packed_accessor32<scalar_t, 3>(),
......
@@ -3,10 +3,10 @@
import random
import torch
from torch_learned_nonlin import learned_nonlin
from torch_mutual_information import mutual_information
def test_learned_nonlin_basic():
def test_mutual_information_basic():
for dtype in [torch.float32, torch.float64]:
B = 2
C = 4
@@ -24,16 +24,16 @@ def test_learned_nonlin_basic():
print("params = ", params)
print("x.shape = ", x.shape)
y = learned_nonlin(x, params, dim = 1)
y = mutual_information(x, params, dim = 1)
if True:
# Check
x2 = x.reshape(B, C, 5, 2)
assert torch.allclose(learned_nonlin(x, params, dim = 1), learned_nonlin(x2, params, dim = 1).reshape(x.shape))
assert torch.allclose(mutual_information(x, params, dim = 1), mutual_information(x2, params, dim = 1).reshape(x.shape))
x2 = x.reshape(B, 1, C, 10)
assert torch.allclose(learned_nonlin(x, params, dim = 1), learned_nonlin(x2, params, dim = 2).reshape(x.shape))
assert torch.allclose(mutual_information(x, params, dim = 1), mutual_information(x2, params, dim = 2).reshape(x.shape))
@@ -47,7 +47,7 @@ def test_learned_nonlin_basic():
x2.requires_grad = True
params2 = params.to(device).detach()
params2.requires_grad = True
y2 = learned_nonlin(x2, params2, dim = 1).to(torch.device('cpu'))
y2 = mutual_information(x2, params2, dim = 1).to(torch.device('cpu'))
print("Checking CUDA is same")
if not torch.allclose(y, y2, atol=1.0e-06):
print(f"Error: CPU versus CUDA not the same: {y} vs. {y2}, diff = {y2-y}")
@@ -70,7 +70,7 @@ def test_learned_nonlin_basic():
# Just eyeballing the above to make sure it looks reasonable.
def test_learned_nonlin_deriv():
def test_mutual_information_deriv():
""" Tests derivatives in randomized way """
for _ in range(10):
for dtype in [torch.float32, torch.float64]:
@@ -85,7 +85,7 @@ def test_learned_nonlin_deriv():
x.requires_grad = True
params.requires_grad = True
print(f"B,C,T,K = {B},{C},{T},{K}")
y = learned_nonlin(x, params, dim = 1)
y = mutual_information(x, params, dim = 1)
y_deriv = torch.randn_like(y)
y.backward(gradient=y_deriv)
@@ -96,7 +96,7 @@ def test_learned_nonlin_deriv():
x2, params2 = x.to(device).detach(), params.to(device).detach()
x2.requires_grad = True
params2.requires_grad = True
y2 = learned_nonlin(x2, params2, dim = 1)
y2 = mutual_information(x2, params2, dim = 1)
if N >= 4 and N <= 16: # Currently backprop requires these conditions
y2.backward(gradient=y_deriv.to(device))
@@ -122,7 +122,7 @@ def test_learned_nonlin_deriv():
delta = 1.0e-04
delta_x = torch.randn_like(x) * delta
pred_change = (x.grad * delta_x).sum()
y2 = learned_nonlin(x + delta_x, params, dim = 1)
y2 = mutual_information(x + delta_x, params, dim = 1)
observed_change = (y_deriv * (y2 - y)).sum()
print(f"for input: pred_change = {pred_change}, observed_change={observed_change}")
if not torch.allclose(pred_change, observed_change, rtol=5.0e-02, atol=3.0e-05):
@@ -131,13 +131,13 @@ def test_learned_nonlin_deriv():
delta_params = torch.randn_like(params) * delta
pred_change = (params.grad * delta_params).sum()
observed_change = (y_deriv * (learned_nonlin(x, params + delta_params, dim = 1) - y)).sum()
observed_change = (y_deriv * (mutual_information(x, params + delta_params, dim = 1) - y)).sum()
print(f"for params: pred_change = {pred_change}, observed_change={observed_change}")
assert torch.allclose(pred_change, observed_change, rtol=1.0e-02, atol=1.0e-05)
def test_learned_nonlin_zeros():
def test_mutual_information_zeros():
N = 1
C = 2
H = 3
@@ -158,7 +158,7 @@ def test_learned_nonlin_zeros():
pos_mul.requires_grad = True
output_ref = torch.zeros(N, C, H, W, device=device, dtype=dtype)
output = learned_nonlin(input, pos_add, pos_mul)
output = mutual_information(input, pos_add, pos_mul)
assert torch.allclose(output, output_ref)
output.sum().backward()
@@ -167,7 +167,7 @@ def test_learned_nonlin_zeros():
print("pos_mul_grad=", pos_mul.grad)
def test_learned_nonlin_compare():
def test_mutual_information_compare():
N = 1
C = 2
H = 3
@@ -192,8 +192,8 @@ def test_learned_nonlin_compare():
for x in [ pos_add, pos_mul, pos_add_cuda, pos_mul_cuda, input, input_cuda ]:
x.requires_grad = True
output = learned_nonlin(input, pos_add, pos_mul)
output_cuda = learned_nonlin(input_cuda, pos_add_cuda, pos_mul_cuda)
output = mutual_information(input, pos_add, pos_mul)
output_cuda = mutual_information(input_cuda, pos_add_cuda, pos_mul_cuda)
print("output = ", output)
print("output_cuda = ", output_cuda)
@@ -223,7 +223,7 @@ def test_learned_nonlin_compare():
def test_learned_nonlin_rand_compare():
def test_mutual_information_rand_compare():
for _ in range(30):
N = random.randint(1, 256)
C = random.randint(1, 64)
@@ -261,8 +261,8 @@ def test_learned_nonlin_rand_compare():
pos_add_cuda = pos_add.to(device)
pos_mul_cuda = pos_mul.to(device)
output = learned_nonlin(input, pos_add, pos_mul)
output_cuda = learned_nonlin(input_cuda, pos_add_cuda, pos_mul_cuda)
output = mutual_information(input, pos_add, pos_mul)
output_cuda = mutual_information(input_cuda, pos_add_cuda, pos_mul_cuda)
diff = (output - output_cuda.to(torch.device('cpu'))).abs().sum()
sum_abs = output.abs().sum()
@@ -275,7 +275,7 @@ def test_learned_nonlin_rand_compare():
def test_learned_nonlin_rand_grad():
def test_mutual_information_rand_grad():
for _ in range(30):
N = random.randint(1, 256)
C = random.randint(1, 64)
@@ -313,7 +313,7 @@ def test_learned_nonlin_rand_grad():
pos_add.requires_grad = True
pos_mul.requires_grad = True
output = learned_nonlin(input, pos_add, pos_mul)
output = mutual_information(input, pos_add, pos_mul)
output_grad = torch.randn(N, C, H, W, dtype=dtype, device=device)
output.backward(gradient=output_grad)
@@ -321,27 +321,27 @@ def test_learned_nonlin_rand_grad():
delta = 1.0e-05
pos_delta = delta * torch.randn(C, kH, kW, dtype=dtype, device=device)
pred_change = (pos_delta * pos_add.grad).sum().to('cpu').item()
change = (output_grad * (learned_nonlin(input, pos_add + pos_delta, pos_mul) - output )).sum().to('cpu').item()
change = (output_grad * (mutual_information(input, pos_add + pos_delta, pos_mul) - output )).sum().to('cpu').item()
print(f"For pos_add: pred_change={pred_change}, change={change}")
#assert abs(pred_change - change) < 1.0e-04
pred_change = (pos_delta * pos_mul.grad).sum().to('cpu').item()
change = (output_grad * (learned_nonlin(input, pos_add, pos_mul + pos_delta) - output )).sum().to('cpu').item()
change = (output_grad * (mutual_information(input, pos_add, pos_mul + pos_delta) - output )).sum().to('cpu').item()
print(f"For pos_mul: pred_change={pred_change}, change={change}")
#assert abs(pred_change - change) / abs(change) < 1.0e-04
input_delta = delta * torch.randn(N, 2*C, H, W, dtype=dtype, device=device)
pred_change = (input_delta * input.grad).sum().to('cpu').item()
change = (output_grad * (learned_nonlin(input + input_delta, pos_add, pos_mul) - output )).sum().to('cpu').item()
change = (output_grad * (mutual_information(input + input_delta, pos_add, pos_mul) - output )).sum().to('cpu').item()
print(f"For input: pred_change={pred_change}, change={change}")
#assert abs(pred_change - change) / abs(change) < 1.0e-04
if __name__ == "__main__":
test_learned_nonlin_basic()
test_learned_nonlin_deriv()
test_mutual_information_basic()
test_mutual_information_deriv()
if False:
test_learned_nonlin_rand_grad()
test_learned_nonlin_zeros()
test_learned_nonlin_compare()
test_learned_nonlin_rand_compare()
test_mutual_information_rand_grad()
test_mutual_information_zeros()
test_mutual_information_compare()
test_mutual_information_rand_compare()