encoding.py 3.97 KB
Newer Older
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
1
2
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
Zhang's avatar
v0.4.2  
Zhang committed
3
4
## Email: zhanghang0704@gmail.com
## Copyright (c) 2018
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
5
6
##
## This source code is licensed under the MIT-style license found in the
Hang Zhang's avatar
sync BN  
Hang Zhang committed
7
## LICENSE file in the root directory of this source tree
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
8
9
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Hang Zhang's avatar
sync BN  
Hang Zhang committed
10
"""Functions for Encoding Layer"""
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
11
12
import torch
from torch.autograd import Function, Variable
import torch.nn.functional as F

# Compiled C extension kernels shipped with the package. The GPU extension
# is only imported when CUDA devices are present, so CPU-only installs
# never touch the CUDA build.
from encoding import cpu
if torch.cuda.device_count() > 0:
    from encoding import gpu
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
18

Hang Zhang's avatar
Hang Zhang committed
19
__all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine']
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
20
21

class _aggregate(Function):
    """Autograd function for the Encoding aggregate operation.

    Dispatches to the compiled GPU or CPU extension kernel depending on
    where the input tensors live. Callers should use the ``aggregate``
    wrapper rather than this class directly.
    """

    @staticmethod
    def forward(ctx, A, X, C):
        # A in (B x N x K), X in (B x N x D), C in (K x D) => E in (B x K x D)
        ctx.save_for_backward(A, X, C)
        if A.is_cuda:
            E = gpu.aggregate_forward(A, X, C)
        else:
            E = cpu.aggregate_forward(A, X, C)
        return E

    @staticmethod
    def backward(ctx, gradE):
        # ``ctx.saved_variables`` was deprecated in PyTorch 0.4 and removed
        # in later releases; ``saved_tensors`` is the supported accessor.
        A, X, C = ctx.saved_tensors
        if A.is_cuda:
            gradA, gradX, gradC = gpu.aggregate_backward(gradE, A, X, C)
        else:
            gradA, gradX, gradC = cpu.aggregate_backward(gradE, A, X, C)
        return gradA, gradX, gradC

def aggregate(A, X, C):
    r""" Aggregate operation, aggregate the residuals of inputs (:math:`X`) with respect
    to the codewords (:math:`C`) with assignment weights (:math:`A`).

    .. math::

        e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k)

    Shape:
        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}`
          :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}`
          (where :math:`B` is batch, :math:`N` is total number of features,
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`

    Examples:
        >>> B,N,K,D = 2,3,4,5
        >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True)
        >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True)
        >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True)
        >>> E = encoding.aggregate(A, X, C)
    """
    return _aggregate.apply(A, X, C)
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
65

Hang Zhang's avatar
Hang Zhang committed
66
class _scaled_l2(Function):
    """Autograd function for the scaled squared-L2 distance.

    Dispatches to the compiled GPU or CPU extension kernel depending on
    where the input tensors live. Callers should use the ``scaled_l2``
    wrapper rather than this class directly.
    """

    @staticmethod
    def forward(ctx, X, C, S):
        if X.is_cuda:
            SL = gpu.scaled_l2_forward(X, C, S)
        else:
            SL = cpu.scaled_l2_forward(X, C, S)
        # The forward result SL is also saved because the backward kernel
        # reuses it.
        ctx.save_for_backward(X, C, S, SL)
        return SL

    @staticmethod
    def backward(ctx, gradSL):
        # ``ctx.saved_variables`` was deprecated in PyTorch 0.4 and removed
        # in later releases; ``saved_tensors`` is the supported accessor.
        X, C, S, SL = ctx.saved_tensors
        if X.is_cuda:
            gradX, gradC, gradS = gpu.scaled_l2_backward(gradSL, X, C, S, SL)
        else:
            gradX, gradC, gradS = cpu.scaled_l2_backward(gradSL, X, C, S, SL)
        return gradX, gradC, gradS

Hang Zhang's avatar
Hang Zhang committed
85
86
def scaled_l2(X, C, S):
    r""" scaled_l2 distance: per-codeword scaled squared Euclidean distance
    between each input feature and each codeword.

    .. math::
        sl_{ik} = s_k \|x_i-c_k\|^2

    Shape:
        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}`
          :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K`
          (where :math:`B` is batch, :math:`N` is total number of features,
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
    """
    return _scaled_l2.apply(X, C, S)
Hang Zhang's avatar
Hang Zhang committed
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

# Experimental
def pairwise_cosine(X, C, normalize=False):
    r"""Pairwise Cosine Similarity or Dot-product Similarity.

    With ``normalize=False`` this is the plain dot product between every
    input feature and every codeword; with ``normalize=True`` both are
    L2-normalized first, yielding the cosine similarity.

    Shape:
        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}`
          :math:`C\in\mathcal{R}^{K\times D}`
          (where :math:`B` is batch, :math:`N` is total number of features,
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
    """
    if not normalize:
        return torch.matmul(X, C.t())
    # Normalize along the feature dimension of each operand so the
    # dot product below becomes a cosine similarity.
    unit_X = F.normalize(X, dim=2, eps=1e-8)
    unit_C = F.normalize(C, dim=1, eps=1e-8)
    return torch.matmul(unit_X, unit_C.t())