encoding.py 3.94 KB
Newer Older
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
1
2
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
Zhang's avatar
v0.4.2  
Zhang committed
3
4
## Email: zhanghang0704@gmail.com
## Copyright (c) 2018
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
5
6
##
## This source code is licensed under the MIT-style license found in the
Hang Zhang's avatar
sync BN  
Hang Zhang committed
7
## LICENSE file in the root directory of this source tree
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
8
9
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Hang Zhang's avatar
sync BN  
Hang Zhang committed
10
"""Functions for Encoding Layer"""
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
11
12
import torch
from torch.autograd import Function, Variable
Hang Zhang's avatar
Hang Zhang committed
13
import torch.nn.functional as F
Zhang's avatar
v0.4.2  
Zhang committed
14
from .. import lib
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
15

Hang Zhang's avatar
Hang Zhang committed
16
__all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine']
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
17
18

class _aggregate(Function):
    """Autograd function for the encoding aggregate operation.

    Dispatches forward/backward to the compiled CUDA or CPU kernels in
    ``lib`` depending on where the input tensors live.
    """
    @staticmethod
    def forward(ctx, A, X, C):
        # A \in (B x N x K), X \in (B x N x D), C \in (K x D) => E \in (B x K x D)
        # (shapes per the docstring of the public ``aggregate`` wrapper)
        ctx.save_for_backward(A, X, C)
        if A.is_cuda:
            E = lib.gpu.aggregate_forward(A, X, C)
        else:
            E = lib.cpu.aggregate_forward(A, X, C)
        return E

    @staticmethod
    def backward(ctx, gradE):
        # ``saved_tensors`` replaces the long-deprecated ``saved_variables``
        # (removed in modern PyTorch); same tensors, same order as saved above.
        A, X, C = ctx.saved_tensors
        if A.is_cuda:
            gradA, gradX, gradC = lib.gpu.aggregate_backward(gradE, A, X, C)
        else:
            gradA, gradX, gradC = lib.cpu.aggregate_backward(gradE, A, X, C)
        return gradA, gradX, gradC

def aggregate(A, X, C):
    r"""Aggregate operation: aggregate the residuals of inputs (:math:`X`)
    with respect to the codewords (:math:`C`) with assignment weights
    (:math:`A`).

    .. math::

        e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k)

    Shape:
        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}`
          :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}`
          (where :math:`B` is batch, :math:`N` is total number of features,
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`

    Examples:
        >>> B,N,K,D = 2,3,4,5
        >>> A = torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5).requires_grad_()
        >>> X = torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5).requires_grad_()
        >>> C = torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5).requires_grad_()
        >>> E = encoding.aggregate(A, X, C)
    """
    # Plain function, not a module/class: call it directly with the tensors.
    return _aggregate.apply(A, X, C)
Hang Zhang's avatar
v1.0.1  
Hang Zhang committed
62

Hang Zhang's avatar
Hang Zhang committed
63
class _scaled_l2(Function):
    """Autograd function for the scaled L2 distance.

    Dispatches forward/backward to the compiled CUDA or CPU kernels in
    ``lib`` depending on where the input tensors live.
    """
    @staticmethod
    def forward(ctx, X, C, S):
        if X.is_cuda:
            SL = lib.gpu.scaled_l2_forward(X, C, S)
        else:
            SL = lib.cpu.scaled_l2_forward(X, C, S)
        # The forward output SL is also needed by the backward kernels,
        # so it is saved alongside the inputs.
        ctx.save_for_backward(X, C, S, SL)
        return SL

    @staticmethod
    def backward(ctx, gradSL):
        # ``saved_tensors`` replaces the long-deprecated ``saved_variables``
        # (removed in modern PyTorch); same tensors, same order as saved above.
        X, C, S, SL = ctx.saved_tensors
        if X.is_cuda:
            gradX, gradC, gradS = lib.gpu.scaled_l2_backward(gradSL, X, C, S, SL)
        else:
            gradX, gradC, gradS = lib.cpu.scaled_l2_backward(gradSL, X, C, S, SL)
        return gradX, gradC, gradS

Hang Zhang's avatar
Hang Zhang committed
82
83
def scaled_l2(X, C, S):
    r"""Scaled L2 distance between each input feature and each codeword.

    .. math::
        sl_{ik} = s_k \|x_i-c_k\|^2

    Shape:
        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}`
          :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K`
          (where :math:`B` is batch, :math:`N` is total number of features,
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output: :math:`SL\in\mathcal{R}^{B\times N\times K}`
    """
    return _scaled_l2.apply(X, C, S)
Hang Zhang's avatar
Hang Zhang committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

# Experimental
# Experimental
def pairwise_cosine(X, C, normalize=False):
    r"""Pairwise Cosine Similarity or Dot-product Similarity.

    With ``normalize=True`` both inputs are L2-normalized along the feature
    dimension first, giving cosine similarity; otherwise this is a plain
    batched dot-product.

    Args:
        X: input features, :math:`X\in\mathcal{R}^{B\times N\times D}`
        C: codewords, :math:`C\in\mathcal{R}^{K\times D}`
        normalize: if ``True``, L2-normalize ``X`` and ``C`` before the product.

    Shape:
        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}`
          :math:`C\in\mathcal{R}^{K\times D}`
          (where :math:`B` is batch, :math:`N` is total number of features,
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
    """
    if normalize:
        # eps guards against division by zero for all-zero rows.
        X = F.normalize(X, dim=2, eps=1e-8)
        C = F.normalize(C, dim=1, eps=1e-8)
    return torch.matmul(X, C.t())