import torch

from torch_scatter import scatter_add, scatter_max


def scatter_log_softmax(src, index, dim=-1, dim_size=None):
    r"""
    Numerically safe log-softmax of all values in the :attr:`src` tensor,
    grouped by the indices specified in the :attr:`index` tensor along a given
    axis :attr:`dim`. Values that reference the same index are **normalized
    together** (`cf.` :meth:`~torch_scatter.scatter_add`).

    For one-dimensional tensors, the operation computes

    .. math::
        \mathrm{out}_i = \log\left(\mathrm{softmax}(\mathrm{src}_i)\right) =
        \mathrm{src}_i - \mathrm{logsumexp}_j(\mathrm{src}_j)

    where :math:`\mathrm{logsumexp}_j` is taken over all :math:`j` such that
    :math:`\mathrm{index}_j = i`.

    For each value in :attr:`src`, its output index is specified by its index
    in :attr:`src` for dimensions outside of :attr:`dim` and by the
    corresponding value in :attr:`index` for dimension :attr:`dim`.

    Args:
        src (Tensor): The source tensor.
        index (LongTensor): The indices of elements to scatter.
        dim (int, optional): The axis along which to index.
            (default: :obj:`-1`)
        dim_size (int, optional): The size of the output tensor at dimension
            :attr:`dim`. If :attr:`dim_size` is not given, a minimal sized
            output tensor is returned. (default: :obj:`None`)

    :rtype: :class:`Tensor`
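
    Example (illustrative sketch; the sample tensors below are hypothetical
    and not taken from the library's tests)::

        src = torch.tensor([0.5, 1.5, 0.0, 2.0, 1.0])
        index = torch.tensor([0, 0, 1, 1, 1])
        out = scatter_log_softmax(src, index)
        # Within each group defined by `index`, out.exp() sums to 1.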
    """
    if not torch.is_floating_point(src):
        raise ValueError('log_softmax can be computed only over '
                         'tensors with floating point data types.')

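    # Per-group maximum of `src`, used below to shift the scores so that
    # exp() cannot overflow (the standard log-sum-exp stabilization).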
    max_value_per_index, _ = scatter_max(src, index,
                                         dim=dim,
                                         dim_size=dim_size)
    max_per_src_element = max_value_per_index.gather(dim, index)

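    # Shift, exponentiate, and sum per group; the log of this sum is the
    # per-element log-normalizer.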
    recentered_scores = src - max_per_src_element

    sum_per_index = scatter_add(
        src=recentered_scores.exp(),
        index=index,
        dim=dim,
        dim_size=dim_size
    )
    log_normalizing_constants = sum_per_index.log().gather(dim, index)

    return recentered_scores - log_normalizing_constants


def scatter_softmax(src, index, dim=-1, dim_size=None, epsilon=1e-16):
    r"""
    Numerically safe softmax of all values in the :attr:`src` tensor, grouped
    by the indices specified in the :attr:`index` tensor along a given axis
    :attr:`dim`. Values that reference the same index are **normalized
    together** (`cf.` :meth:`~torch_scatter.scatter_add`).

    For one-dimensional tensors, the operation computes

    .. math::
        \mathrm{out}_i = \mathrm{softmax}(\mathrm{src}_i) =
        \frac{\exp(\mathrm{src}_i)}{\sum_j \exp(\mathrm{src}_j)}

    where the summation is taken over all :math:`j` such that
    :math:`\mathrm{index}_j = i`.

    For each value in :attr:`src`, its output index is specified by its index
    in :attr:`src` for dimensions outside of :attr:`dim` and by the
    corresponding value in :attr:`index` for dimension :attr:`dim`.

    Args:
        src (Tensor): The source tensor.
        index (LongTensor): The indices of elements to scatter.
        dim (int, optional): The axis along which to index.
            (default: :obj:`-1`)
        dim_size (int, optional): The size of the output tensor at dimension
            :attr:`dim`. If :attr:`dim_size` is not given, a minimal sized
            output tensor is returned. (default: :obj:`None`)
        epsilon (float, optional): Small constant added to the per-index sum
            of exponentials to guard against division by zero.
            (default: :obj:`1e-16`)

    :rtype: :class:`Tensor`
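
    Example (illustrative sketch; the sample tensors below are hypothetical
    and not taken from the library's tests)::

        src = torch.tensor([0.5, 1.5, 0.0, 2.0, 1.0])
        index = torch.tensor([0, 0, 1, 1, 1])
        out = scatter_softmax(src, index)
        # Within each group defined by `index`, the entries of `out` are
        # non-negative and sum to (approximately) 1.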
    """
    if not torch.is_floating_point(src):
        raise ValueError('softmax can be computed only over '
                         'tensors with floating point data types.')

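    # Per-group maximum of `src`, used below to shift the scores so that
    # exp() cannot overflow (the standard log-sum-exp stabilization).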
    max_value_per_index, _ = scatter_max(src, index,
                                         dim=dim,
                                         dim_size=dim_size)
    max_per_src_element = max_value_per_index.gather(dim, index)

    recentered_scores = src - max_per_src_element
    exped_recentered_scores = recentered_scores.exp()

    sum_per_index = scatter_add(
        src=exped_recentered_scores,
        index=index,
        dim=dim,
        dim_size=dim_size
    )
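    # `epsilon` is a small safeguard against division by zero in the
    # normalizing constant.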
    normalizing_constant = (sum_per_index + epsilon).gather(dim, index)
    return exped_recentered_scores / normalizing_constant