# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Sampling utilities.

Part of this code is inspired by:
 - https://github.com/ari-holtzman/degen/blob/master/gen.py
 - https://huggingface.co/transformers/_modules/transformers/generation_logits_process.html
"""


import torch



def modify_logits_for_top_k_filtering(logits, top_k):
    """Set the logits for non-top-k values to -inf, in place.

    Args:
        logits: tensor of scores; filtering is done along the last
            dimension and the tensor is modified in place.
        top_k: number of largest entries to keep along the last
            dimension.

    Returns:
        None. The result is written into `logits`.

    Note: only values strictly smaller than the k-th largest value are
    masked, so entries that tie with the k-th largest value are kept.
    """

    # Smallest of the k largest values along the last dimension. The
    # trailing `None` keeps a singleton dimension so the threshold
    # broadcasts against `logits`.
    kth_largest = torch.topk(logits, top_k)[0][..., -1, None]
    filter_ = logits < kth_largest
    logits.masked_fill_(filter_, float('-Inf'))



def modify_logits_for_top_p_filtering(logits, top_p):
    """Set the logits for non-top-p (nucleus) values to -inf, in place.

    Entries are ranked along the last dimension by descending logit. An
    entry is masked only when the entries ranked above it already have
    a cumulative softmax probability greater than `top_p`. The
    highest-ranked entry is never masked.

    Args:
        logits: tensor of scores; filtering is done along the last
            dimension and the tensor is modified in place.
        top_p: cumulative probability threshold.

    Returns:
        None. The result is written into `logits`.
    """

    # First sort and calculate cumulative sum of probabilities.
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)

    # Filtering based on the cumulative sum (still in sorted order).
    filter_ = cumulative_probs > top_p
    # Shift the mask one position to the right so that the entry whose
    # probability pushes the cumulative sum past top_p is itself kept.
    # Without the shift that entry would be dropped and the surviving
    # entries could cover less than top_p of the probability mass.
    filter_[..., 1:] = filter_[..., :-1].clone()
    # Make sure we at least have one token to select from.
    filter_[..., 0] = False

    # Map the mask from sorted order back to the original positions
    # and fill in the filtered part.
    filter_ = filter_.scatter(-1, sorted_indices, filter_)
    logits.masked_fill_(filter_, float('-Inf'))


def sample(logits, top_k=0, top_p=0.0, temperature=1.0, vocab_size=None):
    """ Sample and generate a token.
    Note: logits has the dimension [b, v] where b is the batch size
          and v is the vocabulary size.
    If vocab_size is provided, we will make sure the sample that is
    generated is in [0, vocab-size). This will avoid out of vocabulary
    generations due to padding.

    Args:
        logits: float32 tensor of shape [b, v]. It is not modified;
            the sampling path works on a clone.
        top_k: 1 selects greedy argmax; a value > 1 enables top-k
            filtering; any other value disables top-k.
        top_p: a value in (0, 1] enables top-p filtering when top-k is
            not active; it must be 0.0 whenever top_k >= 1.
        temperature: divisor applied to the logits before filtering on
            the sampling path. It is not applied on the greedy path.
        vocab_size: if truthy, returned indices are clamped to
            [0, vocab_size - 1].

    Returns:
        A 1-D tensor with one sampled token index per row of `logits`.

    Raises:
        AssertionError: if the logits are not 2-D float32, or if the
            top_k / top_p / vocab_size combination is inconsistent.
    """

    # Check logits for consistency. The dtype check is device-agnostic
    # so that float32 logits are accepted on any device.
    assert logits.ndim == 2, 'expected the logits to be of [b, v] shape.'
    assert logits.dtype == torch.float32, \
        'input logits should be floats.'

    # Greedy is just simple argmax.
    if top_k == 1:
        assert top_p == 0.0, 'cannot set both greedy and top-p samplings.'
        samples = torch.argmax(logits, dim=-1)

    # Top-k or top-p sampling.
    else:
        # Clone so we do not modify the inputs,
        logits = logits.clone()
        # Apply temperature in place.
        if temperature != 1.0:
            logits.div_(temperature)

        if top_k > 1:
            assert top_p == 0.0, 'cannot set both top-k and top-p samplings.'
            assert top_k <= logits.size(1), 'top-k is larger than logit size.'
            if vocab_size:
                assert top_k < vocab_size, 'top-k is larger than vocab size.'
            modify_logits_for_top_k_filtering(logits, top_k)

        elif top_p > 0.0:
            assert top_p <= 1.0, 'top-p should be in (0, 1].'
            modify_logits_for_top_p_filtering(logits, top_p)

        # After filtering, we need to recalculate the distribution.
        probs = logits.softmax(dim=-1)
        samples = torch.multinomial(probs, num_samples=1).view(-1)

    # If vocab size is provided, make sure the samples are
    # in the range [0, vocab-size).
    if vocab_size:
        samples = torch.clamp(samples, min=0, max=(vocab_size - 1))

    return samples