run_pplm.py 28.9 KB
Newer Older
Piero Molino's avatar
Piero Molino committed
1
#! /usr/bin/env python3
Julien Chaumond's avatar
Julien Chaumond committed
2
# coding=utf-8
Rosanne Liu's avatar
Rosanne Liu committed
3
4

#Copyright (c) 2019 Uber Technologies, Inc.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
Julien Chaumond's avatar
Julien Chaumond committed
17
18
19
20
21
22

"""
Example command with bag of words:
python examples/run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95

Example command with discriminator:
python examples/run_pplm.py -D sentiment --class_label 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
"""

import argparse
27
import json
Julien Chaumond's avatar
Julien Chaumond committed
28
29
30
31
32
33
34
35
36
37
38
39
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import trange

from transformers import GPT2Tokenizer
from transformers.file_utils import cached_path
from transformers.modeling_gpt2 import GPT2LMHeadModel
40
from pplm_classification_head import ClassificationHead
Julien Chaumond's avatar
Julien Chaumond committed
41
42
43
44
45

# Values of ``loss_type`` selecting which attribute loss is used.
PPLM_BOW = 1
PPLM_DISCRIM = 2
PPLM_BOW_DISCRIM = 3
# Tiny positive constant added to near-zero probabilities and gradient norms
# so that logarithms and divisions stay finite.
SMALL_CONST = 1e-15
# Large finite magnitude; its negation stands in for -infinity when masking
# logits in top_k_filter.
BIG_CONST = 1e10
Julien Chaumond's avatar
Julien Chaumond committed
47
48
49
50
51
52
53
54
55
56
57
58
59

# Built-in bag-of-words topics mapped to the hosted word-list file for each.
# All lists live under the same prefix, so the map is generated from the
# topic names.
BAG_OF_WORDS_ARCHIVE_MAP = {
    topic: "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/{}.txt".format(topic)
    for topic in (
        "legal",
        "military",
        "politics",
        "religion",
        "science",
        "space",
        "technology",
    )
}

# Registry of the known discriminators, keyed by name.  For each entry:
#   "url" (or "path")   - where the classifier-head weights are loaded from
#   "class_size"        - passed to ClassificationHead as class_size
#   "embed_size"        - passed to ClassificationHead as embed_size
#   "class_vocab"       - maps accepted class-label names to label ids
#   "default_class"     - label id used when the requested label is unknown
#   "pretrained_model"  - language model the discriminator is paired with
# set_generic_model_params adds a "generic" entry at run time.
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/clickbait_classifier_head.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_clickbait": 0, "clickbait": 1},
        "default_class": 1,
        "pretrained_model": "gpt2-medium",
    },
    "sentiment": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/SST_classifier_head.pt",
        "class_size": 5,
        "embed_size": 1024,
        "class_vocab": {"very_positive": 2, "very_negative": 3},
        "default_class": 3,
        "pretrained_model": "gpt2-medium",
    },
}


78
79
def to_var(x, requires_grad=False, volatile=False, device='cuda'):
    """
    Return a fresh leaf tensor holding the data of `x` on the requested device.

    Args:
        x: tensor to wrap.
        requires_grad: whether the returned tensor should track gradients.
        volatile: accepted for backward compatibility only and ignored; the
            flag has had no effect since PyTorch 0.4 (use ``torch.no_grad()``
            instead).
        device: 'cuda' moves `x` to the GPU when one is available and leaves
            it where it is otherwise; any other value is passed to ``x.to``.

    Returns:
        A tensor detached from any graph `x` belonged to, with
        ``requires_grad`` set as requested.
    """
    if device == 'cuda':
        # Silently stay on the current device when CUDA is not available.
        if torch.cuda.is_available():
            x = x.cuda()
    else:
        x = x.to(device)
    # detach() gives a new leaf so the caller's tensor is never flagged as
    # requiring grad; this replaces the deprecated autograd.Variable wrapper.
    return x.detach().requires_grad_(requires_grad)


def top_k_filter(logits, k, probs=False):
    """
    Keep the k largest entries along the last dimension and mask the rest.

    Masked logits are set to -BIG_CONST (-1e10), a finite stand-in for
    -infinity, so that after a softmax they contribute (numerically) nothing
    to the denominator.  When `probs` is True the input is treated as
    probabilities and masked entries are set to 0 instead.

    Args:
        logits: tensor of scores; filtering is applied along the last axis.
        k: number of entries to keep.  0 disables filtering and returns
            `logits` unchanged; values larger than the last dimension keep
            every entry.
        probs: mask with 0.0 rather than -BIG_CONST.

    Returns:
        A tensor of the same shape as `logits`.  Entries tied with the k-th
        largest value are kept.
    """
    if k == 0:
        return logits
    # torch.topk raises when k exceeds the dimension size; keeping everything
    # is the natural result in that case.
    k = min(k, logits.size(-1))
    # Smallest kept value per row, with a trailing axis of size 1 so that it
    # broadcasts against `logits` for any number of leading dimensions.
    kth_largest = torch.topk(logits, k)[0][..., -1:]
    below_cutoff = logits < kth_largest
    if probs:
        return torch.where(below_cutoff, torch.zeros_like(logits), logits)
    return torch.where(below_cutoff,
                       torch.ones_like(logits) * -BIG_CONST,
                       logits)


105
106
107
def perturb_past(
        past,
        model,
        last,
        unpert_past=None,
        unpert_logits=None,
        accumulated_hidden=None,
        grad_norms=None,
        stepsize=0.01,
        one_hot_bows_vectors=None,
        classifier=None,
        class_label=None,
        loss_type=0,
        num_iterations=3,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        kl_scale=0.01,
        device='cuda',
):
    """
    Compute a perturbed copy of `past` that lowers the attribute loss.

    For `num_iterations` steps a perturbation (initially zero) is added to
    every entry of `past`, the model is run on `last` with that perturbed
    past, a loss is built from the resulting next-token distribution, and the
    normalized, window-masked gradient of the loss with respect to the
    perturbation is accumulated with step size `stepsize`.

    The loss is the sum of:
      * a bag-of-words term when `loss_type` is PPLM_BOW or PPLM_BOW_DISCRIM;
      * a discriminator cross-entropy term when `loss_type` is PPLM_DISCRIM
        or PPLM_BOW_DISCRIM;
      * `kl_scale` times the KL divergence of the perturbed from the
        unperturbed next-token distribution when `kl_scale` > 0.

    Args:
        past: sequence of 5-dimensional tensors (one per entry of the model's
            cached state); the fourth dimension is read as the current length.
        model: language model called as ``model(last, past=...)`` and, for
            the discriminator loss, ``model(past=..., inputs_embeds=...)``.
        last: input fed to the model together with the past.
        unpert_past: unperturbed past used to roll the model forward for the
            discriminator loss.
        unpert_logits: unperturbed logits used by the KL term.
        accumulated_hidden: running sum of hidden states; treated as 0 when
            None.
        grad_norms: gradient norms from a previous call, or None.
        stepsize: scale applied to each normalized gradient step.
        one_hot_bows_vectors: iterable of one-hot bag-of-words matrices.
        classifier: discriminator applied to the averaged hidden state.
        class_label: target label id for the discriminator loss.
        loss_type: one of PPLM_BOW, PPLM_DISCRIM, PPLM_BOW_DISCRIM.
        num_iterations: number of gradient steps.
        horizon_length: number of extra model steps rolled out for the
            discriminator loss.
        window_length: when > 0 and smaller than the current length, only
            `window_length` positions receive gradient; 0 disables windowing.
        decay: scale the windowed positions linearly from 1/window_length up
            to 1.  Has no effect when `window_length` is 0.
        gamma: exponent applied to the gradient norm during normalization.
        kl_scale: weight of the KL term.
        device: device on which masks, labels and perturbations are created.

    Returns:
        Tuple ``(pert_past, new_accumulated_hidden, grad_norms,
        loss_per_iter)``.  `new_accumulated_hidden` is None when
        `num_iterations` is 0.
    """
    # Perturbation accumulated so far: one float32 array per entry of `past`.
    grad_accumulator = [
        np.zeros(p.shape).astype("float32")
        for p in past
    ]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The decay ramp divides by window_length, so it is only defined for a
    # positive window (decay without a window previously raised
    # ZeroDivisionError).
    if decay and window_length > 0:
        decay_mask = torch.arange(
            0.,
            1.0 + SMALL_CONST,
            1.0 / window_length
        )[1:]
    else:
        decay_mask = 1.0

    # Build the mask that restricts the gradient to a window of positions.
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        leading_shape = tuple(past[0].shape[:-2])
        trailing_shape = tuple(past[0].shape[-1:])
        ones_key_val_shape = leading_shape + (window_length,) + trailing_shape
        zeros_key_val_shape = (
            leading_shape + (curr_length - window_length,) + trailing_shape
        )

        # Swap the last two axes so decay_mask broadcasts along the window
        # axis, then swap back.
        ones_mask = torch.ones(ones_key_val_shape)
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        # NOTE(review): the ones block comes first along the length axis, so
        # the window covers the lowest indices - confirm this is intended.
        window_mask = torch.cat(
            (ones_mask, torch.zeros(zeros_key_val_shape)),
            dim=-2
        ).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # Accumulate perturbations for num_iterations.
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [
            to_var(torch.from_numpy(p_), requires_grad=True, device=device)
            for p_ in grad_accumulator
        ]

        # Compute hidden states and logits using the perturbed past.
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        all_logits, _, all_hidden = model(last, past=perturbed_past)
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(
            hidden,
            dim=1
        ).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            # Negative log of the probability mass assigned to each bag.
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == PPLM_DISCRIM or loss_type == PPLM_BOW_DISCRIM:
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            # Called without arguments this returns the input embedding module.
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # Feed the expected embedding under the current distribution.
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                _, curr_unpert_past, curr_all_hidden = model(
                    past=curr_unpert_past,
                    inputs_embeds=inputs_embeds
                )
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(
                    curr_hidden, dim=1)

            # Average the accumulated hidden state over all summed positions.
            prediction = classifier(new_accumulated_hidden /
                                    (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label],
                                 device=device,
                                 dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss

        kl_loss = 0.0
        if kl_scale > 0.0:
            # Lift near-zero probabilities to SMALL_CONST so the log and the
            # division below stay finite.
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            unpert_probs = (
                unpert_probs + SMALL_CONST *
                (unpert_probs <= SMALL_CONST).float().to(device).detach()
            )
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(
                device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * (
                (corrected_probs * (corrected_probs / unpert_probs).log()).sum()
            )
            print(' kl_loss', kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(' pplm_loss', (loss - kl_loss).data.cpu().numpy())

        # Compute gradients.
        loss.backward()

        # Calculate gradient norms; for the pure BoW loss the running maximum
        # over previous calls is kept.
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [
                (torch.norm(p_.grad * window_mask) + SMALL_CONST)
                for p_ in curr_perturbation
            ]

        # Normalized, masked gradient step for every perturbation tensor.
        grad = [
            -stepsize *
            (p_.grad * window_mask / norm ** gamma).data.cpu().numpy()
            for p_, norm in zip(curr_perturbation, grad_norms)
        ]

        # Accumulate gradient.
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # Reset gradients, just to make sure.
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # Remove past from the graph.
        past = [p_.detach() for p_ in past]

    # Apply the accumulated perturbations to the past.
    grad_accumulator = [
        to_var(torch.from_numpy(p_), requires_grad=True, device=device)
        for p_ in grad_accumulator
    ]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Julien Chaumond's avatar
Julien Chaumond committed
295
296
297


def get_classifier(
        name: Optional[str], class_label: Union[str, int],
        device: str
) -> Tuple[Optional["ClassificationHead"], Optional[int]]:
    """
    Load the named discriminator head and resolve the target class id.

    Args:
        name: key into DISCRIMINATOR_MODELS_PARAMS, or None for "no
            discriminator".
        class_label: either a class name (looked up in the discriminator's
            ``class_vocab``) or a class id (accepted when it is one of the
            vocab's values).
        device: device the classifier is moved to and weights are mapped to.

    Returns:
        ``(classifier, label_id)``; ``(None, None)`` when `name` is None.
        The classifier is returned in eval mode.  When a str or int
        `class_label` is not recognised, the discriminator's
        ``default_class`` is used and a notice is printed; any other type
        falls back to the default silently.

    Raises:
        KeyError: if `name` is not a key of DISCRIMINATOR_MODELS_PARAMS.
        ValueError: if the discriminator entry has neither "url" nor "path".
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(
        class_size=params['class_size'],
        embed_size=params['embed_size']
    ).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified "
                         "in the discriminator model parameters")
    # NOTE(security): torch.load unpickles the file; only point "url"/"path"
    # at weights from a trusted source.
    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    class_vocab = params["class_vocab"]
    if isinstance(class_label, str) and class_label in class_vocab:
        label_id = class_vocab[class_label]
    elif isinstance(class_label, int) and class_label in set(class_vocab.values()):
        label_id = class_label
    else:
        label_id = params["default_class"]
        # Only a str/int label that failed the lookup is worth reporting.
        if isinstance(class_label, (str, int)):
            print("class_label {} not in class_vocab".format(class_label))
            print("available values are: {}".format(class_vocab))
            print("using default class {}".format(label_id))

    return classifier, label_id


344
def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str], tokenizer) -> \
        List[List[List[int]]]:
    """
    Tokenize every word of every requested bag of words.

    Args:
        bag_of_words_ids_or_paths: each item is either a key of
            BAG_OF_WORDS_ARCHIVE_MAP (resolved through ``cached_path``) or a
            path to a text file with one word per line.
        tokenizer: object whose ``encode(word, add_prefix_space=True)``
            returns the token ids of a word.

    Returns:
        One list per bag; each holds the token-id list of every non-blank
        line of that bag's file, in file order.
    """
    bow_indices = []
    for id_or_path in bag_of_words_ids_or_paths:
        if id_or_path in BAG_OF_WORDS_ARCHIVE_MAP:
            filepath = cached_path(BAG_OF_WORDS_ARCHIVE_MAP[id_or_path])
        else:
            filepath = id_or_path
        with open(filepath, "r") as f:
            words = [line.strip() for line in f.read().strip().split("\n")]
        # Blank lines would otherwise reach the tokenizer as empty words.
        bow_indices.append(
            [tokenizer.encode(word, add_prefix_space=True)
             for word in words if word])
    return bow_indices


360
def build_bows_one_hot_vectors(bow_indices, tokenizer, device='cuda'):
    """
    Turn bags of tokenized words into one-hot matrices over the vocabulary.

    Only words encoded as exactly one token are used; multi-token words and
    empty encodings are dropped.  A bag left with no usable word is skipped,
    so the result may be shorter than `bow_indices`.

    Args:
        bow_indices: list of bags, each a list of token-id lists, or None.
        tokenizer: object exposing ``vocab_size``.
        device: device the matrices are created on.

    Returns:
        None when `bow_indices` is None; otherwise a list with one
        ``(num_words, vocab_size)`` float tensor per non-empty bag, where
        row i has a 1 at the token id of word i.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        # Exactly one token per word: rows of different length cannot be
        # stacked into a tensor, and an empty row carries no token to mark.
        single_token_words = [ids for ids in single_bow if len(ids) == 1]
        if not single_token_words:
            continue
        word_ids = torch.tensor(single_token_words).to(device)
        num_words = word_ids.shape[0]
        one_hot_bow = torch.zeros(num_words, tokenizer.vocab_size).to(device)
        one_hot_bow.scatter_(1, word_ids, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


375
def full_text_generation(
        model,
        tokenizer,
        context=None,
        num_samples=1,
        device="cuda",
        bag_of_words=None,
        discrim=None,
        class_label=None,
        length=100,
        stepsize=0.02,
        temperature=1.0,
        top_k=10,
        sample=False,
        num_iterations=3,
        grad_length=10000,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        gm_scale=0.9,
        kl_scale=0.01,
        **kwargs
):
    """
    Generate one unperturbed text and `num_samples` perturbed texts.

    The attribute model is chosen from the arguments: a bag of words
    (`bag_of_words`), a discriminator (`discrim`), or both.

    Args:
        model: language model passed on to generate_text_pplm.
        tokenizer: tokenizer passed on to generate_text_pplm and used to
            encode the bag-of-words lists.
        context: prompt token ids.
        num_samples: number of perturbed generations.
        device: device string; CUDA memory is released after each
            generation phase when it equals 'cuda'.
        bag_of_words: ';'-separated bag identifiers or file paths.
        discrim: discriminator name, resolved by get_classifier.
        class_label: target class name or id for the discriminator.
        length, stepsize, temperature, top_k, sample, num_iterations,
        grad_length, horizon_length, window_length, decay, gamma, gm_scale,
        kl_scale: forwarded unchanged to generate_text_pplm for the
            perturbed generations.
        **kwargs: accepted and ignored.

    Returns:
        Tuple ``(unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses,
        losses_in_time)``; `discrim_losses` is only filled when a
        discriminator is in use.

    Raises:
        ValueError: if neither a bag of words nor a discriminator is given.
    """
    classifier, class_id = get_classifier(
        discrim,
        class_label,
        device
    )

    bow_indices = []
    if bag_of_words:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"),
                                               tokenizer)

    if bag_of_words and classifier:
        print("Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.")
        loss_type = PPLM_BOW_DISCRIM
    elif bag_of_words:
        loss_type = PPLM_BOW
        print("Using PPLM-BoW")
    elif classifier is not None:
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")
    else:
        raise ValueError("Specify either a bag of words or a discriminator")

    # NOTE(review): the unperturbed baseline runs with generate_text_pplm's
    # default temperature and top_k, not the values passed in here - confirm
    # that this is intended.
    unpert_gen_tok_text, _, _ = generate_text_pplm(
        model=model,
        tokenizer=tokenizer,
        context=context,
        device=device,
        length=length,
        sample=sample,
        perturb=False
    )
    if device == 'cuda':
        torch.cuda.empty_cache()

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for _ in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            model=model,
            tokenizer=tokenizer,
            context=context,
            device=device,
            perturb=True,
            bow_indices=bow_indices,
            classifier=classifier,
            class_label=class_id,
            loss_type=loss_type,
            length=length,
            stepsize=stepsize,
            temperature=temperature,
            top_k=top_k,
            sample=sample,
            num_iterations=num_iterations,
            grad_length=grad_length,
            horizon_length=horizon_length,
            window_length=window_length,
            decay=decay,
            gamma=gamma,
            gm_scale=gm_scale,
            kl_scale=kl_scale,
        )
        pert_gen_tok_texts.append(pert_gen_tok_text)
        # discrim_loss is only a tensor when a classifier produced it.
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == 'cuda':
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
Julien Chaumond's avatar
Julien Chaumond committed
475

476
477
478

def generate_text_pplm(
        model,
        tokenizer,
        context=None,
        past=None,
        device="cuda",
        perturb=True,
        bow_indices=None,
        classifier=None,
        class_label=None,
        loss_type=0,
        length=100,
        stepsize=0.02,
        temperature=1.0,
        top_k=10,
        sample=False,
        num_iterations=3,
        grad_length=10000,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        gm_scale=0.9,
        kl_scale=0.01,
):
    """
    Generate `length` tokens after `context`, optionally steering each step.

    At every step the model is run on the text so far to get the unperturbed
    distribution; when `perturb` is True and `num_iterations` > 0 the cached
    past is perturbed with perturb_past before the next token is predicted,
    and the perturbed and unperturbed distributions are fused with weight
    `gm_scale`.

    Args:
        model: language model returning ``(logits, past, all_hidden)``.
        tokenizer: used for the bag-of-words vocabulary size and to print the
            decoded text after every step.
        context: prompt token ids; wrapped to a 2-D long tensor.  When it is
            empty or None no prompt tensor is built - NOTE(review): the first
            model call then receives None; confirm the model accepts that.
        past: cached model state to start from, or None.
        device: device for the created tensors.
        perturb: steer generation (True) or sample from the plain model.
        bow_indices: tokenized bags of words, see
            build_bows_one_hot_vectors.
        classifier: discriminator head or None.
        class_label: target label id for the discriminator.
        loss_type: forwarded to perturb_past.
        length: number of tokens to generate.
        stepsize: perturbation step size; forced to 0 once the step index
            reaches `grad_length`.
        temperature: divisor applied to the logits before the softmax.
        top_k: number of candidates kept by top_k_filter (0 keeps all).
        sample: draw from the distribution (True) or take the argmax.
        num_iterations, horizon_length, window_length, decay, gamma,
        kl_scale: forwarded to perturb_past.
        grad_length: number of initial steps that are perturbed.
        gm_scale: exponent of the perturbed distribution in the fusion; the
            unperturbed one gets ``1 - gm_scale``.

    Returns:
        Tuple ``(output_so_far, unpert_discrim_loss, loss_in_time)``: the
        token ids of prompt plus generation, the discriminator loss of the
        unperturbed hidden states at the last step (0 without a classifier),
        and the per-step loss lists returned by perturb_past.
    """
    output_so_far = None
    if context:
        context_t = torch.tensor(context, device=device, dtype=torch.long)
        while len(context_t.shape) < 2:
            context_t = context_t.unsqueeze(0)
        output_so_far = context_t

    # Collect one-hot vectors for the bags of words.
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, tokenizer,
                                                      device)

    # Stateless loss module; built once instead of on every step.
    ce_loss = torch.nn.CrossEntropyLoss()

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    for i in trange(length, ascii=True):

        # Get past/probs for current output, except for last word.
        # Note that GPT takes 2 inputs: past + current_token.

        # Run the model forward to obtain the unperturbed past.
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                _, past, _ = model(output_so_far[:, :-1])

        unpert_logits, unpert_past, unpert_all_hidden = model(output_so_far)
        unpert_last_hidden = unpert_all_hidden[-1]

        # Check if we are above the maximum number of perturbed steps.
        if i >= grad_length:
            current_stepsize = stepsize * 0
        else:
            current_stepsize = stepsize

        # Modify the past if necessary.
        if not perturb or num_iterations == 0:
            pert_past = past
        else:
            # Sum of the unperturbed hidden states of all but the last token.
            accumulated_hidden = unpert_last_hidden[:, :-1, :]
            accumulated_hidden = torch.sum(accumulated_hidden, dim=1)

            if past is not None:
                pert_past, _, grad_norms, loss_this_iter = perturb_past(
                    past,
                    model,
                    last,
                    unpert_past=unpert_past,
                    unpert_logits=unpert_logits,
                    accumulated_hidden=accumulated_hidden,
                    grad_norms=grad_norms,
                    stepsize=current_stepsize,
                    one_hot_bows_vectors=one_hot_bows_vectors,
                    classifier=classifier,
                    class_label=class_label,
                    loss_type=loss_type,
                    num_iterations=num_iterations,
                    horizon_length=horizon_length,
                    window_length=window_length,
                    decay=decay,
                    gamma=gamma,
                    kl_scale=kl_scale,
                    device=device,
                )
                loss_in_time.append(loss_this_iter)
            else:
                pert_past = past

        pert_logits, past, _ = model(last, past=pert_past)
        pert_logits = pert_logits[:, -1, :] / temperature  # + SMALL_CONST
        pert_probs = F.softmax(pert_logits, dim=-1)

        if classifier is not None:
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([class_label], device=device,
                                 dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print(
                "unperturbed discrim loss",
                unpert_discrim_loss.data.cpu().numpy()
            )
        else:
            unpert_discrim_loss = 0

        # Fuse the modified model and original model.
        if perturb:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)

            pert_probs = ((pert_probs ** gm_scale) * (
                    unpert_probs ** (1 - gm_scale)))  # + SMALL_CONST
            pert_probs = top_k_filter(pert_probs, k=top_k,
                                      probs=True)  # + SMALL_CONST

            # Rescale so the kept probabilities sum to one again.
            total_prob = torch.sum(pert_probs)
            if total_prob <= 1:
                pert_probs = pert_probs / total_prob
        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)  # + SMALL_CONST
            pert_probs = F.softmax(pert_logits, dim=-1)

        # Sample or greedy.
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)
        else:
            _, last = torch.topk(pert_probs, k=1, dim=-1)

        # Update context/output_so_far by appending the new token.
        output_so_far = (
            last if output_so_far is None
            else torch.cat((output_so_far, last), dim=1)
        )

        print(tokenizer.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time
Julien Chaumond's avatar
Julien Chaumond committed
622
623


624
625
626
627
628
629
630
631
632
633
634
635
636
637
def set_generic_model_params(discrim_weights, discrim_meta):
    """
    Register a user-supplied discriminator under the name 'generic'.

    The JSON file at `discrim_meta` is loaded, `discrim_weights` is stored
    in it under the 'path' key, and the result is written to
    DISCRIMINATOR_MODELS_PARAMS['generic'].

    Raises:
        ValueError: if `discrim_weights` or `discrim_meta` is None (checked
            in that order).
    """
    required = (
        ('discrim_weights', discrim_weights),
        ('discrim_meta', discrim_meta),
    )
    for arg_name, arg_value in required:
        if arg_value is None:
            raise ValueError(
                'When using a generic discriminator, '
                '{} need to be specified'.format(arg_name)
            )

    with open(discrim_meta, 'r') as meta_handle:
        generic_params = json.load(meta_handle)
    generic_params['path'] = discrim_weights
    DISCRIMINATOR_MODELS_PARAMS['generic'] = generic_params


638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
def run_pplm_example(
        pretrained_model="gpt2-medium",
        cond_text="",
        uncond=False,
        num_samples=1,
        bag_of_words=None,
        discrim=None,
        discrim_weights=None,
        discrim_meta=None,
        class_label=-1,
        length=100,
        stepsize=0.02,
        temperature=1.0,
        top_k=10,
        sample=False,
        num_iterations=3,
        grad_length=10000,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        gm_scale=0.9,
        kl_scale=0.01,
        seed=0,
        no_cuda=False,
        colorama=False
):
    """Load GPT-2, generate unperturbed and perturbed texts, and print them.

    Args:
        pretrained_model: model name or path passed to ``from_pretrained``.
            Overridden by the discriminator's ``"pretrained_model"`` entry
            whenever ``discrim`` is given.
        cond_text: prefix text to condition on; if empty (and ``uncond`` is
            false) the user is prompted on stdin until a non-empty prompt is
            entered.
        uncond: if true, condition only on the tokenizer's BOS token.
        bag_of_words: ``;``-separated bag-of-words identifiers; also used to
            pick which tokens get highlighted when ``colorama`` is set.
        discrim: discriminator name; ``'generic'`` additionally requires
            ``discrim_weights`` and ``discrim_meta``.
        discrim_weights: weights for the generic discriminator.
        discrim_meta: JSON metadata file for the generic discriminator.
        seed: seed applied to both torch and numpy RNGs.
        no_cuda: if true, run on CPU even when CUDA is available.
        colorama: if true, print bag-of-words tokens in red using the
            ``colorama`` package (falls back to plain output when the package
            is not installed).
        num_samples, class_label, length, stepsize, temperature, top_k,
        sample, num_iterations, grad_length, horizon_length, window_length,
        decay, gamma, gm_scale, kl_scale: forwarded unchanged to
            ``full_text_generation``.

    Returns:
        None. Results are printed to stdout.
    """
    # set Random seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"

    if discrim == 'generic':
        set_generic_model_params(discrim_weights, discrim_meta)

    # a discriminator is tied to the language model it was trained with
    if discrim is not None:
        pretrained_model = DISCRIMINATOR_MODELS_PARAMS[discrim][
            "pretrained_model"
        ]
        print("discrim = {}, pretrained_model set "
              "to discriminator's = {}".format(discrim, pretrained_model))

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(
        pretrained_model,
        output_hidden_states=True
    )
    model.to(device)
    model.eval()

    # load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if uncond:
        tokenized_cond_text = tokenizer.encode(
            [tokenizer.bos_token]
        )
    else:
        raw_text = cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = tokenizer.encode(tokenizer.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(tokenizer.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model,
        tokenizer=tokenizer,
        context=tokenized_cond_text,
        device=device,
        num_samples=num_samples,
        bag_of_words=bag_of_words,
        discrim=discrim,
        class_label=class_label,
        length=length,
        stepsize=stepsize,
        temperature=temperature,
        top_k=top_k,
        sample=sample,
        num_iterations=num_iterations,
        grad_length=grad_length,
        horizon_length=horizon_length,
        window_length=window_length,
        decay=decay,
        gamma=gamma,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
    )

    # untokenize unperturbed text
    unpert_gen_text = tokenizer.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    # Import the optional colouring package once, under its own name, so the
    # boolean ``colorama`` flag is never rebound to the module object and a
    # missing package degrades to plain output instead of hiding every text.
    colorama_lib = None
    if colorama:
        try:
            import colorama as colorama_lib
        except ImportError:
            print("colorama is not installed; "
                  "printing perturbed texts without colors")

    bow_word_ids = set()
    if bag_of_words and colorama_lib is not None:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"),
                                               tokenizer)
        for single_bow_list in bow_indices:
            # keep only words encoded as exactly one token: multi-token words
            # cannot be matched id-by-id, and an empty encoding has no w[0]
            bow_word_ids.update(
                w[0] for w in single_bow_list if len(w) == 1
            )

    # NOTE: collected but currently not returned (the function returns None)
    generated_texts = []

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        try:
            # untokenize perturbed text
            if colorama_lib is not None:
                pieces = []
                for word_id in pert_gen_tok_text.tolist()[0]:
                    if word_id in bow_word_ids:
                        pieces.append('{}{}{}'.format(
                            colorama_lib.Fore.RED,
                            tokenizer.decode([word_id]),
                            colorama_lib.Style.RESET_ALL
                        ))
                    else:
                        pieces.append(tokenizer.decode([word_id]))
                pert_gen_text = ''.join(pieces)
            else:
                pert_gen_text = tokenizer.decode(pert_gen_tok_text.tolist()[0])

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as exc:
            # best effort: one undecodable sample must not abort the rest,
            # but the failure is reported instead of silently dropped
            print("Could not display perturbed generated text {}: {}".format(
                i + 1, exc))

        # keep the prefix, perturbed seq, original seq for each index
        generated_texts.append(
            (tokenized_cond_text, pert_gen_tok_text, unpert_gen_tok_text)
        )

    return
Julien Chaumond's avatar
Julien Chaumond committed
793
794


Piero Molino's avatar
Piero Molino committed
795
# Command-line entry point: every option below maps one-to-one onto a keyword
# argument of run_pplm_example (same names, same defaults except --cond_text).
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pretrained_model",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument(
        "--cond_text", type=str, default="The lake",
        help="Prefix texts to condition on"
    )
    parser.add_argument(
        "--uncond", action="store_true",
        help="Generate from end-of-text as prefix"
    )
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help="Bags of words used for PPLM-BoW. "
             "Either a BOW id (see list in code) or a filepath. "
             "Multiple BoWs separated by ;",
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity", "generic"),
        help="Discriminator to use",
    )
    parser.add_argument('--discrim_weights', type=str, default=None,
                        help='Weights for the generic discriminator')
    parser.add_argument('--discrim_meta', type=str, default=None,
                        help='Meta information for the generic discriminator')
    parser.add_argument(
        "--class_label",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    # The help text here used to be a copy of --uncond's; the flag actually
    # selects multinomial sampling over greedy (top-1) token selection.
    parser.add_argument(
        "--sample", action="store_true",
        help="Sample the next token from the filtered distribution "
             "instead of picking the most likely one"
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; "
             "0 corresponds to infinite window length",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument("--decay", action="store_true",
                        help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument("--colorama", action="store_true",
                        help="colors keywords")

    args = parser.parse_args()
    # argparse destinations match run_pplm_example's parameter names exactly
    run_pplm_example(**vars(args))