#! /usr/bin/env python3
# coding=utf-8
# Copyright 2018 The Uber AI Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Example command with bag of words:
python examples/run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95

Example command with discriminator:
python examples/run_pplm.py -D sentiment --class_label 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
"""

import argparse
26
import json
Julien Chaumond's avatar
Julien Chaumond committed
27
28
29
30
31
32
33
34
35
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import trange

36
from examples.run_pplm_discrim_train import ClassificationHead
Julien Chaumond's avatar
Julien Chaumond committed
37
38
39
40
41
42
43
44
from transformers import GPT2Tokenizer
from transformers.file_utils import cached_path
from transformers.modeling_gpt2 import GPT2LMHeadModel

# Values of ``loss_type`` selecting which attribute model(s) drive the
# perturbation: bag of words, discriminator, or both together.
PPLM_BOW, PPLM_DISCRIM, PPLM_BOW_DISCRIM = 1, 2, 3

# Tiny additive guard applied to probabilities and gradient norms.
SMALL_CONST = 1e-15
# Magnitude that, negated, stands in for "minus infinity" when masking logits.
BIG_CONST = 1e10
Julien Chaumond's avatar
Julien Chaumond committed
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

# Bags of words that can be requested by identifier instead of by file path.
# Every identifier maps to a hosted word list named ``<identifier>.txt``.
_BOW_URL_TEMPLATE = (
    "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/{}.txt"
)
BAG_OF_WORDS_ARCHIVE_MAP = {
    topic: _BOW_URL_TEMPLATE.format(topic)
    for topic in (
        'kitchen',
        'legal',
        'military',
        'monsters',
        'politics',
        'positive_words',
        'religion',
        'science',
        'space',
        'technology',
    )
}

_DISCRIMINATOR_URL_PREFIX = (
    "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/"
)


def _discriminator_params(weights_file, class_size, class_vocab,
                          default_class):
    """Assemble the parameter record of one pretrained discriminator head."""
    return {
        "url": _DISCRIMINATOR_URL_PREFIX + weights_file,
        "class_size": class_size,
        "embed_size": 1024,
        "class_vocab": class_vocab,
        "default_class": default_class,
        "pretrained_model": "gpt2-medium",
    }


# Known discriminators, keyed by name. Each record holds the location of the
# classifier-head weights ("url"), the head dimensions ("class_size",
# "embed_size"), the label-name -> label-id mapping ("class_vocab"), the label
# id used when the requested label is not recognised ("default_class") and the
# language model the head is used with ("pretrained_model").
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": _discriminator_params(
        "clickbait_classifier_head.pt",
        class_size=2,
        class_vocab={"non_clickbait": 0, "clickbait": 1},
        default_class=1,
    ),
    "sentiment": _discriminator_params(
        "SST_classifier_head.pt",
        class_size=5,
        class_vocab={"very_positive": 2, "very_negative": 3},
        default_class=3,
    ),
    "toxicity": _discriminator_params(
        "toxic_classifier_head.pt",
        class_size=2,
        class_vocab={"non_toxic": 0, "toxic": 1},
        default_class=0,
    ),
}


88
89
def to_var(x, requires_grad=False, volatile=False, device='cuda'):
    """
    Move a tensor to the requested device and return it as an autograd leaf.

    Args:
        x: tensor to convert.
        requires_grad: whether the returned tensor should track gradients.
        volatile: accepted only for backward compatibility and ignored.
            ``volatile`` has had no effect since PyTorch 0.4; use
            ``torch.no_grad()`` to disable gradient tracking instead.
        device: ``'cuda'`` moves the tensor to the default CUDA device when
            CUDA is available and otherwise leaves it where it is; any other
            value is forwarded to ``Tensor.to``.

    Returns:
        A tensor detached from any previous graph, located on the selected
        device, with ``requires_grad`` set as requested.
    """
    if device == 'cuda':
        if torch.cuda.is_available():
            x = x.cuda()
    else:
        x = x.to(device)
    # Tensor-native equivalent of the deprecated ``Variable(x, ...)`` wrapper:
    # a detached tensor flagged (or not) for gradient tracking.
    return x.detach().requires_grad_(requires_grad)


def top_k_filter(logits, k, probs=False):
    """
    Keep the k largest entries of every row and mask out all the others.

    Entries strictly smaller than the k-th largest value of their row are
    replaced; entries tied with that value are kept. With ``probs=False`` the
    replacement is ``-BIG_CONST`` (a stand-in for minus infinity, so the
    masked logits contribute nothing to a following softmax); with
    ``probs=True`` the replacement is 0. ``k == 0`` disables filtering and
    returns the input unchanged.

    The input is indexed as a 2-D tensor (one row per batch element).
    """
    if k == 0:
        return logits

    top_values, _ = torch.topk(logits, k)
    # Smallest retained value of each row, broadcast to the input's shape.
    row_thresholds = top_values[:, -1].view(-1, 1).expand_as(logits)
    below_threshold = logits < row_thresholds
    masked_value = 0.0 if probs else -BIG_CONST
    return torch.where(below_threshold,
                       torch.ones_like(logits) * masked_value,
                       logits)


115
116
117
def perturb_past(
        past,
        model,
        last,
        unpert_past=None,
        unpert_logits=None,
        accumulated_hidden=None,
        grad_norms=None,
        stepsize=0.01,
        one_hot_bows_vectors=None,
        classifier=None,
        class_label=None,
        loss_type=0,
        num_iterations=3,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        kl_scale=0.01,
        device='cuda',
):
    """
    Compute a perturbation of ``past`` by gradient descent on the PPLM loss.

    For ``num_iterations`` steps the current perturbation is added to
    ``past``, the model is run on ``last`` with that perturbed past, a loss is
    built from the selected attribute model(s) plus an optional KL term, and
    the normalised, windowed gradient with respect to the perturbation is
    accumulated (scaled by ``-stepsize``).

    Args:
        past: sequence of 5-D tensors; the 4th dimension is read as the
            current sequence length (presumably the GPT-2 key/value cache --
            confirm against the model in use).
        model: callable as ``model(last, past=...)`` and
            ``model(past=..., inputs_embeds=...)`` returning
            ``(logits, past, all_hidden)``; must expose
            ``resize_token_embeddings()``.
        last: input passed to the model together with the perturbed past.
        unpert_past: unperturbed past used to roll the discriminator horizon.
        unpert_logits: unperturbed logits; required when ``kl_scale > 0``.
        accumulated_hidden: running sum of hidden states; ``None`` means 0.
        grad_norms: per-tensor gradient norms from a previous call, or None.
        stepsize: step size of the gradient update.
        one_hot_bows_vectors: one-hot bag-of-words matrices, required for the
            bag-of-words loss types.
        classifier: discriminator head, required for the discriminator loss
            types.
        class_label: target class id for the discriminator loss.
        loss_type: one of ``PPLM_BOW``, ``PPLM_DISCRIM``, ``PPLM_BOW_DISCRIM``.
        num_iterations: number of gradient steps.
        horizon_length: number of extra model steps rolled out before the
            discriminator is applied.
        window_length: when > 0 and smaller than the current length, only a
            window of that many positions receives gradient updates.
        decay: scale the window with linearly increasing weights; only has
            an effect when a window is active.
        gamma: exponent applied to the gradient norms when normalising.
        kl_scale: weight of the KL term; values <= 0 disable it.
        device: device on which masks and perturbations are placed.

    Returns:
        Tuple ``(pert_past, new_accumulated_hidden, grad_norms,
        loss_per_iter)``. ``new_accumulated_hidden`` is None when
        ``num_iterations`` is 0.
    """
    # Generate initial perturbed past
    grad_accumulator = [
        (np.zeros(p.shape).astype("float32"))
        for p in past
    ]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The decay profile is only consumed when a window is active. Guarding on
    # window_length also avoids dividing by zero when decay is requested with
    # the default window_length of 0.
    if decay and window_length > 0:
        decay_mask = torch.arange(
            0.,
            1.0 + SMALL_CONST,
            1.0 / (window_length)
        )[1:]
    else:
        decay_mask = 1.0

    # Build the mask that restricts the gradient update to a window of the
    # past: ones (optionally decayed) on the first `window_length` positions
    # along dim -2, zeros on the remaining ones.
    # NOTE(review): the window is placed at the start of the sequence axis --
    # confirm this is the intended end of the past.
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = (
                tuple(past[0].shape[:-2])
                + tuple([window_length])
                + tuple(past[0].shape[-1:])
        )

        zeros_key_val_shape = (
                tuple(past[0].shape[:-2])
                + tuple([curr_length - window_length])
                + tuple(past[0].shape[-1:])
        )

        ones_mask = torch.ones(ones_key_val_shape)
        # Swap the last two axes so decay_mask broadcasts over positions.
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        window_mask = torch.cat(
            (ones_mask, torch.zeros(zeros_key_val_shape)),
            dim=-2
        ).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [
            to_var(torch.from_numpy(p_), requires_grad=True, device=device)
            for p_ in grad_accumulator
        ]

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        all_logits, _, all_hidden = model(last, past=perturbed_past)
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(
            hidden,
            dim=1
        ).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        loss_list = []
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
                loss_list.append(bow_loss)
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == PPLM_DISCRIM or loss_type == PPLM_BOW_DISCRIM:
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # Feed the expected embedding under the current distribution.
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                _, curr_unpert_past, curr_all_hidden = model(
                    past=curr_unpert_past,
                    inputs_embeds=inputs_embeds
                )
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(
                    curr_hidden, dim=1)

            # Average the accumulated hidden states before classifying.
            prediction = classifier(new_accumulated_hidden /
                                    (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label],
                                 device=device,
                                 dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss
            loss_list.append(discrim_loss)

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            # Lift (near-)zero probabilities so the log below stays finite.
            unpert_probs = (
                    unpert_probs + SMALL_CONST *
                    (unpert_probs <= SMALL_CONST).float().to(device).detach()
            )
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(
                device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * (
                (corrected_probs * (corrected_probs / unpert_probs).log()).sum()
            )
            print(' kl_loss', kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(' pplm_loss', (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [
                (torch.norm(p_.grad * window_mask) + SMALL_CONST)
                for index, p_ in enumerate(curr_perturbation)
            ]

        # normalize gradients
        grad = [
            -stepsize *
            (p_.grad * window_mask / grad_norms[
                index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        new_past = []
        for p_ in past:
            new_past.append(p_.detach())
        past = new_past

    # apply the accumulated perturbations to the past
    grad_accumulator = [
        to_var(torch.from_numpy(p_), requires_grad=True, device=device)
        for p_ in grad_accumulator
    ]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Julien Chaumond's avatar
Julien Chaumond committed
305
306
307


def get_classifier(
        name: Optional[str], class_label: Union[str, int],
        device: str
) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    """
    Load a discriminator head and resolve the requested class to a label id.

    Args:
        name: key into ``DISCRIMINATOR_MODELS_PARAMS``; ``None`` means no
            discriminator is used.
        class_label: class name (looked up in the discriminator's
            ``class_vocab``) or numeric label id (must be one of the
            ``class_vocab`` values).
        device: device on which the classifier weights are placed.

    Returns:
        ``(classifier, label_id)``, or ``(None, None)`` when ``name`` is None.
        An unrecognised ``class_label`` falls back to the discriminator's
        ``default_class`` after printing a notice; a label that is neither a
        string nor an int falls back silently.

    Raises:
        ValueError: if the discriminator parameters contain neither ``url``
            nor ``path``.
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(
        class_size=params['class_size'],
        embed_size=params['embed_size']
    ).to(device)

    # Locate the weights: a hosted archive takes precedence over a local path.
    if "url" in params:
        weights_file = cached_path(params["url"])
    elif "path" in params:
        weights_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified "
                         "in the discriminator model parameters")

    state_dict = torch.load(weights_file, map_location=device)
    classifier.load_state_dict(state_dict)
    classifier.eval()

    if isinstance(class_label, str):
        recognised = class_label in params["class_vocab"]
        if recognised:
            label_id = params["class_vocab"][class_label]
    elif isinstance(class_label, int):
        recognised = class_label in set(params["class_vocab"].values())
        if recognised:
            label_id = class_label
    else:
        # Neither a name nor an id: use the default without any notice.
        return classifier, params["default_class"]

    if not recognised:
        label_id = params["default_class"]
        print("class_label {} not in class_vocab".format(class_label))
        print("available values are: {}".format(params["class_vocab"]))
        print("using default class {}".format(label_id))

    return classifier, label_id


354
def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str], tokenizer) -> \
        List[List[List[int]]]:
    """
    Tokenize every word of every requested bag of words.

    Each entry of ``bag_of_words_ids_or_paths`` is either a key of
    ``BAG_OF_WORDS_ARCHIVE_MAP`` (resolved through ``cached_path``) or is
    used directly as the path of a text file with one word per line.

    Returns:
        One list per bag, holding for each word the token ids produced by
        ``tokenizer.encode(word, add_prefix_space=True)``.
    """
    all_bags = []
    for source in bag_of_words_ids_or_paths:
        filepath = (
            cached_path(BAG_OF_WORDS_ARCHIVE_MAP[source])
            if source in BAG_OF_WORDS_ARCHIVE_MAP
            else source
        )
        with open(filepath, "r") as handle:
            lines = handle.read().strip().split("\n")

        encoded_words = []
        for line in lines:
            encoded_words.append(
                tokenizer.encode(line.strip(), add_prefix_space=True))
        all_bags.append(encoded_words)
    return all_bags


370
def build_bows_one_hot_vectors(bow_indices, tokenizer, device='cuda'):
    """
    Turn tokenized bags of words into one-hot matrices over the vocabulary.

    Args:
        bow_indices: list of bags; each bag is a list of token-id lists (one
            per word), or ``None``.
        tokenizer: object exposing ``vocab_size``.
        device: device on which the matrices are created.

    Returns:
        ``None`` if ``bow_indices`` is None; otherwise one tensor per bag of
        shape ``(num_single_token_words, tokenizer.vocab_size)`` whose row i
        has a 1 at the token id of the i-th retained word.

    Only words encoded as exactly one token are kept: multi-token words
    cannot be represented by a single one-hot row, and zero-token entries
    would make the index list ragged and break tensor construction.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        single_bow = [word_ids for word_ids in single_bow
                      if len(word_ids) == 1]
        single_bow = torch.tensor(single_bow).to(device)
        num_words = single_bow.shape[0]
        one_hot_bow = torch.zeros(num_words, tokenizer.vocab_size).to(device)
        # Row i receives a 1 in the column given by the i-th word's token id.
        one_hot_bow.scatter_(1, single_bow, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


385
def full_text_generation(
        model,
        tokenizer,
        context=None,
        num_samples=1,
        device="cuda",
        bag_of_words=None,
        discrim=None,
        class_label=None,
        length=100,
        stepsize=0.02,
        temperature=1.0,
        top_k=10,
        sample=False,
        num_iterations=3,
        grad_length=10000,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        gm_scale=0.9,
        kl_scale=0.01,
        **kwargs
):
    """
    Generate one unperturbed text and ``num_samples`` perturbed texts.

    The attribute model is chosen from the arguments: ``bag_of_words`` (a
    ``;``-separated list of bag identifiers or paths), ``discrim`` (a
    discriminator name), or both. The remaining named arguments are
    forwarded to ``generate_text_pplm``; extra keyword arguments are
    accepted and ignored.

    Returns:
        ``(unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses,
        losses_in_time)``: the unperturbed token tensor, the list of
        perturbed token tensors, the per-sample discriminator losses (empty
        when no discriminator is used) and the per-sample loss histories.

    Raises:
        Exception: if neither a bag of words nor a discriminator is given.
    """
    classifier, class_id = get_classifier(discrim, class_label, device)

    bow_indices = (
        get_bag_of_words_indices(bag_of_words.split(";"), tokenizer)
        if bag_of_words
        else []
    )

    # Pick the loss type from whichever attribute models are available.
    if bag_of_words:
        if classifier:
            print("Both PPLM-BoW and PPLM-Discrim are on. "
                  "This is not optimized.")
            loss_type = PPLM_BOW_DISCRIM
        else:
            loss_type = PPLM_BOW
            print("Using PPLM-BoW")
    elif classifier is not None:
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")
    else:
        raise Exception("Specify either a bag of words or a discriminator")

    # Arguments shared by the unperturbed and the perturbed generations.
    shared_settings = dict(
        model=model,
        tokenizer=tokenizer,
        context=context,
        device=device,
        length=length,
        sample=sample,
    )

    unpert_gen_tok_text, _, _ = generate_text_pplm(
        perturb=False, **shared_settings)
    if device == 'cuda':
        torch.cuda.empty_cache()

    perturbed_settings = dict(
        shared_settings,
        perturb=True,
        bow_indices=bow_indices,
        classifier=classifier,
        class_label=class_id,
        loss_type=loss_type,
        stepsize=stepsize,
        temperature=temperature,
        top_k=top_k,
        num_iterations=num_iterations,
        grad_length=grad_length,
        horizon_length=horizon_length,
        window_length=window_length,
        decay=decay,
        gamma=gamma,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
    )

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for _ in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            **perturbed_settings)
        pert_gen_tok_texts.append(pert_gen_tok_text)
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == 'cuda':
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
Julien Chaumond's avatar
Julien Chaumond committed
485

486
487
488

def generate_text_pplm(
        model,
        tokenizer,
        context=None,
        past=None,
        device="cuda",
        perturb=True,
        bow_indices=None,
        classifier=None,
        class_label=None,
        loss_type=0,
        length=100,
        stepsize=0.02,
        temperature=1.0,
        top_k=10,
        sample=False,
        num_iterations=3,
        grad_length=10000,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        gm_scale=0.9,
        kl_scale=0.01,
):
    """
    Generate ``length`` tokens, optionally steering each step with PPLM.

    At every step the model is run on the full output so far to obtain the
    unperturbed logits and hidden states. When ``perturb`` is set,
    ``num_iterations`` is non-zero and a past is available, the past is
    updated through ``perturb_past``. The next token is then drawn from the
    top-k filtered distribution; with ``perturb`` set, that distribution is
    the ``gm_scale``-weighted geometric fusion of the perturbed and the
    unperturbed probabilities.

    Returns:
        ``(output_so_far, unpert_discrim_loss, loss_in_time)``: the token
        tensor including the context, the classifier loss on the
        unperturbed hidden states computed at the last step (0 without a
        classifier), and the list of loss histories returned by
        ``perturb_past`` for the steps where it ran.
    """
    output_so_far = None
    if context:
        context_t = torch.tensor(context, device=device, dtype=torch.long)
        # Add leading axes until the context is 2-D.
        while context_t.dim() < 2:
            context_t = context_t.unsqueeze(0)
        output_so_far = context_t

    # collect one hot vectors for bags of words
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, tokenizer,
                                                      device)

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    should_perturb = perturb and num_iterations != 0

    for step in trange(length, ascii=True):
        # The model takes two inputs, past + current token: build the past
        # from everything but the last token, which becomes the current one.
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                _, past, _ = model(output_so_far[:, :-1])

        # Unperturbed forward pass over the whole sequence.
        unpert_logits, unpert_past, unpert_all_hidden = model(output_so_far)
        unpert_last_hidden = unpert_all_hidden[-1]

        # Beyond grad_length the perturbation step size drops to zero.
        current_stepsize = stepsize * 0 if step >= grad_length else stepsize

        # modify the past if necessary
        if should_perturb and past is not None:
            accumulated_hidden = torch.sum(
                unpert_last_hidden[:, :-1, :], dim=1)
            pert_past, _, grad_norms, loss_this_iter = perturb_past(
                past,
                model,
                last,
                unpert_past=unpert_past,
                unpert_logits=unpert_logits,
                accumulated_hidden=accumulated_hidden,
                grad_norms=grad_norms,
                stepsize=current_stepsize,
                one_hot_bows_vectors=one_hot_bows_vectors,
                classifier=classifier,
                class_label=class_label,
                loss_type=loss_type,
                num_iterations=num_iterations,
                horizon_length=horizon_length,
                window_length=window_length,
                decay=decay,
                gamma=gamma,
                kl_scale=kl_scale,
                device=device,
            )
            loss_in_time.append(loss_this_iter)
        else:
            pert_past = past

        pert_logits, past, _ = model(last, past=pert_past)
        pert_logits = pert_logits[:, -1, :] / temperature
        pert_probs = F.softmax(pert_logits, dim=-1)

        if classifier is None:
            unpert_discrim_loss = 0
        else:
            ce_loss = torch.nn.CrossEntropyLoss()
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([class_label], device=device,
                                 dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print(
                "unperturbed discrim loss",
                unpert_discrim_loss.data.cpu().numpy()
            )

        # Fuse the modified model and original model
        if perturb:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            fused = (pert_probs ** gm_scale) * (
                unpert_probs ** (1 - gm_scale))
            pert_probs = top_k_filter(fused, k=top_k, probs=True)

            # rescale
            total = torch.sum(pert_probs)
            if total <= 1:
                pert_probs = pert_probs / total
        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)
            pert_probs = F.softmax(pert_logits, dim=-1)

        # sample or greedy
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)
        else:
            last = torch.topk(pert_probs, k=1, dim=-1)[1]

        # update context/output_so_far appending the new token
        if output_so_far is None:
            output_so_far = last
        else:
            output_so_far = torch.cat((output_so_far, last), dim=1)

        print(tokenizer.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time
Julien Chaumond's avatar
Julien Chaumond committed
632
633


634
635
636
637
638
639
640
641
642
643
644
645
646
647
def set_generic_model_params(discrim_weights, discrim_meta):
    """
    Register a user-supplied discriminator under the name ``'generic'``.

    Args:
        discrim_weights: path of the classifier weights; stored under the
            ``'path'`` key of the registered parameters.
        discrim_meta: path of a JSON file holding the discriminator
            parameters.

    Raises:
        ValueError: if either argument is None.
    """
    required = (
        ('discrim_weights', discrim_weights),
        ('discrim_meta', discrim_meta),
    )
    for arg_name, arg_value in required:
        if arg_value is None:
            raise ValueError(
                'When using a generic discriminator, '
                '{} need to be specified'.format(arg_name)
            )

    with open(discrim_meta, 'r') as meta_file:
        generic_params = json.load(meta_file)
    generic_params['path'] = discrim_weights
    DISCRIMINATOR_MODELS_PARAMS['generic'] = generic_params


648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
def run_pplm_example(
        pretrained_model="gpt2-medium",
        cond_text="",
        uncond=False,
        num_samples=1,
        bag_of_words=None,
        discrim=None,
        discrim_weights=None,
        discrim_meta=None,
        class_label=-1,
        length=100,
        stepsize=0.02,
        temperature=1.0,
        top_k=10,
        sample=False,
        num_iterations=3,
        grad_length=10000,
        horizon_length=1,
        window_length=0,
        decay=False,
        gamma=1.5,
        gm_scale=0.9,
        kl_scale=0.01,
        seed=0,
        no_cuda=False,
        colorama=False
):
    """Generate and print unperturbed and perturbed texts with PPLM.

    Seeds torch and numpy, loads the GPT-2 model and tokenizer, freezes the
    model weights, builds the conditioning prefix, calls
    ``full_text_generation`` and prints the decoded results to stdout.

    Args:
        pretrained_model: pretrained model name or path to a local
            checkpoint. Overridden by the discriminator's own
            ``"pretrained_model"`` entry whenever ``discrim`` is given.
        cond_text: prefix text to condition on. While it is empty (and
            ``uncond`` is false) the user is prompted on stdin.
        uncond: if true, condition only on the tokenizer's BOS token.
        num_samples: number of perturbed samples to generate.
        bag_of_words: ``;``-separated bags of words (ids or file paths).
        discrim: key into ``DISCRIMINATOR_MODELS_PARAMS``; ``'generic'``
            first registers ``discrim_weights`` / ``discrim_meta``.
        discrim_weights: weights for the generic discriminator.
        discrim_meta: meta information file for the generic discriminator.
        class_label: class label used for the discriminator.
        seed: seed passed to ``torch.manual_seed`` and ``np.random.seed``.
        no_cuda: if true, run on CPU even when CUDA is available.
        colorama: if true, print bag-of-words tokens of the perturbed
            texts in red using the ``colorama`` package.
        length, stepsize, temperature, top_k, sample, num_iterations,
        grad_length, horizon_length, window_length, decay, gamma, gm_scale,
        kl_scale: forwarded unchanged to ``full_text_generation``.

    Returns:
        None. All results are printed.
    """
    # set Random seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"

    if discrim == 'generic':
        set_generic_model_params(discrim_weights, discrim_meta)

    # the language model must match the one the discriminator was built for
    if discrim is not None:
        pretrained_model = DISCRIMINATOR_MODELS_PARAMS[discrim][
            "pretrained_model"
        ]
        print("discrim = {}, pretrained_model set "
              "to discriminator's = {}".format(discrim, pretrained_model))

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(
        pretrained_model,
        output_hidden_states=True
    )
    model.to(device)
    model.eval()

    # load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if uncond:
        tokenized_cond_text = tokenizer.encode(
            [tokenizer.bos_token]
        )
    else:
        raw_text = cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = tokenizer.encode(tokenizer.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(tokenizer.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model,
        tokenizer=tokenizer,
        context=tokenized_cond_text,
        device=device,
        num_samples=num_samples,
        bag_of_words=bag_of_words,
        discrim=discrim,
        class_label=class_label,
        length=length,
        stepsize=stepsize,
        temperature=temperature,
        top_k=top_k,
        sample=sample,
        num_iterations=num_iterations,
        grad_length=grad_length,
        horizon_length=horizon_length,
        window_length=window_length,
        decay=decay,
        gamma=gamma,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
    )

    # untokenize unperturbed text
    unpert_gen_text = tokenizer.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    generated_texts = []

    # ids of the bag-of-words tokens to highlight (only used with colorama)
    bow_word_ids = set()
    if bag_of_words and colorama:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"),
                                               tokenizer)
        for single_bow_list in bow_indices:
            # Keep only words encoded as exactly one token: multi-token
            # words cannot be matched against a single generated id, and an
            # empty encoding would raise IndexError on w[0].
            bow_word_ids.update(
                w[0] for w in single_bow_list if len(w) == 1
            )

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        # Displaying a sample is best-effort: a failure here (e.g. colorama
        # not installed) is reported and the remaining samples still print.
        try:
            token_ids = pert_gen_tok_text.tolist()[0]

            # untokenize perturbed text
            if colorama:
                # aliased so the module does not rebind the `colorama` flag
                import colorama as colorama_module

                pert_gen_text = ''.join(
                    '{}{}{}'.format(
                        colorama_module.Fore.RED,
                        tokenizer.decode([word_id]),
                        colorama_module.Style.RESET_ALL
                    )
                    if word_id in bow_word_ids
                    else tokenizer.decode([word_id])
                    for word_id in token_ids
                )
            else:
                pert_gen_text = tokenizer.decode(token_ids)

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as err:
            print("Could not display perturbed generated text {}: {}".format(
                i + 1, err
            ))

        # keep the prefix, perturbed seq, original seq for each index
        # (collected for inspection; currently not returned to the caller)
        generated_texts.append(
            (tokenized_cond_text, pert_gen_tok_text, unpert_gen_tok_text)
        )

    return
Julien Chaumond's avatar
Julien Chaumond committed
803
804


Piero Molino's avatar
Piero Molino committed
805
# Command-line entry point: every option maps one-to-one onto a keyword
# argument of run_pplm_example, so the parsed namespace is forwarded as-is.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pretrained_model",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument(
        "--cond_text", type=str, default="The lake",
        help="Prefix texts to condition on"
    )
    parser.add_argument(
        "--uncond", action="store_true",
        help="Generate from end-of-text as prefix"
    )
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help="Bags of words used for PPLM-BoW. "
             "Either a BOW id (see list in code) or a filepath. "
             "Multiple BoWs separated by ;",
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity", "generic"),
        help="Discriminator to use",
    )
    parser.add_argument('--discrim_weights', type=str, default=None,
                        help='Weights for the generic discriminator')
    parser.add_argument('--discrim_meta', type=str, default=None,
                        help='Meta information for the generic discriminator')
    parser.add_argument(
        "--class_label",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    # The previous help text was a copy of the --uncond one; this flag
    # actually switches next-token selection from greedy to sampling.
    parser.add_argument(
        "--sample", action="store_true",
        help="Sample the next token from the distribution "
             "instead of greedy decoding"
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; "
             "0 corresponds to infinite window length",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument("--decay", action="store_true",
                        help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument("--colorama", action="store_true",
                        help="colors keywords")

    args = parser.parse_args()
    run_pplm_example(**vars(args))