#! /usr/bin/env python3
# coding=utf-8
# Copyright 2018 The Uber AI Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: add code for training a custom discriminator

"""
Example command with bag of words:
python examples/run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95

Example command with discriminator:
python examples/run_pplm.py -D sentiment --label_class 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
"""

import argparse
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import trange

from examples.run_pplm_discrim_train import ClassificationHead
from transformers import GPT2Tokenizer
from transformers.file_utils import cached_path
from transformers.modeling_gpt2 import GPT2LMHeadModel

# Loss-type selectors: compared against the `loss_type` argument of
# perturb_past and assigned in full_text_generation.
PPLM_BOW = 1
PPLM_DISCRIM = 2
PPLM_BOW_DISCRIM = 3
# Small epsilon added to gradient norms and near-zero probabilities.
SMALL_CONST = 1e-15
# Magnitude used (negated) by top_k_filter to mask out logits.
BIG_CONST = 1e10

# Shared tokenizer; loaded once at import time.
TOKENIZER = GPT2Tokenizer.from_pretrained("gpt2-medium")

# Built-in bag-of-words ids mapped to the URL of their word list.
BAG_OF_WORDS_ARCHIVE_MAP = {
    'kitchen': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/kitchen.txt",
    'legal': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/legal.txt",
    'military': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/military.txt",
    'monsters': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/monsters.txt",
    'politics': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/politics.txt",
    'positive_words': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/positive_words.txt",
    'religion': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/religion.txt",
    'science': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/science.txt",
    'space': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/space.txt",
    'technology': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/technology.txt",
}

# Per-discriminator settings read by get_classifier: where to download the
# classification head, its dimensions, the named classes and the class used
# when the requested label is not recognised.
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/clickbait_classifierhead.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_clickbait": 0, "clickbait": 1},
        "default_class": 1,
    },
    "sentiment": {
        # NOTE(review): this checkpoint is fetched over plain http.
        "url": "http://s.yosinski.com/SST_classifier_head.pt",
        "class_size": 5,
        "embed_size": 1024,
        "class_vocab": {"very_positive": 2, "very_negative": 3},
        "default_class": 3,
    },
    "toxicity": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/toxicity_classifierhead.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_toxic": 0, "toxic": 1},
        "default_class": 0,
    },
}


def to_var(x, requires_grad=False, volatile=False, device='cuda'):
    """
    Move a tensor to the requested device and wrap it in a Variable.

    :param x: tensor to wrap.
    :param requires_grad: forwarded to the Variable constructor.
    :param volatile: forwarded to the Variable constructor.
    :param device: 'cuda' or any other device accepted by Tensor.to.
    :return: the wrapped tensor on the target device.
    """
    if torch.cuda.is_available() and device == 'cuda':
        x = x.cuda()
    elif device != 'cuda':
        x = x.to(device)
    # When 'cuda' is requested but unavailable, x stays on its current device.
    # NOTE(review): Variable/volatile are legacy autograd API; kept so the
    # behaviour matches the original script.
    return Variable(x, requires_grad=requires_grad, volatile=volatile)


def top_k_filter(logits, k, probs=False):
    """
    Masks everything but the k top entries along the last dimension.

    Masked entries are set to -BIG_CONST (standing in for -infinity, so that
    e^-infinity -> 0 won't contribute to the sum of the softmax denominator)
    or, when `probs` is True, to 0.

    :param logits: tensor of scores; the top-k is taken over the last dim.
    :param k: number of entries to keep; 0 disables filtering.
    :param probs: set to True when `logits` already holds probabilities.
    :return: `logits` itself when k == 0, otherwise a filtered tensor of the
        same shape.
    """
    if k == 0:
        return logits

    values = torch.topk(logits, k)[0]
    # Smallest kept value per row; keeping the last dim lets it broadcast
    # against `logits` for any number of leading dimensions.
    batch_mins = values[..., -1:]
    if probs:
        return torch.where(logits < batch_mins,
                           torch.zeros_like(logits), logits)
    return torch.where(logits < batch_mins,
                       torch.full_like(logits, -BIG_CONST),
                       logits)


def perturb_past(
        past,
        model,
        last,
        unpert_past=None,
        unpert_logits=None,
        accumulated_hidden=None,
        grad_norms=None,
        stepsize=0.01,
        classifier=None,
        label_class=None,
        one_hot_bows_vectors=None,
        loss_type=0,
        num_iterations=3,
        kl_scale=0.01,
        window_length=0,
        horizon_length=1,
        decay=False,
        gamma=1.5,
        device='cuda'
):
    """
    Compute a perturbed copy of `past` by gradient steps on the PPLM loss.

    For `num_iterations` steps the current perturbation is added to `past`,
    the model is run on `last`, a loss is built from the selected attribute
    model(s) plus an optional KL term, and the normalized, window-masked
    gradient with respect to the perturbation is accumulated.

    :param past: sequence of 5-D tensors; dim 3 is read as the current
        length. Presumably the key/value cache returned by the model.
    :param model: callable used as `model(last, past=...)` and
        `model(past=..., inputs_embeds=...)`, returning a 3-tuple whose last
        item is indexed with [-1] to get the last hidden state.
    :param last: input fed to the model together with the perturbed past.
    :param unpert_past: unperturbed past used for the discriminator horizon.
    :param unpert_logits: unperturbed logits used for the KL term.
    :param accumulated_hidden: running sum of hidden states (0 when None).
    :param grad_norms: gradient norms from a previous call, or None.
    :param stepsize: step size applied to the normalized gradient.
    :param classifier: discriminator head, used for the discriminator loss.
    :param label_class: target class index for the discriminator loss.
    :param one_hot_bows_vectors: one-hot bag-of-words matrices.
    :param loss_type: one of PPLM_BOW, PPLM_DISCRIM, PPLM_BOW_DISCRIM.
    :param num_iterations: number of gradient steps.
    :param kl_scale: weight of the KL term; 0 disables it.
    :param window_length: number of positions the mask keeps; 0 means the
        whole past is perturbed.
    :param horizon_length: number of extra model steps fed to the classifier.
    :param decay: whether to scale the window mask by a linear ramp.
    :param gamma: exponent applied to the gradient norm when normalizing.
    :param device: device on which masks, labels and perturbations live.
    :return: tuple (pert_past, new_accumulated_hidden, grad_norms,
        loss_per_iter).
    """
    # Generate initial (all-zero) perturbation, one array per past element
    grad_accumulator = [
        (np.zeros(p.shape).astype("float32"))
        for p in past
    ]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The ramp is only used together with a positive window (see below);
    # guarding on window_length avoids dividing by zero when decay is
    # requested with the default window_length of 0.
    if decay and window_length > 0:
        decay_mask = torch.arange(
            0.,
            1.0 + SMALL_CONST,
            1.0 / (window_length)
        )[1:]
    else:
        decay_mask = 1.0

    # Build the mask that restricts which positions of the past receive
    # gradient updates.
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = (
                tuple(past[0].shape[:-2])
                + tuple([window_length])
                + tuple(past[0].shape[-1:])
        )

        zeros_key_val_shape = (
                tuple(past[0].shape[:-2])
                + tuple([curr_length - window_length])
                + tuple(past[0].shape[-1:])
        )

        ones_mask = torch.ones(ones_key_val_shape)
        # Move the window dim last so decay_mask broadcasts over it, then
        # restore the original layout.
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        window_mask = torch.cat(
            (ones_mask, torch.zeros(zeros_key_val_shape)),
            dim=-2
        ).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [
            to_var(torch.from_numpy(p_), requires_grad=True, device=device)
            for p_ in grad_accumulator
        ]

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        all_logits, _, all_hidden = model(last, past=perturbed_past)
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(
            hidden,
            dim=1
        ).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            # Negative log of the probability mass on each bag's words
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == PPLM_DISCRIM or loss_type == PPLM_BOW_DISCRIM:
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # Feed the expected embedding under the current distribution
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                _, curr_unpert_past, curr_all_hidden = model(
                    past=curr_unpert_past,
                    inputs_embeds=inputs_embeds
                )
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(
                    curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden /
                                    (curr_length + 1 + horizon_length))

            label = torch.tensor([label_class], device=device,
                                 dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            # Lift (near-)zero probabilities by SMALL_CONST so the ratio and
            # its log below stay finite.
            unpert_probs = (
                    unpert_probs + SMALL_CONST *
                    (unpert_probs <= SMALL_CONST).float().to(device).detach()
            )
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * (
                (corrected_probs * (corrected_probs / unpert_probs).log()).sum()
            )
            print(' kl_loss', kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(' pplm_loss', (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            # Keep the running maximum of the norms seen so far
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [
                (torch.norm(p_.grad * window_mask) + SMALL_CONST)
                for index, p_ in enumerate(curr_perturbation)
            ]

        # normalize gradients
        grad = [
            -stepsize *
            (p_.grad * window_mask / grad_norms[index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        new_past = []
        for p_ in past:
            new_past.append(p_.detach())
        past = new_past

    # apply the accumulated perturbations to the past
    grad_accumulator = [
        to_var(torch.from_numpy(p_), requires_grad=True, device=device)
        for p_ in grad_accumulator
    ]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter


def get_classifier(
        name: Optional[str], label_class: Union[str, int],
        device: str
) -> Tuple[Optional["ClassificationHead"], Optional[int]]:
    """
    Load a pretrained discriminator head and resolve the target class id.

    :param name: key into DISCRIMINATOR_MODELS_PARAMS, or None for no
        discriminator.
    :param label_class: class name (looked up in the discriminator's
        "class_vocab") or class id (must be one of its values). Anything
        unrecognised falls back to the discriminator's "default_class".
    :param device: device the head is moved to and its weights mapped to.
    :return: (classifier in eval mode, class id), or (None, None) when
        `name` is None.
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(
        class_size=params['class_size'],
        embed_size=params['embed_size']
    ).to(device)
    resolved_archive_file = cached_path(params["url"])
    # NOTE(review): torch.load unpickles the downloaded file; only point the
    # "url" entries at sources you trust.
    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    class_vocab = params["class_vocab"]
    if isinstance(label_class, str) and label_class in class_vocab:
        return classifier, class_vocab[label_class]
    if isinstance(label_class, int) and label_class in class_vocab.values():
        return classifier, label_class

    label_id = params["default_class"]
    # Only str/int requests are reported; other types fall back silently.
    if isinstance(label_class, (str, int)):
        print("label_class {} not in class_vocab".format(label_class))
        print("available values are: {}".format(class_vocab))
        print("using default class {}".format(label_id))

    return classifier, label_id


def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str]) -> List[
    List[List[int]]]:
    """
    Tokenize every word of each requested bag of words.

    :param bag_of_words_ids_or_paths: for each bag, either a key of
        BAG_OF_WORDS_ARCHIVE_MAP (the word list is fetched through
        cached_path) or a path to a local file with one word per line.
    :return: one list per bag, holding the token ids of each word.
    """
    bow_indices = []
    for id_or_path in bag_of_words_ids_or_paths:
        if id_or_path in BAG_OF_WORDS_ARCHIVE_MAP:
            filepath = cached_path(BAG_OF_WORDS_ARCHIVE_MAP[id_or_path])
        else:
            filepath = id_or_path
        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(filepath, "r", encoding="utf-8") as f:
            words = f.read().strip().split("\n")
        # Blank lines are not words; skip them rather than tokenizing "".
        bow_indices.append(
            [TOKENIZER.encode(word.strip(), add_prefix_space=True)
             for word in words if word.strip()])
    return bow_indices


def build_bows_one_hot_vectors(bow_indices, device='cuda'):
    """
    Turn tokenized bags of words into one-hot matrices over the vocabulary.

    Only words made of exactly one token are kept; each bag becomes a
    (num_words, TOKENIZER.vocab_size) tensor with a single 1 per row.

    :param bow_indices: list of bags, each a list of token-id lists, or None.
    :param device: device on which the tensors are created.
    :return: list of one-hot tensors (one per bag), or None when
        `bow_indices` is None.
    :raises ValueError: if a bag contains no single-token word.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        # Exactly one token: multi-token words cannot be one-hot encoded and
        # empty entries would make the index tensor ragged.
        single_token_words = [ids for ids in single_bow if len(ids) == 1]
        if not single_token_words:
            raise ValueError(
                "bag of words contains no single-token word")
        word_ids = torch.tensor(
            single_token_words, dtype=torch.long).to(device)
        num_words = word_ids.shape[0]
        one_hot_bow = torch.zeros(num_words, TOKENIZER.vocab_size).to(device)
        one_hot_bow.scatter_(1, word_ids, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


def full_text_generation(
        model,
        context=None,
        num_samples=1,
        device="cuda",
        sample=True,
        discrim=None,
        label_class=None,
        bag_of_words=None,
        length=100,
        grad_length=10000,
        stepsize=0.02,
        num_iterations=3,
        temperature=1.0,
        gm_scale=0.9,
        kl_scale=0.01,
        top_k=10,
        window_length=0,
        horizon_length=1,
        decay=False,
        gamma=1.5,
        **kwargs
):
    """
    Generate one unperturbed text and `num_samples` perturbed texts.

    :param model: language model passed through to generate_text_pplm.
    :param context: token ids of the conditioning text.
    :param num_samples: number of perturbed generations.
    :param device: device used for generation.
    :param sample: sample from the distribution (True) or decode greedily.
    :param discrim: discriminator name for get_classifier, or None.
    :param label_class: discriminator class name or id.
    :param bag_of_words: ";"-separated bag-of-words ids or file paths.
    :param kwargs: ignored; lets callers pass a full argparse namespace.
    :return: tuple (unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses,
        losses_in_time). The remaining parameters are forwarded unchanged to
        generate_text_pplm.
    :raises ValueError: if neither a bag of words nor a discriminator is
        given.
    """
    classifier, class_id = get_classifier(
        discrim,
        label_class,
        device
    )

    bow_indices = []
    if bag_of_words:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"))

    if bag_of_words and classifier is not None:
        print("Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.")
        loss_type = PPLM_BOW_DISCRIM

    elif bag_of_words:
        loss_type = PPLM_BOW
        print("Using PPLM-BoW")

    elif classifier is not None:
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")

    else:
        raise ValueError("Specify either a bag of words or a discriminator")

    # Baseline generation; decoded the same way (sample vs. greedy) as the
    # perturbed samples so the two are comparable.
    unpert_gen_tok_text, _, _ = generate_text_pplm(
        model=model,
        context=context,
        device=device,
        length=length,
        sample=sample,
        perturb=False
    )
    if device == 'cuda':
        torch.cuda.empty_cache()

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for i in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            model=model,
            context=context,
            device=device,
            sample=sample,
            perturb=True,
            bow_indices=bow_indices,
            classifier=classifier,
            label_class=class_id,
            loss_type=loss_type,
            length=length,
            grad_length=grad_length,
            stepsize=stepsize,
            num_iterations=num_iterations,
            temperature=temperature,
            gm_scale=gm_scale,
            kl_scale=kl_scale,
            top_k=top_k,
            window_length=window_length,
            horizon_length=horizon_length,
            decay=decay,
            gamma=gamma,
        )
        pert_gen_tok_texts.append(pert_gen_tok_text)
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == 'cuda':
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time

def generate_text_pplm(
        model,
        context=None,
        past=None,
        device="cuda",
        sample=True,
        perturb=True,
        classifier=None,
        label_class=None,
        bow_indices=None,
        loss_type=0,
        length=100,
        grad_length=10000,
        stepsize=0.02,
        num_iterations=3,
        temperature=1.0,
        gm_scale=0.9,
        kl_scale=0.01,
        top_k=10,
        window_length=0,
        horizon_length=1,
        decay=False,
        gamma=1.5,
):
    """
    Generate `length` tokens, optionally steering them with PPLM.

    At each step the model is run on the whole sequence to get unperturbed
    logits; when `perturb` is set (and a past exists) the past is modified by
    perturb_past before predicting the next token, and the perturbed and
    unperturbed distributions are fused with weight `gm_scale`.

    :param model: called as `model(tokens)` / `model(last, past=...)` and
        expected to return a 3-tuple (logits, past, all hidden states).
    :param context: token ids to condition on.
    :param past: initial past; computed from `context` when None.
    :param sample: sample the next token (True) or take the arg-max.
    :param perturb: whether to apply the PPLM perturbation.
    :param grad_length: from this step on the step size is set to 0.
    :param temperature: divisor applied to the next-token logits.
    :param gm_scale: exponent of the perturbed distribution in the fusion.
    :param top_k: number of candidates kept by top_k_filter.
    :return: tuple (output_so_far, unpert_discrim_loss, loss_in_time); the
        loss is the one computed at the last step (0 without a classifier).
        The remaining parameters are forwarded to perturb_past.
    """
    output_so_far = (
        torch.tensor(context, device=device, dtype=torch.long).unsqueeze(0)
        if context
        else None
    )

    # collect one hot vectors for bags of words
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, device)

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    for i in trange(length, ascii=True):

        # Get past/probs for current output, except for last word
        # Note that GPT takes 2 inputs: past + current_token

        # run model forward to obtain unperturbed
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                _, past, _ = model(output_so_far[:, :-1])

        unpert_logits, unpert_past, unpert_all_hidden = model(output_so_far)
        unpert_last_hidden = unpert_all_hidden[-1]

        # check if we are above grad max length
        if i >= grad_length:
            current_stepsize = stepsize * 0
        else:
            current_stepsize = stepsize

        # modify the past if necessary
        if not perturb or num_iterations == 0:
            pert_past = past

        else:
            # Sum of hidden states for every position except the last one
            accumulated_hidden = unpert_last_hidden[:, :-1, :]
            accumulated_hidden = torch.sum(accumulated_hidden, dim=1)

            if past is not None:
                pert_past, _, grad_norms, loss_this_iter = perturb_past(
                    past,
                    model,
                    last,
                    unpert_past=unpert_past,
                    unpert_logits=unpert_logits,
                    accumulated_hidden=accumulated_hidden,
                    grad_norms=grad_norms,
                    stepsize=current_stepsize,
                    classifier=classifier,
                    label_class=label_class,
                    one_hot_bows_vectors=one_hot_bows_vectors,
                    loss_type=loss_type,
                    num_iterations=num_iterations,
                    kl_scale=kl_scale,
                    window_length=window_length,
                    horizon_length=horizon_length,
                    decay=decay,
                    gamma=gamma,
                    device=device
                )
                loss_in_time.append(loss_this_iter)
            else:
                pert_past = past

        pert_logits, past, _ = model(last, past=pert_past)
        pert_logits = pert_logits[:, -1, :] / temperature  # + SMALL_CONST
        pert_probs = F.softmax(pert_logits, dim=-1)

        if classifier is not None:
            ce_loss = torch.nn.CrossEntropyLoss()
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([label_class], device=device,
                                 dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print(
                "unperturbed discrim loss",
                unpert_discrim_loss.data.cpu().numpy()
            )
        else:
            unpert_discrim_loss = 0

        # Fuse the modified model and original model
        if perturb:

            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)

            pert_probs = ((pert_probs ** gm_scale) * (
                    unpert_probs ** (1 - gm_scale)))  # + SMALL_CONST
            pert_probs = top_k_filter(pert_probs, k=top_k,
                                      probs=True)  # + SMALL_CONST

            # rescale
            if torch.sum(pert_probs) <= 1:
                pert_probs = pert_probs / torch.sum(pert_probs)

        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)  # + SMALL_CONST
            pert_probs = F.softmax(pert_logits, dim=-1)

        # sample or greedy
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)

        else:
            _, last = torch.topk(pert_probs, k=1, dim=-1)

        # update context/output_so_far appending the new token
        output_so_far = (
            last if output_so_far is None
            else torch.cat((output_so_far, last), dim=1)
        )

        print(TOKENIZER.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time


def run_model():
    """
    Command-line entry point: parse arguments, load GPT-2 and print the
    unperturbed and perturbed generations for the given prefix.

    With --colorama and a bag of words, tokens belonging to the bag are
    printed in red; if colorama cannot be imported the text is printed
    without colors.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help="Bags of words used for PPLM-BoW. "
             "Either a BOW id (see list in code) or a filepath. "
             "Multiple BoWs separated by ;",
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity"),
        help="Discriminator to use for loss-type 2",
    )
    parser.add_argument(
        "--label_class",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument(
        "--uncond", action="store_true",
        help="Generate from end-of-text as prefix"
    )
    parser.add_argument(
        "--cond_text", type=str, default="The lake",
        help="Prefix texts to condition on"
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; "
             "0 corresponds to infinite window length",
    )
    parser.add_argument("--decay", action="store_true",
                        help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--colorama", action="store_true",
                        help="colors keywords")

    args = parser.parse_args()

    # set Random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(
        args.model_path,
        output_hidden_states=True
    )
    model.to(device)
    model.eval()

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if args.uncond:
        tokenized_cond_text = TOKENIZER.encode(
            [TOKENIZER.bos_token]
        )
    else:
        raw_text = args.cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = TOKENIZER.encode(TOKENIZER.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(TOKENIZER.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model, context=tokenized_cond_text, device=device, **vars(args)
    )

    # untokenize unperturbed text
    unpert_gen_text = TOKENIZER.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    generated_texts = []

    # Import colorama once, up front; fall back to plain output when it is
    # missing instead of silently printing nothing.
    colorama = None
    if args.colorama:
        try:
            import colorama
        except ImportError:
            print("colorama is not installed; printing without colors")

    bow_word_ids = set()
    if args.bag_of_words and args.colorama:
        bow_indices = get_bag_of_words_indices(args.bag_of_words.split(";"))
        for single_bow_list in bow_indices:
            # keep only the words made of exactly one token, so that w[0]
            # below is always defined
            filtered = [w for w in single_bow_list if len(w) == 1]
            bow_word_ids.update(w[0] for w in filtered)

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        try:
            # untokenize perturbed text
            if colorama is not None:
                pieces = []
                for word_id in pert_gen_tok_text.tolist()[0]:
                    if word_id in bow_word_ids:
                        pieces.append('{}{}{}'.format(
                            colorama.Fore.RED,
                            TOKENIZER.decode([word_id]),
                            colorama.Style.RESET_ALL
                        ))
                    else:
                        pieces.append(TOKENIZER.decode([word_id]))
                pert_gen_text = ''.join(pieces)
            else:
                pert_gen_text = TOKENIZER.decode(pert_gen_tok_text.tolist()[0])

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as exc:
            # Best effort: one undecodable sample must not stop the others,
            # but the failure should be visible.
            print("Could not print perturbed text {}: {}".format(i + 1, exc))

        # keep the prefix, perturbed seq, original seq for each index
        generated_texts.append(
            (tokenized_cond_text, pert_gen_tok_text, unpert_gen_tok_text)
        )

    return


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    run_model()