#! /usr/bin/env python3
# coding=utf-8
# Copyright 2018 The Uber AI Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Example command with bag of words:
python examples/run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95

Example command with discriminator:
python examples/run_pplm.py -D sentiment --class_label 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
"""

import argparse
26
import json
Julien Chaumond's avatar
Julien Chaumond committed
27
28
29
30
31
32
33
34
35
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import trange

36
from examples.run_pplm_discrim_train import ClassificationHead
Julien Chaumond's avatar
Julien Chaumond committed
37
38
39
40
41
42
43
44
from transformers import GPT2Tokenizer
from transformers.file_utils import cached_path
from transformers.modeling_gpt2 import GPT2LMHeadModel

# Loss-type selectors: which attribute model(s) drive the perturbation.
PPLM_BOW = 1
PPLM_DISCRIM = 2
PPLM_BOW_DISCRIM = 3
# Tiny epsilon added to probabilities / gradient norms to avoid log(0) and
# division by zero.
SMALL_CONST = 1e-15
# Large magnitude used (negated) to mask out logits in top-k filtering.
BIG_CONST = 1e10
Julien Chaumond's avatar
Julien Chaumond committed
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# GPT-2 tokenizer shared by the bag-of-words helpers and by the decoding
# printout in generate_text_pplm; it is instantiated once, at import time.
TOKENIZER = GPT2Tokenizer.from_pretrained("gpt2-medium")

# Built-in bag-of-words ids mapped to the URL of their word-list file.
# Any value passed to --bag_of_words that is not a key here is treated as a
# local file path by get_bag_of_words_indices.
BAG_OF_WORDS_ARCHIVE_MAP = {
    'kitchen': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/kitchen.txt",
    'legal': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/legal.txt",
    'military': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/military.txt",
    'monsters': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/monsters.txt",
    'politics': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/politics.txt",
    'positive_words': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/positive_words.txt",
    'religion': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/religion.txt",
    'science': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/science.txt",
    'space': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/space.txt",
    'technology': "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/technology.txt",
}

# Registry of the pretrained discriminator heads, keyed by the value accepted
# by --discrim.  Each entry provides:
#   url           - where the classifier-head weights are fetched from
#   class_size    - number of output classes of the head
#   embed_size    - size of the hidden representation the head consumes
#   class_vocab   - class name -> class id accepted as a class label
#   default_class - class id used when the requested label is not recognised
# set_generic_model_params() adds a "generic" entry (with "path" instead of
# "url") at runtime.
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/clickbait_classifierhead.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_clickbait": 0, "clickbait": 1},
        "default_class": 1,
    },
    "sentiment": {
        "url": "http://s.yosinski.com/SST_classifier_head.pt",
        "class_size": 5,
        "embed_size": 1024,
        "class_vocab": {"very_positive": 2, "very_negative": 3},
        "default_class": 3,
    },
    "toxicity": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/toxicity_classifierhead.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_toxic": 0, "toxic": 1},
        "default_class": 0,
    },
}


86
87
def to_var(x, requires_grad=False, volatile=False, device='cuda'):
    """Place tensor ``x`` on ``device`` and wrap it in an autograd Variable.

    Args:
        x: tensor to wrap.
        requires_grad: whether gradients should be tracked for the result.
        volatile: forwarded unchanged to ``Variable``; legacy flag kept for
            backward compatibility with existing callers.
        device: target device.  For ``'cuda'`` the tensor is moved only when
            CUDA is available (otherwise it is left where it is); any other
            value is passed to ``x.to(device)``.

    Returns:
        The wrapped tensor.
    """
    if device == 'cuda':
        # Silently stay on the current device when no GPU is present.
        if torch.cuda.is_available():
            x = x.cuda()
    else:
        x = x.to(device)
    return Variable(x, requires_grad=requires_grad, volatile=volatile)


def top_k_filter(logits, k, probs=False):
    """Keep the ``k`` largest entries of each row and mask out the rest.

    Args:
        logits: 2-D tensor (one row per batch element) of logits, or of
            probabilities when ``probs`` is True.
        k: number of entries to keep per row; ``0`` disables filtering and
            returns ``logits`` unchanged.  Values larger than the row length
            are clamped to it.
        probs: when True the masked entries are set to ``0.0``; otherwise
            they are set to ``-BIG_CONST`` so that they contribute
            (numerically) nothing to a subsequent softmax.

    Returns:
        A tensor shaped like ``logits`` with everything below the k-th
        largest value of its row replaced by the mask value.
    """
    if k == 0:
        return logits

    # torch.topk raises when k exceeds the dimension size; asking for more
    # entries than exist simply means "keep everything".
    k = min(k, logits.size(-1))
    values = torch.topk(logits, k)[0]
    # Smallest retained value of each row, broadcast to the full row.
    batch_mins = values[:, -1].view(-1, 1).expand_as(logits)
    fill_value = 0.0 if probs else -BIG_CONST
    return torch.where(logits < batch_mins,
                       torch.full_like(logits, fill_value),
                       logits)


113
114
115
def perturb_past(
        past,
        model,
        last,
        unpert_past=None,
        unpert_logits=None,
        accumulated_hidden=None,
        grad_norms=None,
        stepsize=0.01,
        classifier=None,
        class_label=None,
        one_hot_bows_vectors=None,
        loss_type=0,
        num_iterations=3,
        kl_scale=0.01,
        window_length=0,
        horizon_length=1,
        decay=False,
        gamma=1.5,
        device='cuda'
):
    """Perturb the cached ``past`` by gradient steps on the attribute loss.

    For ``num_iterations`` rounds the function adds the accumulated
    perturbation to ``past``, runs ``model`` on ``last``, computes the
    bag-of-words and/or discriminator loss (plus an optional KL term against
    the unperturbed distribution), back-propagates to the perturbation and
    accumulates a normalised, windowed gradient step.

    Args:
        past: sequence of 5-D tensors; the sequence length is read from
            axis 3 of ``past[0]``.
        model: callable used as ``model(last, past=...)`` and
            ``model(past=..., inputs_embeds=...)``, returning
            ``(logits, past, all_hidden)``; must also provide
            ``resize_token_embeddings()``.
        last: input passed to ``model`` together with the perturbed past.
        unpert_past: unperturbed past used for the discriminator look-ahead.
        unpert_logits: unperturbed logits; required when ``kl_scale > 0``.
        accumulated_hidden: running sum of hidden states (``None`` -> 0).
        grad_norms: per-tensor gradient norms from a previous call, or None.
        stepsize: step size of each gradient update.
        classifier: discriminator head, used for the discriminator losses.
        class_label: target class id for the discriminator loss.
        one_hot_bows_vectors: one-hot matrices, one per bag of words.
        loss_type: one of PPLM_BOW, PPLM_DISCRIM, PPLM_BOW_DISCRIM.
        num_iterations: number of perturbation updates.
        kl_scale: weight of the KL term; ``0`` disables it.
        window_length: number of sequence positions that receive gradient;
            ``0`` (or a value >= the current length) perturbs all positions.
        horizon_length: number of look-ahead steps for the discriminator.
        decay: scale the window mask linearly from 1/window_length up to 1.
        gamma: exponent applied to the gradient norm when normalising.
        device: device on which masks and perturbations are placed.

    Returns:
        ``(pert_past, new_accumulated_hidden, grad_norms, loss_per_iter)``.
    """
    # Generate initial (all-zero) perturbation of the past
    grad_accumulator = [
        (np.zeros(p.shape).astype("float32"))
        for p in past
    ]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The decay mask is only consumed when a window is active; guarding on
    # window_length also avoids a division by zero for decay with no window.
    if decay and window_length > 0:
        decay_mask = torch.arange(
            0.,
            1.0 + SMALL_CONST,
            1.0 / (window_length)
        )[1:]
    else:
        decay_mask = 1.0

    # Build the mask that restricts the gradient to a window of positions.
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = (
                tuple(past[0].shape[:-2])
                + tuple([window_length])
                + tuple(past[0].shape[-1:])
        )

        zeros_key_val_shape = (
                tuple(past[0].shape[:-2])
                + tuple([curr_length - window_length])
                + tuple(past[0].shape[-1:])
        )

        # Swap the last two axes so that decay_mask broadcasts along the
        # sequence axis, then swap back.
        ones_mask = torch.ones(ones_key_val_shape)
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        # NOTE(review): the non-zero part of the mask is concatenated first,
        # i.e. it covers the leading window_length positions of the sequence
        # axis - confirm this is the intended end of the window.
        window_mask = torch.cat(
            (ones_mask, torch.zeros(zeros_key_val_shape)),
            dim=-2
        ).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [
            to_var(torch.from_numpy(p_), requires_grad=True, device=device)
            for p_ in grad_accumulator
        ]

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        all_logits, _, all_hidden = model(last, past=perturbed_past)
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(
            hidden,
            dim=1
        ).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        loss_list = []
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            for one_hot_bow in one_hot_bows_vectors:
                # Probability mass the model assigns to the words of the bag
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
                loss_list.append(bow_loss)
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == PPLM_DISCRIM or loss_type == PPLM_BOW_DISCRIM:
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # Feed the expected embedding under the current distribution
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                _, curr_unpert_past, curr_all_hidden = model(
                    past=curr_unpert_past,
                    inputs_embeds=inputs_embeds
                )
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(
                    curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden /
                                    (curr_length + 1 + horizon_length))

            label = torch.tensor([class_label], device=device,
                                 dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss
            loss_list.append(discrim_loss)

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            # Lift (near-)zero probabilities so the ratio and log stay finite
            unpert_probs = (
                    unpert_probs + SMALL_CONST *
                    (unpert_probs <= SMALL_CONST).float().to(device).detach()
            )
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(
                device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * (
                (corrected_probs * (corrected_probs / unpert_probs).log()).sum()
            )
            print(' kl_loss', kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(' pplm_loss', (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [
                (torch.norm(p_.grad * window_mask) + SMALL_CONST)
                for index, p_ in enumerate(curr_perturbation)
            ]

        # normalize gradients
        grad = [
            -stepsize *
            (p_.grad * window_mask / grad_norms[
                index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        new_past = []
        for p_ in past:
            new_past.append(p_.detach())
        past = new_past

    # apply the accumulated perturbations to the past
    grad_accumulator = [
        to_var(torch.from_numpy(p_), requires_grad=True, device=device)
        for p_ in grad_accumulator
    ]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Julien Chaumond's avatar
Julien Chaumond committed
302
303
304


def get_classifier(
        name: Optional[str], class_label: Union[str, int],
        device: str
) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    """Load a discriminator head and resolve the requested class label.

    Args:
        name: key into ``DISCRIMINATOR_MODELS_PARAMS``; ``None`` means that
            no discriminator is used.
        class_label: class name (looked up in the entry's ``class_vocab``)
            or class id (must be one of the ``class_vocab`` values).
        device: device the classifier is moved to and its weights mapped to.

    Returns:
        ``(classifier, label_id)``, or ``(None, None)`` when ``name`` is
        ``None``.  An unrecognised string or integer label falls back to the
        entry's ``default_class`` after printing the available values; a
        label of any other type falls back silently.

    Raises:
        KeyError: if ``name`` is not a registered discriminator.
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(
        class_size=params['class_size'],
        embed_size=params['embed_size']
    ).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    else:
        resolved_archive_file = params["path"]
    # NOTE(security): torch.load unpickles the file, so the weights (which
    # may have been downloaded) must come from a trusted source.
    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    class_vocab = params["class_vocab"]
    if isinstance(class_label, str) and class_label in class_vocab:
        label_id = class_vocab[class_label]
    elif isinstance(class_label, int) and class_label in class_vocab.values():
        label_id = class_label
    else:
        label_id = params["default_class"]
        # Only report the fallback for labels that could have been valid.
        if isinstance(class_label, (str, int)):
            print("class_label {} not in class_vocab".format(class_label))
            print("available values are: {}".format(class_vocab))
            print("using default class {}".format(label_id))

    return classifier, label_id


Piero Molino's avatar
Piero Molino committed
348
349
def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str]) -> List[
    List[List[int]]]:
    """Tokenize every word of the requested bags of words.

    Args:
        bag_of_words_ids_or_paths: for each bag, either a key of
            ``BAG_OF_WORDS_ARCHIVE_MAP`` (the word list is then fetched
            through ``cached_path``) or a path to a local word-list file
            containing one word per line.

    Returns:
        One list per bag, holding for each word the token ids produced by
        ``TOKENIZER.encode(word, add_prefix_space=True)``.
    """
    bow_indices = []
    for id_or_path in bag_of_words_ids_or_paths:
        if id_or_path in BAG_OF_WORDS_ARCHIVE_MAP:
            filepath = cached_path(BAG_OF_WORDS_ARCHIVE_MAP[id_or_path])
        else:
            filepath = id_or_path
        # Explicit encoding so the result does not depend on the locale.
        with open(filepath, "r", encoding="utf-8") as f:
            words = f.read().strip().split("\n")
        # Blank lines are not words; encoding them would add a spurious
        # entry to the bag.
        bow_indices.append(
            [TOKENIZER.encode(word.strip(), add_prefix_space=True)
             for word in words if word.strip()])
    return bow_indices


364
def build_bows_one_hot_vectors(bow_indices, device='cuda'):
    """Turn tokenized bags of words into one-hot matrices.

    Only words that were encoded as exactly one token are kept; each kept
    word becomes one row of size ``TOKENIZER.vocab_size`` with a single 1 at
    its token id.

    Args:
        bow_indices: list of bags, each a list of token-id lists (as
            returned by ``get_bag_of_words_indices``), or ``None``.
        device: device on which the matrices are created.

    Returns:
        ``None`` when ``bow_indices`` is ``None``; otherwise a list with one
        ``(num_words, vocab_size)`` tensor per bag.

    Raises:
        ValueError: if a bag contains no single-token word.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        # Exactly one token per word: multi-token words cannot be one-hot
        # encoded and empty encodings would make the tensor ragged.
        single_token_words = [ids for ids in single_bow if len(ids) == 1]
        if not single_token_words:
            raise ValueError(
                "A bag of words contains no word that maps to a single token")
        single_bow = torch.tensor(single_token_words).to(device)
        num_words = single_bow.shape[0]
        one_hot_bow = torch.zeros(num_words, TOKENIZER.vocab_size).to(device)
        one_hot_bow.scatter_(1, single_bow, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


379
def full_text_generation(
        model,
        context=None,
        num_samples=1,
        device="cuda",
        sample=True,
        discrim=None,
        class_label=None,
        bag_of_words=None,
        length=100,
        grad_length=10000,
        stepsize=0.02,
        num_iterations=3,
        temperature=1.0,
        gm_scale=0.9,
        kl_scale=0.01,
        top_k=10,
        window_length=0,
        horizon_length=1,
        decay=False,
        gamma=1.5,
        **kwargs
):
    """Generate one unperturbed text and ``num_samples`` perturbed texts.

    Args:
        model: language model forwarded to ``generate_text_pplm``.
        context: token ids of the conditioning text.
        num_samples: number of perturbed generations to produce.
        device: device used for the classifier and the generation.
        sample: sample from the distribution (True) or decode greedily.
        discrim: discriminator name passed to ``get_classifier``, or None.
        class_label: class name or id for the discriminator.
        bag_of_words: ``;``-separated bag-of-words ids or file paths, or
            None.
        length, grad_length, stepsize, num_iterations, temperature,
        gm_scale, kl_scale, top_k, window_length, horizon_length, decay,
        gamma: forwarded unchanged to ``generate_text_pplm`` for the
            perturbed generations.
        **kwargs: accepted and ignored.

    Returns:
        ``(unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses,
        losses_in_time)``; ``discrim_losses`` is only filled when a
        discriminator is used.

    Raises:
        Exception: if neither a bag of words nor a discriminator is given.
    """
    classifier, class_id = get_classifier(
        discrim,
        class_label,
        device
    )

    bow_indices = []
    if bag_of_words:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"))

    if bag_of_words and classifier is not None:
        print("Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.")
        loss_type = PPLM_BOW_DISCRIM

    elif bag_of_words:
        loss_type = PPLM_BOW
        print("Using PPLM-BoW")

    elif classifier is not None:
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")

    else:
        raise Exception("Specify either a bag of words or a discriminator")

    # Baseline generation; it honours the caller's decoding mode so that it
    # is comparable with the perturbed samples.
    unpert_gen_tok_text, _, _ = generate_text_pplm(
        model=model,
        context=context,
        device=device,
        length=length,
        sample=sample,
        perturb=False
    )
    if device == 'cuda':
        torch.cuda.empty_cache()

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for _ in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            model=model,
            context=context,
            device=device,
            sample=sample,
            perturb=True,
            bow_indices=bow_indices,
            classifier=classifier,
            class_label=class_id,
            loss_type=loss_type,
            length=length,
            grad_length=grad_length,
            stepsize=stepsize,
            num_iterations=num_iterations,
            temperature=temperature,
            gm_scale=gm_scale,
            kl_scale=kl_scale,
            top_k=top_k,
            window_length=window_length,
            horizon_length=horizon_length,
            decay=decay,
            gamma=gamma,
        )
        pert_gen_tok_texts.append(pert_gen_tok_text)
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == 'cuda':
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
Julien Chaumond's avatar
Julien Chaumond committed
474

475
476
477
478
479
480
481
482
483

def generate_text_pplm(
        model,
        context=None,
        past=None,
        device="cuda",
        sample=True,
        perturb=True,
        classifier=None,
        class_label=None,
        bow_indices=None,
        loss_type=0,
        length=100,
        grad_length=10000,
        stepsize=0.02,
        num_iterations=3,
        temperature=1.0,
        gm_scale=0.9,
        kl_scale=0.01,
        top_k=10,
        window_length=0,
        horizon_length=1,
        decay=False,
        gamma=1.5,
):
    """Generate ``length`` tokens, optionally steering them with PPLM.

    At every step the model is run on the full output so far to get the
    unperturbed logits; when ``perturb`` is set (and ``num_iterations`` is
    non-zero) the cached past is modified by ``perturb_past`` before the
    next-token distribution is computed.  With ``perturb`` the perturbed and
    unperturbed distributions are fused (weights ``gm_scale`` and
    ``1 - gm_scale``) and top-k filtered; otherwise the logits are top-k
    filtered and soft-maxed.

    Args:
        model: callable returning ``(logits, past, all_hidden)``.
        context: token ids of the conditioning text.
        past: cached past to start from, or None to compute it from context.
        device: device for the tensors created here.
        sample: sample the next token (True) or take the most likely one.
        perturb: whether to apply the PPLM perturbation.
        classifier: discriminator head, or None.
        class_label: target class id for the discriminator.
        bow_indices: tokenized bags of words, or None.
        loss_type, num_iterations, kl_scale, window_length, horizon_length,
        decay, gamma: forwarded to ``perturb_past``.
        length: number of tokens to generate.
        grad_length: step index from which the step size is set to zero.
        stepsize: perturbation step size.
        temperature: divisor applied to the logits before the softmax.
        gm_scale: weight of the perturbed distribution in the fusion.
        top_k: number of candidates kept by ``top_k_filter``.

    Returns:
        ``(output_so_far, unpert_discrim_loss, loss_in_time)``: the
        generated token ids (context included), the discriminator loss of
        the unperturbed hidden states at the last step (``0`` without a
        classifier), and the per-step losses returned by ``perturb_past``.
    """
    output_so_far = (
        torch.tensor(context, device=device, dtype=torch.long).unsqueeze(0)
        if context
        else None
    )

    # collect one hot vectors for bags of words
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, device)

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    for i in trange(length, ascii=True):

        # Get past/probs for current output, except for last word
        # Note that GPT takes 2 inputs: past + current_token

        # run model forward to obtain unperturbed
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                _, past, _ = model(output_so_far[:, :-1])

        unpert_logits, unpert_past, unpert_all_hidden = model(output_so_far)
        unpert_last_hidden = unpert_all_hidden[-1]

        # check if we are above grad max length
        if i >= grad_length:
            current_stepsize = stepsize * 0
        else:
            current_stepsize = stepsize

        # modify the past if necessary
        if not perturb or num_iterations == 0:
            pert_past = past

        else:
            # Sum of the hidden states of every position but the last one
            accumulated_hidden = unpert_last_hidden[:, :-1, :]
            accumulated_hidden = torch.sum(accumulated_hidden, dim=1)

            if past is not None:
                pert_past, _, grad_norms, loss_this_iter = perturb_past(
                    past,
                    model,
                    last,
                    unpert_past=unpert_past,
                    unpert_logits=unpert_logits,
                    accumulated_hidden=accumulated_hidden,
                    grad_norms=grad_norms,
                    stepsize=current_stepsize,
                    classifier=classifier,
                    class_label=class_label,
                    one_hot_bows_vectors=one_hot_bows_vectors,
                    loss_type=loss_type,
                    num_iterations=num_iterations,
                    kl_scale=kl_scale,
                    window_length=window_length,
                    horizon_length=horizon_length,
                    decay=decay,
                    gamma=gamma,
                    device=device
                )
                loss_in_time.append(loss_this_iter)
            else:
                pert_past = past

        pert_logits, past, _ = model(last, past=pert_past)
        pert_logits = pert_logits[:, -1, :] / temperature  # + SMALL_CONST
        pert_probs = F.softmax(pert_logits, dim=-1)

        if classifier is not None:
            ce_loss = torch.nn.CrossEntropyLoss()
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([class_label], device=device,
                                 dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print(
                "unperturbed discrim loss",
                unpert_discrim_loss.data.cpu().numpy()
            )
        else:
            unpert_discrim_loss = 0

        # Fuse the modified model and original model
        if perturb:

            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)

            pert_probs = ((pert_probs ** gm_scale) * (
                    unpert_probs ** (1 - gm_scale)))  # + SMALL_CONST
            pert_probs = top_k_filter(pert_probs, k=top_k,
                                      probs=True)  # + SMALL_CONST

            # rescale
            if torch.sum(pert_probs) <= 1:
                pert_probs = pert_probs / torch.sum(pert_probs)

        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)  # + SMALL_CONST
            pert_probs = F.softmax(pert_logits, dim=-1)

        # sample or greedy
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)

        else:
            _, last = torch.topk(pert_probs, k=1, dim=-1)

        # update context/output_so_far appending the new token
        output_so_far = (
            last if output_so_far is None
            else torch.cat((output_so_far, last), dim=1)
        )

        print(TOKENIZER.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time
Julien Chaumond's avatar
Julien Chaumond committed
618
619


620
621
622
623
624
625
626
627
628
629
630
631
632
633
def set_generic_model_params(discrim_weights, discrim_meta):
    """Register a user-supplied discriminator under the name ``generic``.

    The JSON file at ``discrim_meta`` is loaded, the weights location is
    stored under its ``path`` key and the resulting mapping is placed in
    ``DISCRIMINATOR_MODELS_PARAMS['generic']``.

    Raises:
        ValueError: if ``discrim_weights`` or ``discrim_meta`` is ``None``
            (weights are checked first).
    """
    weights_missing = discrim_weights is None
    if weights_missing:
        raise ValueError('When using a generic discriminator, '
                         'discrim_weights need to be specified')

    meta_missing = discrim_meta is None
    if meta_missing:
        raise ValueError('When using a generic discriminator, '
                         'discrim_meta need to be specified')

    with open(discrim_meta, 'r') as meta_fh:
        generic_params = json.load(meta_fh)

    # Point the entry at the local weights file instead of a download URL.
    generic_params['path'] = discrim_weights
    DISCRIMINATOR_MODELS_PARAMS['generic'] = generic_params


Julien Chaumond's avatar
Julien Chaumond committed
634
635
def _build_arg_parser():
    """Build the command-line parser for the PPLM generation script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help="Bags of words used for PPLM-BoW. "
             "Either a BOW id (see list in code) or a filepath. "
             "Multiple BoWs separated by ;",
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity", "generic"),
        help="Discriminator to use",
    )
    parser.add_argument('--discrim_weights', type=str, default=None,
                        help='Weights for the generic discriminator')
    parser.add_argument('--discrim_meta', type=str, default=None,
                        help='Meta information for the generic discriminator')
    parser.add_argument(
        "--class_label",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument(
        "--uncond", action="store_true",
        help="Generate from end-of-text as prefix"
    )
    parser.add_argument(
        "--cond_text", type=str, default="The lake",
        help="Prefix texts to condition on"
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; "
             "0 corresponds to infinite window length",
    )
    parser.add_argument("--decay", action="store_true",
                        help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--colorama", action="store_true",
                        help="colors keywords")
    return parser


def _decode_generated_tokens(token_ids, highlight_ids, color_module):
    """Decode token ids to text, colouring ids found in ``highlight_ids``.

    When ``color_module`` is ``None`` the whole sequence is decoded in one
    call; otherwise every token is decoded on its own so that highlighted
    tokens can be wrapped in the module's red / reset escape codes.
    """
    if color_module is None:
        return TOKENIZER.decode(token_ids)

    pieces = []
    for word_id in token_ids:
        piece = TOKENIZER.decode([word_id])
        if word_id in highlight_ids:
            piece = '{}{}{}'.format(
                color_module.Fore.RED,
                piece,
                color_module.Style.RESET_ALL
            )
        pieces.append(piece)
    return ''.join(pieces)


def run_model():
    """Command-line entry point: generate unperturbed and perturbed text.

    Parses the command-line arguments, seeds torch and numpy, loads the
    GPT-2 language model named by ``--model_path`` with frozen weights,
    builds the conditioning prefix (the BOS token alone with ``--uncond``,
    otherwise BOS + ``--cond_text``), runs ``full_text_generation`` and
    prints the unperturbed text followed by every perturbed sample.

    With ``--colorama`` the perturbed samples are decoded token by token and
    single-token bag-of-words entries are printed in red.

    Returns:
        None. All results are written to standard output.
    """
    # Local import: only needed for the diagnostics printed to stderr below.
    import sys

    args = _build_arg_parser().parse_args()

    # set Random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"

    if args.discrim == 'generic':
        set_generic_model_params(args.discrim_weights, args.discrim_meta)

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(
        args.model_path,
        output_hidden_states=True
    )
    model.to(device)
    model.eval()

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if args.uncond:
        tokenized_cond_text = TOKENIZER.encode(
            [TOKENIZER.bos_token]
        )
    else:
        raw_text = args.cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = TOKENIZER.encode(TOKENIZER.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(TOKENIZER.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model, context=tokenized_cond_text, device=device, **vars(args)
    )

    # untokenize unperturbed text
    unpert_gen_text = TOKENIZER.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    # Import colorama once, up front, instead of once per sample. If it is
    # not installed, fall back to plain output rather than losing every
    # perturbed sample.
    color_module = None
    if args.colorama:
        try:
            import colorama as color_module
        except ImportError:
            print(
                "colorama is not installed; printing without colors",
                file=sys.stderr
            )

    bow_word_ids = set()
    if args.bag_of_words and color_module is not None:
        bow_indices = get_bag_of_words_indices(args.bag_of_words.split(";"))
        for single_bow_list in bow_indices:
            # Keep only words made of exactly one token: multi-token words
            # cannot be matched against single ids, and an empty token list
            # has no w[0] to read.
            bow_word_ids.update(
                w[0] for w in single_bow_list if len(w) == 1
            )

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        try:
            # untokenize perturbed text
            pert_gen_text = _decode_generated_tokens(
                pert_gen_tok_text.tolist()[0], bow_word_ids, color_module
            )

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as exc:
            # Best effort: one sample that cannot be decoded or printed must
            # not abort the remaining ones, but the failure is reported
            # instead of being silently dropped.
            print(
                "Could not print perturbed text {}: {}".format(i + 1, exc),
                file=sys.stderr
            )
Julien Chaumond's avatar
Julien Chaumond committed
812
813


Piero Molino's avatar
Piero Molino committed
814
# Script entry point: run the generation only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_model()