#! /usr/bin/env python3
# coding=utf-8

# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Example command with bag of words:
python run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95

Example command with discriminator:
python run_pplm.py -D sentiment --class_label 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
"""

import argparse
27
import json
Julien Chaumond's avatar
Julien Chaumond committed
28
29
30
31
32
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
33
from pplm_classification_head import ClassificationHead
34
from torch import nn
Julien Chaumond's avatar
Julien Chaumond committed
35
36
from tqdm import trange

Sylvain Gugger's avatar
Sylvain Gugger committed
37
from transformers import GPT2LMHeadModel, GPT2Tokenizer
Julien Chaumond's avatar
Julien Chaumond committed
38
from transformers.file_utils import cached_path
Aymeric Augustin's avatar
Aymeric Augustin committed
39

Julien Chaumond's avatar
Julien Chaumond committed
40
41
42
43
44

# Selectors for the `loss_type` argument of the generation functions.
PPLM_BOW = 1
PPLM_DISCRIM = 2
PPLM_BOW_DISCRIM = 3
# Tiny offset used to keep probabilities and gradient norms away from exact zero.
SMALL_CONST = 1e-15
# Large magnitude; its negation stands in for -infinity when masking logits.
BIG_CONST = 1e10

# Bag-of-words identifier -> URL of the corresponding word list.
BAG_OF_WORDS_ARCHIVE_MAP = {
    "legal": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/legal.txt",
    "military": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/military.txt",
    "politics": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/politics.txt",
    "religion": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/religion.txt",
    "science": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/science.txt",
    "space": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/space.txt",
    "technology": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/technology.txt",
}

# Discriminator name -> parameters needed to build and load its classification head.
# Each entry provides the weights location ("url", or "path" for a local file), the
# head dimensions, the label-name -> class-id mapping, the fallback class id and the
# language model the discriminator is paired with.
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/clickbait_classifier_head.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_clickbait": 0, "clickbait": 1},
        "default_class": 1,
        "pretrained_model": "gpt2-medium",
    },
    "sentiment": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/SST_classifier_head.pt",
        "class_size": 5,
        "embed_size": 1024,
        "class_vocab": {"very_positive": 2, "very_negative": 3},
        "default_class": 3,
        "pretrained_model": "gpt2-medium",
    },
}


Piero Molino's avatar
Piero Molino committed
77
78
79
80
81
82
83
84
85
86
87
88
def top_k_filter(logits, k, probs=False):
    """
    Keep the k largest entries of every row and mask out all the others.

    Masked entries are set to -BIG_CONST, which stands in for -infinity so that
    e^-infinity -> 0 and they do not contribute to the sum of a softmax
    denominator. When `probs` is True the input is treated as probabilities and
    masked entries are set to 0.0 instead.

    Entries that tie with the k-th largest value are kept. With k == 0 the input
    is returned unchanged.
    """
    if k == 0:
        return logits

    values = torch.topk(logits, k)[0]
    # smallest of the k kept values in each row, broadcast over the whole row
    batch_mins = values[:, -1].view(-1, 1).expand_as(logits)
    fill_value = 0.0 if probs else -BIG_CONST
    return torch.where(logits < batch_mins, torch.full_like(logits, fill_value), logits)
Piero Molino's avatar
Piero Molino committed
91
92


93
def perturb_past(
    past,
    model,
    last,
    unpert_past=None,
    unpert_logits=None,
    accumulated_hidden=None,
    grad_norms=None,
    stepsize=0.01,
    one_hot_bows_vectors=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    num_iterations=3,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    kl_scale=0.01,
    device="cuda",
):
    """
    Compute a perturbed copy of `past` by gradient steps on the PPLM loss.

    For `num_iterations` rounds, a perturbation (initially zero) is added to every
    tensor in `past`, the model is run on `last` with that perturbed past, and a
    loss is assembled from the enabled terms: the bag-of-words loss
    (`loss_type` PPLM_BOW / PPLM_BOW_DISCRIM), the discriminator loss
    (`loss_type` PPLM_DISCRIM / PPLM_BOW_DISCRIM) and, when `kl_scale` > 0, a
    KL term against `unpert_logits`. The gradient with respect to the
    perturbation is masked by the window mask, divided by its norm raised to
    `gamma`, scaled by `-stepsize` and accumulated.

    `past` is expected to be a sequence of 5-D tensors whose 4th axis is unpacked
    as the current length (presumably the stacked key/value cache of an older
    transformers version -- TODO confirm against the installed version).

    Returns:
        A tuple `(pert_past, new_accumulated_hidden, grad_norms, loss_per_iter)`:
        the past with the accumulated perturbation added, the accumulated hidden
        state from the last iteration (None when `num_iterations` is 0), the
        gradient norms used for normalization, and the total loss of every
        iteration as numpy values.
    """
    # Generate initial (all-zero) perturbation of the past
    grad_accumulator = [(np.zeros(p.shape).astype("float32")) for p in past]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The decay mask is only consumed when a window is active (window_length > 0);
    # guarding on it avoids a division by zero for decay=True, window_length=0.
    if decay and window_length > 0:
        decay_mask = torch.arange(0.0, 1.0 + SMALL_CONST, 1.0 / (window_length))[1:]
    else:
        decay_mask = 1.0

    # TODO fix this comment (SUMANTH)
    # Build the mask that restricts the gradient perturbation to a window of the past
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = tuple(past[0].shape[:-2]) + (window_length,) + tuple(past[0].shape[-1:])

        zeros_key_val_shape = tuple(past[0].shape[:-2]) + (curr_length - window_length,) + tuple(past[0].shape[-1:])

        ones_mask = torch.ones(ones_key_val_shape)
        # move the window axis last so decay_mask broadcasts along it, then move it back
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        window_mask = torch.cat((ones_mask, torch.zeros(zeros_key_val_shape)), dim=-2).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [torch.from_numpy(p_).requires_grad_(True).to(device=device) for p_ in grad_accumulator]
        # make sure p_.grad is not None
        for p_ in curr_perturbation:
            p_.retain_grad()

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        lm_output = model(last, past_key_values=perturbed_past)
        all_logits, all_hidden = lm_output["logits"], lm_output["hidden_states"]
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(hidden, dim=1).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = nn.functional.softmax(logits, dim=-1)

        loss = 0.0
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == PPLM_DISCRIM or loss_type == PPLM_BOW_DISCRIM:
            ce_loss = nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # feed the expected embedding under the current distribution
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                lm_output = model(past_key_values=curr_unpert_past, inputs_embeds=inputs_embeds)
                curr_all_logits, curr_unpert_past, curr_all_hidden = (
                    lm_output["logits"],
                    lm_output["past_key_values"],
                    lm_output["hidden_states"],
                )
                curr_logits = curr_all_logits[:, -1, :]
                curr_probs = nn.functional.softmax(curr_logits, dim=-1)
                curr_probs = torch.unsqueeze(curr_probs, dim=1)
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden / (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label], device=device, dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = nn.functional.softmax(unpert_logits[:, -1, :], dim=-1)
            # nudge (near-)zero probabilities so the ratio and the log stay finite
            unpert_probs = unpert_probs + SMALL_CONST * (unpert_probs <= SMALL_CONST).float().to(device).detach()
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * ((corrected_probs * (corrected_probs / unpert_probs).log()).sum())
            print(" kl_loss", kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(" pplm_loss", (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [(torch.norm(p_.grad * window_mask) + SMALL_CONST) for p_ in curr_perturbation]

        # normalize gradients
        grad = [
            -stepsize * (p_.grad * window_mask / grad_norms[index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        past = [p_.detach() for p_ in past]

    # apply the accumulated perturbations to the past
    grad_accumulator = [torch.from_numpy(p_).requires_grad_(True).to(device=device) for p_ in grad_accumulator]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Julien Chaumond's avatar
Julien Chaumond committed
252
253
254


def get_classifier(
    name: Optional[str], class_label: Union[str, int], device: str
) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    """
    Build the discriminator head registered under `name` and resolve the class id.

    The head is created from the entry in DISCRIMINATOR_MODELS_PARAMS, its weights
    are loaded from the entry's "url" (through `cached_path`) or local "path", and
    it is switched to eval mode.

    `class_label` may be a label name (looked up in the entry's "class_vocab") or a
    class id (accepted if it is one of the vocab's values). An unknown name or id
    falls back to the entry's "default_class" with a printed notice; a label of any
    other type falls back silently.

    Returns:
        `(classifier, label_id)`, or `(None, None)` when `name` is None.

    Raises:
        ValueError: if the entry specifies neither "url" nor "path".
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(class_size=params["class_size"], embed_size=params["embed_size"]).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified in the discriminator model parameters")
    # NOTE(security): torch.load unpickles the file; only point "url"/"path" at
    # weights from a trusted source.
    classifier.load_state_dict(torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    class_vocab = params["class_vocab"]
    default_class = params["default_class"]

    if isinstance(class_label, str):
        is_known = class_label in class_vocab
        label_id = class_vocab[class_label] if is_known else default_class
    elif isinstance(class_label, int):
        is_known = class_label in set(class_vocab.values())
        label_id = class_label if is_known else default_class
    else:
        # neither a name nor an id: use the default without any notice
        return classifier, default_class

    if not is_known:
        print("class_label {} not in class_vocab".format(class_label))
        print("available values are: {}".format(class_vocab))
        print("using default class {}".format(label_id))

    return classifier, label_id


295
def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str], tokenizer) -> List[List[List[int]]]:
    """
    Load every requested bag of words and tokenize its words.

    Each item is either a key of BAG_OF_WORDS_ARCHIVE_MAP (resolved through
    `cached_path`) or a path to a text file with one word per line. Blank lines
    are skipped so that they do not produce empty token lists.

    Returns:
        One list per bag, holding for each word the token ids returned by
        `tokenizer.encode(word, add_prefix_space=True)`.
    """
    bow_indices = []
    for id_or_path in bag_of_words_ids_or_paths:
        if id_or_path in BAG_OF_WORDS_ARCHIVE_MAP:
            filepath = cached_path(BAG_OF_WORDS_ARCHIVE_MAP[id_or_path])
        else:
            filepath = id_or_path
        # explicit encoding: do not depend on the platform's default locale
        with open(filepath, "r", encoding="utf-8") as f:
            words = [line.strip() for line in f.read().strip().split("\n")]
        bow_indices.append([tokenizer.encode(word, add_prefix_space=True) for word in words if word])
    return bow_indices


308
def build_bows_one_hot_vectors(bow_indices, tokenizer, device="cuda"):
    """
    Turn tokenized bags of words into one-hot matrices over the vocabulary.

    Only words encoded as exactly one token are used; words that were split into
    several tokens, and empty encodings, are dropped. For every bag the result is
    a `(num_kept_words, tokenizer.vocab_size)` tensor on `device` with a single 1
    per row, at the word's token id.

    Returns:
        A list with one tensor per bag, or None when `bow_indices` is None.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        # `== 1` rather than `<= 1`: an empty encoding would make the list ragged
        # and torch.tensor() would reject it
        single_token_ids = [token_ids for token_ids in single_bow if len(token_ids) == 1]
        token_index = torch.tensor(single_token_ids).to(device)
        num_words = token_index.shape[0]
        one_hot_bow = torch.zeros(num_words, tokenizer.vocab_size).to(device)
        one_hot_bow.scatter_(1, token_index, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


323
def full_text_generation(
    model,
    tokenizer,
    context=None,
    num_samples=1,
    device="cuda",
    bag_of_words=None,
    discrim=None,
    class_label=None,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    repetition_penalty=1.0,
    **kwargs,
):
    """
    Generate one unperturbed text and `num_samples` perturbed texts for `context`.

    The attribute model is chosen from the arguments: `bag_of_words` is a
    ";"-separated string of bag-of-words ids or file paths, `discrim` is a
    discriminator name resolved by `get_classifier`. At least one of them must be
    given; when both are, the combined loss is used. Extra keyword arguments are
    accepted and ignored.

    Returns:
        `(unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time)`:
        the unperturbed output, the list of perturbed outputs, the discriminator
        losses reported for each perturbed sample (empty without a discriminator)
        and, per sample, the losses recorded during generation.

    Raises:
        ValueError: if neither a bag of words nor a discriminator is specified.
    """
    classifier, class_id = get_classifier(discrim, class_label, device)

    bow_indices = []
    if bag_of_words:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"), tokenizer)

    if bag_of_words and classifier is not None:
        print("Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.")
        loss_type = PPLM_BOW_DISCRIM

    elif bag_of_words:
        loss_type = PPLM_BOW
        print("Using PPLM-BoW")

    elif classifier is not None:
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")

    else:
        raise ValueError("Specify either a bag of words or a discriminator")

    # baseline: same context, no perturbation
    unpert_gen_tok_text, _, _ = generate_text_pplm(
        model=model,
        tokenizer=tokenizer,
        context=context,
        device=device,
        length=length,
        sample=sample,
        perturb=False,
        repetition_penalty=repetition_penalty,
    )
    if device == "cuda":
        torch.cuda.empty_cache()

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for _ in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            model=model,
            tokenizer=tokenizer,
            context=context,
            device=device,
            perturb=True,
            bow_indices=bow_indices,
            classifier=classifier,
            class_label=class_id,
            loss_type=loss_type,
            length=length,
            stepsize=stepsize,
            temperature=temperature,
            top_k=top_k,
            sample=sample,
            num_iterations=num_iterations,
            grad_length=grad_length,
            horizon_length=horizon_length,
            window_length=window_length,
            decay=decay,
            gamma=gamma,
            gm_scale=gm_scale,
            kl_scale=kl_scale,
            repetition_penalty=repetition_penalty,
        )
        pert_gen_tok_texts.append(pert_gen_tok_text)
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == "cuda":
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
Julien Chaumond's avatar
Julien Chaumond committed
421

422
423

def generate_text_pplm(
    model,
    tokenizer,
    context=None,
    past=None,
    device="cuda",
    perturb=True,
    bow_indices=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    repetition_penalty=1.0,
):
    """
    Generate `length` tokens after `context`, optionally steering them with PPLM.

    At every step the model is run on the full output so far to get the
    unperturbed logits and hidden states. When `perturb` is True (and
    `num_iterations` > 0 and a past is available) the past is modified by
    `perturb_past`, the next-token distribution from the perturbed past is fused
    with the unperturbed one using `gm_scale`, top-k filtered and renormalized.
    Otherwise the logits are top-k filtered and passed through a softmax. The
    next token is sampled when `sample` is True and taken greedily otherwise.
    The decoded output so far is printed after every step.

    Returns:
        `(output_so_far, unpert_discrim_loss, loss_in_time)`: the token ids of
        context plus generated tokens, the discriminator loss computed on the
        unperturbed hidden states at the last step (0 without a classifier), and
        the per-step losses returned by `perturb_past`.
    """
    output_so_far = None
    if context:
        context_t = torch.tensor(context, device=device, dtype=torch.long)
        while len(context_t.shape) < 2:
            context_t = context_t.unsqueeze(0)
        output_so_far = context_t

    # collect one hot vectors for bags of words
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, tokenizer, device)

    # loop-invariant: build the loss module once instead of on every step
    ce_loss = nn.CrossEntropyLoss() if classifier is not None else None

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    for i in trange(length, ascii=True):
        # Get past/probs for current output, except for last word
        # Note that GPT takes 2 inputs: past + current_token

        # run model forward to obtain unperturbed
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                past = model(output_so_far[:, :-1])["past_key_values"]

        lm_output = model(output_so_far)
        unpert_logits, unpert_past, unpert_all_hidden = (
            lm_output["logits"],
            lm_output["past_key_values"],
            lm_output["hidden_states"],
        )
        unpert_last_hidden = unpert_all_hidden[-1]

        # check if we are above grad max length
        if i >= grad_length:
            current_stepsize = stepsize * 0
        else:
            current_stepsize = stepsize

        # modify the past if necessary
        if not perturb or num_iterations == 0:
            pert_past = past

        else:
            accumulated_hidden = unpert_last_hidden[:, :-1, :]
            accumulated_hidden = torch.sum(accumulated_hidden, dim=1)

            if past is not None:
                pert_past, _, grad_norms, loss_this_iter = perturb_past(
                    past,
                    model,
                    last,
                    unpert_past=unpert_past,
                    unpert_logits=unpert_logits,
                    accumulated_hidden=accumulated_hidden,
                    grad_norms=grad_norms,
                    stepsize=current_stepsize,
                    one_hot_bows_vectors=one_hot_bows_vectors,
                    classifier=classifier,
                    class_label=class_label,
                    loss_type=loss_type,
                    num_iterations=num_iterations,
                    horizon_length=horizon_length,
                    window_length=window_length,
                    decay=decay,
                    gamma=gamma,
                    kl_scale=kl_scale,
                    device=device,
                )
                loss_in_time.append(loss_this_iter)
            else:
                pert_past = past

        lm_output = model(last, past_key_values=pert_past)
        pert_logits, past = (
            lm_output["logits"],
            lm_output["past_key_values"],
        )
        pert_logits = pert_logits[:, -1, :] / temperature  # + SMALL_CONST

        # repetition penalty: push down the logits of tokens already present in
        # the first sequence (negative logits are multiplied, positive divided)
        for token_idx in set(output_so_far[0].tolist()):
            if pert_logits[0, token_idx] < 0:
                pert_logits[0, token_idx] *= repetition_penalty
            else:
                pert_logits[0, token_idx] /= repetition_penalty

        pert_probs = nn.functional.softmax(pert_logits, dim=-1)

        if classifier is not None:
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([class_label], device=device, dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print("unperturbed discrim loss", unpert_discrim_loss.data.cpu().numpy())
        else:
            unpert_discrim_loss = 0

        # Fuse the modified model and original model
        if perturb:
            unpert_probs = nn.functional.softmax(unpert_logits[:, -1, :], dim=-1)

            pert_probs = (pert_probs**gm_scale) * (unpert_probs ** (1 - gm_scale))  # + SMALL_CONST
            pert_probs = top_k_filter(pert_probs, k=top_k, probs=True)  # + SMALL_CONST

            # rescale
            if torch.sum(pert_probs) <= 1:
                pert_probs = pert_probs / torch.sum(pert_probs)

        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)  # + SMALL_CONST
            pert_probs = nn.functional.softmax(pert_logits, dim=-1)

        # sample or greedy
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)

        else:
            _, last = torch.topk(pert_probs, k=1, dim=-1)

        # update context/output_so_far appending the new token
        output_so_far = last if output_so_far is None else torch.cat((output_so_far, last), dim=1)

        print(tokenizer.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time
Julien Chaumond's avatar
Julien Chaumond committed
573
574


575
576
def set_generic_model_params(discrim_weights, discrim_meta):
    """
    Register a user-supplied discriminator under the name "generic".

    The JSON file at `discrim_meta` is loaded, its "path" entry is set to
    `discrim_weights`, and the result is stored in
    DISCRIMINATOR_MODELS_PARAMS["generic"].

    Raises:
        ValueError: if `discrim_weights` or `discrim_meta` is None.
    """
    if discrim_weights is None:
        raise ValueError("When using a generic discriminator, discrim_weights need to be specified")
    if discrim_meta is None:
        raise ValueError("When using a generic discriminator, discrim_meta need to be specified")

    # JSON is UTF-8 by specification; do not rely on the platform default encoding
    with open(discrim_meta, "r", encoding="utf-8") as discrim_meta_file:
        meta = json.load(discrim_meta_file)
    meta["path"] = discrim_weights
    DISCRIMINATOR_MODELS_PARAMS["generic"] = meta
585
586


587
def run_pplm_example(
    pretrained_model="gpt2-medium",
    cond_text="",
    uncond=False,
    num_samples=1,
    bag_of_words=None,
    discrim=None,
    discrim_weights=None,
    discrim_meta=None,
    class_label=-1,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    seed=0,
    no_cuda=False,
    colorama=False,
    repetition_penalty=1.0,
):
    """Run one PPLM generation session and print the results.

    Seeds the RNGs, loads the GPT-2 model and tokenizer, builds the
    conditioning prefix, calls ``full_text_generation`` and prints the prefix,
    the unperturbed text and every perturbed text.

    Args:
        pretrained_model: Model name or local checkpoint path. Overridden by
            the discriminator's registered ``"pretrained_model"`` when
            ``discrim`` is given.
        cond_text: Prefix text to condition on. If empty (and ``uncond`` is
            false) the user is prompted on stdin until a non-empty prompt is
            entered.
        uncond: If true, the prefix is only the tokenizer's BOS token.
        num_samples: Number of samples to generate from the modified latents.
        bag_of_words: Bag(s) of words for PPLM-BoW, multiple ones separated
            by ``;``.
        discrim: Name of the discriminator to use, or ``None``.
        discrim_weights: Weights for the ``"generic"`` discriminator.
        discrim_meta: Meta information file for the ``"generic"`` discriminator.
        class_label: Class label used for the discriminator.
        window_length: Length of past which is being optimized; 0 corresponds
            to infinite window length.
        horizon_length: Length of future to optimize over.
        seed: Seed passed to ``torch.manual_seed`` and ``np.random.seed``.
        no_cuda: If true, run on CPU even when CUDA is available.
        colorama: If true (and ``bag_of_words`` is set), single-token
            bag-of-words entries are printed in red using the optional
            ``colorama`` package.
        length, stepsize, temperature, top_k, sample, num_iterations,
        grad_length, decay, gamma, gm_scale, kl_scale, repetition_penalty:
            Forwarded unchanged to ``full_text_generation``.

    Returns:
        A list with one ``(tokenized_cond_text, pert_gen_tok_text,
        unpert_gen_tok_text)`` tuple per perturbed sample.

    Raises:
        ValueError: From ``set_generic_model_params`` when ``discrim`` is
            ``"generic"`` and the weights or meta file are not supplied.
    """
    # set Random seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"

    if discrim == "generic":
        set_generic_model_params(discrim_weights, discrim_meta)

    if discrim is not None:
        # The discriminator's registry entry names the model to pair it with,
        # so it takes precedence over the caller's choice.
        pretrained_model = DISCRIMINATOR_MODELS_PARAMS[discrim]["pretrained_model"]
        print("discrim = {}, pretrained_model set to discriminator's = {}".format(discrim, pretrained_model))

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(pretrained_model, output_hidden_states=True)
    model.to(device)
    model.eval()

    # load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if uncond:
        tokenized_cond_text = tokenizer.encode([tokenizer.bos_token])
    else:
        raw_text = cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = tokenizer.encode(tokenizer.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(tokenizer.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model,
        tokenizer=tokenizer,
        context=tokenized_cond_text,
        device=device,
        num_samples=num_samples,
        bag_of_words=bag_of_words,
        discrim=discrim,
        class_label=class_label,
        length=length,
        stepsize=stepsize,
        temperature=temperature,
        top_k=top_k,
        sample=sample,
        num_iterations=num_iterations,
        grad_length=grad_length,
        horizon_length=horizon_length,
        window_length=window_length,
        decay=decay,
        gamma=gamma,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        repetition_penalty=repetition_penalty,
    )

    # untokenize unperturbed text
    unpert_gen_text = tokenizer.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    generated_texts = []

    # Token ids to highlight; only needed when coloured output was requested.
    bow_word_ids = set()
    if bag_of_words and colorama:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"), tokenizer)
        for single_bow_list in bow_indices:
            # Keep only words made of exactly one token: multi-token words cannot
            # be matched id-by-id below, and an empty encoding has no w[0].
            bow_word_ids.update(w[0] for w in single_bow_list if len(w) == 1)

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        try:
            # untokenize perturbed text
            token_ids = pert_gen_tok_text.tolist()[0]
            if colorama:
                # Imported lazily because the package is optional, and under an
                # alias so the boolean `colorama` argument is not rebound.
                import colorama as colorama_lib

                pert_gen_text = "".join(
                    "{}{}{}".format(
                        colorama_lib.Fore.RED,
                        tokenizer.decode([word_id]),
                        colorama_lib.Style.RESET_ALL,
                    )
                    if word_id in bow_word_ids
                    else tokenizer.decode([word_id])
                    for word_id in token_ids
                )
            else:
                pert_gen_text = tokenizer.decode(token_ids)

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as exc:
            # Best effort: a display failure for one sample must not abort the run.
            print("Ignoring error while generating perturbed text:", exc)

        # keep the prefix, perturbed seq, original seq for each index
        generated_texts.append((tokenized_cond_text, pert_gen_tok_text, unpert_gen_tok_text))

    return generated_texts
Julien Chaumond's avatar
Julien Chaumond committed
733
734


735
# Command-line entry point: every option maps one-to-one onto a keyword
# argument of run_pplm_example, so the parsed namespace is passed through as-is.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pretrained_model",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument("--cond_text", type=str, default="The lake", help="Prefix texts to condition on")
    parser.add_argument("--uncond", action="store_true", help="Generate from end-of-text as prefix")
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help=(
            "Bags of words used for PPLM-BoW. "
            "Either a BOW id (see list in code) or a filepath. "
            "Multiple BoWs separated by ;"
        ),
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity", "generic"),
        help="Discriminator to use",
    )
    parser.add_argument(
        "--discrim_weights",
        type=str,
        default=None,
        help="Weights for the generic discriminator",
    )
    parser.add_argument(
        "--discrim_meta",
        type=str,
        default=None,
        help="Meta information for the generic discriminator",
    )
    parser.add_argument(
        "--class_label",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    # The help text used to be a copy of the --uncond one; this flag actually
    # selects sampling over greedy (top-1) token selection.
    parser.add_argument(
        "--sample",
        action="store_true",
        help="Sample the next token from the filtered distribution instead of greedy decoding",
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; 0 corresponds to infinite window length",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument("--decay", action="store_true", help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument("--colorama", action="store_true", help="colors keywords")
    parser.add_argument(
        "--repetition_penalty",
        type=float,
        default=1.0,
        help="Penalize repetition. More than 1.0 -> less repetition",
    )

    args = parser.parse_args()
    run_pplm_example(**vars(args))