run_pplm.py 28.4 KB
Newer Older
Piero Molino's avatar
Piero Molino committed
1
#! /usr/bin/env python3
Julien Chaumond's avatar
Julien Chaumond committed
2
# coding=utf-8
Rosanne Liu's avatar
Rosanne Liu committed
3

4
# Copyright (c) 2019 Uber Technologies, Inc.
Julien Chaumond's avatar
Julien Chaumond committed
5
#
6
7
8
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Julien Chaumond's avatar
Julien Chaumond committed
9
#
10
# http://www.apache.org/licenses/LICENSE-2.0
Julien Chaumond's avatar
Julien Chaumond committed
11
#
12
13
14
15
16
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Julien Chaumond's avatar
Julien Chaumond committed
17
18
19

"""
Example command with bag of words:
20
python run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95
Julien Chaumond's avatar
Julien Chaumond committed
21
22

Example command with discriminator:
23
python run_pplm.py -D sentiment --class_label 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
Julien Chaumond's avatar
Julien Chaumond committed
24
25
26
"""

import argparse
27
import json
Julien Chaumond's avatar
Julien Chaumond committed
28
29
30
31
32
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
33
from torch import nn
Julien Chaumond's avatar
Julien Chaumond committed
34
35
from tqdm import trange

Aymeric Augustin's avatar
Aymeric Augustin committed
36
from pplm_classification_head import ClassificationHead
Sylvain Gugger's avatar
Sylvain Gugger committed
37
from transformers import GPT2LMHeadModel, GPT2Tokenizer
Julien Chaumond's avatar
Julien Chaumond committed
38
from transformers.file_utils import cached_path
Aymeric Augustin's avatar
Aymeric Augustin committed
39

Julien Chaumond's avatar
Julien Chaumond committed
40
41
42
43
44

# Loss-type selectors: full_text_generation picks one of these and passes it
# down as `loss_type` to choose which attribute loss(es) steer generation.
PPLM_BOW = 1  # bag-of-words loss only
PPLM_DISCRIM = 2  # discriminator loss only
PPLM_BOW_DISCRIM = PPLM_BOW | PPLM_DISCRIM  # both losses together (== 3)

# SMALL_CONST keeps probabilities and gradient norms away from exact zero;
# BIG_CONST is negated and used as a finite stand-in for infinity when masking logits.
SMALL_CONST = 1e-15
BIG_CONST = 1e10
Julien Chaumond's avatar
Julien Chaumond committed
46
47

# Built-in bag-of-words topics, each mapped to the URL of its word-list file.
# get_bag_of_words_indices resolves these names through cached_path; any other
# string is treated as a path to a local word list.
BAG_OF_WORDS_ARCHIVE_MAP = dict(
    legal="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/legal.txt",
    military="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/military.txt",
    politics="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/politics.txt",
    religion="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/religion.txt",
    science="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/science.txt",
    space="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/space.txt",
    technology="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/technology.txt",
)

# Settings for each built-in attribute discriminator: where to fetch the
# classifier-head weights, the head's dimensions, the label-name -> id mapping,
# the label used when the requested one is unknown, and the GPT-2 checkpoint
# that is loaded whenever this discriminator is selected.
_CLICKBAIT_DISCRIMINATOR = dict(
    url="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/clickbait_classifier_head.pt",
    class_size=2,
    embed_size=1024,
    class_vocab={"non_clickbait": 0, "clickbait": 1},
    default_class=1,
    pretrained_model="gpt2-medium",
)

_SENTIMENT_DISCRIMINATOR = dict(
    url="https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/SST_classifier_head.pt",
    class_size=5,
    embed_size=1024,
    class_vocab={"very_positive": 2, "very_negative": 3},
    default_class=3,
    pretrained_model="gpt2-medium",
)

# set_generic_model_params adds a "generic" entry to this mapping at runtime.
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": _CLICKBAIT_DISCRIMINATOR,
    "sentiment": _SENTIMENT_DISCRIMINATOR,
}


Piero Molino's avatar
Piero Molino committed
77
78
79
80
81
82
83
84
85
86
87
88
def top_k_filter(logits, k, probs=False):
    """
    Keep the k largest entries along the last dimension and mask out the rest.

    Masked entries are set to -BIG_CONST (a finite stand-in for -infinity), so
    that a following softmax gives them practically zero weight. When `probs`
    is True the input already holds probabilities and masked entries are set
    to 0.0 instead.

    Args:
        logits: tensor scored along its last dimension; any number of leading
            dimensions is accepted.
        k: number of entries to keep per row; 0 disables filtering.
        probs: True if `logits` holds probabilities rather than raw logits.

    Returns:
        `logits` itself when k == 0, otherwise a new tensor of the same shape.
        Entries tied with the k-th largest value are kept as well.
    """
    if k == 0:
        return logits

    top_values = torch.topk(logits, k)[0]
    # k-th largest value of every row; the trailing singleton dimension lets it
    # broadcast against `logits` whatever the number of leading dimensions.
    thresholds = top_values[..., -1:]
    fill_value = 0.0 if probs else -BIG_CONST
    return logits.masked_fill(logits < thresholds, fill_value)
Piero Molino's avatar
Piero Molino committed
91
92


93
def perturb_past(
    past,
    model,
    last,
    unpert_past=None,
    unpert_logits=None,
    accumulated_hidden=None,
    grad_norms=None,
    stepsize=0.01,
    one_hot_bows_vectors=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    num_iterations=3,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    kl_scale=0.01,
    device="cuda",
):
    """
    Nudge the cached `past` with a few gradient steps on the attribute loss.

    For `num_iterations` rounds a perturbation is added to `past`, the model is
    run on `last`, the selected losses (bag of words, discriminator, KL to the
    unperturbed distribution) are evaluated, and the normalised gradient with
    respect to the perturbation is accumulated.

    Args:
        past: sequence of tensors; each is unpacked as 5-D with the sequence
            length at index 3.
        model: language model called as `model(last, past_key_values=...)`; its
            output is indexed with "logits", "hidden_states" and
            "past_key_values".
        last: input passed to `model` together with the perturbed past
            (the caller passes the most recent token).
        unpert_past: unperturbed past used for the discriminator look-ahead.
        unpert_logits: unperturbed logits, needed when `kl_scale` > 0.
        accumulated_hidden: running sum of hidden states; None means 0.
        grad_norms: gradient norms from a previous call, or None.
        stepsize: scale of each gradient step.
        one_hot_bows_vectors: one-hot matrices, one per bag of words.
        classifier: discriminator head applied to the averaged hidden state.
        class_label: target class id for the discriminator loss.
        loss_type: one of PPLM_BOW, PPLM_DISCRIM, PPLM_BOW_DISCRIM.
        num_iterations: number of gradient steps.
        horizon_length: number of look-ahead steps for the discriminator loss.
        window_length: if > 0, only the first `window_length` positions along
            the sequence axis of the mask are non-zero.
            NOTE(review): presumably the most recent positions were intended;
            confirm against the PPLM paper.
        decay: scale the window mask with a linear ramp instead of ones.
        gamma: exponent applied to the gradient norm when normalising.
        kl_scale: weight of the KL term; 0 disables it.
        device: device on which masks and perturbations are placed.

    Returns:
        Tuple `(pert_past, new_accumulated_hidden, grad_norms, loss_per_iter)`.
    """
    # Generate initial (all-zero) perturbation, one array per entry of `past`
    grad_accumulator = [(np.zeros(p.shape).astype("float32")) for p in past]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The decay ramp is only consumed by the windowed mask below, which needs
    # window_length > 0; guarding here avoids dividing by a zero window length.
    if decay and window_length > 0:
        decay_mask = torch.arange(0.0, 1.0 + SMALL_CONST, 1.0 / window_length)[1:]
    else:
        decay_mask = 1.0

    # Build the mask that restricts the gradient perturbation to a window of the past
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = tuple(past[0].shape[:-2]) + tuple([window_length]) + tuple(past[0].shape[-1:])

        zeros_key_val_shape = (
            tuple(past[0].shape[:-2]) + tuple([curr_length - window_length]) + tuple(past[0].shape[-1:])
        )

        ones_mask = torch.ones(ones_key_val_shape)
        # swap the last two axes so the decay ramp broadcasts along the
        # sequence axis, then swap back
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        window_mask = torch.cat((ones_mask, torch.zeros(zeros_key_val_shape)), dim=-2).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    discrim_loss_types = (PPLM_DISCRIM, PPLM_BOW_DISCRIM)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [torch.from_numpy(p_).requires_grad_(True).to(device=device) for p_ in grad_accumulator]
        # make sure p_.grad is not None
        for p_ in curr_perturbation:
            p_.retain_grad()

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        lm_output = model(last, past_key_values=perturbed_past)
        all_logits, all_hidden = lm_output["logits"], lm_output["hidden_states"]
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(hidden, dim=1).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = nn.functional.softmax(logits, dim=-1)

        loss = 0.0
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            for one_hot_bow in one_hot_bows_vectors:
                # probability mass the model puts on each word of the bag
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type in discrim_loss_types:
            ce_loss = nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # feed the expected embedding under the current distribution
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                lm_output = model(past_key_values=curr_unpert_past, inputs_embeds=inputs_embeds)
                curr_all_logits, curr_unpert_past, curr_all_hidden = (
                    lm_output["logits"],
                    lm_output["past_key_values"],
                    lm_output["hidden_states"],
                )
                curr_logits = curr_all_logits[:, -1, :]
                curr_probs = nn.functional.softmax(curr_logits, dim=-1)
                curr_probs = torch.unsqueeze(curr_probs, dim=1)
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden / (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label], device=device, dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = nn.functional.softmax(unpert_logits[:, -1, :], dim=-1)
            # lift near-zero probabilities so the ratio and log below stay finite
            unpert_probs = unpert_probs + SMALL_CONST * (unpert_probs <= SMALL_CONST).float().to(device).detach()
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * ((corrected_probs * (corrected_probs / unpert_probs).log()).sum())
            print(" kl_loss", kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(" pplm_loss", (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [(torch.norm(p_.grad * window_mask) + SMALL_CONST) for p_ in curr_perturbation]

        # normalize gradients
        grad = [
            -stepsize * (p_.grad * window_mask / grad_norms[index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        past = [p_.detach() for p_ in past]

    # apply the accumulated perturbations to the past
    grad_accumulator = [torch.from_numpy(p_).requires_grad_(True).to(device=device) for p_ in grad_accumulator]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Julien Chaumond's avatar
Julien Chaumond committed
254
255
256


def get_classifier(
    name: Optional[str], class_label: Union[str, int], device: str
) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    """
    Load the discriminator head registered under `name` and resolve the target label.

    Args:
        name: key into DISCRIMINATOR_MODELS_PARAMS; None disables the discriminator.
        class_label: label name (str) or label id (int). A value missing from the
            discriminator's "class_vocab" falls back to its "default_class" with a
            printed notice; any other type falls back silently.
        device: device the head is moved to and the weights are mapped onto.

    Returns:
        `(classifier, label_id)`, or `(None, None)` when `name` is None.

    Raises:
        ValueError: if the discriminator parameters contain neither "url" nor "path".
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(class_size=params["class_size"], embed_size=params["embed_size"])
    classifier = classifier.to(device)

    # a hosted checkpoint takes precedence over a local one
    if "url" in params:
        weights_file = cached_path(params["url"])
    elif "path" in params:
        weights_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified in the discriminator model parameters")
    state_dict = torch.load(weights_file, map_location=device)
    classifier.load_state_dict(state_dict)
    classifier.eval()

    # Recognised labels return straight away; anything else drops through to
    # the shared "unknown label" notice at the bottom.
    if isinstance(class_label, str):
        if class_label in params["class_vocab"]:
            return classifier, params["class_vocab"][class_label]
    elif isinstance(class_label, int):
        if class_label in set(params["class_vocab"].values()):
            return classifier, class_label
    else:
        return classifier, params["default_class"]

    label_id = params["default_class"]
    print("class_label {} not in class_vocab".format(class_label))
    print("available values are: {}".format(params["class_vocab"]))
    print("using default class {}".format(label_id))
    return classifier, label_id


297
def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str], tokenizer) -> List[List[List[int]]]:
    """
    Tokenize every word of each requested bag of words.

    Args:
        bag_of_words_ids_or_paths: names found in BAG_OF_WORDS_ARCHIVE_MAP
            (fetched through cached_path) or paths to word-list files with one
            word per line.
        tokenizer: object whose `encode(word, add_prefix_space=True)` returns
            the token ids of a word.

    Returns:
        One list per bag, holding one list of token ids per non-blank line.
    """
    bow_indices = []
    for id_or_path in bag_of_words_ids_or_paths:
        if id_or_path in BAG_OF_WORDS_ARCHIVE_MAP:
            filepath = cached_path(BAG_OF_WORDS_ARCHIVE_MAP[id_or_path])
        else:
            filepath = id_or_path
        # explicit encoding so word lists are read the same way on every platform
        with open(filepath, "r", encoding="utf-8") as f:
            words = [line.strip() for line in f]
        # blank lines would encode to empty id lists, which cannot be turned
        # into one-hot rows later on, so they are skipped here
        bow_indices.append([tokenizer.encode(word, add_prefix_space=True) for word in words if word])
    return bow_indices


310
def build_bows_one_hot_vectors(bow_indices, tokenizer, device="cuda"):
    """
    Turn each bag of words into a one-hot matrix over the vocabulary.

    Only words that encode to exactly one token are used; multi-token words
    and empty encodings are dropped.

    Args:
        bow_indices: list of bags, each a list of token-id lists, or None.
        tokenizer: object exposing `vocab_size`.
        device: device on which the matrices are created.

    Returns:
        None if `bow_indices` is None, otherwise one float tensor of shape
        (number of kept words, tokenizer.vocab_size) per bag.

    Raises:
        ValueError: if a bag contains no single-token word.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        # exactly one token per word: empty encodings would make the index
        # tensor ragged, multi-token words have no single one-hot row
        single_token_ids = [ids for ids in single_bow if len(ids) == 1]
        if not single_token_ids:
            raise ValueError("Bag of words contains no word that maps to a single token")
        index = torch.tensor(single_token_ids, dtype=torch.long, device=device)
        one_hot_bow = torch.zeros(index.shape[0], tokenizer.vocab_size, device=device)
        one_hot_bow.scatter_(1, index, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


325
def full_text_generation(
    model,
    tokenizer,
    context=None,
    num_samples=1,
    device="cuda",
    bag_of_words=None,
    discrim=None,
    class_label=None,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    repetition_penalty=1.0,
    **kwargs
):
    """
    Generate one unperturbed text and `num_samples` PPLM-steered texts.

    The steering signal is a bag of words (`bag_of_words`, names or paths
    separated by ";"), a discriminator (`discrim` with `class_label`), or both.
    The remaining arguments are forwarded to generate_text_pplm; extra keyword
    arguments are accepted and ignored.

    NOTE(review): the unperturbed pass does not forward `temperature` or
    `top_k`, so it runs with generate_text_pplm's defaults - confirm intended.

    Returns:
        Tuple `(unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses,
        losses_in_time)`; `discrim_losses` stays empty without a discriminator.

    Raises:
        Exception: if neither a bag of words nor a discriminator is given.
    """
    classifier, class_id = get_classifier(discrim, class_label, device)

    bow_indices = get_bag_of_words_indices(bag_of_words.split(";"), tokenizer) if bag_of_words else []

    # decide which loss(es) steer the generation
    if not bag_of_words:
        if classifier is None:
            raise Exception("Specify either a bag of words or a discriminator")
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")
    elif classifier:
        print("Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.")
        loss_type = PPLM_BOW_DISCRIM
    else:
        loss_type = PPLM_BOW
        print("Using PPLM-BoW")

    # arguments shared by the unperturbed and the perturbed passes
    common_args = dict(
        model=model,
        tokenizer=tokenizer,
        context=context,
        device=device,
        length=length,
        sample=sample,
        repetition_penalty=repetition_penalty,
    )

    unpert_gen_tok_text, _, _ = generate_text_pplm(perturb=False, **common_args)
    if device == "cuda":
        torch.cuda.empty_cache()

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for _ in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            perturb=True,
            bow_indices=bow_indices,
            classifier=classifier,
            class_label=class_id,
            loss_type=loss_type,
            stepsize=stepsize,
            temperature=temperature,
            top_k=top_k,
            num_iterations=num_iterations,
            grad_length=grad_length,
            horizon_length=horizon_length,
            window_length=window_length,
            decay=decay,
            gamma=gamma,
            gm_scale=gm_scale,
            kl_scale=kl_scale,
            **common_args
        )
        pert_gen_tok_texts.append(pert_gen_tok_text)
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == "cuda":
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
Julien Chaumond's avatar
Julien Chaumond committed
423

424
425

def generate_text_pplm(
    model,
    tokenizer,
    context=None,
    past=None,
    device="cuda",
    perturb=True,
    bow_indices=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    repetition_penalty=1.0,
):
    """
    Generate `length` tokens after `context`, optionally steering every step with PPLM.

    Each step runs the model on the full sequence to get the unperturbed
    logits and hidden states, perturbs the cached past through perturb_past
    (when `perturb` is set, `num_iterations` is non-zero and a past exists),
    runs the model on the last token with that past, applies temperature and
    the repetition penalty, mixes perturbed and unperturbed distributions with
    `gm_scale`, applies top-k filtering, and then samples or takes the argmax.
    The decoded text so far is printed after every step.

    Returns:
        Tuple `(output_so_far, unpert_discrim_loss, loss_in_time)`: the token
        tensor including the context, the discriminator loss on the
        unperturbed hidden states from the final step (0 without a
        classifier), and the per-step loss lists returned by perturb_past.
    """
    output_so_far = None
    if context:
        tokens = torch.tensor(context, device=device, dtype=torch.long)
        # promote a flat token list to a batch with a leading dimension
        while tokens.dim() < 2:
            tokens = tokens.unsqueeze(0)
        output_so_far = tokens

    # collect one hot vectors for bags of words
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, tokenizer, device)

    # neither flag changes inside the loop, so decide once
    steering_enabled = perturb and num_iterations != 0

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    for step in trange(length, ascii=True):
        # The model takes two inputs: the cached past and the current token.
        # On the first step, build the past from everything but the last token.
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                past = model(output_so_far[:, :-1])["past_key_values"]

        # unperturbed forward pass over the whole sequence
        unpert_output = model(output_so_far)
        unpert_logits = unpert_output["logits"]
        unpert_past = unpert_output["past_key_values"]
        unpert_last_hidden = unpert_output["hidden_states"][-1]

        # beyond grad_length the perturbation step size drops to zero
        current_stepsize = stepsize if step < grad_length else stepsize * 0

        # modify the past if necessary
        pert_past = past
        if steering_enabled and past is not None:
            accumulated_hidden = torch.sum(unpert_last_hidden[:, :-1, :], dim=1)
            pert_past, _, grad_norms, loss_this_iter = perturb_past(
                past,
                model,
                last,
                unpert_past=unpert_past,
                unpert_logits=unpert_logits,
                accumulated_hidden=accumulated_hidden,
                grad_norms=grad_norms,
                stepsize=current_stepsize,
                one_hot_bows_vectors=one_hot_bows_vectors,
                classifier=classifier,
                class_label=class_label,
                loss_type=loss_type,
                num_iterations=num_iterations,
                horizon_length=horizon_length,
                window_length=window_length,
                decay=decay,
                gamma=gamma,
                kl_scale=kl_scale,
                device=device,
            )
            loss_in_time.append(loss_this_iter)

        pert_output = model(last, past_key_values=pert_past)
        pert_logits = pert_output["logits"]
        past = pert_output["past_key_values"]
        pert_logits = pert_logits[:, -1, :] / temperature

        # repetition penalty: push tokens already generated away from being
        # picked again, whatever the sign of their logit
        for seen_token in set(output_so_far[0].tolist()):
            if pert_logits[0, seen_token] < 0:
                pert_logits[0, seen_token] *= repetition_penalty
            else:
                pert_logits[0, seen_token] /= repetition_penalty

        pert_probs = nn.functional.softmax(pert_logits, dim=-1)

        if classifier is None:
            unpert_discrim_loss = 0
        else:
            ce_loss = nn.CrossEntropyLoss()
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([class_label], device=device, dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print("unperturbed discrim loss", unpert_discrim_loss.data.cpu().numpy())

        # Fuse the modified model and original model
        if perturb:
            unpert_probs = nn.functional.softmax(unpert_logits[:, -1, :], dim=-1)

            # geometric mean of the two distributions, weighted by gm_scale
            fused_probs = (pert_probs ** gm_scale) * (unpert_probs ** (1 - gm_scale))
            pert_probs = top_k_filter(fused_probs, k=top_k, probs=True)

            # rescale
            if torch.sum(pert_probs) <= 1:
                pert_probs = pert_probs / torch.sum(pert_probs)
        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)
            pert_probs = nn.functional.softmax(pert_logits, dim=-1)

        # sample or greedy
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)
        else:
            last = torch.topk(pert_probs, k=1, dim=-1)[1]

        # update context/output_so_far appending the new token
        if output_so_far is None:
            output_so_far = last
        else:
            output_so_far = torch.cat((output_so_far, last), dim=1)

        print(tokenizer.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time
Julien Chaumond's avatar
Julien Chaumond committed
577
578


579
580
def set_generic_model_params(discrim_weights, discrim_meta):
    """
    Register a user-supplied discriminator under the "generic" key.

    Args:
        discrim_weights: location of the classifier-head weights; stored as
            the "path" entry of the registered parameters.
        discrim_meta: path to a JSON file holding the discriminator parameters.

    Raises:
        ValueError: if either argument is None.
    """
    if discrim_weights is None:
        raise ValueError("When using a generic discriminator, discrim_weights need to be specified")
    if discrim_meta is None:
        raise ValueError("When using a generic discriminator, discrim_meta need to be specified")

    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(discrim_meta, "r", encoding="utf-8") as discrim_meta_file:
        meta = json.load(discrim_meta_file)
    meta["path"] = discrim_weights
    DISCRIMINATOR_MODELS_PARAMS["generic"] = meta
589
590


591
def run_pplm_example(
    pretrained_model="gpt2-medium",
    cond_text="",
    uncond=False,
    num_samples=1,
    bag_of_words=None,
    discrim=None,
    discrim_weights=None,
    discrim_meta=None,
    class_label=-1,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    seed=0,
    no_cuda=False,
    colorama=False,
    repetition_penalty=1.0,
):
    """Load GPT-2, generate unperturbed and perturbed texts, and print them.

    Args:
        pretrained_model: Name or path handed to ``from_pretrained`` for both the
            model and the tokenizer. Overridden by the discriminator's own
            ``"pretrained_model"`` entry whenever ``discrim`` is given.
        cond_text: Prefix text. If empty (and ``uncond`` is false) the user is
            prompted on stdin until a non-empty prefix is entered.
        uncond: Use only the tokenizer's BOS token as prefix.
        bag_of_words: Forwarded to ``full_text_generation``. When ``colorama``
            is enabled it is also split on ``";"`` to look up the single-token
            words that get highlighted in the output.
        discrim: Key into ``DISCRIMINATOR_MODELS_PARAMS``; ``"generic"``
            first registers the discriminator described by ``discrim_weights``
            and ``discrim_meta``.
        discrim_weights: Only used when ``discrim == "generic"``.
        discrim_meta: Only used when ``discrim == "generic"``.
        seed: Seed applied to both ``torch`` and ``numpy``.
        no_cuda: Force CPU even when CUDA is available.
        colorama: Highlight bag-of-words tokens in red using the ``colorama``
            package. If the package cannot be imported, texts are printed
            without colors.
        num_samples, class_label, length, stepsize, temperature, top_k, sample,
        num_iterations, grad_length, horizon_length, window_length, decay,
        gamma, gm_scale, kl_scale, repetition_penalty: Forwarded unchanged to
            ``full_text_generation``.

    Returns:
        A list with one ``(tokenized_cond_text, pert_gen_tok_text,
        unpert_gen_tok_text)`` tuple per perturbed sample.

    Raises:
        ValueError: From ``set_generic_model_params`` when ``discrim`` is
            ``"generic"`` and the weights or metadata path is missing.
    """
    # set Random seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"

    if discrim == "generic":
        set_generic_model_params(discrim_weights, discrim_meta)

    if discrim is not None:
        # the language model must be the one the discriminator was registered with
        pretrained_model = DISCRIMINATOR_MODELS_PARAMS[discrim]["pretrained_model"]
        print("discrim = {}, pretrained_model set to discriminator's = {}".format(discrim, pretrained_model))

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(pretrained_model, output_hidden_states=True)
    model.to(device)
    model.eval()

    # load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if uncond:
        tokenized_cond_text = tokenizer.encode([tokenizer.bos_token])
    else:
        raw_text = cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = tokenizer.encode(tokenizer.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(tokenizer.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model,
        tokenizer=tokenizer,
        context=tokenized_cond_text,
        device=device,
        num_samples=num_samples,
        bag_of_words=bag_of_words,
        discrim=discrim,
        class_label=class_label,
        length=length,
        stepsize=stepsize,
        temperature=temperature,
        top_k=top_k,
        sample=sample,
        num_iterations=num_iterations,
        grad_length=grad_length,
        horizon_length=horizon_length,
        window_length=window_length,
        decay=decay,
        gamma=gamma,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        repetition_penalty=repetition_penalty,
    )

    # untokenize unperturbed text
    unpert_gen_text = tokenizer.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    generated_texts = []

    # Import the optional color package once, under an alias, so the boolean
    # ``colorama`` parameter is not rebound to the module. A missing package
    # degrades to plain output instead of suppressing every perturbed text.
    color_lib = None
    if colorama:
        try:
            import colorama as color_lib
        except ImportError as exc:
            print("colorama is not available, printing without colors:", exc)

    bow_word_ids = set()
    if bag_of_words and color_lib is not None:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"), tokenizer)
        for single_bow_list in bow_indices:
            # Keep only words made of exactly one token: multi-token words cannot
            # be matched against a single generated id, and an empty encoding
            # has no w[0] to take.
            bow_word_ids.update(w[0] for w in single_bow_list if len(w) == 1)

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        try:
            # untokenize perturbed text
            token_ids = pert_gen_tok_text.tolist()[0]
            if color_lib is not None:
                # decode token by token so bag-of-words hits can be wrapped in red
                pieces = []
                for word_id in token_ids:
                    piece = tokenizer.decode([word_id])
                    if word_id in bow_word_ids:
                        piece = "{}{}{}".format(
                            color_lib.Fore.RED,
                            piece,
                            color_lib.Style.RESET_ALL,
                        )
                    pieces.append(piece)
                pert_gen_text = "".join(pieces)
            else:
                pert_gen_text = tokenizer.decode(token_ids)

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as exc:
            # best effort: a sample that fails to decode/print must not abort the run
            print("Ignoring error while generating perturbed text:", exc)

        # keep the prefix, perturbed seq, original seq for each index
        generated_texts.append((tokenized_cond_text, pert_gen_tok_text, unpert_gen_tok_text))

    return generated_texts
Julien Chaumond's avatar
Julien Chaumond committed
737
738


739
def _build_arg_parser():
    """Build the command-line parser whose options mirror ``run_pplm_example``.

    Every option's destination name matches a keyword parameter of
    ``run_pplm_example`` so the parsed namespace can be splatted into it.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pretrained_model",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument("--cond_text", type=str, default="The lake", help="Prefix texts to condition on")
    parser.add_argument("--uncond", action="store_true", help="Generate from end-of-text as prefix")
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help=(
            "Bags of words used for PPLM-BoW. "
            "Either a BOW id (see list in code) or a filepath. "
            "Multiple BoWs separated by ;"
        ),
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity", "generic"),
        help="Discriminator to use",
    )
    parser.add_argument(
        "--discrim_weights",
        type=str,
        default=None,
        help="Weights for the generic discriminator",
    )
    parser.add_argument(
        "--discrim_meta",
        type=str,
        default=None,
        help="Meta information for the generic discriminator",
    )
    parser.add_argument(
        "--class_label",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    # The help text used to be a copy of the --uncond help; describe what the
    # flag actually selects.
    parser.add_argument(
        "--sample",
        action="store_true",
        help="Sample the next token from the filtered distribution instead of picking the most likely one",
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; 0 corresponds to infinite window length",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument("--decay", action="store_true", help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument("--colorama", action="store_true", help="colors keywords")
    parser.add_argument(
        "--repetition_penalty",
        type=float,
        default=1.0,
        help="Penalize repetition. More than 1.0 -> less repetition",
    )
    return parser


if __name__ == "__main__":
    args = _build_arg_parser().parse_args()
    run_pplm_example(**vars(args))