#! /usr/bin/env python3
# coding=utf-8

# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Example command with bag of words:
python run_pplm.py -B space --cond_text "The president" --length 100 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.01 --window_length 5 --kl_scale 0.01 --gm_scale 0.95

Example command with discriminator:
python run_pplm.py -D sentiment --class_label 3 --cond_text "The lake" --length 10 --gamma 1.0 --num_iterations 30 --num_samples 10 --stepsize 0.01 --kl_scale 0.01 --gm_scale 0.95
"""

import argparse
27
import json
Julien Chaumond's avatar
Julien Chaumond committed
28
29
30
31
32
33
34
35
from operator import add
from typing import List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F
from tqdm import trange

Aymeric Augustin's avatar
Aymeric Augustin committed
36
from pplm_classification_head import ClassificationHead
Sylvain Gugger's avatar
Sylvain Gugger committed
37
from transformers import GPT2LMHeadModel, GPT2Tokenizer
Julien Chaumond's avatar
Julien Chaumond committed
38
from transformers.file_utils import cached_path
Aymeric Augustin's avatar
Aymeric Augustin committed
39

Julien Chaumond's avatar
Julien Chaumond committed
40
41
42
43
44

# Loss-type selectors: which attribute model(s) contribute to the perturbation loss.
PPLM_BOW = 1
PPLM_DISCRIM = 2
PPLM_BOW_DISCRIM = 3
# Tiny epsilon added to probabilities and gradient norms to avoid log(0) / division by zero.
SMALL_CONST = 1e-15
# Large magnitude; its negation is used as a finite stand-in for -infinity when masking logits.
BIG_CONST = 1e10
Julien Chaumond's avatar
Julien Chaumond committed
46
47

# Maps a built-in bag-of-words topic id to the URL of its word list (one word per line).
BAG_OF_WORDS_ARCHIVE_MAP = {
    "legal": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/legal.txt",
    "military": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/military.txt",
    "politics": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/politics.txt",
    "religion": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/religion.txt",
    "science": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/science.txt",
    "space": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/space.txt",
    "technology": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/bow/technology.txt",
}

# Per-discriminator settings, keyed by discriminator name:
#   url              - location of the classifier-head weights
#   class_size       - number of output classes of the head
#   embed_size       - input embedding size of the head
#   class_vocab      - class name -> class id accepted as `class_label`
#   default_class    - class id used when the requested label is not recognised
#   pretrained_model - language model the head is used with
DISCRIMINATOR_MODELS_PARAMS = {
    "clickbait": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/clickbait_classifier_head.pt",
        "class_size": 2,
        "embed_size": 1024,
        "class_vocab": {"non_clickbait": 0, "clickbait": 1},
        "default_class": 1,
        "pretrained_model": "gpt2-medium",
    },
    "sentiment": {
        "url": "https://s3.amazonaws.com/models.huggingface.co/bert/pplm/discriminators/SST_classifier_head.pt",
        "class_size": 5,
        "embed_size": 1024,
        "class_vocab": {"very_positive": 2, "very_negative": 3},
        "default_class": 3,
        "pretrained_model": "gpt2-medium",
    },
}


Piero Molino's avatar
Piero Molino committed
77
78
79
80
81
82
83
84
85
86
87
88
def top_k_filter(logits, k, probs=False):
    """
    Keep the k largest entries along the last dimension and mask out the rest.

    Masked entries are set to 0.0 when `probs` is True (the input holds
    probabilities) and to -BIG_CONST otherwise (a finite stand-in for
    -infinity, so that e^-infinity -> 0 won't contribute to the sum of the
    softmax denominator). Entries tied with the k-th largest value are kept.

    Args:
        logits: tensor filtered along its last dimension.
        k: number of entries to keep. 0 disables filtering; values larger
            than the last dimension are clamped to it.
        probs: whether `logits` actually holds probabilities.

    Returns:
        `logits` itself when k == 0, otherwise a new tensor of the same shape.
    """
    if k == 0:
        return logits

    # torch.topk raises when k exceeds the dimension size, so clamp it.
    k = min(k, logits.size(-1))
    # topk returns values sorted in descending order: the last one is the threshold.
    kth_largest = torch.topk(logits, k)[0][..., -1:]
    fill_value = 0.0 if probs else -BIG_CONST
    return torch.where(logits < kth_largest, torch.full_like(logits, fill_value), logits)
Piero Molino's avatar
Piero Molino committed
91
92


93
def perturb_past(
    past,
    model,
    last,
    unpert_past=None,
    unpert_logits=None,
    accumulated_hidden=None,
    grad_norms=None,
    stepsize=0.01,
    one_hot_bows_vectors=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    num_iterations=3,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    kl_scale=0.01,
    device="cuda",
):
    """
    Compute a perturbation of `past` by gradient steps on the attribute loss.

    For `num_iterations` steps the current perturbation is added to `past`,
    the model is run on `last`, a loss is built (bag-of-words and/or
    discriminator, plus an optional KL term against the unperturbed
    distribution), and the normalised, window-masked gradient with respect to
    the perturbation is accumulated.

    Args:
        past: sequence of 5-D tensors; the fourth dimension is read as the
            current length (presumably the cached key/values of the language
            model - TODO confirm layout against the model version in use).
        model: language model; called with `past_key_values` and expected to
            return "logits", "hidden_states" and "past_key_values".
        last: input passed to the model together with the perturbed past.
        unpert_past: unperturbed past used for the discriminator look-ahead.
        unpert_logits: unperturbed logits used for the KL term.
        accumulated_hidden: running sum of hidden states (0 when None).
        grad_norms: gradient norms from a previous call, or None.
        stepsize: gradient step size.
        one_hot_bows_vectors: one-hot bag-of-words matrices (BoW loss).
        classifier: discriminator head (discriminator loss).
        class_label: target class id for the discriminator loss.
        loss_type: PPLM_BOW, PPLM_DISCRIM or PPLM_BOW_DISCRIM.
        num_iterations: number of gradient steps.
        horizon_length: number of look-ahead steps for the discriminator loss.
        window_length: if > 0 and smaller than the current length, gradients
            are masked to a window of that many positions.
        decay: whether to scale the window mask with a linear ramp.
        gamma: exponent applied to the gradient norms when normalising.
        kl_scale: weight of the KL term (disabled when <= 0).
        device: device on which tensors are created.

    Returns:
        Tuple `(pert_past, new_accumulated_hidden, grad_norms, loss_per_iter)`.
    """
    # Generate initial perturbed past (all-zero perturbation)
    grad_accumulator = [(np.zeros(p.shape).astype("float32")) for p in past]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    # The ramp is only meaningful (and only used) with a positive window; guarding on
    # window_length also avoids a division by zero when decay is set without a window.
    if decay and window_length > 0:
        decay_mask = torch.arange(0.0, 1.0 + SMALL_CONST, 1.0 / (window_length))[1:]
    else:
        decay_mask = 1.0

    # Build the mask that restricts the gradient perturbation to a window of the past
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = tuple(past[0].shape[:-2]) + tuple([window_length]) + tuple(past[0].shape[-1:])

        zeros_key_val_shape = (
            tuple(past[0].shape[:-2]) + tuple([curr_length - window_length]) + tuple(past[0].shape[-1:])
        )

        ones_mask = torch.ones(ones_key_val_shape)
        # Swap the last two axes so that decay_mask broadcasts along the window axis, then swap back
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        window_mask = torch.cat((ones_mask, torch.zeros(zeros_key_val_shape)), dim=-2).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        print("Iteration ", i + 1)
        curr_perturbation = [torch.from_numpy(p_).requires_grad_(True).to(device=device) for p_ in grad_accumulator]
        # make sure p_.grad is not None
        for p_ in curr_perturbation:
            p_.retain_grad()

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        lm_output = model(last, past_key_values=perturbed_past)
        all_logits, all_hidden = lm_output["logits"], lm_output["hidden_states"]
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(hidden, dim=1).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        if loss_type in (PPLM_BOW, PPLM_BOW_DISCRIM):
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
            print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type in (PPLM_DISCRIM, PPLM_BOW_DISCRIM):
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                # Feed the expected embedding under the current distribution instead of a sampled token
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                lm_output = model(past_key_values=curr_unpert_past, inputs_embeds=inputs_embeds)
                curr_unpert_past, curr_all_hidden = lm_output["past_key_values"], lm_output["hidden_states"]
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden / (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label], device=device, dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            # Lift (near-)zero probabilities by SMALL_CONST so the log and the ratio stay finite
            unpert_probs = unpert_probs + SMALL_CONST * (unpert_probs <= SMALL_CONST).float().to(device).detach()
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * ((corrected_probs * (corrected_probs / unpert_probs).log()).sum())
            print(" kl_loss", kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        print(" pplm_loss", (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [(torch.norm(p_.grad * window_mask) + SMALL_CONST) for p_ in curr_perturbation]

        # normalize gradients
        grad = [
            -stepsize * (p_.grad * window_mask / grad_norms[index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        past = [p_.detach() for p_ in past]

    # apply the accumulated perturbations to the past
    grad_accumulator = [torch.from_numpy(p_).requires_grad_(True).to(device=device) for p_ in grad_accumulator]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Julien Chaumond's avatar
Julien Chaumond committed
247
248
249


def get_classifier(
    name: Optional[str], class_label: Union[str, int], device: str
) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    """
    Load the discriminator head registered under `name` and resolve the target class id.

    Args:
        name: key into DISCRIMINATOR_MODELS_PARAMS, or None for "no discriminator".
        class_label: class name (str) or class id (int). A value that is not in the
            discriminator's `class_vocab` falls back to its `default_class`
            (with a printed notice for str/int values).
        device: device the classifier is moved to and its weights are mapped to.

    Returns:
        `(classifier, label_id)`, or `(None, None)` when `name` is None.

    Raises:
        KeyError: if `name` is not a registered discriminator.
        ValueError: if the discriminator parameters contain neither "url" nor "path".
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(class_size=params["class_size"], embed_size=params["embed_size"]).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified in the discriminator model parameters")
    classifier.load_state_dict(torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    class_vocab = params["class_vocab"]
    if isinstance(class_label, str) and class_label in class_vocab:
        label_id = class_vocab[class_label]
    elif isinstance(class_label, int) and class_label in set(class_vocab.values()):
        label_id = class_label
    else:
        label_id = params["default_class"]
        # Only str/int labels were explicitly requested by the caller, so only those are reported
        if isinstance(class_label, (str, int)):
            print("class_label {} not in class_vocab".format(class_label))
            print("available values are: {}".format(class_vocab))
            print("using default class {}".format(label_id))

    return classifier, label_id


290
def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str], tokenizer) -> List[List[List[int]]]:
    """
    Tokenize every word of every requested bag of words.

    Args:
        bag_of_words_ids_or_paths: for each bag, either a key of
            BAG_OF_WORDS_ARCHIVE_MAP (resolved through `cached_path`) or a path to
            a text file with one word per line.
        tokenizer: tokenizer whose `encode(word, add_prefix_space=True)` is used.

    Returns:
        One list per bag, holding one list of token ids per (non-blank) word.
    """
    bow_indices = []
    for id_or_path in bag_of_words_ids_or_paths:
        if id_or_path in BAG_OF_WORDS_ARCHIVE_MAP:
            filepath = cached_path(BAG_OF_WORDS_ARCHIVE_MAP[id_or_path])
        else:
            filepath = id_or_path
        # Explicit encoding so the word list is read the same way on every platform
        with open(filepath, "r", encoding="utf-8") as f:
            words = [line.strip() for line in f.read().strip().split("\n")]
        # Blank lines (or an empty file) would otherwise be encoded as empty "words"
        bow_indices.append([tokenizer.encode(word, add_prefix_space=True) for word in words if word])
    return bow_indices


303
def build_bows_one_hot_vectors(bow_indices, tokenizer, device="cuda"):
    """
    Turn tokenized bags of words into one-hot matrices over the vocabulary.

    Only words that were encoded as exactly one token are used; multi-token
    words and empty encodings are dropped.

    Args:
        bow_indices: one list per bag, each holding one list of token ids per
            word, or None.
        tokenizer: object whose `vocab_size` gives the width of the matrices.
        device: device on which the tensors are created.

    Returns:
        None when `bow_indices` is None, otherwise a list with one float tensor
        of shape (num_single_token_words, vocab_size) per bag.

    Raises:
        ValueError: if a bag contains no single-token word.
    """
    if bow_indices is None:
        return None

    one_hot_bows_vectors = []
    for single_bow in bow_indices:
        # Exactly one token per word: empty encodings would make the list ragged
        single_token_words = [word_ids for word_ids in single_bow if len(word_ids) == 1]
        if not single_token_words:
            raise ValueError("Bag of words contains no single-token words, cannot build one-hot vectors")
        token_ids = torch.tensor(single_token_words, dtype=torch.long).to(device)
        num_words = token_ids.shape[0]
        one_hot_bow = torch.zeros(num_words, tokenizer.vocab_size).to(device)
        one_hot_bow.scatter_(1, token_ids, 1)
        one_hot_bows_vectors.append(one_hot_bow)
    return one_hot_bows_vectors


318
def full_text_generation(
    model,
    tokenizer,
    context=None,
    num_samples=1,
    device="cuda",
    bag_of_words=None,
    discrim=None,
    class_label=None,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    repetition_penalty=1.0,
    **kwargs
):
    """
    Generate one unperturbed text and `num_samples` perturbed texts.

    Args:
        model, tokenizer: language model and its tokenizer.
        context: token ids of the conditioning text.
        num_samples: number of perturbed generations.
        device: device used for tensors; the CUDA cache is emptied when it is "cuda".
        bag_of_words: ";"-separated bag-of-words ids or file paths, or None.
        discrim: discriminator name passed to `get_classifier`, or None.
        class_label: target class for the discriminator.
        length .. repetition_penalty: forwarded to `generate_text_pplm`
            (the unperturbed run only receives `length`, `sample` and
            `repetition_penalty`).
        **kwargs: accepted and ignored.

    Returns:
        Tuple `(unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time)`.

    Raises:
        ValueError: if neither a bag of words nor a discriminator is given.
    """
    classifier, class_id = get_classifier(discrim, class_label, device)

    bow_indices = []
    if bag_of_words:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"), tokenizer)

    if bag_of_words and classifier is not None:
        print("Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.")
        loss_type = PPLM_BOW_DISCRIM

    elif bag_of_words:
        loss_type = PPLM_BOW
        print("Using PPLM-BoW")

    elif classifier is not None:
        loss_type = PPLM_DISCRIM
        print("Using PPLM-Discrim")

    else:
        raise ValueError("Specify either a bag of words or a discriminator")

    # Baseline generation without any perturbation
    unpert_gen_tok_text, _, _ = generate_text_pplm(
        model=model,
        tokenizer=tokenizer,
        context=context,
        device=device,
        length=length,
        sample=sample,
        perturb=False,
        repetition_penalty=repetition_penalty,
    )
    if device == "cuda":
        torch.cuda.empty_cache()

    pert_gen_tok_texts = []
    discrim_losses = []
    losses_in_time = []

    for _ in range(num_samples):
        pert_gen_tok_text, discrim_loss, loss_in_time = generate_text_pplm(
            model=model,
            tokenizer=tokenizer,
            context=context,
            device=device,
            perturb=True,
            bow_indices=bow_indices,
            classifier=classifier,
            class_label=class_id,
            loss_type=loss_type,
            length=length,
            stepsize=stepsize,
            temperature=temperature,
            top_k=top_k,
            sample=sample,
            num_iterations=num_iterations,
            grad_length=grad_length,
            horizon_length=horizon_length,
            window_length=window_length,
            decay=decay,
            gamma=gamma,
            gm_scale=gm_scale,
            kl_scale=kl_scale,
            repetition_penalty=repetition_penalty,
        )
        pert_gen_tok_texts.append(pert_gen_tok_text)
        if classifier is not None:
            discrim_losses.append(discrim_loss.data.cpu().numpy())
        losses_in_time.append(loss_in_time)

    if device == "cuda":
        torch.cuda.empty_cache()

    return unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
Julien Chaumond's avatar
Julien Chaumond committed
416

417
418

def generate_text_pplm(
    model,
    tokenizer,
    context=None,
    past=None,
    device="cuda",
    perturb=True,
    bow_indices=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    repetition_penalty=1.0,
):
    """
    Generate `length` tokens, optionally steering each step with `perturb_past`.

    Args:
        model, tokenizer: language model and its tokenizer.
        context: token ids of the conditioning text.
            NOTE(review): with an empty/None context `output_so_far` stays None
            and is passed to the model as-is - callers are expected to supply
            a non-empty context.
        past: cached past to start from, or None to compute it from the context.
        device: device on which tensors are created.
        perturb: whether to perturb the past and fuse perturbed/unperturbed
            distributions.
        bow_indices: tokenized bags of words (see `build_bows_one_hot_vectors`).
        classifier, class_label: discriminator head and target class id.
        loss_type, num_iterations, horizon_length, window_length, decay, gamma,
        kl_scale: forwarded to `perturb_past`.
        length: number of tokens to generate.
        stepsize: perturbation step size; set to zero from step `grad_length` on.
        temperature: divisor applied to the perturbed logits.
        top_k: passed to `top_k_filter`.
        sample: sample from the distribution when True, take the argmax otherwise.
        gm_scale: exponent weighting perturbed vs. unperturbed probabilities.
        repetition_penalty: penalty applied to logits of already generated tokens.

    Returns:
        Tuple `(output_so_far, unpert_discrim_loss, loss_in_time)`.
    """
    output_so_far = None
    if context:
        context_t = torch.tensor(context, device=device, dtype=torch.long)
        while len(context_t.shape) < 2:
            context_t = context_t.unsqueeze(0)
        output_so_far = context_t

    # collect one hot vectors for bags of words
    one_hot_bows_vectors = build_bows_one_hot_vectors(bow_indices, tokenizer, device)

    # Loop-invariant: the loss module is stateless, build it once
    ce_loss = torch.nn.CrossEntropyLoss()

    grad_norms = None
    last = None
    unpert_discrim_loss = 0
    loss_in_time = []
    for i in trange(length, ascii=True):

        # Get past/probs for current output, except for last word
        # Note that GPT takes 2 inputs: past + current_token

        # run model forward to obtain unperturbed
        if past is None and output_so_far is not None:
            last = output_so_far[:, -1:]
            if output_so_far.shape[1] > 1:
                past = model(output_so_far[:, :-1])["past_key_values"]

        lm_output = model(output_so_far)
        unpert_logits, unpert_past, unpert_all_hidden = (
            lm_output["logits"],
            lm_output["past_key_values"],
            lm_output["hidden_states"],
        )
        unpert_last_hidden = unpert_all_hidden[-1]

        # check if we are above grad max length
        if i >= grad_length:
            current_stepsize = stepsize * 0
        else:
            current_stepsize = stepsize

        # modify the past if necessary
        if not perturb or num_iterations == 0:
            pert_past = past

        else:
            accumulated_hidden = unpert_last_hidden[:, :-1, :]
            accumulated_hidden = torch.sum(accumulated_hidden, dim=1)

            if past is not None:
                pert_past, _, grad_norms, loss_this_iter = perturb_past(
                    past,
                    model,
                    last,
                    unpert_past=unpert_past,
                    unpert_logits=unpert_logits,
                    accumulated_hidden=accumulated_hidden,
                    grad_norms=grad_norms,
                    stepsize=current_stepsize,
                    one_hot_bows_vectors=one_hot_bows_vectors,
                    classifier=classifier,
                    class_label=class_label,
                    loss_type=loss_type,
                    num_iterations=num_iterations,
                    horizon_length=horizon_length,
                    window_length=window_length,
                    decay=decay,
                    gamma=gamma,
                    kl_scale=kl_scale,
                    device=device,
                )
                loss_in_time.append(loss_this_iter)
            else:
                pert_past = past

        lm_output = model(last, past_key_values=pert_past)
        pert_logits, past = (
            lm_output["logits"],
            lm_output["past_key_values"],
        )
        pert_logits = pert_logits[:, -1, :] / temperature  # + SMALL_CONST

        # Repetition penalty: push logits of already generated tokens down
        # (multiply negative logits, divide non-negative ones)
        for token_idx in set(output_so_far[0].tolist()):
            if pert_logits[0, token_idx] < 0:
                pert_logits[0, token_idx] *= repetition_penalty
            else:
                pert_logits[0, token_idx] /= repetition_penalty

        pert_probs = F.softmax(pert_logits, dim=-1)

        if classifier is not None:
            prediction = classifier(torch.mean(unpert_last_hidden, dim=1))
            label = torch.tensor([class_label], device=device, dtype=torch.long)
            unpert_discrim_loss = ce_loss(prediction, label)
            print("unperturbed discrim loss", unpert_discrim_loss.data.cpu().numpy())
        else:
            unpert_discrim_loss = 0

        # Fuse the modified model and original model
        if perturb:

            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)

            pert_probs = (pert_probs ** gm_scale) * (unpert_probs ** (1 - gm_scale))  # + SMALL_CONST
            pert_probs = top_k_filter(pert_probs, k=top_k, probs=True)  # + SMALL_CONST

            # rescale
            if torch.sum(pert_probs) <= 1:
                pert_probs = pert_probs / torch.sum(pert_probs)

        else:
            pert_logits = top_k_filter(pert_logits, k=top_k)  # + SMALL_CONST
            pert_probs = F.softmax(pert_logits, dim=-1)

        # sample or greedy
        if sample:
            last = torch.multinomial(pert_probs, num_samples=1)

        else:
            _, last = torch.topk(pert_probs, k=1, dim=-1)

        # update context/output_so_far appending the new token
        output_so_far = last if output_so_far is None else torch.cat((output_so_far, last), dim=1)

        print(tokenizer.decode(output_so_far.tolist()[0]))

    return output_so_far, unpert_discrim_loss, loss_in_time
Julien Chaumond's avatar
Julien Chaumond committed
570
571


572
573
def set_generic_model_params(discrim_weights, discrim_meta):
    """
    Register a user-supplied discriminator under the name "generic".

    The JSON file at `discrim_meta` is loaded, its "path" entry is set to
    `discrim_weights`, and the result is stored in
    DISCRIMINATOR_MODELS_PARAMS["generic"].

    Args:
        discrim_weights: path to the discriminator weights.
        discrim_meta: path to the JSON file describing the discriminator.

    Raises:
        ValueError: if either argument is None.
    """
    if discrim_weights is None:
        raise ValueError("When using a generic discriminator, discrim_weights need to be specified")
    if discrim_meta is None:
        raise ValueError("When using a generic discriminator, discrim_meta need to be specified")

    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(discrim_meta, "r", encoding="utf-8") as discrim_meta_file:
        meta = json.load(discrim_meta_file)
    meta["path"] = discrim_weights
    DISCRIMINATOR_MODELS_PARAMS["generic"] = meta
582
583


584
def run_pplm_example(
    pretrained_model="gpt2-medium",
    cond_text="",
    uncond=False,
    num_samples=1,
    bag_of_words=None,
    discrim=None,
    discrim_weights=None,
    discrim_meta=None,
    class_label=-1,
    length=100,
    stepsize=0.02,
    temperature=1.0,
    top_k=10,
    sample=False,
    num_iterations=3,
    grad_length=10000,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    gm_scale=0.9,
    kl_scale=0.01,
    seed=0,
    no_cuda=False,
    colorama=False,
    repetition_penalty=1.0,
):
    """Load GPT-2, generate unperturbed and perturbed texts, and print them.

    Args:
        pretrained_model: Model name or checkpoint path handed to
            ``from_pretrained``. Overridden by the discriminator's own
            ``"pretrained_model"`` entry when ``discrim`` is given.
        cond_text: Prefix text to condition on. When empty (and ``uncond`` is
            false) the user is prompted on stdin until a non-empty prompt is
            entered.
        uncond: When true, only the tokenizer's BOS token is used as prefix.
        bag_of_words: ``;``-separated bag-of-words specification. It is
            forwarded to ``full_text_generation`` and, with ``colorama``, also
            used to pick the tokens to highlight.
        discrim: Key into ``DISCRIMINATOR_MODELS_PARAMS``; ``"generic"`` first
            registers ``discrim_weights`` / ``discrim_meta``.
        discrim_weights: Weights location for the generic discriminator.
        discrim_meta: JSON metadata path for the generic discriminator.
        seed: Seed applied to both torch and numpy.
        no_cuda: Force CPU even when CUDA is available.
        colorama: When true, single-token bag-of-words tokens are printed in
            red using the ``colorama`` package. If that package cannot be
            imported the texts are printed without colors.
        num_samples, class_label, length, stepsize, temperature, top_k,
        sample, num_iterations, grad_length, horizon_length, window_length,
        decay, gamma, gm_scale, kl_scale, repetition_penalty: Forwarded
            unchanged to ``full_text_generation``.

    Returns:
        A list with one ``(tokenized_cond_text, pert_gen_tok_text,
        unpert_gen_tok_text)`` tuple per perturbed sample.
    """
    # set Random seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # set the device
    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"

    if discrim == "generic":
        set_generic_model_params(discrim_weights, discrim_meta)

    if discrim is not None:
        pretrained_model = DISCRIMINATOR_MODELS_PARAMS[discrim]["pretrained_model"]
        print("discrim = {}, pretrained_model set to discriminator's = {}".format(discrim, pretrained_model))

    # load pretrained model
    model = GPT2LMHeadModel.from_pretrained(pretrained_model, output_hidden_states=True)
    model.to(device)
    model.eval()

    # load tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)

    # Freeze GPT-2 weights
    for param in model.parameters():
        param.requires_grad = False

    # figure out conditioning text
    if uncond:
        tokenized_cond_text = tokenizer.encode([tokenizer.bos_token])
    else:
        raw_text = cond_text
        while not raw_text:
            print("Did you forget to add `--cond_text`? ")
            raw_text = input("Model prompt >>> ")
        tokenized_cond_text = tokenizer.encode(tokenizer.bos_token + raw_text)

    print("= Prefix of sentence =")
    print(tokenizer.decode(tokenized_cond_text))
    print()

    # generate unperturbed and perturbed texts

    # full_text_generation returns:
    # unpert_gen_tok_text, pert_gen_tok_texts, discrim_losses, losses_in_time
    unpert_gen_tok_text, pert_gen_tok_texts, _, _ = full_text_generation(
        model=model,
        tokenizer=tokenizer,
        context=tokenized_cond_text,
        device=device,
        num_samples=num_samples,
        bag_of_words=bag_of_words,
        discrim=discrim,
        class_label=class_label,
        length=length,
        stepsize=stepsize,
        temperature=temperature,
        top_k=top_k,
        sample=sample,
        num_iterations=num_iterations,
        grad_length=grad_length,
        horizon_length=horizon_length,
        window_length=window_length,
        decay=decay,
        gamma=gamma,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        repetition_penalty=repetition_penalty,
    )

    # untokenize unperturbed text
    unpert_gen_text = tokenizer.decode(unpert_gen_tok_text.tolist()[0])

    print("=" * 80)
    print("= Unperturbed generated text =")
    print(unpert_gen_text)
    print()

    # Import the coloring package once, under an alias, so the boolean
    # `colorama` argument is never rebound to the module. A missing package
    # degrades to plain output instead of losing every perturbed sample.
    color_lib = None
    if colorama:
        try:
            import colorama as color_lib
        except ImportError as exc:
            print("colorama could not be imported, printing without colors:", exc)

    bow_word_ids = set()
    if bag_of_words and color_lib is not None:
        bow_indices = get_bag_of_words_indices(bag_of_words.split(";"), tokenizer)
        for single_bow_list in bow_indices:
            # Only words encoded as exactly one token can be matched against
            # individual generated tokens; `== 1` also skips empty encodings,
            # for which `w[0]` would raise IndexError.
            bow_word_ids.update(w[0] for w in single_bow_list if len(w) == 1)

    generated_texts = []

    # iterate through the perturbed texts
    for i, pert_gen_tok_text in enumerate(pert_gen_tok_texts):
        try:
            # untokenize perturbed text
            token_ids = pert_gen_tok_text.tolist()[0]
            if color_lib is not None:
                # Decode token by token so bag-of-words hits can be wrapped
                # in color codes individually.
                pieces = []
                for word_id in token_ids:
                    piece = tokenizer.decode([word_id])
                    if word_id in bow_word_ids:
                        piece = "{}{}{}".format(
                            color_lib.Fore.RED,
                            piece,
                            color_lib.Style.RESET_ALL,
                        )
                    pieces.append(piece)
                pert_gen_text = "".join(pieces)
            else:
                pert_gen_text = tokenizer.decode(token_ids)

            print("= Perturbed generated text {} =".format(i + 1))
            print(pert_gen_text)
            print()
        except Exception as exc:
            # Best effort: a sample that cannot be rendered must not abort
            # the remaining ones.
            print("Ignoring error while generating perturbed text:", exc)

        # keep the prefix, perturbed seq, original seq for each index
        generated_texts.append((tokenized_cond_text, pert_gen_tok_text, unpert_gen_tok_text))

    return generated_texts
Julien Chaumond's avatar
Julien Chaumond committed
730
731


732
def build_arg_parser():
    """Build the command-line parser for this script.

    Each option's destination is meant to be passed as a keyword argument to
    ``run_pplm_example`` via ``**vars(args)``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pretrained_model",
        "-M",
        type=str,
        default="gpt2-medium",
        help="pretrained model name or path to local checkpoint",
    )
    parser.add_argument("--cond_text", type=str, default="The lake", help="Prefix texts to condition on")
    parser.add_argument("--uncond", action="store_true", help="Generate from end-of-text as prefix")
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="Number of samples to generate from the modified latents",
    )
    parser.add_argument(
        "--bag_of_words",
        "-B",
        type=str,
        default=None,
        help=(
            "Bags of words used for PPLM-BoW. "
            "Either a BOW id (see list in code) or a filepath. "
            "Multiple BoWs separated by ;"
        ),
    )
    parser.add_argument(
        "--discrim",
        "-D",
        type=str,
        default=None,
        choices=("clickbait", "sentiment", "toxicity", "generic"),
        help="Discriminator to use",
    )
    parser.add_argument(
        "--discrim_weights",
        type=str,
        default=None,
        help="Weights for the generic discriminator",
    )
    parser.add_argument(
        "--discrim_meta",
        type=str,
        default=None,
        help="Meta information for the generic discriminator",
    )
    parser.add_argument(
        "--class_label",
        type=int,
        default=-1,
        help="Class label used for the discriminator",
    )
    parser.add_argument("--length", type=int, default=100)
    parser.add_argument("--stepsize", type=float, default=0.02)
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=10)
    # The help text used to be a copy of the `--uncond` one; this flag
    # switches token selection between sampling and greedy top-1.
    parser.add_argument(
        "--sample",
        action="store_true",
        help="Sample the next token from the distribution instead of greedily picking the most likely one",
    )
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--grad_length", type=int, default=10000)
    parser.add_argument(
        "--window_length",
        type=int,
        default=0,
        help="Length of past which is being optimized; 0 corresponds to infinite window length",
    )
    parser.add_argument(
        "--horizon_length",
        type=int,
        default=1,
        help="Length of future to optimize over",
    )
    parser.add_argument("--decay", action="store_true", help="whether to decay or not")
    parser.add_argument("--gamma", type=float, default=1.5)
    parser.add_argument("--gm_scale", type=float, default=0.9)
    parser.add_argument("--kl_scale", type=float, default=0.01)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--no_cuda", action="store_true", help="no cuda")
    parser.add_argument("--colorama", action="store_true", help="colors keywords")
    parser.add_argument(
        "--repetition_penalty",
        type=float,
        default=1.0,
        help="Penalize repetition. More than 1.0 -> less repetition",
    )
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    run_pplm_example(**vars(args))