# run_tf_glue.py
import torch

import tensorflow as tf
import tensorflow_datasets
from pytorch_transformers import (BertTokenizer, BertForSequenceClassification,
                                  TFBertForSequenceClassification,
                                  glue_convert_examples_to_features)

# Load tokenizer, model, dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
tf_model = TFBertForSequenceClassification.from_pretrained('bert-base-cased')
dataset = tensorflow_datasets.load("glue/mrpc")
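# tensorflow_datasets.load() downloads GLUE/MRPC on first use and returns a dict
# of tf.data.Dataset objects keyed by split ('train', 'validation', 'test').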

# Prepare dataset for GLUE
train_dataset = glue_convert_examples_to_features(dataset['train'], tokenizer, task='mrpc', max_length=128)
valid_dataset = glue_convert_examples_to_features(dataset['validation'], tokenizer, task='mrpc', max_length=128)
train_dataset = train_dataset.shuffle(100).batch(32).repeat(3)
valid_dataset = valid_dataset.batch(64)
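# The magic numbers come from the standard MRPC split sizes: 3,668 training pairs
# give 3668 / 32 ≈ 115 steps per epoch, and 408 validation pairs give
# 408 / 64 ≈ 7 validation steps; repeat(3) matches the 3 epochs passed to fit().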

# Compile tf.keras model for training
learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(2e-5, 345, end_learning_rate=0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
tf_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, clipnorm=1.0),
                 loss=loss,
                 metrics=['sparse_categorical_accuracy'])
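# 345 decay steps = 115 steps/epoch * 3 epochs, so the learning rate decays
# linearly from 2e-5 down to 0 over the whole training run.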

# Train and evaluate using tf.keras.Model.fit()
tf_model.fit(train_dataset, epochs=3, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)

# Save the model and load it in PyTorch
tf_model.save_pretrained('./runs/')
pt_model = BertForSequenceClassification.from_pretrained('./runs/', from_tf=True)

# Quickly inspect a few predictions
inputs = tokenizer.encode_plus("I said the company is doing great", "The company has good results", add_special_tokens=True)
pt_model.eval()  # disable dropout for deterministic predictions
pred = pt_model(torch.tensor([inputs['input_ids']]))
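# A minimal sketch of how to read the result: pytorch_transformers models return
# a tuple with the logits as the first element, and in GLUE/MRPC label 1 means
# the two sentences are paraphrases.
logits = pred[0]
print('paraphrase' if logits.argmax().item() == 1 else 'not paraphrase')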

# Miscellaneous: scratch variant that feeds cached PyTorch features into tf.data
import torch

import tensorflow as tf
import tensorflow_datasets
from pytorch_transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification, glue_convert_examples_to_features

# Load tokenizer, model, dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model = TFBertForSequenceClassification.from_pretrained('bert-base-cased')

pt_train_dataset = torch.load('../../data/glue_data/MRPC/cached_train_bert-base-cased_128_mrpc')
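# Assumption: the cached file above is the one written by the PyTorch GLUE
# example's preprocessing; each element is an InputFeatures object carrying
# input_ids, attention_mask, token_type_ids and label, already padded to
# max_length 128 (which is why .batch() below can stack them directly).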

def gen():
    for el in pt_train_dataset:
        yield ((el.input_ids, el.attention_mask, el.token_type_ids), (el.label,))

dataset = tf.data.Dataset.from_generator(
    gen,
    output_types=((tf.int32, tf.int32, tf.int32), (tf.int64,)),
    output_shapes=((tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None])),
                   (tf.TensorShape([]),)))
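# The nested ((input_ids, attention_mask, token_type_ids), (label,)) structure
# matches what tf.keras.Model.fit() expects from a tf.data.Dataset: a tuple of
# model inputs paired with a tuple of targets.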

dataset = dataset.shuffle(100).batch(32)
next(iter(dataset))  # sanity check: pull one batch to exercise the generator pipeline

learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(2e-5, 345, end_learning_rate=0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                                 epsilon=1e-08,
                                                 clipnorm=1.0),
              loss=loss,
              metrics=['sparse_categorical_accuracy'])

tensorboard_cbk = tf.keras.callbacks.TensorBoard(log_dir='./runs/', update_freq=10, histogram_freq=1)
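# histogram_freq=1 writes weight histograms once per epoch, and update_freq=10
# logs train metrics more often than once per epoch (every 10 batches, or
# samples, depending on the TF version), all under ./runs/.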

# Train model
model.fit(dataset, epochs=3, callbacks=[tensorboard_cbk])