"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "64c393ee74a2294d1608dc327a997683b4ea689e"
Commit 2c731fd1 authored by thomwolf

small tweaks

parent 9343a231
````diff
@@ -349,7 +349,6 @@ class BertModel(nn.Module):
     """BERT model ("Bidirectional Embedding Representations from a Transformer").
     Example usage:
     ```python
     # Already been converted into WordPiece token ids
     input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
@@ -359,16 +358,10 @@ class BertModel(nn.Module):
     config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
         num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
-    model = modeling.BertModel(config=config, is_training=True,
-        input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)
-    label_embeddings = tf.get_variable(...)
-    pooled_output = model.get_pooled_output()
-    logits = tf.matmul(pooled_output, label_embeddings)
-    ...
+    model = modeling.BertModel(config=config)
+    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
     ```
     """
     def __init__(self, config: BertConfig):
         """Constructor for BertModel.
````
````diff
@@ -400,7 +393,26 @@ class BertModel(nn.Module):
         return all_encoder_layers, pooled_output

 class BertForSequenceClassification(nn.Module):
+    """BERT model for classification.
+    This module is composed of the BERT model with a linear layer on top of
+    the pooled output.
+    Example usage:
+    ```python
+    # Already been converted into WordPiece token ids
+    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
+    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
+    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]])
+    config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
+        num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
+    num_labels = 2
+    model = modeling.BertModel(config, num_labels)
+    logits = model(input_ids, token_type_ids, input_mask)
+    ```
+    """
     def __init__(self, config, num_labels):
         super(BertForSequenceClassification, self).__init__()
         self.bert = BertModel(config)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
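Two small nits in the added docstring are worth flagging: the example instantiates `modeling.BertModel(config, num_labels)` where `BertForSequenceClassification` is evidently meant (`BertModel.__init__` takes only `config`), and the `token_type_ids` value `2` falls outside the usual 0/1 segment vocabulary. A corrected sketch, with the forward behavior inferred from this diff (logits when no labels are passed, a `(loss, logits)` pair when they are):

```python
import torch

import modeling  # assumption: this repo's modeling.py is on PYTHONPATH

config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
                             num_hidden_layers=8, num_attention_heads=6,
                             intermediate_size=1024)
num_labels = 2

# The docstring says modeling.BertModel(config, num_labels); the
# classification head is presumably what is meant here.
model = modeling.BertForSequenceClassification(config, num_labels)

input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])  # 0/1 segments

# Without labels the head returns logits of shape [batch_size, num_labels];
# with labels (as in the training loop below) it returns (loss, logits).
logits = model(input_ids, token_type_ids, input_mask)
```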
```diff
@@ -115,16 +115,10 @@ parser.add_argument("--save_checkpoints_steps",
                     default = 1000,
                     type = int,
                     help = "How often to save the model checkpoint.")
-parser.add_argument("--iterations_per_loop",
-                    default = 1000,
-                    type = int,
-                    help = "How many steps to make in each estimator call.")
 parser.add_argument("--no_cuda",
                     default = False,
                     type = bool,
                     help = "Whether not to use CUDA when available")
 parser.add_argument("--local_rank",
                     type=int,
                     default=-1,
```
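Dropping `--iterations_per_loop` makes sense: it was a TPU/Estimator-era knob from the TensorFlow script with no PyTorch equivalent. One caveat on the surviving `--no_cuda` flag: `type = bool` is a classic argparse pitfall, because argparse applies `type()` to the raw string and `bool("False")` is `True`. A sketch of the conventional fix:

```python
import argparse

parser = argparse.ArgumentParser()
# argparse calls type() on the raw string, and bool("False") == True, so
# "--no_cuda False" would still disable CUDA. A store_true flag avoids the
# string-to-bool conversion entirely: flag present -> True, absent -> False.
parser.add_argument("--no_cuda",
                    action="store_true",
                    help="Whether not to use CUDA when available")

assert parser.parse_args(["--no_cuda"]).no_cuda is True
assert parser.parse_args([]).no_cuda is False
```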
```diff
@@ -518,16 +512,17 @@ def main():
         model.train()
         global_step = 0
-        for input_ids, input_mask, segment_ids, label_ids in train_dataloader:
-            input_ids = input_ids.to(device)
-            input_mask = input_mask.float().to(device)
-            segment_ids = segment_ids.to(device)
-            label_ids = label_ids.to(device)
-
-            loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
-            loss.backward()
-            optimizer.step()
-            global_step += 1
+        for epoch in args.num_train_epochs:
+            for input_ids, input_mask, segment_ids, label_ids in train_dataloader:
+                input_ids = input_ids.to(device)
+                input_mask = input_mask.float().to(device)
+                segment_ids = segment_ids.to(device)
+                label_ids = label_ids.to(device)
+
+                loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
+                loss.backward()
+                optimizer.step()
+                global_step += 1

     if args.do_eval:
         eval_examples = processor.get_dev_examples(args.data_dir)
```
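As recorded, the new epoch loop would raise a `TypeError` at runtime: `args.num_train_epochs` is a number, not an iterable, so `range(int(...))` is presumably intended. The hunk also shows no `optimizer.zero_grad()`, which may simply live outside the visible context; if it does not, gradients would accumulate across steps. A hypothetical corrected loop, with every name mirroring the diff:

```python
def train(model, optimizer, train_dataloader, device, num_train_epochs):
    """Hypothetical rewrite of the loop above; all names mirror the diff."""
    model.train()
    global_step = 0
    for epoch in range(int(num_train_epochs)):  # iterate a count, not a bare number
        for input_ids, input_mask, segment_ids, label_ids in train_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.float().to(device)
            segment_ids = segment_ids.to(device)
            label_ids = label_ids.to(device)

            optimizer.zero_grad()  # not visible in the hunk; without it, gradients accumulate
            loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
            loss.backward()
            optimizer.step()
            global_step += 1
    return global_step
```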