chenpangpang / transformers · Commits
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b88090914d77f39311f59055546c74f7f17fad9b"
Commit 4a0b59e9, authored Nov 01, 2018 by thomwolf
run_classifier WIP
Parent: 7af7f817
Showing 3 changed files with 47 additions and 69 deletions.
modeling_pytorch.py        +1  -1
optimization_pytorch.py    +2  -2
run_classifier_pytorch.py  +44 -66
modeling_pytorch.py  (view file @ 4a0b59e9)
@@ -237,7 +237,7 @@ class BERTSelfAttention(nn.Module):
         context_layer = torch.matmul(attention_probs, value_layer)
         context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
-        context_layer = context_layer.view(*new_x_shape)
+        context_layer = context_layer.view(*new_context_layer_shape)
         return context_layer
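For context, this one-line fix replaces a reference to `new_x_shape`, which is not defined in `BERTSelfAttention.forward`, with the `new_context_layer_shape` computed on the line above. A minimal standalone sketch of the reshape being performed (the tensor sizes are illustrative, not taken from the commit):

import torch

# Illustrative sizes only: batch=2, heads=12, seq_len=8, head_size=64.
batch, num_heads, seq_len, head_size = 2, 12, 8, 64
all_head_size = num_heads * head_size

attention_probs = torch.softmax(torch.randn(batch, num_heads, seq_len, seq_len), dim=-1)
value_layer = torch.randn(batch, num_heads, seq_len, head_size)

# Same sequence of operations as the patched lines above.
context_layer = torch.matmul(attention_probs, value_layer)       # (2, 12, 8, 64)
context_layer = context_layer.permute(0, 2, 1, 3).contiguous()   # (2, 8, 12, 64)
new_context_layer_shape = context_layer.size()[:-2] + (all_head_size,)
context_layer = context_layer.view(*new_context_layer_shape)     # (2, 8, 768)

assert context_layer.shape == (batch, seq_len, all_head_size)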
optimization_pytorch.py  (view file @ 4a0b59e9)
@@ -22,7 +22,7 @@ SCHEDULES = {
 }

-class OpenAIAdam(Optimizer):
+class BERTAdam(Optimizer):
     """Implements Open AI version of Adam algorithm with weight decay fix.
     """
     def __init__(self, params, lr, schedule, warmup, t_total,

@@ -43,7 +43,7 @@ class OpenAIAdam(Optimizer):
         defaults = dict(lr=lr, schedule=schedule, warmup=warmup, t_total=t_total,
                         b1=b1, b2=b2, e=e, l2=l2, vector_l2=vector_l2,
                         max_grad_norm=max_grad_norm)
-        super(OpenAIAdam, self).__init__(params, defaults)
+        super(BERTAdam, self).__init__(params, defaults)

     def get_lr(self):
         lr = []
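The rename means downstream code now imports and constructs `BERTAdam` instead of `OpenAIAdam`. A minimal usage sketch, assuming `optimization_pytorch.py` is importable from the working directory and that the remaining constructor arguments (`b1`, `b2`, `e`, `l2`, `vector_l2`, `max_grad_norm`) keep their defaults; the hyperparameter values below are illustrative only:

import torch
from optimization_pytorch import BERTAdam  # module from this commit

# A stand-in module; the real run_classifier script passes a BertModel instead.
model = torch.nn.Linear(768, 2)

optimizer = BERTAdam(model.parameters(),
                     lr=5e-5,
                     schedule='warmup_linear',
                     warmup=0.1,
                     t_total=1000)

loss = model(torch.randn(4, 768)).sum()
loss.backward()
optimizer.step()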
run_classifier_pytorch.py  (view file @ 4a0b59e9)
@@ -20,7 +20,8 @@ from __future__ import print_function
 import csv
 import os
-import modeling_pytorch
+from modeling_pytorch import BertConfig, BertModel
+from optimization_pytorch import BERTAdam
 # import optimization
 import tokenization_pytorch
 import torch
@@ -116,10 +117,16 @@ parser.add_argument("--iterations_per_loop",
                     type=int,
                     help="How many steps to make in each estimator call.")
-parser.add_argument("--use_gpu",
-                    default=True,
+parser.add_argument("--no_cuda",
+                    default=False,
                     type=bool,
-                    help="Whether to use GPU")
+                    help="Whether not to use CUDA when available")
+parser.add_argument("--local_rank",
+                    type=int,
+                    default=-1,
+                    help="local_rank for distributed training on gpus")

 ### BEGIN - TO DELETE EVENTUALLY --> NO SENSE IN PYTORCH ###
 parser.add_argument("--use_tpu",
                     default=False,
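A small sketch of just the two CUDA-related flags this hunk introduces (the rest of the parser is omitted). One caveat worth noting: with argparse, `type=bool` converts any non-empty string to `True`, so passing `--no_cuda False` on the command line would still evaluate as truthy; only the bare default behaves as intended.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--no_cuda",
                    default=False,
                    type=bool,
                    help="Whether not to use CUDA when available")
parser.add_argument("--local_rank",
                    type=int,
                    default=-1,
                    help="local_rank for distributed training on gpus")

# Parsing an empty argument list keeps the defaults: no_cuda=False, local_rank=-1.
args = parser.parse_args([])
print(args.no_cuda, args.local_rank)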
@@ -422,18 +429,6 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length):
             tokens_b.pop()


-def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
-                 labels, num_labels, use_one_hot_embeddings):
-    raise NotImplementedError()
-
-
-def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
-                     num_train_steps, num_warmup_steps, use_gpu,
-                     use_one_hot_embeddings):
-    raise NotImplementedError()
-    ### ATTENTION - I removed the `use_tpu` argument
-
-
 def input_fn_builder(features, seq_length, is_training, drop_remainder):
     """Creates an `input_fn` closure to be passed to TPUEstimator."""
     ### ATTENTION - To rewrite ###
@@ -478,10 +473,19 @@ def main(_):
         "mrpc": MrpcProcessor,
     }

+    if args.local_rank == -1 or args.no_cuda:
+        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+        n_gpu = torch.cuda.device_count()
+    else:
+        device = torch.device("cuda", args.local_rank)
+        n_gpu = 1
+        print("Initializing the distributed backend: NCCL")
+    print("device", device, "n_gpu", n_gpu)
+
     if not args.do_train and not args.do_eval:
         raise ValueError("At least one of `do_train` or `do_eval` must be True.")

-    bert_config = modeling_pytorch.BertConfig.from_json_file(args.bert_config_file)
+    bert_config = BertConfig.from_json_file(args.bert_config_file)

     if args.max_seq_length > bert_config.max_position_embeddings:
         raise ValueError(
@@ -506,49 +510,24 @@ def main(_):
     tokenizer = tokenization_pytorch.FullTokenizer(
         vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)

-    # tpu_cluster_resolver = None
-    # if FLAGS.use_tpu and FLAGS.tpu_name:
-    #     tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
-    #         FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
-    # is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
-    # run_config = tf.contrib.tpu.RunConfig(
-    #     cluster=tpu_cluster_resolver,
-    #     master=FLAGS.master,
-    #     model_dir=FLAGS.output_dir,
-    #     save_checkpoints_steps=FLAGS.save_checkpoints_steps,
-    #     tpu_config=tf.contrib.tpu.TPUConfig(
-    #         iterations_per_loop=FLAGS.iterations_per_loop,
-    #         num_shards=FLAGS.num_tpu_cores,
-    #         per_host_input_for_training=is_per_host))
-
     train_examples = None
     num_train_steps = None
-    num_warmup_steps = None
     if args.do_train:
         train_examples = processor.get_train_examples(args.data_dir)
         num_train_steps = int(
             len(train_examples) / args.train_batch_size * args.num_train_epochs)
-        num_warmup_steps = int(num_train_steps * args.warmup_proportion)

-    model_fn = model_fn_builder(
-        bert_config=bert_config,
-        num_labels=len(label_list),
-        init_checkpoint=args.init_checkpoint,
-        learning_rate=args.learning_rate,
-        num_train_steps=num_train_steps,
-        num_warmup_steps=num_warmup_steps,
-        use_gpu=args.use_gpu,
-        use_one_hot_embeddings=args.use_gpu)  ### TO DO - to check when model_fn is written)
-
-    # If TPU is not available, this will fall back to normal Estimator on CPU
-    # or GPU. - TO DO
-    # estimator = tf.contrib.tpu.TPUEstimator(
-    #     use_tpu=args.use_tpu,
-    #     model_fn=model_fn,
-    #     config=run_config,
-    #     train_batch_size=args.train_batch_size,
-    #     eval_batch_size=args.eval_batch_size)
+    model = BertModel(bert_config)
+    if args.init_checkpoint is not None:
+        model.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu'))
+    model.to(device)
+
+    optimizer = BERTAdam([{'params': [p for n, p in model.named_parameters() if n != 'bias'], 'l2': 0.01},
+                          {'params': [p for n, p in model.named_parameters() if n != 'bias']}
+                         ],
+                         lr=args.learning_rate, schedule='warmup_linear',
+                         warmup=args.warmup_proportion,
+                         t_total=num_train_steps)

     if args.do_train:
         train_features = convert_examples_to_features(
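The `BERTAdam(...)` call above passes a list of parameter-group dicts, each of which can override per-group options such as `'l2'`. A minimal sketch of the same pattern with the stock `torch.optim.Adam` API, where `weight_decay` plays the role of `l2`; the bias/non-bias split below is the usual reason for this kind of grouping and is an assumption, not something stated in the commit:

import torch

model = torch.nn.Linear(768, 2)  # stand-in for the BERT classifier

# Exclude biases from weight decay by splitting parameters into two groups;
# each dict can carry its own hyperparameter overrides, like the 'l2' keys above.
decay_params = [p for n, p in model.named_parameters() if not n.endswith('bias')]
no_decay_params = [p for n, p in model.named_parameters() if n.endswith('bias')]

optimizer = torch.optim.Adam([
    {'params': decay_params, 'weight_decay': 0.01},
    {'params': no_decay_params, 'weight_decay': 0.0},
], lr=5e-5)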
@@ -606,4 +585,3 @@ def main(_):

 if __name__ == "__main__":
     main()
-    return None
\ No newline at end of file