Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
b97af8cc
Commit
b97af8cc
authored
Sep 13, 2019
by
thomwolf
Browse files
skip finetuned checkpoints
parent
65c49bb2
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
108 deletions
+17
-108
pytorch_transformers/__init__.py
pytorch_transformers/__init__.py
+5
-0
pytorch_transformers/convert_pytorch_checkpoint_to_tf2.py
pytorch_transformers/convert_pytorch_checkpoint_to_tf2.py
+11
-3
pytorch_transformers/modeling_tf_transfo_xl_utilities.py
pytorch_transformers/modeling_tf_transfo_xl_utilities.py
+1
-105
No files found.
pytorch_transformers/__init__.py
View file @
b97af8cc
...
@@ -113,6 +113,11 @@ if _tf_available:
...
@@ -113,6 +113,11 @@ if _tf_available:
load_gpt2_pt_weights_in_tf2
,
load_gpt2_pt_weights_in_tf2
,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
)
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
)
from
.modeling_tf_transfo_xl
import
(
TFTransfoXLPreTrainedModel
,
TFTransfoXLMainLayer
,
TFTransfoXLModel
,
TFTransfoXLLMHeadModel
,
load_transfo_xl_pt_weights_in_tf2
,
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
)
from
.modeling_tf_xlnet
import
(
TFXLNetPreTrainedModel
,
TFXLNetMainLayer
,
from
.modeling_tf_xlnet
import
(
TFXLNetPreTrainedModel
,
TFXLNetMainLayer
,
TFXLNetModel
,
TFXLNetLMHeadModel
,
TFXLNetModel
,
TFXLNetLMHeadModel
,
TFXLNetForSequenceClassification
,
TFXLNetForSequenceClassification
,
...
...
pytorch_transformers/convert_pytorch_checkpoint_to_tf2.py
View file @
b97af8cc
...
@@ -27,7 +27,8 @@ from pytorch_transformers import is_torch_available, cached_path
...
@@ -27,7 +27,8 @@ from pytorch_transformers import is_torch_available, cached_path
from
pytorch_transformers
import
(
BertConfig
,
TFBertForPreTraining
,
load_bert_pt_weights_in_tf2
,
from
pytorch_transformers
import
(
BertConfig
,
TFBertForPreTraining
,
load_bert_pt_weights_in_tf2
,
GPT2Config
,
TFGPT2LMHeadModel
,
load_gpt2_pt_weights_in_tf2
,
GPT2Config
,
TFGPT2LMHeadModel
,
load_gpt2_pt_weights_in_tf2
,
XLNetConfig
,
TFXLNetLMHeadModel
,
load_xlnet_pt_weights_in_tf2
,
XLNetConfig
,
TFXLNetLMHeadModel
,
load_xlnet_pt_weights_in_tf2
,
XLMConfig
,
TFXLMWithLMHeadModel
,
load_xlm_pt_weights_in_tf2
,)
XLMConfig
,
TFXLMWithLMHeadModel
,
load_xlm_pt_weights_in_tf2
,
TransfoXLConfig
,
TFTransfoXLLMHeadModel
,
load_transfo_xl_pt_weights_in_tf2
,)
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
...
@@ -35,12 +36,15 @@ if is_torch_available():
...
@@ -35,12 +36,15 @@ if is_torch_available():
from
pytorch_transformers
import
(
BertForPreTraining
,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
,
from
pytorch_transformers
import
(
BertForPreTraining
,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
,
GPT2LMHeadModel
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
,
GPT2LMHeadModel
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
,
XLNetLMHeadModel
,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
,
XLNetLMHeadModel
,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
,
XLMWithLMHeadModel
,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
,)
XLMWithLMHeadModel
,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
,
TransfoXLLMHeadModel
,
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
,)
else
:
else
:
(
BertForPreTraining
,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
,
(
BertForPreTraining
,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
,
GPT2LMHeadModel
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
,
GPT2LMHeadModel
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
,
XLNetLMHeadModel
,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
,
XLNetLMHeadModel
,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
,
XLMWithLMHeadModel
,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
,)
=
(
XLMWithLMHeadModel
,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
,
TransfoXLLMHeadModel
,
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
,)
=
(
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
...
@@ -55,6 +59,7 @@ MODEL_CLASSES = {
...
@@ -55,6 +59,7 @@ MODEL_CLASSES = {
'gpt2'
:
(
GPT2Config
,
TFGPT2LMHeadModel
,
load_gpt2_pt_weights_in_tf2
,
GPT2LMHeadModel
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
),
'gpt2'
:
(
GPT2Config
,
TFGPT2LMHeadModel
,
load_gpt2_pt_weights_in_tf2
,
GPT2LMHeadModel
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
),
'xlnet'
:
(
XLNetConfig
,
TFXLNetLMHeadModel
,
load_xlnet_pt_weights_in_tf2
,
XLNetLMHeadModel
,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
),
'xlnet'
:
(
XLNetConfig
,
TFXLNetLMHeadModel
,
load_xlnet_pt_weights_in_tf2
,
XLNetLMHeadModel
,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
),
'xlm'
:
(
XLMConfig
,
TFXLMWithLMHeadModel
,
load_xlm_pt_weights_in_tf2
,
XLMWithLMHeadModel
,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
),
'xlm'
:
(
XLMConfig
,
TFXLMWithLMHeadModel
,
load_xlm_pt_weights_in_tf2
,
XLMWithLMHeadModel
,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
),
'transfo-xl'
:
(
TransfoXLConfig
,
TFTransfoXLLMHeadModel
,
load_transfo_xl_pt_weights_in_tf2
,
TransfoXLLMHeadModel
,
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
),
}
}
def
convert_pt_checkpoint_to_tf
(
model_type
,
pytorch_checkpoint_path
,
config_file
,
tf_dump_path
,
compare_with_pt_model
=
False
):
def
convert_pt_checkpoint_to_tf
(
model_type
,
pytorch_checkpoint_path
,
config_file
,
tf_dump_path
,
compare_with_pt_model
=
False
):
...
@@ -118,6 +123,9 @@ def convert_all_pt_checkpoints_to_tf(args_model_type, tf_dump_path, compare_with
...
@@ -118,6 +123,9 @@ def convert_all_pt_checkpoints_to_tf(args_model_type, tf_dump_path, compare_with
print
(
"-"
*
100
)
print
(
"-"
*
100
)
print
(
" Converting checkpoint {}/{}: {}"
.
format
(
i
,
len
(
aws_config_map
),
shortcut_name
))
print
(
" Converting checkpoint {}/{}: {}"
.
format
(
i
,
len
(
aws_config_map
),
shortcut_name
))
print
(
"-"
*
100
)
print
(
"-"
*
100
)
if
'finetuned'
in
shortcut_name
:
print
(
" Skipping fintenued checkpoint "
)
continue
config_file
=
cached_path
(
aws_config_map
[
shortcut_name
],
force_download
=
True
)
config_file
=
cached_path
(
aws_config_map
[
shortcut_name
],
force_download
=
True
)
model_file
=
cached_path
(
aws_model_maps
[
shortcut_name
],
force_download
=
True
)
model_file
=
cached_path
(
aws_model_maps
[
shortcut_name
],
force_download
=
True
)
...
...
pytorch_transformers/modeling_tf_transfo_xl_utilities.py
View file @
b97af8cc
...
@@ -13,8 +13,7 @@
...
@@ -13,8 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
""" Utilities for PyTorch Transformer XL model.
""" A TF 2.0 Adaptive Softmax for Transformer XL model.
Directly adapted from https://github.com/kimiyoung/transformer-xl.
"""
"""
from
collections
import
defaultdict
from
collections
import
defaultdict
...
@@ -174,106 +173,3 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
...
@@ -174,106 +173,3 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
self
.
add_metric
(
loss
,
name
=
self
.
name
,
aggregation
=
'mean'
if
return_mean
else
''
)
self
.
add_metric
(
loss
,
name
=
self
.
name
,
aggregation
=
'mean'
if
return_mean
else
''
)
return
out
return
out
def mul_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs,
                            params, tie_projs,
                            initializer=None, proj_initializer=None,
                            div_val=1, perms=None, proj_same_dim=True,
                            scope='adaptive_softmax',
                            **kwargs):
    """Build the adaptive-softmax negative log-likelihood as a single scalar.

    With no cutoffs this is a plain softmax cross-entropy over ``n_token``
    classes, averaged with ``tf.reduce_mean``.  With cutoffs, the classes are
    split into clusters delimited by ``[0] + cutoffs + [n_token]``; cluster 0
    (the "head") additionally predicts one logit per cutoff, and every further
    cluster (a "tail") is scored on hidden states selected through ``perms``.
    The result is then ``total_loss / total_cnt`` accumulated over clusters.

    NOTE(review): ``tf.variable_scope``, ``tf.get_variable``, ``tf.to_float``
    and ``tf.to_int32`` look like TF 1.x graph-mode APIs - confirm they exist
    in the TensorFlow version this module targets.

    Args:
        hidden: Hidden states.  Contracted as ``'ibd'`` (rank 3) for the head
            and against ``perms[i]`` with ``'ibd,ibk->kd'`` for tail clusters.
            Presumably (length, batch, dim) - TODO confirm axis meaning.
        target: Integer class ids.  Used directly as labels when there are no
            cutoffs; for tails, ``target - l_idx`` is contracted with
            ``perms[i]`` as ``'ib,ibk->k'``.
        n_token: Total number of classes; size of the bias in the no-cutoff
            case and the final cluster boundary otherwise.
        d_embed: Base embedding width; cluster ``i`` uses
            ``d_embed // (div_val ** i)``.
        d_proj: Second dimension of any freshly created ``'proj'`` variable.
        cutoffs: List of cluster boundaries (must be a ``list``: it is
            concatenated with list literals).  Empty means plain softmax.
        params: Indexable pair; ``params[0]`` holds the output weights and
            ``params[1]`` the projections.  With ``div_val == 1`` the weights
            are sliced ``[l_idx:r_idx]`` per cluster and the projection is
            shared; otherwise both are indexed by cluster number.
        tie_projs: Per-cluster flags; a truthy entry reuses the projection
            from ``params[1]`` instead of creating or omitting one.
        initializer: Accepted for interface compatibility; not used here.
        proj_initializer: Initializer for newly created ``'proj'`` variables.
        div_val: Divisor applied per cluster to the embedding width.
        perms: Per-cluster tensors, required when ``cutoffs`` is non-empty.
            ``perms[0]`` multiplies the head loss elementwise; ``perms[i]``
            for ``i > 0`` is contracted as ``'ibk'``.  Presumably one-hot
            selection matrices routing positions to clusters - TODO confirm.
        proj_same_dim: When false, an untied cluster whose width already
            equals ``d_proj`` gets no projection even if ``div_val != 1``.
        scope: Name of the enclosing variable scope.
        **kwargs: Must contain ``head_target`` (labels for the head softmax)
            when ``cutoffs`` is non-empty.

    Returns:
        Scalar tensor holding the negative log-likelihood.

    Raises:
        ValueError: If ``cutoffs`` is non-empty and ``perms`` or
            ``head_target`` is missing.
    """
    has_clusters = len(cutoffs) > 0
    head_target = kwargs.get("head_target")

    # Fail fast, before any variable is created in the scope: both inputs are
    # dereferenced unconditionally on the clustered path below.
    if has_clusters:
        if perms is None:
            raise ValueError(
                "mul_adaptive_logsoftmax: `perms` is required when `cutoffs` "
                "is non-empty.")
        if head_target is None:
            raise ValueError(
                "mul_adaptive_logsoftmax: keyword argument `head_target` is "
                "required when `cutoffs` is non-empty.")

    def _logit(x, W, b, proj):
        """Return ``x`` (optionally projected by ``proj``) times ``W^T`` plus ``b``."""
        y = x
        if x.shape.ndims == 3:
            if proj is not None:
                y = tf.einsum('ibd,ed->ibe', y, proj)
            return tf.einsum('ibd,nd->ibn', y, W) + b
        else:
            # Non rank-3 inputs (the gathered tail states) use 2-D contractions.
            if proj is not None:
                y = tf.einsum('id,ed->ie', y, proj)
            return tf.einsum('id,nd->in', y, W) + b

    params_W, params_projs = params[0], params[1]

    with tf.variable_scope(scope):
        if not has_clusters:
            softmax_b = tf.get_variable('bias', [n_token],
                                        initializer=tf.zeros_initializer())
            output = _logit(hidden, params_W, softmax_b, params_projs)
            nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=target, logits=output)
            nll = tf.reduce_mean(nll)
        else:
            total_loss, total_cnt = 0, 0
            cutoff_ends = [0] + cutoffs + [n_token]
            for i in range(len(cutoff_ends) - 1):
                with tf.variable_scope('cutoff_{}'.format(i)):
                    l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]

                    cur_d_embed = d_embed // (div_val ** i)

                    if div_val == 1:
                        cur_W = params_W[l_idx: r_idx]
                    else:
                        cur_W = params_W[i]
                    cur_b = tf.get_variable('b', [r_idx - l_idx],
                                            initializer=tf.zeros_initializer())
                    if tie_projs[i]:
                        if div_val == 1:
                            cur_proj = params_projs
                        else:
                            cur_proj = params_projs[i]
                    else:
                        if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed:
                            cur_proj = None
                        else:
                            cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj],
                                                       initializer=proj_initializer)

                    if i == 0:
                        # The head gets one extra row/bias per cutoff, i.e. one
                        # logit per tail cluster.
                        cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed],
                                                    initializer=tf.zeros_initializer())
                        cluster_b = tf.get_variable('cluster_b', [len(cutoffs)],
                                                    initializer=tf.zeros_initializer())
                        cur_W = tf.concat([cur_W, cluster_W], 0)
                        cur_b = tf.concat([cur_b, cluster_b], 0)

                        head_logit = _logit(hidden, cur_W, cur_b, cur_proj)

                        # head_nll is reused by every later (tail) iteration.
                        head_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=head_target,
                            logits=head_logit)

                        masked_loss = head_nll * perms[i]
                        total_loss += tf.reduce_sum(masked_loss)
                        total_cnt += tf.reduce_sum(perms[i])
                    else:
                        cur_head_nll = tf.einsum('ib,ibk->k', head_nll, perms[i])

                        cur_hidden = tf.einsum('ibd,ibk->kd', hidden, perms[i])
                        tail_logit = _logit(cur_hidden, cur_W, cur_b, cur_proj)

                        # Labels are shifted to be cluster-relative, cast to
                        # float for the contraction, then cast back to int32.
                        tail_target = tf.einsum('ib,ibk->k', tf.to_float(target - l_idx),
                                                perms[i])
                        tail_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=tf.to_int32(tail_target),
                            logits=tail_logit)

                        sum_nll = cur_head_nll + tail_nll
                        mask = tf.reduce_sum(perms[i], [0, 1])

                        masked_loss = sum_nll * mask
                        total_loss += tf.reduce_sum(masked_loss)
                        total_cnt += tf.reduce_sum(mask)

            nll = total_loss / total_cnt

    return nll
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment