chenpangpang / transformers · Commits
"vscode:/vscode.git/clone" did not exist on "ea1507fb458d41a0ee43045bcee2bcd77bf79497"
Commit 0731fa15
Authored Jan 07, 2020 by Julien Plu
Committed by Lysandre Debut, Jan 29, 2020
Apply quality and style requirements
Parent: a3998e76
Showing 5 changed files with 52 additions and 59 deletions.
src/transformers/__init__.py (+0 -7)
src/transformers/convert_pytorch_checkpoint_to_tf2.py (+3 -4)
src/transformers/modeling_tf_camembert.py (+38 -39)
templates/adding_a_new_example_script/run_xxx.py (+11 -8)
templates/adding_a_new_example_script/utils_xxx.py (+0 -1)
src/transformers/__init__.py
@@ -29,10 +29,8 @@ from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
from .configuration_mmbt import MMBTConfig
from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig
from .configuration_roberta import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, RobertaConfig
from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
from .configuration_t5 import T5_PRETRAINED_CONFIG_ARCHIVE_MAP, T5Config
from .configuration_transfo_xl import TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TransfoXLConfig
# Configurations
from .configuration_utils import PretrainedConfig
from .configuration_xlm import XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMConfig

@@ -57,7 +55,6 @@ from .data import (
    xnli_processors,
    xnli_tasks_num_labels,
)
# Files and general utilities
from .file_utils import (
    CONFIG_NAME,

@@ -74,10 +71,8 @@ from .file_utils import (
    is_tf_available,
    is_torch_available,
)
# Model Cards
from .modelcard import ModelCard
# TF 2.0 <=> PyTorch conversion utilities
from .modeling_tf_pytorch_utils import (
    convert_tf_weight_name_to_pt_weight_name,

@@ -88,7 +83,6 @@ from .modeling_tf_pytorch_utils import (
    load_tf2_model_in_pytorch_model,
    load_tf2_weights_in_pytorch_model,
)
# Pipelines
from .pipelines import (
    CsvPipelineDataFormat,

@@ -114,7 +108,6 @@ from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_roberta import RobertaTokenizer
from .tokenization_t5 import T5Tokenizer
from .tokenization_transfo_xl import TransfoXLCorpus, TransfoXLTokenizer
# Tokenizers
from .tokenization_utils import PreTrainedTokenizer
from .tokenization_xlm import XLMTokenizer
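This file only loses lines in this commit (+0 -7); the CamemBERT-related imports shown above stay in place and are what expose the configuration at the top level of the package. A small hedged usage sketch (the default constructor arguments are an assumption about CamembertConfig, not part of this diff):

    # Hedged usage sketch: CamembertConfig is re-exported from the package root,
    # as the imports above show. Constructor defaults are assumed, not shown here.
    from transformers import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig

    config = CamembertConfig()  # build a config with library defaults
    print(sorted(CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP))  # shortcut names for hosted configs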
src/transformers/convert_pytorch_checkpoint_to_tf2.py
@@ -22,12 +22,12 @@ import os
from transformers import (
    ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
    DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
    OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
    CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
    TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
    XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,

@@ -35,17 +35,18 @@ from transformers import (
    XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
    AlbertConfig,
    BertConfig,
    CamembertConfig,
    CTRLConfig,
    DistilBertConfig,
    GPT2Config,
    OpenAIGPTConfig,
    RobertaConfig,
    CamembertConfig,
    T5Config,
    TFAlbertForMaskedLM,
    TFBertForPreTraining,
    TFBertForQuestionAnswering,
    TFBertForSequenceClassification,
    TFCamembertForMaskedLM,
    TFCTRLLMHeadModel,
    TFDistilBertForMaskedLM,
    TFDistilBertForQuestionAnswering,

@@ -53,8 +54,6 @@ from transformers import (
    TFOpenAIGPTLMHeadModel,
    TFRobertaForMaskedLM,
    TFRobertaForSequenceClassification,
    TFCamembertForMaskedLM,
    TFCamembertForSequenceClassification,
    TFT5WithLMHeadModel,
    TFTransfoXLLMHeadModel,
    TFXLMRobertaForMaskedLM,
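The hunks above reorder the CamemBERT entries inside the script's big import list. For context, a conversion script like this pairs each model type's config class, TF model class, and pretrained config map. The snippet below is a hypothetical illustration built only from names imported above; it is not the script's actual mapping:

    # Hypothetical illustration (not the script's real table): grouping the imported
    # CamemBERT names by model type for a PyTorch -> TF 2.0 conversion.
    from transformers import (
        CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
        CamembertConfig,
        TFCamembertForMaskedLM,
    )

    CAMEMBERT_CONVERSION = {
        "camembert": (CamembertConfig, TFCamembertForMaskedLM, CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP),
    }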
src/transformers/modeling_tf_camembert.py
@@ -18,8 +18,6 @@
import logging
import tensorflow as tf
from .configuration_camembert import CamembertConfig
from .file_utils import add_start_docstrings
from .modeling_tf_roberta import (

@@ -29,10 +27,11 @@ from .modeling_tf_roberta import (
    TFRobertaModel,
)

logger = logging.getLogger(__name__)

TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
    #"camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-tf_model.h5"
    # "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-tf_model.h5"
}
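The module sets up a logger and a (currently commented-out) TF weights map, and its imports suggest the usual thin-wrapper pattern in which the TF CamemBERT classes reuse the TF RoBERTa implementations. A minimal sketch of that pattern, assuming the module's own imports are in scope; the exact class body is an assumption and is not shown in this diff:

    # Sketch only: reuse the RoBERTa TF implementation and swap in CamemBERT's
    # config class and pretrained-weights map (assumed pattern, not part of the diff).
    class TFCamembertModel(TFRobertaModel):
        config_class = CamembertConfig
        pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP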
templates/adding_a_new_example_script/run_xxx.py
@@ -52,7 +52,6 @@ from utils_squad import (
    write_predictions,
    write_predictions_extended,
)
# The follwing import is the official SQuAD evaluation script (2.0).
# You can remove it from the dependencies if you are using this script outside of the library
# We've added it here for automated tests (see examples/test_examples.py file)

@@ -333,7 +332,8 @@ def evaluate(args, model, tokenizer, prefix=""):
def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
    if args.local_rank not in [-1, 0] and not evaluate:
        torch.distributed.barrier()
        # Make sure only the first process in distributed training process the dataset, and the others will use the cache
        torch.distributed.barrier()
        # Make sure only the first process in distributed training process the dataset,
        # and the others will use the cache
    # Load data features from cache or dataset file
    input_file = args.predict_file if evaluate else args.train_file
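This hunk (and the ones that follow) only rewrap the comments around a recurring pattern: the first process in distributed training builds the feature cache while the other ranks wait at a barrier, and a second barrier releases them once the cache exists. A minimal, self-contained sketch of that pattern; the build_or_load_cache helper is a hypothetical placeholder, not a function from the script:

    import torch

    def build_or_load_cache(args):
        # Hypothetical placeholder for the expensive feature conversion / cached read.
        return []

    def load_and_cache_examples(args, evaluate=False):
        # Every rank except the first waits here while rank 0 builds the feature cache.
        if args.local_rank not in [-1, 0] and not evaluate:
            torch.distributed.barrier()

        features = build_or_load_cache(args)

        # Rank 0 reaches this barrier only after the cache is written, releasing the waiting ranks.
        if args.local_rank == 0 and not evaluate:
            torch.distributed.barrier()

        return features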
@@ -366,7 +366,8 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
            torch.save(features, cached_features_file)
    if args.local_rank == 0 and not evaluate:
        torch.distributed.barrier()
        # Make sure only the first process in distributed training process the dataset, and the others will use the cache
        torch.distributed.barrier()
        # Make sure only the first process in distributed training process the dataset,
        # and the others will use the cache
    # Convert to Tensors and build dataset
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)

@@ -620,7 +621,8 @@ def main():
    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()
        # Make sure only the first process in distributed training will
        # download model & vocab
    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

@@ -641,15 +643,16 @@ def main():
    )
    if args.local_rank == 0:
        torch.distributed.barrier()
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()
        # Make sure only the first process in distributed training will
        # download model & vocab
    model.to(args.device)
    logger.info("Training/evaluation parameters %s", args)
    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum
    # if args.fp16 is set. Otherwise it'll default to "promote" mode, and we'll get fp32 operations.
    # Note that running `--fp16_opt_level="O2"` will remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex
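The comment about torch.einsum refers to NVIDIA apex's AMP: under `--fp16_opt_level="O1"`, einsum falls into the "promote" (fp32) list unless it is explicitly registered as a half-precision function. A hedged sketch of that registration, assuming `args` and `torch` from the surrounding script; the exact call is based on apex's public amp.register_half_function API and is not copied from this diff:

    if args.fp16:
        try:
            from apex import amp

            # Keep torch.einsum in fp16 under AMP "O1"; with "O2" this registration is unnecessary.
            amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")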
templates/adding_a_new_example_script/utils_xxx.py
@@ -21,7 +21,6 @@ import logging
import math
from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize
# Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
from utils_squad_evaluate import find_all_best_thresh_v2, get_raw_scores, make_qid_to_has_ans