Commit 2ab78325, authored Dec 21, 2019 by Aymeric Augustin

Fix F821 flake8 warning (x47).

Ignore warnings related to Python 2, because it's going away soon.

Parent: 631be270
Showing 20 changed files with 77 additions and 37 deletions (+77, -37)
examples/contrib/run_swag.py                        +1   -1
examples/run_generation.py                          +1   -1
examples/utils_multiple_choice.py                   +1   -1
templates/adding_a_new_model/modeling_tf_xxx.py     +11  -0
templates/adding_a_new_model/modeling_xxx.py        +18  -0
transformers/commands/user.py                       +2   -2
transformers/data/processors/utils.py               +1   -1
transformers/file_utils.py                          +1   -1
transformers/hf_api.py                              +5   -6
transformers/modeling_bert.py                       +6   -2
transformers/modeling_tf_albert.py                  +6   -2
transformers/modeling_tf_auto.py                    +2   -2
transformers/modeling_tf_bert.py                    +6   -2
transformers/modeling_tf_xlnet.py                   +2   -2
transformers/modeling_xlnet.py                      +1   -1
transformers/tests/tokenization_utils_test.py       +1   -1
transformers/tokenization_albert.py                 +1   -1
transformers/tokenization_gpt2.py                   +1   -1
transformers/tokenization_transfo_xl.py             +4   -4
transformers/tokenization_utils.py                  +6   -6
examples/contrib/run_swag.py  (+1, -1)

@@ -108,7 +108,7 @@ def read_swag_examples(input_file, is_training=True):
         lines = []
         for line in reader:
             if sys.version_info[0] == 2:
-                line = list(unicode(cell, "utf-8") for cell in line)
+                line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
             lines.append(line)

     if is_training and lines[0][-1] != "label":
examples/run_generation.py  (+1, -1)

@@ -225,7 +225,7 @@ def main():
     # Batch size == 1. to add more examples please use num_return_sequences > 1
     generated_sequence = output_sequences[0].tolist()
     text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
-    text = text[: t.find(args.stop_token) if args.stop_token else None]
+    text = text[: text.find(args.stop_token) if args.stop_token else None]

     print(text)
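This one is a genuine bug fix rather than a suppression: `t` was an undefined name, so the truncation line raised `NameError` whenever `--stop_token` was passed. The slicing idiom is compact enough to be worth a standalone look (function name and strings here are illustrative, not from the repo):

    def truncate_at(text, stop_token=None):
        # If stop_token is falsy, slice with None, which keeps the whole string.
        # Note that str.find returns -1 when the token is given but absent,
        # which silently drops the final character -- a quirk of this idiom.
        return text[: text.find(stop_token) if stop_token else None]

    assert truncate_at("hello<eos>world", "<eos>") == "hello"
    assert truncate_at("hello world") == "hello world"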
examples/utils_multiple_choice.py  (+1, -1)

@@ -184,7 +184,7 @@ class SwagProcessor(DataProcessor):
             lines = []
             for line in reader:
                 if sys.version_info[0] == 2:
-                    line = list(unicode(cell, "utf-8") for cell in line)
+                    line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
                 lines.append(line)
             return lines
templates/adding_a_new_model/modeling_tf_xxx.py  (+11, -0)

@@ -68,6 +68,14 @@ TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP = {
 #
 # See the conversion methods in modeling_tf_pytorch_utils.py for more details
 ####################################################
+
+TFXxxAttention = tf.keras.layers.Layer
+
+TFXxxIntermediate = tf.keras.layers.Layer
+
+TFXxxOutput = tf.keras.layers.Layer
+
+
 class TFXxxLayer(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super(TFXxxLayer, self).__init__(**kwargs)

@@ -316,6 +324,9 @@ class TFXxxModel(TFXxxPreTrainedModel):
         return outputs


+TFXxxMLMHead = tf.keras.layers.Layer
+
+
 @add_start_docstrings(
     """Xxx Model with a `language modeling` head on top. """, XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING
 )
templates/adding_a_new_model/modeling_xxx.py  (+18, -0)

@@ -135,6 +135,14 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
 #
 # See the conversion methods in modeling_tf_pytorch_utils.py for more details
 ####################################################
+
+XxxAttention = nn.Module
+
+XxxIntermediate = nn.Module
+
+XxxOutput = nn.Module
+
+
 class XxxLayer(nn.Module):
     def __init__(self, config):
         super(XxxLayer, self).__init__()

@@ -160,6 +168,16 @@ class XxxLayer(nn.Module):
 # pointers for your model and the weights initialization
 # method if its not fully covered by PreTrainedModel's default method
 ####################################################
+
+XxxLayerNorm = torch.nn.LayerNorm
+
+XxxEmbeddings = nn.Module
+
+XxxEncoder = nn.Module
+
+XxxPooler = nn.Module
+
+
 class XxxPreTrainedModel(PreTrainedModel):
     """ An abstract class to handle weights initialization and
         a simple interface for dowloading and loading pretrained models.
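In the model templates, the commit defines the placeholder names instead of suppressing the warnings: each not-yet-written class is aliased to the base class it will eventually subclass, so references to `XxxAttention`, `XxxPooler`, and friends resolve. A reduced sketch of the idea (assuming the usual `torch` import):

    import torch.nn as nn

    # Alias the base class so the name exists; a real model replaces this
    # with a subclass implementing the actual attention computation.
    XxxAttention = nn.Module

    class XxxLayer(nn.Module):
        def __init__(self, config):
            super(XxxLayer, self).__init__()
            # Resolves cleanly now that XxxAttention is a defined name.
            self.attention = XxxAttention()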
transformers/commands/user.py  (+2, -2)

 import os
 from argparse import ArgumentParser
 from getpass import getpass
+from typing import List, Union

 from transformers.commands import BaseTransformersCLICommand
 from transformers.hf_api import HfApi, HfFolder, HTTPError

@@ -96,8 +97,7 @@ class LogoutCommand(BaseUserCommand):
 class ListObjsCommand(BaseUserCommand):
-    def tabulate(self, rows, headers):
-        # type: (List[List[Union[str, int]]], List[str]) -> str
+    def tabulate(self, rows: List[List[Union[str, int]]], headers: List[str]) -> str:
         """
         Inspired by:
         stackoverflow.com/a/8356620/593036
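Where annotations are possible, the commit also converts comment-style type hints into real ones. Comment hints are invisible to static tooling's name resolution, so the `typing` names they mention are never checked (and imports that exist only to serve them can look unused); as annotations they are ordinary expressions. A rough before/after sketch, with a toy body added so it runs on its own:

    from typing import List, Union

    # Before: def tabulate(self, rows, headers):
    #             # type: (List[List[Union[str, int]]], List[str]) -> str

    def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
        # Toy rendering, just to keep the example self-contained.
        lines = ["\t".join(headers)]
        lines += ["\t".join(str(cell) for cell in row) for row in rows]
        return "\n".join(lines)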
transformers/data/processors/utils.py  (+1, -1)

@@ -102,7 +102,7 @@ class DataProcessor(object):
             lines = []
             for line in reader:
                 if sys.version_info[0] == 2:
-                    line = list(unicode(cell, "utf-8") for cell in line)
+                    line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
                 lines.append(line)
             return lines
transformers/file_utils.py  (+1, -1)

@@ -419,7 +419,7 @@ def get_from_cache(
         with open(meta_path, "w") as meta_file:
             output_string = json.dumps(meta)
             if sys.version_info[0] == 2 and isinstance(output_string, str):
-                output_string = unicode(output_string, "utf-8")  # The beauty of python 2
+                output_string = unicode(output_string, "utf-8")  # noqa: F821
             meta_file.write(output_string)

     return cache_path
transformers/hf_api.py  (+5, -6)

@@ -14,8 +14,10 @@
 # limitations under the License.
 from __future__ import absolute_import, division, print_function

+import io
 import os
 from os.path import expanduser
+from typing import List

 import requests
 import six

@@ -93,7 +95,7 @@ class HfApi:
         return d["user"]

     def logout(self, token):
-        # type: (...) -> void
+        # type: (...) -> None
         """
         Call HF API to log out.
         """

@@ -135,8 +137,7 @@ class HfApi:
         pf.close()
         return urls.access

-    def list_objs(self, token):
-        # type: (...) -> List[S3Obj]
+    def list_objs(self, token) -> List[S3Obj]:
         """
         Call HF API to list all stored files for user.
         """

@@ -156,9 +157,7 @@ class TqdmProgressFileReader:
     for implementation details.
     """

-    def __init__(
-        self, f  # type: io.BufferedReader
-    ):
+    def __init__(self, f: io.BufferedReader):
         self.f = f
         self.total_size = os.fstat(f.fileno()).st_size  # type: int
         self.pbar = tqdm(total=self.total_size, leave=False)
transformers/modeling_bert.py  (+6, -2)

@@ -339,7 +339,9 @@ class BertIntermediate(nn.Module):
     def __init__(self, config):
         super(BertIntermediate, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
-        if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
+        if isinstance(config.hidden_act, str) or (
+            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
+        ):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]
         else:
             self.intermediate_act_fn = config.hidden_act

@@ -459,7 +461,9 @@ class BertPredictionHeadTransform(nn.Module):
     def __init__(self, config):
         super(BertPredictionHeadTransform, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
+        if isinstance(config.hidden_act, str) or (
+            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
+        ):
             self.transform_act_fn = ACT2FN[config.hidden_act]
         else:
             self.transform_act_fn = config.hidden_act
transformers/modeling_tf_albert.py  (+6, -2)

@@ -311,7 +311,9 @@ class TFAlbertLayer(tf.keras.layers.Layer):
             config.intermediate_size, kernel_initializer=get_initializer(config.initializer_range), name="ffn"
         )
-        if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
+        if isinstance(config.hidden_act, str) or (
+            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
+        ):
             self.activation = ACT2FN[config.hidden_act]
         else:
             self.activation = config.hidden_act

@@ -452,7 +454,9 @@ class TFAlbertMLMHead(tf.keras.layers.Layer):
         self.dense = tf.keras.layers.Dense(
             config.embedding_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
         )
-        if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
+        if isinstance(config.hidden_act, str) or (
+            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
+        ):
             self.activation = ACT2FN[config.hidden_act]
         else:
             self.activation = config.hidden_act
transformers/modeling_tf_auto.py  (+2, -2)

@@ -690,9 +690,9 @@ class TFAutoModelForQuestionAnswering(object):
         elif isinstance(config, BertConfig):
             return TFBertForQuestionAnswering(config)
         elif isinstance(config, XLNetConfig):
-            return TFXLNetForQuestionAnswering(config)
+            raise NotImplementedError("TFXLNetForQuestionAnswering isn't implemented")
         elif isinstance(config, XLMConfig):
-            return TFXLMForQuestionAnswering(config)
+            raise NotImplementedError("TFXLMForQuestionAnswering isn't implemented")
         raise ValueError("Unrecognized configuration class {}".format(config))

     @classmethod
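`TFXLNetForQuestionAnswering` and `TFXLMForQuestionAnswering` did not exist at this point, so returning them was itself an undefined-name error waiting to fire. Raising `NotImplementedError` keeps the dispatch explicit: the caller gets a clear message instead of a bare `NameError`. The guard pattern in isolation (config classes here are stand-ins, not the real ones):

    class BertConfig: pass
    class XLNetConfig: pass

    def model_for_question_answering(config):
        if isinstance(config, BertConfig):
            return "TFBertForQuestionAnswering"  # stand-in for the real class
        if isinstance(config, XLNetConfig):
            # The class is not implemented yet: fail loudly and descriptively
            # rather than referencing an undefined name.
            raise NotImplementedError("TFXLNetForQuestionAnswering isn't implemented")
        raise ValueError("Unrecognized configuration class {}".format(config))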
transformers/modeling_tf_bert.py  (+6, -2)

@@ -315,7 +315,9 @@ class TFBertIntermediate(tf.keras.layers.Layer):
         self.dense = tf.keras.layers.Dense(
             config.intermediate_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
         )
-        if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
+        if isinstance(config.hidden_act, str) or (
+            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
+        ):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]
         else:
             self.intermediate_act_fn = config.hidden_act

@@ -420,7 +422,9 @@ class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
         self.dense = tf.keras.layers.Dense(
             config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
         )
-        if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
+        if isinstance(config.hidden_act, str) or (
+            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
+        ):
             self.transform_act_fn = ACT2FN[config.hidden_act]
         else:
             self.transform_act_fn = config.hidden_act
transformers/modeling_tf_xlnet.py  (+2, -2)

@@ -295,7 +295,7 @@ class TFXLNetFeedForward(tf.keras.layers.Layer):
         )
         self.dropout = tf.keras.layers.Dropout(config.dropout)
         if isinstance(config.ff_activation, str) or (
-            sys.version_info[0] == 2 and isinstance(config.ff_activation, unicode)
+            sys.version_info[0] == 2 and isinstance(config.ff_activation, unicode)  # noqa: F821
         ):
             self.activation_function = ACT2FN[config.ff_activation]
         else:

@@ -483,7 +483,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
         if dtype is not None and dtype != tf.float32:
             fwd_pos_seq = tf.cast(fwd_pos_seq, dtype=dtype)
         if self.clamp_len > 0:
-            fwd_pos_seq = tf.clip_by_value(fwd_pos_seq, -clamp_len, clamp_len)
+            fwd_pos_seq = tf.clip_by_value(fwd_pos_seq, -self.clamp_len, self.clamp_len)
         pos_emb = self.positional_embedding(fwd_pos_seq, inv_freq, bsz)

         return pos_emb
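The second hunk here is another real fix: bare `clamp_len` was an undefined name inside the method, so positional-sequence clamping would have crashed at call time; the intended value is the `self.clamp_len` attribute set in the constructor. Reduced to its essentials (class name and usage are made up for illustration):

    import tensorflow as tf

    class PositionalHelper:
        def __init__(self, clamp_len):
            self.clamp_len = clamp_len

        def clamp(self, fwd_pos_seq):
            if self.clamp_len > 0:
                # A bare `clamp_len` here would raise NameError; the instance
                # attribute is the value the method means to use.
                fwd_pos_seq = tf.clip_by_value(fwd_pos_seq, -self.clamp_len, self.clamp_len)
            return fwd_pos_seq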
transformers/modeling_xlnet.py  (+1, -1)

@@ -431,7 +431,7 @@ class XLNetFeedForward(nn.Module):
         self.layer_2 = nn.Linear(config.d_inner, config.d_model)
         self.dropout = nn.Dropout(config.dropout)
         if isinstance(config.ff_activation, str) or (
-            sys.version_info[0] == 2 and isinstance(config.ff_activation, unicode)
+            sys.version_info[0] == 2 and isinstance(config.ff_activation, unicode)  # noqa: F821
         ):
             self.activation_function = ACT2FN[config.ff_activation]
         else:
transformers/tests/tokenization_utils_test.py  (+1, -1)

@@ -35,7 +35,7 @@ class TokenizerUtilsTest(unittest.TestCase):
         for special_tok in tokenizer.all_special_tokens:
             if six.PY2:
-                self.assertIsInstance(special_tok, unicode)
+                self.assertIsInstance(special_tok, unicode)  # noqa: F821
             else:
                 self.assertIsInstance(special_tok, str)
             special_tok_id = tokenizer.convert_tokens_to_ids(special_tok)
transformers/tokenization_albert.py  (+1, -1)

@@ -156,7 +156,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         """
         text = self.preprocess_text(text)
         # note(zhiliny): in some systems, sentencepiece only accepts str for py2
-        if six.PY2 and isinstance(text, unicode):
+        if six.PY2 and isinstance(text, unicode):  # noqa: F821
             text = text.encode("utf-8")

         if not sample:
transformers/tokenization_gpt2.py  (+1, -1)

@@ -80,7 +80,7 @@ def bytes_to_unicode():
     This is a signficant percentage of your normal, say, 32K bpe vocab.
     To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
     """
-    _chr = unichr if sys.version_info[0] == 2 else chr
+    _chr = unichr if sys.version_info[0] == 2 else chr  # noqa: F821
     bs = (
         list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1))
     )
transformers/tokenization_transfo_xl.py  (+4, -4)

@@ -36,10 +36,10 @@ try:
 except ImportError:
     pass

-# if sys.version_info[0] == 2:
-#     import cPickle as pickle
-# else:
-#     import pickle
+if sys.version_info[0] == 2:
+    import cPickle as pickle
+else:
+    import pickle

 logger = logging.getLogger(__name__)
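This hunk restores a conditional import block that had been commented out, which left `pickle` undefined at every later use in the module; re-enabling it is what clears those F821 reports. The guard runs on either interpreter (a generic sketch, not the module's exact surroundings):

    import sys

    if sys.version_info[0] == 2:
        import cPickle as pickle  # C-accelerated pickler, Python 2 only
    else:
        import pickle

    # `pickle` is now a defined name under both Python 2 and Python 3.
    data = pickle.loads(pickle.dumps({"vocab": ["hello", "world"]}))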
transformers/tokenization_utils.py  (+6, -6)

@@ -252,10 +252,10 @@ class PreTrainedTokenizer(object):
             if key in self.SPECIAL_TOKENS_ATTRIBUTES:
                 if key == "additional_special_tokens":
                     assert isinstance(value, (list, tuple)) and all(
-                        isinstance(t, str) or (six.PY2 and isinstance(t, unicode)) for t in value
+                        isinstance(t, str) or (six.PY2 and isinstance(t, unicode)) for t in value  # noqa: F821
                     )
                 else:
-                    assert isinstance(value, str) or (six.PY2 and isinstance(value, unicode))
+                    assert isinstance(value, str) or (six.PY2 and isinstance(value, unicode))  # noqa: F821
                 setattr(self, key, value)

     @classmethod

@@ -567,7 +567,7 @@ class PreTrainedTokenizer(object):
         to_add_tokens = []
         for token in new_tokens:
-            assert isinstance(token, str) or (six.PY2 and isinstance(token, unicode))
+            assert isinstance(token, str) or (six.PY2 and isinstance(token, unicode))  # noqa: F821
             if self.init_kwargs.get("do_lower_case", False) and token not in self.all_special_tokens:
                 token = token.lower()
             if (

@@ -650,11 +650,11 @@ class PreTrainedTokenizer(object):
             assert key in self.SPECIAL_TOKENS_ATTRIBUTES
             if key == "additional_special_tokens":
                 assert isinstance(value, (list, tuple)) and all(
-                    isinstance(t, str) or (six.PY2 and isinstance(t, unicode)) for t in value
+                    isinstance(t, str) or (six.PY2 and isinstance(t, unicode)) for t in value  # noqa: F821
                 )
                 added_tokens += self.add_tokens(value)
             else:
-                assert isinstance(value, str) or (six.PY2 and isinstance(value, unicode))
+                assert isinstance(value, str) or (six.PY2 and isinstance(value, unicode))  # noqa: F821
                 added_tokens += self.add_tokens([value])
             logger.info("Assigning %s to the %s key of the tokenizer", value, key)
             setattr(self, key, value)

@@ -746,7 +746,7 @@ class PreTrainedTokenizer(object):
         if tokens is None:
             return None

-        if isinstance(tokens, str) or (six.PY2 and isinstance(tokens, unicode)):
+        if isinstance(tokens, str) or (six.PY2 and isinstance(tokens, unicode)):  # noqa: F821
             return self._convert_token_to_id_with_added_voc(tokens)

         ids = []