chenpangpang / transformers · Commits

Commit 448c4672 (unverified), authored May 14, 2020 by Julien Chaumond, committed by GitHub on May 14, 2020. Parent: c547f15a.

Fix: unpin flake8 and fix cs errors (#4367)

* Fix: unpin flake8 and fix cs errors
* Ok we still need to quote those
Showing 13 changed files with 35 additions and 21 deletions (+35 -21):
    examples/benchmarks.py                               +1   -1
    examples/distillation/distiller.py                   +4   -4
    examples/distillation/scripts/binarized_data.py      +1   -1
    examples/distillation/scripts/extract.py             +1   -1
    examples/distillation/scripts/extract_distilbert.py  +5   -5
    examples/distillation/train.py                       +2   -2
    setup.cfg                                            +1   -1
    setup.py                                             +1   -1
    src/transformers/convert_marian_to_pytorch.py        +1   -1
    src/transformers/data/datasets/glue.py               +1   -1
    src/transformers/data/datasets/language_modeling.py  +1   -1
    src/transformers/pipelines.py                        +5   -1
    tests/test_tokenization_common.py                    +11  -1
examples/benchmarks.py

@@ -478,7 +478,7 @@ def _compute_pytorch(
             dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"
 
             if not no_speed:
-                print_fn("Going through model with sequence of shape".format(sequence.shape))
+                print_fn("Going through model with sequence of shape {}".format(sequence.shape))
                 runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
                 average_time = sum(runtimes) / float(len(runtimes)) / 3.0
                 dictionary[model_name]["time"][batch_size][slice_size] = average_time
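For context: the old line passed sequence.shape to str.format on a string with no {} placeholder, so the shape never appeared in the output; recent flake8/pyflakes releases flag such unused .format arguments. A minimal illustration (not from the commit, hypothetical values):

    # A format argument without a matching placeholder is silently dropped.
    shape = (8, 512)
    print("Going through model with sequence of shape".format(shape))
    # -> Going through model with sequence of shape
    print("Going through model with sequence of shape {}".format(shape))
    # -> Going through model with sequence of shape (8, 512)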
examples/distillation/distiller.py

@@ -80,7 +80,7 @@ class Distiller:
         self.mlm = params.mlm
         if self.mlm:
-            logger.info(f"Using MLM loss for LM step.")
+            logger.info("Using MLM loss for LM step.")
             self.mlm_mask_prop = params.mlm_mask_prop
             assert 0.0 <= self.mlm_mask_prop <= 1.0
             assert params.word_mask + params.word_keep + params.word_rand == 1.0
@@ -91,7 +91,7 @@ class Distiller:
                 self.pred_probs = self.pred_probs.half()
                 self.token_probs = self.token_probs.half()
         else:
-            logger.info(f"Using CLM loss for LM step.")
+            logger.info("Using CLM loss for LM step.")
 
         self.epoch = 0
         self.n_iter = 0
@@ -365,8 +365,8 @@ class Distiller:
             self.end_epoch()
 
         if self.is_master:
-            logger.info(f"Save very last checkpoint as `pytorch_model.bin`.")
-            self.save_checkpoint(checkpoint_name=f"pytorch_model.bin")
+            logger.info("Save very last checkpoint as `pytorch_model.bin`.")
+            self.save_checkpoint(checkpoint_name="pytorch_model.bin")
             logger.info("Training is finished")
 
     def step(self, input_ids: torch.tensor, attention_mask: torch.tensor, lm_labels: torch.tensor):
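Most of the changes in this commit follow this same pattern: once flake8 is unpinned, recent pyflakes flags f-strings that contain no placeholder (check F541), and the fix is simply to drop the f prefix. A minimal illustration (not from the commit):

    import logging

    logger = logging.getLogger(__name__)
    student = "distilbert"
    # F541: an f-string with no {...} placeholder; the f prefix is dead weight.
    logger.info(f"Using MLM loss for LM step.")
    # Fixed: a plain string says the same thing.
    logger.info("Using MLM loss for LM step.")
    # Fine as an f-string: {student} is actually interpolated.
    logger.info(f"Loading student config for {student}")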
examples/distillation/scripts/binarized_data.py

@@ -60,7 +60,7 @@ def main():
     with open(args.file_path, "r", encoding="utf8") as fp:
         data = fp.readlines()
 
-    logger.info(f"Start encoding")
+    logger.info("Start encoding")
     logger.info(f"{len(data)} examples to process.")
 
     rslt = []
examples/distillation/scripts/extract.py

@@ -93,7 +93,7 @@ if __name__ == "__main__":
     elif args.model_type == "gpt2":
         for w in ["weight", "bias"]:
             compressed_sd[f"{prefix}.ln_f.{w}"] = state_dict[f"{prefix}.ln_f.{w}"]
-        compressed_sd[f"lm_head.weight"] = state_dict[f"lm_head.weight"]
+        compressed_sd["lm_head.weight"] = state_dict["lm_head.weight"]
 
     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")
examples/distillation/scripts/extract_distilbert.py

@@ -37,7 +37,7 @@ if __name__ == "__main__":
         model = BertForMaskedLM.from_pretrained(args.model_name)
         prefix = "bert"
     else:
-        raise ValueError(f'args.model_type should be "bert".')
+        raise ValueError('args.model_type should be "bert".')
 
     state_dict = model.state_dict()
     compressed_sd = {}
@@ -78,12 +78,12 @@ if __name__ == "__main__":
         ]
         std_idx += 1
 
-    compressed_sd[f"vocab_projector.weight"] = state_dict[f"cls.predictions.decoder.weight"]
-    compressed_sd[f"vocab_projector.bias"] = state_dict[f"cls.predictions.bias"]
+    compressed_sd["vocab_projector.weight"] = state_dict["cls.predictions.decoder.weight"]
+    compressed_sd["vocab_projector.bias"] = state_dict["cls.predictions.bias"]
     if args.vocab_transform:
         for w in ["weight", "bias"]:
-            compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"]
-            compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"]
+            compressed_sd[f"vocab_transform.{w}"] = state_dict["cls.predictions.transform.dense.{w}"]
+            compressed_sd[f"vocab_layer_norm.{w}"] = state_dict["cls.predictions.transform.LayerNorm.{w}"]
 
     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")
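Note that the f prefix is load-bearing on the two {w} lines: an f-string interpolates {w} at that point, while a plain string keeps the braces as literal text, so a key like "cls.predictions.transform.dense.{w}" written without the prefix would not resolve w. A minimal illustration of the difference (not from the commit):

    w = "weight"
    # With the f prefix, {w} is interpolated into the key:
    assert f"vocab_transform.{w}" == "vocab_transform.weight"
    # Without it, the braces survive as literal characters:
    assert "cls.predictions.transform.dense.{w}" == "cls.predictions.transform.dense." + "{w}"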
examples/distillation/train.py

@@ -273,7 +273,7 @@ def main():
         token_probs = None
 
     train_lm_seq_dataset = LmSeqsDataset(params=args, data=data)
-    logger.info(f"Data loader created.")
+    logger.info("Data loader created.")
 
     # STUDENT #
     logger.info(f"Loading student config from {args.student_config}")
@@ -288,7 +288,7 @@ def main():
     if args.n_gpu > 0:
         student.to(f"cuda:{args.local_rank}")
-    logger.info(f"Student loaded.")
+    logger.info("Student loaded.")
 
     # TEACHER #
     teacher = teacher_model_class.from_pretrained(args.teacher_name, output_hidden_states=True)
setup.cfg

@@ -36,5 +36,5 @@ multi_line_output = 3
 use_parentheses = True
 
 [flake8]
-ignore = E203, E501, W503
+ignore = E203, E501, E741, W503
 max-line-length = 119
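E741, newly added to the ignore list, is pycodestyle's warning about the ambiguous single-letter names l, O, and I, which newer flake8 releases report; the codebase opts out of the rule rather than renaming existing variables. A minimal illustration of code that would otherwise be flagged (not from the commit):

    # E741: "l" is easily confused with "1" or "I" in many fonts.
    for l in range(3):
        print(l)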
setup.py

@@ -79,7 +79,7 @@ extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rt
 extras["quality"] = [
     "black",
     "isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort",
-    "flake8==3.7.9",
+    "flake8",
 ]
 
 extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"]
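This is the "unpin" half of the commit: dropping the ==3.7.9 pin lets pip resolve the newest flake8 release, which is what surfaces the checks fixed above. A sketch of the difference between the two specifiers (hypothetical snippet, not the project's setup.py):

    extras = {}
    extras["quality_pinned"] = ["flake8==3.7.9"]  # frozen: never sees newer checks
    extras["quality"] = ["flake8"]                # unpinned: tracks the latest release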
src/transformers/convert_marian_to_pytorch.py

@@ -226,7 +226,7 @@ def lmap(f, x) -> List:
 def fetch_test_set(test_set_url):
     import wget
 
-    fname = wget.download(test_set_url, f"opus_test.txt")
+    fname = wget.download(test_set_url, "opus_test.txt")
     lns = Path(fname).open().readlines()
     src = lmap(str.strip, lns[::4])
     gold = lmap(str.strip, lns[1::4])
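Judging from the slicing in the surrounding context, the downloaded test file cycles through fixed 4-line records: lns[::4] takes lines 0, 4, 8, ... (source sentences) and lns[1::4] takes lines 1, 5, 9, ... (gold translations). A minimal illustration of the extended-slice behavior (hypothetical data, not from the commit):

    lns = ["src 0", "gold 0", "x", "y", "src 1", "gold 1", "x", "y"]
    assert lns[::4] == ["src 0", "src 1"]      # every 4th line, starting at index 0
    assert lns[1::4] == ["gold 0", "gold 1"]   # every 4th line, starting at index 1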
src/transformers/data/datasets/glue.py

@@ -114,7 +114,7 @@ class GlueDataset(Dataset):
                 torch.save(self.features, cached_features_file)
                 # ^ This seems to take a lot of time so I want to investigate why and how we can improve.
                 logger.info(
-                    f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                    "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
                 )
 
     def __len__(self):
src/transformers/data/datasets/language_modeling.py

@@ -65,7 +65,7 @@ class TextDataset(Dataset):
             with open(cached_features_file, "wb") as handle:
                 pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)
             logger.info(
-                "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
             )
 
     def __len__(self):
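Both this hunk and the glue.py hunk above strip an f prefix from what is really a logging format string: logging fills in the %s / %.3f arguments itself, lazily, and only if the record is actually emitted, so the f prefix did nothing (and F541 flags it). A minimal illustration (not from the commit, hypothetical path):

    import logging
    import time

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    start = time.time()
    # logging substitutes %s / %.3f from the extra arguments; the string
    # must stay a plain template rather than an f-string.
    logger.info("Saving features into cached file %s [took %.3f s]", "/tmp/cache.bin", time.time() - start)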
src/transformers/pipelines.py

@@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from itertools import chain
 from os.path import abspath, exists
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
@@ -58,6 +58,10 @@ if is_torch_available():
         AutoModelWithLMHead,
     )
 
+if TYPE_CHECKING:
+    from .modeling_utils import PreTrainedModel
+    from .modeling_tf_utils import TFPreTrainedModel
+
 
 logger = logging.getLogger(__name__)
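typing.TYPE_CHECKING is False at runtime and True only while a static type checker analyzes the module, so imports guarded by it cost nothing at import time and cannot create circular imports; the imported names remain usable in annotations as quoted strings. A minimal sketch of the pattern (hypothetical module path, not from the commit):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Only seen by type checkers; never executed at runtime.
        from mypkg.modeling_utils import PreTrainedModel  # hypothetical path

    def describe(model: "PreTrainedModel") -> str:
        # The quoted annotation is a forward reference, so the runtime
        # absence of PreTrainedModel is harmless.
        return model.__class__.__name__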
tests/test_tokenization_common.py

@@ -19,11 +19,21 @@ import pickle
 import shutil
 import tempfile
 from collections import OrderedDict
-from typing import Dict, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Tuple, Union
 
 from tests.utils import require_tf, require_torch
 
+if TYPE_CHECKING:
+    from transformers import (
+        PretrainedConfig,
+        PreTrainedTokenizer,
+        PreTrainedTokenizerFast,
+        PreTrainedModel,
+        TFPreTrainedModel,
+    )
+
 
 def merge_model_tokenizer_mappings(
     model_mapping: Dict["PretrainedConfig", Union["PreTrainedModel", "TFPreTrainedModel"]],
     tokenizer_mapping: Dict["PretrainedConfig", Tuple["PreTrainedTokenizer", "PreTrainedTokenizerFast"]],
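This hunk is likely what the second commit message, "Ok we still need to quote those", refers to: since PretrainedConfig and friends are only imported under TYPE_CHECKING, the annotations must stay quoted as forward references or the module would raise NameError at import time. A minimal illustration (not from the commit):

    from typing import TYPE_CHECKING, Dict

    if TYPE_CHECKING:
        from transformers import PretrainedConfig  # absent at runtime

    # Quoted annotations are forward references: the type checker resolves
    # them, while at runtime they stay as strings and never raise NameError.
    def lookup(mapping: Dict["PretrainedConfig", str]) -> None:
        pass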