Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
715fa638
Commit
715fa638
authored
Jan 14, 2020
by
Julien Chaumond
Browse files
Merge branch 'master' into from_scratch_training
parents
764f836d
100e3b6f
Changes
27
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
89 additions
and
59 deletions
+89
-59
src/transformers/file_utils.py
src/transformers/file_utils.py
+8
-11
src/transformers/modeling_albert.py
src/transformers/modeling_albert.py
+3
-0
src/transformers/modeling_bert.py
src/transformers/modeling_bert.py
+3
-0
src/transformers/modeling_roberta.py
src/transformers/modeling_roberta.py
+3
-0
src/transformers/modeling_t5.py
src/transformers/modeling_t5.py
+1
-0
src/transformers/pipelines.py
src/transformers/pipelines.py
+64
-48
tests/test_modeling_common.py
tests/test_modeling_common.py
+7
-0
No files found.
src/transformers/file_utils.py
View file @
715fa638
...
...
@@ -384,9 +384,6 @@ def get_from_cache(
else
:
http_get
(
url
,
temp_file
,
proxies
=
proxies
,
resume_size
=
resume_size
,
user_agent
=
user_agent
)
# we are copying the file before closing it, so flush to avoid truncation
temp_file
.
flush
()
logger
.
info
(
"storing %s in cache at %s"
,
url
,
cache_path
)
os
.
rename
(
temp_file
.
name
,
cache_path
)
...
...
src/transformers/modeling_albert.py
View file @
715fa638
...
...
@@ -579,6 +579,9 @@ class AlbertMLMHead(nn.Module):
self
.
decoder
=
nn
.
Linear
(
config
.
embedding_size
,
config
.
vocab_size
)
self
.
activation
=
ACT2FN
[
config
.
hidden_act
]
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self
.
decoder
.
bias
=
self
.
bias
def
forward
(
self
,
hidden_states
):
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
activation
(
hidden_states
)
...
...
src/transformers/modeling_bert.py
View file @
715fa638
...
...
@@ -481,6 +481,9 @@ class BertLMPredictionHead(nn.Module):
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
config
.
vocab_size
))
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self
.
decoder
.
bias
=
self
.
bias
def
forward
(
self
,
hidden_states
):
hidden_states
=
self
.
transform
(
hidden_states
)
hidden_states
=
self
.
decoder
(
hidden_states
)
+
self
.
bias
...
...
src/transformers/modeling_roberta.py
View file @
715fa638
...
...
@@ -306,6 +306,9 @@ class RobertaLMHead(nn.Module):
self
.
decoder
=
nn
.
Linear
(
config
.
hidden_size
,
config
.
vocab_size
,
bias
=
False
)
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
config
.
vocab_size
))
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self
.
decoder
.
bias
=
self
.
bias
def
forward
(
self
,
features
,
**
kwargs
):
x
=
self
.
dense
(
features
)
x
=
gelu
(
x
)
...
...
src/transformers/modeling_t5.py
View file @
715fa638
...
...
@@ -286,6 +286,7 @@ class T5Attention(nn.Module):
bidirectional
=
not
self
.
is_decoder
,
num_buckets
=
self
.
relative_attention_num_buckets
,
)
rp_bucket
=
rp_bucket
.
to
(
self
.
relative_attention_bias
.
weight
.
device
)
values
=
self
.
relative_attention_bias
(
rp_bucket
)
# shape (qlen, klen, num_heads)
values
=
values
.
permute
([
2
,
0
,
1
]).
unsqueeze
(
0
)
# shape (1, num_heads, qlen, klen)
return
values
...
...
src/transformers/pipelines.py
View file @
715fa638
...
...
@@ -705,9 +705,19 @@ class QuestionAnsweringPipeline(Pipeline):
# Convert inputs to features
examples
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
features
=
squad_convert_examples_to_features
(
examples
,
self
.
tokenizer
,
kwargs
[
"max_seq_len"
],
kwargs
[
"doc_stride"
],
kwargs
[
"max_question_len"
],
False
features_list
=
[
squad_convert_examples_to_features
(
[
example
],
self
.
tokenizer
,
kwargs
[
"max_seq_len"
],
kwargs
[
"doc_stride"
],
kwargs
[
"max_question_len"
],
False
,
)
for
example
in
examples
]
all_answers
=
[]
for
features
,
example
in
zip
(
features_list
,
examples
):
fw_args
=
self
.
inputs_for_model
([
f
.
__dict__
for
f
in
features
])
# Manage tensor allocation on correct device
...
...
@@ -724,13 +734,16 @@ class QuestionAnsweringPipeline(Pipeline):
start
,
end
=
start
.
cpu
().
numpy
(),
end
.
cpu
().
numpy
()
answers
=
[]
for
(
example
,
feature
,
start_
,
end_
)
in
zip
(
examples
,
features
,
start
,
end
):
for
(
feature
,
start_
,
end_
)
in
zip
(
features
,
start
,
end
):
# Normalize logits and spans to retrieve the answer
start_
=
np
.
exp
(
start_
)
/
np
.
sum
(
np
.
exp
(
start_
))
end_
=
np
.
exp
(
end_
)
/
np
.
sum
(
np
.
exp
(
end_
))
# Mask padding and question
start_
,
end_
=
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
)
start_
,
end_
=
(
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
)
# TODO : What happens if not possible
# Mask CLS
...
...
@@ -751,9 +764,12 @@ class QuestionAnsweringPipeline(Pipeline):
}
for
s
,
e
,
score
in
zip
(
starts
,
ends
,
scores
)
]
if
len
(
answers
)
==
1
:
return
answers
[
0
]
return
answers
answers
=
sorted
(
answers
,
key
=
lambda
x
:
x
[
"score"
],
reverse
=
True
)[:
kwargs
[
"topk"
]]
all_answers
+=
answers
if
len
(
all_answers
)
==
1
:
return
all_answers
[
0
]
return
all_answers
def
decode
(
self
,
start
:
np
.
ndarray
,
end
:
np
.
ndarray
,
topk
:
int
,
max_answer_len
:
int
)
->
Tuple
:
"""
...
...
tests/test_modeling_common.py
View file @
715fa638
...
...
@@ -485,6 +485,8 @@ class ModelTesterMixin:
self
.
assertEqual
(
model
.
config
.
vocab_size
,
model_vocab_size
+
10
)
# Check that it actually resizes the embeddings matrix
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
+
10
)
# Check that the model can still do a forward pass successfully (every parameter should be resized)
model
(
**
inputs_dict
)
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
-
15
)
...
...
@@ -492,6 +494,11 @@ class ModelTesterMixin:
# Check that it actually resizes the embeddings matrix
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
-
15
)
# Check that the model can still do a forward pass successfully (every parameter should be resized)
# Input ids should be clamped to the maximum size of the vocabulary
inputs_dict
[
"input_ids"
].
clamp_
(
max
=
model_vocab_size
-
15
-
1
)
model
(
**
inputs_dict
)
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
models_equal
=
True
for
p1
,
p2
in
zip
(
cloned_embeddings
,
model_embed
.
weight
):
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment