chenpangpang/transformers · Commits · 715fa638

Commit 715fa638 authored Jan 14, 2020 by Julien Chaumond

    Merge branch 'master' into from_scratch_training

Parents: 764f836d 100e3b6f

Changes: 27 · Showing 7 changed files with 89 additions and 59 deletions (+89 −59)
src/transformers/file_utils.py       +8  −11
src/transformers/modeling_albert.py  +3  −0
src/transformers/modeling_bert.py    +3  −0
src/transformers/modeling_roberta.py +3  −0
src/transformers/modeling_t5.py      +1  −0
src/transformers/pipelines.py        +64 −48
tests/test_modeling_common.py        +7  −0
src/transformers/file_utils.py

@@ -384,9 +384,6 @@ def get_from_cache(
             else:
                 http_get(url, temp_file, proxies=proxies, resume_size=resume_size, user_agent=user_agent)
 
-            # we are copying the file before closing it, so flush to avoid truncation
-            temp_file.flush()
-
             logger.info("storing %s in cache at %s", url, cache_path)
             os.rename(temp_file.name, cache_path)
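For context, this hunk sits in the download-and-cache path of `get_from_cache`: the file is fetched into a temporary file and then moved into the cache with `os.rename`. Below is a minimal, self-contained sketch of that pattern; the function name `fetch_to_cache` and the plain `requests`-based download are illustrative stand-ins, not the library's actual API, which additionally handles ETags, resumable downloads, and user-agent headers.

```python
import os
import tempfile

import requests


def fetch_to_cache(url: str, cache_path: str) -> str:
    """Illustrative sketch: download `url` to a temp file, then move it into the cache."""
    cache_dir = os.path.dirname(cache_path)
    os.makedirs(cache_dir, exist_ok=True)

    # Download into a temp file in the cache directory so the final rename stays
    # on the same filesystem (and is therefore atomic).
    with tempfile.NamedTemporaryFile(dir=cache_dir, delete=False) as temp_file:
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                temp_file.write(chunk)

    # The temp file is closed (and therefore flushed) before the move, so no
    # explicit flush() is needed in this sketch.
    os.rename(temp_file.name, cache_path)
    return cache_path
```

In this sketch the temporary file is closed before the rename, which is what makes an explicit `flush()` unnecessary; the deleted lines above suggest a similar ordering in the library, but that is an inference from this hunk alone.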
src/transformers/modeling_albert.py

@@ -579,6 +579,9 @@ class AlbertMLMHead(nn.Module):
         self.decoder = nn.Linear(config.embedding_size, config.vocab_size)
         self.activation = ACT2FN[config.hidden_act]
 
+        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
+        self.decoder.bias = self.bias
+
     def forward(self, hidden_states):
         hidden_states = self.dense(hidden_states)
         hidden_states = self.activation(hidden_states)
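The same three added lines appear in the BERT and RoBERTa LM heads below. A minimal sketch of why the tie matters, assuming the resize logic pads the decoder's bias to the new vocabulary size (the `ToyLMHead` class and the manual padding are illustrative, not the library's actual resize code):

```python
import torch
from torch import nn
from torch.nn import functional as F


class ToyLMHead(nn.Module):
    """Toy stand-in for the MLM heads touched in this commit."""

    def __init__(self, hidden_size: int, vocab_size: int):
        super().__init__()
        self.decoder = nn.Linear(hidden_size, vocab_size, bias=False)
        self.bias = nn.Parameter(torch.zeros(vocab_size))
        # The line this commit adds: `decoder.bias` and `self.bias` now refer to
        # the same Parameter object, so whatever resizes one resizes the other.
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        # In this toy head the tied bias is applied inside the linear layer.
        return self.decoder(hidden_states)


head = ToyLMHead(hidden_size=8, vocab_size=10)

# Rough sketch of what a resize does to the output layer when the vocabulary
# grows (here 10 -> 12): pad the decoder's bias with zeros.
new_vocab_size = 12
head.decoder.bias.data = F.pad(
    head.decoder.bias.data, (0, new_vocab_size - head.decoder.bias.shape[0])
)

# Because of the tie, the head's own `bias` attribute follows along; without it,
# `self.bias` would keep its old size and go out of sync with the resized decoder.
assert head.bias.shape[0] == new_vocab_size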
src/transformers/modeling_bert.py

@@ -481,6 +481,9 @@ class BertLMPredictionHead(nn.Module):
         self.bias = nn.Parameter(torch.zeros(config.vocab_size))
 
+        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
+        self.decoder.bias = self.bias
+
     def forward(self, hidden_states):
         hidden_states = self.transform(hidden_states)
         hidden_states = self.decoder(hidden_states) + self.bias
src/transformers/modeling_roberta.py

@@ -306,6 +306,9 @@ class RobertaLMHead(nn.Module):
         self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
         self.bias = nn.Parameter(torch.zeros(config.vocab_size))
 
+        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
+        self.decoder.bias = self.bias
+
     def forward(self, features, **kwargs):
         x = self.dense(features)
         x = gelu(x)
src/transformers/modeling_t5.py

@@ -286,6 +286,7 @@ class T5Attention(nn.Module):
             bidirectional=not self.is_decoder,
             num_buckets=self.relative_attention_num_buckets,
         )
+        rp_bucket = rp_bucket.to(self.relative_attention_bias.weight.device)
         values = self.relative_attention_bias(rp_bucket)  # shape (qlen, klen, num_heads)
         values = values.permute([2, 0, 1]).unsqueeze(0)  # shape (1, num_heads, qlen, klen)
         return values
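The single added line moves the relative-position bucket indices onto the same device as the `relative_attention_bias` embedding before the lookup. A small illustrative sketch of the failure mode it avoids (only meaningful when a CUDA device is present; the shapes and sizes are arbitrary):

```python
import torch
from torch import nn

if torch.cuda.is_available():
    # Stand-in for the relative attention bias table living on the GPU.
    relative_attention_bias = nn.Embedding(32, 8).to("cuda")

    # Bucket indices are computed with plain torch ops and start out on the CPU.
    rp_bucket = torch.randint(0, 32, (5, 5))

    # Indexing a CUDA embedding with CPU indices raises a device-mismatch error;
    # moving the indices first (the added line) keeps the lookup on the GPU.
    rp_bucket = rp_bucket.to(relative_attention_bias.weight.device)
    values = relative_attention_bias(rp_bucket)  # shape (5, 5, 8)
```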
src/transformers/pipelines.py

@@ -705,9 +705,19 @@ class QuestionAnsweringPipeline(Pipeline):
 
         # Convert inputs to features
         examples = self._args_parser(*texts, **kwargs)
-        features = squad_convert_examples_to_features(
-            examples, self.tokenizer, kwargs["max_seq_len"], kwargs["doc_stride"], kwargs["max_question_len"], False
-        )
-        fw_args = self.inputs_for_model([f.__dict__ for f in features])
-
-        # Manage tensor allocation on correct device
+        features_list = [
+            squad_convert_examples_to_features(
+                [example],
+                self.tokenizer,
+                kwargs["max_seq_len"],
+                kwargs["doc_stride"],
+                kwargs["max_question_len"],
+                False,
+            )
+            for example in examples
+        ]
+        all_answers = []
+        for features, example in zip(features_list, examples):
+            fw_args = self.inputs_for_model([f.__dict__ for f in features])
+
+            # Manage tensor allocation on correct device

@@ -724,13 +734,16 @@ class QuestionAnsweringPipeline(Pipeline):
                         start, end = start.cpu().numpy(), end.cpu().numpy()
 
             answers = []
-            for (example, feature, start_, end_) in zip(examples, features, start, end):
+            for (feature, start_, end_) in zip(features, start, end):
                 # Normalize logits and spans to retrieve the answer
                 start_ = np.exp(start_) / np.sum(np.exp(start_))
                 end_ = np.exp(end_) / np.sum(np.exp(end_))
 
                 # Mask padding and question
-                start_, end_ = start_ * np.abs(np.array(feature.p_mask) - 1), end_ * np.abs(np.array(feature.p_mask) - 1)
+                start_, end_ = (
+                    start_ * np.abs(np.array(feature.p_mask) - 1),
+                    end_ * np.abs(np.array(feature.p_mask) - 1),
+                )
 
                 # TODO : What happens if not possible
                 # Mask CLS

@@ -751,9 +764,12 @@ class QuestionAnsweringPipeline(Pipeline):
                 }
                 for s, e, score in zip(starts, ends, scores)
             ]
-        if len(answers) == 1:
-            return answers[0]
-        return answers
+            answers = sorted(answers, key=lambda x: x["score"], reverse=True)[: kwargs["topk"]]
+            all_answers += answers
+
+        if len(all_answers) == 1:
+            return all_answers[0]
+        return all_answers
 
     def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int) -> Tuple:
         """
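The net effect of these three hunks is that `QuestionAnsweringPipeline` featurizes each example separately, collects the per-example answers in `all_answers`, and only unwraps the list when there is a single answer. A hedged usage sketch (the default model is downloaded over the network, and the output fields shown are those of this era's pipeline):

```python
from transformers import pipeline

# Loads the default question-answering model; requires network access.
nlp = pipeline("question-answering")

context = (
    "Hugging Face is based in New York City. "
    "Its Transformers library is written in Python."
)

# A single question still returns a single answer dict.
single = nlp(question="Where is Hugging Face based?", context=context)
print(single["answer"], single["score"])

# Several questions are featurized per example (the change in the first hunk)
# and return one answer dict per input.
batch = nlp(
    question=["Where is Hugging Face based?", "What language is Transformers written in?"],
    context=[context, context],
)
for answer in batch:
    print(answer["answer"], answer["score"])
```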
tests/test_modeling_common.py

@@ -485,6 +485,8 @@ class ModelTesterMixin:
             self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
             # Check that it actually resizes the embeddings matrix
             self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
+            # Check that the model can still do a forward pass successfully (every parameter should be resized)
+            model(**inputs_dict)
 
             # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
             model_embed = model.resize_token_embeddings(model_vocab_size - 15)

@@ -492,6 +494,11 @@ class ModelTesterMixin:
             # Check that it actually resizes the embeddings matrix
             self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
 
+            # Check that the model can still do a forward pass successfully (every parameter should be resized)
+            # Input ids should be clamped to the maximum size of the vocabulary
+            inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
+            model(**inputs_dict)
+
             # Check that adding and removing tokens has not modified the first part of the embedding matrix.
             models_equal = True
             for p1, p2 in zip(cloned_embeddings, model_embed.weight):
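The new test lines run a forward pass after each resize; after shrinking the vocabulary, the input ids are first clamped so they stay inside the smaller embedding table. A small illustrative sketch of why the clamp is needed (the sizes here are arbitrary):

```python
import torch
from torch import nn

old_vocab_size, new_vocab_size, hidden_size = 100, 85, 16

# Stand-in for the token embeddings after resize_token_embeddings(vocab - 15).
embeddings = nn.Embedding(new_vocab_size, hidden_size)

# Inputs built against the old vocabulary may contain ids >= new_vocab_size,
# which would make the embedding lookup fail with an index error.
input_ids = torch.randint(0, old_vocab_size, (2, 10))

# The added test line: clamp to the largest valid id before the forward pass.
input_ids.clamp_(max=new_vocab_size - 1)
hidden = embeddings(input_ids)  # now safe: shape (2, 10, 16)
```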